PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
control.c
Go to the documentation of this file.
1 
14 #include <stdio.h>
15 #include <stdlib.h>
16 #if defined( _WIN32 ) || defined( _WIN64 )
17 #include "plasmawinthread.h"
18 #else
19 #include <pthread.h>
20 #endif
21 #include "common.h"
22 #include "auxiliary.h"
23 #include "allocate.h"
24 
25 /***************************************************************************/
29 {
30 #ifdef BUSY_WAITING
31  int core;
32 
33  for (core = 0; core < CONTEXT_THREADS_MAX; core++) {
34  plasma->barrier_in[core] = 0;
35  plasma->barrier_out[core] = 0;
36  }
37 #else
38  plasma->barrier_id = 0;
39  plasma->barrier_nblocked_thrds = 0;
40  pthread_mutex_init(&(plasma->barrier_synclock), NULL);
41  pthread_cond_init( &(plasma->barrier_synccond), NULL);
42 #endif
43 }
44 
45 /***************************************************************************/
49 {
50 #ifndef BUSY_WAITING
53 #endif
54 }
55 
56 /***************************************************************************/
60 {
61 #ifdef BUSY_WAITING
62  int core;
63 
64  if (PLASMA_RANK == 0) {
65  for (core = 1; core < PLASMA_SIZE; core++)
66  while (plasma->barrier_in[core] == 0);
67 
68  for (core = 1; core < PLASMA_SIZE; core++)
69  plasma->barrier_in[core] = 0;
70 
71  for (core = 1; core < PLASMA_SIZE; core++)
72  plasma->barrier_out[core] = 1;
73  }
74  else
75  {
76  plasma->barrier_in[PLASMA_RANK] = 1;
77  while (plasma->barrier_out[PLASMA_RANK] == 0);
78  plasma->barrier_out[PLASMA_RANK] = 0;
79  }
80 #else
81  int id;
82 
84  id = plasma->barrier_id;
85  plasma->barrier_nblocked_thrds++;
86  if (plasma->barrier_nblocked_thrds == PLASMA_SIZE) {
87  plasma->barrier_nblocked_thrds = 0;
88  plasma->barrier_id++;
90  }
91  while (id == plasma->barrier_id)
92  pthread_cond_wait(&(plasma->barrier_synccond), &(plasma->barrier_synclock));
94 #endif
95 }
96 
97 /***************************************************************************/
100 void *plasma_parallel_section(void *plasma_ptr)
101 {
102  plasma_context_t *plasma = (plasma_context_t*)(plasma_ptr);
103  PLASMA_enum action;
104 
105  /* Set thread affinity for the worker */
106  plasma_setaffinity(plasma->thread_bind[plasma_rank(plasma)]);
107 
108  plasma_barrier(plasma);
109  while(1) {
111  while ((action = plasma->action) == PLASMA_ACT_STAND_BY)
112  pthread_cond_wait(&plasma->action_condt, &plasma->action_mutex);
114  plasma_barrier(plasma);
115 
116  switch (action) {
117  case PLASMA_ACT_PARALLEL:
118  plasma->parallel_func_ptr(plasma);
119  break;
120  case PLASMA_ACT_DYNAMIC:
121  QUARK_Worker_Loop(plasma->quark, plasma_rank(plasma));
122  break;
123  case PLASMA_ACT_FINALIZE:
124  return NULL;
125  default:
126  plasma_fatal_error("plasma_parallel_section", "undefined action");
127  return NULL;
128  }
129  plasma_barrier(plasma);
130  }
131  return NULL;
132 }
133 
134 /***************************************************************************/
/**
 * Initialise PLASMA without an explicit core binding.
 *
 * Forwards to PLASMA_Init_Affinity() with a NULL binding array.
 *
 * @param cores  number of cores to use, passed through unchanged.
 * @return       whatever PLASMA_Init_Affinity() returns.
 */
int PLASMA_Init(int cores)
{
    int *no_binding = NULL;
    int  result     = PLASMA_Init_Affinity(cores, no_binding);

    return result;
}
157 
158 /***************************************************************************/
184 int PLASMA_Init_Affinity(int cores, int *coresbind)
185 {
187  int status;
188  int core;
189 
190  /* Create context and insert in the context map */
191  plasma = plasma_context_create();
192  if (plasma == NULL) {
193  plasma_fatal_error("PLASMA_Init", "plasma_context_create() failed");
195  }
196  status = plasma_context_insert(plasma, pthread_self());
197  if (status != PLASMA_SUCCESS) {
198  plasma_fatal_error("PLASMA_Init", "plasma_context_insert() failed");
200  }
201  /* Init number of cores and topology */
203 
204  /* Set number of cores */
205  if ( cores < 1 ) {
206  plasma->world_size = plasma_get_numthreads();
207  if ( plasma->world_size == -1 ) {
208  plasma->world_size = 1;
209  plasma_warning("PLASMA_Init", "Could not find the number of cores: the thread number is set to 1");
210  }
211  }
212  else
213  plasma->world_size = cores;
214 
215  if (plasma->world_size <= 0) {
216  plasma_fatal_error("PLASMA_Init", "failed to get system size");
217  return PLASMA_ERR_NOT_FOUND;
218  }
219  /* Check if not more cores than the hard limit */
220  if (plasma->world_size > CONTEXT_THREADS_MAX) {
221  plasma_fatal_error("PLASMA_Init", "not supporting so many cores");
223  }
224 
225  /* Get the size of each NUMA node */
227  while ( ((plasma->world_size)%(plasma->group_size)) != 0 )
228  (plasma->group_size)--;
229 
230  /* Initialize barrier */
231  plasma_barrier_init(plasma);
232 
233  /* Initialize default thread attributes */
234  status = pthread_attr_init(&plasma->thread_attr);
235  if (status != 0) {
236  plasma_fatal_error("PLASMA_Init", "pthread_attr_init() failed");
237  return status;
238  }
239  /* Set scope to system */
241  if (status != 0) {
242  plasma_fatal_error("PLASMA_Init", "pthread_attr_setscope() failed");
243  return status;
244  }
245  /* Set concurrency */
246  status = pthread_setconcurrency(plasma->world_size);
247  if (status != 0) {
248  plasma_fatal_error("PLASMA_Init", "pthread_setconcurrency() failed");
249  return status;
250  }
251  /* Launch threads */
252  memset(plasma->thread_id, 0, CONTEXT_THREADS_MAX*sizeof(pthread_t));
253  if (coresbind != NULL) {
254  memcpy(plasma->thread_bind, coresbind, plasma->world_size*sizeof(int));
255  }
256  else {
258  }
259  /* Assign rank and thread ID for the master */
260  plasma->thread_rank[0] = 0;
261  plasma->thread_id[0] = pthread_self();
262 
263  for (core = 1; core < plasma->world_size; core++) {
264  plasma->thread_rank[core] = core;
266  &plasma->thread_id[core],
267  &plasma->thread_attr,
269  (void*)plasma);
270  }
271  /* Set thread affinity for the master */
272  plasma_setaffinity(plasma->thread_bind[0]);
273 
274  /* Initialize the dynamic scheduler */
275  plasma->quark = QUARK_Setup(plasma->world_size);
276 
277  plasma_barrier(plasma);
278  return PLASMA_SUCCESS;
279 }
280 
281 /***************************************************************************/
294 {
295  int core;
296  int status;
297  void *exitcodep;
299 
300  plasma = plasma_context_self();
301  if (plasma == NULL) {
302  plasma_fatal_error("PLASMA_Finalize()", "PLASMA not initialized");
304  }
305 
306  /* Terminate the dynamic scheduler */
308 
309  /* Free quark structures */
310  QUARK_Free(plasma->quark);
311 
312  /* Set termination action */
314  plasma->action = PLASMA_ACT_FINALIZE;
317 
318  /* Barrier and clear action */
319  plasma_barrier(plasma);
320  plasma->action = PLASMA_ACT_STAND_BY;
321 
322  // Join threads
323  for (core = 1; core < plasma->world_size; core++) {
324  status = pthread_join(plasma->thread_id[core], &exitcodep);
325  if (status != 0) {
326  plasma_fatal_error("PLASMA_Finalize", "pthread_join() failed");
327  return status;
328  }
329  }
330  plasma_barrier_finalize(plasma);
331 
332  /* Destroy thread attributes */
333  status = pthread_attr_destroy(&plasma->thread_attr);
334  if (status != 0)
335  plasma_fatal_error("PLASMA_Finalize", "pthread_attr_destroy() failed");
336 
337  /* Destroy topology */
339 
340  status = plasma_context_remove(plasma, pthread_self());
341  if (status != PLASMA_SUCCESS) {
342  plasma_fatal_error("PLASMA_Finalize", "plasma_context_remove() failed");
343  return status;
344  }
345 
346  /* Restore the concurency */
347  /* actually it's really bad, we shoulde set the concurrency only
348  * if it's not already done and restore it only we had change it */
350 
351  return PLASMA_SUCCESS;
352 }