PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
eztrace_convert_coreblas.c
Go to the documentation of this file.
1 
27 #define _GNU_SOURCE
28 #include <stdio.h>
29 #include <assert.h>
30 #include <GTG.h>
31 #include <ev_codes.h>
32 #include <eztrace_list.h>
33 #include <eztrace_convert.h>
34 #include "coreblas_ev_codes.h"
35 
36 #ifndef min
37 #define min( a, b ) ( (a) < (b) ? (a) : (b) )
38 #endif
39 #ifndef max
40 #define max( a, b ) ( (a) > (b) ? (a) : (b) )
41 #endif
42 
43 #define COREBLAS_STATE "ST_Thread"
44 #define COREBLAS_TASK_NAME "Submitted Tasks counter"
45 #define COREBLAS_TASK_ALIAS "STasks"
46 
47 #define COREBLAS_TASKR_NAME "Global Ready Tasks counter"
48 #define COREBLAS_TASKR_ALIAS "GRTasks"
49 
50 #define COREBLAS_TASKWR_NAME "Local Ready Tasks counter"
51 #define COREBLAS_TASKWR_ALIAS "LRTasks"
52 
53 #define COREBLAS_THREADS_MAX 4096
54 
/*
 * Statistics
 *
 * One accumulator per coreblas event; the table is indexed by the masked
 * event code (see handle_coreblas_stats()).
 */
typedef struct coreblas_stats_s {
    int    nb;   /* number of completed states accumulated so far */
    double sum;  /* sum of the measured durations                 */
    double min;  /* smallest duration seen                        */
    double max;  /* largest duration seen                         */
} coreblas_stats_t;

/* Allocated on the first call to handle_coreblas_stats(); NULL until then. */
static coreblas_stats_t *statsarray = NULL;
66 
/*
 * Per-thread bookkeeping used while computing statistics.
 */
typedef struct coreblas_thrdstate_s {
    unsigned int tid;       /* thread identifier taken from the trace           */
    int          active;    /* masked code of the running state, 0 when idle    */
    double       lasttime;  /* timestamp at which the running state was entered */
} coreblas_thrdstate_t;

/* Table of known threads (allocated by handle_coreblas_stats()) and its fill level. */
static coreblas_thrdstate_t *thrdstate = NULL;
static int nbtrhd = 0;
75 
76 #include "coreblas_string.c"
77 
78 /*
79  * The cases with priority and request need to be handled correctly in GTG
80  * before they can be included here.
81  */
82 #ifdef TRACE_BY_SEQUENCE
83 
/* Capacity of the sequence table. */
#define MAX_SEQUENCE 100

/* One table entry: a sequence identifier and the state name generated for it. */
struct sequence_s {
    uint64_t id;
    char *name;
};
typedef struct sequence_s sequence_t;

/* Sequence table; allocated by sequenceInit() and released by sequenceDestroy(). */
sequence_t *seqtab;
91 gtg_color_t colors[20];
92 
93 void sequenceInit(){
94  seqtab = (sequence_t*)malloc(MAX_SEQUENCE * sizeof(sequence_t));
95  memset(seqtab, 0, MAX_SEQUENCE * sizeof(sequence_t));
96 
97  colors[ 0] = GTG_RED;
98  colors[ 1] = GTG_GREEN;
99  colors[ 2] = GTG_BLUE;
100  colors[ 3] = GTG_WHITE;
101  colors[ 4] = GTG_TEAL;
102  colors[ 5] = GTG_DARKGREY;
103  colors[ 6] = GTG_YELLOW;
104  colors[ 7] = GTG_PURPLE;
105  colors[ 8] = GTG_LIGHTBROWN;
106  colors[ 9] = GTG_DARKBLUE;
107  colors[10] = GTG_PINK;
108  colors[11] = GTG_DARKPINK;
109  colors[12] = GTG_SEABLUE;
110  colors[13] = GTG_KAKI;
111  colors[14] = GTG_REDBLOOD;
112  colors[15] = GTG_BROWN;
113  colors[16] = GTG_GRENAT;
114  colors[17] = GTG_ORANGE;
115  colors[18] = GTG_MAUVE;
116  colors[19] = GTG_LIGHTPINK;
117 
118 }
119 
120 void sequenceDestroy(){
121  int i=0;
122  while( i < MAX_SEQUENCE && seqtab[i].id != 0)
123  {
124  free(seqtab[i].name);
125  i++;
126  }
127  free(seqtab);
128 }
129 
130 char *getSequence(uint64_t seq)
131 {
132  int i=0;
133 
134  while ( (i < MAX_SEQUENCE)
135  && (seqtab[i].id != 0)
136  && (seqtab[i].id != seq) )
137  i++;
138 
139  if (i < MAX_SEQUENCE)
140  {
141  if ( seqtab[i].id == seq )
142  {
143  return seqtab[i].name;
144  }
145  else
146  {
147  seqtab[i].id = seq;
148  if ( asprintf(&(seqtab[i].name), "Sequence%03d", i) < 0 ) {
149  fprintf(stderr, "Failed to create new sequence name\n");
150  exit(-1);
151  }
152 
153  addEntityValue(seqtab[i].name, COREBLAS_STATE, seqtab[i].name, colors[i%20] );
154  return seqtab[i].name;
155  }
156  } else {
157  fprintf(stderr, "WARNING: Too many sequences, you need to increase the limit and recompile\n");
158  return "SequenceOutOfRange";
159  }
160 }
/*
 * HANDLE(func) defines handle_coreblas_<func>_start(ev), which switches the
 * current thread to a new COREBLAS_STATE value.
 *
 * The sequence identifier is read from parameter 2, so the sequence name is
 * used only when the event carries at least two parameters; otherwise the
 * state is named after the kernel itself.
 */
#define HANDLE(func) \
    void handle_coreblas_##func##_start (struct fxt_ev_64 *ev) \
    { \
        FUNC_NAME; \
        INIT_THREAD_ID(_threadstr); \
        if ( GET_NBPARAMS(ev) >= 2 ) { \
            CHANGE() setState (CURRENT, COREBLAS_STATE, _threadstr, getSequence(GET_PARAM(ev, 2))); \
        } else { \
            CHANGE() setState (CURRENT, COREBLAS_STATE, _threadstr, #func); \
        } \
        free(_threadstr); \
    }
173 #else
/*
 * HANDLE(func) defines handle_coreblas_<func>_start(ev), which switches the
 * current thread to the COREBLAS_STATE value named after the kernel.
 *
 * Events with and without parameters are handled the same way in this build,
 * so the former two-branch test (both branches were identical) is collapsed
 * into a single call.
 */
#define HANDLE(func) \
    void handle_coreblas_##func##_start (struct fxt_ev_64 *ev) \
    { \
        FUNC_NAME; \
        INIT_THREAD_ID(_threadstr); \
        CHANGE() setState (CURRENT, COREBLAS_STATE, _threadstr, #func); \
        free(_threadstr); \
        (void)ev; \
    }
186 #endif
187 
188 void handle_coreblas_task (struct fxt_ev_64 *ev)
189 {
190  FUNC_NAME;
191  INIT_PROCESS_ID(process_id);
192  assert( GET_NBPARAMS(ev) == 1 );
193  int value = (int)GET_PARAM(ev, 1);
194  CHANGE() addVar (CURRENT, COREBLAS_TASK_ALIAS, process_id, (varPrec)value);
195  free(process_id);
196 }
197 
198 void handle_coreblas_taskw (struct fxt_ev_64 *ev)
199 {
200  FUNC_NAME;
201  assert( GET_NBPARAMS(ev) == 2 );
202  INIT_PROCESS_ID(process_id);
203  INIT_SPECIFIC_THREAD_ID(thread_id, CUR_ID, (unsigned int)GET_PARAM(ev, 1));
204  int value = (int)GET_PARAM(ev, 2);
205  CHANGE() addVar (CURRENT, COREBLAS_TASKR_ALIAS, process_id, (varPrec)value);
206  CHANGE() addVar (CURRENT, COREBLAS_TASKWR_ALIAS, thread_id, (varPrec)value);
207  free(thread_id);
208  free(process_id);
209 }
210 
/*
 * Handler instantiations: each HANDLE(name) line expands to the definition of
 * handle_coreblas_<name>_start(), which handle_coreblas_events() calls for the
 * matching FUT_COREBLAS_<NAME> code.  Every name listed here also has a state
 * value registered by the initialisation function further down.
 */
211 /* Level 3 Blas */
212 HANDLE(gemm )
213 HANDLE(herk )
214 HANDLE(syrk )
215 HANDLE(hemm )
216 HANDLE(symm )
217 HANDLE(trmm )
218 HANDLE(trsm )
219 HANDLE(her2k)
220 HANDLE(syr2k)
221 
222 /* Level 2 Blas */
223 HANDLE(gemv )
224 HANDLE(gbmv )
225 HANDLE(hemv )
226 HANDLE(hbmv )
227 HANDLE(hpmv )
228 HANDLE(symv )
229 HANDLE(sbmv )
230 HANDLE(spmv )
231 HANDLE(trmv )
232 HANDLE(tbmv )
233 HANDLE(tpmv )
234 HANDLE(trsv )
235 HANDLE(tbsv )
236 HANDLE(tpsv )
237 HANDLE(ger )
238 HANDLE(geru )
239 HANDLE(gerc )
240 HANDLE(her )
241 HANDLE(hpr )
242 HANDLE(her2 )
243 HANDLE(hpr2 )
244 HANDLE(syr )
245 HANDLE(spr )
246 HANDLE(syr2 )
247 HANDLE(spr2 )
248 
249 /* Level 1 BLAS */
250 HANDLE(rotg )
251 HANDLE(rotmg)
252 HANDLE(rot )
253 HANDLE(rotm )
254 HANDLE(swap )
255 HANDLE(scal )
256 HANDLE(copy )
257 HANDLE(axpy )
258 HANDLE(geadd)
259 HANDLE(dot )
260 HANDLE(dotu )
261 HANDLE(dotc )
262 HANDLE(xdot )
263 HANDLE(nrm2 )
264 HANDLE(asum )
265 HANDLE(amax )
266 
267 /* lapack */
268 HANDLE(lacpy)
269 HANDLE(lange)
270 HANDLE(lanhe)
271 HANDLE(lansy)
272 HANDLE(larfb)
273 HANDLE(larft)
274 HANDLE(laswp)
275 HANDLE(lauum)
276 HANDLE(potrf)
277 HANDLE(trtri)
278 HANDLE(laset)
279 
280 /* plasma coreblas */
281 HANDLE(gelqt)
282 HANDLE(geqrt)
283 HANDLE(gessm)
284 HANDLE(getrf)
285 HANDLE(getro)
286 HANDLE(ssssm)
287 HANDLE(titro)
288 HANDLE(trbmm)
289 HANDLE(trgmm)
290 HANDLE(tslqt)
291 HANDLE(tsmlq)
292 HANDLE(tsmqr)
293 HANDLE(tsqrt)
294 HANDLE(tsrfb)
295 HANDLE(tstrf)
296 HANDLE(ttlqt)
297 HANDLE(ttmlq)
298 HANDLE(ttmqr)
299 HANDLE(ttqrt)
300 HANDLE(ttrfb)
301 HANDLE(unmlq)
302 HANDLE(unmqr)
303 HANDLE(getrip)
304 HANDLE(plghe)
305 HANDLE(plgsy)
306 HANDLE(shift)
307 HANDLE(shiftw)
308 HANDLE(swpab)
309 HANDLE(plrnt)
310 
/* Kernels registered after the main PLASMA group (same grouping as in the
 * state table and in the event switch). */
311 HANDLE(brdalg)
312 HANDLE(trdalg)
313 HANDLE(hegst)
314 HANDLE(sygst)
315 HANDLE(herfb)
316 HANDLE(syrfb)
317 
318 void
320 {
321  FUNC_NAME;
322  INIT_THREAD_ID(_threadstr);
323  CHANGE() setState (CURRENT, COREBLAS_STATE, _threadstr, "wait");
324  free(_threadstr);
325 }
326 
/*
 * Declare the coreblas variables and state values to the trace.
 *
 * NOTE(review): the line carrying this function's name and parameter list
 * (listing line 328) is absent from this listing; presumably this is the
 * module's initialisation callback -- confirm against the original file.
 *
 * Registers three variable types (two on "CT_Process", one on "CT_Thread"),
 * then one COREBLAS_STATE value per kernel, each with its display colour, plus
 * the "wait" value used by handle_coreblas_stop().  When TRACE_BY_SEQUENCE is
 * defined it also calls sequenceInit().  Always returns 0.
 */
327 int
329 {
330  addVarType( COREBLAS_TASK_ALIAS, COREBLAS_TASK_NAME, "CT_Process" );
331  addVarType( COREBLAS_TASKR_ALIAS, COREBLAS_TASKR_NAME, "CT_Process" );
332  addVarType( COREBLAS_TASKWR_ALIAS, COREBLAS_TASKWR_NAME, "CT_Thread" );
333 
334  /* Level 3 Blas */
335  addEntityValue("gemm" , COREBLAS_STATE, "gemm" , GTG_YELLOW );
336  addEntityValue("herk" , COREBLAS_STATE, "herk" , GTG_WHITE );
337  addEntityValue("syrk" , COREBLAS_STATE, "syrk" , GTG_WHITE );
338  addEntityValue("hemm" , COREBLAS_STATE, "hemm" , GTG_DARKPINK);
339  addEntityValue("symm" , COREBLAS_STATE, "symm" , GTG_DARKPINK);
340  addEntityValue("trmm" , COREBLAS_STATE, "trmm" , GTG_PURPLE );
341  addEntityValue("trsm" , COREBLAS_STATE, "trsm" , GTG_RED );
342  addEntityValue("her2k", COREBLAS_STATE, "her2k", GTG_PINK );
343  addEntityValue("syr2k", COREBLAS_STATE, "syr2k", GTG_PINK );
344 
345  /* Level 2 Blas */
346  addEntityValue("gemv" , COREBLAS_STATE, "gemv" , GTG_TEAL );
347  addEntityValue("gbmv" , COREBLAS_STATE, "gbmv" , GTG_TEAL );
348  addEntityValue("hemv" , COREBLAS_STATE, "hemv" , GTG_TEAL );
349  addEntityValue("hbmv" , COREBLAS_STATE, "hbmv" , GTG_TEAL );
350  addEntityValue("hpmv" , COREBLAS_STATE, "hpmv" , GTG_TEAL );
351  addEntityValue("symv" , COREBLAS_STATE, "symv" , GTG_TEAL );
352  addEntityValue("sbmv" , COREBLAS_STATE, "sbmv" , GTG_TEAL );
353  addEntityValue("spmv" , COREBLAS_STATE, "spmv" , GTG_TEAL );
354  addEntityValue("trmv" , COREBLAS_STATE, "trmv" , GTG_TEAL );
355  addEntityValue("tbmv" , COREBLAS_STATE, "tbmv" , GTG_TEAL );
356  addEntityValue("tpmv" , COREBLAS_STATE, "tpmv" , GTG_TEAL );
357  addEntityValue("trsv" , COREBLAS_STATE, "trsv" , GTG_ORANGE );
358  addEntityValue("tbsv" , COREBLAS_STATE, "tbsv" , GTG_ORANGE );
359  addEntityValue("tpsv" , COREBLAS_STATE, "tpsv" , GTG_ORANGE );
360  addEntityValue("ger" , COREBLAS_STATE, "ger" , GTG_SEABLUE );
361  addEntityValue("geru" , COREBLAS_STATE, "geru" , GTG_SEABLUE );
362  addEntityValue("gerc" , COREBLAS_STATE, "gerc" , GTG_SEABLUE );
363  addEntityValue("her" , COREBLAS_STATE, "her" , GTG_SEABLUE );
364  addEntityValue("hpr" , COREBLAS_STATE, "hpr" , GTG_SEABLUE );
365  addEntityValue("her2" , COREBLAS_STATE, "her2" , GTG_SEABLUE );
366  addEntityValue("hpr2" , COREBLAS_STATE, "hpr2" , GTG_SEABLUE );
367  addEntityValue("syr" , COREBLAS_STATE, "syr" , GTG_SEABLUE );
368  addEntityValue("spr" , COREBLAS_STATE, "spr" , GTG_SEABLUE );
369  addEntityValue("syr2" , COREBLAS_STATE, "syr2" , GTG_SEABLUE );
370  addEntityValue("spr2" , COREBLAS_STATE, "spr2" , GTG_SEABLUE );
371 
372  /* Level 1 BLAS */
373  addEntityValue("rotg" , COREBLAS_STATE, "rotg" , GTG_PURPLE );
374  addEntityValue("rotmg", COREBLAS_STATE, "rotmg", GTG_PURPLE );
375  addEntityValue("rot" , COREBLAS_STATE, "rot" , GTG_PURPLE );
376  addEntityValue("rotm" , COREBLAS_STATE, "rotm" , GTG_PURPLE );
377  addEntityValue("swap" , COREBLAS_STATE, "swap" , GTG_ORANGE );
378  addEntityValue("scal" , COREBLAS_STATE, "scal" , GTG_ORANGE );
379  addEntityValue("copy" , COREBLAS_STATE, "copy" , GTG_ORANGE );
380  addEntityValue("axpy" , COREBLAS_STATE, "axpy" , GTG_ORANGE );
381  addEntityValue("geadd", COREBLAS_STATE, "geadd", GTG_ORANGE );
382  addEntityValue("dot" , COREBLAS_STATE, "dot" , GTG_LIGHTPINK);
383  addEntityValue("dotu" , COREBLAS_STATE, "dotu" , GTG_LIGHTPINK);
384  addEntityValue("dotc" , COREBLAS_STATE, "dotc" , GTG_LIGHTPINK);
385  addEntityValue("xdot" , COREBLAS_STATE, "xdot" , GTG_LIGHTPINK);
386  addEntityValue("nrm2" , COREBLAS_STATE, "nrm2" , GTG_LIGHTPINK);
387  addEntityValue("asum" , COREBLAS_STATE, "asum" , GTG_LIGHTPINK);
388  addEntityValue("amax" , COREBLAS_STATE, "amax" , GTG_LIGHTPINK);
389 
390  /* Lapack */
391  addEntityValue("lacpy", COREBLAS_STATE, "lacpy", GTG_LIGHTPINK );
392  addEntityValue("lange", COREBLAS_STATE, "lange", GTG_LIGHTPINK );
393  addEntityValue("lanhe", COREBLAS_STATE, "lanhe", GTG_LIGHTPINK );
394  addEntityValue("lansy", COREBLAS_STATE, "lansy", GTG_LIGHTPINK );
395  addEntityValue("larfb", COREBLAS_STATE, "larfb", GTG_YELLOW );
396  addEntityValue("larft", COREBLAS_STATE, "larft", GTG_RED );
397  addEntityValue("laswp", COREBLAS_STATE, "laswp", GTG_ORANGE );
398  addEntityValue("lauum", COREBLAS_STATE, "lauum", GTG_LIGHTPINK );
399  addEntityValue("potrf", COREBLAS_STATE, "potrf", GTG_GREEN );
400  addEntityValue("trtri", COREBLAS_STATE, "trtri", GTG_LIGHTPINK );
401  addEntityValue("laset", COREBLAS_STATE, "laset", GTG_LIGHTPINK );
402 
403  /* PLASMA coreblas */
404  addEntityValue("gelqt", COREBLAS_STATE, "gelqt", GTG_GREEN );
405  addEntityValue("geqrt", COREBLAS_STATE, "geqrt", GTG_GREEN );
406  addEntityValue("gessm", COREBLAS_STATE, "gessm", GTG_BLUE );
407  addEntityValue("getrf", COREBLAS_STATE, "getrf", GTG_GREEN );
408  addEntityValue("getro", COREBLAS_STATE, "getro", GTG_ORANGE );
409  addEntityValue("ssssm", COREBLAS_STATE, "ssssm", GTG_YELLOW );
410  addEntityValue("titro", COREBLAS_STATE, "titro", GTG_LIGHTPINK );
411  addEntityValue("trbmm", COREBLAS_STATE, "trbmm", GTG_BLUE );
412  addEntityValue("trgmm", COREBLAS_STATE, "trgmm", GTG_BLUE );
413  addEntityValue("tslqt", COREBLAS_STATE, "tslqt", GTG_RED );
414  addEntityValue("tsmlq", COREBLAS_STATE, "tsmlq", GTG_YELLOW );
415  addEntityValue("tsmqr", COREBLAS_STATE, "tsmqr", GTG_YELLOW );
416  addEntityValue("tsqrt", COREBLAS_STATE, "tsqrt", GTG_RED );
417  addEntityValue("tsrfb", COREBLAS_STATE, "tsrfb", GTG_BLUE );
418  addEntityValue("tstrf", COREBLAS_STATE, "tstrf", GTG_BLUE );
419  addEntityValue("ttlqt", COREBLAS_STATE, "ttlqt", GTG_REDBLOOD );
420  addEntityValue("ttmlq", COREBLAS_STATE, "ttmlq", GTG_ORANGE );
421  addEntityValue("ttmqr", COREBLAS_STATE, "ttmqr", GTG_ORANGE );
422  addEntityValue("ttqrt", COREBLAS_STATE, "ttqrt", GTG_REDBLOOD );
423  addEntityValue("ttrfb", COREBLAS_STATE, "ttrfb", GTG_SEABLUE );
424  addEntityValue("unmlq", COREBLAS_STATE, "unmlq", GTG_YELLOW );
425  addEntityValue("unmqr", COREBLAS_STATE, "unmqr", GTG_YELLOW );
426  addEntityValue("getrip",COREBLAS_STATE, "getrip",GTG_LIGHTPINK );
427  addEntityValue("plghe", COREBLAS_STATE, "plghe", GTG_LIGHTPINK );
428  addEntityValue("plgsy", COREBLAS_STATE, "plgsy", GTG_LIGHTPINK );
429  addEntityValue("shift", COREBLAS_STATE, "shift", GTG_LIGHTPINK );
430  addEntityValue("shiftw",COREBLAS_STATE, "shiftw",GTG_LIGHTPINK );
431  addEntityValue("swpab", COREBLAS_STATE, "swpab", GTG_LIGHTPINK );
432  addEntityValue("plrnt", COREBLAS_STATE, "plrnt", GTG_LIGHTPINK );
433 
434  addEntityValue("brdalg", COREBLAS_STATE, "brdalg", GTG_LIGHTPINK );
435  addEntityValue("trdalg", COREBLAS_STATE, "trdalg", GTG_LIGHTPINK );
436  addEntityValue("hegst", COREBLAS_STATE, "hegst", GTG_LIGHTPINK );
437  addEntityValue("sygst", COREBLAS_STATE, "sygst", GTG_LIGHTPINK );
438  addEntityValue("herfb", COREBLAS_STATE, "herfb", GTG_LIGHTPINK );
439  addEntityValue("syrfb", COREBLAS_STATE, "syrfb", GTG_LIGHTPINK );
440 
441  /* State shown between two kernels (set by handle_coreblas_stop) */
442  addEntityValue ("wait", COREBLAS_STATE, "wait", GTG_BLACK );
443 
444 #ifdef TRACE_BY_SEQUENCE
 /* Allocates the sequence table and colour palette used by getSequence() */
445  sequenceInit();
446 #endif
447  return 0;
448 }
449 
450 int
451 handle_coreblas_events(struct fxt_ev_64 *ev)
452 {
453 
454  switch (ev->code)
455  {
456  /* Level 3 Blas */
457  case FUT_COREBLAS_GEMM : handle_coreblas_gemm_start(ev); break;
458  case FUT_COREBLAS_HERK : handle_coreblas_herk_start(ev); break;
459  case FUT_COREBLAS_SYRK : handle_coreblas_syrk_start(ev); break;
460  case FUT_COREBLAS_HEMM : handle_coreblas_hemm_start(ev); break;
461  case FUT_COREBLAS_SYMM : handle_coreblas_symm_start(ev); break;
462  case FUT_COREBLAS_TRMM : handle_coreblas_trmm_start(ev); break;
463  case FUT_COREBLAS_TRSM : handle_coreblas_trsm_start(ev); break;
464  case FUT_COREBLAS_HER2K : handle_coreblas_her2k_start(ev); break;
465  case FUT_COREBLAS_SYR2K : handle_coreblas_syr2k_start(ev); break;
466 
467  /* Level 2 Blas */
468  case FUT_COREBLAS_GEMV : handle_coreblas_gemv_start(ev); break;
469  case FUT_COREBLAS_GBMV : handle_coreblas_gbmv_start(ev); break;
470  case FUT_COREBLAS_HEMV : handle_coreblas_hemv_start(ev); break;
471  case FUT_COREBLAS_HBMV : handle_coreblas_hbmv_start(ev); break;
472  case FUT_COREBLAS_HPMV : handle_coreblas_hpmv_start(ev); break;
473  case FUT_COREBLAS_SYMV : handle_coreblas_symv_start(ev); break;
474  case FUT_COREBLAS_SBMV : handle_coreblas_sbmv_start(ev); break;
475  case FUT_COREBLAS_SPMV : handle_coreblas_spmv_start(ev); break;
476  case FUT_COREBLAS_TRMV : handle_coreblas_trmv_start(ev); break;
477  case FUT_COREBLAS_TBMV : handle_coreblas_tbmv_start(ev); break;
478  case FUT_COREBLAS_TPMV : handle_coreblas_tpmv_start(ev); break;
479  case FUT_COREBLAS_TRSV : handle_coreblas_trsv_start(ev); break;
480  case FUT_COREBLAS_TBSV : handle_coreblas_tbsv_start(ev); break;
481  case FUT_COREBLAS_TPSV : handle_coreblas_tpsv_start(ev); break;
482  case FUT_COREBLAS_GER : handle_coreblas_ger_start(ev); break;
483  case FUT_COREBLAS_GERU : handle_coreblas_geru_start(ev); break;
484  case FUT_COREBLAS_GERC : handle_coreblas_gerc_start(ev); break;
485  case FUT_COREBLAS_HER : handle_coreblas_her_start(ev); break;
486  case FUT_COREBLAS_HPR : handle_coreblas_hpr_start(ev); break;
487  case FUT_COREBLAS_HER2 : handle_coreblas_her2_start(ev); break;
488  case FUT_COREBLAS_HPR2 : handle_coreblas_hpr2_start(ev); break;
489  case FUT_COREBLAS_SYR : handle_coreblas_syr_start(ev); break;
490  case FUT_COREBLAS_SPR : handle_coreblas_spr_start(ev); break;
491  case FUT_COREBLAS_SYR2 : handle_coreblas_syr2_start(ev); break;
492  case FUT_COREBLAS_SPR2 : handle_coreblas_spr2_start(ev); break;
493 
494  /* Level 1 BLAS */
495  case FUT_COREBLAS_ROTG : handle_coreblas_rotg_start(ev); break;
496  case FUT_COREBLAS_ROTMG : handle_coreblas_rotmg_start(ev); break;
497  case FUT_COREBLAS_ROT : handle_coreblas_rot_start(ev); break;
498  case FUT_COREBLAS_ROTM : handle_coreblas_rotm_start(ev); break;
499  case FUT_COREBLAS_SWAP : handle_coreblas_swap_start(ev); break;
500  case FUT_COREBLAS_SCAL : handle_coreblas_scal_start(ev); break;
501  case FUT_COREBLAS_COPY : handle_coreblas_copy_start(ev); break;
502  case FUT_COREBLAS_AXPY : handle_coreblas_axpy_start(ev); break;
503  case FUT_COREBLAS_GEADD : handle_coreblas_geadd_start(ev); break;
504  case FUT_COREBLAS_DOT : handle_coreblas_dot_start(ev); break;
505  case FUT_COREBLAS_DOTU : handle_coreblas_dotu_start(ev); break;
506  case FUT_COREBLAS_DOTC : handle_coreblas_dotc_start(ev); break;
507  case FUT_COREBLAS_xDOT : handle_coreblas_xdot_start(ev); break;
508  case FUT_COREBLAS_NRM2 : handle_coreblas_nrm2_start(ev); break;
509  case FUT_COREBLAS_ASUM : handle_coreblas_asum_start(ev); break;
510  case FUT_COREBLAS_AMAX : handle_coreblas_amax_start(ev); break;
511 
512  /* Lapack */
513  case FUT_COREBLAS_LACPY : handle_coreblas_lacpy_start(ev); break;
514  case FUT_COREBLAS_LANGE : handle_coreblas_lange_start(ev); break;
515  case FUT_COREBLAS_LANHE : handle_coreblas_lanhe_start(ev); break;
516  case FUT_COREBLAS_LANSY : handle_coreblas_lansy_start(ev); break;
517  case FUT_COREBLAS_LARFB : handle_coreblas_larfb_start(ev); break;
518  case FUT_COREBLAS_LARFT : handle_coreblas_larft_start(ev); break;
519  case FUT_COREBLAS_LASWP : handle_coreblas_laswp_start(ev); break;
520  case FUT_COREBLAS_LAUUM : handle_coreblas_lauum_start(ev); break;
521  case FUT_COREBLAS_POTRF : handle_coreblas_potrf_start(ev); break;
522  case FUT_COREBLAS_TRTRI : handle_coreblas_trtri_start(ev); break;
523  case FUT_COREBLAS_LASET : handle_coreblas_laset_start(ev); break;
524 
525  /* PLASMA coreblas */
526  case FUT_COREBLAS_GELQT : handle_coreblas_gelqt_start(ev); break;
527  case FUT_COREBLAS_GEQRT : handle_coreblas_geqrt_start(ev); break;
528  case FUT_COREBLAS_GESSM : handle_coreblas_gessm_start(ev); break;
529  case FUT_COREBLAS_GETRF : handle_coreblas_getrf_start(ev); break;
530  case FUT_COREBLAS_GETRO : handle_coreblas_getro_start(ev); break;
531  case FUT_COREBLAS_SSSSM : handle_coreblas_ssssm_start(ev); break;
532  case FUT_COREBLAS_TITRO : handle_coreblas_titro_start(ev); break;
533  case FUT_COREBLAS_TRBMM : handle_coreblas_trbmm_start(ev); break;
534  case FUT_COREBLAS_TRGMM : handle_coreblas_trgmm_start(ev); break;
535  case FUT_COREBLAS_TSLQT : handle_coreblas_tslqt_start(ev); break;
536  case FUT_COREBLAS_TSMLQ : handle_coreblas_tsmlq_start(ev); break;
537  case FUT_COREBLAS_TSMQR : handle_coreblas_tsmqr_start(ev); break;
538  case FUT_COREBLAS_TSQRT : handle_coreblas_tsqrt_start(ev); break;
539  case FUT_COREBLAS_TSRFB : handle_coreblas_tsrfb_start(ev); break;
540  case FUT_COREBLAS_TSTRF : handle_coreblas_tstrf_start(ev); break;
541  case FUT_COREBLAS_TTLQT : handle_coreblas_ttlqt_start(ev); break;
542  case FUT_COREBLAS_TTMLQ : handle_coreblas_ttmlq_start(ev); break;
543  case FUT_COREBLAS_TTMQR : handle_coreblas_ttmqr_start(ev); break;
544  case FUT_COREBLAS_TTQRT : handle_coreblas_ttqrt_start(ev); break;
545  case FUT_COREBLAS_TTRFB : handle_coreblas_ttrfb_start(ev); break;
546  case FUT_COREBLAS_UNMLQ : handle_coreblas_unmlq_start(ev); break;
547  case FUT_COREBLAS_UNMQR : handle_coreblas_unmqr_start(ev); break;
548  case FUT_COREBLAS_GETRIP: handle_coreblas_getrip_start(ev); break;
549  case FUT_COREBLAS_PLGHE : handle_coreblas_plghe_start(ev); break;
550  case FUT_COREBLAS_PLGSY : handle_coreblas_plgsy_start(ev); break;
551  case FUT_COREBLAS_SHIFT : handle_coreblas_shift_start(ev); break;
552  case FUT_COREBLAS_SHIFTW: handle_coreblas_shiftw_start(ev); break;
553  case FUT_COREBLAS_SWPAB : handle_coreblas_swpab_start(ev); break;
554  case FUT_COREBLAS_PLRNT : handle_coreblas_plrnt_start(ev); break;
555 
556  case FUT_COREBLAS_BRDALG : handle_coreblas_brdalg_start(ev); break;
557  case FUT_COREBLAS_TRDALG : handle_coreblas_trdalg_start(ev); break;
558  case FUT_COREBLAS_HEGST : handle_coreblas_hegst_start(ev); break;
559  case FUT_COREBLAS_SYGST : handle_coreblas_sygst_start(ev); break;
560  case FUT_COREBLAS_HERFB : handle_coreblas_herfb_start(ev); break;
561  case FUT_COREBLAS_SYRFB : handle_coreblas_syrfb_start(ev); break;
562 
563  case FUT_COREBLAS_STOP : handle_coreblas_stop(ev); break;
564  case FUT_COREBLAS_TASK : handle_coreblas_task(ev); break;
565  case FUT_COREBLAS_TASKW : handle_coreblas_taskw(ev); break;
566 
567  default:
568  return 0;
569  }
570 
571  return 1;
572 
573 }
574 
/*
 * Empty function: the body contains no statements.
 *
 * NOTE(review): the line carrying the function name and parameter list
 * (listing line 576) is absent from this listing; its position suggests a
 * finalisation counterpart to the initialisation function -- confirm against
 * the original file.
 */
575 void
577 {
578 
579 }
580 
581 
582 int
583 handle_coreblas_stats(struct fxt_ev_64 *ev)
584 {
585  int i;
586  double time;
587 
588  if ( statsarray == NULL ) {
589  statsarray = (coreblas_stats_t *)malloc(COREBLAS_NBMAX_EVENTS * sizeof(coreblas_stats_t));
590  memset(statsarray, 0, COREBLAS_NBMAX_EVENTS * sizeof(coreblas_stats_t));
591 
592  thrdstate = (coreblas_thrdstate_t*)malloc(COREBLAS_THREADS_MAX * sizeof(coreblas_thrdstate_t));
593  memset( thrdstate, 0, COREBLAS_THREADS_MAX * sizeof(coreblas_thrdstate_t));
594  }
595 
596  switch (ev->code)
597  {
598  case FUT_COREBLAS_STOP :
599  {
600  for (i=0; i<nbtrhd; i++) {
601  if ( thrdstate[i].tid == (unsigned int)CUR_THREAD_ID) {
602  if ( thrdstate[i].active == 0 ) {
603  fprintf(stderr, "WARNING: The end of a state appears before the beginning\n");
604  return 0;
605  }
606 
607  time = ( CURRENT - thrdstate[i].lasttime );
608 
609  if( statsarray[ thrdstate[i].active ].nb == 0 ) {
610  statsarray[ thrdstate[i].active ].sum = 0.;
611  statsarray[ thrdstate[i].active ].max = 0.;
612  statsarray[ thrdstate[i].active ].min = 999999999999.;
613  }
614  statsarray[ thrdstate[i].active ].nb++;
615  statsarray[ thrdstate[i].active ].sum += time;
616  statsarray[ thrdstate[i].active ].max = max( statsarray[ thrdstate[i].active ].max, time );
617  statsarray[ thrdstate[i].active ].min = min( statsarray[ thrdstate[i].active ].min, time );
618 
619  thrdstate[i].active = 0;
620  thrdstate[i].lasttime = 0;
621  return 1;
622  }
623  }
624  return 0;
625  }
626  break;
627 
628  case FUT_COREBLAS_TASK :
629  break;
630  case FUT_COREBLAS_TASKW :
631  break;
632 
633  default: /* All the different states */
634  if ( ( (ev->code) & COREBLAS_PREFIX) ) {
635  for (i=0; i<nbtrhd; i++) {
636  if ( thrdstate[i].tid == (unsigned int)CUR_THREAD_ID) {
637  if ( thrdstate[i].active != 0 ) {
638  fprintf(stderr, "WARNING: thread %d change to state %d before to stop previous state %d\n",
639  (int)CUR_THREAD_ID, thrdstate[i].active, (int)( (ev->code) & COREBLAS_MASK_EVENTS));
640  }
641 
642  thrdstate[i].active = (ev->code) & COREBLAS_MASK_EVENTS;
643  thrdstate[i].lasttime = CURRENT;
644  return 1;
645  }
646  }
647 
648  /* Thread not found, we add it */
649  if ( nbtrhd < COREBLAS_THREADS_MAX ) {
650  thrdstate[nbtrhd].tid = (unsigned int)CUR_THREAD_ID;
651  thrdstate[i].active = ev->code & COREBLAS_MASK_EVENTS;
652  thrdstate[nbtrhd].lasttime = CURRENT;
653  nbtrhd++;
654  return 1;
655  }
656  }
657  return 0;
658  }
659 
660  return 1;
661 }
662 
663 /*
664  * Print the results of statistics.
665  */
/*
 * NOTE(review): the function header (listing line 666) and one statement
 * (listing line 669) are absent from this listing; only the visible lines are
 * described here.
 *
 * Prints a "Coreblas Module" banner, then, for every index below
 * COREBLAS_NBMAX_EVENTS whose entry has nb > 0, the label taken from
 * coreblas_stats_strings, the number of calls, the average (sum / nb), the
 * maximum and the minimum.  The values are labelled "ms" in the output.
 *
 * NOTE(review): statsarray is read without a NULL check; it is allocated by
 * handle_coreblas_stats(), so confirm this cannot run before any event has
 * been handled.
 * NOTE(review): "Maximun" / "Minimun" are misspelled in the printed text.
 */
667  int i;
668 
670 
671  printf ( "\nCoreblas Module:\n");
672  printf ( "-----------\n");
673 
674  for(i=0; i<COREBLAS_NBMAX_EVENTS; i++) {
 /* Only events that occurred at least once are reported */
675  if ( statsarray[ i ].nb > 0 ) {
676  printf ( "%s : %d calls\n"
677  "\tAverage time: %.3f ms\n"
678  "\tMaximun time: %.3f ms\n"
679  "\tMinimun time: %.3f ms\n",
680  coreblas_stats_strings[ i ], statsarray[ i ].nb,
681  statsarray[ i ].sum / (double)(statsarray[ i ].nb), statsarray[ i ].max, statsarray[ i ].min);
682  }
683  }
684 }
685 
/* Module descriptor filled in by libinit() and handed to eztrace_convert. */
686 struct eztrace_convert_module coreblas_module;
687 
/*
 * Library constructor: fills coreblas_module, registers it with
 * eztrace_convert_register_module() and prints "module Coreblas loaded".
 * Exits with -1 if the module name or description cannot be allocated.
 *
 * NOTE(review): three statements (listing lines 696, 700 and 708) are absent
 * from this listing; their comments remain below.  Only the handle_stats and
 * module_prefix assignments are visible.
 */
688 void libinit(void) __attribute__ ((constructor));
689 void libinit(void)
690 {
691  /* Specify the initialization function.
692  * This function will be called once all the plugins are loaded
693  * and the trace is started.
694  * This function usually declared StateTypes, LinkTypes, etc.
695  */
 /* NOTE(review): the assignment announced above is missing from this listing */
697 
698  /* Specify the function to call for handling an event
699  */
 /* NOTE(review): the assignment announced above is missing from this listing */
701 
702  /* Specify the function to call for handling an event when eztrace_stats is called
703  */
704  coreblas_module.handle_stats = handle_coreblas_stats;
705 
706  /* Print the results of statistics
707  */
 /* NOTE(review): the assignment announced above is missing from this listing */
709 
710  /* Specify the module prefix */
711  coreblas_module.module_prefix = COREBLAS_EVENTS_ID;
712 
 /* name and description are heap strings created with asprintf() */
713  if ( asprintf(&coreblas_module.name, "coreblas") < 0 ) {
714  fprintf(stderr, "Failed to create module name\n");
715  exit(-1);
716  }
717  if ( asprintf(&coreblas_module.description, "Module for kernels used in PLASMA (BLAS, LAPACK and coreblas)") < 0 ) {
718  fprintf(stderr, "Failed to create module description\n");
719  exit(-1);
720  }
721 
 /* The list token points back at the module that owns it */
722  coreblas_module.token.data = &coreblas_module;
723 
724  /* Register the module to eztrace_convert */
725  eztrace_convert_register_module(&coreblas_module);
726 
727  printf("module Coreblas loaded\n");
728 }
729 
/*
 * Library destructor.
 *
 * Releases the sequence table when the module was built with
 * TRACE_BY_SEQUENCE; does nothing otherwise.
 */
__attribute__ ((destructor)) void libfinalize(void);

void
libfinalize(void)
{
#if defined(TRACE_BY_SEQUENCE)
    sequenceDestroy();
#endif
}