PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
dgecfi2.c
Go to the documentation of this file.
1 
21 #include <sys/types.h>
22 #include "common.h"
23 #include "dgecfi2.h"
24 
/*
 * PLASMA_pdgetmi2 - launch plasma_pdgetmi2 through plasma_parallel_call_10.
 *
 * The eight macro arguments are forwarded together with their types.  The
 * last two call arguments, `sequence` and `request`, are NOT macro
 * parameters: they are picked up by name from the scope of the caller, which
 * must therefore have a PLASMA_sequence* and a PLASMA_request* with exactly
 * those names.
 *
 * The expansion carries no trailing semicolon; the caller supplies it, as
 * every call site in this file already does.
 */
#define PLASMA_pdgetmi2(idep, odep, storev, m, n, mb, nb, A) \
    plasma_parallel_call_10(                                 \
        plasma_pdgetmi2,                                     \
        PLASMA_enum,      (idep),                            \
        PLASMA_enum,      (odep),                            \
        PLASMA_enum,      (storev),                          \
        int,              (m),                               \
        int,              (n),                               \
        int,              (mb),                              \
        int,              (nb),                              \
        double*,          (A),                               \
        PLASMA_sequence*, sequence,                          \
        PLASMA_request*,  request)
38 
/*
 * PLASMA_dshift - call plasma_dshift on A with the four size arguments
 * (n / nb), (m / mb), nb, mb, in that order.
 *
 * `plasma`, `sequence` and `request` are not macro parameters: they are taken
 * by name from the scope of the caller.  mb and nb are used as integer
 * divisors and must be non-zero.
 *
 * The expansion carries no trailing semicolon; the caller supplies it.
 */
#define PLASMA_dshift(m, n, mb, nb, A)                        \
    plasma_dshift(plasma, (m), (n), (A),                      \
                  ( (n) / (nb) ), ( (m) / (mb) ), (nb), (mb), \
                  sequence, request)
43 
/*
 * PLASMA_dshiftr - call plasma_dshift on A with the four size arguments
 * (n / nb), nb, (m / mb), mb, in that order (PLASMA_dshift passes the same
 * four values with the middle two positions arranged differently).
 *
 * `plasma`, `sequence` and `request` are not macro parameters: they are taken
 * by name from the scope of the caller.  mb and nb are used as integer
 * divisors and must be non-zero.
 *
 * The expansion carries no trailing semicolon; the caller supplies it.
 */
#define PLASMA_dshiftr(m, n, mb, nb, A)                       \
    plasma_dshift(plasma, (m), (n), (A),                      \
                  ( (n) / (nb) ), (nb), ( (m) / (mb) ), (mb), \
                  sequence, request)
48 
90 /*
91  * Shift inside panels
92  */
93 int ipt_dcm2ccrb(plasma_context_t *plasma, int m, int n, double *A, int mb, int nb,
94  PLASMA_sequence *sequence, PLASMA_request *request)
95 {
96  if( (m == 0) || (n == 0) )
97  return PLASMA_SUCCESS;
98 
99  PLASMA_dshift(m, n, mb, nb, A);
100  ipt_dpanel2tile(plasma, m, n, A, mb, nb, sequence, request);
101 
102  return PLASMA_SUCCESS;
103 }
104 
105 int ipt_dccrb2cm(plasma_context_t *plasma, int m, int n, double *A, int mb, int nb,
106  PLASMA_sequence *sequence, PLASMA_request *request)
107 {
108  if( (m == 0) || (n == 0) )
109  return PLASMA_SUCCESS;
110 
111  ipt_dtile2panel(plasma, m, n, A, mb, nb, sequence, request);
112  PLASMA_dshiftr(m, n, mb, nb, A);
113 
114  return PLASMA_SUCCESS;
115 }
116 
117 /*
118  * Transpose each tile
119  */
120 int ipt_dccrb2crrb(plasma_context_t *plasma, PLASMA_enum idep, PLASMA_enum odep, int m, int n, double *A, int mb, int nb,
121  PLASMA_sequence *sequence, PLASMA_request *request)
122 {
123  if( (m == 0) || (n == 0) )
124  return PLASMA_SUCCESS;
125 
126  PLASMA_pdgetmi2(idep, odep, PlasmaColumnwise, m, n, mb, nb, A);
127 
128  return PLASMA_SUCCESS;
129 }
130 
131 int ipt_dcrrb2ccrb(plasma_context_t *plasma, PLASMA_enum idep, PLASMA_enum odep, int m, int n, double *A, int mb, int nb,
132  PLASMA_sequence *sequence, PLASMA_request *request)
133 {
134  if( (m == 0) || (n == 0) )
135  return PLASMA_SUCCESS;
136 
137  PLASMA_pdgetmi2(idep, odep, PlasmaRowwise, n, m, nb, mb, A);
138 
139  return PLASMA_SUCCESS;
140 }
141 
142 int ipt_drcrb2rrrb(plasma_context_t *plasma, PLASMA_enum idep, PLASMA_enum odep, int m, int n, double *A, int mb, int nb,
143  PLASMA_sequence *sequence, PLASMA_request *request)
144 {
145  if( (m == 0) || (n == 0) )
146  return PLASMA_SUCCESS;
147 
148  PLASMA_pdgetmi2(idep, odep, PlasmaRowwise, m, n, mb, nb, A);
149 
150  return PLASMA_SUCCESS;
151 }
152 
153 int ipt_drrrb2rcrb(plasma_context_t *plasma, PLASMA_enum idep, PLASMA_enum odep, int m, int n, double *A, int mb, int nb,
154  PLASMA_sequence *sequence, PLASMA_request *request)
155 {
156  if( (m == 0) || (n == 0) )
157  return PLASMA_SUCCESS;
158 
159  PLASMA_pdgetmi2(idep, odep, PlasmaColumnwise, n, m, nb, mb, A);
160 
161  return PLASMA_SUCCESS;
162 }
163 
164 /*
165  * Transpose all tiles
166  */
167 int ipt_dccrb2rcrb(plasma_context_t *plasma, int m, int n, double *A, int mb, int nb,
168  PLASMA_sequence *sequence, PLASMA_request *request)
169 {
170  int M_, N_;
171 
172  if( (m == 0) || (n == 0) )
173  return PLASMA_SUCCESS;
174 
175  M_ = m / mb;
176  N_ = n / nb;
177 
178  /* quick return */
179  if( (M_ < 2) || (N_ < 2) ) {
180  return PLASMA_SUCCESS;
181  }
182 
183  plasma_dshift(plasma, m, n, A, 1, ( m / mb ), ( n / nb ), (mb*nb),
184  sequence, request);
185 
186  return PLASMA_SUCCESS;
187 }
188 
194 /*
195  * Shift inside panels + Transpose all tiles
196  */
197 int ipt_dcm2rcrb(plasma_context_t *plasma, int m, int n, double *A, int mb, int nb,
198  PLASMA_sequence *sequence, PLASMA_request *request)
199 {
200  if( (m == 0) || (n == 0) )
201  return PLASMA_SUCCESS;
202  //ipt_dcm2ccrb( plasma, m, n, A, mb, nb, sequence, request);
203  PLASMA_dshift(m, n, mb, nb, A);
204  ipt_dpanel2all(plasma, m, n, A, mb, nb, sequence, request);
205  ipt_dccrb2rcrb(plasma, m, n, A, mb, nb, sequence, request);
206 
207  return PLASMA_SUCCESS;
208 }
209 
210 int ipt_drcrb2cm(plasma_context_t *plasma, int m, int n, double *A, int mb, int nb,
211  PLASMA_sequence *sequence, PLASMA_request *request)
212 {
213  if( (m == 0) || (n == 0) )
214  return PLASMA_SUCCESS;
215  ipt_drcrb2ccrb(plasma, m, n, A, mb, nb, sequence, request);
216  ipt_dall2panel(plasma, m, n, A, mb, nb, sequence, request);
217  //ipt_dccrb2cm( plasma, m, n, A, mb, nb, sequence, request);
218  PLASMA_dshiftr(m, n, mb, nb, A);
219 
220  return PLASMA_SUCCESS;
221 }
222 
223 /*
224  * Transpose each tile + Transpose all tiles
225  */
226 int ipt_dccrb2rrrb(plasma_context_t *plasma, int m, int n, double *A, int mb, int nb,
227  PLASMA_sequence *sequence, PLASMA_request *request)
228 {
229  if( (m == 0) || (n == 0) )
230  return PLASMA_SUCCESS;
231  ipt_dccrb2rcrb(plasma, m, n, A, mb, nb, sequence, request);
232  ipt_drcrb2rrrb(plasma, PlasmaIPT_All, PlasmaIPT_NoDep, m, n, A, mb, nb, sequence, request);
233 
234  return PLASMA_SUCCESS;
235 }
236 
237 int ipt_drrrb2ccrb(plasma_context_t *plasma, int m, int n, double *A, int mb, int nb,
238  PLASMA_sequence *sequence, PLASMA_request *request)
239 {
240  if( (m == 0) || (n == 0) )
241  return PLASMA_SUCCESS;
242  ipt_drrrb2rcrb(plasma, PlasmaIPT_NoDep, PlasmaIPT_All, m, n, A, mb, nb, sequence, request);
243  ipt_drcrb2ccrb(plasma, m, n, A, mb, nb, sequence, request);
244 
245  return PLASMA_SUCCESS;
246 }
247 
248 int ipt_drcrb2crrb(plasma_context_t *plasma, int m, int n, double *A, int mb, int nb,
249  PLASMA_sequence *sequence, PLASMA_request *request)
250 {
251  if( (m == 0) || (n == 0) )
252  return PLASMA_SUCCESS;
253  ipt_drcrb2ccrb(plasma, m, n, A, mb, nb, sequence, request);
254  ipt_dccrb2crrb(plasma, PlasmaIPT_All, PlasmaIPT_NoDep, m, n, A, mb, nb, sequence, request);
255 
256  return PLASMA_SUCCESS;
257 }
258 
259 int ipt_dcrrb2rcrb(plasma_context_t *plasma, int m, int n, double *A, int mb, int nb,
260  PLASMA_sequence *sequence, PLASMA_request *request)
261 {
262  if( (m == 0) || (n == 0) )
263  return PLASMA_SUCCESS;
264  ipt_dcrrb2ccrb(plasma, PlasmaIPT_NoDep, PlasmaIPT_All, m, n, A, mb, nb, sequence, request);
265  ipt_dccrb2rcrb(plasma, m, n, A, mb, nb, sequence, request);
266 
267  return PLASMA_SUCCESS;
268 }
269 
270 /*
271  * Transpose each tile + Shift inside panels
272  */
273 int ipt_dcm2crrb(plasma_context_t *plasma, int m, int n, double *A, int mb, int nb,
274  PLASMA_sequence *sequence, PLASMA_request *request)
275 {
276  if( (m == 0) || (n == 0) )
277  return PLASMA_SUCCESS;
278  //ipt_dcm2ccrb( plasma, m, n, A, mb, nb, sequence, request);
279  PLASMA_dshift(m, n, mb, nb, A);
280  ipt_dccrb2crrb(plasma, PlasmaIPT_Panel, PlasmaIPT_NoDep, m, n, A, mb, nb, sequence, request);
281 
282  return PLASMA_SUCCESS;
283 }
284 
285 int ipt_dcrrb2cm(plasma_context_t *plasma, int m, int n, double *A, int mb, int nb,
286  PLASMA_sequence *sequence, PLASMA_request *request)
287 {
288  if( (m == 0) || (n == 0) )
289  return PLASMA_SUCCESS;
290  ipt_dcrrb2ccrb(plasma, PlasmaIPT_NoDep, PlasmaIPT_Panel, m, n, A, mb, nb, sequence, request);
291  //ipt_dccrb2cm( plasma, m, n, A, mb, nb, sequence, request);
292  PLASMA_dshiftr(m, n, mb, nb, A);
293 
294  return PLASMA_SUCCESS;
295 }
296 
297 int ipt_drm2rcrb(plasma_context_t *plasma, int m, int n, double *A, int mb, int nb,
298  PLASMA_sequence *sequence, PLASMA_request *request)
299 {
300  if( (m == 0) || (n == 0) )
301  return PLASMA_SUCCESS;
302  ipt_drm2rrrb( plasma, m, n, A, mb, nb, sequence, request);
303  ipt_drrrb2rcrb(plasma, PlasmaIPT_Panel, PlasmaIPT_NoDep, m, n, A, mb, nb, sequence, request);
304 
305  return PLASMA_SUCCESS;
306 }
307 
308 int ipt_drcrb2rm(plasma_context_t *plasma, int m, int n, double *A, int mb, int nb,
309  PLASMA_sequence *sequence, PLASMA_request *request)
310 {
311  if( (m == 0) || (n == 0) )
312  return PLASMA_SUCCESS;
313  ipt_drcrb2rrrb(plasma, PlasmaIPT_NoDep, PlasmaIPT_Panel, m, n, A, mb, nb, sequence, request);
314  ipt_drrrb2rm( plasma, m, n, A, mb, nb, sequence, request);
315 
316  return PLASMA_SUCCESS;
317 }
318 
324 /*
325  * Shift inside panels + Transpose all tiles + Transpose inside each tile
326  */
327 int ipt_dcm2rrrb(plasma_context_t *plasma, int m, int n, double *A, int mb, int nb,
328  PLASMA_sequence *sequence, PLASMA_request *request)
329 {
330  if( (m == 0) || (n == 0) )
331  return PLASMA_SUCCESS;
332  //ipt_dcm2ccrb( plasma, m, n, A, mb, nb, sequence, request);
333  PLASMA_dshift(m, n, mb, nb, A);
334  ipt_dccrb2crrb(plasma, PlasmaIPT_Panel, PlasmaIPT_All, m, n, A, mb, nb, sequence, request);
335  ipt_dcrrb2rrrb(plasma, m, n, A, mb, nb, sequence, request);
336 
337  return PLASMA_SUCCESS;
338 }
339 
340 int ipt_drrrb2cm(plasma_context_t *plasma, int m, int n, double *A, int mb, int nb,
341  PLASMA_sequence *sequence, PLASMA_request *request)
342 {
343  if( (m == 0) || (n == 0) )
344  return PLASMA_SUCCESS;
345  ipt_drrrb2crrb(plasma, m, n, A, mb, nb, sequence, request);
346  ipt_dcrrb2ccrb(plasma, PlasmaIPT_All, PlasmaIPT_Panel, m, n, A, mb, nb, sequence, request);
347  //ipt_dccrb2cm( plasma, m, n, A, mb, nb, sequence, request);
348  PLASMA_dshiftr(m, n, mb, nb, A);
349 
350  return PLASMA_SUCCESS;
351 }
352 
353 int ipt_dccrb2rm(plasma_context_t *plasma, int m, int n, double *A, int mb, int nb,
354  PLASMA_sequence *sequence, PLASMA_request *request)
355 {
356  if( (m == 0) || (n == 0) )
357  return PLASMA_SUCCESS;
358  ipt_dccrb2rcrb(plasma, m, n, A, mb, nb, sequence, request);
359  ipt_drcrb2rrrb(plasma, PlasmaIPT_All, PlasmaIPT_Panel, m, n, A, mb, nb, sequence, request);
360  ipt_drrrb2rm( plasma, m, n, A, mb, nb, sequence, request);
361 
362  return PLASMA_SUCCESS;
363 }
364 
365 int ipt_drm2ccrb(plasma_context_t *plasma, int m, int n, double *A, int mb, int nb,
366  PLASMA_sequence *sequence, PLASMA_request *request)
367 {
368  if( (m == 0) || (n == 0) )
369  return PLASMA_SUCCESS;
370  ipt_drm2rrrb( plasma, m, n, A, mb, nb, sequence, request);
371  ipt_drrrb2rcrb(plasma, PlasmaIPT_Panel, PlasmaIPT_All, m, n, A, mb, nb, sequence, request);
372  ipt_drcrb2ccrb(plasma, m, n, A, mb, nb, sequence, request);
373 
374  return PLASMA_SUCCESS;
375 }
376 
382 /*
383  * Shift inside panels + Transpose all tiles
384  * + Transpose inside each tile + Shift inside panels
385  */
386 int ipt_dcm2rm(plasma_context_t *plasma, int m, int n, double *A, int mb, int nb,
387  PLASMA_sequence *sequence, PLASMA_request *request)
388 {
389  if( (m == 0) || (n == 0) )
390  return PLASMA_SUCCESS;
391  //ipt_dcm2ccrb( plasma, m, n, A, mb, nb, sequence, request);
392  PLASMA_dshift(m, n, mb, nb, A);
393  ipt_dpanel2all(plasma, m, n, A, mb, nb, sequence, request);
394  ipt_dccrb2rcrb(plasma, m, n, A, mb, nb, sequence, request);
395  ipt_drcrb2rrrb(plasma, PlasmaIPT_All, PlasmaIPT_Panel, m, n, A, mb, nb, sequence, request);
396  ipt_drrrb2rm( plasma, m, n, A, mb, nb, sequence, request);
397 
398  return PLASMA_SUCCESS;
399 }
400 
401 int ipt_drm2cm(plasma_context_t *plasma, int m, int n, double *A, int mb, int nb,
402  PLASMA_sequence *sequence, PLASMA_request *request)
403 {
404  if( (m == 0) || (n == 0) )
405  return PLASMA_SUCCESS;
406  ipt_drm2rrrb( plasma, m, n, A, mb, nb, sequence, request);
407  ipt_drrrb2rcrb(plasma, PlasmaIPT_Panel, PlasmaIPT_All, m, n, A, mb, nb, sequence, request);
408  ipt_drcrb2ccrb(plasma, m, n, A, mb, nb, sequence, request);
409  ipt_dall2panel(plasma, m, n, A, mb, nb, sequence, request);
410  //ipt_dccrb2cm( plasma, m, n, A, mb, nb, sequence, request);
411  PLASMA_dshiftr(m, n, mb, nb, A);
412 
413  return PLASMA_SUCCESS;
414 }
415 
416 
423 int ipt_dtile2panel( plasma_context_t *plasma, int m, int n, double *A, int mb, int nb,
424  PLASMA_sequence *sequence, PLASMA_request *request)
425 {
427  return PLASMA_SUCCESS;
428 
429  double *Al;
430  int i,j;
431  int M_ = m / mb;
432  int N_ = n / nb;
433  int bsiz = mb*nb;
434  int psiz = m*nb;
436  QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
437 
439  for(j=0; j<N_; j++) {
440  Al = &(A[psiz*j]);
441 
442  for(i=1; i<M_; i++) {
443 
444 #ifdef TRACE_IPT
445  char str[30];
446  sprintf(str, "Foo2 C2RI %d", i*m*nb);
447 #endif
448  QUARK_Insert_Task(plasma->quark, CORE_foo2_quark, &task_flags,
449  sizeof(double)*psiz, Al, INOUT | GATHERV,
450  sizeof(double)*bsiz, &(Al[i*bsiz]), INOUT,
451 #ifdef TRACE_IPT
452  30, str, VALUE | TASKLABEL,
453  4, "red", VALUE | TASKCOLOR,
454 #endif
455  0);
456  }
457  }
458 
459  return PLASMA_SUCCESS;
460 }
461 
462 int ipt_dpanel2tile( plasma_context_t *plasma, int m, int n, double *A, int mb, int nb,
463  PLASMA_sequence *sequence, PLASMA_request *request )
464 {
466  return PLASMA_SUCCESS;
467 
468  double *Al;
469  int i,j;
470  int M_ = m / mb;
471  int N_ = n / nb;
472  int bsiz = mb*nb;
473  int psiz = m*nb;
475  QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
476 
478  for(j=0; j<N_; j++) {
479  Al = &(A[psiz*j]);
480 
481  for(i=1; i<M_; i++) {
482 
483 #ifdef TRACE_IPT
484  char str[30];
485  sprintf(str, "Foo2 C2RI %d", i*m*nb);
486 #endif
487  QUARK_Insert_Task(plasma->quark, CORE_foo2_quark, &task_flags,
488  sizeof(double)*psiz, Al, INPUT,
489  sizeof(double)*bsiz, &(Al[i*bsiz]), INOUT,
490 #ifdef TRACE_IPT
491  30, str, VALUE | TASKLABEL,
492  4, "red", VALUE | TASKCOLOR,
493 #endif
494  0);
495  }
496  }
497 
498  return PLASMA_SUCCESS;
499 }
500 
501 int ipt_dpanel2all(plasma_context_t *plasma, int m, int n, double *A, int mb, int nb,
502  PLASMA_sequence *sequence, PLASMA_request *request)
503 {
505  return PLASMA_SUCCESS;
506 
507  int i;
508  int N_ = n / nb;
510  QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
511 
512  if ( N_ > 1 ) {
514  for(i=1; i<N_; i++) {
515 #ifdef TRACE_IPT
516  char str[30];
517  sprintf(str, "Foo2 C2RI %d", i*m*nb);
518 #endif
519  QUARK_Insert_Task(plasma->quark, CORE_foo2_quark, &task_flags,
520  sizeof(double)*m*n, A, INOUT | GATHERV,
521  sizeof(double)*m*nb, &(A[i*m*nb]), INPUT,
522 #ifdef TRACE_IPT
523  30, str, VALUE | TASKLABEL,
524  4, "red", VALUE | TASKCOLOR,
525 #endif
526  0);
527  }
528  }
529 
530  return PLASMA_SUCCESS;
531 }
532 
533 int ipt_dall2panel(plasma_context_t *plasma, int m, int n, double *A, int mb, int nb,
534  PLASMA_sequence *sequence, PLASMA_request *request)
535 {
537  return PLASMA_SUCCESS;
538 
539  int i;
540  int N_ = n / nb;
542  QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
543 
544  if ( N_ > 1 ) {
546  for(i=1; i<N_; i++) {
547 #ifdef TRACE_IPT
548  char str[30];
549  sprintf(str, "Foo2 C2RI %d", i*m*nb);
550 #endif
551  QUARK_Insert_Task(plasma->quark, CORE_foo2_quark, &task_flags,
552  sizeof(double)*m*n, A, INPUT,
553  sizeof(double)*m*nb, &(A[i*m*nb]), INOUT,
554 #ifdef TRACE_IPT
555  30, str, VALUE | TASKLABEL,
556  4, "red", VALUE | TASKCOLOR,
557 #endif
558  0);
559  }
560  }
561  return PLASMA_SUCCESS;
562 }