PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
pdtrmm.c
Go to the documentation of this file.
1 
15 #include "common.h"
16 
/*
 * Tile accessors used throughout this file: A(m,n) and B(m,n) expand to
 * BLKADDR applied to the descriptor of the same name, with element type
 * double and tile coordinates (m, n).
 * BLKADDR is defined outside this file; presumably it yields the address of
 * tile (m, n) of the descriptor - confirm against its definition.
 */
17 #define A(m,n) BLKADDR(A, double, m, n)
18 #define B(m,n) BLKADDR(B, double, m, n)
19 
20 #if 0
21 /***************************************************************************/
/*
 * Compiled-out (#if 0) variant of the tiled triangular matrix multiply.
 *
 * NOTE(review): listing lines 22-24 and 26-29 are absent here, so the
 * function signature and the declarations of side, uplo, trans and diag are
 * not visible - presumably dropped when this listing was extracted; confirm
 * against the original file.
 *
 * What the visible body does:
 *  - unpacks side, uplo, trans, diag, alpha, A, B, sequence and request with
 *    plasma_unpack_args_9 and returns at once if sequence->status is not
 *    PLASMA_SUCCESS;
 *  - walks the tiles of B by a linear index that starts at PLASMA_RANK and
 *    advances by PLASMA_SIZE, converting it to (m, n) down the columns of
 *    the B.mt x B.nt tile grid;
 *  - for each tile, calls CORE_dtrmm with a diagonal tile of A, then
 *    CORE_dgemm with beta = zone = 1.0 for the off-diagonal tiles of A
 *    selected by side / uplo / trans.
 *
 * NOTE(review): every lda assignment takes A.lm modulo B.mb rather than
 * A.mb - looks unintended; confirm.
 * NOTE(review): request is unpacked but not otherwise used in the visible
 * body.
 * NOTE(review): the CORE_dgemm calls read B(k, n) tiles that other
 * iterations overwrite with CORE_dtrmm, and no ordering between them is
 * visible here - verify before re-enabling this block.
 */
25 {
30  double alpha;
31  PLASMA_desc A;
32  PLASMA_desc B;
33  PLASMA_sequence *sequence;
34  PLASMA_request *request;
35 
36  int k, m, n;
37  int next_m;
38  int next_n;
39  int lda, ldak, ldb, ldbk;
40  int tempkm, tempkn, tempmm, tempnn;
41 
42  double *lB;
43  double zone = (double)1.0;
44 
45  plasma_unpack_args_9(side, uplo, trans, diag, alpha, A, B, sequence, request);
 /* Nothing to do if the sequence is already flagged as failed. */
46  if (sequence->status != PLASMA_SUCCESS)
47  return;
48 
 /* Convert the starting linear index PLASMA_RANK (presumably this thread's
  * rank - confirm) into tile coordinates: m is the row, n the column. */
49  n = 0;
50  m = PLASMA_RANK;
51  while (m >= B.mt && n < B.nt) {
52  n++;
53  m = m-B.mt;
54  }
55 
 /* One iteration per tile B(m, n) assigned to this index sequence. */
56  while (n < B.nt) {
57  next_m = m;
58  next_n = n;
59 
 /* Coordinates of the tile PLASMA_SIZE linear positions further on. */
60  next_m += PLASMA_SIZE;
61  while (next_m >= B.mt && next_n < B.nt) {
62  next_n++;
63  next_m = next_m - B.mt;
64  }
65 
 /* Tiles in the last tile row/column hold only the leftover rows/columns;
  * all other tiles are full B.mb x B.nb. */
66  tempmm = m == B.mt-1 ? B.m - m * B.mb : B.mb;
67  tempnn = n == B.nt-1 ? B.n - n * B.nb : B.nb;
68  ldb = m < B.lm1 ? B.mb : B.lm%B.mb;
69  lB = B(m, n);
70 
71  if ( side == PlasmaLeft ) {
72  if ( uplo == PlasmaUpper ) {
 /*
  * PlasmaLeft / PlasmaUpper / PlasmaNoTrans
  */
73  if ( trans == PlasmaNoTrans ) {
74  lda = m < A.lm1 ? A.mb : A.lm%B.mb;
75 
76  CORE_dtrmm(
77  side, uplo, trans, diag,
78  tempmm, tempnn,
79  alpha, A(m, m), lda, /* lda * tempkm */
80  lB, ldb); /* ldb * tempnn */
81 
 /* Accumulate the tiles of A to the right of the diagonal. */
82  for (k = m+1; k < A.mt; k++) {
83  tempkn = k == A.nt-1 ? A.n - k * A.nb : A.nb;
84  ldbk = BLKLDD(B, k);
85 
86  CORE_dgemm(
87  trans, PlasmaNoTrans,
88  tempmm, tempnn, tempkn,
89  alpha, A(m, k), lda,
90  B(k, n), ldbk,
91  zone, lB, ldb);
92  }
93  }
94  /*
95  * PlasmaLeft / PlasmaUpper / Plasma[Conj]Trans
96  */
97  else {
98  lda = m < A.lm1 ? A.mb : A.lm%B.mb;
99 
100  CORE_dtrmm(
101  side, uplo, trans, diag,
102  tempmm, tempnn,
103  alpha, A(m, m), lda, /* lda * tempkm */
104  lB, ldb); /* ldb * tempnn */
105 
 /* Accumulate the tiles of A above the diagonal (used transposed). */
106  for (k = 0; k < m; k++) {
107  CORE_dgemm(
108  trans, PlasmaNoTrans,
109  tempmm, tempnn, B.mb,
110  alpha, A(k, m), A.mb,
111  B(k, n), B.mb,
112  zone, lB, ldb);
113  }
114  }
115  }
116  /*
117  * PlasmaLeft / PlasmaLower / PlasmaNoTrans
118  */
119  else {
120  if ( trans == PlasmaNoTrans ) {
121  lda = m < A.lm1 ? A.mb : A.lm%B.mb;
122 
123  CORE_dtrmm(
124  side, uplo, trans, diag,
125  tempmm, tempnn,
126  alpha, A(m, m), lda, /* lda * tempkm */
127  lB, ldb); /* ldb * tempnn */
128 
 /* Accumulate the tiles of A to the left of the diagonal. */
129  for (k = 0; k < m; k++) {
130  CORE_dgemm(
131  trans, PlasmaNoTrans,
132  tempmm, tempnn, B.mb,
133  alpha, A(m, k), lda,
134  B(k, n), B.mb,
135  zone, lB, ldb);
136  }
137  }
138  /*
139  * PlasmaLeft / PlasmaLower / Plasma[Conj]Trans
140  */
141  else {
142  lda = m < A.lm1 ? A.mb : A.lm%B.mb;
143  CORE_dtrmm(
144  side, uplo, trans, diag,
145  tempmm, tempnn,
146  alpha, A(m, m), lda, /* lda * tempkm */
147  lB, ldb); /* ldb * tempnn */
148 
 /* Accumulate the tiles of A below the diagonal (used transposed). */
149  for (k = m+1; k < A.mt; k++) {
150  tempkm = k == A.mt-1 ? A.m - k * A.mb : A.mb;
151  ldak = BLKLDD(A, k);
152  ldbk = BLKLDD(B, k);
153 
154  CORE_dgemm(
155  trans, PlasmaNoTrans,
156  tempmm, tempnn, tempkm,
157  alpha, A(k, m), ldak,
158  B(k, n), ldbk,
159  zone, lB, ldb);
160  }
161  }
162  }
163  }
164  /*
165  * PlasmaRight / PlasmaUpper / PlasmaNoTrans
166  */
167  else {
 /* Right side: A is indexed by the tile column n of B, not the row m. */
168  if (uplo == PlasmaUpper) {
169  if ( trans == PlasmaNoTrans ) {
170  lda = n < A.lm1 ? A.mb : A.lm%B.mb;
171 
172  CORE_dtrmm(
173  side, uplo, trans, diag,
174  tempmm, tempnn,
175  alpha, A(n, n), lda, /* lda * tempkm */
176  lB, ldb); /* ldb * tempnn */
177 
178  for (k = 0; k < n; k++) {
179  CORE_dgemm(
180  PlasmaNoTrans, trans,
181  tempmm, tempnn, B.mb,
182  alpha, B(m, k), ldb,
183  A(k, n), A.mb,
184  zone, lB, ldb);
185  }
186  }
187  /*
188  * PlasmaRight / PlasmaUpper / Plasma[Conj]Trans
189  */
190  else {
191  lda = n < A.lm1 ? A.mb : A.lm%B.mb;
192 
193  CORE_dtrmm(
194  side, uplo, trans, diag,
195  tempmm, tempnn,
196  alpha, A(n, n), lda, /* lda * tempkm */
197  lB, ldb); /* ldb * tempnn */
198 
199  for (k = n+1; k < A.mt; k++) {
200  tempkn = k == A.nt-1 ? A.n - k * A.nb : A.nb;
201 
202  CORE_dgemm(
203  PlasmaNoTrans, trans,
204  tempmm, tempnn, tempkn,
205  alpha, B(m, k), ldb,
206  A(n, k), lda,
207  zone, lB, ldb);
208  }
209  }
210  }
211  /*
212  * PlasmaRight / PlasmaLower / PlasmaNoTrans
213  */
214  else {
215  if (trans == PlasmaNoTrans) {
216  lda = n < A.lm1 ? A.mb : A.lm%B.mb;
217 
218  CORE_dtrmm(
219  side, uplo, trans, diag,
220  tempmm, tempnn,
221  alpha, A(n, n), lda, /* lda * tempkm */
222  lB, ldb); /* ldb * tempnn */
223 
224  for (k = n+1; k < A.mt; k++) {
225  ldak = BLKLDD(A, k);
226  tempkn = k == A.nt-1 ? A.n - k * A.nb : A.nb;
227 
228  CORE_dgemm(
229  PlasmaNoTrans, trans,
230  tempmm, tempnn, tempkn,
231  alpha, B(m, k), ldb,
232  A(k, n), ldak,
233  zone, lB, ldb);
234  }
235  }
236  /*
237  * PlasmaRight / PlasmaLower / Plasma[Conj]Trans
238  */
239  else {
240  lda = n < A.lm1 ? A.mb : A.lm%B.mb;
241 
242  CORE_dtrmm(
243  side, uplo, trans, diag,
244  tempmm, tempnn,
245  alpha, A(n, n), lda, /* lda * tempkm */
246  lB, ldb); /* ldb * tempnn */
247 
248  for (k = 0; k < n; k++) {
249  CORE_dgemm(
250  PlasmaNoTrans, trans,
251  tempmm, tempnn, B.mb,
252  alpha, B(m, k), ldb,
253  A(n, k), lda,
254  zone, lB, ldb);
255  }
256  }
257  }
258  }
259 
 /* Advance to the tile computed at the top of this iteration. */
260  m = next_m;
261  n = next_n;
262  }
263 }
264 #endif
265 
266 /***************************************************************************/
/*
 * Tiled triangular matrix multiply, issued as one call per tile operation.
 *
 * NOTE(review): this listing is missing listing lines 267-270 (the start of
 * the signature, including the function name and the declarations of side,
 * uplo, trans and diag), lines 274-275 (the declarations of plasma and
 * task_flags) and the first line of every call below (e.g. 299, 309), i.e.
 * the callee names. Presumably these are the QUARK task wrappers of the
 * CORE_dtrmm / CORE_dgemm kernels called in the disabled variant above -
 * confirm against the original file.
 *
 * Visible parameters:
 *   alpha     scalar forwarded unchanged to every call.
 *   A         tile descriptor of the triangular operand; its diagonal tile
 *             goes to the first call of each (m, n), its off-diagonal tiles
 *             to the calls in the k loop.
 *   B         tile descriptor of the other operand; B(m, n) is the last tile
 *             argument of every call (presumably updated in place - confirm).
 *   sequence  if sequence->status is not PLASMA_SUCCESS the function returns
 *             before issuing anything; otherwise sequence->quark_sequence is
 *             stored in task_flags under TASK_SEQUENCE.
 *   request   not used in the visible body.
 *
 * side, uplo and trans select one of eight branches; each branch fixes the
 * direction in which the tiles of B are visited (see the per-branch notes).
 */
271  double alpha, PLASMA_desc A, PLASMA_desc B,
272  PLASMA_sequence *sequence, PLASMA_request *request)
273 {
276 
277  int k, m, n;
278  int lda, ldak, ldb, ldbk;
279  int tempkm, tempkn, tempmm, tempnn;
280 
281  double zone = (double)1.0;
282 
283  plasma = plasma_context_self();
 /* Do not issue any work for a sequence that is already flagged as failed. */
284  if (sequence->status != PLASMA_SUCCESS)
285  return;
286  QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
287  /*
288  * PlasmaLeft / PlasmaUpper / PlasmaNoTrans
289  */
290  if (side == PlasmaLeft) {
291  if (uplo == PlasmaUpper) {
292  if (trans == PlasmaNoTrans) {
 /* Tile rows are issued top to bottom: the calls for B(m, n) read
  * B(k, n) only for k > m, whose own calls are issued later. */
293  for (m = 0; m < B.mt; m++) {
 /* The last tile row/column holds only the leftover rows/columns. */
294  tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
295  ldb = BLKLDD(B, m);
296  lda = BLKLDD(A, m);
297  for (n = 0; n < B.nt; n++) {
298  tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
 /* Diagonal tile A(m, m) applied to B(m, n). */
300  plasma->quark, &task_flags,
301  side, uplo, trans, diag,
302  tempmm, tempnn, A.mb,
303  alpha, A(m, m), lda, /* lda * tempkm */
304  B(m, n), ldb); /* ldb * tempnn */
305 
 /* Off-diagonal tiles A(m, k), accumulated with zone = 1.0. */
306  for (k = m+1; k < A.mt; k++) {
307  tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
308  ldbk = BLKLDD(B, k);
310  plasma->quark, &task_flags,
311  trans, PlasmaNoTrans,
312  tempmm, tempnn, tempkn, A.mb,
313  alpha, A(m, k), lda,
314  B(k, n), ldbk,
315  zone, B(m, n), ldb);
316  }
317  }
318  }
319  }
320  /*
321  * PlasmaLeft / PlasmaUpper / Plasma[Conj]Trans
322  */
323  else {
 /* Tile rows are issued bottom to top: the calls for B(m, n) read
  * B(k, n) only for k < m, whose own calls are issued later. */
324  for (m = B.mt-1; m > -1; m--) {
325  tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
326  ldb = BLKLDD(B, m);
327  lda = BLKLDD(A, m);
328  for (n = 0; n < B.nt; n++) {
329  tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
331  plasma->quark, &task_flags,
332  side, uplo, trans, diag,
333  tempmm, tempnn, A.mb,
334  alpha, A(m, m), lda, /* lda * tempkm */
335  B(m, n), ldb); /* ldb * tempnn */
336 
337  for (k = 0; k < m; k++) {
339  plasma->quark, &task_flags,
340  trans, PlasmaNoTrans,
341  tempmm, tempnn, B.mb, A.mb,
342  alpha, A(k, m), A.mb,
343  B(k, n), B.mb,
344  zone, B(m, n), ldb);
345  }
346  }
347  }
348  }
349  }
350  /*
351  * PlasmaLeft / PlasmaLower / PlasmaNoTrans
352  */
353  else {
354  if (trans == PlasmaNoTrans) {
 /* Tile rows are issued bottom to top: the calls for B(m, n) read
  * B(k, n) only for k < m, whose own calls are issued later. */
355  for (m = B.mt-1; m > -1; m--) {
356  tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
357  ldb = BLKLDD(B, m);
358  lda = BLKLDD(A, m);
359  for (n = 0; n < B.nt; n++) {
360  tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
362  plasma->quark, &task_flags,
363  side, uplo, trans, diag,
364  tempmm, tempnn, A.mb,
365  alpha, A(m, m), lda, /* lda * tempkm */
366  B(m, n), ldb); /* ldb * tempnn */
367 
368  for (k = 0; k < m; k++) {
370  plasma->quark, &task_flags,
371  trans, PlasmaNoTrans,
372  tempmm, tempnn, B.mb, A.mb,
373  alpha, A(m, k), lda,
374  B(k, n), B.mb,
375  zone, B(m, n), ldb);
376  }
377  }
378  }
379  }
380  /*
381  * PlasmaLeft / PlasmaLower / Plasma[Conj]Trans
382  */
383  else {
 /* Tile rows are issued top to bottom: the calls for B(m, n) read
  * B(k, n) only for k > m, whose own calls are issued later. */
384  for (m = 0; m < B.mt; m++) {
385  tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
386  ldb = BLKLDD(B, m);
387  lda = BLKLDD(A, m);
388  for (n = 0; n < B.nt; n++) {
389  tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
391  plasma->quark, &task_flags,
392  side, uplo, trans, diag,
393  tempmm, tempnn, A.mb,
394  alpha, A(m, m), lda, /* lda * tempkm */
395  B(m, n), ldb); /* ldb * tempnn */
396 
397  for (k = m+1; k < A.mt; k++) {
398  tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
399  ldak = BLKLDD(A, k);
400  ldbk = BLKLDD(B, k);
402  plasma->quark, &task_flags,
403  trans, PlasmaNoTrans,
404  tempmm, tempnn, tempkm, A.mb,
405  alpha, A(k, m), ldak,
406  B(k, n), ldbk,
407  zone, B(m, n), ldb);
408  }
409  }
410  }
411  }
412  }
413  }
414  /*
415  * PlasmaRight / PlasmaUpper / PlasmaNoTrans
416  */
417  else {
 /* Right side: the outer loop runs over tile columns n of B, and A is
  * indexed by n instead of m. */
418  if (uplo == PlasmaUpper) {
419  if (trans == PlasmaNoTrans) {
 /* Tile columns are issued right to left: the calls for B(m, n) read
  * B(m, k) only for k < n, whose own calls are issued later. */
420  for (n = B.nt-1; n > -1; n--) {
421  tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
422  lda = BLKLDD(A, n);
423  for (m = 0; m < B.mt; m++) {
424  tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
425  ldb = BLKLDD(B, m);
427  plasma->quark, &task_flags,
428  side, uplo, trans, diag,
429  tempmm, tempnn, A.mb,
430  alpha, A(n, n), lda, /* lda * tempkm */
431  B(m, n), ldb); /* ldb * tempnn */
432 
433  for (k = 0; k < n; k++) {
435  plasma->quark, &task_flags,
436  PlasmaNoTrans, trans,
437  tempmm, tempnn, B.mb, A.mb,
438  alpha, B(m, k), ldb,
439  A(k, n), A.mb,
440  zone, B(m, n), ldb);
441  }
442  }
443  }
444  }
445  /*
446  * PlasmaRight / PlasmaUpper / Plasma[Conj]Trans
447  */
448  else {
 /* Tile columns are issued left to right: the calls for B(m, n) read
  * B(m, k) only for k > n, whose own calls are issued later. */
449  for (n = 0; n < B.nt; n++) {
450  tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
451  lda = BLKLDD(A, n);
452  for (m = 0; m < B.mt; m++) {
453  tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
454  ldb = BLKLDD(B, m);
456  plasma->quark, &task_flags,
457  side, uplo, trans, diag,
458  tempmm, tempnn, A.mb,
459  alpha, A(n, n), lda, /* lda * tempkm */
460  B(m, n), ldb); /* ldb * tempnn */
461 
462  for (k = n+1; k < A.mt; k++) {
463  tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
465  plasma->quark, &task_flags,
466  PlasmaNoTrans, trans,
467  tempmm, tempnn, tempkn, A.mb,
468  alpha, B(m, k), ldb,
469  A(n, k), lda,
470  zone, B(m, n), ldb);
471  }
472  }
473  }
474  }
475  }
476  /*
477  * PlasmaRight / PlasmaLower / PlasmaNoTrans
478  */
479  else {
480  if (trans == PlasmaNoTrans) {
 /* Tile columns are issued left to right: the calls for B(m, n) read
  * B(m, k) only for k > n, whose own calls are issued later. */
481  for (n = 0; n < B.nt; n++) {
482  tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
483  lda = BLKLDD(A, n);
484  for (m = 0; m < B.mt; m++) {
485  tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
486  ldb = BLKLDD(B, m);
488  plasma->quark, &task_flags,
489  side, uplo, trans, diag,
490  tempmm, tempnn, A.mb,
491  alpha, A(n, n), lda, /* lda * tempkm */
492  B(m, n), ldb); /* ldb * tempnn */
493 
494  for (k = n+1; k < A.mt; k++) {
495  tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
496  ldak = BLKLDD(A, k);
498  plasma->quark, &task_flags,
499  PlasmaNoTrans, trans,
500  tempmm, tempnn, tempkn, A.mb,
501  alpha, B(m, k), ldb,
502  A(k, n), ldak,
503  zone, B(m, n), ldb);
504  }
505  }
506  }
507  }
508  /*
509  * PlasmaRight / PlasmaLower / Plasma[Conj]Trans
510  */
511  else {
 /* Tile columns are issued right to left: the calls for B(m, n) read
  * B(m, k) only for k < n, whose own calls are issued later. */
512  for (n = B.nt-1; n > -1; n--) {
513  tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
514  lda = BLKLDD(A, n);
515  for (m = 0; m < B.mt; m++) {
516  tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
517  ldb = BLKLDD(B, m);
519  plasma->quark, &task_flags,
520  side, uplo, trans, diag,
521  tempmm, tempnn, A.mb,
522  alpha, A(n, n), lda, /* lda * tempkm */
523  B(m, n), ldb); /* ldb * tempnn */
524 
525  for (k = 0; k < n; k++) {
527  plasma->quark, &task_flags,
528  PlasmaNoTrans, trans,
529  tempmm, tempnn, B.mb, A.mb,
530  alpha, B(m, k), ldb,
531  A(n, k), lda,
532  zone, B(m, n), ldb);
533  }
534  }
535  }
536  }
537  }
538  }
539 }