01: /* ///////////////////////////// P /// L /// A /// S /// M /// A /////////////////////////////// */
02: /* ///                    PLASMA auxiliary routines (version 2.1.0)                          ///
03:  * ///                    Author: Emmanuel Agullo                                            ///
04:  * ///                    Release Date: November, 15th 2009                                  ///
05:  * ///                    PLASMA is a software package provided by Univ. of Tennessee,       ///
06:  * ///                    Univ. of California Berkeley and Univ. of Colorado Denver          /// */
07: /* ///////////////////////////////////////////////////////////////////////////////////////////// */
08: #include "common.h"
09: #include "lapack.h"
10: 
11: /* ///////////////////////////////////////////////////////////////////////////////////////////// */
12: //  Parallel tile DGEMM matrix-matrix operations
13: #define A(m,n) &((double*)A.mat)[A.bsiz*(m)+A.bsiz*A.lmt*(n)]
14: #define B(m,n) &((double*)B.mat)[B.bsiz*(m)+B.bsiz*B.lmt*(n)]
15: #define C(m,n) &((double*)C.mat)[C.bsiz*(m)+C.bsiz*C.lmt*(n)]
16: 
17: void plasma_pdgemm(plasma_context_t *plasma)
18: {
19:     PLASMA_enum transA;
20:     PLASMA_enum transB;
21:     PLASMA_desc A;
22:     PLASMA_desc B;
23:     PLASMA_desc C;
24:     double alpha;
25:     double beta;
26: 
27:     int K, K1, K2, X, X1, X2, Y, Y1, Y2;
28:     int k, m, n;
29:     int next_m;
30:     int next_n;
31: 
32:     plasma_unpack_args_7(transA, transB, alpha, A, B, beta, C);
33: /*
34:     if (A.n != B.m || C.n =! B.n || C.m != A.m ||
35:         A.j != B.i || C.j != B.j || C.i != A.i)
36:         return -1;
37: */
38:     n = 0;
39:     m = PLASMA_RANK;
40:     while (m >= C.mt && n < C.nt) {
41:         n++;
42:         m = m-C.mt;
43:     }
44: 
45:     while (n < C.nt) {
46:         next_m = m;
47:         next_n = n;
48: 
49:         next_m += PLASMA_SIZE;
50:         while (next_m >= C.mt && next_n < C.nt) {
51:             next_n++;
52:             next_m = next_m-C.mt;
53:         }
54: 
55:         X1 = m == 0 ? C.i%C.nb : 0;
56:         Y1 = n == 0 ? C.j%C.nb : 0;
57:         X2 = m == C.mt-1 ? (C.i+C.m-1)%C.nb+1 : C.nb;
58:         Y2 = n == C.nt-1 ? (C.j+C.n-1)%C.nb+1 : C.nb;
59:         X = X2 - X1;
60:         Y = Y2 - Y1;
61: 
62:         for (k = 0 ; k < A.nt ; k++) {
63:             K1 = k == 0 ? A.j%A.nb : 0;
64:             K2 = k == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
65:             K = K2 - K1;
66: 
67:             CORE_dgemm( transA, transB,
68:                         X,
69:                         Y,
70:                         K,
71:                         alpha, A(m, k), A.nb,
72:                                B(k, n), A.nb,
73:                         k == 0 ? beta : ((double) 1.0),  C(m, n), A.nb);
74:         }
75:         m = next_m;
76:         n = next_n;
77:     }
78: }
79: 
80: