01: /* ///////////////////////////// P /// L /// A /// S /// M /// A /////////////////////////////// */
02: /* ///                    PLASMA auxiliary routines (version 2.1.0)                          ///
03:  * ///                    Author: Hatem Ltaief, Jakub Kurzak                                 ///
04:  * ///                    Release Date: November, 15th 2009                                  ///
05:  * ///                    PLASMA is a software package provided by Univ. of Tennessee,       ///
06:  * ///                    Univ. of California Berkeley and Univ. of Colorado Denver          /// */
07: /* ///////////////////////////////////////////////////////////////////////////////////////////// */
08: #include "common.h"
09: 
10: /* ///////////////////////////////////////////////////////////////////////////////////////////// */
11: //  Parallel construction of Q using tile V (application to identity)
/* Tile accessors. Each macro expands to the address of element
 * bsiz*m + bsiz*lmt*n of the storage of the descriptor variable carrying the
 * same name (A, Q or T), with that storage viewed as an array of double.
 * A descriptor variable of that name must therefore be in scope at the point
 * of use. The whole expansion is parenthesized so that the resulting pointer
 * can safely be indexed, offset or dereferenced inside a larger expression. */
#define A(m,n) (&((double*)A.mat)[A.bsiz*(m)+A.bsiz*A.lmt*(n)])
#define Q(m,n) (&((double*)Q.mat)[Q.bsiz*(m)+Q.bsiz*Q.lmt*(n)])
#define T(m,n) (&((double*)T.mat)[T.bsiz*(m)+T.bsiz*T.lmt*(n)])
15: void plasma_pdorglq(plasma_context_t *plasma)
16: {
17:     PLASMA_desc A;
18:     PLASMA_desc Q;
19:     PLASMA_desc T;
20: 
21:     int k, m, n;
22:     double *work;
23: 
24:     // Currently sequential
25:     if (PLASMA_RANK > 0) return;
26: 
27:     plasma_unpack_args_3(A, Q, T);
28:     work = (double *)plasma_private_alloc(plasma, T.mb*T.nb, T.dtyp);
29: 
30:     for (k = min(A.mt, A.nt) - 1; k >= 0; k--) {
31:         for (n = Q.nt - 1; n > k; n--) {
32:             for (m = 0; m < Q.mt; m++) {
33:                 CORE_dssmlq(
34:                     PlasmaRight, PlasmaNoTrans,
35:                     A.nb,
36:                     n == Q.nt-1 ? Q.n-n*Q.nb : Q.nb,
37:                     m == Q.mt-1 ? Q.m-m*Q.nb : Q.nb,
38:                     T.mb,
39:                     k == A.mt-1 ? A.m-k*A.nb : A.nb,
40:                     Q(m, k), Q.nb,
41:                     Q(m, n), Q.nb,
42:                     A(k, n), A.nb,
43:                     T(k, n), T.mb,
44:                     work, T.nb);
45:             }
46:         }
47:         for (m = 0; m < Q.mt; m++) {
48:             CORE_dormlq(
49:                 PlasmaRight, PlasmaNoTrans,
50:                 m == Q.mt-1 ? Q.m-m*Q.nb : Q.nb,
51:                 k == A.nt-1 ? A.n-k*A.nb : A.nb,
52:                 T.mb,
53:                 k == A.mt-1 ? min(A.m, A.n)-k*A.nb : A.nb,
54:                 A(k, k), A.nb,
55:                 T(k, k), T.mb,
56:                 Q(m, k), Q.nb,
57:                 work, T.nb);
58:         }
59:     }
60:     plasma_private_free(plasma, work);
61: }
62: