001: /* ///////////////////////////// P /// L /// A /// S /// M /// A /////////////////////////////// */
002: /* ///                    PLASMA auxiliary routines (version 2.1.0)                          ///
003:  * ///                    Author: Jakub Kurzak, Hatem Ltaief                                 ///
004:  * ///                    Release Date: November, 15th 2009                                  ///
005:  * ///                    PLASMA is a software package provided by Univ. of Tennessee,       ///
006:  * ///                    Univ. of California Berkeley and Univ. of Colorado Denver          /// */
007: /* ///////////////////////////////////////////////////////////////////////////////////////////// */
008: #include "common.h"
009: 
010: /* ///////////////////////////////////////////////////////////////////////////////////////////// */
011: //  Parallel Cholesky factorization
012: #define A(m,n) &((PLASMA_Complex32_t*)A.mat)[A.bsiz*(m)+A.bsiz*A.lmt*(n)]
013: void plasma_pcpotrf(plasma_context_t *plasma)
014: {
015:     PLASMA_enum uplo;
016:     PLASMA_desc A;
017: 
018:     int k, m, n;
019:     int next_k;
020:     int next_m;
021:     int next_n;
022: 
023:     plasma_unpack_args_2(uplo, A);
024:     ss_init(A.nt, A.nt, 0);
025: 
026:     k = 0;
027:     m = PLASMA_RANK;
028:     while (m >= A.nt) {
029:         k++;
030:         m = m-A.nt+k;
031:     }
032:     n = 0;
033: 
034:     while (k < A.nt && m < A.nt && !ss_aborted()) {
035:         next_n = n;
036:         next_m = m;
037:         next_k = k;
038: 
039:         next_n++;
040:         if (next_n > next_k) {
041:             next_m += PLASMA_SIZE;
042:             while (next_m >= A.nt && next_k < A.nt) {
043:                 next_k++;
044:                 next_m = next_m-A.nt+next_k;
045:             }
046:             next_n = 0;
047:         }
048: 
049:         if (m == k) {
050:             if (n == k) {
051:                 if (uplo == PlasmaLower)
052:                     CORE_cpotrf(
053:                         PlasmaLower,
054:                         k == A.nt-1 ? A.n-k*A.nb : A.nb,
055:                         A(k, k), A.nb,
056:                         &PLASMA_INFO);
057:                 else
058:                     CORE_cpotrf(
059:                         PlasmaUpper,
060:                         k == A.nt-1 ? A.n-k*A.nb : A.nb,
061:                         A(k, k), A.nb,
062:                         &PLASMA_INFO);
063:                 if (PLASMA_INFO != 0) {
064:                     PLASMA_INFO += A.nb*k;
065:                     ss_abort();
066:                 }
067:                 ss_cond_set(k, k, 1);
068:             }
069:             else {
070:                 ss_cond_wait(k, n, 1);
071:                 if (uplo == PlasmaLower)
072:                     CORE_cherk(
073:                          PlasmaLower, PlasmaNoTrans,
074:                          k == A.nt-1 ? A.n-k*A.nb : A.nb,
075:                          A.nb,
076:                         -1.0, A(k, n), A.nb,
077:                          1.0, A(k, k), A.nb);
078:                 else
079:                     CORE_cherk(
080:                          PlasmaUpper, PlasmaConjTrans,
081:                          k == A.nt-1 ? A.n-k*A.nb : A.nb,
082:                          A.nb,
083:                         -1.0, A(n, k), A.nb,
084:                          1.0, A(k, k), A.nb);
085:             }
086:         }
087:         else {
088:             if (n == k) {
089:                 ss_cond_wait(k, k, 1);
090:                 if (uplo == PlasmaLower)
091:                     CORE_ctrsm(
092:                         PlasmaRight, PlasmaLower, PlasmaConjTrans, PlasmaNonUnit,
093:                         m == A.nt-1 ? A.n-m*A.nb : A.nb,
094:                         A.nb,
095:                         1.0, A(k, k), A.nb,
096:                              A(m, k), A.nb);
097:                 else
098:                     CORE_ctrsm(
099:                         PlasmaLeft, PlasmaUpper, PlasmaConjTrans, PlasmaNonUnit,
100:                         A.nb,
101:                         m == A.nt-1 ? A.n-m*A.nb : A.nb,
102:                         1.0, A(k, k), A.nb,
103:                              A(k, m), A.nb);
104:                 ss_cond_set(m, k, 1);
105:             }
106:             else {
107:                 ss_cond_wait(k, n, 1);
108:                 ss_cond_wait(m, n, 1);
109:                 if (uplo == PlasmaLower)
110:                     CORE_cgemm(
111:                         PlasmaNoTrans, PlasmaConjTrans,
112:                         m == A.nt-1 ? A.n-m*A.nb : A.nb,
113:                         A.nb,
114:                         A.nb,
115:                        -1.0, A(m, n), A.nb,
116:                              A(k, n), A.nb,
117:                         1.0, A(m, k), A.nb);
118:                 else
119:                     CORE_cgemm(
120:                         PlasmaConjTrans, PlasmaNoTrans,
121:                         A.nb,
122:                         m == A.nt-1 ? A.n-m*A.nb : A.nb,
123:                         A.nb,
124:                        -1.0, A(n, k), A.nb,
125:                              A(n, m), A.nb,
126:                         1.0, A(k, m), A.nb);
127:             }
128:         }
129:         n = next_n;
130:         m = next_m;
131:         k = next_k;
132:     }
133:     ss_finalize();
134: }
135: