001: /* ///////////////////////////// P /// L /// A /// S /// M /// A /////////////////////////////// */
002: /* ///                    PLASMA computational routines (version 2.1.0)                      ///
003:  * ///                    Author: Jakub Kurzak                                               ///
004:  * ///                    Release Date: November, 15th 2009                                  ///
005:  * ///                    PLASMA is a software package provided by Univ. of Tennessee,       ///
006:  * ///                    Univ. of California Berkeley and Univ. of Colorado Denver          /// */
007: /* ///////////////////////////////////////////////////////////////////////////////////////////// */
#include "common.h"

#include <stddef.h>
009: 
010: /* /////////////////////////// P /// U /// R /// P /// O /// S /// E /////////////////////////// */
011: // PLASMA_dtrsm - Computes triangular solve A*X = B or X*A = B
012: 
013: /* ///////////////////// A /// R /// G /// U /// M /// E /// N /// T /// S ///////////////////// */
014: // side     PLASMA_enum (IN)
015: //          Specifies whether A appears on the left or on the right of X:
016: //          = PlasmaLeft:  A*X = B
017: //          = PlasmaRight: X*A = B
018: //
019: // uplo     PLASMA_enum (IN)
020: //          Specifies whether the matrix A is upper triangular or lower triangular:
021: //          = PlasmaUpper: Upper triangle of A is stored;
022: //          = PlasmaLower: Lower triangle of A is stored.
023: //
024: // transA   PLASMA_enum (IN)
//          Specifies whether the matrix A is transposed or not transposed:
//          = PlasmaNoTrans: A is not transposed;
//          = PlasmaTrans:   A is transposed (for real matrices this is also the
//                           conjugate transpose).
029: //
030: // diag     PLASMA_enum (IN)
031: //          Specifies whether or not A is unit triangular:
032: //          = PlasmaNonUnit: A is non unit;
//          = PlasmaUnit:    A is unit.
034: //
035: // N        int (IN)
036: //          The order of the matrix A. N >= 0.
037: //
038: // NRHS     int (IN)
039: //          The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
040: //
041: // A        double* (IN)
042: //          The triangular matrix A. If uplo = PlasmaUpper, the leading N-by-N upper triangular
043: //          part of the array A contains the upper triangular matrix, and the strictly lower
044: //          triangular part of A is not referenced. If uplo = PlasmaLower, the leading N-by-N
045: //          lower triangular part of the array A contains the lower triangular matrix, and the
046: //          strictly upper triangular part of A is not referenced. If diag = PlasmaUnit, the
047: //          diagonal elements of A are also not referenced and are assumed to be 1.
048: //
049: // LDA      int (IN)
050: //          The leading dimension of the array A. LDA >= max(1,N).
051: //
052: // B        double* (INOUT)
053: //          On entry, the N-by-NRHS right hand side matrix B.
054: //          On exit, if return value = 0, the N-by-NRHS solution matrix X.
055: //
// LDB      int (IN)
057: //          The leading dimension of the array B. LDB >= max(1,N).
058: 
059: /* ///////////// R /// E /// T /// U /// R /// N /////// V /// A /// L /// U /// E ///////////// */
060: //          = 0: successful exit
061: //          < 0: if -i, the i-th argument had an illegal value
062: 
063: /* //////////////////////////////////// C /// O /// D /// E //////////////////////////////////// */
064: int PLASMA_dtrsm(PLASMA_enum side, PLASMA_enum uplo, PLASMA_enum transA, PLASMA_enum diag, int N,
065:                  int NRHS, double *A, int LDA, double *B, int LDB)
066: {
067:     int NB, NT, NTRHS;
068:     int status;
069:     double *Abdl;
070:     double *Bbdl;
071:     plasma_context_t *plasma;
072: 
073:     plasma = plasma_context_self();
074:     if (plasma == NULL) {
075:         plasma_fatal_error("PLASMA_dtrsm", "PLASMA not initialized");
076:         return PLASMA_ERR_NOT_INITIALIZED;
077:     }
078:     /* Check input arguments */
079:     if (side != PlasmaLeft && side != PlasmaRight) {
080:         plasma_error("PLASMA_dtrsm", "illegal value of side");
081:         return -1;
082:     }
083:     if (uplo != PlasmaUpper && uplo != PlasmaLower) {
084:         plasma_error("PLASMA_dtrsm", "illegal value of uplo");
085:         return -2;
086:     }
087:     if (transA != PlasmaTrans && transA != PlasmaNoTrans) {
088:         plasma_error("PLASMA_dtrsm", "illegal value of transA");
089:         return -3;
090:     }
091:     if (diag != PlasmaUnit && diag != PlasmaNonUnit) {
092:         plasma_error("PLASMA_dtrsm", "illegal value of diag");
093:         return -4;
094:     }
095:     if (N < 0) {
096:         plasma_error("PLASMA_dtrsm", "illegal value of N");
097:         return -5;
098:     }
099:     if (NRHS < 0) {
100:         plasma_error("PLASMA_dtrsm", "illegal value of NRHS");
101:         return -6;
102:     }
103:     if (LDA < max(1, N)) {
104:         plasma_error("PLASMA_dtrsm", "illegal value of LDA");
105:         return -8;
106:     }
107:     if (LDB < max(1, N)) {
108:         plasma_error("PLASMA_dtrsm", "illegal value of LDB");
109:         return -10;
110:     }
111:     /* Quick return */
112:     if (min(N, NRHS) == 0)
113:         return PLASMA_SUCCESS;
114: 
115:     /* Tune NB depending on M, N & NRHS; Set NBNBSIZE */
116:     status = plasma_tune(PLASMA_FUNC_DPOSV, N, N, NRHS);
117:     if (status != PLASMA_SUCCESS) {
118:         plasma_error("PLASMA_dtrsm", "plasma_tune() failed");
119:         return status;
120:     }
121: 
122:     /* Set NT & NTRHS */
123:     NB = PLASMA_NB;
124:     NT = (N%NB==0) ? (N/NB) : (N/NB+1);
125:     NTRHS = (NRHS%NB==0) ? (NRHS/NB) : (NRHS/NB+1);
126: 
127:     /* Allocate memory for matrices in block layout */
128:     Abdl = (double *)plasma_shared_alloc(plasma, NT*NT*PLASMA_NBNBSIZE, PlasmaRealDouble);
129:     Bbdl = (double *)plasma_shared_alloc(plasma, NT*NTRHS*PLASMA_NBNBSIZE, PlasmaRealDouble);
130:     if (Abdl == NULL || Bbdl == NULL) {
131:         plasma_error("PLASMA_dtrsm", "plasma_shared_alloc() failed");
132:         plasma_shared_free(plasma, Abdl);
133:         plasma_shared_free(plasma, Bbdl);
134:         return PLASMA_ERR_OUT_OF_RESOURCES;
135:     }
136: 
137:     PLASMA_desc descA = plasma_desc_init(
138:         Abdl, PlasmaRealDouble,
139:         PLASMA_NB, PLASMA_NB, PLASMA_NBNBSIZE,
140:         N, N, 0, 0, N, N);
141: 
142:     PLASMA_desc descB = plasma_desc_init(
143:         Bbdl, PlasmaRealDouble,
144:         PLASMA_NB, PLASMA_NB, PLASMA_NBNBSIZE,
145:         N, NRHS, 0, 0, N, NRHS);
146: 
147:     plasma_parallel_call_3(plasma_lapack_to_tile,
148:         double*, A,
149:         int, LDA,
150:         PLASMA_desc, descA);
151: 
152:     plasma_parallel_call_3(plasma_lapack_to_tile,
153:         double*, B,
154:         int, LDB,
155:         PLASMA_desc, descB);
156: 
157:     /* Call the native interface */
158:     status = PLASMA_dtrsm_Tile(PlasmaLeft, uplo, transA, diag, &descA, &descB);
159: 
160:     if (status == PLASMA_SUCCESS)
161:         plasma_parallel_call_3(plasma_tile_to_lapack,
162:             PLASMA_desc, descB,
163:             double*, B,
164:             int, LDB);
165: 
166:     plasma_shared_free(plasma, Abdl);
167:     plasma_shared_free(plasma, Bbdl);
168:     return status;
169: }
170: 
171: /* /////////////////////////// P /// U /// R /// P /// O /// S /// E /////////////////////////// */
172: // PLASMA_dtrsm_Tile - Computes triangular solve A*X = B or X*A = B
173: // All matrices are passed through descriptors. All dimensions are taken from the descriptors.
174: 
175: /* ///////////////////// A /// R /// G /// U /// M /// E /// N /// T /// S ///////////////////// */
176: // side     PLASMA_enum (IN)
177: //          Specifies whether A appears on the left or on the right of X:
178: //          = PlasmaLeft:  A*X = B
179: //          = PlasmaRight: X*A = B
180: //
181: // uplo     PLASMA_enum (IN)
182: //          Specifies whether the matrix A is upper triangular or lower triangular:
183: //          = PlasmaUpper: Upper triangle of A is stored;
184: //          = PlasmaLower: Lower triangle of A is stored.
185: //
186: // transA   PLASMA_enum (IN)
//          Specifies whether the matrix A is transposed or not transposed:
//          = PlasmaNoTrans: A is not transposed;
//          = PlasmaTrans:   A is transposed (for real matrices this is also the
//                           conjugate transpose).
191: //
192: // diag     PLASMA_enum (IN)
193: //          Specifies whether or not A is unit triangular:
194: //          = PlasmaNonUnit: A is non unit;
//          = PlasmaUnit:    A is unit.
196: //
// A        PLASMA_desc* (IN)
198: //          The triangular matrix A. If uplo = PlasmaUpper, the leading N-by-N upper triangular
199: //          part of the array A contains the upper triangular matrix, and the strictly lower
200: //          triangular part of A is not referenced. If uplo = PlasmaLower, the leading N-by-N
201: //          lower triangular part of the array A contains the lower triangular matrix, and the
202: //          strictly upper triangular part of A is not referenced. If diag = PlasmaUnit, the
203: //          diagonal elements of A are also not referenced and are assumed to be 1.
204: //
// B        PLASMA_desc* (INOUT)
206: //          On entry, the N-by-NRHS right hand side matrix B.
207: //          On exit, if return value = 0, the N-by-NRHS solution matrix X.
208: 
209: /* ///////////// R /// E /// T /// U /// R /// N /////// V /// A /// L /// U /// E ///////////// */
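//          = 0: successful exit
//          -1 .. -4: if -i, the i-th argument (side, uplo, transA, diag) had an illegal value
//          PLASMA_ERR_NOT_INITIALIZED: PLASMA has not been initialized
//          PLASMA_ERR_ILLEGAL_VALUE: a descriptor is invalid or its tiles are not square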
211: 
212: /* //////////////////////////////////// C /// O /// D /// E //////////////////////////////////// */
213: int PLASMA_dtrsm_Tile(PLASMA_enum side, PLASMA_enum uplo, PLASMA_enum transA, PLASMA_enum diag,
214:                       PLASMA_desc *A, PLASMA_desc *B)
215: {
216:     PLASMA_desc descA = *A;
217:     PLASMA_desc descB = *B;
218:     plasma_context_t *plasma;
219: 
220:     plasma = plasma_context_self();
221:     if (plasma == NULL) {
222:         plasma_fatal_error("PLASMA_dtrsm_Tile", "PLASMA not initialized");
223:         return PLASMA_ERR_NOT_INITIALIZED;
224:     }
225:     /* Check descriptors for correctness */
226:     if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
227:         plasma_error("PLASMA_dtrsm_Tile", "invalid first descriptor");
228:         return PLASMA_ERR_ILLEGAL_VALUE;
229:     }
230:     if (plasma_desc_check(&descB) != PLASMA_SUCCESS) {
231:         plasma_error("PLASMA_dtrsm_Tile", "invalid second descriptor");
232:         return PLASMA_ERR_ILLEGAL_VALUE;
233:     }
234:     /* Check input arguments */
235:     if (descA.nb != descA.mb || descB.nb != descB.mb) {
236:         plasma_error("PLASMA_dtrsm_Tile", "only square tiles supported");
237:         return PLASMA_ERR_ILLEGAL_VALUE;
238:     }
239:     if (side != PlasmaLeft && side != PlasmaRight) {
240:         plasma_error("PLASMA_dtrsm_Tile", "illegal value of side");
241:         return -1;
242:     }
243:     if (uplo != PlasmaUpper && uplo != PlasmaLower) {
244:         plasma_error("PLASMA_dtrsm_Tile", "illegal value of uplo");
245:         return -2;
246:     }
247:     if (transA != PlasmaTrans && transA != PlasmaNoTrans) {
248:         plasma_error("PLASMA_dtrsm_Tile", "illegal value of transA");
249:         return -3;
250:     }
251:     if (diag != PlasmaUnit && diag != PlasmaNonUnit) {
252:         plasma_error("PLASMA_dtrsm_Tile", "illegal value of diag");
253:         return -4;
254:     }
255:     /* Quick return */
256: /*
257:     if (min(N, NRHS) == 0)
258:         return PLASMA_SUCCESS;
259: */
260:     plasma_parallel_call_7(plasma_pdtrsm,
261:         PLASMA_enum, PlasmaLeft,
262:         PLASMA_enum, uplo,
263:         PLASMA_enum, transA,
264:         PLASMA_enum, diag,
265:         double, 1.0,
266:         PLASMA_desc, descA,
267:         PLASMA_desc, descB);
268: 
269:     return PLASMA_SUCCESS;
270: }
271: