001: /* ///////////////////////////// P /// L /// A /// S /// M /// A /////////////////////////////// */
002: /* ///                    PLASMA computational routines (version 2.1.0)                      ///
003:  * ///                    Author: Jakub Kurzak                                               ///
004:  * ///                    Release Date: November, 15th 2009                                  ///
005:  * ///                    PLASMA is a software package provided by Univ. of Tennessee,       ///
006:  * ///                    Univ. of California Berkeley and Univ. of Colorado Denver          /// */
007: /* ///////////////////////////////////////////////////////////////////////////////////////////// */
008: #include "common.h"
009: 
010: /* /////////////////////////// P /// U /// R /// P /// O /// S /// E /////////////////////////// */
011: // PLASMA_zgeqrs - Compute a minimum-norm solution min || A*X - B || using the RQ factorization
012: // A = R*Q computed by PLASMA_zgeqrf.
013: 
014: /* ///////////////////// A /// R /// G /// U /// M /// E /// N /// T /// S ///////////////////// */
015: // M        int (IN)
016: //          The number of rows of the matrix A. M >= 0.
017: //
018: // N        int (IN)
019: //          The number of columns of the matrix A. N >= M >= 0.
020: //
021: // NRHS     int (IN)
022: //          The number of columns of B. NRHS >= 0.
023: //
024: // A        PLASMA_Complex64_t* (INOUT)
025: //          Details of the QR factorization of the original matrix A as returned by PLASMA_zgeqrf.
026: //
027: // LDA      int (IN)
028: //          The leading dimension of the array A. LDA >= M.
029: //
030: // T        PLASMA_Complex64_t* (IN)
031: //          Auxiliary factorization data, computed by PLASMA_zgeqrf.
032: //
033: // B        PLASMA_Complex64_t* (INOUT)
034: //          On entry, the m-by-nrhs right hand side matrix B.
035: //          On exit, the n-by-nrhs solution matrix X.
036: //
037: // LDB      int (IN)
038: //          The leading dimension of the array B. LDB >= max(1,N).
039: 
040: /* ///////////// R /// E /// T /// U /// R /// N /////// V /// A /// L /// U /// E ///////////// */
041: //          = 0: successful exit
042: //          < 0: if -i, the i-th argument had an illegal value
043: 
044: /* //////////////////////////////////// C /// O /// D /// E //////////////////////////////////// */
045: int PLASMA_zgeqrs(int M, int N, int NRHS, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *T,
046:                   PLASMA_Complex64_t *B, int LDB)
047: {
048:     int NB, MT, NT, NTRHS;
049:     int status;
050:     PLASMA_Complex64_t *Abdl;
051:     PLASMA_Complex64_t *Bbdl;
052:     PLASMA_Complex64_t *Tbdl;
053:     plasma_context_t *plasma;
054: 
055:     plasma = plasma_context_self();
056:     if (plasma == NULL) {
057:         plasma_fatal_error("PLASMA_zgeqrs", "PLASMA not initialized");
058:         return PLASMA_ERR_NOT_INITIALIZED;
059:     }
060:     /* Check input arguments */
061:     if (M < 0) {
062:         plasma_error("PLASMA_zgeqrs", "illegal value of M");
063:         return -1;
064:     }
065:     if (N < 0 || N > M) {
066:         plasma_error("PLASMA_zgeqrs", "illegal value of N");
067:         return -2;
068:     }
069:     if (NRHS < 0) {
070:         plasma_error("PLASMA_zgeqrs", "illegal value of N");
071:         return -3;
072:     }
073:     if (LDA < max(1, M)) {
074:         plasma_error("PLASMA_zgeqrs", "illegal value of LDA");
075:         return -5;
076:     }
077:     if (LDB < max(1, max(1, M))) {
078:         plasma_error("PLASMA_zgeqrs", "illegal value of LDB");
079:         return -8;
080:     }
081:     /* Quick return */
082:     if (min(M, min(N, NRHS)) == 0) {
083:         return PLASMA_SUCCESS;
084:     }
085: 
086:     /* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */
087:     status = plasma_tune(PLASMA_FUNC_ZGELS, M, N, NRHS);
088:     if (status != PLASMA_SUCCESS) {
089:         plasma_error("PLASMA_zgeqrs", "plasma_tune() failed");
090:         return status;
091:     }
092: 
093:     /* Set MT, NT & NTRHS */
094:     NB = PLASMA_NB;
095:     MT = (M%NB==0) ? (M/NB) : (M/NB+1);
096:     NT = (N%NB==0) ? (N/NB) : (N/NB+1);
097:     NTRHS = (NRHS%NB==0) ? (NRHS/NB) : (NRHS/NB+1);
098: 
099:     /* Allocate memory for matrices in block layout */
100:     Abdl = (PLASMA_Complex64_t *)plasma_shared_alloc(plasma, MT*NT*PLASMA_NBNBSIZE, PlasmaComplexDouble);
101:     Tbdl = (PLASMA_Complex64_t *)plasma_shared_alloc(plasma, MT*NT*PLASMA_IBNBSIZE, PlasmaComplexDouble);
102:     Bbdl = (PLASMA_Complex64_t *)plasma_shared_alloc(plasma, max(MT, NT)*NTRHS*PLASMA_NBNBSIZE, PlasmaComplexDouble);
103:     if (Abdl == NULL || Tbdl == NULL || Bbdl == NULL) {
104:         plasma_error("PLASMA_zgeqrs", "plasma_shared_alloc() failed");
105:         plasma_shared_free(plasma, Abdl);
106:         plasma_shared_free(plasma, Tbdl);
107:         plasma_shared_free(plasma, Bbdl);
108:         return PLASMA_ERR_OUT_OF_RESOURCES;
109:     }
110: 
111:     PLASMA_desc descA = plasma_desc_init(
112:         Abdl, PlasmaComplexDouble,
113:         PLASMA_NB, PLASMA_NB, PLASMA_NBNBSIZE,
114:         M, N, 0, 0, M, N);
115: 
116:     PLASMA_desc descB = plasma_desc_init(
117:         Bbdl, PlasmaComplexDouble,
118:         PLASMA_NB, PLASMA_NB, PLASMA_NBNBSIZE,
119:         M, NRHS, 0, 0, M, NRHS);
120: 
121:     PLASMA_desc descT = plasma_desc_init(
122:         Tbdl, PlasmaComplexDouble,
123:         PLASMA_IB, PLASMA_NB, PLASMA_IBNBSIZE,
124:         M, N, 0, 0, M, N);
125: 
126:     plasma_parallel_call_3(plasma_lapack_to_tile,
127:         PLASMA_Complex64_t*, A,
128:         int, LDA,
129:         PLASMA_desc, descA);
130: 
131:     plasma_parallel_call_3(plasma_lapack_to_tile,
132:         PLASMA_Complex64_t*, B,
133:         int, LDB,
134:         PLASMA_desc, descB);
135: 
136:     /* Receive T from the user */
137:     plasma_memcpy(Tbdl, T, MT*NT*PLASMA_IBNBSIZE, PlasmaComplexDouble);
138: 
139:     /* Call the native interface */
140:     status = PLASMA_zgeqrs_Tile(&descA, &descT, &descB);
141: 
142:     if (status == PLASMA_SUCCESS) {
143:         plasma_parallel_call_3(plasma_tile_to_lapack,
144:             PLASMA_desc, descA,
145:             PLASMA_Complex64_t*, A,
146:             int, LDA);
147: 
148:         plasma_parallel_call_3(plasma_tile_to_lapack,
149:             PLASMA_desc, descB,
150:             PLASMA_Complex64_t*, B,
151:             int, LDB);
152:     }
153:     plasma_shared_free(plasma, Abdl);
154:     plasma_shared_free(plasma, Tbdl);
155:     plasma_shared_free(plasma, Bbdl);
156:     return status;
157: }
158: 
159: /* /////////////////////////// P /// U /// R /// P /// O /// S /// E /////////////////////////// */
160: // PLASMA_zgeqrs_Tile - Compute a minimum-norm solution min || A*X - B || using the RQ factorization
161: // A = R*Q computed by PLASMA_zgeqrf_Tile.
162: // All matrices are passed through descriptors. All dimensions are taken from the descriptors.
163: 
164: /* ///////////////////// A /// R /// G /// U /// M /// E /// N /// T /// S ///////////////////// */
165: // A        PLASMA_Complex64_t* (INOUT)
166: //          Details of the QR factorization of the original matrix A as returned by PLASMA_zgeqrf.
167: //
168: // T        PLASMA_Complex64_t* (IN)
169: //          Auxiliary factorization data, computed by PLASMA_zgeqrf.
170: //
171: // B        PLASMA_Complex64_t* (INOUT)
172: //          On entry, the m-by-nrhs right hand side matrix B.
173: //          On exit, the n-by-nrhs solution matrix X.
174: 
175: /* ///////////// R /// E /// T /// U /// R /// N /////// V /// A /// L /// U /// E ///////////// */
176: //          = 0: successful exit
177: 
178: /* //////////////////////////////////// C /// O /// D /// E //////////////////////////////////// */
179: int PLASMA_zgeqrs_Tile(PLASMA_desc *A, PLASMA_desc *T, PLASMA_desc *B)
180: {
181:     PLASMA_desc descA = *A;
182:     PLASMA_desc descT = *T;
183:     PLASMA_desc descB = *B;
184:     plasma_context_t *plasma;
185: 
186:     plasma = plasma_context_self();
187:     if (plasma == NULL) {
188:         plasma_fatal_error("PLASMA_zgeqrs_Tile", "PLASMA not initialized");
189:         return PLASMA_ERR_NOT_INITIALIZED;
190:     }
191:     /* Check descriptors for correctness */
192:     if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
193:         plasma_error("PLASMA_zgeqrs_Tile", "invalid first descriptor");
194:         return PLASMA_ERR_ILLEGAL_VALUE;
195:     }
196:     if (plasma_desc_check(&descT) != PLASMA_SUCCESS) {
197:         plasma_error("PLASMA_zgeqrs_Tile", "invalid second descriptor");
198:         return PLASMA_ERR_ILLEGAL_VALUE;
199:     }
200:     if (plasma_desc_check(&descB) != PLASMA_SUCCESS) {
201:         plasma_error("PLASMA_zgeqrs_Tile", "invalid third descriptor");
202:         return PLASMA_ERR_ILLEGAL_VALUE;
203:     }
204:     /* Check input arguments */
205:     if (descA.nb != descA.mb || descB.nb != descB.mb) {
206:         plasma_error("PLASMA_zgeqrs_Tile", "only square tiles supported");
207:         return PLASMA_ERR_ILLEGAL_VALUE;
208:     }
209:     /* Quick return */
210: /*
211:     if (min(M, min(N, NRHS)) == 0) {
212:         return PLASMA_SUCCESS;
213:     }
214: */
215:     plasma_parallel_call_3(plasma_pzunmqr,
216:         PLASMA_desc, descA,
217:         PLASMA_desc, descB,
218:         PLASMA_desc, descT);
219: 
220:     plasma_parallel_call_7(plasma_pztrsm,
221:         PLASMA_enum, PlasmaLeft,
222:         PLASMA_enum, PlasmaUpper,
223:         PLASMA_enum, PlasmaNoTrans,
224:         PLASMA_enum, PlasmaNonUnit,
225:         PLASMA_Complex64_t, 1.0,
226:         PLASMA_desc, descA,
227:         PLASMA_desc, descB);
228: 
229:     return PLASMA_SUCCESS;
230: }
231: