001: /* ///////////////////////////// P /// L /// A /// S /// M /// A /////////////////////////////// */
002: /* ///                    PLASMA computational routines (version 2.1.0)                      ///
003:  * ///                    Author: Hatem Ltaief, Jakub Kurzak                                 ///
004:  * ///                    Release Date: November, 15th 2009                                  ///
005:  * ///                    PLASMA is a software package provided by Univ. of Tennessee,       ///
006:  * ///                    Univ. of California Berkeley and Univ. of Colorado Denver          /// */
007: /* ///////////////////////////////////////////////////////////////////////////////////////////// */
008: #include "common.h"
009: 
010: /* /////////////////////////// P /// U /// R /// P /// O /// S /// E /////////////////////////// */
011: // PLASMA_dorgqr - Generates an M-by-N matrix Q with orthonormal columns, which is defined as the
012: // first N columns of a product of the elementary reflectors returned by PLASMA_dgeqrf.
013: 
014: /* ///////////////////// A /// R /// G /// U /// M /// E /// N /// T /// S ///////////////////// */
015: // M        int (IN)
016: //          The number of rows of the matrix Q. M >= 0.
017: //
018: // N        int (IN)
019: //          The number of columns of the matrix Q. N >= M.
020: //
021: // K        int (IN)
022: //          The number of columns of elementary tile reflectors whose product defines the matrix Q.
023: //          M >= K >= 0.
024: //
025: // A        double* (IN)
026: //          Details of the QR factorization of the original matrix A as returned by PLASMA_dgeqrf.
027: //
028: // LDA      int (IN)
029: //          The leading dimension of the array A. LDA >= max(1,M).
030: //
031: // T        double* (IN)
032: //          Auxiliary factorization data, computed by PLASMA_dgeqrf.
033: //
034: // B        double* (OUT)
035: //          On exit, the M-by-N matrix Q.
036: //
037: // LDB      int (IN)
038: //          The leading dimension of the array B. LDB >= max(1,M).
039: 
040: /* ///////////// R /// E /// T /// U /// R /// N /////// V /// A /// L /// U /// E ///////////// */
041: //          = 0: successful exit
042: //          < 0: if -i, the i-th argument had an illegal value
043: 
044: /* //////////////////////////////////// C /// O /// D /// E //////////////////////////////////// */
045: int PLASMA_dorgqr(int M, int N, int K, double *A, int LDA, double *T,
046:                   double *B, int LDB)
047: {
048:     int NB, MT, NT, KT;
049:     int status;
050:     double *Abdl;
051:     double *Bbdl;
052:     double *Tbdl;
053:     plasma_context_t *plasma;
054: 
055:     plasma = plasma_context_self();
056:     if (plasma == NULL) {
057:         plasma_fatal_error("PLASMA_dorgqr", "PLASMA not initialized");
058:         return PLASMA_ERR_NOT_INITIALIZED;
059:     }
060:     if (M < 0) {
061:         plasma_error("PLASMA_dorgqr", "illegal value of M");
062:         return -1;
063:     }
064:     if (N < 0 || N > M) {
065:         plasma_error("PLASMA_dorgqr", "illegal value of N");
066:         return -2;
067:     }
068:     if (K < 0 || K > N) {
069:         plasma_error("PLASMA_dorgqr", "illegal value of K");
070:         return -3;
071:     }
072:     if (LDA < max(1, M)) {
073:         plasma_error("PLASMA_dorgqr", "illegal value of LDA");
074:         return -5;
075:     }
076:     if (LDB < max(1, M)) {
077:         plasma_error("PLASMA_dorgqr", "illegal value of LDB");
078:         return -8;
079:     }
080:     if (min(M, min(N, K)) == 0)
081:         return PLASMA_SUCCESS;
082: 
083:     /* Tune NB & IB depending on M & N; Set NBNBSIZE */
084:     status = plasma_tune(PLASMA_FUNC_DGELS, M, N, 0);
085:     if (status != PLASMA_SUCCESS) {
086:         plasma_error("PLASMA_dorgqr", "plasma_tune() failed");
087:         return status;
088:     }
089: 
090:     /* Set MT & NT */
091:     NB = PLASMA_NB;
092:     MT = (M%NB==0) ? (M/NB) : (M/NB+1);
093:     NT = (N%NB==0) ? (N/NB) : (N/NB+1);
094:     KT = (K%NB==0) ? (K/NB) : (K/NB+1);
095: 
096:     /* Allocate memory for matrices in block layout */
097:     Abdl = (double *)plasma_shared_alloc(plasma, MT*KT*PLASMA_NBNBSIZE, PlasmaRealDouble);
098:     Tbdl = (double *)plasma_shared_alloc(plasma, MT*NT*PLASMA_IBNBSIZE, PlasmaRealDouble);
099:     Bbdl = (double *)plasma_shared_alloc(plasma, MT*NT*PLASMA_NBNBSIZE, PlasmaRealDouble);
100:     if (Abdl == NULL || Tbdl == NULL || Bbdl == NULL) {
101:         plasma_error("PLASMA_dorgqr", "plasma_shared_alloc() failed");
102:         plasma_shared_free(plasma, Abdl);
103:         plasma_shared_free(plasma, Tbdl);
104:         plasma_shared_free(plasma, Bbdl);
105:         return PLASMA_ERR_OUT_OF_RESOURCES;
106:     }
107: 
108:     PLASMA_desc descA = plasma_desc_init(
109:         Abdl, PlasmaRealDouble,
110:         PLASMA_NB, PLASMA_NB, PLASMA_NBNBSIZE,
111:         M, K, 0, 0, M, K);
112: 
113:     PLASMA_desc descB = plasma_desc_init(
114:         Bbdl, PlasmaRealDouble,
115:         PLASMA_NB, PLASMA_NB, PLASMA_NBNBSIZE,
116:         M, N, 0, 0, M, N);
117: 
118:     PLASMA_desc descT = plasma_desc_init(
119:         Tbdl, PlasmaRealDouble,
120:         PLASMA_IB, PLASMA_NB, PLASMA_IBNBSIZE,
121:         M, N, 0, 0, M, N);
122: 
123:     plasma_parallel_call_3(plasma_lapack_to_tile,
124:         double*, A,
125:         int, LDA,
126:         PLASMA_desc, descA);
127: 
128:     plasma_parallel_call_3(plasma_lapack_to_tile,
129:         double*, B,
130:         int, LDB,
131:         PLASMA_desc, descB);
132: 
133:     /* Receive T from the user */
134:     plasma_memcpy(Tbdl, T, MT*NT*PLASMA_IBNBSIZE, PlasmaRealDouble);
135: 
136:     /* Call the native interface */
137:     status = PLASMA_dorgqr_Tile(&descA, &descT, &descB);
138: 
139:     if (status == PLASMA_SUCCESS)
140:         plasma_parallel_call_3(plasma_tile_to_lapack,
141:             PLASMA_desc, descB,
142:             double*, B,
143:             int, LDB);
144: 
145:     plasma_shared_free(plasma, Abdl);
146:     plasma_shared_free(plasma, Tbdl);
147:     plasma_shared_free(plasma, Bbdl);
148:     return status;
149: }
150: 
151: /* /////////////////////////// P /// U /// R /// P /// O /// S /// E /////////////////////////// */
152: // PLASMA_dorgqr_Tile - Generates an M-by-N matrix Q with orthonormal columns, which is defined as the
153: // first N columns of a product of the elementary reflectors returned by PLASMA_dgeqrf_Tile.
154: // All matrices are passed through descriptors. All dimensions are taken from the descriptors.
155: 
156: /* ///////////////////// A /// R /// G /// U /// M /// E /// N /// T /// S ///////////////////// */
157: // A        double* (IN)
158: //          Details of the QR factorization of the original matrix A as returned by PLASMA_dgeqrf.
159: //
160: // T        double* (IN)
161: //          Auxiliary factorization data, computed by PLASMA_dgeqrf.
162: //
163: // B        double* (OUT)
164: //          On exit, the M-by-N matrix Q.
165: 
166: /* ///////////// R /// E /// T /// U /// R /// N /////// V /// A /// L /// U /// E ///////////// */
167: //          = 0: successful exit
168: 
169: /* //////////////////////////////////// C /// O /// D /// E //////////////////////////////////// */
170: int PLASMA_dorgqr_Tile(PLASMA_desc *A, PLASMA_desc *T, PLASMA_desc *B)
171: {
172:     PLASMA_desc descA = *A;
173:     PLASMA_desc descT = *T;
174:     PLASMA_desc descB = *B;
175:     plasma_context_t *plasma;
176: 
177:     plasma = plasma_context_self();
178:     if (plasma == NULL) {
179:         plasma_fatal_error("PLASMA_dorgqr_Tile", "PLASMA not initialized");
180:         return PLASMA_ERR_NOT_INITIALIZED;
181:     }
182:     /* Check descriptors for correctness */
183:     if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
184:         plasma_error("PLASMA_dorgqr_Tile", "invalid first descriptor");
185:         return PLASMA_ERR_ILLEGAL_VALUE;
186:     }
187:     if (plasma_desc_check(&descT) != PLASMA_SUCCESS) {
188:         plasma_error("PLASMA_dorgqr_Tile", "invalid second descriptor");
189:         return PLASMA_ERR_ILLEGAL_VALUE;
190:     }
191:     if (plasma_desc_check(&descB) != PLASMA_SUCCESS) {
192:         plasma_error("PLASMA_dorgqr_Tile", "invalid third descriptor");
193:         return PLASMA_ERR_ILLEGAL_VALUE;
194:     }
195:     /* Check input arguments */
196:     if (descA.nb != descA.mb || descB.nb != descB.mb) {
197:         plasma_error("PLASMA_dorgqr_Tile", "only square tiles supported");
198:         return PLASMA_ERR_ILLEGAL_VALUE;
199:     }
200:     /* Quick return */
201: /*
202:     if (N <= 0)
203:         return PLASMA_SUCCESS;
204: */
205:     plasma_parallel_call_3(plasma_pdorgqr,
206:         PLASMA_desc, descA,
207:         PLASMA_desc, descB,
208:         PLASMA_desc, descT);
209: 
210:     return PLASMA_SUCCESS;
211: }
212: