PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
dgels.c
Go to the documentation of this file.
1 
15 #include "common.h"
16 
17 /***************************************************************************/
/*
 * PLASMA_dgels - driver that takes A and B as plain arrays with leading
 * dimensions (LDA, LDB), validates the arguments, builds tile descriptors
 * for A, B and T, hands the work to PLASMA_dgels_Tile_Async(), converts the
 * results back into A and B, and returns the status recorded in the sequence.
 *
 * Parameters (as constrained by the checks below):
 *   trans  only PlasmaNoTrans passes the check; any other value is reported
 *          as an error.
 *   M, N   must be >= 0 (presumably the row/column counts of A - TODO confirm).
 *   NRHS   must be >= 0 (presumably the number of columns of B - TODO confirm).
 *   A,LDA  LDA must be >= max(1, M).
 *   T      caller-provided storage; it becomes descT.mat.
 *   B,LDB  LDB must be >= max(1, M, N).
 *
 * Returns:
 *   -2, -3, -4, -6, -9 for an illegal M, N, NRHS, LDA, LDB respectively;
 *   PLASMA_SUCCESS on the quick-return path;
 *   the plasma_tune() status if tuning fails;
 *   otherwise sequence->status after the asynchronous call.
 *
 * NOTE(review): this listing carries embedded line numbers from a rendered
 * documentation page and several source lines are absent (102, 104, 110, 115,
 * 163, 170, 177, 185, 197, 200, 206). The declarations of `plasma` and
 * `request`, the return statements after the first two error reports, the
 * first argument line of each plasma_desc_init() call, and the conditions
 * that select between the "oop" and "ip" conversion macros are therefore
 * not visible here - recover them from the real source before relying on
 * this text.
 */
94 int PLASMA_dgels(PLASMA_enum trans, int M, int N, int NRHS,
95  double *A, int LDA,
96  double *T,
97  double *B, int LDB)
98 {
99  int i, j;
100  int NB, IB, IBNB, MT, NT;
101  int status;
103  PLASMA_sequence *sequence = NULL;
105  PLASMA_desc descA, descB, descT;
106 
 /* Fetch the context for the calling thread; NULL is reported as
    "PLASMA not initialized" (the statement that follows the report is
    not visible in this listing). */
107  plasma = plasma_context_self();
108  if (plasma == NULL) {
109  plasma_fatal_error("PLASMA_dgels", "PLASMA not initialized");
111  }
112  /* Check input arguments */
 /* The negative return values below are -(1-based position of the
    offending argument in the parameter list). */
113  if (trans != PlasmaNoTrans) {
114  plasma_error("PLASMA_dgels", "only PlasmaNoTrans supported");
116  }
117  if (M < 0) {
118  plasma_error("PLASMA_dgels", "illegal value of M");
119  return -2;
120  }
121  if (N < 0) {
122  plasma_error("PLASMA_dgels", "illegal value of N");
123  return -3;
124  }
125  if (NRHS < 0) {
126  plasma_error("PLASMA_dgels", "illegal value of NRHS");
127  return -4;
128  }
129  if (LDA < max(1, M)) {
130  plasma_error("PLASMA_dgels", "illegal value of LDA");
131  return -6;
132  }
133  if (LDB < max(1, max(M, N))) {
134  plasma_error("PLASMA_dgels", "illegal value of LDB");
135  return -9;
136  }
137  /* Quick return */
 /* If any of M, N, NRHS is zero, zero the first max(M, N) entries of each
    of the NRHS columns of B (column j starts at B[j*LDB]) and stop. */
138  if (min(M, min(N, NRHS)) == 0) {
139  for (i = 0; i < max(M, N); i++)
140  for (j = 0; j < NRHS; j++)
141  B[j*LDB+i] = 0.0;
142  return PLASMA_SUCCESS;
143  }
144 
145  /* Tune NB & IB depending on M, N & NRHS; Set NBNB */
146  status = plasma_tune(PLASMA_FUNC_DGELS, M, N, NRHS);
147  if (status != PLASMA_SUCCESS) {
148  plasma_error("PLASMA_dgels", "plasma_tune() failed");
149  return status;
150  }
151 
152  /* Set NB, IB, IBNB and the tile counts NT = ceil(N/NB), MT = ceil(M/NB) */
153  NB = PLASMA_NB;
154  IB = PLASMA_IB;
155  IBNB = IB*NB;
156  NT = (N%NB==0) ? (N/NB) : (N/NB+1);
157  MT = (M%NB==0) ? (M/NB) : (M/NB+1);
158 
159  plasma_sequence_create(plasma, &sequence);
160 
 /* Describe T with IB x NB tiles. The first argument line of each
    plasma_desc_init() call is missing from this listing. */
161  if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) {
162  descT = plasma_desc_init(
164  IB, NB, IBNB,
165  MT*IB, NT*NB, 0, 0, MT*IB, NT*NB);
166  }
167  else {
168  /* Double the size of T to accommodate the tree reduction phase */
169  descT = plasma_desc_init(
171  IB, NB, IBNB,
172  MT*IB, 2*NT*NB, 0, 0, MT*IB, 2*NT*NB);
173  }
 /* The caller's T buffer is used directly as the storage behind descT. */
174  descT.mat = T;
175 
 /* Build tile descriptors for A and B. When M >= N, B is converted with M
    rows; otherwise with N rows. Within each case one of two macro families
    is used (plasma_dooplap2tile, which is given cleanup statements, or
    plasma_diplap2tile); the condition choosing between them is on lines
    missing from this listing - presumably an in-place vs out-of-place
    translation setting, TODO confirm. */
176  if ( M >= N ) {
178  plasma_dooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N , plasma_desc_mat_free(&(descA)) );
179  plasma_dooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, M, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
180  } else {
181  plasma_diplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N );
182  plasma_diplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, M, NRHS);
183  }
184  } else {
186  plasma_dooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N , plasma_desc_mat_free(&(descA)) );
187  plasma_dooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
188  } else {
189  plasma_diplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N );
190  plasma_diplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
191  }
192  }
193 
194  /* Call the tile interface */
 /* trans is passed as the constant PlasmaNoTrans rather than the caller's
    value. `request` is declared on a line not visible here. */
195  PLASMA_dgels_Tile_Async(PlasmaNoTrans, &descA, &descT, &descB, sequence, &request);
196 
 /* Copy results back into the caller's A and B. Only the "oop" path frees
    the descriptor matrices afterwards; the "ip" path does not. The guarding
    condition (and anything on lines 197, 200, 206) is missing here. */
198  plasma_dooptile2lap( descA, A, NB, NB, LDA, N );
199  plasma_dooptile2lap( descB, B, NB, NB, LDB, NRHS );
201  plasma_desc_mat_free(&descA);
202  plasma_desc_mat_free(&descB);
203  } else {
204  plasma_diptile2lap( descA, A, NB, NB, LDA, N );
205  plasma_diptile2lap( descB, B, NB, NB, LDB, NRHS );
207  }
208 
 /* Read the status before destroying the sequence that holds it. */
209  status = sequence->status;
210  plasma_sequence_destroy(plasma, sequence);
211  return status;
212 }
213 
214 /***************************************************************************/
269 {
 /*
  * Wrapper around PLASMA_dgels_Tile_Async(): creates a private sequence,
  * submits the operation with the arguments trans, A, T, B, then returns
  * the status stored in that sequence.
  *
  * NOTE(review): the function header is not part of this listing; the name
  * PLASMA_dgels_Tile is taken from the error string below and the
  * parameters (trans, A, T, B) from the call - confirm against the real
  * source. Lines 270, 272, 278 and 282 are also absent, so the
  * declarations of `plasma` and `request`, the statement after the fatal
  * error report, and whatever happens between the async call and reading
  * the status are not visible here.
  */
271  PLASMA_sequence *sequence = NULL;
273  int status;
274 
275  plasma = plasma_context_self();
276  if (plasma == NULL) {
277  plasma_fatal_error("PLASMA_dgels_Tile", "PLASMA not initialized");
279  }
280  plasma_sequence_create(plasma, &sequence);
281  PLASMA_dgels_Tile_Async(trans, A, T, B, sequence, &request);
 /* Read the status before destroying the sequence that holds it. */
283  status = sequence->status;
284  plasma_sequence_destroy(plasma, sequence);
285  return status;
286 }
287 
288 /***************************************************************************/
318  PLASMA_sequence *sequence, PLASMA_request *request)
319 {
 /*
  * Tile-descriptor entry point that validates its inputs and then submits
  * the factorization / apply / solve steps against `sequence` and
  * `request`. It returns PLASMA_SUCCESS once the calls have been issued,
  * PLASMA_ERR_UNALLOCATED for a NULL sequence or request, or the value of
  * plasma_request_fail() when a check fails.
  *
  * NOTE(review): only the last line of the signature is visible. The name
  * PLASMA_dgels_Tile_Async and the leading parameters (trans, A, T, B) are
  * inferred from the callers and from the uses of *A, *T, *B and trans
  * below - confirm against the real source. Many lines are absent from
  * this listing (e.g. 323, 328, 376, 382-384, 409-413, 421, 427, 441-445,
  * 453-455), including the heads of most plasma_*_call_N invocations, so
  * several argument lists below appear without the routine they belong to.
  *
  * The error strings in this function say "PLASMA_dgels_Tile", not
  * "..._Tile_Async"; they are left as they are.
  */
 /* Work on by-value copies of the caller's descriptors. */
320  PLASMA_desc descA = *A;
321  PLASMA_desc descT = *T;
322  PLASMA_desc descB = *B;
324 
325  plasma = plasma_context_self();
326  if (plasma == NULL) {
327  plasma_fatal_error("PLASMA_dgels_Tile", "PLASMA not initialized");
329  }
330  if (sequence == NULL) {
331  plasma_fatal_error("PLASMA_dgels_Tile", "NULL sequence");
332  return PLASMA_ERR_UNALLOCATED;
333  }
334  if (request == NULL) {
335  plasma_fatal_error("PLASMA_dgels_Tile", "NULL request");
336  return PLASMA_ERR_UNALLOCATED;
337  }
338  /* Check sequence status */
 /* A sequence that is already in a non-success state rejects new work:
    the request is failed with PLASMA_ERR_SEQUENCE_FLUSHED. */
339  if (sequence->status == PLASMA_SUCCESS)
340  request->status = PLASMA_SUCCESS;
341  else
342  return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);
343 
344  /* Check descriptors for correctness */
345  if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
346  plasma_error("PLASMA_dgels_Tile", "invalid first descriptor");
347  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
348  }
349  if (plasma_desc_check(&descT) != PLASMA_SUCCESS) {
350  plasma_error("PLASMA_dgels_Tile", "invalid second descriptor");
351  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
352  }
353  if (plasma_desc_check(&descB) != PLASMA_SUCCESS) {
354  plasma_error("PLASMA_dgels_Tile", "invalid third descriptor");
355  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
356  }
357  /* Check input arguments */
 /* A and B must use square tiles (mb == nb); T is not checked here. */
358  if (descA.nb != descA.mb || descB.nb != descB.mb) {
359  plasma_error("PLASMA_dgels_Tile", "only square tiles supported");
360  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
361  }
362  if (trans != PlasmaNoTrans) {
363  plasma_error("PLASMA_dgels_Tile", "only PlasmaNoTrans supported");
364  return plasma_request_fail(sequence, request, PLASMA_ERR_NOT_SUPPORTED);
365  }
366  /* Quick return - currently NOT equivalent to LAPACK's:
367  if (min(M, min(N, NRHS)) == 0) {
368  for (i = 0; i < max(M, N); i++)
369  for (j = 0; j < NRHS; j++)
370  B[j*LDB+i] = 0.0;
371  return PLASMA_SUCCESS;
372  }
373 */
 /* Case descA.m >= descA.n. In the non-flat Householder branch the visible
    calls are plasma_pdgeqrfrh on (A, T) followed by plasma_pdormqrrh on
    (A, B, T). The flat-branch call heads are missing from this listing -
    presumably the non-"rh" counterparts, TODO confirm. */
374  if (descA.m >= descA.n) {
375  if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) {
377  PLASMA_desc, descA,
378  PLASMA_desc, descT,
379  PLASMA_sequence*, sequence,
380  PLASMA_request*, request);
381 
385  PLASMA_desc, descA,
386  PLASMA_desc, descB,
387  PLASMA_desc, descT,
388  PLASMA_sequence*, sequence,
389  PLASMA_request*, request);
390  }
391  else {
392  plasma_dynamic_call_5(plasma_pdgeqrfrh,
393  PLASMA_desc, descA,
394  PLASMA_desc, descT,
396  PLASMA_sequence*, sequence,
397  PLASMA_request*, request);
398 
399  plasma_dynamic_call_8(plasma_pdormqrrh,
402  PLASMA_desc, descA,
403  PLASMA_desc, descB,
404  PLASMA_desc, descT,
406  PLASMA_sequence*, sequence,
407  PLASMA_request*, request);
408  }
 /* Final step of this case: a call (head on missing lines 409-413) with
    scalar 1.0 on the leading descA.n x descA.n part of A and the first
    descA.n rows of B - presumably a triangular solve, TODO confirm. */
414  double, 1.0,
415  PLASMA_desc, plasma_desc_submatrix(descA, 0, 0, descA.n, descA.n),
416  PLASMA_desc, plasma_desc_submatrix(descB, 0, 0, descA.n, descB.n),
417  PLASMA_sequence*, sequence,
418  PLASMA_request*, request);
419  }
 /* Case descA.m < descA.n. The first call (head on missing line 421) acts
    on rows descA.m .. descA.n-1 of B. Then A is factored (plasma_pdgelqfrh
    in the non-flat branch), a call with scalar 1.0 uses the leading
    descA.m x descA.m part of A and the first descA.m rows of B, and
    finally plasma_pdormlqrh (non-flat branch) is issued on (A, B, T). */
420  else {
422  PLASMA_desc, plasma_desc_submatrix(descB, descA.m, 0, descA.n-descA.m, descB.n),
423  PLASMA_sequence*, sequence,
424  PLASMA_request*, request);
425 
426  if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) {
428  PLASMA_desc, descA,
429  PLASMA_desc, descT,
430  PLASMA_sequence*, sequence,
431  PLASMA_request*, request);
432  }
433  else {
434  plasma_dynamic_call_5(plasma_pdgelqfrh,
435  PLASMA_desc, descA,
436  PLASMA_desc, descT,
438  PLASMA_sequence*, sequence,
439  PLASMA_request*, request);
440  }
446  double, 1.0,
447  PLASMA_desc, plasma_desc_submatrix(descA, 0, 0, descA.m, descA.m),
448  PLASMA_desc, plasma_desc_submatrix(descB, 0, 0, descA.m, descB.n),
449  PLASMA_sequence*, sequence,
450  PLASMA_request*, request);
451 
452  if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) {
456  PLASMA_desc, descA,
457  PLASMA_desc, descB,
458  PLASMA_desc, descT,
459  PLASMA_sequence*, sequence,
460  PLASMA_request*, request);
461  }
462  else {
463  plasma_dynamic_call_8(plasma_pdormlqrh,
466  PLASMA_desc, descA,
467  PLASMA_desc, descB,
468  PLASMA_desc, descT,
470  PLASMA_sequence*, sequence,
471  PLASMA_request*, request);
472  }
473  }
 /* This return value does not come from the submitted calls; the wrappers
    in this file read sequence->status instead. */
474  return PLASMA_SUCCESS;
475 }