PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
zgebrd.c
Go to the documentation of this file.
1 
16 #include "common.h"
17 
18 /***************************************************************************/
122 int PLASMA_zgebrd(PLASMA_enum jobu, PLASMA_enum jobvt, int M, int N,
123  PLASMA_Complex64_t *A, int LDA,
124  double *D,
125  double *E,
126  PLASMA_Complex64_t *U, int LDU,
127  PLASMA_Complex64_t *VT, int LDVT,
128  PLASMA_desc *descT)
129 {
130  int NB, IB, MT, NT;
131  int status;
133  PLASMA_sequence *sequence = NULL;
135  PLASMA_desc descA, descU, descVT;
136 
137  plasma = plasma_context_self();
138  if (plasma == NULL) {
139  plasma_fatal_error("PLASMA_zgebrd", "PLASMA not initialized");
141  }
142 
143  /* Tune NB & IB depending on M & N; Set NBNB */
144  status = plasma_tune(PLASMA_FUNC_ZGEBRD, M, N, 0);
145  if (status != PLASMA_SUCCESS) {
146  plasma_error("PLASMA_zgebrd", "plasma_tune() failed");
147  return status;
148  }
149 
150  /* Set MT, NT */
151  NB = PLASMA_NB;
152  IB = PLASMA_IB;
153  MT = (M%NB==0) ? (M/NB) : (M/NB+1);
154  NT = (N%NB==0) ? (N/NB) : (N/NB+1);
155 
156  /* Check input arguments */
157  if (jobu != PlasmaNoVec && jobu !=PlasmaVec) {
158  plasma_error("PLASMA_zgebrd", "illegal value of jobu");
159  return -1;
160  }
161  if (jobvt != PlasmaNoVec && jobvt != PlasmaVec) {
162  plasma_error("PLASMA_zgebrd", "illegal value of jobvt");
163  return -2;
164  }
165  if (M < 0) {
166  plasma_error("PLASMA_zgebrd", "illegal value of M");
167  return -3;
168  }
169  if (N < 0) {
170  plasma_error("PLASMA_zgebrd", "illegal value of N");
171  return -4;
172  }
173  if (LDA < max(1, M)) {
174  plasma_error("PLASMA_zgebrd", "illegal value of LDA");
175  return -6;
176  }
177  if (LDU < 1) {
178  plasma_error("PLASMA_zgebrd", "illegal value of LDU");
179  return -9;
180  }
181  if (LDVT < 1) {
182  plasma_error("PLASMA_zgebrd", "illegal value of LDVT");
183  return -11;
184  }
185  if ( (plasma_desc_check(descT) != PLASMA_SUCCESS) ||
186  ( descT->m != MT*IB ) || (descT->n != NT*NB) ) {
187  plasma_error("PLASMA_zgebrd", "invalid T descriptor");
188  return -12;
189  }
190  /* Quick return */
191  if (min(M, N) == 0) {
192  return PLASMA_SUCCESS;
193  }
194 
195  if (jobu == PlasmaVec) {
196  plasma_error("PLASMA_zgebrd", "computing the singular vectors is not supported in this version");
197  return -1;
198  }
199  if (jobvt == PlasmaVec) {
200  plasma_error("PLASMA_zgebrd", "computing the singular vectors is not supported in this version");
201  return -2;
202  }
203 
204  plasma_sequence_create(plasma, &sequence);
205 
207  plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)) );
208  if (jobu == PlasmaVec){
209  plasma_zooplap2tile( descU, U, NB, NB, LDU, M, 0, 0, M, M, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descU)));
210  }
211  if (jobvt == PlasmaVec){
212  plasma_zooplap2tile( descVT, VT, NB, NB, LDVT, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descU)); plasma_desc_mat_free(&(descVT)));
213  }
214  } else {
215  plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N);
216  if (jobu == PlasmaVec){
217  plasma_ziplap2tile( descU, U, NB, NB, LDU, M, 0, 0, M, M);
218  }
219  if (jobvt == PlasmaVec){
220  plasma_ziplap2tile( descVT, VT, NB, NB, LDVT, N, 0, 0, N, N);
221  }
222  }
223 
224  /* Call the tile interface */
225  PLASMA_zgebrd_Tile_Async(jobu, jobvt, &descA, D, E, &descU, &descVT, descT, sequence, &request);
226 
228  plasma_zooptile2lap( descA, A, NB, NB, LDA, N );
229  if (jobu == PlasmaVec){
230  plasma_zooptile2lap( descU, U, NB, NB, LDU, M );
231  }
232  if (jobvt == PlasmaVec){
233  plasma_zooptile2lap( descVT, VT, NB, NB, LDVT, N );
234  }
236  plasma_desc_mat_free(&descA);
237  if (jobu == PlasmaVec){
238  plasma_desc_mat_free(&descU);
239  }
240  if (jobvt == PlasmaVec){
241  plasma_desc_mat_free(&descVT);
242  }
243  } else {
244  plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
245  if (jobu == PlasmaVec){
246  plasma_ziptile2lap( descU, U, NB, NB, LDU, M );
247  }
248  if (jobvt == PlasmaVec){
249  plasma_ziptile2lap( descVT, VT, NB, NB, LDVT, N );
250  }
252  }
253 
254  status = sequence->status;
255  plasma_sequence_destroy(plasma, sequence);
256  return status;
257 }
258 
259 /***************************************************************************/
334  double *D, double *E, PLASMA_desc *U, PLASMA_desc *VT, PLASMA_desc *T)
335 {
337  PLASMA_sequence *sequence = NULL;
339  int status;
340 
341  plasma = plasma_context_self();
342  if (plasma == NULL) {
343  plasma_fatal_error("PLASMA_zgebrd_Tile", "PLASMA not initialized");
345  }
346  plasma_sequence_create(plasma, &sequence);
347  PLASMA_zgebrd_Tile_Async(jobu, jobvt, A, D, E, U, VT, T, sequence, &request);
349  status = sequence->status;
350  plasma_sequence_destroy(plasma, sequence);
351  return status;
352 }
353 
354 /***************************************************************************/
384  double *D, double *E, PLASMA_desc *U, PLASMA_desc *VT, PLASMA_desc *T,
385  PLASMA_sequence *sequence, PLASMA_request *request)
386 {
387  PLASMA_desc descA = *A;
388  PLASMA_desc descT = *T;
389 
391  plasma = plasma_context_self();
392 
393  if (jobu != PlasmaNoVec && jobu !=PlasmaVec) {
394  plasma_error("PLASMA_zgebrd_Tile_Async", "illegal value of jobu");
396  }
397  if (jobvt != PlasmaNoVec && jobvt != PlasmaVec) {
398  plasma_error("PLASMA_zgebrd_Tile_Async", "illegal value of jobvt");
400  }
401  if (plasma == NULL) {
402  plasma_fatal_error("PLASMA_zgebrd_Tile_Async", "PLASMA not initialized");
404  }
405  if (sequence == NULL) {
406  plasma_fatal_error("PLASMA_zgebrd_Tile_Async", "NULL sequence");
407  return PLASMA_ERR_UNALLOCATED;
408  }
409  if (request == NULL) {
410  plasma_fatal_error("PLASMA_zgebrd_Tile_Async", "NULL request");
411  return PLASMA_ERR_UNALLOCATED;
412  }
413  /* Check sequence status */
414  if (sequence->status == PLASMA_SUCCESS)
415  request->status = PLASMA_SUCCESS;
416  else
417  return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);
418 
419  /* Check descriptors for correctness */
420  if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
421  plasma_error("PLASMA_zgebrd_Tile_Async", "invalid first descriptor");
422  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
423  }
424  if ((jobu != PlasmaNoVec) && (plasma_desc_check(U) != PLASMA_SUCCESS)) {
425  plasma_error("PLASMA_zgebrd_Tile_Async", "invalid third descriptor");
426  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
427  }
428  if ((jobvt != PlasmaNoVec) && (plasma_desc_check(VT) != PLASMA_SUCCESS) ) {
429  plasma_error("PLASMA_zgebrd_Tile_Async", "invalid fourth descriptor");
430  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
431  }
432  if (plasma_desc_check(&descT) != PLASMA_SUCCESS) {
433  plasma_error("PLASMA_zgebrd_Tile_Async", "invalid fifth descriptor");
434  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
435  }
436  /* Check input arguments */
437  if (descA.nb != descA.mb) {
438  plasma_error("PLASMA_zgebrd_Tile_Async", "only square tiles supported");
439  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
440  }
441  if (( (jobu != PlasmaNoVec) && (U->nb != U->mb) ) || ( (jobvt != PlasmaNoVec) && (VT->nb != VT->mb) )) {
442  plasma_error("PLASMA_zgebrd_Tile_Async", "only square tiles supported");
443  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
444  }
445  if ((jobu == PlasmaVec) || (jobvt == PlasmaVec) ){
446  plasma_error("PLASMA_zgebrd_Tile_Async", "computing the singular vectors is not supported in this version");
447  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
448  }
449 
450  /* Reduction to bidiagonal form
451  * with a two-stage approach.
452  */
453 
454  /* Reduction to BAND bidiagonal form
455  * May be further optimized using the algo described in Trefethen
456  */
457  /* if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) { */
459  PLASMA_desc, descA,
460  PLASMA_desc, descT,
461  PLASMA_sequence*, sequence,
462  PLASMA_request*, request);
463  /* } */
464  /* else { */
465  /* plasma_dynamic_call_4(plasma_pzgerbbrh, */
466  /* PLASMA_desc, descA, */
467  /* PLASMA_desc, descT, */
468  /* PLASMA_sequence*, sequence, */
469  /* PLASMA_request*, request); */
470  /* } */
471 
472  /* Build the U of the first stage */
473  /* if (jobu == PlasmaVec){ */
474  /* /\* Initialize U to Identity *\/ */
475  /* plasma_dynamic_call_6(plasma_pzlaset, */
476  /* PLASMA_enum, PlasmaUpperLower, */
477  /* PLASMA_Complex64_t, 0.0, */
478  /* PLASMA_Complex64_t, 1.0, */
479  /* PLASMA_desc, descU, */
480  /* PLASMA_sequence*, sequence, */
481  /* PLASMA_request*, request); */
482  /* /\* Accumulate the transformations from the first stage *\/ */
483  /* plasma_dynamic_call_6(plasma_pzungbr, */
484  /* PLASMA_enum, PlasmaLeft, */
485  /* PLASMA_desc, descA, */
486  /* PLASMA_desc, descU, */
487  /* PLASMA_desc, descT, */
488  /* PLASMA_sequence*, sequence, */
489  /* PLASMA_request*, request); */
490  /* } */
491 
492  /* Build the VT of the first stage */
493  /* if (jobvt == PlasmaVec){ */
494  /* /\* Initialize VT to Identity *\/ */
495  /* plasma_dynamic_call_6(plasma_pzlaset, */
496  /* PLASMA_enum, PlasmaUpperLower, */
497  /* PLASMA_Complex64_t, 0.0, */
498  /* PLASMA_Complex64_t, 1.0, */
499  /* PLASMA_desc, descVT, */
500  /* PLASMA_sequence*, sequence, */
501  /* PLASMA_request*, request); */
502  /* /\* Accumulate the transformations from the first stage *\/ */
503  /* plasma_dynamic_call_6(plasma_pzungbr, */
504  /* PLASMA_enum, PlasmaRight, */
505  /* PLASMA_desc, descA, */
506  /* PLASMA_desc, descVT, */
507  /* PLASMA_desc, descT, */
508  /* PLASMA_sequence*, sequence, */
509  /* PLASMA_request*, request); */
510  /* } */
511 
512  /* Set the V's to zero before the 2nd stage i.e., bulge chasing */
513  plasma_dynamic_call_5(plasma_pzlaset2,
515  PLASMA_Complex64_t, 0.0,
516  PLASMA_desc, descA.m >= descA.n ? descA : plasma_desc_submatrix(descA, descA.mb, 0, descA.m-descA.mb, descA.n),
517  PLASMA_sequence*, sequence,
518  PLASMA_request*, request);
519 
520  plasma_dynamic_call_5(plasma_pzlaset2,
522  PLASMA_Complex64_t, 0.0,
523  PLASMA_desc, descA.m >= descA.n ? plasma_desc_submatrix(descA, 0, descA.nb, descA.m, descA.n-descA.nb) : descA,
524  PLASMA_sequence*, sequence,
525  PLASMA_request*, request);
526 
527  /* Reduction from BAND bidiagonal to the final condensed form */
528  plasma_dynamic_call_7(plasma_pzgbrdb,
529  PLASMA_enum, descA.m >= descA.n ? PlasmaUpper : PlasmaLower,
530  PLASMA_desc, descA,
531  double*, D,
532  double*, E,
533  PLASMA_desc, descT,
534  PLASMA_sequence*, sequence,
535  PLASMA_request*, request);
536 
537  /*
538  */
539  return PLASMA_SUCCESS;
540 }