PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
zgesvd.c
Go to the documentation of this file.
1 
16 #include <lapacke.h>
17 #include "common.h"
18 
19 /***************************************************************************/
123 int PLASMA_zgesvd(PLASMA_enum jobu, PLASMA_enum jobvt, int M, int N,
124  PLASMA_Complex64_t *A, int LDA,
125  double *S,
126  PLASMA_Complex64_t *U, int LDU,
127  PLASMA_Complex64_t *VT, int LDVT,
128  PLASMA_desc *descT)
129 {
130  int NB, IB, MT, NT;
131  int status;
133  PLASMA_sequence *sequence = NULL;
135  PLASMA_desc descA, descU, descVT;
136 
137  plasma = plasma_context_self();
138  if (plasma == NULL) {
139  plasma_fatal_error("PLASMA_zgesvd", "PLASMA not initialized");
141  }
142 
143  /* Tune NB & IB depending on M & N; Set NBNB */
144  status = plasma_tune(PLASMA_FUNC_ZGESVD, M, N, 0);
145  if (status != PLASMA_SUCCESS) {
146  plasma_error("PLASMA_zgesvd", "plasma_tune() failed");
147  return status;
148  }
149 
150  /* Set MT, NT */
151  NB = PLASMA_NB;
152  IB = PLASMA_IB;
153  MT = (M%NB==0) ? (M/NB) : (M/NB+1);
154  NT = (N%NB==0) ? (N/NB) : (N/NB+1);
155 
156  /* Check input arguments */
157  if (jobu != PlasmaNoVec && jobu !=PlasmaVec) {
158  plasma_error("PLASMA_zgesvd", "illegal value of jobu");
159  return -1;
160  }
161  if (jobvt != PlasmaNoVec && jobvt != PlasmaVec) {
162  plasma_error("PLASMA_zgesvd", "illegal value of jobvt");
163  return -2;
164  }
165  if (M < 0) {
166  plasma_error("PLASMA_zgesvd", "illegal value of M");
167  return -3;
168  }
169  if (N < 0) {
170  plasma_error("PLASMA_zgesvd", "illegal value of N");
171  return -4;
172  }
173  if (LDA < max(1, M)) {
174  plasma_error("PLASMA_zgesvd", "illegal value of LDA");
175  return -6;
176  }
177  if (LDU < 1) {
178  plasma_error("PLASMA_zgesvd", "illegal value of LDU");
179  return -9;
180  }
181  if (LDVT < 1) {
182  plasma_error("PLASMA_zgesvd", "illegal value of LDVT");
183  return -11;
184  }
185  if ( (plasma_desc_check(descT) != PLASMA_SUCCESS) ||
186  ( descT->m != MT*IB ) || (descT->n != NT*NB) ) {
187  plasma_error("PLASMA_zgesvd", "invalid T descriptor");
188  return -12;
189  }
190  /* Quick return */
191  if (min(M, N) == 0) {
192  return PLASMA_SUCCESS;
193  }
194 
195  if (jobu == PlasmaVec) {
196  plasma_error("PLASMA_zgesvd", "computing the singular vectors is not supported in this version");
197  return -1;
198  }
199  if (jobvt == PlasmaVec) {
200  plasma_error("PLASMA_zgesvd", "computing the singular vectors is not supported in this version");
201  return -2;
202  }
203 
204  plasma_sequence_create(plasma, &sequence);
205 
207  plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)) );
208  if (jobu == PlasmaVec){
209  plasma_zooplap2tile( descU, U, NB, NB, LDU, M, 0, 0, M, M, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descU)));
210  }
211  if (jobvt == PlasmaVec){
212  plasma_zooplap2tile( descVT, VT, NB, NB, LDVT, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descU)); plasma_desc_mat_free(&(descVT)));
213  }
214  } else {
215  plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N);
216  if (jobu == PlasmaVec){
217  plasma_ziplap2tile( descU, U, NB, NB, LDU, M, 0, 0, M, M);
218  }
219  if (jobvt == PlasmaVec){
220  plasma_ziplap2tile( descVT, VT, NB, NB, LDVT, N, 0, 0, N, N);
221  }
222  }
223 
224  /* Call the tile interface */
225  PLASMA_zgesvd_Tile_Async(jobu, jobvt, &descA, S, &descU, &descVT, descT, sequence, &request);
226 
228  plasma_zooptile2lap( descA, A, NB, NB, LDA, N );
229  if (jobu == PlasmaVec){
230  plasma_zooptile2lap( descU, U, NB, NB, LDU, M );
231  }
232  if (jobvt == PlasmaVec){
233  plasma_zooptile2lap( descVT, VT, NB, NB, LDVT, N );
234  }
236  plasma_desc_mat_free(&descA);
237  if (jobu == PlasmaVec){
238  plasma_desc_mat_free(&descU);
239  }
240  if (jobvt == PlasmaVec){
241  plasma_desc_mat_free(&descVT);
242  }
243  } else {
244  plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
245  if (jobu == PlasmaVec){
246  plasma_ziptile2lap( descU, U, NB, NB, LDU, M );
247  }
248  if (jobvt == PlasmaVec){
249  plasma_ziptile2lap( descVT, VT, NB, NB, LDVT, N );
250  }
252  }
253 
254  status = sequence->status;
255  plasma_sequence_destroy(plasma, sequence);
256  return status;
257 }
258 
259 /***************************************************************************/
334  double *S, PLASMA_desc *U, PLASMA_desc *VT, PLASMA_desc *T)
335 {
337  PLASMA_sequence *sequence = NULL;
339  int status;
340 
341  plasma = plasma_context_self();
342  if (plasma == NULL) {
343  plasma_fatal_error("PLASMA_zgesvd_Tile", "PLASMA not initialized");
345  }
346  plasma_sequence_create(plasma, &sequence);
347  PLASMA_zgesvd_Tile_Async(jobu, jobvt, A, S, U, VT, T, sequence, &request);
349  status = sequence->status;
350  plasma_sequence_destroy(plasma, sequence);
351  return status;
352 }
353 
354 /***************************************************************************/
384  double *S, PLASMA_desc *U, PLASMA_desc *VT, PLASMA_desc *T,
385  PLASMA_sequence *sequence, PLASMA_request *request)
386 {
387  PLASMA_desc descA = *A;
388  PLASMA_desc descT = *T;
389  double *E;
390  int minMN = min(descA.m, descA.n);
391  int NCVT = 0;
392  int NRU = 0;
393  int NCC = 0;
394 
396 
397  plasma = plasma_context_self();
398 
399  if (jobu != PlasmaNoVec && jobu !=PlasmaVec) {
400  plasma_error("PLASMA_zgesvd_Tile_Async", "illegal value of jobu");
402  }
403  if (jobvt != PlasmaNoVec && jobvt != PlasmaVec) {
404  plasma_error("PLASMA_zgesvd_Tile_Async", "illegal value of jobvt");
406  }
407  if (plasma == NULL) {
408  plasma_fatal_error("PLASMA_zgesvd_Tile_Async", "PLASMA not initialized");
410  }
411  if (sequence == NULL) {
412  plasma_fatal_error("PLASMA_zgesvd_Tile_Async", "NULL sequence");
413  return PLASMA_ERR_UNALLOCATED;
414  }
415  if (request == NULL) {
416  plasma_fatal_error("PLASMA_zgesvd_Tile_Async", "NULL request");
417  return PLASMA_ERR_UNALLOCATED;
418  }
419  /* Check sequence status */
420  if (sequence->status == PLASMA_SUCCESS)
421  request->status = PLASMA_SUCCESS;
422  else
423  return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);
424 
425  /* Check descriptors for correctness */
426  if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
427  plasma_error("PLASMA_zgesvd_Tile_Async", "invalid first descriptor");
428  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
429  }
430  if ((jobu != PlasmaNoVec) && (plasma_desc_check(U) != PLASMA_SUCCESS)) {
431  plasma_error("PLASMA_zgesvd_Tile_Async", "invalid second descriptor");
432  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
433  }
434  if ((jobvt != PlasmaNoVec) && (plasma_desc_check(VT) != PLASMA_SUCCESS) ) {
435  plasma_error("PLASMA_zgesvd_Tile_Async", "invalid third descriptor");
436  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
437  }
438  if (plasma_desc_check(&descT) != PLASMA_SUCCESS) {
439  plasma_error("PLASMA_zgesvd_Tile_Async", "invalid fourth descriptor");
440  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
441  }
442  /* Check input arguments */
443  if (descA.nb != descA.mb) {
444  plasma_error("PLASMA_zgesvd_Tile_Async", "only square tiles supported");
445  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
446  }
447  if (( (jobu != PlasmaNoVec) && (U->nb != U->mb) ) || ( (jobvt != PlasmaNoVec) && (VT->nb != VT->mb) )) {
448  plasma_error("PLASMA_zgesvd_Tile_Async", "only square tiles supported");
449  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
450  }
451  if ((jobu == PlasmaVec) || (jobvt == PlasmaVec) ){
452  plasma_error("PLASMA_zgesvd_Tile_Async", "computing the singular vectors is not supported in this version");
453  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
454  }
455 
456  E = (double *)plasma_shared_alloc(plasma, minMN-1, PlasmaRealDouble);
457 
458  /*
459  * Reduction to bidiagonal form with a two-stage approach.
460  */
461 
462  /*
463  * 1: Reduction to BAND bidiagonal form
464  * May be further optimized using the algo described in Trefethen
465  */
466  /* if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) { */
468  PLASMA_desc, descA,
469  PLASMA_desc, descT,
470  PLASMA_sequence*, sequence,
471  PLASMA_request*, request);
472  /* } */
473  /* else { */
474  /* plasma_dynamic_call_4(plasma_pzgerbbrh, */
475  /* PLASMA_desc, descA, */
476  /* PLASMA_desc, descT, */
477  /* PLASMA_sequence*, sequence, */
478  /* PLASMA_request*, request); */
479  /* } */
480 
481  /* Build the U of the first stage */
482  /* if (jobu == PlasmaVec){ */
483  /* /\* Initialize U to Identity *\/ */
484  /* plasma_dynamic_call_6(plasma_pzlaset, */
485  /* PLASMA_enum, PlasmaUpperLower, */
486  /* PLASMA_Complex64_t, 0.0, */
487  /* PLASMA_Complex64_t, 1.0, */
488  /* PLASMA_desc, descU, */
489  /* PLASMA_sequence*, sequence, */
490  /* PLASMA_request*, request); */
491  /* /\* Accumulate the transformations from the first stage *\/ */
492  /* if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) { */
493  /* plasma_dynamic_call_6(plasma_pzungbr, */
494  /* PLASMA_enum, PlasmaLeft, */
495  /* PLASMA_desc, descA, */
496  /* PLASMA_desc, descU, */
497  /* PLASMA_desc, descT, */
498  /* PLASMA_sequence*, sequence, */
499  /* PLASMA_request*, request); */
500  /* } */
501  /* else { */
502  /* plasma_dynamic_call_6(plasma_pzungbrrh, */
503  /* PLASMA_enum, PlasmaLeft, */
504  /* PLASMA_desc, descA, */
505  /* PLASMA_desc, descU, */
506  /* PLASMA_desc, descT, */
507  /* PLASMA_sequence*, sequence, */
508  /* PLASMA_request*, request); */
509  /* } */
510  /* } */
511 
512  /* Build the VT of the first stage */
513  /* if (jobvt == PlasmaVec){ */
514  /* /\* Initialize VT to Identity *\/ */
515  /* plasma_dynamic_call_6(plasma_pzlaset, */
516  /* PLASMA_enum, PlasmaUpperLower, */
517  /* PLASMA_Complex64_t, 0.0, */
518  /* PLASMA_Complex64_t, 1.0, */
519  /* PLASMA_desc, descVT, */
520  /* PLASMA_sequence*, sequence, */
521  /* PLASMA_request*, request); */
522 
523  /* /\* Accumulate the transformations from the first stage *\/ */
524  /* if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) { */
525  /* plasma_dynamic_call_6(plasma_pzungbr, */
526  /* PLASMA_enum, PlasmaRight, */
527  /* PLASMA_desc, descA, */
528  /* PLASMA_desc, descVT, */
529  /* PLASMA_desc, descT, */
530  /* PLASMA_sequence*, sequence, */
531  /* PLASMA_request*, request); */
532  /* } */
533  /* else { */
534  /* plasma_dynamic_call_6(plasma_pzungbrrh, */
535  /* PLASMA_enum, PlasmaRight, */
536  /* PLASMA_desc, descA, */
537  /* PLASMA_desc, descVT, */
538  /* PLASMA_desc, descT, */
539  /* PLASMA_sequence*, sequence, */
540  /* PLASMA_request*, request); */
541  /* } */
542  /* } */
543 
544  /*
545  * Set the V's to zero before the 2nd stage i.e., bulge chasing
546  */
547  plasma_dynamic_call_5(plasma_pzlaset2,
549  PLASMA_Complex64_t, 0.0,
550  PLASMA_desc, descA.m >= descA.n ? descA : plasma_desc_submatrix(descA, descA.mb, 0, descA.m-descA.mb, descA.n),
551  PLASMA_sequence*, sequence,
552  PLASMA_request*, request);
553 
554  plasma_dynamic_call_5(plasma_pzlaset2,
556  PLASMA_Complex64_t, 0.0,
557  PLASMA_desc, descA.m >= descA.n ? plasma_desc_submatrix(descA, 0, descA.nb, descA.m, descA.n-descA.nb) : descA,
558  PLASMA_sequence*, sequence,
559  PLASMA_request*, request);
560 
561  /*
562  * 2: Reduction from BAND bidiagonal to the final condensed form
563  */
564  plasma_dynamic_call_7(plasma_pzgbrdb,
565  PLASMA_enum, descA.m >= descA.n ? PlasmaUpper : PlasmaLower,
566  PLASMA_desc, descA,
567  double*, S,
568  double*, E,
569  PLASMA_desc, descT,
570  PLASMA_sequence*, sequence,
571  PLASMA_request*, request);
572 
573  /*
574  * Compute the singular values ONLY for now
575  */
577  if (descA.m >= descA.n)
578  LAPACKE_zbdsqr(
579  LAPACK_COL_MAJOR, lapack_const(PlasmaUpper),
580  minMN, NCVT, NRU, NCC,
581  S, E,
582  NULL, 1, NULL, 1, NULL, 1 );
583  else {
584  LAPACKE_zbdsqr(
585  LAPACK_COL_MAJOR, lapack_const(PlasmaLower),
586  minMN, NCVT, NRU, NCC,
587  S, E,
588  NULL, 1, NULL, 1, NULL, 1 );
589  }
590 
591  plasma_shared_free(plasma, E);
592 
593  return PLASMA_SUCCESS;
594 }