PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
zheevd.c
Go to the documentation of this file.
1 
16 #include <lapacke.h>
17 #include "common.h"
18 
19 /***************************************************************************/
98  PLASMA_Complex64_t *A, int LDA,
99  double *W,
100  PLASMA_desc *T,
101  PLASMA_Complex64_t *Q, int LDQ)
102 {
103  int NB, IB, IBNB, NT;
104  int status;
106  PLASMA_sequence *sequence = NULL;
108  PLASMA_desc descA, descQ;
109 
110  plasma = plasma_context_self();
111  if (plasma == NULL) {
112  plasma_error("PLASMA_zheevd", "PLASMA not initialized");
114  }
115 
116  /* Set NT */
117  NB = PLASMA_NB;
118  IB = PLASMA_IB;
119  IBNB = IB*NB;
120  NT = (N%NB==0) ? (N/NB) : (N/NB+1);
121 
122  /* Check input arguments */
123  if (jobz != PlasmaNoVec && jobz != PlasmaVec) {
124  plasma_error("PLASMA_zheevd", "illegal value of jobz");
125  return -1;
126  }
127  if (uplo != PlasmaLower && uplo != PlasmaUpper) {
128  plasma_error("PLASMA_zheevd", "illegal value of uplo");
129  return -2;
130  }
131  if (N < 0) {
132  plasma_error("PLASMA_zheevd", "illegal value of N");
133  return -3;
134  }
135  if (LDA < max(1, N)) {
136  plasma_error("PLASMA_zheevd", "illegal value of LDA");
137  return -5;
138  }
139  if ( (plasma_desc_check(descT) != PLASMA_SUCCESS) ||
140  ( descT->m != NT*IB ) || (descT->n != NT*NB) ) {
141  plasma_error("PLASMA_zhegv", "invalid T descriptor");
142  return -7;
143  }
144  if (LDQ < max(1, N)) {
145  plasma_error("PLASMA_zheevd", "illegal value of LDQ");
146  return -9;
147  }
148  /* Quick return */
149  if (N == 0)
150  return PLASMA_SUCCESS;
151 
152  if (jobz == PlasmaVec) {
153  plasma_error("PLASMA_zheevd", "computing the eigenvectors is not supported in this version");
154  return -1;
155  }
156 
157  /* Tune NB & IB depending on N; Set NBNB */
158  status = plasma_tune(PLASMA_FUNC_ZHEEV, N, N, 0);
159  if (status != PLASMA_SUCCESS) {
160  plasma_error("PLASMA_zheevd", "plasma_tune() failed");
161  return status;
162  }
163 
164  plasma_sequence_create(plasma, &sequence);
165 
167  plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)) );
168  if (jobz == PlasmaVec) {
169  /* No need for conversion, it's just output */
170  plasma_zdesc_alloc( descQ, NB, NB, LDQ, N, 0, 0, N, N,
171  plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descQ)) );
172  }
173  } else {
174  plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N );
175  if (jobz == PlasmaVec) {
176  /* No need for conversion, it's just output */
177  descQ = plasma_desc_init(
178  PlasmaComplexDouble, NB, NB, NB*NB,
179  LDQ, N, 0, 0, N, N);
180  descQ.mat = Q;
181  }
182  }
183 
184  /* Call the tile interface */
185  PLASMA_zheevd_Tile_Async(jobz, uplo, &descA, &descT, W, &descQ, sequence, &request);
186 
188  plasma_zooptile2lap( descA, A, NB, NB, LDA, N );
189  if (jobz == PlasmaVec) {
190  plasma_zooptile2lap( descQ, Q, NB, NB, LDQ, N );
191  }
193  plasma_desc_mat_free(&descA);
194  if (jobz == PlasmaVec)
195  plasma_desc_mat_free(&descQ);
196  } else {
197  plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
198  if (jobz == PlasmaVec)
199  plasma_ziptile2lap( descQ, Q, NB, NB, LDQ, N );
201  }
202 
203  status = sequence->status;
204  plasma_sequence_destroy(plasma, sequence);
205  return status;
206 }
207 /***************************************************************************/
278  PLASMA_desc *A, double *W, PLASMA_desc *T, PLASMA_desc *Q)
279 {
281  PLASMA_sequence *sequence = NULL;
283  int status;
284 
285  plasma = plasma_context_self();
286  if (plasma == NULL) {
287  plasma_fatal_error("PLASMA_zheevd_Tile", "PLASMA not initialized");
289  }
290  plasma_sequence_create(plasma, &sequence);
291  PLASMA_zheevd_Tile_Async(jobz, uplo, A, W, T, Q, sequence, &request);
293  status = sequence->status;
294  plasma_sequence_destroy(plasma, sequence);
295  return status;
296 }
297 
298 /***************************************************************************/
329  PLASMA_desc *A,
330  double *W,
331  PLASMA_desc *T,
332  PLASMA_desc *Q,
333  PLASMA_sequence *sequence, PLASMA_request *request)
334 {
335  int NB, IB, IBNB, NT, INFO;
336  PLASMA_desc descA = *A;
337  PLASMA_desc descT = *T;
338  double *E;
340 
341  plasma = plasma_context_self();
342  if (plasma == NULL) {
343  plasma_fatal_error("PLASMA_zheevd_Tile_Async", "PLASMA not initialized");
345  }
346  if (sequence == NULL) {
347  plasma_fatal_error("PLASMA_zheevd_Tile_Async", "NULL sequence");
348  return PLASMA_ERR_UNALLOCATED;
349  }
350  if (request == NULL) {
351  plasma_fatal_error("PLASMA_zheevd_Tile_Async", "NULL request");
352  return PLASMA_ERR_UNALLOCATED;
353  }
354  /* Check sequence status */
355  if (sequence->status == PLASMA_SUCCESS)
356  request->status = PLASMA_SUCCESS;
357  else
358  return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);
359 
360  /* Set NT & NTRHS */
361  NB = PLASMA_NB;
362  IB = PLASMA_IB;
363  IBNB = IB*NB;
364  NT = (descA.ln%NB==0) ? (descA.ln/NB) : (descA.ln/NB+1);
365 
366  /* Check descriptors for correctness */
367  if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
368  plasma_error("PLASMA_zheevd_Tile_Async", "invalid descriptor");
369  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
370  }
371  if (plasma_desc_check(&descT) != PLASMA_SUCCESS) {
372  plasma_error("PLASMA_zheevd_Tile_Async", "invalid descriptor");
373  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
374  }
375  if (jobz == PlasmaVec){
376  if (plasma_desc_check(Q) != PLASMA_SUCCESS) {
377  plasma_error("PLASMA_zheevd_Tile_Async", "invalid descriptor");
378  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
379  }
380  }
381  /* Check input arguments */
382  if (jobz != PlasmaNoVec && jobz != PlasmaVec) {
383  plasma_error("PLASMA_zheevd_Tile_Async", "illegal value of jobz");
384  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
385  }
386  if (descA.m != descA.n) {
387  plasma_error("PLASMA_zheevd_Tile_Async", "matrix need to be square");
388  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
389  }
390  if (descA.nb != descA.mb) {
391  plasma_error("PLASMA_zheevd_Tile_Async", "only square tiles supported");
392  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
393  }
394  if (jobz == PlasmaVec) {
395  plasma_error("PLASMA_zheevd_Tile_Async", "computing the eigenvectors is not supported in this version");
396  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
397  }
398  if (jobz == PlasmaVec){
399  if (Q->nb != Q->mb) {
400  plasma_error("PLASMA_zheevd_Tile_Async", "only square tiles supported");
401  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
402  }
403  }
404 
405  E = (double *)plasma_shared_alloc(plasma, descA.n-1, PlasmaRealDouble);
406 
407  /* Currently NOT equivalent to LAPACK's
408  */
409 
410  /* Reduction to tridiagonal form
411  * with a two-stage approach.
412  */
413 
414 
415  /* Reduction to BAND tridiagonal form
416  */
417  plasma_dynamic_call_5(plasma_pzherbt,
418  PLASMA_enum, uplo,
419  PLASMA_desc, descA,
420  PLASMA_desc, descT,
421  PLASMA_sequence*, sequence,
422  PLASMA_request*, request);
423 
424  /*
425  * Build the Q of the first stage
426  */
427  /* if (jobz == PlasmaVec){ */
428  /* /\* Initialize Q to Identity *\/ */
429  /* plasma_dynamic_call_6(plasma_pzlaset, */
430  /* PLASMA_enum, PlasmaUpperLower, */
431  /* PLASMA_Complex64_t, 0.0, */
432  /* PLASMA_Complex64_t, 1.0, */
433  /* PLASMA_desc, descQ, */
434  /* PLASMA_sequence*, sequence, */
435  /* PLASMA_request*, request); */
436 
437  /* /\* Accumulate the transformations from the first stage *\/ */
438  /* plasma_dynamic_call_6(plasma_pzungtr, */
439  /* PLASMA_enum, uplo, */
440  /* PLASMA_desc, descA, */
441  /* PLASMA_desc, descQ, */
442  /* PLASMA_desc, descT, */
443  /* PLASMA_sequence*, sequence, */
444  /* PLASMA_request*, request); */
445  /* } */
446 
447  /* Set the V's to zero before the 2nd stage (bulge chasing) */
449  plasma_pzlaset2,
450  PLASMA_enum, uplo,
451  PLASMA_Complex64_t, 0.0,
452  PLASMA_desc, uplo == PlasmaLower ? plasma_desc_submatrix(descA, descA.mb, 0, descA.m-descA.mb, descA.n-descA.nb) :
453  plasma_desc_submatrix(descA, 0, descA.nb, descA.m-descA.mb, descA.n-descA.nb),
454  PLASMA_sequence*, sequence,
455  PLASMA_request*, request);
456  /* Reduction from BAND tridiagonal to the final condensed form
457  */
458  plasma_dynamic_call_7(plasma_pzhbrdt,
459  PLASMA_enum, uplo,
460  PLASMA_desc, descA,
461  double*, W,
462  double*, E,
463  PLASMA_desc, descT,
464  PLASMA_sequence*, sequence,
465  PLASMA_request*, request);
466  /*
467  * For eigenvalues only, call DSTERF.
468  * For eigenvectors, first call ZUNGTR to generate the unitary matrix,
469  * then call ZSTEQR.
470  */
471  if (jobz == PlasmaNoVec){
472  INFO = LAPACKE_zstedc( LAPACK_COL_MAJOR, lapack_const(PlasmaNoVec),
473  descA.n, W, E, NULL, 1);
474  }else {
475  INFO = LAPACKE_zstedc( LAPACK_COL_MAJOR, lapack_const(PlasmaNoVec),
476  descA.n, W, E, NULL, 1);
477  /* Accumulate the transformations from the second stage */
478  /*
479  plasma_dynamic_call_6(plasma_pzungtr,
480  PLASMA_enum, uplo,
481  PLASMA_desc, descA,
482  PLASMA_desc, descQ,
483  PLASMA_desc, descT,
484  PLASMA_sequence*, sequence,
485  PLASMA_request*, request);
486  LAPACKE_zsteqr(jobz, descA.n, W, E, Q->mat, Q->lm);
487  */
488  }
489  /* If matrix was scaled, then rescale eigenvalues appropriately.
490  */
491 
492  plasma_shared_free(plasma, E);
493 
494  return PLASMA_SUCCESS;
495 }