PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
cheev.c
Go to the documentation of this file.
1 
16 #include <lapacke.h>
17 #include "common.h"
18 
19 /***************************************************************************/
97  PLASMA_Complex32_t *A, int LDA,
98  float *W,
99  PLASMA_desc *descT,
100  PLASMA_Complex32_t *Q, int LDQ)
101 {
102  int NB, IB, NT;
103  int status;
105  PLASMA_sequence *sequence = NULL;
107  PLASMA_desc descA, descQ;
108 
109  plasma = plasma_context_self();
110  if (plasma == NULL) {
111  plasma_error("PLASMA_cheev", "PLASMA not initialized");
113  }
114 
115  /* Tune NB & IB depending on N; Set NBNB */
116  status = plasma_tune(PLASMA_FUNC_CHEEV, N, N, 0);
117  if (status != PLASMA_SUCCESS) {
118  plasma_error("PLASMA_cheev", "plasma_tune() failed");
119  return status;
120  }
121 
122  /* Set NT */
123  NB = PLASMA_NB;
124  IB = PLASMA_IB;
125  NT = (N%NB==0) ? (N/NB) : (N/NB+1);
126 
127  /* Check input arguments */
128  if (jobz != PlasmaNoVec && jobz != PlasmaVec) {
129  plasma_error("PLASMA_cheev", "illegal value of jobz");
130  return -1;
131  }
132  if (uplo != PlasmaLower && uplo != PlasmaUpper) {
133  plasma_error("PLASMA_cheev", "illegal value of uplo");
134  return -2;
135  }
136  if (N < 0) {
137  plasma_error("PLASMA_cheev", "illegal value of N");
138  return -3;
139  }
140  if (LDA < max(1, N)) {
141  plasma_error("PLASMA_cheev", "illegal value of LDA");
142  return -5;
143  }
144  if ( (plasma_desc_check(descT) != PLASMA_SUCCESS) ||
145  ( descT->m != NT*IB ) || (descT->n != NT*NB) ) {
146  plasma_error("PLASMA_cheev", "invalid T descriptor");
147  return -7;
148  }
149  if (LDQ < max(1, N)) {
150  plasma_error("PLASMA_cheev", "illegal value of LDQ");
151  return -9;
152  }
153  /* Quick return */
154  if (N == 0)
155  return PLASMA_SUCCESS;
156 
157  if (jobz == PlasmaVec) {
158  plasma_error("PLASMA_cheev", "computing the eigenvectors is not supported in this version");
159  return -1;
160  }
161 
162  plasma_sequence_create(plasma, &sequence);
163 
165  plasma_cooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)) );
166  if (jobz == PlasmaVec) {
167  /* No need for conversion, it's just output */
168  plasma_cdesc_alloc( descQ, NB, NB, LDQ, N, 0, 0, N, N,
169  plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descQ)) );
170  }
171  } else {
172  plasma_ciplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N );
173  if (jobz == PlasmaVec) {
174  /* No need for conversion, it's just output */
175  descQ = plasma_desc_init(
176  PlasmaComplexFloat, NB, NB, NB*NB,
177  LDQ, N, 0, 0, N, N);
178  descQ.mat = Q;
179  }
180  }
181 
182  /* Call the tile interface */
183  PLASMA_cheev_Tile_Async(jobz, uplo, &descA, W, descT, &descQ, sequence, &request);
184 
186  plasma_cooptile2lap( descA, A, NB, NB, LDA, N );
187  if (jobz == PlasmaVec) {
188  plasma_cooptile2lap( descQ, Q, NB, NB, LDQ, N );
189  }
191  plasma_desc_mat_free(&descA);
192  if (jobz == PlasmaVec)
193  plasma_desc_mat_free(&descQ);
194  } else {
195  plasma_ciptile2lap( descA, A, NB, NB, LDA, N );
196  if (jobz == PlasmaVec)
197  plasma_ciptile2lap( descQ, Q, NB, NB, LDQ, N );
199  }
200 
201  status = sequence->status;
202  plasma_sequence_destroy(plasma, sequence);
203  return status;
204 }
205 /***************************************************************************/
275  PLASMA_desc *A, float *W,
277 {
279  PLASMA_sequence *sequence = NULL;
281  int status;
282 
283  plasma = plasma_context_self();
284  if (plasma == NULL) {
285  plasma_fatal_error("PLASMA_cheev_Tile", "PLASMA not initialized");
287  }
288  plasma_sequence_create(plasma, &sequence);
289  PLASMA_cheev_Tile_Async(jobz, uplo, A, W, T, Q, sequence, &request);
291  status = sequence->status;
292  plasma_sequence_destroy(plasma, sequence);
293  return status;
294 }
295 
296 /***************************************************************************/
327  PLASMA_desc *A,
328  float *W,
329  PLASMA_desc *T,
330  PLASMA_desc *Q,
331  PLASMA_sequence *sequence, PLASMA_request *request)
332 {
333  PLASMA_desc descA = *A;
334  PLASMA_desc descT = *T;
335  float *E;
337 
338  plasma = plasma_context_self();
339  if (plasma == NULL) {
340  plasma_fatal_error("PLASMA_cheev_Tile_Async", "PLASMA not initialized");
342  }
343  if (sequence == NULL) {
344  plasma_fatal_error("PLASMA_cheev_Tile_Async", "NULL sequence");
345  return PLASMA_ERR_UNALLOCATED;
346  }
347  if (request == NULL) {
348  plasma_fatal_error("PLASMA_cheev_Tile_Async", "NULL request");
349  return PLASMA_ERR_UNALLOCATED;
350  }
351  /* Check sequence status */
352  if (sequence->status == PLASMA_SUCCESS)
353  request->status = PLASMA_SUCCESS;
354  else
355  return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);
356 
357  /* Check descriptors for correctness */
358  if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
359  plasma_error("PLASMA_cheev_Tile_Async", "invalid descriptor");
360  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
361  }
362  if (plasma_desc_check(&descT) != PLASMA_SUCCESS) {
363  plasma_error("PLASMA_cheev_Tile_Async", "invalid descriptor");
364  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
365  }
366  if (jobz == PlasmaVec){
367  if (plasma_desc_check(Q) != PLASMA_SUCCESS) {
368  plasma_error("PLASMA_cheev_Tile_Async", "invalid descriptor");
369  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
370  }
371  }
372  /* Check input arguments */
373  if (jobz != PlasmaNoVec && jobz != PlasmaVec) {
374  plasma_error("PLASMA_cheev_Tile_Async", "illegal value of jobz");
375  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
376  }
377  if (uplo != PlasmaLower && uplo != PlasmaUpper) {
378  plasma_error("PLASMA_cheev_Tile_Async", "illegal value of uplo");
379  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
380  }
381  if (descA.m != descA.n) {
382  plasma_error("PLASMA_cheev_Tile_Async", "matrix need to be square");
383  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
384  }
385  if (descA.nb != descA.mb) {
386  plasma_error("PLASMA_cheev_Tile_Async", "only square tiles supported");
387  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
388  }
389  if (jobz == PlasmaVec) {
390  plasma_error("PLASMA_cheev_Tile_Async", "computing the eigenvectors is not supported in this version");
391  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
392  }
393  if (jobz == PlasmaVec){
394  if (Q->nb != Q->mb) {
395  plasma_error("PLASMA_cheev_Tile_Async", "only square tiles supported");
396  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
397  }
398  }
399 
400  E = (float *)plasma_shared_alloc(plasma, descA.n-1, PlasmaRealDouble);
401 
402  /* Currently NOT equivalent to LAPACK's
403  */
404 
405  /* Reduction to tridiagonal form
406  * with a two-stage approach.
407  */
408 
409 
410  /* Reduction to BAND tridiagonal form
411  */
412  plasma_dynamic_call_5(plasma_pcherbt,
413  PLASMA_enum, uplo,
414  PLASMA_desc, descA,
415  PLASMA_desc, descT,
416  PLASMA_sequence*, sequence,
417  PLASMA_request*, request);
418 
419  /*
420  * Build the Q of the first stage
421  */
422  /* if (jobz == PlasmaVec){ */
423  /* /\* Initialize Q to Identity *\/ */
424  /* plasma_dynamic_call_6(plasma_pclaset, */
425  /* PLASMA_enum, PlasmaUpperLower, */
426  /* PLASMA_Complex32_t, 0.0, */
427  /* PLASMA_Complex32_t, 1.0, */
428  /* PLASMA_desc, descQ, */
429  /* PLASMA_sequence*, sequence, */
430  /* PLASMA_request*, request); */
431 
432  /* /\* Accumulate the transformations from the first stage *\/ */
433  /* plasma_dynamic_call_6(plasma_pcungtr, */
434  /* PLASMA_enum, uplo, */
435  /* PLASMA_desc, descA, */
436  /* PLASMA_desc, descQ, */
437  /* PLASMA_desc, descT, */
438  /* PLASMA_sequence*, sequence, */
439  /* PLASMA_request*, request); */
440  /* } */
441 
442  /* Set the V's to zero before the 2nd stage (bulge chasing) */
444  plasma_pclaset2,
445  PLASMA_enum, uplo,
446  PLASMA_Complex32_t, 0.0,
447  PLASMA_desc, uplo == PlasmaLower ? plasma_desc_submatrix(descA, descA.mb, 0, descA.m-descA.mb, descA.n-descA.nb) :
448  plasma_desc_submatrix(descA, 0, descA.nb, descA.m-descA.mb, descA.n-descA.nb),
449  PLASMA_sequence*, sequence,
450  PLASMA_request*, request);
451  /* Reduction from BAND tridiagonal to the final condensed form
452  */
453  plasma_dynamic_call_7(plasma_pchbrdt,
454  PLASMA_enum, uplo,
455  PLASMA_desc, descA,
456  float*, W,
457  float*, E,
458  PLASMA_desc, descT,
459  PLASMA_sequence*, sequence,
460  PLASMA_request*, request);
461 
462  /*
463  * For eigenvalues only, call DSTERF.
464  * For eigenvectors, first call CUNGTR to generate the unitary matrix,
465  * then call ZSTEQR.
466  */
468  if (jobz == PlasmaNoVec){
469  LAPACKE_ssterf(descA.n, W, E);
470  }else {
471  LAPACKE_ssterf(descA.n, W, E);
472  /* Accumulate the transformations from the second stage */
473  /*
474  plasma_dynamic_call_6(plasma_pcungtr,
475  PLASMA_enum, uplo,
476  PLASMA_desc, descA,
477  PLASMA_desc, descQ,
478  PLASMA_desc, descT,
479  PLASMA_sequence*, sequence,
480  PLASMA_request*, request);
481  LAPACKE_csteqr(jobz, descA.n, W, E, Q->mat, Q->lm);
482  */
483  }
484  /* If matrix was scaled, then rescale eigenvalues appropriately.
485  */
486 
487  plasma_shared_free(plasma, E);
488 
489  return PLASMA_SUCCESS;
490 }