PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
zhetrd.c
Go to the documentation of this file.
1 
16 #include "common.h"
17 /***************************************************************************/
/*
 * LAPACK-layout driver for the Hermitian tridiagonal reduction
 * (presumably PLASMA_zhetrd, going by the tag used in its error messages).
 *
 * NOTE(review): this listing's embedded line numbers skip 100, 109, 111,
 * 117, 169, 183, 188 and 196.  The function name / return type, the
 * parameters jobz, uplo and N, the declarations of `plasma` and `request`,
 * the return after the "not initialized" error, and the `if` that opens
 * each `} else {` below are therefore not visible here.
 *
 * Visible parameters:
 *   A, LDA  - matrix in LAPACK layout and its leading dimension; LDA must
 *             be >= max(1, N).
 *   D, E    - forwarded unchanged to PLASMA_zhetrd_Tile_Async (presumably
 *             the diagonal and off-diagonal of the result - TODO confirm).
 *   descT   - must pass plasma_desc_check() and measure NT*IB by NT*NB.
 *   Q, LDQ  - LDQ must be >= max(1, N) whatever jobz is; Q is only
 *             referenced on jobz == PlasmaVec paths.
 *
 * Return values visible in this listing:
 *   the plasma_tune() status if tuning fails;
 *   -1 illegal jobz, and also jobz == PlasmaVec (not supported);
 *   -2 illegal uplo; -3 N < 0; -5 bad LDA; -8 bad descT; -10 bad LDQ;
 *   PLASMA_SUCCESS when N == 0;
 *   otherwise sequence->status as read after the tile call.
 */
101  PLASMA_Complex64_t *A, int LDA,
102  double *D,
103  double *E,
104  PLASMA_desc *descT,
105  PLASMA_Complex64_t *Q, int LDQ)
106 {
107  int NB, IB, NT;
108  int status;
110  PLASMA_sequence *sequence = NULL;
112  PLASMA_desc descA, descQ;
113 
114  plasma = plasma_context_self();
115  if (plasma == NULL) {
116  plasma_error("PLASMA_zhetrd", "PLASMA not initialized");
118  }
119 
120  /* Tune NB & IB depending on N; Set NBNB */
121  status = plasma_tune(PLASMA_FUNC_ZHETRD, N, N, 0);
122  if (status != PLASMA_SUCCESS) {
123  plasma_error("PLASMA_zhetrd", "plasma_tune() failed");
124  return status;
125  }
126 
127  /* Set NT */
     /* NT is N/NB rounded up.  Note that tuning and this computation run
      * before N is validated below. */
128  NB = PLASMA_NB;
129  IB = PLASMA_IB;
130  NT = (N%NB==0) ? (N/NB) : (N/NB+1);
131 
132  /* Check input arguments */
133  if (jobz != PlasmaNoVec && jobz != PlasmaVec) {
134  plasma_error("PLASMA_zhetrd", "illegal value of jobz");
135  return -1;
136  }
137  if (uplo != PlasmaLower && uplo != PlasmaUpper) {
138  plasma_error("PLASMA_zhetrd", "illegal value of uplo");
139  return -2;
140  }
141  if (N < 0) {
142  plasma_error("PLASMA_zhetrd", "illegal value of N");
143  return -3;
144  }
145  if (LDA < max(1, N)) {
146  plasma_error("PLASMA_zhetrd", "illegal value of LDA");
147  return -5;
148  }
     /* descT is dereferenced only after plasma_desc_check() succeeded,
      * thanks to the short-circuit ||. */
149  if ( (plasma_desc_check(descT) != PLASMA_SUCCESS) ||
150  ( descT->m != NT*IB ) || (descT->n != NT*NB) ) {
151  plasma_error("PLASMA_zhetrd", "invalid T descriptor");
152  return -8;
153  }
     /* LDQ is checked even when jobz == PlasmaNoVec. */
154  if (LDQ < max(1, N)) {
155  plasma_error("PLASMA_zhetrd", "illegal value of LDQ");
156  return -10;
157  }
158  /* Quick return */
159  if (N == 0)
160  return PLASMA_SUCCESS;
161 
     /* PlasmaVec is rejected here with the same code (-1) as an illegal
      * jobz.  Past this point jobz can only be PlasmaNoVec, so every
      * `jobz == PlasmaVec` branch further down is unreachable. */
162  if (jobz == PlasmaVec) {
163  plasma_error("PLASMA_zhetrd", "computing the eigenvectors is not supported in this version");
164  return -1;
165  }
166 
     /* The return value of plasma_sequence_create() is not checked. */
167  plasma_sequence_create(plasma, &sequence);
168 
     /* Convert A (and Q) from LAPACK layout to tile descriptors.  The
      * condition selecting between the "oop" and "ip" variants is on a
      * line missing from this listing (169). */
170  plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N , plasma_desc_mat_free(&(descA)) );
171  if (jobz == PlasmaVec) {
172  plasma_zooplap2tile( descQ, Q, NB, NB, LDQ, N, 0, 0, N, N , plasma_desc_mat_free(&(descQ)) );
173  }
174  } else {
175  plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N );
176  if (jobz == PlasmaVec)
177  plasma_ziplap2tile( descQ, Q, NB, NB, LDQ, N, 0, 0, N, N );
178  }
179 
180  /* Call the tile interface */
     /* The call's own return value is discarded; the outcome is read from
      * sequence->status below.  &descQ is passed although descQ is only
      * mentioned by the PlasmaVec-guarded conversions above. */
181  PLASMA_zhetrd_Tile_Async(jobz, uplo, &descA, D, E, descT, &descQ, sequence, &request);
182 
     /* Convert the tile data back to LAPACK layout; plasma_desc_mat_free()
      * is called only in the first branch.  Lines 183, 188 and 196 of the
      * original file are missing from this listing. */
184  plasma_zooptile2lap( descA, A, NB, NB, LDA, N );
185  if (jobz == PlasmaVec) {
186  plasma_zooptile2lap( descQ, Q, NB, NB, LDQ, N );
187  }
189  plasma_desc_mat_free(&descA);
190  if (jobz == PlasmaVec)
191  plasma_desc_mat_free(&descQ);
192  } else {
193  plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
194  if (jobz == PlasmaVec)
195  plasma_ziptile2lap( descQ, Q, NB, NB, LDQ, N );
197  }
198 
     /* Copy the status out before the sequence object is destroyed. */
199  status = sequence->status;
200  plasma_sequence_destroy(plasma, sequence);
201  return status;
202 }
203 /***************************************************************************/
/*
 * Tile-layout wrapper (presumably PLASMA_zhetrd_Tile, going by its error
 * tag): creates a sequence, forwards every argument to
 * PLASMA_zhetrd_Tile_Async, and returns the sequence status.
 *
 * NOTE(review): this listing's embedded line numbers skip 279, 282, 284,
 * 290 and 294, so the function name / return type, the parameters jobz and
 * uplo, the declarations of `plasma` and `request`, the statement after the
 * fatal error, and whatever sits between the async call and the status
 * read are not visible here.
 *
 * Parameters A, D, E, T and Q are passed through untouched; no validation
 * is done in this wrapper.
 *
 * Returns sequence->status as read after the async call.
 */
280  PLASMA_desc *A, double *D, double *E, PLASMA_desc *T, PLASMA_desc *Q)
281 {
283  PLASMA_sequence *sequence = NULL;
285  int status;
286 
287  plasma = plasma_context_self();
288  if (plasma == NULL) {
289  plasma_fatal_error("PLASMA_zhetrd_Tile", "PLASMA not initialized");
291  }
     /* Neither plasma_sequence_create() nor the async call has its return
      * value checked; failures are expected to show up in sequence->status. */
292  plasma_sequence_create(plasma, &sequence);
293  PLASMA_zhetrd_Tile_Async(jobz, uplo, A, D, E, T, Q, sequence, &request);
     /* Copy the status out before the sequence object is destroyed. */
295  status = sequence->status;
296  plasma_sequence_destroy(plasma, sequence);
297  return status;
298 }
299 
300 /***************************************************************************/
/*
 * Sequence-based tile routine (presumably PLASMA_zhetrd_Tile_Async, going
 * by its error tag).  After validation it issues three
 * plasma_dynamic_call_N invocations in order: plasma_pzherbt,
 * plasma_pzlaset2 and plasma_pzhbrdt.
 *
 * NOTE(review): this listing's embedded line numbers skip 331, 342 and
 * 347, so the function name / return type, the parameters jobz and uplo,
 * the declaration of `plasma`, and the statement after the
 * "not initialized" fatal error are not visible here.
 *
 * Parameters:
 *   A        - tile descriptor of the input matrix; must be square
 *              (m == n) with square tiles (mb == nb).
 *   D, E     - output arrays handed to plasma_pzhbrdt (presumably the
 *              diagonal and off-diagonal - TODO confirm).
 *   T        - descriptor handed to plasma_pzherbt and plasma_pzhbrdt.
 *   Q        - only dereferenced when jobz == PlasmaVec, which is then
 *              rejected as unsupported.
 *   sequence - must be non-NULL; a sequence whose status is not
 *              PLASMA_SUCCESS fails the request with
 *              PLASMA_ERR_SEQUENCE_FLUSHED.
 *   request  - must be non-NULL; its status is set to PLASMA_SUCCESS on
 *              entry when the sequence is healthy.
 *
 * Returns PLASMA_ERR_UNALLOCATED for a NULL sequence or request, the value
 * of plasma_request_fail() on any validation failure, and PLASMA_SUCCESS
 * once the three calls have been issued.  uplo is not validated here.
 */
332  PLASMA_desc *A,
333  double *D,
334  double *E,
335  PLASMA_desc *T,
336  PLASMA_desc *Q,
337  PLASMA_sequence *sequence, PLASMA_request *request)
338 {
     /* A and T are dereferenced here, before any check has run. */
339  PLASMA_desc descA = *A;
340  PLASMA_desc descT = *T;
341 
343 
344  plasma = plasma_context_self();
345  if (plasma == NULL) {
346  plasma_fatal_error("PLASMA_zhetrd_Tile_Async", "PLASMA not initialized");
348  }
349  if (sequence == NULL) {
350  plasma_fatal_error("PLASMA_zhetrd_Tile_Async", "NULL sequence");
351  return PLASMA_ERR_UNALLOCATED;
352  }
353  if (request == NULL) {
354  plasma_fatal_error("PLASMA_zhetrd_Tile_Async", "NULL request");
355  return PLASMA_ERR_UNALLOCATED;
356  }
357  /* Check sequence status */
358  if (sequence->status == PLASMA_SUCCESS)
359  request->status = PLASMA_SUCCESS;
360  else
361  return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);
362 
363  /* Check descriptors for correctness */
     /* All three failures below emit the same "invalid descriptor" text,
      * so the message does not say which of A, T or Q was rejected. */
364  if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
365  plasma_error("PLASMA_zhetrd_Tile_Async", "invalid descriptor");
366  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
367  }
368  if (plasma_desc_check(&descT) != PLASMA_SUCCESS) {
369  plasma_error("PLASMA_zhetrd_Tile_Async", "invalid descriptor");
370  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
371  }
372  if ( (jobz == PlasmaVec) && (plasma_desc_check(Q) != PLASMA_SUCCESS) ) {
373  plasma_error("PLASMA_zhetrd_Tile_Async", "invalid descriptor");
374  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
375  }
376 
377  /* Check input arguments */
378  if (jobz != PlasmaNoVec && jobz != PlasmaVec) {
379  plasma_error("PLASMA_zhetrd_Tile_Async", "illegal value of jobz");
380  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
381  }
382  if (descA.m != descA.n) {
383  plasma_error("PLASMA_zhetrd_Tile_Async", "matrix need to be square");
384  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
385  }
386  if (descA.nb != descA.mb) {
387  plasma_error("PLASMA_zhetrd_Tile_Async", "only square tiles supported");
388  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
389  }
     /* PlasmaVec is rejected unconditionally here, which makes the
      * Q square-tile check right after it unreachable. */
390  if (jobz == PlasmaVec) {
391  plasma_error("PLASMA_zhetrd_Tile_Async", "computing the eigenvectors is not supported in this version");
392  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
393  }
394  if ( (jobz == PlasmaVec) && (Q->nb != Q->mb) ) {
395  plasma_error("PLASMA_zhetrd_Tile_Async", "only square tiles supported");
396  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
397  }
398 
399  /* Reduction to tridiagonal form
400  * with a two-stage approach.
401  */
402 
403  /* Reduction to BAND tridiagonal form
404  */
     /* First stage: plasma_pzherbt receives uplo, copies of the A and T
      * descriptors, and the sequence/request pair. */
405  plasma_dynamic_call_5(plasma_pzherbt,
406  PLASMA_enum, uplo,
407  PLASMA_desc, descA,
408  PLASMA_desc, descT,
409  PLASMA_sequence*, sequence,
410  PLASMA_request*, request);
411 
412  /*
413  * Build the Q of the first stage
414  */
     /* The block below is disabled; it refers to a descQ variable that is
      * not declared anywhere in the visible part of this function. */
415  /* if (jobz == PlasmaVec){ */
416  /* /\* Initialize Q to Identity *\/ */
417  /* plasma_dynamic_call_6(plasma_pzlaset, */
418  /* PLASMA_enum, PlasmaUpperLower, */
419  /* PLASMA_Complex64_t, 0.0, */
420  /* PLASMA_Complex64_t, 1.0, */
421  /* PLASMA_desc, descQ, */
422  /* PLASMA_sequence*, sequence, */
423  /* PLASMA_request*, request); */
424  /* /\* Accumulate the transformations from the first stage*\/ */
425  /* plasma_dynamic_call_6(plasma_pzungtr, */
426  /* PLASMA_enum, uplo, */
427  /* PLASMA_desc, descA, */
428  /* PLASMA_desc, descQ, */
429  /* PLASMA_desc, descT, */
430  /* PLASMA_sequence*, sequence, */
431  /* PLASMA_request*, request); */
432  /* } */
433 
434  /* Set the V's to zero before the 2nd stage (bulge chasing) */
435  /*
436  */
     /* plasma_pzlaset2 is given the value 0.0 and a sub-descriptor of A
      * that skips the first tile row (PlasmaLower) or the first tile
      * column (any other uplo).  Assumes plasma_desc_submatrix() takes
      * (desc, row offset, column offset, rows, columns) - TODO confirm. */
437  plasma_dynamic_call_5(plasma_pzlaset2,
438  PLASMA_enum, uplo,
439  PLASMA_Complex64_t, 0.0,
440  PLASMA_desc, uplo == PlasmaLower ? plasma_desc_submatrix(descA, descA.mb, 0, descA.m-descA.mb, descA.n-descA.nb)
441  : plasma_desc_submatrix(descA, 0, descA.nb, descA.m-descA.mb, descA.n-descA.nb),
442  PLASMA_sequence*, sequence,
443  PLASMA_request*, request);
444 
445  /* Reduction from BAND tridiagonal to the final condensed form
446  */
     /* Second stage: plasma_pzhbrdt additionally receives the D and E
      * arrays supplied by the caller. */
447  plasma_dynamic_call_7(plasma_pzhbrdt,
448  PLASMA_enum, uplo,
449  PLASMA_desc, descA,
450  double*, D,
451  double*, E,
452  PLASMA_desc, descT,
453  PLASMA_sequence*, sequence,
454  PLASMA_request*, request);
455 
456 
     /* Always PLASMA_SUCCESS at this point; nothing from the three calls
      * above is inspected here. */
457  return PLASMA_SUCCESS;
458 }