PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
zherk.c
Go to the documentation of this file.
1 
15 #include "common.h"
16 
17 /***************************************************************************/
86  double alpha, PLASMA_Complex64_t *A, int LDA,
87  double beta, PLASMA_Complex64_t *C, int LDC)
88 {
89  int NB;
90  int Am, An;
91  int status;
92  PLASMA_desc descA, descC;
94  PLASMA_sequence *sequence = NULL;
96 
97  plasma = plasma_context_self();
98  if (plasma == NULL) {
99  plasma_fatal_error("PLASMA_zherk", "PLASMA not initialized");
101  }
102 
103  /* Check input arguments */
104  if ((uplo != PlasmaUpper) && (uplo != PlasmaLower)) {
105  plasma_error("PLASMA_zherk", "illegal value of uplo");
106  return -1;
107  }
108  if ((trans != PlasmaNoTrans) && (trans != PlasmaConjTrans)) {
109  plasma_error("PLASMA_zherk", "illegal value of trans");
110  return -2;
111  }
112  if ( trans == PlasmaNoTrans ) {
113  Am = N; An = K;
114  } else {
115  Am = K; An = N;
116  }
117  if (N < 0) {
118  plasma_error("PLASMA_zherk", "illegal value of N");
119  return -3;
120  }
121  if (K < 0) {
122  plasma_error("PLASMA_zherk", "illegal value of K");
123  return -4;
124  }
125  if (LDA < max(1, Am)) {
126  plasma_error("PLASMA_zherk", "illegal value of LDA");
127  return -7;
128  }
129  if (LDC < max(1, N)) {
130  plasma_error("PLASMA_zherk", "illegal value of LDC");
131  return -10;
132  }
133 
134  /* Quick return */
135  if (N == 0 ||
136  ((alpha == (double)0.0 || K == 0.0) && beta == (double)1.0))
137  return PLASMA_SUCCESS;
138 
139  /* Tune NB depending on N & K */
140  status = plasma_tune(PLASMA_FUNC_ZHERK, N, K, 0);
141  if (status != PLASMA_SUCCESS) {
142  plasma_error("PLASMA_zherk", "plasma_tune() failed");
143  return status;
144  }
145 
146  /* Set NB */
147  NB = PLASMA_NB;
148 
149  plasma_sequence_create(plasma, &sequence);
150 
152  plasma_zooplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An, plasma_desc_mat_free(&(descA)) );
153  plasma_zooplap2tile( descC, C, NB, NB, LDC, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descC)));
154  } else {
155  plasma_ziplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An );
156  plasma_ziplap2tile( descC, C, NB, NB, LDC, N, 0, 0, N, N );
157  }
158 
159  /* Call the tile interface */
160  PLASMA_zherk_Tile_Async(uplo, trans, alpha, &descA, beta, &descC, sequence, &request);
161 
163  plasma_zooptile2lap( descC, C, NB, NB, LDC, N );
165  plasma_desc_mat_free(&descA);
166  plasma_desc_mat_free(&descC);
167  } else {
168  plasma_ziptile2lap( descA, A, NB, NB, LDA, An );
169  plasma_ziptile2lap( descC, C, NB, NB, LDC, N );
171  }
172 
173  status = sequence->status;
174  plasma_sequence_destroy(plasma, sequence);
175  return status;
176 }
177 
178 /***************************************************************************/
228  double alpha, PLASMA_desc *A,
229  double beta, PLASMA_desc *C)
230 {
232  PLASMA_sequence *sequence = NULL;
234  int status;
235 
236  plasma = plasma_context_self();
237  if (plasma == NULL) {
238  plasma_fatal_error("PLASMA_zherk_Tile", "PLASMA not initialized");
240  }
241  plasma_sequence_create(plasma, &sequence);
242  PLASMA_zherk_Tile_Async(uplo, trans, alpha, A, beta, C, sequence, &request);
244  status = sequence->status;
245  plasma_sequence_destroy(plasma, sequence);
246  return status;
247 }
248 
249 /***************************************************************************/
277  double alpha, PLASMA_desc *A,
278  double beta, PLASMA_desc *C,
279  PLASMA_sequence *sequence, PLASMA_request *request)
280 {
282  PLASMA_desc descA = *A;
283  PLASMA_desc descC = *C;
284  int N, K;
285  int Am, An, Amb;
286 
287  plasma = plasma_context_self();
288  if (plasma == NULL) {
289  plasma_fatal_error("PLASMA_zherk_Tile_Async", "PLASMA not initialized");
291  }
292  if (sequence == NULL) {
293  plasma_fatal_error("PLASMA_zherk_Tile_Async", "NULL sequence");
294  return PLASMA_ERR_UNALLOCATED;
295  }
296  if (request == NULL) {
297  plasma_fatal_error("PLASMA_zherk_Tile_Async", "NULL request");
298  return PLASMA_ERR_UNALLOCATED;
299  }
300  /* Check sequence status */
301  if (sequence->status == PLASMA_SUCCESS)
302  request->status = PLASMA_SUCCESS;
303  else
304  return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);
305 
306  /* Check descriptors for correctness */
307  if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
308  plasma_error("PLASMA_zherk_Tile_Async", "invalid first descriptor");
309  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
310  }
311  if (plasma_desc_check(&descC) != PLASMA_SUCCESS) {
312  plasma_error("PLASMA_zherk_Tile_Async", "invalid third descriptor");
313  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
314  }
315  /* Check input arguments */
316  if ((uplo != PlasmaUpper) && (uplo != PlasmaLower)) {
317  plasma_error("PLASMA_zherk", "illegal value of uplo");
318  return plasma_request_fail(sequence, request, -1);
319  }
320  if ((trans != PlasmaNoTrans) && (trans != PlasmaConjTrans)) {
321  plasma_error("PLASMA_zherk", "illegal value of transA");
322  return plasma_request_fail(sequence, request, -2);
323  }
324 
325  if ( trans == PlasmaNoTrans ) {
326  Am = descA.m;
327  An = descA.n;
328  Amb = descA.mb;
329  } else {
330  Am = descA.n;
331  An = descA.m;
332  Amb = descA.nb;
333  }
334 
335  if (descC.mb != descC.nb) {
336  plasma_error("PLASMA_zherk_Tile_Async", "only square tiles are supported");
337  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
338  }
339  if (Amb != descC.mb) {
340  plasma_error("PLASMA_zherk_Tile_Async", "tile sizes have to match");
341  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
342  }
343  if (descC.m != descC.n) {
344  plasma_error("PLASMA_zherk_Tile_Async", "only square matrix C is supported");
345  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
346  }
347  if (Am != descC.m) {
348  plasma_error("PLASMA_zherk_Tile_Async", "sizes of matrices have to match");
349  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
350  }
351 
352  N = descC.m;
353  K = An;
354 
355  /* Quick return */
356  if ( N == 0 ||
357  ((alpha == (double)0.0 || K == 0) && beta == (double)1.0))
358  return PLASMA_SUCCESS;
359 
361  PLASMA_enum, uplo,
362  PLASMA_enum, trans,
363  double, alpha,
364  PLASMA_desc, descA,
365  double, beta,
366  PLASMA_desc, descC,
367  PLASMA_sequence*, sequence,
368  PLASMA_request*, request);
369 
370  return PLASMA_SUCCESS;
371 }