PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
csyr2k.c
Go to the documentation of this file.
1 
15 #include "common.h"
16 
17 /***************************************************************************/
97  PLASMA_Complex32_t alpha, PLASMA_Complex32_t *A, int LDA, PLASMA_Complex32_t *B, int LDB,
98  PLASMA_Complex32_t beta, PLASMA_Complex32_t *C, int LDC)
99 {
    /*
     * LAPACK-layout entry point (its error strings identify it as
     * "PLASMA_csyr2k"). It validates the arguments, wraps A, B and C in tile
     * descriptors, calls PLASMA_csyr2k_Tile_Async on them, converts back, and
     * returns the status stored in the sequence.
     *
     * Return values visible in this listing:
     *   -1, -2, -3, -4, -7, -9, -12  illegal uplo, trans, N, K, LDA, LDB, LDC
     *   PLASMA_SUCCESS               quick return (nothing to compute)
     *   status of plasma_tune()      when tuning fails
     *   sequence->status             otherwise
     *
     * NOTE(review): this listing has gaps in its embedded line numbers
     * (104, 106, 111, 166, 179, 181, 189). The first line of the signature,
     * the declarations of `plasma` and `request`, the statement following the
     * "PLASMA not initialized" error, and the conditions that open the two
     * `} else {` branches are not shown here; consult the real file for them.
     */
100  int NB;
101  int Am, An;
102  int status;
103  PLASMA_desc descA, descB, descC;
105  PLASMA_sequence *sequence = NULL;
107 
108  plasma = plasma_context_self();
109  if (plasma == NULL) {
110  plasma_fatal_error("PLASMA_csyr2k", "PLASMA not initialized");
112  }
113 
114  /* Check input arguments; each failure returns minus the argument position */
115  if ((uplo != PlasmaUpper) && (uplo != PlasmaLower)) {
116  plasma_error("PLASMA_csyr2k", "illegal value of uplo");
117  return -1;
118  }
119  if ((trans != PlasmaNoTrans) && (trans != PlasmaTrans)) {
120  plasma_error("PLASMA_csyr2k", "illegal value of trans");
121  return -2;
122  }
    /* Am x An is the stored shape of A and B: N x K for PlasmaNoTrans, K x N otherwise */
123  if ( trans == PlasmaNoTrans ) {
124  Am = N; An = K;
125  } else {
126  Am = K; An = N;
127  }
128  if (N < 0) {
129  plasma_error("PLASMA_csyr2k", "illegal value of N");
130  return -3;
131  }
132  if (K < 0) {
133  plasma_error("PLASMA_csyr2k", "illegal value of K");
134  return -4;
135  }
    /* Leading dimensions must cover the stored row count (and be at least 1) */
136  if (LDA < max(1, Am)) {
137  plasma_error("PLASMA_csyr2k", "illegal value of LDA");
138  return -7;
139  }
140  if (LDB < max(1, Am)) {
141  plasma_error("PLASMA_csyr2k", "illegal value of LDB");
142  return -9;
143  }
144  if (LDC < max(1, N)) {
145  plasma_error("PLASMA_csyr2k", "illegal value of LDC");
146  return -12;
147  }
148 
149  /* Quick return: N == 0, or (alpha == 0 or K == 0) together with beta == 1 */
    /* NOTE(review): K is compared with the floating literal 0.0 here, whereas the
     * same test in PLASMA_csyr2k_Tile_Async compares against the integer 0. */
150  if (N == 0 ||
151  ((alpha == (PLASMA_Complex32_t)0.0 || K == 0.0) && beta == (PLASMA_Complex32_t)1.0))
152  return PLASMA_SUCCESS;
153 
154  /* Tune NB for this problem size (N, K) */
    /* NOTE(review): the tuning key passed is PLASMA_FUNC_CSYRK, not a
     * syr2k-specific one - confirm that sharing the syrk key is intended. */
155  status = plasma_tune(PLASMA_FUNC_CSYRK, N, K, 0);
156  if (status != PLASMA_SUCCESS) {
157  plasma_error("PLASMA_csyr2k", "plasma_tune() failed");
158  return status;
159  }
160 
161  /* Read the tile size NB from PLASMA_NB */
162  NB = PLASMA_NB;
163 
    /* NOTE(review): the result of plasma_sequence_create() is not checked, yet
     * `sequence` is dereferenced at the end of this function. */
164  plasma_sequence_create(plasma, &sequence);
165 
    /* First branch (its opening condition, listing line 166, is not shown):
     * A, B and C are converted with plasma_cooplap2tile. The trailing argument of
     * each call is cleanup code that frees the descriptors named in it; presumably
     * the macro runs it when the conversion fails - confirm against its definition. */
167  plasma_cooplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An, plasma_desc_mat_free(&(descA)) );
168  plasma_cooplap2tile( descB, B, NB, NB, LDB, An, 0, 0, Am, An, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
169  plasma_cooplap2tile( descC, C, NB, NB, LDC, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)); plasma_desc_mat_free(&(descC)));
170  } else {
    /* Second branch: the same three conversions through plasma_ciplap2tile */
171  plasma_ciplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An );
172  plasma_ciplap2tile( descB, B, NB, NB, LDB, An, 0, 0, Am, An );
173  plasma_ciplap2tile( descC, C, NB, NB, LDC, N, 0, 0, N, N );
174  }
175 
176  /* Call the tile interface */
177  PLASMA_csyr2k_Tile_Async(uplo, trans, alpha, &descA, &descB, beta, &descC, sequence, &request);
178 
    /* Convert back (the opening condition, listing line 179, and lines 181 and 189
     * are not shown). In the first branch only C is converted back, then all three
     * descriptors are freed; in the second branch A, B and C are all converted
     * back with plasma_ciptile2lap and nothing is freed here. */
180  plasma_cooptile2lap( descC, C, NB, NB, LDC, N );
182  plasma_desc_mat_free(&descA);
183  plasma_desc_mat_free(&descB);
184  plasma_desc_mat_free(&descC);
185  } else {
186  plasma_ciptile2lap( descA, A, NB, NB, LDA, An );
187  plasma_ciptile2lap( descB, B, NB, NB, LDB, An );
188  plasma_ciptile2lap( descC, C, NB, NB, LDC, N );
190  }
191 
    /* Save the sequence status before the sequence is destroyed */
192  status = sequence->status;
193  plasma_sequence_destroy(plasma, sequence);
194  return status;
195 }
196 
197 /***************************************************************************/
253 {
    /*
     * Tile-layout wrapper (its error string identifies it as
     * "PLASMA_csyr2k_Tile"). It creates a sequence, forwards uplo, trans,
     * alpha, A, B, beta and C unchanged to PLASMA_csyr2k_Tile_Async, and
     * returns the status stored in that sequence.
     *
     * NOTE(review): the signature and listing lines 254, 256, 262 and 266 are
     * not shown here. That covers the declarations of `plasma` and `request`,
     * the statement following the "PLASMA not initialized" error, and
     * whatever sits between the async call and the status read.
     */
255  PLASMA_sequence *sequence = NULL;
257  int status;
258 
259  plasma = plasma_context_self();
260  if (plasma == NULL) {
261  plasma_fatal_error("PLASMA_csyr2k_Tile", "PLASMA not initialized");
263  }
    /* NOTE(review): the result of plasma_sequence_create() is not checked, yet
     * `sequence` is dereferenced below. */
264  plasma_sequence_create(plasma, &sequence);
265  PLASMA_csyr2k_Tile_Async(uplo, trans, alpha, A, B, beta, C, sequence, &request);
    /* Save the sequence status before the sequence is destroyed */
267  status = sequence->status;
268  plasma_sequence_destroy(plasma, sequence);
269  return status;
270 }
271 
272 /***************************************************************************/
302  PLASMA_sequence *sequence, PLASMA_request *request)
303 {
    /*
     * Tile-layout routine taking a caller-supplied sequence and request (its
     * error strings identify it as "PLASMA_csyr2k_Tile_Async"). It validates
     * the sequence, the request, the three descriptors, uplo/trans, and the
     * tile and matrix sizes, then passes everything on in the final call.
     *
     * Return values visible in this listing:
     *   PLASMA_ERR_UNALLOCATED     sequence or request is NULL
     *   plasma_request_fail(...)   sequence status is not PLASMA_SUCCESS
     *                              (PLASMA_ERR_SEQUENCE_FLUSHED), a descriptor
     *                              check or a size/tile check fails
     *                              (PLASMA_ERR_ILLEGAL_VALUE), or uplo/trans
     *                              is illegal (-1 / -2)
     *   PLASMA_SUCCESS             quick return, or after the final call
     *
     * NOTE(review): the start of the signature and listing lines 304, 314 and
     * 388 are not shown here. That covers the declaration of `plasma`, the
     * statement following the "PLASMA not initialized" error, and the opening
     * of the final call.
     */
    /* Local copies of the descriptors. NOTE(review): A, B and C are
     * dereferenced here before any check in this function. */
305  PLASMA_desc descA = *A;
306  PLASMA_desc descB = *B;
307  PLASMA_desc descC = *C;
308  int N, K;
309  int Am, An, Amb;
310 
311  plasma = plasma_context_self();
312  if (plasma == NULL) {
313  plasma_fatal_error("PLASMA_csyr2k_Tile_Async", "PLASMA not initialized");
315  }
316  if (sequence == NULL) {
317  plasma_fatal_error("PLASMA_csyr2k_Tile_Async", "NULL sequence");
318  return PLASMA_ERR_UNALLOCATED;
319  }
320  if (request == NULL) {
321  plasma_fatal_error("PLASMA_csyr2k_Tile_Async", "NULL request");
322  return PLASMA_ERR_UNALLOCATED;
323  }
324  /* Check sequence status: proceed only if the sequence holds PLASMA_SUCCESS */
325  if (sequence->status == PLASMA_SUCCESS)
326  request->status = PLASMA_SUCCESS;
327  else
328  return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);
329 
330  /* Check descriptors for correctness */
331  if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
332  plasma_error("PLASMA_csyr2k_Tile_Async", "invalid first descriptor");
333  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
334  }
335  if (plasma_desc_check(&descB) != PLASMA_SUCCESS) {
336  plasma_error("PLASMA_csyr2k_Tile_Async", "invalid second descriptor");
337  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
338  }
339  if (plasma_desc_check(&descC) != PLASMA_SUCCESS) {
340  plasma_error("PLASMA_csyr2k_Tile_Async", "invalid third descriptor");
341  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
342  }
343  /* Check input arguments */
    /* NOTE(review): the two messages below name "PLASMA_csyr2k", while every other
     * message in this function names "PLASMA_csyr2k_Tile_Async". */
344  if ((uplo != PlasmaUpper) && (uplo != PlasmaLower)) {
345  plasma_error("PLASMA_csyr2k", "illegal value of uplo");
346  return plasma_request_fail(sequence, request, -1);
347  }
348  if ((trans != PlasmaNoTrans) && (trans != PlasmaTrans)) {
349  plasma_error("PLASMA_csyr2k", "illegal value of trans");
350  return plasma_request_fail(sequence, request, -2);
351  }
352 
    /* Am, An, Amb: rows, columns and row tile size of A as seen through `trans`;
     * the m/n and mb/nb fields of descA are swapped when trans != PlasmaNoTrans. */
353  if ( trans == PlasmaNoTrans ) {
354  Am = descA.m;
355  An = descA.n;
356  Amb = descA.mb;
357  } else {
358  Am = descA.n;
359  An = descA.m;
360  Amb = descA.nb;
361  }
362 
363  if (descC.mb != descC.nb) {
364  plasma_error("PLASMA_csyr2k_Tile_Async", "only square tiles for C are supported");
365  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
366  }
    /* B must use the same tile shape as A, and Amb must equal the tile size of C */
367  if ( (descB.mb != descA.mb) || (descB.nb != descA.nb) || (Amb != descC.mb) ){
368  plasma_error("PLASMA_csyr2k_Tile_Async", "tile sizes have to match");
369  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
370  }
371  if (descC.m != descC.n) {
372  plasma_error("PLASMA_csyr2k_Tile_Async", "only square matrix C is supported");
373  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
374  }
    /* B must have the same dimensions as A, and Am must equal the order of C */
375  if ( (descB.m != descA.m) || (descB.n != descA.n) || (Am != descC.m) ){
376  plasma_error("PLASMA_csyr2k_Tile_Async", "sizes of matrices have to match");
377  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
378  }
379 
380  N = descC.m;
381  K = An;
382 
383  /* Quick return: N == 0, or (alpha == 0 or K == 0) together with beta == 1 */
384  if ( N == 0 ||
385  ((alpha == (PLASMA_Complex32_t)0.0 || K == 0) && beta == (PLASMA_Complex32_t)1.0))
386  return PLASMA_SUCCESS;
387 
    /* Argument list, as (type, value) pairs, of a call whose opening line
     * (listing line 388) is not shown; the callee cannot be identified from here. */
389  PLASMA_enum, uplo,
390  PLASMA_enum, trans,
391  PLASMA_Complex32_t, alpha,
392  PLASMA_desc, descA,
393  PLASMA_desc, descB,
394  PLASMA_Complex32_t, beta,
395  PLASMA_desc, descC,
396  PLASMA_sequence*, sequence,
397  PLASMA_request*, request);
398 
399  return PLASMA_SUCCESS;
400 }