PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
chemm.c
Go to the documentation of this file.
1 
15 #include "common.h"
16 
17 /***************************************************************************/
95  PLASMA_Complex32_t alpha, PLASMA_Complex32_t *A, int LDA,
96  PLASMA_Complex32_t *B, int LDB,
97  PLASMA_Complex32_t beta, PLASMA_Complex32_t *C, int LDC)
98 {
99  int NB;
100  int Am;
101  int status;
102  PLASMA_desc descA, descB, descC;
104  PLASMA_sequence *sequence = NULL;
106 
107  plasma = plasma_context_self();
108  if (plasma == NULL) {
109  plasma_fatal_error("PLASMA_chemm", "PLASMA not initialized");
111  }
112 
113  /* Check input arguments */
114  if ( (side != PlasmaLeft) && (side != PlasmaRight) ){
115  plasma_error("PLASMA_chemm", "illegal value of side");
116  return -1;
117  }
118  if ((uplo != PlasmaLower) && (uplo != PlasmaUpper)) {
119  plasma_error("PLASMA_chemm", "illegal value of uplo");
120  return -2;
121  }
122  Am = ( side == PlasmaLeft ) ? M : N;
123  if (M < 0) {
124  plasma_error("PLASMA_chemm", "illegal value of M");
125  return -3;
126  }
127  if (N < 0) {
128  plasma_error("PLASMA_chemm", "illegal value of N");
129  return -4;
130  }
131  if (LDA < max(1, Am)) {
132  plasma_error("PLASMA_chemm", "illegal value of LDA");
133  return -7;
134  }
135  if (LDB < max(1, M)) {
136  plasma_error("PLASMA_chemm", "illegal value of LDB");
137  return -9;
138  }
139  if (LDC < max(1, M)) {
140  plasma_error("PLASMA_chemm", "illegal value of LDC");
141  return -12;
142  }
143 
144  /* Quick return */
145  if (M == 0 || N == 0 ||
146  ((alpha == (PLASMA_Complex32_t)0.0) && beta == (PLASMA_Complex32_t)1.0))
147  return PLASMA_SUCCESS;
148 
149  /* Tune NB depending on M, N & NRHS; Set NBNBSIZE */
150  status = plasma_tune(PLASMA_FUNC_CHEMM, M, N, 0);
151  if (status != PLASMA_SUCCESS) {
152  plasma_error("PLASMA_chemm", "plasma_tune() failed");
153  return status;
154  }
155 
156  /* Set MT & NT & KT */
157  NB = PLASMA_NB;
158 
159  plasma_sequence_create(plasma, &sequence);
160 
162  plasma_cooplap2tile( descA, A, NB, NB, LDA, Am, 0, 0, Am, Am,
163  plasma_desc_mat_free(&(descA)) );
164  plasma_cooplap2tile( descB, B, NB, NB, LDB, N, 0, 0, M, N,
165  plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
166  plasma_cooplap2tile( descC, C, NB, NB, LDC, N, 0, 0, M, N,
167  plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)); plasma_desc_mat_free(&(descC)));
168  } else {
169  plasma_ciplap2tile( descA, A, NB, NB, LDA, Am, 0, 0, Am, Am );
170  plasma_ciplap2tile( descB, B, NB, NB, LDB, N, 0, 0, M, N );
171  plasma_ciplap2tile( descC, C, NB, NB, LDC, N, 0, 0, M, N );
172  }
173 
174  /* Call the tile interface */
176  side, uplo, alpha, &descA, &descB, beta, &descC, sequence, &request);
177 
179  plasma_cooptile2lap( descC, C, NB, NB, LDC, N );
181  plasma_desc_mat_free(&descA);
182  plasma_desc_mat_free(&descB);
183  plasma_desc_mat_free(&descC);
184  } else {
185  plasma_ciptile2lap( descA, A, NB, NB, LDA, Am );
186  plasma_ciptile2lap( descB, B, NB, NB, LDB, N );
187  plasma_ciptile2lap( descC, C, NB, NB, LDC, N );
189  }
190 
191  status = sequence->status;
192  plasma_sequence_destroy(plasma, sequence);
193  return status;
194 }
195 
196 /***************************************************************************/
257 {
259  PLASMA_sequence *sequence = NULL;
261  int status;
262 
263  plasma = plasma_context_self();
264  if (plasma == NULL) {
265  plasma_fatal_error("PLASMA_chemm_Tile", "PLASMA not initialized");
267  }
268  plasma_sequence_create(plasma, &sequence);
269  PLASMA_chemm_Tile_Async(side, uplo, alpha, A, B, beta, C, sequence, &request);
271  status = sequence->status;
272  plasma_sequence_destroy(plasma, sequence);
273  return status;
274 }
275 
276 /***************************************************************************/
306  PLASMA_sequence *sequence, PLASMA_request *request)
307 {
309  PLASMA_desc descA = *A;
310  PLASMA_desc descB = *B;
311  PLASMA_desc descC = *C;
312 
313  plasma = plasma_context_self();
314  if (plasma == NULL) {
315  plasma_fatal_error("PLASMA_chemm_Tile_Async", "PLASMA not initialized");
317  }
318  if (sequence == NULL) {
319  plasma_fatal_error("PLASMA_chemm_Tile_Async", "NULL sequence");
320  return PLASMA_ERR_UNALLOCATED;
321  }
322  if (request == NULL) {
323  plasma_fatal_error("PLASMA_chemm_Tile_Async", "NULL request");
324  return PLASMA_ERR_UNALLOCATED;
325  }
326  /* Check sequence status */
327  if (sequence->status == PLASMA_SUCCESS)
328  request->status = PLASMA_SUCCESS;
329  else
330  return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);
331 
332  /* Check descriptors for correctness */
333  if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
334  plasma_error("PLASMA_chemm_Tile_Async", "invalid first descriptor");
335  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
336  }
337  if (plasma_desc_check(&descB) != PLASMA_SUCCESS) {
338  plasma_error("PLASMA_chemm_Tile_Async", "invalid second descriptor");
339  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
340  }
341  if (plasma_desc_check(&descC) != PLASMA_SUCCESS) {
342  plasma_error("PLASMA_chemm_Tile_Async", "invalid third descriptor");
343  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
344  }
345  /* Check input arguments */
346  if ( (side != PlasmaLeft) && (side != PlasmaRight) ){
347  plasma_error("PLASMA_chemm_Tile_Async", "illegal value of side");
348  return plasma_request_fail(sequence, request, -1);
349  }
350  if ((uplo != PlasmaLower) && (uplo != PlasmaUpper)) {
351  plasma_error("PLASMA_chemm_Tile_Async", "illegal value of uplo");
352  return plasma_request_fail(sequence, request, -2);
353  }
354 
355  /* Check matrices sizes */
356  if ( (descB.m != descC.m) || (descB.n != descC.n) ) {
357  plasma_error("PLASMA_chemm_Tile_Async", "B and C must have the same size");
358  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
359  }
360  if ( (descA.m != descA.n) ||
361  ( (side == PlasmaLeft) && (descA.m != descB.m ) ) ||
362  ( (side == PlasmaRight) && (descA.m != descB.n ) ) ) {
363  plasma_error("PLASMA_chemm_Tile_Async", "Matrix A must be square of size M or N regarding side.");
364  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
365  }
366 
367  /* Check tiles sizes */
368  if ( (descB.mb != descC.mb) || (descB.nb != descC.nb) ) {
369  plasma_error("PLASMA_chemm_Tile_Async", "B and C must have the same tile sizes");
370  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
371  }
372  if ( (descA.mb != descA.nb) ||
373  ( (side == PlasmaLeft) && (descA.mb != descB.mb ) ) ||
374  ( (side == PlasmaRight) && (descA.mb != descB.nb ) ) ) {
375  plasma_error("PLASMA_chemm_Tile_Async", "Matrix A must be square with square tiles wich fits the reagding tile size of B and C");
376  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
377  }
378 
379  /* Check submatrix starting point */
380  /* if ( (descB.i != descC.i) || (descB.j != descC.j) ) { */
381  /* plasma_error("PLASMA_chemm_Tile_Async", "B and C submatrices doesn't match"); */
382  /* return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); */
383  /* } */
384  /* if ( (descA.i != descA.j) || */
385  /* ( (side == PlasmaLeft) && (descA.i != descB.i ) ) || */
386  /* ( (side == PlasmaRight) && (descA.i != descB.j ) ) ) { */
387  /* plasma_error("PLASMA_chemm_Tile_Async", "Submatrix A must start on diagnonal and match submatrices B and C."); */
388  /* return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE); */
389  /* } */
390  if( (descA.i != 0) || (descA.j != 0) ||
391  (descB.i != 0) || (descB.j != 0) ||
392  (descC.i != 0) || (descC.j != 0) ) {
393  plasma_error("PLASMA_chemm_Tile_Async", "Submatrices are not supported for now");
394  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
395  }
396 
397  /* Quick return */
398  if (descC.m == 0 || descC.n == 0 ||
399  ( (alpha == (PLASMA_Complex32_t)0.0) && (beta == (PLASMA_Complex32_t)1.0) ))
400  return PLASMA_SUCCESS;
401 
403  PLASMA_enum, side,
404  PLASMA_enum, uplo,
405  PLASMA_Complex32_t, alpha,
406  PLASMA_desc, descA,
407  PLASMA_desc, descB,
408  PLASMA_Complex32_t, beta,
409  PLASMA_desc, descC,
410  PLASMA_sequence*, sequence,
411  PLASMA_request*, request);
412 
413  return PLASMA_SUCCESS;
414 }