PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
zgemm.c
Go to the documentation of this file.
1 
15 #include "common.h"
16 
17 /***************************************************************************/
96 int PLASMA_zgemm(PLASMA_enum transA, PLASMA_enum transB, int M, int N, int K,
97  PLASMA_Complex64_t alpha, PLASMA_Complex64_t *A, int LDA,
98  PLASMA_Complex64_t *B, int LDB,
99  PLASMA_Complex64_t beta, PLASMA_Complex64_t *C, int LDC)
100 {
101  int NB;
102  int Am, An, Bm, Bn;
103  int status;
104  PLASMA_desc descA, descB, descC;
106  PLASMA_sequence *sequence = NULL;
108 
109  plasma = plasma_context_self();
110  if (plasma == NULL) {
111  plasma_fatal_error("PLASMA_zgemm", "PLASMA not initialized");
113  }
114 
115  /* Check input arguments */
116  if ((transA != PlasmaNoTrans) && (transA != PlasmaTrans) && (transA != PlasmaConjTrans)) {
117  plasma_error("PLASMA_zgemm", "illegal value of transA");
118  return -1;
119  }
120  if ((transB != PlasmaNoTrans) && (transB != PlasmaTrans) && (transB != PlasmaConjTrans)) {
121  plasma_error("PLASMA_zgemm", "illegal value of transB");
122  return -2;
123  }
124  if ( transA == PlasmaNoTrans ) {
125  Am = M; An = K;
126  } else {
127  Am = K; An = M;
128  }
129  if ( transB == PlasmaNoTrans ) {
130  Bm = K; Bn = N;
131  } else {
132  Bm = N; Bn = K;
133  }
134  if (M < 0) {
135  plasma_error("PLASMA_zgemm", "illegal value of M");
136  return -3;
137  }
138  if (N < 0) {
139  plasma_error("PLASMA_zgemm", "illegal value of N");
140  return -4;
141  }
142  if (K < 0) {
143  plasma_error("PLASMA_zgemm", "illegal value of N");
144  return -5;
145  }
146  if (LDA < max(1, Am)) {
147  plasma_error("PLASMA_zgemm", "illegal value of LDA");
148  return -8;
149  }
150  if (LDB < max(1, Bm)) {
151  plasma_error("PLASMA_zgemm", "illegal value of LDB");
152  return -10;
153  }
154  if (LDC < max(1, M)) {
155  plasma_error("PLASMA_zgemm", "illegal value of LDC");
156  return -13;
157  }
158 
159  /* Quick return */
160  if (M == 0 || N == 0 ||
161  ((alpha == (PLASMA_Complex64_t)0.0 || K == 0) && beta == (PLASMA_Complex64_t)1.0))
162  return PLASMA_SUCCESS;
163 
164  /* Tune NB depending on M, N & NRHS; Set NBNBSIZE */
165  status = plasma_tune(PLASMA_FUNC_ZGEMM, M, N, 0);
166  if (status != PLASMA_SUCCESS) {
167  plasma_error("PLASMA_zgemm", "plasma_tune() failed");
168  return status;
169  }
170 
171  /* Set MT & NT & KT */
172  NB = PLASMA_NB;
173 
174  plasma_sequence_create(plasma, &sequence);
175 
177  plasma_zooplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An, plasma_desc_mat_free(&(descA)) );
178  plasma_zooplap2tile( descB, B, NB, NB, LDB, Bn, 0, 0, Bm, Bn, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
179  plasma_zooplap2tile( descC, C, NB, NB, LDC, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)); plasma_desc_mat_free(&(descC)));
180  } else {
181  plasma_ziplap2tile( descA, A, NB, NB, LDA, An, 0, 0, Am, An );
182  plasma_ziplap2tile( descB, B, NB, NB, LDB, Bn, 0, 0, Bm, Bn );
183  plasma_ziplap2tile( descC, C, NB, NB, LDC, N, 0, 0, M, N );
184  }
185 
186  /* Call the tile interface */
188  transA, transB, alpha, &descA, &descB, beta, &descC, sequence, &request);
189 
191  plasma_zooptile2lap( descC, C, NB, NB, LDC, N );
193  plasma_desc_mat_free(&descA);
194  plasma_desc_mat_free(&descB);
195  plasma_desc_mat_free(&descC);
196  } else {
197  plasma_ziptile2lap( descA, A, NB, NB, LDA, An );
198  plasma_ziptile2lap( descB, B, NB, NB, LDB, Bn );
199  plasma_ziptile2lap( descC, C, NB, NB, LDC, N );
201  }
202 
203  status = sequence->status;
204  plasma_sequence_destroy(plasma, sequence);
205  return status;
206 }
207 
208 /***************************************************************************/
267 {
/*
 * Body of PLASMA_zgemm_Tile (name taken from the error string below): creates
 * a sequence, forwards all arguments to PLASMA_zgemm_Tile_Async, and returns
 * the status recorded in that sequence.
 *
 * NOTE(review): the signature and listing lines 268, 270, 276 and 280 are
 * absent from this text, so the declarations of `plasma` and `request`, the
 * statement after plasma_fatal_error(), and whatever sits between the async
 * call and the status read are not visible here.
 */
269  PLASMA_sequence *sequence = NULL;
271  int status;
272 
273  plasma = plasma_context_self();
274  if (plasma == NULL) {
275  plasma_fatal_error("PLASMA_zgemm_Tile", "PLASMA not initialized");
277  }
278  plasma_sequence_create(plasma, &sequence);
279  PLASMA_zgemm_Tile_Async(transA, transB, alpha, A, B, beta, C, sequence, &request);
/* Read the status before the sequence is destroyed. */
281  status = sequence->status;
282  plasma_sequence_destroy(plasma, sequence);
283  return status;
284 }
285 
286 /***************************************************************************/
316  PLASMA_sequence *sequence, PLASMA_request *request)
317 {
/*
 * Body of PLASMA_zgemm_Tile_Async (name taken from the error strings below):
 * validates the sequence, the request, the three tile descriptors and the
 * transpose flags, checks that A, B and C are mutually conformant, and then
 * hands the operation to the call whose argument list closes the function.
 *
 * Returns PLASMA_ERR_UNALLOCATED for a NULL sequence or request, the value of
 * plasma_request_fail() for every other validation failure, and
 * PLASMA_SUCCESS on the quick-return path and after the final call.
 *
 * NOTE(review): the first lines of the signature and listing lines 318, 329
 * and 422 are absent from this text, so the declaration of `plasma`, the
 * statement after the first plasma_fatal_error(), and the head of the final
 * call are not visible here.
 */
/* Work on by-value copies of the caller's descriptors. */
319  PLASMA_desc descA = *A;
320  PLASMA_desc descB = *B;
321  PLASMA_desc descC = *C;
322  int M, N, K;
323  int Am, An, Ai, Aj, Amb, Anb;
324  int Bm, Bn, Bi, Bj, Bmb, Bnb;
325 
326  plasma = plasma_context_self();
327  if (plasma == NULL) {
328  plasma_fatal_error("PLASMA_zgemm_Tile_Async", "PLASMA not initialized");
330  }
331  if (sequence == NULL) {
332  plasma_fatal_error("PLASMA_zgemm_Tile_Async", "NULL sequence");
333  return PLASMA_ERR_UNALLOCATED;
334  }
335  if (request == NULL) {
336  plasma_fatal_error("PLASMA_zgemm_Tile_Async", "NULL request");
337  return PLASMA_ERR_UNALLOCATED;
338  }
339  /* Check sequence status */
/* A sequence that is not in the success state rejects the new request. */
340  if (sequence->status == PLASMA_SUCCESS)
341  request->status = PLASMA_SUCCESS;
342  else
343  return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);
344 
345  /* Check descriptors for correctness */
346  if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
347  plasma_error("PLASMA_zgemm_Tile_Async", "invalid first descriptor");
348  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
349  }
350  if (plasma_desc_check(&descB) != PLASMA_SUCCESS) {
351  plasma_error("PLASMA_zgemm_Tile_Async", "invalid second descriptor");
352  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
353  }
354  if (plasma_desc_check(&descC) != PLASMA_SUCCESS) {
355  plasma_error("PLASMA_zgemm_Tile_Async", "invalid third descriptor");
356  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
357  }
358  /* Check input arguments */
359  if ((transA != PlasmaNoTrans) && (transA != PlasmaTrans) && (transA != PlasmaConjTrans)) {
360  plasma_error("PLASMA_zgemm_Tile_Async", "illegal value of transA");
361  return plasma_request_fail(sequence, request, -1);
362  }
363  if ((transB != PlasmaNoTrans) && (transB != PlasmaTrans) && (transB != PlasmaConjTrans)) {
364  plasma_error("PLASMA_zgemm_Tile_Async", "illegal value of transB");
365  return plasma_request_fail(sequence, request, -2);
366  }
367 
/*
 * Effective extents, tile sizes and start offsets of A and B: taken as-is for
 * PlasmaNoTrans, with the row and column roles swapped otherwise.
 */
368  if ( transA == PlasmaNoTrans ) {
369  Am = descA.m;
370  An = descA.n;
371  Amb = descA.mb;
372  Anb = descA.nb;
373  Ai = descA.i;
374  Aj = descA.j;
375  } else {
376  Am = descA.n;
377  An = descA.m;
378  Amb = descA.nb;
379  Anb = descA.mb;
380  Ai = descA.j;
381  Aj = descA.i;
382  }
383 
384  if ( transB == PlasmaNoTrans ) {
385  Bm = descB.m;
386  Bn = descB.n;
387  Bmb = descB.mb;
388  Bnb = descB.nb;
389  Bi = descB.i;
390  Bj = descB.j;
391  } else {
392  Bm = descB.n;
393  Bn = descB.m;
394  Bmb = descB.nb;
395  Bnb = descB.mb;
396  Bi = descB.j;
397  Bj = descB.i;
398  }
399 
/*
 * Conformance: the rows of the effective A must line up with the rows of C,
 * its columns with the rows of the effective B, and the columns of B with the
 * columns of C - checked for tile sizes, extents and start indexes in turn.
 */
400  if ( (Amb != descC.mb) || (Anb != Bmb) || (Bnb != descC.nb) ) {
401  plasma_error("PLASMA_zgemm_Tile_Async", "tile sizes have to match");
402  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
403  }
404  if ( (Am != descC.m) || (An != Bm) || (Bn != descC.n) ) {
405  plasma_error("PLASMA_zgemm_Tile_Async", "sizes of matrices have to match");
406  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
407  }
408  if ( (Ai != descC.i) || (Aj != Bi) || (Bj != descC.j) ) {
409  plasma_error("PLASMA_zgemm_Tile_Async", "start indexes have to match");
410  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
411  }
412 
/* M and N come from C; K is the shared inner dimension. */
413  M = descC.m;
414  N = descC.n;
415  K = An;
416 
417  /* Quick return */
/* Nothing to do when C is empty, or when (alpha == 0 or K == 0) and beta == 1. */
418  if (M == 0 || N == 0 ||
419  ((alpha == (PLASMA_Complex64_t)0.0 || K == 0) && beta == (PLASMA_Complex64_t)1.0))
420  return PLASMA_SUCCESS;
421 
/*
 * NOTE(review): the head of this call (listing line 422) is absent. What
 * remains is nine (type, value) pairs - presumably the argument list of a
 * dispatch macro that hands the operation to the parallel kernel; confirm
 * against the original file.
 */
423  PLASMA_enum, transA,
424  PLASMA_enum, transB,
425  PLASMA_Complex64_t, alpha,
426  PLASMA_desc, descA,
427  PLASMA_desc, descB,
428  PLASMA_Complex64_t, beta,
429  PLASMA_desc, descC,
430  PLASMA_sequence*, sequence,
431  PLASMA_request*, request);
432 
433  return PLASMA_SUCCESS;
434 }