PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
core_zgemm.c
Go to the documentation of this file.
1 
17 #include "common.h"
18 
19 /***************************************************************************/
/**
 * CORE_zgemm - wrapper that hands its arguments, unchanged apart from two
 * casts, to a single call taking a CBLAS-style GEMM argument list.
 *
 * When PLASMA_HAVE_WEAK is defined, the definition below is emitted under the
 * name PCORE_zgemm (via the #define) and CORE_zgemm is declared as a weak
 * alias for it (via the #pragma weak).
 *
 * transA, transB      : passed on after a cast to CBLAS_TRANSPOSE.
 * M, N, K             : passed on unchanged.
 * alpha, beta         : scalars, passed on wrapped in CBLAS_SADDR(...).
 * A/LDA, B/LDB, C/LDC : pointers and leading dimensions, passed on unchanged.
 *
 * NOTE(review): this listing jumps from source line 33 to 36, so the head of
 * the call statement is not visible here. The remaining argument list matches
 * the cblas_zgemm(...) calls found elsewhere in this file - confirm against
 * the original source.
 */
24 #if defined(PLASMA_HAVE_WEAK)
25 #pragma weak CORE_zgemm = PCORE_zgemm
26 #define CORE_zgemm PCORE_zgemm
27 #endif
28 void CORE_zgemm(int transA, int transB,
29  int M, int N, int K,
30  PLASMA_Complex64_t alpha, PLASMA_Complex64_t *A, int LDA,
31  PLASMA_Complex64_t *B, int LDB,
32  PLASMA_Complex64_t beta, PLASMA_Complex64_t *C, int LDC)
33 {
/* Source lines 34-35 (start of the call) are missing from this extract. */
36  (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB,
37  M, N, K,
38  CBLAS_SADDR(alpha), A, LDA,
39  B, LDB,
40  CBLAS_SADDR(beta), C, LDC);
41 }
42 
43 /***************************************************************************/
/**
 * QUARK_CORE_zgemm - registers a CORE_zgemm_quark task with QUARK.
 *
 * The function only calls QUARK_Insert_Task; no arithmetic is done here.
 * Arguments are passed as (size, pointer, flag) triples and the list is
 * terminated by a single 0:
 *   - transA, transB, m, n, k, alpha, lda, ldb, beta, ldc are tagged VALUE;
 *   - A and B are tagged INPUT with a size of nb*nb PLASMA_Complex64_t;
 *   - C is tagged INOUT with a size of nb*nb PLASMA_Complex64_t.
 * The order of the triples matches the order in which CORE_zgemm_quark
 * unpacks them.
 *
 * NOTE(review): transA/transB are declared int but registered with
 * sizeof(PLASMA_enum); this is only correct if PLASMA_enum has the same size
 * as int - confirm against the PLASMA headers.
 * NOTE(review): source line 53 is missing from this extract.
 */
46 void QUARK_CORE_zgemm(Quark *quark, Quark_Task_Flags *task_flags,
47  int transA, int transB,
48  int m, int n, int k, int nb,
49  PLASMA_Complex64_t alpha, PLASMA_Complex64_t *A, int lda,
50  PLASMA_Complex64_t *B, int ldb,
51  PLASMA_Complex64_t beta, PLASMA_Complex64_t *C, int ldc)
52 {
54  QUARK_Insert_Task(quark, CORE_zgemm_quark, task_flags,
55  sizeof(PLASMA_enum), &transA, VALUE,
56  sizeof(PLASMA_enum), &transB, VALUE,
57  sizeof(int), &m, VALUE,
58  sizeof(int), &n, VALUE,
59  sizeof(int), &k, VALUE,
60  sizeof(PLASMA_Complex64_t), &alpha, VALUE,
61  sizeof(PLASMA_Complex64_t)*nb*nb, A, INPUT,
62  sizeof(int), &lda, VALUE,
63  sizeof(PLASMA_Complex64_t)*nb*nb, B, INPUT,
64  sizeof(int), &ldb, VALUE,
65  sizeof(PLASMA_Complex64_t), &beta, VALUE,
66  sizeof(PLASMA_Complex64_t)*nb*nb, C, INOUT,
67  sizeof(int), &ldc, VALUE,
/* A lone 0 ends the variadic argument list. */
68  0);
69 }
70 
71 /***************************************************************************/
/**
 * QUARK_CORE_zgemm2 - registers a CORE_zgemm_quark task with QUARK, using the
 * same task function and argument list as QUARK_CORE_zgemm.
 *
 * The only visible difference from QUARK_CORE_zgemm is the flag on C, which
 * is INOUT | LOCALITY | GATHERV here instead of plain INOUT.
 *   - transA, transB, m, n, k, alpha, lda, ldb, beta, ldc are tagged VALUE;
 *   - A and B are tagged INPUT with a size of nb*nb PLASMA_Complex64_t;
 *   - C has a size of nb*nb PLASMA_Complex64_t.
 *
 * NOTE(review): the exact scheduling effect of LOCALITY and GATHERV is
 * defined by QUARK, not by this file - consult the QUARK documentation.
 * NOTE(review): source line 81 is missing from this extract.
 */
74 void QUARK_CORE_zgemm2( Quark *quark, Quark_Task_Flags *task_flags,
75  int transA, int transB,
76  int m, int n, int k, int nb,
77  PLASMA_Complex64_t alpha, PLASMA_Complex64_t *A, int lda,
78  PLASMA_Complex64_t *B, int ldb,
79  PLASMA_Complex64_t beta, PLASMA_Complex64_t *C, int ldc)
80 {
82  QUARK_Insert_Task(quark, CORE_zgemm_quark, task_flags,
83  sizeof(PLASMA_enum), &transA, VALUE,
84  sizeof(PLASMA_enum), &transB, VALUE,
85  sizeof(int), &m, VALUE,
86  sizeof(int), &n, VALUE,
87  sizeof(int), &k, VALUE,
88  sizeof(PLASMA_Complex64_t), &alpha, VALUE,
89  sizeof(PLASMA_Complex64_t)*nb*nb, A, INPUT,
90  sizeof(int), &lda, VALUE,
91  sizeof(PLASMA_Complex64_t)*nb*nb, B, INPUT,
92  sizeof(int), &ldb, VALUE,
93  sizeof(PLASMA_Complex64_t), &beta, VALUE,
/* C carries the extra LOCALITY and GATHERV flags in this variant. */
94  sizeof(PLASMA_Complex64_t)*nb*nb, C, INOUT | LOCALITY | GATHERV,
95  sizeof(int), &ldc, VALUE,
96  0);
97 }
98 
99 /***************************************************************************/
/**
 * CORE_zgemm_quark - task body passed to QUARK_Insert_Task by
 * QUARK_CORE_zgemm and QUARK_CORE_zgemm2.
 *
 * It unpacks 13 task arguments (transA, transB, m, n, k, alpha, A, lda, B,
 * ldb, beta, C, ldc - the same order in which they are inserted) into locals
 * and passes them to cblas_zgemm, casting transA/transB to CBLAS_TRANSPOSE
 * and wrapping alpha/beta in CBLAS_SADDR(...).
 *
 * When PLASMA_HAVE_WEAK is defined, the definition is emitted as
 * PCORE_zgemm_quark and CORE_zgemm_quark is declared as a weak alias for it.
 *
 * NOTE(review): this listing omits source lines 106 (the function header),
 * 114, 116 and 119 (presumably the declarations of A, B and C) and 124 (the
 * first argument of the cblas_zgemm call) - confirm against the original.
 */
102 #if defined(PLASMA_HAVE_WEAK)
103 #pragma weak CORE_zgemm_quark = PCORE_zgemm_quark
104 #define CORE_zgemm_quark PCORE_zgemm_quark
105 #endif
107 {
108  int transA;
109  int transB;
110  int m;
111  int n;
112  int k;
113  PLASMA_Complex64_t alpha;
115  int lda;
117  int ldb;
118  PLASMA_Complex64_t beta;
120  int ldc;
121 
/* Fill the locals from the task's packed argument list. */
122  quark_unpack_args_13(quark, transA, transB, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
123  cblas_zgemm(
125  (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB,
126  m, n, k,
127  CBLAS_SADDR(alpha), A, lda,
128  B, ldb,
129  CBLAS_SADDR(beta), C, ldc);
130 }
131 
132 /***************************************************************************/
/**
 * QUARK_CORE_zgemm_f2 - registers a CORE_zgemm_f2_quark task with QUARK.
 *
 * The first 13 argument triples are the GEMM operands:
 *   - transA, transB, m, n, k, alpha, lda, ldb, beta, ldc are tagged VALUE;
 *   - A and B are tagged INPUT with a size of nb*nb PLASMA_Complex64_t;
 *   - C is tagged INOUT | LOCALITY with a size of nb*nb PLASMA_Complex64_t.
 * Two further triples register the caller-supplied buffers fake1 and fake2,
 * sized szefake1 and szefake2 PLASMA_Complex64_t elements, with the
 * caller-supplied flags flag1 and flag2.
 *
 * NOTE(review): the task body unpacks fake1/fake2 but does not pass them to
 * cblas_zgemm; presumably they exist only to declare extra data dependencies
 * to the scheduler - confirm with the callers.
 * NOTE(review): source line 144 is missing from this extract.
 */
135 void QUARK_CORE_zgemm_f2(Quark *quark, Quark_Task_Flags *task_flags,
136  int transA, int transB,
137  int m, int n, int k, int nb,
138  PLASMA_Complex64_t alpha, PLASMA_Complex64_t *A, int lda,
139  PLASMA_Complex64_t *B, int ldb,
140  PLASMA_Complex64_t beta, PLASMA_Complex64_t *C, int ldc,
141  PLASMA_Complex64_t *fake1, int szefake1, int flag1,
142  PLASMA_Complex64_t *fake2, int szefake2, int flag2)
143 {
145  QUARK_Insert_Task(quark, CORE_zgemm_f2_quark, task_flags,
146  sizeof(PLASMA_enum), &transA, VALUE,
147  sizeof(PLASMA_enum), &transB, VALUE,
148  sizeof(int), &m, VALUE,
149  sizeof(int), &n, VALUE,
150  sizeof(int), &k, VALUE,
151  sizeof(PLASMA_Complex64_t), &alpha, VALUE,
152  sizeof(PLASMA_Complex64_t)*nb*nb, A, INPUT,
153  sizeof(int), &lda, VALUE,
154  sizeof(PLASMA_Complex64_t)*nb*nb, B, INPUT,
155  sizeof(int), &ldb, VALUE,
156  sizeof(PLASMA_Complex64_t), &beta, VALUE,
157  sizeof(PLASMA_Complex64_t)*nb*nb, C, INOUT | LOCALITY,
158  sizeof(int), &ldc, VALUE,
/* Extra buffers whose access flags are chosen by the caller. */
159  sizeof(PLASMA_Complex64_t)*szefake1, fake1, flag1,
160  sizeof(PLASMA_Complex64_t)*szefake2, fake2, flag2,
161  0);
162 }
163 
164 /***************************************************************************/
/**
 * CORE_zgemm_f2_quark - task body passed to QUARK_Insert_Task by
 * QUARK_CORE_zgemm_f2.
 *
 * It unpacks 15 task arguments: the 13 GEMM operands followed by fake1 and
 * fake2. Only the GEMM operands are passed to cblas_zgemm; fake1 and fake2
 * are unpacked but not used anywhere else in the visible body.
 *
 * When PLASMA_HAVE_WEAK is defined, the definition is emitted as
 * PCORE_zgemm_f2_quark and CORE_zgemm_f2_quark is declared as a weak alias.
 *
 * NOTE(review): this listing omits source lines 171 (the function header),
 * 179, 181 and 184 (presumably the declarations of A, B and C) and 191 (the
 * first argument of the cblas_zgemm call) - confirm against the original.
 */
167 #if defined(PLASMA_HAVE_WEAK)
168 #pragma weak CORE_zgemm_f2_quark = PCORE_zgemm_f2_quark
169 #define CORE_zgemm_f2_quark PCORE_zgemm_f2_quark
170 #endif
172 {
173  int transA;
174  int transB;
175  int M;
176  int N;
177  int K;
178  PLASMA_Complex64_t alpha;
180  int LDA;
182  int LDB;
183  PLASMA_Complex64_t beta;
185  int LDC;
/* Receive the two extra task arguments; not read after unpacking. */
186  void *fake1, *fake2;
187 
188  quark_unpack_args_15(quark, transA, transB, M, N, K, alpha,
189  A, LDA, B, LDB, beta, C, LDC, fake1, fake2);
190  cblas_zgemm(
192  (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB,
193  M, N, K,
194  CBLAS_SADDR(alpha), A, LDA,
195  B, LDB,
196  CBLAS_SADDR(beta), C, LDC);
197 }
198 
199 /***************************************************************************/
/**
 * QUARK_CORE_zgemm_p2 - registers a CORE_zgemm_p2_quark task with QUARK.
 *
 * In this variant B is a pointer to a pointer (PLASMA_Complex64_t **). The
 * task registers B itself as INPUT with the size of one pointer, and the task
 * body dereferences it (*B) when it runs.
 *   - transA, transB, m, n, k, alpha, lda, ldb, beta, ldc are tagged VALUE;
 *   - A is tagged INPUT with a size of lda*nb PLASMA_Complex64_t;
 *   - C is tagged INOUT | LOCALITY with a size of ldc*nb PLASMA_Complex64_t.
 *
 * NOTE(review): presumably the extra indirection lets the matrix address be
 * resolved only at execution time - confirm with the callers.
 * NOTE(review): source line 209 is missing from this extract.
 */
202 void QUARK_CORE_zgemm_p2(Quark *quark, Quark_Task_Flags *task_flags,
203  int transA, int transB,
204  int m, int n, int k, int nb,
205  PLASMA_Complex64_t alpha, PLASMA_Complex64_t *A, int lda,
206  PLASMA_Complex64_t **B, int ldb,
207  PLASMA_Complex64_t beta, PLASMA_Complex64_t *C, int ldc)
208 {
210  QUARK_Insert_Task(quark, CORE_zgemm_p2_quark, task_flags,
211  sizeof(PLASMA_enum), &transA, VALUE,
212  sizeof(PLASMA_enum), &transB, VALUE,
213  sizeof(int), &m, VALUE,
214  sizeof(int), &n, VALUE,
215  sizeof(int), &k, VALUE,
216  sizeof(PLASMA_Complex64_t), &alpha, VALUE,
217  sizeof(PLASMA_Complex64_t)*lda*nb, A, INPUT,
218  sizeof(int), &lda, VALUE,
/* B is the address of a pointer, so only one pointer's size is registered. */
219  sizeof(PLASMA_Complex64_t*), B, INPUT,
220  sizeof(int), &ldb, VALUE,
221  sizeof(PLASMA_Complex64_t), &beta, VALUE,
222  sizeof(PLASMA_Complex64_t)*ldc*nb, C, INOUT | LOCALITY,
223  sizeof(int), &ldc, VALUE,
224  0);
225 }
226 
227 /***************************************************************************/
/**
 * CORE_zgemm_p2_quark - task body passed to QUARK_Insert_Task by
 * QUARK_CORE_zgemm_p2.
 *
 * It unpacks 13 task arguments and calls cblas_zgemm. Unlike
 * CORE_zgemm_quark, the B operand handed to cblas_zgemm is *B, i.e. the
 * unpacked B is dereferenced once at execution time.
 *
 * When PLASMA_HAVE_WEAK is defined, the definition is emitted as
 * PCORE_zgemm_p2_quark and CORE_zgemm_p2_quark is declared as a weak alias.
 *
 * NOTE(review): this listing omits source lines 234 (the function header),
 * 242, 244 and 247 (presumably the declarations of A, B and C) and 253 (the
 * first argument of the cblas_zgemm call) - confirm against the original.
 */
230 #if defined(PLASMA_HAVE_WEAK)
231 #pragma weak CORE_zgemm_p2_quark = PCORE_zgemm_p2_quark
232 #define CORE_zgemm_p2_quark PCORE_zgemm_p2_quark
233 #endif
235 {
236  int transA;
237  int transB;
238  int M;
239  int N;
240  int K;
241  PLASMA_Complex64_t alpha;
243  int LDA;
245  int LDB;
246  PLASMA_Complex64_t beta;
248  int LDC;
249 
250  quark_unpack_args_13(quark, transA, transB, M, N, K, alpha,
251  A, LDA, B, LDB, beta, C, LDC);
252  cblas_zgemm(
254  (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB,
255  M, N, K,
256  CBLAS_SADDR(alpha), A, LDA,
/* B was registered as a pointer-to-pointer; follow it to reach the matrix. */
257  *B, LDB,
258  CBLAS_SADDR(beta), C, LDC);
259 }
260 
261 /***************************************************************************/
/**
 * QUARK_CORE_zgemm_p3 - registers a CORE_zgemm_p3_quark task with QUARK.
 *
 * In this variant C is a pointer to a pointer (PLASMA_Complex64_t **). The
 * task registers C itself as INOUT | LOCALITY with the size of one pointer,
 * and the task body dereferences it (*C) when it runs.
 *   - transA, transB, m, n, k, alpha, lda, ldb, beta, ldc are tagged VALUE;
 *   - A is tagged INPUT with a size of lda*nb PLASMA_Complex64_t;
 *   - B is tagged INPUT with a size of ldb*nb PLASMA_Complex64_t.
 *
 * NOTE(review): presumably the extra indirection lets the output address be
 * resolved only at execution time - confirm with the callers.
 * NOTE(review): source line 271 is missing from this extract.
 */
264 void QUARK_CORE_zgemm_p3(Quark *quark, Quark_Task_Flags *task_flags,
265  int transA, int transB,
266  int m, int n, int k, int nb,
267  PLASMA_Complex64_t alpha, PLASMA_Complex64_t *A, int lda,
268  PLASMA_Complex64_t *B, int ldb,
269  PLASMA_Complex64_t beta, PLASMA_Complex64_t **C, int ldc)
270 {
272  QUARK_Insert_Task(quark, CORE_zgemm_p3_quark, task_flags,
273  sizeof(PLASMA_enum), &transA, VALUE,
274  sizeof(PLASMA_enum), &transB, VALUE,
275  sizeof(int), &m, VALUE,
276  sizeof(int), &n, VALUE,
277  sizeof(int), &k, VALUE,
278  sizeof(PLASMA_Complex64_t), &alpha, VALUE,
279  sizeof(PLASMA_Complex64_t)*lda*nb, A, INPUT,
280  sizeof(int), &lda, VALUE,
281  sizeof(PLASMA_Complex64_t)*ldb*nb, B, INPUT,
282  sizeof(int), &ldb, VALUE,
283  sizeof(PLASMA_Complex64_t), &beta, VALUE,
/* C is the address of a pointer, so only one pointer's size is registered. */
284  sizeof(PLASMA_Complex64_t*), C, INOUT | LOCALITY,
285  sizeof(int), &ldc, VALUE,
286  0);
287 }
288 
289 /***************************************************************************/
/**
 * CORE_zgemm_p3_quark - task body passed to QUARK_Insert_Task by
 * QUARK_CORE_zgemm_p3.
 *
 * It unpacks 13 task arguments and calls cblas_zgemm. Unlike
 * CORE_zgemm_quark, the C operand handed to cblas_zgemm is *C, i.e. the
 * unpacked C is dereferenced once at execution time.
 *
 * When PLASMA_HAVE_WEAK is defined, the definition is emitted as
 * PCORE_zgemm_p3_quark and CORE_zgemm_p3_quark is declared as a weak alias.
 *
 * NOTE(review): this listing omits source lines 296 (the function header),
 * 304, 306 and 309 (presumably the declarations of A, B and C) and 315 (the
 * first argument of the cblas_zgemm call) - confirm against the original.
 */
292 #if defined(PLASMA_HAVE_WEAK)
293 #pragma weak CORE_zgemm_p3_quark = PCORE_zgemm_p3_quark
294 #define CORE_zgemm_p3_quark PCORE_zgemm_p3_quark
295 #endif
297 {
298  int transA;
299  int transB;
300  int M;
301  int N;
302  int K;
303  PLASMA_Complex64_t alpha;
305  int LDA;
307  int LDB;
308  PLASMA_Complex64_t beta;
310  int LDC;
311 
312  quark_unpack_args_13(quark, transA, transB, M, N, K, alpha,
313  A, LDA, B, LDB, beta, C, LDC);
314  cblas_zgemm(
316  (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB,
317  M, N, K,
318  CBLAS_SADDR(alpha), A, LDA,
319  B, LDB,
/* C was registered as a pointer-to-pointer; follow it to reach the matrix. */
320  CBLAS_SADDR(beta), *C, LDC);
321 }
322 
323 /***************************************************************************/
/**
 * Task-insertion wrapper that registers a CORE_zgemm_p2f1_quark task with
 * QUARK.
 *
 * NOTE(review): the first line of this function's signature (source lines
 * 324-326: return type, name and leading parameters) is missing from this
 * extract. Judging by the task it inserts and by the sibling wrappers, it is
 * presumably QUARK_CORE_zgemm_p2f1(Quark *quark, Quark_Task_Flags
 * *task_flags, ...) - confirm against the original source.
 *
 * Visible behaviour:
 *   - transA, transB, m, n, k, alpha, lda, ldb, beta, ldc are tagged VALUE;
 *   - A is tagged INPUT with a size of lda*nb PLASMA_Complex64_t;
 *   - B is a PLASMA_Complex64_t ** registered as INPUT with the size of one
 *     pointer (the task body dereferences it);
 *   - C is tagged INOUT | LOCALITY with a size of ldc*nb PLASMA_Complex64_t;
 *   - fake1 is registered with szefake1 PLASMA_Complex64_t elements and the
 *     caller-supplied flag1.
 * NOTE(review): source line 334 is also missing from this extract.
 */
327  int transA, int transB,
328  int m, int n, int k, int nb,
329  PLASMA_Complex64_t alpha, PLASMA_Complex64_t *A, int lda,
330  PLASMA_Complex64_t **B, int ldb,
331  PLASMA_Complex64_t beta, PLASMA_Complex64_t *C, int ldc,
332  PLASMA_Complex64_t *fake1, int szefake1, int flag1)
333 {
335  QUARK_Insert_Task(quark, CORE_zgemm_p2f1_quark, task_flags,
336  sizeof(PLASMA_enum), &transA, VALUE,
337  sizeof(PLASMA_enum), &transB, VALUE,
338  sizeof(int), &m, VALUE,
339  sizeof(int), &n, VALUE,
340  sizeof(int), &k, VALUE,
341  sizeof(PLASMA_Complex64_t), &alpha, VALUE,
342  sizeof(PLASMA_Complex64_t)*lda*nb, A, INPUT,
343  sizeof(int), &lda, VALUE,
/* B is the address of a pointer, so only one pointer's size is registered. */
344  sizeof(PLASMA_Complex64_t*), B, INPUT,
345  sizeof(int), &ldb, VALUE,
346  sizeof(PLASMA_Complex64_t), &beta, VALUE,
347  sizeof(PLASMA_Complex64_t)*ldc*nb, C, INOUT | LOCALITY,
348  sizeof(int), &ldc, VALUE,
/* Extra buffer whose access flag is chosen by the caller. */
349  sizeof(PLASMA_Complex64_t)*szefake1, fake1, flag1,
350  0);
351 }
352 
353 /***************************************************************************/
/**
 * CORE_zgemm_p2f1_quark - task body named in the QUARK_Insert_Task call that
 * registers the "p2f1" GEMM variant.
 *
 * It unpacks 14 task arguments: the 13 GEMM operands followed by fake1. The
 * B operand handed to cblas_zgemm is *B (the unpacked B is dereferenced once
 * at execution time); fake1 is unpacked but not used anywhere else in the
 * visible body.
 *
 * When PLASMA_HAVE_WEAK is defined, the definition is emitted as
 * PCORE_zgemm_p2f1_quark and CORE_zgemm_p2f1_quark is declared as a weak
 * alias.
 *
 * NOTE(review): this listing omits source lines 360 (the function header),
 * 368, 370 and 373 (presumably the declarations of A, B and C) and 380 (the
 * first argument of the cblas_zgemm call) - confirm against the original.
 */
356 #if defined(PLASMA_HAVE_WEAK)
357 #pragma weak CORE_zgemm_p2f1_quark = PCORE_zgemm_p2f1_quark
358 #define CORE_zgemm_p2f1_quark PCORE_zgemm_p2f1_quark
359 #endif
361 {
362  int transA;
363  int transB;
364  int M;
365  int N;
366  int K;
367  PLASMA_Complex64_t alpha;
369  int LDA;
371  int LDB;
372  PLASMA_Complex64_t beta;
374  int LDC;
/* Receives the extra task argument; not read after unpacking. */
375  void *fake1;
376 
377  quark_unpack_args_14(quark, transA, transB, M, N, K, alpha,
378  A, LDA, B, LDB, beta, C, LDC, fake1);
379  cblas_zgemm(
381  (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB,
382  M, N, K,
383  CBLAS_SADDR(alpha), A, LDA,
/* B was registered as a pointer-to-pointer; follow it to reach the matrix. */
384  *B, LDB,
385  CBLAS_SADDR(beta), C, LDC);
386 }