PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
pzsyr2k.c
Go to the documentation of this file.
1 
15 #include "common.h"
16 
/* Tile accessors: A(m,n), B(m,n) and C(m,n) each expand to BLKADDR applied to
 * the descriptor variable of the same name (A, B or C in the enclosing
 * function), with PLASMA_Complex64_t as the element type and (m, n) as the
 * tile coordinates. */
17 #define A(m,n) BLKADDR(A, PLASMA_Complex64_t, m, n)
18 #define B(m,n) BLKADDR(B, PLASMA_Complex64_t, m, n)
19 #define C(m,n) BLKADDR(C, PLASMA_Complex64_t, m, n)
20 /***************************************************************************/
/*
 * Static-scheduling worker for the tile zsyr2k update of C from A and B.
 *
 * NOTE(review): this listing omits the lines that hold the function signature,
 * the declarations of `uplo`, `trans` and `zone` (all used below), and the
 * name of the kernel called on diagonal tiles. `zone` is presumably the
 * constant 1.0 -- confirm against the complete file.
 *
 * Arguments are unpacked with plasma_unpack_args_9. The routine returns
 * without doing any work when sequence->status is not PLASMA_SUCCESS.
 *
 * Tiles (m, n) with m >= n of the lower triangle of C are enumerated column by
 * column. This worker starts at linear tile index PLASMA_RANK and advances by
 * PLASMA_SIZE tiles per iteration (presumably thread rank / thread count).
 * When uplo is not PlasmaLower, the mirrored tile C(n, m) is updated instead
 * of C(m, n).
 *
 * Within each k loop the first kernel call scales C by `beta` only for k == 0
 * and by `zone` afterwards, so beta is applied exactly once per tile.
 */
24 {
27  PLASMA_Complex64_t alpha;
28  PLASMA_desc A;
29  PLASMA_desc B;
30  PLASMA_Complex64_t beta;
31  PLASMA_desc C;
32  PLASMA_sequence *sequence;
33  PLASMA_request *request;
34 
35  int m, n, k;
36  int next_m;
37  int next_n;
38  int ldam, ldan, ldak;
39  int ldbm, ldbn, ldbk;
40  int ldcm, ldcn;
41  int tempkn, tempkm, tempmm, tempnn;
42 
44  PLASMA_Complex64_t zbeta;
45 
46  plasma_unpack_args_9(uplo, trans, alpha, A, B, beta, C, sequence, request);
47  if (sequence->status != PLASMA_SUCCESS)
48  return;
49 
    /* Convert the starting linear index into (m, n): column n of the lower
     * triangle begins at row n, hence the "+ n" when wrapping to the next column. */
50  n = 0;
51  m = PLASMA_RANK;
52  while (m >= C.mt && n < C.nt) {
53  n++;
54  m = m-C.mt+n;
55  }
56 
57  while (n < C.nt) {
    /* Pre-compute the tile this worker handles on its next iteration. */
58  next_n = n;
59  next_m = m + PLASMA_SIZE;
60  while (next_m >= C.mt && next_n < C.nt) {
61  next_n++;
62  next_m = next_m - C.mt + next_n;
63  }
64 
    /* The last tile row/column may be smaller than the nominal tile size. */
65  tempmm = m == C.mt-1 ? C.m-m*C.mb : C.mb;
66  tempnn = n == C.nt-1 ? C.n-n*C.nb : C.nb;
67 
68  ldcn = BLKLDD(C, n);
69  ldcm = BLKLDD(C, m);
70 
    /* Diagonal tile: one kernel call per k, passing uplo and trans through. */
71  if (m == n) {
72  /*
73  * PlasmaNoTrans
74  */
75  if (trans == PlasmaNoTrans) {
76  ldam = BLKLDD(A, m);
77  ldbm = BLKLDD(B, m);
78  for (k = 0; k < A.nt; k++) {
79  tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
80  zbeta = k == 0 ? beta : zone;
82  uplo, trans,
83  tempnn, tempkn,
84  alpha, A(m, k), ldam,
85  B(m, k), ldbm,
86  zbeta, C(m, m), ldcm);
87  }
88  }
89  /*
90  * Plasma[Conj]Trans
91  */
92  else {
93  for (k = 0; k < A.mt; k++) {
94  tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
95  ldak = BLKLDD(A, k);
96  ldbk = BLKLDD(B, k);
97  zbeta = k == 0 ? beta : zone;
99  uplo, trans,
100  tempnn, tempkm,
101  alpha, A(k, m), ldak,
102  B(k, m), ldbk,
103  zbeta, C(m, m), ldcm);
104  }
105  }
106  }
    /* Off-diagonal tile: two CORE_zgemm calls per k; the second always
     * accumulates with zone on top of the first. */
107  else {
108  if (trans == PlasmaNoTrans) {
109  ldam = BLKLDD(A, m);
110  ldan = BLKLDD(A, n);
111  ldbm = BLKLDD(B, m);
112  ldbn = BLKLDD(B, n);
113  /*
114  * PlasmaNoTrans / PlasmaLower
115  */
116  if (uplo == PlasmaLower) {
117  for (k = 0; k < A.nt; k++) {
118  tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
119  zbeta = k == 0 ? beta : zone;
120  CORE_zgemm(
121  trans, PlasmaTrans,
122  tempmm, tempnn, tempkn,
123  alpha, A(m, k), ldam,
124  B(n, k), ldbn,
125  zbeta, C(m, n), ldcm);
126 
127  CORE_zgemm(
128  trans, PlasmaTrans,
129  tempmm, tempnn, tempkn,
130  alpha, B(m, k), ldbm,
131  A(n, k), ldan,
132  zone, C(m, n), ldcm);
133  }
134  }
135  /*
136  * PlasmaNoTrans / PlasmaUpper
137  */
138  else {
139  for (k = 0; k < A.nt; k++) {
140  tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
141  zbeta = k == 0 ? beta : zone;
142  CORE_zgemm(
143  trans, PlasmaTrans,
144  tempnn, tempmm, tempkn,
145  alpha, A(n, k), ldan,
146  B(m, k), ldbm,
147  zbeta, C(n, m), ldcn);
148 
149  CORE_zgemm(
150  trans, PlasmaTrans,
151  tempnn, tempmm, tempkn,
152  alpha, B(n, k), ldbn,
153  A(m, k), ldam,
154  zone, C(n, m), ldcn);
155  }
156  }
157  }
158  else {
159  /*
160  * Plasma[Conj]Trans / PlasmaLower
161  */
162  if (uplo == PlasmaLower) {
163  for (k = 0; k < A.mt; k++) {
164  ldak = BLKLDD(A, k);
165  ldbk = BLKLDD(B, k);
166  tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
167  zbeta = k == 0 ? beta : zone;
168  CORE_zgemm(
169  trans, PlasmaNoTrans,
170  tempmm, tempnn, tempkm,
171  alpha, A(k, m), ldak,
172  B(k, n), ldbk,
173  zbeta, C(m, n), ldcm);
174 
175  CORE_zgemm(
176  trans, PlasmaNoTrans,
177  tempmm, tempnn, tempkm,
178  alpha, B(k, m), ldbk,
179  A(k, n), ldak,
180  zone, C(m, n), ldcm);
181  }
182  }
183  /*
184  * Plasma[Conj]Trans / PlasmaUpper
185  */
186  else {
187  for (k = 0; k < A.mt; k++) {
188  tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
189  ldak = BLKLDD(A, k);
190  ldbk = BLKLDD(B, k);
191  zbeta = k == 0 ? beta : zone;
    /* C(n, m) lies in tile row n, so its leading dimension is ldcn
     * (BLKLDD(C, n)), matching the second gemm below and the
     * PlasmaNoTrans / PlasmaUpper branch. */
192  CORE_zgemm(
193  trans, PlasmaNoTrans,
194  tempnn, tempmm, tempkm,
195  alpha, A(k, n), ldak,
196  B(k, m), ldbk,
197  zbeta, C(n, m), ldcn);
198 
199  CORE_zgemm(
200  trans, PlasmaNoTrans,
201  tempnn, tempmm, tempkm,
202  alpha, B(k, n), ldbk,
203  A(k, m), ldak,
204  zone, C(n, m), ldcn);
205  }
206  }
207  }
208  }
209  m = next_m;
210  n = next_n;
211  }
212 }
213 
214 /***************************************************************************/
220  PLASMA_sequence *sequence, PLASMA_request *request)
221 {
    /*
     * Dynamic-scheduling variant of the tile zsyr2k update: every tile kernel
     * is issued through plasma->quark with the shared task_flags.
     *
     * NOTE(review): this listing omits the start of the signature, the
     * declarations of `plasma`, `task_flags` and `zone`, and the names of the
     * task-insertion calls (the lines just before each
     * "plasma->quark, &task_flags," argument list). Confirm them against the
     * complete file.
     *
     * Returns without submitting anything when sequence->status is not
     * PLASMA_SUCCESS. Otherwise task_flags is tagged with the sequence's
     * quark_sequence (TASK_SEQUENCE).
     *
     * For each tile column n, the diagonal tile C(n, n) is updated first.
     * Then, for every m > n, the off-diagonal tile is updated with two calls
     * per k: C(m, n) for PlasmaLower, C(n, m) otherwise. The first call of
     * each k loop uses beta for k == 0 and zone afterwards.
     */
224 
225  int m, n, k;
226  int ldak, ldam, ldan, ldcm, ldcn;
227  int ldbk, ldbm, ldbn;
228  int tempnn, tempmm, tempkn, tempkm;
229 
231  PLASMA_Complex64_t zbeta;
232 
233  plasma = plasma_context_self();
234  if (sequence->status != PLASMA_SUCCESS)
235  return;
236  QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
237 
238  for (n = 0; n < C.nt; n++) {
239  tempnn = n == C.nt-1 ? C.n-n*C.nb : C.nb;
240  ldan = BLKLDD(A, n);
241  ldbn = BLKLDD(B, n);
242  ldcn = BLKLDD(C, n);
243  /*
244  * PlasmaNoTrans
245  */
246  if (trans == PlasmaNoTrans) {
247  for (k = 0; k < A.nt; k++) {
248  tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
249  zbeta = k == 0 ? beta : zone;
251  plasma->quark, &task_flags,
252  uplo, trans,
253  tempnn, tempkn, A.mb,
254  alpha, A(n, k), ldan, /* ldan * K */
255  B(n, k), ldbn,
256  zbeta, C(n, n), ldcn); /* ldc * N */
257  }
258  /*
259  * PlasmaNoTrans / PlasmaLower
260  */
261  if (uplo == PlasmaLower) {
262  for (m = n+1; m < C.mt; m++) {
263  tempmm = m == C.mt-1 ? C.m-m*C.mb : C.mb;
264  ldam = BLKLDD(A, m);
265  ldbm = BLKLDD(B, m);
266  ldcm = BLKLDD(C, m);
267  for (k = 0; k < A.nt; k++) {
268  tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
269  zbeta = k == 0 ? beta : zone;
271  plasma->quark, &task_flags,
272  trans, PlasmaTrans,
273  tempmm, tempnn, tempkn, A.mb,
274  alpha, A(m, k), ldam, /* ldam * K */
275  B(n, k), ldbn, /* ldan * K */
276  zbeta, C(m, n), ldcm); /* ldc * N */
277 
279  plasma->quark, &task_flags,
280  trans, PlasmaTrans,
281  tempmm, tempnn, tempkn, A.mb,
282  alpha, B(m, k), ldbm, /* ldam * K */
283  A(n, k), ldan, /* ldan * K */
284  zone, C(m, n), ldcm); /* ldc * N */
285  }
286  }
287  }
288  /*
289  * PlasmaNoTrans / PlasmaUpper
290  */
291  else {
292  for (m = n+1; m < C.mt; m++) {
293  tempmm = m == C.mt-1 ? C.m-m*C.mb : C.mb;
294  ldam = BLKLDD(A, m);
295  ldbm = BLKLDD(B, m);
296  for (k = 0; k < A.nt; k++) {
297  tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
298  zbeta = k == 0 ? beta : zone;
300  plasma->quark, &task_flags,
301  trans, PlasmaTrans,
302  tempnn, tempmm, tempkn, A.mb,
303  alpha, A(n, k), ldan, /* ldan * K */
304  B(m, k), ldbm, /* ldam * M */
305  zbeta, C(n, m), ldcn); /* ldc * M */
306 
    /* B(n, k) is a tile of B, so it takes B's leading dimension
     * ldbn (BLKLDD(B, n)) rather than A's ldan, as in every other
     * B(n, k) use in this function. */
308  plasma->quark, &task_flags,
309  trans, PlasmaTrans,
310  tempnn, tempmm, tempkn, A.mb,
311  alpha, B(n, k), ldbn, /* ldan * K */
312  A(m, k), ldam, /* ldam * M */
313  zone, C(n, m), ldcn); /* ldc * M */
314  }
315  }
316  }
317  }
318  /*
319  * Plasma[Conj]Trans
320  */
321  else {
322  for (k = 0; k < A.mt; k++) {
323  tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
324  ldak = BLKLDD(A, k);
325  ldbk = BLKLDD(B, k);
326  zbeta = k == 0 ? beta : zone;
328  plasma->quark, &task_flags,
329  uplo, trans,
330  tempnn, tempkm, A.mb,
331  alpha, A(k, n), ldak, /* lda * N */
332  B(k, n), ldbk,
333  zbeta, C(n, n), ldcn); /* ldc * N */
334  }
335  /*
336  * Plasma[Conj]Trans / PlasmaLower
337  */
338  if (uplo == PlasmaLower) {
339  for (m = n+1; m < C.mt; m++) {
340  tempmm = m == C.mt-1 ? C.m-m*C.mb : C.mb;
341  ldcm = BLKLDD(C, m);
342  for (k = 0; k < A.mt; k++) {
343  tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
344  ldak = BLKLDD(A, k);
345  ldbk = BLKLDD(B, k);
346  zbeta = k == 0 ? beta : zone;
348  plasma->quark, &task_flags,
349  trans, PlasmaNoTrans,
350  tempmm, tempnn, tempkm, A.mb,
351  alpha, A(k, m), ldak, /* lda * M */
352  B(k, n), ldbk, /* lda * N */
353  zbeta, C(m, n), ldcm); /* ldc * N */
354 
356  plasma->quark, &task_flags,
357  trans, PlasmaNoTrans,
358  tempmm, tempnn, tempkm, A.mb,
359  alpha, B(k, m), ldbk, /* lda * M */
360  A(k, n), ldak, /* lda * N */
361  zone, C(m, n), ldcm); /* ldc * N */
362  }
363  }
364  }
365  /*
366  * Plasma[Conj]Trans / PlasmaUpper
367  */
368  else {
369  for (m = n+1; m < C.mt; m++) {
370  tempmm = m == C.mt-1 ? C.m-m*C.mb : C.mb;
371  for (k = 0; k < A.mt; k++) {
372  tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
373  ldak = BLKLDD(A, k);
374  ldbk = BLKLDD(B, k);
375  zbeta = k == 0 ? beta : zone;
377  plasma->quark, &task_flags,
378  trans, PlasmaNoTrans,
379  tempnn, tempmm, tempkm, A.mb,
380  alpha, A(k, n), ldak, /* lda * K */
381  B(k, m), ldbk, /* lda * M */
382  zbeta, C(n, m), ldcn); /* ldc * M */
383 
385  plasma->quark, &task_flags,
386  trans, PlasmaNoTrans,
387  tempnn, tempmm, tempkm, A.mb,
388  alpha, B(k, n), ldbk, /* lda * K */
389  A(k, m), ldak, /* lda * M */
390  zone, C(n, m), ldcn); /* ldc * M */
391  }
392  }
393  }
394  }
395  }
396 }