PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
pzsyrk.c
Go to the documentation of this file.
1 
16 #include "common.h"
17 
/*
 * Shorthands used as the tile operands of the kernel calls below: each one
 * forwards (m, n) to BLKADDR for descriptor A or C with element type
 * PLASMA_Complex64_t. Presumably BLKADDR yields the address of tile (m, n);
 * confirm against its definition in common.h.
 */
18 #define A(m,n) BLKADDR(A, PLASMA_Complex64_t, m, n)
19 #define C(m,n) BLKADDR(C, PLASMA_Complex64_t, m, n)
20 /***************************************************************************/
24 {
/*
 * Body of the first (rank-partitioned) routine of this file. Its signature
 * lies on the original lines between 20 and 24, which are absent from this
 * listing.
 *
 * The routine walks the (m, n) positions with m >= n of C's tile grid,
 * column by column. It starts at linear position PLASMA_RANK and advances by
 * PLASMA_SIZE positions per iteration (presumably the caller's thread rank
 * and the number of participating threads -- TODO confirm).
 *
 *   - m == n : CORE_zsyrk on C(m, n), once per k-tile of A.
 *   - m != n : CORE_zgemm, once per k-tile of A, on C(m, n) when
 *              uplo == PlasmaLower and on the mirrored tile C(n, m)
 *              otherwise.
 *
 * NOTE(review): uplo, trans and zone are used below but their declarations
 * are not visible here (original lines 25-26 and 41 are absent). zone is
 * presumably the complex constant 1 -- confirm against the upstream file.
 */
27  PLASMA_Complex64_t alpha;
28  PLASMA_desc A;
29  PLASMA_Complex64_t beta;
30  PLASMA_desc C;
31  PLASMA_sequence *sequence;
32  PLASMA_request *request;
33 
34  int m, n, k;
35  int next_m;
36  int next_n;
37  int ldam, ldan, ldak, ldcm, ldcn;
38  int tempkn, tempkm, tempmm, tempnn;
39 
40  PLASMA_Complex64_t zbeta;
42 
/* Fetch the eight arguments, then do nothing if the sequence already failed. */
43  plasma_unpack_args_8(uplo, trans, alpha, A, beta, C, sequence, request);
44  if (sequence->status != PLASMA_SUCCESS)
45  return;
46 
/*
 * Convert the starting linear position into (m, n). Column n holds rows
 * n..C.mt-1, so running past the bottom of a column moves to the next
 * column and restarts at its diagonal row.
 */
47  n = 0;
48  m = PLASMA_RANK;
49  while (m >= C.mt && n < C.nt) {
50  n++;
51  m = m-C.mt+n;
52  }
53 
54  while (n < C.nt) {
/* Work out the position for the next iteration, PLASMA_SIZE steps ahead. */
55  next_n = n;
56  next_m = m + PLASMA_SIZE;
57  while (next_m >= C.mt && next_n < C.nt) {
58  next_n++;
59  next_m = next_m - C.mt + next_n;
60  }
61 
/* The last tile row / column takes the remainder; all others are full size. */
62  tempmm = m == C.mt-1 ? C.m-m*C.mb : C.mb;
63  tempnn = n == C.nt-1 ? C.n-n*C.nb : C.nb;
64 
/* Diagonal position: one CORE_zsyrk call per k-tile of A. */
65  if (m == n) {
66  ldcm = BLKLDD(C, m);
67  /*
68  * PlasmaNoTrans
69  */
70  if (trans == PlasmaNoTrans) {
71  ldam = BLKLDD(A, m);
72  for (k = 0; k < A.nt; k++) {
73  tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
/* The caller's beta is passed only for k == 0; later steps pass zone. */
74  zbeta = k == 0 ? beta : zone;
75  CORE_zsyrk(
76  uplo, trans,
77  tempnn, tempkn,
78  alpha, A(m, k), ldam,
79  zbeta, C(m, n), ldcm);
80  }
81  }
82  /*
83  * Plasma[Conj]Trans
84  */
85  else {
86  for (k = 0; k < A.mt; k++) {
87  tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
88  ldak = BLKLDD(A, k);
89  zbeta = k == 0 ? beta : zone;
90  CORE_zsyrk(
91  uplo, trans,
92  tempnn, tempkm,
93  alpha, A(k, m), ldak,
94  zbeta, C(m, n), ldcm);
95  }
96  }
97  }
/* Off-diagonal position (m > n): one CORE_zgemm call per k-tile of A. */
98  else {
99  if (trans == PlasmaNoTrans) {
100  ldam = BLKLDD(A, m);
101  ldan = BLKLDD(A, n);
102  /*
103  * PlasmaNoTrans / PlasmaLower
104  */
105  if (uplo == PlasmaLower) {
106  ldcm = BLKLDD(C, m);
107  for (k = 0; k < A.nt; k++) {
108  tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
109  zbeta = k == 0 ? beta : zone;
110  CORE_zgemm(
111  trans, PlasmaTrans,
112  tempmm, tempnn, tempkn,
113  alpha, A(m, k), ldam,
114  A(n, k), ldan,
115  zbeta, C(m, n), ldcm);
116  }
117  }
118  /*
119  * PlasmaNoTrans / PlasmaUpper
120  */
121  else {
/* Mirrored tile C(n, m): the two A operands and the dimensions are swapped. */
122  ldcn = BLKLDD(C, n);
123  for (k = 0; k < A.nt; k++) {
124  tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
125  zbeta = k == 0 ? beta : zone;
126  CORE_zgemm(
127  trans, PlasmaTrans,
128  tempnn, tempmm, tempkn,
129  alpha, A(n, k), ldan,
130  A(m, k), ldam,
131  zbeta, C(n, m), ldcn);
132  }
133  }
134  }
135  else {
136  /*
137  * Plasma[Conj]Trans / PlasmaLower
138  */
139  if (uplo == PlasmaLower) {
140  ldcm = BLKLDD(C, m);
141  for (k = 0; k < A.mt; k++) {
142  tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
143  ldak = BLKLDD(A, k);
144  zbeta = k == 0 ? beta : zone;
145  CORE_zgemm(
146  trans, PlasmaNoTrans,
147  tempmm, tempnn, tempkm,
148  alpha, A(k, m), ldak,
149  A(k, n), ldak,
150  zbeta, C(m, n), ldcm);
151  }
152  }
153  /*
154  * Plasma[Conj]Trans / PlasmaUpper
155  */
156  else {
157  ldcn = BLKLDD(C, n);
158  for (k = 0; k < A.mt; k++) {
159  tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
160  ldak = BLKLDD(A, k);
161  zbeta = k == 0 ? beta : zone;
162  CORE_zgemm(
163  trans, PlasmaNoTrans,
164  tempnn, tempmm, tempkm,
165  alpha, A(k, n), ldak,
166  A(k, m), ldak,
167  zbeta, C(n, m), ldcn);
168  }
169  }
170  }
171  }
/* Move on to the position computed at the top of this iteration. */
172  m = next_m;
173  n = next_n;
174  }
175 }
176 
177 /***************************************************************************/
183  PLASMA_sequence *sequence, PLASMA_request *request)
184 {
/*
 * Second routine of this file. Only the tail of its signature is visible;
 * the name and the leading parameters are on original lines that are absent
 * from this listing.
 *
 * For every tile column n of C it issues the update of the diagonal tile
 * C(n, n), once per k-tile of A. It then issues the off-diagonal updates for
 * m = n+1 .. C.mt-1: C(m, n) when uplo == PlasmaLower, and the mirrored tile
 * C(n, m) otherwise.
 *
 * NOTE(review): the line that opens each call (original lines 211, 229, 249,
 * 268, 286 and 306) is missing, so the callee names cannot be read here.
 * Each argument list begins with plasma->quark and &task_flags, so these are
 * presumably QUARK task-insertion wrappers for the syrk / gemm kernels --
 * confirm against the upstream file.
 *
 * NOTE(review): the declarations of plasma, task_flags and zone (original
 * lines 185-186 and 193) are also absent. zone is presumably the complex
 * constant 1.
 */
187 
188  int m, n, k;
189  int ldak, ldam, ldan, ldcm, ldcn;
190  int tempnn, tempmm, tempkn, tempkm;
191 
192  PLASMA_Complex64_t zbeta;
194 
195  plasma = plasma_context_self();
/* Do nothing if the sequence has already failed. */
196  if (sequence->status != PLASMA_SUCCESS)
197  return;
/* Record the sequence's quark_sequence in the flags passed to every call below. */
198  QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
199 
200  for (n = 0; n < C.nt; n++) {
/* The last tile column takes the remainder; all others are C.nb wide. */
201  tempnn = n == C.nt-1 ? C.n-n*C.nb : C.nb;
202  ldan = BLKLDD(A, n);
203  ldcn = BLKLDD(C, n);
204  /*
205  * PlasmaNoTrans
206  */
207  if (trans == PlasmaNoTrans) {
/* Diagonal tile C(n, n): the caller's beta is passed for k == 0, zone afterwards. */
208  for (k = 0; k < A.nt; k++) {
209  tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
210  zbeta = k == 0 ? beta : zone;
212  plasma->quark, &task_flags,
213  uplo, trans,
214  tempnn, tempkn, A.mb,
215  alpha, A(n, k), ldan, /* ldan * K */
216  zbeta, C(n, n), ldcn); /* ldc * N */
217  }
218  /*
219  * PlasmaNoTrans / PlasmaLower
220  */
221  if (uplo == PlasmaLower) {
222  for (m = n+1; m < C.mt; m++) {
223  tempmm = m == C.mt-1 ? C.m-m*C.mb : C.mb;
224  ldam = BLKLDD(A, m);
225  ldcm = BLKLDD(C, m);
226  for (k = 0; k < A.nt; k++) {
227  tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
228  zbeta = k == 0 ? beta : zone;
230  plasma->quark, &task_flags,
231  trans, PlasmaTrans,
232  tempmm, tempnn, tempkn, A.mb,
233  alpha, A(m, k), ldam, /* ldam * K */
234  A(n, k), ldan, /* ldan * K */
235  zbeta, C(m, n), ldcm); /* ldc * N */
236  }
237  }
238  }
239  /*
240  * PlasmaNoTrans / PlasmaUpper
241  */
242  else {
/* Mirrored tile C(n, m): the two A operands and the dimensions are swapped. */
243  for (m = n+1; m < C.mt; m++) {
244  tempmm = m == C.mt-1 ? C.m-m*C.mb : C.mb;
245  ldam = BLKLDD(A, m);
246  for (k = 0; k < A.nt; k++) {
247  tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
248  zbeta = k == 0 ? beta : zone;
250  plasma->quark, &task_flags,
251  trans, PlasmaTrans,
252  tempnn, tempmm, tempkn, A.mb,
253  alpha, A(n, k), ldan, /* ldan * K */
254  A(m, k), ldam, /* ldam * M */
255  zbeta, C(n, m), ldcn); /* ldc * M */
256  }
257  }
258  }
259  }
260  /*
261  * PlasmaTrans
262  */
263  else {
/* Diagonal tile C(n, n), reading A tiles as A(k, n). */
264  for (k = 0; k < A.mt; k++) {
265  tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
266  ldak = BLKLDD(A, k);
267  zbeta = k == 0 ? beta : zone;
269  plasma->quark, &task_flags,
270  uplo, trans,
271  tempnn, tempkm, A.mb,
272  alpha, A(k, n), ldak, /* lda * N */
273  zbeta, C(n, n), ldcn); /* ldc * N */
274  }
275  /*
276  * PlasmaTrans / PlasmaLower
277  */
278  if (uplo == PlasmaLower) {
279  for (m = n+1; m < C.mt; m++) {
280  tempmm = m == C.mt-1 ? C.m-m*C.mb : C.mb;
281  ldcm = BLKLDD(C, m);
282  for (k = 0; k < A.mt; k++) {
283  tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
284  ldak = BLKLDD(A, k);
285  zbeta = k == 0 ? beta : zone;
287  plasma->quark, &task_flags,
288  trans, PlasmaNoTrans,
289  tempmm, tempnn, tempkm, A.mb,
290  alpha, A(k, m), ldak, /* lda * M */
291  A(k, n), ldak, /* lda * N */
292  zbeta, C(m, n), ldcm); /* ldc * N */
293  }
294  }
295  }
296  /*
297  * PlasmaTrans / PlasmaUpper
298  */
299  else {
300  for (m = n+1; m < C.mt; m++) {
301  tempmm = m == C.mt-1 ? C.m-m*C.mb : C.mb;
302  for (k = 0; k < A.mt; k++) {
303  tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
304  ldak = BLKLDD(A, k);
305  zbeta = k == 0 ? beta : zone;
307  plasma->quark, &task_flags,
308  trans, PlasmaNoTrans,
309  tempnn, tempmm, tempkm, A.mb,
310  alpha, A(k, n), ldak, /* lda * K */
311  A(k, m), ldak, /* lda * M */
312  zbeta, C(n, m), ldcn); /* ldc * M */
313  }
314  }
315  }
316  }
317  }
318 }