PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
pzherk.c
Go to the documentation of this file.
1 
15 #include "common.h"
16 
/*
 * Shorthands that forward to BLKADDR for the descriptors named A and C with
 * element type PLASMA_Complex64_t. BLKADDR is defined elsewhere; presumably
 * it yields the address of tile (m, n) -- confirm against common.h.
 */
17 #define A(m,n) BLKADDR(A, PLASMA_Complex64_t, m, n)
18 #define C(m,n) BLKADDR(C, PLASMA_Complex64_t, m, n)
19 /***************************************************************************/
23 {
/*
 * Tiled rank-k update of C from A (zherk on diagonal tiles, zgemm elsewhere).
 * NOTE(review): the function's signature line and the declarations of uplo,
 * trans and zone do not appear in this listing (the embedded line numbers
 * skip 20-22, 24-25 and 39); confirm them against the original pzherk.c.
 *
 * The eight arguments (uplo, trans, alpha, A, beta, C, sequence, request)
 * are obtained through plasma_unpack_args_8. Tiles are visited starting at
 * linear index PLASMA_RANK and stepping by PLASMA_SIZE; diagonal tiles
 * (m == n) are updated with CORE_zherk, all others with CORE_zgemm.
 */
26  double alpha;
27  PLASMA_desc A;
28  double beta;
29  PLASMA_desc C;
30  PLASMA_sequence *sequence;
31  PLASMA_request *request;
32 
33  int m, n, k;
34  int next_m;
35  int next_n;
36  int ldam, ldan, ldak, ldcm, ldcn;
37  int tempkn, tempkm, tempmm, tempnn;
38 
40  PLASMA_Complex64_t zalpha;
41  PLASMA_Complex64_t zbeta;
42  double dbeta;
43 
44  plasma_unpack_args_8(uplo, trans, alpha, A, beta, C, sequence, request);
/* Complex copy of alpha, used by the CORE_zgemm calls below. */
45  zalpha = (PLASMA_Complex64_t)alpha;
/* Do nothing when the sequence is already marked as not successful. */
46  if (sequence->status != PLASMA_SUCCESS)
47  return;
48 
/*
 * Translate the starting index PLASMA_RANK into a tile (m, n). Whenever m
 * runs past the last tile row, move to the next column and re-enter it at
 * row n (m - C.mt + n after the increment), so only tiles with m >= n are
 * enumerated.
 */
49  n = 0;
50  m = PLASMA_RANK;
51  while (m >= C.mt && n < C.nt) {
52  n++;
53  m = m-C.mt+n;
54  }
55 
56  while (n < C.nt) {
/*
 * Work out the tile for the following iteration: advance PLASMA_SIZE
 * positions and apply the same wrap rule as above.
 */
57  next_n = n;
58  next_m = m + PLASMA_SIZE;
59  while (next_m >= C.mt && next_n < C.nt) {
60  next_n++;
61  next_m = next_m - C.mt + next_n;
62  }
63 
/* The last tile row/column uses the remaining extent instead of C.mb / C.nb. */
64  tempmm = m == C.mt-1 ? C.m-m*C.mb : C.mb;
65  tempnn = n == C.nt-1 ? C.n-n*C.nb : C.nb;
66 
/* Diagonal tile: updated with CORE_zherk. */
67  if (m == n) {
68  ldcm = BLKLDD(C, m);
69  /*
70  * PlasmaNoTrans
71  */
72  if (trans == PlasmaNoTrans) {
73  ldam = BLKLDD(A, m);
/*
 * Accumulate over the tile columns of A; beta is applied on the first
 * step only, later steps use 1.0.
 */
74  for (k = 0; k < A.nt; k++) {
75  tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
76  dbeta = k == 0 ? beta : 1.0;
77  CORE_zherk(
78  uplo, trans,
79  tempnn, tempkn,
80  alpha, A(m, k), ldam,
81  dbeta, C(m, n), ldcm);
82  }
83  }
84  /*
85  * Plasma[Conj]Trans
86  */
87  else {
/* Same accumulation, but over the tile rows of A, reading A(k, m). */
88  for (k = 0; k < A.mt; k++) {
89  tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
90  ldak = BLKLDD(A, k);
91  dbeta = k == 0 ? beta : 1.0;
92  CORE_zherk(
93  uplo, trans,
94  tempnn, tempkm,
95  alpha, A(k, m), ldak,
96  dbeta, C(m, n), ldcm);
97  }
98  }
99  }
100  else {
/*
 * Off-diagonal tile: updated with CORE_zgemm. The PlasmaLower branches
 * write C(m, n); the other branches write the mirrored tile C(n, m) and
 * swap the roles of m and n in the operands. As above, beta is used for
 * k == 0 and zone for every later step.
 */
101  if (trans == PlasmaNoTrans) {
102  ldam = BLKLDD(A, m);
103  ldan = BLKLDD(A, n);
104  /*
105  * PlasmaNoTrans / PlasmaLower
106  */
107  if (uplo == PlasmaLower) {
108  ldcm = BLKLDD(C, m);
109  for (k = 0; k < A.nt; k++) {
110  tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
111  zbeta = k == 0 ? (PLASMA_Complex64_t)beta : zone;
112  CORE_zgemm(
113  trans, PlasmaConjTrans,
114  tempmm, tempnn, tempkn,
115  zalpha, A(m, k), ldam,
116  A(n, k), ldan,
117  zbeta, C(m, n), ldcm);
118  }
119  }
120  /*
121  * PlasmaNoTrans / PlasmaUpper
122  */
123  else {
124  ldcn = BLKLDD(C, n);
125  for (k = 0; k < A.nt; k++) {
126  tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
127  zbeta = k == 0 ? (PLASMA_Complex64_t)beta : zone;
128  CORE_zgemm(
129  trans, PlasmaConjTrans,
130  tempnn, tempmm, tempkn,
131  zalpha, A(n, k), ldan,
132  A(m, k), ldam,
133  zbeta, C(n, m), ldcn);
134  }
135  }
136  }
137  else {
138  /*
139  * Plasma[Conj]Trans / PlasmaLower
140  */
141  if (uplo == PlasmaLower) {
142  ldcm = BLKLDD(C, m);
143  for (k = 0; k < A.mt; k++) {
144  tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
145  ldak = BLKLDD(A, k);
146  zbeta = k == 0 ? (PLASMA_Complex64_t)beta : zone;
147  CORE_zgemm(
148  trans, PlasmaNoTrans,
149  tempmm, tempnn, tempkm,
150  zalpha, A(k, m), ldak,
151  A(k, n), ldak,
152  zbeta, C(m, n), ldcm);
153  }
154  }
155  /*
156  * Plasma[Conj]Trans / PlasmaUpper
157  */
158  else {
159  ldcn = BLKLDD(C, n);
160  for (k = 0; k < A.mt; k++) {
161  tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
162  ldak = BLKLDD(A, k);
163  zbeta = k == 0 ? (PLASMA_Complex64_t)beta : zone;
164  CORE_zgemm(
165  trans, PlasmaNoTrans,
166  tempnn, tempmm, tempkm,
167  zalpha, A(k, n), ldak,
168  A(k, m), ldak,
169  zbeta, C(n, m), ldcn);
170  }
171  }
172  }
173  }
/* Move on to the tile computed at the top of this iteration. */
174  m = next_m;
175  n = next_n;
176  }
177 }
178 
179 /***************************************************************************/
183  double alpha, PLASMA_desc A,
184  double beta, PLASMA_desc C,
185  PLASMA_sequence *sequence, PLASMA_request *request)
186 {
/*
 * Variant of the tiled update that hands each tile operation to
 * plasma->quark together with task_flags. NOTE(review): this listing omits
 * the first signature line, the declarations of plasma, task_flags and zone,
 * and the line naming the routine invoked at each submission site (embedded
 * numbers 215, 233, 253, 272, 290 and 310), so only the argument lists of
 * those calls are visible; presumably they are QUARK_CORE_zherk and
 * QUARK_CORE_zgemm -- confirm against the original pzherk.c.
 */
189 
190  int m, n, k;
191  int ldak, ldam, ldan, ldcm, ldcn;
192  int tempnn, tempmm, tempkn, tempkm;
193 
195  PLASMA_Complex64_t zalpha = (PLASMA_Complex64_t)alpha;
196  PLASMA_Complex64_t zbeta;
197  double dbeta;
198 
199  plasma = plasma_context_self();
/* Do nothing when the sequence is already marked as not successful. */
200  if (sequence->status != PLASMA_SUCCESS)
201  return;
/* Record the sequence's quark_sequence in task_flags under TASK_SEQUENCE. */
202  QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
203 
/*
 * One pass per tile column n: first the diagonal tile C(n, n), then the
 * tiles m = n+1 .. C.mt-1 paired with it (C(m, n) for PlasmaLower, the
 * mirrored C(n, m) otherwise). In every k loop beta is used for k == 0 and
 * 1.0 / zone for the later accumulation steps.
 */
204  for (n = 0; n < C.nt; n++) {
/* The last tile column uses the remaining extent instead of C.nb. */
205  tempnn = n == C.nt-1 ? C.n-n*C.nb : C.nb;
206  ldan = BLKLDD(A, n);
207  ldcn = BLKLDD(C, n);
208  /*
209  * PlasmaNoTrans
210  */
211  if (trans == PlasmaNoTrans) {
/* Diagonal tile C(n, n), accumulated over the tile columns of A. */
212  for (k = 0; k < A.nt; k++) {
213  tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
214  dbeta = k == 0 ? beta : 1.0;
216  plasma->quark, &task_flags,
217  uplo, trans,
218  tempnn, tempkn, A.mb,
219  alpha, A(n, k), ldan, /* ldan * K */
220  dbeta, C(n, n), ldcn); /* ldc * N */
221  }
222  /*
223  * PlasmaNoTrans / PlasmaLower
224  */
225  if (uplo == PlasmaLower) {
226  for (m = n+1; m < C.mt; m++) {
227  tempmm = m == C.mt-1 ? C.m-m*C.mb : C.mb;
228  ldam = BLKLDD(A, m);
229  ldcm = BLKLDD(C, m);
230  for (k = 0; k < A.nt; k++) {
231  tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
232  zbeta = k == 0 ? (PLASMA_Complex64_t)beta : zone;
234  plasma->quark, &task_flags,
235  trans, PlasmaConjTrans,
236  tempmm, tempnn, tempkn, A.mb,
237  zalpha, A(m, k), ldam, /* ldam * K */
238  A(n, k), ldan, /* ldan * K */
239  zbeta, C(m, n), ldcm); /* ldc * N */
240  }
241  }
242  }
243  /*
244  * PlasmaNoTrans / PlasmaUpper
245  */
246  else {
247  for (m = n+1; m < C.mt; m++) {
248  tempmm = m == C.mt-1 ? C.m-m*C.mb : C.mb;
249  ldam = BLKLDD(A, m);
250  for (k = 0; k < A.nt; k++) {
251  tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
252  zbeta = k == 0 ? (PLASMA_Complex64_t)beta : zone;
254  plasma->quark, &task_flags,
255  trans, PlasmaConjTrans,
256  tempnn, tempmm, tempkn, A.mb,
257  zalpha, A(n, k), ldan, /* ldan * K */
258  A(m, k), ldam, /* ldam * M */
259  zbeta, C(n, m), ldcn); /* ldc * M */
260  }
261  }
262  }
263  }
264  /*
265  * Plasma[Conj]Trans
266  */
267  else {
/* Diagonal tile C(n, n), accumulated over the tile rows of A. */
268  for (k = 0; k < A.mt; k++) {
269  tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
270  ldak = BLKLDD(A, k);
271  dbeta = k == 0 ? beta : 1.0;
273  plasma->quark, &task_flags,
274  uplo, trans,
275  tempnn, tempkm, A.mb,
276  alpha, A(k, n), ldak, /* lda * N */
277  dbeta, C(n, n), ldcn); /* ldc * N */
278  }
279  /*
280  * Plasma[Conj]Trans / PlasmaLower
281  */
282  if (uplo == PlasmaLower) {
283  for (m = n+1; m < C.mt; m++) {
284  tempmm = m == C.mt-1 ? C.m-m*C.mb : C.mb;
285  ldcm = BLKLDD(C, m);
286  for (k = 0; k < A.mt; k++) {
287  tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
288  ldak = BLKLDD(A, k);
289  zbeta = k == 0 ? (PLASMA_Complex64_t)beta : zone;
291  plasma->quark, &task_flags,
292  trans, PlasmaNoTrans,
293  tempmm, tempnn, tempkm, A.mb,
294  zalpha, A(k, m), ldak, /* lda * M */
295  A(k, n), ldak, /* lda * N */
296  zbeta, C(m, n), ldcm); /* ldc * N */
297  }
298  }
299  }
300  /*
301  * Plasma[Conj]Trans / PlasmaUpper
302  */
303  else {
304  for (m = n+1; m < C.mt; m++) {
305  tempmm = m == C.mt-1 ? C.m-m*C.mb : C.mb;
306  for (k = 0; k < A.mt; k++) {
307  tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
308  ldak = BLKLDD(A, k);
309  zbeta = k == 0 ? (PLASMA_Complex64_t)beta : zone;
311  plasma->quark, &task_flags,
312  trans, PlasmaNoTrans,
313  tempnn, tempmm, tempkm, A.mb,
314  zalpha, A(k, n), ldak, /* lda * K */
315  A(k, m), ldak, /* lda * M */
316  zbeta, C(n, m), ldcn); /* ldc * M */
317  }
318  }
319  }
320  }
321  }
322 }