18 #define A(m,n) BLKADDR(A, PLASMA_Complex32_t, m, n)
19 #define B(m,n) BLKADDR(B, PLASMA_Complex32_t, m, n)
20 #define C(m,n) BLKADDR(C, PLASMA_Complex32_t, m, n)
39 int lda, ldak, ldb, ldc;
40 int tempmm, tempnn, tempkn, tempkm;
51 while (m >= C.
mt && n < C.
nt) {
61 while (next_m >= C.
mt && next_n < C.
nt) {
63 next_m = next_m - C.
mt;
66 tempmm = m == C.
mt-1 ? C.
m-m*C.
mb : C.
mb;
67 tempnn = n == C.
nt-1 ? C.
n-n*C.
nb : C.
nb;
76 for (k = 0; k < C.
mt; k++) {
77 tempkm = k == C.
mt-1 ? C.
m-k*C.
mb : C.
mb;
80 zbeta = k == 0 ? beta : zone;
84 tempmm, tempnn, tempkm,
101 tempmm, tempnn, tempkm,
102 alpha,
A(k, m), ldak,
104 zbeta,
C(m, n), ldc);
113 for (k = 0; k < C.
mt; k++) {
114 tempkm = k == C.
mt-1 ? C.
m-k*C.
mb : C.
mb;
117 zbeta = k == 0 ? beta : zone;
121 tempmm, tempnn, tempkm,
122 alpha,
A(k, m), ldak,
124 zbeta,
C(m, n), ldc);
131 alpha,
A(k, k), ldak,
133 zbeta,
C(m, n), ldc);
138 tempmm, tempnn, tempkm,
141 zbeta,
C(m, n), ldc);
154 for (k = 0; k < C.
nt; k++) {
155 tempkn = k == C.
nt-1 ? C.
n-k*C.
nb : C.
nb;
157 zbeta = k == 0 ? beta : zone;
161 tempmm, tempnn, tempkn,
164 zbeta,
C(m, n), ldc);
171 alpha,
A(k, k), ldak,
173 zbeta,
C(m, n), ldc);
178 tempmm, tempnn, tempkn,
181 zbeta,
C(m, n), ldc);
190 for (k = 0; k < C.
nt; k++) {
191 tempkn = k == C.
nt-1 ? C.
n-k*C.
nb : C.
nb;
193 zbeta = k == 0 ? beta : zone;
197 tempmm, tempnn, tempkn,
200 zbeta,
C(m, n), ldc);
207 alpha,
A(k, k), ldak,
209 zbeta,
C(m, n), ldc);
214 tempmm, tempnn, tempkn,
217 zbeta,
C(m, n), ldc);
240 int lda, ldak, ldb, ldc;
241 int tempmm, tempnn, tempkn, tempkm;
251 for (m = 0; m < C.
mt; m++) {
252 tempmm = m == C.
mt-1 ? C.
m-m*C.
mb : C.
mb;
254 for (n = 0; n < C.
nt; n++) {
255 tempnn = n == C.
nt-1 ? C.
n-n*C.
nb : C.
nb;
262 for (k = 0; k < C.
mt; k++) {
263 tempkm = k == C.
mt-1 ? C.
m-k*C.
mb : C.
mb;
266 zbeta = k == 0 ? beta : zone;
269 plasma->
quark, &task_flags,
271 tempmm, tempnn, tempkm, A.
mb,
274 zbeta,
C(m, n), ldc);
279 plasma->
quark, &task_flags,
281 tempmm, tempnn, A.
mb,
282 alpha, A(k, k), ldak,
284 zbeta,
C(m, n), ldc);
288 plasma->
quark, &task_flags,
290 tempmm, tempnn, tempkm, A.
mb,
291 alpha, A(k, m), ldak,
293 zbeta,
C(m, n), ldc);
302 for (k = 0; k < C.
mt; k++) {
303 tempkm = k == C.
mt-1 ? C.
m-k*C.
mb : C.
mb;
306 zbeta = k == 0 ? beta : zone;
309 plasma->
quark, &task_flags,
311 tempmm, tempnn, tempkm, A.
mb,
312 alpha, A(k, m), ldak,
314 zbeta,
C(m, n), ldc);
319 plasma->
quark, &task_flags,
321 tempmm, tempnn, A.
mb,
322 alpha, A(k, k), ldak,
324 zbeta,
C(m, n), ldc);
328 plasma->
quark, &task_flags,
330 tempmm, tempnn, tempkm, A.
mb,
333 zbeta,
C(m, n), ldc);
346 for (k = 0; k < C.
nt; k++) {
347 tempkn = k == C.
nt-1 ? C.
n-k*C.
nb : C.
nb;
349 zbeta = k == 0 ? beta : zone;
352 plasma->
quark, &task_flags,
354 tempmm, tempnn, tempkn, A.
mb,
357 zbeta,
C(m, n), ldc);
362 plasma->
quark, &task_flags,
364 tempmm, tempnn, A.
mb,
365 alpha, A(k, k), ldak,
367 zbeta,
C(m, n), ldc);
371 plasma->
quark, &task_flags,
373 tempmm, tempnn, tempkn, A.
mb,
376 zbeta,
C(m, n), ldc);
385 for (k = 0; k < C.
nt; k++) {
386 tempkn = k == C.
nt-1 ? C.
n-k*C.
nb : C.
nb;
388 zbeta = k == 0 ? beta : zone;
391 plasma->
quark, &task_flags,
393 tempmm, tempnn, tempkn, A.
mb,
396 zbeta,
C(m, n), ldc);
401 plasma->
quark, &task_flags,
403 tempmm, tempnn, A.
mb,
404 alpha, A(k, k), ldak,
406 zbeta,
C(m, n), ldc);
410 plasma->
quark, &task_flags,
412 tempmm, tempnn, tempkn, A.
mb,
415 zbeta,
C(m, n), ldc);