/*
 * Accessor shorthands used by the kernels in this file.
 *
 * A(m,n), B(m,n) and C(m,n) each forward the identifier of the same name
 * (A, B or C), the element type PLASMA_Complex32_t, and the two arguments
 * m and n to BLKADDR.
 *
 * NOTE(review): BLKADDR is defined outside this view. At the call sites the
 * result is passed next to a leading-dimension argument (e.g. "A(k, m), ldak"),
 * so it presumably yields the address of block (m, n) -- confirm against the
 * BLKADDR definition.
 *
 * NOTE(review): the macro names shadow the identifiers A, B and C; this works
 * because a function-like macro is only expanded when followed by '(' , so
 * member accesses such as "C.mt" are left untouched.
 */
18 #define A(m,n) BLKADDR(A, PLASMA_Complex32_t, m, n)
19 #define B(m,n) BLKADDR(B, PLASMA_Complex32_t, m, n)
20 #define C(m,n) BLKADDR(C, PLASMA_Complex32_t, m, n)
39 int ldan, ldak, ldam, ldbk, ldbm, ldcm;
40 int tempmm, tempnn, tempkm, tempkn;
51 while (m >= C.
mt && n < C.
nt) {
61 while (next_m >= C.
mt && next_n < C.
nt) {
63 next_m = next_m - C.
mt;
66 tempmm = m == C.
mt-1 ? C.
m-m*C.
mb : C.
mb;
67 tempnn = n == C.
nt-1 ? C.
n-n*C.
nb : C.
nb;
75 for (k = 0; k < C.
mt; k++) {
76 tempkm = k == C.
mt-1 ? C.
m-k*C.
mb : C.
mb;
79 zbeta = k == 0 ? beta : zone;
83 tempmm, tempnn, tempkm,
86 zbeta,
C(m, n), ldcm);
95 zbeta,
C(m, n), ldcm);
100 tempmm, tempnn, tempkm,
101 alpha,
A(k, m), ldak,
103 zbeta,
C(m, n), ldcm);
112 for (k = 0; k < C.
mt; k++) {
113 tempkm = k == C.
mt-1 ? C.
m-k*C.
mb : C.
mb;
116 zbeta = k == 0 ? beta : zone;
120 tempmm, tempnn, tempkm,
121 alpha,
A(k, m), ldak,
123 zbeta,
C(m, n), ldcm);
130 alpha,
A(k, k), ldak,
132 zbeta,
C(m, n), ldcm);
137 tempmm, tempnn, tempkm,
138 alpha,
A(m, k), ldam,
140 zbeta,
C(m, n), ldcm);
153 for (k = 0; k < C.
nt; k++) {
154 tempkn = k == C.
nt-1 ? C.
n-k*C.
nb : C.
nb;
156 zbeta = k == 0 ? beta : zone;
160 tempmm, tempnn, tempkn,
161 alpha,
B(m, k), ldbm,
163 zbeta,
C(m, n), ldcm);
170 alpha,
A(k, k), ldak,
172 zbeta,
C(m, n), ldcm);
177 tempmm, tempnn, tempkn,
178 alpha,
B(m, k), ldbm,
180 zbeta,
C(m, n), ldcm);
189 for (k = 0; k < C.
nt; k++) {
190 tempkn = k == C.
nt-1 ? C.
n-k*C.
nb : C.
nb;
192 zbeta = k == 0 ? beta : zone;
196 tempmm, tempnn, tempkn,
197 alpha,
B(m, k), ldbm,
199 zbeta,
C(m, n), ldcm);
206 alpha,
A(k, k), ldak,
208 zbeta,
C(m, n), ldcm);
213 tempmm, tempnn, tempkn,
214 alpha,
B(m, k), ldbm,
216 zbeta,
C(m, n), ldcm);
239 int ldam, ldan, ldak, ldbk, ldbm, ldcm;
240 int tempmm, tempnn, tempkn, tempkm;
250 for(m = 0; m < C.
mt; m++) {
251 tempmm = m == C.
mt-1 ? C.
m-m*C.
mb : C.
mb;
253 for(n = 0; n < C.
nt; n++) {
254 tempnn = n == C.
nt-1 ? C.
n-n*C.
nb : C.
nb;
261 for (k = 0; k < C.
mt; k++) {
262 tempkm = k == C.
mt-1 ? C.
m-k*C.
mb : C.
mb;
265 zbeta = k == 0 ? beta : zone;
268 plasma->
quark, &task_flags,
270 tempmm, tempnn, tempkm, A.
mb,
271 alpha, A(m, k), ldam,
273 zbeta,
C(m, n), ldcm);
278 plasma->
quark, &task_flags,
280 tempmm, tempnn, A.
mb,
281 alpha, A(k, k), ldak,
283 zbeta,
C(m, n), ldcm);
287 plasma->
quark, &task_flags,
289 tempmm, tempnn, tempkm, A.
mb,
290 alpha, A(k, m), ldak,
292 zbeta,
C(m, n), ldcm);
301 for (k = 0; k < C.
mt; k++) {
302 tempkm = k == C.
mt-1 ? C.
m-k*C.
mb : C.
mb;
305 zbeta = k == 0 ? beta : zone;
308 plasma->
quark, &task_flags,
310 tempmm, tempnn, tempkm, A.
mb,
311 alpha, A(k, m), ldak,
313 zbeta,
C(m, n), ldcm);
318 plasma->
quark, &task_flags,
320 tempmm, tempnn, A.
mb,
321 alpha, A(k, k), ldak,
323 zbeta,
C(m, n), ldcm);
327 plasma->
quark, &task_flags,
329 tempmm, tempnn, tempkm, A.
mb,
330 alpha, A(m, k), ldam,
332 zbeta,
C(m, n), ldcm);
345 for (k = 0; k < C.
nt; k++) {
346 tempkn = k == C.
nt-1 ? C.
n-k*C.
nb : C.
nb;
348 zbeta = k == 0 ? beta : zone;
351 plasma->
quark, &task_flags,
353 tempmm, tempnn, tempkn, A.
mb,
354 alpha, B(m, k), ldbm,
356 zbeta,
C(m, n), ldcm);
361 plasma->
quark, &task_flags,
363 tempmm, tempnn, A.
mb,
364 alpha, A(k, k), ldak,
366 zbeta,
C(m, n), ldcm);
370 plasma->
quark, &task_flags,
372 tempmm, tempnn, tempkn, A.
mb,
373 alpha, B(m, k), ldbm,
375 zbeta,
C(m, n), ldcm);
384 for (k = 0; k < C.
nt; k++) {
385 tempkn = k == C.
nt-1 ? C.
n-k*C.
nb : C.
nb;
387 zbeta = k == 0 ? beta : zone;
390 plasma->
quark, &task_flags,
392 tempmm, tempnn, tempkn, A.
mb,
393 alpha, B(m, k), ldbm,
395 zbeta,
C(m, n), ldcm);
400 plasma->
quark, &task_flags,
402 tempmm, tempnn, A.
mb,
403 alpha, A(k, k), ldak,
405 zbeta,
C(m, n), ldcm);
409 plasma->
quark, &task_flags,
411 tempmm, tempnn, tempkn, A.
mb,
412 alpha, B(m, k), ldbm,
414 zbeta,
C(m, n), ldcm);