/*
 * Tile accessors for the three matrix descriptors used in this file.
 * Each macro forwards its descriptor (A, B or C), the element type
 * PLASMA_Complex32_t and the tile indices (m, n) to BLKADDR.
 * NOTE(review): BLKADDR is defined elsewhere; presumably it yields the
 * address of tile (m, n) of the descriptor -- confirm against its definition.
 * The arguments are forwarded unparenthesized, so callers should pass
 * simple index expressions (the visible call sites pass plain identifiers).
 */
17 #define A(m,n) BLKADDR(A, PLASMA_Complex32_t, m, n)
18 #define B(m,n) BLKADDR(B, PLASMA_Complex32_t, m, n)
19 #define C(m,n) BLKADDR(C, PLASMA_Complex32_t, m, n)
41 int tempkn, tempkm, tempmm, tempnn;
53 while (m >= C.
mt && n < C.
nt) {
61 while (next_m >= C.
mt && next_n < C.
nt) {
63 next_m = next_m - C.
mt + next_n;
66 tempmm = m == C.
mt-1 ? C.
m-m*C.
mb : C.
mb;
67 tempnn = n == C.
nt-1 ? C.
n-n*C.
nb : C.
nb;
79 for (k = 0; k < A.
nt; k++) {
80 tempkn = k == A.
nt-1 ? A.
n-k*A.
nb : A.
nb;
81 dbeta = k == 0 ? beta : 1.0;
87 dbeta,
C(m, m), ldcm);
94 for (k = 0; k < A.
mt; k++) {
95 tempkm = k == A.
mt-1 ? A.
m-k*A.
mb : A.
mb;
98 dbeta = k == 0 ? beta : 1.0;
102 alpha,
A(k, m), ldak,
104 dbeta,
C(m, m), ldcm);
118 for (k = 0; k < A.
nt; k++) {
119 tempkn = k == A.
nt-1 ? A.
n-k*A.
nb : A.
nb;
123 tempmm, tempnn, tempkn,
124 alpha,
A(m, k), ldam,
126 zbeta,
C(m, n), ldcm);
130 tempmm, tempnn, tempkn,
131 alpha,
B(m, k), ldbm,
133 zone,
C(m, n), ldcm);
140 for (k = 0; k < A.
nt; k++) {
141 tempkn = k == A.
nt-1 ? A.
n-k*A.
nb : A.
nb;
145 tempnn, tempmm, tempkn,
146 alpha,
A(n, k), ldan,
148 zbeta,
C(n, m), ldcn);
152 tempnn, tempmm, tempkn,
153 alpha,
B(n, k), ldbn,
155 zone,
C(n, m), ldcn);
164 for (k = 0; k < A.
mt; k++) {
167 tempkm = k == A.
mt-1 ? A.
m-k*A.
mb : A.
mb;
171 tempmm, tempnn, tempkm,
172 alpha,
A(k, m), ldak,
174 zbeta,
C(m, n), ldcm);
178 tempmm, tempnn, tempkm,
179 alpha,
B(k, m), ldbk,
181 zone,
C(m, n), ldcm);
188 for (k = 0; k < A.
mt; k++) {
189 tempkm = k == A.
mt-1 ? A.
m-k*A.
mb : A.
mb;
195 tempnn, tempmm, tempkm,
196 alpha,
A(k, n), ldak,
198 zbeta,
C(n, m), ldcm);
202 tempnn, tempmm, tempkm,
203 alpha,
B(k, n), ldbk,
205 zone,
C(n, m), ldcn);
227 int ldak, ldam, ldan, ldcm, ldcn;
228 int ldbk, ldbm, ldbn;
229 int tempnn, tempmm, tempkn, tempkm;
240 for (n = 0; n < C.
nt; n++) {
241 tempnn = n == C.
nt-1 ? C.
n-n*C.
nb : C.
nb;
249 for (k = 0; k < A.
nt; k++) {
250 tempkn = k == A.
nt-1 ? A.
n-k*A.
nb : A.
nb;
251 dbeta = k == 0 ? beta : 1.0;
253 plasma->
quark, &task_flags,
255 tempnn, tempkn, A.
mb,
256 alpha, A(n, k), ldan,
258 dbeta,
C(n, n), ldcn);
264 for (m = n+1; m < C.
mt; m++) {
265 tempmm = m == C.
mt-1 ? C.
m-m*C.
mb : C.
mb;
269 for (k = 0; k < A.
nt; k++) {
270 tempkn = k == A.
nt-1 ? A.
n-k*A.
nb : A.
nb;
273 plasma->
quark, &task_flags,
275 tempmm, tempnn, tempkn, A.
mb,
276 alpha, A(m, k), ldam,
278 zbeta,
C(m, n), ldcm);
281 plasma->
quark, &task_flags,
283 tempmm, tempnn, tempkn, A.
mb,
284 alpha, B(m, k), ldbm,
286 zone,
C(m, n), ldcm);
294 for (m = n+1; m < C.
mt; m++) {
295 tempmm = m == C.
mt-1 ? C.
m-m*C.
mb : C.
mb;
298 for (k = 0; k < A.
nt; k++) {
299 tempkn = k == A.
nt-1 ? A.
n-k*A.
nb : A.
nb;
302 plasma->
quark, &task_flags,
304 tempnn, tempmm, tempkn, A.
mb,
305 alpha, A(n, k), ldan,
307 zbeta,
C(n, m), ldcn);
310 plasma->
quark, &task_flags,
312 tempnn, tempmm, tempkn, A.
mb,
313 alpha, B(n, k), ldan,
315 zone,
C(n, m), ldcn);
324 for (k = 0; k < A.
mt; k++) {
325 tempkm = k == A.
mt-1 ? A.
m-k*A.
mb : A.
mb;
328 dbeta = k == 0 ? beta : 1.0;
330 plasma->
quark, &task_flags,
332 tempnn, tempkm, A.
mb,
333 alpha, A(k, n), ldak,
335 dbeta,
C(n, n), ldcn);
341 for (m = n+1; m < C.
mt; m++) {
342 tempmm = m == C.
mt-1 ? C.
m-m*C.
mb : C.
mb;
344 for (k = 0; k < A.
mt; k++) {
345 tempkm = k == A.
mt-1 ? A.
m-k*A.
mb : A.
mb;
350 plasma->
quark, &task_flags,
352 tempmm, tempnn, tempkm, A.
mb,
353 alpha, A(k, m), ldak,
355 zbeta,
C(m, n), ldcm);
358 plasma->
quark, &task_flags,
360 tempmm, tempnn, tempkm, A.
mb,
361 alpha, B(k, m), ldbk,
363 zone,
C(m, n), ldcm);
371 for (m = n+1; m < C.
mt; m++) {
372 tempmm = m == C.
mt-1 ? C.
m-m*C.
mb : C.
mb;
373 for (k = 0; k < A.
mt; k++) {
374 tempkm = k == A.
mt-1 ? A.
m-k*A.
mb : A.
mb;
379 plasma->
quark, &task_flags,
381 tempnn, tempmm, tempkm, A.
mb,
382 alpha, A(k, n), ldak,
384 zbeta,
C(n, m), ldcn);
387 plasma->
quark, &task_flags,
389 tempnn, tempmm, tempkm, A.
mb,
390 alpha, B(k, n), ldbk,
392 zone,
C(n, m), ldcn);