19 #define A(m,n) BLKADDR(A, float, m, n)
20 #define B(m,n) BLKADDR(B, float, m, n)
41 int tempkm, tempnn, tempmm, tempkn;
43 float zone = (float) 1.0;
44 float mzone = (float)-1.0;
49 minvalpha = mzone / alpha;
65 while (k < B.
mt && m < B.
mt) {
73 while (next_m >= B.
mt && next_k < B.
mt) {
75 next_m = next_m - B.
mt + next_k;
80 tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
81 tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
83 lalpha = k == 0 ? alpha : zone;
92 tempkm = k == B.
mt-1 ? B.
m-k*B.
mb : B.
mb;
96 side, uplo, trans, diag,
106 tempkm = k == 0 ? B.
m-(B.
mt-1)*B.
mb : B.
mb;
110 side, uplo, trans, diag,
112 lalpha,
A(B.
mt-1-k, B.
mt-1-k), lda,
113 B(B.
mt-1-k, n ), ldb);
129 tempmm, tempnn, B.
mb,
132 lalpha, B(m, n), ldb);
138 tempkm = k == 0 ? A.
m-(A.
mt-1)*A.
mb : A.
mb;
143 B.
mb, tempnn, tempkm,
144 mzone, A(A.
mt-1-k, A.
mt-1-m), lda,
145 B(B.
mt-1-k, n ), ldb,
146 lalpha, B(B.
mt-1-m, n ), B.
mb);
154 tempkm = k == 0 ? A.
m-(A.
mt-1)*A.
mb : A.
mb;
158 B.
mb, tempnn, tempkm,
159 mzone, A(A.
mt-1-m, A.
mt-1-k), A.
mb,
160 B(B.
mt-1-k, n ), ldb,
161 lalpha, B(B.
mt-1-m, n ), B.
mb);
170 tempmm, tempnn, B.
mb,
171 mzone, A(k, m), A.
mb,
173 lalpha, B(m, n), ldb);
195 while (k < B.
nt && n < B.
nt) {
201 if (next_m >= B.
mt) {
203 while (next_n >= B.
nt && next_k < B.
nt) {
205 next_n = next_n - B.
nt + next_k;
210 tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
211 tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
213 lalpha = k == 0 ? alpha : zone;
221 tempkn = k == 0 ? B.
n-(B.
nt-1)*B.
nb : B.
nb;
225 side, uplo, trans, diag,
227 lalpha,
A(B.
nt-1-k, B.
nt-1-k), lda,
228 B(m, B.
nt-1-k), ldb);
234 tempkn = k == B.
nt-1 ? B.
n-k*B.
nb : B.
nb;
238 side, uplo, trans, diag,
249 tempkn = k == B.
nt-1 ? B.
n-k*B.
nb : B.
nb;
253 side, uplo, trans, diag,
255 lalpha,
A(k, k), lda,
262 tempkn = k == 0 ? B.
n-(B.
nt-1)*B.
nb : B.
nb;
266 side, uplo, trans, diag,
268 alpha,
A(B.
nt-1-k, B.
nt-1-k), lda,
269 B(m, B.
nt-1-k), ldb);
282 tempkn = k == 0 ? B.
n-(B.
nt-1)*B.
nb : B.
nb;
287 tempmm, B.
mb, tempkn,
288 mzone, B(m, B.
nt-1-k), ldb,
289 A(B.
nt-1-k, B.
nt-1-n), lda,
290 lalpha, B(m, B.
nt-1-n), ldb);
300 tempmm, tempnn, B.
mb,
301 minvalpha, B(m, k), ldb,
315 tempmm, tempnn, B.
mb,
318 lalpha,
B(m, n), ldb);
324 tempkn = k == 0 ? B.
n-(B.
nt-1)*B.
nb : B.
nb;
328 tempmm, B.
nb, tempkn,
329 minvalpha, B(m, B.
nt-1-k), ldb,
330 A(B.
nt-1-n, B.
nt-1-k), A.
mb,
331 zone, B(m, B.
nt-1-n), ldb);
356 int tempkm, tempkn, tempmm, tempnn;
358 float zone = (float) 1.0;
359 float mzone = (float)-1.0;
360 float minvalpha = (float)-1.0 / alpha;
373 for (k = 0; k < B.
mt; k++) {
374 tempkm = k == 0 ? B.
m-(B.
mt-1)*B.
mb : B.
mb;
377 lalpha = k == 0 ? alpha : zone;
378 for (n = 0; n < B.
nt; n++) {
379 tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
381 plasma->
quark, &task_flags,
382 side, uplo, trans, diag,
383 tempkm, tempnn, A.
mb,
384 lalpha, A(B.
mt-1-k, B.
mt-1-k), lda,
385 B(B.
mt-1-k, n), ldb);
387 for (m = k+1; m < B.
mt; m++) {
388 for (n = 0; n < B.
nt; n++) {
389 tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
391 plasma->
quark, &task_flags,
393 B.
mb, tempnn, tempkm, A.
mb,
394 mzone, A(B.
mt-1-m, B.
mt-1-k), A.
mb,
395 B(B.
mt-1-k, n ), ldb,
396 lalpha, B(B.
mt-1-m, n ), B.
mb);
405 for (k = 0; k < B.
mt; k++) {
406 tempkm = k == B.
mt-1 ? B.
m-k*B.
mb : B.
mb;
409 lalpha = k == 0 ? alpha : zone;
410 for (n = 0; n < B.
nt; n++) {
411 tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
413 plasma->
quark, &task_flags,
414 side, uplo, trans, diag,
415 tempkm, tempnn, A.
mb,
416 lalpha, A(k, k), lda,
419 for (m = k+1; m < B.
mt; m++) {
420 tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
422 for (n = 0; n < B.
nt; n++) {
423 tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
425 plasma->
quark, &task_flags,
427 tempmm, tempnn, B.
mb, A.
mb,
428 mzone, A(k, m), A.
mb,
430 lalpha, B(m, n), ldb);
441 for (k = 0; k < B.
mt; k++) {
442 tempkm = k == B.
mt-1 ? B.
m-k*B.
mb : B.
mb;
445 lalpha = k == 0 ? alpha : zone;
446 for (n = 0; n < B.
nt; n++) {
447 tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
449 plasma->
quark, &task_flags,
450 side, uplo, trans, diag,
451 tempkm, tempnn, A.
mb,
452 lalpha, A(k, k), lda,
455 for (m = k+1; m < B.
mt; m++) {
456 tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
459 for (n = 0; n < B.
nt; n++) {
460 tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
462 plasma->
quark, &task_flags,
464 tempmm, tempnn, B.
mb, A.
mb,
467 lalpha, B(m, n), ldb);
476 for (k = 0; k < B.
mt; k++) {
477 tempkm = k == 0 ? B.
m-(B.
mt-1)*B.
mb : B.
mb;
480 lalpha = k == 0 ? alpha : zone;
481 for (n = 0; n < B.
nt; n++) {
482 tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
484 plasma->
quark, &task_flags,
485 side, uplo, trans, diag,
486 tempkm, tempnn, A.
mb,
487 lalpha, A(B.
mt-1-k, B.
mt-1-k), lda,
488 B(B.
mt-1-k, n), ldb);
490 for (m = k+1; m < B.
mt; m++) {
491 tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
492 for (n = 0; n < B.
nt; n++) {
493 tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
495 plasma->
quark, &task_flags,
497 B.
mb, tempnn, tempkm, A.
mb,
498 mzone, A(B.
mt-1-k, B.
mt-1-m), lda,
499 B(B.
mt-1-k, n ), ldb,
500 lalpha, B(B.
mt-1-m, n ), B.
mb);
513 for (k = 0; k < B.
nt; k++) {
514 tempkn = k == B.
nt-1 ? B.
n-k*B.
nb : B.
nb;
516 lalpha = k == 0 ? alpha : zone;
517 for (m = 0; m < B.
mt; m++) {
518 tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
521 plasma->
quark, &task_flags,
522 side, uplo, trans, diag,
523 tempmm, tempkn, A.
mb,
524 lalpha, A(k, k), lda,
527 for (m = 0; m < B.
mt; m++) {
528 tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
530 for (n = k+1; n < B.
nt; n++) {
531 tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
533 plasma->
quark, &task_flags,
535 tempmm, tempnn, B.
mb, A.
mb,
538 lalpha,
B(m, n), ldb);
547 for (k = 0; k < B.
nt; k++) {
548 tempkn = k == 0 ? B.
n-(B.
nt-1)*B.
nb : B.
nb;
550 for (m = 0; m < B.
mt; m++) {
551 tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
554 plasma->
quark, &task_flags,
555 side, uplo, trans, diag,
556 tempmm, tempkn, A.
mb,
557 alpha, A(B.
nt-1-k, B.
nt-1-k), lda,
558 B( m, B.
nt-1-k), ldb);
560 for (n = k+1; n < B.
nt; n++) {
562 plasma->
quark, &task_flags,
564 tempmm, B.
nb, tempkn, A.
mb,
565 minvalpha, B(m, B.
nt-1-k), ldb,
566 A(B.
nt-1-n, B.
nt-1-k), A.
mb,
567 zone, B(m, B.
nt-1-n), ldb);
578 for (k = 0; k < B.
nt; k++) {
579 tempkn = k == 0 ? B.
n-(B.
nt-1)*B.
nb : B.
nb;
581 lalpha = k == 0 ? alpha : zone;
582 for (m = 0; m < B.
mt; m++) {
583 tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
586 plasma->
quark, &task_flags,
587 side, uplo, trans, diag,
588 tempmm, tempkn, A.
mb,
589 lalpha, A(B.
nt-1-k, B.
nt-1-k), lda,
590 B( m, B.
nt-1-k), ldb);
592 for (n = k+1; n < B.
nt; n++) {
594 plasma->
quark, &task_flags,
596 tempmm, B.
nb, tempkn, A.
mb,
597 mzone, B(m, B.
nt-1-k), ldb,
598 A(B.
nt-1-k, B.
nt-1-n), lda,
599 lalpha, B(m, B.
nt-1-n), ldb);
608 for (k = 0; k < B.
nt; k++) {
609 tempkn = k == B.
nt-1 ? B.
n-k*B.
nb : B.
nb;
611 for (m = 0; m < B.
mt; m++) {
612 tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
615 plasma->
quark, &task_flags,
616 side, uplo, trans, diag,
617 tempmm, tempkn, A.
mb,
621 for (n = k+1; n < B.
nt; n++) {
622 tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
625 plasma->
quark, &task_flags,
627 tempmm, tempnn, B.
mb, A.
mb,
628 minvalpha, B(m, k), ldb,