/* Parallel application of Q using tile V - QR factorization (reduction Householder) - dynamic scheduling */
{
/*
 * NOTE(review): this copy of the function looks incomplete. The function
 * header is not visible; K, ib and the ld* variables are declared below but
 * never assigned in the visible code; BS, A, B, plasma and task_flags are
 * used without a visible declaration; and the `} else {` lines further down
 * have no visible matching `if`. Confirm against the upstream source before
 * relying on this text.
 */
/* Loop indices: k drives the outermost loop, m and n the inner ones. */
int k, m, n;
/*
 * K      - upper bound of the k loop.
 * M      - start index of each group; advanced by BS (or by 2*RD in the RD loops).
 * RD     - distance between the two indices (M and M+RD) paired in the RD loops.
 * lastRD - largest RD reached, used to replay the RD sequence in reverse.
 */
int K, M, RD, lastRD;
/*
 * Values passed next to the A(..)/B(..) block arguments -- presumably the
 * leading dimensions of those blocks; TODO confirm, they are never assigned
 * in the visible code.
 */
int ldaM, ldam, ldan, ldaMRD;
int ldbM, ldbm, ldbMRD;
/* Per-index block extents; the last index along a dimension gets the remainder. */
int tempMm, tempkn, tempnn, tempmm, tempMRDm, tempkmin;
/* Declared but not referenced anywhere in the visible code. */
int ib;
/*
 * NOTE(review): as written this return is unconditional, which makes
 * everything below unreachable; a guarding condition appears to be missing.
 */
return;
/* First variant: k ascending, groups of BS first, RD loops afterwards. */
for (k = 0; k < K; k++) {
/* tempkn = A.nb, or the remainder A.n - k*A.nb when k is the last index (A.nt-1). */
tempkn = k == A.
nt-1 ? A.
n-k*A.
nb : A.
nb;
/* Walk M from k up to A.mt in steps of BS. */
for (M = k; M < A.
mt; M += BS) {
/* tempMm = A.mb, or the remainder A.m - M*A.mb when M is the last index (A.mt-1). */
tempMm = M == A.
mt-1 ? A.
m-M*A.
mb : A.
mb;
/* NOTE(review): tempkmin is computed but not used by any argument list visible below. */
tempkmin =
min(tempMm, tempkn);
/* For every n in [0, B.nt): arguments reference A(M, k) and B(M, n). */
for (n = 0; n < B.
nt; n++) {
/* tempnn = B.nb, or the remainder B.n - n*B.nb when n is the last index (B.nt-1). */
tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
/* NOTE(review): argument list with no visible callee name -- the start of this call looks truncated. */
plasma->
quark, &task_flags,
tempMm, tempnn,
A(M, k), ldaM,
B(M, n), ldbM);
}
/* Remaining indices of the group: m runs over (M, min(M+BS, A.mt)). */
for (m = M+1; m <
min(M+BS, A.
mt); m++) {
/* tempmm = A.mb, or the remainder A.m - m*A.mb when m is the last index (A.mt-1). */
tempmm = m == A.
mt-1 ? A.
m-m*A.
mb : A.
mb;
for (n = 0; n < B.
nt; n++) {
tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
/* NOTE(review): neither the callee name nor the end of this argument list is visible. */
plasma->
quark, &task_flags,
A.
nb, tempnn, tempmm, tempnn,
B(M, n), ldbM,
}
}
}
/*
 * RD loops: RD takes the values BS, 2*BS, 4*BS, ... while RD < A.mt-k.
 * For each RD, M steps by 2*RD and index M is paired with index M+RD.
 */
for (RD = BS; RD < A.
mt-k; RD *= 2) {
for (M = k; M+RD < A.
mt; M += 2*RD) {
/* tempMRDm = A.mb, or the remainder A.m - (M+RD)*A.mb when M+RD is the last index (A.mt-1). */
tempMRDm = M+RD == A.
mt-1 ? A.
m-(M+RD)*A.
mb : A.
mb;
for (n = 0; n < B.
nt; n++) {
tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
/* NOTE(review): truncated argument list; references B(M, n), B(M+RD, n) and A(M+RD, k). */
plasma->
quark, &task_flags,
A.
nb, tempnn, tempMRDm, tempnn,
B (M, n), ldbM,
B (M+RD, n), ldbMRD,
A (M+RD, k), ldaMRD,
}
}
}
}
} else {
/*
 * Second variant: the same loop nests as above traversed in the opposite
 * order -- k descending, RD loops first (RD halving), then the groups of BS
 * with m descending and the A(M, k)/B(M, n) arguments last.
 * NOTE(review): the `if` this `else` belongs to is not visible.
 */
for (k = K-1; k >= 0; k--) {
tempkn = k == A.
nt-1 ? A.
n-k*A.
nb : A.
nb;
/*
 * Find the largest RD in the sequence BS, 2*BS, ... that is < A.mt-k
 * (lastRD stays 0 if there is none) so the sequence can be replayed
 * largest-first.
 */
lastRD = 0;
for (RD = BS; RD < A.
mt-k; RD *= 2)
lastRD = RD;
/* Replay the RD values in decreasing order: lastRD, lastRD/2, ... down to BS. */
for (RD = lastRD; RD >= BS; RD /= 2) {
for (M = k; M+RD < A.
mt; M += 2*RD) {
tempMRDm = M+RD == A.
mt-1 ? A.
m-(M+RD)*A.
mb : A.
mb;
for (n = 0; n < B.
nt; n++) {
tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
/* NOTE(review): truncated argument list; references B(M, n), B(M+RD, n) and A(M+RD, k). */
plasma->
quark, &task_flags,
A.
nb, tempnn, tempMRDm, tempnn,
B (M, n), ldbM,
B (M+RD, n), ldbMRD,
A (M+RD, k), ldaMRD,
}
}
}
/* Groups of BS starting at M = k. */
for (M = k; M < A.
mt; M += BS) {
tempMm = M == A.
mt-1 ? A.
m-M*A.
mb : A.
mb;
/* NOTE(review): tempkmin is not used by any argument list visible below. */
tempkmin =
min(tempMm, tempkn);
/* m descends from min(M+BS, A.mt)-1 down to M+1. */
for (m =
min(M+BS, A.
mt)-1; m > M; m--) {
tempmm = m == A.
mt-1 ? A.
m-m*A.
mb : A.
mb;
for (n = 0; n < B.
nt; n++) {
tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
/* NOTE(review): neither the callee name nor the end of this argument list is visible. */
plasma->
quark, &task_flags,
A.
nb, tempnn, tempmm, tempnn,
B(M, n), ldbM,
}
}
/* Index M itself is handled after the rest of its group. */
for (n = 0; n < B.
nt; n++) {
tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
/* NOTE(review): argument list with no visible callee name. */
plasma->
quark, &task_flags,
tempMm, tempnn,
A(M, k), ldaM,
B(M, n), ldbM);
}
}
}
}
} else {
/*
 * Third variant: the innermost loop now runs over m in [0, B.mt) and the B
 * blocks are addressed as B(m, M) / B(m, M+RD), i.e. M selects the second
 * index of B. k descends and the RD loops come first.
 * NOTE(review): the `if` this `else` belongs to is not visible.
 */
for (k = K-1; k >= 0; k--) {
tempkn = k == A.
nt-1 ? A.
n-k*A.
nb : A.
nb;
/* Largest RD in BS, 2*BS, ... that is < A.mt-k; 0 if there is none. */
lastRD = 0;
for (RD = BS; RD < A.
mt-k; RD *= 2)
lastRD = RD;
/* Replay the RD values in decreasing order down to BS. */
for (RD = lastRD; RD >= BS; RD /= 2) {
for (M = k; M+RD < A.
mt; M += 2*RD) {
tempMRDm = M+RD == A.
mt-1 ? A.
m-(M+RD)*A.
mb : A.
mb;
for (m = 0; m < B.
mt; m++) {
/* tempmm = B.mb, or the remainder B.m - m*B.mb when m is the last index (B.mt-1). */
tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
/* NOTE(review): truncated argument list; references B(m, M), B(m, M+RD) and A(M+RD, k). */
plasma->
quark, &task_flags,
tempmm, B.
nb, tempmm, tempMRDm,
B (m, M), ldbm,
B (m, M+RD), ldbm,
A (M+RD, k), ldaMRD,
}
}
}
/* Groups of BS starting at M = k. */
for (M = k; M < A.
mt; M += BS) {
tempMm = M == A.
mt-1 ? A.
m-M*A.
mb : A.
mb;
/* NOTE(review): tempkmin is not used by any argument list visible below. */
tempkmin =
min(tempMm, tempkn);
/*
 * n descends from min(M+BS, A.mt)-1 down to M+1; note the bound comes from
 * A.mt while tempnn is derived from B.nt / B.n / B.nb.
 */
for (n =
min(M+BS, A.
mt)-1; n > M; n--) {
tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
for (m = 0; m < B.
mt; m++) {
tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
/* NOTE(review): neither the callee name nor the end of this argument list is visible. */
plasma->
quark, &task_flags,
tempmm, tempMm, tempmm, tempnn,
B(m, M), ldbm,
}
}
/* Index M itself is handled after the rest of its group. */
for (m = 0; m < B.
mt; m++) {
tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
/* NOTE(review): argument list with no visible callee name. */
plasma->
quark, &task_flags,
tempmm, tempMm,
A(M, k), ldaM,
B(m, M), ldbm);
}
}
}
} else {
/*
 * Fourth variant: same addressing as the third (B(m, M) / B(m, M+RD)), but
 * with k ascending, the groups of BS first and the RD loops (RD doubling)
 * last.
 */
for (k = 0; k < K; k++) {
tempkn = k == A.
nt-1 ? A.
n-k*A.
nb : A.
nb;
/* Groups of BS starting at M = k. */
for (M = k; M < A.
mt; M += BS) {
tempMm = M == A.
mt-1 ? A.
m-M*A.
mb : A.
mb;
/* NOTE(review): tempkmin is not used by any argument list visible below. */
tempkmin =
min(tempMm, tempkn);
/* Index M itself is handled first, for every m in [0, B.mt). */
for (m = 0; m < B.
mt; m++) {
tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
/* NOTE(review): argument list with no visible callee name. */
plasma->
quark, &task_flags,
tempmm, tempMm,
A(M, k), ldaM,
B(m, M), ldbm);
}
/* Remaining indices of the group: n runs over (M, min(M+BS, A.mt)). */
for (n = M+1; n <
min(M+BS, A.
mt); n++) {
tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
for (m = 0; m < B.
mt; m++) {
tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
/* NOTE(review): neither the callee name nor the end of this argument list is visible. */
plasma->
quark, &task_flags,
tempmm, tempMm, tempmm, tempnn,
B(m, M), ldbm,
}
}
}
/* RD loops: RD = BS, 2*BS, ... while RD < A.mt-k; M is paired with M+RD. */
for (RD = BS; RD < A.
mt-k; RD *= 2) {
for (M = k; M+RD < A.
mt; M += 2*RD) {
tempMRDm = M+RD == A.
mt-1 ? A.
m-(M+RD)*A.
mb : A.
mb;
for (m = 0; m < B.
mt; m++) {
tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
/* NOTE(review): truncated argument list; references B(m, M), B(m, M+RD) and A(M+RD, k). */
plasma->
quark, &task_flags,
tempmm, B.
nb, tempmm, tempMRDm,
B (m, M ), ldbm,
B (m, M+RD), ldbm,
A (M+RD, k), ldaMRD,
}
}
}
}
}
}
}