Parallel application of Q using tile V - LQ factorization (reduction Householder) - dynamic scheduling
Definition at line 27 of file psormlqrh.c.
References A, B, BLKLDD, plasma_desc_t::m, plasma_desc_t::mb, min, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, plasma_context_self(), PLASMA_IB, PLASMA_SUCCESS, PlasmaLeft, PlasmaNoTrans, plasma_context_struct::quark, QUARK_CORE_sormlq(), QUARK_CORE_stsmlq(), QUARK_CORE_sttmlq(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, T, T2, and TASK_SEQUENCE.
{
int k, m, n;
int K, N, RD, lastRD;
int ldaN, ldak;
int ldbN, ldbm, ldbNRD;
int tempNn, tempkm, tempnn, tempmm, tempNRDn, tempkmin;
int ib;
return;
for (k = 0; k < K; k++) {
tempkm = k == A.
mt-1 ? A.
m-k*A.
mb : A.
mb;
for (N = k; N < A.
nt; N += BS) {
tempNn = N == A.
nt-1 ? A.
n-N*A.
nb : A.
nb;
tempkmin =
min(tempkm,tempNn);
for (n = 0; n < B.
nt; n++) {
tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
plasma->
quark, &task_flags,
tempNn, tempnn,
A(k, N), ldak,
B(N, n), ldbN);
}
for (m = N+1; m <
min(N+BS, A.
nt); m++) {
tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
for (n = 0; n < B.
nt; n++) {
tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
plasma->
quark, &task_flags,
B.
nb, tempnn, tempmm, tempnn,
B(N, n), ldbN,
}
}
}
for (RD = BS; RD < A.
nt-k; RD *= 2) {
for (N = k; N+RD < A.
nt; N += 2*RD) {
tempNRDn = N+RD == A.
nt-1 ? A.
n-(N+RD)*A.
nb : A.
nb;
for (n = 0; n < B.
nt; n++) {
tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
plasma->
quark, &task_flags,
B.
mb, tempnn, tempNRDn, tempnn,
B (N, n), ldbN,
B (N+RD, n), ldbNRD,
A (k, N+RD), ldak,
}
}
}
}
} else {
for (k = K-1; k >= 0; k--) {
tempkm = k == A.
mt-1 ? A.
m-k*A.
mb : A.
mb;
lastRD = 0;
for (RD = BS; RD < A.
nt-k; RD *= 2)
lastRD = RD;
for (RD = lastRD; RD >= BS; RD /= 2) {
for (N = k; N+RD < A.
nt; N += 2*RD) {
tempNRDn = N+RD == A.
nt-1 ? A.
n-(N+RD)*A.
nb : A.
nb;
for (n = 0; n < B.
nt; n++) {
tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
plasma->
quark, &task_flags,
B.
nb, tempnn, tempNRDn, tempnn,
B (N, n), ldbN,
B (N+RD, n), ldbNRD,
A (k, N+RD), ldak,
}
}
}
for (N = k; N < A.
nt; N += BS) {
tempNn = N == A.
nt-1 ? A.
n-N*A.
nb : A.
nb;
tempkmin =
min(tempkm,tempNn);
for (m =
min(N+BS, A.
nt)-1; m > N; m--) {
tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
for (n = 0; n < B.
nt; n++) {
tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
plasma->
quark, &task_flags,
B.
mb, tempnn, tempmm, tempnn,
B(N, n), ldbN,
}
}
for (n = 0; n < B.
nt; n++) {
tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
plasma->
quark, &task_flags,
tempNn, tempnn,
A(k, N), ldak,
B(N, n), ldbN);
}
}
}
}
} else {
for (k = K-1; k >= 0; k--) {
tempkm = k == A.
mt-1 ? A.
m-k*A.
mb : A.
mb;
lastRD = 0;
for (RD = BS; RD < A.
nt-k; RD *= 2)
lastRD = RD;
for (RD = lastRD; RD >= BS; RD /= 2) {
for (N = k; N+RD < A.
nt; N += 2*RD) {
tempNRDn = N+RD == A.
nt-1 ? A.
n-(N+RD)*A.
nb : A.
nb;
for (m = 0; m < B.
mt; m++) {
tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
plasma->
quark, &task_flags,
tempmm, B.
nb, tempmm, tempNRDn,
B (m, N ), ldbm,
B (m, N+RD), ldbm,
A (k, N+RD), ldak,
}
}
}
for (N = k; N < A.
nt; N += BS) {
tempNn = N == A.
nt-1 ? A.
n-N*A.
nb : A.
nb;
tempkmin =
min(tempkm,tempNn);
for (n =
min(N+BS, A.
nt)-1; n > N; n--) {
tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
for (m = 0; m < B.
mt; m++) {
tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
plasma->
quark, &task_flags,
tempmm, B.
nb, tempmm, tempnn,
B(m, N), ldbm,
}
}
for (m = 0; m < B.
mt; m++) {
tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
plasma->
quark, &task_flags,
tempmm, tempNn,
A(k, N), ldak,
B(m, N), ldbm);
}
}
}
} else {
for (k = 0; k < K; k++) {
tempkm = k == A.
mt-1 ? A.
m-k*A.
mb : A.
mb;
for (N = k; N < A.
nt; N += BS) {
tempNn = N == A.
nt-1 ? A.
n-N*A.
nb : A.
nb;
tempkmin =
min(tempkm,tempNn);
for (m = 0; m < B.
mt; m++) {
tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
plasma->
quark, &task_flags,
tempmm, tempNn,
A(k, N), ldaN,
B(m, N), ldbm);
}
for (n = N+1; n <
min(N+BS, A.
nt); n++) {
tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
for (m = 0; m < B.
mt; m++) {
tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
plasma->
quark, &task_flags,
tempmm, tempNn, tempmm, tempnn,
B(m, N), ldbm,
}
}
}
for (RD = BS; RD < A.
nt-k; RD *= 2) {
for (N = k; N+RD < A.
nt; N += 2*RD) {
tempNRDn = N+RD == A.
nt-1 ? A.
n-(N+RD)*A.
nb : A.
nb;
for (m = 0; m < B.
mt; m++) {
tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
plasma->
quark, &task_flags,
tempmm, B.
nb, tempmm, tempNRDn,
B (m, N ), ldbm,
B (m, N+RD), ldbm,
A (k, N+RD), ldak,
}
}
}
}
}
}
}