PLASMA auxiliary routines. PLASMA is a software package provided by Univ. of Tennessee, Univ. of California Berkeley, and Univ. of Colorado Denver.
- Version: 2.4.5
- Authors: Jakub Kurzak, Hatem Ltaief, Mathieu Faverge
- Date: 2010-11-15 c Tue Nov 22 14:35:39 2011
Definition in file pctrsm.c.
Parallel tile triangular solve - static scheduling
Definition at line 24 of file pctrsm.c.
References A, B, BLKLDD, CORE_cgemm(), CORE_ctrsm(), diag, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, PLASMA_RANK, PLASMA_SIZE, PLASMA_SUCCESS, plasma_unpack_args_9, PlasmaLeft, PlasmaLower, PlasmaNoTrans, PlasmaUpper, side, ss_cond_set, ss_cond_wait, ss_finalize, ss_init, plasma_sequence_t::status, trans, and uplo.
{
int k, m, n;
int next_k;
int next_m;
int next_n;
int lda, ldb;
int tempkm, tempnn, tempmm, tempkn;
minvalpha = mzone / alpha;
return;
k = 0;
k++;
}
n = 0;
while (k < B.
mt && m < B.
mt) {
next_n = n;
next_m = m;
next_k = k;
next_n++;
while (next_m >= B.
mt && next_k < B.
mt) {
next_k++;
next_m = next_m - B.
mt + next_k;
}
next_n = 0;
}
tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
lalpha = k == 0 ? alpha : zone;
if (m == k) {
tempkm = k == B.
mt-1 ? B.
m-k*B.
mb : B.
mb;
side, uplo, trans, diag,
tempkm, tempnn,
}
else {
tempkm = k == 0 ? B.
m-(B.
mt-1)*B.
mb : B.
mb;
side, uplo, trans, diag,
tempkm, tempnn,
lalpha,
A(B.
mt-1-k, B.
mt-1-k), lda,
}
}
else {
mzone, A(m, k), lda,
lalpha, B(m, n), ldb);
}
else {
tempkm = k == 0 ? A.
m-(A.
mt-1)*A.
mb : A.
mb;
mzone, A(A.
mt-1-k, A.
mt-1-m), lda,
lalpha, B(B.
mt-1-m, n ), B.
mb);
}
}
else {
tempkm = k == 0 ? A.
m-(A.
mt-1)*A.
mb : A.
mb;
mzone, A(A.
mt-1-m, A.
mt-1-k), A.
mb,
lalpha, B(B.
mt-1-m, n ), B.
mb);
}
else {
lalpha, B(m, n), ldb);
}
}
}
n = next_n;
m = next_m;
k = next_k;
}
}
else {
k = 0;
k++;
}
m = 0;
while (k < B.
nt && n < B.
nt) {
next_n = n;
next_m = m;
next_k = k;
next_m++;
while (next_n >= B.
nt && next_k < B.
nt) {
next_k++;
next_n = next_n - B.
nt + next_k;
}
next_m = 0;
}
tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
lalpha = k == 0 ? alpha : zone;
if (n == k) {
tempkn = k == 0 ? B.
n-(B.
nt-1)*B.
nb : B.
nb;
side, uplo, trans, diag,
tempmm, tempkn,
lalpha,
A(B.
nt-1-k, B.
nt-1-k), lda,
}
else {
tempkn = k == B.
nt-1 ? B.
n-k*B.
nb : B.
nb;
side, uplo, trans, diag,
tempmm, tempkn,
}
}
else {
tempkn = k == B.
nt-1 ? B.
n-k*B.
nb : B.
nb;
side, uplo, trans, diag,
tempmm, tempkn,
}
else {
tempkn = k == 0 ? B.
n-(B.
nt-1)*B.
nb : B.
nb;
side, uplo, trans, diag,
tempmm, tempkn,
alpha,
A(B.
nt-1-k, B.
nt-1-k), lda,
}
}
}
else {
tempkn = k == 0 ? B.
n-(B.
nt-1)*B.
nb : B.
nb;
mzone, B(m, B.
nt-1-k), ldb,
A(B.
nt-1-k, B.
nt-1-n), lda,
lalpha, B(m, B.
nt-1-n), ldb);
}
else {
minvalpha, B(m, k), ldb,
}
}
else {
mzone, B(m, k), ldb,
}
else {
tempkn = k == 0 ? B.
n-(B.
nt-1)*B.
nb : B.
nb;
minvalpha, B(m, B.
nt-1-k), ldb,
zone, B(m, B.
nt-1-n), ldb);
}
}
}
n = next_n;
m = next_m;
k = next_k;
}
}
}
Parallel tile triangular solve - dynamic scheduling
Definition at line 347 of file pctrsm.c.
References A, B, BLKLDD, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, plasma_context_self(), PLASMA_SUCCESS, PlasmaLeft, PlasmaNoTrans, PlasmaUpper, plasma_context_struct::quark, QUARK_CORE_cgemm(), QUARK_CORE_ctrsm(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, and TASK_SEQUENCE.
{
int k, m, n;
int lda, ldan, ldb;
int tempkm, tempkn, tempmm, tempnn;
return;
for (k = 0; k < B.
mt; k++) {
tempkm = k == 0 ? B.
m-(B.
mt-1)*B.
mb : B.
mb;
lalpha = k == 0 ? alpha : zone;
for (n = 0; n < B.
nt; n++) {
tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
plasma->
quark, &task_flags,
lalpha, A(B.
mt-1-k, B.
mt-1-k), lda,
}
for (m = k+1; m < B.
mt; m++) {
for (n = 0; n < B.
nt; n++) {
tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
plasma->
quark, &task_flags,
B.
mb, tempnn, tempkm, A.
mb,
mzone, A(B.
mt-1-m, B.
mt-1-k), A.
mb,
lalpha, B(B.
mt-1-m, n ), B.
mb);
}
}
}
}
else {
for (k = 0; k < B.
mt; k++) {
tempkm = k == B.
mt-1 ? B.
m-k*B.
mb : B.
mb;
lalpha = k == 0 ? alpha : zone;
for (n = 0; n < B.
nt; n++) {
tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
plasma->
quark, &task_flags,
lalpha, A(k, k), lda,
}
for (m = k+1; m < B.
mt; m++) {
tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
for (n = 0; n < B.
nt; n++) {
tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
plasma->
quark, &task_flags,
tempmm, tempnn, B.
mb, A.
mb,
lalpha, B(m, n), ldb);
}
}
}
}
}
else {
for (k = 0; k < B.
mt; k++) {
tempkm = k == B.
mt-1 ? B.
m-k*B.
mb : B.
mb;
lalpha = k == 0 ? alpha : zone;
for (n = 0; n < B.
nt; n++) {
tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
plasma->
quark, &task_flags,
lalpha, A(k, k), lda,
}
for (m = k+1; m < B.
mt; m++) {
tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
for (n = 0; n < B.
nt; n++) {
tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
plasma->
quark, &task_flags,
tempmm, tempnn, B.
mb, A.
mb,
mzone, A(m, k), lda,
lalpha, B(m, n), ldb);
}
}
}
}
else {
for (k = 0; k < B.
mt; k++) {
tempkm = k == 0 ? B.
m-(B.
mt-1)*B.
mb : B.
mb;
lalpha = k == 0 ? alpha : zone;
for (n = 0; n < B.
nt; n++) {
tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
plasma->
quark, &task_flags,
lalpha, A(B.
mt-1-k, B.
mt-1-k), lda,
}
for (m = k+1; m < B.
mt; m++) {
tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
for (n = 0; n < B.
nt; n++) {
tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
plasma->
quark, &task_flags,
B.
mb, tempnn, tempkm, A.
mb,
mzone, A(B.
mt-1-k, B.
mt-1-m), lda,
lalpha, B(B.
mt-1-m, n ), B.
mb);
}
}
}
}
}
}
else {
for (k = 0; k < B.
nt; k++) {
tempkn = k == B.
nt-1 ? B.
n-k*B.
nb : B.
nb;
lalpha = k == 0 ? alpha : zone;
for (m = 0; m < B.
mt; m++) {
tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
plasma->
quark, &task_flags,
lalpha, A(k, k), lda,
}
for (m = 0; m < B.
mt; m++) {
tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
for (n = k+1; n < B.
nt; n++) {
tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
plasma->
quark, &task_flags,
tempmm, tempnn, B.
mb, A.
mb,
mzone, B(m, k), ldb,
}
}
}
}
else {
for (k = 0; k < B.
nt; k++) {
tempkn = k == 0 ? B.
n-(B.
nt-1)*B.
nb : B.
nb;
for (m = 0; m < B.
mt; m++) {
tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
plasma->
quark, &task_flags,
alpha, A(B.
nt-1-k, B.
nt-1-k), lda,
for (n = k+1; n < B.
nt; n++) {
plasma->
quark, &task_flags,
tempmm, B.
nb, tempkn, A.
mb,
minvalpha, B(m, B.
nt-1-k), ldb,
zone, B(m, B.
nt-1-n), ldb);
}
}
}
}
}
else {
for (k = 0; k < B.
nt; k++) {
tempkn = k == 0 ? B.
n-(B.
nt-1)*B.
nb : B.
nb;
lalpha = k == 0 ? alpha : zone;
for (m = 0; m < B.
mt; m++) {
tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
plasma->
quark, &task_flags,
lalpha, A(B.
nt-1-k, B.
nt-1-k), lda,
for (n = k+1; n < B.
nt; n++) {
plasma->
quark, &task_flags,
tempmm, B.
nb, tempkn, A.
mb,
mzone, B(m, B.
nt-1-k), ldb,
A(B.
nt-1-k, B.
nt-1-n), lda,
lalpha, B(m, B.
nt-1-n), ldb);
}
}
}
}
else {
for (k = 0; k < B.
nt; k++) {
tempkn = k == B.
nt-1 ? B.
n-k*B.
nb : B.
nb;
for (m = 0; m < B.
mt; m++) {
tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
plasma->
quark, &task_flags,
alpha, A(k, k), lda,
for (n = k+1; n < B.
nt; n++) {
tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
plasma->
quark, &task_flags,
tempmm, tempnn, B.
mb, A.
mb,
minvalpha, B(m, k), ldb,
}
}
}
}
}
}
}