PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
pcunmlq.c File Reference
#include "common.h"
Include dependency graph for pcunmlq.c:

Go to the source code of this file.

Macros

#define A(m, n)   BLKADDR(A, PLASMA_Complex32_t, m, n)
#define B(m, n)   BLKADDR(B, PLASMA_Complex32_t, m, n)
#define T(m, n)   BLKADDR(T, PLASMA_Complex32_t, m, n)

Functions

void plasma_pcunmlq (plasma_context_t *plasma)
void plasma_pcunmlq_quark (PLASMA_enum side, PLASMA_enum trans, PLASMA_desc A, PLASMA_desc B, PLASMA_desc T, PLASMA_sequence *sequence, PLASMA_request *request)

Detailed Description

PLASMA auxiliary routines PLASMA is a software package provided by Univ. of Tennessee, Univ. of California Berkeley and Univ. of Colorado Denver

Version:
2.4.5
Author:
Hatem Ltaief
Jakub Kurzak
Mathieu Faverge
Azzam Haidar
Date:
2010-11-15 c Tue Nov 22 14:35:40 2011

Definition in file pcunmlq.c.


Macro Definition Documentation

#define A (   m,
 
)    BLKADDR(A, PLASMA_Complex32_t, m, n)

Definition at line 20 of file pcunmlq.c.

#define B (   m,
 
)    BLKADDR(B, PLASMA_Complex32_t, m, n)

Definition at line 21 of file pcunmlq.c.

#define T (   m,
 
)    BLKADDR(T, PLASMA_Complex32_t, m, n)

Definition at line 22 of file pcunmlq.c.


Function Documentation

void plasma_pcunmlq ( plasma_context_t plasma)

Parallel application of Q using tile V - LQ factorization - static scheduling

Definition at line 26 of file pcunmlq.c.

References A, B, BLKLDD, CORE_ctsmlq(), CORE_cunmlq(), plasma_desc_t::dtyp, plasma_desc_t::m, plasma_desc_t::mb, min, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, PLASMA_ERR_NOT_SUPPORTED, PLASMA_IB, plasma_private_alloc(), plasma_private_free(), PLASMA_RANK, plasma_request_fail(), PLASMA_SIZE, PLASMA_SUCCESS, plasma_unpack_args_7, PlasmaConjTrans, PlasmaLeft, side, ss_cond_set, ss_cond_wait, ss_finalize, ss_init, plasma_sequence_t::status, T, and trans.

{
PLASMA_sequence *sequence;
PLASMA_request *request;
int k, m, n;
int next_k;
int next_m;
int next_n;
int ldak, ldbk, ldbm;
int tempmm, tempnn, tempkm, tempkmin;
int minMT, minM;
int ib = PLASMA_IB;
plasma_unpack_args_7(side, trans, A, B, T, sequence, request);
if (sequence->status != PLASMA_SUCCESS)
return;
if (side != PlasmaLeft) {
return;
}
if (trans != PlasmaConjTrans) {
return;
}
work = (PLASMA_Complex32_t*)plasma_private_alloc(plasma, ib*T.nb, T.dtyp);
ss_init(B.mt, B.nt, min(A.mt, A.nt));
if (A.m > A.n) {
minM = A.n;
minMT = A.nt;
} else {
minM = A.m;
minMT = A.mt;
}
k = minMT-1;
while (n >= B.nt) {
k--;
n = n-B.nt;
}
m = B.mt-1;
while (k >= 0 && n < B.nt) {
next_n = n;
next_m = m;
next_k = k;
next_m--;
if (next_m == k-1) {
next_n += PLASMA_SIZE;
while (next_n >= B.nt && next_k >= 0) {
next_k--;
next_n = next_n-B.nt;
}
next_m = B.mt-1;
}
tempkmin = k == minMT-1 ? minM-k*A.nb : A.nb;
tempkm = k == B.mt-1 ? B.m-k*B.mb : B.mb;
tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
ldak = BLKLDD(A, k);
ldbk = BLKLDD(B, k);
ldbm = BLKLDD(B, m);
if (m == k) {
side, trans,
tempkm, tempnn, tempkmin, ib,
A(k, k), ldak,
T(k, k), T.mb,
B(k, n), ldbk,
work, T.nb);
ss_cond_set(k, n, k);
}
else {
ss_cond_wait(m, n, k+1);
side, trans,
A.mb, tempnn, tempmm, tempnn, tempkmin, ib,
B(k, n), ldbk,
B(m, n), ldbm,
A(k, m), ldak,
T(k, m), T.mb,
work, ib);
ss_cond_set(m, n, k);
}
m = next_m;
n = next_n;
k = next_k;
}
plasma_private_free(plasma, work);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void plasma_pcunmlq_quark ( PLASMA_enum  side,
PLASMA_enum  trans,
PLASMA_desc  A,
PLASMA_desc  B,
PLASMA_desc  T,
PLASMA_sequence sequence,
PLASMA_request request 
)

Parallel application of Q using tile V - LQ factorization - dynamic scheduling

Definition at line 135 of file pcunmlq.c.

References A, B, BLKLDD, plasma_desc_t::m, plasma_desc_t::mb, plasma_desc_t::mt, plasma_desc_t::n, plasma_desc_t::nb, plasma_desc_t::nt, plasma_context_self(), PLASMA_IB, PLASMA_SUCCESS, PlasmaLeft, PlasmaNoTrans, plasma_context_struct::quark, QUARK_CORE_ctsmlq(), QUARK_CORE_cunmlq(), plasma_sequence_t::quark_sequence, QUARK_Task_Flag_Set(), Quark_Task_Flags_Initializer, plasma_sequence_t::status, T, and TASK_SEQUENCE.

{
int k, m, n;
int ldak, ldbk, ldbm;
int tempmm, tempnn, tempkn, tempkm, tempkmin;
int ib, minMT, minM;
plasma = plasma_context_self();
if (sequence->status != PLASMA_SUCCESS)
return;
QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
ib = PLASMA_IB;
if (A.m > A.n) {
minM = A.n;
minMT = A.nt;
} else {
minM = A.m;
minMT = A.mt;
}
if (side == PlasmaLeft ) {
if (trans == PlasmaNoTrans) {
/*
* PlasmaLeft / PlasmaNoTrans
*/
for (k = 0; k < minMT; k++) {
tempkm = k == B.mt -1 ? B.m -k*B.mb : B.mb;
tempkmin = k == minMT-1 ? minM-k*A.nb : A.nb;
ldak = BLKLDD(A, k);
ldbk = BLKLDD(B, k);
for (n = 0; n < B.nt; n++) {
tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
plasma->quark, &task_flags,
tempkm, tempnn, tempkmin, ib, T.nb,
A(k, k), ldak,
T(k, k), T.mb,
B(k, n), ldbk);
}
for (m = k+1; m < B.mt; m++) {
tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
ldbm = BLKLDD(B, m);
for (n = 0; n < B.nt; n++) {
tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
plasma->quark, &task_flags,
B.mb, tempnn, tempmm, tempnn, tempkmin, ib, T.nb,
B(k, n), ldbk,
B(m, n), ldbm,
A(k, m), ldak,
T(k, m), T.mb);
}
}
}
}
else {
/*
* PlasmaLeft / PlasmaConjTrans
*/
for (k = minMT-1; k >= 0; k--) {
tempkm = k == B.mt -1 ? B.m -k*B.mb : B.mb;
tempkmin = k == minMT-1 ? minM-k*A.nb : A.nb;
ldak = BLKLDD(A, k);
ldbk = BLKLDD(B, k);
for (m = B.mt-1; m > k; m--) {
tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
ldbm = BLKLDD(B, m);
for (n = 0; n < B.nt; n++) {
tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
plasma->quark, &task_flags,
B.mb, tempnn, tempmm, tempnn, tempkmin, ib, T.nb,
B(k, n), ldbk,
B(m, n), ldbm,
A(k, m), ldak,
T(k, m), T.mb);
}
}
for (n = 0; n < B.nt; n++) {
tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
plasma->quark, &task_flags,
tempkm, tempnn, tempkmin, ib, T.nb,
A(k, k), ldak,
T(k, k), T.mb,
B(k, n), ldbk);
}
}
}
}
else {
if (trans == PlasmaNoTrans) {
/*
* PlasmaRight / PlasmaNoTrans
*/
for (k = minMT-1; k >= 0; k--) {
tempkn = k == B.nt -1 ? B.n -k*B.nb : B.nb;
tempkmin = k == minMT-1 ? minM-k*A.nb : A.nb;
ldak = BLKLDD(A, k);
for (n = B.nt-1; n > k; n--) {
tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
for (m = 0; m < B.mt; m++) {
tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
ldbm = BLKLDD(B, m);
plasma->quark, &task_flags,
tempmm, B.nb, tempmm, tempnn, tempkmin, ib, T.nb,
B(m, k), ldbm,
B(m, n), ldbm,
A(k, n), ldak,
T(k, n), T.mb);
}
}
for (m = 0; m < B.mt; m++) {
tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
ldbm = BLKLDD(B, m);
plasma->quark, &task_flags,
tempmm, tempkn, tempkmin, ib, T.nb,
A(k, k), ldak,
T(k, k), T.mb,
B(m, k), ldbm);
}
}
}
else {
/*
* PlasmaRight / PlasmaConjTrans
*/
for (k = 0; k < minMT; k++) {
tempkn = k == B.nt -1 ? B.n -k*B.nb : B.nb;
tempkmin = k == minMT-1 ? minM-k*A.mb : A.mb;
ldak = BLKLDD(A, k);
for (m = 0; m < B.mt; m++) {
tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
ldbm = BLKLDD(B, m);
plasma->quark, &task_flags,
tempmm, tempkn, tempkmin, ib, T.nb,
A(k, k), ldak,
T(k, k), T.mb,
B(m, k), ldbm);
}
for (n = k+1; n < B.nt; n++) {
tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
for (m = 0; m < B.mt; m++) {
tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
ldbm = BLKLDD(B, m);
plasma->quark, &task_flags,
tempmm, B.nb, tempmm, tempnn, tempkmin, ib, T.nb,
B(m, k), ldbm,
B(m, n), ldbm,
A(k, n), ldak,
T(k, n), T.mb);
}
}
}
}
}
}

Here is the call graph for this function:

Here is the caller graph for this function: