PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
core_ztsmlq.c File Reference
#include "common.h"
Include dependency graph for core_ztsmlq.c:

Go to the source code of this file.

Functions

int CORE_ztsmlq (int side, int trans, int M1, int N1, int M2, int N2, int K, int IB, PLASMA_Complex64_t *A1, int LDA1, PLASMA_Complex64_t *A2, int LDA2, PLASMA_Complex64_t *V, int LDV, PLASMA_Complex64_t *T, int LDT, PLASMA_Complex64_t *WORK, int LDWORK)
void QUARK_CORE_ztsmlq (Quark *quark, Quark_Task_Flags *task_flags, int side, int trans, int m1, int n1, int m2, int n2, int k, int ib, int nb, PLASMA_Complex64_t *A1, int lda1, PLASMA_Complex64_t *A2, int lda2, PLASMA_Complex64_t *V, int ldv, PLASMA_Complex64_t *T, int ldt)
void CORE_ztsmlq_quark (Quark *quark)

Detailed Description

PLASMA core_blas kernel PLASMA is a software package provided by Univ. of Tennessee, Univ. of California Berkeley and Univ. of Colorado Denver

Version:
2.4.5
Author:
Hatem Ltaief
Mathieu Faverge
Jakub Kurzak
Azzam Haidar
Dulceneia Becker
Date:
2010-11-15 normal z -> c d s

Definition in file core_ztsmlq.c.


Function Documentation

int CORE_ztsmlq ( int  side,
int  trans,
int  M1,
int  N1,
int  M2,
int  N2,
int  K,
int  IB,
PLASMA_Complex64_t A1,
int  LDA1,
PLASMA_Complex64_t A2,
int  LDA2,
PLASMA_Complex64_t V,
int  LDV,
PLASMA_Complex64_t T,
int  LDT,
PLASMA_Complex64_t WORK,
int  LDWORK 
)

CORE_ztsmlq overwrites the general complex M1-by-N1 tile A1 and M2-by-N2 tile A2 with

                  SIDE = 'L'        SIDE = 'R'

TRANS = 'N': Q * | A1 | | A1 A2 | * Q | A2 |

TRANS = 'C': Q**H * | A1 | | A1 A2 | * Q**H | A2 |

where Q is a complex unitary matrix defined as the product of k elementary reflectors

Q = H(k)' . . . H(2)' H(1)'

as returned by CORE_ZTSLQT.

Parameters:
[in]side
  • PlasmaLeft : apply Q or Q**H from the Left;
  • PlasmaRight : apply Q or Q**H from the Right.
[in]trans
  • PlasmaNoTrans : No transpose, apply Q;
  • PlasmaConjTrans : ConjTranspose, apply Q**H.
[in]M1The number of rows of the tile A1. M1 >= 0.
[in]N1The number of columns of the tile A1. N1 >= 0.
[in]M2The number of rows of the tile A2. M2 >= 0. M2 = M1 if side == PlasmaRight.
[in]N2The number of columns of the tile A2. N2 >= 0. N2 = N1 if side == PlasmaLeft.
[in]KThe number of elementary reflectors whose product defines the matrix Q.
[in]IBThe inner-blocking size. IB >= 0.
[in,out]A1On entry, the M1-by-N1 tile A1. On exit, A1 is overwritten by the application of Q.
[in]LDA1The leading dimension of the array A1. LDA1 >= max(1,M1).
[in,out]A2On entry, the M2-by-N2 tile A2. On exit, A2 is overwritten by the application of Q.
[in]LDA2The leading dimension of the tile A2. LDA2 >= max(1,M2).
[in]VThe i-th row must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by CORE_ZTSLQT in the first k rows of its array argument V.
[in]LDVThe leading dimension of the array V. LDV >= max(1,K).
[out]TThe IB-by-N1 triangular factor T of the block reflector. T is upper triangular by block (economic storage); The rest of the array is not referenced.
[in]LDTThe leading dimension of the array T. LDT >= IB.
[out]WORKWorkspace array of size LDWORK-by-M1 if side == PlasmaLeft LDWORK-by-IB if side == PlasmaRight
[in]LDWORKThe leading dimension of the array WORK. LDWORK >= max(1,IB) if side == PlasmaLeft LDWORK >= max(1,N1) if side == PlasmaRight
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value

Definition at line 124 of file core_ztsmlq.c.

References CORE_zparfb(), coreblas_error, max, min, PLASMA_SUCCESS, PlasmaConjTrans, PlasmaForward, PlasmaLeft, PlasmaNoTrans, PlasmaRight, and PlasmaRowwise.

{
int i, i1, i3;
int NW;
int kb;
int ic = 0;
int jc = 0;
int mi = M1;
int ni = N1;
/* Check input arguments */
if ((side != PlasmaLeft) && (side != PlasmaRight)) {
coreblas_error(1, "Illegal value of side");
return -1;
}
/* NW is the minimum dimension of WORK */
if (side == PlasmaLeft) {
NW = IB;
}
else {
NW = N1;
}
coreblas_error(2, "Illegal value of trans");
return -2;
}
if (M1 < 0) {
coreblas_error(3, "Illegal value of M1");
return -3;
}
if (N1 < 0) {
coreblas_error(4, "Illegal value of N1");
return -4;
}
if ( (M2 < 0) ||
( (M2 != M1) && (side == PlasmaRight) ) ){
coreblas_error(5, "Illegal value of M2");
return -5;
}
if ( (N2 < 0) ||
( (N2 != N1) && (side == PlasmaLeft) ) ){
coreblas_error(6, "Illegal value of N2");
return -6;
}
if ((K < 0) ||
( (side == PlasmaLeft) && (K > M1) ) ||
( (side == PlasmaRight) && (K > N1) ) ) {
coreblas_error(7, "Illegal value of K");
return -7;
}
if (IB < 0) {
coreblas_error(8, "Illegal value of IB");
return -8;
}
if (LDA1 < max(1,M1)){
coreblas_error(10, "Illegal value of LDA1");
return -10;
}
if (LDA2 < max(1,M2)){
coreblas_error(12, "Illegal value of LDA2");
return -12;
}
if (LDV < max(1,K)){
coreblas_error(14, "Illegal value of LDV");
return -14;
}
if (LDT < max(1,IB)){
coreblas_error(16, "Illegal value of LDT");
return -16;
}
if (LDWORK < max(1,NW)){
coreblas_error(18, "Illegal value of LDWORK");
return -18;
}
/* Quick return */
if ((M1 == 0) || (N1 == 0) || (M2 == 0) || (N2 == 0) || (K == 0) || (IB == 0))
if (((side == PlasmaLeft) && (trans == PlasmaNoTrans))
|| ((side == PlasmaRight) && (trans != PlasmaNoTrans))) {
i1 = 0;
i3 = IB;
}
else {
i1 = ((K-1) / IB)*IB;
i3 = -IB;
}
if (trans == PlasmaNoTrans) {
}
else {
}
for(i = i1; (i > -1) && (i < K); i += i3) {
kb = min(IB, K-i);
if (side == PlasmaLeft) {
/*
* H or H' is applied to C(i:m,1:n)
*/
mi = M1 - i;
ic = i;
}
else {
/*
* H or H' is applied to C(1:m,i:n)
*/
ni = N1 - i;
jc = i;
}
/*
* Apply H or H' (NOTE: CORE_zparfb used to be CORE_ztsrfb)
*/
mi, ni, M2, N2, kb, 0,
&A1[LDA1*jc+ic], LDA1,
A2, LDA2,
&V[i], LDV,
&T[LDT*i], LDT,
WORK, LDWORK);
}
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_ztsmlq_quark ( Quark quark)

Definition at line 303 of file core_ztsmlq.c.

References CORE_ztsmlq(), quark_unpack_args_18, side, T, trans, and V.

{
int side;
int trans;
int m1;
int n1;
int m2;
int n2;
int k;
int ib;
int lda1;
int lda2;
int ldv;
int ldt;
int ldwork;
quark_unpack_args_18(quark, side, trans, m1, n1, m2, n2, k, ib,
A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork);
CORE_ztsmlq(side, trans, m1, n1, m2, n2, k, ib,
A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_ztsmlq ( Quark quark,
Quark_Task_Flags task_flags,
int  side,
int  trans,
int  m1,
int  n1,
int  m2,
int  n2,
int  k,
int  ib,
int  nb,
PLASMA_Complex64_t A1,
int  lda1,
PLASMA_Complex64_t A2,
int  lda2,
PLASMA_Complex64_t V,
int  ldv,
PLASMA_Complex64_t T,
int  ldt 
)

Definition at line 263 of file core_ztsmlq.c.

References CORE_ztsmlq_quark(), DAG_CORE_TSMLQ, INOUT, INPUT, LOCALITY, PlasmaLeft, QUARK_Insert_Task(), SCRATCH, and VALUE.

{
int ldwork = side == PlasmaLeft ? ib : nb;
sizeof(PLASMA_enum), &side, VALUE,
sizeof(PLASMA_enum), &trans, VALUE,
sizeof(int), &m1, VALUE,
sizeof(int), &n1, VALUE,
sizeof(int), &m2, VALUE,
sizeof(int), &n2, VALUE,
sizeof(int), &k, VALUE,
sizeof(int), &ib, VALUE,
sizeof(PLASMA_Complex64_t)*nb*nb, A1, INOUT,
sizeof(int), &lda1, VALUE,
sizeof(PLASMA_Complex64_t)*nb*nb, A2, INOUT | LOCALITY,
sizeof(int), &lda2, VALUE,
sizeof(PLASMA_Complex64_t)*nb*nb, V, INPUT,
sizeof(int), &ldv, VALUE,
sizeof(PLASMA_Complex64_t)*ib*nb, T, INPUT,
sizeof(int), &ldt, VALUE,
sizeof(PLASMA_Complex64_t)*ib*nb, NULL, SCRATCH,
sizeof(int), &ldwork, VALUE,
0);
}

Here is the call graph for this function:

Here is the caller graph for this function: