PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
core_ztsrfb.c File Reference
#include <cblas.h>
#include <lapacke.h>
#include "common.h"
Include dependency graph for core_ztsrfb.c:

Go to the source code of this file.

Functions

int CORE_ztsrfb (int side, int trans, int direct, int storev, int M1, int N1, int M2, int N2, int K, PLASMA_Complex64_t *A1, int LDA1, PLASMA_Complex64_t *A2, int LDA2, PLASMA_Complex64_t *V, int LDV, PLASMA_Complex64_t *T, int LDT, PLASMA_Complex64_t *WORK, int LDWORK)

Detailed Description

PLASMA core_blas kernel. PLASMA is a software package provided by Univ. of Tennessee, Univ. of California Berkeley and Univ. of Colorado Denver.

Version:
2.4.5
Author:
Hatem Ltaief
Mathieu Faverge
Azzam Haidar
Date:
2010-11-15 normal z -> c d s

Definition in file core_ztsrfb.c.


Function Documentation

int CORE_ztsrfb ( int  side,
int  trans,
int  direct,
int  storev,
int  M1,
int  N1,
int  M2,
int  N2,
int  K,
PLASMA_Complex64_t A1,
int  LDA1,
PLASMA_Complex64_t A2,
int  LDA2,
PLASMA_Complex64_t V,
int  LDV,
PLASMA_Complex64_t T,
int  LDT,
PLASMA_Complex64_t WORK,
int  LDWORK 
)

CORE_ztsrfb applies a complex block reflector H or its conjugate transpose H' to a complex rectangular matrix formed by coupling two tiles A1 and A2, from either the left or the right.

Parameters:
[in]side
  • PlasmaLeft : apply Q or Q**H from the Left;
  • PlasmaRight : apply Q or Q**H from the Right.
[in]trans
  • PlasmaNoTrans : No transpose, apply Q;
  • PlasmaConjTrans : ConjTranspose, apply Q**H.
[in]directIndicates how H is formed from a product of elementary reflectors
  • PlasmaForward : H = H(1) H(2) . . . H(k) (Forward)
  • PlasmaBackward : H = H(k) . . . H(2) H(1) (Backward)
[in]storevIndicates how the vectors which define the elementary reflectors are stored:
  • PlasmaColumnwise
  • PlasmaRowwise
[in]M1The number of rows of the tile A1. M1 >= 0.
[in]N1The number of columns of the tile A1. N1 >= 0.
[in]M2The number of rows of the tile A2. M2 >= 0, and M2 = M1 if side = PlasmaRight.
[in]N2The number of columns of the tile A2. N2 >= 0, and N2 = N1 if side = PlasmaLeft.
[in]KThe order of the matrix T (= the number of elementary reflectors whose product defines the block reflector).
[in,out]A1On entry, the M1-by-N1 tile A1. On exit, A1 is overwritten by the application of Q.
[in]LDA1The leading dimension of the array A1. LDA1 >= max(1,M1).
[in,out]A2On entry, the M2-by-N2 tile A2. On exit, A2 is overwritten by the application of Q.
[in]LDA2The leading dimension of the tile A2. LDA2 >= max(1,M2).
[in]V(LDV,K) if STOREV = 'C' (LDV,M2) if STOREV = 'R' and SIDE = 'L' (LDV,N2) if STOREV = 'R' and SIDE = 'R' The matrix V.
[in]LDVThe leading dimension of the array V. If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M2); if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N2); if STOREV = 'R', LDV >= K.
[in]TThe triangular K-by-K matrix T in the representation of the block reflector. T is upper triangular by block (economic storage); the rest of the array is not referenced.
[in]LDTThe leading dimension of the array T. LDT >= K.
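[in,out]WORKWorkspace holding the intermediate matrix B: K-by-N1 if side = PlasmaLeft, M1-by-K if side = PlasmaRight.
[in]LDWORKThe leading dimension of the array WORK. LDWORK >= max(1,K) if side = PlasmaLeft; LDWORK >= max(1,M1) if side = PlasmaRight.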
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0if -i, the i-th argument had an illegal value

Definition at line 118 of file core_ztsrfb.c.

References CBLAS_SADDR, cblas_zaxpy(), cblas_zgemm(), cblas_ztrmm(), CblasColMajor, CblasConjTrans, CblasLeft, CblasNonUnit, CblasNoTrans, CblasRight, CblasUpper, coreblas_error, lapack_const, PLASMA_ERR_NOT_SUPPORTED, PLASMA_SUCCESS, PlasmaColumnwise, PlasmaForward, PlasmaLeft, PlasmaRight, and PlasmaUpperLower.

{
static PLASMA_Complex64_t zone = 1.0;
static PLASMA_Complex64_t mzone = -1.0;
int j;
/* Check input arguments */
if (M1 < 0) {
coreblas_error(5, "Illegal value of M1");
return -5;
}
if (N1 < 0) {
coreblas_error(6, "Illegal value of N1");
return -6;
}
if ( (M2 < 0) ||
( (M2 != M1) && (side == PlasmaRight) ) ){
coreblas_error(7, "Illegal value of M2");
return -7;
}
if ( (N2 < 0) ||
( (N2 != N1) && (side == PlasmaLeft) ) ){
coreblas_error(8, "Illegal value of N2");
return -8;
}
if (K < 0) {
coreblas_error(9, "Illegal value of K");
return -9;
}
/* Quick return */
if ((M1 == 0) || (N1 == 0) || (M2 == 0) || (N2 == 0) || (K == 0))
if (direct == PlasmaForward) {
if (side == PlasmaLeft) {
/*
* B = A1 + V' * A2
*/
LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,
K, N1,
A1, LDA1, WORK, LDWORK);
K, N2, M2,
CBLAS_SADDR(zone), V, LDV,
A2, LDA2,
CBLAS_SADDR(zone), WORK, LDWORK);
/*
* A2 = A2 - V*T*B -> B = T*B, A2 = A2 - V*B
*/
CBLAS_SADDR(zone), T, LDT, WORK, LDWORK);
M2, N2, K,
CBLAS_SADDR(mzone), V, LDV,
WORK, LDWORK,
CBLAS_SADDR(zone), A2, LDA2);
/*
* A1 = A1 - B
*/
for(j = 0; j < N1; j++) {
K, CBLAS_SADDR(mzone),
&WORK[LDWORK*j], 1,
&A1[LDA1*j], 1);
}
}
/*
* Columnwise / Forward / Right
*/
else {
/*
* B = A1 + A2 * V
*/
LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,
M1, K,
A1, LDA1, WORK, LDWORK);
M2, K, N2,
CBLAS_SADDR(zone), A2, LDA2,
V, LDV,
CBLAS_SADDR(zone), WORK, LDWORK);
/*
* A2 = A2 - B*T*V' -> B = B*T, A2 = A2 - B*V'
*/
(CBLAS_TRANSPOSE)trans, CblasNonUnit, M1, K,
CBLAS_SADDR(zone), T, LDT, WORK, LDWORK);
M2, N2, K,
CBLAS_SADDR(mzone), WORK, LDWORK,
V, LDV,
CBLAS_SADDR(zone), A2, LDA2);
/*
* A1 = A1 - B
*/
for(j = 0; j < K; j++) {
M1, CBLAS_SADDR(mzone),
&WORK[LDWORK*j], 1,
&A1[LDA1*j], 1);
}
}
}
else {
coreblas_error(3, "Not implemented (ColMajor / Backward / Left or Right)");
}
}
else {
if (direct == PlasmaForward) {
/*
* Rowwise / Forward / Left
*/
if (side == PlasmaLeft) {
/*
* B = A1 + V * A2
*/
LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,
K, N1,
A1, LDA1, WORK, LDWORK);
K, N2, M2,
CBLAS_SADDR(zone), V, LDV,
A2, LDA2,
CBLAS_SADDR(zone), WORK, LDWORK);
/*
* A2 = A2 - V'*T*B -> B = T*B, A2 = A2 - V'*B
*/
(CBLAS_TRANSPOSE)trans, CblasNonUnit, K, N2,
CBLAS_SADDR(zone), T, LDT, WORK, LDWORK);
M2, N2, K,
CBLAS_SADDR(mzone), V, LDV,
WORK, LDWORK,
CBLAS_SADDR(zone), A2, LDA2);
/*
* A1 = A1 - B
*/
for(j=0; j<N1; j++) {
K, CBLAS_SADDR(mzone),
&WORK[LDWORK*j], 1,
&A1[LDA1*j], 1);
}
}
/*
* Rowwise / Forward / Right
*/
else {
/*
* B = A1 + A2 * V'
*/
LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,
M1, K,
A1, LDA1, WORK, LDWORK);
M2, K, N2,
CBLAS_SADDR(zone), A2, LDA2,
V, LDV,
CBLAS_SADDR(zone), WORK, LDWORK);
/*
* A2 = A2 - B*T*V -> B = B*T, A2 = A2 - B*V'
*/
(CBLAS_TRANSPOSE)trans, CblasNonUnit, M1, K,
CBLAS_SADDR(zone), T, LDT, WORK, LDWORK);
M2, N2, K,
CBLAS_SADDR(mzone), WORK, LDWORK,
V, LDV,
CBLAS_SADDR(zone), A2, LDA2);
/*
* A1 = A1 - B
*/
for(j = 0; j < K; j++) {
M1, CBLAS_SADDR(mzone),
&WORK[LDWORK*j], 1,
&A1[LDA1*j], 1);
}
}
}
else {
coreblas_error(3, "Not implemented (RowMajor / Backward / Left or Right)");
}
}
}

Here is the call graph for this function: