#include <lapacke.h>
#include "common.h"

Include dependency graph for core_ztsmlq_corner.c:

Macros
#define	COMPLEX

Functions
int	CORE_ztsmlq_corner (int m1, int n1, int m2, int n2, int m3, int n3, int k, int ib, int nb, PLASMA_Complex64_t *A1, int lda1, PLASMA_Complex64_t *A2, int lda2, PLASMA_Complex64_t *A3, int lda3, PLASMA_Complex64_t *V, int ldv, PLASMA_Complex64_t *T, int ldt, PLASMA_Complex64_t *WORK, int ldwork)
void	QUARK_CORE_ztsmlq_corner (Quark *quark, Quark_Task_Flags *task_flags, int m1, int n1, int m2, int n2, int m3, int n3, int k, int ib, int nb, PLASMA_Complex64_t *A1, int lda1, PLASMA_Complex64_t *A2, int lda2, PLASMA_Complex64_t *A3, int lda3, PLASMA_Complex64_t *V, int ldv, PLASMA_Complex64_t *T, int ldt)
void	CORE_ztsmlq_corner_quark (Quark *quark)

Detailed Description

PLASMA core_blas kernel. PLASMA is a software package provided by Univ. of Tennessee, Univ. of California Berkeley and Univ. of Colorado Denver.

Version:: 2.4.5

Author:: Hatem Ltaief; Mathieu Faverge; Azzam Haidar

Date:: 2010-11-15 normal z -> c d s

Definition in file core_ztsmlq_corner.c.

Macro Definition Documentation

#define COMPLEX

Definition at line 20 of file core_ztsmlq_corner.c.

Function Documentation

int CORE_ztsmlq_corner	(	int	m1,
		int	n1,
		int	m2,
		int	n2,
		int	m3,
		int	n3,
		int	k,
		int	ib,
		int	nb,
		PLASMA_Complex64_t *	A1,
		int	lda1,
		PLASMA_Complex64_t *	A2,
		int	lda2,
		PLASMA_Complex64_t *	A3,
		int	lda3,
		PLASMA_Complex64_t *	V,
		int	ldv,
		PLASMA_Complex64_t *	T,
		int	ldt,
		PLASMA_Complex64_t *	WORK,
		int	ldwork
	)

CORE_ztsmlq_corner: see CORE_ztsmlq

This kernel applies left and right transformations as depicted below:

    | I - V T V' | * | A1  A2 | * | I - V T' V' |
                     | A2' A3 |

where A1 and A3 are symmetric (Hermitian in complex arithmetic) matrices. Only the upper part of A1 and A3 is referenced. This is an ad hoc implementation and can be further optimized.

Parameters:

[in]	side	PlasmaLeft : apply Q or QH from the Left; PlasmaRight : apply Q or QH from the Right.
[in]	trans	PlasmaNoTrans : No transpose, apply Q; PlasmaConjTrans : ConjTranspose, apply Q**H.
[in]	M1	The number of rows of the tile A1. M1 >= 0.
[in]	N1	The number of columns of the tile A1. N1 >= 0.
[in]	M2	The number of rows of the tile A2. M2 >= 0. M2 = M1 if side == PlasmaRight.
[in]	N2	The number of columns of the tile A2. N2 >= 0. N2 = N1 if side == PlasmaLeft.
[in]	K	The number of elementary reflectors whose product defines the matrix Q.
[in]	IB	The inner-blocking size. IB >= 0.
[in,out]	A1	On entry, the M1-by-N1 tile A1. On exit, A1 is overwritten by the application of Q.
[in]	LDA1	The leading dimension of the array A1. LDA1 >= max(1,M1).
[in,out]	A2	On entry, the M2-by-N2 tile A2. On exit, A2 is overwritten by the application of Q.
[in]	LDA2	The leading dimension of the tile A2. LDA2 >= max(1,M2).
[in]	V	The i-th row must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by CORE_ZTSLQT in the first k rows of its array argument V.
[in]	LDV	The leading dimension of the array V. LDV >= max(1,K).
[out]	T	The IB-by-N1 triangular factor T of the block reflector. T is upper triangular by block (economic storage); The rest of the array is not referenced.
[in]	LDT	The leading dimension of the array T. LDT >= IB.
[out]	WORK	Workspace array of size LDWORK-by-M1 if side == PlasmaLeft LDWORK-by-IB if side == PlasmaRight
[in]	LDWORK	The leading dimension of the array WORK. LDWORK >= max(1,IB) if side == PlasmaLeft LDWORK >= max(1,N1) if side == PlasmaRight

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

Definition at line 125 of file core_ztsmlq_corner.c.

References conj(), CORE_ztsmlq(), coreblas_error, PLASMA_SUCCESS, PlasmaConjTrans, PlasmaLeft, PlasmaNoTrans, PlasmaRight, side, and trans.

{
    /*
     * WORK is used as four consecutive panels, each starting nb columns
     * (nb*ldwork elements) after the previous one:
     *   full_a1  : A1 expanded to a full square block
     *   a2_conjt : conjugate transpose of A2
     *   full_a3  : A3 expanded to a full square block
     *   scratch  : workspace handed to CORE_ztsmlq
     */
    PLASMA_Complex64_t *full_a1;
    PLASMA_Complex64_t *a2_conjt;
    PLASMA_Complex64_t *full_a3;
    PLASMA_Complex64_t *scratch;
    int row, col;

    /* A1 is expected to be square. */
    if (m1 != n1) {
        coreblas_error(1, "Illegal value of M1, N1");
        return -1;
    }

    full_a1  = WORK;
    a2_conjt = WORK + nb*ldwork;
    full_a3  = WORK + 2*nb*ldwork;
    scratch  = WORK + 3*nb*ldwork;

    /* Expand A1: copy its upper triangle and mirror it (conjugated)
     * into the strictly lower triangle of full_a1. */
    for (row = 0; row < m1; row++) {
        for (col = row; col < n1; col++) {
            full_a1[row + col*ldwork] = A1[row + col*lda1];
            if (col > row) {
                full_a1[col + row*ldwork] = conj(full_a1[row + col*ldwork]);
            }
        }
    }

    /* a2_conjt <- A2' (element (row, col) of A2 lands at (col, row), conjugated). */
    for (col = 0; col < n2; col++) {
        for (row = 0; row < m2; row++) {
            a2_conjt[col + row*ldwork] = conj(A2[row + col*lda2]);
        }
    }

    /* Right application on | A1 A2 | */
    CORE_ztsmlq(PlasmaRight, PlasmaConjTrans, m1, n1, m2, n2, k, ib,
                full_a1, ldwork, A2, lda2,
                V, ldv, T, ldt,
                scratch, ldwork);

    /* Expand A3 the same way A1 was expanded. */
    for (row = 0; row < m3; row++) {
        for (col = row; col < n3; col++) {
            full_a3[row + col*ldwork] = A3[row + col*lda3];
            if (col > row) {
                full_a3[col + row*ldwork] = conj(full_a3[row + col*ldwork]);
            }
        }
    }

    /* Right application on | A2' A3 | */
    CORE_ztsmlq(PlasmaRight, PlasmaConjTrans, n2, m2, m3, n3, k, ib,
                a2_conjt, ldwork, full_a3, ldwork,
                V, ldv, T, ldt,
                scratch, ldwork);

    /* Left application on | A1  | */
    /*                     | A2' | */
    CORE_ztsmlq(PlasmaLeft, PlasmaNoTrans, m1, n1, n2, m2, k, ib,
                full_a1, ldwork, a2_conjt, ldwork,
                V, ldv, T, ldt,
                scratch, ldwork);

    /* Write the upper triangle of the updated block back into A1. */
    for (row = 0; row < m1; row++) {
        for (col = row; col < n1; col++) {
            A1[row + col*lda1] = full_a1[row + col*ldwork];
        }
    }

    /* Left application on | A2 | */
    /*                     | A3 | */
    CORE_ztsmlq(PlasmaLeft, PlasmaNoTrans, m2, n2, m3, n3, k, ib,
                A2, lda2, full_a3, ldwork,
                V, ldv, T, ldt,
                scratch, ldwork);

    /* Write the upper triangle of the updated block back into A3. */
    for (row = 0; row < m3; row++) {
        for (col = row; col < n3; col++) {
            A3[row + col*lda3] = full_a3[row + col*ldwork];
        }
    }

    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_ztsmlq_corner_quark ( Quark * quark )

This kernel applies right and left transformations as depicted below:

    | I - V T V' | * | A1  A2 | * | I - V T' V' |
                     | A2' A3 |

where A1 and A3 are symmetric matrices. Only the upper part is referenced. This is an ad hoc implementation and can be further optimized.

Definition at line 266 of file core_ztsmlq_corner.c.

References CORE_ztsmlq_corner(), quark_unpack_args_21, T, and V.

{
    /* Tile dimensions and blocking parameters. */
    int m1, n1, m2, n2, m3, n3;
    int k, ib, nb;
    /* Leading dimensions of the arrays below. */
    int lda1, lda2, lda3, ldv, ldt, ldwork;
    /* Tiles, reflectors, T factor, and workspace. */
    PLASMA_Complex64_t *A1, *A2, *A3;
    PLASMA_Complex64_t *V, *T;
    PLASMA_Complex64_t *WORK;

    /* Fetch the task's 21 arguments through quark_unpack_args_21. */
    quark_unpack_args_21(quark,
                         m1, n1, m2, n2, m3, n3,
                         k, ib, nb,
                         A1, lda1,
                         A2, lda2,
                         A3, lda3,
                         V, ldv,
                         T, ldt,
                         WORK, ldwork);

    /* Forward everything to the kernel; its return value is not used here. */
    CORE_ztsmlq_corner(m1, n1, m2, n2, m3, n3,
                       k, ib, nb,
                       A1, lda1,
                       A2, lda2,
                       A3, lda3,
                       V, ldv,
                       T, ldt,
                       WORK, ldwork);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_ztsmlq_corner	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	m1,
		int	n1,
		int	m2,
		int	n2,
		int	m3,
		int	n3,
		int	k,
		int	ib,
		int	nb,
		PLASMA_Complex64_t *	A1,
		int	lda1,
		PLASMA_Complex64_t *	A2,
		int	lda2,
		PLASMA_Complex64_t *	A3,
		int	lda3,
		PLASMA_Complex64_t *	V,
		int	ldv,
		PLASMA_Complex64_t *	T,
		int	ldt
	)

Definition at line 219 of file core_ztsmlq_corner.c.

References CORE_ztsmlq_corner_quark(), INOUT, INPUT, QUARK_Insert_Task(), QUARK_REGION_D, QUARK_REGION_U, SCRATCH, and VALUE.

{
    /* Byte sizes declared to QUARK for each pointer argument. */
    const size_t int_bytes  = sizeof(int);
    const size_t tile_bytes = sizeof(PLASMA_Complex64_t)*nb*nb;
    const size_t t_bytes    = sizeof(PLASMA_Complex64_t)*ib*nb;
    /* The scratch buffer is sized as four nb-by-nb tiles. */
    const size_t work_bytes = sizeof(PLASMA_Complex64_t)*4*nb*nb;
    /* Leading dimension passed to the kernel for the scratch workspace. */
    int ldwork = nb;

    QUARK_Insert_Task(
        quark, CORE_ztsmlq_corner_quark, task_flags,
        int_bytes,  &m1,     VALUE,
        int_bytes,  &n1,     VALUE,
        int_bytes,  &m2,     VALUE,
        int_bytes,  &n2,     VALUE,
        int_bytes,  &m3,     VALUE,
        int_bytes,  &n3,     VALUE,
        int_bytes,  &k,      VALUE,
        int_bytes,  &ib,     VALUE,
        int_bytes,  &nb,     VALUE,
        tile_bytes, A1,      INOUT|QUARK_REGION_D|QUARK_REGION_U,
        int_bytes,  &lda1,   VALUE,
        tile_bytes, A2,      INOUT,
        int_bytes,  &lda2,   VALUE,
        tile_bytes, A3,      INOUT|QUARK_REGION_D|QUARK_REGION_U,
        int_bytes,  &lda3,   VALUE,
        tile_bytes, V,       INPUT,
        int_bytes,  &ldv,    VALUE,
        t_bytes,    T,       INPUT,
        int_bytes,  &ldt,    VALUE,
        work_bytes, NULL,    SCRATCH,
        int_bytes,  &ldwork, VALUE,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

Macros

Functions

Detailed Description

Macro Definition Documentation

Function Documentation