#include <lapacke.h>
#include "common.h"

Include dependency graph for core_cherfb.c:

Macros
#define	COMPLEX

Functions
int	CORE_cherfb (PLASMA_enum uplo, int n, int k, int ib, int nb, PLASMA_Complex32_t *A, int lda, PLASMA_Complex32_t *T, int ldt, PLASMA_Complex32_t *C, int ldc, PLASMA_Complex32_t *WORK, int ldwork)
void	QUARK_CORE_cherfb (Quark *quark, Quark_Task_Flags *task_flags, PLASMA_enum uplo, int n, int k, int ib, int nb, PLASMA_Complex32_t *A, int lda, PLASMA_Complex32_t *T, int ldt, PLASMA_Complex32_t *C, int ldc)
void	CORE_cherfb_quark (Quark *quark)

Detailed Description

PLASMA core_blas kernel. PLASMA is a software package provided by Univ. of Tennessee, Univ. of California Berkeley and Univ. of Colorado Denver.

Version:: 2.4.5

Author:: Hatem Ltaief

Date:: 2010-11-15 (generated c Tue Nov 22 14:35:22 2011)

Definition in file core_cherfb.c.

Macro Definition Documentation

#define COMPLEX

Definition at line 18 of file core_cherfb.c.

Function Documentation

int CORE_cherfb	(	PLASMA_enum	uplo,
		int	n,
		int	k,
		int	ib,
		int	nb,
		PLASMA_Complex32_t *	A,
		int	lda,
		PLASMA_Complex32_t *	T,
		int	ldt,
		PLASMA_Complex32_t *	C,
		int	ldc,
		PLASMA_Complex32_t *	WORK,
		int	ldwork
	)

CORE_cherfb overwrites the Hermitian complex N-by-N tile C with

Q**H*C*Q

where Q is a complex unitary matrix defined as the product of k elementary reflectors

Q = H(1) H(2) . . . H(k)

as returned by CORE_cgeqrt. Only PlasmaLower supported!

Parameters:

[in]	uplo	PlasmaLower : the upper part of the Hermitian matrix C is not referenced. PlasmaUpper : the lower part of the Hermitian matrix C is not referenced (not supported).
[in]	n	The number of rows/columns of the tile C. N >= 0.
[in]	k	The number of elementary reflectors whose product defines the matrix Q. K >= 0.
[in]	ib	The inner-blocking size. IB >= 0.
[in]	nb	The blocking size. NB >= 0.
[in]	A	The i-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by CORE_cgeqrt in the first k columns of its array argument A.
[in]	lda	The leading dimension of the array A. LDA >= max(1,N).
[in]	T	The IB-by-K triangular factor T of the block reflector. T is upper triangular by block (economic storage); the rest of the array is not referenced.
[in]	ldt	The leading dimension of the array T. LDT >= IB.
[in,out]	C	On entry, the Hermitian N-by-N tile C. On exit, C is overwritten by Q**H*C*Q.
[in]	ldc	The leading dimension of the array C. LDC >= max(1,N).
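[in,out]	WORK	Workspace. The full N-by-N copy of C is built at the start of WORK, and the region starting at WORK + NB*LDWORK is handed to CORE_cunmqr/CORE_cunmlq as their workspace. The routine does not return an optimal LDWORK in WORK(1).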
[in]	ldwork	The leading dimension of the array WORK. LDWORK >= max(1,N).

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

Definition at line 110 of file core_cherfb.c.

References CORE_cunmlq(), CORE_cunmqr(), PlasmaConjTrans, PlasmaLeft, PlasmaLower, PlasmaNoTrans, and PlasmaRight.

{
    int i, j;

    /*
     * Argument checks. A return value of -i flags the i-th argument, which
     * is the contract this kernel documents but previously never honoured.
     * The leading dimensions of C and WORK are only checked when n > 0,
     * because they are not used for indexing otherwise.
     * NOTE(review): lda and ldt are forwarded untouched to CORE_cunmqr /
     * CORE_cunmlq and are intentionally not validated here -- confirm that
     * those kernels check them.
     */
    if ((uplo != PlasmaLower) && (uplo != PlasmaUpper)) {
        return -1;
    }
    if (n < 0) {
        return -2;
    }
    if (k < 0) {
        return -3;
    }
    if (ib < 0) {
        return -4;
    }
    if (nb < 0) {
        return -5;
    }
    if ((n > 0) && (ldc < n)) {
        return -11;
    }
    if ((n > 0) && (ldwork < n)) {
        return -13;
    }

    if (uplo == PlasmaLower) {
        /*
         * Expand the stored lower triangle of C into a full n-by-n matrix
         * in WORK: copy each entry on or below the diagonal and mirror the
         * strictly-lower entries into the upper triangle.
         */
        for (j = 0; j < n; j++) {
            for (i = j; i < n; i++) {
                WORK[i + j * ldwork] = C[i + j * ldc];
                if (i > j) {
                    WORK[j + i * ldwork] = WORK[i + j * ldwork];
#ifdef COMPLEX
                    /* Conjugate the single mirrored entry in place. */
                    LAPACKE_clacgv_work(1, WORK + j + i * ldwork, ldwork);
#endif
                }
            }
        }

        /*
         * Apply the reflectors on both sides of the full matrix held in
         * WORK. The area starting at WORK + nb*ldwork is passed as the
         * workspace of the called kernel.
         */
        /* Left, conjugate-transposed */
        CORE_cunmqr(PlasmaLeft, PlasmaConjTrans, n, n, k, ib,
                    A, lda, T, ldt, WORK, ldwork, WORK + nb * ldwork, ldwork);
        /* Right, not transposed */
        CORE_cunmqr(PlasmaRight, PlasmaNoTrans, n, n, k, ib,
                    A, lda, T, ldt, WORK, ldwork, WORK + nb * ldwork, ldwork);

        /* Copy the lower triangle (diagonal included) of the result back to C. */
        for (j = 0; j < n; j++) {
            for (i = j; i < n; i++) {
                C[i + j * ldc] = WORK[i + j * ldwork];
            }
        }
    }
    else {
        /*
         * Expand the stored upper triangle of C into a full n-by-n matrix
         * in WORK: copy each entry on or above the diagonal and mirror the
         * strictly-upper entries into the lower triangle.
         */
        for (i = 0; i < n; i++) {
            for (j = i; j < n; j++) {
                WORK[i + j * ldwork] = C[i + j * ldc];
                if (j > i) {
                    WORK[j + i * ldwork] = WORK[i + j * ldwork];
#ifdef COMPLEX
                    /* Conjugate the single mirrored entry in place. */
                    LAPACKE_clacgv_work(1, WORK + j + i * ldwork, ldwork);
#endif
                }
            }
        }

        /* Same two-sided update, using the LQ-style kernel for this branch. */
        /* Right, conjugate-transposed */
        CORE_cunmlq(PlasmaRight, PlasmaConjTrans, n, n, k, ib,
                    A, lda, T, ldt, WORK, ldwork, WORK + nb * ldwork, ldwork);
        /* Left, not transposed */
        CORE_cunmlq(PlasmaLeft, PlasmaNoTrans, n, n, k, ib,
                    A, lda, T, ldt, WORK, ldwork, WORK + nb * ldwork, ldwork);

        /* Copy the upper triangle (diagonal included) of the result back to C. */
        for (i = 0; i < n; i++) {
            for (j = i; j < n; j++) {
                C[i + j * ldc] = WORK[i + j * ldwork];
            }
        }
    }

    /* Successful exit. */
    return 0;
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_cherfb_quark ( Quark * quark )

Definition at line 215 of file core_cherfb.c.

References A, C, CORE_cherfb(), quark_unpack_args_13, T, and uplo.

{
    /*
     * Task entry point: recover the 13 arguments that QUARK_CORE_cherfb
     * packed into the task, in the same order, and run the kernel on them.
     */
    PLASMA_enum uplo;
    int n, k, ib, nb;
    int lda, ldt, ldc, ldwork;
    PLASMA_Complex32_t *A, *T, *C, *WORK;

    quark_unpack_args_13(quark,
                         uplo, n, k, ib, nb,
                         A, lda,
                         T, ldt,
                         C, ldc,
                         WORK, ldwork);

    /* The kernel's return code is not propagated; this function is void. */
    CORE_cherfb(uplo, n, k, ib, nb,
                A, lda,
                T, ldt,
                C, ldc,
                WORK, ldwork);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_cherfb	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		PLASMA_enum	uplo,
		int	n,
		int	k,
		int	ib,
		int	nb,
		PLASMA_Complex32_t *	A,
		int	lda,
		PLASMA_Complex32_t *	T,
		int	ldt,
		PLASMA_Complex32_t *	C,
		int	ldc
	)

This kernel is just a workaround for now... will be deleted eventually and replaced by the one above (Piotr's Task)

Definition at line 183 of file core_cherfb.c.

References CORE_cherfb_quark(), INOUT, INPUT, PlasmaUpper, QUARK_Insert_Task(), QUARK_REGION_D, QUARK_REGION_L, QUARK_REGION_U, SCRATCH, and VALUE.

{
    /*
     * Queue one CORE_cherfb_quark task. The arguments are listed as
     * (size, pointer, flag) triples, terminated by 0, and must stay in the
     * order in which CORE_cherfb_quark unpacks them.
     */
    QUARK_Insert_Task(
        quark, CORE_cherfb_quark, task_flags,
        /* scalar arguments, passed by value */
        sizeof(PLASMA_enum), &uplo, VALUE,
        sizeof(int), &n, VALUE,
        sizeof(int), &k, VALUE,
        sizeof(int), &ib, VALUE,
        sizeof(int), &nb, VALUE,
        /* A: region flag follows the triangle selected by uplo.
         * NOTE(review): A is registered INOUT here -- confirm whether the
         * kernel really writes to it. */
        sizeof(PLASMA_Complex32_t)*nb*nb, A,
            uplo == PlasmaUpper ? INOUT|QUARK_REGION_U : INOUT|QUARK_REGION_L,
        sizeof(int), &lda, VALUE,
        /* T: read-only dependency */
        sizeof(PLASMA_Complex32_t)*ib*nb, T, INPUT,
        sizeof(int), &ldt, VALUE,
        /* C: diagonal region plus the triangle selected by uplo */
        sizeof(PLASMA_Complex32_t)*nb*nb, C,
            uplo == PlasmaUpper ? INOUT|QUARK_REGION_D|QUARK_REGION_U : INOUT|QUARK_REGION_D|QUARK_REGION_L,
        sizeof(int), &ldc, VALUE,
        /* scratch buffer of 2*nb*nb elements, unpacked as WORK */
        sizeof(PLASMA_Complex32_t)*2*nb*nb, NULL, SCRATCH,
        /* nb is sent a second time; it is unpacked as ldwork */
        sizeof(int), &nb, VALUE,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

Macros

Functions

Detailed Description

Macro Definition Documentation

Function Documentation