#include <lapacke.h>
#include "common.h"

Include dependency graph for core_strdalg.c:

Functions
void	CORE_strdalg (PLASMA_enum uplo, int N, int NB, PLASMA_desc *pA, float *V, float *TAU, int i, int j, int m, int grsiz)
void	QUARK_CORE_strdalg (Quark *quark, Quark_Task_Flags *task_flags, int uplo, int N, int NB, PLASMA_desc *A, float *V, float *TAU, int i, int j, int m, int grsiz, int BAND, int *PCOL, int *ACOL, int *MCOL)
void	CORE_strdalg_quark (Quark *quark)

Detailed Description

PLASMA core_blas kernel. PLASMA is a software package provided by Univ. of Tennessee, Univ. of California Berkeley and Univ. of Colorado Denver.

Version: 2.4.5

Author: Azzam Haidar

Date: 2011-05-15 (generated s, Tue Nov 22 14:35:22 2011)

Definition in file core_strdalg.c.

Function Documentation

void CORE_strdalg	(	PLASMA_enum	uplo,
		int	N,
		int	NB,
		PLASMA_desc *	pA,
		float *	V,
		float *	TAU,
		int	i,
		int	j,
		int	m,
		int	grsiz
	)

CORE_strdalg is part of the tridiagonal reduction algorithm (bulge chasing). It corresponds to a local driver of the kernels that should be executed on a single core.

Parameters:

[in]	uplo	One of PlasmaLower or PlasmaUpper.
[in]	N	The order of the matrix A. N >= 0.
[in]	NB	The size of the bandwidth of the matrix A, which corresponds to the tile size. NB >= 0.
[in]	pA	A pointer to the descriptor of the matrix A.
[out]	V	float array, dimension (N). The scalar elementary reflectors are written in this array. So it is used as a workspace for V at each step of the bulge chasing algorithm.
[out]	TAU	float array, dimension (N). The scalar factors of the elementary reflectors are written in this array, so it is used as a workspace for TAU at each step of the bulge chasing algorithm.
[in]	i	Integer that refers to the current sweep (outer loop).
[in]	j	Integer that refers to the sweep to chase (inner loop).
[in]	m	Integer that refers to a sweep step, to ensure order dependencies.
[in]	grsiz	Integer that refers to the size of a group. A group is the number of kernels that should be executed sequentially on the same core. The group size is a trade-off between locality (cache reuse) and parallelism: a small group size increases parallelism, while a large group size increases cache reuse.

Returns:

Return values:

PLASMA_SUCCESS	successful exit
<0	if -i, the i-th argument had an illegal value

Definition at line 82 of file core_strdalg.c.

References A, CORE_shbelr(), CORE_shblrx(), CORE_shbrce(), plasma_desc_t::dtyp, min, and plasma_element_size().

{
    /*
     * Runs up to grsiz consecutive bulge-chasing kernels for sweep step m of
     * the chase identified by (i, j).
     *
     * Each iteration derives a task id (myid) and from it the index window
     * [stind, edind] handed to the kernel:
     *   - myid == 1      -> CORE_shbelr
     *   - myid even      -> CORE_shbrce
     *   - any other myid -> CORE_shblrx
     * The loop stops early once blklastind reaches N-1 or beyond.
     *
     * A non-positive grsiz means there is nothing to execute; the function
     * returns before dividing by it.
     */
    int    k, shift=3;
    int    myid, colpt, stind, edind, blklastind, stepercol;
    size_t eltsize;
    PLASMA_desc A = *pA;

    eltsize = plasma_element_size(A.dtyp);

    /*
     * grsiz is used as a divisor below: grsiz == 0 would be a division by
     * zero (undefined behaviour).  For grsiz < 0 the loop below never runs
     * anyway, so returning here does not change any other case.
     */
    if (grsiz <= 0) {
        return;
    }

    /* stepercol = ceil(shift / grsiz): number of groups per column. */
    k = shift / grsiz;
    stepercol = (k*grsiz == shift) ? k : k+1;

    for (k = 0; k < grsiz; k++) {
        /* Id of the k-th kernel of this group within the whole chase. */
        myid = (i-j)*(stepercol*grsiz) + (m-1)*grsiz + k+1;

        if (myid%2 == 0) {
            colpt      = (myid/2) * NB + 1 + j - 1;
            stind      = colpt - NB + 1;
            edind      = min(colpt, N);
            blklastind = colpt;
        } else {
            colpt      = ((myid+1)/2) * NB + 1 + j - 1;
            stind      = colpt - NB + 1;
            edind      = min(colpt, N);
            /*
             * Odd ids only report a last index (N) when the window has
             * collapsed onto the end of the matrix; otherwise 0 keeps the
             * loop going.
             */
            if ((stind >= edind-1) && (edind == N)) {
                blklastind = N;
            } else {
                blklastind = 0;
            }
        }

        if (myid == 1) {
            CORE_shbelr(uplo, N, &A, V, TAU, stind, edind, eltsize);
        } else if (myid%2 == 0) {
            CORE_shbrce(uplo, N, &A, V, TAU, stind, edind, eltsize);
        } else { /* myid odd and != 1 */
            CORE_shblrx(uplo, N, &A, V, TAU, stind, edind, eltsize);
        }

        /* Nothing left to chase for this sweep. */
        if (blklastind >= (N-1)) {
            break;
        }
    }
}

Here is the call graph for this function:

Here is the caller graph for this function:

void CORE_strdalg_quark ( Quark * quark )

Definition at line 160 of file core_strdalg.c.

References CORE_strdalg(), quark_unpack_args_10, TAU, uplo, and V.

{
    /*
     * QUARK task entry point: pulls the ten task arguments out of the
     * quark handle and forwards them unchanged to CORE_strdalg.
     */
    int          uplo;
    int          N;
    int          NB;
    PLASMA_desc *pA;
    float       *V;
    float       *TAU;
    int          i;
    int          j;
    int          m;
    int          grsiz;

    /* The argument order here must match the order used when the task was inserted. */
    quark_unpack_args_10(quark, uplo, N, NB, pA, V, TAU, i, j, m, grsiz);

    CORE_strdalg(uplo, N, NB, pA, V, TAU, i, j, m, grsiz);
}

Here is the call graph for this function:

Here is the caller graph for this function:

void QUARK_CORE_strdalg	(	Quark *	quark,
		Quark_Task_Flags *	task_flags,
		int	uplo,
		int	N,
		int	NB,
		PLASMA_desc *	A,
		float *	V,
		float *	TAU,
		int	i,
		int	j,
		int	m,
		int	grsiz,
		int	BAND,
		int *	PCOL,
		int *	ACOL,
		int *	MCOL
	)

Definition at line 126 of file core_strdalg.c.

References CORE_strdalg_quark(), INPUT, LOCALITY, NODEP, OUTPUT, QUARK_Insert_Task(), and VALUE.

{
    /*
     * Queues CORE_strdalg_quark as a QUARK task.
     *
     * The first ten arguments are the ones CORE_strdalg_quark unpacks.
     * PCOL, ACOL and MCOL follow them with INPUT / OUTPUT flags; they are
     * presumably there only to express ordering between tasks (confirm
     * against the QUARK documentation).  BAND is not referenced here.
     */
    QUARK_Insert_Task(quark, CORE_strdalg_quark, task_flags,
        /* scalars, copied by value */
        sizeof(int),         &uplo,  VALUE,
        sizeof(int),         &N,     VALUE,
        sizeof(int),         &NB,    VALUE,
        /* pointers passed through without dependency tracking */
        sizeof(PLASMA_desc), A,      NODEP,
        sizeof(float),       V,      NODEP,
        sizeof(float),       TAU,    NODEP,
        /* scalars, copied by value */
        sizeof(int),         &i,     VALUE,
        sizeof(int),         &j,     VALUE,
        sizeof(int),         &m,     VALUE,
        sizeof(int),         &grsiz, VALUE,
        /* dependency markers */
        sizeof(int),         PCOL,   INPUT,
        sizeof(int),         ACOL,   INPUT,
        sizeof(int),         MCOL,   OUTPUT | LOCALITY,
        0);
}

Here is the call graph for this function:

Here is the caller graph for this function:

Functions

Detailed Description

Function Documentation