PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
core_sgbrce.c File Reference
#include <lapacke.h>
#include "common.h"
Include dependency graph for core_sgbrce.c:

Go to the source code of this file.

Macros

/* 1-based element accessors used by CORE_sgbrce. Each expands to a pointer and
 * subtracts 1 from the index because C arrays are zero-based. The whole
 * replacement list is parenthesised so that postfix operators applied to the
 * macro result (e.g. V(k)[0]) bind to the pointer rather than to a
 * sub-expression of the expansion. */

/* Address of element (_m, _n) of the tiled matrix, obtained through
 * plasma_geteltaddr. Expects `A` (the descriptor pointer) and `eltsize` to be
 * in scope at the point of expansion. */
#define A(_m, _n)   ((float *)plasma_geteltaddr(A, ((_m)-1), ((_n)-1), eltsize))
/* Address of the _m-th (1-based) entry of the array V in scope. */
#define V(_m)   (&(V[(_m)-1]))
/* Address of the _m-th (1-based) entry of the array TAU in scope. */
#define TAU(_m)   (&(TAU[(_m)-1]))

Functions

int CORE_sgbrce (int uplo, int N, PLASMA_desc *A, float *V, float *TAU, int st, int ed, int eltsize)

Detailed Description

PLASMA core_blas kernel. PLASMA is a software package provided by Univ. of Tennessee, Univ. of California Berkeley and Univ. of Colorado Denver.

Version:
2.4.5
Author:
Azzam Haidar
Date:
2011-05-15 s Tue Nov 22 14:35:23 2011

Definition in file core_sgbrce.c.


Macro Definition Documentation

#define A (   _m,
  _n 
)    (float *)plasma_geteltaddr(A, ((_m)-1), ((_n)-1), eltsize)

CORE_sgbrce is a kernel that operates on a region (triangle) of data bounded by st and ed. The kernel applies a right update, which creates a new nonzero (nnz) element, then eliminates it, and moves on to the next right update, which creates another nnz that is eliminated in turn, and so on until the region is finished. Once this is done, it takes advantage of the data still being in cache and applies the left update to the remaining part of the region that has not yet been updated from the left.

Parameters:
[in] uplo:
  • PlasmaLower:
  • PlasmaUpper:
[in] N: The order of the matrix A.
[in,out] A: A pointer to the descriptor of the matrix A.
[out] V: float array, dimension (N). The scalar elementary reflectors are written to this array, so it is used as a workspace for V at each step of the bulge chasing algorithm.
[out] TAU: float array, dimension (N). The scalar factors of the elementary reflectors are written to this array, so it is used as a workspace for TAU at each step of the bulge chasing algorithm.
[in] st: The start index where this kernel will operate.
[in] ed: The end index where this kernel will operate.
[in] eltsize: PLASMA internal value which refers to the size of the precision.
Returns:
Return values:
PLASMA_SUCCESSsuccessful exit
<0: if -i, the i-th argument had an illegal value. TYPE 1-BDL Householder: the accessor macros add -1 to indices because C arrays are zero-based.

Definition at line 72 of file core_sgbrce.c.

#define TAU (   _m)    &(TAU[(_m)-1])

Definition at line 74 of file core_sgbrce.c.

#define V (   _m)    &(V[(_m)-1])

Definition at line 73 of file core_sgbrce.c.


Function Documentation

int CORE_sgbrce ( int  uplo,
int  N,
PLASMA_desc *  A,
float *  V,
float *  TAU,
int  st,
int  ed,
int  eltsize 
)

Definition at line 76 of file core_sgbrce.c.

References A, CORE_slarfx2(), coreblas_error, ELTLDD, max, plasma_desc_t::mb, min, PLASMA_SUCCESS, PlasmaLeft, PlasmaLower, PlasmaRight, TAU, and V.

{
    /*
     * Body of CORE_sgbrce: processes columns/rows i = ed .. st+1 of a band
     * matrix stored in tiles.
     *
     * Pass 1 applies the reflector stored in V(i)/TAU(i) to the elements
     * J1..J3; when this creates an element outside the band (J3 > J2) a new
     * order-2 reflector is generated to annihilate it and is stored in
     * V(J3)/TAU(J3).
     * Pass 2 then applies those newly generated reflectors from the other
     * side to the part of the region [i, min(ed, N)] not yet updated.
     *
     * Returns PLASMA_SUCCESS on success, -2 if N < 0, -6 if ed <= st.
     */
    int NB, J1, J2, J3, KDM2, len, pt;
    int len1, len2, t1ed, t2st;
    int i;
    static float zzero = 0.0;
    /* By-value copy of the descriptor, used as the first argument of
     * ELTLDD(vA, ...) below.
     * NOTE(review): ELTLDD is defined outside this view; this assumes it
     * expects a PLASMA_desc value (not a pointer) -- confirm. */
    PLASMA_desc vA = *A;

    /* Check input arguments */
    if (N < 0) {
        coreblas_error(2, "Illegal value of N");
        return -2;
    }
    if (ed <= st) {
        coreblas_error(6, "Illegal value of st and ed (internal)");
        return -6;
    }

    /* Quick return: nothing to do for an empty matrix. The return must sit
     * here so that the NB/KDM2 initialisation below is unconditional. */
    if (N == 0) {
        return PLASMA_SUCCESS;
    }

    NB   = A->mb;      /* tile size, used to split updates at a multiple of NB */
    KDM2 = A->mb - 2;

    if (uplo == PlasmaLower) {
        /* ========================
         *       LOWER CASE
         * ========================*/
        for (i = ed; i >= st+1; i--) {
            /* apply Householder from the right. and create newnnz outside the band if J3 < N */
            J1 = ed+1;
            J2 = min((i+1+KDM2), N);
            J3 = min((J2+1), N);
            if (J3 > J2) {
                *A(J3, (i-1)) = zzero; /* could be removed because A is supposed to be band.*/
            }
            /* Split rows J1..J3 at t1ed, the largest multiple of NB <= J3:
             * len1 rows J1..t1ed and len2 rows t2st..J3 (presumably so each
             * call stays within a single tile -- confirm). */
            t1ed = (J3/NB)*NB;
            t2st = max(t1ed+1, J1);
            len1 = t1ed-J1+1;
            len2 = J3-t2st+1;
            if (len1 > 0) {
                CORE_slarfx2(PlasmaRight, len1, (*V(i)), (*TAU(i)), A(J1, i-1), ELTLDD(vA, J1), A(J1, i), ELTLDD(vA, J1));
            }
            if (len2 > 0) {
                CORE_slarfx2(PlasmaRight, len2, (*V(i)), (*TAU(i)), A(t2st, i-1), ELTLDD(vA, t2st), A(t2st, i), ELTLDD(vA, t2st));
            }
            len = J3-J2;
            if (len > 0) {
                /* generate Householder to annihilate a(j+kd,j-1) within the band */
                *V(J3)         = *A(J3, (i-1));
                *A(J3, (i-1))  = 0.0;
                LAPACKE_slarfg_work(2, A(J2, (i-1)), V(J3), 1, TAU(J3));
            }
        }
        /* APPLY LEFT ON THE REMAINING ELEMENT OF KERNEL 2 */
        for (i = ed; i >= st+1; i--) {
            /* Recompute J2/J3 as in the first pass: len > 0 exactly when a
             * reflector was generated for this i. */
            J2 = min((i+1+KDM2), N);
            J3 = min((J2+1), N);
            len = J3-J2;
            if (len > 0) {
                pt = J2;            /* first of the two rows the reflector acts on */
                J1 = i;
                J2 = min(ed, N);    /* J2 is reused as the last column to update */
                t1ed = (J2/NB)*NB;
                t2st = max(t1ed+1, J1);
                len1 = t1ed-J1+1;
                len2 = J2-t2st+1;
                if (len1 > 0) {
                    CORE_slarfx2(PlasmaLeft, len1, *V(J3), (*TAU(J3)), A(pt, i), ELTLDD(vA, pt), A((pt+1), i), ELTLDD(vA, pt+1));
                }
                if (len2 > 0) {
                    CORE_slarfx2(PlasmaLeft, len2, *V(J3), (*TAU(J3)), A(pt, t2st), ELTLDD(vA, pt), A((pt+1), t2st), ELTLDD(vA, pt+1));
                }
            }
        }
    } else {
        /* ========================
         *       UPPER CASE
         * ========================*/
        for (i = ed; i >= st+1; i--) {
            /* apply Householder from the right. and create newnnz outside the band if J3 < N */
            J1 = ed+1;
            J2 = min((i+1+KDM2), N);
            J3 = min((J2+1), N);
            if (J3 > J2) {
                *A((i-1), J3) = zzero;
            }
            /* Same split as in the lower case, here over columns J1..J3. */
            t1ed = (J3/NB)*NB;
            t2st = max(t1ed+1, J1);
            len1 = t1ed-J1+1;
            len2 = J3-t2st+1;
            if (len1 > 0) {
                CORE_slarfx2(PlasmaLeft, len1, *V(i), (*TAU(i)), A(i-1, J1), ELTLDD(vA, i-1), A(i, J1), ELTLDD(vA, i));
            }
            if (len2 > 0) {
                CORE_slarfx2(PlasmaLeft, len2, *V(i), (*TAU(i)), A(i-1, t2st), ELTLDD(vA, i-1), A(i, t2st), ELTLDD(vA, i));
            }
            /* if nonzero element a(j+kd,j-1) has been created outside the band (if index < N) then eliminate it. */
            len = J3-J2;
            if (len > 0) {
                /* generate Householder to annihilate a(j+kd,j-1) within the band */
                *V(J3)       = *A(i-1, J3);
                *A(i-1, J3)  = 0.0;
                LAPACKE_slarfg_work(2, A(i-1, J2), V(J3), 1, TAU(J3));
            }
        }
        /* APPLY RIGHT ON THE REMAINING ELEMENT OF KERNEL 2 */
        for (i = ed; i >= st+1; i--) {
            /* find if there was a nnz created. if yes apply right else nothing to be done. */
            J2 = min((i+1+KDM2), N);
            J3 = min((J2+1), N);
            len = J3-J2;
            if (len > 0) {
                pt = J2;            /* first of the two columns the reflector acts on */
                J1 = i;
                J2 = min(ed, N);    /* J2 is reused as the last row to update */
                t1ed = (J2/NB)*NB;
                t2st = max(t1ed+1, J1);
                len1 = t1ed-J1+1;
                len2 = J2-t2st+1;
                if (len1 > 0) {
                    CORE_slarfx2(PlasmaRight, len1, (*V(J3)), (*TAU(J3)), A(i, pt), ELTLDD(vA, i), A(i, pt+1), ELTLDD(vA, i));
                }
                if (len2 > 0) {
                    CORE_slarfx2(PlasmaRight, len2, (*V(J3)), (*TAU(J3)), A(t2st, pt), ELTLDD(vA, t2st), A(t2st, pt+1), ELTLDD(vA, t2st));
                }
            }
        }
    } /* end of else for the upper case */

    return PLASMA_SUCCESS;
}

Here is the call graph for this function:

Here is the caller graph for this function: