Parallel Reduction from BAND Bidiagonal to the final condensed form - dynamic scheduler
{
/* Locals used only in the COMPLEX build: absztmp is compared against dzero
 * and stored into D[]/E[] further down. */
#ifdef COMPLEX
static double dzero = (double) 0.0;
double absztmp;
#endif
/* NOTE(review): M, N, NB and MINMN are read below but never assigned in the
 * visible code -- presumably they are unpacked from the routine's arguments
 * in lines not shown here; confirm. */
int M, N, NB, MINMN, INgrsiz, INthgrsiz, BAND;
/* shift is fixed at 3; it feeds the stepercol and PCOL computations below. */
int myid, grsiz, shift=3, stt, st, ed, stind, edind;
int blklastind, colpt, PCOL, ACOL, MCOL;
int stepercol,mylastid,grnb,grid;
int i, j, m;
int thgrsiz, thgrnb, thgrid, thed;
/* NOTE(review): as written this return is unconditional, which makes every
 * statement after it unreachable. A guarding condition appears to be missing
 * from the visible text -- confirm against the upstream file. */
return;
/* Nothing to do when MINMN is zero. */
if ( MINMN == 0 ){
return;
}
/* NB == 0 case: clear D (MINMN doubles) and E (MINMN-1 doubles), then return. */
if ( NB == 0 ) {
memset(D, 0, MINMN *sizeof(double));
memset(E, 0, (MINMN-1)*sizeof(double));
/* NOTE(review): neither for-header below has a loop statement of its own in
 * the visible text, so the following `return;` becomes the loop body. The
 * statement that filled D[i] appears to be missing -- confirm. */
#ifdef COMPLEX
for (i=0; i<MINMN; i++)
#else
for (i=0; i<MINMN; i++)
#endif
return;
}
/* NB == 1 case: clear D and E, then for i = 0 .. MINMN-2 call
 * LAPACKE_zlarfg_work with n = 2 on A(i,i) and V, and rewrite A(i,i+1) and
 * A(i+1,i+1) from V, TAU, zone and ztmp.
 * NOTE(review): V, TAU, zone and ztmp are neither declared nor initialised
 * in the visible code -- confirm where they are set. */
if ( NB == 1 ) {
memset(D, 0, MINMN*sizeof(double));
memset(E, 0, (MINMN-1)*sizeof(double));
for (i=0; i<(MINMN-1); i++)
{
LAPACKE_zlarfg_work( 2,
A(i, i), &V, 1, &TAU);
*
A(i, i+1) = - V * TAU * (*
A(i+1, i+1));
*
A(i+1, i+1) = *(
A(i+1, i+1)) * (zone - V * ztmp);
}
}
/* NOTE(review): the brace above closes the `if ( NB == 1 )` block as the text
 * stands, so the code below is not conditional on NB == 1 here. Braces look
 * unbalanced in this region -- confirm against the upstream file. */
#ifdef COMPLEX
/* COMPLEX build: walk the diagonal A(i,i) and superdiagonal A(i,i+1), each
 * multiplied by conj(ztmp), and store absztmp into D[i] and E[i]. */
ztmp = zone;
for (i=0; i<MINMN; i++)
{
ztmp = *
A(i, i) *
conj(ztmp);
/* NOTE(review): absztmp is never assigned in the visible code, and the
 * `if (absztmp != dzero)` tests below have no statement before `else`.
 * Lines appear to be missing -- confirm. */
D[i] = absztmp;
if(absztmp != dzero)
else
ztmp = zone;
/* The superdiagonal has only MINMN-1 entries, hence the bound check. */
if(i<(MINMN-1)) {
ztmp = *
A(i, (i+1)) *
conj(ztmp);
E[i] = absztmp;
if(absztmp != dzero)
else
ztmp = zone;
}
}
#else
/* Non-COMPLEX build: the loop body is empty in the visible text. */
for (i=0; i < MINMN-1; i++) {
}
#endif
return;
}
/* Fallback that calls LAPACKE_zgebrd_work (column-major) on the M-by-N matrix
 * at A(0,0) with leading dimension A.lm, writing D and E.
 * NOTE(review): MINMN == 0 already returns earlier in the visible code, so
 * this branch can only be taken for negative MINMN -- confirm the intent. */
if ( MINMN <= 0 )
{
/* Workspace length handed to LAPACK is N*N. */
int info, ldwork = N*N;
/* NOTE(review): taup, tauq and work are not declared in the visible code.
 * The LAPACKE prototype lists tauq before taup, while this call passes taup
 * first -- confirm the argument order is intended. */
info = LAPACKE_zgebrd_work(LAPACK_COL_MAJOR, M, N,
A(0,0), A.
lm, D, E, taup, tauq, work, ldwork);
/* NOTE(review): the `info == 0` arm has no statement before `else` in the
 * visible text; as written only a non-zero info leads to the return. */
if( info == 0 )
else
return;
}
/* Reset the MAXID table (MINMN+1 ints); it is read and written by the
 * task-submission loop that follows. */
memset(MAXID,0,(MINMN+1)*sizeof(int));
/* Choose INgrsiz from NB:
 *   NB > 160        -> 2
 *   100 < NB <= 160 -> 2 if MINMN < 5000, otherwise 4
 *   NB <= 100       -> 6
 * Every branch assigns, so the initial value 1 is always overwritten. */
INgrsiz = 1;
if( NB > 160 ) {
INgrsiz = 2;
}
else if( NB > 100 ) {
if( MINMN < 5000 )
INgrsiz = 2;
else
INgrsiz = 4;
} else {
INgrsiz = 6;
}
INthgrsiz = MINMN;
BAND = 0;
grsiz = INgrsiz;
thgrsiz = INthgrsiz;
/* Zero-value fallbacks. With the assignments above grsiz is never 0 here, and
 * thgrsiz is 0 only when MINMN is 0 (in which case it stays 0). */
if( grsiz == 0 ) grsiz = 6;
if( thgrsiz == 0 ) thgrsiz = MINMN;
/* stepercol = shift / grsiz, rounded up. */
i = shift/grsiz;
stepercol = i*grsiz == shift ? i:i+1;
/* thgrnb = (MINMN-2) / thgrsiz, rounded up for positive operands. */
i = (MINMN-2)/thgrsiz;
thgrnb = i*thgrsiz == (MINMN-2) ? i:i+1;
/* Loop nest over thgrnb groups of thgrsiz, then over i, over the stepercol
 * steps m, and over j in [st, ed]. For each (i, j, m) it derives the ids
 * myid/mylastid and the three dependency indices PCOL, ACOL, MCOL that are
 * passed through DEP(...) to the call below. */
for (thgrid = 1; thgrid<=thgrnb; thgrid++){
/* First index of this group, and its last index clipped to MINMN-2. */
stt = (thgrid-1)*thgrsiz+1;
/* NOTE(review): thed is computed here but not read in the visible code. */
thed =
min( (stt + thgrsiz -1), (MINMN-2));
for (i = stt; i <= MINMN-2; i++){
/* NOTE(review): `ed` is read here and in the j-loop bound but is never
 * assigned in the visible code -- an assignment appears to be missing. */
if(stt>ed)break;
for (m = 1; m <=stepercol; m++){
st=stt;
for (j = st; j <=ed; j++){
/* Ids covered by this step: myid .. mylastid (grsiz consecutive values). */
myid = (i-j)*(stepercol*grsiz) +(m-1)*grsiz + 1;
mylastid = myid+grsiz-1;
/* PCOL: mylastid+shift-1 capped by MAXID[j-1], then snapped to the first id
 * of its grsiz-sized group. MAXID[j] records this step's myid. */
PCOL = mylastid+shift-1;
MAXID[j] = myid;
PCOL =
min(PCOL,MAXID[j-1]);
grnb = PCOL/grsiz;
grid = grnb*grsiz == PCOL ? grnb:grnb+1;
PCOL = (grid-1)*grsiz +1;
/* ACOL: the id one group back, or 0 for the very first id. */
ACOL = myid-grsiz;
if(myid==1)ACOL=0;
MCOL = myid;
/* NOTE(review): the start of the next statement (callee name and opening
 * parenthesis) is not visible. What remains is an argument list passing the
 * quark handle, task flags, A, C, S, the loop indices, grsiz, BAND and the
 * three DEP(...) entries -- presumably a task insertion; confirm. */
plasma->
quark, &task_flags,
&A, C, S, i, j, m, grsiz, BAND,
DEP(PCOL),
DEP(ACOL),
DEP(MCOL) );
/* Compute blklastind from mylastid, NB and j. For odd mylastid it is MINMN
 * only when the [stind, edind] window ends at MINMN with stind >= edind-1,
 * and 0 otherwise. */
if(mylastid%2 ==0){
blklastind = (mylastid/2)*NB+1+j-1;
}else{
colpt = ((mylastid+1)/2)*NB + 1 +j -1 ;
stind = colpt-NB+1;
edind =
min(colpt,MINMN);
if( (stind>=edind-1) && (edind==MINMN) )
blklastind=MINMN;
else
blklastind=0;
}
/* Once blklastind reaches MINMN-1, advance stt so later m-steps start the
 * j-loop one index further on. */
if(blklastind >= (MINMN-1)) stt=stt+1;
}
}
}
}
/* Clear D and E, then for i = 0 .. MINMN-2 call LAPACKE_zlarfg_work with
 * n = 2 on A(i,i) and V, and rewrite A(i,i+1) and A(i+1,i+1) from V, TAU,
 * zone and ztmp.
 * NOTE(review): V, TAU, zone and ztmp are neither declared nor initialised
 * in the visible code -- confirm where they are set. */
memset(D, 0, MINMN*sizeof(double));
memset(E, 0, (MINMN-1)*sizeof(double));
for (i=0; i<(MINMN-1); i++)
{
LAPACKE_zlarfg_work( 2,
A(i, i), &V, 1, &TAU);
*
A(i, (i+1)) = - V * TAU * (*
A((i+1), (i+1)));
*
A((i+1), (i+1)) = (*
A((i+1), (i+1))) * (zone - V * ztmp);
}
/* NOTE(review): the brace below has no matching opener in the visible text;
 * an enclosing `if`/block header appears to be missing -- confirm. */
}
#ifdef COMPLEX
/* COMPLEX build: the else-arm stores |A(i,i+1)| (via cabs) into E[i].
 * NOTE(review): `if(i<(MINMN-2))` has no statement before `else`, so as
 * written only the else-arm (i >= MINMN-2) assigns E[i]; lines appear to be
 * missing -- confirm. */
ztmp =zone;
for (i=0; i < MINMN-1; i++) {
if(i<(MINMN-2))
else
E[i] =
cabs( *
A(i, i+1));
}
#else
/* Non-COMPLEX build: the loop body is empty in the visible text. */
for (i=0; i < MINMN-1; i++) {
}
#endif
}