Parallel Reduction from BAND tridiagonal to the final condensed form - dynamic scheduler
{
/*
 * Function body only: the signature and the declarations of A, D, E, C, S,
 * MAXID, plasma and task_flags are outside this excerpt.
 *
 * Visible flow:
 *   1. early exits for N == 0, NB == 0 and NB == 1;
 *   2. a block guarded by N <= 0 holding the remains of a call that takes
 *      D, E, TTau, work and ldwork;
 *   3. selection of a group size from NB and N, followed by nested loops
 *      that compute task ids and hold the remains of one call per (i, j, m)
 *      taking plasma->quark and three DEP(...) arguments;
 *   4. D and E are zeroed again and, in the COMPLEX build, one entry of E
 *      is set from the absolute value of an element of A.
 *
 * NOTE(review): many statements appear to have been lost from this text
 * (dangling if/else, empty loop bodies, unmatched braces, calls without a
 * callee). The comments below describe only what is still visible.
 */
#ifdef COMPLEX
/* Constants one and zero, plus scratch values used by the NB == 1 path. */
static double zone = (double) 1.0;
static double dzero = (double) 0.0;
double ztmp;
double absztmp;
#endif
/*
 * NOTE(review): N and NB are read below but are never assigned in the
 * visible code; presumably they are set from the matrix descriptor in
 * lines that are missing here -- confirm.
 */
int N, NB, INgrsiz, INthgrsiz, BAND;
int myid, grsiz, shift=3, stt, st, ed, stind, edind;
int blklastind, colpt, PCOL, ACOL, MCOL;
int stepercol, mylastid, grnb, grid;
int i, j, m;
int thgrsiz, thgrnb, thgrid, thed;
/*
 * NOTE(review): unconditional return -- as written, everything below is
 * unreachable. Presumably the guarding condition was lost; confirm against
 * the original source.
 */
return;
/* Nothing to do for an empty problem. */
if (N == 0){
return;
}
/* NB == 0: zero D (N doubles) and E (N-1 doubles), then return. */
if (NB == 0) {
memset(D, 0, N*sizeof(double));
memset(E, 0, (N-1)*sizeof(double));
#ifdef COMPLEX
for (i=0; i<N; i++)
#else
for (i=0; i<N; i++)
#endif
/*
 * NOTE(review): the loop body is missing, so as written the return below
 * is the body of the for loop.
 */
return;
}
/* NB == 1: D and E are zeroed, then (COMPLEX build only) E is filled. */
if (NB == 1){
memset(D, 0, N *sizeof(double));
memset(E, 0, (N-1)*sizeof(double));
#ifdef COMPLEX
for (i=0; i<N; i++)
{
if( i < (N-1)) {
/*
 * NOTE(review): ztmp is not assigned anywhere in the visible code before
 * this read.
 */
absztmp = fabs(ztmp);
E[i] = absztmp;
/* Replace ztmp by ztmp/absztmp, or by one when absztmp is zero. */
if(absztmp != dzero)
ztmp = (double) (ztmp / absztmp);
else
ztmp = zone;
/* For all but the last off-diagonal, multiply *A(i+2, i+1) by ztmp. */
if(i<(N-2)) *
A((i+2),(i+1)) = *
A((i+2),(i+1)) * ztmp;
}
}
/*
 * NOTE(review): this else has no visible matching if. The two branches
 * differ only in the index order passed to A: (i+2, i+1) above and
 * (i+1, i+2) below.
 */
} else {
for (i=0; i<N; i++)
{
if(i<(N-1)) {
absztmp = fabs(ztmp);
E[i] = absztmp;
if(absztmp != dzero)
ztmp = (double) (ztmp / absztmp);
else
ztmp = zone;
if(i<(N-2)) *
A((i+1),(i+2)) = *
A((i+1),(i+2)) * ztmp;
}
}
}
#else
/* Non-COMPLEX build: both loops below have empty bodies in this text. */
for (i=0; i < N-1; i++) {
}
} else {
for (i=0; i < N-1; i++) {
}
}
#endif
return;
}
/*
 * Block guarded by N <= 0. It declares work buffers and ldwork = N*N and
 * contains the tail of a call whose callee name is missing.
 * NOTE(review): N == 0 has already returned above, so as written this can
 * only be entered for negative N. info, work and TTau are not assigned in
 * the visible code, and the if (info == 0) arm is empty -- confirm intent.
 */
if( N <= 0 )
{
double *work, *TTau;
int info, ldwork = N*N;
A(0,0), A.
lm, D, E, TTau, work, ldwork);
if( info == 0 )
else
return;
}
/* Clear the N+1 ints of MAXID. */
memset(MAXID,0,(N+1)*sizeof(int));
/*
 * Pick INgrsiz from NB (and N): 2 when NB > 160; for 100 < NB <= 160,
 * 2 when N < 5000 and 4 otherwise; 6 for NB <= 100.
 */
INgrsiz = 1;
if( NB > 160 ) {
INgrsiz = 2;
}
else if( NB > 100 ) {
if( N < 5000 )
INgrsiz = 2;
else
INgrsiz = 4;
} else {
INgrsiz = 6;
}
INthgrsiz = N;
BAND = 0;
grsiz = INgrsiz;
thgrsiz = INthgrsiz;
/* Fall back to 6 and N when either size is zero. */
if( grsiz == 0 ) grsiz = 6;
if( thgrsiz == 0 ) thgrsiz = N;
/* stepercol = shift/grsiz rounded up (for non-negative operands). */
i = shift/grsiz;
stepercol = i*grsiz == shift ? i:i+1;
/* thgrnb = (N-2)/thgrsiz rounded up (for non-negative operands). */
i = (N-2)/thgrsiz;
thgrnb = i*thgrsiz == (N-2) ? i:i+1;
/* One pass per block of thgrsiz consecutive indices, blocks numbered from 1. */
for (thgrid = 1; thgrid<=thgrnb; thgrid++){
/* First index of this block, and its last index capped at N-2. */
stt = (thgrid-1)*thgrsiz+1;
/* NOTE(review): thed is assigned here but not read again in the visible code. */
thed =
min( (stt + thgrsiz -1), (N-2));
for (i = stt; i <= N-2; i++){
/*
 * NOTE(review): ed is never assigned in the visible code, yet it is read
 * here and as the bound of the j loop below.
 */
if(stt>ed)break;
for (m = 1; m <=stepercol; m++){
st=stt;
for (j = st; j <=ed; j++){
/* First and last id of the group of grsiz ids handled for this (i, j, m). */
myid = (i-j)*(stepercol*grsiz) +(m-1)*grsiz + 1;
mylastid = myid+grsiz-1;
PCOL = mylastid+shift-1;
/* Record the id reached for j, then cap PCOL by the id recorded for j-1. */
MAXID[j] = myid;
PCOL =
min(PCOL,MAXID[j-1]);
/* Round PCOL up to a whole number of groups, then take that group's first id. */
grnb = PCOL/grsiz;
grid = grnb*grsiz == PCOL ? grnb:grnb+1;
PCOL = (grid-1)*grsiz +1;
/* ACOL is the first id of the preceding group, or 0 when myid is 1. */
ACOL = myid-grsiz;
if(myid==1)ACOL=0;
MCOL = myid;
/*
 * NOTE(review): remains of a call -- the callee name is missing. The
 * visible arguments include plasma->quark, &task_flags, &A, C, S, the loop
 * indices, grsiz, BAND, and DEP(...) applied to PCOL, ACOL and MCOL.
 * DEP is defined outside this excerpt.
 */
plasma->
quark, &task_flags,
&A, C, S, i, j, m, grsiz, BAND,
DEP(PCOL),
DEP(ACOL),
DEP(MCOL) );
/*
 * Compute blklastind: (mylastid/2)*NB + j for even mylastid; for odd
 * mylastid it is N when stind >= edind-1 and edind == N, otherwise 0.
 * NOTE(review): edind is not assigned in the visible code.
 */
if(mylastid%2 ==0){
blklastind = (mylastid/2)*NB+1+j-1;
}else{
colpt = ((mylastid+1)/2)*NB + 1 +j -1 ;
stind = colpt-NB+1;
if( (stind>=edind-1) && (edind==N) )
blklastind=N;
else
blklastind=0;
}
/* Advance the starting index once blklastind reaches N-1. */
if(blklastind >= (N-1)) stt=stt+1;
}
}
}
}
/* Zero D (N doubles) and E (N-1 doubles) before filling them. */
memset(D, 0, N *sizeof(double));
memset(E, 0, (N-1)*sizeof(double));
#ifdef COMPLEX
for (i=0; i < N-1 ; i++)
{
/*
 * NOTE(review): the statement for the i < N-2 case is missing; only the
 * else arm, which stores fabs(*A(i+1, i)) into E[i], is visible.
 */
if(i<(N-2))
else
E[i] = fabs( *
A(i+1, i));
}
/*
 * NOTE(review): this else has no visible matching if. The branch below
 * differs from the one above only in the index order passed to A.
 */
} else {
for (i=0; i<N-1; i++)
{
if( i < (N-2) )
else
E[i] = fabs(*
A(i, (i+1)));
}
}
#else
/* Non-COMPLEX build: both loops below have empty bodies in this text. */
for (i=0; i < N-1; i++) {
}
} else {
for (i=0; i < N-1; i++) {
}
}
#endif
}