I am testing the pdgemm_ routine.
I called this routine in my program1.c program.
The compilation was successful using mpif77
(mpif77 -o program1 program1.c libscalapack.a blacsF77init_MPI-LINUX-0.a blacs_MPI-LINUX-0.a blacsF77init_MPI-LINUX-0.a liblapack.a libf77blas.a libatlas.a).
But, when I run the executable, I obtain the following:
[root@localhost /root]#mpirun -np 4 ./program1
[cli_1]: aborting job:
Fatal error in MPI_Bcast: Message truncated, error stack:
MPI_Bcast(784)....................: MPI_Bcast(buf=0x8aaa8d8, count=1, dtype=USER<vector>, root=0, comm=0x84000002) failed
MPIR_Bcast(198)...................:
MPIDI_CH3U_Receive_data_found(172): Message from rank 0 and tag 2 truncated; 128 bytes received but buffer size is 32
rank 1 in job 31 localhost.localdomain_43466 caused collective abort of all ranks
exit status of rank 1: return code 1
[root@localhost /root]#
and here is my program1.c:
- Code: Select all
#include<stdio.h>
#include<stdlib.h>
#include<time.h>
#ifdef F77_WITH_NO_UNDERSCORE
#define numroc_ numroc
#define descinit_ descinit
#define pdgemm_ pdgemm
#endif
extern void Cblacs_pinfo( int* mypnum, int* nprocs);
extern void Cblacs_get( int context, int request, int* value);
extern int Cblacs_gridinit( int* context, char * order, int np_row, int np_col);
extern void Cblacs_gridinfo( int context, int* np_row, int* np_col, int* my_row, int* my_col);
extern void Cblacs_gridexit( int context);
extern void Cblacs_exit( int error_code);
extern void Cdgesd2d(int ictxt, int m, int n, double* A, int ld, int rdest, int cdest);
extern void Cdgerv2d(int ictxt, int m, int n, double* A, int ld, int rsrc, int csrc);
extern int numroc_( int *n, int *nb, int *iproc, int *isrcproc, int *nprocs);
extern void descinit_( int *desc, int *m, int *n, int *mb, int *nb, int *irsrc, int *icsrc, int *ictxt, int *lld, int *info);
extern void pdgemm_( char *TRANSA, char *TRANSB, int * M, int * N, int * K, double * ALPHA, double * A, int * IA, int * JA, int * DESCA, double * B, int * IB, int * JB, int * DESCB, double * BETA, double * C, int * IC, int * JC, int * DESCC );
int main(int argc,char**argv){
int n=10,nb=2;
int ictxt,nprow=2,npcol=2,myrow,mycol;
int iam,nprocs;
int zero=0,one=1,ia,ja;
double alpha=1.0,beta=0.;
int myArows,myAcols;
clock_t start_pdgemm,end_pdgemm;
double time_pdgemm;
double *A,*B,*C;
double *myA,*myB,*myC,*SA,*SB;
int descA[9],descB[9],descC[9];
int blocks,rowproc,colproc,rows,cols,jstart,istart,cb,rb,jl,il,k1,k2,i,j;
int rowblocks,colblocks;
Cblacs_pinfo(&iam,&nprocs) ;
if (nprow*npcol>nprocs){
if (iam==0)
printf(" **** ERROR : I want to use %d processes and only have %d\n",(nprow*npcol),nprocs);
return -1;
}
/*********** process grid ***********/
Cblacs_get( -1, 0, &ictxt);
Cblacs_gridinit(&ictxt,"Row",nprow,npcol);
Cblacs_gridinfo(ictxt,&nprow,&npcol,&myrow,&mycol);
/************ Allocation of local matrices ****************/
myArows=numroc_(&n,&nb,&myrow,&zero,&nprow);
myAcols=numroc_(&n,&nb,&mycol,&zero,&npcol);
myA=(double*)calloc(myArows*myAcols,sizeof(double));
myB=(double*)calloc(myArows*myAcols,sizeof(double));
myC=(double*)calloc(myArows*myAcols,sizeof(double));
if (!(myA&&myB&&myC)){
printf("\npas assez de mémoire, proc[%d][%d]\n",myrow,&mycol);
Cblacs_gridexit(ictxt);
Cblacs_exit(1);
return -1;
}
/*********** Allocation of global matrices on process 0 ************/
if (mycol==0 && myrow==0)
{
A=(double*)calloc(n*n,sizeof(double));
B=(double*)calloc(n*n,sizeof(double));
C=(double*)calloc(n*n,sizeof(double));
SA=(double*)calloc(myArows*myAcols,sizeof(double));
SB=(double*)calloc(myArows*myAcols,sizeof(double));
if(!(A&&B&&C&&SA&&SB)){
printf("\npas assez de mémoire, proc[0][0]\n");
Cblacs_gridexit(ictxt);
Cblacs_exit(1);
return -1;
}
for (i=0;i<n;i++)
for(j=0;j<n;j++){
A[i*n+j]=(rand()%1000);
B[i*n+j]=(rand()%1000);
}
}
/************ distribution of matrices A and B ************/
if(mycol==0 && myrow==0)
{
if(n%nb==0)
blocks=n/nb;
else
blocks=n/nb+1;
for(rowproc=0;rowproc<nprow;rowproc++)
for(colproc=0;colproc<npcol;colproc++)
{
rows=numroc_(&n,&nb,&rowproc,&zero,&nprow);
cols=numroc_(&n,&nb,&colproc,&zero,&npcol);
for(jstart=colproc*nb,cb=colproc,jl=0;cb<blocks;cb+=nprow,jstart+=npcol*nb)
for(k2=0;k2<nb && k2+jstart<n;k2++,jl++)
for(istart=rowproc*nb,rb=rowproc,il=0;rb<blocks;rb+=nprow,istart+=nprow*nb)
for(k1=0;k1<nb && k1+istart<n;k1++,il++)
if(rowproc!=0||colproc!=0)
{
SA[jl*rows+il]=A[(jstart+k2)*n+(istart+k1)];
SB[jl*rows+il]=B[(jstart+k2)*n+(istart+k1)];
}
else
{
myA[jl*rows+il]=A[(jstart+k2)*n+(istart+k1)];
myB[jl*rows+il]=B[(jstart+k2)*n+(istart+k1)];
}
if(rowproc!=0||colproc!=0)
{
Cdgesd2d(ictxt,rows,cols,SA,rows,rowproc,colproc);
Cdgesd2d(ictxt,rows,cols,SB,rows,rowproc,colproc);
}
}
}
else
{
rows=numroc_(&n,&nb,&myrow,&zero,&nprow);
cols=numroc_(&n,&nb,&mycol,&zero,&npcol);
Cdgerv2d(ictxt,rows,cols,myA,rows,0,0);
Cdgerv2d(ictxt,rows,cols,myB,rows,0,0);
}
/******** descriptors *******/
descinit_(descA,&n,&n,&nb,&nb,&zero,&zero,&ictxt,&myArows,&one);
descinit_(descB,&n,&n,&nb,&nb,&zero,&zero,&ictxt,&myArows,&one);
descinit_(descC,&n,&n,&nb,&nb,&zero,&zero,&ictxt,&myArows,&one);
/************ pdgemm routine call ************/
ia=myrow+1;
ja=mycol+1;
start_pdgemm=clock();
pdgemm_("T","N",&myAcols,&myArows,&myArows,&alpha,myA,&ia,&ja,descA,myB,&ia,&ja,descB,&beta,myC,&ia,&ja,descC);
end_pdgemm=clock();
/******** gathering of the result matrix *****/
if(mycol==0&&myrow==0)
{
for(rowproc=0;rowproc<nprow;rowproc++)
for(colproc=0;colproc<npcol;colproc++)
{
rows = numroc_(&n,&nb,&rowproc,&zero,&nprow);
cols = numroc_(&n,&nb,&colproc,&zero,&npcol);
if(rowproc!=0||colproc!=0)
Cdgerv2d(ictxt,rows,cols,myC,rows,rowproc,colproc);
rowblocks=rows/nb;
colblocks=cols/nb;
for(jstart=colproc*nb,cb=0;cb<colblocks;cb++,jstart+=npcol*nb)
for(k2=0;k2<nb;k2++)
{
for(istart=rowproc*nb,rb=0;rb<rowblocks;rb++,istart+=nprow*nb)
for(k1=0;k1<nb;k1++)
C[(jstart+k2)*n+(istart+k1)]=myC[(cb*nb+k2)*rows+(rb*nb+k1)];
if(rows%nb!=0)
for(k1=0;rowblocks*nb+k1<rows;k1++)
C[(jstart+k2)*n+(istart+k1)]=myC[(cb*nb+k2)*rows+(rb*nb+k1)];
}
if(cols%nb!=0)
for(k2=0;(colblocks*nb)+k2<cols;k2++)
{
for(rb=0;rb<rowblocks;rb++,istart+=nprow*nb)
for(k1=0;k1<nb;k1++)
C[(jstart+k2)*n+(istart+k1)]=myC[(cb*nb+k2)*rows+(rb*nb+k1)];
if(rows%nb!=0)
for(k1=0;rowblocks*nb+k1<rows;k1++)
C[(jstart+k2)*n+(istart+k1)]=myC[(cb*nb+k2)*rows+(rb*nb+k1)];
}
}
}
else
{
rows=numroc_(&n,&nb,&myrow,&zero,&nprow);
cols=numroc_(&n,&nb,&mycol,&zero,&npcol);
Cdgesd2d(ictxt,rows,cols,myC,rows,0,0);
}
/************** free matrices ******************/
free(myA);
free(myB);
free(myC);
if (myrow==0&&mycol==0)
{
free(A);
free(B);
free(C);
free(SA);
free(SB);
}
time_pdgemm=((double)(end_pdgemm -start_pdgemm))/CLOCKS_PER_SEC;
if (iam==0)
{
printf("\n procs = %d n=%d nb=%d MM= %6.3f\n",nprocs,n,nb,time_pdgemm);
}
Cblacs_gridexit(ictxt);
Cblacs_exit(0);
return 0;
}
I would be glad if you help me
Thanks in advance,
alex

