The LAPACK forum has moved to https://github.com/Reference-LAPACK/lapack/discussions.

problem running pdgemm_

Open discussion regarding features, bugs, issues, vendors, etc.

problem running pdgemm_

Postby alex » Mon Feb 18, 2008 7:25 pm

Hi,

I am testing the pdgemm_ routine.
I called this routine in my program1.c program.
The compilation was successful using mpif77
(mpif77 -o program1 program1.c libscalapack.a blacsF77init_MPI-LINUX-0.a blacs_MPI-LINUX-0.a blacsF77init_MPI-LINUX-0.a liblapack.a libf77blas.a libatlas.a).

But, when I run the executable, I obtain the following:

[root@localhost /root]#mpirun -np 4 ./program1
[cli_1]: aborting job:
Fatal error in MPI_Bcast: Message truncated, error stack:
MPI_Bcast(784)....................: MPI_Bcast(buf=0x8aaa8d8, count=1, dtype=USER<vector>, root=0, comm=0x84000002) failed
MPIR_Bcast(198)...................:
MPIDI_CH3U_Receive_data_found(172): Message from rank 0 and tag 2 truncated; 128 bytes received but buffer size is 32
rank 1 in job 31 localhost.localdomain_43466 caused collective abort of all ranks
exit status of rank 1: return code 1
[root@localhost /root]#


and here is my program1.c:

Code: Select all
#include<stdio.h>
#include<stdlib.h>
#include<time.h>

#ifdef F77_WITH_NO_UNDERSCORE
#define   numroc_      numroc
#define   descinit_    descinit
#define   pdgemm_      pdgemm
#endif

extern void Cblacs_pinfo( int* mypnum, int* nprocs);
extern void   Cblacs_get( int context, int request, int* value);
extern int Cblacs_gridinit( int* context, char * order, int np_row, int np_col);
extern void Cblacs_gridinfo( int context, int*  np_row, int* np_col, int*  my_row, int*  my_col);
extern void Cblacs_gridexit( int context);
extern void Cblacs_exit( int error_code);
extern void Cdgesd2d(int ictxt, int m, int n, double* A, int ld, int rdest, int cdest);
extern void Cdgerv2d(int ictxt, int m, int n, double* A, int ld, int rsrc, int csrc);

extern int numroc_( int *n, int *nb, int *iproc, int *isrcproc, int *nprocs);
extern void descinit_( int *desc, int *m, int *n, int *mb, int *nb, int *irsrc, int *icsrc, int *ictxt, int *lld, int *info);
extern void pdgemm_( char *TRANSA, char *TRANSB, int * M, int * N, int * K, double * ALPHA, double * A, int * IA, int * JA, int * DESCA, double * B, int * IB, int * JB, int * DESCB, double * BETA, double * C, int * IC, int * JC, int * DESCC );

int main(int argc,char**argv){

int n=10,nb=2;
int ictxt,nprow=2,npcol=2,myrow,mycol;
int iam,nprocs;
int zero=0,one=1,ia,ja;
double alpha=1.0,beta=0.;
int myArows,myAcols;
clock_t start_pdgemm,end_pdgemm;

double time_pdgemm;
double *A,*B,*C;
double *myA,*myB,*myC,*SA,*SB;
int descA[9],descB[9],descC[9];
int blocks,rowproc,colproc,rows,cols,jstart,istart,cb,rb,jl,il,k1,k2,i,j;
int rowblocks,colblocks;

Cblacs_pinfo(&iam,&nprocs) ;

if (nprow*npcol>nprocs){
 if (iam==0)
 printf(" **** ERROR : I want to use %d processes and only have %d\n",(nprow*npcol),nprocs);
 return -1;
}

/*********** process grid ***********/
Cblacs_get( -1, 0, &ictxt);
Cblacs_gridinit(&ictxt,"Row",nprow,npcol);

Cblacs_gridinfo(ictxt,&nprow,&npcol,&myrow,&mycol);

/************ Allocation of local matrices ****************/
myArows=numroc_(&n,&nb,&myrow,&zero,&nprow);
myAcols=numroc_(&n,&nb,&mycol,&zero,&npcol);
myA=(double*)calloc(myArows*myAcols,sizeof(double));
myB=(double*)calloc(myArows*myAcols,sizeof(double));
myC=(double*)calloc(myArows*myAcols,sizeof(double));

if (!(myA&&myB&&myC)){

printf("\npas assez de mémoire, proc[%d][%d]\n",myrow,&mycol);
Cblacs_gridexit(ictxt);
Cblacs_exit(1);
return -1;
}

/*********** Allocation of global matrices on process 0 ************/

if (mycol==0 && myrow==0)

{

A=(double*)calloc(n*n,sizeof(double));

B=(double*)calloc(n*n,sizeof(double));

C=(double*)calloc(n*n,sizeof(double));

SA=(double*)calloc(myArows*myAcols,sizeof(double));

SB=(double*)calloc(myArows*myAcols,sizeof(double));

            

if(!(A&&B&&C&&SA&&SB)){

printf("\npas assez de mémoire, proc[0][0]\n");
Cblacs_gridexit(ictxt);
Cblacs_exit(1);
return -1;
}

               

  for (i=0;i<n;i++)

   for(j=0;j<n;j++){

   A[i*n+j]=(rand()%1000);

   B[i*n+j]=(rand()%1000);

   }


}


/************ distribution of matrices A and B ************/

if(mycol==0 && myrow==0)

{

 if(n%nb==0)

   blocks=n/nb;

 else

   blocks=n/nb+1;

         

 for(rowproc=0;rowproc<nprow;rowproc++)            

  for(colproc=0;colproc<npcol;colproc++)

  {

   rows=numroc_(&n,&nb,&rowproc,&zero,&nprow);

   cols=numroc_(&n,&nb,&colproc,&zero,&npcol);

            

   for(jstart=colproc*nb,cb=colproc,jl=0;cb<blocks;cb+=nprow,jstart+=npcol*nb)

    for(k2=0;k2<nb && k2+jstart<n;k2++,jl++)

     for(istart=rowproc*nb,rb=rowproc,il=0;rb<blocks;rb+=nprow,istart+=nprow*nb)

      for(k1=0;k1<nb && k1+istart<n;k1++,il++)

       if(rowproc!=0||colproc!=0)

   {               

    SA[jl*rows+il]=A[(jstart+k2)*n+(istart+k1)];

    SB[jl*rows+il]=B[(jstart+k2)*n+(istart+k1)];

   }

       else

   {

    myA[jl*rows+il]=A[(jstart+k2)*n+(istart+k1)];

    myB[jl*rows+il]=B[(jstart+k2)*n+(istart+k1)];

   }

   

   if(rowproc!=0||colproc!=0)

    {

     Cdgesd2d(ictxt,rows,cols,SA,rows,rowproc,colproc);

     Cdgesd2d(ictxt,rows,cols,SB,rows,rowproc,colproc);

    }

  }

}

else

{

 rows=numroc_(&n,&nb,&myrow,&zero,&nprow);

 cols=numroc_(&n,&nb,&mycol,&zero,&npcol);

 Cdgerv2d(ictxt,rows,cols,myA,rows,0,0);

 Cdgerv2d(ictxt,rows,cols,myB,rows,0,0);

}

/********  descriptors *******/

   

descinit_(descA,&n,&n,&nb,&nb,&zero,&zero,&ictxt,&myArows,&one);

          

descinit_(descB,&n,&n,&nb,&nb,&zero,&zero,&ictxt,&myArows,&one);


descinit_(descC,&n,&n,&nb,&nb,&zero,&zero,&ictxt,&myArows,&one);

   

/************  pdgemm routine call ************/


ia=myrow+1;
ja=mycol+1;      

start_pdgemm=clock();

pdgemm_("T","N",&myAcols,&myArows,&myArows,&alpha,myA,&ia,&ja,descA,myB,&ia,&ja,descB,&beta,myC,&ia,&ja,descC);

end_pdgemm=clock();



/******** gathering of the result matrix  *****/


if(mycol==0&&myrow==0)

{

 for(rowproc=0;rowproc<nprow;rowproc++)            

  for(colproc=0;colproc<npcol;colproc++)

  {

   rows = numroc_(&n,&nb,&rowproc,&zero,&nprow);

   cols = numroc_(&n,&nb,&colproc,&zero,&npcol);

   

   if(rowproc!=0||colproc!=0)

   Cdgerv2d(ictxt,rows,cols,myC,rows,rowproc,colproc);



   rowblocks=rows/nb;

   colblocks=cols/nb;

   

   for(jstart=colproc*nb,cb=0;cb<colblocks;cb++,jstart+=npcol*nb)

    for(k2=0;k2<nb;k2++)

    {

   

     for(istart=rowproc*nb,rb=0;rb<rowblocks;rb++,istart+=nprow*nb)

      for(k1=0;k1<nb;k1++)

       C[(jstart+k2)*n+(istart+k1)]=myC[(cb*nb+k2)*rows+(rb*nb+k1)];

     if(rows%nb!=0)

      for(k1=0;rowblocks*nb+k1<rows;k1++)

       C[(jstart+k2)*n+(istart+k1)]=myC[(cb*nb+k2)*rows+(rb*nb+k1)];

    }

   if(cols%nb!=0)

   for(k2=0;(colblocks*nb)+k2<cols;k2++)

   {

    for(rb=0;rb<rowblocks;rb++,istart+=nprow*nb)

     for(k1=0;k1<nb;k1++)

      C[(jstart+k2)*n+(istart+k1)]=myC[(cb*nb+k2)*rows+(rb*nb+k1)];

    if(rows%nb!=0)

    for(k1=0;rowblocks*nb+k1<rows;k1++)

    C[(jstart+k2)*n+(istart+k1)]=myC[(cb*nb+k2)*rows+(rb*nb+k1)];

   }

  }

}

else

{

 rows=numroc_(&n,&nb,&myrow,&zero,&nprow);

 cols=numroc_(&n,&nb,&mycol,&zero,&npcol);   

 Cdgesd2d(ictxt,rows,cols,myC,rows,0,0);
}

         

   

/************** free matrices ******************/
free(myA);

free(myB);

free(myC);

if (myrow==0&&mycol==0)

{

 free(A);

 free(B);

 free(C);



 free(SA);

 free(SB);

}

      

time_pdgemm=((double)(end_pdgemm -start_pdgemm))/CLOCKS_PER_SEC;

if (iam==0)

{

printf("\n procs = %d   n=%d    nb=%d   MM= %6.3f\n",nprocs,n,nb,time_pdgemm);


}

 
Cblacs_gridexit(ictxt);
Cblacs_exit(0);
return 0;
}



I would be glad if you help me

Thanks in advance,
alex
alex
 
Posts: 8
Joined: Sun Jan 27, 2008 8:58 pm

Postby buttari » Tue Feb 19, 2008 3:53 am

Alex,

the problem comes from the fact that each process is calling pdgemm with different values for M, N, K, IA, JA, IB, JB, IC, JC because in your program they are set based on myArows, myAcols, myrow and mycol which are different on every process. For example, if you want to multiply submatrices of size 5 starting at (2,2) this is what you should do:

Code: Select all
ia=2;
ja=2;
sub_n = 5;

pdgemm_("T","N",&sub_n,&sub_n,&sub_n,&alpha,myA,&ia,&ja,descA,myB,&ia,&ja,descB,&beta,myC,&ia,&ja,descC);



let me know if you need more help on this.

alfredo
buttari
 
Posts: 51
Joined: Tue Jul 11, 2006 2:11 pm

Postby alex » Tue Feb 19, 2008 8:12 am

Alfredo,

Thanks to you, I am now running the pdgemm routine successfully.

and here are the changes I made:

Code: Select all
int info;  /* I add the variable info at the top of main */
   

descinit_(descA,&n,&n,&nb,&nb,&zero,&zero,&ictxt,&myArows,&info);
          
descinit_(descB,&n,&n,&nb,&nb,&zero,&zero,&ictxt,&myArows,&info);

descinit_(descC,&n,&n,&nb,&nb,&zero,&zero,&ictxt,&myArows,&info);



start_pdgemm=clock();

pdgemm_("T","N",&n,&n,&n,&alpha,myA,&one,&one,descA,myB,&one,&one,descB,&beta,myC,&one,&one,descC);

end_pdgemm=clock();


alex
Last edited by alex on Tue Feb 19, 2008 8:29 am, edited 1 time in total.
alex
 
Posts: 8
Joined: Sun Jan 27, 2008 8:58 pm

Postby buttari » Tue Feb 19, 2008 8:26 am

Cool, I'm glad it helped.

alfredo
buttari
 
Posts: 51
Joined: Tue Jul 11, 2006 2:11 pm


Return to User Discussion

Who is online

Users browsing this forum: No registered users and 4 guests