The LAPACK forum has moved to https://github.com/Reference-LAPACK/lapack/discussions.

pdsyev on selected processes


Postby dulak » Fri Jan 18, 2008 9:25 am

Dear ScaLAPACK users,

I would like to run pdsyev only on selected processes, but I run into the problem illustrated by the attached code. Building and running it with
make; mpirun -np 2 test.exe
succeeds when I use char jobz = 'N' at line 107 and npcol = 1; at line 65 of main.c, but it hangs when I use char jobz = 'V' and npcol = 1; (the settings in the attached example). If I set npcol = 2;, both char jobz = 'N' and char jobz = 'V' succeed.
Am I doing something wrong?

For your convenience, the code is organised as in this post:
http://icl.cs.utk.edu/lapack-forum/viewtopic.php?t=271
(of course I have my own Makefile.opts).

Best regards,

Marcin

Makefile
Code:
include Makefile.opts

COMPILE = $(CC) $(CFLAGS) $(INCLUDEMPI) -c

%.o: %.c
   $(COMPILE) $*.c -o $@

SOURCES = main.c
OBJECTS = $(SOURCES:%.c=%.o)

PDSYEV = test.exe

all: $(PDSYEV)
pdsyev: $(PDSYEV)

LINK = $(F77) $(LDFLAGS)
LIBS = $(LIBSCALAPACK) $(LIBBLACS) $(LIBBLAS) $(LIBMPI)

test.exe: main.o
   $(LINK) main.o $(LIBS) -o $@

clean:
   rm -f $(PDSYEV) $(OBJECTS)


Makefile.opts
Code:
CC = /usr/local/openmpi-1.2.3-gfortran/bin/mpicc
F77 = /usr/local/openmpi-1.2.3-gfortran/bin/mpif77
CFLAGS = -O3 -Wall -I./include
INCLUDEMPI = -I/usr/local/openmpi-1.2.3-gfortran/include
LDFLAGS = -O3 -lgfortran
LIBSCALAPACK = /usr/lib64/scalapack-1.7.5-5/libscalapack.a
LIBBLACS = /usr/lib64/blacs-1.1-24.5/libmpiblacsF77init.a /usr/lib64/blacs-1.1-24.5/libmpiblacs.a /usr/lib64/blacs-1.1-24.5/libmpiblacsCinit.a
LIBBLAS = /usr/lib64/libblas.a /usr/lib64/liblapack.a
LIBMPI = -L/usr/local/openmpi-1.2.3-gfortran/lib64 -lmpi


main.c
Code:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "mpi.h"

//#ifdef F77_WITH_NO_UNDERSCORE
#define   Cblacs_exit_    Cblacs_exit
#define   Cblacs_gridexit_     Cblacs_gridexit
#define   Cblacs_gridinfo_     Cblacs_gridinfo
#define   Cblacs_pnum_     Cblacs_pnum
//#endif

#ifdef F77_WITH_NO_UNDERSCORE
#define   numroc_      numroc
#define   descinit_    descinit
#define   pdsyev_    pdsyev
#define   sl_init_    sl_init
#endif

extern void   Cblacs_exit_( int error_code);
extern void   Cblacs_gridexit_( int context);
extern void   Cblacs_gridinfo_( int context, int*  np_row, int* np_col, int*  my_row, int*  my_col);
extern int Cblacs_pnum_(int ConTxt, int prow, int pcol);

extern int    numroc_( int *n, int *nb, int *iproc, int *isrcproc, int *nprocs);
extern void   descinit_( int *desc, int *m, int *n, int *mb, int *nb, int *irsrc, int *icsrc,
            int *ictxt, int *lld, int *info);
extern void pdsyev_(char *jobz, char *uplo, int *n, double* a, int *ia, int* ja, int* desca, double *w,
                    double* z, int *iz, int* jz, int* descz, double *work, int *lwork, int *info);
extern void sl_init_(int* ictxt, int* nprow, int* npcol);

int main(int argc, char **argv) {

     static int minusone = -1;
     static int zero = 0;
     static int one = 1;

     // root process
     int root = zero;

     // the size of the blocks the distributed matrix is split into
     // (applies to both rows and columns)
     int mb = 32;
     int nb = mb;
     // the number of rows in the process grid
     // over which the matrix is distributed
     int nprow = 1;
     // the number of columns in the process grid
     // over which the matrix is distributed
     int npcol = 1;
     // dimensions of the matrix to diagonalize
     int n = 50;
     int m = n;
     int lda = n;
     int ldb = n;

     int itype = 1;
     int info = 0;
     int iam = -1;

     int ConTxt;

     nprow = 1;
     npcol = 1;
     // initialize the grid
     sl_init_(&ConTxt, &nprow, &npcol);

     // get information back about the grid
     int myrow = -1;
     int mycol = -1;
     Cblacs_gridinfo_(ConTxt, &nprow, &npcol, &myrow, &mycol);

     int pnum;

     char TOP = ' ';   // default broadcast topology (unused below)
     char scope = 'A'; // scope = all processes in the grid (unused below)

     int rsrc = 0;
     int csrc = 0;

     if (myrow != -1 && mycol != -1) {
          double* eigvals = (double*)malloc(n * sizeof(double));

          // The system process number of the process in the process grid
          pnum = Cblacs_pnum_(ConTxt, myrow, mycol);
          printf("root: %4d, pnum: %d, nprow: %d, npcol: %d, myrow: %4d, mycol: %4d, ConTxt: %d\n", root, pnum, nprow, npcol, myrow, mycol, ConTxt);

          if (pnum == root) {
               printf("ScaLAPACK\n");
          }

          int desc[9];

          // get the size of the distributed matrix
          int locM = numroc_(&m, &mb, &myrow, &rsrc, &nprow);
          int locN = numroc_(&n, &nb, &mycol, &csrc, &npcol);
          // allocate the distributed matrix
          double* mat = (double*)malloc(locM*locN * sizeof(double));

          int lld = locM;

          // build the descriptor
          descinit_(desc, &m, &n, &mb, &nb, &rsrc, &csrc, &ConTxt, &lld, &info);
          char jobz = 'V'; // eigenvectors also
          char uplo = 'U'; // work with upper
          double* work;
          work = (double*)malloc(1 * sizeof(double));
          int querylwork = -1;

          // allocate the distributed matrix of eigenvectors
          double* z = (double*)malloc(locM*locN * sizeof(double));

          printf("before pdsyev, pnum: %d\n", pnum);
          pdsyev_(&jobz, &uplo, &m, mat, &one, &one, desc, eigvals,
                  z, &one, &one, desc, work, &querylwork, &info);
          int lwork = (int)work[0];
          printf("lwork %d\n", lwork);
          free(work);
          free(z);
          free(mat);
          free(eigvals);
          // clean up the grid
          Cblacs_gridexit_(ConTxt);
     }
     Cblacs_exit_(zero);
     exit(0);
}

Postby Julien Langou » Fri Jan 18, 2008 1:09 pm

A while ago I gave an example of how to run ScaLAPACK on only a subset of the processes, which I think is what you are trying to do; have a look at:
http://icl.cs.utk.edu/lapack-forum/viewtopic.php?t=136
Julien.
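
For reference, a minimal sketch of that approach (illustrative only, not verbatim from the linked thread; the rank list and grid shape here are arbitrary choices): list the MPI ranks that should participate in a usermap and pass it to Cblacs_gridmap; ranks outside the map come back from Cblacs_gridinfo with myrow = mycol = -1 and must skip every ScaLAPACK call on that context.

Code:
#include <stdio.h>
#include "mpi.h"

extern void Cblacs_pinfo( int* mypnum, int* nprocs);
extern void Cblacs_get( int context, int request, int* value);
extern void Cblacs_gridmap( int* context, int* usermap, int ldumap, int nprow, int npcol);
extern void Cblacs_gridinfo( int context, int* np_row, int* np_col, int* my_row, int* my_col);
extern void Cblacs_gridexit( int context);

int main(int argc, char **argv) {
   int iam, nprocs, ictxt, myrow, mycol;
   int nprow = 1, npcol = 2;
   int usermap[2] = { 0, 2 };  /* MPI ranks joining the 1x2 grid; needs >= 3 ranks */

   MPI_Init(&argc, &argv);
   Cblacs_pinfo(&iam, &nprocs);
   Cblacs_get(-1, 0, &ictxt);  /* start from the system default context */
   Cblacs_gridmap(&ictxt, usermap, nprow, nprow, npcol);
   Cblacs_gridinfo(ictxt, &nprow, &npcol, &myrow, &mycol);
   if (myrow > -1 && mycol > -1) {
      /* ...descriptors and ScaLAPACK calls on the subset go here... */
      Cblacs_gridexit(ictxt);
   }
   MPI_Finalize();
   return 0;
}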

Postby dulak » Wed Jan 23, 2008 6:49 pm

Hi,

I know the post you mention; however, pdsyev hangs as described in my post. I tried Cblacs_gridmap with the same result. Could you have a look at what is wrong in my code?

Best regards,

Marcin

Postby dulak » Wed Jan 30, 2008 10:59 am

Hi,

I still cannot get it working. This time I used the code from the example you mentioned:
http://icl.cs.utk.edu/lapack-forum/viewtopic.php?t=136

As in my first example, the program works fine when only eigenvalues are requested:
mpirun -np 4 test_pdsyev
with char jobz = 'N'; at line 103 of test_pdsyev.c,

however I get the following MPI errors:
[q080:31711] *** An error occurred in MPI_Group_incl
[q080:31711] *** on communicator MPI_COMM_WORLD
[q080:31711] *** MPI_ERR_RANK: invalid rank
[q080:31711] *** MPI_ERRORS_ARE_FATAL (goodbye)
...

with char jobz = 'V'; at line 103 of test_pdsyev.c.
Moreover, if I remove everything related to one of the two contexts in main.c and make the remaining context use all the available processes, the problematic case (char jobz = 'V';) works.

Is there something wrong in the call to pdsyev, the MPI version, ...?

Best regards,

Marcin

Makefile
Code:
include Makefile.opts

COMPILE = $(CC) $(CFLAGS) $(INCLUDEMPI) -c

%.o: %.c
   $(COMPILE) $*.c -o $@

SOURCES = main.c test_pdsyev.c
OBJECTS = $(SOURCES:%.c=%.o)

PDSYEV = test_pdsyev

all: $(PDSYEV)

LINK = $(F77) $(LDFLAGS)
LIBS = $(LIBSCALAPACK) $(LIBBLACS) $(LIBBLAS) $(LIBMPI)

test_pdsyev: main.o test_pdsyev.o
   $(LINK) main.o test_pdsyev.o $(LIBS) -o $@

clean:
   rm -f $(PDSYEV) main.o test_pdsyev.o


Makefile.opts
Code:
CC = /usr/local/openmpi-1.2.3-gfortran/bin/mpicc
F77 = /usr/local/openmpi-1.2.3-gfortran/bin/mpif77
CFLAGS = -O3 -Wall -I./include
INCLUDEMPI = -I/usr/local/openmpi-1.2.3-gfortran/include
LDFLAGS = -O3 -lgfortran
LIBSCALAPACK = /usr/lib64/scalapack-1.7.5-5/libscalapack.a
LIBBLACS = /usr/lib64/blacs-1.1-24.5/libmpiblacsF77init.a /usr/lib64/blacs-1.1-24.5/libmpiblacs.a /usr/lib64/blacs-1.1-24.5/libmpiblacsCinit.a
LIBBLAS = /usr/lib64/libblas.a /usr/lib64/liblapack.a
LIBMPI = -L/usr/local/openmpi-1.2.3-gfortran/lib64 -lmpi


main.c (exactly the same as in http://icl.cs.utk.edu/lapack-forum/viewtopic.php?t=136)
Code:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "mpi.h"
#include <sys/time.h>

static int max( int a, int b ){
        if (a>b) return(a); else return(b);
}

#ifdef F77_WITH_NO_UNDERSCORE
#define   numroc_      numroc
#define   descinit_    descinit
#define   pdlamch_     pdlamch
#define   pdlange_     pdlange
#define   pdlacpy_     pdlacpy
#define   pdgesv_      pdgesv
#define   pdgemm_      pdgemm
#define   indxg2p_     indxg2p
#endif

extern void   Cblacs_pinfo( int* mypnum, int* nprocs);
extern void   Cblacs_get( int context, int request, int* value);
extern int    Cblacs_gridinit( int* context, char * order, int np_row, int np_col);
extern void   Cblacs_gridinfo( int context, int*  np_row, int* np_col, int*  my_row, int*  my_col);
extern void   Cblacs_gridexit( int context);
extern void   Cblacs_exit( int error_code);

extern int    numroc_( int *n, int *nb, int *iproc, int *isrcproc, int *nprocs);
extern void   descinit_( int *desc, int *m, int *n, int *mb, int *nb, int *irsrc, int *icsrc,
            int *ictxt, int *lld, int *info);
extern double pdlamch_( int *ictxt , char *cmach);
extern double pdlange_( char *norm, int *m, int *n, double *A, int *ia, int *ja, int *desca, double *work);

extern void pdlacpy_( char *uplo, int *m, int *n, double *a, int *ia, int *ja, int *desca,
            double *b, int *ib, int *jb, int *descb);
extern void pdgesv_( int *n, int *nrhs, double *A, int *ia, int *ja, int *desca, int* ipiv,
            double *B, int *ib, int *jb, int *descb, int *info);
extern void pdgemm_( char *TRANSA, char *TRANSB, int * M, int * N, int * K, double * ALPHA,
            double * A, int * IA, int * JA, int * DESCA, double * B, int * IB, int * JB, int * DESCB,
            double * BETA, double * C, int * IC, int * JC, int * DESCC );
extern int  indxg2p_( int *indxglob, int *nb, int *iproc, int *isrcproc, int *nprocs);

int main(int argc, char **argv) {
   int iam, nprocs;
   int myrank_mpi, nprocs_mpi;
   int ictxt1, nprow1, npcol1, myrow1, mycol1;
   int ictxt2, nprow2, npcol2, myrow2, mycol2;
   int np1, nq1, n1, nb1, nrhs1;
   int np2, nq2, n2, nb2, nrhs2;
   int i, j, k, info, itemp, seed;
   int descA[9], descB[9];
   double *A, *Acpy, *B, *X, *R, eps, *work;
        double AnormF, XnormF, RnormF, BnormF, residF;
   int *ippiv;
   int izero=0,ione=1;
   double mone=(-1.0e0),pone=(1.0e0);
   int *usermap1, *usermap2, ldumap1, ldumap2;
/**/
   double MPIt1, MPIt2, MPIelapsed;
/**/
   MPI_Init( &argc, &argv);
   MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi);
   MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi);
/**/
   Cblacs_pinfo( &iam, &nprocs ) ;
   Cblacs_get( -1, 0, &ictxt1 );
   Cblacs_get( -1, 0, &ictxt2 );
/**/
   n1 = 500; nrhs1 = 1; nprow1 = 1; npcol1 = 2; nb1 = 64; ldumap1 = nprow1;
   n2 = 250; nrhs2 = 2; nprow2 = 2; npcol2 = 1; nb2 = 50; ldumap2 = nprow2;
/**/
   usermap1 = (int *)calloc(ldumap1*npcol1,sizeof(int)) ;
   usermap2 = (int *)calloc(ldumap2*npcol2,sizeof(int)) ;
/**/
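   /* usermap1 places MPI ranks 0 and 2 in a 1x2 grid;
      usermap2 places MPI ranks 1 and 3 in a 2x1 grid */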
   usermap1[0] = 0; usermap1[1] = 2;
   usermap2[0] = 1; usermap2[1] = 3;
/**/
   if ((nprow1*npcol1)+(nprow2*npcol2)>nprocs_mpi){
      if (myrank_mpi==0)
         printf(" **** ERROR : I want to use %d processes and only have %d\n",(nprow1*npcol1)+(nprow2*npcol2),nprocs_mpi);
      MPI_Finalize(); exit(1);
   }
/**/
   Cblacs_gridmap ( &ictxt1, usermap1, ldumap1, nprow1, npcol1 );
   free(usermap1);
   Cblacs_gridmap ( &ictxt2, usermap2, ldumap2, nprow2, npcol2 );
   free(usermap2);
/**/
   myrow1=-1;mycol1=-1;myrow2=-1;mycol2=-1;
   Cblacs_gridinfo( ictxt1, &nprow1, &npcol1, &myrow1, &mycol1 );
   Cblacs_gridinfo( ictxt2, &nprow2, &npcol2, &myrow2, &mycol2 );
/**/
   if ((myrow1>-1)&(mycol1>-1)&(myrow1<nprow1)&(mycol1<npcol1)) {
      test_pdgesv( n1, nrhs1, nprow1, npcol1, nb1, ictxt1);
   }
   if ((myrow2>-1)&(mycol2>-1)&(myrow2<nprow2)&(mycol2<npcol2)) {
      test_pdgesv( n2, nrhs2, nprow2, npcol2, nb2, ictxt2);
   }
/**/
   if  ( ((myrow1>-1)&(mycol1>-1)&(myrow1<nprow1)&(mycol1<npcol1))
      || ((myrow2>-1)&(mycol2>-1)&(myrow2<nprow2)&(mycol2<npcol2))
       )
      Cblacs_gridexit( 0 );

   MPI_Finalize();
   exit(0);
}


test_pdsyev.c
Code:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "mpi.h"
#include <sys/time.h>

static int max( int a, int b ){
        if (a>b) return(a); else return(b);
}

#ifdef F77_WITH_NO_UNDERSCORE
#define   numroc_      numroc
#define   descinit_    descinit
#define   pdlamch_     pdlamch
#define   pdlange_     pdlange
#define   pdlacpy_     pdlacpy
#define   pdsyev_      pdsyev
#define   pdgemm_      pdgemm
#define   indxg2p_     indxg2p
#endif

extern void   Cblacs_pinfo( int* mypnum, int* nprocs);
extern void   Cblacs_get( int context, int request, int* value);
extern int    Cblacs_gridinit( int* context, char * order, int np_row, int np_col);
extern void   Cblacs_gridinfo( int context, int*  np_row, int* np_col, int*  my_row, int*  my_col);
extern void   Cblacs_gridexit( int context);
extern void   Cblacs_exit( int error_code);

extern int    numroc_( int *n, int *nb, int *iproc, int *isrcproc, int *nprocs);
extern void   descinit_( int *desc, int *m, int *n, int *mb, int *nb, int *irsrc, int *icsrc,
            int *ictxt, int *lld, int *info);
extern double pdlamch_( int *ictxt , char *cmach);
extern double pdlange_( char *norm, int *m, int *n, double *A, int *ia, int *ja, int *desca, double *work);

extern void pdlacpy_( char *uplo, int *m, int *n, double *a, int *ia, int *ja, int *desca,
            double *b, int *ib, int *jb, int *descb);
extern void pdsyev_(char *jobz, char *uplo, int *n, double* a, int *ia, int* ja, int* desca, double *w,
                    double* z, int *iz, int* jz, int* descz, double *work, int *lwork, int *info);
extern void pdgemm_( char *TRANSA, char *TRANSB, int * M, int * N, int * K, double * ALPHA,
            double * A, int * IA, int * JA, int * DESCA, double * B, int * IB, int * JB, int * DESCB,
            double * BETA, double * C, int * IC, int * JC, int * DESCC );
extern int  indxg2p_( int *indxglob, int *nb, int *iproc, int *isrcproc, int *nprocs);

int test_pdgesv( int n, int nrhs, int nprow, int npcol, int nb, int ictxt){
   int iam, nprocs;
   int myrank_mpi, nprocs_mpi;
   int myrow, mycol;
   int np, nq, nqrhs;
   int i, j, k, info, itemp, seed;
   int descA[9], descB[9];
   double *A, *Acpy, *B, *X, *R, eps, *work;
        double AnormF, XnormF, RnormF, BnormF, residF;
   int *ippiv;
   int izero=0,ione=1;
   double mone=(-1.0e0),pone=(1.0e0);
/**/
   double MPIt1, MPIt2, MPIelapsed;
/**/
   Cblacs_pinfo( &iam, &nprocs ) ;
   Cblacs_gridinfo( ictxt, &nprow, &npcol, &myrow, &mycol );

   if (nb>n) nb = n;

   np    = numroc_( &n   , &nb, &myrow, &izero, &nprow );
   nq    = numroc_( &n   , &nb, &mycol, &izero, &npcol );
   nqrhs = numroc_( &nrhs, &nb, &mycol, &izero, &npcol );

   seed = iam*n*(n+nrhs); srand(seed);
   A = (double *)calloc(np*nq,sizeof(double)) ;
   if (A==NULL){ printf("error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); }
   Acpy = (double *)calloc(np*nq,sizeof(double)) ;
   if (Acpy==NULL){ printf("error of memory allocation Acpy on proc %dx%d\n",myrow,mycol); exit(0); }
   B = (double *)calloc(np*nqrhs,sizeof(double)) ;
   if (B==NULL){ printf("error of memory allocation B on proc %dx%d\n",myrow,mycol); exit(0); }
   X = (double *)calloc(np*nqrhs,sizeof(double)) ;
   if (X==NULL){ printf("error of memory allocation X on proc %dx%d\n",myrow,mycol); exit(0); }
   R = (double *)calloc(np*nqrhs,sizeof(double)) ;
   if (R==NULL){ printf("error of memory allocation R on proc %dx%d\n",myrow,mycol); exit(0); }
   ippiv = (int *)calloc(np+nb,sizeof(int)) ;
   if (ippiv==NULL){ printf("error of memory allocation IPIV on proc %dx%d\n",myrow,mycol); exit(0); }

   k = 0;
   for (i = 0; i < np; i++) {
      for (j = 0; j < nq; j++) {
         A[k] = ((double) rand()) / ((double) RAND_MAX) - 0.5 ;
         k++;
      }
   }
   k = 0;
   for (i = 0; i < np; i++) {
      for (j = 0; j < nqrhs; j++) {
         B[k] = ((double) rand()) / ((double) RAND_MAX) - 0.5 ;
         k++;
      }
   }
   itemp = max( 1, np );
   descinit_( descA, &n, &n   , &nb, &nb, &izero, &izero, &ictxt, &itemp, &info );
   descinit_( descB, &n, &nrhs, &nb, &nb, &izero, &izero, &ictxt, &itemp, &info );

   pdlacpy_( "All", &n, &n   , A, &ione, &ione, descA, Acpy, &ione, &ione, descA );
   pdlacpy_( "All", &n, &nrhs, B, &ione, &ione, descB, X   , &ione, &ione, descB );

   char jobz = 'V'; // eigenvectors also
   char uplo = 'U'; // work with upper
   work = (double*)malloc(1 * sizeof(double));
   int querylwork = -1;

   printf("before pdsyev: iam %d\n", iam);
   MPIt1 = MPI_Wtime();
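   /* workspace-size query only (querylwork = -1): pdsyev returns the needed
      lwork in work[0]; note that X is passed as the eigenvalue array w and
      Acpy as the eigenvector matrix z here */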
   pdsyev_(&jobz, &uplo, &n, A, &ione, &ione, descA, X,
           Acpy, &ione, &ione, descA, work, &querylwork, &info);
   MPIt2 = MPI_Wtime();
   MPIelapsed=MPIt2-MPIt1;

   int lwork = (int)work[0];
   printf("lwork %d\n", lwork);
   free(work);

   pdlacpy_( "All", &n, &nrhs, B, &ione, &ione, descB, R   , &ione, &ione, descB );
         eps = pdlamch_( &ictxt, "Epsilon" );
   pdgemm_( "N", "N", &n, &nrhs, &n, &pone, Acpy, &ione, &ione, descA, X, &ione, &ione, descB,
         &mone, R, &ione, &ione, descB);
   AnormF = pdlange_( "F", &n, &n   , A, &ione, &ione, descA, work);
   BnormF = pdlange_( "F", &n, &nrhs, B, &ione, &ione, descB, work);
   XnormF = pdlange_( "F", &n, &nrhs, X, &ione, &ione, descB, work);
   RnormF = pdlange_( "F", &n, &nrhs, R, &ione, &ione, descB, work);
   residF = RnormF / ( AnormF * XnormF * eps * ((double) n));

   if ( (myrow==0)&(mycol==0) ){
      printf("\tn = %d\tnrhs = %d\tprocess grid (%d,%d)\t with blocks (%dx%d) : res = %e, info = %d, time = %f s\n",
            n,nrhs,nprow,npcol,nb,nb,residF,info,MPIelapsed);
   }

   free(A);
   free(Acpy);
   free(B);
   free(X);
   free(R);
   free(ippiv);

   return info;
}

Postby dulak » Fri Feb 01, 2008 11:39 am

Hi,

Using pdsyevd instead of pdsyev solves the problem.
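
For the record, here is roughly how the call changes (a sketch only, reusing the variable names from my first main.c above; pdsyevd additionally takes an integer workspace iwork/liwork, and both sizes can be obtained with the same lwork = liwork = -1 query; if I read the documentation right, pdsyevd implements only jobz = 'V', which is the problematic case anyway):

Code:
extern void pdsyevd_(char *jobz, char *uplo, int *n, double* a, int *ia, int* ja, int* desca, double *w,
                     double* z, int *iz, int* jz, int* descz, double *work, int *lwork,
                     int *iwork, int *liwork, int *info);

/* drop-in replacement for the pdsyev_ call in main.c above;
   mat, z, eigvals, desc, m, one and info are set up as before */
char jobz = 'V'; /* eigenvalues and eigenvectors */
char uplo = 'U'; /* work with the upper triangle */
double workq;
int iworkq;
int lwork = -1, liwork = -1;  /* workspace-size query */
pdsyevd_(&jobz, &uplo, &m, mat, &one, &one, desc, eigvals,
         z, &one, &one, desc, &workq, &lwork, &iworkq, &liwork, &info);
lwork = (int)workq;
liwork = iworkq;
double *work = (double*)malloc(lwork * sizeof(double));
int *iwork = (int*)malloc(liwork * sizeof(int));
pdsyevd_(&jobz, &uplo, &m, mat, &one, &one, desc, eigvals,
         z, &one, &one, desc, work, &lwork, iwork, &liwork, &info);
free(iwork);
free(work);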

Best regards,

Marcin

