Hi,
i still cannot get it working. This time I used the code from
the example you mentioned:
http://icl.cs.utk.edu/lapack-forum/viewtopic.php?t=136
As in my first example the program works fine when
only eigenvalues are requested:
mpirun -np 4 test_pdsyev
with char jobz = 'N'; in the line 103 in test_pdsyev.c
however I'm getting MPI errors:
[q080:31711] *** An error occurred in MPI_Group_incl
[q080:31711] *** on communicator MPI_COMM_WORLD
[q080:31711] *** MPI_ERR_RANK: invalid rank
[q080:31711] *** MPI_ERRORS_ARE_FATAL (goodbye)
...
with char jobz = 'V'; in the line 103 in test_pdsyev.c
Moreover if I get rid of everything related to one of the two contexts
in main.c, and make the remaining context to use
all the available processes
the problematic case (char jobz = 'V';) works.
Is it something wrong in the call to pdsyev, mpi version, ...?
Best regards,
Marcin
Makefile
- Code: Select all
include Makefile.opts
COMPILE = $(CC) $(CFLAGS) $(INCLUDEMPI) -c
%.o: %.c
$(COMPILE) $*.c -o $@
SOURCES = main.c test_pdsyev.c
OBJECTS = $(SOURCES:%.c=%.o)
PDSYEV = test_pdsyev
all: $(PDSYEV)
LINK = $(F77) $(LDFLAGS)
LIBS = $(LIBSCALAPACK) $(LIBBLACS) $(LIBBLAS) $(LIBMPI)
test_pdsyev: main.o test_pdsyev.o
$(LINK) main.o test_pdsyev.o $(LIBS) -o $@
clean:
rm -f $(PDSYEV) main.o test_pdsyev.o
Makefile.opts
- Code: Select all
CC = /usr/local/openmpi-1.2.3-gfortran/bin/mpicc
F77 = /usr/local/openmpi-1.2.3-gfortran/bin/mpif77
CFLAGS = -O3 -Wall -I./include
INCLUDEMPI = -I/usr/local/openmpi-1.2.3-gfortran/include
LDFLAGS = -O3 -lgfortran
LIBSCALAPACK = /usr/lib64/scalapack-1.7.5-5/libscalapack.a
LIBBLACS = /usr/lib64/blacs-1.1-24.5/libmpiblacsF77init.a /usr/lib64/blacs-1.1-24.5/libmpiblacs.a /usr/lib64/blacs-1.1-24.5/libmpiblacsCinit.a
LIBBLAS = /usr/lib64/libblas.a /usr/lib64/liblapack.a
LIBMPI = -L/usr/local/openmpi-1.2.3-gfortran/lib64 -lmpi
main.c (exactly the same as in
http://icl.cs.utk.edu/lapack-forum/viewtopic.php?t=136)
- Code: Select all
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "mpi.h"
#include <sys/time.h>
static int max( int a, int b ){
if (a>b) return(a); else return(b);
}
#ifdef F77_WITH_NO_UNDERSCORE
#define numroc_ numroc
#define descinit_ descinit
#define pdlamch_ pdlamch
#define pdlange_ pdlange
#define pdlacpy_ pdlacpy
#define pdgesv_ pdgesv
#define pdgemm_ pdgemm
#define indxg2p_ indxg2p
#endif
extern void Cblacs_pinfo( int* mypnum, int* nprocs);
extern void Cblacs_get( int context, int request, int* value);
extern int Cblacs_gridinit( int* context, char * order, int np_row, int np_col);
extern void Cblacs_gridinfo( int context, int* np_row, int* np_col, int* my_row, int* my_col);
extern void Cblacs_gridexit( int context);
extern void Cblacs_exit( int error_code);
extern int numroc_( int *n, int *nb, int *iproc, int *isrcproc, int *nprocs);
extern void descinit_( int *desc, int *m, int *n, int *mb, int *nb, int *irsrc, int *icsrc,
int *ictxt, int *lld, int *info);
extern double pdlamch_( int *ictxt , char *cmach);
extern double pdlange_( char *norm, int *m, int *n, double *A, int *ia, int *ja, int *desca, double *work);
extern void pdlacpy_( char *uplo, int *m, int *n, double *a, int *ia, int *ja, int *desca,
double *b, int *ib, int *jb, int *descb);
extern void pdgesv_( int *n, int *nrhs, double *A, int *ia, int *ja, int *desca, int* ipiv,
double *B, int *ib, int *jb, int *descb, int *info);
extern void pdgemm_( char *TRANSA, char *TRANSB, int * M, int * N, int * K, double * ALPHA,
double * A, int * IA, int * JA, int * DESCA, double * B, int * IB, int * JB, int * DESCB,
double * BETA, double * C, int * IC, int * JC, int * DESCC );
extern int indxg2p_( int *indxglob, int *nb, int *iproc, int *isrcproc, int *nprocs);
int main(int argc, char **argv) {
int iam, nprocs;
int myrank_mpi, nprocs_mpi;
int ictxt1, nprow1, npcol1, myrow1, mycol1;
int ictxt2, nprow2, npcol2, myrow2, mycol2;
int np1, nq1, n1, nb1, nrhs1;
int np2, nq2, n2, nb2, nrhs2;
int i, j, k, info, itemp, seed;
int descA[9], descB[9];
double *A, *Acpy, *B, *X, *R, eps, *work;
double AnormF, XnormF, RnormF, BnormF, residF;
int *ippiv;
int izero=0,ione=1;
double mone=(-1.0e0),pone=(1.0e0);
int *usermap1, *usermap2, ldumap1, ldumap2;
/**/
double MPIt1, MPIt2, MPIelapsed;
/**/
MPI_Init( &argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi);
MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi);
/**/
Cblacs_pinfo( &iam, &nprocs ) ;
Cblacs_get( -1, 0, &ictxt1 );
Cblacs_get( -1, 0, &ictxt2 );
/**/
n1 = 500; nrhs1 = 1; nprow1 = 1; npcol1 = 2; nb1 = 64; ldumap1 = nprow1;
n2 = 250; nrhs2 = 2; nprow2 = 2; npcol2 = 1; nb2 = 50; ldumap2 = nprow2;
/**/
usermap1 = (int *)calloc(ldumap1*npcol1,sizeof(int)) ;
usermap2 = (int *)calloc(ldumap2*npcol2,sizeof(int)) ;
/**/
usermap1[0] = 0; usermap1[1] = 2;
usermap2[0] = 1; usermap2[1] = 3;
/**/
if ((nprow1*npcol1)+(nprow2*npcol2)>nprocs_mpi){
if (myrank_mpi==0)
printf(" **** ERROR : I want to use %d processes and only have %d\n",(nprow1*npcol1)+(nprow2*npcol2),nprocs_mpi);
MPI_Finalize(); exit(1);
}
/**/
Cblacs_gridmap ( &ictxt1, usermap1, ldumap1, nprow1, npcol1 );
free(usermap1);
Cblacs_gridmap ( &ictxt2, usermap2, ldumap2, nprow2, npcol2 );
free(usermap2);
/**/
myrow1=-1;mycol1=-1;myrow2=-1;mycol2=-1;
Cblacs_gridinfo( ictxt1, &nprow1, &npcol1, &myrow1, &mycol1 );
Cblacs_gridinfo( ictxt2, &nprow2, &npcol2, &myrow2, &mycol2 );
/**/
if ((myrow1>-1)&(mycol1>-1)&(myrow1<nprow1)&(mycol1<npcol1)) {
test_pdgesv( n1, nrhs1, nprow1, npcol1, nb1, ictxt1);
}
if ((myrow2>-1)&(mycol2>-1)&(myrow2<nprow2)&(mycol2<npcol2)) {
test_pdgesv( n2, nrhs2, nprow2, npcol2, nb2, ictxt2);
}
/**/
if ( ((myrow1>-1)&(mycol1>-1)&(myrow1<nprow1)&(mycol1<npcol1))
|| ((myrow2>-1)&(mycol2>-1)&(myrow2<nprow2)&(mycol2<npcol2))
)
Cblacs_gridexit( 0 );
MPI_Finalize();
exit(0);
}
test_pdsyev.c
- Code: Select all
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "mpi.h"
#include <sys/time.h>
static int max( int a, int b ){
if (a>b) return(a); else return(b);
}
#ifdef F77_WITH_NO_UNDERSCORE
#define numroc_ numroc
#define descinit_ descinit
#define pdlamch_ pdlamch
#define pdlange_ pdlange
#define pdlacpy_ pdlacpy
#define pdsyev_ pdsyev
#define pdgemm_ pdgemm
#define indxg2p_ indxg2p
#endif
extern void Cblacs_pinfo( int* mypnum, int* nprocs);
extern void Cblacs_get( int context, int request, int* value);
extern int Cblacs_gridinit( int* context, char * order, int np_row, int np_col);
extern void Cblacs_gridinfo( int context, int* np_row, int* np_col, int* my_row, int* my_col);
extern void Cblacs_gridexit( int context);
extern void Cblacs_exit( int error_code);
extern int numroc_( int *n, int *nb, int *iproc, int *isrcproc, int *nprocs);
extern void descinit_( int *desc, int *m, int *n, int *mb, int *nb, int *irsrc, int *icsrc,
int *ictxt, int *lld, int *info);
extern double pdlamch_( int *ictxt , char *cmach);
extern double pdlange_( char *norm, int *m, int *n, double *A, int *ia, int *ja, int *desca, double *work);
extern void pdlacpy_( char *uplo, int *m, int *n, double *a, int *ia, int *ja, int *desca,
double *b, int *ib, int *jb, int *descb);
extern void pdsyev_(char *jobz, char *uplo, int *n, double* a, int *ia, int* ja, int* desca, double *w,
double* z, int *iz, int* jz, int* descz, double *work, int *lwork, int *info);
extern void pdgemm_( char *TRANSA, char *TRANSB, int * M, int * N, int * K, double * ALPHA,
double * A, int * IA, int * JA, int * DESCA, double * B, int * IB, int * JB, int * DESCB,
double * BETA, double * C, int * IC, int * JC, int * DESCC );
extern int indxg2p_( int *indxglob, int *nb, int *iproc, int *isrcproc, int *nprocs);
int test_pdgesv( int n, int nrhs, int nprow, int npcol, int nb, int ictxt){
int iam, nprocs;
int myrank_mpi, nprocs_mpi;
int myrow, mycol;
int np, nq, nqrhs;
int i, j, k, info, itemp, seed;
int descA[9], descB[9];
double *A, *Acpy, *B, *X, *R, eps, *work;
double AnormF, XnormF, RnormF, BnormF, residF;
int *ippiv;
int izero=0,ione=1;
double mone=(-1.0e0),pone=(1.0e0);
/**/
double MPIt1, MPIt2, MPIelapsed;
/**/
Cblacs_pinfo( &iam, &nprocs ) ;
Cblacs_gridinfo( ictxt, &nprow, &npcol, &myrow, &mycol );
if (nb>n) nb = n;
np = numroc_( &n , &nb, &myrow, &izero, &nprow );
nq = numroc_( &n , &nb, &mycol, &izero, &npcol );
nqrhs = numroc_( &nrhs, &nb, &mycol, &izero, &npcol );
seed = iam*n*(n+nrhs); srand(seed);
A = (double *)calloc(np*nq,sizeof(double)) ;
if (A==NULL){ printf("error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); }
Acpy = (double *)calloc(np*nq,sizeof(double)) ;
if (Acpy==NULL){ printf("error of memory allocation Acpy on proc %dx%d\n",myrow,mycol); exit(0); }
B = (double *)calloc(np*nqrhs,sizeof(double)) ;
if (B==NULL){ printf("error of memory allocation B on proc %dx%d\n",myrow,mycol); exit(0); }
X = (double *)calloc(np*nqrhs,sizeof(double)) ;
if (X==NULL){ printf("error of memory allocation X on proc %dx%d\n",myrow,mycol); exit(0); }
R = (double *)calloc(np*nqrhs,sizeof(double)) ;
if (R==NULL){ printf("error of memory allocation R on proc %dx%d\n",myrow,mycol); exit(0); }
ippiv = (int *)calloc(np+nb,sizeof(int)) ;
if (ippiv==NULL){ printf("error of memory allocation IPIV on proc %dx%d\n",myrow,mycol); exit(0); }
k = 0;
for (i = 0; i < np; i++) {
for (j = 0; j < nq; j++) {
A[k] = ((double) rand()) / ((double) RAND_MAX) - 0.5 ;
k++;
}
}
k = 0;
for (i = 0; i < np; i++) {
for (j = 0; j < nqrhs; j++) {
B[k] = ((double) rand()) / ((double) RAND_MAX) - 0.5 ;
k++;
}
}
itemp = max( 1, np );
descinit_( descA, &n, &n , &nb, &nb, &izero, &izero, &ictxt, &itemp, &info );
descinit_( descB, &n, &nrhs, &nb, &nb, &izero, &izero, &ictxt, &itemp, &info );
pdlacpy_( "All", &n, &n , A, &ione, &ione, descA, Acpy, &ione, &ione, descA );
pdlacpy_( "All", &n, &nrhs, B, &ione, &ione, descB, X , &ione, &ione, descB );
char jobz = 'V'; // eigenvectors also
char uplo = 'U'; // work with upper
work = (double*)malloc(1 * sizeof(double));
int querylwork = -1;
printf("before pdsyev: iam %d\n", iam);
MPIt1 = MPI_Wtime();
pdsyev_(&jobz, &uplo, &n, A, &ione, &ione, descA, X,
Acpy, &ione, &ione, descA, work, &querylwork, &info);
MPIt2 = MPI_Wtime();
MPIelapsed=MPIt2-MPIt1;
int lwork = (int)work[0];
printf("lwork %d\n", lwork);
free(work);
pdlacpy_( "All", &n, &nrhs, B, &ione, &ione, descB, R , &ione, &ione, descB );
eps = pdlamch_( &ictxt, "Epsilon" );
pdgemm_( "N", "N", &n, &nrhs, &n, &pone, Acpy, &ione, &ione, descA, X, &ione, &ione, descB,
&mone, R, &ione, &ione, descB);
AnormF = pdlange_( "F", &n, &n , A, &ione, &ione, descA, work);
BnormF = pdlange_( "F", &n, &nrhs, B, &ione, &ione, descB, work);
XnormF = pdlange_( "F", &n, &nrhs, X, &ione, &ione, descB, work);
RnormF = pdlange_( "F", &n, &nrhs, R, &ione, &ione, descB, work);
residF = RnormF / ( AnormF * XnormF * eps * ((double) n));
if ( (myrow==0)&(mycol==0) ){
printf("\tn = %d\tnrhs = %d\tprocess grid (%d,%d)\t with blocks (%dx%d) : res = %e, info = %d, time = %f s\n",
n,nrhs,nprow,npcol,nb,nb,residF,info,MPIelapsed);
}
free(A);
free(Acpy);
free(B);
free(X);
free(ippiv);
}