My questions:
Hi, there,
I got this code to get eigenvalues parallelly, however, I got some error like this. I was trying to debug it, but I really do now know if I did some critical error in concept. I got that I need two nested loop to call PDELSET to distribute the globle matrix to local. In the parameters of PDELSET , there is one called ALPHA and I do not know how to use that. Is it something of A(i,j)?
My input Matrix is
2 1 1 1
1 2 1 1
1 1 2 1
1 1 1 2
the eigenvalues should be 1,1,1,5. But I got 0,1,1,4 and with a little process errors.
You help is highly appreciated!
Bruce
======================
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <sys/time.h>
#include "mpi.h"
#include "blas.h"
#include "blacs.h"
#include "scalapack.h"
#define N 4
extern void pdsyev_(char *jobz, char *uplo, int *n, double *a, int *ia, int *ja, int *desca, double *w, double *z, int *iz, int *jz, int *descz, double *work, int *lwork, int *info);
extern int numroc_(int *, int *, int *, int *, int * );
// this step is to call other functions to load data to array A
double a[N][N]={{2,1,1,1},{1,2,1,1},{1,1,2,1},{1,1,1,2}};
int main(int argc, char ** argv)
{
int iam, nprocs;
int myrank_mpi, nprocs_mpi;
int ictxt, prow, pcol, myrow, mycol;
int brow,bcol;
int info, lwork;
int desca[9],descz[9];
double z[N][N];
double *a0, *z0, *w0,*work;
int izero=0, ione=1;
double mone=(-1.0e0);
char jobz, uplo;
int i,j;
int n;
int LDA, LDB;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD,&myrank_mpi);
MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi);
// printf("myrank_mpi is: %d \n",myrank_mpi);
// printf("nprocs_mpi is: %d \n", nprocs_mpi);
n=N;
brow=2;
bcol=2;
prow=2;
pcol=2;
jobz='V';
uplo='L';
Cblacs_pinfo(&iam, &nprocs);
Cblacs_get(-1,0,&ictxt);
Cblacs_gridinit(&ictxt,"Row",prow, pcol);
Cblacs_gridinfo(ictxt, &prow,&pcol, &myrow, &mycol);
//Compute the size of the local matrices
LDA=numroc_(&n, &brow, &myrow, &izero, &prow);
LDB=numroc_(&n, &bcol, &myrow, &izero, &pcol);
//allocat space for A, Z and W
a0=(double *) malloc(LDA*LDB*sizeof(double));
w0=(double *) malloc(n*sizeof(double));
z0=(double *) malloc(LDA*LDB*sizeof(double));
//test number of LDA and LDB
printf("this is to test LDA and LDB \n");
printf("LDA is %d \n",LDA);
printf("LDB is %d \n",LDB);
//initialize the array descriptor
descinit_(desca, &n, &n, &brow, &bcol, &izero, &izero, &ictxt, &LDA, &info);
descinit_(descz, &n, &n, &brow, &bcol, &izero, &izero, &ictxt, &LDA, &info);
//test a matrix
// printf("This is the Array A for testing \n");
// for(i=0;i<n;i++)
// {
// for(j=0;j<n;j++)
// {
// printf("%12.6f",a[i][j]);
// }
// printf("\n");
// }
//distribute matrix to grid
for(j=0;j<n;j++)
{
for(i=0;i<n;i++)
{
pdelset_(a0,&i,&j,desca,&a[i][j]);
}
}
// testing the a0 array. It is used to distributed to the process grid
// i=0;
// while(a0[i]!=0)
// {
// printf("a0[%d",i) ; printf("] is %12.5f \n", a0[i]);
// i++;
// }
work=(double *)malloc(1*sizeof(double));
lwork=-1;
pdsyev_(&jobz, &uplo, &n, a0 , &ione, &ione, desca, w0, z0, &ione, &ione, descz, work, &lwork, &info);
lwork=(int)work[0];
free(work);
work=(double *)malloc(lwork*sizeof(double));
pdsyev_(&jobz, &uplo, &n, a0, &ione, &ione, desca, w0, z0, &ione, &ione, descz, work, &lwork, &info);
fprintf(stdout, "e1=%f\n", w0[0]);
fprintf(stdout, "e2=%f\n", w0[1]);
fprintf(stdout, "e3=%f\n", w0[2]);
fprintf(stdout, "e4=%f\n", w0[3]);
free(work);
free(w0);
free(a0);
free(z0);
Cblacs_gridexit(0);
Cblacs_exit(0);
MPI_Finalize();
exit(0);
}
=============================
The following is the error I got.
[xiaofeng@athena ver3]$ mpiexec -n 4 pdsyev
this is to test LDA and LDB
LDA is 2
LDB is 2
this is to test LDA and LDB
LDA is 2
LDB is 2
this is to test LDA and LDB
LDA is 2
LDB is 2
this is to test LDA and LDB
LDA is 2
LDB is 2
e1=0.000000
e2=1.000000
e3=1.000000
e4=4.000000
rank 3 in job 2 athena.cs.siu.edu_58523 caused collective abort of all ranks
exit status of rank 3: return code 1
e1=0.000000
e2=1.000000
e3=1.000000
e4=4.000000
e1=0.000000
e2=1.000000
e3=1.000000
e4=4.000000
e1=0.000000
e2=1.000000
e3=1.000000
e4=4.000000
*** glibc detected *** double free or corruption (out): 0x00000000007dbf20 ***
[cli_3]: aborting job:
Fatal error in MPI_Finalize: Other MPI error, error stack:
MPI_Finalize(233).........................: MPI_Finalize failed
MPI_Finalize(153).........................:
MPID_Finalize(91).........................:
MPIDI_CH3U_VC_WaitForClose(219)...........: an error occurred while the device was waiting for all open connections to close
MPIDI_CH3_Progress_wait(217)..............: an error occurred while handling an event returned by MPIDU_Sock_Wait()
MPIDI_CH3I_Progress_handle_sock_event(415):
MPIDU_Socki_handle_read(670)..............: connection failure (set=0,sock=2,errno=104:Connection reset by peer)
*** glibc detected *** double free or corruption (out): 0x00000000007d97b0 ***
rank 1 in job 2 athena.cs.siu.edu_58523 caused collective abort of all ranks
exit status of rank 1: killed by signal 6
==================================
The second time, I transplanted the SAMPLE_PDSYEV_CALL.F. I saw the following part is for the matrix distribution, so I use C to rewrite it in my program in order to see the result.
DO 20 J = 1, N
DO 10 I = 1, N
IF( I.EQ.J ) THEN
CALL PDELSET( A, I, J, DESCA,
$ ( DBLE( N-I+1 ) ) / DBLE( N )+ONE /
$ ( DBLE( I+J )-ONE ) )
ELSE
CALL PDELSET( A, I, J, DESCA, ONE / ( DBLE( I+J )-ONE ) )
END IF
10 CONTINUE
20 CONTINUE
here is my code:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <sys/time.h>
#include "mpi.h"
#include "blas.h"
#include "blacs.h"
#include "scalapack.h"
#define N 4
extern void pdsyev_(char *jobz, char *uplo, int *n, double *a, int *ia, int *ja, int *desca, double *w, double *z, int *iz, int *jz, int *descz, double *work, int *lwork, int *info);
extern int numroc_(int *, int *, int *, int *, int * );
// this step is to call other functions to load data to array A
double a[N][N]={{2,1,1,1},{1,2,1,1},{1,1,2,1},{1,1,1,2}};
int main(int argc, char ** argv)
{
int iam, nprocs;
int myrank_mpi, nprocs_mpi;
int ictxt, prow, pcol, myrow, mycol;
int brow,bcol;
int info, lwork;
int desca[9],descz[9];
double z[N][N];
double *a0, *z0, *w0,*work;
int izero=0, ione=1;
double mone=(-1.0e0);
char jobz, uplo;
int i,j;
int n;
int LDA, LDB;
double temp1, temp2;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD,&myrank_mpi);
MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi);
// printf("myrank_mpi is: %d \n",myrank_mpi);
// printf("nprocs_mpi is: %d \n", nprocs_mpi);
n=N;
brow=2;
bcol=2;
prow=2;
pcol=2;
jobz='V';
uplo='L';
Cblacs_pinfo(&iam, &nprocs);
Cblacs_get(-1,0,&ictxt);
Cblacs_gridinit(&ictxt,"Row",prow, pcol);
Cblacs_gridinfo(ictxt, &prow,&pcol, &myrow, &mycol);
//Compute the size of the local matrices
LDA=numroc_(&n, &brow, &myrow, &izero, &prow);
LDB=numroc_(&n, &bcol, &myrow, &izero, &pcol);
//allocat space for A, Z and W
a0=(double *) malloc(LDA*LDB*sizeof(double));
w0=(double *) malloc(n*sizeof(double));
z0=(double *) malloc(LDA*LDB*sizeof(double));
//test number of LDA and LDB
printf("this is to test LDA and LDB \n");
printf("LDA is %d \n",LDA);
printf("LDB is %d \n",LDB);
//initialize the array descriptor
descinit_(desca, &n, &n, &brow, &bcol, &izero, &izero, &ictxt, &LDA, &info);
descinit_(descz, &n, &n, &brow, &bcol, &izero, &izero, &ictxt, &LDA, &info);
//test a matrix
// printf("This is the Array A for testing \n");
// for(i=0;i<n;i++)
// {
// for(j=0;j<n;j++)
// {
// printf("%12.6f",a[i][j]);
// }
// printf("\n");
// }
//distribute matrix to grid
for(j=0;j<n;j++)
{
for(i=0;i<n;i++)
{
// pdelset_(a0,&i,&j,desca,&a[i][j]);
if(i !=j )
{
temp1=(double)(n-i+1)/(double)(n)+(double)(1)/(double)(i+j-1);
pdelset_(a0,&i,&j,desca,&temp1);
}
else
{
temp2=(double)(1)/(double)(i+j-1);
pdelset_(a0,&i,&j,desca,&temp2);
}
}
}
// testing the a0 array. It is used to distributed to the process grid
// i=0;
// while(a0[i]!=0)
// {
// printf("a0[%d",i) ; printf("] is %12.5f \n", a0[i]);
// i++;
// }
work=(double *)malloc(1*sizeof(double));
lwork=-1;
pdsyev_(&jobz, &uplo, &n, a0 , &ione, &ione, desca, w0, z0, &ione, &ione, descz, work, &lwork, &info);
lwork=(int)work[0];
free(work);
work=(double *)malloc(lwork*sizeof(double));
pdsyev_(&jobz, &uplo, &n, a0, &ione, &ione, desca, w0, z0, &ione, &ione, descz, work, &lwork, &info);
fprintf(stdout, "e1=%f\n", w0[0]);
fprintf(stdout, "e2=%f\n", w0[1]);
fprintf(stdout, "e3=%f\n", w0[2]);
fprintf(stdout, "e4=%f\n", w0[3]);
free(work);
free(w0);
free(a0);
free(z0);
Cblacs_gridexit(0);
Cblacs_exit(0);
MPI_Finalize();
exit(0);
}
========
And I also got errors.
The error is like the following:
[xiaofeng@athena ver3]$ mpiexec -n 4 pdsyev
this is to test LDA and LDB
LDA is 2
LDB is 2
this is to test LDA and LDB
LDA is 2
LDB is 2
this is to test LDA and LDB
LDA is 2
LDB is 2
this is to test LDA and LDB
LDA is 2
LDB is 2
e1=-1.608251
e2=-0.199625
e3=0.000000
e4=3.341209
e1=-1.608251
e2=-0.199625
e3=0.000000
e4=3.341209
e1=-1.608251
e2=-0.199625
e3=0.000000
e4=3.341209
*** glibc detected *** double free or corruption (out): 0x00000000007d9650 ***
e1=-1.608251
e2=-0.199625
e3=0.000000
e4=3.341209
*** glibc detected *** double free or corruption (out): 0x00000000007d9650 ***
[cli_3]: rank 3 in job 6 athena.cs.siu.edu_58523 caused collective abort of all ranks
exit status of rank 3: return code 1
aborting job:
Fatal error in MPI_Finalize: Other MPI error, error stack:
MPI_Finalize(233).........................: MPI_Finalize failed
MPI_Finalize(153).........................:
MPID_Finalize(91).........................:
MPIDI_CH3U_VC_WaitForClose(219)...........: an error occurred while the device was waiting for all open connections to close
MPIDI_CH3_Progress_wait(217)..............: an error occurred while handling an event returned by MPIDU_Sock_Wait()
MPIDI_CH3I_Progress_handle_sock_event(415):
MPIDU_Socki_handle_read(670)..............: connection failure (set=0,sock=2,errno=104:Connection reset by peer)
rank 1 in job 6 athena.cs.siu.edu_58523 caused collective abort of all ranks
exit status of rank 1: killed by signal 6
I am going to try to debug it but really hope some one can tell me!
Thanks
Bruce

