#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <sys/time.h>
#include "mpi.h"

//BLAS
extern void   dscal_( int *n, double *da, double *dx, int *incx);

//BLACS
extern void   Cblacs_pinfo( int* mypnum, int* nprocs);
extern void   Cblacs_get( int context, int request, int* value);
extern int    Cblacs_gridinit( int* context, char * order, int np_row, int np_col);
extern void   Cblacs_gridinfo( int context, int*  np_row, int* np_col, int*  my_row, int*  my_col);
extern void   Cblacs_gridexit( int context);
extern void   Cblacs_exit( int error_code);

//SCALAPACK
extern void   pdlawrite_( char **filenam, int *m, int *n, double *A, int *ia, int *ja, int *descA, int *irwrit, int *icwrit, double *work);
extern void   pdelset_( double *A, int *ia, int *ja, int *desca, double *alpha);
extern double pdlamch_( int *ictxt, char *cmach);
extern int    indxg2p_( int *indxglob, int *nb, int *iproc, int *isrcproc, int *nprocs);
extern int    indxg2l_( int *indxglob, int *nb, int *iproc, int *isrcproc, int *nprocs);
extern int    numroc_( int *n, int *nb, int *iproc, int *isrcproc, int *nprocs);
extern void   descinit_( int *desc, int *m, int *n, int *mb, int *nb, int *irsrc, int *icsrc,
				int *ictxt, int *lld, int *info);
extern void   pdlaset_( char *uplo, int *m, int *n, double *alpha, double *beta, double *A, int *ia, int *ja, int *descA );
extern double pdlange_( char *norm, int *m, int *n, double *A, int *ia, int *ja, int *desca, double *work);
extern void   pdlacpy_( char *uplo, int *m, int *n, double *a, int *ia, int *ja, int *desca,
				double *b, int *ib, int *jb, int *descb);
extern void   pdgesv_( int *n, int *nrhs, double *A, int *ia, int *ja, int *desca, int* ipiv,
				double *B, int *ib, int *jb, int *descb, int *info);
extern void   pdgesvd_( char *jobu, char *jobvt, int *m, int *n, double *a, int *ia, int *ja, int *desca,
	       			double *s, double *u, int *iu, int *ju, int *descu,
			       	double *vt, int *ivt, int *jvt, int *descvt, double *work, int *lwork, int *info);
extern void   pdgemm_( char *TRANSA, char *TRANSB, int * M, int * N, int * K, double * ALPHA,
				double * A, int * IA, int * JA, int * DESCA, double * B, int * IB, int * JB, int * DESCB,
				double * BETA, double * C, int * IC, int * JC, int * DESCC );
extern int    indxg2p_( int *indxglob, int *nb, int *iproc, int *isrcproc, int *nprocs);

#ifdef F77_WITH_NO_UNDERSCORE
#define   numroc_      numroc
#define   descinit_    descinit
#define   pdlamch_     pdlamch
#define   pdlange_     pdlange
#define   pdlacpy_     pdlacpy
#define   pdgesv_      pdgesv
#define   pdgemm_      pdgemm
#define   indxg2p_     indxg2p
#endif



//OTHER
extern void pdgeadd_( char *trans, int *m, int *n,
			       	double *alpha, double *a, int *ia, int *ja, int *desca,
			       	double *beta, double *c, int *ic, int *jc, int *descc );
extern void pdlawritea_( int *m,  int *n, double *A,  int *ia,  int *ja, int *descA, int *irwrit, int *icwrit, double *work);
extern void pdlawritez_( int *m,  int *n, double *A,  int *ia,  int *ja, int *descA, int *irwrit, int *icwrit, double *work);

extern double verif_orthogonality(int m, int n, double *U, int iu, int ju, int *descU);
extern double verif_representativity(int n, double *A, int ia, int ja, int *descA,
                                                     double *U, int iu, int ju, int *descU,
                                                     double *S);

static int max( int a, int b ){
        if (a>b) return(a); else return(b);
}
static int min( int a, int b ){
        if (a<b) return(a); else return(b);
}

enum ENUM_METHODE_SYEV { METHODE_SYEV=2301, METHODE_SYEVR=2302, METHODE_SYEVX=2304, METHODE_SYEVD=2305};

int main(int argc, char **argv) {
	int iam, nprocs;
	int myrank_mpi, nprocs_mpi;
	int ictxt, nprow, npcol, myrow, mycol;
	int nb, n, nloc;
	int mpA, nqA;
	int i, j, k, info, itemp, seed, lwork;
	int descA[9], descZ[9];
	double *A=NULL, *Z=NULL, *work=NULL, *W=NULL, *Acpy=NULL;
	double dzero=0.0e+00,dpone=1.0e+00;
	int mfound, nzcomputed, liwork=0, *iwork=NULL;
	int izero=0,ione=1;
	enum ENUM_METHODE_SYEV methode;
	int *iclustr, *ifail, verif, dumpall;
	double *gap, orfac, abstol, orth, repres;
/**/
	char jobz, uplo;
/**/
	MPI_Init( &argc, &argv);
	MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi);
	MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi);
/**/
	n = 100; nloc = -1; nprow = 2; npcol = 2; nb = 64; jobz= 'V'; uplo='U';methode=METHODE_SYEVR; verif=1; dumpall=1;
	for( i = 1; i < argc; i++ ) {
		if( strcmp( argv[i], "-dumpall" ) == 0 ) {
			dumpall = 1;
		}
		if( strcmp( argv[i], "-noverif" ) == 0 ) {
			verif = 0;
		}
		if( strcmp( argv[i], "-jobz" ) == 0 ) {
			if (i+1<argc) {
				if( strcmp( argv[i+1], "V" ) == 0 ){ jobz = 'V'; i++; }
				else if( strcmp( argv[i+1], "N" ) == 0 ){ jobz = 'N'; i++; }
				else if( strcmp( argv[i+1], "A" ) == 0 ){ jobz = 'A'; i++; }
				else printf(" ** warning: jobu should be set to V, N or A in the command line ** \n");
			}
			else	
				printf(" ** warning: jobu should be set to V, N or A in the command line ** \n");
		}
		if( strcmp( argv[i], "-n" ) == 0 ) {
			n      = atoi(argv[i+1]);
			i++;
		}
                if( strcmp( argv[i], "-nloc" ) == 0 ) {
                        nloc   = atoi(argv[i+1]);
                        i++;
                }
		if( strcmp( argv[i], "-p" ) == 0 ) {
			nprow  = atoi(argv[i+1]);
			i++;
		}
		if( strcmp( argv[i], "-q" ) == 0 ) {
			npcol  = atoi(argv[i+1]);
			i++;
		}
		if( strcmp( argv[i], "-nb" ) == 0 ) {
			nb     = atoi(argv[i+1]);
			i++;
		}
	}
/**/
/*      If nloc (n on a processor) is provided, we overwite the value of n to  n = nprow*npcol*nloc   */
        if (nloc > 0) 
		n = nprow*npcol*nloc;
	if (nb>n)
		nb = n;
	if (nprow*npcol>nprocs_mpi){
		if (myrank_mpi==0)
			printf(" **** ERROR : we do not have enough processes available to make a p-by-q process grid ***\n");
			printf(" **** Bye-bye                                                                         ***\n");
		MPI_Finalize(); exit(1);
	}
/**/
	Cblacs_pinfo( &iam, &nprocs ) ;
	Cblacs_get( -1, 0, &ictxt );
	Cblacs_gridinit( &ictxt, "Row", nprow, npcol );
	Cblacs_gridinfo( ictxt, &nprow, &npcol, &myrow, &mycol );
/**/
	if (iam==0)
 		printf("\n");
/**/
/*	
 *	if (iam==0)
 *		printf("\tn = %d\tnrhs = %d\t(%d,%d)\t%dx%d\n",n,nrhs,nprow,npcol,nb,nb);
 *	printf("Hello World, I am proc %d over %d for MPI, proc %d over %d for BLACS in position (%d,%d) in the process grid\n", 
 *	 		myrank_mpi,nprocs_mpi,iam,nprocs,myrow,mycol);
 */	 
/*
*
*     Work only the process in the process grid
*
*/
	if ((myrow > -1)&(mycol > -1)&(myrow < nprow)&(mycol < npcol)){
/*
*
*     Compute the size of the local matrices (thanks to numroc)
*
*/ 
		mpA    = numroc_( &n     , &nb, &myrow, &izero, &nprow );
		nqA    = numroc_( &n     , &nb, &mycol, &izero, &npcol );
/*
*
*     Allocate and fill the matrices A and B
*
*/ 
		A = (double *)calloc(mpA*nqA,sizeof(double)) ;
		if (A==NULL){ printf("error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); }
		Z = (double *)calloc(mpA*nqA,sizeof(double)) ;
		if (Z==NULL){ printf("error of memory allocation VT on proc %dx%d\n",myrow,mycol); exit(0); }
		W = (double *)calloc(n,sizeof(double)) ;
		if (W==NULL){ printf("error of memory allocation S on proc %dx%d\n",myrow,mycol); exit(0); }
/**/		
		seed = iam*(mpA*nqA*2); srand(seed);
/**/		
		k = 0;
		for (i = 0; i < mpA; i++) {
			for (j = 0; j < nqA; j++) {
				//A[k] = ((double) rand()) / ((double) RAND_MAX) - 0.5 ;
				if (i==j) A[k]=0;
				else {
				A[k]=0.1*i+0.01*j;
				}
				k++;	
			}
		}
/*
*
*     Initialize the array descriptor for the distributed matrices xA, U and VT
*
*/ 
		itemp = max( 1, mpA );
		descinit_( descA,  &n, &n, &nb, &nb, &izero, &izero, &ictxt, &itemp, &info );
		descinit_( descZ,  &n, &n, &nb, &nb, &izero, &izero, &ictxt, &itemp, &info );

      		pdlacpy_( "All", &n, &n, A, &ione, &ione, descA, Z, &ione, &ione, descZ );
		pdgeadd_( "T", &n, &n, &dpone, Z, &ione, &ione, descZ, &dpone, A, &ione, &ione, descA );

		if (dumpall)
		{ 
		double *work_pdlawrite=NULL;
		work_pdlawrite = (double *)calloc(nb,sizeof(double)) ;
		if (work_pdlawrite==NULL){ printf("error of memory allocation work_pdlawrite on proc %dx%d\n",myrow,mycol); exit(0); }
		pdlawritea_(&n, &n, A, &ione, &ione, descA, &izero, &izero, work_pdlawrite);
		free(work_pdlawrite);
		}
		if (verif){
			Acpy = (double *)calloc(mpA*nqA,sizeof(double)) ;
			if (Acpy==NULL){ printf("error of memory allocation Acpy on proc %dx%d\n",myrow,mycol); exit(0); }
      			pdlacpy_( "All", &n, &n, A, &ione, &ione, descA, Acpy, &ione, &ione, descA );
		}

			work = (double *)calloc(2,sizeof(double)) ;
			if (work==NULL){ printf("error of memory allocation for work on proc %dx%d (1st time)\n",myrow,mycol); exit(0); }
			iwork = (int *)calloc(2,sizeof(int)) ;
			if (iwork==NULL){ printf("error of memory allocation for work on proc %dx%d (1st time)\n",myrow,mycol); exit(0); }
			lwork=-1;
			liwork=-1;
			pdsyevr_( &jobz, "A", &uplo, &n, A, &ione, &ione, descA,
					&dzero, &dzero, &izero, &izero, &mfound, &nzcomputed,
					W, Z, &ione, &ione, descZ, work, &lwork, iwork, &liwork, &info );
			lwork=  (int)  work[0];
			liwork= (int) iwork[0];
			free(work);
			free(iwork);
			work  =  (double *)calloc(100*lwork,sizeof(double)) ;
			iwork =  (int *)calloc(100*liwork,sizeof(int)) ;
			if ((work==NULL)||(iwork==NULL)){ printf("error of memory allocation work on proc %dx%d\n",myrow,mycol); exit(0); }
			pdsyevr_( &jobz, "A", &uplo, &n, A, &ione, &ione, descA,
					&dzero, &dzero, &izero, &izero, &mfound, &nzcomputed,
					W, Z, &ione, &ione, descZ, work, &lwork, iwork, &liwork, &info );
			free(work);
			free(iwork);

		if (dumpall)
		{ 
		double *work_pdlawrite=NULL;
		work_pdlawrite = (double *)calloc(nb,sizeof(double)) ;
		if (work_pdlawrite==NULL){ printf("error of memory allocation work_pdlawrite on proc %dx%d\n",myrow,mycol); exit(0); }
		pdlawritez_(&n, &n, Z, &ione, &ione, descZ, &izero, &izero, work_pdlawrite);
		free(work_pdlawrite);
		}

		if ((dumpall)&&(iam==0))
		{
		FILE *Fp;
		Fp = fopen("W.out","w");
		for (i=0;i<n;i++)
			fprintf(Fp,"%20.8e\n",W[i]);
		fclose(Fp);
		}

		if (verif){
      			pdlacpy_( "All", &n, &n, Acpy, &ione, &ione, descA, A, &ione, &ione, descA );
			free(Acpy);
		}

		if ( iam==0 ){
		printf("lwork  = %8.2f Mo (%8.2f%%)\n",
				((double) lwork)*((double) sizeof(double))/1024.00/1024.00,((double) lwork)/((double) mpA)/((double) nqA)*100.00);
		printf("liwork = %8.2f Mo (%8.2f%%)\n",
			((double) liwork)*((double) sizeof(int))/1024.00/1024.00,((double) liwork)/((double) mpA)/((double) nqA)*100.00);
		if (nloc > 0 ) 
			printf("n= %d nloc= %d  (%d,%d)  nb= %d  jobz= %c  \n",n,nloc,nprow,npcol,nb,jobz);
                else
			printf("n= %d no nloc   (%d,%d)  nb= %d  jobz= %c   \n",n,nprow,npcol,nb,jobz);
		printf("\n\n");

		}
/**/
		if (verif){
			repres = verif_representativity(n,A,1,1,descA,Z,1,1,descZ,W);
			orth  = verif_orthogonality(n,n,Z,1,1,descZ);
			if ( iam==0 ){
				printf("orth   = %e\n", orth);
				printf("repres = %e\n", repres);
			}
		}
/**/
		free(W);
		free(Z);
		free(A);
		Cblacs_gridexit( 0 );
	}
/*
*     Print ending messages
*/
	if ( iam==0 ){
		printf("\n");
	}
/**/
	MPI_Finalize();
	return 0;
}
/**/		
double verif_representativity(int n, double *A, int ia, int ja, int *descA,
                                              double *U, int iu, int ju, int *descU,
                                              double *S){

	double *Acpy=NULL, *Ucpy=NULL;
	int nprow, npcol, myrow, mycol;
	int mpA, pcol, localcol, i, nqA;
	int ictxt, nbA, rsrcA, csrcA, nbU, rsrcU, csrcU, mpU, nqU;
	int ctxt_ = 1, nb_ = 5, rsrc_ = 6, csrc_ = 7;
	int izero = 0, ione = 1;
	double *wwork=NULL;
	double tmone= -1.0e+00, tpone= +1.0e+00;
	double residF, AnormF;

	ictxt = descA[ctxt_];
	Cblacs_gridinfo( ictxt, &nprow, &npcol, &myrow, &mycol );

	nbA = descA[nb_]; rsrcA = descA[rsrc_] ; csrcA = descA[csrc_] ;
	mpA    = numroc_( &n, &nbA, &myrow, &rsrcA, &nprow );
	nqA    = numroc_( &n, &nbA, &mycol, &csrcA, &npcol );
	Acpy = (double *)calloc(mpA*nqA,sizeof(double)) ;
	if (Acpy==NULL){ printf("error of memory allocation Acpy on proc %dx%d\n",myrow,mycol); exit(0); }
      	pdlacpy_( "All", &n, &n, A, &ia, &ja, descA, Acpy, &ia, &ja, descA );

	nbU = descU[nb_]; rsrcU = descU[rsrc_] ; csrcU = descU[csrc_] ;
	mpU    = numroc_( &n, &nbU, &myrow, &rsrcU, &nprow );
	nqU    = numroc_( &n, &nbU, &mycol, &csrcU, &npcol );
	Ucpy = (double *)calloc(mpU*nqU,sizeof(double)) ;
	if (Ucpy==NULL){ printf("error of memory allocation Ucpy on proc %dx%d\n",myrow,mycol); exit(0); }
      	pdlacpy_( "All", &n, &n, U, &iu, &ju, descU, Ucpy, &iu, &ju, descU );

	AnormF = pdlange_( "F", &n, &n, A, &ia, &ja, descA, wwork);

	for (i=1;i<n+1;i++){
		pcol = indxg2p_( &i, &(descU[5]), &izero, &izero, &npcol );
		localcol = indxg2l_( &i, &(descU[5]), &izero, &izero, &npcol );
		if( mycol==pcol )
			dscal_( &mpA, &(S[i-1]), &(Ucpy[ ( localcol-1 )*descU[8] ]), &ione );
	}
	pdgemm_( "N", "T", &n, &n, &n, &tpone, Ucpy, &iu, &ju, descU, U, &iu, &ju, descU,
	       		&tmone, Acpy, &ia, &ja, descA ); 
	residF = pdlange_( "F", &n, &n, Acpy, &ione, &ione, descA, wwork);
	residF = residF/AnormF/((double) n);

	free(Ucpy);
	free(Acpy);

	return residF;
}
/**/		
double verif_orthogonality(int m, int n, double *U, int iu, int ju, int *descU){

	double *R=NULL;
	int nprow, npcol, myrow, mycol;
	int mpR, nqR, nb, itemp, descR[9], ictxt, info, min_mn, max_mn;
	int ctxt_ = 1, nb_ = 5;
	int izero = 0, ione = 1;
	double *wwork=NULL;
	double tmone= -1.0e+00,  tpone= +1.0e+00,  tzero= +0.0e+00;
	double orthU;

	min_mn = min(m,n);
	max_mn = max(m,n);
	ictxt = descU[ctxt_];
	nb = descU[nb_];
	Cblacs_gridinfo( ictxt, &nprow, &npcol, &myrow, &mycol );

	mpR    = numroc_( &min_mn, &nb, &myrow, &izero, &nprow );
	nqR    = numroc_( &min_mn, &nb, &mycol, &izero, &npcol );
	R = (double *)calloc(mpR*nqR,sizeof(double)) ;
	if (R==NULL){ printf("error of memory allocation R on proc %dx%d\n",myrow,mycol); exit(0); }
	itemp = max( 1, mpR );
	descinit_( descR,  &min_mn, &min_mn, &nb, &nb, &izero, &izero, &ictxt, &itemp, &info );

	pdlaset_( "F", &min_mn, &min_mn, &tzero, &tpone, R, &ione, &ione, descR );
	if (m>n)
		pdgemm_( "T", "N", &min_mn, &min_mn, &m, &tpone, U, &iu, &ju, descU, U, 
			&iu, &ju, descU, &tmone, R, &ione, &ione, descR );
	else
		pdgemm_( "N", "T", &min_mn, &min_mn, &n, &tpone, U, &iu, &ju, descU, U, 
			&iu, &ju, descU, &tmone, R, &ione, &ione, descR );
	orthU = pdlange_( "F", &min_mn, &min_mn, R, &ione, &ione, descR, wwork );
	orthU = orthU / ((double) max_mn);
	free(R);

	return orthU;

}
/**/		
