I am trying to write a matrix multiplication program, but when it is executed I receive this error msg (the error msg is repeated for A, B, C matrices and for the all 4 procs but I excluded them):
PBLAS ERROR 'Illegal IA = 0, IA must be at least 1'
from {0,0}, pnum=0, Contxt=0, in routine 'PDGEMM'.
PBLAS ERROR 'Illegal JA = 0, IA must be at least 1'
from {0,0}, pnum=0, Contxt=0, in routine 'PDGEMM'.
PBLAS ERROR 'Parameter number 8 had an illegal value'
from {0,0}, pnum=0, Contxt=0, in routine 'PDGEMM'.
The program is in C and I divide the procs into a square grid ( initial attempt, assuming the user will enter either one process or even+square procs like 4, 16, 64, ...). The matrices A, B, and C are distributed in a 2-D block cyclic with NB = MB = 2. Here is the code with lots of printfs for sake of debugging:
- Code: Select all
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "mpi.h"
struct Matrix
{
int rowSize;
int colSize;
int rootRow;
int rootCol;
double * data;
};
struct Matrix * mult(struct Matrix A, struct Matrix B, MPI_Comm comm, int root, int nprow, int npcol, int MB, int NB)
{
int flag;
int ret = MPI_Initialized(&flag);
if(ret != MPI_SUCCESS || !flag)
{ // error
}
printf("mult ... 1\n");
int my_rank, nprocs;
Cblacs_pinfo( &my_rank, &nprocs );
int ictxt;
printf("rank %d mult ... 2\n", my_rank);
Cblacs_get( -1, 0, &ictxt );
printf("rank %d mult ... 3\n", my_rank);
// printf("mult ... 3\n");
Cblacs_gridinit( &ictxt, "Row", nprow, npcol );
printf("rank %d mult ... 4\n", my_rank);
// printf("mult ... 4\n");
int myrow, mycol;
Cblacs_gridinfo( ictxt, &nprow, &npcol, &myrow, &mycol);
printf("rank %d mult ... 5\n", my_rank);
// printf("mult ... 1\n");
if(nprow == -1)
{ // error the grid is not initialized
}
// distribute A and B to the process grid, the process that hold the
// global matrices of A and B are inside the matrix object.
double ONE=1, ZERO=0;
int myrank_mpi;
MPI_Comm_rank(comm, &myrank_mpi);
printf("rank %d mult ... 6\n", my_rank);
// printf("mult ... 1\n");
if(myrank_mpi == root)
//if(A.rootRow == myrow && A.rootCol == mycol)
{
for(int row=0; row < A.rowSize; ++row)
{
for(int col=0; col < A.colSize; ++col)
{
int rdest, cdest;
rdest = (row/MB) % nprow;
cdest = (col/NB) % npcol;
dgesd2d_(&ictxt, &ONE, &ONE, &A.data[row*A.colSize+col], &ONE, &rdest, &cdest);
}
printf("i is %d\n");
}
}
printf("rank %d mult ... 7\n", my_rank);
// printf("mult ... 1\n");
// All rcv A data from root:
int mA = numroc_( &A.rowSize, &MB, &myrow, &ZERO, &nprow );
printf("rank %d mult ... 8\n", my_rank);
// printf("mult ... 1\n");
int nA = numroc_( &A.colSize, &NB, &mycol, &ZERO, &npcol );
printf("myRow= %d\t myCol= %d\t mA= %d\t nA= %d\n", myrow, mycol, mA, nA);
// now rcv:
double *A_local = (double*) malloc(mA*nA*sizeof(double));
printf("rank %d mult ... 9\n", my_rank);
double rcvData;
if(myrank_mpi != root)
//if(A.rootRow != myrow || A.rootCol != mycol)
{
for(int i=0; i < mA; i++)
for(int j=0; j < nA; j++)
{
dgerv2d_(&ictxt, &ONE, &ONE, &rcvData, &ONE, &A.rootRow, &A.rootCol);
A_local[i*nA+j] = rcvData;
}
}
printf("rank %d mult ... 10\n", my_rank);
int descA[9]; int info;
descinit_(descA, &A.rowSize, &A.colSize, &MB, &NB, &ZERO, &ZERO, &ictxt, &mA, &info);
printf("rank %d mult ... 11\n", my_rank);
// sync:
Cblacs_barrier(ictxt,"A");
// distribute B:
if(myrank_mpi == root)
//if(B.rootRow == myrow && B.rootCol == mycol)
{
for(int row=0; row < B.rowSize; ++row)
for(int col=0; col < B.colSize; ++col)
{
int rdest, cdest;
rdest = (row/MB) % nprow;
cdest = (col/NB) % npcol;
dgesd2d_(&ictxt, &ONE, &ONE, &B.data[row*B.colSize+col], &ONE, &rdest, &cdest);
}
}
// all rcv B data from the root:
int mB = numroc_( &B.rowSize, &MB, &myrow, &ZERO, &nprow );
int nB = numroc_( &B.colSize, &NB, &mycol, &ZERO, &npcol );
printf("myRow= %d\t myCol= %d\t mA= %d\t nA= %d\n", myrow, mycol, mB, nB);
double *B_local = (double*) malloc(mB*nB*sizeof(double));
// now rcv:
//double rcvData;
if(myrank_mpi != root)
//if(B.rootRow != myrow || B.rootCol != mycol)
{
for(int i=0; i < mB; i++)
for(int j=0; j < nB; j++)
{
dgerv2d_(&ictxt, &ONE, &ONE, &rcvData, &ONE, &B.rootRow, &B.rootCol);
B_local[i*nB+j] = rcvData;
}
}
int descB[9]; //int info;
descinit_(descB, &B.rowSize, &B.colSize, &MB, &NB, &ZERO, &ZERO, &ictxt, &mB, &info);
printf("rank %d mult ... 12\n", my_rank);
// sync:
Cblacs_barrier(ictxt,"A");
int mC = numroc_( &A.rowSize, &MB, &myrow, &ZERO, &nprow );
int nC = numroc_( &B.colSize, &NB, &mycol, &ZERO, &npcol );
printf("myRow= %d\t myCol= %d\t mA= %d\t nA= %d\n", myrow, mycol, mC, nC);
double *C_local = (double*) malloc(mC*nC*sizeof(double));
int descC[9]; //int info;
descinit_(descC, &A.rowSize, &B.colSize, &MB, &NB, &ZERO, &ZERO, &ictxt, &mC, &info);
printf("rank %d mult ... 13\n", my_rank);
double alpha = 1.0; double beta = 0.0;
pdgemm_("N", "N", &A.rowSize, &A.colSize, &B.colSize, &alpha, A_local, &ZERO, &ZERO, descA, B_local, &ZERO, &ZERO, descB, &beta, C_local, &ZERO, &ZERO, descC);
printf("rank %d mult ... 14\n", my_rank);
Cblacs_barrier(ictxt,"A");
printf("myRow= %d\t myCol= %d\n", myrow, mycol);
for(int i=0;i<mC; i++)
{
for(int j=0; j < nC; j++)
printf("%.2f \t", C_local[j*mC+i]);
printf("\n");
}
Cblacs_gridexit( 0 );
// MPI_Finalize();
return 0;
}
int main(int argc, char **argv)
{
int myrank_mpi, nprocs_mpi;
MPI_Init( &argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi);
MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi);
printf("my rank is %d\t", myrank_mpi);
struct Matrix A, B;
int M = 5;
printf("M is %d\t", M);
MPI_Barrier(MPI_COMM_WORLD);
if(myrank_mpi == 0)
{
printf("allocating A ... \n");
A.data = (double*) malloc(M*M*sizeof(double));
if(A.data == 0)
{ // fail to allocate Matrix A
printf("fail to allocate Matrix A\n");
}
for(int i=0; i<M;i++ )
for(int j=0; j<M;j++)
A.data[i*M+j] = (2*i+j+1);
printf("allocating B ... \n");
B.data = (double*) malloc(M*M*sizeof(double));
if(B.data == 0)
{ // fail to allocate Matrix B
printf("fail to allocate Matrix B\n");
}
printf("B is allocated ...\n");
for(int i=0; i<M;i++ )
for(int j=0; j<M;j++)
B.data[i*M+j] = (i+2*j+1);
printf("B is initialized ...\n");
}
else
{
A.data = 0;
}
printf("....1\n");
A.rowSize = A.colSize = B.rowSize = B.colSize = M;
printf("....2\n");
A.rootRow = A.rootCol = B.rootRow = B.rootCol = 0;
printf("....3\n");
MPI_Barrier(MPI_COMM_WORLD);
printf("Start ... \n");
if(nprocs_mpi == 1)
mult(A, B, MPI_COMM_WORLD, 0, nprocs_mpi, nprocs_mpi, 1, 1);
else
mult(A, B, MPI_COMM_WORLD, 0, nprocs_mpi/2, nprocs_mpi/2, 2, 2);
printf("Finish ... \n");
MPI_Finalize();
return 0;
}
Frankly, I don't know how to calculate IA and JA parameters, can you please show me how?
Thanks,
Anas

