I would like to distribute a 10x10 matrix initialized and filled in on a master process (0,0) to all 4 processes (0,1|1,0|1,1) on a BLACS grid. Block size is 2x2. Please see a figure below. On the left is the original matrix with colorized blocks, on the right is the expected distribution:
Below is the minimal BLACS code to distribute the matrix over a 2x2 process grid:
- Code: Select all
PROGRAM DIST_MTRX_MINI
IMPLICIT NONE
double precision, allocatable :: A(:,:), AA(:,:)
integer :: M = 10, N = 10, MB = 2, NB = 2
integer :: iCtxt, rpp, cpp
integer :: rSrc = 0, cSrc = 0, rDst, cDst, iRow, iCol
integer :: me, np, npRow = 2, npCol = 2
integer :: descA(9), info, err
integer :: iDist = 1, iSeed(4) = [2, 3296, 521, 17]
integer :: ia, ja, ib, jb
integer, external :: numroc, prc, blk, crd
character(len=5), external :: to_str
! initialize BLACS
call blacs_pinfo(me, np)
call blacs_get(-1, 0, iCtxt)
call blacs_gridinit(iCtxt, 'Row-major', npRow, npCol)
call blacs_gridinfo(iCtxt, npRow, npCol, iRow, iCol)
rpp = numroc(M, MB, iRow, 0, npRow)
cpp = numroc(N, NB, iCol, 0, npCol)
allocate(AA(rpp,cpp), stat=err)
AA = 0.0
call descinit(descA, M, N, MB, NB, 0, 0, iCtxt, max(1, rpp), info)
! @master: allocate, initialize matrix A
if (me==0) then
allocate(A(M,N), stat=err)
! consequtive numbers
do ia = 1, M
do ja = 1, N
A(ia,ja) = (ia-1)*N+ja
end do
end do
end if
do ja = 1, N, NB
do ia = 1, M, MB
rDst = prc(rSrc,ia,MB,npRow)
cDst = prc(cSrc,ja,NB,npCol)
ib = blk(ia,npRow,MB)*MB + crd(ia,MB)
jb = blk(ja,npCol,NB)*NB + crd(ja,NB)
! @master: send matrix block AA
if (me==0) then
! call dgesd2d(iCtxt, MB, NB, A(ia:ia+MB-1,ja:ja+NB-1), rpp, rDst, cDst)
call dgesd2d(iCtxt, MB, NB, A(ia,ja), rpp, rDst, cDst)
end if
! @destination: receive matrix block AA
if (rDst==iRow.and.cDst==iCol) then
! call dgerv2d(iCtxt, MB, NB, AA(ib:ib+MB-1,jb:jb+NB-1), rpp, rSrc, cSrc)
call dgerv2d(iCtxt, MB, NB, AA(ib,jb), rpp, rSrc, cSrc)
end if
end do
end do
! @test: save received matrix block to file
open(unit = 51, &
file = "AA" // trim(to_str(me)) // ".txt", &
status = "new", &
action = "write")
do ia = 1, rpp
write(51, *) AA(ia,:)
end do
close(51)
if (me==0) deallocate(A, stat=err)
deallocate(AA, stat=err)
! finalize BLACS
call blacs_gridexit(iCtxt)
call blacs_exit(0)
END PROGRAM DIST_MTRX_MINI
!-----------------------------------------------------------------------
integer FUNCTION prc(rSrc, ia, mb, npRow)
implicit none
integer, intent(in) :: rSrc, ia, mb, npRow
prc = mod(rSrc+floor(real(ia-1)/mb),npRow)
END FUNCTION prc
!-----------------------------------------------------------------------
integer FUNCTION blk(ia, npRow, mb)
implicit none
integer, intent(in) :: ia, npRow, mb
blk = floor(real(ia-1)/(npRow*mb))
END FUNCTION blk
!-----------------------------------------------------------------------
integer FUNCTION crd(ia, mb)
implicit none
integer, intent(in) :: ia, mb
crd = mod(ia-1,mb)+1
END FUNCTION crd
!-----------------------------------------------------------------------
!> Converts given number into string.
!!
!! @param[in] n given number, type integer
CHARACTER(len=5) FUNCTION to_str(n)
! Input argument declaration
integer, intent(in) :: n
write(to_str, "(I5)") n
to_str = trim(adjustl(to_str))
END FUNCTION to_str
The code compiles with MPICH v3.2 and GNU Fortran 5.3.0 and executes successfully with "mpiexec -np 4 ./dist_mtrx_mini". However, the output files "AA?.txt", where each process stores its block AA of matrix A do not contain the data in the expected order. The first columns of each block are transferred as expected, while the second columns are scrambled.
I have tried to state explicitly what A elements do I need to transfer and to receive in each BLACS send/receive routine (Please see the lines commented out). However, this results in a segmentation fault. I have also tried to pack each block into a dedicated temporary array, then send it, receive at destination, unpack and place into a correct location. Even in this case the second column of a block on the receiver side contains zeros.
I have seen examples on the internet where people use MPI statements for matrix distribution, but I would still like to get it working with BLACS.

