In my program, I have to make many PCGEMR2D calls to distribute and redistribute data between global matrices. A few PCGEMR2D calls work fine, but when I make too many of them, the program simply crashes. To determine whether the problem really lies in PCGEMR2D, I wrote a very simple test program that calls only PCGEMR2D (the source code is attached).
In the test program, I set up several pause points and read the memory usage with the Linux “top” command. I can see that the memory usage accumulates as more and more PCGEMR2D calls are made.
Once enough memory has accumulated, the program crashes with the following error message:
[2] Abort: VAPI_register_mr at line 65 in file collutils.c
Timeout alarm signaled
Cleaning up all processes ...forrtl: error (78): process killed (SIGTERM)
forrtl: error (78): process killed (SIGTERM)
forrtl: error (78): process killed (SIGTERM)
done.
In my test program, the crash occurred after I had entered 3 integers at the pause points, which corresponds to a total loop count between 1500 and 2000.
My computer setup is as follows:
Intel Xeon EM64T processor
2GB memory
Intel fortran compiler V8.01
MPICH
InfiniBand communication network
Intel Math Kernel Library Cluster version 8.0
Source code:
- Code: Select all
! Stress test for the ScaLAPACK redistribution routine PCGEMR2D.
!
! A complex M x M block-cyclic matrix A is repeatedly split into its four
! N x N quadrants (A1..A4, described by an N x N descriptor) and then
! reassembled from them.  Every pause_every iterations rank 0 waits for an
! integer on standard input, so that the memory usage of the processes can be
! inspected externally (e.g. with "top") while the other processes wait at a
! barrier.
program pcgemr2d_stress
  implicit none

  ! Global sizes (M = 2*N so that A holds exactly four N x N quadrants) and
  ! the block-cyclic blocking factors.
  integer, parameter :: M = 324, N = 162, mb = 64, nb = mb
  ! Process row/column owning the first block, and descriptor length.
  integer, parameter :: rsrc = 0, csrc = 0, dlen_ = 9
  ! Number of redistribution rounds and how often to pause for inspection.
  integer, parameter :: niter = 3000, pause_every = 500
  ! Local index of the A1 element echoed at every pause.
  integer, parameter :: probe = 10

  integer :: ictxt, rank, tproc
  integer :: nprow, npcol, myrow, mycol
  integer, external :: numroc
  integer :: mxlocr_M, mxlocc_M, mxlld_M, desc_M(dlen_)
  integer :: mxlocr_N, mxlocc_N, mxlld_N, desc_N(dlen_)
  complex, allocatable :: A(:,:), A1(:,:), A2(:,:), A3(:,:), A4(:,:)
  real, allocatable :: B(:,:)
  integer :: i, ppp, info

  ! Build a process grid that is as close to square as possible.
  call blacs_pinfo(rank, tproc)
  call blacs_get(-1, 0, ictxt)
  nprow = int(sqrt(real(tproc+0.0001)))
  npcol = tproc/nprow
  call blacs_gridinit(ictxt, 'r', nprow, npcol)
  call blacs_gridinfo(ictxt, nprow, npcol, myrow, mycol)
  print *, 'rank',rank, 'myrow', myrow, 'mycol',mycol

  ! When nprow*npcol < tproc the surplus processes are not part of the grid
  ! (BLACS reports their coordinates as -1).  They hold no data and must not
  ! touch the descriptors or the grid context, so they leave right away.
  if (myrow < 0 .or. mycol < 0) then
    call blacs_exit(0)
    stop
  endif

  ! Local extents and descriptor of the M x M matrix.
  mxlocr_M = numroc(M, nb, myrow, rsrc, nprow)
  mxlocc_M = numroc(M, nb, mycol, csrc, npcol)
  mxlld_M = max(1, mxlocr_M)
  call descinit(desc_M, M, M, nb, nb, rsrc, csrc, ictxt, mxlld_M, info)
  if (info .ne. 0) then
    print *, 'descinit error, info =',info
    ! Abort every process of the grid; a plain stop on one process could
    ! leave the others waiting in a later collective call.
    call blacs_abort(ictxt, info)
    stop
  endif

  ! Local extents and descriptor shared by the four N x N quadrants.
  mxlocr_N = numroc(N, nb, myrow, rsrc, nprow)
  mxlocc_N = numroc(N, nb, mycol, csrc, npcol)
  mxlld_N = max(1, mxlocr_N)
  call descinit(desc_N, N, N, nb, nb, rsrc, csrc, ictxt, mxlld_N, info)
  if (info .ne. 0) then
    print *, 'descinit error, info =',info
    call blacs_abort(ictxt, info)
    stop
  endif

  allocate(A(mxlld_M, mxlocc_M))
  allocate(B(mxlld_M, mxlocc_M))
  allocate(A1(mxlld_N, mxlocc_N))
  allocate(A2(mxlld_N, mxlocc_N))
  allocate(A3(mxlld_N, mxlocc_N))
  allocate(A4(mxlld_N, mxlocc_N))

  ! Fill A with rank-dependent random complex data.
  call random_number(B)
  A = cmplx(B, (rank+1)*2.8*B)

  do i = 1, niter
    ! Scatter the four quadrants of A into A1..A4 ...
    call pcgemr2d(N, N, A, 1,   1,   desc_M, A1, 1, 1, desc_N, ictxt)
    call pcgemr2d(N, N, A, 1+N, 1,   desc_M, A2, 1, 1, desc_N, ictxt)
    call pcgemr2d(N, N, A, 1,   1+N, desc_M, A3, 1, 1, desc_N, ictxt)
    call pcgemr2d(N, N, A, 1+N, 1+N, desc_M, A4, 1, 1, desc_N, ictxt)
    ! ... and gather them back into the same positions of A.
    call pcgemr2d(N, N, A1, 1, 1, desc_N, A, 1,   1,   desc_M, ictxt)
    call pcgemr2d(N, N, A2, 1, 1, desc_N, A, 1+N, 1,   desc_M, ictxt)
    call pcgemr2d(N, N, A3, 1, 1, desc_N, A, 1,   1+N, desc_M, ictxt)
    call pcgemr2d(N, N, A4, 1, 1, desc_N, A, 1+N, 1+N, desc_M, ictxt)

    if (mod(i, pause_every) .eq. 0) then
      ! On larger grids a process may own fewer than probe local rows or
      ! columns of A1 (possibly none), so only echo the element if it exists.
      if (size(A1, 1) >= probe .and. size(A1, 2) >= probe) then
        print *,'interation:',i,A1(probe,probe)
      else
        print *,'interation:',i
      endif
      if (rank .eq. 0) then
        print *, 'Please enter an integer after read the memory by <top> command'
        read(*,*)ppp
      endif
      ! Hold every grid process here until rank 0 has been released.
      call blacs_barrier(ictxt, 'A')
    endif
  enddo

  deallocate(A, A1, A2, A3, A4, B)
  call blacs_gridexit(ictxt)
  call blacs_exit(0)
end program pcgemr2d_stress

