Hi,
I am working on a code that reads/distributes a matrix created externally (using the routine PSLAREAD). The code works fine for matrices with dimensions of up to 1250x1250 using 4 CPUs, but it fails for higher dimensions. I am almost certain I have to tune some parameters (e.g. worksize, memory, etc) but I am not sure which one.
Please could anyone help me with this ?. THANKS
The error message I got when working with a 1500x1500 matrix, using NB=2, NPROW=2,NPCOL=2, is the following:
[arxt27:26348] *** Process received signal ***
[arxt27:26348] Signal: Segmentation fault (11)
[arxt27:26348] Signal code: (128)
[arxt27:26348] Failing at address: (nil)
[arxt27:26348] [ 0] /lib/libpthread.so.0(+0xf8f0) [0x2b4e681238f0]
[arxt27:26348] [ 1] test(BI_GetBuff+0x1c) [0x40a08c]
[arxt27:26348] [ 2] test(BI_Pack+0xef) [0x40ac4f]
[arxt27:26348] [ 3] test(sgesd2d_+0x5d) [0x4069dd]
[arxt27:26348] [ 4] test(pslaread_+0x4fa) [0x4061aa]
[arxt27:26348] [ 5] test(MAIN__+0x402) [0x403462]
[arxt27:26348] [ 6] test(main+0x2a) [0x40b11a]
[arxt27:26348] [ 7] /lib/libc.so.6(__libc_start_main+0xfd) [0x2b4e68350c4d]
[arxt27:26348] [ 8] test() [0x402f99]
[arxt27:26348] *** End of error message ***
The code :
*==========================================================
PROGRAM TEST
*
* .. Parameters ..
INTEGER REALSZ, INTGSZ, MEMSIZ, TOTMEM
PARAMETER ( REALSZ = 4, INTGSZ = 4, TOTMEM = 2000000,
$ MEMSIZ = TOTMEM / REALSZ )
INTEGER BLOCK_CYCLIC_2D, CSRC_, CTXT_, DLEN_, DT_,
$ LLD_, MB_, M_, NB_, N_, RSRC_
PARAMETER ( BLOCK_CYCLIC_2D = 1, DLEN_ = 9, DT_ = 1,
$ CTXT_ = 2, M_ = 3, N_ = 4, MB_ = 5, NB_ = 6,
$ RSRC_ = 7, CSRC_ = 8, LLD_ = 9 )
REAL ONE
PARAMETER ( ONE = 1.0E+0 )
* ..
* .. Local Scalars ..
CHARACTER*80 OUTFILE
INTEGER IAM, ICTXT, INFO, IPA, IPACPY, IPB, IPPIV, IPX,
$ IPW, LIPIV, MYCOL, MYROW, N, NB, NOUT, NPCOL,
$ NPROCS, NPROW, NP, NQ, NQRHS, NRHS, WORKSIZ
REAL ANORM, BNORM, EPS, XNORM, RESID
* ..
* .. Local Arrays ..
INTEGER DESCA( DLEN_ ), DESCB( DLEN_ ), DESCX( DLEN_ )
REAL MEM( MEMSIZ )
* ..
* .. External Subroutines ..
EXTERNAL BLACS_EXIT, BLACS_GET, BLACS_GRIDEXIT,
$ BLACS_GRIDINFO, BLACS_GRIDINIT, BLACS_PINFO,
$ DESCINIT, IGSUM2D, PDSCAEXINFO, PSGESV,
$ PSGEMM, PSLACPY, PSLAPRNT, PSLAREAD, PSLAWRITE
* ..
* .. External Functions ..
INTEGER ICEIL, NUMROC
REAL PSLAMCH, PSLANGE
EXTERNAL ICEIL, NUMROC, PSLAMCH, PSLANGE
* ..
* .. Intrinsic Functions ..
INTRINSIC DBLE, MAX
* ..
* .. Executable Statements ..
*
* Get starting information
*
CALL BLACS_PINFO( IAM, NPROCS )
CALL PDSCAEXINFO( OUTFILE, NOUT, N, NRHS, NB, NPROW, NPCOL, MEM,
$ IAM, NPROCS )
*
* Define process grid
*
CALL BLACS_GET( -1, 0, ICTXT )
CALL BLACS_GRIDINIT( ICTXT, 'Row-major', NPROW, NPCOL )
CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
*
* Go to bottom of process grid loop if this case doesn't use my
* process
*
IF( MYROW.GE.NPROW .OR. MYCOL.GE.NPCOL )
$ GO TO 20
*
NP = NUMROC( N, NB, MYROW, 0, NPROW )
NQ = NUMROC( N, NB, MYCOL, 0, NPCOL )
*
* Initialize the array descriptor for the matrix A
*
CALL DESCINIT( DESCA, N, N, NB, NB, 0, 0, ICTXT, MAX( 1, NP ),
$ INFO )
*
* Assign pointers into MEM for SCALAPACK arrays, A is
* allocated starting at position MEM( 1 )
*
IPA = 1
IPACPY = IPA + DESCA( LLD_ )*NQ
LIPIV = ICEIL( INTGSZ*( NP+NB ), REALSZ )
IPW = MAX( NP, LIPIV )
*
WORKSIZ = NB
*
* Check for adequate memory for problem size
*
INFO = 0
IF( IPW+WORKSIZ.GT.MEMSIZ ) THEN
IF( IAM.EQ.0 )
$ WRITE( NOUT, FMT = 9998 ) 'test', ( IPW+WORKSIZ )*REALSZ
INFO = 1
END IF
*
* Check all processes for an error
*
CALL IGSUM2D( ICTXT, 'All', ' ', 1, 1, INFO, 1, -1, 0 )
IF( INFO.GT.0 ) THEN
IF( IAM.EQ.0 )
$ WRITE( NOUT, FMT = 9999 ) 'MEMORY'
GO TO 10
END IF
*
* Read from file and distribute matrix A
*
CALL PSLAREAD( 'test1500x1500.dat', MEM( IPA ), DESCA, 0, 0,
$ MEM( IPW ) )
*
* Make a copy of A for checking purposes
*
* CALL PSLACPY( 'All', N, N, MEM( IPA ), 1, 1, DESCA,
* $ MEM( IPACPY ), 1, 1, DESCA )
*
CALL PSLAPRNT( N, N, MEM( IPA ), 1, 1, DESCA, 0, 0,
$ 'A', NOUT, MEM( IPW ) )
*
10 CONTINUE
*
CALL BLACS_GRIDEXIT( ICTXT )
*
20 CONTINUE
*
*
*
CALL BLACS_EXIT( 0 )
*
9999 FORMAT( 'Bad ', A6, ' parameters: going on to next test case.' )
9998 FORMAT( 'Unable to perform ', A, ': need TOTMEM of at least',
$ I11 )
*
STOP
*
*
END

