*  Extracted from plasma/docs/cdrvls_8f_source.html

      SUBROUTINE cdrvls( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB,
     $                   ibval, nbval, nxval, thresh, tsterr, a, copya,
     $                   b, copyb, c, s, copys, work, rwork, iwork,
     $                   nout )
*
      include 'plasmaf.h'
*
*  -- LAPACK test routine (version 3.1.1) --
*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
*     January 2007
*
*     This variant drives PLASMA_CGELS instead of the LAPACK drivers.
*
*     .. Scalar Arguments ..
      LOGICAL            tsterr
      INTEGER            nm, nn, nnb, nns, nout
      REAL               thresh
*     ..
*     .. Array Arguments ..
      LOGICAL            dotype( * )
      INTEGER            iwork( * ), mval( * ), nbval( * ), nsval( * ),
     $                   nval( * ), nxval( * ), ibval( * )
      REAL               copys( * ), rwork( * ), s( * )
      COMPLEX            a( * ), b( * ), c( * ), copya( * ), copyb( * ),
     $                   work( * )
*     ..
*
*  Purpose
*  =======
*
*  CDRVLS tests the least squares driver PLASMA_CGELS.
*
*  For every M in MVAL, N in NVAL, NRHS in NSVAL, every enabled
*  full-rank matrix type (1-3) and every blocking pair (NB,IB) taken
*  from (NBVAL,IBVAL), a test matrix is generated with CQRT13, a
*  consistent right hand side is built with CGEMM, the system is
*  solved with PLASMA_CGELS (no-transpose case only) and two test
*  ratios are computed:
*
*     RESULT(1)  from CQRT16
*     RESULT(2)  from CQRT17 if M >= N, from CQRT14 otherwise
*
*  A line is written to unit NOUT for each RESULT(k) >= THRESH, and
*  a summary is printed with ALASVM at the end.
*
*  Matrix types 4-6 are accepted in DOTYPE but no test is run for
*  them in this routine.  A blocking pair is skipped when
*  MAX(M,N)/25 > NB (integer division).
*
*  Arguments
*  =========
*
*  DOTYPE  (input) LOGICAL array, dimension (NTYPES)
*          The matrix types to be used for testing.  Matrices of type j
*          (for 1 <= j <= NTYPES) are used for testing if DOTYPE(j) =
*          .TRUE.; if DOTYPE(j) = .FALSE., then type j is not used.
*          The matrix of type j is generated as follows:
*          j=1: A = U*D*V where U and V are random unitary matrices
*               and D has random entries (> 0.1) taken from a uniform
*               distribution (0,1). A is full rank.
*          j=2: The same as 1, but A is scaled up.
*          j=3: The same as 1, but A is scaled down.
*          j=4: A = U*D*V where U and V are random unitary matrices
*               and D has 3*min(M,N)/4 random entries (> 0.1) taken
*               from a uniform distribution (0,1) and the remaining
*               entries set to 0. A is rank-deficient.
*          j=5: The same as 4, but A is scaled up.
*          j=6: The same as 4, but A is scaled down.
*          Types 4, 5 and 6 are currently not exercised here.
*
*  NM      (input) INTEGER
*          The number of values of M contained in the vector MVAL.
*
*  MVAL    (input) INTEGER array, dimension (NM)
*          The values of the matrix row dimension M.
*
*  NN      (input) INTEGER
*          The number of values of N contained in the vector NVAL.
*
*  NVAL    (input) INTEGER array, dimension (NN)
*          The values of the matrix column dimension N.
*
*  NNS     (input) INTEGER
*          The number of values of NRHS contained in the vector NSVAL.
*
*  NSVAL   (input) INTEGER array, dimension (NNS)
*          The values of the number of right hand sides NRHS.
*
*  NNB     (input) INTEGER
*          The number of values contained in each of the vectors
*          NBVAL, IBVAL and NXVAL.  Entry i of the three vectors is
*          used together.
*
*  IBVAL   (input) INTEGER array, dimension (NNB)
*          The values of the inner block size IB, passed to
*          PLASMA_SET( PLASMA_INNER_BLOCK_SIZE, ... ).
*
*  NBVAL   (input) INTEGER array, dimension (NNB)
*          The values of the blocksize NB, passed to XLAENV( 1, ... )
*          and to PLASMA_SET( PLASMA_TILE_SIZE, ... ).
*
*  NXVAL   (input) INTEGER array, dimension (NNB)
*          The values of the crossover point NX, passed to
*          XLAENV( 3, ... ).
*
*  THRESH  (input) REAL
*          The threshold value for the test ratios.  A result is
*          included in the output file if RESULT >= THRESH.  To have
*          every test ratio printed, use THRESH = 0.
*
*  TSTERR  (input) LOGICAL
*          Flag that indicates whether error exits are to be tested.
*          If .TRUE., CERRLS is called once before the test loops.
*
*  A       (workspace) COMPLEX array, dimension (MMAX*NMAX)
*          where MMAX is the maximum value of M in MVAL and NMAX is the
*          maximum value of N in NVAL.
*
*  COPYA   (workspace) COMPLEX array, dimension (MMAX*NMAX)
*
*  B       (workspace) COMPLEX array
*          Stored with leading dimension LDB = max(1,M,N) and NRHS
*          columns, so max(MMAX,NMAX)*NSMAX elements are sufficient,
*          where NSMAX is the maximum value of NRHS in NSVAL.
*
*  COPYB   (workspace) COMPLEX array, same storage as B.
*
*  C       (workspace) COMPLEX array, same storage as B.
*
*  S       (workspace) REAL array, dimension
*                      (min(MMAX,NMAX))
*          Not referenced by this routine.
*
*  COPYS   (workspace) REAL array, dimension
*                      (min(MMAX,NMAX))
*          Not referenced by this routine.
*
*  WORK    (workspace) COMPLEX array, dimension
*                      (MMAX*NMAX + 4*NMAX + MMAX).
*          NOTE(review): the length LWORK handed to CQRT14/CQRT17 is
*          max( 1, (M+NRHS)*(N+2), (N+NRHS)*(M+2),
*               M*N+4*min(M,N)+max(M,N), 2*N+M ),
*          which can be larger than the size stated above; confirm
*          that the caller allocates enough.
*
*  RWORK   (workspace) REAL array, dimension (5*NMAX-1)
*          Passed to CQRT16.
*
*  IWORK   (workspace) INTEGER array, dimension (15*NMAX)
*          Not referenced by this routine.
*
*  NOUT    (input) INTEGER
*          The unit number for output.
*
*  =====================================================================
*
*     .. Parameters ..
*     Only two test ratios are produced per solve.
      INTEGER            ntests
      parameter( ntests = 2 )
*     SMLSIZ is installed through XLAENV( 9, ... ) before the tests.
      INTEGER            smlsiz
      parameter( smlsiz = 25 )
      REAL               one, zero
      parameter( one = 1.0e+0, zero = 0.0e+0 )
      COMPLEX            cone, czero
      parameter( cone = ( 1.0e+0, 0.0e+0 ),
     $                   czero = ( 0.0e+0, 0.0e+0 ) )
*     ..
*     .. Local Scalars ..
      CHARACTER          trans
      CHARACTER*3        path
      INTEGER            i, ib, im, in, inb, info, ins, irank, iscale,
     $                   itran, itype, k, lda, ldb, ldwork, lwork, m,
     $                   mnmin, n, nb, ncols, nerrs, nfail, nrhs,
     $                   nrows, nrun, plasma_trans
*     HT receives the workspace handle from
*     PLASMA_ALLOC_WORKSPACE_CGELS and is released with
*     PLASMA_DEALLOC_HANDLE.
      INTEGER            ht( 2 )
      REAL               norma
*     ..
*     .. Local Arrays ..
      INTEGER            iseed( 4 ), iseedy( 4 )
      REAL               result( ntests )
*     ..
*     .. External Functions ..
      REAL               cqrt14, cqrt17
      EXTERNAL           cqrt14, cqrt17
*     ..
*     .. External Subroutines ..
      EXTERNAL           alaerh, alahd, alasvm, cerrls, cgemm, clacpy,
     $                   clarnv, cqrt13, cqrt16, csscal, xlaenv
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          max, min, REAL
*     ..
*     .. Scalars in Common ..
      LOGICAL            lerr, ok
      CHARACTER*32       srnamt
      INTEGER            infot, iounit
*     ..
*     .. Common blocks ..
      common             / infoc / infot, iounit, ok, lerr
      common             / srnamc / srnamt
*     ..
*     .. Data statements ..
      DATA               iseedy / 1988, 1989, 1990, 1991 /
*     ..
*     .. Executable Statements ..
*
*     Initialize constants and the random number seed.
*     PATH( 1: 1 ) keeps only the first character of the literal,
*     so PATH becomes 'CLS'.
*
      path( 1: 1 ) = 'Complex precision'
      path( 2: 3 ) = 'LS'
      nrun = 0
      nfail = 0
      nerrs = 0
      DO 10 i = 1, 4
         iseed( i ) = iseedy( i )
   10 continue
*
*     Test the error exits
*
      CALL xlaenv( 9, smlsiz )
      IF( tsterr )
     $   CALL cerrls( path, nout )
*
*     Print the header if NM = 0 or NN = 0 and THRESH = 0.
*
      IF( ( nm.EQ.0 .OR. nn.EQ.0 ) .AND. thresh.EQ.zero )
     $   CALL alahd( nout, path )
      infot = 0
*
      DO 140 im = 1, nm
         m = mval( im )
         lda = max( 1, m )
*
         DO 130 in = 1, nn
            n = nval( in )
            mnmin = min( m, n )
*
*           B, COPYB and C hold either an M-row right hand side or
*           an N-row solution, hence the common leading dimension.
*
            ldb = max( 1, m, n )
*
            DO 120 ins = 1, nns
               nrhs = nsval( ins )
               lwork = max( 1, ( m+nrhs )*( n+2 ), ( n+nrhs )*( m+2 ),
     $                 m*n+4*mnmin+max( m, n ), 2*n+m )
*
               DO 110 irank = 1, 2
                  DO 100 iscale = 1, 3
                     itype = ( irank-1 )*3 + iscale
                     IF( .NOT.dotype( itype ) )
     $                  go to 100
*
*                    Only the full-rank types (IRANK = 1) are tested.
*
                     IF( irank.EQ.1 ) THEN
*
*                       Test CGELS
*
*                       Generate a matrix of scaling type ISCALE
*
                        CALL cqrt13( iscale, m, n, copya, lda, norma,
     $                               iseed )
                        DO 40 inb = 1, nnb
                           nb = nbval( inb )
                           ib = ibval( inb )
                           CALL xlaenv( 1, nb )
                           CALL xlaenv( 3, nxval( inb ) )
*
*                          Skip blocking pairs for which NB is
*                          smaller than MAX(M,N)/25.
*
                           IF ( (max(m, n) / 25) .GT. nb ) THEN
                              goto 40
                           END IF
                           CALL plasma_set( plasma_tile_size, nb, info )
                           CALL plasma_set( plasma_inner_block_size, ib,
     $                                      info )
*
*                          Allocate T
*
                           CALL plasma_alloc_workspace_cgels( m, n , ht,
     $                                                        info )
*
*                          Without a valid workspace handle the
*                          solve cannot be run: record the error
*                          and move on to the next blocking pair.
*
                           IF( info.NE.0 ) THEN
                              IF( nfail.EQ.0 .AND. nerrs.EQ.0 )
     $                           CALL alahd( nout, path )
                              WRITE( nout, fmt = 9998 )info, m, n,
     $                           nb
                              nerrs = nerrs + 1
                              go to 40
                           END IF
*
*                          Only the no-transpose case is run; the
*                          original loop bound is kept for reference:
*                           DO 30 ITRAN = 1, 2
*
                           DO 30 itran = 1, 1
                              IF( itran.EQ.1 ) THEN
                                 trans = 'N'
                                 plasma_trans = plasmanotrans
                                 nrows = m
                                 ncols = n
                              ELSE
                                 trans = 'C'
                                 plasma_trans = plasmaconjtrans
                                 nrows = n
                                 ncols = m
                              END IF
                              ldwork = max( 1, ncols )
*
*                             Set up a consistent rhs
*
                              IF( ncols.GT.0 ) THEN
                                 CALL clarnv( 2, iseed, ncols*nrhs,
     $                                        work )
                                 CALL csscal( ncols*nrhs,
     $                                        one / REAL( NCOLS ), work,
     $                                        1 )
                              END IF
                              CALL cgemm( trans, 'No transpose', nrows,
     $                                    nrhs, ncols, cone, copya, lda,
     $                                    work, ldwork, czero, b, ldb )
                              CALL clacpy( 'Full', nrows, nrhs, b, ldb,
     $                                     copyb, ldb )
*
*                             Solve LS or overdetermined system
*
                              IF( m.GT.0 .AND. n.GT.0 ) THEN
                                 CALL clacpy( 'Full', m, n, copya, lda,
     $                                        a, lda )
                                 CALL clacpy( 'Full', nrows, nrhs,
     $                                        copyb, ldb, b, ldb )
                              END IF
                              srnamt = 'CGELS '
                              CALL plasma_cgels( plasma_trans,
     $                                           m, n, nrhs,
     $                                           a, lda, ht, b, ldb,
     $                                           info )
*
                              IF( info.NE.0 )
     $                           CALL alaerh( path, 'CGELS ', info, 0,
     $                                        trans, m, n, nrhs, -1, nb,
     $                                        itype, nfail, nerrs,
     $                                        nout )
*
*                             Check correctness of results
*
                              IF( nrows.GT.0 .AND. nrhs.GT.0 )
     $                           CALL clacpy( 'Full', nrows, nrhs,
     $                                        copyb, ldb, c, ldb )
                              CALL cqrt16( trans, m, n, nrhs, copya,
     $                                     lda, b, ldb, c, ldb, rwork,
     $                                     result( 1 ) )
*
                              IF( ( itran.EQ.1 .AND. m.GE.n ) .OR.
     $                            ( itran.EQ.2 .AND. m.LT.n ) ) THEN
*
*                                Solving LS system
*
                                 result( 2 ) = cqrt17( trans, 1, m, n,
     $                                         nrhs, copya, lda, b, ldb,
     $                                         copyb, ldb, c, work,
     $                                         lwork )
                              ELSE
*
*                                Solving overdetermined system
*
                                 result( 2 ) = cqrt14( trans, m, n,
     $                                         nrhs, copya, lda, b, ldb,
     $                                         work, lwork )
                              END IF
*
*                             Print information about the tests that
*                             did not pass the threshold.
*
                              DO 20 k = 1, 2
                                 IF( result( k ).GE.thresh ) THEN
                                    IF( nfail.EQ.0 .AND. nerrs.EQ.0 )
     $                                 CALL alahd( nout, path )
                                    WRITE( nout, fmt = 9999 )trans, m,
     $                                 n, nrhs, nb, itype, k,
     $                                 result( k )
                                    nfail = nfail + 1
                                 END IF
   20                         continue
                              nrun = nrun + 2
   30                      continue
*
*                          Deallocate T
*
                           CALL plasma_dealloc_handle( ht, info )
   40                   continue
                     END IF
*
  100             continue
  110          continue
  120       continue
  130    continue
  140 continue
*
*     Print a summary of the results.
*
      CALL alasvm( path, nout, nfail, nrun, nerrs )
*
 9999 format( ' TRANS=''', a1, ''', M=', i5, ', N=', i5, ', NRHS=', i4,
     $      ', NB=', i4, ', type', i2, ', test(', i2, ')=', g12.5 )
 9998 format( ' *** PLASMA_ALLOC_WORKSPACE_CGELS returned INFO=', i6,
     $      ' for M=', i5, ', N=', i5, ', NB=', i4 )
      return
*
*     End of CDRVLS
*
      END