*     Source: plasma/docs/sdrvls_8f_source.html

      SUBROUTINE sdrvls( dotype, nm, mval, nn, nval, nns, nsval, nnb,
     $                   nbval, nxval, thresh, tsterr, a, copya, b,
     $                   copyb, c, s, copys, ibval, work, iwork, nout )
*
      include 'plasmaf.h'
*
*  -- LAPACK test routine (version 3.1.1) --
*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
*     January 2007
*
*     .. Scalar Arguments ..
      LOGICAL            tsterr
      INTEGER            nm, nn, nnb, nns, nout
      REAL               thresh
*     ..
*     .. Array Arguments ..
      LOGICAL            dotype( * )
      INTEGER            iwork( * ), mval( * ), nbval( * ), nsval( * ),
     $                   nval( * ), nxval( * ), ibval( * )
      REAL               a( * ), b( * ), c( * ), copya( * ), copyb( * ),
     $                   copys( * ), s( * ), work( * )
*     ..
*
*  Purpose
*  =======
*
*  SDRVLS tests the PLASMA least squares driver PLASMA_SGELS.  The
*  routine keeps the calling sequence of the LAPACK test driver it is
*  derived from, but SGELSS, SGELSX, SGELSY and SGELSD are NOT tested
*  by this version.
*
*  For every combination of M, N, NRHS, full-rank matrix type and
*  (NB,IB) pair, a consistent right hand side B = A*X is built, the
*  system is solved with PLASMA_SGELS (no-transpose case only), and
*  two test ratios are computed:
*     RESULT(1)  returned by SQRT16;
*     RESULT(2)  returned by SQRT17 when M >= N, by SQRT14 otherwise.
*  A ratio that is >= THRESH is written to unit NOUT and counted as a
*  failure.  A (NB,IB) pair is skipped when max(M,N)/25 > NB (integer
*  division), or when the PLASMA workspace cannot be allocated; the
*  latter case is reported on NOUT and counted as an error.
*
*  Arguments
*  =========
*
*  DOTYPE  (input) LOGICAL array, dimension (NTYPES)
*          The matrix types to be used for testing.  Matrices of type j
*          (for 1 <= j <= NTYPES) are used for testing if DOTYPE(j) =
*          .TRUE.; if DOTYPE(j) = .FALSE., then type j is not used.
*          The matrix of type j is generated as follows:
*          j=1: A = U*D*V where U and V are random orthogonal matrices
*               and D has random entries (> 0.1) taken from a uniform
*               distribution (0,1). A is full rank.
*          j=2: The same as 1, but A is scaled up.
*          j=3: The same as 1, but A is scaled down.
*          j=4: A = U*D*V where U and V are random orthogonal matrices
*               and D has 3*min(M,N)/4 random entries (> 0.1) taken
*               from a uniform distribution (0,1) and the remaining
*               entries set to 0. A is rank-deficient.
*          j=5: The same as 4, but A is scaled up.
*          j=6: The same as 4, but A is scaled down.
*          Entries 1 to 6 are read, but this version only runs tests
*          for the full-rank types 1 to 3; no test is executed for
*          types 4 to 6.
*
*  NM      (input) INTEGER
*          The number of values of M contained in the vector MVAL.
*
*  MVAL    (input) INTEGER array, dimension (NM)
*          The values of the matrix row dimension M.
*
*  NN      (input) INTEGER
*          The number of values of N contained in the vector NVAL.
*
*  NVAL    (input) INTEGER array, dimension (NN)
*          The values of the matrix column dimension N.
*
*  NNS     (input) INTEGER
*          The number of values of NRHS contained in the vector NSVAL.
*
*  NSVAL   (input) INTEGER array, dimension (NNS)
*          The values of the number of right hand sides NRHS.
*
*  NNB     (input) INTEGER
*          The number of values contained in each of the vectors
*          NBVAL, IBVAL and NXVAL.  Entry k of the three vectors is
*          used together.
*
*  NBVAL   (input) INTEGER array, dimension (NNB)
*          The values of the blocksize NB.  NB is passed to
*          XLAENV( 1, . ) and used as the PLASMA tile size.
*
*  NXVAL   (input) INTEGER array, dimension (NNB)
*          The values of the crossover point NX, passed to
*          XLAENV( 3, . ).
*
*  THRESH  (input) REAL
*          The threshold value for the test ratios.  A result is
*          included in the output file if RESULT >= THRESH.  To have
*          every test ratio printed, use THRESH = 0.
*
*  TSTERR  (input) LOGICAL
*          Flag that indicates whether error exits are to be tested.
*          If .TRUE., SERRLS is called once before the tests.
*
*  A       (workspace) REAL array, dimension (MMAX*NMAX)
*          where MMAX is the maximum value of M in MVAL and NMAX is the
*          maximum value of N in NVAL.
*
*  COPYA   (workspace) REAL array, dimension (MMAX*NMAX)
*
*  B       (workspace) REAL array, dimension (MMAX*NSMAX)
*          where MMAX is the maximum value of M in MVAL and NSMAX is the
*          maximum value of NRHS in NSVAL.
*          NOTE(review): B, COPYB and C are addressed with leading
*          dimension max(1,M,N) -- confirm the caller sizes them
*          accordingly when N > M.
*
*  COPYB   (workspace) REAL array, dimension (MMAX*NSMAX)
*
*  C       (workspace) REAL array, dimension (MMAX*NSMAX)
*
*  S       (workspace) REAL array, dimension
*                      (min(MMAX,NMAX))
*          Not referenced in this version.
*
*  COPYS   (workspace) REAL array, dimension
*                      (min(MMAX,NMAX))
*          Not referenced in this version.
*
*  IBVAL   (input) INTEGER array, dimension (NNB)
*          The values of the inner block size IB, used as the PLASMA
*          inner block size.
*
*  WORK    (workspace) REAL array,
*                      dimension (MMAX*NMAX + 4*NMAX + MMAX).
*
*  IWORK   (workspace) INTEGER array, dimension (15*NMAX)
*          Not referenced in this version.
*
*  NOUT    (input) INTEGER
*          The unit number for output.
*
*  =====================================================================
*
*     .. Parameters ..
      INTEGER            ntests
      parameter( ntests = 18 )
      INTEGER            smlsiz
      parameter( smlsiz = 25 )
      REAL               one, two, zero
      parameter( one = 1.0e0, two = 2.0e0, zero = 0.0e0 )
*     ..
*     .. Local Scalars ..
      CHARACTER          trans
      CHARACTER*3        path
      INTEGER            i, ib, im, in, inb, info, ins, irank,
     $                   iscale, itran, itype, k, lda, ldb, ldwork,
     $                   lwork, m, mnmin, n, nb, ncols, nerrs,
     $                   nfail, nlvl, nrhs, nrows, nrun,
     $                   plasma_trans
*     ..
*     .. Local Arrays ..
*     HT receives the workspace handle produced by
*     PLASMA_ALLOC_WORKSPACE_SGELS and consumed by PLASMA_SGELS.
      INTEGER            ht( 2 )
      INTEGER            iseed( 4 ), iseedy( 4 )
      REAL               norma
      REAL               result( ntests )
*     ..
*     .. External Functions ..
      REAL               sqrt14, sqrt17
      EXTERNAL           sqrt14, sqrt17
*     ..
*     .. External Subroutines ..
      EXTERNAL           alaerh, alahd, alasvm, serrls, sgemm, slacpy,
     $                   slarnv, sqrt13, sqrt16, sscal, xlaenv
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          int, log, max, min, REAL
*     ..
*     .. Scalars in Common ..
      LOGICAL            lerr, ok
      CHARACTER*32       srnamt
      INTEGER            infot, iounit
*     ..
*     .. Common blocks ..
      common             / infoc / infot, iounit, ok, lerr
      common             / srnamc / srnamt
*     ..
*     .. Data statements ..
      DATA               iseedy / 1988, 1989, 1990, 1991 /
*     ..
*     .. Executable Statements ..
*
*     Initialize constants and the random number seed.
*     (The first assignment keeps only the leading character 'S'.)
*
      path( 1: 1 ) = 'Single precision'
      path( 2: 3 ) = 'LS'
      nrun = 0
      nfail = 0
      nerrs = 0
      DO 10 i = 1, 4
         iseed( i ) = iseedy( i )
   10 continue
*
*     Test the error exits
*
      CALL xlaenv( 2, 2 )
      CALL xlaenv( 9, smlsiz )
      IF( tsterr )
     $   CALL serrls( path, nout )
*
*     Print the header if NM = 0 or NN = 0 and THRESH = 0.
*
      IF( ( nm.EQ.0 .OR. nn.EQ.0 ) .AND. thresh.EQ.zero )
     $   CALL alahd( nout, path )
      infot = 0
*
      DO 150 im = 1, nm
         m = mval( im )
         lda = max( 1, m )
*
         DO 140 in = 1, nn
            n = nval( in )
            mnmin = min( m, n )
            ldb = max( 1, m, n )
*
            DO 130 ins = 1, nns
               nrhs = nsval( ins )
*
*              Workspace length handed to SQRT17 / SQRT14.
*
               nlvl = max( int( log( max( one, REAL( MNMIN ) ) /
     $                REAL( SMLSIZ+1 ) ) / log( two ) ) + 1, 0 )
               lwork = max( 1, ( m+nrhs )*( n+2 ), ( n+nrhs )*( m+2 ),
     $                 m*n+4*mnmin+max( m, n ), 12*mnmin+2*mnmin*smlsiz+
     $                 8*mnmin*nlvl+mnmin*nrhs+(smlsiz+1)**2 )
*
               DO 120 irank = 1, 2
                  DO 110 iscale = 1, 3
                     itype = ( irank-1 )*3 + iscale
                     IF( .NOT.dotype( itype ) )
     $                  go to 110
*
*                    Only the full-rank case (IRANK = 1) is tested.
*
                     IF( irank.EQ.1 ) THEN
*
*                       Test SGELS
*
*                       Generate a matrix of scaling type ISCALE
*
                        CALL sqrt13( iscale, m, n, copya, lda, norma,
     $                               iseed )
                        DO 40 inb = 1, nnb
                           nb = nbval( inb )
                           ib = ibval( inb )
                           CALL xlaenv( 1, nb )
                           CALL xlaenv( 3, nxval( inb ) )
*
*                          Skip tile sizes that are small relative to
*                          the matrix (integer division by 25).
*                          NOTE(review): presumably this bounds the
*                          number of tiles per dimension -- confirm.
*
                           IF ( (max(m, n) / 25) .GT. nb ) THEN
                              go to 40
                           END IF
                           CALL plasma_set( plasma_tile_size, nb, info )
                           CALL plasma_set( plasma_inner_block_size, ib,
     $                                      info )
*
*                          Allocate T
*
                           CALL plasma_alloc_workspace_sgels( m, n , ht,
     $                                                       info )
                           IF( info.NE.0 ) THEN
*
*                             No usable handle was returned: report
*                             the error and skip this (NB,IB) pair
*                             instead of solving with (and then
*                             releasing) an invalid handle.
*
                              IF( nfail.EQ.0 .AND. nerrs.EQ.0 )
     $                           CALL alahd( nout, path )
                              WRITE( nout, fmt = 9998 )info, m, n, nb,
     $                           ib
                              nerrs = nerrs + 1
                              go to 40
                           END IF
*
*                          ONLY PLASMANOTRANS supported !
*                          The loop therefore runs for ITRAN = 1 only
*                          (the full range would be ITRAN = 1, 2); the
*                          transposed branch below is kept for when
*                          that restriction is lifted.
*
                           DO 30 itran = 1, 1
                              IF( itran.EQ.1 ) THEN
                                 trans = 'N'
                                 plasma_trans = plasmanotrans
                                 nrows = m
                                 ncols = n
                              ELSE
                                 trans = 'T'
                                 plasma_trans = plasmatrans
                                 nrows = n
                                 ncols = m
                              END IF
                              ldwork = max( 1, ncols )
*
*                             Set up a consistent rhs: WORK holds a
*                             random scaled X and B = op(A)*X.
*
                              IF( ncols.GT.0 ) THEN
                                 CALL slarnv( 2, iseed, ncols*nrhs,
     $                                        work )
                                 CALL sscal( ncols*nrhs,
     $                                       one / REAL( NCOLS ), work,
     $                                       1 )
                              END IF
                              CALL sgemm( trans, 'No transpose', nrows,
     $                                    nrhs, ncols, one, copya, lda,
     $                                    work, ldwork, zero, b, ldb )
                              CALL slacpy( 'Full', nrows, nrhs, b, ldb,
     $                                     copyb, ldb )
*
*                             Solve LS or overdetermined system.
*                             A and B are refreshed from their copies
*                             because the solver overwrites them.
*
                              IF( m.GT.0 .AND. n.GT.0 ) THEN
                                 CALL slacpy( 'Full', m, n, copya, lda,
     $                                        a, lda )
                                 CALL slacpy( 'Full', nrows, nrhs,
     $                                        copyb, ldb, b, ldb )
                              END IF
                              srnamt = 'SGELS '
*
                              CALL plasma_sgels( plasma_trans,
     $                                           m, n, nrhs,
     $                                           a, lda, ht, b, ldb,
     $                                           info )
                              IF( info.NE.0 )
     $                           CALL alaerh( path, 'SGELS ', info, 0,
     $                                        trans, m, n, nrhs, -1, nb,
     $                                        itype, nfail, nerrs,
     $                                        nout )
*
*                             Check correctness of results
*
                              IF( nrows.GT.0 .AND. nrhs.GT.0 )
     $                           CALL slacpy( 'Full', nrows, nrhs,
     $                                        copyb, ldb, c, ldb )
                              CALL sqrt16( trans, m, n, nrhs, copya,
     $                                     lda, b, ldb, c, ldb, work,
     $                                     result( 1 ) )
*
                              IF( ( itran.EQ.1 .AND. m.GE.n ) .OR.
     $                            ( itran.EQ.2 .AND. m.LT.n ) ) THEN
*
*                                Solving LS system
*
                                 result( 2 ) = sqrt17( trans, 1, m, n,
     $                                         nrhs, copya, lda, b, ldb,
     $                                         copyb, ldb, c, work,
     $                                         lwork )
                              ELSE
*
*                                Solving overdetermined system
*
                                 result( 2 ) = sqrt14( trans, m, n,
     $                                         nrhs, copya, lda, b, ldb,
     $                                         work, lwork )
                              END IF
*
*                             Print information about the tests that
*                             did not pass the threshold.
*
                              DO 20 k = 1, 2
                                 IF( result( k ).GE.thresh ) THEN
                                    IF( nfail.EQ.0 .AND. nerrs.EQ.0 )
     $                                 CALL alahd( nout, path )
                                    WRITE( nout, fmt = 9999 )trans, m,
     $                                 n, nrhs, nb, itype, k,
     $                                 result( k )
                                    nfail = nfail + 1
                                 END IF
   20                         continue
                              nrun = nrun + 2
   30                      continue
*
*                          Deallocate T
*
                           CALL plasma_dealloc_handle( ht, info )
   40                   continue
                     END IF
  110             continue
  120          continue
  130       continue
  140    continue
  150 continue
*
*     Print a summary of the results.
*
      CALL alasvm( path, nout, nfail, nrun, nerrs )
*
 9999 format( ' TRANS=''', a1, ''', M=', i5, ', N=', i5, ', NRHS=', i4,
     $      ', NB=', i4, ', type', i2, ', test(', i2, ')=', g12.5 )
 9998 format( ' *** PLASMA_ALLOC_WORKSPACE_SGELS returned INFO=', i5,
     $      ' for M=', i5, ', N=', i5, ', NB=', i4, ', IB=', i4 )
      return
*
*     End of SDRVLS
*
      END