PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
sdrvls.f
Go to the documentation of this file.
1  SUBROUTINE sdrvls( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB,
2  $ nbval, nxval, thresh, tsterr, a, copya, b,
3  $ copyb, c, s, copys, ibval, work, iwork, nout )
4 *
5  include 'plasmaf.h'
6 *
7 * -- LAPACK test routine (version 3.1.1) --
8 * Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
9 * January 2007
10 *
11 * .. Scalar Arguments ..
12  LOGICAL tsterr
13  INTEGER nm, nn, nnb, nns, nout
14  REAL thresh
15 * ..
16 * .. Array Arguments ..
17  LOGICAL dotype( * )
18  INTEGER iwork( * ), mval( * ), nbval( * ), nsval( * ),
19  $ nval( * ), nxval( * ), ibval( * )
20  REAL a( * ), b( * ), c( * ), copya( * ), copyb( * ),
21  $ copys( * ), s( * ), work( * )
22 * ..
23 *
24 * Purpose
25 * =======
26 *
27 * SDRVLS tests the least squares driver routines SGELS, SGELSS, SGELSX,
28 * SGELSY and SGELSD.
29 *
30 * Arguments
31 * =========
32 *
33 * DOTYPE (input) LOGICAL array, dimension (NTYPES)
34 * The matrix types to be used for testing. Matrices of type j
35 * (for 1 <= j <= NTYPES) are used for testing if DOTYPE(j) =
36 * .TRUE.; if DOTYPE(j) = .FALSE., then type j is not used.
37 * The matrix of type j is generated as follows:
38 * j=1: A = U*D*V where U and V are random orthogonal matrices
39 * and D has random entries (> 0.1) taken from a uniform
40 * distribution (0,1). A is full rank.
41 * j=2: The same of 1, but A is scaled up.
42 * j=3: The same of 1, but A is scaled down.
43 * j=4: A = U*D*V where U and V are random orthogonal matrices
44 * and D has 3*min(M,N)/4 random entries (> 0.1) taken
45 * from a uniform distribution (0,1) and the remaining
46 * entries set to 0. A is rank-deficient.
47 * j=5: The same of 4, but A is scaled up.
48 * j=6: The same of 5, but A is scaled down.
49 *
50 * NM (input) INTEGER
51 * The number of values of M contained in the vector MVAL.
52 *
53 * MVAL (input) INTEGER array, dimension (NM)
54 * The values of the matrix row dimension M.
55 *
56 * NN (input) INTEGER
57 * The number of values of N contained in the vector NVAL.
58 *
59 * NVAL (input) INTEGER array, dimension (NN)
60 * The values of the matrix column dimension N.
61 *
62 * NNS (input) INTEGER
63 * The number of values of NRHS contained in the vector NSVAL.
64 *
65 * NSVAL (input) INTEGER array, dimension (NNS)
66 * The values of the number of right hand sides NRHS.
67 *
68 * NNB (input) INTEGER
69 * The number of values of NB and NX contained in the
70 * vectors NBVAL and NXVAL. The blocking parameters are used
71 * in pairs (NB,NX).
72 *
73 * NBVAL (input) INTEGER array, dimension (NNB)
74 * The values of the blocksize NB.
75 *
76 * IBVAL (input) INTEGER array, dimension (NNB)
77 * The values of the inner block size IB.
78 *
79 * NXVAL (input) INTEGER array, dimension (NNB)
80 * The values of the crossover point NX.
81 *
82 * THRESH (input) REAL
83 * The threshold value for the test ratios. A result is
84 * included in the output file if RESULT >= THRESH. To have
85 * every test ratio printed, use THRESH = 0.
86 *
87 * TSTERR (input) LOGICAL
88 * Flag that indicates whether error exits are to be tested.
89 *
90 * A (workspace) REAL array, dimension (MMAX*NMAX)
91 * where MMAX is the maximum value of M in MVAL and NMAX is the
92 * maximum value of N in NVAL.
93 *
94 * COPYA (workspace) REAL array, dimension (MMAX*NMAX)
95 *
96 * B (workspace) REAL array, dimension (MMAX*NSMAX)
97 * where MMAX is the maximum value of M in MVAL and NSMAX is the
98 * maximum value of NRHS in NSVAL.
99 *
100 * COPYB (workspace) REAL array, dimension (MMAX*NSMAX)
101 *
102 * C (workspace) REAL array, dimension (MMAX*NSMAX)
103 *
104 * S (workspace) REAL array, dimension
105 * (min(MMAX,NMAX))
106 *
107 * COPYS (workspace) REAL array, dimension
108 * (min(MMAX,NMAX))
109 *
110 * WORK (workspace) REAL array,
111 * dimension (MMAX*NMAX + 4*NMAX + MMAX).
112 *
113 * IWORK (workspace) INTEGER array, dimension (15*NMAX)
114 *
115 * NOUT (input) INTEGER
116 * The unit number for output.
117 *
118 * =====================================================================
119 *
120 * .. Parameters ..
121  INTEGER ntests
122  parameter( ntests = 18 )
123  INTEGER smlsiz
124  parameter( smlsiz = 25 )
125  REAL one, two, zero
126  parameter( one = 1.0e0, two = 2.0e0, zero = 0.0e0 )
127 * ..
128 * .. Local Scalars ..
129  CHARACTER trans
130  CHARACTER*3 path
131  INTEGER crank, i, im, in, inb, info, ins, irank,
132  $ iscale, itran, itype, j, k, lda, ldb, ldwork,
133  $ lwlsy, lwork, m, mnmin, n, nb, ncols, nerrs,
134  $ nfail, nlvl, nrhs, nrows, nrun, rank, ib,
135  $ plasma_trans
136  INTEGER ht( 2 )
137  REAL eps, norma, normb, rcond
138 * ..
139 * .. Local Arrays ..
140  INTEGER iseed( 4 ), iseedy( 4 )
141  REAL result( ntests )
142 * ..
143 * .. External Functions ..
144  REAL sasum, slamch, sqrt14, sqrt17
145  EXTERNAL sasum, slamch, sqrt14, sqrt17
146 * ..
147 * .. External Subroutines ..
148  EXTERNAL alaerh, alahd, alasvm, saxpy, serrls, sgels,
149  $ sgelsd, sgelss, sgelsx, sgelsy, sgemm, slacpy,
150  $ slarnv, sqrt13, sqrt15, sqrt16, sscal,
151  $ xlaenv
152 * ..
153 * .. Intrinsic Functions ..
154  INTRINSIC int, log, max, min, REAL, sqrt
155 * ..
156 * .. Scalars in Common ..
157  LOGICAL lerr, ok
158  CHARACTER*32 srnamt
159  INTEGER infot, iounit
160 * ..
161 * .. Common blocks ..
162  common / infoc / infot, iounit, ok, lerr
163  common / srnamc / srnamt
164 * ..
165 * .. Data statements ..
166  DATA iseedy / 1988, 1989, 1990, 1991 /
167 * ..
168 * .. Executable Statements ..
169 *
170 * Initialize constants and the random number seed.
171 *
172  path( 1: 1 ) = 'Single precision'
173  path( 2: 3 ) = 'LS'
174  nrun = 0
175  nfail = 0
176  nerrs = 0
177  DO 10 i = 1, 4
178  iseed( i ) = iseedy( i )
179  10 continue
180  eps = slamch( 'Epsilon' )
181 *
182 * Threshold for rank estimation
183 *
184  rcond = sqrt( eps ) - ( sqrt( eps )-eps ) / 2
185 *
186 * Test the error exits
187 *
188  CALL xlaenv( 2, 2 )
189  CALL xlaenv( 9, smlsiz )
190  IF( tsterr )
191  $ CALL serrls( path, nout )
192 *
193 * Print the header if NM = 0 or NN = 0 and THRESH = 0.
194 *
195  IF( ( nm.EQ.0 .OR. nn.EQ.0 ) .AND. thresh.EQ.zero )
196  $ CALL alahd( nout, path )
197  infot = 0
198 *
199  DO 150 im = 1, nm
200  m = mval( im )
201  lda = max( 1, m )
202 *
203  DO 140 in = 1, nn
204  n = nval( in )
205  mnmin = min( m, n )
206  ldb = max( 1, m, n )
207 *
208  DO 130 ins = 1, nns
209  nrhs = nsval( ins )
210  nlvl = max( int( log( max( one, REAL( MNMIN ) ) /
211  $ REAL( SMLSIZ+1 ) ) / log( two ) ) + 1, 0 )
212  lwork = max( 1, ( m+nrhs )*( n+2 ), ( n+nrhs )*( m+2 ),
213  $ m*n+4*mnmin+max( m, n ), 12*mnmin+2*mnmin*smlsiz+
214  $ 8*mnmin*nlvl+mnmin*nrhs+(smlsiz+1)**2 )
215 *
216  DO 120 irank = 1, 2
217  DO 110 iscale = 1, 3
218  itype = ( irank-1 )*3 + iscale
219  IF( .NOT.dotype( itype ) )
220  $ go to 110
221 *
222  IF( irank.EQ.1 ) THEN
223 *
224 * Test SGELS
225 *
226 * Generate a matrix of scaling type ISCALE
227 *
228  CALL sqrt13( iscale, m, n, copya, lda, norma,
229  $ iseed )
230  DO 40 inb = 1, nnb
231  nb = nbval( inb )
232  ib = ibval( inb )
233  CALL xlaenv( 1, nb )
234  CALL xlaenv( 3, nxval( inb ) )
235  IF ( (max(m, n) / 25) .GT. nb ) THEN
236  goto 40
237  END IF
238  CALL plasma_set( plasma_tile_size, nb, info )
239  CALL plasma_set( plasma_inner_block_size, ib,
240  $ info )
241 *
242 * Allocate T
243 *
244  CALL plasma_alloc_workspace_sgels( m, n , ht,
245  $ info )
246 *
247 * DO 30 ITRAN = 1, 2
248  DO 30 itran = 1, 1
249 *
250 * ONLY PLASMANOTRANS supported !
251 *
252 *
253  IF( itran.EQ.1 ) THEN
254  trans = 'N'
255  plasma_trans = plasmanotrans
256  nrows = m
257  ncols = n
258  ELSE
259  trans = 'T'
260  plasma_trans = plasmatrans
261  nrows = n
262  ncols = m
263  END IF
264  ldwork = max( 1, ncols )
265 *
266 * Set up a consistent rhs
267 *
268  IF( ncols.GT.0 ) THEN
269  CALL slarnv( 2, iseed, ncols*nrhs,
270  $ work )
271  CALL sscal( ncols*nrhs,
272  $ one / REAL( NCOLS ), work,
273  $ 1 )
274  END IF
275  CALL sgemm( trans, 'No transpose', nrows,
276  $ nrhs, ncols, one, copya, lda,
277  $ work, ldwork, zero, b, ldb )
278  CALL slacpy( 'Full', nrows, nrhs, b, ldb,
279  $ copyb, ldb )
280 *
281 * Solve LS or overdetermined system
282 *
283  IF( m.GT.0 .AND. n.GT.0 ) THEN
284  CALL slacpy( 'Full', m, n, copya, lda,
285  $ a, lda )
286  CALL slacpy( 'Full', nrows, nrhs,
287  $ copyb, ldb, b, ldb )
288  END IF
289  srnamt = 'SGELS '
290 *
291  CALL plasma_sgels( plasma_trans,
292  $ m, n, nrhs,
293  $ a, lda, ht, b, ldb,
294  $ info )
295  IF( info.NE.0 )
296  $ CALL alaerh( path, 'SGELS ', info, 0,
297  $ trans, m, n, nrhs, -1, nb,
298  $ itype, nfail, nerrs,
299  $ nout )
300 *
301 * Check correctness of results
302 *
303  ldwork = max( 1, nrows )
304  IF( nrows.GT.0 .AND. nrhs.GT.0 )
305  $ CALL slacpy( 'Full', nrows, nrhs,
306  $ copyb, ldb, c, ldb )
307  CALL sqrt16( trans, m, n, nrhs, copya,
308  $ lda, b, ldb, c, ldb, work,
309  $ result( 1 ) )
310 *
311  IF( ( itran.EQ.1 .AND. m.GE.n ) .OR.
312  $ ( itran.EQ.2 .AND. m.LT.n ) ) THEN
313 *
314 * Solving LS system
315 *
316  result( 2 ) = sqrt17( trans, 1, m, n,
317  $ nrhs, copya, lda, b, ldb,
318  $ copyb, ldb, c, work,
319  $ lwork )
320  ELSE
321 *
322 * Solving overdetermined system
323 *
324  result( 2 ) = sqrt14( trans, m, n,
325  $ nrhs, copya, lda, b, ldb,
326  $ work, lwork )
327  END IF
328 *
329 * Print information about the tests that
330 * did not pass the threshold.
331 *
332  DO 20 k = 1, 2
333  IF( result( k ).GE.thresh ) THEN
334  IF( nfail.EQ.0 .AND. nerrs.EQ.0 )
335  $ CALL alahd( nout, path )
336  WRITE( nout, fmt = 9999 )trans, m,
337  $ n, nrhs, nb, itype, k,
338  $ result( k )
339  nfail = nfail + 1
340  END IF
341  20 continue
342  nrun = nrun + 2
343  30 continue
344 *
345 * Deallocate T
346 *
347  CALL plasma_dealloc_handle( ht, info )
348  40 continue
349  END IF
350  110 continue
351  120 continue
352  130 continue
353  140 continue
354  150 continue
355 *
356 * Print a summary of the results.
357 *
358  CALL alasvm( path, nout, nfail, nrun, nerrs )
359 *
360  9999 format( ' TRANS=''', a1, ''', M=', i5, ', N=', i5, ', NRHS=', i4,
361  $ ', NB=', i4, ', type', i2, ', test(', i2, ')=', g12.5 )
362  9998 format( ' M=', i5, ', N=', i5, ', NRHS=', i4, ', NB=', i4,
363  $ ', type', i2, ', test(', i2, ')=', g12.5 )
364  return
365 *
366 * End of SDRVLS
367 *
368  END