PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
cdrvls.f
Go to the documentation of this file.
1  SUBROUTINE cdrvls( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB,
2  $ ibval, nbval, nxval, thresh, tsterr, a, copya,
3  $ b, copyb, c, s, copys, work, rwork, iwork,
4  $ nout )
5 *
6  include 'plasmaf.h'
7 *
8 * -- LAPACK test routine (version 3.1.1) --
9 * Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
10 * January 2007
11 *
12 * .. Scalar Arguments ..
13  LOGICAL tsterr
14  INTEGER nm, nn, nnb, nns, nout
15  REAL thresh
16 * ..
17 * .. Array Arguments ..
18  LOGICAL dotype( * )
19  INTEGER iwork( * ), mval( * ), nbval( * ), nsval( * ),
20  $ nval( * ), nxval( * ), ibval( * )
21  REAL copys( * ), rwork( * ), s( * )
22  COMPLEX a( * ), b( * ), c( * ), copya( * ), copyb( * ),
23  $ work( * )
24 * ..
25 *
26 * Purpose
27 * =======
28 *
29 * CDRVLS tests the least squares driver routines CGELS, CGELSX, CGELSS,
30 * CGELSY and CGELSD.
31 *
32 * Arguments
33 * =========
34 *
35 * DOTYPE (input) LOGICAL array, dimension (NTYPES)
36 * The matrix types to be used for testing. Matrices of type j
37 * (for 1 <= j <= NTYPES) are used for testing if DOTYPE(j) =
38 * .TRUE.; if DOTYPE(j) = .FALSE., then type j is not used.
39 * The matrix of type j is generated as follows:
40 * j=1: A = U*D*V where U and V are random unitary matrices
41 * and D has random entries (> 0.1) taken from a uniform
42 * distribution (0,1). A is full rank.
43 * j=2: The same of 1, but A is scaled up.
44 * j=3: The same of 1, but A is scaled down.
45 * j=4: A = U*D*V where U and V are random unitary matrices
46 * and D has 3*min(M,N)/4 random entries (> 0.1) taken
47 * from a uniform distribution (0,1) and the remaining
48 * entries set to 0. A is rank-deficient.
49 * j=5: The same of 4, but A is scaled up.
50 * j=6: The same of 5, but A is scaled down.
51 *
52 * NM (input) INTEGER
53 * The number of values of M contained in the vector MVAL.
54 *
55 * MVAL (input) INTEGER array, dimension (NM)
56 * The values of the matrix row dimension M.
57 *
58 * NN (input) INTEGER
59 * The number of values of N contained in the vector NVAL.
60 *
61 * NVAL (input) INTEGER array, dimension (NN)
62 * The values of the matrix column dimension N.
63 *
64 * NNB (input) INTEGER
65 * The number of values of NB and NX contained in the
66 * vectors NBVAL and NXVAL. The blocking parameters are used
67 * in pairs (NB,NX).
68 *
69 * NBVAL (input) INTEGER array, dimension (NNB)
70 * The values of the blocksize NB.
71 *
72 * IBVAL (input) INTEGER array, dimension (NNB)
73 * The values of the inner block size IB.
74 *
75 * NXVAL (input) INTEGER array, dimension (NNB)
76 * The values of the crossover point NX.
77 *
78 * NNS (input) INTEGER
79 * The number of values of NRHS contained in the vector NSVAL.
80 *
81 * NSVAL (input) INTEGER array, dimension (NNS)
82 * The values of the number of right hand sides NRHS.
83 *
84 * THRESH (input) REAL
85 * The threshold value for the test ratios. A result is
86 * included in the output file if RESULT >= THRESH. To have
87 * every test ratio printed, use THRESH = 0.
88 *
89 * TSTERR (input) LOGICAL
90 * Flag that indicates whether error exits are to be tested.
91 *
92 * A (workspace) COMPLEX array, dimension (MMAX*NMAX)
93 * where MMAX is the maximum value of M in MVAL and NMAX is the
94 * maximum value of N in NVAL.
95 *
96 * COPYA (workspace) COMPLEX array, dimension (MMAX*NMAX)
97 *
98 * B (workspace) COMPLEX array, dimension (MMAX*NSMAX)
99 * where MMAX is the maximum value of M in MVAL and NSMAX is the
100 * maximum value of NRHS in NSVAL.
101 *
102 * COPYB (workspace) COMPLEX array, dimension (MMAX*NSMAX)
103 *
104 * C (workspace) COMPLEX array, dimension (MMAX*NSMAX)
105 *
106 * S (workspace) REAL array, dimension
107 * (min(MMAX,NMAX))
108 *
109 * COPYS (workspace) REAL array, dimension
110 * (min(MMAX,NMAX))
111 *
112 * WORK (workspace) COMPLEX array, dimension
113 * (MMAX*NMAX + 4*NMAX + MMAX).
114 *
115 * RWORK (workspace) REAL array, dimension (5*NMAX-1)
116 *
117 * IWORK (workspace) INTEGER array, dimension (15*NMAX)
118 *
119 * NOUT (input) INTEGER
120 * The unit number for output.
121 *
122 * =====================================================================
123 *
124 * .. Parameters ..
125  INTEGER ntests
126  parameter( ntests = 18 )
127  INTEGER smlsiz
128  parameter( smlsiz = 25 )
129  REAL one, zero
130  parameter( one = 1.0e+0, zero = 0.0e+0 )
131  COMPLEX cone, czero
132  parameter( cone = ( 1.0e+0, 0.0e+0 ),
133  $ czero = ( 0.0e+0, 0.0e+0 ) )
134 * ..
135 * .. Local Scalars ..
136  CHARACTER trans
137  CHARACTER*3 path
138  INTEGER crank, i, im, in, inb, info, ins, irank,
139  $ iscale, itran, itype, j, k, lda, ldb, ldwork,
140  $ lwlsy, lwork, m, mnmin, n, nb, ncols, nerrs,
141  $ nfail, nrhs, nrows, nrun, rank, ib,
142  $ plasma_trans
143  INTEGER ht( 2 )
144  REAL eps, norma, normb, rcond
145 * ..
146 * .. Local Arrays ..
147  INTEGER iseed( 4 ), iseedy( 4 )
148  REAL result( ntests )
149 * ..
150 * .. External Functions ..
151  REAL cqrt14, cqrt17, sasum, slamch
152  EXTERNAL cqrt14, cqrt17, sasum, slamch
153 * ..
154 * .. External Subroutines ..
155  EXTERNAL alaerh, alahd, alasvm, cerrls, cgels, cgelsd,
156  $ cgelss, cgelsx, cgelsy, cgemm, clacpy, clarnv,
157  $ cqrt13, cqrt15, cqrt16, csscal, saxpy,
158  $ xlaenv
159 * ..
160 * .. Intrinsic Functions ..
161  INTRINSIC max, min, REAL, sqrt
162 * ..
163 * .. Scalars in Common ..
164  LOGICAL lerr, ok
165  CHARACTER*32 srnamt
166  INTEGER infot, iounit
167 * ..
168 * .. Common blocks ..
169  common / infoc / infot, iounit, ok, lerr
170  common / srnamc / srnamt
171 * ..
172 * .. Data statements ..
173  DATA iseedy / 1988, 1989, 1990, 1991 /
174 * ..
175 * .. Executable Statements ..
176 *
177 * Initialize constants and the random number seed.
178 *
179  path( 1: 1 ) = 'Complex precision'
180  path( 2: 3 ) = 'LS'
181  nrun = 0
182  nfail = 0
183  nerrs = 0
184  DO 10 i = 1, 4
185  iseed( i ) = iseedy( i )
186  10 continue
187  eps = slamch( 'Epsilon' )
188 *
189 * Threshold for rank estimation
190 *
191  rcond = sqrt( eps ) - ( sqrt( eps )-eps ) / 2
192 *
193 * Test the error exits
194 *
195  CALL xlaenv( 9, smlsiz )
196  IF( tsterr )
197  $ CALL cerrls( path, nout )
198 *
199 * Print the header if NM = 0 or NN = 0 and THRESH = 0.
200 *
201  IF( ( nm.EQ.0 .OR. nn.EQ.0 ) .AND. thresh.EQ.zero )
202  $ CALL alahd( nout, path )
203  infot = 0
204 *
205  DO 140 im = 1, nm
206  m = mval( im )
207  lda = max( 1, m )
208 *
209  DO 130 in = 1, nn
210  n = nval( in )
211  mnmin = min( m, n )
212  ldb = max( 1, m, n )
213 *
214  DO 120 ins = 1, nns
215  nrhs = nsval( ins )
216  lwork = max( 1, ( m+nrhs )*( n+2 ), ( n+nrhs )*( m+2 ),
217  $ m*n+4*mnmin+max( m, n ), 2*n+m )
218 *
219  DO 110 irank = 1, 2
220  DO 100 iscale = 1, 3
221  itype = ( irank-1 )*3 + iscale
222  IF( .NOT.dotype( itype ) )
223  $ go to 100
224 *
225  IF( irank.EQ.1 ) THEN
226 *
227 * Test CGELS
228 *
229 * Generate a matrix of scaling type ISCALE
230 *
231  CALL cqrt13( iscale, m, n, copya, lda, norma,
232  $ iseed )
233  DO 40 inb = 1, nnb
234  nb = nbval( inb )
235  ib = ibval( inb )
236  CALL xlaenv( 1, nb )
237  CALL xlaenv( 3, nxval( inb ) )
238  IF ( (max(m, n) / 25) .GT. nb ) THEN
239  goto 40
240  END IF
241  CALL plasma_set( plasma_tile_size, nb, info )
242  CALL plasma_set( plasma_inner_block_size, ib,
243  $ info )
244 *
245 * Allocate T
246 *
247  CALL plasma_alloc_workspace_cgels( m, n , ht,
248  $ info )
249 *
250 * DO 30 ITRAN = 1, 2
251  DO 30 itran = 1, 1
252  IF( itran.EQ.1 ) THEN
253  trans = 'N'
254  plasma_trans = plasmanotrans
255  nrows = m
256  ncols = n
257  ELSE
258  trans = 'C'
259  plasma_trans = plasmaconjtrans
260  nrows = n
261  ncols = m
262  END IF
263  ldwork = max( 1, ncols )
264 *
265 * Set up a consistent rhs
266 *
267  IF( ncols.GT.0 ) THEN
268  CALL clarnv( 2, iseed, ncols*nrhs,
269  $ work )
270  CALL csscal( ncols*nrhs,
271  $ one / REAL( NCOLS ), work,
272  $ 1 )
273  END IF
274  CALL cgemm( trans, 'No transpose', nrows,
275  $ nrhs, ncols, cone, copya, lda,
276  $ work, ldwork, czero, b, ldb )
277  CALL clacpy( 'Full', nrows, nrhs, b, ldb,
278  $ copyb, ldb )
279 *
280 * Solve LS or overdetermined system
281 *
282  IF( m.GT.0 .AND. n.GT.0 ) THEN
283  CALL clacpy( 'Full', m, n, copya, lda,
284  $ a, lda )
285  CALL clacpy( 'Full', nrows, nrhs,
286  $ copyb, ldb, b, ldb )
287  END IF
288  srnamt = 'CGELS '
289  CALL plasma_cgels( plasma_trans,
290  $ m, n, nrhs,
291  $ a, lda, ht, b, ldb,
292  $ info )
293 *
294  IF( info.NE.0 )
295  $ CALL alaerh( path, 'CGELS ', info, 0,
296  $ trans, m, n, nrhs, -1, nb,
297  $ itype, nfail, nerrs,
298  $ nout )
299 *
300 * Check correctness of results
301 *
302  ldwork = max( 1, nrows )
303  IF( nrows.GT.0 .AND. nrhs.GT.0 )
304  $ CALL clacpy( 'Full', nrows, nrhs,
305  $ copyb, ldb, c, ldb )
306  CALL cqrt16( trans, m, n, nrhs, copya,
307  $ lda, b, ldb, c, ldb, rwork,
308  $ result( 1 ) )
309 *
310  IF( ( itran.EQ.1 .AND. m.GE.n ) .OR.
311  $ ( itran.EQ.2 .AND. m.LT.n ) ) THEN
312 *
313 * Solving LS system
314 *
315  result( 2 ) = cqrt17( trans, 1, m, n,
316  $ nrhs, copya, lda, b, ldb,
317  $ copyb, ldb, c, work,
318  $ lwork )
319  ELSE
320 *
321 * Solving overdetermined system
322 *
323  result( 2 ) = cqrt14( trans, m, n,
324  $ nrhs, copya, lda, b, ldb,
325  $ work, lwork )
326  END IF
327 *
328 * Print information about the tests that
329 * did not pass the threshold.
330 *
331  DO 20 k = 1, 2
332  IF( result( k ).GE.thresh ) THEN
333  IF( nfail.EQ.0 .AND. nerrs.EQ.0 )
334  $ CALL alahd( nout, path )
335  WRITE( nout, fmt = 9999 )trans, m,
336  $ n, nrhs, nb, itype, k,
337  $ result( k )
338  nfail = nfail + 1
339  END IF
340  20 continue
341  nrun = nrun + 2
342  30 continue
343 *
344 * Deallocate T
345 *
346  CALL plasma_dealloc_handle( ht, info )
347  40 continue
348  END IF
349 *
350  100 continue
351  110 continue
352  120 continue
353  130 continue
354  140 continue
355 *
356 * Print a summary of the results.
357 *
358  CALL alasvm( path, nout, nfail, nrun, nerrs )
359 *
360  9999 format( ' TRANS=''', a1, ''', M=', i5, ', N=', i5, ', NRHS=', i4,
361  $ ', NB=', i4, ', type', i2, ', test(', i2, ')=', g12.5 )
362  9998 format( ' M=', i5, ', N=', i5, ', NRHS=', i4, ', NB=', i4,
363  $ ', type', i2, ', test(', i2, ')=', g12.5 )
364  return
365 *
366 * End of CDRVLS
367 *
368  END