PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
zdrvls.f
Go to the documentation of this file.
1  SUBROUTINE zdrvls( DOTYPE, NM, MVAL, NN, NVAL, NNS, NSVAL, NNB,
2  $ ibval, nbval, nxval, thresh, tsterr, a, copya,
3  $ b, copyb, c, s, copys, work, rwork, iwork,
4  $ nout )
5 *
6  include 'plasmaf.h'
7 *
8 * -- LAPACK test routine (version 3.1.1) --
9 * Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
10 * January 2007
11 *
12 * .. Scalar Arguments ..
13  LOGICAL tsterr
14  INTEGER nm, nn, nnb, nns, nout
15  DOUBLE PRECISION thresh
16 * ..
17 * .. Array Arguments ..
18  LOGICAL dotype( * )
19  INTEGER iwork( * ), mval( * ), nbval( * ), nsval( * ),
20  $ ibval( * ), nval( * ), nxval( * )
21  DOUBLE PRECISION copys( * ), rwork( * ), s( * )
22  COMPLEX*16 a( * ), b( * ), c( * ), copya( * ), copyb( * ),
23  $ work( * )
24 * ..
25 *
26 * Purpose
27 * =======
28 *
29 * ZDRVLS tests the least squares driver routines ZGELS, CGELSX, CGELSS,
30 * ZGELSY and CGELSD.
31 *
32 * Arguments
33 * =========
34 *
35 * DOTYPE (input) LOGICAL array, dimension (NTYPES)
36 * The matrix types to be used for testing. Matrices of type j
37 * (for 1 <= j <= NTYPES) are used for testing if DOTYPE(j) =
38 * .TRUE.; if DOTYPE(j) = .FALSE., then type j is not used.
39 * The matrix of type j is generated as follows:
40 * j=1: A = U*D*V where U and V are random unitary matrices
41 * and D has random entries (> 0.1) taken from a uniform
42 * distribution (0,1). A is full rank.
43 * j=2: The same of 1, but A is scaled up.
44 * j=3: The same of 1, but A is scaled down.
45 * j=4: A = U*D*V where U and V are random unitary matrices
46 * and D has 3*min(M,N)/4 random entries (> 0.1) taken
47 * from a uniform distribution (0,1) and the remaining
48 * entries set to 0. A is rank-deficient.
49 * j=5: The same of 4, but A is scaled up.
50 * j=6: The same of 5, but A is scaled down.
51 *
52 * NM (input) INTEGER
53 * The number of values of M contained in the vector MVAL.
54 *
55 * MVAL (input) INTEGER array, dimension (NM)
56 * The values of the matrix row dimension M.
57 *
58 * NN (input) INTEGER
59 * The number of values of N contained in the vector NVAL.
60 *
61 * NVAL (input) INTEGER array, dimension (NN)
62 * The values of the matrix column dimension N.
63 *
64 * NNB (input) INTEGER
65 * The number of values of NB and NX contained in the
66 * vectors NBVAL and NXVAL. The blocking parameters are used
67 * in pairs (NB,NX).
68 *
69 * NBVAL (input) INTEGER array, dimension (NNB)
70 * The values of the blocksize NB.
71 *
72 * NXVAL (input) INTEGER array, dimension (NNB)
73 * The values of the crossover point NX.
74 *
75 * NNS (input) INTEGER
76 * The number of values of NRHS contained in the vector NSVAL.
77 *
78 * NSVAL (input) INTEGER array, dimension (NNS)
79 * The values of the number of right hand sides NRHS.
80 *
81 * THRESH (input) DOUBLE PRECISION
82 * The threshold value for the test ratios. A result is
83 * included in the output file if RESULT >= THRESH. To have
84 * every test ratio printed, use THRESH = 0.
85 *
86 * TSTERR (input) LOGICAL
87 * Flag that indicates whether error exits are to be tested.
88 *
89 * A (workspace) COMPLEX*16 array, dimension (MMAX*NMAX)
90 * where MMAX is the maximum value of M in MVAL and NMAX is the
91 * maximum value of N in NVAL.
92 *
93 * COPYA (workspace) COMPLEX*16 array, dimension (MMAX*NMAX)
94 *
95 * B (workspace) COMPLEX*16 array, dimension (MMAX*NSMAX)
96 * where MMAX is the maximum value of M in MVAL and NSMAX is the
97 * maximum value of NRHS in NSVAL.
98 *
99 * COPYB (workspace) COMPLEX*16 array, dimension (MMAX*NSMAX)
100 *
101 * C (workspace) COMPLEX*16 array, dimension (MMAX*NSMAX)
102 *
103 * S (workspace) DOUBLE PRECISION array, dimension
104 * (min(MMAX,NMAX))
105 *
106 * COPYS (workspace) DOUBLE PRECISION array, dimension
107 * (min(MMAX,NMAX))
108 *
109 * WORK (workspace) COMPLEX*16 array, dimension
110 * (MMAX*NMAX + 4*NMAX + MMAX).
111 *
112 * RWORK (workspace) DOUBLE PRECISION array, dimension (5*NMAX-1)
113 *
114 * IWORK (workspace) INTEGER array, dimension (15*NMAX)
115 *
116 * NOUT (input) INTEGER
117 * The unit number for output.
118 *
119 * =====================================================================
120 *
121 * .. Parameters ..
122  INTEGER ntests
123  parameter( ntests = 18 )
124  INTEGER smlsiz
125  parameter( smlsiz = 25 )
126  DOUBLE PRECISION one, zero
127  parameter( one = 1.0d+0, zero = 0.0d+0 )
128  COMPLEX*16 cone, czero
129  parameter( cone = ( 1.0d+0, 0.0d+0 ),
130  $ czero = ( 0.0d+0, 0.0d+0 ) )
131 * ..
132 * .. Local Scalars ..
133  CHARACTER trans
134  CHARACTER*3 path
135  INTEGER crank, i, im, in, inb, info, ins, irank,
136  $ iscale, itran, itype, j, k, lda, ldb, ldwork,
137  $ lwlsy, lwork, m, mnmin, n, nb, ncols, nerrs,
138  $ nfail, nrhs, nrows, nrun, rank, ib,
139  $ plasma_trans
140  INTEGER ht( 2 )
141  DOUBLE PRECISION eps, norma, normb, rcond
142 * ..
143 * .. Local Arrays ..
144  INTEGER iseed( 4 ), iseedy( 4 )
145  DOUBLE PRECISION result( ntests )
146 * ..
147 * .. External Functions ..
148  DOUBLE PRECISION dasum, dlamch, zqrt14, zqrt17
149  EXTERNAL dasum, dlamch, zqrt14, zqrt17
150 * ..
151 * .. External Subroutines ..
152  EXTERNAL alaerh, alahd, alasvm, daxpy, dlasrt, xlaenv,
153  $ zdscal, zerrls, zgels, zgelsd, zgelss, zgelsx,
154  $ zgelsy, zgemm, zlacpy, zlarnv, zqrt13, zqrt15,
155  $ zqrt16
156 * ..
157 * .. Intrinsic Functions ..
158  INTRINSIC dble, max, min, sqrt
159 * ..
160 * .. Scalars in Common ..
161  LOGICAL lerr, ok
162  CHARACTER*32 srnamt
163  INTEGER infot, iounit
164 * ..
165 * .. Common blocks ..
166  common / infoc / infot, iounit, ok, lerr
167  common / srnamc / srnamt
168 * ..
169 * .. Data statements ..
170  DATA iseedy / 1988, 1989, 1990, 1991 /
171 * ..
172 * .. Executable Statements ..
173 *
174 * Initialize constants and the random number seed.
175 *
176  path( 1: 1 ) = 'Zomplex precision'
177  path( 2: 3 ) = 'LS'
178  nrun = 0
179  nfail = 0
180  nerrs = 0
181  DO 10 i = 1, 4
182  iseed( i ) = iseedy( i )
183  10 continue
184  eps = dlamch( 'Epsilon' )
185 *
186 * Threshold for rank estimation
187 *
188  rcond = sqrt( eps ) - ( sqrt( eps )-eps ) / 2
189 *
190 * Test the error exits
191 *
192  CALL xlaenv( 9, smlsiz )
193  IF( tsterr )
194  $ CALL zerrls( path, nout )
195 *
196 * Print the header if NM = 0 or NN = 0 and THRESH = 0.
197 *
198  IF( ( nm.EQ.0 .OR. nn.EQ.0 ) .AND. thresh.EQ.zero )
199  $ CALL alahd( nout, path )
200  infot = 0
201 *
202  DO 140 im = 1, nm
203  m = mval( im )
204  lda = max( 1, m )
205 *
206  DO 130 in = 1, nn
207  n = nval( in )
208  mnmin = min( m, n )
209  ldb = max( 1, m, n )
210 *
211  DO 120 ins = 1, nns
212  nrhs = nsval( ins )
213  lwork = max( 1, ( m+nrhs )*( n+2 ), ( n+nrhs )*( m+2 ),
214  $ m*n+4*mnmin+max( m, n ), 2*n+m )
215 *
216  DO 110 irank = 1, 2
217  DO 100 iscale = 1, 3
218  itype = ( irank-1 )*3 + iscale
219  IF( .NOT.dotype( itype ) )
220  $ go to 100
221 *
222  IF( irank.EQ.1 ) THEN
223 *
224 * Test ZGELS
225 *
226 * Generate a matrix of scaling type ISCALE
227 *
228  CALL zqrt13( iscale, m, n, copya, lda, norma,
229  $ iseed )
230  DO 40 inb = 1, nnb
231  nb = nbval( inb )
232  ib = ibval( inb )
233  CALL xlaenv( 1, nb )
234  CALL xlaenv( 3, nxval( inb ) )
235  IF ( (max(m, n) / 25) .GT. nb ) THEN
236  goto 40
237  END IF
238  CALL plasma_set( plasma_tile_size, nb, info )
239  CALL plasma_set( plasma_inner_block_size, ib,
240  $ info )
241 *
242 * Allocate T
243 *
244  CALL plasma_alloc_workspace_zgels( m, n , ht,
245  $ info )
246 *
247 * DO 30 ITRAN = 1, 2
248  DO 30 itran = 1, 1
249  IF( itran.EQ.1 ) THEN
250  trans = 'N'
251  plasma_trans = plasmanotrans
252  nrows = m
253  ncols = n
254  ELSE
255  trans = 'C'
256  plasma_trans = plasmaconjtrans
257  nrows = n
258  ncols = m
259  END IF
260  ldwork = max( 1, ncols )
261 *
262 * Set up a consistent rhs
263 *
264  IF( ncols.GT.0 ) THEN
265  CALL zlarnv( 2, iseed, ncols*nrhs,
266  $ work )
267  CALL zdscal( ncols*nrhs,
268  $ one / dble( ncols ), work,
269  $ 1 )
270  END IF
271  CALL zgemm( trans, 'No transpose', nrows,
272  $ nrhs, ncols, cone, copya, lda,
273  $ work, ldwork, czero, b, ldb )
274  CALL zlacpy( 'Full', nrows, nrhs, b, ldb,
275  $ copyb, ldb )
276 *
277 * Solve LS or overdetermined system
278 *
279  IF( m.GT.0 .AND. n.GT.0 ) THEN
280  CALL zlacpy( 'Full', m, n, copya, lda,
281  $ a, lda )
282  CALL zlacpy( 'Full', nrows, nrhs,
283  $ copyb, ldb, b, ldb )
284  END IF
285  srnamt = 'ZGELS '
286  CALL plasma_zgels( plasma_trans,
287  $ m, n, nrhs,
288  $ a, lda, ht, b, ldb,
289  $ info )
290 *
291  IF( info.NE.0 )
292  $ CALL alaerh( path, 'ZGELS ', info, 0,
293  $ trans, m, n, nrhs, -1, nb,
294  $ itype, nfail, nerrs,
295  $ nout )
296 *
297 * Check correctness of results
298 *
299  ldwork = max( 1, nrows )
300  IF( nrows.GT.0 .AND. nrhs.GT.0 )
301  $ CALL zlacpy( 'Full', nrows, nrhs,
302  $ copyb, ldb, c, ldb )
303  CALL zqrt16( trans, m, n, nrhs, copya,
304  $ lda, b, ldb, c, ldb, rwork,
305  $ result( 1 ) )
306 *
307  IF( ( itran.EQ.1 .AND. m.GE.n ) .OR.
308  $ ( itran.EQ.2 .AND. m.LT.n ) ) THEN
309 *
310 * Solving LS system
311 *
312  result( 2 ) = zqrt17( trans, 1, m, n,
313  $ nrhs, copya, lda, b, ldb,
314  $ copyb, ldb, c, work,
315  $ lwork )
316  ELSE
317 *
318 * Solving overdetermined system
319 *
320  result( 2 ) = zqrt14( trans, m, n,
321  $ nrhs, copya, lda, b, ldb,
322  $ work, lwork )
323  END IF
324 *
325 * Print information about the tests that
326 * did not pass the threshold.
327 *
328  DO 20 k = 1, 2
329  IF( result( k ).GE.thresh ) THEN
330  IF( nfail.EQ.0 .AND. nerrs.EQ.0 )
331  $ CALL alahd( nout, path )
332  WRITE( nout, fmt = 9999 )trans, m,
333  $ n, nrhs, nb, itype, k,
334  $ result( k )
335  nfail = nfail + 1
336  END IF
337  20 continue
338  nrun = nrun + 2
339  30 continue
340 *
341 * Deallocate T
342 *
343  CALL plasma_dealloc_handle( ht, info )
344  40 continue
345  END IF
346 *
347  100 continue
348  110 continue
349  120 continue
350  130 continue
351  140 continue
352 *
353 * Print a summary of the results.
354 *
355  CALL alasvm( path, nout, nfail, nrun, nerrs )
356 *
357  9999 format( ' TRANS=''', a1, ''', M=', i5, ', N=', i5, ', NRHS=', i4,
358  $ ', NB=', i4, ', type', i2, ', test(', i2, ')=', g12.5 )
359  9998 format( ' M=', i5, ', N=', i5, ', NRHS=', i4, ', NB=', i4,
360  $ ', type', i2, ', test(', i2, ')=', g12.5 )
361  return
362 *
363 * End of ZDRVLS
364 *
365  END