PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
sporfs.f
Go to the documentation of this file.
1  SUBROUTINE sporfs( UPLO, N, NRHS, A, LDA, AF, LDAF, B, LDB, X,
2  $ ldx, ferr, berr, work, iwork, info )
3 *
4  include 'plasmaf.h'
5 *
6 * -- LAPACK routine (version 3.2) --
7 * Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
8 * November 2006
9 *
10 * Modified to call SLACN2 in place of SLACON, 7 Feb 03, SJH.
11 *
12 * .. Scalar Arguments ..
13  CHARACTER uplo
14  INTEGER info, lda, ldaf, ldb, ldx, n, nrhs
15 * ..
16 * .. Array Arguments ..
17  INTEGER iwork( * )
18  REAL a( lda, * ), af( ldaf, * ), b( ldb, * ),
19  $ berr( * ), ferr( * ), work( * ), x( ldx, * )
20 * ..
21 *
22 * Purpose
23 * =======
24 *
25 * SPORFS improves the computed solution to a system of linear
26 * equations when the coefficient matrix is symmetric positive definite,
27 * and provides error bounds and backward error estimates for the
28 * solution.
29 *
30 * Arguments
31 * =========
32 *
33 * UPLO (input) CHARACTER*1
34 * = 'U': Upper triangle of A is stored;
35 * = 'L': Lower triangle of A is stored.
36 *
37 * N (input) INTEGER
38 * The order of the matrix A. N >= 0.
39 *
40 * NRHS (input) INTEGER
41 * The number of right hand sides, i.e., the number of columns
42 * of the matrices B and X. NRHS >= 0.
43 *
44 * A (input) REAL array, dimension (LDA,N)
45 * The symmetric matrix A. If UPLO = 'U', the leading N-by-N
46 * upper triangular part of A contains the upper triangular part
47 * of the matrix A, and the strictly lower triangular part of A
48 * is not referenced. If UPLO = 'L', the leading N-by-N lower
49 * triangular part of A contains the lower triangular part of
50 * the matrix A, and the strictly upper triangular part of A is
51 * not referenced.
52 *
53 * LDA (input) INTEGER
54 * The leading dimension of the array A. LDA >= max(1,N).
55 *
56 * AF (input) REAL array, dimension (LDAF,N)
57 * The triangular factor U or L from the Cholesky factorization
58 * A = U**T*U or A = L*L**T, as computed by SPOTRF.
59 *
60 * LDAF (input) INTEGER
61 * The leading dimension of the array AF. LDAF >= max(1,N).
62 *
63 * B (input) REAL array, dimension (LDB,NRHS)
64 * The right hand side matrix B.
65 *
66 * LDB (input) INTEGER
67 * The leading dimension of the array B. LDB >= max(1,N).
68 *
69 * X (input/output) REAL array, dimension (LDX,NRHS)
70 * On entry, the solution matrix X, as computed by SPOTRS.
71 * On exit, the improved solution matrix X.
72 *
73 * LDX (input) INTEGER
74 * The leading dimension of the array X. LDX >= max(1,N).
75 *
76 * FERR (output) REAL array, dimension (NRHS)
77 * The estimated forward error bound for each solution vector
78 * X(j) (the j-th column of the solution matrix X).
79 * If XTRUE is the true solution corresponding to X(j), FERR(j)
80 * is an estimated upper bound for the magnitude of the largest
81 * element in (X(j) - XTRUE) divided by the magnitude of the
82 * largest element in X(j). The estimate is as reliable as
83 * the estimate for RCOND, and is almost always a slight
84 * overestimate of the true error.
85 *
86 * BERR (output) REAL array, dimension (NRHS)
87 * The componentwise relative backward error of each solution
88 * vector X(j) (i.e., the smallest relative change in
89 * any element of A or B that makes X(j) an exact solution).
90 *
91 * WORK (workspace) REAL array, dimension (3*N)
92 *
93 * IWORK (workspace) INTEGER array, dimension (N)
94 *
95 * INFO (output) INTEGER
96 * = 0: successful exit
97 * < 0: if INFO = -i, the i-th argument had an illegal value
98 *
99 * Internal Parameters
100 * ===================
101 *
102 * ITMAX is the maximum number of steps of iterative refinement.
103 *
104 * =====================================================================
105 *
106 * .. Parameters ..
107  INTEGER itmax
108  parameter( itmax = 5 )
109  REAL zero
110  parameter( zero = 0.0e+0 )
111  REAL one
112  parameter( one = 1.0e+0 )
113  REAL two
114  parameter( two = 2.0e+0 )
115  REAL three
116  parameter( three = 3.0e+0 )
117 * ..
118 * .. Local Scalars ..
119  LOGICAL upper
120  INTEGER count, i, j, k, kase, nz, plasma_uplo
121  REAL eps, lstres, s, safe1, safe2, safmin, xk
122 * ..
123 * .. Local Arrays ..
124  INTEGER isave( 3 )
125 * ..
126 * .. External Subroutines ..
127  EXTERNAL saxpy, scopy, slacn2, spotrs, ssymv, xerbla
128 * ..
129 * .. Intrinsic Functions ..
130  INTRINSIC abs, max
131 * ..
132 * .. External Functions ..
133  LOGICAL lsame
134  REAL slamch
135  EXTERNAL lsame, slamch
136 * ..
137 * .. Executable Statements ..
138 *
139 * Test the input parameters.
140 *
141  info = 0
142  upper = lsame( uplo, 'U' )
143  IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
144  info = -1
145  ELSE IF( n.LT.0 ) THEN
146  info = -2
147  ELSE IF( nrhs.LT.0 ) THEN
148  info = -3
149  ELSE IF( lda.LT.max( 1, n ) ) THEN
150  info = -5
151  ELSE IF( ldaf.LT.max( 1, n ) ) THEN
152  info = -7
153  ELSE IF( ldb.LT.max( 1, n ) ) THEN
154  info = -9
155  ELSE IF( ldx.LT.max( 1, n ) ) THEN
156  info = -11
157  END IF
158  IF( info.NE.0 ) THEN
159  CALL xerbla( 'SPORFS', -info )
160  return
161  END IF
162 *
163 * Quick return if possible
164 *
165  IF( n.EQ.0 .OR. nrhs.EQ.0 ) THEN
166  DO 10 j = 1, nrhs
167  ferr( j ) = zero
168  berr( j ) = zero
169  10 continue
170  return
171  END IF
172 *
173  IF ( lsame( uplo, 'U' ) ) THEN
174  plasma_uplo = plasmaupper
175  ELSE
176  plasma_uplo = plasmalower
177  ENDIF
178 *
179 * NZ = maximum number of nonzero elements in each row of A, plus 1
180 *
181  nz = n + 1
182  eps = slamch( 'Epsilon' )
183  safmin = slamch( 'Safe minimum' )
184  safe1 = nz*safmin
185  safe2 = safe1 / eps
186 *
187 * Do for each right hand side
188 *
189  DO 140 j = 1, nrhs
190 *
191  count = 1
192  lstres = three
193  20 continue
194 *
195 * Loop until stopping criterion is satisfied.
196 *
197 * Compute residual R = B - A * X
198 *
199  CALL scopy( n, b( 1, j ), 1, work( n+1 ), 1 )
200  CALL ssymv( uplo, n, -one, a, lda, x( 1, j ), 1, one,
201  $ work( n+1 ), 1 )
202 *
203 * Compute componentwise relative backward error from formula
204 *
205 * max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) )
206 *
207 * where abs(Z) is the componentwise absolute value of the matrix
208 * or vector Z. If the i-th component of the denominator is less
209 * than SAFE2, then SAFE1 is added to the i-th components of the
210 * numerator and denominator before dividing.
211 *
212  DO 30 i = 1, n
213  work( i ) = abs( b( i, j ) )
214  30 continue
215 *
216 * Compute abs(A)*abs(X) + abs(B).
217 *
218  IF( upper ) THEN
219  DO 50 k = 1, n
220  s = zero
221  xk = abs( x( k, j ) )
222  DO 40 i = 1, k - 1
223  work( i ) = work( i ) + abs( a( i, k ) )*xk
224  s = s + abs( a( i, k ) )*abs( x( i, j ) )
225  40 continue
226  work( k ) = work( k ) + abs( a( k, k ) )*xk + s
227  50 continue
228  ELSE
229  DO 70 k = 1, n
230  s = zero
231  xk = abs( x( k, j ) )
232  work( k ) = work( k ) + abs( a( k, k ) )*xk
233  DO 60 i = k + 1, n
234  work( i ) = work( i ) + abs( a( i, k ) )*xk
235  s = s + abs( a( i, k ) )*abs( x( i, j ) )
236  60 continue
237  work( k ) = work( k ) + s
238  70 continue
239  END IF
240  s = zero
241  DO 80 i = 1, n
242  IF( work( i ).GT.safe2 ) THEN
243  s = max( s, abs( work( n+i ) ) / work( i ) )
244  ELSE
245  s = max( s, ( abs( work( n+i ) )+safe1 ) /
246  $ ( work( i )+safe1 ) )
247  END IF
248  80 continue
249  berr( j ) = s
250 *
251 * Test stopping criterion. Continue iterating if
252 * 1) The residual BERR(J) is larger than machine epsilon, and
253 * 2) BERR(J) decreased by at least a factor of 2 during the
254 * last iteration, and
255 * 3) At most ITMAX iterations tried.
256 *
257  IF( berr( j ).GT.eps .AND. two*berr( j ).LE.lstres .AND.
258  $ count.LE.itmax ) THEN
259 *
260 * Update solution and try again.
261 *
262  CALL plasma_spotrs( plasma_uplo, n, 1, af, ldaf,
263  $ work( n+1 ), n, info )
264  CALL saxpy( n, one, work( n+1 ), 1, x( 1, j ), 1 )
265  lstres = berr( j )
266  count = count + 1
267  go to 20
268  END IF
269 *
270 * Bound error from formula
271 *
272 * norm(X - XTRUE) / norm(X) .le. FERR =
273 * norm( abs(inv(A))*
274 * ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X)
275 *
276 * where
277 * norm(Z) is the magnitude of the largest component of Z
278 * inv(A) is the inverse of A
279 * abs(Z) is the componentwise absolute value of the matrix or
280 * vector Z
281 * NZ is the maximum number of nonzeros in any row of A, plus 1
282 * EPS is machine epsilon
283 *
284 * The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B))
285 * is incremented by SAFE1 if the i-th component of
286 * abs(A)*abs(X) + abs(B) is less than SAFE2.
287 *
288 * Use SLACN2 to estimate the infinity-norm of the matrix
289 * inv(A) * diag(W),
290 * where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) )))
291 *
292  DO 90 i = 1, n
293  IF( work( i ).GT.safe2 ) THEN
294  work( i ) = abs( work( n+i ) ) + nz*eps*work( i )
295  ELSE
296  work( i ) = abs( work( n+i ) ) + nz*eps*work( i ) + safe1
297  END IF
298  90 continue
299 *
300  kase = 0
301  100 continue
302  CALL slacn2( n, work( 2*n+1 ), work( n+1 ), iwork, ferr( j ),
303  $ kase, isave )
304  IF( kase.NE.0 ) THEN
305  IF( kase.EQ.1 ) THEN
306 *
307 * Multiply by diag(W)*inv(A').
308 *
309  CALL plasma_spotrs( plasma_uplo, n, 1, af, ldaf,
310  $ work( n+1 ), n, info )
311  DO 110 i = 1, n
312  work( n+i ) = work( i )*work( n+i )
313  110 continue
314  ELSE IF( kase.EQ.2 ) THEN
315 *
316 * Multiply by inv(A)*diag(W).
317 *
318  DO 120 i = 1, n
319  work( n+i ) = work( i )*work( n+i )
320  120 continue
321  CALL plasma_spotrs( plasma_uplo, n, 1, af, ldaf,
322  $ work( n+1 ), n, info )
323  END IF
324  go to 100
325  END IF
326 *
327 * Normalize error.
328 *
329  lstres = zero
330  DO 130 i = 1, n
331  lstres = max( lstres, abs( x( i, j ) ) )
332  130 continue
333  IF( lstres.NE.zero )
334  $ ferr( j ) = ferr( j ) / lstres
335 *
336  140 continue
337 *
338  return
339 *
340 * End of SPORFS
341 *
342  END