*  plasma/docs/sporfs_8f_source.html

      SUBROUTINE sporfs( UPLO, N, NRHS, A, LDA, AF, LDAF, B, LDB, X,
     $                   ldx, ferr, berr, work, iwork, info )
*
      include 'plasmaf.h'
*
*  -- LAPACK routine (version 3.2) --
*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.
*     November 2006
*
*     Modified to call SLACN2 in place of SLACON, 7 Feb 03, SJH.
*
*     This variant performs its triangular solves through
*     PLASMA_SPOTRS rather than SPOTRS.
*
*     .. Scalar Arguments ..
      CHARACTER          uplo
      INTEGER            info, lda, ldaf, ldb, ldx, n, nrhs
*     ..
*     .. Array Arguments ..
      INTEGER            iwork( * )
      REAL               a( lda, * ), af( ldaf, * ), b( ldb, * ),
     $                   berr( * ), ferr( * ), work( * ), x( ldx, * )
*     ..
*
*  Purpose
*  =======
*
*  SPORFS improves the computed solution to a system of linear
*  equations when the coefficient matrix is symmetric positive definite,
*  and provides error bounds and backward error estimates for the
*  solution.
*
*  Arguments
*  =========
*
*  UPLO    (input) CHARACTER*1
*          = 'U':  Upper triangle of A is stored;
*          = 'L':  Lower triangle of A is stored.
*
*  N       (input) INTEGER
*          The order of the matrix A.  N >= 0.
*
*  NRHS    (input) INTEGER
*          The number of right hand sides, i.e., the number of columns
*          of the matrices B and X.  NRHS >= 0.
*
*  A       (input) REAL array, dimension (LDA,N)
*          The symmetric matrix A.  If UPLO = 'U', the leading N-by-N
*          upper triangular part of A contains the upper triangular part
*          of the matrix A, and the strictly lower triangular part of A
*          is not referenced.  If UPLO = 'L', the leading N-by-N lower
*          triangular part of A contains the lower triangular part of
*          the matrix A, and the strictly upper triangular part of A is
*          not referenced.
*
*  LDA     (input) INTEGER
*          The leading dimension of the array A.  LDA >= max(1,N).
*
*  AF      (input) REAL array, dimension (LDAF,N)
*          The triangular factor U or L from the Cholesky factorization
*          A = U**T*U or A = L*L**T, as computed by SPOTRF.
*
*  LDAF    (input) INTEGER
*          The leading dimension of the array AF.  LDAF >= max(1,N).
*
*  B       (input) REAL array, dimension (LDB,NRHS)
*          The right hand side matrix B.
*
*  LDB     (input) INTEGER
*          The leading dimension of the array B.  LDB >= max(1,N).
*
*  X       (input/output) REAL array, dimension (LDX,NRHS)
*          On entry, the solution matrix X, as computed by SPOTRS.
*          On exit, the improved solution matrix X.
*
*  LDX     (input) INTEGER
*          The leading dimension of the array X.  LDX >= max(1,N).
*
*  FERR    (output) REAL array, dimension (NRHS)
*          The estimated forward error bound for each solution vector
*          X(j) (the j-th column of the solution matrix X).
*          If XTRUE is the true solution corresponding to X(j), FERR(j)
*          is an estimated upper bound for the magnitude of the largest
*          element in (X(j) - XTRUE) divided by the magnitude of the
*          largest element in X(j).  The estimate is as reliable as
*          the estimate for RCOND, and is almost always a slight
*          overestimate of the true error.
*
*  BERR    (output) REAL array, dimension (NRHS)
*          The componentwise relative backward error of each solution
*          vector X(j) (i.e., the smallest relative change in
*          any element of A or B that makes X(j) an exact solution).
*
*  WORK    (workspace) REAL array, dimension (3*N)
*          WORK(1:N)      abs(A)*abs(X)+abs(B), later overwritten by
*                         the weight vector W of the error bound.
*          WORK(N+1:2*N)  the residual B - A*X, then the vector
*                         exchanged with SLACN2.
*          WORK(2*N+1:3*N) passed to SLACN2 as its second argument.
*
*  IWORK   (workspace) INTEGER array, dimension (N)
*
*  INFO    (output) INTEGER
*          = 0:  successful exit
*          < 0:  if INFO = -i, the i-th argument had an illegal value
*          NOTE: INFO is also passed as the last argument of every
*          PLASMA_SPOTRS call and is not examined afterwards.
*          Presumably that routine stores its status there, in which
*          case the value on exit is the status of the last solve
*          (TODO confirm against the PLASMA Fortran interface).
*
*  Internal Parameters
*  ===================
*
*  ITMAX is the maximum number of steps of iterative refinement.
*
*  =====================================================================
*
*     .. Parameters ..
      INTEGER            itmax
      parameter( itmax = 5 )
      REAL               zero
      parameter( zero = 0.0e+0 )
      REAL               one
      parameter( one = 1.0e+0 )
      REAL               two
      parameter( two = 2.0e+0 )
      REAL               three
      parameter( three = 3.0e+0 )
*     ..
*     .. Local Scalars ..
      LOGICAL            upper
      INTEGER            count, i, j, k, kase, nz, plasma_uplo
      REAL               eps, lstres, s, safe1, safe2, safmin, xk
*     ..
*     .. Local Arrays ..
      INTEGER            isave( 3 )
*     ..
*     .. External Subroutines ..
      EXTERNAL           plasma_spotrs, saxpy, scopy, slacn2, ssymv,
     $                   xerbla
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          abs, max
*     ..
*     .. External Functions ..
      LOGICAL            lsame
      REAL               slamch
      EXTERNAL           lsame, slamch
*     ..
*     .. Executable Statements ..
*
*     Test the input parameters.
*
      info = 0
      upper = lsame( uplo, 'U' )
      IF( .NOT.upper .AND. .NOT.lsame( uplo, 'L' ) ) THEN
         info = -1
      ELSE IF( n.LT.0 ) THEN
         info = -2
      ELSE IF( nrhs.LT.0 ) THEN
         info = -3
      ELSE IF( lda.LT.max( 1, n ) ) THEN
         info = -5
      ELSE IF( ldaf.LT.max( 1, n ) ) THEN
         info = -7
      ELSE IF( ldb.LT.max( 1, n ) ) THEN
         info = -9
      ELSE IF( ldx.LT.max( 1, n ) ) THEN
         info = -11
      END IF
      IF( info.NE.0 ) THEN
         CALL xerbla( 'SPORFS', -info )
         return
      END IF
*
*     Quick return if possible
*
      IF( n.EQ.0 .OR. nrhs.EQ.0 ) THEN
         DO 10 j = 1, nrhs
            ferr( j ) = zero
            berr( j ) = zero
   10    continue
         return
      END IF
*
*     Translate UPLO into the integer code handed to PLASMA_SPOTRS.
*     PLASMAUPPER / PLASMALOWER are not declared in this routine;
*     presumably they are constants supplied by plasmaf.h.
*
      IF( upper ) THEN
         plasma_uplo = plasmaupper
      ELSE
         plasma_uplo = plasmalower
      END IF
*
*     NZ = maximum number of nonzero elements in each row of A, plus 1
*
      nz = n + 1
      eps = slamch( 'Epsilon' )
      safmin = slamch( 'Safe minimum' )
      safe1 = nz*safmin
      safe2 = safe1 / eps
*
*     Do for each right hand side
*
      DO 140 j = 1, nrhs
*
*        COUNT numbers the refinement pass; LSTRES holds the backward
*        error of the previous pass (seeded with 3 so that the first
*        pass always satisfies the "halved" test below).
*
         count = 1
         lstres = three
   20    continue
*
*        Loop until stopping criterion is satisfied.
*
*        Compute residual R = B - A * X
*
         CALL scopy( n, b( 1, j ), 1, work( n+1 ), 1 )
         CALL ssymv( uplo, n, -one, a, lda, x( 1, j ), 1, one,
     $               work( n+1 ), 1 )
*
*        Compute componentwise relative backward error from formula
*
*        max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) )
*
*        where abs(Z) is the componentwise absolute value of the matrix
*        or vector Z.  If the i-th component of the denominator is less
*        than SAFE2, then SAFE1 is added to the i-th components of the
*        numerator and denominator before dividing.
*
         DO 30 i = 1, n
            work( i ) = abs( b( i, j ) )
   30    continue
*
*        Compute abs(A)*abs(X) + abs(B).
*
*        Only the stored triangle of A is read: each off-diagonal
*        entry a(i,k) contributes once to row i (directly) and once
*        to row k (through the accumulator S).
*
         IF( upper ) THEN
            DO 50 k = 1, n
               s = zero
               xk = abs( x( k, j ) )
               DO 40 i = 1, k - 1
                  work( i ) = work( i ) + abs( a( i, k ) )*xk
                  s = s + abs( a( i, k ) )*abs( x( i, j ) )
   40          continue
               work( k ) = work( k ) + abs( a( k, k ) )*xk + s
   50       continue
         ELSE
            DO 70 k = 1, n
               s = zero
               xk = abs( x( k, j ) )
               work( k ) = work( k ) + abs( a( k, k ) )*xk
               DO 60 i = k + 1, n
                  work( i ) = work( i ) + abs( a( i, k ) )*xk
                  s = s + abs( a( i, k ) )*abs( x( i, j ) )
   60          continue
               work( k ) = work( k ) + s
   70       continue
         END IF
         s = zero
         DO 80 i = 1, n
            IF( work( i ).GT.safe2 ) THEN
               s = max( s, abs( work( n+i ) ) / work( i ) )
            ELSE
               s = max( s, ( abs( work( n+i ) )+safe1 ) /
     $             ( work( i )+safe1 ) )
            END IF
   80    continue
         berr( j ) = s
*
*        Test stopping criterion. Continue iterating if
*           1) The residual BERR(J) is larger than machine epsilon, and
*           2) BERR(J) decreased by at least a factor of 2 during the
*              last iteration, and
*           3) At most ITMAX iterations tried.
*
         IF( berr( j ).GT.eps .AND. two*berr( j ).LE.lstres .AND.
     $       count.LE.itmax ) THEN
*
*           Update solution and try again.
*
*           The residual in WORK(N+1:2*N) is overwritten by the
*           correction, which is then added to column J of X.
*
            CALL plasma_spotrs( plasma_uplo, n, 1, af, ldaf,
     $                         work( n+1 ), n, info )
            CALL saxpy( n, one, work( n+1 ), 1, x( 1, j ), 1 )
            lstres = berr( j )
            count = count + 1
            go to 20
         END IF
*
*        Bound error from formula
*
*        norm(X - XTRUE) / norm(X) .le. FERR =
*        norm( abs(inv(A))*
*           ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X)
*
*        where
*          norm(Z) is the magnitude of the largest component of Z
*          inv(A) is the inverse of A
*          abs(Z) is the componentwise absolute value of the matrix or
*             vector Z
*          NZ is the maximum number of nonzeros in any row of A, plus 1
*          EPS is machine epsilon
*
*        The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B))
*        is incremented by SAFE1 if the i-th component of
*        abs(A)*abs(X) + abs(B) is less than SAFE2.
*
*        Use SLACN2 to estimate the infinity-norm of the matrix
*           inv(A) * diag(W),
*        where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) )
*
         DO 90 i = 1, n
            IF( work( i ).GT.safe2 ) THEN
               work( i ) = abs( work( n+i ) ) + nz*eps*work( i )
            ELSE
               work( i ) = abs( work( n+i ) ) + nz*eps*work( i ) + safe1
            END IF
   90    continue
*
*        Reverse-communication loop: SLACN2 is called repeatedly and
*        KASE selects which product to apply to WORK(N+1:2*N) before
*        the next call; KASE = 0 ends the loop.
*
         kase = 0
  100    continue
         CALL slacn2( n, work( 2*n+1 ), work( n+1 ), iwork, ferr( j ),
     $                kase, isave )
         IF( kase.NE.0 ) THEN
            IF( kase.EQ.1 ) THEN
*
*              Multiply by diag(W)*inv(A').
*
               CALL plasma_spotrs( plasma_uplo, n, 1, af, ldaf,
     $                            work( n+1 ), n, info )
               DO 110 i = 1, n
                  work( n+i ) = work( i )*work( n+i )
  110          continue
            ELSE IF( kase.EQ.2 ) THEN
*
*              Multiply by inv(A)*diag(W).
*
               DO 120 i = 1, n
                  work( n+i ) = work( i )*work( n+i )
  120          continue
               CALL plasma_spotrs( plasma_uplo, n, 1, af, ldaf,
     $                            work( n+1 ), n, info )
            END IF
            go to 100
         END IF
*
*        Normalize error.
*
*        LSTRES is reused here as the largest magnitude in column J
*        of X; the division is skipped when that column is all zero.
*
         lstres = zero
         DO 130 i = 1, n
            lstres = max( lstres, abs( x( i, j ) ) )
  130    continue
         IF( lstres.NE.zero )
     $      ferr( j ) = ferr( j ) / lstres
*
  140 continue
*
      return
*
*     End of SPORFS
*
      END