d2/d01/dlaqr5_8f_source.html

 *> \brief \b DLAQR5 performs a single small-bulge multi-shift QR sweep.
 *
 *  =========== DOCUMENTATION ===========
 *
 * Online html documentation available at
 *            http://www.netlib.org/lapack/explore-html/
 *
 *> \htmlonly
 *> Download DLAQR5 + dependencies
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaqr5.f">
 *> [TGZ]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaqr5.f">
 *> [ZIP]</a>
 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaqr5.f">
 *> [TXT]</a>
 *> \endhtmlonly
 *
 *  Definition:
 *  ===========
 *
 *       SUBROUTINE DLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS,
 *                          SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U,
 *                          LDU, NV, WV, LDWV, NH, WH, LDWH )
 *
 *       .. Scalar Arguments ..
 *       INTEGER            IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
 *      $                   LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
 *       LOGICAL            WANTT, WANTZ
 *       ..
 *       .. Array Arguments ..
 *       DOUBLE PRECISION   H( LDH, * ), SI( * ), SR( * ), U( LDU, * ),
 *      $                   V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ),
 *      $                   Z( LDZ, * )
 *       ..
 *
 *
 *> \par Purpose:
 *  =============
 *>
 *> \verbatim
 *>
 *>    DLAQR5, called by DLAQR0, performs a
 *>    single small-bulge multi-shift QR sweep.
 *> \endverbatim
 *
 *  Arguments:
 *  ==========
 *
 *> \param[in] WANTT
 *> \verbatim
 *>          WANTT is LOGICAL
 *>             WANTT = .true. if the quasi-triangular Schur factor
 *>             is being computed.  WANTT is set to .false. otherwise.
 *> \endverbatim
 *>
 *> \param[in] WANTZ
 *> \verbatim
 *>          WANTZ is LOGICAL
 *>             WANTZ = .true. if the orthogonal Schur factor is being
 *>             computed.  WANTZ is set to .false. otherwise.
 *> \endverbatim
 *>
 *> \param[in] KACC22
 *> \verbatim
 *>          KACC22 is INTEGER with value 0, 1, or 2.
 *>             Specifies the computation mode of far-from-diagonal
 *>             orthogonal updates.
 *>        = 0: DLAQR5 does not accumulate reflections and does not
 *>             use matrix-matrix multiply to update far-from-diagonal
 *>             matrix entries.
 *>        = 1: DLAQR5 accumulates reflections and uses matrix-matrix
 *>             multiply to update the far-from-diagonal matrix entries.
 *>        = 2: Same as KACC22 = 1. This option used to enable exploiting
 *>             the 2-by-2 structure during matrix multiplications, but
 *>             this is no longer supported.
 *> \endverbatim
 *>
 *> \param[in] N
 *> \verbatim
 *>          N is INTEGER
 *>             N is the order of the Hessenberg matrix H upon which this
 *>             subroutine operates.
 *> \endverbatim
 *>
 *> \param[in] KTOP
 *> \verbatim
 *>          KTOP is INTEGER
 *> \endverbatim
 *>
 *> \param[in] KBOT
 *> \verbatim
 *>          KBOT is INTEGER
 *>             These are the first and last rows and columns of an
 *>             isolated diagonal block upon which the QR sweep is to be
 *>             applied. It is assumed without a check that
 *>                       either KTOP = 1  or   H(KTOP,KTOP-1) = 0
 *>             and
 *>                       either KBOT = N  or   H(KBOT+1,KBOT) = 0.
 *> \endverbatim
 *>
 *> \param[in] NSHFTS
 *> \verbatim
 *>          NSHFTS is INTEGER
 *>             NSHFTS gives the number of simultaneous shifts.  NSHFTS
 *>             must be positive and even.
 *> \endverbatim
 *>
 *> \param[in,out] SR
 *> \verbatim
 *>          SR is DOUBLE PRECISION array, dimension (NSHFTS)
 *> \endverbatim
 *>
 *> \param[in,out] SI
 *> \verbatim
 *>          SI is DOUBLE PRECISION array, dimension (NSHFTS)
 *>             SR contains the real parts and SI contains the imaginary
 *>             parts of the NSHFTS shifts of origin that define the
 *>             multi-shift QR sweep.  On output SR and SI may be
 *>             reordered.
 *> \endverbatim
 *>
 *> \param[in,out] H
 *> \verbatim
 *>          H is DOUBLE PRECISION array, dimension (LDH,N)
 *>             On input H contains a Hessenberg matrix.  On output a
 *>             multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied
 *>             to the isolated diagonal block in rows and columns KTOP
 *>             through KBOT.
 *> \endverbatim
 *>
 *> \param[in] LDH
 *> \verbatim
 *>          LDH is INTEGER
 *>             LDH is the leading dimension of H just as declared in the
 *>             calling procedure.  LDH >= MAX(1,N).
 *> \endverbatim
 *>
 *> \param[in] ILOZ
 *> \verbatim
 *>          ILOZ is INTEGER
 *> \endverbatim
 *>
 *> \param[in] IHIZ
 *> \verbatim
 *>          IHIZ is INTEGER
 *>             Specify the rows of Z to which transformations must be
 *>             applied if WANTZ is .TRUE.. 1 <= ILOZ <= IHIZ <= N
 *> \endverbatim
 *>
 *> \param[in,out] Z
 *> \verbatim
 *>          Z is DOUBLE PRECISION array, dimension (LDZ,IHIZ)
 *>             If WANTZ = .TRUE., then the QR Sweep orthogonal
 *>             similarity transformation is accumulated into
 *>             Z(ILOZ:IHIZ,ILOZ:IHIZ) from the right.
 *>             If WANTZ = .FALSE., then Z is unreferenced.
 *> \endverbatim
 *>
 *> \param[in] LDZ
 *> \verbatim
 *>          LDZ is INTEGER
 *>             LDZ is the leading dimension of Z just as declared in
 *>             the calling procedure. LDZ >= N.
 *> \endverbatim
 *>
 *> \param[out] V
 *> \verbatim
 *>          V is DOUBLE PRECISION array, dimension (LDV,NSHFTS/2)
 *> \endverbatim
 *>
 *> \param[in] LDV
 *> \verbatim
 *>          LDV is INTEGER
 *>             LDV is the leading dimension of V as declared in the
 *>             calling procedure.  LDV >= 3.
 *> \endverbatim
 *>
 *> \param[out] U
 *> \verbatim
 *>          U is DOUBLE PRECISION array, dimension (LDU,2*NSHFTS)
 *> \endverbatim
 *>
 *> \param[in] LDU
 *> \verbatim
 *>          LDU is INTEGER
 *>             LDU is the leading dimension of U just as declared in
 *>             the calling subroutine.  LDU >= 2*NSHFTS.
 *> \endverbatim
 *>
 *> \param[in] NV
 *> \verbatim
 *>          NV is INTEGER
 *>             NV is the number of rows in WV available for workspace.
 *>             NV >= 1.
 *> \endverbatim
 *>
 *> \param[out] WV
 *> \verbatim
 *>          WV is DOUBLE PRECISION array, dimension (LDWV,2*NSHFTS)
 *> \endverbatim
 *>
 *> \param[in] LDWV
 *> \verbatim
 *>          LDWV is INTEGER
 *>             LDWV is the leading dimension of WV as declared in
 *>             the calling subroutine.  LDWV >= NV.
 *> \endverbatim
 *
 *> \param[in] NH
 *> \verbatim
 *>          NH is INTEGER
 *>             NH is the number of columns in array WH available for
 *>             workspace. NH >= 1.
 *> \endverbatim
 *>
 *> \param[out] WH
 *> \verbatim
 *>          WH is DOUBLE PRECISION array, dimension (LDWH,NH)
 *> \endverbatim
 *>
 *> \param[in] LDWH
 *> \verbatim
 *>          LDWH is INTEGER
 *>             Leading dimension of WH just as declared in the
 *>             calling procedure.  LDWH >= 2*NSHFTS.
 *> \endverbatim
 *>
 *  Authors:
 *  ========
 *
 *> \author Univ. of Tennessee
 *> \author Univ. of California Berkeley
 *> \author Univ. of Colorado Denver
 *> \author NAG Ltd.
 *
 *> \ingroup laqr5
 *
 *> \par Contributors:
 *  ==================
 *>
 *>       Karen Braman and Ralph Byers, Department of Mathematics,
 *>       University of Kansas, USA
 *>
 *>       Lars Karlsson, Daniel Kressner, and Bruno Lang
 *>
 *>       Thijs Steel, Department of Computer science,
 *>       KU Leuven, Belgium
 *
 *> \par References:
 *  ================
 *>
 *>       K. Braman, R. Byers and R. Mathias, The Multi-Shift QR
 *>       Algorithm Part I: Maintaining Well Focused Shifts, and Level 3
 *>       Performance, SIAM Journal on Matrix Analysis and Applications,
 *>       volume 23, pages 929--947, 2002.
 *>
 *>       Lars Karlsson, Daniel Kressner, and Bruno Lang, Optimally packed
 *>       chains of bulges in multishift QR algorithms.
 *>       ACM Trans. Math. Softw. 40, 2, Article 12 (February 2014).
 *>
 *  =====================================================================
       SUBROUTINE dlaqr5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS,
      $                   SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U,
      $                   LDU, NV, WV, LDWV, NH, WH, LDWH )
 !
 !  Reviewer overview (derived from the statements in this routine):
 !
 !  One call performs one sweep over the active block of H in rows
 !  and columns KTOP..KBOT.  The sweep chases NBMPS = NS/2 bulges,
 !  where NS is NSHFTS rounded down to an even number, in chunks of
 !  2*NBMPS columns (outer loop 180).  Within a chunk, loop 145
 !  moves the whole chain of bulges one column at a time.
 !
 !  How the arguments are used below:
 !    WANTT   .TRUE.: row updates extend to column N and column
 !            updates start at row 1; otherwise both are confined
 !            to KTOP..KBOT.
 !    WANTZ   .TRUE.: the same reflections are applied to columns
 !            of Z, rows ILOZ..IHIZ.
 !    KACC22  1 or 2 sets ACCUM: reflections are collected in U and
 !            applied outside the window with DGEMM.  Any other
 !            value applies each reflection directly.
 !    SR, SI  shifts; the pair (2*M-1, 2*M) is passed to DLAQR1 for
 !            bulge M.  Entries may be permuted by the pairing loop.
 !    V       column M holds reflector M: V(1,M) is the scalar that
 !            DLARFG returns as TAU, V(2:3,M) is the vector tail.
 !    U       KDU-by-KDU accumulator (KDU = 4*NBMPS), reset for
 !            every chunk by DLASET when ACCUM is set.
 !    WH, WV  DGEMM output buffers, copied back with DLACPY.  NH
 !            and NV bound the strip size of each DGEMM call.
 !
 !  The routine returns without modifying anything when NSHFTS < 2
 !  or KTOP >= KBOT.  No other argument checking is done here.
 !
       IMPLICIT NONE
 *
 *  -- LAPACK auxiliary routine --
 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 *
 *     .. Scalar Arguments ..
       INTEGER            IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
      $                   ldwh, ldwv, ldz, n, nh, nshfts, nv
       LOGICAL            WANTT, WANTZ
 *     ..
 *     .. Array Arguments ..
       DOUBLE PRECISION   H( ldh, * ), SI( * ), SR( * ), U( ldu, * ),
      $                   v( ldv, * ), wh( ldwh, * ), wv( ldwv, * ),
      $                   z( ldz, * )
 *     ..
 *
 *  ================================================================
 *     .. Parameters ..
       DOUBLE PRECISION   ZERO, ONE
       parameter( zero = 0.0d0, one = 1.0d0 )
 *     ..
 *     .. Local Scalars ..
       DOUBLE PRECISION   ALPHA, BETA, H11, H12, H21, H22, REFSUM,
      $                   safmax, safmin, scl, smlnum, swap, t1, t2,
      $                   t3, tst1, tst2, ulp
       INTEGER            I, I2, I4, INCOL, J, JBOT, JCOL, JLEN,
      $                   jrow, jtop, k, k1, kdu, kms, krcol,
      $                   m, m22, mbot, mtop, nbmps, ndcol,
      $                   ns, nu
       LOGICAL            ACCUM, BMP22
 *     ..
 *     .. External Functions ..
       DOUBLE PRECISION   DLAMCH
       EXTERNAL           dlamch
 *     ..
 *     .. Intrinsic Functions ..
 *
       INTRINSIC          abs, dble, max, min, mod
 *     ..
 *     .. Local Arrays ..
 !     VT is scratch for a trial reflector built when a bulge has
 !     collapsed (see the "Atypical case" branch in loop 80).
       DOUBLE PRECISION   VT( 3 )
 *     ..
 *     .. External Subroutines ..
       EXTERNAL           dgemm, dlabad, dlacpy, dlaqr1, dlarfg, dlaset,
      $                   dtrmm
 !     NOTE(review): dtrmm is declared EXTERNAL above but no call to
 !     it appears anywhere in this routine.
 *     ..
 *     .. Executable Statements ..
 *
 *     ==== If there are no shifts, then there is nothing to do. ====
 *
       IF( nshfts.LT.2 )
      $   RETURN
 *
 *     ==== If the active block is empty or 1-by-1, then there
 *     .    is nothing to do. ====
 *
       IF( ktop.GE.kbot )
      $   RETURN
 *
 *     ==== Shuffle shifts into pairs of real shifts and pairs
 *     .    of complex conjugate shifts assuming complex
 *     .    conjugate shifts are already adjacent to one
 *     .    another. ====
 *
 !     When SI(I) and SI(I+1) do not cancel, entries I, I+1, I+2 of
 !     both SR and SI are rotated left by one position.
       DO 10 i = 1, nshfts - 2, 2
          IF( si( i ).NE.-si( i+1 ) ) THEN
 *
             swap = sr( i )
             sr( i ) = sr( i+1 )
             sr( i+1 ) = sr( i+2 )
             sr( i+2 ) = swap
 *
             swap = si( i )
             si( i ) = si( i+1 )
             si( i+1 ) = si( i+2 )
             si( i+2 ) = swap
          END IF
    10 CONTINUE
 *
 *     ==== NSHFTS is supposed to be even, but if it is odd,
 *     .    then simply reduce it by one.  The shuffle above
 *     .    ensures that the dropped shift is real and that
 *     .    the remaining shifts are paired. ====
 *
       ns = nshfts - mod( nshfts, 2 )
 *
 *     ==== Machine constants for deflation ====
 *
 !     NOTE(review): DLABAD receives SAFMIN/SAFMAX and may adjust
 !     them; its body is not visible here.  SAFMAX is not read again
 !     in this routine.  SMLNUM is the absolute floor used in the
 !     deflation tests below.
       safmin = dlamch( 'SAFE MINIMUM' )
       safmax = one / safmin
       CALL dlabad( safmin, safmax )
       ulp = dlamch( 'PRECISION' )
       smlnum = safmin*( dble( n ) / ulp )
 *
 *     ==== Use accumulated reflections to update far-from-diagonal
 *     .    entries ? ====
 *
       accum = ( kacc22.EQ.1 ) .OR. ( kacc22.EQ.2 )
 *
 *     ==== clear trash ====
 *
       IF( ktop+2.LE.kbot )
      $   h( ktop+2, ktop ) = zero
 *
 *     ==== NBMPS = number of 2-shift bulges in the chain ====
 *
       nbmps = ns / 2
 *
 *     ==== KDU = width of slab ====
 *
       kdu = 4*nbmps
 *
 *     ==== Create and chase chains of NBMPS bulges ====
 *
 !     INCOL starts at KTOP-2*NBMPS+1 (before KTOP) and advances by
 !     2*NBMPS columns per iteration.
       DO 180 incol = ktop - 2*nbmps + 1, kbot - 2, 2*nbmps
 *
 *        JTOP = Index from which updates from the right start.
 *
 !        With ACCUM set, in-window column updates start at row
 !        MAX(KTOP,INCOL); rows above are handled after loop 145 by
 !        the "Vertical multiply".
          IF( accum ) THEN
             jtop = max( ktop, incol )
          ELSE IF( wantt ) THEN
             jtop = 1
          ELSE
             jtop = ktop
          END IF
 *
          ndcol = incol + kdu
 !        Reset the leading KDU-by-KDU part of U: off-diagonal
 !        entries to ZERO, diagonal entries to ONE.
          IF( accum )
      $      CALL dlaset( 'ALL', kdu, kdu, zero, one, u, ldu )
 *
 *        ==== Near-the-diagonal bulge chase.  The following loop
 *        .    performs the near-the-diagonal part of a small bulge
 *        .    multi-shift QR sweep.  Each 4*NBMPS column diagonal
 *        .    chunk extends from column INCOL to column NDCOL
 *        .    (including both column INCOL and column NDCOL). The
 *        .    following loop chases a 2*NBMPS+1 column long chain of
 *        .    NBMPS bulges 2*NBMPS columns to the right.  (INCOL
 *        .    may be less than KTOP and and NDCOL may be greater than
 *        .    KBOT indicating phantom columns from which to chase
 *        .    bulges before they are actually introduced or to which
 *        .    to chase bulges beyond column KBOT.)  ====
 *
          DO 145 krcol = incol, min( incol+2*nbmps-1, kbot-2 )
 *
 *           ==== Bulges number MTOP to MBOT are active double implicit
 *           .    shift bulges.  There may or may not also be small
 *           .    2-by-2 bulge, if there is room.  The inactive bulges
 *           .    (if any) must wait until the active bulges have moved
 *           .    down the diagonal to make room.  The phantom matrix
 *           .    paradigm described above helps keep track.  ====
 *
 !           BMP22 is set when fewer than NBMPS bulges are active and
 !           the position of bulge M22 = MBOT+1 is exactly KBOT-2,
 !           where only rows KBOT-1 and KBOT remain below it.
             mtop = max( 1, ( ktop-krcol ) / 2+1 )
             mbot = min( nbmps, ( kbot-krcol-1 ) / 2 )
             m22 = mbot + 1
             bmp22 = ( mbot.LT.nbmps ) .AND. ( krcol+2*( m22-1 ) ).EQ.
      $              ( kbot-2 )
 *
 *           ==== Generate reflections to chase the chain right
 *           .    one column.  (The minimum value of K is KTOP-1.) ====
 *
             IF ( bmp22 ) THEN
 *
 *              ==== Special case: 2-by-2 reflection at bottom treated
 *              .    separately ====
 *
 !              K = KTOP-1: the bulge is being introduced, so the
 !              vector comes from DLAQR1 and the shifts.  Otherwise
 !              it is taken from column K of H, and the value DLARFG
 !              leaves in BETA is stored back as H(K+1,K).
                k = krcol + 2*( m22-1 )
                IF( k.EQ.ktop-1 ) THEN
                   CALL dlaqr1( 2, h( k+1, k+1 ), ldh, sr( 2*m22-1 ),
      $                         si( 2*m22-1 ), sr( 2*m22 ), si( 2*m22 ),
      $                         v( 1, m22 ) )
                   beta = v( 1, m22 )
                   CALL dlarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
                ELSE
                   beta = h( k+1, k )
                   v( 2, m22 ) = h( k+2, k )
                   CALL dlarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
                   h( k+1, k ) = beta
                   h( k+2, k ) = zero
                END IF

 *
 *              ==== Perform update from right within
 *              .    computational window. ====
 *
 !              T1 and T2 are V(1,M22) times the reflector entries
 !              (the first entry is implicitly one), so each update
 !              is a subtraction of REFSUM*T1 or REFSUM*T2.
                t1 = v( 1, m22 )
                t2 = t1*v( 2, m22 )
                DO 30 j = jtop, min( kbot, k+3 )
                   refsum = h( j, k+1 ) + v( 2, m22 )*h( j, k+2 )
                   h( j, k+1 ) = h( j, k+1 ) - refsum*t1
                   h( j, k+2 ) = h( j, k+2 ) - refsum*t2
    30          CONTINUE
 *
 *              ==== Perform update from left within
 *              .    computational window. ====
 *
                IF( accum ) THEN
                   jbot = min( ndcol, kbot )
                ELSE IF( wantt ) THEN
                   jbot = n
                ELSE
                   jbot = kbot
                END IF
                t1 = v( 1, m22 )
                t2 = t1*v( 2, m22 )
                DO 40 j = k+1, jbot
                   refsum = h( k+1, j ) + v( 2, m22 )*h( k+2, j )
                   h( k+1, j ) = h( k+1, j ) - refsum*t1
                   h( k+2, j ) = h( k+2, j ) - refsum*t2
    40          CONTINUE
 *
 *              ==== The following convergence test requires that
 *              .    the tradition small-compared-to-nearby-diagonals
 *              .    criterion and the Ahues & Tisseur (LAWN 122, 1997)
 *              .    criteria both be satisfied.  The latter improves
 *              .    accuracy in some examples. Falling back on an
 *              .    alternate convergence criterion when TST1 or TST2
 *              .    is zero (as done here) is traditional but probably
 *              .    unnecessary. ====
 *
 !              The test is skipped when K < KTOP because H(K+1,K)
 !              would then lie outside the active block.
                IF( k.GE.ktop ) THEN
                   IF( h( k+1, k ).NE.zero ) THEN
                      tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) )
                      IF( tst1.EQ.zero ) THEN
                         IF( k.GE.ktop+1 )
      $                     tst1 = tst1 + abs( h( k, k-1 ) )
                         IF( k.GE.ktop+2 )
      $                     tst1 = tst1 + abs( h( k, k-2 ) )
                         IF( k.GE.ktop+3 )
      $                     tst1 = tst1 + abs( h( k, k-3 ) )
                         IF( k.LE.kbot-2 )
      $                     tst1 = tst1 + abs( h( k+2, k+1 ) )
                         IF( k.LE.kbot-3 )
      $                     tst1 = tst1 + abs( h( k+3, k+1 ) )
                         IF( k.LE.kbot-4 )
      $                     tst1 = tst1 + abs( h( k+4, k+1 ) )
                      END IF
                      IF( abs( h( k+1, k ) )
      $                   .LE.max( smlnum, ulp*tst1 ) ) THEN
                         h12 = max( abs( h( k+1, k ) ),
      $                             abs( h( k, k+1 ) ) )
                         h21 = min( abs( h( k+1, k ) ),
      $                             abs( h( k, k+1 ) ) )
                         h11 = max( abs( h( k+1, k+1 ) ),
      $                             abs( h( k, k )-h( k+1, k+1 ) ) )
                         h22 = min( abs( h( k+1, k+1 ) ),
      $                        abs( h( k, k )-h( k+1, k+1 ) ) )
                         scl = h11 + h12
                         tst2 = h22*( h11 / scl )
 *
                         IF( tst2.EQ.zero .OR. h21*( h12 / scl ).LE.
      $                      max( smlnum, ulp*tst2 ) ) THEN
                            h( k+1, k ) = zero
                         END IF
                      END IF
                   END IF
                END IF
 *
 *              ==== Accumulate orthogonal transformations. ====
 *
 !              KMS is the column offset of the reflector inside U
 !              (U is indexed relative to INCOL).
                IF( accum ) THEN
                   kms = k - incol
                   t1 = v( 1, m22 )
                   t2 = t1*v( 2, m22 )
                   DO 50 j = max( 1, ktop-incol ), kdu
                      refsum = u( j, kms+1 ) + v( 2, m22 )*u( j, kms+2 )
                      u( j, kms+1 ) = u( j, kms+1 ) - refsum*t1
                      u( j, kms+2 ) = u( j, kms+2 ) - refsum*t2
   50                 CONTINUE
                ELSE IF( wantz ) THEN
                   t1 = v( 1, m22 )
                   t2 = t1*v( 2, m22 )
                   DO 60 j = iloz, ihiz
                      refsum = z( j, k+1 )+v( 2, m22 )*z( j, k+2 )
                      z( j, k+1 ) = z( j, k+1 ) - refsum*t1
                      z( j, k+2 ) = z( j, k+2 ) - refsum*t2
   60              CONTINUE
                END IF
             END IF
 *
 *           ==== Normal case: Chain of 3-by-3 reflections ====
 *
 !           Bulges are processed from the bottom of the chain (MBOT)
 !           up to the top (MTOP).  K is the column holding bulge M.
             DO 80 m = mbot, mtop, -1
                k = krcol + 2*( m-1 )
                IF( k.EQ.ktop-1 ) THEN
                   CALL dlaqr1( 3, h( ktop, ktop ), ldh, sr( 2*m-1 ),
      $                         si( 2*m-1 ), sr( 2*m ), si( 2*m ),
      $                         v( 1, m ) )
                   alpha = v( 1, m )
                   CALL dlarfg( 3, alpha, v( 2, m ), 1, v( 1, m ) )
                ELSE
 *
 *                 ==== Perform delayed transformation of row below
 *                 .    Mth bulge. Exploit fact that first two elements
 *                 .    of row are actually zero. ====
 *
 !                 V(:,M) still holds the reflector from the previous
 !                 KRCOL step at this point; it is overwritten by the
 !                 DLARFG call further down.
                   refsum = v( 1, m )*v( 3, m )*h( k+3, k+2 )
                   h( k+3, k   ) = -refsum
                   h( k+3, k+1 ) = -refsum*v( 2, m )
                   h( k+3, k+2 ) = h( k+3, k+2 ) - refsum*v( 3, m )
 *
 *                 ==== Calculate reflection to move
 *                 .    Mth bulge one step. ====
 *
                   beta      = h( k+1, k )
                   v( 2, m ) = h( k+2, k )
                   v( 3, m ) = h( k+3, k )
                   CALL dlarfg( 3, beta, v( 2, m ), 1, v( 1, m ) )
 *
 *                 ==== A Bulge may collapse because of vigilant
 *                 .    deflation or destructive underflow.  In the
 *                 .    underflow case, try the two-small-subdiagonals
 *                 .    trick to try to reinflate the bulge.  ====
 *
                   IF( h( k+3, k ).NE.zero .OR. h( k+3, k+1 ).NE.
      $                zero .OR. h( k+3, k+2 ).EQ.zero ) THEN
 *
 *                    ==== Typical case: not collapsed (yet). ====
 *
                      h( k+1, k ) = beta
                      h( k+2, k ) = zero
                      h( k+3, k ) = zero
                   ELSE
 *
 *                    ==== Atypical case: collapsed.  Attempt to
 *                    .    reintroduce ignoring H(K+1,K) and H(K+2,K).
 *                    .    If the fill resulting from the new
 *                    .    reflector is too large, then abandon it.
 *                    .    Otherwise, use the new one. ====
 *
 !                    The trial reflector is built in VT so that
 !                    V(:,M) is kept intact until it is accepted.
                      CALL dlaqr1( 3, h( k+1, k+1 ), ldh, sr( 2*m-1 ),
      $                            si( 2*m-1 ), sr( 2*m ), si( 2*m ),
      $                            vt )
                      alpha = vt( 1 )
                      CALL dlarfg( 3, alpha, vt( 2 ), 1, vt( 1 ) )
                      refsum = vt( 1 )*( h( k+1, k )+vt( 2 )*
      $                        h( k+2, k ) )
 *
                      IF( abs( h( k+2, k )-refsum*vt( 2 ) )+
      $                   abs( refsum*vt( 3 ) ).GT.ulp*
      $                   ( abs( h( k, k ) )+abs( h( k+1,
      $                   k+1 ) )+abs( h( k+2, k+2 ) ) ) ) THEN
 *
 *                       ==== Starting a new bulge here would
 *                       .    create non-negligible fill.  Use
 *                       .    the old one with trepidation. ====
 *
                         h( k+1, k ) = beta
                         h( k+2, k ) = zero
                         h( k+3, k ) = zero
                      ELSE
 *
 *                       ==== Starting a new bulge here would
 *                       .    create only negligible fill.
 *                       .    Replace the old reflector with
 *                       .    the new one. ====
 *
                         h( k+1, k ) = h( k+1, k ) - refsum
                         h( k+2, k ) = zero
                         h( k+3, k ) = zero
                         v( 1, m ) = vt( 1 )
                         v( 2, m ) = vt( 2 )
                         v( 3, m ) = vt( 3 )
                      END IF
                   END IF
                END IF
 *
 *              ====  Apply reflection from the right and
 *              .     the first column of update from the left.
 *              .     These updates are required for the vigilant
 *              .     deflation check. We still delay most of the
 *              .     updates from the left for efficiency. ====
 *
 !              T1, T2, T3 are V(1,M) times the reflector entries
 !              (the first entry is implicitly one).
                t1 = v( 1, m )
                t2 = t1*v( 2, m )
                t3 = t1*v( 3, m )
                DO 70 j = jtop, min( kbot, k+3 )
                   refsum = h( j, k+1 ) + v( 2, m )*h( j, k+2 )
      $                     + v( 3, m )*h( j, k+3 )
                   h( j, k+1 ) = h( j, k+1 ) - refsum*t1
                   h( j, k+2 ) = h( j, k+2 ) - refsum*t2
                   h( j, k+3 ) = h( j, k+3 ) - refsum*t3
    70          CONTINUE
 *
 *              ==== Perform update from left for subsequent
 *              .    column. ====
 *
                refsum = h( k+1, k+1 ) + v( 2, m )*h( k+2, k+1 )
      $                  + v( 3, m )*h( k+3, k+1 )
                h( k+1, k+1 ) = h( k+1, k+1 ) - refsum*t1
                h( k+2, k+1 ) = h( k+2, k+1 ) - refsum*t2
                h( k+3, k+1 ) = h( k+3, k+1 ) - refsum*t3
 *
 *              ==== The following convergence test requires that
 *              .    the tradition small-compared-to-nearby-diagonals
 *              .    criterion and the Ahues & Tisseur (LAWN 122, 1997)
 *              .    criteria both be satisfied.  The latter improves
 *              .    accuracy in some examples. Falling back on an
 *              .    alternate convergence criterion when TST1 or TST2
 *              .    is zero (as done here) is traditional but probably
 *              .    unnecessary. ====
 *
 !              CYCLE moves on to the next M of loop 80, skipping the
 !              deflation test for a bulge with K < KTOP.
                IF( k.LT.ktop)
      $              cycle
                IF( h( k+1, k ).NE.zero ) THEN
                   tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) )
                   IF( tst1.EQ.zero ) THEN
                      IF( k.GE.ktop+1 )
      $                  tst1 = tst1 + abs( h( k, k-1 ) )
                      IF( k.GE.ktop+2 )
      $                  tst1 = tst1 + abs( h( k, k-2 ) )
                      IF( k.GE.ktop+3 )
      $                  tst1 = tst1 + abs( h( k, k-3 ) )
                      IF( k.LE.kbot-2 )
      $                  tst1 = tst1 + abs( h( k+2, k+1 ) )
                      IF( k.LE.kbot-3 )
      $                  tst1 = tst1 + abs( h( k+3, k+1 ) )
                      IF( k.LE.kbot-4 )
      $                  tst1 = tst1 + abs( h( k+4, k+1 ) )
                   END IF
                   IF( abs( h( k+1, k ) ).LE.max( smlnum, ulp*tst1 ) )
      $                 THEN
                      h12 = max( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
                      h21 = min( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
                      h11 = max( abs( h( k+1, k+1 ) ),
      $                     abs( h( k, k )-h( k+1, k+1 ) ) )
                      h22 = min( abs( h( k+1, k+1 ) ),
      $                     abs( h( k, k )-h( k+1, k+1 ) ) )
                      scl = h11 + h12
                      tst2 = h22*( h11 / scl )
 *
                      IF( tst2.EQ.zero .OR. h21*( h12 / scl ).LE.
      $                   max( smlnum, ulp*tst2 ) ) THEN
                         h( k+1, k ) = zero
                      END IF
                   END IF
                END IF
    80       CONTINUE
 *
 *           ==== Multiply H by reflections from the left ====
 *
             IF( accum ) THEN
                jbot = min( ndcol, kbot )
             ELSE IF( wantt ) THEN
                jbot = n
             ELSE
                jbot = kbot
             END IF
 *
 !           Columns start at KRCOL+2*M = K+2 (never below KTOP);
 !           column K+1 was already updated inside loop 80.
             DO 100 m = mbot, mtop, -1
                k = krcol + 2*( m-1 )
                t1 = v( 1, m )
                t2 = t1*v( 2, m )
                t3 = t1*v( 3, m )
                DO 90 j = max( ktop, krcol + 2*m ), jbot
                   refsum = h( k+1, j ) + v( 2, m )*h( k+2, j )
      $                     + v( 3, m )*h( k+3, j )
                   h( k+1, j ) = h( k+1, j ) - refsum*t1
                   h( k+2, j ) = h( k+2, j ) - refsum*t2
                   h( k+3, j ) = h( k+3, j ) - refsum*t3
    90          CONTINUE
   100       CONTINUE
 *
 *           ==== Accumulate orthogonal transformations. ====
 *
             IF( accum ) THEN
 *
 *              ==== Accumulate U. (If needed, update Z later
 *              .    with an efficient matrix-matrix
 *              .    multiply.) ====
 *
 !              Only rows I2..I4 of U are updated for each reflector;
 !              KMS is the reflector's column offset relative to
 !              INCOL.
                DO 120 m = mbot, mtop, -1
                   k = krcol + 2*( m-1 )
                   kms = k - incol
                   i2 = max( 1, ktop-incol )
                   i2 = max( i2, kms-(krcol-incol)+1 )
                   i4 = min( kdu, krcol + 2*( mbot-1 ) - incol + 5 )
                   t1 = v( 1, m )
                   t2 = t1*v( 2, m )
                   t3 = t1*v( 3, m )
                   DO 110 j = i2, i4
                      refsum = u( j, kms+1 ) + v( 2, m )*u( j, kms+2 )
      $                        + v( 3, m )*u( j, kms+3 )
                      u( j, kms+1 ) = u( j, kms+1 ) - refsum*t1
                      u( j, kms+2 ) = u( j, kms+2 ) - refsum*t2
                      u( j, kms+3 ) = u( j, kms+3 ) - refsum*t3
   110             CONTINUE
   120          CONTINUE
             ELSE IF( wantz ) THEN
 *
 *              ==== U is not accumulated, so update Z
 *              .    now by multiplying by reflections
 *              .    from the right. ====
 *
                DO 140 m = mbot, mtop, -1
                   k = krcol + 2*( m-1 )
                   t1 = v( 1, m )
                   t2 = t1*v( 2, m )
                   t3 = t1*v( 3, m )
                   DO 130 j = iloz, ihiz
                      refsum = z( j, k+1 ) + v( 2, m )*z( j, k+2 )
      $                        + v( 3, m )*z( j, k+3 )
                      z( j, k+1 ) = z( j, k+1 ) - refsum*t1
                      z( j, k+2 ) = z( j, k+2 ) - refsum*t2
                      z( j, k+3 ) = z( j, k+3 ) - refsum*t3
   130             CONTINUE
   140          CONTINUE
             END IF
 *
 *           ==== End of near-the-diagonal bulge chase. ====
 *
   145    CONTINUE
 *
 *        ==== Use U (if accumulated) to update far-from-diagonal
 *        .    entries in H.  If required, use U to update Z as
 *        .    well. ====
 *
          IF( accum ) THEN
             IF( wantt ) THEN
                jtop = 1
                jbot = n
             ELSE
                jtop = ktop
                jbot = kbot
             END IF
 !           K1 and NU pick the NU-by-NU block of U starting at
 !           U(K1,K1).  In H it maps to rows/columns INCOL+K1 through
 !           MIN(NDCOL,KBOT), so phantom positions before KTOP or
 !           past KBOT are left out.
             k1 = max( 1, ktop-incol )
             nu = ( kdu-max( 0, ndcol-kbot ) ) - k1 + 1
 *
 *           ==== Horizontal Multiply ====
 *
 !           For the columns right of the window, in strips of at
 !           most NH columns: WH = U**T * H(strip), then WH is copied
 !           back over the strip.
             DO 150 jcol = min( ndcol, kbot ) + 1, jbot, nh
                jlen = min( nh, jbot-jcol+1 )
                CALL dgemm( 'C', 'N', nu, jlen, nu, one, u( k1, k1 ),
      $                        ldu, h( incol+k1, jcol ), ldh, zero, wh,
      $                        ldwh )
                CALL dlacpy( 'ALL', nu, jlen, wh, ldwh,
      $                         h( incol+k1, jcol ), ldh )
   150       CONTINUE
 *
 *           ==== Vertical multiply ====
 *
 !           For rows JTOP..MAX(KTOP,INCOL)-1, in strips of at most
 !           NV rows: WV = H(strip) * U, then WV is copied back over
 !           the strip.
             DO 160 jrow = jtop, max( ktop, incol ) - 1, nv
                jlen = min( nv, max( ktop, incol )-jrow )
                CALL dgemm( 'N', 'N', jlen, nu, nu, one,
      $                     h( jrow, incol+k1 ), ldh, u( k1, k1 ),
      $                     ldu, zero, wv, ldwv )
                CALL dlacpy( 'ALL', jlen, nu, wv, ldwv,
      $                      h( jrow, incol+k1 ), ldh )
   160       CONTINUE
 *
 *           ==== Z multiply (also vertical) ====
 *
 !           Same product as the vertical multiply, applied to rows
 !           ILOZ..IHIZ of Z in strips of at most NV rows.
             IF( wantz ) THEN
                DO 170 jrow = iloz, ihiz, nv
                   jlen = min( nv, ihiz-jrow+1 )
                   CALL dgemm( 'N', 'N', jlen, nu, nu, one,
      $                        z( jrow, incol+k1 ), ldz, u( k1, k1 ),
      $                        ldu, zero, wv, ldwv )
                   CALL dlacpy( 'ALL', jlen, nu, wv, ldwv,
      $                         z( jrow, incol+k1 ), ldz )
   170          CONTINUE
             END IF
          END IF
   180 CONTINUE
 *
 *     ==== End of DLAQR5 ====
 *
       END
dlarfg
subroutine dlarfg(N, ALPHA, X, INCX, TAU)
DLARFG generates an elementary reflector (Householder matrix).
Definition: dlarfg.f:106

dlacpy
subroutine dlacpy(UPLO, M, N, A, LDA, B, LDB)
DLACPY copies all or part of one two-dimensional array to another.
Definition: dlacpy.f:103

dgemm
subroutine dgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
DGEMM
Definition: dgemm.f:187

dlaqr5
subroutine dlaqr5(WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, LDU, NV, WV, LDWV, NH, WH, LDWH)
DLAQR5 performs a single small-bulge multi-shift QR sweep.
Definition: dlaqr5.f:265

dlaset
subroutine dlaset(UPLO, M, N, ALPHA, BETA, A, LDA)
DLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values...
Definition: dlaset.f:110

dlaqr1
subroutine dlaqr1(N, H, LDH, SR1, SI1, SR2, SI2, V)
DLAQR1 sets a scalar multiple of the first column of the product of 2-by-2 or 3-by-3 matrix H and spe...
Definition: dlaqr1.f:121

dtrmm
subroutine dtrmm(SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, B, LDB)
DTRMM
Definition: dtrmm.f:177

dlabad
subroutine dlabad(SMALL, LARGE)
DLABAD
Definition: dlabad.f:74