LAPACK  3.11.0
LAPACK: Linear Algebra PACKage
dlaqr5.f
1 *> \brief \b DLAQR5 performs a single small-bulge multi-shift QR sweep.
2 *
3 * =========== DOCUMENTATION ===========
4 *
5 * Online html documentation available at
6 * http://www.netlib.org/lapack/explore-html/
7 *
8 *> \htmlonly
9 *> Download DLAQR5 + dependencies
10 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaqr5.f">
11 *> [TGZ]</a>
12 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaqr5.f">
13 *> [ZIP]</a>
14 *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaqr5.f">
15 *> [TXT]</a>
16 *> \endhtmlonly
17 *
18 * Definition:
19 * ===========
20 *
21 * SUBROUTINE DLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS,
22 * SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U,
23 * LDU, NV, WV, LDWV, NH, WH, LDWH )
24 *
25 * .. Scalar Arguments ..
26 * INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
27 * $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
28 * LOGICAL WANTT, WANTZ
29 * ..
30 * .. Array Arguments ..
31 * DOUBLE PRECISION H( LDH, * ), SI( * ), SR( * ), U( LDU, * ),
32 * $ V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ),
33 * $ Z( LDZ, * )
34 * ..
35 *
36 *
37 *> \par Purpose:
38 * =============
39 *>
40 *> \verbatim
41 *>
42 *> DLAQR5, called by DLAQR0, performs a
43 *> single small-bulge multi-shift QR sweep.
44 *> \endverbatim
45 *
46 * Arguments:
47 * ==========
48 *
49 *> \param[in] WANTT
50 *> \verbatim
51 *> WANTT is LOGICAL
52 *> WANTT = .true. if the quasi-triangular Schur factor
53 *> is being computed. WANTT is set to .false. otherwise.
54 *> \endverbatim
55 *>
56 *> \param[in] WANTZ
57 *> \verbatim
58 *> WANTZ is LOGICAL
59 *> WANTZ = .true. if the orthogonal Schur factor is being
60 *> computed. WANTZ is set to .false. otherwise.
61 *> \endverbatim
62 *>
63 *> \param[in] KACC22
64 *> \verbatim
65 *> KACC22 is INTEGER with value 0, 1, or 2.
66 *> Specifies the computation mode of far-from-diagonal
67 *> orthogonal updates.
68 *> = 0: DLAQR5 does not accumulate reflections and does not
69 *> use matrix-matrix multiply to update far-from-diagonal
70 *> matrix entries.
71 *> = 1: DLAQR5 accumulates reflections and uses matrix-matrix
72 *> multiply to update the far-from-diagonal matrix entries.
73 *> = 2: Same as KACC22 = 1. This option used to enable exploiting
74 *> the 2-by-2 structure during matrix multiplications, but
75 *> this is no longer supported.
76 *> \endverbatim
77 *>
78 *> \param[in] N
79 *> \verbatim
80 *> N is INTEGER
81 *> N is the order of the Hessenberg matrix H upon which this
82 *> subroutine operates.
83 *> \endverbatim
84 *>
85 *> \param[in] KTOP
86 *> \verbatim
87 *> KTOP is INTEGER
88 *> \endverbatim
89 *>
90 *> \param[in] KBOT
91 *> \verbatim
92 *> KBOT is INTEGER
93 *> These are the first and last rows and columns of an
94 *> isolated diagonal block upon which the QR sweep is to be
95 *> applied. It is assumed without a check that
96 *> either KTOP = 1 or H(KTOP,KTOP-1) = 0
97 *> and
98 *> either KBOT = N or H(KBOT+1,KBOT) = 0.
99 *> \endverbatim
100 *>
101 *> \param[in] NSHFTS
102 *> \verbatim
103 *> NSHFTS is INTEGER
104 *> NSHFTS gives the number of simultaneous shifts. NSHFTS
105 *> must be positive and even.
106 *> \endverbatim
107 *>
108 *> \param[in,out] SR
109 *> \verbatim
110 *> SR is DOUBLE PRECISION array, dimension (NSHFTS)
111 *> \endverbatim
112 *>
113 *> \param[in,out] SI
114 *> \verbatim
115 *> SI is DOUBLE PRECISION array, dimension (NSHFTS)
116 *> SR contains the real parts and SI contains the imaginary
117 *> parts of the NSHFTS shifts of origin that define the
118 *> multi-shift QR sweep. On output SR and SI may be
119 *> reordered.
120 *> \endverbatim
121 *>
122 *> \param[in,out] H
123 *> \verbatim
124 *> H is DOUBLE PRECISION array, dimension (LDH,N)
125 *> On input H contains a Hessenberg matrix. On output a
126 *> multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied
127 *> to the isolated diagonal block in rows and columns KTOP
128 *> through KBOT.
129 *> \endverbatim
130 *>
131 *> \param[in] LDH
132 *> \verbatim
133 *> LDH is INTEGER
134 *> LDH is the leading dimension of H just as declared in the
135 *> calling procedure. LDH >= MAX(1,N).
136 *> \endverbatim
137 *>
138 *> \param[in] ILOZ
139 *> \verbatim
140 *> ILOZ is INTEGER
141 *> \endverbatim
142 *>
143 *> \param[in] IHIZ
144 *> \verbatim
145 *> IHIZ is INTEGER
146 *> Specify the rows of Z to which transformations must be
147 *> applied if WANTZ is .TRUE.. 1 <= ILOZ <= IHIZ <= N
148 *> \endverbatim
149 *>
150 *> \param[in,out] Z
151 *> \verbatim
152 *> Z is DOUBLE PRECISION array, dimension (LDZ,IHIZ)
153 *> If WANTZ = .TRUE., then the QR Sweep orthogonal
154 *> similarity transformation is accumulated into
155 *> Z(ILOZ:IHIZ,ILOZ:IHIZ) from the right.
156 *> If WANTZ = .FALSE., then Z is unreferenced.
157 *> \endverbatim
158 *>
159 *> \param[in] LDZ
160 *> \verbatim
161 *> LDZ is INTEGER
162 *> LDA is the leading dimension of Z just as declared in
163 *> the calling procedure. LDZ >= N.
164 *> \endverbatim
165 *>
166 *> \param[out] V
167 *> \verbatim
168 *> V is DOUBLE PRECISION array, dimension (LDV,NSHFTS/2)
169 *> \endverbatim
170 *>
171 *> \param[in] LDV
172 *> \verbatim
173 *> LDV is INTEGER
174 *> LDV is the leading dimension of V as declared in the
175 *> calling procedure. LDV >= 3.
176 *> \endverbatim
177 *>
178 *> \param[out] U
179 *> \verbatim
180 *> U is DOUBLE PRECISION array, dimension (LDU,2*NSHFTS)
181 *> \endverbatim
182 *>
183 *> \param[in] LDU
184 *> \verbatim
185 *> LDU is INTEGER
186 *> LDU is the leading dimension of U just as declared in the
187 *> in the calling subroutine. LDU >= 2*NSHFTS.
188 *> \endverbatim
189 *>
190 *> \param[in] NV
191 *> \verbatim
192 *> NV is INTEGER
193 *> NV is the number of rows in WV agailable for workspace.
194 *> NV >= 1.
195 *> \endverbatim
196 *>
197 *> \param[out] WV
198 *> \verbatim
199 *> WV is DOUBLE PRECISION array, dimension (LDWV,2*NSHFTS)
200 *> \endverbatim
201 *>
202 *> \param[in] LDWV
203 *> \verbatim
204 *> LDWV is INTEGER
205 *> LDWV is the leading dimension of WV as declared in the
206 *> in the calling subroutine. LDWV >= NV.
207 *> \endverbatim
208 *
209 *> \param[in] NH
210 *> \verbatim
211 *> NH is INTEGER
212 *> NH is the number of columns in array WH available for
213 *> workspace. NH >= 1.
214 *> \endverbatim
215 *>
216 *> \param[out] WH
217 *> \verbatim
218 *> WH is DOUBLE PRECISION array, dimension (LDWH,NH)
219 *> \endverbatim
220 *>
221 *> \param[in] LDWH
222 *> \verbatim
223 *> LDWH is INTEGER
224 *> Leading dimension of WH just as declared in the
225 *> calling procedure. LDWH >= 2*NSHFTS.
226 *> \endverbatim
227 *>
228 * Authors:
229 * ========
230 *
231 *> \author Univ. of Tennessee
232 *> \author Univ. of California Berkeley
233 *> \author Univ. of Colorado Denver
234 *> \author NAG Ltd.
235 *
236 *> \ingroup laqr5
237 *
238 *> \par Contributors:
239 * ==================
240 *>
241 *> Karen Braman and Ralph Byers, Department of Mathematics,
242 *> University of Kansas, USA
243 *>
244 *> Lars Karlsson, Daniel Kressner, and Bruno Lang
245 *>
246 *> Thijs Steel, Department of Computer science,
247 *> KU Leuven, Belgium
248 *
249 *> \par References:
250 * ================
251 *>
252 *> K. Braman, R. Byers and R. Mathias, The Multi-Shift QR
253 *> Algorithm Part I: Maintaining Well Focused Shifts, and Level 3
254 *> Performance, SIAM Journal of Matrix Analysis, volume 23, pages
255 *> 929--947, 2002.
256 *>
257 *> Lars Karlsson, Daniel Kressner, and Bruno Lang, Optimally packed
258 *> chains of bulges in multishift QR algorithms.
259 *> ACM Trans. Math. Softw. 40, 2, Article 12 (February 2014).
260 *>
261 * =====================================================================
262  SUBROUTINE dlaqr5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS,
263  $ SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U,
264  $ LDU, NV, WV, LDWV, NH, WH, LDWH )
265  IMPLICIT NONE
266 *
267 * -- LAPACK auxiliary routine --
268 * -- LAPACK is a software package provided by Univ. of Tennessee, --
269 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
270 *
271 * .. Scalar Arguments ..
272  INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
273  $ ldwh, ldwv, ldz, n, nh, nshfts, nv
274  LOGICAL WANTT, WANTZ
275 * ..
276 * .. Array Arguments ..
277  DOUBLE PRECISION H( ldh, * ), SI( * ), SR( * ), U( ldu, * ),
278  $ v( ldv, * ), wh( ldwh, * ), wv( ldwv, * ),
279  $ z( ldz, * )
280 * ..
281 *
282 * ================================================================
283 * .. Parameters ..
284  DOUBLE PRECISION ZERO, ONE
285  parameter( zero = 0.0d0, one = 1.0d0 )
286 * ..
287 * .. Local Scalars ..
288  DOUBLE PRECISION ALPHA, BETA, H11, H12, H21, H22, REFSUM,
289  $ safmax, safmin, scl, smlnum, swap, t1, t2,
290  $ t3, tst1, tst2, ulp
291  INTEGER I, I2, I4, INCOL, J, JBOT, JCOL, JLEN,
292  $ jrow, jtop, k, k1, kdu, kms, krcol,
293  $ m, m22, mbot, mtop, nbmps, ndcol,
294  $ ns, nu
295  LOGICAL ACCUM, BMP22
296 * ..
297 * .. External Functions ..
298  DOUBLE PRECISION DLAMCH
299  EXTERNAL dlamch
300 * ..
301 * .. Intrinsic Functions ..
302 *
303  INTRINSIC abs, dble, max, min, mod
304 * ..
305 * .. Local Arrays ..
306  DOUBLE PRECISION VT( 3 )
307 * ..
308 * .. External Subroutines ..
309  EXTERNAL dgemm, dlabad, dlacpy, dlaqr1, dlarfg, dlaset,
310  $ dtrmm
311 * ..
312 * .. Executable Statements ..
313 *
314 * ==== If there are no shifts, then there is nothing to do. ====
315 *
316  IF( nshfts.LT.2 )
317  $ RETURN
318 *
319 * ==== If the active block is empty or 1-by-1, then there
320 * . is nothing to do. ====
321 *
322  IF( ktop.GE.kbot )
323  $ RETURN
324 *
325 * ==== Shuffle shifts into pairs of real shifts and pairs
326 * . of complex conjugate shifts assuming complex
327 * . conjugate shifts are already adjacent to one
328 * . another. ====
329 *
330  DO 10 i = 1, nshfts - 2, 2
331  IF( si( i ).NE.-si( i+1 ) ) THEN
332 *
333  swap = sr( i )
334  sr( i ) = sr( i+1 )
335  sr( i+1 ) = sr( i+2 )
336  sr( i+2 ) = swap
337 *
338  swap = si( i )
339  si( i ) = si( i+1 )
340  si( i+1 ) = si( i+2 )
341  si( i+2 ) = swap
342  END IF
343  10 CONTINUE
344 *
345 * ==== NSHFTS is supposed to be even, but if it is odd,
346 * . then simply reduce it by one. The shuffle above
347 * . ensures that the dropped shift is real and that
348 * . the remaining shifts are paired. ====
349 *
350  ns = nshfts - mod( nshfts, 2 )
351 *
352 * ==== Machine constants for deflation ====
353 *
354  safmin = dlamch( 'SAFE MINIMUM' )
355  safmax = one / safmin
356  CALL dlabad( safmin, safmax )
357  ulp = dlamch( 'PRECISION' )
358  smlnum = safmin*( dble( n ) / ulp )
359 *
360 * ==== Use accumulated reflections to update far-from-diagonal
361 * . entries ? ====
362 *
363  accum = ( kacc22.EQ.1 ) .OR. ( kacc22.EQ.2 )
364 *
365 * ==== clear trash ====
366 *
367  IF( ktop+2.LE.kbot )
368  $ h( ktop+2, ktop ) = zero
369 *
370 * ==== NBMPS = number of 2-shift bulges in the chain ====
371 *
372  nbmps = ns / 2
373 *
374 * ==== KDU = width of slab ====
375 *
376  kdu = 4*nbmps
377 *
378 * ==== Create and chase chains of NBMPS bulges ====
379 *
380  DO 180 incol = ktop - 2*nbmps + 1, kbot - 2, 2*nbmps
381 *
382 * JTOP = Index from which updates from the right start.
383 *
384  IF( accum ) THEN
385  jtop = max( ktop, incol )
386  ELSE IF( wantt ) THEN
387  jtop = 1
388  ELSE
389  jtop = ktop
390  END IF
391 *
392  ndcol = incol + kdu
393  IF( accum )
394  $ CALL dlaset( 'ALL', kdu, kdu, zero, one, u, ldu )
395 *
396 * ==== Near-the-diagonal bulge chase. The following loop
397 * . performs the near-the-diagonal part of a small bulge
398 * . multi-shift QR sweep. Each 4*NBMPS column diagonal
399 * . chunk extends from column INCOL to column NDCOL
400 * . (including both column INCOL and column NDCOL). The
401 * . following loop chases a 2*NBMPS+1 column long chain of
402 * . NBMPS bulges 2*NBMPS columns to the right. (INCOL
403 * . may be less than KTOP and and NDCOL may be greater than
404 * . KBOT indicating phantom columns from which to chase
405 * . bulges before they are actually introduced or to which
406 * . to chase bulges beyond column KBOT.) ====
407 *
408  DO 145 krcol = incol, min( incol+2*nbmps-1, kbot-2 )
409 *
410 * ==== Bulges number MTOP to MBOT are active double implicit
411 * . shift bulges. There may or may not also be small
412 * . 2-by-2 bulge, if there is room. The inactive bulges
413 * . (if any) must wait until the active bulges have moved
414 * . down the diagonal to make room. The phantom matrix
415 * . paradigm described above helps keep track. ====
416 *
417  mtop = max( 1, ( ktop-krcol ) / 2+1 )
418  mbot = min( nbmps, ( kbot-krcol-1 ) / 2 )
419  m22 = mbot + 1
420  bmp22 = ( mbot.LT.nbmps ) .AND. ( krcol+2*( m22-1 ) ).EQ.
421  $ ( kbot-2 )
422 *
423 * ==== Generate reflections to chase the chain right
424 * . one column. (The minimum value of K is KTOP-1.) ====
425 *
426  IF ( bmp22 ) THEN
427 *
428 * ==== Special case: 2-by-2 reflection at bottom treated
429 * . separately ====
430 *
431  k = krcol + 2*( m22-1 )
432  IF( k.EQ.ktop-1 ) THEN
433  CALL dlaqr1( 2, h( k+1, k+1 ), ldh, sr( 2*m22-1 ),
434  $ si( 2*m22-1 ), sr( 2*m22 ), si( 2*m22 ),
435  $ v( 1, m22 ) )
436  beta = v( 1, m22 )
437  CALL dlarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
438  ELSE
439  beta = h( k+1, k )
440  v( 2, m22 ) = h( k+2, k )
441  CALL dlarfg( 2, beta, v( 2, m22 ), 1, v( 1, m22 ) )
442  h( k+1, k ) = beta
443  h( k+2, k ) = zero
444  END IF
445 
446 *
447 * ==== Perform update from right within
448 * . computational window. ====
449 *
450  t1 = v( 1, m22 )
451  t2 = t1*v( 2, m22 )
452  DO 30 j = jtop, min( kbot, k+3 )
453  refsum = h( j, k+1 ) + v( 2, m22 )*h( j, k+2 )
454  h( j, k+1 ) = h( j, k+1 ) - refsum*t1
455  h( j, k+2 ) = h( j, k+2 ) - refsum*t2
456  30 CONTINUE
457 *
458 * ==== Perform update from left within
459 * . computational window. ====
460 *
461  IF( accum ) THEN
462  jbot = min( ndcol, kbot )
463  ELSE IF( wantt ) THEN
464  jbot = n
465  ELSE
466  jbot = kbot
467  END IF
468  t1 = v( 1, m22 )
469  t2 = t1*v( 2, m22 )
470  DO 40 j = k+1, jbot
471  refsum = h( k+1, j ) + v( 2, m22 )*h( k+2, j )
472  h( k+1, j ) = h( k+1, j ) - refsum*t1
473  h( k+2, j ) = h( k+2, j ) - refsum*t2
474  40 CONTINUE
475 *
476 * ==== The following convergence test requires that
477 * . the tradition small-compared-to-nearby-diagonals
478 * . criterion and the Ahues & Tisseur (LAWN 122, 1997)
479 * . criteria both be satisfied. The latter improves
480 * . accuracy in some examples. Falling back on an
481 * . alternate convergence criterion when TST1 or TST2
482 * . is zero (as done here) is traditional but probably
483 * . unnecessary. ====
484 *
485  IF( k.GE.ktop ) THEN
486  IF( h( k+1, k ).NE.zero ) THEN
487  tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) )
488  IF( tst1.EQ.zero ) THEN
489  IF( k.GE.ktop+1 )
490  $ tst1 = tst1 + abs( h( k, k-1 ) )
491  IF( k.GE.ktop+2 )
492  $ tst1 = tst1 + abs( h( k, k-2 ) )
493  IF( k.GE.ktop+3 )
494  $ tst1 = tst1 + abs( h( k, k-3 ) )
495  IF( k.LE.kbot-2 )
496  $ tst1 = tst1 + abs( h( k+2, k+1 ) )
497  IF( k.LE.kbot-3 )
498  $ tst1 = tst1 + abs( h( k+3, k+1 ) )
499  IF( k.LE.kbot-4 )
500  $ tst1 = tst1 + abs( h( k+4, k+1 ) )
501  END IF
502  IF( abs( h( k+1, k ) )
503  $ .LE.max( smlnum, ulp*tst1 ) ) THEN
504  h12 = max( abs( h( k+1, k ) ),
505  $ abs( h( k, k+1 ) ) )
506  h21 = min( abs( h( k+1, k ) ),
507  $ abs( h( k, k+1 ) ) )
508  h11 = max( abs( h( k+1, k+1 ) ),
509  $ abs( h( k, k )-h( k+1, k+1 ) ) )
510  h22 = min( abs( h( k+1, k+1 ) ),
511  $ abs( h( k, k )-h( k+1, k+1 ) ) )
512  scl = h11 + h12
513  tst2 = h22*( h11 / scl )
514 *
515  IF( tst2.EQ.zero .OR. h21*( h12 / scl ).LE.
516  $ max( smlnum, ulp*tst2 ) ) THEN
517  h( k+1, k ) = zero
518  END IF
519  END IF
520  END IF
521  END IF
522 *
523 * ==== Accumulate orthogonal transformations. ====
524 *
525  IF( accum ) THEN
526  kms = k - incol
527  t1 = v( 1, m22 )
528  t2 = t1*v( 2, m22 )
529  DO 50 j = max( 1, ktop-incol ), kdu
530  refsum = u( j, kms+1 ) + v( 2, m22 )*u( j, kms+2 )
531  u( j, kms+1 ) = u( j, kms+1 ) - refsum*t1
532  u( j, kms+2 ) = u( j, kms+2 ) - refsum*t2
533  50 CONTINUE
534  ELSE IF( wantz ) THEN
535  t1 = v( 1, m22 )
536  t2 = t1*v( 2, m22 )
537  DO 60 j = iloz, ihiz
538  refsum = z( j, k+1 )+v( 2, m22 )*z( j, k+2 )
539  z( j, k+1 ) = z( j, k+1 ) - refsum*t1
540  z( j, k+2 ) = z( j, k+2 ) - refsum*t2
541  60 CONTINUE
542  END IF
543  END IF
544 *
545 * ==== Normal case: Chain of 3-by-3 reflections ====
546 *
547  DO 80 m = mbot, mtop, -1
548  k = krcol + 2*( m-1 )
549  IF( k.EQ.ktop-1 ) THEN
550  CALL dlaqr1( 3, h( ktop, ktop ), ldh, sr( 2*m-1 ),
551  $ si( 2*m-1 ), sr( 2*m ), si( 2*m ),
552  $ v( 1, m ) )
553  alpha = v( 1, m )
554  CALL dlarfg( 3, alpha, v( 2, m ), 1, v( 1, m ) )
555  ELSE
556 *
557 * ==== Perform delayed transformation of row below
558 * . Mth bulge. Exploit fact that first two elements
559 * . of row are actually zero. ====
560 *
561  refsum = v( 1, m )*v( 3, m )*h( k+3, k+2 )
562  h( k+3, k ) = -refsum
563  h( k+3, k+1 ) = -refsum*v( 2, m )
564  h( k+3, k+2 ) = h( k+3, k+2 ) - refsum*v( 3, m )
565 *
566 * ==== Calculate reflection to move
567 * . Mth bulge one step. ====
568 *
569  beta = h( k+1, k )
570  v( 2, m ) = h( k+2, k )
571  v( 3, m ) = h( k+3, k )
572  CALL dlarfg( 3, beta, v( 2, m ), 1, v( 1, m ) )
573 *
574 * ==== A Bulge may collapse because of vigilant
575 * . deflation or destructive underflow. In the
576 * . underflow case, try the two-small-subdiagonals
577 * . trick to try to reinflate the bulge. ====
578 *
579  IF( h( k+3, k ).NE.zero .OR. h( k+3, k+1 ).NE.
580  $ zero .OR. h( k+3, k+2 ).EQ.zero ) THEN
581 *
582 * ==== Typical case: not collapsed (yet). ====
583 *
584  h( k+1, k ) = beta
585  h( k+2, k ) = zero
586  h( k+3, k ) = zero
587  ELSE
588 *
589 * ==== Atypical case: collapsed. Attempt to
590 * . reintroduce ignoring H(K+1,K) and H(K+2,K).
591 * . If the fill resulting from the new
592 * . reflector is too large, then abandon it.
593 * . Otherwise, use the new one. ====
594 *
595  CALL dlaqr1( 3, h( k+1, k+1 ), ldh, sr( 2*m-1 ),
596  $ si( 2*m-1 ), sr( 2*m ), si( 2*m ),
597  $ vt )
598  alpha = vt( 1 )
599  CALL dlarfg( 3, alpha, vt( 2 ), 1, vt( 1 ) )
600  refsum = vt( 1 )*( h( k+1, k )+vt( 2 )*
601  $ h( k+2, k ) )
602 *
603  IF( abs( h( k+2, k )-refsum*vt( 2 ) )+
604  $ abs( refsum*vt( 3 ) ).GT.ulp*
605  $ ( abs( h( k, k ) )+abs( h( k+1,
606  $ k+1 ) )+abs( h( k+2, k+2 ) ) ) ) THEN
607 *
608 * ==== Starting a new bulge here would
609 * . create non-negligible fill. Use
610 * . the old one with trepidation. ====
611 *
612  h( k+1, k ) = beta
613  h( k+2, k ) = zero
614  h( k+3, k ) = zero
615  ELSE
616 *
617 * ==== Starting a new bulge here would
618 * . create only negligible fill.
619 * . Replace the old reflector with
620 * . the new one. ====
621 *
622  h( k+1, k ) = h( k+1, k ) - refsum
623  h( k+2, k ) = zero
624  h( k+3, k ) = zero
625  v( 1, m ) = vt( 1 )
626  v( 2, m ) = vt( 2 )
627  v( 3, m ) = vt( 3 )
628  END IF
629  END IF
630  END IF
631 *
632 * ==== Apply reflection from the right and
633 * . the first column of update from the left.
634 * . These updates are required for the vigilant
635 * . deflation check. We still delay most of the
636 * . updates from the left for efficiency. ====
637 *
638  t1 = v( 1, m )
639  t2 = t1*v( 2, m )
640  t3 = t1*v( 3, m )
641  DO 70 j = jtop, min( kbot, k+3 )
642  refsum = h( j, k+1 ) + v( 2, m )*h( j, k+2 )
643  $ + v( 3, m )*h( j, k+3 )
644  h( j, k+1 ) = h( j, k+1 ) - refsum*t1
645  h( j, k+2 ) = h( j, k+2 ) - refsum*t2
646  h( j, k+3 ) = h( j, k+3 ) - refsum*t3
647  70 CONTINUE
648 *
649 * ==== Perform update from left for subsequent
650 * . column. ====
651 *
652  refsum = h( k+1, k+1 ) + v( 2, m )*h( k+2, k+1 )
653  $ + v( 3, m )*h( k+3, k+1 )
654  h( k+1, k+1 ) = h( k+1, k+1 ) - refsum*t1
655  h( k+2, k+1 ) = h( k+2, k+1 ) - refsum*t2
656  h( k+3, k+1 ) = h( k+3, k+1 ) - refsum*t3
657 *
658 * ==== The following convergence test requires that
659 * . the tradition small-compared-to-nearby-diagonals
660 * . criterion and the Ahues & Tisseur (LAWN 122, 1997)
661 * . criteria both be satisfied. The latter improves
662 * . accuracy in some examples. Falling back on an
663 * . alternate convergence criterion when TST1 or TST2
664 * . is zero (as done here) is traditional but probably
665 * . unnecessary. ====
666 *
667  IF( k.LT.ktop)
668  $ cycle
669  IF( h( k+1, k ).NE.zero ) THEN
670  tst1 = abs( h( k, k ) ) + abs( h( k+1, k+1 ) )
671  IF( tst1.EQ.zero ) THEN
672  IF( k.GE.ktop+1 )
673  $ tst1 = tst1 + abs( h( k, k-1 ) )
674  IF( k.GE.ktop+2 )
675  $ tst1 = tst1 + abs( h( k, k-2 ) )
676  IF( k.GE.ktop+3 )
677  $ tst1 = tst1 + abs( h( k, k-3 ) )
678  IF( k.LE.kbot-2 )
679  $ tst1 = tst1 + abs( h( k+2, k+1 ) )
680  IF( k.LE.kbot-3 )
681  $ tst1 = tst1 + abs( h( k+3, k+1 ) )
682  IF( k.LE.kbot-4 )
683  $ tst1 = tst1 + abs( h( k+4, k+1 ) )
684  END IF
685  IF( abs( h( k+1, k ) ).LE.max( smlnum, ulp*tst1 ) )
686  $ THEN
687  h12 = max( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
688  h21 = min( abs( h( k+1, k ) ), abs( h( k, k+1 ) ) )
689  h11 = max( abs( h( k+1, k+1 ) ),
690  $ abs( h( k, k )-h( k+1, k+1 ) ) )
691  h22 = min( abs( h( k+1, k+1 ) ),
692  $ abs( h( k, k )-h( k+1, k+1 ) ) )
693  scl = h11 + h12
694  tst2 = h22*( h11 / scl )
695 *
696  IF( tst2.EQ.zero .OR. h21*( h12 / scl ).LE.
697  $ max( smlnum, ulp*tst2 ) ) THEN
698  h( k+1, k ) = zero
699  END IF
700  END IF
701  END IF
702  80 CONTINUE
703 *
704 * ==== Multiply H by reflections from the left ====
705 *
706  IF( accum ) THEN
707  jbot = min( ndcol, kbot )
708  ELSE IF( wantt ) THEN
709  jbot = n
710  ELSE
711  jbot = kbot
712  END IF
713 *
714  DO 100 m = mbot, mtop, -1
715  k = krcol + 2*( m-1 )
716  t1 = v( 1, m )
717  t2 = t1*v( 2, m )
718  t3 = t1*v( 3, m )
719  DO 90 j = max( ktop, krcol + 2*m ), jbot
720  refsum = h( k+1, j ) + v( 2, m )*h( k+2, j )
721  $ + v( 3, m )*h( k+3, j )
722  h( k+1, j ) = h( k+1, j ) - refsum*t1
723  h( k+2, j ) = h( k+2, j ) - refsum*t2
724  h( k+3, j ) = h( k+3, j ) - refsum*t3
725  90 CONTINUE
726  100 CONTINUE
727 *
728 * ==== Accumulate orthogonal transformations. ====
729 *
730  IF( accum ) THEN
731 *
732 * ==== Accumulate U. (If needed, update Z later
733 * . with an efficient matrix-matrix
734 * . multiply.) ====
735 *
736  DO 120 m = mbot, mtop, -1
737  k = krcol + 2*( m-1 )
738  kms = k - incol
739  i2 = max( 1, ktop-incol )
740  i2 = max( i2, kms-(krcol-incol)+1 )
741  i4 = min( kdu, krcol + 2*( mbot-1 ) - incol + 5 )
742  t1 = v( 1, m )
743  t2 = t1*v( 2, m )
744  t3 = t1*v( 3, m )
745  DO 110 j = i2, i4
746  refsum = u( j, kms+1 ) + v( 2, m )*u( j, kms+2 )
747  $ + v( 3, m )*u( j, kms+3 )
748  u( j, kms+1 ) = u( j, kms+1 ) - refsum*t1
749  u( j, kms+2 ) = u( j, kms+2 ) - refsum*t2
750  u( j, kms+3 ) = u( j, kms+3 ) - refsum*t3
751  110 CONTINUE
752  120 CONTINUE
753  ELSE IF( wantz ) THEN
754 *
755 * ==== U is not accumulated, so update Z
756 * . now by multiplying by reflections
757 * . from the right. ====
758 *
759  DO 140 m = mbot, mtop, -1
760  k = krcol + 2*( m-1 )
761  t1 = v( 1, m )
762  t2 = t1*v( 2, m )
763  t3 = t1*v( 3, m )
764  DO 130 j = iloz, ihiz
765  refsum = z( j, k+1 ) + v( 2, m )*z( j, k+2 )
766  $ + v( 3, m )*z( j, k+3 )
767  z( j, k+1 ) = z( j, k+1 ) - refsum*t1
768  z( j, k+2 ) = z( j, k+2 ) - refsum*t2
769  z( j, k+3 ) = z( j, k+3 ) - refsum*t3
770  130 CONTINUE
771  140 CONTINUE
772  END IF
773 *
774 * ==== End of near-the-diagonal bulge chase. ====
775 *
776  145 CONTINUE
777 *
778 * ==== Use U (if accumulated) to update far-from-diagonal
779 * . entries in H. If required, use U to update Z as
780 * . well. ====
781 *
782  IF( accum ) THEN
783  IF( wantt ) THEN
784  jtop = 1
785  jbot = n
786  ELSE
787  jtop = ktop
788  jbot = kbot
789  END IF
790  k1 = max( 1, ktop-incol )
791  nu = ( kdu-max( 0, ndcol-kbot ) ) - k1 + 1
792 *
793 * ==== Horizontal Multiply ====
794 *
795  DO 150 jcol = min( ndcol, kbot ) + 1, jbot, nh
796  jlen = min( nh, jbot-jcol+1 )
797  CALL dgemm( 'C', 'N', nu, jlen, nu, one, u( k1, k1 ),
798  $ ldu, h( incol+k1, jcol ), ldh, zero, wh,
799  $ ldwh )
800  CALL dlacpy( 'ALL', nu, jlen, wh, ldwh,
801  $ h( incol+k1, jcol ), ldh )
802  150 CONTINUE
803 *
804 * ==== Vertical multiply ====
805 *
806  DO 160 jrow = jtop, max( ktop, incol ) - 1, nv
807  jlen = min( nv, max( ktop, incol )-jrow )
808  CALL dgemm( 'N', 'N', jlen, nu, nu, one,
809  $ h( jrow, incol+k1 ), ldh, u( k1, k1 ),
810  $ ldu, zero, wv, ldwv )
811  CALL dlacpy( 'ALL', jlen, nu, wv, ldwv,
812  $ h( jrow, incol+k1 ), ldh )
813  160 CONTINUE
814 *
815 * ==== Z multiply (also vertical) ====
816 *
817  IF( wantz ) THEN
818  DO 170 jrow = iloz, ihiz, nv
819  jlen = min( nv, ihiz-jrow+1 )
820  CALL dgemm( 'N', 'N', jlen, nu, nu, one,
821  $ z( jrow, incol+k1 ), ldz, u( k1, k1 ),
822  $ ldu, zero, wv, ldwv )
823  CALL dlacpy( 'ALL', jlen, nu, wv, ldwv,
824  $ z( jrow, incol+k1 ), ldz )
825  170 CONTINUE
826  END IF
827  END IF
828  180 CONTINUE
829 *
830 * ==== End of DLAQR5 ====
831 *
832  END
subroutine dlarfg(N, ALPHA, X, INCX, TAU)
DLARFG generates an elementary reflector (Householder matrix).
Definition: dlarfg.f:106
subroutine dlacpy(UPLO, M, N, A, LDA, B, LDB)
DLACPY copies all or part of one two-dimensional array to another.
Definition: dlacpy.f:103
subroutine dgemm(TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC)
DGEMM
Definition: dgemm.f:187
subroutine dlaqr5(WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS, SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U, LDU, NV, WV, LDWV, NH, WH, LDWH)
DLAQR5 performs a single small-bulge multi-shift QR sweep.
Definition: dlaqr5.f:265
subroutine dlaset(UPLO, M, N, ALPHA, BETA, A, LDA)
DLASET initializes the off-diagonal elements and the diagonal elements of a matrix to given values...
Definition: dlaset.f:110
subroutine dlaqr1(N, H, LDH, SR1, SI1, SR2, SI2, V)
DLAQR1 sets a scalar multiple of the first column of the product of 2-by-2 or 3-by-3 matrix H and spe...
Definition: dlaqr1.f:121
subroutine dtrmm(SIDE, UPLO, TRANSA, DIAG, M, N, ALPHA, A, LDA, B, LDB)
DTRMM
Definition: dtrmm.f:177
subroutine dlabad(SMALL, LARGE)
DLABAD
Definition: dlabad.f:74