/* /home4/luszczek/mscratch/build/SCALAPACK/SRC/pdlahqr.f -- translated by f2c (version 20031025).
   You must link the resulting object file with libf2c:
	on Microsoft Windows system, link with libf2c.lib;
	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
	or, if you install libf2c.a in a standard place, with -lf2c -lm
	-- in that order, at the end of the command line, as in
		cc *.o -lf2c -lm
	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,

		http://www.netlib.org/f2c/libf2c.zip
*/

#include "f2c.h"

/* Table of constant values */

static integer c__1 = 1;
static integer c_n1 = -1;
static integer c__0 = 0;
static integer c__64 = 64;
static logical c_false = FALSE_;
static integer c__6 = 6;
static integer c__3 = 3;
static logical c_true = TRUE_;

/* Subroutine */ int pdlahqr_(logical *wantt, logical *wantz, integer *n, 
	integer *ilo, integer *ihi, doublereal *a, integer *desca, doublereal 
	*wr, doublereal *wi, integer *iloz, integer *ihiz, doublereal *z__, 
	integer *descz, doublereal *work, integer *lwork, integer *iwork, 
	integer *ilwork, integer *info)
{
    /* System generated locals */
    integer i__1, i__2, i__3, i__4, i__5, i__6, i__7;
    doublereal d__1, d__2;

    /* Builtin functions */
    double sqrt(doublereal), d_sign(doublereal *, doublereal *);

    /* Local variables */
    extern /* Subroutine */ int dlasorte_(doublereal *, integer *, integer *, 
	    doublereal *, integer *);
    integer i__, j, k, l, m;
    doublereal s;
    extern /* Subroutine */ int pdlaconsb_(doublereal *, integer *, integer *,
	     integer *, integer *, doublereal *, doublereal *, doublereal *, 
	    doublereal *, integer *);
    integer i1, i2, k1[32], k2[32];
    doublereal s1[4096]	/* was [64][64] */, t1, t2, t3, v2, v3;
    extern /* Subroutine */ int pdlasmsub_(doublereal *, integer *, integer *,
	     integer *, integer *, doublereal *, doublereal *, integer *);
    integer istartcol;
    doublereal h00, h10, h11, h12;
    integer istartrow;
    doublereal h21, h22, h33;
    integer ii;
    doublereal h44;
    integer jj, ki, nh, nr, up, nz, lda, hbl;
    doublereal ave;
    integer ldz, itn, its, num;
    doublereal ulp, sum;
    integer idia;
    doublereal h43h34;
    integer jblk;
    doublereal disc;
    extern integer ilcm_(integer *, integer *);
    integer node, icol, kcol[32], left, ierr, isub;
    doublereal unfl, ovfl;
    integer down, irow, isup, rotn, krow[32], icol1, icol2, itmp1, itmp2, 
	    irow1, irow2, icbuf, lihih, lcmrc, ispec, irbuf, liloh, npcol, 
	    right;
    extern /* Subroutine */ int lfc_SLdcopy(integer *, doublereal *, integer *, 
	    doublereal *, integer *);
    integer lihiz, mycol, liloz, istop;
    doublereal vcopy[3];
    integer nprow, myrow, modkm1, kp2col[32];
    doublereal v1save, v2save, v3save, t1copy;
    integer kp2row[32];
    extern /* Subroutine */ int dlaref_(char *, doublereal *, integer *, 
	    logical *, doublereal *, integer *, logical *, integer *, integer 
	    *, integer *, integer *, integer *, integer *, integer *, integer 
	    *, doublereal *, doublereal *, doublereal *, doublereal *, 
	    doublereal *, doublereal *, ftnlen), dlarfg_(integer *, 
	    doublereal *, doublereal *, integer *, doublereal *);
    integer localk, ibulge, localm;
    doublereal smalla[1152]	/* was [6][6][32] */;
    extern /* Subroutine */ int dlahqr_(logical *, logical *, integer *, 
	    integer *, integer *, doublereal *, integer *, doublereal *, 
	    doublereal *, integer *, integer *, doublereal *, integer *, 
	    integer *);
    integer nbulge;
    extern integer numroc_(integer *, integer *, integer *, integer *, 
	    integer *);
    extern /* Subroutine */ int blacs_gridinfo__(integer *, integer *, 
	    integer *, integer *, integer *);
    integer istart;
    extern /* Subroutine */ int dgebr2d_(integer *, char *, char *, integer *,
	     integer *, doublereal *, integer *, integer *, integer *, ftnlen,
	     ftnlen), dgebs2d_(integer *, char *, char *, integer *, integer *
	    , doublereal *, integer *, ftnlen, ftnlen);
    doublereal smlnum;
    extern /* Subroutine */ int dgesd2d_(integer *, integer *, integer *, 
	    doublereal *, integer *, integer *, integer *);
    integer contxt;
    extern /* Subroutine */ int igamn2d_(integer *, char *, char *, integer *,
	     integer *, integer *, integer *, integer *, integer *, integer *,
	     integer *, integer *, ftnlen, ftnlen);
    integer locali1, locali2;
    extern /* Subroutine */ int pdlacp3_(integer *, integer *, doublereal *, 
	    integer *, doublereal *, integer *, integer *, integer *, integer 
	    *);
    integer localk2[32];
    extern /* Subroutine */ int dgerv2d_(integer *, integer *, integer *, 
	    doublereal *, integer *, integer *, integer *), infog1l_(integer *
	    , integer *, integer *, integer *, integer *, integer *, integer *
	    ), infog2l_(integer *, integer *, integer *, integer *, integer *,
	     integer *, integer *, integer *, integer *, integer *, integer *)
	    , dgsum2d_(integer *, char *, char *, integer *, integer *, 
	    doublereal *, integer *, integer *, integer *, ftnlen, ftnlen), 
	    pdlabad_(integer *, doublereal *, doublereal *);
    extern doublereal pdlamch_(integer *, char *, ftnlen);
    extern /* Subroutine */ int pdlawil_(integer *, integer *, integer *, 
	    doublereal *, integer *, doublereal *, doublereal *, doublereal *,
	     doublereal *), pxerbla_(integer *, char *, integer *, ftnlen);
    integer icurcol[32], iafirst, jafirst, vecsidx, itermax, icurrow[32];


/*  -- ScaLAPACK routine (version 1.7) -- */
/*     University of Tennessee, Knoxville, Oak Ridge National Laboratory, */
/*     and University of California, Berkeley. */
/*     December 31, 1998 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  PDLAHQR is an auxiliary routine used to find the Schur decomposition */
/*    and or eigenvalues of a matrix already in Hessenberg form from */
/*    cols ILO to IHI. */

/*  Notes */
/*  ===== */

/*  Each global data object is described by an associated description */
/*  vector.  This vector stores the information required to establish */
/*  the mapping between an object element and its corresponding process */
/*  and memory location. */

/*  Let A be a generic term for any 2D block cyclicly distributed array. */
/*  Such a global array has an associated description vector DESCA. */
/*  In the following comments, the character _ should be read as */
/*  "of the global array". */

/*  NOTATION        STORED IN      EXPLANATION */
/*  --------------- -------------- -------------------------------------- */
/*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case, */
/*                                 DTYPE_A = 1. */
/*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating */
/*                                 the BLACS process grid A is distribu- */
/*                                 ted over. The context itself is glo- */
/*                                 bal, but the handle (the integer */
/*                                 value) may vary. */
/*  M_A    (global) DESCA( M_ )    The number of rows in the global */
/*                                 array A. */
/*  N_A    (global) DESCA( N_ )    The number of columns in the global */
/*                                 array A. */
/*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute */
/*                                 the rows of the array. */
/*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute */
/*                                 the columns of the array. */
/*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first */
/*                                 row of the array A is distributed. */
/*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the */
/*                                 first column of the array A is */
/*                                 distributed. */
/*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local */
/*                                 array.  LLD_A >= MAX(1,LOCr(M_A)). */

/*  Let K be the number of rows or columns of a distributed matrix, */
/*  and assume that its process grid has dimension p x q. */
/*  LOCr( K ) denotes the number of elements of K that a process */
/*  would receive if K were distributed over the p processes of its */
/*  process column. */
/*  Similarly, LOCc( K ) denotes the number of elements of K that a */
/*  process would receive if K were distributed over the q processes of */
/*  its process row. */
/*  The values of LOCr() and LOCc() may be determined via a call to the */
/*  ScaLAPACK tool function, NUMROC: */
/*          LOCr( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), */
/*          LOCc( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). */
/*  An upper bound for these quantities may be computed by: */
/*          LOCr( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A */
/*          LOCc( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A */

/*  Arguments */
/*  ========= */

/*  WANTT   (global input) LOGICAL */
/*          = .TRUE. : the full Schur form T is required; */
/*          = .FALSE.: only eigenvalues are required. */

/*  WANTZ   (global input) LOGICAL */
/*          = .TRUE. : the matrix of Schur vectors Z is required; */
/*          = .FALSE.: Schur vectors are not required. */

/*  N       (global input) INTEGER */
/*          The order of the Hessenberg matrix A (and Z if WANTZ). */
/*          N >= 0. */

/*  ILO     (global input) INTEGER */
/*  IHI     (global input) INTEGER */
/*          It is assumed that A is already upper quasi-triangular in */
/*          rows and columns IHI+1:N, and that A(ILO,ILO-1) = 0 (unless */
/*          ILO = 1). PDLAHQR works primarily with the Hessenberg */
/*          submatrix in rows and columns ILO to IHI, but applies */
/*          transformations to all of H if WANTT is .TRUE.. */
/*          1 <= ILO <= max(1,IHI); IHI <= N. */

/*  A       (global input/output) DOUBLE PRECISION array, dimension */
/*          (DESCA(LLD_),*) */
/*          On entry, the upper Hessenberg matrix A. */
/*          On exit, if WANTT is .TRUE., A is upper quasi-triangular in */
/*          rows and columns ILO:IHI, with any 2-by-2 or larger diagonal */
/*          blocks not yet in standard form. If WANTT is .FALSE., the */
/*          contents of A are unspecified on exit. */

/*  DESCA   (global and local input) INTEGER array of dimension DLEN_. */
/*          The array descriptor for the distributed matrix A. */

/*  WR      (global replicated output) DOUBLE PRECISION array, */
/*                                                         dimension (N) */
/*  WI      (global replicated output) DOUBLE PRECISION array, */
/*                                                         dimension (N) */
/*          The real and imaginary parts, respectively, of the computed */
/*          eigenvalues ILO to IHI are stored in the corresponding */
/*          elements of WR and WI. If two eigenvalues are computed as a */
/*          complex conjugate pair, they are stored in consecutive */
/*          elements of WR and WI, say the i-th and (i+1)th, with */
/*          WI(i) > 0 and WI(i+1) < 0. If WANTT is .TRUE., the */
/*          eigenvalues are stored in the same order as on the diagonal */
/*          of the Schur form returned in A.  A may be returned with */
/*          larger diagonal blocks until the next release. */

/*  ILOZ    (global input) INTEGER */
/*  IHIZ    (global input) INTEGER */
/*          Specify the rows of Z to which transformations must be */
/*          applied if WANTZ is .TRUE.. */
/*          1 <= ILOZ <= ILO; IHI <= IHIZ <= N. */

/*  Z       (global input/output) DOUBLE PRECISION array. */
/*          If WANTZ is .TRUE., on entry Z must contain the current */
/*          matrix Z of transformations accumulated by PDHSEQR, and on */
/*          exit Z has been updated; transformations are applied only to */
/*          the submatrix Z(ILOZ:IHIZ,ILO:IHI). */
/*          If WANTZ is .FALSE., Z is not referenced. */

/*  DESCZ   (global and local input) INTEGER array of dimension DLEN_. */
/*          The array descriptor for the distributed matrix Z. */

/*  WORK    (local output) DOUBLE PRECISION array of size LWORK */

/*  LWORK   (local input) INTEGER */
/*          WORK(LWORK) is a local array and LWORK is assumed big enough */
/*          so that LWORK >= 3*N + */
/*                MAX( 2*MAX(DESCZ(LLD_),DESCA(LLD_)) + 2*LOCc(N), */
/*                     7*Ceil(N/HBL)/LCM(NPROW,NPCOL)) ) */

/*  IWORK   (global and local input) INTEGER array of size ILWORK */

/*  ILWORK  (local input) INTEGER */
/*          This holds the some of the IBLK integer arrays.  This is held */
/*          as a place holder for the next release. */

/*  INFO    (global output) INTEGER */
/*          < 0: parameter number -INFO incorrect or inconsistent */
/*          = 0: successful exit */
/*          > 0: PDLAHQR failed to compute all the eigenvalues ILO to IHI */
/*               in a total of 30*(IHI-ILO+1) iterations; if INFO = i, */
/*               elements i+1:ihi of WR and WI contain those eigenvalues */
/*               which have been successfully computed. */

/*  Logic: */
/*       This algorithm is very similar to _LAHQR.  Unlike _LAHQR, */
/*       instead of sending one double shift through the largest */
/*       unreduced submatrix, this algorithm sends multiple double shifts */
/*       and spaces them apart so that there can be parallelism across */
/*       several processor row/columns.  Another critical difference is */
/*       that this algorithm aggregrates multiple transforms together in */
/*       order to apply them in a block fashion. */

/*  Important Local Variables: */
/*       IBLK = The maximum number of bulges that can be computed. */
/*           Currently fixed.  Future releases this won't be fixed. */
/*       HBL  = The square block size (HBL=DESCA(MB_)=DESCA(NB_)) */
/*       ROTN = The number of transforms to block together */
/*       NBULGE = The number of bulges that will be attempted on the */
/*           current submatrix. */
/*       IBULGE = The current number of bulges started. */
/*       K1(*),K2(*) = The current bulge loops from K1(*) to K2(*). */

/*  Subroutines: */
/*       This routine calls: */
/*           PDLACONSB   -> To determine where to start each iteration */
/*           PDLAWIL   -> Given the shift, get the transformation */
/*           DLASORTE   -> Pair up eigenvalues so that reals are paired. */
/*           PDLACP3   -> Parallel array to local replicated array copy & */
/*                        back. */
/*           DLAREF   -> Row/column reflector applier.  Core routine */
/*                        here. */
/*           PDLASMSUB   -> Finds negligible subdiagonal elements. */

/*  Current Notes and/or Restrictions: */
/*       1.) This code requires the distributed block size to be square */
/*           and at least six (6); unlike simpler codes like LU, this */
/*           algorithm is extremely sensitive to block size.  Unwise */
/*           choices of too small a block size can lead to bad */
/*           performance. */
/*       2.) This code requires A and Z to be distributed identically */
/*           and have identical contxts. */
/*       3.) This release currently does not have a routine for */
/*           resolving the Schur blocks into regular 2x2 form after */
/*           this code is completed.  Because of this, a significant */
/*           performance impact is required while the deflation is done */
/*           by sometimes a single column of processors. */
/*       4.) This code does not currently block the initial transforms */
/*           so that none of the rows or columns for any bulge are */
/*           completed until all are started.  To offset pipeline */
/*           start-up it is recommended that at least 2*LCM(NPROW,NPCOL) */
/*           bulges are used (if possible) */
/*       5.) The maximum number of bulges currently supported is fixed at */
/*           32.  In future versions this will be limited only by the */
/*           incoming WORK array. */
/*       6.) The matrix A must be in upper Hessenberg form.  If elements */
/*           below the subdiagonal are nonzero, the resulting transforms */
/*           may be nonsimilar.  This is also true with the LAPACK */
/*           routine. */
/*       7.) For this release, it is assumed RSRC_=CSRC_=0 */
/*       8.) Currently, all the eigenvalues are distributed to all the */
/*           nodes.  Future releases will probably distribute the */
/*           eigenvalues by the column partitioning. */
/*       9.) The internals of this routine are subject to change. */

/*  Implemented by:  G. Henry, November 17, 1996 */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. Local Arrays .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

    /* Parameter adjustments */
    --iwork;
    --work;
    --descz;
    --z__;
    --wi;
    --wr;
    --desca;
    --a;

    /* Function Body */
    *info = 0;

    itermax = (*ihi - *ilo + 1) * 30;
/*     ITERMAX = 0 */
    if (*n == 0) {
	return 0;
    }

/*     NODE (IAFIRST,JAFIRST) OWNS A(1,1) */

    hbl = desca[5];
    contxt = desca[2];
    lda = desca[9];
    iafirst = desca[7];
    jafirst = desca[8];
    ldz = descz[9];
    blacs_gridinfo__(&contxt, &nprow, &npcol, &myrow, &mycol);
    node = myrow * npcol + mycol;
    num = nprow * npcol;
    left = (mycol + npcol - 1) % npcol;
    right = (mycol + 1) % npcol;
    up = (myrow + nprow - 1) % nprow;
    down = (myrow + 1) % nprow;
    lcmrc = ilcm_(&nprow, &npcol);

/*     Determine the number of columns we have so we can check workspace */

    localk = numroc_(n, &hbl, &mycol, &jafirst, &npcol);
    jj = *n / hbl;
    if (jj * hbl < *n) {
	++jj;
    }
    jj = jj * 7 / lcmrc;
/* Computing MAX */
    i__1 = (max(lda,ldz) << 1) + (localk << 1);
    if (*lwork < *n * 3 + max(i__1,jj)) {
	*info = -15;
    }
    if (descz[2] != desca[2]) {
	*info = -1302;
    }
    if (desca[5] != desca[6]) {
	*info = -706;
    }
    if (descz[5] != descz[6]) {
	*info = -1306;
    }
    if (desca[5] != descz[5]) {
	*info = -1305;
    }
    if (desca[7] != 0 || desca[8] != 0) {
	*info = -707;
    }
    if (descz[7] != 0 || descz[8] != 0) {
	*info = -1307;
    }
    if (*ilo > *n || *ilo < 1) {
	*info = -4;
    }
    if (*ihi > *n || *ihi < 1) {
	*info = -5;
    }
    if (hbl < 5) {
	*info = -705;
    }
    igamn2d_(&contxt, "ALL", " ", &c__1, &c__1, info, &c__1, &itmp1, &itmp2, &
	    c_n1, &c_n1, &c_n1, (ftnlen)3, (ftnlen)1);
    if (*info < 0) {
	i__1 = -(*info);
	pxerbla_(&contxt, "PDLAHQR", &i__1, (ftnlen)7);
	return 0;
    }

/*     Set work array indices */

    vecsidx = 0;
    idia = *n * 3;
    isub = *n * 3;
    isup = *n * 3;
    irbuf = *n * 3;
    icbuf = *n * 3;

/*     Find a value for ROTN */

    rotn = hbl / 3;
/* Computing MAX */
    i__1 = rotn, i__2 = hbl - 2;
    rotn = max(i__1,i__2);
    rotn = min(rotn,1);

    if (*ilo == *ihi) {
	infog2l_(ilo, ilo, &desca[1], &nprow, &npcol, &myrow, &mycol, &irow, &
		icol, &ii, &jj);
	if (myrow == ii && mycol == jj) {
	    wr[*ilo] = a[(icol - 1) * lda + irow];
	} else {
	    wr[*ilo] = 0.;
	}
	wi[*ilo] = 0.;
	return 0;
    }

    nh = *ihi - *ilo + 1;
    nz = *ihiz - *iloz + 1;

    infog1l_(iloz, &hbl, &nprow, &myrow, &c__0, &liloz, &lihiz);
    lihiz = numroc_(ihiz, &hbl, &myrow, &c__0, &nprow);

/*     Set machine-dependent constants for the stopping criterion. */
/*     If NORM(H) <= SQRT(OVFL), overflow should not occur. */

    unfl = pdlamch_(&contxt, "SAFE MINIMUM", (ftnlen)12);
    ovfl = 1. / unfl;
    pdlabad_(&contxt, &unfl, &ovfl);
    ulp = pdlamch_(&contxt, "PRECISION", (ftnlen)9);
    smlnum = unfl * (nh / ulp);

/*     I1 and I2 are the indices of the first row and last column of H */
/*     to which transformations must be applied. If eigenvalues only are */
/*     being computed, I1 and I2 are set inside the main loop. */

    if (*wantt) {
	i1 = 1;
	i2 = *n;
    }

/*     ITN is the total number of QR iterations allowed. */

    itn = itermax;

/*     The main loop begins here. I is the loop index and decreases from */
/*     IHI to ILO in steps of our schur block size (<=2*IBLK). Each */
/*     iteration of the loop works  with the active submatrix in rows */
/*     and columns L to I.   Eigenvalues I+1 to IHI have already */
/*     converged. Either L = ILO or the global A(L,L-1) is negligible */
/*     so that the matrix splits. */

    i__ = *ihi;
L10:
    l = *ilo;
    if (i__ < *ilo) {
	goto L450;
    }

/*     Perform QR iterations on rows and columns ILO to I until a */
/*     submatrix of order 1 or 2 splits off at the bottom because a */
/*     subdiagonal element has become negligible. */

    i__1 = itn;
    for (its = 0; its <= i__1; ++its) {

/*        Look for a single small subdiagonal element. */

	i__2 = *lwork - irbuf;
	pdlasmsub_(&a[1], &desca[1], &i__, &l, &k, &smlnum, &work[irbuf + 1], 
		&i__2);
	l = k;

	if (l > *ilo) {

/*           H(L,L-1) is negligible */

	    i__2 = l - 1;
	    infog2l_(&l, &i__2, &desca[1], &nprow, &npcol, &myrow, &mycol, &
		    irow, &icol, &itmp1, &itmp2);
	    if (myrow == itmp1 && mycol == itmp2) {
		a[(icol - 1) * lda + irow] = 0.;
	    }
	    work[isub + l - 1] = 0.;
	}

/*        Exit from loop if a submatrix of order 1 or 2 has split off. */

	m = l - 10;
/*        IF ( L .GE. I - (2*IBLK-1) ) */
/*         IF ( L .GE. I - MAX(2*IBLK-1,HBL) ) */
	if (l >= i__ - 1) {
	    goto L430;
	}

/*        Now the active submatrix is in rows and columns L to I. If */
/*        eigenvalues only are being computed, only the active submatrix */
/*        need be transformed. */

	if (! (*wantt)) {
	    i1 = l;
	    i2 = i__;
	}

/*        Copy submatrix of size 2*JBLK and prepare to do generalized */
/*           Wilkinson shift or an exceptional shift */

/* Computing MIN */
	i__2 = 32, i__3 = (i__ - l + 1) / 2 - 1;
	jblk = min(i__2,i__3);
	if (jblk > lcmrc) {

/*           Make sure it's divisible by LCM (we want even workloads!) */

	    jblk -= jblk % lcmrc;
	}
/* Computing MIN */
	i__2 = jblk, i__3 = lcmrc << 1;
	jblk = min(i__2,i__3);
	jblk = max(jblk,1);

	i__2 = jblk << 1;
	i__3 = i__ - (jblk << 1) + 1;
	pdlacp3_(&i__2, &i__3, &a[1], &desca[1], s1, &c__64, &c_n1, &c_n1, &
		c__0);
	if (its == 20 || its == 40) {

/*           Exceptional shift. */

	    for (ii = jblk << 1; ii >= 1; --ii) {
		s1[ii + (ii << 6) - 65] = ((d__1 = s1[ii + (ii << 6) - 65], 
			abs(d__1)) + (d__2 = s1[ii + (ii - 1 << 6) - 65], abs(
			d__2))) * 1.5;
		s1[ii + (ii - 1 << 6) - 65] = 0.;
		s1[ii - 1 + (ii << 6) - 65] = 0.;
/* L20: */
	    }
	} else {
	    i__2 = jblk << 1;
	    i__3 = jblk << 1;
	    i__4 = jblk << 1;
	    dlahqr_(&c_false, &c_false, &i__2, &c__1, &i__3, s1, &c__64, &
		    work[irbuf + 1], &work[icbuf + 1], &c__1, &i__4, &z__[1], 
		    &ldz, &ierr);

/*           Prepare to use Wilkinson's double shift */

	    h44 = s1[(jblk << 1) + (jblk << 7) - 65];
	    h33 = s1[(jblk << 1) - 1 + ((jblk << 1) - 1 << 6) - 65];
	    h43h34 = s1[(jblk << 1) - 1 + (jblk << 7) - 65] * s1[(jblk << 1) 
		    + ((jblk << 1) - 1 << 6) - 65];
	    if (jblk > 1 && its > 30) {
		s = s1[(jblk << 1) - 1 + ((jblk << 1) - 2 << 6) - 65];
		disc = (h33 - h44) * .5;
		disc = disc * disc + h43h34;
		if (disc > 0.) {

/*                 Real roots: Use Wilkinson's shift twice */

		    disc = sqrt(disc);
		    ave = (h33 + h44) * .5;
		    if (abs(h33) - abs(h44) > 0.) {
			h33 = h33 * h44 - h43h34;
			h44 = h33 / (d_sign(&disc, &ave) + ave);
		    } else {
			h44 = d_sign(&disc, &ave) + ave;
		    }
		    h33 = h44;
		    h43h34 = 0.;
		}
	    }
	}

/*        Look for two consecutive small subdiagonal elements: */
/*           PDLACONSB is the routine that does this. */

	i__2 = *lwork - irbuf;
	pdlaconsb_(&a[1], &desca[1], &i__, &l, &m, &h44, &h33, &h43h34, &work[
		irbuf + 1], &i__2);

/*        Skip small submatrices */

/*        IF ( M .GE. I - 5 ) */
/*    $      GO TO 80 */

/*        Double-shift QR step */

/*        NBULGE is the number of bulges that will be attempted */

/* Computing MIN */
	i__2 = m + rotn - m % rotn, i__3 = i__ - 2;
	istop = min(i__2,i__3);
/* Computing MIN */
	i__2 = istop, i__3 = m + hbl - 3 - (m - 1) % hbl;
	istop = min(i__2,i__3);
/* Computing MIN */
	i__2 = istop, i__3 = i2 - 2;
	istop = min(i__2,i__3);
	istop = max(istop,m);
	nbulge = (i__ - 1 - istop) / hbl;

/*        Do not exceed maximum determined. */

	nbulge = min(nbulge,jblk);
	if (nbulge > lcmrc) {

/*           Make sure it's divisible by LCM (we want even workloads!) */

	    nbulge -= nbulge % lcmrc;
	}
	nbulge = max(nbulge,1);

	if (its != 20 && its != 40 && nbulge > 1) {

/*           sort the eigenpairs so that they are in twos for double */
/*           shifts.  only call if several need sorting */

	    i__2 = nbulge << 1;
	    dlasorte_(&s1[(jblk - nbulge << 1) + 1 + ((jblk - nbulge << 1) + 
		    1 << 6) - 65], &c__64, &i__2, &work[irbuf + 1], &ierr);
	}

/*        IBULGE is the number of bulges going so far */

	ibulge = 1;

/*        "A" row defs : main row transforms from LOCALK to LOCALI2 */

	infog1l_(&m, &hbl, &npcol, &mycol, &c__0, &itmp1, &localk);
	localk = numroc_(n, &hbl, &mycol, &c__0, &npcol);
	infog1l_(&c__1, &hbl, &npcol, &mycol, &c__0, &icol1, &locali2);
	locali2 = numroc_(&i2, &hbl, &mycol, &c__0, &npcol);

/*        "A" col defs : main col transforms from LOCALI1 to LOCALM */

	infog1l_(&i1, &hbl, &nprow, &myrow, &c__0, &locali1, &icol1);
	icol1 = numroc_(n, &hbl, &myrow, &c__0, &nprow);
	infog1l_(&c__1, &hbl, &nprow, &myrow, &c__0, &localm, &icol1);
/* Computing MIN */
	i__3 = m + 3;
	i__2 = min(i__3,i__);
	icol1 = numroc_(&i__2, &hbl, &myrow, &c__0, &nprow);

/*        Which row & column will start the bulges */

	istartrow = (m + 1) / hbl % nprow + iafirst;
	istartcol = (m + 1) / hbl % npcol + jafirst;

	infog1l_(&m, &hbl, &nprow, &myrow, &c__0, &ii, &itmp2);
	itmp2 = numroc_(n, &hbl, &myrow, &c__0, &nprow);
	infog1l_(&m, &hbl, &npcol, &mycol, &c__0, &jj, &itmp2);
	itmp2 = numroc_(n, &hbl, &mycol, &c__0, &npcol);
	infog1l_(&c__1, &hbl, &nprow, &myrow, &c__0, &istop, kp2row);
	i__2 = m + 2;
	kp2row[0] = numroc_(&i__2, &hbl, &myrow, &c__0, &nprow);
	infog1l_(&c__1, &hbl, &npcol, &mycol, &c__0, &istop, kp2col);
	i__2 = m + 2;
	kp2col[0] = numroc_(&i__2, &hbl, &mycol, &c__0, &npcol);

/*        Set all values for bulges.  All bulges are stored in */
/*          intermediate steps as loops over KI.  Their current "task" */
/*          over the global M to I-1 values is always K1(KI) to K2(KI). */
/*          However, because there are many bulges, K1(KI) & K2(KI) might */
/*          go past that range while later bulges (KI+1,KI+2,etc..) are */
/*          finishing up. */

/*        Rules: */
/*              If MOD(K1(KI)-1,HBL) < HBL-2 then MOD(K2(KI)-1,HBL)<HBL-2 */
/*              If MOD(K1(KI)-1,HBL) = HBL-2 then MOD(K2(KI)-1,HBL)=HBL-2 */
/*              If MOD(K1(KI)-1,HBL) = HBL-1 then MOD(K2(KI)-1,HBL)=HBL-1 */
/*              K2(KI)-K1(KI) <= ROTN */

/*        We first hit a border when MOD(K1(KI)-1,HBL)=HBL-2 and we hit */
/*        it again when MOD(K1(KI)-1,HBL)=HBL-1. */

	i__2 = nbulge;
	for (ki = 1; ki <= i__2; ++ki) {
	    k1[ki - 1] = m;
/* Computing MIN */
	    i__3 = m + rotn - m % rotn, i__4 = i__ - 2;
	    istop = min(i__3,i__4);
/* Computing MIN */
	    i__3 = istop, i__4 = m + hbl - 3 - (m - 1) % hbl;
	    istop = min(i__3,i__4);
/* Computing MIN */
	    i__3 = istop, i__4 = i2 - 2;
	    istop = min(i__3,i__4);
	    istop = max(istop,m);
	    k2[ki - 1] = istop;
	    icurrow[ki - 1] = istartrow;
	    icurcol[ki - 1] = istartcol;
	    localk2[ki - 1] = itmp1;
	    krow[ki - 1] = ii;
	    kcol[ki - 1] = jj;
	    if (ki > 1) {
		kp2row[ki - 1] = kp2row[0];
	    }
	    if (ki > 1) {
		kp2col[ki - 1] = kp2col[0];
	    }
/* L30: */
	}

/*        Get first transform on node who owns M+2,M+2 */

	itmp1 = istartrow;
	itmp2 = istartcol;
	pdlawil_(&itmp1, &itmp2, &m, &a[1], &desca[1], &h44, &h33, &h43h34, 
		vcopy);
	v1save = vcopy[0];
	v2save = vcopy[1];
	v3save = vcopy[2];
	if (k2[ibulge - 1] <= i__ - 1) {
L40:
	    if (k1[ibulge - 1] >= m + 5 && ibulge < nbulge) {
		if ((k2[ibulge - 1] + 2) % hbl == (k2[ibulge] + 2) % hbl && 
			k1[0] <= i__ - 1) {
		    h44 = s1[(jblk << 1) - (ibulge << 1) + ((jblk << 1) - (
			    ibulge << 1) << 6) - 65];
		    h33 = s1[(jblk << 1) - (ibulge << 1) - 1 + ((jblk << 1) - 
			    (ibulge << 1) - 1 << 6) - 65];
		    h43h34 = s1[(jblk << 1) - (ibulge << 1) - 1 + ((jblk << 1)
			     - (ibulge << 1) << 6) - 65] * s1[(jblk << 1) - (
			    ibulge << 1) + ((jblk << 1) - (ibulge << 1) - 1 <<
			     6) - 65];
		    itmp1 = istartrow;
		    itmp2 = istartcol;
		    pdlawil_(&itmp1, &itmp2, &m, &a[1], &desca[1], &h44, &h33,
			     &h43h34, vcopy);
		    v1save = vcopy[0];
		    v2save = vcopy[1];
		    v3save = vcopy[2];
		    ++ibulge;
		}
	    }

/*        When we hit a border, there are row and column transforms that */
/*          overlap over several processors and the code gets very */
/*          "congested."  As a remedy, when we first hit a border, a 6x6 */
/*          *local* matrix is generated on one node (called SMALLA) and */
/*          work is done on that.  At the end of the border, the data is */
/*          passed back and everything stays a lot simpler. */

	    i__2 = ibulge;
	    for (ki = 1; ki <= i__2; ++ki) {

/* Computing MAX */
		i__3 = k1[ki - 1];
		istart = max(i__3,m);
/* Computing MIN */
		i__3 = k2[ki - 1], i__4 = i__ - 1;
		istop = min(i__3,i__4);
		k = istart;
		modkm1 = (k - 1) % hbl;
		if (modkm1 >= hbl - 2) {
		    if (modkm1 == hbl - 2 && k < i__ - 1) {

/*                 Copy 6 elements from global A(K-1:K+4,K-1:K+4) */

			i__3 = k + 2;
			i__4 = k + 2;
			infog2l_(&i__3, &i__4, &desca[1], &nprow, &npcol, &
				myrow, &mycol, &irow1, &icol1, &itmp1, &itmp2)
				;
/* Computing MIN */
			i__4 = 6, i__5 = *n - k + 2;
			i__3 = min(i__4,i__5);
			i__6 = k - 1;
			pdlacp3_(&i__3, &i__6, &a[1], &desca[1], &smalla[(ki *
				 6 + 1) * 6 - 42], &c__6, &itmp1, &itmp2, &
				c__0);
		    }
		    if (modkm1 == hbl - 1 && k == m) {

/*                 Copy 6 elements from global A(K-2:K+3,K-2:K+3) */

			i__3 = k + 1;
			i__4 = k + 1;
			infog2l_(&i__3, &i__4, &desca[1], &nprow, &npcol, &
				myrow, &mycol, &irow1, &icol1, &itmp1, &itmp2)
				;
/* Computing MIN */
			i__4 = 6, i__5 = *n - k + 3;
			i__3 = min(i__4,i__5);
			i__6 = k - 2;
			pdlacp3_(&i__3, &i__6, &a[1], &desca[1], &smalla[(ki *
				 6 + 1) * 6 - 42], &c__6, &itmp1, &itmp2, &
				c__0);
		    }
		}

/*           DLAHQR used to have a single row application and a single */
/*              column application to H.  Here we do something a little */
/*              more clever.  We break each transformation down into 3 */
/*              parts: */
/*                  1.) The minimum amount of work it takes to determine */
/*                        a group of ROTN transformations (this is on */
/*                        the critical path.) (Loops 130-180) */
/*                  2.) The small work it takes so that each of the rows */
/*                        and columns is at the same place.  For example, */
/*                        all ROTN row transforms are all complete */
/*                        through some column TMP.  (Loops within 190) */
/*                  3.) The majority of the row and column transforms */
/*                        are then applied in a block fashion. */
/*                        (Loops 290 on.) */

/*           Each of these three parts are further subdivided into 3 */
/*           parts: */
/*               A.) Work at the start of a border when */
/*                       MOD(ISTART-1,HBL) = HBL-2 */
/*               B.) Work at the end of a border when */
/*                       MOD(ISTART-1,HBL) = HBL-1 */
/*               C.) Work in the middle of the block when */
/*                       MOD(ISTART-1,HBL) < HBL-2 */

/* Computing MIN */
		i__3 = i__ - 1, i__4 = istop + 1;
		if (myrow == icurrow[ki - 1] && mycol == icurcol[ki - 1] && 
			modkm1 == hbl - 2 && istart < min(i__3,i__4)) {
		    k = istart;
/* Computing MIN */
		    i__3 = 3, i__4 = i__ - k + 1;
		    nr = min(i__3,i__4);
		    if (k > m) {
			lfc_SLdcopy(&nr, &smalla[(ki * 6 + 1) * 6 - 41], &c__1, 
				vcopy, &c__1);
		    } else {
			vcopy[0] = v1save;
			vcopy[1] = v2save;
			vcopy[2] = v3save;
		    }
		    dlarfg_(&nr, vcopy, &vcopy[1], &c__1, &t1copy);
		    if (k > m) {
			smalla[(ki * 6 + 1) * 6 - 41] = vcopy[0];
			smalla[(ki * 6 + 1) * 6 - 40] = 0.;
			if (k < i__ - 1) {
			    smalla[(ki * 6 + 1) * 6 - 39] = 0.;
			}
		    } else if (m > l) {
			smalla[(ki * 6 + 1) * 6 - 41] = -smalla[(ki * 6 + 1) *
				 6 - 41];
		    }
		    v2 = vcopy[1];
		    t2 = t1copy * v2;
		    work[vecsidx + (k - 1) * 3 + 1] = vcopy[1];
		    work[vecsidx + (k - 1) * 3 + 2] = vcopy[2];
		    work[vecsidx + (k - 1) * 3 + 3] = t1copy;
		}

		if ((istop - 1) % hbl == hbl - 1 && myrow == icurrow[ki - 1] 
			&& mycol == icurcol[ki - 1] && istart <= min(i__,
			istop)) {
		    k = istart;
/* Computing MIN */
		    i__3 = 3, i__4 = i__ - k + 1;
		    nr = min(i__3,i__4);
		    if (k > m) {
			lfc_SLdcopy(&nr, &smalla[(ki * 6 + 2) * 6 - 40], &c__1, 
				vcopy, &c__1);
		    } else {
			vcopy[0] = v1save;
			vcopy[1] = v2save;
			vcopy[2] = v3save;
		    }
		    dlarfg_(&nr, vcopy, &vcopy[1], &c__1, &t1copy);
		    if (k > m) {
			smalla[(ki * 6 + 2) * 6 - 40] = vcopy[0];
			smalla[(ki * 6 + 2) * 6 - 39] = 0.;
			if (k < i__ - 1) {
			    smalla[(ki * 6 + 2) * 6 - 38] = 0.;
			}

/*                 Set a subdiagonal to zero now if it's possible */

/*                 H11 = SMALLA(1,1,KI) */
/*                 H10 = SMALLA(2,1,KI) */
/*                 H22 = SMALLA(2,2,KI) */
/*                 IF ( ABS(H10) .LE. MAX(ULP*(ABS(H11)+ABS(H22)), */
/*    $                                    SMLNUM) ) THEN */
/*                    SMALLA(2,1,KI) = ZERO */
/*     WORK(ISUB+K-2) = ZERO */
/*                 END IF */
		    } else if (m > l) {
			smalla[(ki * 6 + 2) * 6 - 40] = -smalla[(ki * 6 + 2) *
				 6 - 40];
		    }
		    v2 = vcopy[1];
		    t2 = t1copy * v2;
		    work[vecsidx + (k - 1) * 3 + 1] = vcopy[1];
		    work[vecsidx + (k - 1) * 3 + 2] = vcopy[2];
		    work[vecsidx + (k - 1) * 3 + 3] = t1copy;
		}

		if (modkm1 == 0 && istart <= i__ - 1 && myrow == icurrow[ki - 
			1] && right == icurcol[ki - 1]) {

/*              (IROW1,ICOL1) is (I,J)-coordinates of H(ISTART,ISTART) */

		    irow1 = krow[ki - 1];
		    icol1 = localk2[ki - 1];
		    if (istart > m) {
			vcopy[0] = smalla[(ki * 6 + 3) * 6 - 39];
			vcopy[1] = smalla[(ki * 6 + 3) * 6 - 38];
			vcopy[2] = smalla[(ki * 6 + 3) * 6 - 37];
/* Computing MIN */
			i__3 = 3, i__4 = i__ - istart + 1;
			nr = min(i__3,i__4);
			dlarfg_(&nr, vcopy, &vcopy[1], &c__1, &t1copy);
			a[(icol1 - 2) * lda + irow1] = vcopy[0];
			a[(icol1 - 2) * lda + irow1 + 1] = 0.;
			if (istart < i__ - 1) {
			    a[(icol1 - 2) * lda + irow1 + 2] = 0.;
			}
		    } else {
			if (m > l) {
			    a[(icol1 - 2) * lda + irow1] = -a[(icol1 - 2) * 
				    lda + irow1];
			}
		    }
		}

		if (myrow == icurrow[ki - 1] && mycol == icurcol[ki - 1] && (
			modkm1 == hbl - 2 && istart == i__ - 1 || modkm1 < 
			hbl - 2 && istart <= i__ - 1)) {

/*           (IROW1,ICOL1) is (I,J)-coordinates of H(ISTART,ISTART) */

		    irow1 = krow[ki - 1];
		    icol1 = localk2[ki - 1];
		    i__3 = istop;
		    for (k = istart; k <= i__3; ++k) {

/*              Create and do these transforms */

/* Computing MIN */
			i__4 = 3, i__5 = i__ - k + 1;
			nr = min(i__4,i__5);
			if (k > m) {
			    if ((k - 1) % hbl == 0) {
				vcopy[0] = smalla[(ki * 6 + 3) * 6 - 39];
				vcopy[1] = smalla[(ki * 6 + 3) * 6 - 38];
				vcopy[2] = smalla[(ki * 6 + 3) * 6 - 37];
			    } else {
				vcopy[0] = a[(icol1 - 2) * lda + irow1];
				vcopy[1] = a[(icol1 - 2) * lda + irow1 + 1];
				if (nr == 3) {
				    vcopy[2] = a[(icol1 - 2) * lda + irow1 + 
					    2];
				}
			    }
			} else {
			    vcopy[0] = v1save;
			    vcopy[1] = v2save;
			    vcopy[2] = v3save;
			}
			dlarfg_(&nr, vcopy, &vcopy[1], &c__1, &t1copy);
			if (k > m) {
			    if ((k - 1) % hbl > 0) {
				a[(icol1 - 2) * lda + irow1] = vcopy[0];
				a[(icol1 - 2) * lda + irow1 + 1] = 0.;
				if (k < i__ - 1) {
				    a[(icol1 - 2) * lda + irow1 + 2] = 0.;
				}

/*                    Set a subdiagonal to zero now if it's possible */

/*                    IF ( (IROW1.GT.2) .AND. (ICOL1.GT.2) .AND. */
/*    $                    (MOD(K-1,HBL) .GT. 1) ) THEN */
/*                       H11 = A((ICOL1-3)*LDA+IROW1-2) */
/*                       H10 = A((ICOL1-3)*LDA+IROW1-1) */
/*                       H22 = A((ICOL1-2)*LDA+IROW1-1) */
/*                       IF ( ABS(H10).LE.MAX(ULP*(ABS(H11)+ABS(H22)), */
/*    $                                       SMLNUM) ) THEN */
/*                           A((ICOL1-3)*LDA+IROW1-1) = ZERO */
/*                       END IF */
/*                    END IF */
			    }
			} else if (m > l) {
			    if ((k - 1) % hbl > 0) {
				a[(icol1 - 2) * lda + irow1] = -a[(icol1 - 2) 
					* lda + irow1];
			    }
			}
			v2 = vcopy[1];
			t2 = t1copy * v2;
			work[vecsidx + (k - 1) * 3 + 1] = vcopy[1];
			work[vecsidx + (k - 1) * 3 + 2] = vcopy[2];
			work[vecsidx + (k - 1) * 3 + 3] = t1copy;
			t1 = t1copy;
			if (k < istop) {

/*                 Do some work so next step is ready... */

			    v3 = vcopy[2];
			    t3 = t1 * v3;
/* Computing MIN */
			    i__5 = k2[ki - 1] + 1, i__6 = i__ - 1;
			    i__4 = min(i__5,i__6) + icol1 - k;
			    for (j = icol1; j <= i__4; ++j) {
				sum = a[(j - 1) * lda + irow1] + v2 * a[(j - 
					1) * lda + irow1 + 1] + v3 * a[(j - 1)
					 * lda + irow1 + 2];
				a[(j - 1) * lda + irow1] -= sum * t1;
				a[(j - 1) * lda + irow1 + 1] -= sum * t2;
				a[(j - 1) * lda + irow1 + 2] -= sum * t3;
/* L50: */
			    }
			    itmp1 = localk2[ki - 1];
			    i__4 = irow1 + 3;
			    for (j = irow1 + 1; j <= i__4; ++j) {
				sum = a[(icol1 - 1) * lda + j] + v2 * a[icol1 
					* lda + j] + v3 * a[(icol1 + 1) * lda 
					+ j];
				a[(icol1 - 1) * lda + j] -= sum * t1;
				a[icol1 * lda + j] -= sum * t2;
				a[(icol1 + 1) * lda + j] -= sum * t3;
/* L60: */
			    }
			}
			++irow1;
			++icol1;
/* L70: */
		    }
		}

		if (modkm1 == hbl - 2) {
		    if (down == icurrow[ki - 1] && right == icurcol[ki - 1] &&
			     num > 1) {
			dgerv2d_(&contxt, &c__3, &c__1, &work[vecsidx + (
				istart - 1) * 3 + 1], &c__3, &down, &right);
		    }
		    if (myrow == icurrow[ki - 1] && mycol == icurcol[ki - 1] 
			    && num > 1) {
			dgesd2d_(&contxt, &c__3, &c__1, &work[vecsidx + (
				istart - 1) * 3 + 1], &c__3, &up, &left);
		    }
		    if (down == icurrow[ki - 1] && npcol > 1 && istart <= 
			    istop) {
			jj = (icurcol[ki - 1] + npcol - 1) % npcol;
			if (mycol != jj) {
			    i__3 = (istop - istart + 1) * 3;
			    i__4 = (istop - istart + 1) * 3;
			    dgebr2d_(&contxt, "ROW", " ", &i__3, &c__1, &work[
				    vecsidx + (istart - 1) * 3 + 1], &i__4, &
				    myrow, &jj, (ftnlen)3, (ftnlen)1);
			} else {
			    i__3 = (istop - istart + 1) * 3;
			    i__4 = (istop - istart + 1) * 3;
			    dgebs2d_(&contxt, "ROW", " ", &i__3, &c__1, &work[
				    vecsidx + (istart - 1) * 3 + 1], &i__4, (
				    ftnlen)3, (ftnlen)1);
			}
		    }
		}

/*           Broadcast Householder information from the block */

		if (myrow == icurrow[ki - 1] && npcol > 1 && istart <= istop) 
			{
		    if (mycol != icurcol[ki - 1]) {
			i__3 = (istop - istart + 1) * 3;
			i__4 = (istop - istart + 1) * 3;
			dgebr2d_(&contxt, "ROW", " ", &i__3, &c__1, &work[
				vecsidx + (istart - 1) * 3 + 1], &i__4, &
				myrow, &icurcol[ki - 1], (ftnlen)3, (ftnlen)1)
				;
		    } else {
			i__3 = (istop - istart + 1) * 3;
			i__4 = (istop - istart + 1) * 3;
			dgebs2d_(&contxt, "ROW", " ", &i__3, &c__1, &work[
				vecsidx + (istart - 1) * 3 + 1], &i__4, (
				ftnlen)3, (ftnlen)1);
		    }
		}
/* L80: */
	    }

/*        Now do column transforms and finish work */

	    i__2 = ibulge;
	    for (ki = 1; ki <= i__2; ++ki) {

/* Computing MAX */
		i__3 = k1[ki - 1];
		istart = max(i__3,m);
/* Computing MIN */
		i__3 = k2[ki - 1], i__4 = i__ - 1;
		istop = min(i__3,i__4);

		if ((istart - 1) % hbl == hbl - 2) {
		    if (right == icurcol[ki - 1] && nprow > 1 && istart <= 
			    istop) {
			jj = (icurrow[ki - 1] + nprow - 1) % nprow;
			if (myrow != jj) {
			    i__3 = (istop - istart + 1) * 3;
			    i__4 = (istop - istart + 1) * 3;
			    dgebr2d_(&contxt, "COL", " ", &i__3, &c__1, &work[
				    vecsidx + (istart - 1) * 3 + 1], &i__4, &
				    jj, &mycol, (ftnlen)3, (ftnlen)1);
			} else {
			    i__3 = (istop - istart + 1) * 3;
			    i__4 = (istop - istart + 1) * 3;
			    dgebs2d_(&contxt, "COL", " ", &i__3, &c__1, &work[
				    vecsidx + (istart - 1) * 3 + 1], &i__4, (
				    ftnlen)3, (ftnlen)1);
			}
		    }
		}

		if (mycol == icurcol[ki - 1] && nprow > 1 && istart <= istop) 
			{
		    if (myrow != icurrow[ki - 1]) {
			i__3 = (istop - istart + 1) * 3;
			i__4 = (istop - istart + 1) * 3;
			dgebr2d_(&contxt, "COL", " ", &i__3, &c__1, &work[
				vecsidx + (istart - 1) * 3 + 1], &i__4, &
				icurrow[ki - 1], &mycol, (ftnlen)3, (ftnlen)1)
				;
		    } else {
			i__3 = (istop - istart + 1) * 3;
			i__4 = (istop - istart + 1) * 3;
			dgebs2d_(&contxt, "COL", " ", &i__3, &c__1, &work[
				vecsidx + (istart - 1) * 3 + 1], &i__4, (
				ftnlen)3, (ftnlen)1);
		    }
		}
/* L90: */
	    }

/*        Now do make up work to have things in block fashion */

	    i__2 = ibulge;
	    for (ki = 1; ki <= i__2; ++ki) {
/* Computing MAX */
		i__3 = k1[ki - 1];
		istart = max(i__3,m);
/* Computing MIN */
		i__3 = k2[ki - 1], i__4 = i__ - 1;
		istop = min(i__3,i__4);

		modkm1 = (istart - 1) % hbl;
		if (myrow == icurrow[ki - 1] && mycol == icurcol[ki - 1] && 
			modkm1 == hbl - 2 && istart < i__ - 1) {
		    k = istart;

/*              Catch up on column & border work */

/* Computing MIN */
		    i__3 = 3, i__4 = i__ - k + 1;
		    nr = min(i__3,i__4);
		    v2 = work[vecsidx + (k - 1) * 3 + 1];
		    v3 = work[vecsidx + (k - 1) * 3 + 2];
		    t1 = work[vecsidx + (k - 1) * 3 + 3];
		    t2 = t1 * v2;
		    if (nr == 3) {

/*                 Do some work so next step is ready... */

/*                 V3 = VCOPY( 3 ) */
			t3 = t1 * v3;
/* Computing MIN */
			i__3 = 6, i__4 = i2 + 2 - k;
			itmp1 = min(i__3,i__4);
/* Computing MAX */
			i__3 = i1 - k + 2;
			itmp2 = max(i__3,1);
			i__3 = itmp1;
			for (j = 2; j <= i__3; ++j) {
			    sum = smalla[(j + ki * 6) * 6 - 41] + v2 * smalla[
				    (j + ki * 6) * 6 - 40] + v3 * smalla[(j + 
				    ki * 6) * 6 - 39];
			    smalla[(j + ki * 6) * 6 - 41] -= sum * t1;
			    smalla[(j + ki * 6) * 6 - 40] -= sum * t2;
			    smalla[(j + ki * 6) * 6 - 39] -= sum * t3;
/* L100: */
			}
			for (j = itmp2; j <= 5; ++j) {
			    sum = smalla[j + (ki * 6 + 2) * 6 - 43] + v2 * 
				    smalla[j + (ki * 6 + 3) * 6 - 43] + v3 * 
				    smalla[j + (ki * 6 + 4) * 6 - 43];
			    smalla[j + (ki * 6 + 2) * 6 - 43] -= sum * t1;
			    smalla[j + (ki * 6 + 3) * 6 - 43] -= sum * t2;
			    smalla[j + (ki * 6 + 4) * 6 - 43] -= sum * t3;
/* L110: */
			}
		    }
		}

		if ((istart - 1) % hbl == hbl - 1 && istart <= istop && myrow 
			== icurrow[ki - 1] && mycol == icurcol[ki - 1]) {
		    k = istop;

/*              Catch up on column & border work */

/* Computing MIN */
		    i__3 = 3, i__4 = i__ - k + 1;
		    nr = min(i__3,i__4);
		    v2 = work[vecsidx + (k - 1) * 3 + 1];
		    v3 = work[vecsidx + (k - 1) * 3 + 2];
		    t1 = work[vecsidx + (k - 1) * 3 + 3];
		    t2 = t1 * v2;
		    if (nr == 3) {

/*                 Do some work so next step is ready... */

/*                 V3 = VCOPY( 3 ) */
			t3 = t1 * v3;
/* Computing MIN */
			i__3 = 6, i__4 = i2 - k + 3;
			itmp1 = min(i__3,i__4);
/* Computing MAX */
			i__3 = i1 - k + 3;
			itmp2 = max(i__3,1);
			i__3 = itmp1;
			for (j = 3; j <= i__3; ++j) {
			    sum = smalla[(j + ki * 6) * 6 - 40] + v2 * smalla[
				    (j + ki * 6) * 6 - 39] + v3 * smalla[(j + 
				    ki * 6) * 6 - 38];
			    smalla[(j + ki * 6) * 6 - 40] -= sum * t1;
			    smalla[(j + ki * 6) * 6 - 39] -= sum * t2;
			    smalla[(j + ki * 6) * 6 - 38] -= sum * t3;
/* L120: */
			}
			for (j = itmp2; j <= 6; ++j) {
			    sum = smalla[j + (ki * 6 + 3) * 6 - 43] + v2 * 
				    smalla[j + (ki * 6 + 4) * 6 - 43] + v3 * 
				    smalla[j + (ki * 6 + 5) * 6 - 43];
			    smalla[j + (ki * 6 + 3) * 6 - 43] -= sum * t1;
			    smalla[j + (ki * 6 + 4) * 6 - 43] -= sum * t2;
			    smalla[j + (ki * 6 + 5) * 6 - 43] -= sum * t3;
/* L130: */
			}
		    }
		}

		modkm1 = (istart - 1) % hbl;
		if (myrow == icurrow[ki - 1] && mycol == icurcol[ki - 1] && (
			modkm1 == hbl - 2 && istart == i__ - 1 || modkm1 < 
			hbl - 2 && istart <= i__ - 1)) {

/*           (IROW1,ICOL1) is (I,J)-coordinates of H(ISTART,ISTART) */

		    irow1 = krow[ki - 1];
		    icol1 = localk2[ki - 1];
		    i__3 = istop;
		    for (k = istart; k <= i__3; ++k) {

/*              Catch up on column & border work */

/* Computing MIN */
			i__4 = 3, i__5 = i__ - k + 1;
			nr = min(i__4,i__5);
			v2 = work[vecsidx + (k - 1) * 3 + 1];
			v3 = work[vecsidx + (k - 1) * 3 + 2];
			t1 = work[vecsidx + (k - 1) * 3 + 3];
			t2 = t1 * v2;
			if (k < istop) {

/*                 Do some work so next step is ready... */

			    t3 = t1 * v3;
/* Computing MIN */
			    i__5 = istart + 1;
			    i__4 = min(i__5,i__) - k + irow1;
			    dlaref_("Col", &a[1], &lda, &c_false, &z__[1], &
				    ldz, &c_false, &icol1, &icol1, &istart, &
				    istop, &i__4, &irow1, &liloz, &lihiz, &
				    work[vecsidx + 1], &v2, &v3, &t1, &t2, &
				    t3, (ftnlen)3);
			    ++irow1;
			    ++icol1;
			} else {
			    if (nr == 3 && (k - 1) % hbl < hbl - 2) {
				t3 = t1 * v3;
/* Computing MIN */
/* Computing MIN */
				i__6 = k2[ki - 1] + 1, i__7 = i__ - 1;
				i__5 = min(i__6,i__7);
				i__4 = min(i__5,i2) - k + icol1;
				dlaref_("Row", &a[1], &lda, &c_false, &z__[1],
					 &ldz, &c_false, &irow1, &irow1, &
					istart, &istop, &icol1, &i__4, &liloz,
					 &lihiz, &work[vecsidx + 1], &v2, &v3,
					 &t1, &t2, &t3, (ftnlen)3);
			    }
			}
/* L140: */
		    }
		}

/*           Send SMALLA back again. */

		k = istart;
		modkm1 = (k - 1) % hbl;
		if (modkm1 >= hbl - 2 && k <= i__ - 1) {
		    if (modkm1 == hbl - 2 && k < i__ - 1) {

/*                 Copy 6 elements from global A(K-1:K+4,K-1:K+4) */

			i__3 = k + 2;
			i__4 = k + 2;
			infog2l_(&i__3, &i__4, &desca[1], &nprow, &npcol, &
				myrow, &mycol, &irow1, &icol1, &itmp1, &itmp2)
				;
/* Computing MIN */
			i__4 = 6, i__5 = *n - k + 2;
			i__3 = min(i__4,i__5);
			i__6 = k - 1;
			pdlacp3_(&i__3, &i__6, &a[1], &desca[1], &smalla[(ki *
				 6 + 1) * 6 - 42], &c__6, &itmp1, &itmp2, &
				c__1);

		    }
		    if (modkm1 == hbl - 1) {

/*                 Copy 6 elements from global A(K-2:K+3,K-2:K+3) */

			i__3 = k + 1;
			i__4 = k + 1;
			infog2l_(&i__3, &i__4, &desca[1], &nprow, &npcol, &
				myrow, &mycol, &irow1, &icol1, &itmp1, &itmp2)
				;
/* Computing MIN */
			i__4 = 6, i__5 = *n - k + 3;
			i__3 = min(i__4,i__5);
			i__6 = k - 2;
			pdlacp3_(&i__3, &i__6, &a[1], &desca[1], &smalla[(ki *
				 6 + 1) * 6 - 42], &c__6, &itmp1, &itmp2, &
				c__1);
		    }
		}

/* L150: */
	    }

/*        Now start major set of block ROW reflections */

	    i__2 = ibulge;
	    for (ki = 1; ki <= i__2; ++ki) {
		if (myrow != icurrow[ki - 1] && down != icurrow[ki - 1]) {
		    goto L160;
		}
/* Computing MAX */
		i__3 = k1[ki - 1];
		istart = max(i__3,m);
/* Computing MIN */
		i__3 = k2[ki - 1], i__4 = i__ - 1;
		istop = min(i__3,i__4);

		if (istop > istart && (istart - 1) % hbl < hbl - 2 && icurrow[
			ki - 1] == myrow) {
/* Computing MIN */
		    i__3 = k2[ki - 1] + 1, i__4 = i__ - 1;
		    irow1 = min(i__3,i__4) + 1;
		    infog1l_(&irow1, &hbl, &npcol, &mycol, &c__0, &itmp1, &
			    itmp2);
		    itmp2 = numroc_(&i2, &hbl, &mycol, &c__0, &npcol);
		    ii = krow[ki - 1];
		    dlaref_("Row", &a[1], &lda, wantz, &z__[1], &ldz, &c_true,
			     &ii, &ii, &istart, &istop, &itmp1, &itmp2, &
			    liloz, &lihiz, &work[vecsidx + 1], &v2, &v3, &t1, 
			    &t2, &t3, (ftnlen)3);
		}
L160:
		;
	    }

	    i__2 = ibulge;
	    for (ki = 1; ki <= i__2; ++ki) {
		if (krow[ki - 1] > kp2row[ki - 1]) {
		    goto L180;
		}
		if (myrow != icurrow[ki - 1] && down != icurrow[ki - 1]) {
		    goto L180;
		}
/* Computing MAX */
		i__3 = k1[ki - 1];
		istart = max(i__3,m);
/* Computing MIN */
		i__3 = k2[ki - 1], i__4 = i__ - 1;
		istop = min(i__3,i__4);
		if (istart == istop || (istart - 1) % hbl >= hbl - 2 || 
			icurrow[ki - 1] != myrow) {
		    i__3 = istop;
		    for (k = istart; k <= i__3; ++k) {
			v2 = work[vecsidx + (k - 1) * 3 + 1];
			v3 = work[vecsidx + (k - 1) * 3 + 2];
			t1 = work[vecsidx + (k - 1) * 3 + 3];
/* Computing MIN */
			i__4 = 3, i__5 = i__ - k + 1;
			nr = min(i__4,i__5);
			t2 = t1 * v2;
			if (nr == 3 && krow[ki - 1] <= kp2row[ki - 1]) {
			    t3 = t1 * v3;
			    if (k < istop && (k - 1) % hbl < hbl - 2) {
/* Computing MIN */
				i__4 = k2[ki - 1] + 1, i__5 = i__ - 1;
				itmp1 = min(i__4,i__5) + 1;
			    } else {
				if ((k - 1) % hbl < hbl - 2) {
/* Computing MIN */
				    i__4 = k2[ki - 1] + 1, i__5 = i__ - 1;
				    itmp1 = min(i__4,i__5) + 1;
				}
				if ((k - 1) % hbl == hbl - 2) {
/* Computing MIN */
				    i__4 = k + 4;
				    itmp1 = min(i__4,i2) + 1;
				}
				if ((k - 1) % hbl == hbl - 1) {
/* Computing MIN */
				    i__4 = k + 3;
				    itmp1 = min(i__4,i2) + 1;
				}
			    }

/*                    Find local coor of rows K through K+2 */

			    irow1 = krow[ki - 1];
			    irow2 = kp2row[ki - 1];
			    infog1l_(&itmp1, &hbl, &npcol, &mycol, &c__0, &
				    icol1, &icol2);
			    icol2 = numroc_(&i2, &hbl, &mycol, &c__0, &npcol);
			    if ((k - 1) % hbl < hbl - 2 || nprow == 1) {
				dlaref_("Row", &a[1], &lda, wantz, &z__[1], &
					ldz, &c_false, &irow1, &irow1, &
					istart, &istop, &icol1, &icol2, &
					liloz, &lihiz, &work[vecsidx + 1], &
					v2, &v3, &t1, &t2, &t3, (ftnlen)3);
			    }
			    if ((k - 1) % hbl == hbl - 2 && nprow > 1) {
				if (irow1 == irow2) {
				    i__4 = icol2 - icol1 + 1;
				    dgesd2d_(&contxt, &c__1, &i__4, &a[(icol1 
					    - 1) * lda + irow2], &lda, &up, &
					    mycol);
				}
			    }
			    if ((k - 1) % hbl == hbl - 1 && nprow > 1) {
				if (irow1 == irow2) {
				    i__4 = icol2 - icol1 + 1;
				    dgesd2d_(&contxt, &c__1, &i__4, &a[(icol1 
					    - 1) * lda + irow1], &lda, &down, 
					    &mycol);
				}
			    }
			}
/* L170: */
		    }
		}
L180:
		;
	    }

	    i__2 = ibulge;
	    for (ki = 1; ki <= i__2; ++ki) {
		if (krow[ki - 1] > kp2row[ki - 1]) {
		    goto L220;
		}
		if (myrow != icurrow[ki - 1] && down != icurrow[ki - 1]) {
		    goto L220;
		}
/* Computing MAX */
		i__3 = k1[ki - 1];
		istart = max(i__3,m);
/* Computing MIN */
		i__3 = k2[ki - 1], i__4 = i__ - 1;
		istop = min(i__3,i__4);
		if (istart == istop || (istart - 1) % hbl >= hbl - 2 || 
			icurrow[ki - 1] != myrow) {
		    i__3 = istop;
		    for (k = istart; k <= i__3; ++k) {
			v2 = work[vecsidx + (k - 1) * 3 + 1];
			v3 = work[vecsidx + (k - 1) * 3 + 2];
			t1 = work[vecsidx + (k - 1) * 3 + 3];
/* Computing MIN */
			i__4 = 3, i__5 = i__ - k + 1;
			nr = min(i__4,i__5);
			t2 = t1 * v2;
			if (nr == 3 && krow[ki - 1] <= kp2row[ki - 1]) {
			    t3 = t1 * v3;
			    if (k < istop && (k - 1) % hbl < hbl - 2) {
/* Computing MIN */
				i__4 = k2[ki - 1] + 1, i__5 = i__ - 1;
				itmp1 = min(i__4,i__5) + 1;
			    } else {
				if ((k - 1) % hbl < hbl - 2) {
/* Computing MIN */
				    i__4 = k2[ki - 1] + 1, i__5 = i__ - 1;
				    itmp1 = min(i__4,i__5) + 1;
				}
				if ((k - 1) % hbl == hbl - 2) {
/* Computing MIN */
				    i__4 = k + 4;
				    itmp1 = min(i__4,i2) + 1;
				}
				if ((k - 1) % hbl == hbl - 1) {
/* Computing MIN */
				    i__4 = k + 3;
				    itmp1 = min(i__4,i2) + 1;
				}
			    }

			    irow1 = krow[ki - 1] + k - istart;
			    irow2 = kp2row[ki - 1] + k - istart;
			    infog1l_(&itmp1, &hbl, &npcol, &mycol, &c__0, &
				    icol1, &icol2);
			    icol2 = numroc_(&i2, &hbl, &mycol, &c__0, &npcol);
			    if ((k - 1) % hbl == hbl - 2 && nprow > 1) {
				if (irow1 != irow2) {
				    i__4 = icol2 - icol1 + 1;
				    dgerv2d_(&contxt, &c__1, &i__4, &work[
					    irbuf + 1], &c__1, &down, &mycol);
				    i__4 = icol2;
				    for (j = icol1; j <= i__4; ++j) {
					sum = a[(j - 1) * lda + irow1] + v2 * 
						a[(j - 1) * lda + irow1 + 1] 
						+ v3 * work[irbuf + j - icol1 
						+ 1];
					a[(j - 1) * lda + irow1] -= sum * t1;
					a[(j - 1) * lda + irow1 + 1] -= sum * 
						t2;
					work[irbuf + j - icol1 + 1] -= sum * 
						t3;
/* L190: */
				    }
				    i__4 = icol2 - icol1 + 1;
				    dgesd2d_(&contxt, &c__1, &i__4, &work[
					    irbuf + 1], &c__1, &down, &mycol);
				}
			    }
			    if ((k - 1) % hbl == hbl - 1 && nprow > 1) {
				if (irow1 != irow2) {
				    i__4 = icol2 - icol1 + 1;
				    dgerv2d_(&contxt, &c__1, &i__4, &work[
					    irbuf + 1], &c__1, &up, &mycol);
				    i__4 = icol2;
				    for (j = icol1; j <= i__4; ++j) {
					sum = work[irbuf + j - icol1 + 1] + 
						v2 * a[(j - 1) * lda + irow1] 
						+ v3 * a[(j - 1) * lda + 
						irow1 + 1];
					work[irbuf + j - icol1 + 1] -= sum * 
						t1;
					a[(j - 1) * lda + irow1] -= sum * t2;
					a[(j - 1) * lda + irow1 + 1] -= sum * 
						t3;
/* L200: */
				    }
				    i__4 = icol2 - icol1 + 1;
				    dgesd2d_(&contxt, &c__1, &i__4, &work[
					    irbuf + 1], &c__1, &up, &mycol);
				}
			    }
			}
/* L210: */
		    }
		}
L220:
		;
	    }

	    i__2 = ibulge;
	    for (ki = 1; ki <= i__2; ++ki) {
		if (krow[ki - 1] > kp2row[ki - 1]) {
		    goto L240;
		}
		if (myrow != icurrow[ki - 1] && down != icurrow[ki - 1]) {
		    goto L240;
		}
/* Computing MAX */
		i__3 = k1[ki - 1];
		istart = max(i__3,m);
/* Computing MIN */
		i__3 = k2[ki - 1], i__4 = i__ - 1;
		istop = min(i__3,i__4);
		if (istart == istop || (istart - 1) % hbl >= hbl - 2 || 
			icurrow[ki - 1] != myrow) {
		    i__3 = istop;
		    for (k = istart; k <= i__3; ++k) {
			v2 = work[vecsidx + (k - 1) * 3 + 1];
			v3 = work[vecsidx + (k - 1) * 3 + 2];
			t1 = work[vecsidx + (k - 1) * 3 + 3];
/* Computing MIN */
			i__4 = 3, i__5 = i__ - k + 1;
			nr = min(i__4,i__5);
			t2 = t1 * v2;
			if (nr == 3 && krow[ki - 1] <= kp2row[ki - 1]) {
			    t3 = t1 * v3;
			    if (k < istop && (k - 1) % hbl < hbl - 2) {
/* Computing MIN */
				i__4 = k2[ki - 1] + 1, i__5 = i__ - 1;
				itmp1 = min(i__4,i__5) + 1;
			    } else {
				if ((k - 1) % hbl < hbl - 2) {
/* Computing MIN */
				    i__4 = k2[ki - 1] + 1, i__5 = i__ - 1;
				    itmp1 = min(i__4,i__5) + 1;
				}
				if ((k - 1) % hbl == hbl - 2) {
/* Computing MIN */
				    i__4 = k + 4;
				    itmp1 = min(i__4,i2) + 1;
				}
				if ((k - 1) % hbl == hbl - 1) {
/* Computing MIN */
				    i__4 = k + 3;
				    itmp1 = min(i__4,i2) + 1;
				}
			    }

			    irow1 = krow[ki - 1] + k - istart;
			    irow2 = kp2row[ki - 1] + k - istart;
			    infog1l_(&itmp1, &hbl, &npcol, &mycol, &c__0, &
				    icol1, &icol2);
			    icol2 = numroc_(&i2, &hbl, &mycol, &c__0, &npcol);
			    if ((k - 1) % hbl == hbl - 2 && nprow > 1) {
				if (irow1 == irow2) {
				    i__4 = icol2 - icol1 + 1;
				    dgerv2d_(&contxt, &c__1, &i__4, &a[(icol1 
					    - 1) * lda + irow2], &lda, &up, &
					    mycol);
				}
			    }
			    if ((k - 1) % hbl == hbl - 1 && nprow > 1) {
				if (irow1 == irow2) {
				    i__4 = icol2 - icol1 + 1;
				    dgerv2d_(&contxt, &c__1, &i__4, &a[(icol1 
					    - 1) * lda + irow1], &lda, &down, 
					    &mycol);
				}
			    }
			}
/* L230: */
		    }
		}
L240:
		;
	    }
/* L250: */

/*        Now start major set of block COL reflections */

	    i__2 = ibulge;
	    for (ki = 1; ki <= i__2; ++ki) {
		if (mycol != icurcol[ki - 1] && right != icurcol[ki - 1]) {
		    goto L260;
		}
/* Computing MAX */
		i__3 = k1[ki - 1];
		istart = max(i__3,m);
/* Computing MIN */
		i__3 = k2[ki - 1], i__4 = i__ - 1;
		istop = min(i__3,i__4);

		if (((istart - 1) % hbl < hbl - 2 || npcol == 1) && icurcol[
			ki - 1] == mycol && i__ - istop + 1 >= 3) {
		    k = istart;
		    if (k < istop && (k - 1) % hbl < hbl - 2) {
/* Computing MIN */
			i__3 = istart + 1;
			itmp1 = min(i__3,i__) - 1;
		    } else {
			if ((k - 1) % hbl < hbl - 2) {
/* Computing MIN */
			    i__3 = k + 3;
			    itmp1 = min(i__3,i__);
			}
			if ((k - 1) % hbl == hbl - 2) {
/* Computing MAX */
			    i__3 = i1, i__4 = k - 1;
			    itmp1 = max(i__3,i__4) - 1;
			}
			if ((k - 1) % hbl == hbl - 1) {
/* Computing MAX */
			    i__3 = i1, i__4 = k - 2;
			    itmp1 = max(i__3,i__4) - 1;
			}
		    }

		    icol1 = kcol[ki - 1];
		    infog1l_(&i1, &hbl, &nprow, &myrow, &c__0, &irow1, &irow2)
			    ;
		    irow2 = numroc_(&itmp1, &hbl, &myrow, &c__0, &nprow);
		    if (irow1 <= irow2) {
			itmp2 = irow2;
		    } else {
			itmp2 = -1;
		    }
		    dlaref_("Col", &a[1], &lda, wantz, &z__[1], &ldz, &c_true,
			     &icol1, &icol1, &istart, &istop, &irow1, &irow2, 
			    &liloz, &lihiz, &work[vecsidx + 1], &v2, &v3, &t1,
			     &t2, &t3, (ftnlen)3);
		    k = istop;
		    if ((k - 1) % hbl < hbl - 2) {

/*                 Do from ITMP1+1 to MIN(K+3,I) */

			if ((k - 1) % hbl < hbl - 3) {
			    irow1 = itmp2 + 1;
			    if (itmp1 / hbl % nprow == myrow) {
				if (itmp2 > 0) {
/* Computing MIN */
				    i__3 = k + 3;
				    irow2 = itmp2 + min(i__3,i__) - itmp1;
				} else {
				    irow2 = irow1 - 1;
				}
			    } else {
				irow2 = irow1 - 1;
			    }
			} else {
			    i__3 = itmp1 + 1;
			    infog1l_(&i__3, &hbl, &nprow, &myrow, &c__0, &
				    irow1, &irow2);
/* Computing MIN */
			    i__4 = k + 3;
			    i__3 = min(i__4,i__);
			    irow2 = numroc_(&i__3, &hbl, &myrow, &c__0, &
				    nprow);
			}
			v2 = work[vecsidx + (k - 1) * 3 + 1];
			v3 = work[vecsidx + (k - 1) * 3 + 2];
			t1 = work[vecsidx + (k - 1) * 3 + 3];
			t2 = t1 * v2;
			t3 = t1 * v3;
			icol1 = kcol[ki - 1] + istop - istart;
			dlaref_("Col", &a[1], &lda, &c_false, &z__[1], &ldz, &
				c_false, &icol1, &icol1, &istart, &istop, &
				irow1, &irow2, &liloz, &lihiz, &work[vecsidx 
				+ 1], &v2, &v3, &t1, &t2, &t3, (ftnlen)3);
		    }
		}
L260:
		;
	    }

	    i__2 = ibulge;
	    for (ki = 1; ki <= i__2; ++ki) {
		if (kcol[ki - 1] > kp2col[ki - 1]) {
		    goto L320;
		}
		if (mycol != icurcol[ki - 1] && right != icurcol[ki - 1]) {
		    goto L320;
		}
/* Computing MAX */
		i__3 = k1[ki - 1];
		istart = max(i__3,m);
/* Computing MIN */
		i__3 = k2[ki - 1], i__4 = i__ - 1;
		istop = min(i__3,i__4);
		if ((istart - 1) % hbl >= hbl - 2) {

/*              INFO is found in a buffer */

		    ispec = 1;
		} else {

/*              All INFO is local */

		    ispec = 0;
		}

		i__3 = istop;
		for (k = istart; k <= i__3; ++k) {

		    v2 = work[vecsidx + (k - 1) * 3 + 1];
		    v3 = work[vecsidx + (k - 1) * 3 + 2];
		    t1 = work[vecsidx + (k - 1) * 3 + 3];
/* Computing MIN */
		    i__4 = 3, i__5 = i__ - k + 1;
		    nr = min(i__4,i__5);
		    t2 = t1 * v2;
		    if (nr == 3 && kcol[ki - 1] <= kp2col[ki - 1]) {
			t3 = t1 * v3;

			if (k < istop && (k - 1) % hbl < hbl - 2) {
/* Computing MIN */
			    i__4 = istart + 1;
			    itmp1 = min(i__4,i__) - 1;
			} else {
			    if ((k - 1) % hbl < hbl - 2) {
/* Computing MIN */
				i__4 = k + 3;
				itmp1 = min(i__4,i__);
			    }
			    if ((k - 1) % hbl == hbl - 2) {
/* Computing MAX */
				i__4 = i1, i__5 = k - 1;
				itmp1 = max(i__4,i__5) - 1;
			    }
			    if ((k - 1) % hbl == hbl - 1) {
/* Computing MAX */
				i__4 = i1, i__5 = k - 2;
				itmp1 = max(i__4,i__5) - 1;
			    }
			}
			icol1 = kcol[ki - 1] + k - istart;
			icol2 = kp2col[ki - 1] + k - istart;
			infog1l_(&i1, &hbl, &nprow, &myrow, &c__0, &irow1, &
				irow2);
			irow2 = numroc_(&itmp1, &hbl, &myrow, &c__0, &nprow);
			if ((k - 1) % hbl == hbl - 2 && npcol > 1) {
			    if (icol1 == icol2) {
				i__4 = irow2 - irow1 + 1;
				dgesd2d_(&contxt, &i__4, &c__1, &a[(icol1 - 1)
					 * lda + irow1], &lda, &myrow, &left);
				i__4 = irow2 - irow1 + 1;
				dgerv2d_(&contxt, &i__4, &c__1, &a[(icol1 - 1)
					 * lda + irow1], &lda, &myrow, &left);
			    } else {
				i__4 = irow2 - irow1 + 1;
				i__5 = irow2 - irow1 + 1;
				dgerv2d_(&contxt, &i__4, &c__1, &work[icbuf + 
					1], &i__5, &myrow, &right);
				i__4 = irow2;
				for (j = irow1; j <= i__4; ++j) {
				    sum = a[(icol1 - 1) * lda + j] + v2 * a[
					    icol1 * lda + j] + v3 * work[
					    icbuf + j - irow1 + 1];
				    a[(icol1 - 1) * lda + j] -= sum * t1;
				    a[icol1 * lda + j] -= sum * t2;
				    work[icbuf + j - irow1 + 1] -= sum * t3;
/* L270: */
				}
				i__4 = irow2 - irow1 + 1;
				i__5 = irow2 - irow1 + 1;
				dgesd2d_(&contxt, &i__4, &c__1, &work[icbuf + 
					1], &i__5, &myrow, &right);
			    }
			}
			if ((k - 1) % hbl == hbl - 1 && npcol > 1) {
			    if (icol1 == icol2) {
				i__4 = irow2 - irow1 + 1;
				dgesd2d_(&contxt, &i__4, &c__1, &a[(icol1 - 1)
					 * lda + irow1], &lda, &myrow, &right)
					;
				i__4 = irow2 - irow1 + 1;
				dgerv2d_(&contxt, &i__4, &c__1, &a[(icol1 - 1)
					 * lda + irow1], &lda, &myrow, &right)
					;
			    } else {
				i__4 = irow2 - irow1 + 1;
				i__5 = irow2 - irow1 + 1;
				dgerv2d_(&contxt, &i__4, &c__1, &work[icbuf + 
					1], &i__5, &myrow, &left);
				i__4 = irow2;
				for (j = irow1; j <= i__4; ++j) {
				    sum = work[icbuf + j - irow1 + 1] + v2 * 
					    a[(icol1 - 1) * lda + j] + v3 * a[
					    icol1 * lda + j];
				    work[icbuf + j - irow1 + 1] -= sum * t1;
				    a[(icol1 - 1) * lda + j] -= sum * t2;
				    a[icol1 * lda + j] -= sum * t3;
/* L280: */
				}
				i__4 = irow2 - irow1 + 1;
				i__5 = irow2 - irow1 + 1;
				dgesd2d_(&contxt, &i__4, &c__1, &work[icbuf + 
					1], &i__5, &myrow, &left);
			    }
			}

/*                 If we want Z and we haven't already done any Z */
			if (*wantz && (k - 1) % hbl >= hbl - 2 && npcol > 1) {

/*                    Accumulate transformations in the matrix Z */

			    irow1 = liloz;
			    irow2 = lihiz;
			    if ((k - 1) % hbl == hbl - 2) {
				if (icol1 == icol2) {
				    i__4 = irow2 - irow1 + 1;
				    dgesd2d_(&contxt, &i__4, &c__1, &z__[(
					    icol1 - 1) * ldz + irow1], &ldz, &
					    myrow, &left);
				    i__4 = irow2 - irow1 + 1;
				    dgerv2d_(&contxt, &i__4, &c__1, &z__[(
					    icol1 - 1) * ldz + irow1], &ldz, &
					    myrow, &left);
				} else {
				    i__4 = irow2 - irow1 + 1;
				    i__5 = irow2 - irow1 + 1;
				    dgerv2d_(&contxt, &i__4, &c__1, &work[
					    icbuf + 1], &i__5, &myrow, &right)
					    ;
				    icol1 = (icol1 - 1) * ldz;
				    i__4 = irow2;
				    for (j = irow1; j <= i__4; ++j) {
					sum = z__[icol1 + j] + v2 * z__[icol1 
						+ j + ldz] + v3 * work[icbuf 
						+ j - irow1 + 1];
					z__[j + icol1] -= sum * t1;
					z__[j + icol1 + ldz] -= sum * t2;
					work[icbuf + j - irow1 + 1] -= sum * 
						t3;
/* L290: */
				    }
				    i__4 = irow2 - irow1 + 1;
				    i__5 = irow2 - irow1 + 1;
				    dgesd2d_(&contxt, &i__4, &c__1, &work[
					    icbuf + 1], &i__5, &myrow, &right)
					    ;
				}
			    }
			    if ((k - 1) % hbl == hbl - 1) {
				if (icol1 == icol2) {
				    i__4 = irow2 - irow1 + 1;
				    dgesd2d_(&contxt, &i__4, &c__1, &z__[(
					    icol1 - 1) * ldz + irow1], &ldz, &
					    myrow, &right);
				    i__4 = irow2 - irow1 + 1;
				    dgerv2d_(&contxt, &i__4, &c__1, &z__[(
					    icol1 - 1) * ldz + irow1], &ldz, &
					    myrow, &right);
				} else {
				    i__4 = irow2 - irow1 + 1;
				    i__5 = irow2 - irow1 + 1;
				    dgerv2d_(&contxt, &i__4, &c__1, &work[
					    icbuf + 1], &i__5, &myrow, &left);
				    icol1 = (icol1 - 1) * ldz;
				    i__4 = irow2;
				    for (j = irow1; j <= i__4; ++j) {
					sum = work[icbuf + j - irow1 + 1] + 
						v2 * z__[j + icol1] + v3 * 
						z__[j + icol1 + ldz];
					work[icbuf + j - irow1 + 1] -= sum * 
						t1;
					z__[j + icol1] -= sum * t2;
					z__[j + icol1 + ldz] -= sum * t3;
/* L300: */
				    }
				    i__4 = irow2 - irow1 + 1;
				    i__5 = irow2 - irow1 + 1;
				    dgesd2d_(&contxt, &i__4, &c__1, &work[
					    icbuf + 1], &i__5, &myrow, &left);
				}
			    }
			}
			if (icurcol[ki - 1] == mycol) {
			    if (ispec == 0 || npcol == 1) {
				++localk2[ki - 1];
			    }
			} else {
			    if ((k - 1) % hbl == hbl - 1 && icurcol[ki - 1] ==
				     right) {
				if (k > m) {
				    localk2[ki - 1] += 2;
				} else {
				    ++localk2[ki - 1];
				}
			    }
			    if ((k - 1) % hbl == hbl - 2 && i__ - k == 2 && 
				    icurcol[ki - 1] == right) {
				localk2[ki - 1] += 2;
			    }
			}
		    }
/* L310: */
		}
L320:
		;
	    }

/*        Column work done */

/* L330: */

/*        Now do NR=2 work */

	    i__2 = ibulge;
	    for (ki = 1; ki <= i__2; ++ki) {
/* Computing MAX */
		i__3 = k1[ki - 1];
		istart = max(i__3,m);
/* Computing MIN */
		i__3 = k2[ki - 1], i__4 = i__ - 1;
		istop = min(i__3,i__4);
		if ((istart - 1) % hbl >= hbl - 2) {

/*              INFO is found in a buffer */

		    ispec = 1;
		} else {

/*              All INFO is local */

		    ispec = 0;
		}

		i__3 = istop;
		for (k = istart; k <= i__3; ++k) {

		    v2 = work[vecsidx + (k - 1) * 3 + 1];
		    v3 = work[vecsidx + (k - 1) * 3 + 2];
		    t1 = work[vecsidx + (k - 1) * 3 + 3];
/* Computing MIN */
		    i__4 = 3, i__5 = i__ - k + 1;
		    nr = min(i__4,i__5);
		    t2 = t1 * v2;
		    if (nr == 2) {

/*              Apply G from the left to transform the rows of the matrix */
/*              in columns K to I2. */

			infog1l_(&k, &hbl, &npcol, &mycol, &c__0, &liloh, &
				lihih);
			lihih = numroc_(&i2, &hbl, &mycol, &c__0, &npcol);
			infog1l_(&c__1, &hbl, &nprow, &myrow, &c__0, &itmp2, &
				itmp1);
			i__4 = k + 1;
			itmp1 = numroc_(&i__4, &hbl, &myrow, &c__0, &nprow);
			if (icurrow[ki - 1] == myrow) {
			    if (ispec == 0 || nprow == 1 || (k - 1) % hbl == 
				    hbl - 2) {
				--itmp1;
				i__4 = (lihih - 1) * lda;
				i__5 = lda;
				for (j = (liloh - 1) * lda; i__5 < 0 ? j >= 
					i__4 : j <= i__4; j += i__5) {
				    sum = a[itmp1 + j] + v2 * a[itmp1 + 1 + j]
					    ;
				    a[itmp1 + j] -= sum * t1;
				    a[itmp1 + 1 + j] -= sum * t2;
/* L340: */
				}
			    } else {
				if ((k - 1) % hbl == hbl - 1) {
				    i__5 = lihih - liloh + 1;
				    dgerv2d_(&contxt, &c__1, &i__5, &work[
					    irbuf + 1], &c__1, &up, &mycol);
				    i__5 = lihih;
				    for (j = liloh; j <= i__5; ++j) {
					sum = work[irbuf + j - liloh + 1] + 
						v2 * a[(j - 1) * lda + itmp1];
					work[irbuf + j - liloh + 1] -= sum * 
						t1;
					a[(j - 1) * lda + itmp1] -= sum * t2;
/* L350: */
				    }
				    i__5 = lihih - liloh + 1;
				    dgesd2d_(&contxt, &c__1, &i__5, &work[
					    irbuf + 1], &c__1, &up, &mycol);
				}
			    }
			} else {
			    if ((k - 1) % hbl == hbl - 1 && icurrow[ki - 1] ==
				     down) {
				i__5 = lihih - liloh + 1;
				dgesd2d_(&contxt, &c__1, &i__5, &a[(liloh - 1)
					 * lda + itmp1], &lda, &down, &mycol);
				i__5 = lihih - liloh + 1;
				dgerv2d_(&contxt, &c__1, &i__5, &a[(liloh - 1)
					 * lda + itmp1], &lda, &down, &mycol);
			    }
			}

/*              Apply G from the right to transform the columns of the */
/*              matrix in rows I1 to MIN(K+3,I). */

			infog1l_(&i1, &hbl, &nprow, &myrow, &c__0, &liloh, &
				lihih);
			lihih = numroc_(&i__, &hbl, &myrow, &c__0, &nprow);

			if (icurcol[ki - 1] == mycol) {
/*                 LOCAL A(LILOZ:LIHIZ,LOCALK2:LOCALK2+2) */
			    if (ispec == 0 || npcol == 1 || (k - 1) % hbl == 
				    hbl - 2) {
				infog1l_(&k, &hbl, &npcol, &mycol, &c__0, &
					itmp1, &itmp2);
				i__5 = k + 1;
				itmp2 = numroc_(&i__5, &hbl, &mycol, &c__0, &
					npcol);
				i__5 = lihih;
				for (j = liloh; j <= i__5; ++j) {
				    sum = a[(itmp1 - 1) * lda + j] + v2 * a[
					    itmp1 * lda + j];
				    a[(itmp1 - 1) * lda + j] -= sum * t1;
				    a[itmp1 * lda + j] -= sum * t2;
/* L360: */
				}
			    } else {
				itmp1 = localk2[ki - 1];
				if ((k - 1) % hbl == hbl - 1) {
				    i__5 = lihih - liloh + 1;
				    i__4 = lihih - liloh + 1;
				    dgerv2d_(&contxt, &i__5, &c__1, &work[
					    icbuf + 1], &i__4, &myrow, &left);
				    i__5 = lihih;
				    for (j = liloh; j <= i__5; ++j) {
					sum = work[icbuf + j] + v2 * a[(itmp1 
						- 1) * lda + j];
					work[icbuf + j] -= sum * t1;
					a[(itmp1 - 1) * lda + j] -= sum * t2;
/* L370: */
				    }
				    i__5 = lihih - liloh + 1;
				    i__4 = lihih - liloh + 1;
				    dgesd2d_(&contxt, &i__5, &c__1, &work[
					    icbuf + 1], &i__4, &myrow, &left);
				}
			    }
			} else {
			    if ((k - 1) % hbl == hbl - 1 && icurcol[ki - 1] ==
				     right) {
				itmp1 = kcol[ki - 1];
				i__5 = lihih - liloh + 1;
				dgesd2d_(&contxt, &i__5, &c__1, &a[(itmp1 - 1)
					 * lda + liloh], &lda, &myrow, &right)
					;
				infog1l_(&k, &hbl, &npcol, &mycol, &c__0, &
					itmp1, &itmp2);
				i__5 = k + 1;
				itmp2 = numroc_(&i__5, &hbl, &mycol, &c__0, &
					npcol);
				i__5 = lihih - liloh + 1;
				dgerv2d_(&contxt, &i__5, &c__1, &a[(itmp1 - 1)
					 * lda + liloh], &lda, &myrow, &right)
					;
			    }
			}

			if (*wantz) {

/*                 Accumulate transformations in the matrix Z */

			    if (icurcol[ki - 1] == mycol) {
/*                    LOCAL Z(LILOZ:LIHIZ,LOCALK2:LOCALK2+2) */
				if (ispec == 0 || npcol == 1 || (k - 1) % hbl 
					== hbl - 2) {
				    itmp1 = kcol[ki - 1] + k - istart;
				    itmp1 = (itmp1 - 1) * ldz;
				    i__5 = lihiz;
				    for (j = liloz; j <= i__5; ++j) {
					sum = z__[j + itmp1] + v2 * z__[j + 
						itmp1 + ldz];
					z__[j + itmp1] -= sum * t1;
					z__[j + itmp1 + ldz] -= sum * t2;
/* L380: */
				    }
				    ++localk2[ki - 1];
				} else {
				    itmp1 = localk2[ki - 1];
/*                       IF WE ACTUALLY OWN COLUMN K */
				    if ((k - 1) % hbl == hbl - 1) {
					i__5 = lihiz - liloz + 1;
					dgerv2d_(&contxt, &i__5, &c__1, &work[
						icbuf + 1], &ldz, &myrow, &
						left);
					itmp1 = (itmp1 - 1) * ldz;
					i__5 = lihiz;
					for (j = liloz; j <= i__5; ++j) {
					    sum = work[icbuf + j] + v2 * z__[
						    j + itmp1];
					    work[icbuf + j] -= sum * t1;
					    z__[j + itmp1] -= sum * t2;
/* L390: */
					}
					i__5 = lihiz - liloz + 1;
					dgesd2d_(&contxt, &i__5, &c__1, &work[
						icbuf + 1], &ldz, &myrow, &
						left);
					++localk2[ki - 1];
				    }
				}
			    } else {

/*                    NO WORK BUT NEED TO UPDATE ANYWAY???? */

				if ((k - 1) % hbl == hbl - 1 && icurcol[ki - 
					1] == right) {
				    itmp1 = kcol[ki - 1];
				    itmp1 = (itmp1 - 1) * ldz;
				    i__5 = lihiz - liloz + 1;
				    dgesd2d_(&contxt, &i__5, &c__1, &z__[
					    liloz + itmp1], &ldz, &myrow, &
					    right);
				    i__5 = lihiz - liloz + 1;
				    dgerv2d_(&contxt, &i__5, &c__1, &z__[
					    liloz + itmp1], &ldz, &myrow, &
					    right);
				    ++localk2[ki - 1];
				}
			    }
			}
		    }
/* L400: */
		}

/*        Adjust local information for this bulge */

		if (nprow == 1) {
		    krow[ki - 1] = krow[ki - 1] + k2[ki - 1] - k1[ki - 1] + 1;
		    kp2row[ki - 1] = kp2row[ki - 1] + k2[ki - 1] - k1[ki - 1] 
			    + 1;
		}
		if ((k1[ki - 1] - 1) % hbl < hbl - 2 && icurrow[ki - 1] == 
			myrow && nprow > 1) {
		    krow[ki - 1] = krow[ki - 1] + k2[ki - 1] - k1[ki - 1] + 1;
		}
		if (k2[ki - 1] % hbl < hbl - 2 && icurrow[ki - 1] == myrow && 
			nprow > 1) {
		    kp2row[ki - 1] = kp2row[ki - 1] + k2[ki - 1] - k1[ki - 1] 
			    + 1;
		}
		if ((k1[ki - 1] - 1) % hbl >= hbl - 2 && (myrow == icurrow[ki 
			- 1] || down == icurrow[ki - 1]) && nprow > 1) {
		    i__3 = k2[ki - 1] + 1;
		    infog1l_(&i__3, &hbl, &nprow, &myrow, &c__0, &krow[ki - 1]
			    , &itmp2);
		    itmp2 = numroc_(n, &hbl, &myrow, &c__0, &nprow);
		}
		if (k2[ki - 1] % hbl >= hbl - 2 && (myrow == icurrow[ki - 1] 
			|| up == icurrow[ki - 1]) && nprow > 1) {
		    infog1l_(&c__1, &hbl, &nprow, &myrow, &c__0, &itmp2, &
			    kp2row[ki - 1]);
		    i__3 = k2[ki - 1] + 3;
		    kp2row[ki - 1] = numroc_(&i__3, &hbl, &myrow, &c__0, &
			    nprow);
		}
		if (npcol == 1) {
		    kcol[ki - 1] = kcol[ki - 1] + k2[ki - 1] - k1[ki - 1] + 1;
		    kp2col[ki - 1] = kp2col[ki - 1] + k2[ki - 1] - k1[ki - 1] 
			    + 1;
		}
		if ((k1[ki - 1] - 1) % hbl < hbl - 2 && icurcol[ki - 1] == 
			mycol && npcol > 1) {
		    kcol[ki - 1] = kcol[ki - 1] + k2[ki - 1] - k1[ki - 1] + 1;
		}
		if (k2[ki - 1] % hbl < hbl - 2 && icurcol[ki - 1] == mycol && 
			npcol > 1) {
		    kp2col[ki - 1] = kp2col[ki - 1] + k2[ki - 1] - k1[ki - 1] 
			    + 1;
		}
		if ((k1[ki - 1] - 1) % hbl >= hbl - 2 && (mycol == icurcol[ki 
			- 1] || right == icurcol[ki - 1]) && npcol > 1) {
		    i__3 = k2[ki - 1] + 1;
		    infog1l_(&i__3, &hbl, &npcol, &mycol, &c__0, &kcol[ki - 1]
			    , &itmp2);
		    itmp2 = numroc_(n, &hbl, &mycol, &c__0, &npcol);
		}
		if (k2[ki - 1] % hbl >= hbl - 2 && (mycol == icurcol[ki - 1] 
			|| left == icurcol[ki - 1]) && npcol > 1) {
		    infog1l_(&c__1, &hbl, &npcol, &mycol, &c__0, &itmp2, &
			    kp2col[ki - 1]);
		    i__3 = k2[ki - 1] + 3;
		    kp2col[ki - 1] = numroc_(&i__3, &hbl, &mycol, &c__0, &
			    npcol);
		}
		k1[ki - 1] = k2[ki - 1] + 1;
/* Computing MIN */
		i__3 = k1[ki - 1] + rotn - 1, i__5 = i__ - 2;
		istop = min(i__3,i__5);
/* Computing MIN */
		i__3 = istop, i__5 = k1[ki - 1] + hbl - 3 - (k1[ki - 1] - 1) %
			 hbl;
		istop = min(i__3,i__5);
/* Computing MIN */
		i__3 = istop, i__5 = i2 - 2;
		istop = min(i__3,i__5);
/* Computing MAX */
		i__3 = istop, i__5 = k1[ki - 1];
		istop = max(i__3,i__5);
/*        ISTOP = MIN( ISTOP , I-1 ) */
		k2[ki - 1] = istop;
		if (k1[ki - 1] == istop) {
		    if ((istop - 1) % hbl == hbl - 2 && i__ - istop > 1) {

/*              Next step switches rows & cols */

			icurrow[ki - 1] = (icurrow[ki - 1] + 1) % nprow;
			icurcol[ki - 1] = (icurcol[ki - 1] + 1) % npcol;
		    }
		}
/* L410: */
	    }
	    if (k2[ibulge - 1] <= i__ - 1) {
		goto L40;
	    }
	}

/* L420: */
    }

/*     Failure to converge in remaining number of iterations */

    *info = i__;
    return 0;

L430:

    if (l == i__) {

/*        H(I,I-1) is negligible: one eigenvalue has converged. */

	infog2l_(&i__, &i__, &desca[1], &nprow, &npcol, &myrow, &mycol, &irow,
		 &icol, &itmp1, &itmp2);
	if (myrow == itmp1 && mycol == itmp2) {
	    wr[i__] = a[(icol - 1) * lda + irow];
	} else {
	    wr[i__] = 0.;
	}
	wi[i__] = 0.;
    } else if (l == i__ - 1) {

/*        H(I-1,I-2) is negligible: a pair of eigenvalues have converged. */

	wr[i__ - 1] = 0.;
	wr[i__] = 0.;
	wi[i__ - 1] = 0.;
	wi[i__] = 0.;
	modkm1 = (i__ - 1 + hbl) % hbl;
	i__1 = i__ - 1;
	i__2 = i__ - 1;
	infog2l_(&i__1, &i__2, &desca[1], &nprow, &npcol, &myrow, &mycol, &
		irow1, &icol1, &ii, &jj);
	if (myrow == ii && mycol == jj) {
	    if (modkm1 != 0) {
		h11 = a[(icol1 - 1) * lda + irow1];
		h21 = a[(icol1 - 1) * lda + irow1 + 1];
		h12 = a[icol1 * lda + irow1];
		h22 = a[icol1 * lda + irow1 + 1];
	    } else {
		if (nprow > 1) {
		    dgerv2d_(&contxt, &c__1, &c__1, &h21, &c__1, &down, &
			    mycol);
		} else {
		    h21 = a[(icol1 - 1) * lda + irow1 + 1];
		}
		if (npcol > 1) {
		    dgerv2d_(&contxt, &c__1, &c__1, &h12, &c__1, &myrow, &
			    right);
		} else {
		    h12 = a[icol1 * lda + irow1];
		}
		if (num > 1) {
		    dgerv2d_(&contxt, &c__1, &c__1, &h22, &c__1, &down, &
			    right);
		} else {
		    h22 = a[icol1 * lda + irow1 + 1];
		}
	    }
	    h00 = (h11 + h22) * .5;
	    h10 = h11 * h22 - h12 * h21;
	} else {
	    if (modkm1 == 0) {
		if (nprow > 1 && mycol == jj && up == ii) {
		    i__1 = i__ - 1;
		    infog2l_(&i__, &i__1, &desca[1], &nprow, &npcol, &myrow, &
			    mycol, &irow1, &icol1, &itmp1, &itmp2);
		    dgesd2d_(&contxt, &c__1, &c__1, &a[(icol1 - 1) * lda + 
			    irow1], &c__1, &ii, &jj);
		}
		if (npcol > 1 && left == jj && myrow == ii) {
		    i__1 = i__ - 1;
		    infog2l_(&i__1, &i__, &desca[1], &nprow, &npcol, &myrow, &
			    mycol, &irow1, &icol1, &itmp1, &itmp2);
		    dgesd2d_(&contxt, &c__1, &c__1, &a[(icol1 - 1) * lda + 
			    irow1], &c__1, &ii, &jj);
		}
		if (num > 1 && left == jj && up == ii) {
		    infog2l_(&i__, &i__, &desca[1], &nprow, &npcol, &myrow, &
			    mycol, &irow1, &icol1, &itmp1, &itmp2);
		    dgesd2d_(&contxt, &c__1, &c__1, &a[(icol1 - 1) * lda + 
			    irow1], &c__1, &ii, &jj);
		}
	    }
	    h00 = 0.;
	    h10 = 0.;
	}
	h21 = h00 * h00 - h10;
	if (h21 >= 0.) {
	    h21 = sqrt(h21);
	    wr[i__ - 1] = h00 + h21;
	    wi[i__ - 1] = 0.;
	    wr[i__] = h00 - h21;
	    wi[i__] = 0.;
	} else {
	    h21 = sqrt((abs(h21)));
	    wr[i__ - 1] = h00;
	    wi[i__ - 1] = h21;
	    wr[i__] = h00;
	    wi[i__] = -h21;
	}
    } else {

/*        Find the eigenvalues in H(L:I,L:I), L < I-1 */

	jblk = i__ - l + 1;
	if (jblk <= 64) {
	    i__1 = i__ - l + 1;
	    pdlacp3_(&i__1, &l, &a[1], &desca[1], s1, &c__64, &c__0, &c__0, &
		    c__0);
	    dlahqr_(&c_false, &c_false, &jblk, &c__1, &jblk, s1, &c__64, &wr[
		    l], &wi[l], &c__1, &jblk, &z__[1], &ldz, &ierr);
	    if (node != 0) {

/*           Erase the eigenvalues */

		i__1 = i__;
		for (k = l; k <= i__1; ++k) {
		    wr[k] = 0.;
		    wi[k] = 0.;
/* L440: */
		}
	    }
	}
    }

/*     Decrement number of remaining iterations, and return to start of */
/*     the main loop with new value of I. */

    itn -= its;
    if (m == l - 10) {
	i__ = l - 1;
    } else {
	i__ = m;
    }
/*     I = L - 1 */
    goto L10;

L450:
    dgsum2d_(&contxt, "All", " ", n, &c__1, &wr[1], n, &c_n1, &c_n1, (ftnlen)
	    3, (ftnlen)1);
    dgsum2d_(&contxt, "All", " ", n, &c__1, &wi[1], n, &c_n1, &c_n1, (ftnlen)
	    3, (ftnlen)1);
    return 0;

/*     END OF PDLAHQR */

} /* pdlahqr_ */

