/* /home4/luszczek/mscratch/build/SCALAPACK/SRC/pzlahqr.f -- translated by f2c (version 20031025).
   You must link the resulting object file with libf2c:
	on Microsoft Windows system, link with libf2c.lib;
	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
	or, if you install libf2c.a in a standard place, with -lf2c -lm
	-- in that order, at the end of the command line, as in
		cc *.o -lf2c -lm
	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,

		http://www.netlib.org/f2c/libf2c.zip
*/

#include "f2c.h"

/* Table of constant values */

/* Literal arguments for the Fortran-convention routines called below. */
/* Those routines receive every argument through a pointer, so each */
/* literal needs an addressable object.  The objects are left non-const */
/* because the callee prototypes take plain (non-const) pointers. */

/* Integer literals, in ascending order of value. */
static integer c_n1 = -1;
static integer c__0 = 0;
static integer c__1 = 1;
static integer c__2 = 2;
static integer c__3 = 3;
static integer c__6 = 6;
static integer c__64 = 64;

/* Logical literals. */
static logical c_false = FALSE_;
static logical c_true = TRUE_;

/* Subroutine */ int pzlahqr_(logical *wantt, logical *wantz, integer *n, 
	integer *ilo, integer *ihi, doublecomplex *a, integer *desca, 
	doublecomplex *w, integer *iloz, integer *ihiz, doublecomplex *z__, 
	integer *descz, doublecomplex *work, integer *lwork, integer *iwork, 
	integer *ilwork, integer *info)
{
    /* System generated locals */
    integer i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8;
    doublereal d__1, d__2, d__3, d__4, d__5;
    doublecomplex z__1, z__2, z__3, z__4, z__5, z__6, z__7, z__8;

    /* Builtin functions */
    double d_imag(doublecomplex *);
    void d_cnjg(doublecomplex *, doublecomplex *);

    /* Local variables */
    integer i__, j, k, l, m;
    doublereal s;
    integer i1, i2, k1[32], k2[32];
    doublecomplex s1[4096]	/* was [64][64] */, t1, t2, t3, v2, v3;
    extern /* Subroutine */ int pzlaconsb_(doublecomplex *, integer *, 
	    integer *, integer *, integer *, doublecomplex *, doublecomplex *,
	     doublecomplex *, doublecomplex *, integer *);
    integer istartcol;
    extern /* Subroutine */ int pzlasmsub_(doublecomplex *, integer *, 
	    integer *, integer *, integer *, doublereal *, doublecomplex *, 
	    integer *);
    doublecomplex h10, h11;
    integer istartrow;
    doublecomplex h22, h33, h44;
    integer ii, jj, ki, nh;
    doublereal cs;
    integer nq, nr;
    doublecomplex sn;
    integer up, nz, lda, hbl, ldz, itn, its, num;
    doublereal ulp;
    doublecomplex sum;
    integer idia;
    doublecomplex h43h34;
    integer jblk;
    extern integer ilcm_(integer *, integer *);
    integer node, icol, kcol[32], left, ierr, isub;
    doublereal unfl;
    logical skip;
    integer down;
    doublereal ovfl;
    integer irow, isup, rotn, krow[32], icol1, icol2, itmp1, itmp2, irow1, 
	    irow2, icbuf, lihih, lcmrc, ispec, irbuf, liloh, npcol, right, 
	    izbuf, lihiz, mycol, liloz, istop;
    doublecomplex vcopy[3];
    extern /* Subroutine */ int lfc_SLzcopy(integer *, doublecomplex *, integer *, 
	    doublecomplex *, integer *);
    integer nprow, myrow;
    extern /* Subroutine */ int pzrot_(integer *, doublecomplex *, integer *, 
	    integer *, integer *, integer *, doublecomplex *, integer *, 
	    integer *, integer *, integer *, doublereal *, doublecomplex *);
    integer modkm1, kp2col[32];
    doublecomplex v1save, v2save, v3save;
    extern /* Subroutine */ int zlanv2_(doublecomplex *, doublecomplex *, 
	    doublecomplex *, doublecomplex *, doublecomplex *, doublecomplex *
	    , doublereal *, doublecomplex *);
    doublecomplex t1copy;
    integer kp2row[32], localk, ibulge, localm;
    doublecomplex smalla[1152]	/* was [6][6][32] */;
    integer nbulge;
    extern /* Subroutine */ int zlaref_(char *, doublecomplex *, integer *, 
	    logical *, doublecomplex *, integer *, logical *, integer *, 
	    integer *, integer *, integer *, integer *, integer *, integer *, 
	    integer *, doublecomplex *, doublecomplex *, doublecomplex *, 
	    doublecomplex *, doublecomplex *, doublecomplex *, ftnlen), 
	    zlarfg_(integer *, doublecomplex *, doublecomplex *, integer *, 
	    doublecomplex *), zlamsh_(doublecomplex *, integer *, integer *, 
	    integer *, doublecomplex *, integer *, integer *, doublereal *);
    extern integer numroc_(integer *, integer *, integer *, integer *, 
	    integer *);
    extern /* Subroutine */ int blacs_gridinfo__(integer *, integer *, 
	    integer *, integer *, integer *);
    integer istart;
    doublereal smlnum;
    extern /* Subroutine */ int igebr2d_(integer *, char *, char *, integer *,
	     integer *, integer *, integer *, integer *, integer *, ftnlen, 
	    ftnlen);
    integer contxt;
    extern /* Subroutine */ int igebs2d_(integer *, char *, char *, integer *,
	     integer *, integer *, integer *, ftnlen, ftnlen), igamn2d_(
	    integer *, char *, char *, integer *, integer *, integer *, 
	    integer *, integer *, integer *, integer *, integer *, integer *, 
	    ftnlen, ftnlen);
    integer locali1, locali2;
    extern /* Subroutine */ int infog1l_(integer *, integer *, integer *, 
	    integer *, integer *, integer *, integer *), infog2l_(integer *, 
	    integer *, integer *, integer *, integer *, integer *, integer *, 
	    integer *, integer *, integer *, integer *), zgebr2d_(integer *, 
	    char *, char *, integer *, integer *, doublecomplex *, integer *, 
	    integer *, integer *, ftnlen, ftnlen), zgebs2d_(integer *, char *,
	     char *, integer *, integer *, doublecomplex *, integer *, ftnlen,
	     ftnlen), zgesd2d_(integer *, integer *, integer *, doublecomplex 
	    *, integer *, integer *, integer *), pzlacp3_(integer *, integer *
	    , doublecomplex *, integer *, doublecomplex *, integer *, integer 
	    *, integer *, integer *), zgerv2d_(integer *, integer *, integer *
	    , doublecomplex *, integer *, integer *, integer *), zlahqr2_(
	    logical *, logical *, integer *, integer *, integer *, 
	    doublecomplex *, integer *, doublecomplex *, integer *, integer *,
	     doublecomplex *, integer *, integer *), pdlabad_(integer *, 
	    doublereal *, doublereal *), zgsum2d_(integer *, char *, char *, 
	    integer *, integer *, doublecomplex *, integer *, integer *, 
	    integer *, ftnlen, ftnlen);
    extern doublereal pdlamch_(integer *, char *, ftnlen);
    extern /* Subroutine */ int pxerbla_(integer *, char *, integer *, ftnlen)
	    ;
    integer icurcol[32], iafirst, jafirst, vecsidx, itermax;
    extern /* Subroutine */ int pzlawil_(integer *, integer *, integer *, 
	    doublecomplex *, integer *, doublecomplex *, doublecomplex *, 
	    doublecomplex *, doublecomplex *);
    integer icurrow[32];


/*  -- ScaLAPACK routine (version 1.7) -- */
/*     University of Tennessee, Knoxville, Oak Ridge National Laboratory, */
/*     and University of California, Berkeley. */
/*     July 31, 2001 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  PZLAHQR is an auxiliary routine used to find the Schur decomposition */
/*    and/or eigenvalues of a matrix already in Hessenberg form from */
/*    cols ILO to IHI. */
/*  If Z = I, and WANTT=WANTZ=.TRUE., H gets replaced with Z'HZ, */
/*    with Z'Z=I, and H in Schur form. */

/*  Notes */
/*  ===== */

/*  Each global data object is described by an associated description */
/*  vector.  This vector stores the information required to establish */
/*  the mapping between an object element and its corresponding process */
/*  and memory location. */

/*  Let A be a generic term for any 2D block-cyclically distributed array. */
/*  Such a global array has an associated description vector DESCA. */
/*  In the following comments, the character _ should be read as */
/*  "of the global array". */

/*  NOTATION        STORED IN      EXPLANATION */
/*  --------------- -------------- -------------------------------------- */
/*  DTYPE_A(global) DESCA( DTYPE_ )The descriptor type.  In this case, */
/*                                 DTYPE_A = 1. */
/*  CTXT_A (global) DESCA( CTXT_ ) The BLACS context handle, indicating */
/*                                 the BLACS process grid A is distribu- */
/*                                 ted over. The context itself is glo- */
/*                                 bal, but the handle (the integer */
/*                                 value) may vary. */
/*  M_A    (global) DESCA( M_ )    The number of rows in the global */
/*                                 array A. */
/*  N_A    (global) DESCA( N_ )    The number of columns in the global */
/*                                 array A. */
/*  MB_A   (global) DESCA( MB_ )   The blocking factor used to distribute */
/*                                 the rows of the array. */
/*  NB_A   (global) DESCA( NB_ )   The blocking factor used to distribute */
/*                                 the columns of the array. */
/*  RSRC_A (global) DESCA( RSRC_ ) The process row over which the first */
/*                                 row of the array A is distributed. */
/*  CSRC_A (global) DESCA( CSRC_ ) The process column over which the */
/*                                 first column of the array A is */
/*                                 distributed. */
/*  LLD_A  (local)  DESCA( LLD_ )  The leading dimension of the local */
/*                                 array.  LLD_A >= MAX(1,LOCp(M_A)). */

/*  Let K be the number of rows or columns of a distributed matrix, */
/*  and assume that its process grid has dimension p x q. */
/*  LOCp( K ) denotes the number of elements of K that a process */
/*  would receive if K were distributed over the p processes of its */
/*  process column. */
/*  Similarly, LOCq( K ) denotes the number of elements of K that a */
/*  process would receive if K were distributed over the q processes of */
/*  its process row. */
/*  The values of LOCp() and LOCq() may be determined via a call to the */
/*  ScaLAPACK tool function, NUMROC: */
/*          LOCp( M ) = NUMROC( M, MB_A, MYROW, RSRC_A, NPROW ), */
/*          LOCq( N ) = NUMROC( N, NB_A, MYCOL, CSRC_A, NPCOL ). */
/*  An upper bound for these quantities may be computed by: */
/*          LOCp( M ) <= ceil( ceil(M/MB_A)/NPROW )*MB_A */
/*          LOCq( N ) <= ceil( ceil(N/NB_A)/NPCOL )*NB_A */

/*  Arguments */
/*  ========= */

/*  WANTT   (global input) LOGICAL */
/*          = .TRUE. : the full Schur form T is required; */
/*          = .FALSE.: only eigenvalues are required. */

/*  WANTZ   (global input) LOGICAL */
/*          = .TRUE. : the matrix of Schur vectors Z is required; */
/*          = .FALSE.: Schur vectors are not required. */

/*  N       (global input) INTEGER */
/*          The order of the Hessenberg matrix A (and Z if WANTZ). */
/*          N >= 0. */

/*  ILO     (global input) INTEGER */
/*  IHI     (global input) INTEGER */
/*          It is assumed that A is already upper quasi-triangular in */
/*          rows and columns IHI+1:N, and that A(ILO,ILO-1) = 0 (unless */
/*          ILO = 1). PZLAHQR works primarily with the Hessenberg */
/*          submatrix in rows and columns ILO to IHI, but applies */
/*          transformations to all of H if WANTT is .TRUE.. */
/*          1 <= ILO <= max(1,IHI); IHI <= N. */

/*  A       (global input/output) COMPLEX*16 array, dimension */
/*          (DESCA(LLD_),*) */
/*          On entry, the upper Hessenberg matrix A. */
/*          On exit, if WANTT is .TRUE., A is upper triangular in rows */
/*          and columns ILO:IHI.  If WANTT is .FALSE., the contents of */
/*          A are unspecified on exit. */

/*  DESCA   (global and local input) INTEGER array of dimension DLEN_. */
/*          The array descriptor for the distributed matrix A. */

/*  W      (global replicated output) COMPLEX*16 array, dimension (N) */
/*          The computed eigenvalues ILO to IHI are stored in the */
/*          corresponding elements of W.  If WANTT is .TRUE., the */
/*          eigenvalues are stored in the same order as on the diagonal */
/*          of the Schur form returned in A.  A may be returned with */
/*          larger diagonal blocks until the next release. */

/*  ILOZ    (global input) INTEGER */
/*  IHIZ    (global input) INTEGER */
/*          Specify the rows of Z to which transformations must be */
/*          applied if WANTZ is .TRUE.. */
/*          1 <= ILOZ <= ILO; IHI <= IHIZ <= N. */

/*  Z       (global input/output) COMPLEX*16 array. */
/*          If WANTZ is .TRUE., on entry Z must contain the current */
/*          matrix Z of transformations accumulated by PZHSEQR, and on */
/*          exit Z has been updated; transformations are applied only to */
/*          the submatrix Z(ILOZ:IHIZ,ILO:IHI). */
/*          If WANTZ is .FALSE., Z is not referenced. */

/*  DESCZ   (global and local input) INTEGER array of dimension DLEN_. */
/*          The array descriptor for the distributed matrix Z. */

/*  WORK    (local output) COMPLEX*16 array of size LWORK */
/*          (Unless LWORK=-1, in which case WORK must be at least size 1) */

/*  LWORK   (local input) INTEGER */
/*          WORK(LWORK) is a local array and LWORK is assumed big enough */
/*          so that LWORK >= 3*N + */
/*                MAX( 2*MAX(DESCZ(LLD_),DESCA(LLD_)) + 2*LOCq(N), */
/*                     7*Ceil(N/HBL)/LCM(NPROW,NPCOL)) + */
/*                MAX( 2*N, (8*LCM(NPROW,NPCOL)+2)**2 ) */
/*          If LWORK=-1, then WORK(1) gets set to the above number and */
/*          the code returns immediately. */

/*  IWORK   (global and local input) INTEGER array of size ILWORK */
/*          This will hold some of the IBLK integer arrays. */
/*          This is held as a place holder for a future release. */
/*          Currently unreferenced. */

/*  ILWORK  (local input) INTEGER */
/*          This will hold the size of the IWORK array. */
/*          This is held as a place holder for a future release. */
/*          Currently unreferenced. */

/*  INFO    (global output) INTEGER */
/*          < 0: parameter number -INFO incorrect or inconsistent */
/*          = 0: successful exit */
/*          > 0: PZLAHQR failed to compute all the eigenvalues ILO to IHI */
/*               in a total of 30*(IHI-ILO+1) iterations; if INFO = i, */
/*               elements i+1:ihi of W contain those eigenvalues */
/*               which have been successfully computed. */

/*  Logic: */
/*       This algorithm is very similar to DLAHQR.  Unlike DLAHQR, */
/*       instead of sending one double shift through the largest */
/*       unreduced submatrix, this algorithm sends multiple double shifts */
/*       and spaces them apart so that there can be parallelism across */
/*       several processor row/columns.  Another critical difference is */
/*       that this algorithm aggregates multiple transforms together in */
/*       order to apply them in a block fashion. */

/*  Important Local Variables: */
/*       IBLK = The maximum number of bulges that can be computed. */
/*           Currently fixed.  In future releases this won't be fixed. */
/*       HBL  = The square block size (HBL=DESCA(MB_)=DESCA(NB_)) */
/*       ROTN = The number of transforms to block together */
/*       NBULGE = The number of bulges that will be attempted on the */
/*           current submatrix. */
/*       IBULGE = The current number of bulges started. */
/*       K1(*),K2(*) = The current bulge loops from K1(*) to K2(*). */

/*  Subroutines: */
/*       From LAPACK, this routine calls: */
/*           ZLAHQR     -> Serial QR used to determine shifts and */
/*                         eigenvalues */
/*           ZLARFG     -> Determine the Householder transforms */

/*       From ScaLAPACK, this routine calls: */
/*           PZLACONSB  -> To determine where to start each iteration */
/*           ZLAMSH     -> Sends multiple shifts through a small */
/*                         submatrix to see how the consecutive */
/*                         subdiagonals change (if PZLACONSB indicates */
/*                         we can start a run in the middle) */
/*           PZLAWIL    -> Given the shift, get the transformation */
/*           PZLACP3    -> Parallel array to local replicated array copy */
/*                         & back. */
/*           ZLAREF     -> Row/column reflector applier.  Core routine */
/*                         here. */
/*           PZLASMSUB  -> Finds negligible subdiagonal elements. */

/*  Current Notes and/or Restrictions: */
/*       1.) This code requires the distributed block size to be square */
/*           and at least six (6); unlike simpler codes like LU, this */
/*           algorithm is extremely sensitive to block size.  Unwise */
/*           choices of too small a block size can lead to bad */
/*           performance. */
/*           NOTE: the argument check in the code only rejects HBL < 5, */
/*           so a block size of 5 is not reported as an error. */
/*       2.) This code requires A and Z to be distributed identically */
/*           and have identical contxts.  A future version may allow Z to */
/*           have a different contxt to 1D row map it to all nodes (so no */
/*           communication on Z is necessary.) */
/*       3.) This code does not currently block the initial transforms */
/*           so that none of the rows or columns for any bulge are */
/*           completed until all are started.  To offset pipeline */
/*           start-up it is recommended that at least 2*LCM(NPROW,NPCOL) */
/*           bulges are used (if possible) */
/*       4.) The maximum number of bulges currently supported is fixed at */
/*           32.  In future versions this will be limited only by the */
/*           incoming WORK and IWORK array. */
/*       5.) The matrix A must be in upper Hessenberg form.  If elements */
/*           below the subdiagonal are nonzero, the resulting transforms */
/*           may be nonsimilar.  This is also true with the LAPACK */
/*           routine ZLAHQR. */
/*       6.) For this release, this code has only been tested for */
/*           RSRC_=CSRC_=0, but it has been written for the general case. */
/*       7.) Currently, all the eigenvalues are distributed to all the */
/*           nodes.  Future releases will probably distribute the */
/*           eigenvalues by the column partitioning. */
/*       8.) The internals of this routine are subject to change. */
/*       9.) To optimize this for your architecture, try tuning ZLAREF. */
/*       10.) This code has only been tested for WANTZ = .TRUE. and may */
/*           behave unpredictably for WANTZ set to .FALSE. */

/*  Further Details */
/*  =============== */

/*  Contributed by Mark Fahey, June, 2000. */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. Local Arrays .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */

/*     .. */
/*     .. Statement Functions .. */
/*     .. */
/*     .. Statement Function definitions .. */
/*     .. */
/*     .. Executable Statements .. */

    /* Parameter adjustments */
    --iwork;
    --work;
    --descz;
    --z__;
    --w;
    --desca;
    --a;

    /* Function Body */
    *info = 0;

    itermax = (*ihi - *ilo + 1) * 30;
    if (*n == 0) {
	return 0;
    }

/*     NODE (IAFIRST,JAFIRST) OWNS A(1,1) */

    hbl = desca[5];
    contxt = desca[2];
    lda = desca[9];
    iafirst = desca[7];
    jafirst = desca[8];
    ldz = descz[9];
    blacs_gridinfo__(&contxt, &nprow, &npcol, &myrow, &mycol);
    node = myrow * npcol + mycol;
    num = nprow * npcol;
    left = (mycol + npcol - 1) % npcol;
    right = (mycol + 1) % npcol;
    up = (myrow + nprow - 1) % nprow;
    down = (myrow + 1) % nprow;
    lcmrc = ilcm_(&nprow, &npcol);
    if (nprow <= 3 || npcol <= 3) {
	skip = TRUE_;
    } else {
	skip = FALSE_;
    }

/*     Determine the number of columns we have so we can check workspace */

    nq = numroc_(n, &hbl, &mycol, &jafirst, &npcol);
    jj = *n / hbl;
    if (jj * hbl < *n) {
	++jj;
    }
    jj = jj * 7 / lcmrc;
/* Computing MAX */
    i__1 = (max(lda,ldz) << 1) + (nq << 1);
    jj = *n * 3 + max(i__1,jj);
/* Computing MAX */
/* Computing 2nd power */
    i__3 = (lcmrc << 3) + 2;
    i__1 = *n << 1, i__2 = i__3 * i__3;
    jj += max(i__1,i__2);
    if (*lwork == -1) {
	work[1].r = (doublereal) jj, work[1].i = 0.;
	return 0;
    }
    if (*lwork < jj) {
	*info = -14;
    }
    if (descz[2] != desca[2]) {
	*info = -1302;
    }
    if (desca[5] != desca[6]) {
	*info = -706;
    }
    if (descz[5] != descz[6]) {
	*info = -1306;
    }
    if (desca[5] != descz[5]) {
	*info = -1305;
    }
    if (desca[7] != 0 || desca[8] != 0) {
	*info = -707;
    }
    if (descz[7] != 0 || descz[8] != 0) {
	*info = -1307;
    }
    if (*ilo > *n || *ilo < 1) {
	*info = -4;
    }
    if (*ihi > *n || *ihi < 1) {
	*info = -5;
    }
    if (hbl < 5) {
	*info = -705;
    }
    igamn2d_(&contxt, "ALL", " ", &c__1, &c__1, info, &c__1, &itmp1, &itmp2, &
	    c_n1, &c_n1, &c_n1, (ftnlen)3, (ftnlen)1);
    if (*info < 0) {
	i__1 = -(*info);
	pxerbla_(&contxt, "PZLAHQR", &i__1, (ftnlen)7);
	return 0;
    }

/*     Set work array indices */

    vecsidx = 0;
    idia = *n * 3;
    isub = *n * 3;
    isup = *n * 3;
    irbuf = *n * 3;
    icbuf = *n * 3;
    izbuf = *n * 5;

/*     Find a value for ROTN */

    rotn = hbl / 3;
/* Computing MIN */
    i__1 = rotn, i__2 = hbl - 2;
    rotn = min(i__1,i__2);
    rotn = max(rotn,1);

    if (*ilo == *ihi) {
	infog2l_(ilo, ilo, &desca[1], &nprow, &npcol, &myrow, &mycol, &irow, &
		icol, &ii, &jj);
	if (myrow == ii && mycol == jj) {
	    i__1 = *ilo;
	    i__2 = (icol - 1) * lda + irow;
	    w[i__1].r = a[i__2].r, w[i__1].i = a[i__2].i;
	} else {
	    i__1 = *ilo;
	    w[i__1].r = 0., w[i__1].i = 0.;
	}
	return 0;
    }

    nh = *ihi - *ilo + 1;
    nz = *ihiz - *iloz + 1;

    infog1l_(iloz, &hbl, &nprow, &myrow, &iafirst, &liloz, &lihiz);
    lihiz = numroc_(ihiz, &hbl, &myrow, &iafirst, &nprow);

/*     Set machine-dependent constants for the stopping criterion. */
/*     If NORM(H) <= SQRT(OVFL), overflow should not occur. */

    unfl = pdlamch_(&contxt, "SAFE MINIMUM", (ftnlen)12);
    ovfl = 1. / unfl;
    pdlabad_(&contxt, &unfl, &ovfl);
    ulp = pdlamch_(&contxt, "PRECISION", (ftnlen)9);
    smlnum = unfl * (nh / ulp);

/*     I1 and I2 are the indices of the first row and last column of H */
/*     to which transformations must be applied. If eigenvalues only are */
/*     being computed, I1 and I2 are set inside the main loop. */

    if (*wantt) {
	i1 = 1;
	i2 = *n;
    }

/*     ITN is the total number of QR iterations allowed. */

    itn = itermax;

/*     The main loop begins here. I is the loop index and decreases from */
/*     IHI to ILO in steps of our schur block size (<=2*IBLK). Each */
/*     iteration of the loop works  with the active submatrix in rows */
/*     and columns L to I.   Eigenvalues I+1 to IHI have already */
/*     converged. Either L = ILO or the global A(L,L-1) is negligible */
/*     so that the matrix splits. */

    i__ = *ihi;
L10:
    l = *ilo;
    if (i__ < *ilo) {
	goto L570;
    }

/*     Perform QR iterations on rows and columns ILO to I until a */
/*     submatrix of order 1 or 2 splits off at the bottom because a */
/*     subdiagonal element has become negligible. */

    i__1 = itn;
    for (its = 0; its <= i__1; ++its) {

/*        Look for a single small subdiagonal element. */

	i__2 = *lwork - irbuf;
	pzlasmsub_(&a[1], &desca[1], &i__, &l, &k, &smlnum, &work[irbuf + 1], 
		&i__2);
	l = k;

	if (l > *ilo) {

/*           H(L,L-1) is negligible */

	    i__2 = l - 1;
	    infog2l_(&l, &i__2, &desca[1], &nprow, &npcol, &myrow, &mycol, &
		    irow, &icol, &itmp1, &itmp2);
	    if (myrow == itmp1 && mycol == itmp2) {
		i__2 = (icol - 1) * lda + irow;
		a[i__2].r = 0., a[i__2].i = 0.;
	    }
	    i__2 = isub + l - 1;
	    work[i__2].r = 0., work[i__2].i = 0.;
	}

/*        Exit from loop if a submatrix of order 1 or 2 has split off. */

	if (*wantt) {
/*           For Schur form, use 2x2 blocks */
	    if (l >= i__ - 1) {
		goto L550;
	    }
	} else {
/*           If we don't want the Schur form, use bigger blocks. */
	    if (l >= i__ - 63) {
		goto L550;
	    }
	}

/*        Now the active submatrix is in rows and columns L to I. If */
/*        eigenvalues only are being computed, only the active submatrix */
/*        need be transformed. */

	if (! (*wantt)) {
	    i1 = l;
	    i2 = i__;
	}

/*        Copy submatrix of size 2*JBLK and prepare to do generalized */
/*           Wilkinson shift or an exceptional shift */

/* Computing MIN */
	i__2 = 32, i__3 = (i__ - l + 1) / 2 - 1;
	jblk = min(i__2,i__3);
	if (jblk > lcmrc) {

/*           Make sure it's divisible by LCM (we want even workloads!) */

	    jblk -= jblk % lcmrc;
	}
/* Computing MIN */
	i__2 = jblk, i__3 = lcmrc << 1;
	jblk = min(i__2,i__3);
	jblk = max(jblk,1);

	i__2 = jblk << 1;
	i__3 = i__ - (jblk << 1) + 1;
	pzlacp3_(&i__2, &i__3, &a[1], &desca[1], s1, &c__64, &c_n1, &c_n1, &
		c__0);
	if ((its == 20 || its == 40) && jblk > 1) {

/*           Exceptional shift. */

	    for (ii = jblk << 1; ii >= 1; --ii) {
		i__2 = ii + (ii << 6) - 65;
		i__3 = ii + (ii << 6) - 65;
		i__4 = ii + (ii - 1 << 6) - 65;
		d__5 = ((d__1 = s1[i__3].r, abs(d__1)) + (d__2 = d_imag(&s1[
			ii + (ii << 6) - 65]), abs(d__2)) + ((d__3 = s1[i__4]
			.r, abs(d__3)) + (d__4 = d_imag(&s1[ii + (ii - 1 << 6)
			 - 65]), abs(d__4)))) * 1.5;
		s1[i__2].r = d__5, s1[i__2].i = 0.;
		i__2 = ii + (ii - 1 << 6) - 65;
		s1[i__2].r = 0., s1[i__2].i = 0.;
		i__2 = ii - 1 + (ii << 6) - 65;
		s1[i__2].r = 0., s1[i__2].i = 0.;
/* L20: */
	    }
	} else {
	    i__2 = jblk << 1;
	    i__3 = jblk << 1;
	    i__4 = jblk << 1;
	    zlahqr2_(&c_false, &c_false, &i__2, &c__1, &i__3, s1, &c__64, &
		    work[irbuf + 1], &c__1, &i__4, &z__[1], &ldz, &ierr);

/*           Prepare to use Wilkinson's double shift */

	    i__2 = (jblk << 1) + (jblk << 7) - 65;
	    h44.r = s1[i__2].r, h44.i = s1[i__2].i;
	    i__2 = (jblk << 1) - 1 + ((jblk << 1) - 1 << 6) - 65;
	    h33.r = s1[i__2].r, h33.i = s1[i__2].i;
	    i__2 = (jblk << 1) - 1 + (jblk << 7) - 65;
	    i__3 = (jblk << 1) + ((jblk << 1) - 1 << 6) - 65;
	    z__1.r = s1[i__2].r * s1[i__3].r - s1[i__2].i * s1[i__3].i, 
		    z__1.i = s1[i__2].r * s1[i__3].i + s1[i__2].i * s1[i__3]
		    .r;
	    h43h34.r = z__1.r, h43h34.i = z__1.i;

	}

/*        Look for two consecutive small subdiagonal elements: */
/*           PZLACONSB is the routine that does this. */

	i__2 = *lwork - irbuf;
	pzlaconsb_(&a[1], &desca[1], &i__, &l, &m, &h44, &h33, &h43h34, &work[
		irbuf + 1], &i__2);

/*        Double-shift QR step */

/*        NBULGE is the number of bulges that will be attempted */

/* Computing MIN */
	i__2 = m + rotn - 1 - (m - m / hbl * hbl - 1) % rotn, i__3 = i__ - 2;
	istop = min(i__2,i__3);
/* Computing MIN */
	i__2 = istop, i__3 = m + hbl - 3 - (m - 1) % hbl;
	istop = min(i__2,i__3);
/* Computing MIN */
	i__2 = istop, i__3 = i2 - 2;
	istop = min(i__2,i__3);
	istop = max(istop,m);
	nbulge = (i__ - 1 - istop) / hbl;

/*        Do not exceed maximum determined. */

	nbulge = min(nbulge,jblk);
	if (nbulge > lcmrc) {

/*           Make sure it's divisible by LCM (we want even workloads!) */

	    nbulge -= nbulge % lcmrc;
	}
	nbulge = max(nbulge,1);

/*        If we are starting in the middle because of consecutive small */
/*           subdiagonal elements, we need to see how many bulges we */
/*           can send through without breaking the consecutive small */
/*           subdiagonal property. */

	if (nbulge > 1 && m > l) {

/*           Copy a chunk of elements from global A(M-1:,M-1:) */

	    i__2 = m + 2;
	    i__3 = m + 2;
	    infog2l_(&i__2, &i__3, &desca[1], &nprow, &npcol, &myrow, &mycol, 
		    &irow1, &icol1, &itmp1, &itmp2);
/* Computing MIN */
	    i__2 = (nbulge << 2) + 2, i__3 = *n - m + 2;
	    ii = min(i__2,i__3);
	    i__2 = m - 1;
	    pzlacp3_(&ii, &i__2, &a[1], &desca[1], &work[irbuf + 1], &ii, &
		    itmp1, &itmp2, &c__0);
	    if (myrow == itmp1 && mycol == itmp2) {

/*              Find a new NBULGE based on the bulges we have. */

		zlamsh_(s1, &c__64, &nbulge, &jblk, &work[irbuf + 1], &ii, &
			ii, &ulp);
		if (num > 1) {
		    igebs2d_(&contxt, "ALL", " ", &c__1, &c__1, &nbulge, &
			    c__1, (ftnlen)3, (ftnlen)1);
		}
	    } else {

/*              Everyone needs to receive the new NBULGE */

		igebr2d_(&contxt, "ALL", " ", &c__1, &c__1, &nbulge, &c__1, &
			itmp1, &itmp2, (ftnlen)3, (ftnlen)1);
	    }
	}

/*        IBULGE is the number of bulges going so far */

	ibulge = 1;

/*        "A" row defs : main row transforms from LOCALK to LOCALI2 */

	infog1l_(&m, &hbl, &npcol, &mycol, &jafirst, &itmp1, &localk);
	localk = nq;
	infog1l_(&c__1, &hbl, &npcol, &mycol, &jafirst, &icol1, &locali2);
	locali2 = numroc_(&i2, &hbl, &mycol, &jafirst, &npcol);

/*        "A" col defs : main col transforms from LOCALI1 to LOCALM */

	infog1l_(&i1, &hbl, &nprow, &myrow, &iafirst, &locali1, &icol1);
	infog1l_(&c__1, &hbl, &nprow, &myrow, &iafirst, &localm, &icol1);
/* Computing MIN */
	i__3 = m + 3;
	i__2 = min(i__3,i__);
	icol1 = numroc_(&i__2, &hbl, &myrow, &iafirst, &nprow);

/*        Which row & column will start the bulges */

	istartrow = (m + 1) / hbl % nprow + iafirst;
	istartcol = (m + 1) / hbl % npcol + jafirst;

	infog1l_(&m, &hbl, &nprow, &myrow, &iafirst, &ii, &itmp2);
	infog1l_(&m, &hbl, &npcol, &mycol, &jafirst, &jj, &itmp2);
	infog1l_(&c__1, &hbl, &nprow, &myrow, &iafirst, &istop, kp2row);
	i__2 = m + 2;
	kp2row[0] = numroc_(&i__2, &hbl, &myrow, &iafirst, &nprow);
	infog1l_(&c__1, &hbl, &npcol, &mycol, &jafirst, &istop, kp2col);
	i__2 = m + 2;
	kp2col[0] = numroc_(&i__2, &hbl, &mycol, &jafirst, &npcol);

/*        Set all values for bulges.  All bulges are stored in */
/*          intermediate steps as loops over KI.  Their current "task" */
/*          over the global M to I-1 values is always K1(KI) to K2(KI). */
/*          However, because there are many bulges, K1(KI) & K2(KI) might */
/*          go past that range while later bulges (KI+1,KI+2,etc..) are */
/*          finishing up.  Even if ROTN=1, in order to minimize border */
/*          communication sometimes K1(KI)=HBL-2 & K2(KI)=HBL-1 so both */
/*          border messages can be handled at once. */

/*        Rules: */
/*              If MOD(K1(KI)-1,HBL) < HBL-2 then MOD(K2(KI)-1,HBL)<HBL-2 */
/*              If MOD(K1(KI)-1,HBL) = HBL-1 then MOD(K2(KI)-1,HBL)=HBL-1 */
/*              K2(KI)-K1(KI) <= ROTN */

/*        We first hit a border when MOD(K1(KI)-1,HBL)=HBL-2 and we hit */
/*        it again when MOD(K1(KI)-1,HBL)=HBL-1. */

	i__2 = nbulge;
	for (ki = 1; ki <= i__2; ++ki) {
	    k1[ki - 1] = m;
/* Computing MIN */
	    i__3 = m + rotn - 1 - (m - m / hbl * hbl - 1) % rotn, i__4 = i__ 
		    - 2;
	    istop = min(i__3,i__4);
/* Computing MIN */
	    i__3 = istop, i__4 = m + hbl - 3 - (m - 1) % hbl;
	    istop = min(i__3,i__4);
/* Computing MIN */
	    i__3 = istop, i__4 = i2 - 2;
	    istop = min(i__3,i__4);
	    istop = max(istop,m);
/* Computing MIN */
	    i__3 = i__ - 2, i__4 = i2 - 2;
	    if ((m - 1) % hbl == hbl - 2 && istop < min(i__3,i__4)) {
		++istop;
	    }
	    k2[ki - 1] = istop;
	    icurrow[ki - 1] = istartrow;
	    icurcol[ki - 1] = istartcol;
	    krow[ki - 1] = ii;
	    kcol[ki - 1] = jj;
	    if (ki > 1) {
		kp2row[ki - 1] = kp2row[0];
	    }
	    if (ki > 1) {
		kp2col[ki - 1] = kp2col[0];
	    }
/* L30: */
	}

/*        Get first transform on node who owns M+2,M+2 */

	itmp1 = istartrow;
	itmp2 = istartcol;
	pzlawil_(&itmp1, &itmp2, &m, &a[1], &desca[1], &h44, &h33, &h43h34, 
		vcopy);
	v1save.r = vcopy[0].r, v1save.i = vcopy[0].i;
	v2save.r = vcopy[1].r, v2save.i = vcopy[1].i;
	v3save.r = vcopy[2].r, v3save.i = vcopy[2].i;

/*        The main implicit shift Francis loops over the bulges starts */
/*           here! */

	if (k2[ibulge - 1] <= i__ - 1) {
L40:
	    if (k1[ibulge - 1] >= m + 5 && ibulge < nbulge) {
		if ((k2[ibulge - 1] + 2) % hbl == (k2[ibulge] + 2) % hbl && 
			k1[0] <= i__ - 1) {
		    i__2 = (jblk << 1) - (ibulge << 1) + ((jblk << 1) - (
			    ibulge << 1) << 6) - 65;
		    h44.r = s1[i__2].r, h44.i = s1[i__2].i;
		    i__2 = (jblk << 1) - (ibulge << 1) - 1 + ((jblk << 1) - (
			    ibulge << 1) - 1 << 6) - 65;
		    h33.r = s1[i__2].r, h33.i = s1[i__2].i;
		    i__2 = (jblk << 1) - (ibulge << 1) - 1 + ((jblk << 1) - (
			    ibulge << 1) << 6) - 65;
		    i__3 = (jblk << 1) - (ibulge << 1) + ((jblk << 1) - (
			    ibulge << 1) - 1 << 6) - 65;
		    z__1.r = s1[i__2].r * s1[i__3].r - s1[i__2].i * s1[i__3]
			    .i, z__1.i = s1[i__2].r * s1[i__3].i + s1[i__2].i 
			    * s1[i__3].r;
		    h43h34.r = z__1.r, h43h34.i = z__1.i;
		    itmp1 = istartrow;
		    itmp2 = istartcol;
		    pzlawil_(&itmp1, &itmp2, &m, &a[1], &desca[1], &h44, &h33,
			     &h43h34, vcopy);
		    v1save.r = vcopy[0].r, v1save.i = vcopy[0].i;
		    v2save.r = vcopy[1].r, v2save.i = vcopy[1].i;
		    v3save.r = vcopy[2].r, v3save.i = vcopy[2].i;
		    ++ibulge;
		}
	    }

/*        When we hit a border, there are row and column transforms that */
/*          overlap over several processors and the code gets very */
/*          "congested."  As a remedy, when we first hit a border, a 6x6 */
/*          *local* matrix is generated on one node (called SMALLA) and */
/*          work is done on that.  At the end of the border, the data is */
/*          passed back and everything stays a lot simpler. */

	    i__2 = ibulge;
	    for (ki = 1; ki <= i__2; ++ki) {

/* Computing MAX */
		i__3 = k1[ki - 1];
		istart = max(i__3,m);
/* Computing MIN */
		i__3 = k2[ki - 1], i__4 = i__ - 1;
		istop = min(i__3,i__4);
		k = istart;
		modkm1 = (k - 1) % hbl;
		if (modkm1 >= hbl - 2) {
		    if (modkm1 == hbl - 2 && k < i__ - 1) {

/*                 Copy 6 elements from global A(K-1:K+4,K-1:K+4) */

			itmp1 = icurrow[ki - 1];
			itmp2 = icurcol[ki - 1];
/* Computing MIN */
			i__4 = 6, i__5 = *n - k + 2;
			i__3 = min(i__4,i__5);
			i__6 = k - 1;
			pzlacp3_(&i__3, &i__6, &a[1], &desca[1], &smalla[(ki *
				 6 + 1) * 6 - 42], &c__6, &itmp1, &itmp2, &
				c__0);
		    }
		    if (modkm1 == hbl - 1 && k == m) {

/*                 Copy 6 elements from global A(K-2:K+3,K-2:K+3) */

			i__3 = k + 1;
			i__4 = k + 1;
			infog2l_(&i__3, &i__4, &desca[1], &nprow, &npcol, &
				myrow, &mycol, &irow1, &icol1, &itmp1, &itmp2)
				;
/* Computing MIN */
			i__4 = 6, i__5 = *n - k + 3;
			i__3 = min(i__4,i__5);
			i__6 = k - 2;
			pzlacp3_(&i__3, &i__6, &a[1], &desca[1], &smalla[(ki *
				 6 + 1) * 6 - 42], &c__6, &itmp1, &itmp2, &
				c__0);
		    }
		}


/*           ZLAHQR used to have a single row application and a single */
/*              column application to H.  Here we do something a little */
/*              more clever.  We break each transformation down into 3 */
/*              parts: */
/*                  1.) The minimum amount of work it takes to determine */
/*                        a group of ROTN transformations (this is on */
/*                        the critical path.) (Loops 50-120) */
/*                  (the data is broadcast now: loops 180-240) */
/*                  2.) The small work it takes so that each of the rows */
/*                        and columns is at the same place.  For example, */
/*                        all ROTN row transforms are all complete */
/*                        through some column TMP.  (Loops 250-260) */
/*                  3.) The majority of the row and column transforms */
/*                        are then applied in a block fashion. */
/*                        (row transforms are in loops 280-380) */
/*                        (col transforms are in loops 400-540) */

/*           Each of these three parts is further subdivided into 3 */
/*           parts: */
/*               A.) Work at the start of a border when */
/*                       MOD(ISTART-1,HBL) = HBL-2 */
/*               B.) Work at the end of a border when */
/*                       MOD(ISTART-1,HBL) = HBL-1 */
/*               C.) Work in the middle of the block when */
/*                       MOD(ISTART-1,HBL) < HBL-2 */

/*           Further optimization is met with the boolean SKIP.  A border */
/*              communication can be broken into several parts for */
/*              efficient parallelism: */
/*                 Loop over all the bulges, just sending the data out */
/*                 Loop over all the bulges, just doing the work */
/*                 Loop over all the bulges, just sending the data back. */


/* Computing MIN */
		i__3 = i__ - 1, i__4 = istop + 1;
		if (myrow == icurrow[ki - 1] && mycol == icurcol[ki - 1] && 
			modkm1 == hbl - 2 && istart < min(i__3,i__4)) {
		    k = istart;
/* Computing MIN */
		    i__3 = 3, i__4 = i__ - k + 1;
		    nr = min(i__3,i__4);
		    if (k > m) {
			lfc_SLzcopy(&nr, &smalla[(ki * 6 + 1) * 6 - 41], &c__1, 
				vcopy, &c__1);
		    } else {
			vcopy[0].r = v1save.r, vcopy[0].i = v1save.i;
			vcopy[1].r = v2save.r, vcopy[1].i = v2save.i;
			vcopy[2].r = v3save.r, vcopy[2].i = v3save.i;
		    }
		    zlarfg_(&nr, vcopy, &vcopy[1], &c__1, &t1copy);
		    if (k > m) {
			i__3 = (ki * 6 + 1) * 6 - 41;
			smalla[i__3].r = vcopy[0].r, smalla[i__3].i = vcopy[0]
				.i;
			i__3 = (ki * 6 + 1) * 6 - 40;
			smalla[i__3].r = 0., smalla[i__3].i = 0.;
			if (k < i__ - 1) {
			    i__3 = (ki * 6 + 1) * 6 - 39;
			    smalla[i__3].r = 0., smalla[i__3].i = 0.;
			}
		    } else if (m > l) {

/*                 Following differs in comparison to pdlahqr. */

			i__3 = (ki * 6 + 1) * 6 - 41;
			i__4 = (ki * 6 + 1) * 6 - 41;
			d_cnjg(&z__3, &t1copy);
			i__5 = (ki * 6 + 1) * 6 - 41;
			z__2.r = z__3.r * smalla[i__5].r - z__3.i * smalla[
				i__5].i, z__2.i = z__3.r * smalla[i__5].i + 
				z__3.i * smalla[i__5].r;
			z__1.r = smalla[i__4].r - z__2.r, z__1.i = smalla[
				i__4].i - z__2.i;
			smalla[i__3].r = z__1.r, smalla[i__3].i = z__1.i;
		    }
		    v2.r = vcopy[1].r, v2.i = vcopy[1].i;
		    z__1.r = t1copy.r * v2.r - t1copy.i * v2.i, z__1.i = 
			    t1copy.r * v2.i + t1copy.i * v2.r;
		    t2.r = z__1.r, t2.i = z__1.i;
		    i__3 = vecsidx + (k - 1) * 3 + 1;
		    work[i__3].r = vcopy[1].r, work[i__3].i = vcopy[1].i;
		    i__3 = vecsidx + (k - 1) * 3 + 2;
		    work[i__3].r = vcopy[2].r, work[i__3].i = vcopy[2].i;
		    i__3 = vecsidx + (k - 1) * 3 + 3;
		    work[i__3].r = t1copy.r, work[i__3].i = t1copy.i;
		    if (nr == 3) {

/*                    Do some work so next step is ready... */

			t1.r = t1copy.r, t1.i = t1copy.i;
			v3.r = vcopy[2].r, v3.i = vcopy[2].i;
			z__1.r = t1.r * v3.r - t1.i * v3.i, z__1.i = t1.r * 
				v3.i + t1.i * v3.r;
			t3.r = z__1.r, t3.i = z__1.i;
/* Computing MIN */
			i__3 = 6, i__4 = i2 + 2 - k;
			itmp1 = min(i__3,i__4);
/* Computing MAX */
			i__3 = i1 - k + 2;
			itmp2 = max(i__3,1);
			i__3 = itmp1;
			for (j = 2; j <= i__3; ++j) {
			    d_cnjg(&z__4, &t1);
			    i__4 = (j + ki * 6) * 6 - 41;
			    z__3.r = z__4.r * smalla[i__4].r - z__4.i * 
				    smalla[i__4].i, z__3.i = z__4.r * smalla[
				    i__4].i + z__4.i * smalla[i__4].r;
			    d_cnjg(&z__6, &t2);
			    i__5 = (j + ki * 6) * 6 - 40;
			    z__5.r = z__6.r * smalla[i__5].r - z__6.i * 
				    smalla[i__5].i, z__5.i = z__6.r * smalla[
				    i__5].i + z__6.i * smalla[i__5].r;
			    z__2.r = z__3.r + z__5.r, z__2.i = z__3.i + 
				    z__5.i;
			    d_cnjg(&z__8, &t3);
			    i__6 = (j + ki * 6) * 6 - 39;
			    z__7.r = z__8.r * smalla[i__6].r - z__8.i * 
				    smalla[i__6].i, z__7.i = z__8.r * smalla[
				    i__6].i + z__8.i * smalla[i__6].r;
			    z__1.r = z__2.r + z__7.r, z__1.i = z__2.i + 
				    z__7.i;
			    sum.r = z__1.r, sum.i = z__1.i;
			    i__4 = (j + ki * 6) * 6 - 41;
			    i__5 = (j + ki * 6) * 6 - 41;
			    z__1.r = smalla[i__5].r - sum.r, z__1.i = smalla[
				    i__5].i - sum.i;
			    smalla[i__4].r = z__1.r, smalla[i__4].i = z__1.i;
			    i__4 = (j + ki * 6) * 6 - 40;
			    i__5 = (j + ki * 6) * 6 - 40;
			    z__2.r = sum.r * v2.r - sum.i * v2.i, z__2.i = 
				    sum.r * v2.i + sum.i * v2.r;
			    z__1.r = smalla[i__5].r - z__2.r, z__1.i = smalla[
				    i__5].i - z__2.i;
			    smalla[i__4].r = z__1.r, smalla[i__4].i = z__1.i;
			    i__4 = (j + ki * 6) * 6 - 39;
			    i__5 = (j + ki * 6) * 6 - 39;
			    z__2.r = sum.r * v3.r - sum.i * v3.i, z__2.i = 
				    sum.r * v3.i + sum.i * v3.r;
			    z__1.r = smalla[i__5].r - z__2.r, z__1.i = smalla[
				    i__5].i - z__2.i;
			    smalla[i__4].r = z__1.r, smalla[i__4].i = z__1.i;
/* L50: */
			}
			for (j = itmp2; j <= 5; ++j) {
			    i__3 = j + (ki * 6 + 2) * 6 - 43;
			    z__3.r = t1.r * smalla[i__3].r - t1.i * smalla[
				    i__3].i, z__3.i = t1.r * smalla[i__3].i + 
				    t1.i * smalla[i__3].r;
			    i__4 = j + (ki * 6 + 3) * 6 - 43;
			    z__4.r = t2.r * smalla[i__4].r - t2.i * smalla[
				    i__4].i, z__4.i = t2.r * smalla[i__4].i + 
				    t2.i * smalla[i__4].r;
			    z__2.r = z__3.r + z__4.r, z__2.i = z__3.i + 
				    z__4.i;
			    i__5 = j + (ki * 6 + 4) * 6 - 43;
			    z__5.r = t3.r * smalla[i__5].r - t3.i * smalla[
				    i__5].i, z__5.i = t3.r * smalla[i__5].i + 
				    t3.i * smalla[i__5].r;
			    z__1.r = z__2.r + z__5.r, z__1.i = z__2.i + 
				    z__5.i;
			    sum.r = z__1.r, sum.i = z__1.i;
			    i__3 = j + (ki * 6 + 2) * 6 - 43;
			    i__4 = j + (ki * 6 + 2) * 6 - 43;
			    z__1.r = smalla[i__4].r - sum.r, z__1.i = smalla[
				    i__4].i - sum.i;
			    smalla[i__3].r = z__1.r, smalla[i__3].i = z__1.i;
			    i__3 = j + (ki * 6 + 3) * 6 - 43;
			    i__4 = j + (ki * 6 + 3) * 6 - 43;
			    d_cnjg(&z__3, &v2);
			    z__2.r = sum.r * z__3.r - sum.i * z__3.i, z__2.i =
				     sum.r * z__3.i + sum.i * z__3.r;
			    z__1.r = smalla[i__4].r - z__2.r, z__1.i = smalla[
				    i__4].i - z__2.i;
			    smalla[i__3].r = z__1.r, smalla[i__3].i = z__1.i;
			    i__3 = j + (ki * 6 + 4) * 6 - 43;
			    i__4 = j + (ki * 6 + 4) * 6 - 43;
			    d_cnjg(&z__3, &v3);
			    z__2.r = sum.r * z__3.r - sum.i * z__3.i, z__2.i =
				     sum.r * z__3.i + sum.i * z__3.r;
			    z__1.r = smalla[i__4].r - z__2.r, z__1.i = smalla[
				    i__4].i - z__2.i;
			    smalla[i__3].r = z__1.r, smalla[i__3].i = z__1.i;
/* L60: */
			}
		    }
		}

		if ((istop - 1) % hbl == hbl - 1 && myrow == icurrow[ki - 1] 
			&& mycol == icurcol[ki - 1] && istart <= min(i__,
			istop)) {
		    k = istop;
/* Computing MIN */
		    i__3 = 3, i__4 = i__ - k + 1;
		    nr = min(i__3,i__4);
		    if (k > m) {
			lfc_SLzcopy(&nr, &smalla[(ki * 6 + 2) * 6 - 40], &c__1, 
				vcopy, &c__1);
		    } else {
			vcopy[0].r = v1save.r, vcopy[0].i = v1save.i;
			vcopy[1].r = v2save.r, vcopy[1].i = v2save.i;
			vcopy[2].r = v3save.r, vcopy[2].i = v3save.i;
		    }
		    zlarfg_(&nr, vcopy, &vcopy[1], &c__1, &t1copy);
		    if (k > m) {
			i__3 = (ki * 6 + 2) * 6 - 40;
			smalla[i__3].r = vcopy[0].r, smalla[i__3].i = vcopy[0]
				.i;
			i__3 = (ki * 6 + 2) * 6 - 39;
			smalla[i__3].r = 0., smalla[i__3].i = 0.;
			if (k < i__ - 1) {
			    i__3 = (ki * 6 + 2) * 6 - 38;
			    smalla[i__3].r = 0., smalla[i__3].i = 0.;
			}

/*                    Set a subdiagonal to zero now if it's possible */

			if (k - 2 > m && (k - 1) % hbl > 1) {
			    i__3 = (ki * 6 + 1) * 6 - 42;
			    h11.r = smalla[i__3].r, h11.i = smalla[i__3].i;
			    i__3 = (ki * 6 + 1) * 6 - 41;
			    h10.r = smalla[i__3].r, h10.i = smalla[i__3].i;
			    i__3 = (ki * 6 + 2) * 6 - 41;
			    h22.r = smalla[i__3].r, h22.i = smalla[i__3].i;
			    s = (d__1 = h11.r, abs(d__1)) + (d__2 = d_imag(&
				    h11), abs(d__2)) + ((d__3 = h22.r, abs(
				    d__3)) + (d__4 = d_imag(&h22), abs(d__4)))
				    ;
/* Computing MAX */
			    d__3 = ulp * s;
			    if ((d__1 = h10.r, abs(d__1)) + (d__2 = d_imag(&
				    h10), abs(d__2)) <= max(d__3,smlnum)) {
				i__3 = (ki * 6 + 1) * 6 - 41;
				smalla[i__3].r = 0., smalla[i__3].i = 0.;
			    }
			}
		    } else if (m > l) {

/*                 Following differs in comparison to pdlahqr. */

			i__3 = (ki * 6 + 2) * 6 - 40;
			i__4 = (ki * 6 + 2) * 6 - 40;
			d_cnjg(&z__3, &t1copy);
			i__5 = (ki * 6 + 2) * 6 - 40;
			z__2.r = z__3.r * smalla[i__5].r - z__3.i * smalla[
				i__5].i, z__2.i = z__3.r * smalla[i__5].i + 
				z__3.i * smalla[i__5].r;
			z__1.r = smalla[i__4].r - z__2.r, z__1.i = smalla[
				i__4].i - z__2.i;
			smalla[i__3].r = z__1.r, smalla[i__3].i = z__1.i;
		    }
		    v2.r = vcopy[1].r, v2.i = vcopy[1].i;
		    z__1.r = t1copy.r * v2.r - t1copy.i * v2.i, z__1.i = 
			    t1copy.r * v2.i + t1copy.i * v2.r;
		    t2.r = z__1.r, t2.i = z__1.i;
		    i__3 = vecsidx + (k - 1) * 3 + 1;
		    work[i__3].r = vcopy[1].r, work[i__3].i = vcopy[1].i;
		    i__3 = vecsidx + (k - 1) * 3 + 2;
		    work[i__3].r = vcopy[2].r, work[i__3].i = vcopy[2].i;
		    i__3 = vecsidx + (k - 1) * 3 + 3;
		    work[i__3].r = t1copy.r, work[i__3].i = t1copy.i;
		    if (nr == 3) {

/*                    Do some work so next step is ready... */

			t1.r = t1copy.r, t1.i = t1copy.i;
			v3.r = vcopy[2].r, v3.i = vcopy[2].i;
			z__1.r = t1.r * v3.r - t1.i * v3.i, z__1.i = t1.r * 
				v3.i + t1.i * v3.r;
			t3.r = z__1.r, t3.i = z__1.i;
/* Computing MIN */
			i__3 = 6, i__4 = i2 - k + 3;
			itmp1 = min(i__3,i__4);
/* Computing MAX */
			i__3 = i1 - k + 3;
			itmp2 = max(i__3,1);
			i__3 = itmp1;
			for (j = 3; j <= i__3; ++j) {
			    d_cnjg(&z__4, &t1);
			    i__4 = (j + ki * 6) * 6 - 40;
			    z__3.r = z__4.r * smalla[i__4].r - z__4.i * 
				    smalla[i__4].i, z__3.i = z__4.r * smalla[
				    i__4].i + z__4.i * smalla[i__4].r;
			    d_cnjg(&z__6, &t2);
			    i__5 = (j + ki * 6) * 6 - 39;
			    z__5.r = z__6.r * smalla[i__5].r - z__6.i * 
				    smalla[i__5].i, z__5.i = z__6.r * smalla[
				    i__5].i + z__6.i * smalla[i__5].r;
			    z__2.r = z__3.r + z__5.r, z__2.i = z__3.i + 
				    z__5.i;
			    d_cnjg(&z__8, &t3);
			    i__6 = (j + ki * 6) * 6 - 38;
			    z__7.r = z__8.r * smalla[i__6].r - z__8.i * 
				    smalla[i__6].i, z__7.i = z__8.r * smalla[
				    i__6].i + z__8.i * smalla[i__6].r;
			    z__1.r = z__2.r + z__7.r, z__1.i = z__2.i + 
				    z__7.i;
			    sum.r = z__1.r, sum.i = z__1.i;
			    i__4 = (j + ki * 6) * 6 - 40;
			    i__5 = (j + ki * 6) * 6 - 40;
			    z__1.r = smalla[i__5].r - sum.r, z__1.i = smalla[
				    i__5].i - sum.i;
			    smalla[i__4].r = z__1.r, smalla[i__4].i = z__1.i;
			    i__4 = (j + ki * 6) * 6 - 39;
			    i__5 = (j + ki * 6) * 6 - 39;
			    z__2.r = sum.r * v2.r - sum.i * v2.i, z__2.i = 
				    sum.r * v2.i + sum.i * v2.r;
			    z__1.r = smalla[i__5].r - z__2.r, z__1.i = smalla[
				    i__5].i - z__2.i;
			    smalla[i__4].r = z__1.r, smalla[i__4].i = z__1.i;
			    i__4 = (j + ki * 6) * 6 - 38;
			    i__5 = (j + ki * 6) * 6 - 38;
			    z__2.r = sum.r * v3.r - sum.i * v3.i, z__2.i = 
				    sum.r * v3.i + sum.i * v3.r;
			    z__1.r = smalla[i__5].r - z__2.r, z__1.i = smalla[
				    i__5].i - z__2.i;
			    smalla[i__4].r = z__1.r, smalla[i__4].i = z__1.i;
/* L70: */
			}
			for (j = itmp2; j <= 6; ++j) {
			    i__3 = j + (ki * 6 + 3) * 6 - 43;
			    z__3.r = t1.r * smalla[i__3].r - t1.i * smalla[
				    i__3].i, z__3.i = t1.r * smalla[i__3].i + 
				    t1.i * smalla[i__3].r;
			    i__4 = j + (ki * 6 + 4) * 6 - 43;
			    z__4.r = t2.r * smalla[i__4].r - t2.i * smalla[
				    i__4].i, z__4.i = t2.r * smalla[i__4].i + 
				    t2.i * smalla[i__4].r;
			    z__2.r = z__3.r + z__4.r, z__2.i = z__3.i + 
				    z__4.i;
			    i__5 = j + (ki * 6 + 5) * 6 - 43;
			    z__5.r = t3.r * smalla[i__5].r - t3.i * smalla[
				    i__5].i, z__5.i = t3.r * smalla[i__5].i + 
				    t3.i * smalla[i__5].r;
			    z__1.r = z__2.r + z__5.r, z__1.i = z__2.i + 
				    z__5.i;
			    sum.r = z__1.r, sum.i = z__1.i;
			    i__3 = j + (ki * 6 + 3) * 6 - 43;
			    i__4 = j + (ki * 6 + 3) * 6 - 43;
			    z__1.r = smalla[i__4].r - sum.r, z__1.i = smalla[
				    i__4].i - sum.i;
			    smalla[i__3].r = z__1.r, smalla[i__3].i = z__1.i;
			    i__3 = j + (ki * 6 + 4) * 6 - 43;
			    i__4 = j + (ki * 6 + 4) * 6 - 43;
			    d_cnjg(&z__3, &v2);
			    z__2.r = sum.r * z__3.r - sum.i * z__3.i, z__2.i =
				     sum.r * z__3.i + sum.i * z__3.r;
			    z__1.r = smalla[i__4].r - z__2.r, z__1.i = smalla[
				    i__4].i - z__2.i;
			    smalla[i__3].r = z__1.r, smalla[i__3].i = z__1.i;
			    i__3 = j + (ki * 6 + 5) * 6 - 43;
			    i__4 = j + (ki * 6 + 5) * 6 - 43;
			    d_cnjg(&z__3, &v3);
			    z__2.r = sum.r * z__3.r - sum.i * z__3.i, z__2.i =
				     sum.r * z__3.i + sum.i * z__3.r;
			    z__1.r = smalla[i__4].r - z__2.r, z__1.i = smalla[
				    i__4].i - z__2.i;
			    smalla[i__3].r = z__1.r, smalla[i__3].i = z__1.i;
/* L80: */
			}
		    }
		}

		if (modkm1 == 0 && istart <= i__ - 1 && myrow == icurrow[ki - 
			1] && right == icurcol[ki - 1]) {

/*              (IROW1,ICOL1) is (I,J)-coordinates of H(ISTART,ISTART) */

		    irow1 = krow[ki - 1];
		    icol1 = kcol[ki - 1];

/*                 The ELSE part of this IF needs an updated VCOPY; this */
/*                 was not necessary in PDLAHQR. */

		    if (istart > m) {
			i__3 = (ki * 6 + 3) * 6 - 39;
			vcopy[0].r = smalla[i__3].r, vcopy[0].i = smalla[i__3]
				.i;
			i__3 = (ki * 6 + 3) * 6 - 38;
			vcopy[1].r = smalla[i__3].r, vcopy[1].i = smalla[i__3]
				.i;
			i__3 = (ki * 6 + 3) * 6 - 37;
			vcopy[2].r = smalla[i__3].r, vcopy[2].i = smalla[i__3]
				.i;
/* Computing MIN */
			i__3 = 3, i__4 = i__ - istart + 1;
			nr = min(i__3,i__4);
			zlarfg_(&nr, vcopy, &vcopy[1], &c__1, &t1copy);
			i__3 = (icol1 - 2) * lda + irow1;
			a[i__3].r = vcopy[0].r, a[i__3].i = vcopy[0].i;
			i__3 = (icol1 - 2) * lda + irow1 + 1;
			a[i__3].r = 0., a[i__3].i = 0.;
			if (istart < i__ - 1) {
			    i__3 = (icol1 - 2) * lda + irow1 + 2;
			    a[i__3].r = 0., a[i__3].i = 0.;
			}
		    } else {

/*                    If NPCOL.NE.1 then we need an updated VCOPY. */

/* Computing MIN */
			i__3 = 3, i__4 = i__ - istart + 1;
			nr = min(i__3,i__4);
			if (npcol == 1) {
			    vcopy[0].r = v1save.r, vcopy[0].i = v1save.i;
			    vcopy[1].r = v2save.r, vcopy[1].i = v2save.i;
			    vcopy[2].r = v3save.r, vcopy[2].i = v3save.i;
			} else {

/*                    Get updated VCOPY from RIGHT */

			    zgerv2d_(&contxt, &c__3, &c__1, vcopy, &c__1, &
				    myrow, &right);
			}
			zlarfg_(&nr, vcopy, &vcopy[1], &c__1, &t1copy);
			if (m > l) {

/*                    Following differs in comparison to pdlahqr. */

			    i__3 = (icol1 - 2) * lda + irow1;
			    i__4 = (icol1 - 2) * lda + irow1;
			    z__3.r = 1. - t1copy.r, z__3.i = 0. - t1copy.i;
			    d_cnjg(&z__2, &z__3);
			    z__1.r = a[i__4].r * z__2.r - a[i__4].i * z__2.i, 
				    z__1.i = a[i__4].r * z__2.i + a[i__4].i * 
				    z__2.r;
			    a[i__3].r = z__1.r, a[i__3].i = z__1.i;
			}
		    }
		}

		if (myrow == icurrow[ki - 1] && mycol == icurcol[ki - 1] && (
			modkm1 == hbl - 2 && istart == i__ - 1 || modkm1 < 
			hbl - 2 && istart <= i__ - 1)) {

/*              (IROW1,ICOL1) is (I,J)-coordinates of H(ISTART,ISTART) */

		    irow1 = krow[ki - 1];
		    icol1 = kcol[ki - 1];
		    i__3 = istop;
		    for (k = istart; k <= i__3; ++k) {

/*                    Create and do these transforms */

/* Computing MIN */
			i__4 = 3, i__5 = i__ - k + 1;
			nr = min(i__4,i__5);
			if (k > m) {
			    if ((k - 1) % hbl == 0) {
				i__4 = (ki * 6 + 3) * 6 - 39;
				vcopy[0].r = smalla[i__4].r, vcopy[0].i = 
					smalla[i__4].i;
				i__4 = (ki * 6 + 3) * 6 - 38;
				vcopy[1].r = smalla[i__4].r, vcopy[1].i = 
					smalla[i__4].i;
				i__4 = (ki * 6 + 3) * 6 - 37;
				vcopy[2].r = smalla[i__4].r, vcopy[2].i = 
					smalla[i__4].i;
			    } else {
				i__4 = (icol1 - 2) * lda + irow1;
				vcopy[0].r = a[i__4].r, vcopy[0].i = a[i__4]
					.i;
				i__4 = (icol1 - 2) * lda + irow1 + 1;
				vcopy[1].r = a[i__4].r, vcopy[1].i = a[i__4]
					.i;
				if (nr == 3) {
				    i__4 = (icol1 - 2) * lda + irow1 + 2;
				    vcopy[2].r = a[i__4].r, vcopy[2].i = a[
					    i__4].i;
				}
			    }
			} else {
			    vcopy[0].r = v1save.r, vcopy[0].i = v1save.i;
			    vcopy[1].r = v2save.r, vcopy[1].i = v2save.i;
			    vcopy[2].r = v3save.r, vcopy[2].i = v3save.i;
			}

/*                    Must send an up-to-date copy of VCOPY to the left. */

			if (npcol > 1 && istart <= m && (k - 1) % hbl == 0) {
			    zgesd2d_(&contxt, &c__3, &c__1, vcopy, &c__1, &
				    myrow, &left);
			}
			zlarfg_(&nr, vcopy, &vcopy[1], &c__1, &t1copy);
			if (k > m) {
			    if ((k - 1) % hbl > 0) {
				i__4 = (icol1 - 2) * lda + irow1;
				a[i__4].r = vcopy[0].r, a[i__4].i = vcopy[0]
					.i;
				i__4 = (icol1 - 2) * lda + irow1 + 1;
				a[i__4].r = 0., a[i__4].i = 0.;
				if (k < i__ - 1) {
				    i__4 = (icol1 - 2) * lda + irow1 + 2;
				    a[i__4].r = 0., a[i__4].i = 0.;
				}

/*                       Set a subdiagonal to zero now if it's possible */

				if (irow1 > 2 && icol1 > 2 && k - 2 > m && (k 
					- 1) % hbl > 1) {
				    i__4 = (icol1 - 3) * lda + irow1 - 2;
				    h11.r = a[i__4].r, h11.i = a[i__4].i;
				    i__4 = (icol1 - 3) * lda + irow1 - 1;
				    h10.r = a[i__4].r, h10.i = a[i__4].i;
				    i__4 = (icol1 - 2) * lda + irow1 - 1;
				    h22.r = a[i__4].r, h22.i = a[i__4].i;
				    s = (d__1 = h11.r, abs(d__1)) + (d__2 = 
					    d_imag(&h11), abs(d__2)) + ((d__3 
					    = h22.r, abs(d__3)) + (d__4 = 
					    d_imag(&h22), abs(d__4)));
/* Computing MAX */
				    d__3 = ulp * s;
				    if ((d__1 = h10.r, abs(d__1)) + (d__2 = 
					    d_imag(&h10), abs(d__2)) <= max(
					    d__3,smlnum)) {
					i__4 = (icol1 - 3) * lda + irow1 - 1;
					a[i__4].r = 0., a[i__4].i = 0.;
				    }
				}
			    }
			} else if (m > l) {
			    if ((k - 1) % hbl > 0) {

/*                       Following differs in comparison to pdlahqr. */

				i__4 = (icol1 - 2) * lda + irow1;
				i__5 = (icol1 - 2) * lda + irow1;
				z__3.r = 1. - t1copy.r, z__3.i = 0. - 
					t1copy.i;
				d_cnjg(&z__2, &z__3);
				z__1.r = a[i__5].r * z__2.r - a[i__5].i * 
					z__2.i, z__1.i = a[i__5].r * z__2.i + 
					a[i__5].i * z__2.r;
				a[i__4].r = z__1.r, a[i__4].i = z__1.i;
			    }
			}
			v2.r = vcopy[1].r, v2.i = vcopy[1].i;
			z__1.r = t1copy.r * v2.r - t1copy.i * v2.i, z__1.i = 
				t1copy.r * v2.i + t1copy.i * v2.r;
			t2.r = z__1.r, t2.i = z__1.i;
			i__4 = vecsidx + (k - 1) * 3 + 1;
			work[i__4].r = vcopy[1].r, work[i__4].i = vcopy[1].i;
			i__4 = vecsidx + (k - 1) * 3 + 2;
			work[i__4].r = vcopy[2].r, work[i__4].i = vcopy[2].i;
			i__4 = vecsidx + (k - 1) * 3 + 3;
			work[i__4].r = t1copy.r, work[i__4].i = t1copy.i;
			t1.r = t1copy.r, t1.i = t1copy.i;
			if (k < istop) {

/*                       Do some work so next step is ready... */

			    v3.r = vcopy[2].r, v3.i = vcopy[2].i;
			    z__1.r = t1.r * v3.r - t1.i * v3.i, z__1.i = t1.r 
				    * v3.i + t1.i * v3.r;
			    t3.r = z__1.r, t3.i = z__1.i;
/* Computing MIN */
			    i__5 = k2[ki - 1] + 1, i__6 = i__ - 1;
			    i__4 = (min(i__5,i__6) + icol1 - k - 1) * lda + 
				    irow1;
			    i__7 = lda;
			    for (j = (icol1 - 1) * lda + irow1; i__7 < 0 ? j 
				    >= i__4 : j <= i__4; j += i__7) {
				d_cnjg(&z__4, &t1);
				i__5 = j;
				z__3.r = z__4.r * a[i__5].r - z__4.i * a[i__5]
					.i, z__3.i = z__4.r * a[i__5].i + 
					z__4.i * a[i__5].r;
				d_cnjg(&z__6, &t2);
				i__6 = j + 1;
				z__5.r = z__6.r * a[i__6].r - z__6.i * a[i__6]
					.i, z__5.i = z__6.r * a[i__6].i + 
					z__6.i * a[i__6].r;
				z__2.r = z__3.r + z__5.r, z__2.i = z__3.i + 
					z__5.i;
				d_cnjg(&z__8, &t3);
				i__8 = j + 2;
				z__7.r = z__8.r * a[i__8].r - z__8.i * a[i__8]
					.i, z__7.i = z__8.r * a[i__8].i + 
					z__8.i * a[i__8].r;
				z__1.r = z__2.r + z__7.r, z__1.i = z__2.i + 
					z__7.i;
				sum.r = z__1.r, sum.i = z__1.i;
				i__5 = j;
				i__6 = j;
				z__1.r = a[i__6].r - sum.r, z__1.i = a[i__6]
					.i - sum.i;
				a[i__5].r = z__1.r, a[i__5].i = z__1.i;
				i__5 = j + 1;
				i__6 = j + 1;
				z__2.r = sum.r * v2.r - sum.i * v2.i, z__2.i =
					 sum.r * v2.i + sum.i * v2.r;
				z__1.r = a[i__6].r - z__2.r, z__1.i = a[i__6]
					.i - z__2.i;
				a[i__5].r = z__1.r, a[i__5].i = z__1.i;
				i__5 = j + 2;
				i__6 = j + 2;
				z__2.r = sum.r * v3.r - sum.i * v3.i, z__2.i =
					 sum.r * v3.i + sum.i * v3.r;
				z__1.r = a[i__6].r - z__2.r, z__1.i = a[i__6]
					.i - z__2.i;
				a[i__5].r = z__1.r, a[i__5].i = z__1.i;
/* L90: */
			    }
			    i__7 = irow1 + 3;
			    for (j = irow1 + 1; j <= i__7; ++j) {
				i__4 = (icol1 - 1) * lda + j;
				z__3.r = t1.r * a[i__4].r - t1.i * a[i__4].i, 
					z__3.i = t1.r * a[i__4].i + t1.i * a[
					i__4].r;
				i__5 = icol1 * lda + j;
				z__4.r = t2.r * a[i__5].r - t2.i * a[i__5].i, 
					z__4.i = t2.r * a[i__5].i + t2.i * a[
					i__5].r;
				z__2.r = z__3.r + z__4.r, z__2.i = z__3.i + 
					z__4.i;
				i__6 = (icol1 + 1) * lda + j;
				z__5.r = t3.r * a[i__6].r - t3.i * a[i__6].i, 
					z__5.i = t3.r * a[i__6].i + t3.i * a[
					i__6].r;
				z__1.r = z__2.r + z__5.r, z__1.i = z__2.i + 
					z__5.i;
				sum.r = z__1.r, sum.i = z__1.i;
				i__4 = (icol1 - 1) * lda + j;
				i__5 = (icol1 - 1) * lda + j;
				z__1.r = a[i__5].r - sum.r, z__1.i = a[i__5]
					.i - sum.i;
				a[i__4].r = z__1.r, a[i__4].i = z__1.i;
				i__4 = icol1 * lda + j;
				i__5 = icol1 * lda + j;
				d_cnjg(&z__3, &v2);
				z__2.r = sum.r * z__3.r - sum.i * z__3.i, 
					z__2.i = sum.r * z__3.i + sum.i * 
					z__3.r;
				z__1.r = a[i__5].r - z__2.r, z__1.i = a[i__5]
					.i - z__2.i;
				a[i__4].r = z__1.r, a[i__4].i = z__1.i;
				i__4 = (icol1 + 1) * lda + j;
				i__5 = (icol1 + 1) * lda + j;
				d_cnjg(&z__3, &v3);
				z__2.r = sum.r * z__3.r - sum.i * z__3.i, 
					z__2.i = sum.r * z__3.i + sum.i * 
					z__3.r;
				z__1.r = a[i__5].r - z__2.r, z__1.i = a[i__5]
					.i - z__2.i;
				a[i__4].r = z__1.r, a[i__4].i = z__1.i;
/* L100: */
			    }
			}
			++irow1;
			++icol1;
/* L110: */
		    }
		}
/* L120: */
	    }

/*           The first part of applying the transforms is complete. */
/*           Broadcasts of the Householder data are done here. */

	    i__2 = ibulge;
	    for (ki = 1; ki <= i__2; ++ki) {

/* Computing MAX */
		i__3 = k1[ki - 1];
		istart = max(i__3,m);
/* Computing MIN */
		i__3 = k2[ki - 1], i__7 = i__ - 1;
		istop = min(i__3,i__7);

/*              Broadcast Householder information from the block */

		if (myrow == icurrow[ki - 1] && npcol > 1 && istart <= istop) 
			{
		    if (mycol != icurcol[ki - 1]) {
			i__3 = (istop - istart + 1) * 3;
			i__7 = (istop - istart + 1) * 3;
			zgebr2d_(&contxt, "ROW", " ", &i__3, &c__1, &work[
				vecsidx + (istart - 1) * 3 + 1], &i__7, &
				myrow, &icurcol[ki - 1], (ftnlen)3, (ftnlen)1)
				;
		    } else {
			i__3 = (istop - istart + 1) * 3;
			i__7 = (istop - istart + 1) * 3;
			zgebs2d_(&contxt, "ROW", " ", &i__3, &c__1, &work[
				vecsidx + (istart - 1) * 3 + 1], &i__7, (
				ftnlen)3, (ftnlen)1);
		    }
		}
/* L130: */
	    }

/*           Now do column transforms and finish work */

	    i__2 = ibulge;
	    for (ki = 1; ki <= i__2; ++ki) {

/* Computing MAX */
		i__3 = k1[ki - 1];
		istart = max(i__3,m);
/* Computing MIN */
		i__3 = k2[ki - 1], i__7 = i__ - 1;
		istop = min(i__3,i__7);

		if (mycol == icurcol[ki - 1] && nprow > 1 && istart <= istop) 
			{
		    if (myrow != icurrow[ki - 1]) {
			i__3 = (istop - istart + 1) * 3;
			i__7 = (istop - istart + 1) * 3;
			zgebr2d_(&contxt, "COL", " ", &i__3, &c__1, &work[
				vecsidx + (istart - 1) * 3 + 1], &i__7, &
				icurrow[ki - 1], &mycol, (ftnlen)3, (ftnlen)1)
				;
		    } else {
			i__3 = (istop - istart + 1) * 3;
			i__7 = (istop - istart + 1) * 3;
			zgebs2d_(&contxt, "COL", " ", &i__3, &c__1, &work[
				vecsidx + (istart - 1) * 3 + 1], &i__7, (
				ftnlen)3, (ftnlen)1);
		    }
		}
/* L140: */
	    }


/*           Now do the make-up work so that things are in block fashion. */

	    i__2 = ibulge;
	    for (ki = 1; ki <= i__2; ++ki) {
/* Computing MAX */
		i__3 = k1[ki - 1];
		istart = max(i__3,m);
/* Computing MIN */
		i__3 = k2[ki - 1], i__7 = i__ - 1;
		istop = min(i__3,i__7);

		modkm1 = (istart - 1) % hbl;
		if (myrow == icurrow[ki - 1] && mycol == icurcol[ki - 1] && (
			modkm1 == hbl - 2 && istart == i__ - 1 || modkm1 < 
			hbl - 2 && istart <= i__ - 1)) {

/*                 (IROW1,ICOL1) is (I,J)-coordinates of H(ISTART,ISTART) */

		    irow1 = krow[ki - 1];
		    icol1 = kcol[ki - 1];
		    i__3 = istop;
		    for (k = istart; k <= i__3; ++k) {

/*              Catch up on column & border work */

/* Computing MIN */
			i__7 = 3, i__4 = i__ - k + 1;
			nr = min(i__7,i__4);
			i__7 = vecsidx + (k - 1) * 3 + 1;
			v2.r = work[i__7].r, v2.i = work[i__7].i;
			i__7 = vecsidx + (k - 1) * 3 + 2;
			v3.r = work[i__7].r, v3.i = work[i__7].i;
			i__7 = vecsidx + (k - 1) * 3 + 3;
			t1.r = work[i__7].r, t1.i = work[i__7].i;
			z__1.r = t1.r * v2.r - t1.i * v2.i, z__1.i = t1.r * 
				v2.i + t1.i * v2.r;
			t2.r = z__1.r, t2.i = z__1.i;
			if (k < istop) {

/*                 Do some work so next step is ready... */

			    z__1.r = t1.r * v3.r - t1.i * v3.i, z__1.i = t1.r 
				    * v3.i + t1.i * v3.r;
			    t3.r = z__1.r, t3.i = z__1.i;
/* Computing MIN */
			    i__4 = istart + 1;
			    i__7 = min(i__4,i__) - k + irow1;
			    zlaref_("Col", &a[1], &lda, &c_false, &z__[1], &
				    ldz, &c_false, &icol1, &icol1, &istart, &
				    istop, &i__7, &irow1, &liloz, &lihiz, &
				    work[vecsidx + 1], &v2, &v3, &t1, &t2, &
				    t3, (ftnlen)3);
			    ++irow1;
			    ++icol1;
			} else {
			    if (nr == 3 && (k - 1) % hbl < hbl - 2) {
				z__1.r = t1.r * v3.r - t1.i * v3.i, z__1.i = 
					t1.r * v3.i + t1.i * v3.r;
				t3.r = z__1.r, t3.i = z__1.i;
/* Computing MIN */
/* Computing MIN */
				i__5 = k2[ki - 1] + 1, i__6 = i__ - 1;
				i__4 = min(i__5,i__6);
				i__7 = min(i__4,i2) - k + icol1;
				zlaref_("Row", &a[1], &lda, &c_false, &z__[1],
					 &ldz, &c_false, &irow1, &irow1, &
					istart, &istop, &icol1, &i__7, &liloz,
					 &lihiz, &work[vecsidx + 1], &v2, &v3,
					 &t1, &t2, &t3, (ftnlen)3);
			    }
			}
/* L150: */
		    }
		}

/*           Send SMALLA back again. */

		k = istart;
		modkm1 = (k - 1) % hbl;
		if (modkm1 >= hbl - 2 && k <= i__ - 1) {
		    if (modkm1 == hbl - 2 && k < i__ - 1) {

/*                    Copy 6 elements back to global A(K-1:K+4,K-1:K+4) */

			itmp1 = icurrow[ki - 1];
			itmp2 = icurcol[ki - 1];
/* Computing MIN */
			i__7 = 6, i__4 = *n - k + 2;
			i__3 = min(i__7,i__4);
			i__5 = k - 1;
			pzlacp3_(&i__3, &i__5, &a[1], &desca[1], &smalla[(ki *
				 6 + 1) * 6 - 42], &c__6, &itmp1, &itmp2, &
				c__1);

		    }
		    if (modkm1 == hbl - 1) {

/*                    Copy 6 elements back to global A(K-2:K+3,K-2:K+3) */

			itmp1 = icurrow[ki - 1];
			itmp2 = icurcol[ki - 1];
/* Computing MIN */
			i__7 = 6, i__4 = *n - k + 3;
			i__3 = min(i__7,i__4);
			i__5 = k - 2;
			pzlacp3_(&i__3, &i__5, &a[1], &desca[1], &smalla[(ki *
				 6 + 1) * 6 - 42], &c__6, &itmp1, &itmp2, &
				c__1);
		    }
		}

/* L160: */
	    }

/* L170: */

/*           Now start major set of block ROW reflections */

	    i__2 = ibulge;
	    for (ki = 1; ki <= i__2; ++ki) {
		if (myrow != icurrow[ki - 1] && down != icurrow[ki - 1]) {
		    goto L180;
		}
/* Computing MAX */
		i__3 = k1[ki - 1];
		istart = max(i__3,m);
/* Computing MIN */
		i__3 = k2[ki - 1], i__7 = i__ - 1;
		istop = min(i__3,i__7);

		if (istop > istart && (istart - 1) % hbl < hbl - 2 && icurrow[
			ki - 1] == myrow) {
/* Computing MIN */
		    i__3 = k2[ki - 1] + 1, i__7 = i__ - 1;
		    irow1 = min(i__3,i__7) + 1;
		    infog1l_(&irow1, &hbl, &npcol, &mycol, &jafirst, &itmp1, &
			    itmp2);
		    itmp2 = locali2;
		    ii = krow[ki - 1];
		    zlaref_("Row", &a[1], &lda, wantz, &z__[1], &ldz, &c_true,
			     &ii, &ii, &istart, &istop, &itmp1, &itmp2, &
			    liloz, &lihiz, &work[vecsidx + 1], &v2, &v3, &t1, 
			    &t2, &t3, (ftnlen)3);
		}
L180:
		;
	    }

	    i__2 = ibulge;
	    for (ki = 1; ki <= i__2; ++ki) {
		if (krow[ki - 1] > kp2row[ki - 1]) {
		    goto L220;
		}
		if (myrow != icurrow[ki - 1] && down != icurrow[ki - 1]) {
		    goto L220;
		}
/* Computing MAX */
		i__3 = k1[ki - 1];
		istart = max(i__3,m);
/* Computing MIN */
		i__3 = k2[ki - 1], i__7 = i__ - 1;
		istop = min(i__3,i__7);
		if (istart == istop || (istart - 1) % hbl >= hbl - 2 || 
			icurrow[ki - 1] != myrow) {
		    i__3 = istop;
		    for (k = istart; k <= i__3; ++k) {
			i__7 = vecsidx + (k - 1) * 3 + 1;
			v2.r = work[i__7].r, v2.i = work[i__7].i;
			i__7 = vecsidx + (k - 1) * 3 + 2;
			v3.r = work[i__7].r, v3.i = work[i__7].i;
			i__7 = vecsidx + (k - 1) * 3 + 3;
			t1.r = work[i__7].r, t1.i = work[i__7].i;
/* Computing MIN */
			i__7 = 3, i__4 = i__ - k + 1;
			nr = min(i__7,i__4);
			z__1.r = t1.r * v2.r - t1.i * v2.i, z__1.i = t1.r * 
				v2.i + t1.i * v2.r;
			t2.r = z__1.r, t2.i = z__1.i;
			if (nr == 3 && krow[ki - 1] <= kp2row[ki - 1]) {
			    z__1.r = t1.r * v3.r - t1.i * v3.i, z__1.i = t1.r 
				    * v3.i + t1.i * v3.r;
			    t3.r = z__1.r, t3.i = z__1.i;
			    if (k < istop && (k - 1) % hbl < hbl - 2) {
/* Computing MIN */
				i__7 = k2[ki - 1] + 1, i__4 = i__ - 1;
				itmp1 = min(i__7,i__4) + 1;
			    } else {
				if ((k - 1) % hbl < hbl - 2) {
/* Computing MIN */
				    i__7 = k2[ki - 1] + 1, i__4 = i__ - 1;
				    itmp1 = min(i__7,i__4) + 1;
				}
				if ((k - 1) % hbl == hbl - 2) {
/* Computing MIN */
				    i__7 = k + 4;
				    itmp1 = min(i__7,i2) + 1;
				}
				if ((k - 1) % hbl == hbl - 1) {
/* Computing MIN */
				    i__7 = k + 3;
				    itmp1 = min(i__7,i2) + 1;
				}
			    }

/*                    Find local coor of rows K through K+2 */

			    irow1 = krow[ki - 1];
			    irow2 = kp2row[ki - 1];
			    if (k > istart && (k - 1) % hbl >= hbl - 2) {
				if (down == icurrow[ki - 1]) {
				    ++irow1;
				}
				if (myrow == icurrow[ki - 1]) {
				    ++irow2;
				}
			    }
			    infog1l_(&itmp1, &hbl, &npcol, &mycol, &jafirst, &
				    icol1, &icol2);
			    icol2 = locali2;
			    if ((k - 1) % hbl < hbl - 2 || nprow == 1) {
				zlaref_("Row", &a[1], &lda, wantz, &z__[1], &
					ldz, &c_false, &irow1, &irow1, &
					istart, &istop, &icol1, &icol2, &
					liloz, &lihiz, &work[vecsidx + 1], &
					v2, &v3, &t1, &t2, &t3, (ftnlen)3);
			    }
			    if ((k - 1) % hbl == hbl - 2 && nprow > 1) {
				if (irow1 != irow2) {
				    i__7 = icol2 - icol1 + 1;
				    zgesd2d_(&contxt, &c__2, &i__7, &a[(icol1 
					    - 1) * lda + irow1], &lda, &down, 
					    &mycol);
				    if (skip && istart == istop) {
					i__7 = icol2 - icol1 + 1;
					zgerv2d_(&contxt, &c__2, &i__7, &a[(
						icol1 - 1) * lda + irow1], &
						lda, &down, &mycol);
				    }
				} else if (skip) {
				    i__7 = icol2 - icol1 + 1;
				    zgerv2d_(&contxt, &c__2, &i__7, &work[
					    irbuf + 1], &c__2, &up, &mycol);
				    i__7 = icol2;
				    for (j = icol1; j <= i__7; ++j) {
					d_cnjg(&z__4, &t1);
					i__4 = irbuf + (j - icol1 << 1) + 1;
					z__3.r = z__4.r * work[i__4].r - 
						z__4.i * work[i__4].i, z__3.i 
						= z__4.r * work[i__4].i + 
						z__4.i * work[i__4].r;
					d_cnjg(&z__6, &t2);
					i__5 = irbuf + (j - icol1 << 1) + 2;
					z__5.r = z__6.r * work[i__5].r - 
						z__6.i * work[i__5].i, z__5.i 
						= z__6.r * work[i__5].i + 
						z__6.i * work[i__5].r;
					z__2.r = z__3.r + z__5.r, z__2.i = 
						z__3.i + z__5.i;
					d_cnjg(&z__8, &t3);
					i__6 = (j - 1) * lda + irow1;
					z__7.r = z__8.r * a[i__6].r - z__8.i *
						 a[i__6].i, z__7.i = z__8.r * 
						a[i__6].i + z__8.i * a[i__6]
						.r;
					z__1.r = z__2.r + z__7.r, z__1.i = 
						z__2.i + z__7.i;
					sum.r = z__1.r, sum.i = z__1.i;
					i__4 = irbuf + (j - icol1 << 1) + 1;
					i__5 = irbuf + (j - icol1 << 1) + 1;
					z__1.r = work[i__5].r - sum.r, z__1.i 
						= work[i__5].i - sum.i;
					work[i__4].r = z__1.r, work[i__4].i = 
						z__1.i;
					i__4 = irbuf + (j - icol1 << 1) + 2;
					i__5 = irbuf + (j - icol1 << 1) + 2;
					z__2.r = sum.r * v2.r - sum.i * v2.i, 
						z__2.i = sum.r * v2.i + sum.i 
						* v2.r;
					z__1.r = work[i__5].r - z__2.r, 
						z__1.i = work[i__5].i - 
						z__2.i;
					work[i__4].r = z__1.r, work[i__4].i = 
						z__1.i;
					i__4 = (j - 1) * lda + irow1;
					i__5 = (j - 1) * lda + irow1;
					z__2.r = sum.r * v3.r - sum.i * v3.i, 
						z__2.i = sum.r * v3.i + sum.i 
						* v3.r;
					z__1.r = a[i__5].r - z__2.r, z__1.i = 
						a[i__5].i - z__2.i;
					a[i__4].r = z__1.r, a[i__4].i = 
						z__1.i;
/* L190: */
				    }
				    if (istart == istop) {
					i__7 = icol2 - icol1 + 1;
					zgesd2d_(&contxt, &c__2, &i__7, &work[
						irbuf + 1], &c__2, &up, &
						mycol);
				    }
				}
			    }
			    if ((k - 1) % hbl == hbl - 1 && nprow > 1) {
				if (irow1 == irow2) {
				    if (istart == istop) {
					i__7 = icol2 - icol1 + 1;
					zgesd2d_(&contxt, &c__2, &i__7, &a[(
						icol1 - 1) * lda + irow1 - 1],
						 &lda, &down, &mycol);
				    }
				    if (skip) {
					i__7 = icol2 - icol1 + 1;
					zgerv2d_(&contxt, &c__2, &i__7, &a[(
						icol1 - 1) * lda + irow1 - 1],
						 &lda, &down, &mycol);
				    }
				} else if (skip) {
				    if (istart == istop) {
					i__7 = icol2 - icol1 + 1;
					zgerv2d_(&contxt, &c__2, &i__7, &work[
						irbuf + 1], &c__2, &up, &
						mycol);
				    }
				    i__7 = icol2;
				    for (j = icol1; j <= i__7; ++j) {
					d_cnjg(&z__4, &t1);
					i__4 = irbuf + (j - icol1 << 1) + 2;
					z__3.r = z__4.r * work[i__4].r - 
						z__4.i * work[i__4].i, z__3.i 
						= z__4.r * work[i__4].i + 
						z__4.i * work[i__4].r;
					d_cnjg(&z__6, &t2);
					i__5 = (j - 1) * lda + irow1;
					z__5.r = z__6.r * a[i__5].r - z__6.i *
						 a[i__5].i, z__5.i = z__6.r * 
						a[i__5].i + z__6.i * a[i__5]
						.r;
					z__2.r = z__3.r + z__5.r, z__2.i = 
						z__3.i + z__5.i;
					d_cnjg(&z__8, &t3);
					i__6 = (j - 1) * lda + irow1 + 1;
					z__7.r = z__8.r * a[i__6].r - z__8.i *
						 a[i__6].i, z__7.i = z__8.r * 
						a[i__6].i + z__8.i * a[i__6]
						.r;
					z__1.r = z__2.r + z__7.r, z__1.i = 
						z__2.i + z__7.i;
					sum.r = z__1.r, sum.i = z__1.i;
					i__4 = irbuf + (j - icol1 << 1) + 2;
					i__5 = irbuf + (j - icol1 << 1) + 2;
					z__1.r = work[i__5].r - sum.r, z__1.i 
						= work[i__5].i - sum.i;
					work[i__4].r = z__1.r, work[i__4].i = 
						z__1.i;
					i__4 = (j - 1) * lda + irow1;
					i__5 = (j - 1) * lda + irow1;
					z__2.r = sum.r * v2.r - sum.i * v2.i, 
						z__2.i = sum.r * v2.i + sum.i 
						* v2.r;
					z__1.r = a[i__5].r - z__2.r, z__1.i = 
						a[i__5].i - z__2.i;
					a[i__4].r = z__1.r, a[i__4].i = 
						z__1.i;
					i__4 = (j - 1) * lda + irow1 + 1;
					i__5 = (j - 1) * lda + irow1 + 1;
					z__2.r = sum.r * v3.r - sum.i * v3.i, 
						z__2.i = sum.r * v3.i + sum.i 
						* v3.r;
					z__1.r = a[i__5].r - z__2.r, z__1.i = 
						a[i__5].i - z__2.i;
					a[i__4].r = z__1.r, a[i__4].i = 
						z__1.i;
/* L200: */
				    }
				    i__7 = icol2 - icol1 + 1;
				    zgesd2d_(&contxt, &c__2, &i__7, &work[
					    irbuf + 1], &c__2, &up, &mycol);

				}
			    }
			}
/* L210: */
		    }
		}
L220:
		;
	    }

	    if (skip) {
		goto L290;
	    }

	    i__2 = ibulge;
	    for (ki = 1; ki <= i__2; ++ki) {
		if (krow[ki - 1] > kp2row[ki - 1]) {
		    goto L260;
		}
		if (myrow != icurrow[ki - 1] && down != icurrow[ki - 1]) {
		    goto L260;
		}
/* Computing MAX */
		i__3 = k1[ki - 1];
		istart = max(i__3,m);
/* Computing MIN */
		i__3 = k2[ki - 1], i__7 = i__ - 1;
		istop = min(i__3,i__7);
		if (istart == istop || (istart - 1) % hbl >= hbl - 2 || 
			icurrow[ki - 1] != myrow) {
		    i__3 = istop;
		    for (k = istart; k <= i__3; ++k) {
			i__7 = vecsidx + (k - 1) * 3 + 1;
			v2.r = work[i__7].r, v2.i = work[i__7].i;
			i__7 = vecsidx + (k - 1) * 3 + 2;
			v3.r = work[i__7].r, v3.i = work[i__7].i;
			i__7 = vecsidx + (k - 1) * 3 + 3;
			t1.r = work[i__7].r, t1.i = work[i__7].i;
/* Computing MIN */
			i__7 = 3, i__4 = i__ - k + 1;
			nr = min(i__7,i__4);
			z__1.r = t1.r * v2.r - t1.i * v2.i, z__1.i = t1.r * 
				v2.i + t1.i * v2.r;
			t2.r = z__1.r, t2.i = z__1.i;
			if (nr == 3 && krow[ki - 1] <= kp2row[ki - 1]) {
			    z__1.r = t1.r * v3.r - t1.i * v3.i, z__1.i = t1.r 
				    * v3.i + t1.i * v3.r;
			    t3.r = z__1.r, t3.i = z__1.i;
			    if (k < istop && (k - 1) % hbl < hbl - 2) {
/* Computing MIN */
				i__7 = k2[ki - 1] + 1, i__4 = i__ - 1;
				itmp1 = min(i__7,i__4) + 1;
			    } else {
				if ((k - 1) % hbl < hbl - 2) {
/* Computing MIN */
				    i__7 = k2[ki - 1] + 1, i__4 = i__ - 1;
				    itmp1 = min(i__7,i__4) + 1;
				}
				if ((k - 1) % hbl == hbl - 2) {
/* Computing MIN */
				    i__7 = k + 4;
				    itmp1 = min(i__7,i2) + 1;
				}
				if ((k - 1) % hbl == hbl - 1) {
/* Computing MIN */
				    i__7 = k + 3;
				    itmp1 = min(i__7,i2) + 1;
				}
			    }

/*                    Find local coor of rows K through K+2 */

			    irow1 = krow[ki - 1];
			    irow2 = kp2row[ki - 1];
			    if (k > istart && (k - 1) % hbl >= hbl - 2) {
				if (down == icurrow[ki - 1]) {
				    ++irow1;
				}
				if (myrow == icurrow[ki - 1]) {
				    ++irow2;
				}
			    }
			    infog1l_(&itmp1, &hbl, &npcol, &mycol, &jafirst, &
				    icol1, &icol2);
			    icol2 = locali2;
			    if ((k - 1) % hbl == hbl - 2 && nprow > 1) {
				if (irow1 == irow2) {
				    i__7 = icol2 - icol1 + 1;
				    zgerv2d_(&contxt, &c__2, &i__7, &work[
					    irbuf + 1], &c__2, &up, &mycol);
				    i__7 = icol2;
				    for (j = icol1; j <= i__7; ++j) {
					d_cnjg(&z__4, &t1);
					i__4 = irbuf + (j - icol1 << 1) + 1;
					z__3.r = z__4.r * work[i__4].r - 
						z__4.i * work[i__4].i, z__3.i 
						= z__4.r * work[i__4].i + 
						z__4.i * work[i__4].r;
					d_cnjg(&z__6, &t2);
					i__5 = irbuf + (j - icol1 << 1) + 2;
					z__5.r = z__6.r * work[i__5].r - 
						z__6.i * work[i__5].i, z__5.i 
						= z__6.r * work[i__5].i + 
						z__6.i * work[i__5].r;
					z__2.r = z__3.r + z__5.r, z__2.i = 
						z__3.i + z__5.i;
					d_cnjg(&z__8, &t3);
					i__6 = (j - 1) * lda + irow1;
					z__7.r = z__8.r * a[i__6].r - z__8.i *
						 a[i__6].i, z__7.i = z__8.r * 
						a[i__6].i + z__8.i * a[i__6]
						.r;
					z__1.r = z__2.r + z__7.r, z__1.i = 
						z__2.i + z__7.i;
					sum.r = z__1.r, sum.i = z__1.i;
					i__4 = irbuf + (j - icol1 << 1) + 1;
					i__5 = irbuf + (j - icol1 << 1) + 1;
					z__1.r = work[i__5].r - sum.r, z__1.i 
						= work[i__5].i - sum.i;
					work[i__4].r = z__1.r, work[i__4].i = 
						z__1.i;
					i__4 = irbuf + (j - icol1 << 1) + 2;
					i__5 = irbuf + (j - icol1 << 1) + 2;
					z__2.r = sum.r * v2.r - sum.i * v2.i, 
						z__2.i = sum.r * v2.i + sum.i 
						* v2.r;
					z__1.r = work[i__5].r - z__2.r, 
						z__1.i = work[i__5].i - 
						z__2.i;
					work[i__4].r = z__1.r, work[i__4].i = 
						z__1.i;
					i__4 = (j - 1) * lda + irow1;
					i__5 = (j - 1) * lda + irow1;
					z__2.r = sum.r * v3.r - sum.i * v3.i, 
						z__2.i = sum.r * v3.i + sum.i 
						* v3.r;
					z__1.r = a[i__5].r - z__2.r, z__1.i = 
						a[i__5].i - z__2.i;
					a[i__4].r = z__1.r, a[i__4].i = 
						z__1.i;
/* L230: */
				    }
				    if (istart == istop) {
					i__7 = icol2 - icol1 + 1;
					zgesd2d_(&contxt, &c__2, &i__7, &work[
						irbuf + 1], &c__2, &up, &
						mycol);
				    }
				}
			    }
			    if ((k - 1) % hbl == hbl - 1 && nprow > 1) {
				if (irow1 != irow2) {
				    if (istart == istop) {
					i__7 = icol2 - icol1 + 1;
					zgerv2d_(&contxt, &c__2, &i__7, &work[
						irbuf + 1], &c__2, &up, &
						mycol);
				    }
				    i__7 = icol2;
				    for (j = icol1; j <= i__7; ++j) {
					d_cnjg(&z__4, &t1);
					i__4 = irbuf + (j - icol1 << 1) + 2;
					z__3.r = z__4.r * work[i__4].r - 
						z__4.i * work[i__4].i, z__3.i 
						= z__4.r * work[i__4].i + 
						z__4.i * work[i__4].r;
					d_cnjg(&z__6, &t2);
					i__5 = (j - 1) * lda + irow1;
					z__5.r = z__6.r * a[i__5].r - z__6.i *
						 a[i__5].i, z__5.i = z__6.r * 
						a[i__5].i + z__6.i * a[i__5]
						.r;
					z__2.r = z__3.r + z__5.r, z__2.i = 
						z__3.i + z__5.i;
					d_cnjg(&z__8, &t3);
					i__6 = (j - 1) * lda + irow1 + 1;
					z__7.r = z__8.r * a[i__6].r - z__8.i *
						 a[i__6].i, z__7.i = z__8.r * 
						a[i__6].i + z__8.i * a[i__6]
						.r;
					z__1.r = z__2.r + z__7.r, z__1.i = 
						z__2.i + z__7.i;
					sum.r = z__1.r, sum.i = z__1.i;
					i__4 = irbuf + (j - icol1 << 1) + 2;
					i__5 = irbuf + (j - icol1 << 1) + 2;
					z__1.r = work[i__5].r - sum.r, z__1.i 
						= work[i__5].i - sum.i;
					work[i__4].r = z__1.r, work[i__4].i = 
						z__1.i;
					i__4 = (j - 1) * lda + irow1;
					i__5 = (j - 1) * lda + irow1;
					z__2.r = sum.r * v2.r - sum.i * v2.i, 
						z__2.i = sum.r * v2.i + sum.i 
						* v2.r;
					z__1.r = a[i__5].r - z__2.r, z__1.i = 
						a[i__5].i - z__2.i;
					a[i__4].r = z__1.r, a[i__4].i = 
						z__1.i;
					i__4 = (j - 1) * lda + irow1 + 1;
					i__5 = (j - 1) * lda + irow1 + 1;
					z__2.r = sum.r * v3.r - sum.i * v3.i, 
						z__2.i = sum.r * v3.i + sum.i 
						* v3.r;
					z__1.r = a[i__5].r - z__2.r, z__1.i = 
						a[i__5].i - z__2.i;
					a[i__4].r = z__1.r, a[i__4].i = 
						z__1.i;
/* L240: */
				    }
				    i__7 = icol2 - icol1 + 1;
				    zgesd2d_(&contxt, &c__2, &i__7, &work[
					    irbuf + 1], &c__2, &up, &mycol);
				}
			    }
			}
/* L250: */
		    }
		}
L260:
		;
	    }

	    i__2 = ibulge;
	    for (ki = 1; ki <= i__2; ++ki) {
		if (krow[ki - 1] > kp2row[ki - 1]) {
		    goto L280;
		}
		if (myrow != icurrow[ki - 1] && down != icurrow[ki - 1]) {
		    goto L280;
		}
/* Computing MAX */
		i__3 = k1[ki - 1];
		istart = max(i__3,m);
/* Computing MIN */
		i__3 = k2[ki - 1], i__7 = i__ - 1;
		istop = min(i__3,i__7);
		if (istart == istop || (istart - 1) % hbl >= hbl - 2 || 
			icurrow[ki - 1] != myrow) {
		    i__3 = istop;
		    for (k = istart; k <= i__3; ++k) {
			i__7 = vecsidx + (k - 1) * 3 + 1;
			v2.r = work[i__7].r, v2.i = work[i__7].i;
			i__7 = vecsidx + (k - 1) * 3 + 2;
			v3.r = work[i__7].r, v3.i = work[i__7].i;
			i__7 = vecsidx + (k - 1) * 3 + 3;
			t1.r = work[i__7].r, t1.i = work[i__7].i;
/* Computing MIN */
			i__7 = 3, i__4 = i__ - k + 1;
			nr = min(i__7,i__4);
			z__1.r = t1.r * v2.r - t1.i * v2.i, z__1.i = t1.r * 
				v2.i + t1.i * v2.r;
			t2.r = z__1.r, t2.i = z__1.i;
			if (nr == 3 && krow[ki - 1] <= kp2row[ki - 1]) {
			    z__1.r = t1.r * v3.r - t1.i * v3.i, z__1.i = t1.r 
				    * v3.i + t1.i * v3.r;
			    t3.r = z__1.r, t3.i = z__1.i;
			    if (k < istop && (k - 1) % hbl < hbl - 2) {
/* Computing MIN */
				i__7 = k2[ki - 1] + 1, i__4 = i__ - 1;
				itmp1 = min(i__7,i__4) + 1;
			    } else {
				if ((k - 1) % hbl < hbl - 2) {
/* Computing MIN */
				    i__7 = k2[ki - 1] + 1, i__4 = i__ - 1;
				    itmp1 = min(i__7,i__4) + 1;
				}
				if ((k - 1) % hbl == hbl - 2) {
/* Computing MIN */
				    i__7 = k + 4;
				    itmp1 = min(i__7,i2) + 1;
				}
				if ((k - 1) % hbl == hbl - 1) {
/* Computing MIN */
				    i__7 = k + 3;
				    itmp1 = min(i__7,i2) + 1;
				}
			    }

/*                    Find local coor of rows K through K+2 */

			    irow1 = krow[ki - 1];
			    irow2 = kp2row[ki - 1];
			    if (k > istart && (k - 1) % hbl >= hbl - 2) {
				if (down == icurrow[ki - 1]) {
				    ++irow1;
				}
				if (myrow == icurrow[ki - 1]) {
				    ++irow2;
				}
			    }
			    infog1l_(&itmp1, &hbl, &npcol, &mycol, &jafirst, &
				    icol1, &icol2);
			    icol2 = locali2;
			    if ((k - 1) % hbl == hbl - 2 && nprow > 1) {
				if (irow1 != irow2) {
				    if (istart == istop) {
					i__7 = icol2 - icol1 + 1;
					zgerv2d_(&contxt, &c__2, &i__7, &a[(
						icol1 - 1) * lda + irow1], &
						lda, &down, &mycol);
				    }
				}
			    }
			    if ((k - 1) % hbl == hbl - 1 && nprow > 1) {
				if (irow1 == irow2) {
				    i__7 = icol2 - icol1 + 1;
				    zgerv2d_(&contxt, &c__2, &i__7, &a[(icol1 
					    - 1) * lda + irow1 - 1], &lda, &
					    down, &mycol);
				}
			    }
			}
/* L270: */
		    }
		}
L280:
		;
	    }

L290:

/*           Now start major set of block COL reflections */

	    i__2 = ibulge;
	    for (ki = 1; ki <= i__2; ++ki) {
		if (mycol != icurcol[ki - 1] && right != icurcol[ki - 1]) {
		    goto L300;
		}
/* Computing MAX */
		i__3 = k1[ki - 1];
		istart = max(i__3,m);
/* Computing MIN */
		i__3 = k2[ki - 1], i__7 = i__ - 1;
		istop = min(i__3,i__7);

		if (((istart - 1) % hbl < hbl - 2 || npcol == 1) && icurcol[
			ki - 1] == mycol && i__ - istop + 1 >= 3) {
		    k = istart;
		    if (k < istop && (k - 1) % hbl < hbl - 2) {
/* Computing MIN */
			i__3 = istart + 1;
			itmp1 = min(i__3,i__) - 1;
		    } else {
			if ((k - 1) % hbl < hbl - 2) {
/* Computing MIN */
			    i__3 = k + 3;
			    itmp1 = min(i__3,i__);
			}
			if ((k - 1) % hbl == hbl - 2) {
/* Computing MAX */
			    i__3 = i1, i__7 = k - 1;
			    itmp1 = max(i__3,i__7) - 1;
			}
			if ((k - 1) % hbl == hbl - 1) {
/* Computing MAX */
			    i__3 = i1, i__7 = k - 2;
			    itmp1 = max(i__3,i__7) - 1;
			}
		    }

		    icol1 = kcol[ki - 1];
		    infog1l_(&i1, &hbl, &nprow, &myrow, &iafirst, &irow1, &
			    irow2);
		    irow2 = numroc_(&itmp1, &hbl, &myrow, &iafirst, &nprow);
		    if (irow1 <= irow2) {
			itmp2 = irow2;
		    } else {
			itmp2 = -1;
		    }
		    zlaref_("Col", &a[1], &lda, wantz, &z__[1], &ldz, &c_true,
			     &icol1, &icol1, &istart, &istop, &irow1, &irow2, 
			    &liloz, &lihiz, &work[vecsidx + 1], &v2, &v3, &t1,
			     &t2, &t3, (ftnlen)3);
		    k = istop;
		    if ((k - 1) % hbl < hbl - 2) {

/*                 Do from ITMP1+1 to MIN(K+3,I) */

			if ((k - 1) % hbl < hbl - 3) {
			    irow1 = itmp2 + 1;
			    if (itmp1 / hbl % nprow == myrow) {
				if (itmp2 > 0) {
/* Computing MIN */
				    i__3 = k + 3;
				    irow2 = itmp2 + min(i__3,i__) - itmp1;
				} else {
				    irow2 = irow1 - 1;
				}
			    } else {
				irow2 = irow1 - 1;
			    }
			} else {
			    i__3 = itmp1 + 1;
			    infog1l_(&i__3, &hbl, &nprow, &myrow, &iafirst, &
				    irow1, &irow2);
/* Computing MIN */
			    i__7 = k + 3;
			    i__3 = min(i__7,i__);
			    irow2 = numroc_(&i__3, &hbl, &myrow, &iafirst, &
				    nprow);
			}
			i__3 = vecsidx + (k - 1) * 3 + 1;
			v2.r = work[i__3].r, v2.i = work[i__3].i;
			i__3 = vecsidx + (k - 1) * 3 + 2;
			v3.r = work[i__3].r, v3.i = work[i__3].i;
			i__3 = vecsidx + (k - 1) * 3 + 3;
			t1.r = work[i__3].r, t1.i = work[i__3].i;
			z__1.r = t1.r * v2.r - t1.i * v2.i, z__1.i = t1.r * 
				v2.i + t1.i * v2.r;
			t2.r = z__1.r, t2.i = z__1.i;
			z__1.r = t1.r * v3.r - t1.i * v3.i, z__1.i = t1.r * 
				v3.i + t1.i * v3.r;
			t3.r = z__1.r, t3.i = z__1.i;
			icol1 = kcol[ki - 1] + istop - istart;
			zlaref_("Col", &a[1], &lda, &c_false, &z__[1], &ldz, &
				c_false, &icol1, &icol1, &istart, &istop, &
				irow1, &irow2, &liloz, &lihiz, &work[vecsidx 
				+ 1], &v2, &v3, &t1, &t2, &t3, (ftnlen)3);
		    }
		}
L300:
		;
	    }

	    i__2 = ibulge;
	    for (ki = 1; ki <= i__2; ++ki) {
		if (kcol[ki - 1] > kp2col[ki - 1]) {
		    goto L360;
		}
		if (mycol != icurcol[ki - 1] && right != icurcol[ki - 1]) {
		    goto L360;
		}
/* Computing MAX */
		i__3 = k1[ki - 1];
		istart = max(i__3,m);
/* Computing MIN */
		i__3 = k2[ki - 1], i__7 = i__ - 1;
		istop = min(i__3,i__7);
		if ((istart - 1) % hbl >= hbl - 2) {

/*              INFO is found in a buffer */

		    ispec = 1;
		} else {

/*              All INFO is local */

		    ispec = 0;
		}
		i__3 = istop;
		for (k = istart; k <= i__3; ++k) {

		    i__7 = vecsidx + (k - 1) * 3 + 1;
		    v2.r = work[i__7].r, v2.i = work[i__7].i;
		    i__7 = vecsidx + (k - 1) * 3 + 2;
		    v3.r = work[i__7].r, v3.i = work[i__7].i;
		    i__7 = vecsidx + (k - 1) * 3 + 3;
		    t1.r = work[i__7].r, t1.i = work[i__7].i;
/* Computing MIN */
		    i__7 = 3, i__4 = i__ - k + 1;
		    nr = min(i__7,i__4);
		    z__1.r = t1.r * v2.r - t1.i * v2.i, z__1.i = t1.r * v2.i 
			    + t1.i * v2.r;
		    t2.r = z__1.r, t2.i = z__1.i;
		    if (nr == 3 && kcol[ki - 1] <= kp2col[ki - 1]) {
			z__1.r = t1.r * v3.r - t1.i * v3.i, z__1.i = t1.r * 
				v3.i + t1.i * v3.r;
			t3.r = z__1.r, t3.i = z__1.i;

			if (k < istop && (k - 1) % hbl < hbl - 2) {
/* Computing MIN */
			    i__7 = istart + 1;
			    itmp1 = min(i__7,i__) - 1;
			} else {
			    if ((k - 1) % hbl < hbl - 2) {
/* Computing MIN */
				i__7 = k + 3;
				itmp1 = min(i__7,i__);
			    }
			    if ((k - 1) % hbl == hbl - 2) {
/* Computing MAX */
				i__7 = i1, i__4 = k - 1;
				itmp1 = max(i__7,i__4) - 1;
			    }
			    if ((k - 1) % hbl == hbl - 1) {
/* Computing MAX */
				i__7 = i1, i__4 = k - 2;
				itmp1 = max(i__7,i__4) - 1;
			    }
			}
			if ((k - 1) % hbl < hbl - 2) {
			    icol1 = kcol[ki - 1] + k - istart;
			    icol2 = kp2col[ki - 1] + k - istart;
			} else {
			    icol1 = kcol[ki - 1];
			    icol2 = kp2col[ki - 1];
			    if (k > istart) {
				if (right == icurcol[ki - 1]) {
				    ++icol1;
				}
				if (mycol == icurcol[ki - 1]) {
				    ++icol2;
				}
			    }
			}
			infog1l_(&i1, &hbl, &nprow, &myrow, &iafirst, &irow1, 
				&irow2);
			irow2 = numroc_(&itmp1, &hbl, &myrow, &iafirst, &
				nprow);
			if ((k - 1) % hbl == hbl - 2 && npcol > 1) {
			    if (icol1 != icol2) {
				i__7 = irow2 - irow1 + 1;
				zgesd2d_(&contxt, &i__7, &c__2, &a[(icol1 - 1)
					 * lda + irow1], &lda, &myrow, &right)
					;
				if (istart == istop && skip) {
				    i__7 = irow2 - irow1 + 1;
				    zgerv2d_(&contxt, &i__7, &c__2, &a[(icol1 
					    - 1) * lda + irow1], &lda, &myrow,
					     &right);
				}
			    } else if (skip) {
				i__7 = irow2 - irow1 + 1;
				i__4 = irow2 - irow1 + 1;
				zgerv2d_(&contxt, &i__7, &c__2, &work[icbuf + 
					1], &i__4, &myrow, &left);
				ii = icbuf - irow1 + 1;
				jj = icbuf + irow2 - (irow1 << 1) + 2;
				i__7 = irow2;
				for (j = irow1; j <= i__7; ++j) {
				    i__4 = ii + j;
				    z__3.r = t1.r * work[i__4].r - t1.i * 
					    work[i__4].i, z__3.i = t1.r * 
					    work[i__4].i + t1.i * work[i__4]
					    .r;
				    i__5 = jj + j;
				    z__4.r = t2.r * work[i__5].r - t2.i * 
					    work[i__5].i, z__4.i = t2.r * 
					    work[i__5].i + t2.i * work[i__5]
					    .r;
				    z__2.r = z__3.r + z__4.r, z__2.i = z__3.i 
					    + z__4.i;
				    i__6 = (icol1 - 1) * lda + j;
				    z__5.r = t3.r * a[i__6].r - t3.i * a[i__6]
					    .i, z__5.i = t3.r * a[i__6].i + 
					    t3.i * a[i__6].r;
				    z__1.r = z__2.r + z__5.r, z__1.i = z__2.i 
					    + z__5.i;
				    sum.r = z__1.r, sum.i = z__1.i;
				    i__4 = ii + j;
				    i__5 = ii + j;
				    z__1.r = work[i__5].r - sum.r, z__1.i = 
					    work[i__5].i - sum.i;
				    work[i__4].r = z__1.r, work[i__4].i = 
					    z__1.i;
				    i__4 = jj + j;
				    i__5 = jj + j;
				    d_cnjg(&z__3, &v2);
				    z__2.r = sum.r * z__3.r - sum.i * z__3.i, 
					    z__2.i = sum.r * z__3.i + sum.i * 
					    z__3.r;
				    z__1.r = work[i__5].r - z__2.r, z__1.i = 
					    work[i__5].i - z__2.i;
				    work[i__4].r = z__1.r, work[i__4].i = 
					    z__1.i;
				    i__4 = (icol1 - 1) * lda + j;
				    i__5 = (icol1 - 1) * lda + j;
				    d_cnjg(&z__3, &v3);
				    z__2.r = sum.r * z__3.r - sum.i * z__3.i, 
					    z__2.i = sum.r * z__3.i + sum.i * 
					    z__3.r;
				    z__1.r = a[i__5].r - z__2.r, z__1.i = a[
					    i__5].i - z__2.i;
				    a[i__4].r = z__1.r, a[i__4].i = z__1.i;
/* L310: */
				}
				if (istart == istop) {
				    i__7 = irow2 - irow1 + 1;
				    i__4 = irow2 - irow1 + 1;
				    zgesd2d_(&contxt, &i__7, &c__2, &work[
					    icbuf + 1], &i__4, &myrow, &left);
				}
			    }
			}
			if ((k - 1) % hbl == hbl - 1 && npcol > 1) {
			    if (icol1 == icol2) {
				if (istart == istop) {
				    i__7 = irow2 - irow1 + 1;
				    zgesd2d_(&contxt, &i__7, &c__2, &a[(icol1 
					    - 2) * lda + irow1], &lda, &myrow,
					     &right);
				}
				if (skip) {
				    i__7 = irow2 - irow1 + 1;
				    zgerv2d_(&contxt, &i__7, &c__2, &a[(icol1 
					    - 2) * lda + irow1], &lda, &myrow,
					     &right);
				}
			    } else if (skip) {
				if (istart == istop) {
				    i__7 = irow2 - irow1 + 1;
				    i__4 = irow2 - irow1 + 1;
				    zgerv2d_(&contxt, &i__7, &c__2, &work[
					    icbuf + 1], &i__4, &myrow, &left);
				}
				ii = icbuf + irow2 - (irow1 << 1) + 2;
				i__7 = irow2;
				for (j = irow1; j <= i__7; ++j) {
				    i__4 = j + ii;
				    z__3.r = t1.r * work[i__4].r - t1.i * 
					    work[i__4].i, z__3.i = t1.r * 
					    work[i__4].i + t1.i * work[i__4]
					    .r;
				    i__5 = (icol1 - 1) * lda + j;
				    z__4.r = t2.r * a[i__5].r - t2.i * a[i__5]
					    .i, z__4.i = t2.r * a[i__5].i + 
					    t2.i * a[i__5].r;
				    z__2.r = z__3.r + z__4.r, z__2.i = z__3.i 
					    + z__4.i;
				    i__6 = icol1 * lda + j;
				    z__5.r = t3.r * a[i__6].r - t3.i * a[i__6]
					    .i, z__5.i = t3.r * a[i__6].i + 
					    t3.i * a[i__6].r;
				    z__1.r = z__2.r + z__5.r, z__1.i = z__2.i 
					    + z__5.i;
				    sum.r = z__1.r, sum.i = z__1.i;
				    i__4 = j + ii;
				    i__5 = j + ii;
				    z__1.r = work[i__5].r - sum.r, z__1.i = 
					    work[i__5].i - sum.i;
				    work[i__4].r = z__1.r, work[i__4].i = 
					    z__1.i;
				    i__4 = (icol1 - 1) * lda + j;
				    i__5 = (icol1 - 1) * lda + j;
				    d_cnjg(&z__3, &v2);
				    z__2.r = sum.r * z__3.r - sum.i * z__3.i, 
					    z__2.i = sum.r * z__3.i + sum.i * 
					    z__3.r;
				    z__1.r = a[i__5].r - z__2.r, z__1.i = a[
					    i__5].i - z__2.i;
				    a[i__4].r = z__1.r, a[i__4].i = z__1.i;
				    i__4 = icol1 * lda + j;
				    i__5 = icol1 * lda + j;
				    d_cnjg(&z__3, &v3);
				    z__2.r = sum.r * z__3.r - sum.i * z__3.i, 
					    z__2.i = sum.r * z__3.i + sum.i * 
					    z__3.r;
				    z__1.r = a[i__5].r - z__2.r, z__1.i = a[
					    i__5].i - z__2.i;
				    a[i__4].r = z__1.r, a[i__4].i = z__1.i;
/* L320: */
				}
				i__7 = irow2 - irow1 + 1;
				i__4 = irow2 - irow1 + 1;
				zgesd2d_(&contxt, &i__7, &c__2, &work[icbuf + 
					1], &i__4, &myrow, &left);
			    }
			}

/*                    If we want Z and we haven't already done any Z */

			if (*wantz && (k - 1) % hbl >= hbl - 2 && npcol > 1) {

/*                       Accumulate transformations in the matrix Z */

			    irow1 = liloz;
			    irow2 = lihiz;
			    if ((k - 1) % hbl == hbl - 2) {
				if (icol1 != icol2) {
				    i__7 = irow2 - irow1 + 1;
				    zgesd2d_(&contxt, &i__7, &c__2, &z__[(
					    icol1 - 1) * ldz + irow1], &ldz, &
					    myrow, &right);
				    if (istart == istop && skip) {
					i__7 = irow2 - irow1 + 1;
					zgerv2d_(&contxt, &i__7, &c__2, &z__[(
						icol1 - 1) * ldz + irow1], &
						ldz, &myrow, &right);
				    }
				} else if (skip) {
				    i__7 = irow2 - irow1 + 1;
				    i__4 = irow2 - irow1 + 1;
				    zgerv2d_(&contxt, &i__7, &c__2, &work[
					    izbuf + 1], &i__4, &myrow, &left);
				    icol1 = (icol1 - 1) * ldz;
				    ii = izbuf - irow1 + 1;
				    jj = izbuf + irow2 - (irow1 << 1) + 2;
				    i__7 = irow2;
				    for (j = irow1; j <= i__7; ++j) {
					i__4 = ii + j;
					z__3.r = t1.r * work[i__4].r - t1.i * 
						work[i__4].i, z__3.i = t1.r * 
						work[i__4].i + t1.i * work[
						i__4].r;
					i__5 = jj + j;
					z__4.r = t2.r * work[i__5].r - t2.i * 
						work[i__5].i, z__4.i = t2.r * 
						work[i__5].i + t2.i * work[
						i__5].r;
					z__2.r = z__3.r + z__4.r, z__2.i = 
						z__3.i + z__4.i;
					i__6 = icol1 + j;
					z__5.r = t3.r * z__[i__6].r - t3.i * 
						z__[i__6].i, z__5.i = t3.r * 
						z__[i__6].i + t3.i * z__[i__6]
						.r;
					z__1.r = z__2.r + z__5.r, z__1.i = 
						z__2.i + z__5.i;
					sum.r = z__1.r, sum.i = z__1.i;
					i__4 = ii + j;
					i__5 = ii + j;
					z__1.r = work[i__5].r - sum.r, z__1.i 
						= work[i__5].i - sum.i;
					work[i__4].r = z__1.r, work[i__4].i = 
						z__1.i;
					i__4 = jj + j;
					i__5 = jj + j;
					d_cnjg(&z__3, &v2);
					z__2.r = sum.r * z__3.r - sum.i * 
						z__3.i, z__2.i = sum.r * 
						z__3.i + sum.i * z__3.r;
					z__1.r = work[i__5].r - z__2.r, 
						z__1.i = work[i__5].i - 
						z__2.i;
					work[i__4].r = z__1.r, work[i__4].i = 
						z__1.i;
					i__4 = icol1 + j;
					i__5 = icol1 + j;
					d_cnjg(&z__3, &v3);
					z__2.r = sum.r * z__3.r - sum.i * 
						z__3.i, z__2.i = sum.r * 
						z__3.i + sum.i * z__3.r;
					z__1.r = z__[i__5].r - z__2.r, z__1.i 
						= z__[i__5].i - z__2.i;
					z__[i__4].r = z__1.r, z__[i__4].i = 
						z__1.i;
/* L330: */
				    }
				    if (istart == istop) {
					i__7 = irow2 - irow1 + 1;
					i__4 = irow2 - irow1 + 1;
					zgesd2d_(&contxt, &i__7, &c__2, &work[
						izbuf + 1], &i__4, &myrow, &
						left);
				    }
				}
			    }
			    if ((k - 1) % hbl == hbl - 1) {
				if (icol1 == icol2) {
				    if (istart == istop) {
					i__7 = irow2 - irow1 + 1;
					zgesd2d_(&contxt, &i__7, &c__2, &z__[(
						icol1 - 2) * ldz + irow1], &
						ldz, &myrow, &right);
				    }
				    if (skip) {
					i__7 = irow2 - irow1 + 1;
					zgerv2d_(&contxt, &i__7, &c__2, &z__[(
						icol1 - 2) * ldz + irow1], &
						ldz, &myrow, &right);
				    }
				} else if (skip) {
				    if (istart == istop) {
					i__7 = irow2 - irow1 + 1;
					i__4 = irow2 - irow1 + 1;
					zgerv2d_(&contxt, &i__7, &c__2, &work[
						izbuf + 1], &i__4, &myrow, &
						left);
				    }
				    icol1 = (icol1 - 1) * ldz;
				    ii = izbuf + irow2 - (irow1 << 1) + 2;
				    i__7 = irow2;
				    for (j = irow1; j <= i__7; ++j) {
					i__4 = ii + j;
					z__3.r = t1.r * work[i__4].r - t1.i * 
						work[i__4].i, z__3.i = t1.r * 
						work[i__4].i + t1.i * work[
						i__4].r;
					i__5 = j + icol1;
					z__4.r = t2.r * z__[i__5].r - t2.i * 
						z__[i__5].i, z__4.i = t2.r * 
						z__[i__5].i + t2.i * z__[i__5]
						.r;
					z__2.r = z__3.r + z__4.r, z__2.i = 
						z__3.i + z__4.i;
					i__6 = j + icol1 + ldz;
					z__5.r = t3.r * z__[i__6].r - t3.i * 
						z__[i__6].i, z__5.i = t3.r * 
						z__[i__6].i + t3.i * z__[i__6]
						.r;
					z__1.r = z__2.r + z__5.r, z__1.i = 
						z__2.i + z__5.i;
					sum.r = z__1.r, sum.i = z__1.i;
					i__4 = ii + j;
					i__5 = ii + j;
					z__1.r = work[i__5].r - sum.r, z__1.i 
						= work[i__5].i - sum.i;
					work[i__4].r = z__1.r, work[i__4].i = 
						z__1.i;
					i__4 = j + icol1;
					i__5 = j + icol1;
					d_cnjg(&z__3, &v2);
					z__2.r = sum.r * z__3.r - sum.i * 
						z__3.i, z__2.i = sum.r * 
						z__3.i + sum.i * z__3.r;
					z__1.r = z__[i__5].r - z__2.r, z__1.i 
						= z__[i__5].i - z__2.i;
					z__[i__4].r = z__1.r, z__[i__4].i = 
						z__1.i;
					i__4 = j + icol1 + ldz;
					i__5 = j + icol1 + ldz;
					d_cnjg(&z__3, &v3);
					z__2.r = sum.r * z__3.r - sum.i * 
						z__3.i, z__2.i = sum.r * 
						z__3.i + sum.i * z__3.r;
					z__1.r = z__[i__5].r - z__2.r, z__1.i 
						= z__[i__5].i - z__2.i;
					z__[i__4].r = z__1.r, z__[i__4].i = 
						z__1.i;
/* L340: */
				    }
				    i__7 = irow2 - irow1 + 1;
				    i__4 = irow2 - irow1 + 1;
				    zgesd2d_(&contxt, &i__7, &c__2, &work[
					    izbuf + 1], &i__4, &myrow, &left);
				}
			    }
			}
		    }
/* L350: */
		}
L360:
		;
	    }

	    if (skip) {
		goto L450;
	    }

	    i__2 = ibulge;
	    for (ki = 1; ki <= i__2; ++ki) {
		if (kcol[ki - 1] > kp2col[ki - 1]) {
		    goto L420;
		}
		if (mycol != icurcol[ki - 1] && right != icurcol[ki - 1]) {
		    goto L420;
		}
/* Computing MAX */
		i__3 = k1[ki - 1];
		istart = max(i__3,m);
/* Computing MIN */
		i__3 = k2[ki - 1], i__7 = i__ - 1;
		istop = min(i__3,i__7);
		if ((istart - 1) % hbl >= hbl - 2) {

/*                 INFO is found in a buffer */

		    ispec = 1;
		} else {

/*                 All INFO is local */

		    ispec = 0;
		}
		i__3 = istop;
		for (k = istart; k <= i__3; ++k) {

		    i__7 = vecsidx + (k - 1) * 3 + 1;
		    v2.r = work[i__7].r, v2.i = work[i__7].i;
		    i__7 = vecsidx + (k - 1) * 3 + 2;
		    v3.r = work[i__7].r, v3.i = work[i__7].i;
		    i__7 = vecsidx + (k - 1) * 3 + 3;
		    t1.r = work[i__7].r, t1.i = work[i__7].i;
/* Computing MIN */
		    i__7 = 3, i__4 = i__ - k + 1;
		    nr = min(i__7,i__4);
		    z__1.r = t1.r * v2.r - t1.i * v2.i, z__1.i = t1.r * v2.i 
			    + t1.i * v2.r;
		    t2.r = z__1.r, t2.i = z__1.i;
		    if (nr == 3 && kcol[ki - 1] <= kp2col[ki - 1]) {
			z__1.r = t1.r * v3.r - t1.i * v3.i, z__1.i = t1.r * 
				v3.i + t1.i * v3.r;
			t3.r = z__1.r, t3.i = z__1.i;

			if (k < istop && (k - 1) % hbl < hbl - 2) {
/* Computing MIN */
			    i__7 = istart + 1;
			    itmp1 = min(i__7,i__) - 1;
			} else {
			    if ((k - 1) % hbl < hbl - 2) {
/* Computing MIN */
				i__7 = k + 3;
				itmp1 = min(i__7,i__);
			    }
			    if ((k - 1) % hbl == hbl - 2) {
/* Computing MAX */
				i__7 = i1, i__4 = k - 1;
				itmp1 = max(i__7,i__4) - 1;
			    }
			    if ((k - 1) % hbl == hbl - 1) {
/* Computing MAX */
				i__7 = i1, i__4 = k - 2;
				itmp1 = max(i__7,i__4) - 1;
			    }
			}
			if ((k - 1) % hbl < hbl - 2) {
			    icol1 = kcol[ki - 1] + k - istart;
			    icol2 = kp2col[ki - 1] + k - istart;
			} else {
			    icol1 = kcol[ki - 1];
			    icol2 = kp2col[ki - 1];
			    if (k > istart) {
				if (right == icurcol[ki - 1]) {
				    ++icol1;
				}
				if (mycol == icurcol[ki - 1]) {
				    ++icol2;
				}
			    }
			}
			infog1l_(&i1, &hbl, &nprow, &myrow, &iafirst, &irow1, 
				&irow2);
			irow2 = numroc_(&itmp1, &hbl, &myrow, &iafirst, &
				nprow);
			if ((k - 1) % hbl == hbl - 2 && npcol > 1) {
			    if (icol1 == icol2) {
				i__7 = irow2 - irow1 + 1;
				i__4 = irow2 - irow1 + 1;
				zgerv2d_(&contxt, &i__7, &c__2, &work[icbuf + 
					1], &i__4, &myrow, &left);
				ii = icbuf - irow1 + 1;
				jj = icbuf + irow2 - (irow1 << 1) + 2;
				i__7 = irow2;
				for (j = irow1; j <= i__7; ++j) {
				    i__4 = ii + j;
				    z__3.r = t1.r * work[i__4].r - t1.i * 
					    work[i__4].i, z__3.i = t1.r * 
					    work[i__4].i + t1.i * work[i__4]
					    .r;
				    i__5 = jj + j;
				    z__4.r = t2.r * work[i__5].r - t2.i * 
					    work[i__5].i, z__4.i = t2.r * 
					    work[i__5].i + t2.i * work[i__5]
					    .r;
				    z__2.r = z__3.r + z__4.r, z__2.i = z__3.i 
					    + z__4.i;
				    i__6 = (icol1 - 1) * lda + j;
				    z__5.r = t3.r * a[i__6].r - t3.i * a[i__6]
					    .i, z__5.i = t3.r * a[i__6].i + 
					    t3.i * a[i__6].r;
				    z__1.r = z__2.r + z__5.r, z__1.i = z__2.i 
					    + z__5.i;
				    sum.r = z__1.r, sum.i = z__1.i;
				    i__4 = ii + j;
				    i__5 = ii + j;
				    z__1.r = work[i__5].r - sum.r, z__1.i = 
					    work[i__5].i - sum.i;
				    work[i__4].r = z__1.r, work[i__4].i = 
					    z__1.i;
				    i__4 = jj + j;
				    i__5 = jj + j;
				    d_cnjg(&z__3, &v2);
				    z__2.r = sum.r * z__3.r - sum.i * z__3.i, 
					    z__2.i = sum.r * z__3.i + sum.i * 
					    z__3.r;
				    z__1.r = work[i__5].r - z__2.r, z__1.i = 
					    work[i__5].i - z__2.i;
				    work[i__4].r = z__1.r, work[i__4].i = 
					    z__1.i;
				    i__4 = (icol1 - 1) * lda + j;
				    i__5 = (icol1 - 1) * lda + j;
				    d_cnjg(&z__3, &v3);
				    z__2.r = sum.r * z__3.r - sum.i * z__3.i, 
					    z__2.i = sum.r * z__3.i + sum.i * 
					    z__3.r;
				    z__1.r = a[i__5].r - z__2.r, z__1.i = a[
					    i__5].i - z__2.i;
				    a[i__4].r = z__1.r, a[i__4].i = z__1.i;
/* L370: */
				}
				if (istart == istop) {
				    i__7 = irow2 - irow1 + 1;
				    i__4 = irow2 - irow1 + 1;
				    zgesd2d_(&contxt, &i__7, &c__2, &work[
					    icbuf + 1], &i__4, &myrow, &left);
				}
			    }
			}
			if ((k - 1) % hbl == hbl - 1 && npcol > 1) {
			    if (icol1 != icol2) {
				if (istart == istop) {
				    i__7 = irow2 - irow1 + 1;
				    i__4 = irow2 - irow1 + 1;
				    zgerv2d_(&contxt, &i__7, &c__2, &work[
					    icbuf + 1], &i__4, &myrow, &left);
				}
				ii = icbuf + irow2 - (irow1 << 1) + 2;
				i__7 = irow2;
				for (j = irow1; j <= i__7; ++j) {
				    i__4 = j + ii;
				    z__3.r = t1.r * work[i__4].r - t1.i * 
					    work[i__4].i, z__3.i = t1.r * 
					    work[i__4].i + t1.i * work[i__4]
					    .r;
				    i__5 = (icol1 - 1) * lda + j;
				    z__4.r = t2.r * a[i__5].r - t2.i * a[i__5]
					    .i, z__4.i = t2.r * a[i__5].i + 
					    t2.i * a[i__5].r;
				    z__2.r = z__3.r + z__4.r, z__2.i = z__3.i 
					    + z__4.i;
				    i__6 = icol1 * lda + j;
				    z__5.r = t3.r * a[i__6].r - t3.i * a[i__6]
					    .i, z__5.i = t3.r * a[i__6].i + 
					    t3.i * a[i__6].r;
				    z__1.r = z__2.r + z__5.r, z__1.i = z__2.i 
					    + z__5.i;
				    sum.r = z__1.r, sum.i = z__1.i;
				    i__4 = j + ii;
				    i__5 = j + ii;
				    z__1.r = work[i__5].r - sum.r, z__1.i = 
					    work[i__5].i - sum.i;
				    work[i__4].r = z__1.r, work[i__4].i = 
					    z__1.i;
				    i__4 = (icol1 - 1) * lda + j;
				    i__5 = (icol1 - 1) * lda + j;
				    d_cnjg(&z__3, &v2);
				    z__2.r = sum.r * z__3.r - sum.i * z__3.i, 
					    z__2.i = sum.r * z__3.i + sum.i * 
					    z__3.r;
				    z__1.r = a[i__5].r - z__2.r, z__1.i = a[
					    i__5].i - z__2.i;
				    a[i__4].r = z__1.r, a[i__4].i = z__1.i;
				    i__4 = icol1 * lda + j;
				    i__5 = icol1 * lda + j;
				    d_cnjg(&z__3, &v3);
				    z__2.r = sum.r * z__3.r - sum.i * z__3.i, 
					    z__2.i = sum.r * z__3.i + sum.i * 
					    z__3.r;
				    z__1.r = a[i__5].r - z__2.r, z__1.i = a[
					    i__5].i - z__2.i;
				    a[i__4].r = z__1.r, a[i__4].i = z__1.i;
/* L380: */
				}
				i__7 = irow2 - irow1 + 1;
				i__4 = irow2 - irow1 + 1;
				zgesd2d_(&contxt, &i__7, &c__2, &work[icbuf + 
					1], &i__4, &myrow, &left);
			    }
			}


/*                 If we want Z and we haven't already done any Z */
			if (*wantz && (k - 1) % hbl >= hbl - 2 && npcol > 1) {

/*                    Accumulate transformations in the matrix Z */

			    irow1 = liloz;
			    irow2 = lihiz;
			    if ((k - 1) % hbl == hbl - 2) {
				if (icol1 == icol2) {
				    i__7 = irow2 - irow1 + 1;
				    i__4 = irow2 - irow1 + 1;
				    zgerv2d_(&contxt, &i__7, &c__2, &work[
					    izbuf + 1], &i__4, &myrow, &left);
				    icol1 = (icol1 - 1) * ldz;
				    ii = izbuf - irow1 + 1;
				    jj = izbuf + irow2 - (irow1 << 1) + 2;
				    i__7 = irow2;
				    for (j = irow1; j <= i__7; ++j) {
					i__4 = ii + j;
					z__3.r = t1.r * work[i__4].r - t1.i * 
						work[i__4].i, z__3.i = t1.r * 
						work[i__4].i + t1.i * work[
						i__4].r;
					i__5 = jj + j;
					z__4.r = t2.r * work[i__5].r - t2.i * 
						work[i__5].i, z__4.i = t2.r * 
						work[i__5].i + t2.i * work[
						i__5].r;
					z__2.r = z__3.r + z__4.r, z__2.i = 
						z__3.i + z__4.i;
					i__6 = icol1 + j;
					z__5.r = t3.r * z__[i__6].r - t3.i * 
						z__[i__6].i, z__5.i = t3.r * 
						z__[i__6].i + t3.i * z__[i__6]
						.r;
					z__1.r = z__2.r + z__5.r, z__1.i = 
						z__2.i + z__5.i;
					sum.r = z__1.r, sum.i = z__1.i;
					i__4 = ii + j;
					i__5 = ii + j;
					z__1.r = work[i__5].r - sum.r, z__1.i 
						= work[i__5].i - sum.i;
					work[i__4].r = z__1.r, work[i__4].i = 
						z__1.i;
					i__4 = jj + j;
					i__5 = jj + j;
					d_cnjg(&z__3, &v2);
					z__2.r = sum.r * z__3.r - sum.i * 
						z__3.i, z__2.i = sum.r * 
						z__3.i + sum.i * z__3.r;
					z__1.r = work[i__5].r - z__2.r, 
						z__1.i = work[i__5].i - 
						z__2.i;
					work[i__4].r = z__1.r, work[i__4].i = 
						z__1.i;
					i__4 = icol1 + j;
					i__5 = icol1 + j;
					d_cnjg(&z__3, &v3);
					z__2.r = sum.r * z__3.r - sum.i * 
						z__3.i, z__2.i = sum.r * 
						z__3.i + sum.i * z__3.r;
					z__1.r = z__[i__5].r - z__2.r, z__1.i 
						= z__[i__5].i - z__2.i;
					z__[i__4].r = z__1.r, z__[i__4].i = 
						z__1.i;
/* L390: */
				    }
				    if (istart == istop) {
					i__7 = irow2 - irow1 + 1;
					i__4 = irow2 - irow1 + 1;
					zgesd2d_(&contxt, &i__7, &c__2, &work[
						izbuf + 1], &i__4, &myrow, &
						left);
				    }
				}
			    }
			    if ((k - 1) % hbl == hbl - 1) {
				if (icol1 != icol2) {
				    if (istart == istop) {
					i__7 = irow2 - irow1 + 1;
					i__4 = irow2 - irow1 + 1;
					zgerv2d_(&contxt, &i__7, &c__2, &work[
						izbuf + 1], &i__4, &myrow, &
						left);
				    }
				    icol1 = (icol1 - 1) * ldz;
				    ii = izbuf + irow2 - (irow1 << 1) + 2;
				    i__7 = irow2;
				    for (j = irow1; j <= i__7; ++j) {
					i__4 = ii + j;
					z__3.r = t1.r * work[i__4].r - t1.i * 
						work[i__4].i, z__3.i = t1.r * 
						work[i__4].i + t1.i * work[
						i__4].r;
					i__5 = j + icol1;
					z__4.r = t2.r * z__[i__5].r - t2.i * 
						z__[i__5].i, z__4.i = t2.r * 
						z__[i__5].i + t2.i * z__[i__5]
						.r;
					z__2.r = z__3.r + z__4.r, z__2.i = 
						z__3.i + z__4.i;
					i__6 = j + icol1 + ldz;
					z__5.r = t3.r * z__[i__6].r - t3.i * 
						z__[i__6].i, z__5.i = t3.r * 
						z__[i__6].i + t3.i * z__[i__6]
						.r;
					z__1.r = z__2.r + z__5.r, z__1.i = 
						z__2.i + z__5.i;
					sum.r = z__1.r, sum.i = z__1.i;
					i__4 = ii + j;
					i__5 = ii + j;
					z__1.r = work[i__5].r - sum.r, z__1.i 
						= work[i__5].i - sum.i;
					work[i__4].r = z__1.r, work[i__4].i = 
						z__1.i;
					i__4 = j + icol1;
					i__5 = j + icol1;
					d_cnjg(&z__3, &v2);
					z__2.r = sum.r * z__3.r - sum.i * 
						z__3.i, z__2.i = sum.r * 
						z__3.i + sum.i * z__3.r;
					z__1.r = z__[i__5].r - z__2.r, z__1.i 
						= z__[i__5].i - z__2.i;
					z__[i__4].r = z__1.r, z__[i__4].i = 
						z__1.i;
					i__4 = j + icol1 + ldz;
					i__5 = j + icol1 + ldz;
					d_cnjg(&z__3, &v3);
					z__2.r = sum.r * z__3.r - sum.i * 
						z__3.i, z__2.i = sum.r * 
						z__3.i + sum.i * z__3.r;
					z__1.r = z__[i__5].r - z__2.r, z__1.i 
						= z__[i__5].i - z__2.i;
					z__[i__4].r = z__1.r, z__[i__4].i = 
						z__1.i;
/* L400: */
				    }
				    i__7 = irow2 - irow1 + 1;
				    i__4 = irow2 - irow1 + 1;
				    zgesd2d_(&contxt, &i__7, &c__2, &work[
					    izbuf + 1], &i__4, &myrow, &left);
				}
			    }
			}
		    }
/* L410: */
		}
L420:
		;
	    }

	    i__2 = ibulge;
	    for (ki = 1; ki <= i__2; ++ki) {
		if (kcol[ki - 1] > kp2col[ki - 1]) {
		    goto L440;
		}
		if (mycol != icurcol[ki - 1] && right != icurcol[ki - 1]) {
		    goto L440;
		}
/* Computing MAX */
		i__3 = k1[ki - 1];
		istart = max(i__3,m);
/* Computing MIN */
		i__3 = k2[ki - 1], i__7 = i__ - 1;
		istop = min(i__3,i__7);
		if ((istart - 1) % hbl >= hbl - 2) {

/*              INFO is found in a buffer */

		    ispec = 1;
		} else {

/*              All INFO is local */

		    ispec = 0;
		}
		i__3 = istop;
		for (k = istart; k <= i__3; ++k) {

		    i__7 = vecsidx + (k - 1) * 3 + 1;
		    v2.r = work[i__7].r, v2.i = work[i__7].i;
		    i__7 = vecsidx + (k - 1) * 3 + 2;
		    v3.r = work[i__7].r, v3.i = work[i__7].i;
		    i__7 = vecsidx + (k - 1) * 3 + 3;
		    t1.r = work[i__7].r, t1.i = work[i__7].i;
/* Computing MIN */
		    i__7 = 3, i__4 = i__ - k + 1;
		    nr = min(i__7,i__4);
		    z__1.r = t1.r * v2.r - t1.i * v2.i, z__1.i = t1.r * v2.i 
			    + t1.i * v2.r;
		    t2.r = z__1.r, t2.i = z__1.i;
		    if (nr == 3 && kcol[ki - 1] <= kp2col[ki - 1]) {
			z__1.r = t1.r * v3.r - t1.i * v3.i, z__1.i = t1.r * 
				v3.i + t1.i * v3.r;
			t3.r = z__1.r, t3.i = z__1.i;

			if (k < istop && (k - 1) % hbl < hbl - 2) {
/* Computing MIN */
			    i__7 = istart + 1;
			    itmp1 = min(i__7,i__) - 1;
			} else {
			    if ((k - 1) % hbl < hbl - 2) {
/* Computing MIN */
				i__7 = k + 3;
				itmp1 = min(i__7,i__);
			    }
			    if ((k - 1) % hbl == hbl - 2) {
/* Computing MAX */
				i__7 = i1, i__4 = k - 1;
				itmp1 = max(i__7,i__4) - 1;
			    }
			    if ((k - 1) % hbl == hbl - 1) {
/* Computing MAX */
				i__7 = i1, i__4 = k - 2;
				itmp1 = max(i__7,i__4) - 1;
			    }
			}
			if ((k - 1) % hbl < hbl - 2) {
			    icol1 = kcol[ki - 1] + k - istart;
			    icol2 = kp2col[ki - 1] + k - istart;
			} else {
			    icol1 = kcol[ki - 1];
			    icol2 = kp2col[ki - 1];
			    if (k > istart) {
				if (right == icurcol[ki - 1]) {
				    ++icol1;
				}
				if (mycol == icurcol[ki - 1]) {
				    ++icol2;
				}
			    }
			}
			infog1l_(&i1, &hbl, &nprow, &myrow, &iafirst, &irow1, 
				&irow2);
			irow2 = numroc_(&itmp1, &hbl, &myrow, &iafirst, &
				nprow);
			if ((k - 1) % hbl == hbl - 2 && npcol > 1) {
			    if (icol1 != icol2) {
				if (istart == istop) {
				    i__7 = irow2 - irow1 + 1;
				    zgerv2d_(&contxt, &i__7, &c__2, &a[(icol1 
					    - 1) * lda + irow1], &lda, &myrow,
					     &right);
				}
			    }
			}
			if ((k - 1) % hbl == hbl - 1 && npcol > 1) {
			    if (icol1 == icol2) {
				i__7 = irow2 - irow1 + 1;
				zgerv2d_(&contxt, &i__7, &c__2, &a[(icol1 - 2)
					 * lda + irow1], &lda, &myrow, &right)
					;
			    }
			}

/*                    If we want Z and we haven't already done any Z */

			if (*wantz && (k - 1) % hbl >= hbl - 2 && npcol > 1) {

/*                       Accumulate transformations in the matrix Z */

			    irow1 = liloz;
			    irow2 = lihiz;
			    if ((k - 1) % hbl == hbl - 2) {
				if (icol1 != icol2) {
				    if (istart == istop) {
					i__7 = irow2 - irow1 + 1;
					zgerv2d_(&contxt, &i__7, &c__2, &z__[(
						icol1 - 1) * ldz + irow1], &
						ldz, &myrow, &right);
				    }
				}
			    }
			    if ((k - 1) % hbl == hbl - 1) {
				if (icol1 == icol2) {
				    i__7 = irow2 - irow1 + 1;
				    zgerv2d_(&contxt, &i__7, &c__2, &z__[(
					    icol1 - 2) * ldz + irow1], &ldz, &
					    myrow, &right);
				}
			    }
			}
		    }
/* L430: */
		}
L440:
		;
	    }

/*           Column work done */

L450:

/*           Now do NR=2 work */

	    i__2 = ibulge;
	    for (ki = 1; ki <= i__2; ++ki) {
/* Computing MAX */
		i__3 = k1[ki - 1];
		istart = max(i__3,m);
/* Computing MIN */
		i__3 = k2[ki - 1], i__7 = i__ - 1;
		istop = min(i__3,i__7);
		if ((istart - 1) % hbl >= hbl - 2) {

/*                 INFO is found in a buffer */

		    ispec = 1;
		} else {

/*                 All INFO is local */

		    ispec = 0;
		}

		i__3 = istop;
		for (k = istart; k <= i__3; ++k) {

		    i__7 = vecsidx + (k - 1) * 3 + 1;
		    v2.r = work[i__7].r, v2.i = work[i__7].i;
		    i__7 = vecsidx + (k - 1) * 3 + 2;
		    v3.r = work[i__7].r, v3.i = work[i__7].i;
		    i__7 = vecsidx + (k - 1) * 3 + 3;
		    t1.r = work[i__7].r, t1.i = work[i__7].i;
/* Computing MIN */
		    i__7 = 3, i__4 = i__ - k + 1;
		    nr = min(i__7,i__4);
		    z__1.r = t1.r * v2.r - t1.i * v2.i, z__1.i = t1.r * v2.i 
			    + t1.i * v2.r;
		    t2.r = z__1.r, t2.i = z__1.i;
		    if (nr == 2) {

/*              Apply G from the left to transform the rows of the matrix */
/*              in columns K to I2. */

			infog1l_(&k, &hbl, &npcol, &mycol, &jafirst, &liloh, &
				lihih);
			lihih = locali2;
			infog1l_(&c__1, &hbl, &nprow, &myrow, &iafirst, &
				itmp2, &itmp1);
			i__7 = k + 1;
			itmp1 = numroc_(&i__7, &hbl, &myrow, &iafirst, &nprow)
				;
			if (icurrow[ki - 1] == myrow) {
			    if (ispec == 0 || nprow == 1 || (k - 1) % hbl == 
				    hbl - 2) {
				--itmp1;
				i__7 = (lihih - 1) * lda;
				i__4 = lda;
				for (j = (liloh - 1) * lda; i__4 < 0 ? j >= 
					i__7 : j <= i__7; j += i__4) {
				    d_cnjg(&z__3, &t1);
				    i__5 = itmp1 + j;
				    z__2.r = z__3.r * a[i__5].r - z__3.i * a[
					    i__5].i, z__2.i = z__3.r * a[i__5]
					    .i + z__3.i * a[i__5].r;
				    d_cnjg(&z__5, &t2);
				    i__6 = itmp1 + 1 + j;
				    z__4.r = z__5.r * a[i__6].r - z__5.i * a[
					    i__6].i, z__4.i = z__5.r * a[i__6]
					    .i + z__5.i * a[i__6].r;
				    z__1.r = z__2.r + z__4.r, z__1.i = z__2.i 
					    + z__4.i;
				    sum.r = z__1.r, sum.i = z__1.i;
				    i__5 = itmp1 + j;
				    i__6 = itmp1 + j;
				    z__1.r = a[i__6].r - sum.r, z__1.i = a[
					    i__6].i - sum.i;
				    a[i__5].r = z__1.r, a[i__5].i = z__1.i;
				    i__5 = itmp1 + 1 + j;
				    i__6 = itmp1 + 1 + j;
				    z__2.r = sum.r * v2.r - sum.i * v2.i, 
					    z__2.i = sum.r * v2.i + sum.i * 
					    v2.r;
				    z__1.r = a[i__6].r - z__2.r, z__1.i = a[
					    i__6].i - z__2.i;
				    a[i__5].r = z__1.r, a[i__5].i = z__1.i;
/* L460: */
				}
			    } else {
				if ((k - 1) % hbl == hbl - 1) {
				    i__4 = lihih - liloh + 1;
				    zgerv2d_(&contxt, &c__1, &i__4, &work[
					    irbuf + 1], &c__1, &up, &mycol);
				    i__4 = lihih;
				    for (j = liloh; j <= i__4; ++j) {
					d_cnjg(&z__3, &t1);
					i__7 = irbuf + j - liloh + 1;
					z__2.r = z__3.r * work[i__7].r - 
						z__3.i * work[i__7].i, z__2.i 
						= z__3.r * work[i__7].i + 
						z__3.i * work[i__7].r;
					d_cnjg(&z__5, &t2);
					i__5 = (j - 1) * lda + itmp1;
					z__4.r = z__5.r * a[i__5].r - z__5.i *
						 a[i__5].i, z__4.i = z__5.r * 
						a[i__5].i + z__5.i * a[i__5]
						.r;
					z__1.r = z__2.r + z__4.r, z__1.i = 
						z__2.i + z__4.i;
					sum.r = z__1.r, sum.i = z__1.i;
					i__7 = irbuf + j - liloh + 1;
					i__5 = irbuf + j - liloh + 1;
					z__1.r = work[i__5].r - sum.r, z__1.i 
						= work[i__5].i - sum.i;
					work[i__7].r = z__1.r, work[i__7].i = 
						z__1.i;
					i__7 = (j - 1) * lda + itmp1;
					i__5 = (j - 1) * lda + itmp1;
					z__2.r = sum.r * v2.r - sum.i * v2.i, 
						z__2.i = sum.r * v2.i + sum.i 
						* v2.r;
					z__1.r = a[i__5].r - z__2.r, z__1.i = 
						a[i__5].i - z__2.i;
					a[i__7].r = z__1.r, a[i__7].i = 
						z__1.i;
/* L470: */
				    }
				    i__4 = lihih - liloh + 1;
				    zgesd2d_(&contxt, &c__1, &i__4, &work[
					    irbuf + 1], &c__1, &up, &mycol);
				}
			    }
			} else {
			    if ((k - 1) % hbl == hbl - 1 && icurrow[ki - 1] ==
				     down) {
				i__4 = lihih - liloh + 1;
				zgesd2d_(&contxt, &c__1, &i__4, &a[(liloh - 1)
					 * lda + itmp1], &lda, &down, &mycol);
				i__4 = lihih - liloh + 1;
				zgerv2d_(&contxt, &c__1, &i__4, &a[(liloh - 1)
					 * lda + itmp1], &lda, &down, &mycol);
			    }
			}

/*              Apply G from the right to transform the columns of the */
/*              matrix in rows I1 to MIN(K+3,I). */

			infog1l_(&i1, &hbl, &nprow, &myrow, &iafirst, &liloh, 
				&lihih);
			lihih = numroc_(&i__, &hbl, &myrow, &iafirst, &nprow);

			if (icurcol[ki - 1] == mycol) {
/*                       LOCAL A(LILOZ:LIHIZ,KCOL:KCOL+2) */
			    if (ispec == 0 || npcol == 1 || (k - 1) % hbl == 
				    hbl - 2) {
				infog1l_(&k, &hbl, &npcol, &mycol, &jafirst, &
					itmp1, &itmp2);
				i__4 = k + 1;
				itmp2 = numroc_(&i__4, &hbl, &mycol, &jafirst,
					 &npcol);
				i__4 = lihih;
				for (j = liloh; j <= i__4; ++j) {
				    i__7 = (itmp1 - 1) * lda + j;
				    z__2.r = t1.r * a[i__7].r - t1.i * a[i__7]
					    .i, z__2.i = t1.r * a[i__7].i + 
					    t1.i * a[i__7].r;
				    i__5 = itmp1 * lda + j;
				    z__3.r = t2.r * a[i__5].r - t2.i * a[i__5]
					    .i, z__3.i = t2.r * a[i__5].i + 
					    t2.i * a[i__5].r;
				    z__1.r = z__2.r + z__3.r, z__1.i = z__2.i 
					    + z__3.i;
				    sum.r = z__1.r, sum.i = z__1.i;
				    i__7 = (itmp1 - 1) * lda + j;
				    i__5 = (itmp1 - 1) * lda + j;
				    z__1.r = a[i__5].r - sum.r, z__1.i = a[
					    i__5].i - sum.i;
				    a[i__7].r = z__1.r, a[i__7].i = z__1.i;
				    i__7 = itmp1 * lda + j;
				    i__5 = itmp1 * lda + j;
				    d_cnjg(&z__3, &v2);
				    z__2.r = sum.r * z__3.r - sum.i * z__3.i, 
					    z__2.i = sum.r * z__3.i + sum.i * 
					    z__3.r;
				    z__1.r = a[i__5].r - z__2.r, z__1.i = a[
					    i__5].i - z__2.i;
				    a[i__7].r = z__1.r, a[i__7].i = z__1.i;
/* L480: */
				}
			    } else {
				itmp1 = kcol[ki - 1];
				if ((k - 1) % hbl == hbl - 1) {
				    i__4 = lihih - liloh + 1;
				    i__7 = lihih - liloh + 1;
				    zgerv2d_(&contxt, &i__4, &c__1, &work[
					    icbuf + 1], &i__7, &myrow, &left);
				    i__4 = lihih;
				    for (j = liloh; j <= i__4; ++j) {
					i__7 = icbuf + j;
					z__2.r = t1.r * work[i__7].r - t1.i * 
						work[i__7].i, z__2.i = t1.r * 
						work[i__7].i + t1.i * work[
						i__7].r;
					i__5 = (itmp1 - 1) * lda + j;
					z__3.r = t2.r * a[i__5].r - t2.i * a[
						i__5].i, z__3.i = t2.r * a[
						i__5].i + t2.i * a[i__5].r;
					z__1.r = z__2.r + z__3.r, z__1.i = 
						z__2.i + z__3.i;
					sum.r = z__1.r, sum.i = z__1.i;
					i__7 = icbuf + j;
					i__5 = icbuf + j;
					z__1.r = work[i__5].r - sum.r, z__1.i 
						= work[i__5].i - sum.i;
					work[i__7].r = z__1.r, work[i__7].i = 
						z__1.i;
					i__7 = (itmp1 - 1) * lda + j;
					i__5 = (itmp1 - 1) * lda + j;
					d_cnjg(&z__3, &v2);
					z__2.r = sum.r * z__3.r - sum.i * 
						z__3.i, z__2.i = sum.r * 
						z__3.i + sum.i * z__3.r;
					z__1.r = a[i__5].r - z__2.r, z__1.i = 
						a[i__5].i - z__2.i;
					a[i__7].r = z__1.r, a[i__7].i = 
						z__1.i;
/* L490: */
				    }
				    i__4 = lihih - liloh + 1;
				    i__7 = lihih - liloh + 1;
				    zgesd2d_(&contxt, &i__4, &c__1, &work[
					    icbuf + 1], &i__7, &myrow, &left);
				}
			    }
			} else {
			    if ((k - 1) % hbl == hbl - 1 && icurcol[ki - 1] ==
				     right) {
				itmp1 = kcol[ki - 1];
				i__4 = lihih - liloh + 1;
				zgesd2d_(&contxt, &i__4, &c__1, &a[(itmp1 - 1)
					 * lda + liloh], &lda, &myrow, &right)
					;
				infog1l_(&k, &hbl, &npcol, &mycol, &jafirst, &
					itmp1, &itmp2);
				i__4 = k + 1;
				itmp2 = numroc_(&i__4, &hbl, &mycol, &jafirst,
					 &npcol);
				i__4 = lihih - liloh + 1;
				zgerv2d_(&contxt, &i__4, &c__1, &a[(itmp1 - 1)
					 * lda + liloh], &lda, &myrow, &right)
					;
			    }
			}

			if (*wantz) {

/*                       Accumulate transformations in the matrix Z */

			    if (icurcol[ki - 1] == mycol) {
/*                          LOCAL Z(LILOZ:LIHIZ,KCOL:KCOL+2) */
				if (ispec == 0 || npcol == 1 || (k - 1) % hbl 
					== hbl - 2) {
				    itmp1 = kcol[ki - 1] + k - istart;
				    itmp1 = (itmp1 - 1) * ldz;
				    i__4 = lihiz;
				    for (j = liloz; j <= i__4; ++j) {
					i__7 = j + itmp1;
					z__2.r = t1.r * z__[i__7].r - t1.i * 
						z__[i__7].i, z__2.i = t1.r * 
						z__[i__7].i + t1.i * z__[i__7]
						.r;
					i__5 = j + itmp1 + ldz;
					z__3.r = t2.r * z__[i__5].r - t2.i * 
						z__[i__5].i, z__3.i = t2.r * 
						z__[i__5].i + t2.i * z__[i__5]
						.r;
					z__1.r = z__2.r + z__3.r, z__1.i = 
						z__2.i + z__3.i;
					sum.r = z__1.r, sum.i = z__1.i;
					i__7 = j + itmp1;
					i__5 = j + itmp1;
					z__1.r = z__[i__5].r - sum.r, z__1.i =
						 z__[i__5].i - sum.i;
					z__[i__7].r = z__1.r, z__[i__7].i = 
						z__1.i;
					i__7 = j + itmp1 + ldz;
					i__5 = j + itmp1 + ldz;
					d_cnjg(&z__3, &v2);
					z__2.r = sum.r * z__3.r - sum.i * 
						z__3.i, z__2.i = sum.r * 
						z__3.i + sum.i * z__3.r;
					z__1.r = z__[i__5].r - z__2.r, z__1.i 
						= z__[i__5].i - z__2.i;
					z__[i__7].r = z__1.r, z__[i__7].i = 
						z__1.i;
/* L500: */
				    }
				} else {
				    itmp1 = kcol[ki - 1];
/*                             IF WE ACTUALLY OWN COLUMN K */
				    if ((k - 1) % hbl == hbl - 1) {
					i__4 = lihiz - liloz + 1;
					zgerv2d_(&contxt, &i__4, &c__1, &work[
						izbuf + 1], &ldz, &myrow, &
						left);
					itmp1 = (itmp1 - 1) * ldz;
					i__4 = lihiz;
					for (j = liloz; j <= i__4; ++j) {
					    i__7 = izbuf + j;
					    z__2.r = t1.r * work[i__7].r - 
						    t1.i * work[i__7].i, 
						    z__2.i = t1.r * work[i__7]
						    .i + t1.i * work[i__7].r;
					    i__5 = j + itmp1;
					    z__3.r = t2.r * z__[i__5].r - 
						    t2.i * z__[i__5].i, 
						    z__3.i = t2.r * z__[i__5]
						    .i + t2.i * z__[i__5].r;
					    z__1.r = z__2.r + z__3.r, z__1.i =
						     z__2.i + z__3.i;
					    sum.r = z__1.r, sum.i = z__1.i;
					    i__7 = izbuf + j;
					    i__5 = izbuf + j;
					    z__1.r = work[i__5].r - sum.r, 
						    z__1.i = work[i__5].i - 
						    sum.i;
					    work[i__7].r = z__1.r, work[i__7]
						    .i = z__1.i;
					    i__7 = j + itmp1;
					    i__5 = j + itmp1;
					    d_cnjg(&z__3, &v2);
					    z__2.r = sum.r * z__3.r - sum.i * 
						    z__3.i, z__2.i = sum.r * 
						    z__3.i + sum.i * z__3.r;
					    z__1.r = z__[i__5].r - z__2.r, 
						    z__1.i = z__[i__5].i - 
						    z__2.i;
					    z__[i__7].r = z__1.r, z__[i__7].i 
						    = z__1.i;
/* L510: */
					}
					i__4 = lihiz - liloz + 1;
					zgesd2d_(&contxt, &i__4, &c__1, &work[
						izbuf + 1], &ldz, &myrow, &
						left);
				    }
				}
			    } else {

/*                          NO WORK BUT NEED TO UPDATE ANYWAY???? */

				if ((k - 1) % hbl == hbl - 1 && icurcol[ki - 
					1] == right) {
				    itmp1 = kcol[ki - 1];
				    itmp1 = (itmp1 - 1) * ldz;
				    i__4 = lihiz - liloz + 1;
				    zgesd2d_(&contxt, &i__4, &c__1, &z__[
					    liloz + itmp1], &ldz, &myrow, &
					    right);
				    i__4 = lihiz - liloz + 1;
				    zgerv2d_(&contxt, &i__4, &c__1, &z__[
					    liloz + itmp1], &ldz, &myrow, &
					    right);
				}
			    }
			}
		    }
/* L520: */
		}

/*        Adjust local information for this bulge */

		if (nprow == 1) {
		    krow[ki - 1] = krow[ki - 1] + k2[ki - 1] - k1[ki - 1] + 1;
		    kp2row[ki - 1] = kp2row[ki - 1] + k2[ki - 1] - k1[ki - 1] 
			    + 1;
		}
		if ((k1[ki - 1] - 1) % hbl < hbl - 2 && icurrow[ki - 1] == 
			myrow && nprow > 1) {
		    krow[ki - 1] = krow[ki - 1] + k2[ki - 1] - k1[ki - 1] + 1;
		}
		if (k2[ki - 1] % hbl < hbl - 2 && icurrow[ki - 1] == myrow && 
			nprow > 1) {
		    kp2row[ki - 1] = kp2row[ki - 1] + k2[ki - 1] - k1[ki - 1] 
			    + 1;
		}
		if ((k1[ki - 1] - 1) % hbl >= hbl - 2 && (myrow == icurrow[ki 
			- 1] || down == icurrow[ki - 1]) && nprow > 1) {
		    i__3 = k2[ki - 1] + 1;
		    infog1l_(&i__3, &hbl, &nprow, &myrow, &iafirst, &krow[ki 
			    - 1], &itmp2);
		}
		if (k2[ki - 1] % hbl >= hbl - 2 && (myrow == icurrow[ki - 1] 
			|| up == icurrow[ki - 1]) && nprow > 1) {
		    i__3 = k2[ki - 1] + 3;
		    kp2row[ki - 1] = numroc_(&i__3, &hbl, &myrow, &iafirst, &
			    nprow);
		}
		if (npcol == 1) {
		    kcol[ki - 1] = kcol[ki - 1] + k2[ki - 1] - k1[ki - 1] + 1;
		    kp2col[ki - 1] = kp2col[ki - 1] + k2[ki - 1] - k1[ki - 1] 
			    + 1;
		}
		if ((k1[ki - 1] - 1) % hbl < hbl - 2 && icurcol[ki - 1] == 
			mycol && npcol > 1) {
		    kcol[ki - 1] = kcol[ki - 1] + k2[ki - 1] - k1[ki - 1] + 1;
		}
		if (k2[ki - 1] % hbl < hbl - 2 && icurcol[ki - 1] == mycol && 
			npcol > 1) {
		    kp2col[ki - 1] = kp2col[ki - 1] + k2[ki - 1] - k1[ki - 1] 
			    + 1;
		}
		if ((k1[ki - 1] - 1) % hbl >= hbl - 2 && (mycol == icurcol[ki 
			- 1] || right == icurcol[ki - 1]) && npcol > 1) {
		    i__3 = k2[ki - 1] + 1;
		    infog1l_(&i__3, &hbl, &npcol, &mycol, &jafirst, &kcol[ki 
			    - 1], &itmp2);
		}
		if (k2[ki - 1] % hbl >= hbl - 2 && (mycol == icurcol[ki - 1] 
			|| left == icurcol[ki - 1]) && npcol > 1) {
		    i__3 = k2[ki - 1] + 3;
		    kp2col[ki - 1] = numroc_(&i__3, &hbl, &mycol, &jafirst, &
			    npcol);
		}
		k1[ki - 1] = k2[ki - 1] + 1;
/* Computing MIN */
		i__3 = k1[ki - 1] + rotn - 1, i__4 = i__ - 2;
		istop = min(i__3,i__4);
/* Computing MIN */
		i__3 = istop, i__4 = k1[ki - 1] + hbl - 3 - (k1[ki - 1] - 1) %
			 hbl;
		istop = min(i__3,i__4);
/* Computing MIN */
		i__3 = istop, i__4 = i2 - 2;
		istop = min(i__3,i__4);
/* Computing MAX */
		i__3 = istop, i__4 = k1[ki - 1];
		istop = max(i__3,i__4);
/* Computing MIN */
		i__3 = i__ - 2, i__4 = i2 - 2;
		if ((k1[ki - 1] - 1) % hbl == hbl - 2 && istop < min(i__3,
			i__4)) {
		    ++istop;
		}
		k2[ki - 1] = istop;
		if (k1[ki - 1] <= istop) {
		    if ((k1[ki - 1] - 1) % hbl == hbl - 2 && i__ - k1[ki - 1] 
			    > 1) {

/*                    Next step switches rows & cols */

			icurrow[ki - 1] = (icurrow[ki - 1] + 1) % nprow;
			icurcol[ki - 1] = (icurcol[ki - 1] + 1) % npcol;
		    }
		}
/* L530: */
	    }

	    if (k2[ibulge - 1] <= i__ - 1) {
		goto L40;
	    }
	}

/* L540: */
    }

/*     Failure to converge in remaining number of iterations */

    *info = i__;
    return 0;

L550:

    if (l == i__) {

/*        H(I,I-1) is negligible: one eigenvalue has converged. */

	infog2l_(&i__, &i__, &desca[1], &nprow, &npcol, &myrow, &mycol, &irow,
		 &icol, &itmp1, &itmp2);
	if (myrow == itmp1 && mycol == itmp2) {
	    i__1 = i__;
	    i__2 = (icol - 1) * lda + irow;
	    w[i__1].r = a[i__2].r, w[i__1].i = a[i__2].i;
	} else {
	    i__1 = i__;
	    w[i__1].r = 0., w[i__1].i = 0.;
	}
    } else if (l == i__ - 1) {

/*        H(I-1,I-2) is negligible: a pair of eigenvalues have converged. */

	i__1 = i__ - 1;
	pzlacp3_(&c__2, &i__1, &a[1], &desca[1], s1, &c__64, &c_n1, &c_n1, &
		c__0);
	zlanv2_(s1, &s1[64], &s1[1], &s1[65], &w[i__ - 1], &w[i__], &cs, &sn);
	i__1 = i__ - 1;
	pzlacp3_(&c__2, &i__1, &a[1], &desca[1], s1, &c__64, &c__0, &c__0, &
		c__1);

	if (node != 0) {
/*           Erase both eigenvalues unless NODE is 0 */
	    i__1 = i__ - 1;
	    w[i__1].r = 0., w[i__1].i = 0.;
	    i__1 = i__;
	    w[i__1].r = 0., w[i__1].i = 0.;
	}

	if (*wantt) {

/*           Apply the transformation to A. */

	    if (i2 > i__) {
		i__1 = i2 - i__;
		i__2 = i__ - 1;
		i__3 = i__ + 1;
		i__4 = i__ + 1;
		pzrot_(&i__1, &a[1], &i__2, &i__3, &desca[1], n, &a[1], &i__, 
			&i__4, &desca[1], n, &cs, &sn);
	    }
	    i__1 = i__ - i1 - 1;
	    i__2 = i__ - 1;
	    d_cnjg(&z__1, &sn);
	    pzrot_(&i__1, &a[1], &i1, &i__2, &desca[1], &c__1, &a[1], &i1, &
		    i__, &desca[1], &c__1, &cs, &z__1);
	}
	if (*wantz) {

/*           Apply the transformation to Z. */

	    i__1 = i__ - 1;
	    d_cnjg(&z__1, &sn);
	    pzrot_(&nz, &z__[1], iloz, &i__1, &descz[1], &c__1, &z__[1], iloz,
		     &i__, &descz[1], &c__1, &cs, &z__1);
	}

    } else {

/*        Find the eigenvalues in H(L:I,L:I), L < I-1 */

	jblk = i__ - l + 1;
	if (jblk <= 64) {
	    i__1 = i__ - l + 1;
	    pzlacp3_(&i__1, &l, &a[1], &desca[1], s1, &c__64, &c__0, &c__0, &
		    c__0);
	    zlahqr2_(&c_false, &c_false, &jblk, &c__1, &jblk, s1, &c__64, &w[
		    l], &c__1, &jblk, &z__[1], &ldz, &ierr);
	    if (node != 0) {

/*              Erase the eigenvalues */

		i__1 = i__;
		for (k = l; k <= i__1; ++k) {
		    i__2 = k;
		    w[i__2].r = 0., w[i__2].i = 0.;
/* L560: */
		}
	    }
	}
    }

/*     Decrement number of remaining iterations, and return to start of */
/*     the main loop with new value of I. */

    itn -= its;
    i__ = l - 1;
    goto L10;

L570:
    zgsum2d_(&contxt, "All", " ", n, &c__1, &w[1], n, &c_n1, &c_n1, (ftnlen)3,
	     (ftnlen)1);
    return 0;

/*     END OF PZLAHQR */

} /* pzlahqr_ */

