/* /home4/luszczek/mscratch/build/SCALAPACK/PBLAS/SRC/PBBLAS/pbztran.f -- translated by f2c (version 20031025).
   You must link the resulting object file with libf2c:
	on Microsoft Windows system, link with libf2c.lib;
	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
	or, if you install libf2c.a in a standard place, with -lf2c -lm
	-- in that order, at the end of the command line, as in
		cc *.o -lf2c -lm
	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,

		http://www.netlib.org/f2c/libf2c.zip
*/

#include "f2c.h"

/* Table of constant values */

static doublecomplex c_b1 = {1.,0.};
static doublecomplex c_b2 = {0.,0.};
static integer c__0 = 0;

/* Subroutine */ int pbztran_(integer *icontxt, char *adist, char *trans, 
	integer *m, integer *n, integer *nb, doublecomplex *a, integer *lda, 
	doublecomplex *beta, doublecomplex *c__, integer *ldc, integer *iarow,
	 integer *iacol, integer *icrow, integer *iccol, doublecomplex *work, 
	ftnlen adist_len, ftnlen trans_len)
{
    /* System generated locals */
    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4;

    /* Local variables */
    extern /* Subroutine */ int pbztr2af_(integer *, char *, integer *, 
	    integer *, integer *, doublecomplex *, integer *, doublecomplex *,
	     doublecomplex *, integer *, integer *, integer *, integer *, 
	    ftnlen), pbztr2at_(integer *, char *, char *, integer *, integer *
	    , integer *, doublecomplex *, integer *, doublecomplex *, 
	    doublecomplex *, integer *, integer *, integer *, ftnlen, ftnlen),
	     pbztr2bt_(integer *, char *, char *, integer *, integer *, 
	    integer *, doublecomplex *, integer *, doublecomplex *, 
	    doublecomplex *, integer *, integer *, ftnlen, ftnlen), pbztrget_(
	    integer *, char *, integer *, integer *, integer *, doublecomplex 
	    *, integer *, integer *, integer *, integer *, integer *, integer 
	    *, integer *, integer *, ftnlen);
    integer i__;
    extern /* Subroutine */ int pbztrsrt_(integer *, char *, integer *, 
	    integer *, integer *, doublecomplex *, integer *, doublecomplex *,
	     doublecomplex *, integer *, integer *, integer *, integer *, 
	    ftnlen), pbzmatadd_(integer *, char *, integer *, integer *, 
	    doublecomplex *, doublecomplex *, integer *, doublecomplex *, 
	    doublecomplex *, integer *, ftnlen);
    integer ml, mp, mq, np, nq, mq0, np0, igd, lcm;
    extern integer ilcm_(integer *, integer *);
    integer idex, jdex, info, lcmp, lcmq;
    extern integer iceil_(integer *, integer *);
    integer jccol, mccol;
    doublecomplex tbeta;
    extern logical lsame_(char *, char *, ftnlen, ftnlen);
    integer npcol, mrcol, jcrow, mycol, mcrow, nprow, mrrow, myrow;
    extern integer numroc_(integer *, integer *, integer *, integer *, 
	    integer *);
    extern /* Subroutine */ int blacs_gridinfo__(integer *, integer *, 
	    integer *, integer *, integer *), zgebr2d_(integer *, char *, 
	    char *, integer *, integer *, doublecomplex *, integer *, integer 
	    *, integer *, ftnlen, ftnlen), zgebs2d_(integer *, char *, char *,
	     integer *, integer *, doublecomplex *, integer *, ftnlen, ftnlen)
	    , zgesd2d_(integer *, integer *, integer *, doublecomplex *, 
	    integer *, integer *, integer *), zgerv2d_(integer *, integer *, 
	    integer *, doublecomplex *, integer *, integer *, integer *), 
	    pxerbla_(integer *, char *, integer *, ftnlen);
    logical colform, rowform;


/*  -- PB-BLAS routine (version 2.1) -- */
/*     University of Tennessee, Knoxville, Oak Ridge National Laboratory. */
/*     April 28, 1996 */

/*     Jaeyoung Choi, Oak Ridge National Laboratory */
/*     Jack Dongarra, University of Tennessee and Oak Ridge National Lab. */
/*     David Walker,  Oak Ridge National Laboratory */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  PBZTRAN  transposes  a column block to row block, or a row block to */
/*  column block by reallocating data distribution. */

/*     C := A^T + beta*C, or C := A^C + beta*C */

/*  where A is an M-by-N matrix  and C is an N-by-M matrix, and the size */
/*  of M or N is limited to its block size NB. */

/*  The first elements  of the matrices A, and C  should  be  located  at */
/*  the beginnings of their first blocks. (not the middle of the blocks.) */

/*  Parameters */
/*  ========== */

/*  ICONTXT (input) INTEGER */
/*          ICONTXT is the BLACS mechanism for partitioning communication */
/*          space.  A defining property of a context is that a message in */
/*          a context cannot be sent or received in another context.  The */
/*          BLACS context includes the definition of a grid, and each */
/*          process' coordinates in it. */

/*  ADIST  - (input) CHARACTER*1 */
/*           ADIST specifies whether A is a column block or a row block. */

/*              ADIST = 'C',  A is a column block */
/*              ADIST = 'R',  A is a row block */

/*  TRANS  - (input) CHARACTER*1 */
/*           TRANS specifies whether the transposed format is transpose */
/*           or conjugate transpose.  If the matrices A and C are real, */
/*           the argument is ignored. */

/*              TRANS = 'T',  transpose */
/*              TRANS = 'C',  conjugate transpose */

/*  M      - (input) INTEGER */
/*           M specifies the (global) number of rows of the matrix (block */
/*           column or block row) A and of columns of the matrix C. */
/*           M >= 0. */

/*  N      - (input) INTEGER */
/*           N specifies the (global) number of columns of the matrix */
/*           (block column or block row) A  and of columns of the matrix */
/*           C.  N >= 0. */

/*  NB     - (input) INTEGER */
/*           NB specifies  the column block size of the matrix A and the */
/*           row block size of the matrix C when ADIST = 'C'.  Otherwise, */
/*           it specifies  the row block size of the matrix A and the */
/*           column block size of the matrix C. NB >= 1. */

/*  A       (input) COMPLEX*16 array of DIMENSION ( LDA, Lx ), */
/*          where Lx is N  when ADIST = 'C', or Nq when ADIST = 'R'. */
/*          Before entry with  ADIST = 'C',  the leading Mp by N part of */
/*          the array A must contain the matrix A, otherwise the leading */
/*          M by Nq part of the array A  must contain the matrix A.  See */
/*          parameter details for the values of Mp and Nq. */

/*  LDA     (input) INTEGER */
/*          LDA specifies the leading dimension of (local) A as declared */
/*          in the calling (sub) program.  LDA >= MAX(1,Mp) when */
/*          ADIST = 'C', or LDA >= MAX(1,M) otherwise. */

/*  BETA    (input) COMPLEX*16 */
/*          BETA specifies scaler beta. */

/*  C       (input/output) COMPLEX*16 array of DIMENSION ( LDC, Lx ), */
/*          where Lx is Mq when ADIST = 'C', or N when ADIST = 'R'. */
/*          If ADIST = 'C', the leading N-by-Mq part of the array C */
/*          contains the (local) matrix C, otherwise the leading */
/*          Np-by-M part of the array C must contain the (local) matrix */
/*          C.  C will not be referenced if beta is zero. */

/*  LDC     (input) INTEGER */
/*          LDC specifies the leading dimension of (local) C as declared */
/*          in the calling (sub) program. LDC >= MAX(1,N) when ADIST='C', */
/*          or LDC >= MAX(1,Np) otherwise. */

/*  IAROW   (input) INTEGER */
/*          IAROW specifies  a row  of the process  template, */
/*          which holds the first block  of the matrix  A. If A is a row */
/*          of blocks (ADIST = 'R') and all rows of processes have a copy */
/*          of A, then set IAROW = -1. */

/*  IACOL   (input) INTEGER */
/*          IACOL specifies  a column of the process template, */
/*          which holds  the first block  of the matrix A.  If  A is  a */
/*          column of blocks (ADIST = 'C') and all columns of processes */
/*          have a copy of A, then set IACOL = -1. */

/*  ICROW   (input) INTEGER */
/*          ICROW specifies the current row process which holds */
/*          the first block  of the matrix C,  which is transposed of A. */
/*          If C is a row of blocks (ADIST = 'C') and the transposed */
/*          row block C is distributed all rows of processes, set */
/*          ICROW = -1. */

/*  ICCOL   (input) INTEGER */
/*          ICCOL specifies  the current column process which holds */
/*          the first block of the matrix C,  which is transposed of A. */
/*          If C is a column of blocks (ADIST = 'R') and the transposed */
/*          column block C is distributed all columns of processes, */
/*          set ICCOL = -1. */

/*  WORK    (workspace) COMPLEX*16 array of dimension Size(WORK). */
/*          It needs extra working space of A'. */

/*  Parameters Details */
/*  ================== */

/*  Lx      It is  a local portion  of L  owned  by  a process,  (L is */
/*          replaced by M, or N,  and x is replaced by either p (=NPROW) */
/*          or q (=NPCOL)).  The value is  determined by  L, LB, x,  and */
/*          MI, where  LB is  a block size  and  MI is a  row  or column */
/*          position  in a process template.  Lx is  equal to  or less */
/*          than Lx0 = CEIL( L, LB*x ) * LB. */

/*  Communication Scheme */
/*  ==================== */

/*  The communication scheme of the routine is set to '1-tree', which is */
/*  fan-out.  (For details, see BLACS user's guide.) */

/*  Memory Requirement of WORK */
/*  ========================== */

/*  Mqb  = CEIL( M, NB*NPCOL ) */
/*  Npb  = CEIL( N, NB*NPROW ) */
/*  LCMQ = LCM / NPCOL */
/*  LCMP = LCM / NPROW */

/*  (1) ADIST = 'C' */
/*   (a) IACOL != -1 */
/*       Size(WORK) = N * CEIL(Mqb,LCMQ)*NB */
/*   (b) IACOL = -1 */
/*       Size(WORK) = N * CEIL(Mqb,LCMQ)*NB * MIN(LCMQ,CEIL(M,NB)) */

/*  (2) ADIST = 'R' */
/*   (a) IAROW != -1 */
/*       Size(WORK) = M * CEIL(Npb,LCMP)*NB */
/*   (b) IAROW = -1 */
/*       Size(WORK) = M * CEIL(Npb,LCMP)*NB * MIN(LCMP,CEIL(N,NB)) */

/*  Notes */
/*  ----- */
/*  More precise space can be computed as */

/*  CEIL(Mqb,LCMQ)*NB => NUMROC( NUMROC(M,NB,0,0,NPCOL), NB, 0, 0, LCMQ ) */
/*  CEIL(Npb,LCMP)*NB => NUMROC( NUMROC(N,NB,0,0,NPROW), NB, 0, 0, LCMP ) */

/*  ===================================================================== */

/*     .. */
/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Quick return if possible. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    c_dim1 = *ldc;
    c_offset = 1 + c_dim1;
    c__ -= c_offset;
    --work;

    /* Function Body */
    if (*m == 0 || *n == 0) {
	return 0;
    }

    blacs_gridinfo__(icontxt, &nprow, &npcol, &myrow, &mycol);

    colform = lsame_(adist, "C", (ftnlen)1, (ftnlen)1);
    rowform = lsame_(adist, "R", (ftnlen)1, (ftnlen)1);

/*     Test the input parameters. */

    info = 0;
    if (! colform && ! rowform) {
	info = 2;
    } else if (*m < 0) {
	info = 4;
    } else if (*n < 0) {
	info = 5;
    } else if (*nb < 1) {
	info = 6;
    } else if (*iarow < -1 || *iarow >= nprow || *iarow == -1 && colform) {
	info = 12;
    } else if (*iacol < -1 || *iacol >= npcol || *iacol == -1 && rowform) {
	info = 13;
    } else if (*icrow < -1 || *icrow >= nprow || *icrow == -1 && rowform) {
	info = 14;
    } else if (*iccol < -1 || *iccol >= npcol || *iccol == -1 && colform) {
	info = 15;
    }

L10:
    if (info != 0) {
	pxerbla_(icontxt, "PBZTRAN ", &info, (ftnlen)8);
	return 0;
    }

/*     Start the operations. */

/*     LCM : the least common multiple of NPROW and NPCOL */

    lcm = ilcm_(&nprow, &npcol);
    lcmp = lcm / nprow;
    lcmq = lcm / npcol;
    igd = npcol / lcmp;

/*     When A is a column block */

    if (colform) {

/*       Form  C <== A'  ( A is a column block ) */
/*                                         _ */
/*                                        | | */
/*                                        | | */
/*            _____________               | | */
/*           |______C______|     <==      |A| */
/*                                        | | */
/*                                        | | */
/*                                        |_| */

/*       MRROW : row relative position in template from IAROW */
/*       MRCOL : column relative position in template from ICCOL */

	mrrow = (nprow + myrow - *iarow) % nprow;
	mrcol = (npcol + mycol - *iccol) % npcol;
	jcrow = *icrow;
	if (*icrow == -1) {
	    jcrow = *iarow;
	}

	mp = numroc_(m, nb, &myrow, iarow, &nprow);
	mq = numroc_(m, nb, &mycol, iccol, &npcol);
	i__1 = numroc_(m, nb, &c__0, &c__0, &npcol);
	mq0 = numroc_(&i__1, nb, &c__0, &c__0, &lcmq);

	if (*lda < mp && (*iacol == mycol || *iacol == -1)) {
	    info = 8;
	} else if (*ldc < *n && (*icrow == myrow || *icrow == -1)) {
	    info = 11;
	}
	if (info != 0) {
	    goto L10;
	}

/*       When a column process of IACOL has a column block A, */

	if (*iacol >= 0) {
	    tbeta.r = 0., tbeta.i = 0.;
	    if (myrow == jcrow) {
		tbeta.r = beta->r, tbeta.i = beta->i;
	    }

/* Computing MIN */
	    i__2 = lcm, i__3 = iceil_(m, nb);
	    i__1 = min(i__2,i__3) - 1;
	    for (i__ = 0; i__ <= i__1; ++i__) {
		mcrow = (i__ % nprow + *iarow) % nprow;
		mccol = (i__ % npcol + *iccol) % npcol;
		if (lcmq == 1) {
		    mq0 = numroc_(m, nb, &i__, &c__0, &npcol);
		}
		jdex = i__ / npcol * *nb;

/*           A source node copies the blocks to WORK, and send it */

		if (myrow == mcrow && mycol == *iacol) {

/*             The source node is a destination node */

		    idex = i__ / nprow * *nb;
		    if (myrow == jcrow && mycol == mccol) {
			i__2 = mp - idex;
			pbztr2at_(icontxt, "Col", trans, &i__2, n, nb, &a[
				idex + 1 + a_dim1], lda, &tbeta, &c__[(jdex + 
				1) * c_dim1 + 1], ldc, &lcmp, &lcmq, (ftnlen)
				3, (ftnlen)1);

/*             The source node sends blocks to a destination node */

		    } else {
			i__2 = mp - idex;
			i__3 = lcmp * *nb;
			pbztr2bt_(icontxt, "Col", trans, &i__2, n, nb, &a[
				idex + 1 + a_dim1], lda, &c_b2, &work[1], n, &
				i__3, (ftnlen)3, (ftnlen)1);
			zgesd2d_(icontxt, n, &mq0, &work[1], n, &jcrow, &
				mccol);
		    }

/*           A destination node receives the copied blocks */

		} else if (myrow == jcrow && mycol == mccol) {
		    if (lcmq == 1 && (tbeta.r == 0. && tbeta.i == 0.)) {
			zgerv2d_(icontxt, n, &mq0, &c__[c_offset], ldc, &
				mcrow, iacol);
		    } else {
			zgerv2d_(icontxt, n, &mq0, &work[1], n, &mcrow, iacol)
				;
			i__2 = mq - jdex;
			pbztr2af_(icontxt, "Row", n, &i__2, nb, &work[1], n, &
				tbeta, &c__[(jdex + 1) * c_dim1 + 1], ldc, &
				lcmp, &lcmq, &mq0, (ftnlen)3);
		    }
		}
/* L20: */
	    }

/*         Broadcast a row block of C in each column of template */

	    if (*icrow == -1) {
		if (myrow == jcrow) {
		    zgebs2d_(icontxt, "Col", "1-tree", n, &mq, &c__[c_offset],
			     ldc, (ftnlen)3, (ftnlen)6);
		} else {
		    zgebr2d_(icontxt, "Col", "1-tree", n, &mq, &c__[c_offset],
			     ldc, &jcrow, &mycol, (ftnlen)3, (ftnlen)6);
		}
	    }

/*       When all column procesors have a copy of the column block A, */

	} else {
	    if (lcmq == 1) {
		mq0 = mq;
	    }

/*         Processors, which have diagonal blocks of A, copy them to */
/*         WORK array in transposed form */

	    i__1 = lcmp - 1;
	    for (i__ = 0; i__ <= i__1; ++i__) {
		if (mrcol == (nprow * i__ + mrrow) % npcol) {
		    if (lcmq == 1 && (*icrow == -1 || *icrow == myrow)) {
			i__2 = mp - i__ * *nb;
			i__3 = lcmp * *nb;
			pbztr2bt_(icontxt, "Col", trans, &i__2, n, nb, &a[i__ 
				* *nb + 1 + a_dim1], lda, beta, &c__[c_offset]
				, ldc, &i__3, (ftnlen)3, (ftnlen)1);
		    } else {
			i__2 = mp - i__ * *nb;
			i__3 = lcmp * *nb;
			pbztr2bt_(icontxt, "Col", trans, &i__2, n, nb, &a[i__ 
				* *nb + 1 + a_dim1], lda, &c_b2, &work[1], n, 
				&i__3, (ftnlen)3, (ftnlen)1);
		    }
		}
/* L30: */
	    }

/*         Get diagonal blocks of A for each column of the template */

	    mcrow = (mrcol % nprow + *iarow) % nprow;
	    if (lcmq > 1) {
		mccol = (npcol + mycol - *iccol) % npcol;
		i__1 = iceil_(m, nb);
		pbztrget_(icontxt, "Row", n, &mq0, &i__1, &work[1], n, &mcrow,
			 &mccol, &igd, &myrow, &mycol, &nprow, &npcol, (
			ftnlen)3);
	    }

/*         Broadcast a row block of WORK in every row of template */

	    if (*icrow == -1) {
		if (myrow == mcrow) {
		    if (lcmq > 1) {
			pbztrsrt_(icontxt, "Row", n, &mq, nb, &work[1], n, 
				beta, &c__[c_offset], ldc, &lcmp, &lcmq, &mq0,
				 (ftnlen)3);
		    }
		    zgebs2d_(icontxt, "Col", "1-tree", n, &mq, &c__[c_offset],
			     ldc, (ftnlen)3, (ftnlen)6);
		} else {
		    zgebr2d_(icontxt, "Col", "1-tree", n, &mq, &c__[c_offset],
			     ldc, &mcrow, &mycol, (ftnlen)3, (ftnlen)6);
		}

/*         Send a row block of WORK to the destination row */

	    } else {
		if (lcmq == 1) {
		    if (myrow == mcrow) {
			if (myrow != *icrow) {
			    zgesd2d_(icontxt, n, &mq, &work[1], n, icrow, &
				    mycol);
			}
		    } else if (myrow == *icrow) {
			if (beta->r == 0. && beta->i == 0.) {
			    zgerv2d_(icontxt, n, &mq, &c__[c_offset], ldc, &
				    mcrow, &mycol);
			} else {
			    zgerv2d_(icontxt, n, &mq, &work[1], n, &mcrow, &
				    mycol);
			    pbzmatadd_(icontxt, "G", n, &mq, &c_b1, &work[1], 
				    n, beta, &c__[c_offset], ldc, (ftnlen)1);
			}
		    }

		} else {
/* Computing MIN */
/* Computing MAX */
		    i__3 = 0, i__4 = iceil_(m, nb) - mccol;
		    i__1 = lcmq, i__2 = max(i__3,i__4);
		    ml = mq0 * min(i__1,i__2);
		    if (myrow == mcrow) {
			if (myrow != *icrow) {
			    zgesd2d_(icontxt, n, &ml, &work[1], n, icrow, &
				    mycol);
			}
		    } else if (myrow == *icrow) {
			zgerv2d_(icontxt, n, &ml, &work[1], n, &mcrow, &mycol)
				;
		    }

		    if (myrow == *icrow) {
			pbztrsrt_(icontxt, "Row", n, &mq, nb, &work[1], n, 
				beta, &c__[c_offset], ldc, &lcmp, &lcmq, &mq0,
				 (ftnlen)3);
		    }
		}
	    }

	}

/*     When A is a row block */

    } else {

/*        Form  C <== A'  ( A is a row block ) */
/*            _ */
/*           | | */
/*           | | */
/*           | |                _____________ */
/*           |C|      <==      |______A______| */
/*           | | */
/*           | | */
/*           |_| */

/*        MRROW : row relative position in template from ICROW */
/*        MRCOL : column relative position in template from IACOL */

	mrrow = (nprow + myrow - *icrow) % nprow;
	mrcol = (npcol + mycol - *iacol) % npcol;
	jccol = *iccol;
	if (*iccol == -1) {
	    jccol = *iacol;
	}

	np = numroc_(n, nb, &myrow, icrow, &nprow);
	nq = numroc_(n, nb, &mycol, iacol, &npcol);
	i__1 = numroc_(n, nb, &c__0, &c__0, &nprow);
	np0 = numroc_(&i__1, nb, &c__0, &c__0, &lcmp);

	if (*lda < *m && (*iarow == myrow || *iarow == -1)) {
	    info = 8;
	} else if (*ldc < np && (*iccol == mycol || *iccol == -1)) {
	    info = 11;
	}
	if (info != 0) {
	    goto L10;
	}

/*        When a row process of IAROW has a row block A, */

	if (*iarow >= 0) {
	    tbeta.r = 0., tbeta.i = 0.;
	    if (mycol == jccol) {
		tbeta.r = beta->r, tbeta.i = beta->i;
	    }

/* Computing MIN */
	    i__2 = lcm, i__3 = iceil_(n, nb);
	    i__1 = min(i__2,i__3) - 1;
	    for (i__ = 0; i__ <= i__1; ++i__) {
		mcrow = (i__ % nprow + *icrow) % nprow;
		mccol = (i__ % npcol + *iacol) % npcol;
		if (lcmp == 1) {
		    np0 = numroc_(n, nb, &i__, &c__0, &nprow);
		}
		idex = i__ / nprow * *nb;

/*            A source node copies the blocks to WORK, and send it */

		if (myrow == *iarow && mycol == mccol) {

/*              The source node is a destination node */

		    jdex = i__ / npcol * *nb;
		    if (myrow == mcrow && mycol == jccol) {
			i__2 = nq - jdex;
			pbztr2at_(icontxt, "Row", trans, m, &i__2, nb, &a[(
				jdex + 1) * a_dim1 + 1], lda, &tbeta, &c__[
				idex + 1 + c_dim1], ldc, &lcmp, &lcmq, (
				ftnlen)3, (ftnlen)1);

/*              The source node sends blocks to a destination node */

		    } else {
			i__2 = nq - jdex;
			i__3 = lcmq * *nb;
			pbztr2bt_(icontxt, "Row", trans, m, &i__2, nb, &a[(
				jdex + 1) * a_dim1 + 1], lda, &c_b2, &work[1],
				 &np0, &i__3, (ftnlen)3, (ftnlen)1);
			zgesd2d_(icontxt, &np0, m, &work[1], &np0, &mcrow, &
				jccol);
		    }

/*           A destination node receives the copied blocks */

		} else if (myrow == mcrow && mycol == jccol) {
		    if (lcmp == 1 && (tbeta.r == 0. && tbeta.i == 0.)) {
			zgerv2d_(icontxt, &np0, m, &c__[c_offset], ldc, iarow,
				 &mccol);
		    } else {
			zgerv2d_(icontxt, &np0, m, &work[1], &np0, iarow, &
				mccol);
			i__2 = np - idex;
			pbztr2af_(icontxt, "Col", &i__2, m, nb, &work[1], &
				np0, &tbeta, &c__[idex + 1 + c_dim1], ldc, &
				lcmp, &lcmq, &np0, (ftnlen)3);
		    }
		}
/* L40: */
	    }

/*         Broadcast a column block of WORK in each row of template */

	    if (*iccol == -1) {
		if (mycol == jccol) {
		    zgebs2d_(icontxt, "Row", "1-tree", &np, m, &c__[c_offset],
			     ldc, (ftnlen)3, (ftnlen)6);
		} else {
		    zgebr2d_(icontxt, "Row", "1-tree", &np, m, &c__[c_offset],
			     ldc, &myrow, &jccol, (ftnlen)3, (ftnlen)6);
		}
	    }

/*       When all row procesors have a copy of the row block A, */

	} else {
	    if (lcmp == 1) {
		np0 = np;
	    }

/*         Processors, which have diagonal blocks of A, copy them to */
/*         WORK array in transposed form */

	    i__1 = lcmq - 1;
	    for (i__ = 0; i__ <= i__1; ++i__) {
		if (mrrow == (npcol * i__ + mrcol) % nprow) {
		    if (lcmp == 1 && (*iccol == -1 || *iccol == mycol)) {
			i__2 = nq - i__ * *nb;
			i__3 = lcmq * *nb;
			pbztr2bt_(icontxt, "Row", trans, m, &i__2, nb, &a[(
				i__ * *nb + 1) * a_dim1 + 1], lda, beta, &c__[
				c_offset], ldc, &i__3, (ftnlen)3, (ftnlen)1);
		    } else {
			i__2 = nq - i__ * *nb;
			i__3 = lcmq * *nb;
			pbztr2bt_(icontxt, "Row", trans, m, &i__2, nb, &a[(
				i__ * *nb + 1) * a_dim1 + 1], lda, &c_b2, &
				work[1], &np0, &i__3, (ftnlen)3, (ftnlen)1);
		    }
		}
/* L50: */
	    }

/*         Get diagonal blocks of A for each row of the template */

	    mccol = (mrrow % npcol + *iacol) % npcol;
	    if (lcmp > 1) {
		mcrow = (nprow + myrow - *icrow) % nprow;
		i__1 = iceil_(n, nb);
		pbztrget_(icontxt, "Col", &np0, m, &i__1, &work[1], &np0, &
			mcrow, &mccol, &igd, &myrow, &mycol, &nprow, &npcol, (
			ftnlen)3);
	    }

/*         Broadcast a column block of WORK in every column of template */

	    if (*iccol == -1) {
		if (mycol == mccol) {
		    if (lcmp > 1) {
			pbztrsrt_(icontxt, "Col", &np, m, nb, &work[1], &np0, 
				beta, &c__[c_offset], ldc, &lcmp, &lcmq, &np0,
				 (ftnlen)3);
		    }
		    zgebs2d_(icontxt, "Row", "1-tree", &np, m, &c__[c_offset],
			     ldc, (ftnlen)3, (ftnlen)6);
		} else {
		    zgebr2d_(icontxt, "Row", "1-tree", &np, m, &c__[c_offset],
			     ldc, &myrow, &mccol, (ftnlen)3, (ftnlen)6);
		}

/*         Send a column block of WORK to the destination column */

	    } else {
		if (lcmp == 1) {
		    if (mycol == mccol) {
			if (mycol != *iccol) {
			    zgesd2d_(icontxt, &np, m, &work[1], &np, &myrow, 
				    iccol);
			}
		    } else if (mycol == *iccol) {
			if (beta->r == 0. && beta->i == 0.) {
			    zgerv2d_(icontxt, &np, m, &c__[c_offset], ldc, &
				    myrow, &mccol);
			} else {
			    zgerv2d_(icontxt, &np, m, &work[1], &np, &myrow, &
				    mccol);
			    pbzmatadd_(icontxt, "G", &np, m, &c_b1, &work[1], 
				    &np, beta, &c__[c_offset], ldc, (ftnlen)1)
				    ;
			}
		    }

		} else {
/* Computing MIN */
/* Computing MAX */
		    i__3 = 0, i__4 = iceil_(n, nb) - mcrow;
		    i__1 = lcmp, i__2 = max(i__3,i__4);
		    ml = *m * min(i__1,i__2);
		    if (mycol == mccol) {
			if (mycol != *iccol) {
			    zgesd2d_(icontxt, &np0, &ml, &work[1], &np0, &
				    myrow, iccol);
			}
		    } else if (mycol == *iccol) {
			zgerv2d_(icontxt, &np0, &ml, &work[1], &np0, &myrow, &
				mccol);
		    }

		    if (mycol == *iccol) {
			pbztrsrt_(icontxt, "Col", &np, m, nb, &work[1], &np0, 
				beta, &c__[c_offset], ldc, &lcmp, &lcmq, &np0,
				 (ftnlen)3);
		    }
		}
	    }

	}
    }

    return 0;

/*     End of PBZTRAN */

} /* pbztran_ */


/* ======================================================================= */
/*     SUBROUTINE PBZTR2AT */
/* ======================================================================= */

/* Subroutine */ int pbztr2at_(integer *icontxt, char *adist, char *trans, 
	integer *m, integer *n, integer *nb, doublecomplex *a, integer *lda, 
	doublecomplex *beta, doublecomplex *b, integer *ldb, integer *lcmp, 
	integer *lcmq, ftnlen adist_len, ftnlen trans_len)
{
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;

    /* Local variables */
    integer k;
    extern /* Subroutine */ int pbzmatadd_(integer *, char *, integer *, 
	    integer *, doublecomplex *, doublecomplex *, integer *, 
	    doublecomplex *, doublecomplex *, integer *, ftnlen);
    integer ia, ib, intv, jntv;
    extern integer iceil_(integer *, integer *);
    extern logical lsame_(char *, char *, ftnlen, ftnlen);


/*  -- PB-BLAS routine (version 2.1) -- */
/*     University of Tennessee, Knoxville, Oak Ridge National Laboratory. */
/*     April 28, 1996 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  PBZTR2AT forms   B <== A^T + beta*B, or A^C + beta*B */
/*  B is a ((conjugate) transposed) scattered block row (or column), */
/*  copied from a scattered block column (or row) of A */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Excutable Statements .. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;

    /* Function Body */
    if (*lcmp == *lcmq) {
	pbzmatadd_(icontxt, trans, n, m, &c_b1, &a[a_offset], lda, beta, &b[
		b_offset], ldb, (ftnlen)1);

    } else {

/*        If A is a column block ( ADIST = 'C' ), */

	if (lsame_(adist, "C", (ftnlen)1, (ftnlen)1)) {
	    intv = *lcmp * *nb;
	    jntv = *lcmq * *nb;
	    ia = 1;
	    ib = 1;
	    i__1 = iceil_(m, &intv);
	    for (k = 1; k <= i__1; ++k) {
/* Computing MIN */
		i__3 = *m - ia + 1;
		i__2 = min(i__3,*nb);
		pbzmatadd_(icontxt, trans, n, &i__2, &c_b1, &a[ia + a_dim1], 
			lda, beta, &b[ib * b_dim1 + 1], ldb, (ftnlen)1);
		ia += intv;
		ib += jntv;
/* L10: */
	    }

/*        If A is a row block ( ADIST = 'R' ), */

	} else {
	    intv = *lcmp * *nb;
	    jntv = *lcmq * *nb;
	    ia = 1;
	    ib = 1;
	    i__1 = iceil_(n, &jntv);
	    for (k = 1; k <= i__1; ++k) {
/* Computing MIN */
		i__3 = *n - ia + 1;
		i__2 = min(i__3,*nb);
		pbzmatadd_(icontxt, trans, &i__2, m, &c_b1, &a[ia * a_dim1 + 
			1], lda, beta, &b[ib + b_dim1], ldb, (ftnlen)1);
		ia += jntv;
		ib += intv;
/* L20: */
	    }
	}
    }

    return 0;

/*     End of PBZTR2AT */

} /* pbztr2at_ */


/* ======================================================================= */
/*     SUBROUTINE PBZTR2BT */
/* ======================================================================= */

/* Subroutine */ int pbztr2bt_(integer *icontxt, char *adist, char *trans, 
	integer *m, integer *n, integer *nb, doublecomplex *a, integer *lda, 
	doublecomplex *beta, doublecomplex *b, integer *ldb, integer *intv, 
	ftnlen adist_len, ftnlen trans_len)
{
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;

    /* Local variables */
    integer k;
    extern /* Subroutine */ int pbzmatadd_(integer *, char *, integer *, 
	    integer *, doublecomplex *, doublecomplex *, integer *, 
	    doublecomplex *, doublecomplex *, integer *, ftnlen);
    integer ia, ib;
    extern integer iceil_(integer *, integer *);
    extern logical lsame_(char *, char *, ftnlen, ftnlen);


/*  -- PB-BLAS routine (version 2.1) -- */
/*     University of Tennessee, Knoxville, Oak Ridge National Laboratory. */
/*     April 28, 1996 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  PBZTR2BT forms T <== A^T + beta*T or A^C + beta*T, where T is a */
/*  ((conjugate) transposed) condensed block row (or column), copied from */
/*  a scattered block column (or row) of A */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Excutable Statements .. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;

    /* Function Body */
    if (*intv == *nb) {
	pbzmatadd_(icontxt, trans, n, m, &c_b1, &a[a_offset], lda, beta, &b[
		b_offset], ldb, (ftnlen)1);

    } else {

/*        If A is a column block ( ADIST = 'C' ), */

	if (lsame_(adist, "C", (ftnlen)1, (ftnlen)1)) {
	    ia = 1;
	    ib = 1;
	    i__1 = iceil_(m, intv);
	    for (k = 1; k <= i__1; ++k) {
/* Computing MIN */
		i__3 = *m - ia + 1;
		i__2 = min(i__3,*nb);
		pbzmatadd_(icontxt, trans, n, &i__2, &c_b1, &a[ia + a_dim1], 
			lda, beta, &b[ib * b_dim1 + 1], ldb, (ftnlen)1);
		ia += *intv;
		ib += *nb;
/* L10: */
	    }

/*        If A is a row block (ADIST = 'R'), */

	} else {
	    ia = 1;
	    ib = 1;
	    i__1 = iceil_(n, intv);
	    for (k = 1; k <= i__1; ++k) {
/* Computing MIN */
		i__3 = *n - ia + 1;
		i__2 = min(i__3,*nb);
		pbzmatadd_(icontxt, trans, &i__2, m, &c_b1, &a[ia * a_dim1 + 
			1], lda, beta, &b[ib + b_dim1], ldb, (ftnlen)1);
		ia += *intv;
		ib += *nb;
/* L20: */
	    }
	}
    }

    return 0;

/*     End of PBZTR2BT */

} /* pbztr2bt_ */


/* ======================================================================= */
/*     SUBROUTINE PBZTR2AF */
/* ======================================================================= */

/* Subroutine */ int pbztr2af_(integer *icontxt, char *adist, integer *m, 
	integer *n, integer *nb, doublecomplex *a, integer *lda, 
	doublecomplex *beta, doublecomplex *b, integer *ldb, integer *lcmp, 
	integer *lcmq, integer *nint, ftnlen adist_len)
{
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;

    /* Local variables */
    integer k;
    extern /* Subroutine */ int pbzmatadd_(integer *, char *, integer *, 
	    integer *, doublecomplex *, doublecomplex *, integer *, 
	    doublecomplex *, doublecomplex *, integer *, ftnlen);
    integer ja, jb, intv;
    extern integer iceil_(integer *, integer *);
    extern logical lsame_(char *, char *, ftnlen, ftnlen);


/*  -- PB-BLAS routine (version 2.1) -- */
/*     University of Tennessee, Knoxville, Oak Ridge National Laboratory. */
/*     April 28, 1996 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  PBZTR2AF forms  T <== A + BETA*T, where T is a scattered block */
/*  row (or column) copied from a (condensed) block column (or row) of A */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;

    /* Function Body */
    if (lsame_(adist, "R", (ftnlen)1, (ftnlen)1)) {
	intv = *nb * *lcmq;
	ja = 1;
	jb = 1;
	i__1 = iceil_(nint, nb);
	for (k = 1; k <= i__1; ++k) {
/* Computing MIN */
	    i__3 = *n - jb + 1;
	    i__2 = min(i__3,*nb);
	    pbzmatadd_(icontxt, "G", m, &i__2, &c_b1, &a[ja * a_dim1 + 1], 
		    lda, beta, &b[jb * b_dim1 + 1], ldb, (ftnlen)1);
	    ja += *nb;
	    jb += intv;
/* L10: */
	}

/*     if( LSAME( ADIST, 'C' ) ) then */

    } else {
	intv = *nb * *lcmp;
	ja = 1;
	jb = 1;
	i__1 = iceil_(nint, nb);
	for (k = 1; k <= i__1; ++k) {
/* Computing MIN */
	    i__3 = *m - jb + 1;
	    i__2 = min(i__3,*nb);
	    pbzmatadd_(icontxt, "G", &i__2, n, &c_b1, &a[ja + a_dim1], lda, 
		    beta, &b[jb + b_dim1], ldb, (ftnlen)1);
	    ja += *nb;
	    jb += intv;
/* L20: */
	}
    }

    return 0;

/*     End of PBZTR2AF */

} /* pbztr2af_ */

