/* /home4/luszczek/mscratch/build/SCALAPACK/PBLAS/SRC/PBBLAS/pbstrnv.f -- translated by f2c (version 20031025).
   You must link the resulting object file with libf2c:
	on Microsoft Windows system, link with libf2c.lib;
	on Linux or Unix systems, link with .../path/to/libf2c.a -lm
	or, if you install libf2c.a in a standard place, with -lf2c -lm
	-- in that order, at the end of the command line, as in
		cc *.o -lf2c -lm
	Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,

		http://www.netlib.org/f2c/libf2c.zip
*/

#include "f2c.h"

/* Table of constant values */

static integer c__0 = 0;
static real c_b12 = 0.f;
static integer c__1 = 1;
static real c_b49 = 1.f;

/* Subroutine */ int pbstrnv_(integer *icontxt, char *xdist, char *trans, 
	integer *n, integer *nb, integer *nz, real *x, integer *incx, real *
	beta, real *y, integer *incy, integer *ixrow, integer *ixcol, integer 
	*iyrow, integer *iycol, real *work, ftnlen xdist_len, ftnlen 
	trans_len)
{
    /* System generated locals */
    integer i__1, i__2, i__3, i__4;

    /* Local variables */
    extern /* Subroutine */ int pbstrst1_(integer *, char *, integer *, 
	    integer *, integer *, real *, integer *, real *, real *, integer *
	    , integer *, integer *, integer *, ftnlen), pbstrget_(integer *, 
	    char *, integer *, integer *, integer *, real *, integer *, 
	    integer *, integer *, integer *, integer *, integer *, integer *, 
	    integer *, ftnlen);
    integer i__;
    extern /* Subroutine */ int pbsvecadd_(integer *, char *, integer *, real 
	    *, real *, integer *, real *, real *, integer *, ftnlen);
    integer nn, np, nq, jz, kz, np0, np1, nq0, nq1, igd, lcm;
    extern integer ilcm_(integer *, integer *);
    integer idex, jdex, info, lcmp, lcmq;
    extern integer iceil_(integer *, integer *);
    integer mccol;
    real tbeta;
    extern logical lsame_(char *, char *, ftnlen, ftnlen);
    integer npcol, mrcol, jycol, mycol, mcrow, nprow, mrrow, jyrow, myrow;
    extern integer numroc_(integer *, integer *, integer *, integer *, 
	    integer *);
    extern /* Subroutine */ int blacs_gridinfo__(integer *, integer *, 
	    integer *, integer *, integer *), sgebr2d_(integer *, char *, 
	    char *, integer *, integer *, real *, integer *, integer *, 
	    integer *, ftnlen, ftnlen), sgebs2d_(integer *, char *, char *, 
	    integer *, integer *, real *, integer *, ftnlen, ftnlen), 
	    sgesd2d_(integer *, integer *, integer *, real *, integer *, 
	    integer *, integer *), sgerv2d_(integer *, integer *, integer *, 
	    real *, integer *, integer *, integer *), pxerbla_(integer *, 
	    char *, integer *, ftnlen), pbstr2a1_(integer *, integer *, 
	    integer *, integer *, real *, integer *, real *, real *, integer *
	    , integer *), pbstr2b1_(integer *, char *, integer *, integer *, 
	    integer *, real *, integer *, real *, real *, integer *, integer *
	    , integer *, ftnlen);
    logical colform, rowform;


/*  -- PB-BLAS routine (version 2.1) -- */
/*     University of Tennessee, Knoxville, Oak Ridge National Laboratory. */
/*     April 28, 1996 */

/*     Jaeyoung Choi, Oak Ridge National Laboratory */
/*     Jack Dongarra, University of Tennessee and Oak Ridge National Lab. */
/*     David Walker,  Oak Ridge National Laboratory */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  PBSTRNV transposes a column vector to row vector, or a row vector to */
/*  column vector by reallocating data distribution. */

/*     Y := X' */

/*  where X and Y are N vectors. */

/*  Parameters */
/*  ========== */

/*  ICONTXT (input) INTEGER */
/*          ICONTXT is the BLACS mechanism for partitioning communication */
/*          space.  A defining property of a context is that a message in */
/*          a context cannot be sent or received in another context.  The */
/*          BLACS context includes the definition of a grid, and each */
/*          process' coordinates in it. */

/*  XDIST   (input) CHARACTER*1 */
/*          XDIST specifies whether X is a column vector or a row vector, */

/*            XDIST = 'C',  X is a column vector (distributed columnwise) */
/*            XDIST = 'R',  X is a row vector    (distributed rowwise) */

/*  TRANS   (input) CHARACTER*1 */
/*          TRANS specifies whether the transposed format is transpose */
/*          or conjugate transpose.  If the vectors X and Y are real, */
/*          the argument is ignored. */

/*             TRANS = 'T',  transpose */
/*             TRANS = 'C',  conjugate transpose */

/*  N       (input) INTEGER */
/*          N specifies the (global) number of the vector X and the */
/*          vector Y.  N >= 0. */

/*  NB      (input) INTEGER */
/*          NB specifies the block size of vectors X and Y.  NB >= 0. */

/*  NZ      (input) INTEGER */
/*          NZ is the column offset to specify the column distance from */
/*          the beginning of the block to the first element of the */
/*          vector X, and the row offset to the first element of the */
/*          vector Y if XDIST = 'C'. */
/*          Otherwise, it is row offset to specify the row distance */
/*          from the beginning of the block to the first element of the */
/*          vector X, and the column offset to the first element of the */
/*          vector Y.  0 < NZ <= NB. */

/*  X       (input) REAL array of dimension  at least */
/*          ( 1 + (Np-1) * abs(INCX)) in IXCOL if XDIST = 'C', or */
/*          ( 1 + (Nq-1) * abs(INCX)) in IXROW if XDIST = 'R'. */
/*          The incremented array X must contain the vector X. */

/*  INCX    (input) INTEGER */
/*          INCX specifies the increment for the elements of X. */
/*          INCX <> 0. */

/*  BETA    (input) REAL */
/*          BETA specifies scaler beta. */

/*  Y       (input/output) REAL array of dimension at least */
/*          ( 1 + (Nq-1) * abs(INCY)) in IYROW if XDIST = 'C', or */
/*          ( 1 + (Np-1) * abs(INCY)) in IYCOL if XDIST = 'R', or */
/*          The incremented array Y must contain the vector Y. */
/*          Y will not be referenced if beta is zero. */

/*  INCY    (input) INTEGER */
/*          INCY specifies the increment for the elements of Y. */
/*          INCY <> 0. */

/*  IXROW   (input) INTEGER */
/*          IXROW specifies a row of the process template, which holds */
/*          the first element of the vector X. If X is a row vector and */
/*          all rows of processes have a copy of X, then set IXROW = -1. */

/*  IXCOL   (input) INTEGER */
/*          IXCOL specifies  a column of the process template, */
/*          which holds the first element of the vector X.  If  X is  a */
/*          column block and all columns of processes have a copy of X, */
/*          then set IXCOL = -1. */

/*  IYROW   (input) INTEGER */
/*          IYROW specifies the current row process which holds the */
/*          first element of the vector Y, which is transposed of X. */
/*          If X  is a column vector and the transposed  row vector Y is */
/*          distributed all rows of processes, set IYROW = -1. */

/*  IYCOL   (input) INTEGER */
/*          IYCOL specifies  the current column process  which holds */
/*          the first element of the vector Y, which is transposed of Y. */
/*          If X is a row block and the transposed column vector Y is */
/*          distributed all columns of processes, set IYCOL = -1. */

/*  WORK    (workspace) REAL array of dimension Size(WORK). */
/*          It needs extra working space of x**T or x**H. */

/*  Parameters Details */
/*  ================== */

/*  Nx      It is a local portion  of N owned by a process, where x is */
/*          replaced by  either p (=NPROW) or q (=NPCOL)).  The value is */
/*          determined by N, NB, NZ, x, and MI, where NB is a block size, */
/*          NZ is a offset from the beginning of the block,  and MI is a */
/*          row or column position  in a process template. Nx is equal */
/*          to  or less than Nx0 = CEIL( N+NZ, NB*x ) * NB. */

/*  Communication Scheme */
/*  ==================== */

/*  The communication scheme of the routine is set to '1-tree', which is */
/*  fan-out.  (For details, see BLACS user's guide.) */

/*  Memory Requirement of WORK */
/*  ========================== */

/*  NN   = N + NZ */
/*  Npb  = CEIL( NN, NB*NPROW ) */
/*  Nqb  = CEIL( NN, NB*NPCOL ) */
/*  LCMP = LCM / NPROW */
/*  LCMQ = LCM / NPCOL */

/*   (1) XDIST = 'C' */
/*     (a) IXCOL != -1 */
/*         Size(WORK) = CEIL(Nqb,LCMQ)*NB */
/*     (b) IXCOL = -1 */
/*         Size(WORK) = CEIL(Nqb,LCMQ)*NB * MIN(LCMQ,CEIL(NN,NB)) */

/*   (2) XDIST = 'R' */
/*     (a) IXROW != -1 */
/*         Size(WORK) = CEIL(Npb,LCMP)*NB */
/*     (b) IXROW = -1 */
/*         Size(WORK) = CEIL(Npb,LCMP)*NB * MIN(LCMP,CEIL(NN,NB)) */

/*  Notes */
/*  ----- */
/*  More precise space can be computed as */

/*  CEIL(Npb,LCMP)*NB => NUMROC( NUMROC(NN,NB,0,0,NPROW), NB, 0, 0, LCMP) */
/*  CEIL(Nqb,LCMQ)*NB => NUMROC( NUMROC(NN,NB,0,0,NPCOL), NB, 0, 0, LCMQ) */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Quick return if possible. */

    /* Parameter adjustments */
    --work;
    --y;
    --x;

    /* Function Body */
    if (*n == 0) {
	return 0;
    }

    blacs_gridinfo__(icontxt, &nprow, &npcol, &myrow, &mycol);

    colform = lsame_(xdist, "C", (ftnlen)1, (ftnlen)1);
    rowform = lsame_(xdist, "R", (ftnlen)1, (ftnlen)1);

/*     Test the input parameters. */

    info = 0;
    if (! colform && ! rowform) {
	info = 2;
    } else if (*n < 0) {
	info = 4;
    } else if (*nb < 1) {
	info = 5;
    } else if (*nz < 0 || *nz >= *nb) {
	info = 6;
    } else if (*incx == 0) {
	info = 8;
    } else if (*incy == 0) {
	info = 11;
    } else if (*ixrow < -1 || *ixrow >= nprow || *ixrow == -1 && colform) {
	info = 12;
    } else if (*ixcol < -1 || *ixcol >= npcol || *ixcol == -1 && rowform) {
	info = 13;
    } else if (*iyrow < -1 || *iyrow >= nprow || *iyrow == -1 && rowform) {
	info = 14;
    } else if (*iycol < -1 || *iycol >= npcol || *iycol == -1 && colform) {
	info = 15;
    }

L10:
    if (info != 0) {
	pxerbla_(icontxt, "PBSTRNV ", &info, (ftnlen)8);
	return 0;
    }

/*     Start the operations. */

/*     LCM : the least common multiple of NPROW and NPCOL */

    lcm = ilcm_(&nprow, &npcol);
    lcmp = lcm / nprow;
    lcmq = lcm / npcol;
    igd = npcol / lcmp;
    nn = *n + *nz;

/*     When x is a column vector */

    if (colform) {

/*       Form  y <== x'  ( x is a column vector ) */

/*                                        || */
/*                                        || */
/*            _____________               || */
/*            -----(y)-----      <==     (x) */
/*                                        || */
/*                                        || */
/*                                        || */

	if (*ixrow < 0 || *ixrow >= nprow) {
	    info = 12;
	} else if (*ixcol < -1 || *ixcol >= npcol) {
	    info = 13;
	} else if (*iyrow < -1 || *iyrow >= nprow) {
	    info = 14;
	} else if (*iycol < 0 || *iycol >= npcol) {
	    info = 15;
	}
	if (info != 0) {
	    goto L10;
	}

/*       MRROW : row relative position in template from IXROW */
/*       MRCOL : column relative position in template from IYCOL */

	mrrow = (nprow + myrow - *ixrow) % nprow;
	mrcol = (npcol + mycol - *iycol) % npcol;
	jyrow = *iyrow;
	if (*iyrow == -1) {
	    jyrow = *ixrow;
	}

	np = numroc_(&nn, nb, &myrow, ixrow, &nprow);
	if (mrrow == 0) {
	    np -= *nz;
	}
	nq = numroc_(&nn, nb, &mycol, iycol, &npcol);
	if (mrcol == 0) {
	    nq -= *nz;
	}
	i__1 = numroc_(&nn, nb, &c__0, &c__0, &npcol);
	nq0 = numroc_(&i__1, nb, &c__0, &c__0, &lcmq);

/*       When a column process of IXCOL has a column block A, */

	if (*ixcol >= 0) {
	    tbeta = 0.f;
	    if (myrow == jyrow) {
		tbeta = *beta;
	    }
	    kz = *nz;

/* Computing MIN */
	    i__2 = lcm, i__3 = iceil_(&nn, nb);
	    i__1 = min(i__2,i__3) - 1;
	    for (i__ = 0; i__ <= i__1; ++i__) {
		mcrow = (i__ % nprow + *ixrow) % nprow;
		mccol = (i__ % npcol + *iycol) % npcol;
		if (lcmq == 1) {
		    nq0 = numroc_(&nn, nb, &i__, &c__0, &npcol);
		}
		jdex = i__ / npcol * *nb;
		if (mrcol == 0) {
/* Computing MAX */
		    i__2 = 0, i__3 = jdex - *nz;
		    jdex = max(i__2,i__3);
		}

/*           A source node copies the blocks to WORK, and send it */

		if (myrow == mcrow && mycol == *ixcol) {

/*             The source node is a destination node */

		    idex = i__ / nprow * *nb;
		    if (mrrow == 0) {
/* Computing MAX */
			i__2 = 0, i__3 = idex - *nz;
			idex = max(i__2,i__3);
		    }
		    if (myrow == jyrow && mycol == mccol) {
			i__2 = np - idex;
			pbstr2b1_(icontxt, trans, &i__2, nb, &kz, &x[idex * *
				incx + 1], incx, &tbeta, &y[jdex * *incy + 1],
				 incy, &lcmp, &lcmq, (ftnlen)1);

/*             The source node sends blocks to a destination node */

		    } else {
			i__2 = np - idex;
			pbstr2b1_(icontxt, trans, &i__2, nb, &kz, &x[idex * *
				incx + 1], incx, &c_b12, &work[1], &c__1, &
				lcmp, &c__1, (ftnlen)1);
			i__2 = nq0 - kz;
			sgesd2d_(icontxt, &c__1, &i__2, &work[1], &c__1, &
				jyrow, &mccol);
		    }

/*           A destination node receives the copied vector */

		} else if (myrow == jyrow && mycol == mccol) {
		    if (lcmq == 1 && tbeta == 0.f) {
			i__2 = nq0 - kz;
			sgerv2d_(icontxt, &c__1, &i__2, &y[1], incy, &mcrow, 
				ixcol);
		    } else {
			i__2 = nq0 - kz;
			sgerv2d_(icontxt, &c__1, &i__2, &work[1], &c__1, &
				mcrow, ixcol);
			i__2 = nq - jdex;
			i__3 = lcmq * *nb;
			pbstr2a1_(icontxt, &i__2, nb, &kz, &work[1], &c__1, &
				tbeta, &y[jdex * *incy + 1], incy, &i__3);
		    }
		}
		kz = 0;
/* L20: */
	    }

/*         Broadcast a row block of WORK in each column of template */

	    if (*iyrow == -1) {
		if (myrow == jyrow) {
		    sgebs2d_(icontxt, "Col", "1-tree", &c__1, &nq, &y[1], 
			    incy, (ftnlen)3, (ftnlen)6);
		} else {
		    sgebr2d_(icontxt, "Col", "1-tree", &c__1, &nq, &y[1], 
			    incy, &jyrow, &mycol, (ftnlen)3, (ftnlen)6);
		}
	    }

/*       When all column procesors have a copy of the column block A, */

	} else {
	    if (lcmq == 1) {
		nq0 = nq;
	    }

/*         Processors, which have diagonal blocks of X, copy them to */
/*         WORK array in transposed form */

	    kz = 0;
	    if (mrrow == 0) {
		kz = *nz;
	    }
	    jz = 0;
	    if (mrrow == 0 && mycol == *iycol) {
		jz = *nz;
	    }

	    i__1 = lcmp - 1;
	    for (i__ = 0; i__ <= i__1; ++i__) {
		if (mrcol == (nprow * i__ + mrrow) % npcol) {
/* Computing MAX */
		    i__2 = 0, i__3 = i__ * *nb - kz;
		    idex = max(i__2,i__3);
		    if (lcmq == 1 && (*iyrow == -1 || *iyrow == myrow)) {
			i__2 = np - idex;
			pbstr2b1_(icontxt, trans, &i__2, nb, &jz, &x[idex * *
				incx + 1], incx, beta, &y[1], incy, &lcmp, &
				c__1, (ftnlen)1);
		    } else {
			i__2 = np - idex;
			pbstr2b1_(icontxt, trans, &i__2, nb, &jz, &x[idex * *
				incx + 1], incx, &c_b12, &work[1], &c__1, &
				lcmp, &c__1, (ftnlen)1);
		    }
		}
/* L30: */
	    }

/*         Get diagonal blocks of A for each column of the template */

	    mcrow = (mrcol % nprow + *ixrow) % nprow;
	    if (lcmq > 1) {
		mccol = (npcol + mycol - *iycol) % npcol;
		i__1 = iceil_(&nn, nb);
		pbstrget_(icontxt, "Row", &c__1, &nq0, &i__1, &work[1], &c__1,
			 &mcrow, &mccol, &igd, &myrow, &mycol, &nprow, &npcol,
			 (ftnlen)3);
	    }

/*         Broadcast a row block of WORK in every row of template */

	    if (*iyrow == -1) {
		if (myrow == mcrow) {
		    if (lcmq > 1) {
			kz = 0;
			if (mycol == *iycol) {
			    kz = *nz;
			}
			pbstrst1_(icontxt, "Row", &nq, nb, &kz, &work[1], &
				c__1, beta, &y[1], incy, &lcmp, &lcmq, &nq0, (
				ftnlen)3);
		    }
		    sgebs2d_(icontxt, "Col", "1-tree", &c__1, &nq, &y[1], 
			    incy, (ftnlen)3, (ftnlen)6);
		} else {
		    sgebr2d_(icontxt, "Col", "1-tree", &c__1, &nq, &y[1], 
			    incy, &mcrow, &mycol, (ftnlen)3, (ftnlen)6);
		}

/*         Send a row block of WORK to the destination row */

	    } else {
		if (lcmq == 1) {
		    if (myrow == mcrow) {
			if (myrow != *iyrow) {
			    sgesd2d_(icontxt, &c__1, &nq0, &work[1], &c__1, 
				    iyrow, &mycol);
			}
		    } else if (myrow == *iyrow) {
			if (*beta == 0.f) {
			    sgerv2d_(icontxt, &c__1, &nq0, &y[1], incy, &
				    mcrow, &mycol);
			} else {
			    sgerv2d_(icontxt, &c__1, &nq0, &work[1], &c__1, &
				    mcrow, &mycol);
			    pbsvecadd_(icontxt, "G", &nq0, &c_b49, &work[1], &
				    c__1, beta, &y[1], incy, (ftnlen)1);
			}
		    }

		} else {
/* Computing MIN */
/* Computing MAX */
		    i__3 = 0, i__4 = iceil_(&nn, nb) - mccol;
		    i__1 = lcmq, i__2 = max(i__3,i__4);
		    nq1 = nq0 * min(i__1,i__2);
		    if (myrow == mcrow) {
			if (myrow != *iyrow) {
			    sgesd2d_(icontxt, &c__1, &nq1, &work[1], &c__1, 
				    iyrow, &mycol);
			}
		    } else if (myrow == *iyrow) {
			sgerv2d_(icontxt, &c__1, &nq1, &work[1], &c__1, &
				mcrow, &mycol);
		    }

		    if (myrow == *iyrow) {
			kz = 0;
			if (mycol == *iycol) {
			    kz = *nz;
			}
			pbstrst1_(icontxt, "Row", &nq, nb, &kz, &work[1], &
				c__1, beta, &y[1], incy, &lcmp, &lcmq, &nq0, (
				ftnlen)3);
		    }
		}
	    }
	}

/*     When x is a row vector */

    } else {

/*       Form  y <== x'  ( x is a row block ) */

/*           || */
/*           || */
/*           ||               _____________ */
/*          (y)      <==      -----(x)----- */
/*           || */
/*           || */
/*           || */

	if (*ixrow < -1 || *ixrow >= nprow) {
	    info = 12;
	} else if (*ixcol < 0 || *ixcol >= npcol) {
	    info = 13;
	} else if (*iyrow < 0 || *iyrow >= nprow) {
	    info = 14;
	} else if (*iycol < -1 || *iycol >= npcol) {
	    info = 15;
	}
	if (info != 0) {
	    goto L10;
	}

/*       MRROW : row relative position in template from IYROW */
/*       MRCOL : column relative position in template from IXCOL */

	mrrow = (nprow + myrow - *iyrow) % nprow;
	mrcol = (npcol + mycol - *ixcol) % npcol;
	jycol = *iycol;
	if (*iycol == -1) {
	    jycol = *ixcol;
	}

	np = numroc_(&nn, nb, &myrow, iyrow, &nprow);
	if (mrrow == 0) {
	    np -= *nz;
	}
	nq = numroc_(&nn, nb, &mycol, ixcol, &npcol);
	if (mrcol == 0) {
	    nq -= *nz;
	}
	i__1 = numroc_(&nn, nb, &c__0, &c__0, &nprow);
	np0 = numroc_(&i__1, nb, &c__0, &c__0, &lcmp);

/*       When a row process of IXROW has a row block A, */

	if (*ixrow >= 0) {
	    tbeta = 0.f;
	    if (mycol == jycol) {
		tbeta = *beta;
	    }
	    kz = *nz;

/* Computing MIN */
	    i__2 = lcm, i__3 = iceil_(&nn, nb);
	    i__1 = min(i__2,i__3) - 1;
	    for (i__ = 0; i__ <= i__1; ++i__) {
		mcrow = (i__ % nprow + *iyrow) % nprow;
		mccol = (i__ % npcol + *ixcol) % npcol;
		if (lcmp == 1) {
		    np0 = numroc_(&nn, nb, &i__, &c__0, &nprow);
		}
		jdex = i__ / nprow * *nb;
		if (mrrow == 0) {
/* Computing MAX */
		    i__2 = 0, i__3 = jdex - *nz;
		    jdex = max(i__2,i__3);
		}

/*           A source node copies the blocks to WORK, and send it */

		if (myrow == *ixrow && mycol == mccol) {

/*             The source node is a destination node */

		    idex = i__ / npcol * *nb;
		    if (mrcol == 0) {
/* Computing MAX */
			i__2 = 0, i__3 = idex - *nz;
			idex = max(i__2,i__3);
		    }
		    if (myrow == mcrow && mycol == jycol) {
			i__2 = nq - idex;
			pbstr2b1_(icontxt, trans, &i__2, nb, &kz, &x[idex * *
				incx + 1], incx, &tbeta, &y[jdex * *incy + 1],
				 incy, &lcmq, &lcmp, (ftnlen)1);

/*             The source node sends blocks to a destination node */

		    } else {
			i__2 = nq - idex;
			pbstr2b1_(icontxt, trans, &i__2, nb, &kz, &x[idex * *
				incx + 1], incx, &c_b12, &work[1], &c__1, &
				lcmq, &c__1, (ftnlen)1);
			i__2 = np0 - kz;
			sgesd2d_(icontxt, &c__1, &i__2, &work[1], &c__1, &
				mcrow, &jycol);
		    }

/*           A destination node receives the copied blocks */

		} else if (myrow == mcrow && mycol == jycol) {
		    if (lcmp == 1 && tbeta == 0.f) {
			i__2 = np0 - kz;
			sgerv2d_(icontxt, &c__1, &i__2, &y[1], incy, ixrow, &
				mccol);
		    } else {
			i__2 = np0 - kz;
			sgerv2d_(icontxt, &c__1, &i__2, &work[1], &c__1, 
				ixrow, &mccol);
			i__2 = np - jdex;
			i__3 = lcmp * *nb;
			pbstr2a1_(icontxt, &i__2, nb, &kz, &work[1], &c__1, &
				tbeta, &y[jdex * *incy + 1], incy, &i__3);
		    }
		}
		kz = 0;
/* L40: */
	    }

/*         Broadcast a column vector Y in each row of template */

	    if (*iycol == -1) {
		if (mycol == jycol) {
		    sgebs2d_(icontxt, "Row", "1-tree", &c__1, &np, &y[1], 
			    incy, (ftnlen)3, (ftnlen)6);
		} else {
		    sgebr2d_(icontxt, "Row", "1-tree", &c__1, &np, &y[1], 
			    incy, &myrow, &jycol, (ftnlen)3, (ftnlen)6);
		}
	    }

/*       When all row procesors have a copy of the row block A, */

	} else {
	    if (lcmp == 1) {
		np0 = np;
	    }

/*         Processors, which have diagonal blocks of A, copy them to */
/*         WORK array in transposed form */

	    kz = 0;
	    if (mrcol == 0) {
		kz = *nz;
	    }
	    jz = 0;
	    if (mrcol == 0 && myrow == *iyrow) {
		jz = *nz;
	    }

	    i__1 = lcmq - 1;
	    for (i__ = 0; i__ <= i__1; ++i__) {
		if (mrrow == (npcol * i__ + mrcol) % nprow) {
/* Computing MAX */
		    i__2 = 0, i__3 = i__ * *nb - kz;
		    idex = max(i__2,i__3);
		    if (lcmp == 1 && (*iycol == -1 || *iycol == mycol)) {
			i__2 = nq - idex;
			pbstr2b1_(icontxt, trans, &i__2, nb, &jz, &x[idex * *
				incx + 1], incx, beta, &y[1], incy, &lcmq, &
				c__1, (ftnlen)1);
		    } else {
			i__2 = nq - idex;
			pbstr2b1_(icontxt, trans, &i__2, nb, &jz, &x[idex * *
				incx + 1], incx, &c_b12, &work[1], &c__1, &
				lcmq, &c__1, (ftnlen)1);
		    }
		}
/* L50: */
	    }

/*         Get diagonal blocks of A for each row of the template */

	    mccol = (mrrow % npcol + *ixcol) % npcol;
	    if (lcmp > 1) {
		mcrow = (nprow + myrow - *iyrow) % nprow;
		i__1 = iceil_(&nn, nb);
		pbstrget_(icontxt, "Col", &c__1, &np0, &i__1, &work[1], &c__1,
			 &mcrow, &mccol, &igd, &myrow, &mycol, &nprow, &npcol,
			 (ftnlen)3);
	    }

/*         Broadcast a column block of WORK in every column of template */

	    if (*iycol == -1) {
		if (mycol == mccol) {
		    if (lcmp > 1) {
			kz = 0;
			if (myrow == *iyrow) {
			    kz = *nz;
			}
			pbstrst1_(icontxt, "Col", &np, nb, &kz, &work[1], &
				c__1, beta, &y[1], incy, &lcmp, &lcmq, &np0, (
				ftnlen)3);
		    }
		    sgebs2d_(icontxt, "Row", "1-tree", &c__1, &np, &y[1], 
			    incy, (ftnlen)3, (ftnlen)6);
		} else {
		    sgebr2d_(icontxt, "Row", "1-tree", &c__1, &np, &y[1], 
			    incy, &myrow, &mccol, (ftnlen)3, (ftnlen)6);
		}

/*         Send a column block of WORK to the destination column */

	    } else {
		if (lcmp == 1) {
		    if (mycol == mccol) {
			if (mycol != *iycol) {
			    sgesd2d_(icontxt, &c__1, &np, &work[1], &c__1, &
				    myrow, iycol);
			}
		    } else if (mycol == *iycol) {
			if (*beta == 0.f) {
			    sgerv2d_(icontxt, &c__1, &np, &y[1], incy, &myrow,
				     &mccol);
			} else {
			    sgerv2d_(icontxt, &c__1, &np, &work[1], &c__1, &
				    myrow, &mccol);
			    pbsvecadd_(icontxt, "G", &np, &c_b49, &work[1], &
				    c__1, beta, &y[1], incy, (ftnlen)1);
			}
		    }

		} else {
/* Computing MIN */
/* Computing MAX */
		    i__3 = 0, i__4 = iceil_(&nn, nb) - mcrow;
		    i__1 = lcmp, i__2 = max(i__3,i__4);
		    np1 = np0 * min(i__1,i__2);
		    if (mycol == mccol) {
			if (mycol != *iycol) {
			    sgesd2d_(icontxt, &c__1, &np1, &work[1], &c__1, &
				    myrow, iycol);
			}
		    } else if (mycol == *iycol) {
			sgerv2d_(icontxt, &c__1, &np1, &work[1], &c__1, &
				myrow, &mccol);
		    }

		    if (mycol == *iycol) {
			kz = 0;
			if (myrow == *iyrow) {
			    kz = *nz;
			}
			pbstrst1_(icontxt, "Col", &np, nb, &kz, &work[1], &
				c__1, beta, &y[1], incy, &lcmp, &lcmq, &np0, (
				ftnlen)3);
		    }
		}
	    }
	}
    }

    return 0;

/*     End of PBSTRNV */

} /* pbstrnv_ */


/* ======================================================================= */
/*     SUBROUTINE PBSTR2A1 */
/* ======================================================================= */

/* Subroutine */ int pbstr2a1_(integer *icontxt, integer *n, integer *nb, 
	integer *nz, real *x, integer *incx, real *beta, real *y, integer *
	incy, integer *intv)
{
    /* System generated locals */
    integer i__1, i__2, i__3;

    /* Local variables */
    integer k;
    extern /* Subroutine */ int pbsvecadd_(integer *, char *, integer *, real 
	    *, real *, integer *, real *, real *, integer *, ftnlen);
    integer ix, iy, jz, iter;
    extern integer iceil_(integer *, integer *);


/*  -- PB-BLAS routine (version 2.1) -- */
/*     University of Tennessee, Knoxville, Oak Ridge National Laboratory. */
/*     April 28, 1996 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*     y <== x */
/*     y is a scattered vector, copied from a condensed vector x. */

/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Parameters .. */
/*     .. */
/*     .. Local Variables .. */

    /* Parameter adjustments */
    --y;
    --x;

    /* Function Body */
    ix = 0;
    iy = 0;
    jz = *nz;
    i__1 = *n + *nz;
    iter = iceil_(&i__1, intv);

    if (iter > 1) {
	i__1 = *nb - jz;
	pbsvecadd_(icontxt, "G", &i__1, &c_b49, &x[ix * *incx + 1], incx, 
		beta, &y[iy * *incy + 1], incy, (ftnlen)1);
	ix = ix + *nb - jz;
	iy = iy + *intv - jz;
	jz = 0;

	i__1 = iter - 1;
	for (k = 2; k <= i__1; ++k) {
	    pbsvecadd_(icontxt, "G", nb, &c_b49, &x[ix * *incx + 1], incx, 
		    beta, &y[iy * *incy + 1], incy, (ftnlen)1);
	    ix += *nb;
	    iy += *intv;
/* L10: */
	}
    }

/* Computing MIN */
    i__2 = *n - iy, i__3 = *nb - jz;
    i__1 = min(i__2,i__3);
    pbsvecadd_(icontxt, "G", &i__1, &c_b49, &x[ix * *incx + 1], incx, beta, &
	    y[iy * *incy + 1], incy, (ftnlen)1);

    return 0;

/*     End of PBSTR2A1 */

} /* pbstr2a1_ */


/* ======================================================================= */
/*     SUBROUTINE PBSTR2B1 */
/* ======================================================================= */

/* Subroutine */ int pbstr2b1_(integer *icontxt, char *trans, integer *n, 
	integer *nb, integer *nz, real *x, integer *incx, real *beta, real *y,
	 integer *incy, integer *jinx, integer *jiny, ftnlen trans_len)
{
    /* System generated locals */
    integer i__1, i__2, i__3;

    /* Local variables */
    integer k;
    extern /* Subroutine */ int pbsvecadd_(integer *, char *, integer *, real 
	    *, real *, integer *, real *, real *, integer *, ftnlen);
    integer ix, iy, jz, iter, lenx, leny;
    extern integer iceil_(integer *, integer *);


/*  -- PB-BLAS routine (version 2.1) -- */
/*     University of Tennessee, Knoxville, Oak Ridge National Laboratory. */
/*     April 28, 1996 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*     y <== x + beta * y */
/*     y is a condensed vector, copied from a scattered vector x */

/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Parameters .. */
/*     .. */
/*     .. Local Variables .. */

    /* Parameter adjustments */
    --y;
    --x;

    /* Function Body */
    if (*jinx == 1 && *jiny == 1) {
	pbsvecadd_(icontxt, trans, n, &c_b49, &x[1], incx, beta, &y[1], incy, 
		(ftnlen)1);

    } else {
	ix = 0;
	iy = 0;
	jz = *nz;
	lenx = *nb * *jinx;
	leny = *nb * *jiny;
	i__1 = *n + *nz;
	iter = iceil_(&i__1, &lenx);

	if (iter > 1) {
	    i__1 = *nb - jz;
	    pbsvecadd_(icontxt, trans, &i__1, &c_b49, &x[ix * *incx + 1], 
		    incx, beta, &y[iy * *incy + 1], incy, (ftnlen)1);
	    ix = ix + lenx - jz;
	    iy = iy + leny - jz;
	    jz = 0;

	    i__1 = iter - 1;
	    for (k = 2; k <= i__1; ++k) {
		pbsvecadd_(icontxt, trans, nb, &c_b49, &x[ix * *incx + 1], 
			incx, beta, &y[iy * *incy + 1], incy, (ftnlen)1);
		ix += lenx;
		iy += leny;
/* L10: */
	    }
	}

/* Computing MIN */
	i__2 = *n - ix, i__3 = *nb - jz;
	i__1 = min(i__2,i__3);
	pbsvecadd_(icontxt, trans, &i__1, &c_b49, &x[ix * *incx + 1], incx, 
		beta, &y[iy * *incy + 1], incy, (ftnlen)1);
    }

    return 0;

/*     End of PBSTR2B1 */

} /* pbstr2b1_ */

