/* strsm.f -- translated by f2c (version 20030320).
   You must link the resulting object file with the libraries:
	-lf2c -lm   (in that order)
*/

#include "f2cblas.h"

/* Table of constant values */

/* Subroutine */ int strsm_(char *side, char *uplo, char *transa, char *diag, 
	integer *m, integer *n, real *alpha, real *a, integer *lda, real *c__,
	 integer *ldc, ftnlen side_len, ftnlen uplo_len, ftnlen transa_len, 
	ftnlen diag_len)
{
real c_b15 = 0.f;
real c_b22 = -1.f;

integer c__0 = 0;
integer c__91 = 91;
integer c__92 = 92;
integer c__1 = 1;
integer c__64 = 64;
integer c__93 = 93;
    /* System generated locals */
    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4;

    /* Local variables */
    integer i__, j;
    real t1[4096]	/* was [64][64] */, t2[4096]	/* was [64][64] */, 
	    t3[4096]	/* was [64][64] */;
    integer ii, jj, ix, jx;
    logical cldc;
    integer offd, isec, jsec;
    extern logical scld_(integer *);
    integer info, tsec;
    logical notr;
    real gamma, delta;
    logical lside;
    
    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
	    integer *, real *, real *, integer *, real *, integer *, real *, 
	    real *, integer *, ftnlen, ftnlen), sgemv_(char *, integer *, 
	    integer *, real *, real *, integer *, real *, integer *, real *, 
	    real *, integer *, ftnlen);
    integer nrowa;
    logical upper;
    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, 
	    integer *);
    logical tinym, tinyn;
    extern /* Subroutine */ int strsv_(char *, char *, char *, integer *, 
	    real *, integer *, real *, integer *, ftnlen, ftnlen, ftnlen);
    logical smalln, nounit;

/*     .. Scalar Arguments .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  STRSM  solves one of the matrix equations */

/*     op( A )*X = alpha*C,   or   X*op( A ) = alpha*C, */

/*  where alpha is a scalar, X and C are m by n matrices, A is a unit, or */
/*  non-unit,  upper or lower triangular matrix  and  op( A )  is one  of */

/*     op( A ) = A   or   op( A ) = A'. */

/*  The matrix X is overwritten on C. */

/*  Parameters */
/*  ========== */

/*  SIDE   - CHARACTER*1. */
/*           On entry, SIDE specifies whether op( A ) appears on the left */
/*           or right of X as follows: */

/*              SIDE = 'L' or 'l'   op( A )*X = alpha*C. */

/*              SIDE = 'R' or 'r'   X*op( A ) = alpha*C. */

/*           Unchanged on exit. */

/*  UPLO   - CHARACTER*1. */
/*           On entry, UPLO specifies whether the matrix A is an upper or */
/*           lower triangular matrix as follows: */

/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */

/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */

/*           Unchanged on exit. */

/*  TRANSA - CHARACTER*1. */
/*           On entry, TRANSA specifies the form of op( A ) to be used in */
/*           the matrix multiplication as follows: */

/*              TRANSA = 'N' or 'n'   op( A ) = A. */

/*              TRANSA = 'T' or 't'   op( A ) = A'. */

/*              TRANSA = 'C' or 'c'   op( A ) = A'. */

/*           Unchanged on exit. */

/*  DIAG   - CHARACTER*1. */
/*           On entry, DIAG specifies whether or not A is unit triangular */
/*           as follows: */

/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */

/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
/*                                  triangular. */

/*           Unchanged on exit. */

/*  M      - INTEGER. */
/*           On entry, M specifies the number of rows of C. M must be at */
/*           least zero. */
/*           Unchanged on exit. */

/*  N      - INTEGER. */
/*           On entry, N specifies the number of columns of C.  N must be */
/*           at least zero. */
/*           Unchanged on exit. */

/*  ALPHA  - REAL. */
/*           On entry,  ALPHA specifies the scalar  alpha. When  alpha is */
/*           zero then  A is not referenced and  C need not be set before */
/*           entry. */
/*           Unchanged on exit. */

/*  A      - REAL array of DIMENSION ( LDA, k ), where k is m */
/*           when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'. */
/*           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k */
/*           upper triangular part of the array  A must contain the upper */
/*           triangular matrix  and the strictly lower triangular part of */
/*           A is not referenced. */
/*           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k */
/*           lower triangular part of the array  A must contain the lower */
/*           triangular matrix  and the strictly upper triangular part of */
/*           A is not referenced. */
/*           Note that when  DIAG = 'U' or 'u',  the diagonal elements of */
/*           A  are not referenced either,  but are assumed to be  unity. */
/*           Unchanged on exit. */

/*  LDA    - INTEGER. */
/*           On entry, LDA specifies the first dimension of A as declared */
/*           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then */
/*           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r' */
/*           then LDA must be at least max( 1, n ). */
/*           Unchanged on exit. */

/*  C      - REAL array of DIMENSION ( LDC, n ). */
/*           Before entry,  the leading  m by n part of the array  C must */
/*           contain  the  right-hand  side  matrix  C,  and  on exit  is */
/*           overwritten by the solution matrix  X. */

/*  LDC    - INTEGER. */
/*           On entry, LDC specifies the first dimension of C as declared */
/*           in  the  calling  (sub)  program.   LDC  must  be  at  least */
/*           max( 1, m ). */
/*           Unchanged on exit. */


/*  Level 3 Blas routine. */

/*  -- Written on 8-February-1989. */
/*     Jack Dongarra, Argonne National Laboratory. */
/*     Iain Duff, AERE Harwell. */
/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
/*     Sven Hammarling, Numerical Algorithms Group Ltd. */

/*  -- Rewritten in December-1993. */
/*     GEMM-Based Level 3 BLAS. */
/*     Per Ling, Institute of Information Processing, */
/*     University of Umea, Sweden. */


/*     .. Local Scalars .. */
/*     .. Intrinsic Functions .. */
/*     .. External Functions .. */
/*     .. External Subroutines .. */
/*     .. Parameters .. */
/*     .. User specified parameters for STRSM .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    c_dim1 = *ldc;
    c_offset = 1 + c_dim1;
    c__ -= c_offset;

    /* Function Body */
    lside = (side[0]=='L'?1:0);
    upper = (uplo[0]=='U'?1:0);
    notr = (transa[0]=='N'?1:0);
    nounit = (diag[0]=='N'?1:0);
    if (nounit) {
	offd = 0;
    } else {
	offd = 1;
    }
    if (lside) {
	nrowa = *m;
    } else {
	nrowa = *n;
    }
    info = 0;
    if (! lside && ! (side[0]=='R'?1:0)) {
	info = 1;
    } else if (! upper && ! (uplo[0]=='L'?1:0)) {
	info = 2;
    } else if (! notr && ! (transa[0]=='T'?1:0) && ! 
	    (transa[0]=='C'?1:0)) {
	info = 3;
    } else if (! nounit && ! (diag[0]=='U'?1:0)) {
	info = 4;
    } else if (*m < 0) {
	info = 5;
    } else if (*n < 0) {
	info = 6;
    } else if (*lda < max(1,nrowa)) {
	info = 9;
    } else if (*ldc < max(1,*m)) {
	info = 11;
    }
    if (info != 0) {
	xerbla_("STRSM ", &info, (ftnlen)6);
	return 0;
    }

/*     Quick return if possible. */

    if (*m == 0 || *n == 0) {
	return 0;
    }

/*     And when alpha.eq.zero. */

    if (*alpha == 0.f) {
	i__1 = max(*lda,*ldc);
	i__2 = max(*lda,*ldc);
	sgemm_("N", "N", m, n, &c__0, &c_b15, &c__[c_offset], &i__1, &c__[
		c_offset], &i__2, &c_b15, &c__[c_offset], ldc, (ftnlen)1, (
		ftnlen)1);
	return 0;
    }

/*     Start the operations. */

    if (lside) {
	if (upper) {
	    if (notr) {

/*              SSOLVE  A*X = alpha*C. Left, Upper, No transpose. */

		smalln = ! sbigp_(&c__91, m, n);
		if (smalln) {
		    tinyn = ! sbigp_(&c__92, m, n);
		    for (ii = *m - (*m - 1) % 64; ii >= 1; ii += -64) {
/* Computing MIN */
			i__1 = 64, i__2 = *m - ii + 1;
			isec = min(i__1,i__2);

/*                    C := -1*A*C + alpha*C, general matrix multiply */
/*                    involving a rectangular block of A. */

			i__1 = *m - ii - isec + 1;
			sgemm_("N", "N", &isec, n, &i__1, &c_b22, &a[ii + (ii 
				+ isec) * a_dim1], lda, &c__[ii + isec + 
				c_dim1], ldc, alpha, &c__[ii + c_dim1], ldc, (
				ftnlen)1, (ftnlen)1);
			if (tinyn) {

/*                       SSOLVE A*X = C, triangular system ssolve involving */
/*                       a upper triangular diagonal block of A. The */
/*                       block of X is overwritten on C. */

			    i__1 = *n;
			    for (j = 1; j <= i__1; ++j) {
				strsv_("U", "N", diag, &isec, &a[ii + ii * 
					a_dim1], lda, &c__[ii + j * c_dim1], &
					c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1)
					;
/* L10: */
			    }
			} else {

/*                       T3 := A, a upper unit or non-unit triangular */
/*                       diagonal block of A is copied to the upper */
/*                       triangular part of T3. */

			    i__1 = ii + isec - 1;
			    for (i__ = ii + offd; i__ <= i__1; ++i__) {
				i__2 = i__ - ii + 1 - offd;
				scopy_(&i__2, &a[ii + i__ * a_dim1], &c__1, &
					t3[(i__ - ii + 1 << 6) - 64], &c__1);
/* L20: */
			    }

/*                       SSOLVE T3*X = C, triangular system ssolve */
/*                       involving a upper triangular diagonal block of A */
/*                       stored in T3. The block of X is overwritten */
/*                       on C. */

			    i__1 = *n;
			    for (j = 1; j <= i__1; ++j) {
				strsv_("U", "N", diag, &isec, t3, &c__64, &
					c__[ii + j * c_dim1], &c__1, (ftnlen)
					1, (ftnlen)1, (ftnlen)1);
/* L30: */
			    }
			}
/* L40: */
		    }
		} else {
		    delta = 1.f;
		    cldc = scld_(ldc);
		    for (ii = *m - (*m - 1) % 64; ii >= 1; ii += -64) {
/* Computing MIN */
			i__1 = 64, i__2 = *m - ii + 1;
			isec = min(i__1,i__2);

/*                    C := -1*A*C + alpha*C, general matrix multiply */
/*                    involving a rectangular block of A. */

			i__1 = *m - ii - isec + 1;
			sgemm_("N", "N", &isec, n, &i__1, &c_b22, &a[ii + (ii 
				+ isec) * a_dim1], lda, &c__[ii + isec + 
				c_dim1], ldc, alpha, &c__[ii + c_dim1], ldc, (
				ftnlen)1, (ftnlen)1);

/*                    T2 := A', the transpose of a upper unit or non-unit */
/*                    triangular diagonal block of A is copied to the */
/*                    lower triangular part of T2. */

			i__1 = ii + isec - 1;
			for (i__ = ii + offd; i__ <= i__1; ++i__) {
			    i__2 = i__ - ii + 1 - offd;
			    scopy_(&i__2, &a[ii + i__ * a_dim1], &c__1, &t2[
				    i__ - ii], &c__64);
/* L50: */
			}
			i__1 = *n;
			for (jj = 1; jj <= i__1; jj += 64) {
/* Computing MIN */
			    i__2 = 64, i__3 = *n - jj + 1;
			    jsec = min(i__2,i__3);

/*                       T1 := C', the transpose of a rectangular block */
/*                       of C is copied to T1. */

			    if (cldc) {
				i__2 = jj + jsec - 1;
				for (j = jj; j <= i__2; ++j) {
				    scopy_(&isec, &c__[ii + j * c_dim1], &
					    c__1, &t1[j - jj], &c__64);
/* L60: */
				}
			    } else {
				i__2 = ii + isec - 1;
				for (i__ = ii; i__ <= i__2; ++i__) {
				    scopy_(&jsec, &c__[i__ + jj * c_dim1], 
					    ldc, &t1[(i__ - ii + 1 << 6) - 64]
					    , &c__1);
/* L70: */
				}
			    }

/*                       T1 := gamma*T1*T2 + delta*T1, triangular matrix */
/*                       multiply where the values of gamma and delta */
/*                       depend on whether T2 stores a unit or non-unit */
/*                       triangular block. Gamma and tsec are also used */
/*                       to compensate for a deficiency in SGEMV that */
/*                       appears if the second dimension (tsec) is zero. */

			    i__2 = ii;
			    for (i__ = ii + isec - 1; i__ >= i__2; --i__) {
				if (nounit) {
				    delta = 1.f / t2[i__ - ii + 1 + (i__ - ii 
					    + 1 << 6) - 65];
				}
				gamma = -delta;
				tsec = ii + isec - 1 - i__;
				if (tsec == 0) {
				    tsec = 1;
				    gamma = 0.f;
				}
				sgemv_("N", &jsec, &tsec, &gamma, &t1[(i__ - 
					ii + 2 << 6) - 64], &c__64, &t2[i__ - 
					ii + 2 + (i__ - ii + 1 << 6) - 65], &
					c__1, &delta, &t1[(i__ - ii + 1 << 6) 
					- 64], &c__1, (ftnlen)1);
/* L80: */
			    }

/*                       C := T1', the transpose of T1 is copied back */
/*                       to C. */

			    i__2 = jj + jsec - 1;
			    for (j = jj; j <= i__2; ++j) {
				scopy_(&isec, &t1[j - jj], &c__64, &c__[ii + 
					j * c_dim1], &c__1);
/* L90: */
			    }
/* L100: */
			}
/* L110: */
		    }
		}
	    } else {

/*              SSOLVE  A'*X = alpha*C. Left, Upper, Transpose. */

		smalln = ! sbigp_(&c__91, m, n);
		if (smalln) {
		    tinyn = ! sbigp_(&c__92, m, n);
		    i__1 = *m;
		    for (ii = 1; ii <= i__1; ii += 64) {
/* Computing MIN */
			i__2 = 64, i__3 = *m - ii + 1;
			isec = min(i__2,i__3);

/*                    C := -1*A'*C + alpha*C, general matrix multiply */
/*                    involving the transpose of a rectangular block */
/*                    of A. */

			i__2 = ii - 1;
			sgemm_("T", "N", &isec, n, &i__2, &c_b22, &a[ii * 
				a_dim1 + 1], lda, &c__[c_dim1 + 1], ldc, 
				alpha, &c__[ii + c_dim1], ldc, (ftnlen)1, (
				ftnlen)1);
			if (tinyn) {

/*                       SSOLVE A'*X = C, triangular system ssolve */
/*                       involving the transpose of a upper triangular */
/*                       diagonal block of A. The block of X is */
/*                       overwritten on C. */

			    i__2 = *n;
			    for (j = 1; j <= i__2; ++j) {
				strsv_("U", "T", diag, &isec, &a[ii + ii * 
					a_dim1], lda, &c__[ii + j * c_dim1], &
					c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1)
					;
/* L120: */
			    }
			} else {

/*                       T3 :=  A', the transpose of a upper unit or */
/*                       non-unit triangular diagonal block of A is */
/*                       copied to the lower triangular part of T3. */

			    i__2 = ii + isec - 1;
			    for (i__ = ii + offd; i__ <= i__2; ++i__) {
				i__3 = i__ - ii + 1 - offd;
				scopy_(&i__3, &a[ii + i__ * a_dim1], &c__1, &
					t3[i__ - ii], &c__64);
/* L130: */
			    }

/*                       SSOLVE T3*X = C, triangular system ssolve */
/*                       involving the transpose of a upper triangular */
/*                       diagonal block of A stored in T3. The block of X */
/*                       is overwritten on C. */

			    i__2 = *n;
			    for (j = 1; j <= i__2; ++j) {
				strsv_("L", "N", diag, &isec, t3, &c__64, &
					c__[ii + j * c_dim1], &c__1, (ftnlen)
					1, (ftnlen)1, (ftnlen)1);
/* L140: */
			    }
			}
/* L150: */
		    }
		} else {
		    delta = 1.f;
		    cldc = scld_(ldc);
		    i__1 = *m;
		    for (ii = 1; ii <= i__1; ii += 64) {
/* Computing MIN */
			i__2 = 64, i__3 = *m - ii + 1;
			isec = min(i__2,i__3);

/*                    C := -1*A'*C + alpha*C, general matrix multiply */
/*                    involving the transpose of a rectangular block */
/*                    of A. */

			i__2 = ii - 1;
			sgemm_("T", "N", &isec, n, &i__2, &c_b22, &a[ii * 
				a_dim1 + 1], lda, &c__[c_dim1 + 1], ldc, 
				alpha, &c__[ii + c_dim1], ldc, (ftnlen)1, (
				ftnlen)1);
			i__2 = *n;
			for (jj = 1; jj <= i__2; jj += 64) {
/* Computing MIN */
			    i__3 = 64, i__4 = *n - jj + 1;
			    jsec = min(i__3,i__4);

/*                       T1 := C', the transpose of a rectangular block */
/*                       of C is copied to T1. */

			    if (cldc) {
				i__3 = jj + jsec - 1;
				for (j = jj; j <= i__3; ++j) {
				    scopy_(&isec, &c__[ii + j * c_dim1], &
					    c__1, &t1[j - jj], &c__64);
/* L160: */
				}
			    } else {
				i__3 = ii + isec - 1;
				for (i__ = ii; i__ <= i__3; ++i__) {
				    scopy_(&jsec, &c__[i__ + jj * c_dim1], 
					    ldc, &t1[(i__ - ii + 1 << 6) - 64]
					    , &c__1);
/* L170: */
				}
			    }

/*                       T1 := gamma*T1*A + delta*T1, triangular matrix */
/*                       multiply where the values of gamma and delta */
/*                       depend on whether A is a unit or non-unit */
/*                       triangular matrix. Gamma and tsec are also used */
/*                       to compensate for a deficiency in SGEMV that */
/*                       appears if the second dimension (tsec) is zero. */

			    i__3 = ii + isec - 1;
			    for (i__ = ii; i__ <= i__3; ++i__) {
				if (nounit) {
				    delta = 1.f / a[i__ + i__ * a_dim1];
				}
				gamma = -delta;
				tsec = i__ - ii;
				if (tsec == 0) {
				    tsec = 1;
				    gamma = 0.f;
				}
				sgemv_("N", &jsec, &tsec, &gamma, t1, &c__64, 
					&a[ii + i__ * a_dim1], &c__1, &delta, 
					&t1[(i__ - ii + 1 << 6) - 64], &c__1, 
					(ftnlen)1);
/* L180: */
			    }

/*                       C := T1', the transpose of T1 is copied back */
/*                       to C. */

			    i__3 = jj + jsec - 1;
			    for (j = jj; j <= i__3; ++j) {
				scopy_(&isec, &t1[j - jj], &c__64, &c__[ii + 
					j * c_dim1], &c__1);
/* L190: */
			    }
/* L200: */
			}
/* L210: */
		    }
		}
	    }
	} else {
	    if (notr) {

/*              SSOLVE  A*X = alpha*C. Left, Lower, No transpose. */

		smalln = ! sbigp_(&c__91, m, n);
		if (smalln) {
		    tinyn = ! sbigp_(&c__92, m, n);
		    i__1 = *m;
		    for (ix = (*m - 1) % 64 + 1; ix <= i__1; ix += 64) {
/* Computing MAX */
			i__2 = 1, i__3 = ix - 63;
			ii = max(i__2,i__3);
			isec = ix - ii + 1;

/*                    C := -1*A*C + alpha*C, general matrix multiply */
/*                    involving a rectangular block of A. */

			i__2 = ii - 1;
			sgemm_("N", "N", &isec, n, &i__2, &c_b22, &a[ii + 
				a_dim1], lda, &c__[c_dim1 + 1], ldc, alpha, &
				c__[ii + c_dim1], ldc, (ftnlen)1, (ftnlen)1);
			if (tinyn) {

/*                       SSOLVE A*X = C, triangular system ssolve involving */
/*                       a lower triangular diagonal block of A. The */
/*                       block of X is overwritten on C. */

			    i__2 = *n;
			    for (j = 1; j <= i__2; ++j) {
				strsv_("L", "N", diag, &isec, &a[ii + ii * 
					a_dim1], lda, &c__[ii + j * c_dim1], &
					c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1)
					;
/* L220: */
			    }
			} else {

/*                       T3 := A, a lower unit or non-unit triangular */
/*                       diagonal block of A is copied to the lower */
/*                       triangular part of T3. The block of X is */
/*                       overwritten on C. */

			    i__2 = ii + isec - 1 - offd;
			    for (i__ = ii; i__ <= i__2; ++i__) {
				i__3 = ii + isec - i__ - offd;
				scopy_(&i__3, &a[i__ + offd + i__ * a_dim1], &
					c__1, &t3[i__ - ii + 1 + offd + (i__ 
					- ii + 1 << 6) - 65], &c__1);
/* L230: */
			    }

/*                       SSOLVE T3*X = C, triangular system ssolve */
/*                       involving a lower triangular diagonal block of A */
/*                       stored in T3. The block of X is overwritten */
/*                       on C. */

			    i__2 = *n;
			    for (j = 1; j <= i__2; ++j) {
				strsv_("L", "N", diag, &isec, t3, &c__64, &
					c__[ii + j * c_dim1], &c__1, (ftnlen)
					1, (ftnlen)1, (ftnlen)1);
/* L240: */
			    }
			}
/* L250: */
		    }
		} else {
		    delta = 1.f;
		    cldc = scld_(ldc);
		    i__1 = *m;
		    for (ix = (*m - 1) % 64 + 1; ix <= i__1; ix += 64) {
/* Computing MAX */
			i__2 = 1, i__3 = ix - 63;
			ii = max(i__2,i__3);
			isec = ix - ii + 1;

/*                    C := -1*A*C + alpha*C, general matrix multiply */
/*                    involving a rectangular block of A. */

			i__2 = ii - 1;
			sgemm_("N", "N", &isec, n, &i__2, &c_b22, &a[ii + 
				a_dim1], lda, &c__[c_dim1 + 1], ldc, alpha, &
				c__[ii + c_dim1], ldc, (ftnlen)1, (ftnlen)1);

/*                    T2 := A', the transpose of a lower unit or non-unit */
/*                    triangular diagonal block of A is copied to the */
/*                    upper triangular part of T2. */

			i__2 = ii + isec - 1 - offd;
			for (i__ = ii; i__ <= i__2; ++i__) {
			    i__3 = ii + isec - i__ - offd;
			    scopy_(&i__3, &a[i__ + offd + i__ * a_dim1], &
				    c__1, &t2[i__ - ii + 1 + (i__ - ii + 1 + 
				    offd << 6) - 65], &c__64);
/* L260: */
			}
			i__2 = *n;
			for (jj = 1; jj <= i__2; jj += 64) {
/* Computing MIN */
			    i__3 = 64, i__4 = *n - jj + 1;
			    jsec = min(i__3,i__4);

/*                       T1 := C', the transpose of a rectangular block */
/*                       of C is copied to T1. */

			    if (cldc) {
				i__3 = jj + jsec - 1;
				for (j = jj; j <= i__3; ++j) {
				    scopy_(&isec, &c__[ii + j * c_dim1], &
					    c__1, &t1[j - jj], &c__64);
/* L270: */
				}
			    } else {
				i__3 = ii + isec - 1;
				for (i__ = ii; i__ <= i__3; ++i__) {
				    scopy_(&jsec, &c__[i__ + jj * c_dim1], 
					    ldc, &t1[(i__ - ii + 1 << 6) - 64]
					    , &c__1);
/* L280: */
				}
			    }

/*                       T1 := gamma*T1*T2 + delta*T1, triangular matrix */
/*                       multiply where the values of gamma and delta */
/*                       depend on whether T2 stores a unit or non-unit */
/*                       triangular block. Gamma and tsec are also used */
/*                       to compensate for a deficiency in SGEMV that */
/*                       appears if the second dimension (tsec) is zero. */

			    i__3 = ii + isec - 1;
			    for (i__ = ii; i__ <= i__3; ++i__) {
				if (nounit) {
				    delta = 1.f / t2[i__ - ii + 1 + (i__ - ii 
					    + 1 << 6) - 65];
				}
				gamma = -delta;
				tsec = i__ - ii;
				if (tsec == 0) {
				    tsec = 1;
				    gamma = 0.f;
				}
				sgemv_("N", &jsec, &tsec, &gamma, t1, &c__64, 
					&t2[(i__ - ii + 1 << 6) - 64], &c__1, 
					&delta, &t1[(i__ - ii + 1 << 6) - 64],
					 &c__1, (ftnlen)1);
/* L290: */
			    }

/*                       C := T1', the transpose of T1 is copied back */
/*                       to C. */

			    i__3 = jj + jsec - 1;
			    for (j = jj; j <= i__3; ++j) {
				scopy_(&isec, &t1[j - jj], &c__64, &c__[ii + 
					j * c_dim1], &c__1);
/* L300: */
			    }
/* L310: */
			}
/* L320: */
		    }
		}
	    } else {

/*              SSOLVE  A'*X = alpha*C. Left, Lower, Transpose. */

		smalln = ! sbigp_(&c__91, m, n);
		if (smalln) {
		    tinyn = ! sbigp_(&c__92, m, n);
		    for (ix = *m; ix >= 1; ix += -64) {
/* Computing MAX */
			i__1 = 1, i__2 = ix - 63;
			ii = max(i__1,i__2);
			isec = ix - ii + 1;

/*                    C := -1*A'*C + alpha*C, general matrix multiply */
/*                    involving the transpose of a rectangular block */
/*                    of A. */

			i__1 = *m - ii - isec + 1;
			sgemm_("T", "N", &isec, n, &i__1, &c_b22, &a[ii + 
				isec + ii * a_dim1], lda, &c__[ii + isec + 
				c_dim1], ldc, alpha, &c__[ii + c_dim1], ldc, (
				ftnlen)1, (ftnlen)1);
			if (tinyn) {

/*                       SSOLVE A'*X = C, triangular system ssolve */
/*                       involving the transpose of a lower triangular */
/*                       diagonal block of A. The block of X is */
/*                       overwritten on C. */

			    i__1 = *n;
			    for (j = 1; j <= i__1; ++j) {
				strsv_("L", "T", diag, &isec, &a[ii + ii * 
					a_dim1], lda, &c__[ii + j * c_dim1], &
					c__1, (ftnlen)1, (ftnlen)1, (ftnlen)1)
					;
/* L330: */
			    }
			} else {

/*                       T3 :=  A', the transpose of a lower unit or */
/*                       non-unit triangular diagonal block of A is */
/*                       copied to the upper triangular part of T3. */

			    i__1 = ii + isec - 1 - offd;
			    for (i__ = ii; i__ <= i__1; ++i__) {
				i__2 = ii + isec - i__ - offd;
				scopy_(&i__2, &a[i__ + offd + i__ * a_dim1], &
					c__1, &t3[i__ - ii + 1 + (i__ - ii + 
					1 + offd << 6) - 65], &c__64);
/* L340: */
			    }

/*                       SSOLVE T3*X = C, triangular system ssolve */
/*                       involving the transpose of a lower triangular */
/*                       diagonal block of A stored in T3. The block of X */
/*                       is overwritten on C. */

			    i__1 = *n;
			    for (j = 1; j <= i__1; ++j) {
				strsv_("U", "N", diag, &isec, t3, &c__64, &
					c__[ii + j * c_dim1], &c__1, (ftnlen)
					1, (ftnlen)1, (ftnlen)1);
/* L350: */
			    }
			}
/* L360: */
		    }
		} else {
		    delta = 1.f;
		    cldc = scld_(ldc);
		    for (ix = *m; ix >= 1; ix += -64) {
/* Computing MAX */
			i__1 = 1, i__2 = ix - 63;
			ii = max(i__1,i__2);
			isec = ix - ii + 1;

/*                    C := -1*A'*C + alpha*C, general matrix multiply */
/*                    involving the transpose of a rectangular block */
/*                    of A. */

			i__1 = *m - ii - isec + 1;
			sgemm_("T", "N", &isec, n, &i__1, &c_b22, &a[ii + 
				isec + ii * a_dim1], lda, &c__[ii + isec + 
				c_dim1], ldc, alpha, &c__[ii + c_dim1], ldc, (
				ftnlen)1, (ftnlen)1);
			i__1 = *n;
			for (jj = 1; jj <= i__1; jj += 64) {
/* Computing MIN */
			    i__2 = 64, i__3 = *n - jj + 1;
			    jsec = min(i__2,i__3);

/*                       T1 := C', the transpose of a rectangular block */
/*                       of C is copied to T1. */

			    if (cldc) {
				i__2 = jj + jsec - 1;
				for (j = jj; j <= i__2; ++j) {
				    scopy_(&isec, &c__[ii + j * c_dim1], &
					    c__1, &t1[j - jj], &c__64);
/* L370: */
				}
			    } else {
				i__2 = ii + isec - 1;
				for (i__ = ii; i__ <= i__2; ++i__) {
				    scopy_(&jsec, &c__[i__ + jj * c_dim1], 
					    ldc, &t1[(i__ - ii + 1 << 6) - 64]
					    , &c__1);
/* L380: */
				}
			    }

/*                       T1 := gamma*T1*A + delta*T1, triangular matrix */
/*                       multiply where the values of gamma and delta */
/*                       depend on whether A is a unit or non-unit */
/*                       triangular matrix. Gamma and tsec are also used */
/*                       to compensate for a deficiency in SGEMV that */
/*                       appears if the second dimension (tsec) is zero. */

			    i__2 = ii;
			    for (i__ = ii + isec - 1; i__ >= i__2; --i__) {
				if (nounit) {
				    delta = 1.f / a[i__ + i__ * a_dim1];
				}
				gamma = -delta;
				tsec = ii + isec - 1 - i__;
				if (tsec == 0) {
				    tsec = 1;
				    gamma = 0.f;
				}
				sgemv_("N", &jsec, &tsec, &gamma, &t1[(i__ - 
					ii + 2 << 6) - 64], &c__64, &a[i__ + 
					1 + i__ * a_dim1], &c__1, &delta, &t1[
					(i__ - ii + 1 << 6) - 64], &c__1, (
					ftnlen)1);
/* L390: */
			    }

/*                       C := T1', the transpose of T1 is copied back */
/*                       to C. */

			    i__2 = jj + jsec - 1;
			    for (j = jj; j <= i__2; ++j) {
				scopy_(&isec, &t1[j - jj], &c__64, &c__[ii + 
					j * c_dim1], &c__1);
/* L400: */
			    }
/* L410: */
			}
/* L420: */
		    }
		}
	    }
	}
    } else {
	if (upper) {
	    if (notr) {

/*              SSOLVE  X*A = alpha*C. Right, Upper, No transpose. */

		tinym = ! sbigp_(&c__93, m, n);
		if (tinym) {
		    i__1 = *n;
		    for (jj = 1; jj <= i__1; jj += 64) {
/* Computing MIN */
			i__2 = 64, i__3 = *n - jj + 1;
			jsec = min(i__2,i__3);

/*                    C := -1*C*A + alpha*C, general matrix multiply */
/*                    involving a rectangular block of A. */

			i__2 = jj - 1;
			sgemm_("N", "N", m, &jsec, &i__2, &c_b22, &c__[c_dim1 
				+ 1], ldc, &a[jj * a_dim1 + 1], lda, alpha, &
				c__[jj * c_dim1 + 1], ldc, (ftnlen)1, (ftnlen)
				1);

/*                    SSOLVE X*A = C, triangular system ssolve involving */
/*                    a upper triangular diagonal block of A. The block */
/*                    of X is overwritten on C. */

			i__2 = *m;
			for (i__ = 1; i__ <= i__2; ++i__) {
			    strsv_("U", "T", diag, &jsec, &a[jj + jj * a_dim1]
				    , lda, &c__[i__ + jj * c_dim1], ldc, (
				    ftnlen)1, (ftnlen)1, (ftnlen)1);
/* L430: */
			}
/* L440: */
		    }
		} else {
		    delta = 1.f;
		    i__1 = *n;
		    for (jj = 1; jj <= i__1; jj += 64) {
/* Computing MIN */
			i__2 = 64, i__3 = *n - jj + 1;
			jsec = min(i__2,i__3);

/*                    C := -1*C*A + alpha*C, general matrix multiply */
/*                    involving a rectangular block of A. */

			i__2 = jj - 1;
			sgemm_("N", "N", m, &jsec, &i__2, &c_b22, &c__[c_dim1 
				+ 1], ldc, &a[jj * a_dim1 + 1], lda, alpha, &
				c__[jj * c_dim1 + 1], ldc, (ftnlen)1, (ftnlen)
				1);
			i__2 = *m;
			for (ii = 1; ii <= i__2; ii += 64) {
/* Computing MIN */
			    i__3 = 64, i__4 = *m - ii + 1;
			    isec = min(i__3,i__4);

/*                       T1 := C, a rectangular block of C is copied */
/*                       to T1. */

			    i__3 = jj + jsec - 1;
			    for (j = jj; j <= i__3; ++j) {
				scopy_(&isec, &c__[ii + j * c_dim1], &c__1, &
					t1[(j - jj + 1 << 6) - 64], &c__1);
/* L450: */
			    }

/*                       C := gamma*T1*A + delta*C, triangular matrix */
/*                       multiply where the values of gamma and delta */
/*                       depend on whether A is a unit or non-unit */
/*                       triangular matrix. Gamma and tsec are also used */
/*                       to compensate for a deficiency in SGEMV that */
/*                       appears if the second dimension (tsec) is zero. */

			    i__3 = jj + jsec - 1;
			    for (j = jj; j <= i__3; ++j) {
				if (nounit) {
				    delta = 1.f / a[j + j * a_dim1];
				}
				gamma = -delta;
				tsec = j - jj;
				if (tsec == 0) {
				    tsec = 1;
				    gamma = 0.f;
				}
				sgemv_("N", &isec, &tsec, &gamma, t1, &c__64, 
					&a[jj + j * a_dim1], &c__1, &delta, &
					t1[(j - jj + 1 << 6) - 64], &c__1, (
					ftnlen)1);
/* L460: */
			    }

/*                       C := T1, T1 is copied back to C. */

			    i__3 = jj + jsec - 1;
			    for (j = jj; j <= i__3; ++j) {
				scopy_(&isec, &t1[(j - jj + 1 << 6) - 64], &
					c__1, &c__[ii + j * c_dim1], &c__1);
/* L470: */
			    }
/* L480: */
			}
/* L490: */
		    }
		}
	    } else {

/*              SSOLVE  X*A' = alpha*C. Right, Upper, Transpose. */

		tinym = ! sbigp_(&c__93, m, n);
		if (tinym) {
		    for (jj = *n - (*n - 1) % 64; jj >= 1; jj += -64) {
/* Computing MIN */
			i__1 = 64, i__2 = *n - jj + 1;
			jsec = min(i__1,i__2);

/*                    C := -1*C*A' + alpha*C, general matrix multiply */
/*                    involving the transpose of a rectangular block */
/*                    of A. */

			i__1 = *n - jj - jsec + 1;
			sgemm_("N", "T", m, &jsec, &i__1, &c_b22, &c__[(jj + 
				jsec) * c_dim1 + 1], ldc, &a[jj + (jj + jsec) 
				* a_dim1], lda, alpha, &c__[jj * c_dim1 + 1], 
				ldc, (ftnlen)1, (ftnlen)1);

/*                    SSOLVE X*A' = C, triangular system ssolve involving */
/*                    the transpose of a upper triangular diagonal block */
/*                    of A. The block of X is overwritten on C. */

			i__1 = *m;
			for (i__ = 1; i__ <= i__1; ++i__) {
			    strsv_("U", "N", diag, &jsec, &a[jj + jj * a_dim1]
				    , lda, &c__[i__ + jj * c_dim1], ldc, (
				    ftnlen)1, (ftnlen)1, (ftnlen)1);
/* L500: */
			}
/* L510: */
		    }
		} else {
		    delta = 1.f;
		    for (jj = *n - (*n - 1) % 64; jj >= 1; jj += -64) {
/* Computing MIN */
			i__1 = 64, i__2 = *n - jj + 1;
			jsec = min(i__1,i__2);

/*                    C := -1*C*A' + alpha*C, general matrix multiply */
/*                    involving the transpose of a rectangular block */
/*                    of A. */

			i__1 = *n - jj - jsec + 1;
			sgemm_("N", "T", m, &jsec, &i__1, &c_b22, &c__[(jj + 
				jsec) * c_dim1 + 1], ldc, &a[jj + (jj + jsec) 
				* a_dim1], lda, alpha, &c__[jj * c_dim1 + 1], 
				ldc, (ftnlen)1, (ftnlen)1);

/*                    T2 := A', the transpose of a upper unit or non-unit */
/*                    triangular diagonal block of A is copied to the */
/*                    lower triangular part of T2. */

			i__1 = jj + jsec - 1;
			for (j = jj + offd; j <= i__1; ++j) {
			    i__2 = j - jj + 1 - offd;
			    scopy_(&i__2, &a[jj + j * a_dim1], &c__1, &t2[j - 
				    jj], &c__64);
/* L520: */
			}
			i__1 = *m;
			for (ii = 1; ii <= i__1; ii += 64) {
/* Computing MIN */
			    i__2 = 64, i__3 = *m - ii + 1;
			    isec = min(i__2,i__3);

/*                       T1 := C, a rectangular block of C is copied */
/*                       to T1. */

			    i__2 = jj + jsec - 1;
			    for (j = jj; j <= i__2; ++j) {
				scopy_(&isec, &c__[ii + j * c_dim1], &c__1, &
					t1[(j - jj + 1 << 6) - 64], &c__1);
/* L530: */
			    }

/*                       C := gamma*T1*T2 + delta*C, triangular matrix */
/*                       multiply where the values of gamma and delta */
/*                       depend on whether T2 is a unit or non-unit */
/*                       triangular matrix. Gamma and tsec are also used */
/*                       to compensate for a deficiency in SGEMV that */
/*                       appears if the second dimension (tsec) is zero. */

			    i__2 = jj;
			    for (j = jj + jsec - 1; j >= i__2; --j) {
				if (nounit) {
				    delta = 1.f / t2[j - jj + 1 + (j - jj + 1 
					    << 6) - 65];
				}
				gamma = -delta;
				tsec = jj + jsec - 1 - j;
				if (tsec == 0) {
				    tsec = 1;
				    gamma = 0.f;
				}
				sgemv_("N", &isec, &tsec, &gamma, &t1[(j - jj 
					+ 2 << 6) - 64], &c__64, &t2[j - jj + 
					2 + (j - jj + 1 << 6) - 65], &c__1, &
					delta, &t1[(j - jj + 1 << 6) - 64], &
					c__1, (ftnlen)1);
/* L540: */
			    }

/*                       C := T1, T1 is copied back to C. */

			    i__2 = jj + jsec - 1;
			    for (j = jj; j <= i__2; ++j) {
				scopy_(&isec, &t1[(j - jj + 1 << 6) - 64], &
					c__1, &c__[ii + j * c_dim1], &c__1);
/* L550: */
			    }
/* L560: */
			}
/* L570: */
		    }
		}
	    }
	} else {
	    if (notr) {

/*              SSOLVE  X*A = alpha*C. Right, Lower, No transpose. */

		tinym = ! sbigp_(&c__93, m, n);
		if (tinym) {
		    for (jx = *n; jx >= 1; jx += -64) {
/* Computing MAX */
			i__1 = 1, i__2 = jx - 63;
			jj = max(i__1,i__2);
			jsec = jx - jj + 1;

/*                    C := -1*C*A + alpha*C, general matrix multiply */
/*                    involving a rectangular block of A. */

			i__1 = *n - jj - jsec + 1;
			sgemm_("N", "N", m, &jsec, &i__1, &c_b22, &c__[(jj + 
				jsec) * c_dim1 + 1], ldc, &a[jj + jsec + jj * 
				a_dim1], lda, alpha, &c__[jj * c_dim1 + 1], 
				ldc, (ftnlen)1, (ftnlen)1);

/*                    SSOLVE X*A = C, triangular system ssolve involving */
/*                    a lower triangular diagonal block of A. The block */
/*                    of X is overwritten on C. */

			i__1 = *m;
			for (i__ = 1; i__ <= i__1; ++i__) {
			    strsv_("L", "T", diag, &jsec, &a[jj + jj * a_dim1]
				    , lda, &c__[i__ + jj * c_dim1], ldc, (
				    ftnlen)1, (ftnlen)1, (ftnlen)1);
/* L580: */
			}
/* L590: */
		    }
		} else {
		    delta = 1.f;
		    for (jx = *n; jx >= 1; jx += -64) {
/* Computing MAX */
			i__1 = 1, i__2 = jx - 63;
			jj = max(i__1,i__2);
			jsec = jx - jj + 1;

/*                    C := -1*C*A + alpha*C, general matrix multiply */
/*                    involving a rectangular block of A. */

			i__1 = *n - jj - jsec + 1;
			sgemm_("N", "N", m, &jsec, &i__1, &c_b22, &c__[(jj + 
				jsec) * c_dim1 + 1], ldc, &a[jj + jsec + jj * 
				a_dim1], lda, alpha, &c__[jj * c_dim1 + 1], 
				ldc, (ftnlen)1, (ftnlen)1);
			i__1 = *m;
			for (ii = 1; ii <= i__1; ii += 64) {
/* Computing MIN */
			    i__2 = 64, i__3 = *m - ii + 1;
			    isec = min(i__2,i__3);

/*                       T1 := C, a rectangular block of C is copied */
/*                       to T1. */

			    i__2 = jj + jsec - 1;
			    for (j = jj; j <= i__2; ++j) {
				scopy_(&isec, &c__[ii + j * c_dim1], &c__1, &
					t1[(j - jj + 1 << 6) - 64], &c__1);
/* L600: */
			    }

/*                       C := gamma*T1*A + delta*C, triangular matrix */
/*                       multiply where the values of gamma and delta */
/*                       depend on whether A is a unit or non-unit */
/*                       triangular matrix. Gamma and tsec are also used */
/*                       to compensate for a deficiency in SGEMV that */
/*                       appears if the second dimension (tsec) is zero. */

			    i__2 = jj;
			    for (j = jj + jsec - 1; j >= i__2; --j) {
				if (nounit) {
				    delta = 1.f / a[j + j * a_dim1];
				}
				gamma = -delta;
				tsec = jj + jsec - 1 - j;
				if (tsec == 0) {
				    tsec = 1;
				    gamma = 0.f;
				}
				sgemv_("N", &isec, &tsec, &gamma, &t1[(j - jj 
					+ 2 << 6) - 64], &c__64, &a[j + 1 + j 
					* a_dim1], &c__1, &delta, &t1[(j - jj 
					+ 1 << 6) - 64], &c__1, (ftnlen)1);
/* L610: */
			    }

/*                       C := T1, T1 is copied back to C. */

			    i__2 = jj + jsec - 1;
			    for (j = jj; j <= i__2; ++j) {
				scopy_(&isec, &t1[(j - jj + 1 << 6) - 64], &
					c__1, &c__[ii + j * c_dim1], &c__1);
/* L620: */
			    }
/* L630: */
			}
/* L640: */
		    }
		}
	    } else {

/*              SSOLVE  X*A' = alpha*C. Right, Lower, Transpose. */

		tinym = ! sbigp_(&c__93, m, n);
		if (tinym) {
		    i__1 = *n;
		    for (jx = (*n - 1) % 64 + 1; jx <= i__1; jx += 64) {
/* Computing MAX */
			i__2 = 1, i__3 = jx - 63;
			jj = max(i__2,i__3);
			jsec = jx - jj + 1;

/*                    C := -1*C*A' + alpha*C, general matrix multiply */
/*                    involving the transpose of a rectangular block */
/*                    of A. */

			i__2 = jj - 1;
			sgemm_("N", "T", m, &jsec, &i__2, &c_b22, &c__[c_dim1 
				+ 1], ldc, &a[jj + a_dim1], lda, alpha, &c__[
				jj * c_dim1 + 1], ldc, (ftnlen)1, (ftnlen)1);

/*                    SSOLVE X*A' = C, triangular system ssolve involving */
/*                    the transpose of a lower triangular diagonal block */
/*                    of A. The block of X is overwritten on C. */

			i__2 = *m;
			for (i__ = 1; i__ <= i__2; ++i__) {
			    strsv_("L", "N", diag, &jsec, &a[jj + jj * a_dim1]
				    , lda, &c__[i__ + jj * c_dim1], ldc, (
				    ftnlen)1, (ftnlen)1, (ftnlen)1);
/* L650: */
			}
/* L660: */
		    }
		} else {
		    delta = 1.f;
		    i__1 = *n;
		    for (jx = (*n - 1) % 64 + 1; jx <= i__1; jx += 64) {
/* Computing MAX */
			i__2 = 1, i__3 = jx - 63;
			jj = max(i__2,i__3);
			jsec = jx - jj + 1;

/*                    C := -1*C*A' + alpha*C, general matrix multiply */
/*                    involving the transpose of a rectangular block */
/*                    of A. */

			i__2 = jj - 1;
			sgemm_("N", "T", m, &jsec, &i__2, &c_b22, &c__[c_dim1 
				+ 1], ldc, &a[jj + a_dim1], lda, alpha, &c__[
				jj * c_dim1 + 1], ldc, (ftnlen)1, (ftnlen)1);

/*                    T2 := A', the transpose of a lower unit or non-unit */
/*                    triangular diagonal block of A is copied to the */
/*                    upper triangular part of T2. */

			i__2 = jj + jsec - 1 - offd;
			for (j = jj; j <= i__2; ++j) {
			    i__3 = jj + jsec - j - offd;
			    scopy_(&i__3, &a[j + offd + j * a_dim1], &c__1, &
				    t2[j - jj + 1 + (j - jj + 1 + offd << 6) 
				    - 65], &c__64);
/* L670: */
			}
			i__2 = *m;
			for (ii = 1; ii <= i__2; ii += 64) {
/* Computing MIN */
			    i__3 = 64, i__4 = *m - ii + 1;
			    isec = min(i__3,i__4);

/*                       T1 := C, a rectangular block of C is copied */
/*                       to T1. */

			    i__3 = jj + jsec - 1;
			    for (j = jj; j <= i__3; ++j) {
				scopy_(&isec, &c__[ii + j * c_dim1], &c__1, &
					t1[(j - jj + 1 << 6) - 64], &c__1);
/* L680: */
			    }

/*                       C := gamma*T1*T2 + delta*C, triangular matrix */
/*                       multiply where the values of gamma and delta */
/*                       depend on whether T2 is a unit or non-unit */
/*                       triangular matrix. Gamma and tsec are also used */
/*                       to compensate for a deficiency in SGEMV that */
/*                       appears if the second dimension (tsec) is zero. */

			    i__3 = jj + jsec - 1;
			    for (j = jj; j <= i__3; ++j) {
				if (nounit) {
				    delta = 1.f / t2[j - jj + 1 + (j - jj + 1 
					    << 6) - 65];
				}
				gamma = -delta;
				tsec = j - jj;
				if (tsec == 0) {
				    tsec = 1;
				    gamma = 0.f;
				}
				sgemv_("N", &isec, &tsec, &gamma, t1, &c__64, 
					&t2[(j - jj + 1 << 6) - 64], &c__1, &
					delta, &t1[(j - jj + 1 << 6) - 64], &
					c__1, (ftnlen)1);
/* L690: */
			    }

/*                       C := T1, T1 is copied back to C. */

			    i__3 = jj + jsec - 1;
			    for (j = jj; j <= i__3; ++j) {
				scopy_(&isec, &t1[(j - jj + 1 << 6) - 64], &
					c__1, &c__[ii + j * c_dim1], &c__1);
/* L700: */
			    }
/* L710: */
			}
/* L720: */
		    }
		}
	    }
	}
    }

    return 0;

/*     End of STRSM. */

} /* strsm_ */

