/* cherk.f -- translated by f2c (version 20030320).
   You must link the resulting object file with the libraries:
	-lf2c -lm   (in that order)
*/

#include "f2cblas.h"

/* Table of constant values */


/* Subroutine */ int cherk_(char *uplo, char *trans, integer *n, integer *k, 
	real *alpha, complex *a, integer *lda, real *beta, complex *c__, 
	integer *ldc, ftnlen uplo_len, ftnlen trans_len)
{
/* Constants passed by address to the Fortran-style helpers: */
/*   c__1  - unit stride, */
/*   c__51, c__52 - selector codes handed to cbigp_, */
/*   c__64 - leading dimension of the local 64 by 64 work blocks. */
integer c__1 = 1;
integer c__51 = 51;
integer c__52 = 52;
integer c__64 = 64;
    /* System generated locals */
    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5;
    real r__1;
    complex q__1;

    /* Builtin functions */
    void r_cnjg(complex *, complex *);

    /* Local variables */
    integer i__, l;
    complex t1[4096]	/* was [64][64] */, t2[4096]	/* was [64][64] */, 
	    t3[4096]	/* was [64][64] */, t4[64];
    integer ii, ll, ix;
    logical clda;
    extern logical ccld_(integer *);
    extern /* Subroutine */ int cher_(char *, integer *, real *, complex *, 
	    integer *, complex *, integer *, ftnlen);
    integer isec, lsec, info;
    logical notr;
    complex cbeta;
    extern logical cbigp_(integer *, integer *, integer *);
    extern /* Subroutine */ int cscal_(integer *, complex *, complex *, 
	    integer *), cgemm_(char *, char *, integer *, integer *, integer *
	    , complex *, complex *, integer *, complex *, integer *, complex *
	    , complex *, integer *, ftnlen, ftnlen);
    
    extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex *
	    , complex *, integer *, complex *, integer *, complex *, complex *
	    , integer *, ftnlen), ccopy_(integer *, complex *, integer *, 
	    complex *, integer *);
    integer nrowa;
    logical upper, tinyk;
    complex calpha, cdelta;
    
    logical smalln;

/*  Purpose */
/*  ======= */

/*  CHERK  performs one of the hermitian rank k operations */

/*     C := alpha*A*conjg( A' ) + beta*C, */

/*  or */

/*     C := alpha*conjg( A' )*A + beta*C, */

/*  where  alpha and beta  are  real scalars,  C is an  n by n  hermitian */
/*  matrix and  A  is an  n by k  matrix in the  first case and a  k by n */
/*  matrix in the second case. */

/*  Parameters */
/*  ========== */

/*  UPLO   - CHARACTER*1. */
/*           On  entry,   UPLO  specifies  whether  the  upper  or  lower */
/*           triangular  part  of the  array  C  is to be  referenced  as */
/*           follows: */

/*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C */
/*                                  is to be referenced. */

/*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C */
/*                                  is to be referenced. */

/*           Unchanged on exit. */

/*  TRANS  - CHARACTER*1. */
/*           On entry,  TRANS  specifies the operation to be performed as */
/*           follows: */

/*              TRANS = 'N' or 'n'   C := alpha*A*conjg( A' ) + beta*C. */

/*              TRANS = 'C' or 'c'   C := alpha*conjg( A' )*A + beta*C. */

/*           Unchanged on exit. */

/*  N      - INTEGER. */
/*           On entry,  N specifies the order of the matrix C.  N must be */
/*           at least zero. */
/*           Unchanged on exit. */

/*  K      - INTEGER. */
/*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number */
/*           of  columns   of  the   matrix   A,   and  on   entry   with */
/*           TRANS = 'C' or 'c',  K  specifies  the number of rows of the */
/*           matrix A.  K must be at least zero. */
/*           Unchanged on exit. */

/*  ALPHA  - REAL. */
/*           On entry, ALPHA specifies the scalar alpha. */
/*           Unchanged on exit. */

/*  A      - COMPLEX       array of DIMENSION ( LDA, ka ), where ka is */
/*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise. */
/*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k */
/*           part of the array  A  must contain the matrix  A,  otherwise */
/*           the leading  k by n  part of the array  A  must contain  the */
/*           matrix A. */
/*           Unchanged on exit. */

/*  LDA    - INTEGER. */
/*           On entry, LDA specifies the first dimension of A as declared */
/*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n' */
/*           then  LDA must be at least  max( 1, n ), otherwise  LDA must */
/*           be at least  max( 1, k ). */
/*           Unchanged on exit. */

/*  BETA   - REAL. */
/*           On entry, BETA specifies the scalar beta. */
/*           Unchanged on exit. */

/*  C      - COMPLEX       array of DIMENSION ( LDC, n ). */
/*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n */
/*           upper triangular part of the array C must contain the upper */
/*           triangular part  of the  hermitian matrix  and the strictly */
/*           lower triangular part of C is not referenced.  On exit, the */
/*           upper triangular part of the array  C is overwritten by the */
/*           upper triangular part of the updated matrix. */
/*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n */
/*           lower triangular part of the array C must contain the lower */
/*           triangular part  of the  hermitian matrix  and the strictly */
/*           upper triangular part of C is not referenced.  On exit, the */
/*           lower triangular part of the array  C is overwritten by the */
/*           lower triangular part of the updated matrix. */
/*           Note that the imaginary parts of the diagonal elements need */
/*           not be set,  they are assumed to be zero,  and on exit they */
/*           are set to zero. */

/*  LDC    - INTEGER. */
/*           On entry, LDC specifies the first dimension of C as declared */
/*           in  the  calling  (sub)  program.   LDC  must  be  at  least */
/*           max( 1, n ). */
/*           Unchanged on exit. */

/*  UPLO_LEN, TRANS_LEN - Fortran character lengths appended by f2c; */
/*           they are not read by this routine. */

/*  Return value */
/*  ============ */

/*  Always 0.  Invalid arguments are reported by calling xerbla_ with */
/*  the routine name "CHERK " and the 1-based position of the first */
/*  offending argument (1, 2, 3, 4, 7 or 10). */

/*  Implementation overview */
/*  ======================= */

/*  C is processed in block columns of at most 64 columns (ascending for */
/*  the upper case, descending for the lower case).  For each block the */
/*  rectangular part above (upper) or below (lower) the diagonal block is */
/*  updated with a single cgemm_ call.  The triangular diagonal block is */
/*  updated by one of three paths, chosen from the results of cbigp_: */
/*    - rank-1 updates with cher_ applied directly to C, */
/*    - rank-1 updates with cher_ applied to a local copy T2 of the */
/*      block, which is then copied back to C, */
/*    - column by column cgemv_ updates using local copies T1 / T4 of */
/*      the relevant part of A. */
/*  NOTE(review): cbigp_ and ccld_ are external predicates whose exact */
/*  criteria are not visible in this file; presumably they are tuning */
/*  heuristics of the GEMM-based BLAS - confirm against their sources. */


/*  Level 3 Blas routine. */

/*  -- Written on 8-February-1989. */
/*     Jack Dongarra, Argonne National Laboratory. */
/*     Iain Duff, AERE Harwell. */
/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
/*     Sven Hammarling, Numerical Algorithms Group Ltd. */

/*  -- Rewritten in May-1994. */
/*     GEMM-Based Level 3 BLAS. */
/*     Per Ling, Institute of Information Processing, */
/*     University of Umea, Sweden. */


/*     .. Executable Statements .. */

    /* Parameter adjustments */
    /* Shift the base pointers so that a[i + j*a_dim1] and */
    /* c__[i + j*c_dim1] address the 1-based Fortran elements A(i,j), */
    /* C(i,j). */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    c_dim1 = *ldc;
    c_offset = 1 + c_dim1;
    c__ -= c_offset;

    /* Function Body */

/*     Test the input parameters.  The option characters are accepted in */
/*     either case, as stated in the parameter description above. */

    upper = (uplo[0] == 'U' || uplo[0] == 'u') ? 1 : 0;
    notr = (trans[0] == 'N' || trans[0] == 'n') ? 1 : 0;
    if (notr) {
	nrowa = *n;
    } else {
	nrowa = *k;
    }
    info = 0;
    if (! upper && ! (uplo[0] == 'L' || uplo[0] == 'l')) {
	info = 1;
    } else if (! notr && ! (trans[0] == 'C' || trans[0] == 'c')) {
	info = 2;
    } else if (*n < 0) {
	info = 3;
    } else if (*k < 0) {
	info = 4;
    } else if (*lda < max(1,nrowa)) {
	info = 7;
    } else if (*ldc < max(1,*n)) {
	info = 10;
    }
    if (info != 0) {
	xerbla_("CHERK ", &info, (ftnlen)6);
	return 0;
    }

/*     Quick return if possible. */

    if (*n == 0 || ((*alpha == 0.f || *k == 0) && *beta == 1.f)) {
	return 0;
    }

/*     Complex copies of the real scalars, needed because cscal_, cgemm_ */
/*     and cgemv_ take complex scalar arguments. */

    q__1.r = *alpha, q__1.i = 0.f;
    calpha.r = q__1.r, calpha.i = q__1.i;
    q__1.r = *beta, q__1.i = 0.f;
    cbeta.r = q__1.r, cbeta.i = q__1.i;

/*     And when alpha.eq.zero or k.eq.0. */
/*     Only C := beta*C remains: scale the referenced triangle and force */
/*     the imaginary part of every diagonal element to zero. */

    if (*alpha == 0.f || *k == 0) {
	if (upper) {
	    i__1 = c_dim1 + 1;
	    i__2 = c_dim1 + 1;
	    r__1 = *beta * c__[i__2].r;
	    q__1.r = r__1, q__1.i = 0.f;
	    c__[i__1].r = q__1.r, c__[i__1].i = q__1.i;
	    i__1 = *n;
	    for (i__ = 2; i__ <= i__1; ++i__) {
		i__2 = i__ - 1;
		cscal_(&i__2, &cbeta, &c__[i__ * c_dim1 + 1], &c__1);
		i__2 = i__ + i__ * c_dim1;
		i__3 = i__ + i__ * c_dim1;
		r__1 = *beta * c__[i__3].r;
		q__1.r = r__1, q__1.i = 0.f;
		c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
/* L10: */
	    }
	} else {
	    i__1 = *n - 1;
	    for (i__ = 1; i__ <= i__1; ++i__) {
		i__2 = i__ + i__ * c_dim1;
		i__3 = i__ + i__ * c_dim1;
		r__1 = *beta * c__[i__3].r;
		q__1.r = r__1, q__1.i = 0.f;
		c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
		i__2 = *n - i__;
		cscal_(&i__2, &cbeta, &c__[i__ + 1 + i__ * c_dim1], &c__1);
/* L20: */
	    }
	    i__1 = *n + *n * c_dim1;
	    i__2 = *n + *n * c_dim1;
	    r__1 = *beta * c__[i__2].r;
	    q__1.r = r__1, q__1.i = 0.f;
	    c__[i__1].r = q__1.r, c__[i__1].i = q__1.i;
	}
	return 0;
    }

/*     Start the operations. */

    if (upper) {
	if (notr) {

/*           Form  C := alpha*A*conjg( A' ) + beta*C. Upper, Notr. */

	    smalln = ! cbigp_(&c__51, n, k);
	    if (smalln) {
		tinyk = ! cbigp_(&c__52, n, k);
		i__1 = *n;
		for (ii = 1; ii <= i__1; ii += 64) {
/* Computing MIN */
		    i__2 = 64, i__3 = *n - ii + 1;
		    isec = min(i__2,i__3);

/*                 C := alpha*A*conjg( A' ) + beta*C, matrix multiply */
/*                 updating upper vertical blocks of C. */

		    if (ii > 1) {
			i__2 = ii - 1;
			cgemm_("N", "C", &i__2, &isec, k, &calpha, &a[a_dim1 
				+ 1], lda, &a[ii + a_dim1], lda, &cbeta, &c__[
				ii * c_dim1 + 1], ldc, (ftnlen)1, (ftnlen)1);
		    }
		    if (tinyk) {

/*                    C :=  beta*C, an upper triangular diagonal block */
/*                    of C is updated with beta. The imaginary part of */
/*                    the diagonal elements of C are set to ZERO. */

			if (*beta != 1.f) {
			    i__2 = ii + ii * c_dim1;
			    i__3 = ii + ii * c_dim1;
			    r__1 = *beta * c__[i__3].r;
			    q__1.r = r__1, q__1.i = 0.f;
			    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
			    i__2 = ii + isec - 1;
			    for (i__ = ii + 1; i__ <= i__2; ++i__) {
				i__3 = i__ - ii;
				cscal_(&i__3, &cbeta, &c__[ii + i__ * c_dim1],
					 &c__1);
				i__3 = i__ + i__ * c_dim1;
				i__4 = i__ + i__ * c_dim1;
				r__1 = *beta * c__[i__4].r;
				q__1.r = r__1, q__1.i = 0.f;
				c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
/* L30: */
			    }
			}

/*                    C := alpha*A*conjg( A' ) + C, hermitian matrix */
/*                    multiply. C is a hermitian diagonal block having */
/*                    upper triangular storage format. */

			i__2 = *k;
			for (l = 1; l <= i__2; ++l) {
			    cher_("U", &isec, alpha, &a[ii + l * a_dim1], &
				    c__1, &c__[ii + ii * c_dim1], ldc, (
				    ftnlen)1);
/* L40: */
			}
		    } else {

/*                    T2 := C, an upper triangular diagonal block of the */
/*                    hermitian matrix C is copied to the upper */
/*                    triangular part of T2. */

			i__2 = ii + isec - 1;
			for (i__ = ii; i__ <= i__2; ++i__) {
			    i__3 = i__ - ii + 1;
			    ccopy_(&i__3, &c__[ii + i__ * c_dim1], &c__1, &t2[
				    (i__ - ii + 1 << 6) - 64], &c__1);
/* L50: */
			}

/*                    T2 :=  beta*T2, the upper triangular part of T2 is */
/*                    updated with beta. The imaginary part of the */
/*                    diagonal elements of T2 are set to ZERO. */

			if (*beta != 1.f) {
			    r__1 = *beta * t2[0].r;
			    q__1.r = r__1, q__1.i = 0.f;
			    t2[0].r = q__1.r, t2[0].i = q__1.i;
			    i__2 = isec;
			    for (i__ = 2; i__ <= i__2; ++i__) {
				i__3 = i__ - 1;
				cscal_(&i__3, &cbeta, &t2[(i__ << 6) - 64], &
					c__1);
				i__3 = i__ + (i__ << 6) - 65;
				i__4 = i__ + (i__ << 6) - 65;
				r__1 = *beta * t2[i__4].r;
				q__1.r = r__1, q__1.i = 0.f;
				t2[i__3].r = q__1.r, t2[i__3].i = q__1.i;
/* L60: */
			    }
			}

/*                    T2 := alpha*A*conjg( A' ) + T2, hermitian matrix */
/*                    multiply. T2 contains a hermitian block having */
/*                    upper triangular storage format. */

			i__2 = *k;
			for (l = 1; l <= i__2; ++l) {
			    cher_("U", &isec, alpha, &a[ii + l * a_dim1], &
				    c__1, t2, &c__64, (ftnlen)1);
/* L70: */
			}

/*                    C := T2, the upper triangular part of T2 is copied */
/*                    back to C. */

			i__2 = ii + isec - 1;
			for (i__ = ii; i__ <= i__2; ++i__) {
			    i__3 = i__ - ii + 1;
			    ccopy_(&i__3, &t2[(i__ - ii + 1 << 6) - 64], &
				    c__1, &c__[ii + i__ * c_dim1], &c__1);
/* L80: */
			}
		    }
/* L90: */
		}
	    } else {
		i__1 = *n;
		for (ii = 1; ii <= i__1; ii += 64) {
/* Computing MIN */
		    i__2 = 64, i__3 = *n - ii + 1;
		    isec = min(i__2,i__3);

/*                 C := alpha*A*conjg( A' ) + beta*C, matrix multiply */
/*                 updating upper vertical blocks of C. */

		    if (ii > 1) {
			i__2 = ii - 1;
			cgemm_("N", "C", &i__2, &isec, k, &calpha, &a[a_dim1 
				+ 1], lda, &a[ii + a_dim1], lda, &cbeta, &c__[
				ii * c_dim1 + 1], ldc, (ftnlen)1, (ftnlen)1);
		    }
		    cdelta.r = cbeta.r, cdelta.i = cbeta.i;
		    i__2 = *k;
		    for (ll = 1; ll <= i__2; ll += 64) {
/* Computing MIN */
			i__3 = 64, i__4 = *k - ll + 1;
			lsec = min(i__3,i__4);

/*                    T1 := A, a rectangular block of A is copied to T1. */

			i__3 = ll + lsec - 1;
			for (l = ll; l <= i__3; ++l) {
			    ccopy_(&isec, &a[ii + l * a_dim1], &c__1, &t1[(l 
				    - ll + 1 << 6) - 64], &c__1);
/* L100: */
			}

/*                    C := alpha*T1*conjg( T1' ) + delta*C, C is */
/*                    hermitian having triangular storage format. Delta */
/*                    is used instead of beta to avoid updating the */
/*                    block of C with beta multiple times. The local */
/*                    array T4 is used for the conjugated transpose */
/*                    of vectors of T1. */

			i__3 = ii + isec - 1;
			for (i__ = ii; i__ <= i__3; ++i__) {
			    i__4 = ll + lsec - 1;
			    for (l = ll; l <= i__4; ++l) {
				i__5 = l - ll;
				r_cnjg(&q__1, &t1[i__ - ii + 1 + (l - ll + 1 
					<< 6) - 65]);
				t4[i__5].r = q__1.r, t4[i__5].i = q__1.i;
/* L110: */
			    }
			    i__4 = i__ - ii + 1;
			    cgemv_("N", &i__4, &lsec, &calpha, t1, &c__64, t4,
				     &c__1, &cdelta, &c__[ii + i__ * c_dim1], 
				    &c__1, (ftnlen)1);
			    /* Discard the imaginary part of the diagonal. */
			    i__4 = i__ + i__ * c_dim1;
			    i__5 = i__ + i__ * c_dim1;
			    r__1 = c__[i__5].r;
			    q__1.r = r__1, q__1.i = 0.f;
			    c__[i__4].r = q__1.r, c__[i__4].i = q__1.i;
/* L120: */
			}
			cdelta.r = 1.f, cdelta.i = 0.f;
/* L130: */
		    }
/* L140: */
		}
	    }
	} else {

/*           Form  C := alpha*conjg( A' )*A + beta*C. Upper, Trans. */

	    smalln = ! cbigp_(&c__51, n, k);
	    if (smalln) {
		tinyk = ! cbigp_(&c__52, n, k);
		i__1 = *n;
		for (ii = 1; ii <= i__1; ii += 64) {
/* Computing MIN */
		    i__2 = 64, i__3 = *n - ii + 1;
		    isec = min(i__2,i__3);

/*                 C := alpha*conjg( A' )*A + beta*C, matrix multiply */
/*                 updating upper vertical blocks of C. */

		    if (ii > 1) {
			i__2 = ii - 1;
			cgemm_("C", "N", &i__2, &isec, k, &calpha, &a[a_dim1 
				+ 1], lda, &a[ii * a_dim1 + 1], lda, &cbeta, &
				c__[ii * c_dim1 + 1], ldc, (ftnlen)1, (ftnlen)
				1);
		    }
		    if (tinyk) {

/*                    C :=  beta*C, an upper triangular diagonal block */
/*                    of C is updated with beta. The imaginary part of */
/*                    the diagonal elements of C are set to ZERO. */

			if (*beta != 1.f) {
			    i__2 = ii + ii * c_dim1;
			    i__3 = ii + ii * c_dim1;
			    r__1 = *beta * c__[i__3].r;
			    q__1.r = r__1, q__1.i = 0.f;
			    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
			    i__2 = ii + isec - 1;
			    for (i__ = ii + 1; i__ <= i__2; ++i__) {
				i__3 = i__ - ii;
				cscal_(&i__3, &cbeta, &c__[ii + i__ * c_dim1],
					 &c__1);
				i__3 = i__ + i__ * c_dim1;
				i__4 = i__ + i__ * c_dim1;
				r__1 = *beta * c__[i__4].r;
				q__1.r = r__1, q__1.i = 0.f;
				c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
/* L150: */
			    }
			}

/*                    C := alpha*conjg( A' )*A + C, hermitian matrix */
/*                    multiply. C is a hermitian diagonal block having */
/*                    upper triangular storage format. The local array */
/*                    T3 is used for temporary storage of the conjugate */
/*                    transposed vectors of A. */

			i__2 = *k;
			for (l = 1; l <= i__2; ++l) {
			    i__3 = ii + isec - 1;
			    for (i__ = ii; i__ <= i__3; ++i__) {
				i__4 = i__ - ii;
				r_cnjg(&q__1, &a[l + i__ * a_dim1]);
				t3[i__4].r = q__1.r, t3[i__4].i = q__1.i;
/* L160: */
			    }
			    cher_("U", &isec, alpha, t3, &c__1, &c__[ii + ii *
				     c_dim1], ldc, (ftnlen)1);
/* L170: */
			}
		    } else {

/*                    T2 := C, an upper triangular diagonal block of the */
/*                    hermitian matrix C is copied to the upper */
/*                    triangular part of T2. */

			i__2 = ii + isec - 1;
			for (i__ = ii; i__ <= i__2; ++i__) {
			    i__3 = i__ - ii + 1;
			    ccopy_(&i__3, &c__[ii + i__ * c_dim1], &c__1, &t2[
				    (i__ - ii + 1 << 6) - 64], &c__1);
/* L180: */
			}

/*                    T2 :=  beta*T2, the upper triangular part of T2 is */
/*                    updated with beta. */

			if (*beta != 1.f) {
			    i__2 = ii + isec - 1;
			    for (i__ = ii; i__ <= i__2; ++i__) {
				i__3 = i__ - ii + 1;
				cscal_(&i__3, &cbeta, &t2[(i__ - ii + 1 << 6) 
					- 64], &c__1);
/* L190: */
			    }
			}
			i__2 = *k;
			for (ll = 1; ll <= i__2; ll += 64) {
/* Computing MIN */
			    i__3 = 64, i__4 = *k - ll + 1;
			    lsec = min(i__3,i__4);

/*                       T3 :=  A', the transpose of a square block of A */
/*                       is copied to T3. */

			    i__3 = ii + isec - 1;
			    for (i__ = ii; i__ <= i__3; ++i__) {
				ccopy_(&lsec, &a[ll + i__ * a_dim1], &c__1, &
					t3[i__ - ii], &c__64);
/* L200: */
			    }

/*                       T2 := alpha*conjg( T3' )*T3 + T2, hermitian */
/*                       matrix multiply. T2 contains a hermitian block */
/*                       having upper triangular storage format. The */
/*                       local array T3 is used for temporary storage of */
/*                       the conjugate transposed vectors of A. */

			    i__3 = ll + lsec - 1;
			    for (l = ll; l <= i__3; ++l) {
				i__4 = isec;
				for (i__ = 1; i__ <= i__4; ++i__) {
				    i__5 = i__ + (l - ll + 1 << 6) - 65;
				    r_cnjg(&q__1, &t3[i__ + (l - ll + 1 << 6) 
					    - 65]);
				    t3[i__5].r = q__1.r, t3[i__5].i = q__1.i;
/* L210: */
				}
				cher_("U", &isec, alpha, &t3[(l - ll + 1 << 6)
					 - 64], &c__1, t2, &c__64, (ftnlen)1);
/* L220: */
			    }
/* L230: */
			}

/*                    C := T2, the upper triangular part of T2 is copied */
/*                    back to C. */

			i__2 = ii + isec - 1;
			for (i__ = ii; i__ <= i__2; ++i__) {
			    i__3 = i__ - ii + 1;
			    ccopy_(&i__3, &t2[(i__ - ii + 1 << 6) - 64], &
				    c__1, &c__[ii + i__ * c_dim1], &c__1);
/* L240: */
			}
		    }
/* L250: */
		}
	    } else {
		/* clda only selects the loop order used to fill T1 below; */
		/* both orders store the same values. */
		clda = ccld_(lda);
		i__1 = *n;
		for (ii = 1; ii <= i__1; ii += 64) {
/* Computing MIN */
		    i__2 = 64, i__3 = *n - ii + 1;
		    isec = min(i__2,i__3);

/*                 C := alpha*conjg( A' )*A + beta*C, matrix multiply */
/*                 updating upper vertical blocks of C. */

		    if (ii > 1) {
			i__2 = ii - 1;
			cgemm_("C", "N", &i__2, &isec, k, &calpha, &a[a_dim1 
				+ 1], lda, &a[ii * a_dim1 + 1], lda, &cbeta, &
				c__[ii * c_dim1 + 1], ldc, (ftnlen)1, (ftnlen)
				1);
		    }
		    cdelta.r = cbeta.r, cdelta.i = cbeta.i;
		    i__2 = *k;
		    for (ll = 1; ll <= i__2; ll += 64) {
/* Computing MIN */
			i__3 = 64, i__4 = *k - ll + 1;
			lsec = min(i__3,i__4);

/*                    T1 := conjg( A' ), the conjugated transpose of a */
/*                    rectangular block of A is copied to T1. */

			if (clda) {
			    i__3 = ii + isec - 1;
			    for (i__ = ii; i__ <= i__3; ++i__) {
				i__4 = ll + lsec - 1;
				for (l = ll; l <= i__4; ++l) {
				    i__5 = i__ - ii + 1 + (l - ll + 1 << 6) - 
					    65;
				    r_cnjg(&q__1, &a[l + i__ * a_dim1]);
				    t1[i__5].r = q__1.r, t1[i__5].i = q__1.i;
/* L260: */
				}
/* L270: */
			    }
			} else {
			    i__3 = ll + lsec - 1;
			    for (l = ll; l <= i__3; ++l) {
				i__4 = ii + isec - 1;
				for (i__ = ii; i__ <= i__4; ++i__) {
				    i__5 = i__ - ii + 1 + (l - ll + 1 << 6) - 
					    65;
				    r_cnjg(&q__1, &a[l + i__ * a_dim1]);
				    t1[i__5].r = q__1.r, t1[i__5].i = q__1.i;
/* L280: */
				}
/* L290: */
			    }
			}

/*                    C := alpha*T1*conjg( T1' ) + delta*C, C is */
/*                    hermitian having triangular storage format. Delta */
/*                    is used instead of beta to avoid updating the */
/*                    block of C with beta multiple times. The local */
/*                    array T4 is used for the conjugated transpose */
/*                    of vectors of T1. */

			i__3 = ii + isec - 1;
			for (i__ = ii; i__ <= i__3; ++i__) {
			    i__4 = ll + lsec - 1;
			    for (l = ll; l <= i__4; ++l) {
				i__5 = l - ll;
				r_cnjg(&q__1, &t1[i__ - ii + 1 + (l - ll + 1 
					<< 6) - 65]);
				t4[i__5].r = q__1.r, t4[i__5].i = q__1.i;
/* L300: */
			    }
			    i__4 = i__ - ii + 1;
			    cgemv_("N", &i__4, &lsec, &calpha, t1, &c__64, t4,
				     &c__1, &cdelta, &c__[ii + i__ * c_dim1], 
				    &c__1, (ftnlen)1);
			    /* Discard the imaginary part of the diagonal. */
			    i__4 = i__ + i__ * c_dim1;
			    i__5 = i__ + i__ * c_dim1;
			    r__1 = c__[i__5].r;
			    q__1.r = r__1, q__1.i = 0.f;
			    c__[i__4].r = q__1.r, c__[i__4].i = q__1.i;
/* L310: */
			}
			cdelta.r = 1.f, cdelta.i = 0.f;
/* L320: */
		    }
/* L330: */
		}
	    }
	}
    } else {
	if (notr) {

/*           Form  C := alpha*A*conjg( A' ) + beta*C. Lower, Notr. */

	    smalln = ! cbigp_(&c__51, n, k);
	    if (smalln) {
		tinyk = ! cbigp_(&c__52, n, k);
		for (ix = *n; ix >= 1; ix += -64) {
/* Computing MAX */
		    i__1 = 1, i__2 = ix - 63;
		    ii = max(i__1,i__2);
		    isec = ix - ii + 1;
		    if (tinyk) {

/*                    C :=  beta*C, a lower triangular diagonal block */
/*                    of C is updated with beta. */

			if (*beta != 1.f) {
			    i__1 = ii + isec - 2;
			    for (i__ = ii; i__ <= i__1; ++i__) {
				i__2 = i__ + i__ * c_dim1;
				i__3 = i__ + i__ * c_dim1;
				r__1 = *beta * c__[i__3].r;
				q__1.r = r__1, q__1.i = 0.f;
				c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
				i__2 = ii + isec - i__ - 1;
				cscal_(&i__2, &cbeta, &c__[i__ + 1 + i__ * 
					c_dim1], &c__1);
/* L340: */
			    }
			    i__1 = ii + isec - 1 + (ii + isec - 1) * c_dim1;
			    i__2 = ii + isec - 1 + (ii + isec - 1) * c_dim1;
			    r__1 = *beta * c__[i__2].r;
			    q__1.r = r__1, q__1.i = 0.f;
			    c__[i__1].r = q__1.r, c__[i__1].i = q__1.i;
			}

/*                    C := alpha*A*conjg( A' ) + C, hermitian matrix */
/*                    multiply. C is a hermitian diagonal block having */
/*                    lower triangular storage format. */

			i__1 = *k;
			for (l = 1; l <= i__1; ++l) {
			    cher_("L", &isec, alpha, &a[ii + l * a_dim1], &
				    c__1, &c__[ii + ii * c_dim1], ldc, (
				    ftnlen)1);
/* L350: */
			}
		    } else {

/*                    T2 := C, a lower triangular diagonal block of the */
/*                    hermitian matrix C is copied to the lower */
/*                    triangular part of T2. */

			i__1 = ii + isec - 1;
			for (i__ = ii; i__ <= i__1; ++i__) {
			    i__2 = ii + isec - i__;
			    ccopy_(&i__2, &c__[i__ + i__ * c_dim1], &c__1, &
				    t2[i__ - ii + 1 + (i__ - ii + 1 << 6) - 
				    65], &c__1);
/* L360: */
			}

/*                    T2 :=  beta*T2, the lower triangular part of T2 is */
/*                    updated with beta. The imaginary part of the */
/*                    diagonal elements of T2 are set to ZERO. */

			if (*beta != 1.f) {
			    i__1 = isec - 1;
			    for (i__ = 1; i__ <= i__1; ++i__) {
				i__2 = i__ + (i__ << 6) - 65;
				i__3 = i__ + (i__ << 6) - 65;
				r__1 = *beta * t2[i__3].r;
				q__1.r = r__1, q__1.i = 0.f;
				t2[i__2].r = q__1.r, t2[i__2].i = q__1.i;
				i__2 = isec - i__;
				cscal_(&i__2, &cbeta, &t2[i__ + 1 + (i__ << 6)
					 - 65], &c__1);
/* L370: */
			    }
			    i__1 = isec + (isec << 6) - 65;
			    i__2 = isec + (isec << 6) - 65;
			    r__1 = *beta * t2[i__2].r;
			    q__1.r = r__1, q__1.i = 0.f;
			    t2[i__1].r = q__1.r, t2[i__1].i = q__1.i;
			}

/*                    T2 := alpha*A*conjg( A' ) + T2, hermitian matrix */
/*                    multiply. T2 contains a hermitian block having */
/*                    lower triangular storage format. */

			i__1 = *k;
			for (l = 1; l <= i__1; ++l) {
			    cher_("L", &isec, alpha, &a[ii + l * a_dim1], &
				    c__1, t2, &c__64, (ftnlen)1);
/* L380: */
			}

/*                    C := T2, the lower triangular part of T2 is copied */
/*                    back to C. */

			i__1 = ii + isec - 1;
			for (i__ = ii; i__ <= i__1; ++i__) {
			    i__2 = ii + isec - i__;
			    ccopy_(&i__2, &t2[i__ - ii + 1 + (i__ - ii + 1 << 
				    6) - 65], &c__1, &c__[i__ + i__ * c_dim1],
				     &c__1);
/* L390: */
			}
		    }

/*                 C := alpha*A*conjg( A' ) + beta*C, matrix multiply */
/*                 on lower vertical blocks of C. */

		    if (ii + isec <= *n) {
			i__1 = *n - ii - isec + 1;
			cgemm_("N", "C", &i__1, &isec, k, &calpha, &a[ii + 
				isec + a_dim1], lda, &a[ii + a_dim1], lda, &
				cbeta, &c__[ii + isec + ii * c_dim1], ldc, (
				ftnlen)1, (ftnlen)1);
		    }
/* L400: */
		}
	    } else {
		for (ix = *n; ix >= 1; ix += -64) {
/* Computing MAX */
		    i__1 = 1, i__2 = ix - 63;
		    ii = max(i__1,i__2);
		    isec = ix - ii + 1;
		    cdelta.r = cbeta.r, cdelta.i = cbeta.i;
		    i__1 = *k;
		    for (ll = 1; ll <= i__1; ll += 64) {
/* Computing MIN */
			i__2 = 64, i__3 = *k - ll + 1;
			lsec = min(i__2,i__3);

/*                    T1 := A, a rectangular block of A is copied to T1. */

			i__2 = ll + lsec - 1;
			for (l = ll; l <= i__2; ++l) {
			    ccopy_(&isec, &a[ii + l * a_dim1], &c__1, &t1[(l 
				    - ll + 1 << 6) - 64], &c__1);
/* L410: */
			}

/*                    C := alpha*T1*conjg( T1' ) + delta*C, C is */
/*                    hermitian having triangular storage format. Delta */
/*                    is used instead of beta to avoid updating the */
/*                    block of C with beta multiple times. The local */
/*                    array T4 is used for the conjugated transpose */
/*                    of vectors of T1. */

			i__2 = ii + isec - 1;
			for (i__ = ii; i__ <= i__2; ++i__) {
			    i__3 = ll + lsec - 1;
			    for (l = ll; l <= i__3; ++l) {
				i__4 = l - ll;
				r_cnjg(&q__1, &t1[i__ - ii + 1 + (l - ll + 1 
					<< 6) - 65]);
				t4[i__4].r = q__1.r, t4[i__4].i = q__1.i;
/* L420: */
			    }
			    i__3 = ii + isec - i__;
			    cgemv_("N", &i__3, &lsec, &calpha, &t1[i__ - ii], 
				    &c__64, t4, &c__1, &cdelta, &c__[i__ + 
				    i__ * c_dim1], &c__1, (ftnlen)1);
			    /* Discard the imaginary part of the diagonal. */
			    i__3 = i__ + i__ * c_dim1;
			    i__4 = i__ + i__ * c_dim1;
			    r__1 = c__[i__4].r;
			    q__1.r = r__1, q__1.i = 0.f;
			    c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
/* L430: */
			}
			cdelta.r = 1.f, cdelta.i = 0.f;
/* L440: */
		    }

/*                 C := alpha*A*conjg( A' ) + beta*C, matrix multiply */
/*                 updating lower vertical blocks of C. */

		    if (ii + isec <= *n) {
			i__1 = *n - ii - isec + 1;
			cgemm_("N", "C", &i__1, &isec, k, &calpha, &a[ii + 
				isec + a_dim1], lda, &a[ii + a_dim1], lda, &
				cbeta, &c__[ii + isec + ii * c_dim1], ldc, (
				ftnlen)1, (ftnlen)1);
		    }
/* L450: */
		}
	    }
	} else {

/*           Form  C := alpha*conjg( A' )*A + beta*C. Lower, Trans. */

	    smalln = ! cbigp_(&c__51, n, k);
	    if (smalln) {
		tinyk = ! cbigp_(&c__52, n, k);
		for (ix = *n; ix >= 1; ix += -64) {
/* Computing MAX */
		    i__1 = 1, i__2 = ix - 63;
		    ii = max(i__1,i__2);
		    isec = ix - ii + 1;
		    if (tinyk) {

/*                    C :=  beta*C, a lower triangular diagonal block */
/*                    of C is updated with beta. The imaginary part of */
/*                    the diagonal elements of C are set to ZERO. */

			if (*beta != 1.f) {
			    i__1 = ii + isec - 2;
			    for (i__ = ii; i__ <= i__1; ++i__) {
				i__2 = i__ + i__ * c_dim1;
				i__3 = i__ + i__ * c_dim1;
				r__1 = *beta * c__[i__3].r;
				q__1.r = r__1, q__1.i = 0.f;
				c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
				i__2 = ii + isec - i__ - 1;
				cscal_(&i__2, &cbeta, &c__[i__ + 1 + i__ * 
					c_dim1], &c__1);
/* L460: */
			    }
			    i__1 = ii + isec - 1 + (ii + isec - 1) * c_dim1;
			    i__2 = ii + isec - 1 + (ii + isec - 1) * c_dim1;
			    r__1 = *beta * c__[i__2].r;
			    q__1.r = r__1, q__1.i = 0.f;
			    c__[i__1].r = q__1.r, c__[i__1].i = q__1.i;
			}

/*                    C := alpha*conjg( A' )*A + C, hermitian matrix */
/*                    multiply. C is a hermitian diagonal block having */
/*                    lower triangular storage format. The local array */
/*                    T3 is used for temporary storage of the conjugate */
/*                    transposed vectors of A. */

			i__1 = *k;
			for (l = 1; l <= i__1; ++l) {
			    i__2 = ii + isec - 1;
			    for (i__ = ii; i__ <= i__2; ++i__) {
				i__3 = i__ - ii;
				r_cnjg(&q__1, &a[l + i__ * a_dim1]);
				t3[i__3].r = q__1.r, t3[i__3].i = q__1.i;
/* L470: */
			    }
			    cher_("L", &isec, alpha, t3, &c__1, &c__[ii + ii *
				     c_dim1], ldc, (ftnlen)1);
/* L480: */
			}
		    } else {

/*                    T2 := C, a lower triangular diagonal block of the */
/*                    hermitian matrix C is copied to the lower */
/*                    triangular part of T2. */

			i__1 = ii + isec - 1;
			for (i__ = ii; i__ <= i__1; ++i__) {
			    i__2 = ii + isec - i__;
			    ccopy_(&i__2, &c__[i__ + i__ * c_dim1], &c__1, &
				    t2[i__ - ii + 1 + (i__ - ii + 1 << 6) - 
				    65], &c__1);
/* L490: */
			}

/*                    T2 :=  beta*T2, the lower triangular part of T2 is */
/*                    updated with beta. */

			if (*beta != 1.f) {
			    i__1 = ii + isec - 1;
			    for (i__ = ii; i__ <= i__1; ++i__) {
				i__2 = ii + isec - i__;
				cscal_(&i__2, &cbeta, &t2[i__ - ii + 1 + (i__ 
					- ii + 1 << 6) - 65], &c__1);
/* L500: */
			    }
			}
			i__1 = *k;
			for (ll = 1; ll <= i__1; ll += 64) {
/* Computing MIN */
			    i__2 = 64, i__3 = *k - ll + 1;
			    lsec = min(i__2,i__3);

/*                       T3 :=  A', the transpose of a square block of A */
/*                       is copied to T3. */

			    i__2 = ii + isec - 1;
			    for (i__ = ii; i__ <= i__2; ++i__) {
				ccopy_(&lsec, &a[ll + i__ * a_dim1], &c__1, &
					t3[i__ - ii], &c__64);
/* L510: */
			    }

/*                       T2 := alpha*conjg( T3' )*T3 + T2, hermitian */
/*                       matrix multiply. T2 contains a hermitian block */
/*                       having lower triangular storage format. The */
/*                       local array T3 is used for temporary storage of */
/*                       the conjugate transposed vectors of A. */

			    i__2 = ll + lsec - 1;
			    for (l = ll; l <= i__2; ++l) {
				i__3 = isec;
				for (i__ = 1; i__ <= i__3; ++i__) {
				    i__4 = i__ + (l - ll + 1 << 6) - 65;
				    r_cnjg(&q__1, &t3[i__ + (l - ll + 1 << 6) 
					    - 65]);
				    t3[i__4].r = q__1.r, t3[i__4].i = q__1.i;
/* L520: */
				}
				cher_("L", &isec, alpha, &t3[(l - ll + 1 << 6)
					 - 64], &c__1, t2, &c__64, (ftnlen)1);
/* L530: */
			    }
/* L540: */
			}

/*                    C := T2, the lower triangular part of T2 is copied */
/*                    back to C. */

			i__1 = ii + isec - 1;
			for (i__ = ii; i__ <= i__1; ++i__) {
			    i__2 = ii + isec - i__;
			    ccopy_(&i__2, &t2[i__ - ii + 1 + (i__ - ii + 1 << 
				    6) - 65], &c__1, &c__[i__ + i__ * c_dim1],
				     &c__1);
/* L550: */
			}
		    }

/*                 C := alpha*conjg( A' )*A + beta*C, matrix multiply */
/*                 updating lower vertical blocks of C. */

		    if (ii + isec <= *n) {
			i__1 = *n - ii - isec + 1;
			cgemm_("C", "N", &i__1, &isec, k, &calpha, &a[(ii + 
				isec) * a_dim1 + 1], lda, &a[ii * a_dim1 + 1],
				 lda, &cbeta, &c__[ii + isec + ii * c_dim1], 
				ldc, (ftnlen)1, (ftnlen)1);
		    }
/* L560: */
		}
	    } else {
		/* clda only selects the loop order used to fill T1 below; */
		/* both orders store the same values. */
		clda = ccld_(lda);
		for (ix = *n; ix >= 1; ix += -64) {
/* Computing MAX */
		    i__1 = 1, i__2 = ix - 63;
		    ii = max(i__1,i__2);
		    isec = ix - ii + 1;
		    cdelta.r = cbeta.r, cdelta.i = cbeta.i;
		    i__1 = *k;
		    for (ll = 1; ll <= i__1; ll += 64) {
/* Computing MIN */
			i__2 = 64, i__3 = *k - ll + 1;
			lsec = min(i__2,i__3);

/*                    T1 := conjg( A' ), the conjugated transpose of a */
/*                    rectangular block of A is copied to T1. */

			if (clda) {
			    i__2 = ii + isec - 1;
			    for (i__ = ii; i__ <= i__2; ++i__) {
				i__3 = ll + lsec - 1;
				for (l = ll; l <= i__3; ++l) {
				    i__4 = i__ - ii + 1 + (l - ll + 1 << 6) - 
					    65;
				    r_cnjg(&q__1, &a[l + i__ * a_dim1]);
				    t1[i__4].r = q__1.r, t1[i__4].i = q__1.i;
/* L570: */
				}
/* L580: */
			    }
			} else {
			    i__2 = ll + lsec - 1;
			    for (l = ll; l <= i__2; ++l) {
				i__3 = ii + isec - 1;
				for (i__ = ii; i__ <= i__3; ++i__) {
				    i__4 = i__ - ii + 1 + (l - ll + 1 << 6) - 
					    65;
				    r_cnjg(&q__1, &a[l + i__ * a_dim1]);
				    t1[i__4].r = q__1.r, t1[i__4].i = q__1.i;
/* L590: */
				}
/* L600: */
			    }
			}

/*                    C := alpha*T1*conjg( T1' ) + delta*C, C is */
/*                    hermitian having triangular storage format. Delta */
/*                    is used instead of beta to avoid updating the */
/*                    block of C with beta multiple times. The local */
/*                    array T4 is used for the conjugated transpose */
/*                    of vectors of T1. */

			i__2 = ii + isec - 1;
			for (i__ = ii; i__ <= i__2; ++i__) {
			    i__3 = ll + lsec - 1;
			    for (l = ll; l <= i__3; ++l) {
				i__4 = l - ll;
				r_cnjg(&q__1, &t1[i__ - ii + 1 + (l - ll + 1 
					<< 6) - 65]);
				t4[i__4].r = q__1.r, t4[i__4].i = q__1.i;
/* L620: */
			    }
			    i__3 = ii + isec - i__;
			    cgemv_("N", &i__3, &lsec, &calpha, &t1[i__ - ii], 
				    &c__64, t4, &c__1, &cdelta, &c__[i__ + 
				    i__ * c_dim1], &c__1, (ftnlen)1);
			    /* Discard the imaginary part of the diagonal. */
			    i__3 = i__ + i__ * c_dim1;
			    i__4 = i__ + i__ * c_dim1;
			    r__1 = c__[i__4].r;
			    q__1.r = r__1, q__1.i = 0.f;
			    c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
/* L630: */
			}
			cdelta.r = 1.f, cdelta.i = 0.f;
/* L640: */
		    }

/*                 C := alpha*conjg( A' )*A + beta*C, matrix multiply */
/*                 updating lower vertical blocks of C. */

		    if (ii + isec <= *n) {
			i__1 = *n - ii - isec + 1;
			cgemm_("C", "N", &i__1, &isec, k, &calpha, &a[(ii + 
				isec) * a_dim1 + 1], lda, &a[ii * a_dim1 + 1],
				 lda, &cbeta, &c__[ii + isec + ii * c_dim1], 
				ldc, (ftnlen)1, (ftnlen)1);
		    }
/* L650: */
		}
	    }
	}
    }

    return 0;

/*     End of CHERK. */

} /* cherk_ */

