001:       SUBROUTINE CORE_ZTSQRT( M, N, IB, A1, LDA1, A2, LDA2, T, LDT,
002:      $                       TAU, WORK, INFO )
003: 
004: *********************************************************************
005: *     PLASMA core_blas routine (version 2.1.0)                      *
006: *     Author: Hatem Ltaief                                          *
007: *     Release Date: November, 15th 2009                             *
008: *     PLASMA is a software package provided by Univ. of Tennessee,  *
009: *     Univ. of California Berkeley and Univ. of Colorado Denver.    *
010: *********************************************************************
011: *
012: *     .. Scalar Arguments ..
013:       INTEGER            M, N, IB, LDA1, LDA2, LDT, INFO
014: *     ..
015: *     .. Array Arguments ..
016:       COMPLEX*16            A1( LDA1, * ), A2( LDA2, * )
017:       COMPLEX*16            T( LDT, * ), TAU( * ), WORK( * )
018: *     ..
019: *
020: *  Purpose
021: *  =======
022: *
023: *  CORE_ZTSQRT computes a QR factorization of a rectangular matrix
024: *  formed by coupling a complex N-by-N upper triangular tile A1
025: *  on top of a complex M-by-N tile A2:
026: *
027: *  | A1 | = Q * R
028: *  | A2 |
029: *
030: *  Arguments
031: *  =========
032: *
033: *  M       (input) INTEGER
034: *          The number of rows of the tile A2.  M >= 0.
035: *
036: *  N       (input) INTEGER
037: *          The number of columns of the tile A1 and A2.  N >= 0.
038: *
039: *  IB      (input) INTEGER
040: *          The inner-blocking size.  IB >= 0.
041: *
042: *  A1      (input/output) COMPLEX*16 array, dimension (LDA1,N)
043: *          On entry, the N-by-N tile A1.
044: *          On exit, the elements on and above the diagonal of the array
045: *          contain the N-by-N upper trapezoidal tile R;
046: *          the elements below the diagonal are not referenced.
047: *
048: *  LDA1    (input) INTEGER
049: *          The leading dimension of the array A1.  LDA1 >= max(1,N).
050: *
051: *  A2      (input/output) COMPLEX*16 array, dimension (LDA2,N)
052: *          On entry, the M-by-N tile A2.
053: *          On exit, all the elements with the array TAU, represent 
054: *          the unitary tile Q as a product of elementary reflectors 
055: *          (see Further Details).
056: *
057: *  LDA2    (input) INTEGER
058: *          The leading dimension of the array A2.  LDA2 >= max(1,M).
059: *
060: *  T       (output) COMPLEX*16 array, dimension (LDT,N)
061: *          The IB-by-N triangular factor T of the block reflector.
062: *          T is upper triangular by block (economic storage);
063: *          The rest of the array is not referenced.
064: *
065: *  LDT     (input) INTEGER
066: *          The leading dimension of the array T. LDT >= IB.
067: *
068: *  TAU     (output) COMPLEX*16 array, dimension (min(M,N))
069: *          The scalar factors of the elementary reflectors (see Further
070: *          Details).
071: *
072: *  WORK    (workspace) COMPLEX*16 array, dimension (N)
073: *
074: *  INFO    (output) INTEGER
075: *          = 0: successful exit
076: *          < 0: if INFO = -i, the i-th argument had an illegal value
077: *
078: *  Further Details
079: *  ===============
080: *
081: *  The tile Q is represented as a product of elementary reflectors
082: *
083: *     Q = H(1) H(2) . . . H(k), where k = min(M,N).
084: *
085: *  Each H(i) has the form
086: *
087: *     H(i) = I - tau * v * v'
088: *
089: *  where tau is a complex scalar, and v is a complex vector with
090: *  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A2(1:m,i),
091: *  and tau in TAU(i).
092: *
093: *  =====================================================================
094: *
095: *     .. Parameters ..
096:       COMPLEX*16            ZONE, ZZERO
097:       INTEGER            IONE
098:       PARAMETER          ( ZONE = ( 1.0D+0, 0.0D+0 ) )
099:       PARAMETER          ( ZZERO = ( 0.0D+0, 0.0D+0 ) )
100:       PARAMETER          ( IONE = 1 )
101: *     ..
102: *     .. Local Scalars ..
103:       INTEGER            I, II, SB, IINFO
104: *     ..
105: *     .. External Subroutines ..
106:       EXTERNAL           ZLARFG, CORE_ZSSMQR, XERBLA
107:       EXTERNAL           ZCOPY, ZGEMV, ZTRMV, ZAXPY, ZGERC
108: *     ..
109: *     .. Intrinsic Functions ..
110:       INTRINSIC          CONJG, MAX
111: *     ..
112: *     Test the input arguments.
113:       INFO = 0
114:       IF( M.LT.0 ) THEN
115:          INFO = -1
116:       ELSE IF( N.LT.0 ) THEN
117:          INFO = -2
118:       ELSE IF( IB.LT.0 ) THEN
119:          INFO = -3
120:       ELSE IF( LDA2.LT.MAX( 1, M ) ) THEN
121:          INFO = -7
122:       END IF
123:       IF( INFO.NE.0 ) THEN
124:          CALL XERBLA( 'CORE_ZTSQRT', -INFO )
125:          RETURN
126:       END IF
127: *
128: *     Quick return if possible.
129: *
130:       IF( M.EQ.0 .OR. N.EQ.0 .OR. IB.EQ.0 )
131:      $   RETURN
132: *
133:       DO 10 II = 1, N, IB
134:          SB = MIN( N-II+1, IB )
135:          DO 20 I = 1, SB
136: *
137: *           Generate elementary reflector H( II*IB+I ) to annihilate
138: *           A( II*IB+I:M, II*IB+I ).
139: *
140:             CALL ZLARFG( M+1, A1( II+I-1, II+I-1 ), A2( 1, II+I-1 ),
141:      $                  IONE, TAU( II+I-1 ) )
142: *
143:             IF( ( II+I-1 ).LT.N ) THEN
144: *
145: *              Apply H( II*IB+I ) to A( II*IB+I:M, II*IB+I+1:II*IB+IB ) from the left.
146: *
147:                CALL ZCOPY( SB-I, A1( II+I-1, II+I ), LDA1,
148:      $                    WORK, IONE )
149: *#IFDEF COMPLEX .OR. COMPLEX_16
150:                CALL ZLACGV( SB-I, WORK, IONE )
151: *#ENDIF
152:                CALL ZGEMV( 'ConjTranspose', M, SB-I, ZONE,
153:      $                    A2( 1, II+I ), LDA2, A2( 1, II+I-1 ),
154:      $                    IONE, ZONE, WORK, IONE )
155: *#IFDEF COMPLEX .OR. COMPLEX_16
156:                CALL ZLACGV( SB-I, WORK, IONE )
157: *#ENDIF
158:                CALL ZAXPY( SB-I, -CONJG( TAU( II+I-1 ) ), WORK, IONE, 
159:      $                    A1( II+I-1, II+I ), LDA1 ) 
160: *#IFDEF COMPLEX .OR. COMPLEX_16
161:                CALL ZLACGV( SB-I, WORK, IONE )
162: *#ENDIF
163:                CALL ZGERC( M, SB-I, -CONJG( TAU( II+I-1 ) ),
164:      $                    A2( 1, II+I-1 ), IONE, WORK, IONE,
165:      $                    A2( 1, II+I ), LDA2 )
166:             END IF
167: *
168: *           Calculate T.
169: *
170:             CALL ZGEMV( 'ConjTranspose', M, I-1, -TAU( II+I-1 ),
171:      $                 A2( 1, II ), LDA2, A2( 1, II+I-1 ), IONE,
172:      $                 ZZERO, T( 1, II+I-1 ), IONE )
173: *
174:             CALL ZTRMV( 'Upper', 'Notranspose', 'Nonunit', I-1,
175:      $                 T( 1, II ), LDT, T( 1, II+I-1 ), IONE )
176:             T( I, II+I-1 ) = TAU( II+I-1 )
177:  20      CONTINUE
178:          IF ( N.GT.( II+IB-1 ) ) THEN   
179:              CALL CORE_ZSSMQR( 'Left', 'ConjTranspose',
180:      $                    SB, M, N-( II+SB-1 ), IB, IB,
181:      $                    A1( II, II+SB ), LDA1,
182:      $                    A2( 1, II+SB ), LDA2,
183:      $                    A2( 1, II ), LDA2,
184:      $                    T( 1, II ), LDT, WORK, SB, IINFO )
185:          ENDIF
186:  10   CONTINUE
187: *
188:       RETURN
189: *
190: *     End of CORE_ZTSQRT.
191: *
192:       END
193: