001:       SUBROUTINE CORE_CTSQRT( M, N, IB, A1, LDA1, A2, LDA2, T, LDT,
002:      $                       TAU, WORK, INFO )
003: 
004: *********************************************************************
005: *     PLASMA core_blas routine (version 2.1.0)                      *
006: *     Author: Hatem Ltaief                                          *
007: *     Release Date: November, 15th 2009                             *
008: *     PLASMA is a software package provided by Univ. of Tennessee,  *
009: *     Univ. of California Berkeley and Univ. of Colorado Denver.    *
010: *********************************************************************
011: *
012: *     .. Scalar Arguments ..
013:       INTEGER            M, N, IB, LDA1, LDA2, LDT, INFO
014: *     ..
015: *     .. Array Arguments ..
016:       COMPLEX            A1( LDA1, * ), A2( LDA2, * )
017:       COMPLEX            T( LDT, * ), TAU( * ), WORK( * )
018: *     ..
019: *
020: *  Purpose
021: *  =======
022: *
023: *  CORE_CTSQRT computes a QR factorization of a rectangular matrix
024: *  formed by coupling a complex N-by-N upper triangular tile A1
025: *  on top of a complex M-by-N tile A2:
026: *
027: *  | A1 | = Q * R
028: *  | A2 |
029: *
030: *  Arguments
031: *  =========
032: *
033: *  M       (input) INTEGER
034: *          The number of rows of the tile A2.  M >= 0.
035: *
036: *  N       (input) INTEGER
037: *          The number of columns of the tile A1 and A2.  N >= 0.
038: *
039: *  IB      (input) INTEGER
040: *          The inner-blocking size.  IB >= 0.
041: *
042: *  A1      (input/output) COMPLEX array, dimension (LDA1,N)
043: *          On entry, the N-by-N tile A1.
044: *          On exit, the elements on and above the diagonal of the array
045: *          contain the N-by-N upper trapezoidal tile R;
046: *          the elements below the diagonal are not referenced.
047: *
048: *  LDA1    (input) INTEGER
049: *          The leading dimension of the array A1.  LDA1 >= max(1,N).
050: *
051: *  A2      (input/output) COMPLEX array, dimension (LDA2,N)
052: *          On entry, the M-by-N tile A2.
053: *          On exit, all the elements with the array TAU, represent 
054: *          the unitary tile Q as a product of elementary reflectors 
055: *          (see Further Details).
056: *
057: *  LDA2    (input) INTEGER
058: *          The leading dimension of the array A2.  LDA2 >= max(1,M).
059: *
060: *  T       (output) COMPLEX array, dimension (LDT,N)
061: *          The IB-by-N triangular factor T of the block reflector.
062: *          T is upper triangular by block (economic storage);
063: *          The rest of the array is not referenced.
064: *
065: *  LDT     (input) INTEGER
066: *          The leading dimension of the array T. LDT >= IB.
067: *
068: *  TAU     (output) COMPLEX array, dimension (min(M,N))
069: *          The scalar factors of the elementary reflectors (see Further
070: *          Details).
071: *
072: *  WORK    (workspace) COMPLEX array, dimension (N)
073: *
074: *  INFO    (output) INTEGER
075: *          = 0: successful exit
076: *          < 0: if INFO = -i, the i-th argument had an illegal value
077: *
078: *  Further Details
079: *  ===============
080: *
081: *  The tile Q is represented as a product of elementary reflectors
082: *
083: *     Q = H(1) H(2) . . . H(k), where k = min(M,N).
084: *
085: *  Each H(i) has the form
086: *
087: *     H(i) = I - tau * v * v'
088: *
089: *  where tau is a complex scalar, and v is a complex vector with
090: *  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A2(1:m,i),
091: *  and tau in TAU(i).
092: *
093: *  =====================================================================
094: *
095: *     .. Parameters ..
096:       COMPLEX            CONE, CZERO
097:       INTEGER            IONE
098:       PARAMETER          ( CONE = ( 1.0E+0, 0.0E+0 ) )
099:       PARAMETER          ( CZERO = ( 0.0E+0, 0.0E+0 ) )
100:       PARAMETER          ( IONE = 1 )
101: *     ..
102: *     .. Local Scalars ..
103:       INTEGER            I, II, SB, IINFO
104: *     ..
105: *     .. External Subroutines ..
106:       EXTERNAL           CLARFG, CORE_CSSMQR, XERBLA
107:       EXTERNAL           CCOPY, CGEMV, CTRMV, CAXPY, CGERC
108: *     ..
109: *     .. Intrinsic Functions ..
110:       INTRINSIC          CONJG, MAX
111: *     ..
112: *     Test the input arguments.
113:       INFO = 0
114:       IF( M.LT.0 ) THEN
115:          INFO = -1
116:       ELSE IF( N.LT.0 ) THEN
117:          INFO = -2
118:       ELSE IF( IB.LT.0 ) THEN
119:          INFO = -3
120:       ELSE IF( LDA2.LT.MAX( 1, M ) ) THEN
121:          INFO = -7
122:       END IF
123:       IF( INFO.NE.0 ) THEN
124:          CALL XERBLA( 'CORE_CTSQRT', -INFO )
125:          RETURN
126:       END IF
127: *
128: *     Quick return if possible.
129: *
130:       IF( M.EQ.0 .OR. N.EQ.0 .OR. IB.EQ.0 )
131:      $   RETURN
132: *
133:       DO 10 II = 1, N, IB
134:          SB = MIN( N-II+1, IB )
135:          DO 20 I = 1, SB
136: *
137: *           Generate elementary reflector H( II*IB+I ) to annihilate
138: *           A( II*IB+I:M, II*IB+I ).
139: *
140:             CALL CLARFG( M+1, A1( II+I-1, II+I-1 ), A2( 1, II+I-1 ),
141:      $                  IONE, TAU( II+I-1 ) )
142: *
143:             IF( ( II+I-1 ).LT.N ) THEN
144: *
145: *              Apply H( II*IB+I ) to A( II*IB+I:M, II*IB+I+1:II*IB+IB ) from the left.
146: *
147:                CALL CCOPY( SB-I, A1( II+I-1, II+I ), LDA1,
148:      $                    WORK, IONE )
149:                CALL CLACGV( SB-I, WORK, IONE )
150:                CALL CGEMV( 'ConjTranspose', M, SB-I, CONE,
151:      $                    A2( 1, II+I ), LDA2, A2( 1, II+I-1 ),
152:      $                    IONE, CONE, WORK, IONE )
153:                CALL CLACGV( SB-I, WORK, IONE )
154:                CALL CAXPY( SB-I, -CONJG( TAU( II+I-1 ) ), WORK, IONE, 
155:      $                    A1( II+I-1, II+I ), LDA1 ) 
156:                CALL CLACGV( SB-I, WORK, IONE )
157:                CALL CGERC( M, SB-I, -CONJG( TAU( II+I-1 ) ),
158:      $                    A2( 1, II+I-1 ), IONE, WORK, IONE,
159:      $                    A2( 1, II+I ), LDA2 )
160:             END IF
161: *
162: *           Calculate T.
163: *
164:             CALL CGEMV( 'ConjTranspose', M, I-1, -TAU( II+I-1 ),
165:      $                 A2( 1, II ), LDA2, A2( 1, II+I-1 ), IONE,
166:      $                 CZERO, T( 1, II+I-1 ), IONE )
167: *
168:             CALL CTRMV( 'Upper', 'Notranspose', 'Nonunit', I-1,
169:      $                 T( 1, II ), LDT, T( 1, II+I-1 ), IONE )
170:             T( I, II+I-1 ) = TAU( II+I-1 )
171:  20      CONTINUE
172:          IF ( N.GT.( II+IB-1 ) ) THEN   
173:              CALL CORE_CSSMQR( 'Left', 'ConjTranspose',
174:      $                    SB, M, N-( II+SB-1 ), IB, IB,
175:      $                    A1( II, II+SB ), LDA1,
176:      $                    A2( 1, II+SB ), LDA2,
177:      $                    A2( 1, II ), LDA2,
178:      $                    T( 1, II ), LDT, WORK, SB, IINFO )
179:          ENDIF
180:  10   CONTINUE
181: *
182:       RETURN
183: *
184: *     End of CORE_CTSQRT.
185: *
186:       END
187: