001:       SUBROUTINE CORE_CTSLQT( M, N, IB, A1, LDA1, A2, LDA2, T, LDT,
002:      $                       TAU, WORK, INFO )
003: 
004: *********************************************************************
005: *     PLASMA core_blas routine (version 2.1.0)                      *
006: *     Author: Hatem Ltaief                                          *
007: *     Release Date: November, 15th 2009                             *
008: *     PLASMA is a software package provided by Univ. of Tennessee,  *
009: *     Univ. of California Berkeley and Univ. of Colorado Denver.    *
010: *********************************************************************
011: *
012: *     .. Scalar Arguments ..
013:       INTEGER            M, N, IB, LDA1, LDA2, LDT, INFO 
014: *     ..
015: *     .. Array Arguments ..
016:       COMPLEX            A1( LDA1, * ), A2( LDA2, * )
017:       COMPLEX            T( LDT, * ), TAU( * ), WORK( * )
018: *     ..
019: *
020: *  Purpose
021: *  =======
022: *
023: *  CORE_CTSLQT computes a LQ factorization of a rectangular matrix
024: *  formed by coupling side-by-side a complex M-by-M
025: *  lower triangular tile A1 and a complex M-by-N tile A2:
026: *
027: *  | A1 A2 | = L * Q
028: *
029: *  Arguments
030: *  =========
031: *
032: *  M       (input) INTEGER
033: *          The number of rows of the tile A1 and A2.  M >= 0.
034: *
035: *  N       (input) INTEGER
036: *          The number of columns of the tile A2.  N >= 0.
037: *
038: *  IB      (input) INTEGER
039: *          The inner-blocking size.  IB >= 0.
040: *
041: *  A1      (input/output) COMPLEX array, dimension (LDA1,M)
042: *          On entry, the M-by-M tile A1.
043: *          On exit, the elements on and below the diagonal of the array
044: *          contain the M-by-M lower trapezoidal tile L;
045: *          the elements above the diagonal are not referenced.
046: *
047: *  LDA1    (input) INTEGER
048: *          The leading dimension of the array A1.  LDA1 >= max(1,M).
049: *
050: *  A2      (input/output) COMPLEX array, dimension (LDA2,N)
051: *          On entry, the M-by-N tile A2.
052: *          On exit, all the elements with the array TAU, represent 
053: *          the unitary tile Q as a product of elementary reflectors 
054: *          (see Further Details).
055: *
056: *  LDA2    (input) INTEGER
057: *          The leading dimension of the array A2.  LDA2 >= max(1,M).
058: *
059: *  T       (output) COMPLEX array, dimension (LDT,N)
060: *          The IB-by-N triangular factor T of the block reflector.
061: *          T is upper triangular by block (economic storage);
062: *          The rest of the array is not referenced.
063: *
064: *  LDT     (input) INTEGER
065: *          The leading dimension of the array T. LDT >= IB.
066: *
067: *  TAU     (output) COMPLEX array, dimension (min(M,N))
068: *          The scalar factors of the elementary reflectors (see Further
069: *          Details).
070: *
071: *  WORK    (workspace) COMPLEX array, dimension (M)
072: *
073: *  INFO    (output) INTEGER
074: *          = 0:  successful exit
075: *          < 0:  if INFO = -i, the i-th argument had an illegal value
076: *
077: *  Further Details
078: *  ===============
079: *
080: *  The tile Q is represented as a product of elementary reflectors
081: *
082: *     Q = H(k)' . . . H(2)' H(1)', where k = min(M,N).
083: *
084: *  Each H(i) has the form
085: *
086: *     H(i) = I - tau * v * v'
087: *
088: *  where tau is a complex scalar, and v is a complex vector with
089: *  v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in
090: *  A2(i,1:n), and tau in TAU(i).
091: *
092: *  =====================================================================
093: *
094: *     .. Parameters ..
095:       COMPLEX            CONE, CZERO
096:       INTEGER            IONE
097:       PARAMETER          ( CONE = ( 1.0E+0, 0.0E+0 ) )
098:       PARAMETER          ( CZERO = ( 0.0E+0, 0.0E+0 ) )
099:       PARAMETER          ( IONE = 1 )
100: *     ..
101: *     .. Local Scalars ..
102:       INTEGER            I, II, SB, IINFO
103: *     ..
104: *     .. External Subroutines ..
105:       EXTERNAL           CLARFG, CORE_CSSMLQ, XERBLA
106:       EXTERNAL           CCOPY, CGEMV, CTRMV, CAXPY, CGERC
107: *     ..
108: *     .. Intrinsic Functions ..
109:       INTRINSIC          MAX
110: *     ..
111: *     Test the input arguments.
112:       INFO = 0
113:       IF( M.LT.0 ) THEN
114:          INFO = -1
115:       ELSE IF( N.LT.0 ) THEN
116:          INFO = -2
117:       ELSE IF( IB.LT.0 ) THEN
118:          INFO = -3
119:       ELSE IF( LDA2.LT.MAX( 1, M ) ) THEN
120:          INFO = -7
121:       END IF
122:       IF( INFO.NE.0 ) THEN
123:          CALL XERBLA( 'CORE_CTSLQT', -INFO )
124:          RETURN
125:       END IF
126: *
127: *     Quick return if possible.
128: *
129:       IF( M.EQ.0 .OR. N.EQ.0 .OR. IB.EQ.0 )
130:      $   RETURN
131: *
132:       DO 10 II = 1, M, IB
133:          SB = MIN( M-II+1, IB )
134:          DO 20 I = 1, SB
135: *
136: *           Generate elementary reflector H( II*IB+I ) to annihilate
137: *           A( II*IB+I, II*IB+I:M ).
138: *
139:             CALL CLACGV( N, A2( II+I-1, 1 ), LDA2 )
140:             CALL CLACGV( 1, A1( II+I-1, II+I-1 ), LDA1 )
141:             CALL CLARFG( N+1, A1( II+I-1, II+I-1 ), A2( II+I-1, 1 ),
142:      $                  LDA2, TAU( II+I-1 ) )
143: 
144:             IF( ( II+I-1 ).LT.M ) THEN
145: *
146: *              Apply H( II+I-1 ) to A( II+I:II+IB-1, II+I-1:M  ) from the right.
147: *
148:                CALL CCOPY( SB-I, A1( II+I, II+I-1 ), IONE,
149:      $                    WORK, IONE)
150:                CALL CGEMV( 'NoTranspose', SB-I, N, CONE,
151:      $                    A2( II+I, 1 ), LDA2,
152:      $                    A2( II+I-1, 1 ), LDA2, CONE, WORK, IONE )
153:                CALL CAXPY( SB-I, -TAU( II+I-1 ), WORK, IONE,
154:      $                    A1( II+I, II+I-1 ), IONE )
155:                CALL CGERC( SB-I, N, -TAU( II+I-1 ), WORK, IONE,
156:      $                   A2( II+I-1, 1 ), LDA2,
157:      $                   A2( II+I, 1 ),LDA2 )
158:             END IF
159: *
160: *           Calculate T.
161: *
162:             CALL CGEMV( 'NoTranspose', I-1, N, -TAU( II+I-1 ),
163:      $                 A2( II, 1 ), LDA2, A2( II+I-1, 1 ), LDA2,
164:      $                 CZERO, T( 1, II+I-1 ), IONE )
165:             CALL CLACGV( N, A2( II+I-1, 1 ), LDA2 )
166:             CALL CLACGV( 1, A1( II+I-1, II+I-1 ), LDA1 )
167:             CALL CTRMV( 'Upper', 'NoTranspose', 'Nonunit', I-1,
168:      $                 T( 1, II ), LDT, T( 1, II+I-1 ), IONE )
169:             T( I, II+I-1 ) = TAU( II+I-1 )
170:  20      CONTINUE
171:          IF ( M.GT.( II+IB-1 ) ) THEN   
172:              CALL CORE_CSSMLQ( 'Right', 'ConjTranspose',
173:      $                        SB, N, M-( II+SB-1 ), IB, IB,
174:      $                        A1( II+SB, II ), LDA1,
175:      $                        A2( II+SB, 1 ), LDA2,
176:      $                        A2( II, 1 ), LDA2, 
177:      $                        T( 1, II ), LDT, WORK, LDA1, IINFO)
178:          ENDIF
179:  10   CONTINUE
180:       RETURN
181: *
182: *     End of CORE_CTSLQT.
183: *
184:       END
185: