PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
core_zttlqt.c
Go to the documentation of this file.
1 
17 #include <lapacke.h>
18 #include "common.h"
19 #undef REAL
20 #define COMPLEX
21 
22 /***************************************************************************/
96 #if defined(PLASMA_HAVE_WEAK)
97 #pragma weak CORE_zttlqt = PCORE_zttlqt
98 #define CORE_zttlqt PCORE_zttlqt
99 #endif
100 int CORE_zttlqt(int M, int N, int IB,
101  PLASMA_Complex64_t *A1, int LDA1,
102  PLASMA_Complex64_t *A2, int LDA2,
103  PLASMA_Complex64_t *T, int LDT,
105 {
106  static PLASMA_Complex64_t zone = 1.0;
107  static PLASMA_Complex64_t zzero = 0.0;
108 #ifdef COMPLEX
109  static int ione = 1;
110 #endif
111 
112  PLASMA_Complex64_t alpha;
113  int i, j, l, ii, sb, mi, ni;
114 
115  /* Check input arguments */
116  if (M < 0) {
117  coreblas_error(1, "Illegal value of M");
118  return -1;
119  }
120  if (N < 0) {
121  coreblas_error(2, "Illegal value of N");
122  return -2;
123  }
124  if (IB < 0) {
125  coreblas_error(3, "Illegal value of IB");
126  return -3;
127  }
128  if ((LDA2 < max(1,M)) && (M > 0)) {
129  coreblas_error(7, "Illegal value of LDA2");
130  return -7;
131  }
132 
133  /* Quick return */
134  if ((M == 0) || (N == 0) || (IB == 0))
135  return PLASMA_SUCCESS;
136 
137  /* TODO: Need to check why some cases require
138  * this to not have uninitialized values */
140  0., 0., T, LDT);
141 
142  for(ii = 0; ii < M; ii += IB) {
143  sb = min(M-ii, IB);
144  for(i = 0; i < sb; i++) {
145  j = ii + i;
146  mi = sb-i-1;
147  ni = min( j + 1, N);
148  /*
149  * Generate elementary reflector H( II*IB+I ) to annihilate A( II*IB+I, II*IB+I:M ).
150  */
151 #ifdef COMPLEX
152  LAPACKE_zlacgv_work(ni, &A2[j], LDA2);
153  LAPACKE_zlacgv_work(ione, &A1[LDA1*j+j], LDA1);
154 #endif
155  LAPACKE_zlarfg_work(ni+1, &A1[LDA1*j+j], &A2[j], LDA2, &TAU[j]);
156 
157  if (mi > 0) {
158  /*
159  * Apply H( j-1 ) to A( j:II+IB-1, j-1:M ) from the right.
160  */
161  cblas_zcopy(
162  mi,
163  &A1[LDA1*j+(j+1)], 1,
164  WORK, 1);
165 
166  cblas_zgemv(
168  mi, ni,
169  CBLAS_SADDR(zone), &A2[j+1], LDA2,
170  &A2[j], LDA2,
171  CBLAS_SADDR(zone), WORK, 1);
172 
173  alpha = -(TAU[j]);
174  cblas_zaxpy(
175  mi, CBLAS_SADDR(alpha),
176  WORK, 1,
177  &A1[LDA1*j+j+1], 1);
178 
179  cblas_zgerc(
180  CblasColMajor, mi, ni,
181  CBLAS_SADDR(alpha), WORK, 1,
182  &A2[j], LDA2,
183  &A2[j+1], LDA2);
184  }
185 
186  /*
187  * Calculate T.
188  */
189 
190  if (i > 0 ) {
191 
192  l = min(i, max(0, N-ii));
193  alpha = -(TAU[j]);
194 
195  CORE_zpemv(
197  i , min(j, N), l,
198  alpha, &A2[ii], LDA2,
199  &A2[j], LDA2,
200  zzero, &T[LDT*j], 1,
201  WORK);
202 
203  /* T(0:i-1, j) = T(0:i-1, ii:j-1) * T(0:i-1, j) */
204  cblas_ztrmv(
208  i, &T[LDT*ii], LDT,
209  &T[LDT*j], 1);
210 
211  }
212 
213 #ifdef COMPLEX
214  LAPACKE_zlacgv_work(ni, &A2[j], LDA2 );
215  LAPACKE_zlacgv_work(ione, &A1[LDA1*j+j], LDA1 );
216 #endif
217 
218  T[LDT*j+i] = TAU[j];
219  }
220 
221  /* Apply Q to the rest of the matrix to the right */
222  if (M > ii+sb) {
223  mi = M-(ii+sb);
224  ni = min(ii+sb, N);
225  l = min(sb, max(0, ni-ii));
226  CORE_zparfb(
229  mi, IB, mi, ni, sb, l,
230  &A1[LDA1*ii+ii+sb], LDA1,
231  &A2[ii+sb], LDA2,
232  &A2[ii], LDA2,
233  &T[LDT*ii], LDT,
234  WORK, M);
235 
236  }
237  }
238  return PLASMA_SUCCESS;
239 }
240 
241 /***************************************************************************/
244 void QUARK_CORE_zttlqt(Quark *quark, Quark_Task_Flags *task_flags,
245  int m, int n, int ib, int nb,
246  PLASMA_Complex64_t *A1, int lda1,
247  PLASMA_Complex64_t *A2, int lda2,
248  PLASMA_Complex64_t *T, int ldt)
249 {
251  QUARK_Insert_Task(quark, CORE_zttlqt_quark, task_flags,
252  sizeof(int), &m, VALUE,
253  sizeof(int), &n, VALUE,
254  sizeof(int), &ib, VALUE,
256  sizeof(int), &lda1, VALUE,
258  sizeof(int), &lda2, VALUE,
259  sizeof(PLASMA_Complex64_t)*ib*nb, T, OUTPUT,
260  sizeof(int), &ldt, VALUE,
261  sizeof(PLASMA_Complex64_t)*nb, NULL, SCRATCH,
262  sizeof(PLASMA_Complex64_t)*ib*nb, NULL, SCRATCH,
263  0);
264 }
265 
266 /***************************************************************************/
269 #if defined(PLASMA_HAVE_WEAK)
270 #pragma weak CORE_zttlqt_quark = PCORE_zttlqt_quark
271 #define CORE_zttlqt_quark PCORE_zttlqt_quark
272 #endif
274 {
275  int m;
276  int n;
277  int ib;
278  PLASMA_Complex64_t *A1;
279  int lda1;
280  PLASMA_Complex64_t *A2;
281  int lda2;
283  int ldt;
285  PLASMA_Complex64_t *WORK;
286 
287  quark_unpack_args_11(quark, m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK);
288  CORE_zttlqt(m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK);
289 }