PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
core_ztsqrt.c
Go to the documentation of this file.
1 
17 #include <lapacke.h>
18 #include "common.h"
19 #undef REAL
20 #define COMPLEX
21 
22 /***************************************************************************/
84 #if defined(PLASMA_HAVE_WEAK)
85 #pragma weak CORE_ztsqrt = PCORE_ztsqrt
86 #define CORE_ztsqrt PCORE_ztsqrt
87 #define CORE_ztsmqr PCORE_ztsmqr
88 int CORE_ztsmqr(int side, int trans,
89  int M1, int N1, int M2, int N2, int K, int IB,
90  PLASMA_Complex64_t *A1, int LDA1,
91  PLASMA_Complex64_t *A2, int LDA2,
92  PLASMA_Complex64_t *V, int LDV,
93  PLASMA_Complex64_t *T, int LDT,
94  PLASMA_Complex64_t *WORK, int LDWORK);
95 #endif
96 
97 int CORE_ztsqrt(int M, int N, int IB,
98  PLASMA_Complex64_t *A1, int LDA1,
99  PLASMA_Complex64_t *A2, int LDA2,
100  PLASMA_Complex64_t *T, int LDT,
102 {
103  static PLASMA_Complex64_t zone = 1.0;
104  static PLASMA_Complex64_t zzero = 0.0;
105 
106  PLASMA_Complex64_t alpha;
107  int i, ii, sb;
108 
109  /* Check input arguments */
110  if (M < 0) {
111  coreblas_error(1, "Illegal value of M");
112  return -1;
113  }
114  if (N < 0) {
115  coreblas_error(2, "Illegal value of N");
116  return -2;
117  }
118  if (IB < 0) {
119  coreblas_error(3, "Illegal value of IB");
120  return -3;
121  }
122  if ((LDA2 < max(1,M)) && (M > 0)) {
123  coreblas_error(8, "Illegal value of LDA2");
124  return -8;
125  }
126 
127  /* Quick return */
128  if ((M == 0) || (N == 0) || (IB == 0))
129  return PLASMA_SUCCESS;
130 
131  for(ii = 0; ii < N; ii += IB) {
132  sb = min(N-ii, IB);
133  for(i = 0; i < sb; i++) {
134  /*
135  * Generate elementary reflector H( II*IB+I ) to annihilate
136  * A( II*IB+I:M, II*IB+I )
137  */
138  LAPACKE_zlarfg_work(M+1, &A1[LDA1*(ii+i)+ii+i], &A2[LDA2*(ii+i)], 1, &TAU[ii+i]);
139 
140  if (ii+i+1 < N) {
141  /*
142  * Apply H( II*IB+I ) to A( II*IB+I:M, II*IB+I+1:II*IB+IB ) from the left
143  */
144  alpha = -conj(TAU[ii+i]);
145  cblas_zcopy(
146  sb-i-1,
147  &A1[LDA1*(ii+i+1)+(ii+i)], LDA1,
148  WORK, 1);
149 #ifdef COMPLEX
150  LAPACKE_zlacgv_work(sb-i-1, WORK, 1);
151 #endif
152  cblas_zgemv(
154  M, sb-i-1,
155  CBLAS_SADDR(zone), &A2[LDA2*(ii+i+1)], LDA2,
156  &A2[LDA2*(ii+i)], 1,
157  CBLAS_SADDR(zone), WORK, 1);
158 #ifdef COMPLEX
159  LAPACKE_zlacgv_work(sb-i-1, WORK, 1 );
160 #endif
161  cblas_zaxpy(
162  sb-i-1, CBLAS_SADDR(alpha),
163  WORK, 1,
164  &A1[LDA1*(ii+i+1)+ii+i], LDA1);
165 #ifdef COMPLEX
166  LAPACKE_zlacgv_work(sb-i-1, WORK, 1 );
167 #endif
168  cblas_zgerc(
169  CblasColMajor, M, sb-i-1, CBLAS_SADDR(alpha),
170  &A2[LDA2*(ii+i)], 1,
171  WORK, 1,
172  &A2[LDA2*(ii+i+1)], LDA2);
173  }
174  /*
175  * Calculate T
176  */
177  alpha = -TAU[ii+i];
178  cblas_zgemv(
180  CBLAS_SADDR(alpha), &A2[LDA2*ii], LDA2,
181  &A2[LDA2*(ii+i)], 1,
182  CBLAS_SADDR(zzero), &T[LDT*(ii+i)], 1);
183 
184  cblas_ztrmv(
187  &T[LDT*ii], LDT,
188  &T[LDT*(ii+i)], 1);
189 
190  T[LDT*(ii+i)+i] = TAU[ii+i];
191  }
192  if (N > ii+sb) {
193  CORE_ztsmqr(
195  sb, N-(ii+sb), M, N-(ii+sb), IB, IB,
196  &A1[LDA1*(ii+sb)+ii], LDA1,
197  &A2[LDA2*(ii+sb)], LDA2,
198  &A2[LDA2*ii], LDA2,
199  &T[LDT*ii], LDT,
200  WORK, sb);
201  }
202  }
203  return PLASMA_SUCCESS;
204 }
205 
206 /***************************************************************************/
209 void QUARK_CORE_ztsqrt(Quark *quark, Quark_Task_Flags *task_flags,
210  int m, int n, int ib, int nb,
211  PLASMA_Complex64_t *A1, int lda1,
212  PLASMA_Complex64_t *A2, int lda2,
213  PLASMA_Complex64_t *T, int ldt)
214 {
216  QUARK_Insert_Task(quark, CORE_ztsqrt_quark, task_flags,
217  sizeof(int), &m, VALUE,
218  sizeof(int), &n, VALUE,
219  sizeof(int), &ib, VALUE,
220  sizeof(PLASMA_Complex64_t)*nb*nb, A1, INOUT | QUARK_REGION_D | QUARK_REGION_U,
221  sizeof(int), &lda1, VALUE,
222  sizeof(PLASMA_Complex64_t)*nb*nb, A2, INOUT | LOCALITY,
223  sizeof(int), &lda2, VALUE,
224  sizeof(PLASMA_Complex64_t)*ib*nb, T, OUTPUT,
225  sizeof(int), &ldt, VALUE,
226  sizeof(PLASMA_Complex64_t)*nb, NULL, SCRATCH,
227  sizeof(PLASMA_Complex64_t)*ib*nb, NULL, SCRATCH,
228  0);
229 }
230 
231 /***************************************************************************/
234 #if defined(PLASMA_HAVE_WEAK)
235 #pragma weak CORE_ztsqrt_quark = PCORE_ztsqrt_quark
236 #define CORE_ztsqrt_quark PCORE_ztsqrt_quark
237 #endif
239 {
240  int m;
241  int n;
242  int ib;
243  PLASMA_Complex64_t *A1;
244  int lda1;
245  PLASMA_Complex64_t *A2;
246  int lda2;
248  int ldt;
250  PLASMA_Complex64_t *WORK;
251 
252  quark_unpack_args_11(quark, m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK);
253  CORE_ztsqrt(m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK);
254 }