PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
core_ztsmqr.c
Go to the documentation of this file.
1 
19 #include "common.h"
20 
21 /***************************************************************************/
120 #if defined(PLASMA_HAVE_WEAK)
121 #pragma weak CORE_ztsmqr = PCORE_ztsmqr
122 #define CORE_ztsmqr PCORE_ztsmqr
123 #endif
124 int CORE_ztsmqr(int side, int trans,
125  int M1, int N1, int M2, int N2, int K, int IB,
126  PLASMA_Complex64_t *A1, int LDA1,
127  PLASMA_Complex64_t *A2, int LDA2,
128  PLASMA_Complex64_t *V, int LDV,
129  PLASMA_Complex64_t *T, int LDT,
130  PLASMA_Complex64_t *WORK, int LDWORK)
131 {
132  int i, i1, i3;
133  int NQ, NW;
134  int kb;
135  int ic = 0;
136  int jc = 0;
137  int mi = M1;
138  int ni = N1;
139 
140  /* Check input arguments */
141  if ((side != PlasmaLeft) && (side != PlasmaRight)) {
142  coreblas_error(1, "Illegal value of side");
143  return -1;
144  }
145 
146  /* NQ is the order of Q */
147  if (side == PlasmaLeft) {
148  NQ = M2;
149  NW = IB;
150  }
151  else {
152  NQ = N2;
153  NW = M1;
154  }
155 
156  if ((trans != PlasmaNoTrans) && (trans != PlasmaConjTrans)) {
157  coreblas_error(2, "Illegal value of trans");
158  return -2;
159  }
160  if (M1 < 0) {
161  coreblas_error(3, "Illegal value of M1");
162  return -3;
163  }
164  if (N1 < 0) {
165  coreblas_error(4, "Illegal value of N1");
166  return -4;
167  }
168  if ( (M2 < 0) ||
169  ( (M2 != M1) && (side == PlasmaRight) ) ){
170  coreblas_error(5, "Illegal value of M2");
171  return -5;
172  }
173  if ( (N2 < 0) ||
174  ( (N2 != N1) && (side == PlasmaLeft) ) ){
175  coreblas_error(6, "Illegal value of N2");
176  return -6;
177  }
178  if ((K < 0) ||
179  ( (side == PlasmaLeft) && (K > M1) ) ||
180  ( (side == PlasmaRight) && (K > N1) ) ) {
181  coreblas_error(7, "Illegal value of K");
182  return -7;
183  }
184  if (IB < 0) {
185  coreblas_error(8, "Illegal value of IB");
186  return -8;
187  }
188  if (LDA1 < max(1,M1)){
189  coreblas_error(10, "Illegal value of LDA1");
190  return -10;
191  }
192  if (LDA2 < max(1,M2)){
193  coreblas_error(12, "Illegal value of LDA2");
194  return -12;
195  }
196  if (LDV < max(1,NQ)){
197  coreblas_error(14, "Illegal value of LDV");
198  return -14;
199  }
200  if (LDT < max(1,IB)){
201  coreblas_error(16, "Illegal value of LDT");
202  return -16;
203  }
204  if (LDWORK < max(1,NW)){
205  coreblas_error(18, "Illegal value of LDWORK");
206  return -18;
207  }
208 
209  /* Quick return */
210  if ((M1 == 0) || (N1 == 0) || (M2 == 0) || (N2 == 0) || (K == 0) || (IB == 0))
211  return PLASMA_SUCCESS;
212 
213  if (((side == PlasmaLeft) && (trans != PlasmaNoTrans))
214  || ((side == PlasmaRight) && (trans == PlasmaNoTrans))) {
215  i1 = 0;
216  i3 = IB;
217  }
218  else {
219  i1 = ((K-1) / IB)*IB;
220  i3 = -IB;
221  }
222 
223  for(i = i1; (i > -1) && (i < K); i += i3) {
224  kb = min(IB, K-i);
225 
226  if (side == PlasmaLeft) {
227  /*
228  * H or H' is applied to C(i:m,1:n)
229  */
230  mi = M1 - i;
231  ic = i;
232  }
233  else {
234  /*
235  * H or H' is applied to C(1:m,i:n)
236  */
237  ni = N1 - i;
238  jc = i;
239  }
240  /*
241  * Apply H or H' (NOTE: CORE_zparfb used to be CORE_ztsrfb)
242  */
243  CORE_zparfb(
244  side, trans, PlasmaForward, PlasmaColumnwise,
245  mi, ni, M2, N2, kb, 0,
246  &A1[LDA1*jc+ic], LDA1,
247  A2, LDA2,
248  &V[LDV*i], LDV,
249  &T[LDT*i], LDT,
250  WORK, LDWORK);
251  }
252  return PLASMA_SUCCESS;
253 }
254 
255 /***************************************************************************/
258 void QUARK_CORE_ztsmqr(Quark *quark, Quark_Task_Flags *task_flags,
259  int side, int trans,
260  int m1, int n1, int m2, int n2, int k, int ib, int nb,
261  PLASMA_Complex64_t *A1, int lda1,
262  PLASMA_Complex64_t *A2, int lda2,
263  PLASMA_Complex64_t *V, int ldv,
264  PLASMA_Complex64_t *T, int ldt)
265 {
266  int ldwork = side == PlasmaLeft ? ib : nb;
267 
269  QUARK_Insert_Task(quark, CORE_ztsmqr_quark, task_flags,
270  sizeof(PLASMA_enum), &side, VALUE,
271  sizeof(PLASMA_enum), &trans, VALUE,
272  sizeof(int), &m1, VALUE,
273  sizeof(int), &n1, VALUE,
274  sizeof(int), &m2, VALUE,
275  sizeof(int), &n2, VALUE,
276  sizeof(int), &k, VALUE,
277  sizeof(int), &ib, VALUE,
278  sizeof(PLASMA_Complex64_t)*nb*nb, A1, INOUT,
279  sizeof(int), &lda1, VALUE,
280  sizeof(PLASMA_Complex64_t)*nb*nb, A2, INOUT | LOCALITY,
281  sizeof(int), &lda2, VALUE,
282  sizeof(PLASMA_Complex64_t)*nb*nb, V, INPUT,
283  sizeof(int), &ldv, VALUE,
284  sizeof(PLASMA_Complex64_t)*ib*nb, T, INPUT,
285  sizeof(int), &ldt, VALUE,
286  sizeof(PLASMA_Complex64_t)*ib*nb, NULL, SCRATCH,
287  sizeof(int), &ldwork, VALUE,
288  0);
289 }
290 
291 /***************************************************************************/
294 #if defined(PLASMA_HAVE_WEAK)
295 #pragma weak CORE_ztsmqr_quark = PCORE_ztsmqr_quark
296 #define CORE_ztsmqr_quark PCORE_ztsmqr_quark
297 #endif
299 {
300  int side;
301  int trans;
302  int m1;
303  int n1;
304  int m2;
305  int n2;
306  int k;
307  int ib;
308  PLASMA_Complex64_t *A1;
309  int lda1;
310  PLASMA_Complex64_t *A2;
311  int lda2;
313  int ldv;
315  int ldt;
316  PLASMA_Complex64_t *WORK;
317  int ldwork;
318 
319  quark_unpack_args_18(quark, side, trans, m1, n1, m2, n2, k, ib,
320  A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork);
321  CORE_ztsmqr(side, trans, m1, n1, m2, n2, k, ib,
322  A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork);
323 }