PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
core_zunmlq.c
Go to the documentation of this file.
1 
18 #include <lapacke.h>
19 #include "common.h"
20 
21 /***************************************************************************/
104 #if defined(PLASMA_HAVE_WEAK)
105 #pragma weak CORE_zunmlq = PCORE_zunmlq
106 #define CORE_zunmlq PCORE_zunmlq
107 #endif
108 int CORE_zunmlq(int side, int trans,
109  int M, int N, int K, int IB,
110  PLASMA_Complex64_t *A, int LDA,
111  PLASMA_Complex64_t *T, int LDT,
112  PLASMA_Complex64_t *C, int LDC,
113  PLASMA_Complex64_t *WORK, int LDWORK)
114 {
115  int i, kb;
116  int i1, i3;
117  int nq, nw;
118  int ic = 0;
119  int jc = 0;
120  int ni = N;
121  int mi = M;
122 
123  /* Check input arguments */
124  if ((side != PlasmaLeft) && (side != PlasmaRight)) {
125  coreblas_error(1, "Illegal value of side");
126  return -1;
127  }
128  /*
129  * NQ is the order of Q and NW is the minimum dimension of WORK
130  */
131  if (side == PlasmaLeft) {
132  nq = M;
133  nw = N;
134  }
135  else {
136  nq = N;
137  nw = M;
138  }
139 
140  if ((trans != PlasmaNoTrans) && (trans != PlasmaConjTrans)) {
141  coreblas_error(2, "Illegal value of trans");
142  return -2;
143  }
144  if (M < 0) {
145  coreblas_error(3, "Illegal value of M");
146  return -3;
147  }
148  if (N < 0) {
149  coreblas_error(4, "Illegal value of N");
150  return -4;
151  }
152  if ((K < 0) || (K > nq)) {
153  coreblas_error(5, "Illegal value of K");
154  return -5;
155  }
156  if ((IB < 0) || ( (IB == 0) && ((M > 0) && (N > 0)) )) {
157  coreblas_error(6, "Illegal value of IB");
158  return -6;
159  }
160  if ((LDA < max(1,K)) && (K > 0)) {
161  coreblas_error(8, "Illegal value of LDA");
162  return -8;
163  }
164  if ((LDC < max(1,M)) && (M > 0)) {
165  coreblas_error(12, "Illegal value of LDC");
166  return -12;
167  }
168  if ((LDWORK < max(1,nw)) && (nw > 0)) {
169  coreblas_error(14, "Illegal value of LDWORK");
170  return -14;
171  }
172 
173  /* Quick return */
174  if ((M == 0) || (N == 0) || (K == 0))
175  return PLASMA_SUCCESS;
176 
177  if (((side == PlasmaLeft) && (trans == PlasmaNoTrans))
178  || ((side == PlasmaRight) && (trans != PlasmaNoTrans))) {
179  i1 = 0;
180  i3 = IB;
181  }
182  else {
183  i1 = ( ( K-1 ) / IB )*IB;
184  i3 = -IB;
185  }
186 
187  if( trans == PlasmaNoTrans) {
188  trans = PlasmaConjTrans;
189  }
190  else {
191  trans = PlasmaNoTrans;
192  }
193 
194  for(i = i1; (i >- 1) && (i < K); i+=i3 ) {
195  kb = min(IB, K-i);
196 
197  if (side == PlasmaLeft) {
198  /*
199  * H or H' is applied to C(i:m,1:n)
200  */
201  mi = M - i;
202  ic = i;
203  }
204  else {
205  /*
206  * H or H' is applied to C(1:m,i:n)
207  */
208  ni = N - i;
209  jc = i;
210  }
211  /*
212  * Apply H or H'
213  */
214  LAPACKE_zlarfb_work(LAPACK_COL_MAJOR,
215  lapack_const(side),
216  lapack_const(trans),
219  mi, ni, kb,
220  &A[LDA*i+i], LDA,
221  &T[LDT*i], LDT,
222  &C[LDC*jc+ic], LDC,
223  WORK, LDWORK);
224  }
225  return PLASMA_SUCCESS;
226 }
227 
228 /***************************************************************************/
231 void QUARK_CORE_zunmlq(Quark *quark, Quark_Task_Flags *task_flags,
232  int side, int trans,
233  int m, int n, int k, int ib, int nb,
234  PLASMA_Complex64_t *A, int lda,
235  PLASMA_Complex64_t *T, int ldt,
236  PLASMA_Complex64_t *C, int ldc)
237 {
239  QUARK_Insert_Task(quark, CORE_zunmlq_quark, task_flags,
240  sizeof(PLASMA_enum), &side, VALUE,
241  sizeof(PLASMA_enum), &trans, VALUE,
242  sizeof(int), &m, VALUE,
243  sizeof(int), &n, VALUE,
244  sizeof(int), &k, VALUE,
245  sizeof(int), &ib, VALUE,
246  sizeof(PLASMA_Complex64_t)*nb*nb, A, INPUT | QUARK_REGION_U,
247  sizeof(int), &lda, VALUE,
248  sizeof(PLASMA_Complex64_t)*ib*nb, T, INPUT,
249  sizeof(int), &ldt, VALUE,
250  sizeof(PLASMA_Complex64_t)*nb*nb, C, INOUT,
251  sizeof(int), &ldc, VALUE,
252  sizeof(PLASMA_Complex64_t)*ib*nb, NULL, SCRATCH,
253  sizeof(int), &nb, VALUE,
254  0);
255 }
256 
257 /***************************************************************************/
260 #if defined(PLASMA_HAVE_WEAK)
261 #pragma weak CORE_zunmlq_quark = PCORE_zunmlq_quark
262 #define CORE_zunmlq_quark PCORE_zunmlq_quark
263 #endif
265 {
266  int side;
267  int trans;
268  int m;
269  int n;
270  int k;
271  int ib;
273  int lda;
275  int ldt;
277  int ldc;
278  PLASMA_Complex64_t *WORK;
279  int ldwork;
280 
281  quark_unpack_args_14(quark, side, trans, m, n, k, ib,
282  A, lda, T, ldt, C, ldc, WORK, ldwork);
283  CORE_zunmlq(side, trans, m, n, k, ib,
284  A, lda, T, ldt, C, ldc, WORK, ldwork);
285 }