PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
core_sormqr.c
Go to the documentation of this file.
1 
17 #include <lapacke.h>
18 #include "common.h"
19 
20 /***************************************************************************/
104 #if defined(PLASMA_HAVE_WEAK)
105 #pragma weak CORE_sormqr = PCORE_sormqr
106 #define CORE_sormqr PCORE_sormqr
107 #endif
108 int CORE_sormqr(int side, int trans,
109  int M, int N, int K, int IB,
110  float *A, int LDA,
111  float *T, int LDT,
112  float *C, int LDC,
113  float *WORK, int LDWORK)
114 {
115  int i, kb;
116  int i1, i3;
117  int nq, nw;
118  int ic = 0;
119  int jc = 0;
120  int ni = N;
121  int mi = M;
122 
123  /* Check input arguments */
124  if ((side != PlasmaLeft) && (side != PlasmaRight)) {
125  coreblas_error(1, "Illegal value of side");
126  return -1;
127  }
128  /*
129  * NQ is the order of Q and NW is the minimum dimension of WORK
130  */
131  if (side == PlasmaLeft) {
132  nq = M;
133  nw = N;
134  }
135  else {
136  nq = N;
137  nw = M;
138  }
139 
140  if ((trans != PlasmaNoTrans) && (trans != PlasmaTrans)) {
141  coreblas_error(2, "Illegal value of trans");
142  return -2;
143  }
144  if (M < 0) {
145  coreblas_error(3, "Illegal value of M");
146  return -3;
147  }
148  if (N < 0) {
149  coreblas_error(4, "Illegal value of N");
150  return -4;
151  }
152  if ((K < 0) || (K > nq)) {
153  coreblas_error(5, "Illegal value of K");
154  return -5;
155  }
156  if ((IB < 0) || ( (IB == 0) && ((M > 0) && (N > 0)) )) {
157  coreblas_error(6, "Illegal value of IB");
158  return -6;
159  }
160  if ((LDA < max(1,nq)) && (nq > 0)) {
161  coreblas_error(8, "Illegal value of LDA");
162  return -8;
163  }
164  if ((LDC < max(1,M)) && (M > 0)) {
165  coreblas_error(12, "Illegal value of LDC");
166  return -12;
167  }
168  if ((LDWORK < max(1,nw)) && (nw > 0)) {
169  coreblas_error(14, "Illegal value of LDWORK");
170  return -14;
171  }
172 
173  /* Quick return */
174  if ((M == 0) || (N == 0) || (K == 0))
175  return PLASMA_SUCCESS;
176 
177  if (((side == PlasmaLeft) && (trans != PlasmaNoTrans))
178  || ((side == PlasmaRight) && (trans == PlasmaNoTrans))) {
179  i1 = 0;
180  i3 = IB;
181  }
182  else {
183  i1 = ( ( K-1 ) / IB )*IB;
184  i3 = -IB;
185  }
186 
187  for(i = i1; (i >- 1) && (i < K); i+=i3 ) {
188  kb = min(IB, K-i);
189 
190  if (side == PlasmaLeft) {
191  /*
192  * H or H' is applied to C(i:m,1:n)
193  */
194  mi = M - i;
195  ic = i;
196  }
197  else {
198  /*
199  * H or H' is applied to C(1:m,i:n)
200  */
201  ni = N - i;
202  jc = i;
203  }
204  /*
205  * Apply H or H'
206  */
207  LAPACKE_slarfb_work(LAPACK_COL_MAJOR,
208  lapack_const(side),
209  lapack_const(trans),
212  mi, ni, kb,
213  &A[LDA*i+i], LDA,
214  &T[LDT*i], LDT,
215  &C[LDC*jc+ic], LDC,
216  WORK, LDWORK);
217  }
218  return PLASMA_SUCCESS;
219 }
220 
221 /***************************************************************************/
224 void QUARK_CORE_sormqr(Quark *quark, Quark_Task_Flags *task_flags,
225  int side, int trans,
226  int m, int n, int k, int ib, int nb,
227  float *A, int lda,
228  float *T, int ldt,
229  float *C, int ldc)
230 {
232  QUARK_Insert_Task(quark, CORE_sormqr_quark, task_flags,
233  sizeof(PLASMA_enum), &side, VALUE,
234  sizeof(PLASMA_enum), &trans, VALUE,
235  sizeof(int), &m, VALUE,
236  sizeof(int), &n, VALUE,
237  sizeof(int), &k, VALUE,
238  sizeof(int), &ib, VALUE,
239  sizeof(float)*nb*nb, A, INPUT | QUARK_REGION_L,
240  sizeof(int), &lda, VALUE,
241  sizeof(float)*ib*nb, T, INPUT,
242  sizeof(int), &ldt, VALUE,
243  sizeof(float)*nb*nb, C, INOUT,
244  sizeof(int), &ldc, VALUE,
245  sizeof(float)*ib*nb, NULL, SCRATCH,
246  sizeof(int), &nb, VALUE,
247  0);
248 }
249 
250 /***************************************************************************/
253 #if defined(PLASMA_HAVE_WEAK)
254 #pragma weak CORE_sormqr_quark = PCORE_sormqr_quark
255 #define CORE_sormqr_quark PCORE_sormqr_quark
256 #endif
258 {
259  int side;
260  int trans;
261  int m;
262  int n;
263  int k;
264  int ib;
265  float *A;
266  int lda;
267  float *T;
268  int ldt;
269  float *C;
270  int ldc;
271  float *WORK;
272  int ldwork;
273 
274  quark_unpack_args_14(quark, side, trans, m, n, k, ib,
275  A, lda, T, ldt, C, ldc, WORK, ldwork);
276  CORE_sormqr(side, trans, m, n, k, ib,
277  A, lda, T, ldt, C, ldc, WORK, ldwork);
278 }