PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
core_cparfb.c
Go to the documentation of this file.
1 
15 #include <cblas.h>
16 #include <lapacke.h>
17 #include "common.h"
18 
19 /***************************************************************************/
130 int
131 CORE_cparfb(int side, int trans, int direct, int storev,
132  int M1, int N1, int M2, int N2, int K, int L,
133  PLASMA_Complex32_t *A1, int LDA1,
134  PLASMA_Complex32_t *A2, int LDA2,
135  PLASMA_Complex32_t *V, int LDV,
136  PLASMA_Complex32_t *T, int LDT,
137  PLASMA_Complex32_t *WORK, int LDWORK)
138 {
139  static PLASMA_Complex32_t zone = 1.0;
140  static PLASMA_Complex32_t mzone = -1.0;
141 
142  int j;
143 
144  /* Check input arguments */
145  if ((side != PlasmaLeft) && (side != PlasmaRight)) {
146  coreblas_error(1, "Illegal value of side");
147  return -1;
148  }
149  if ((trans != PlasmaNoTrans) && (trans != PlasmaConjTrans)) {
150  coreblas_error(2, "Illegal value of trans");
151  return -2;
152  }
153  if ((direct != PlasmaForward) && (direct != PlasmaBackward)) {
154  coreblas_error(3, "Illegal value of direct");
155  return -3;
156  }
157  if ((storev != PlasmaColumnwise) && (storev != PlasmaRowwise)) {
158  coreblas_error(4, "Illegal value of storev");
159  return -4;
160  }
161  if (M1 < 0) {
162  coreblas_error(5, "Illegal value of M1");
163  return -5;
164  }
165  if (N1 < 0) {
166  coreblas_error(6, "Illegal value of N1");
167  return -6;
168  }
169  if ((M2 < 0) ||
170  ( (side == PlasmaRight) && (M1 != M2) ) ) {
171  coreblas_error(7, "Illegal value of M2");
172  return -7;
173  }
174  if ((N2 < 0) ||
175  ( (side == PlasmaLeft) && (N1 != N2) ) ) {
176  coreblas_error(8, "Illegal value of N2");
177  return -8;
178  }
179  if (K < 0) {
180  coreblas_error(9, "Illegal value of K");
181  return -9;
182  }
183 
184  /* Quick return */
185  if ((M1 == 0) || (N1 == 0) || (M2 == 0) || (N2 == 0) || (K == 0))
186  return PLASMA_SUCCESS;
187 
188  if (direct == PlasmaForward) {
189 
190  if (side == PlasmaLeft) {
191 
192  /*
193  * Column or Rowwise / Forward / Left
194  * ----------------------------------
195  *
196  * Form H * A or H' * A where A = ( A1 )
197  * ( A2 )
198  */
199 
200  /* W = A1 + op(V) * A2 */
201  CORE_cpamm(
202  PlasmaW, PlasmaLeft, storev,
203  K, N1, M2, L,
204  A1, LDA1,
205  A2, LDA2,
206  V, LDV,
207  WORK, LDWORK);
208 
209  /* W = op(T) * W */
210  cblas_ctrmm(
212  (CBLAS_TRANSPOSE)trans, CblasNonUnit, K, N2,
213  CBLAS_SADDR(zone), T, LDT, WORK, LDWORK);
214 
215  /* A1 = A1 - W */
216  for(j = 0; j < N1; j++) {
217  cblas_caxpy(
218  K, CBLAS_SADDR(mzone),
219  &WORK[LDWORK*j], 1,
220  &A1[LDA1*j], 1);
221  }
222 
223  /* A2 = A2 - op(V) * W */
224  /* W also changes: W = V * W, A2 = A2 - W */
225  CORE_cpamm(
226  PlasmaA2, PlasmaLeft, storev,
227  M2, N2, K, L,
228  A1, LDA1,
229  A2, LDA2,
230  V, LDV,
231  WORK, LDWORK);
232  }
233  else {
234  /*
235  * Column or Rowwise / Forward / Right
236  * -----------------------------------
237  *
238  * Form H * A or H' * A where A = ( A1 A2 )
239  *
240  */
241 
242  /* W = A1 + A2 * op(V) */
243  CORE_cpamm(
244  PlasmaW, PlasmaRight, storev,
245  M1, K, N2, L,
246  A1, LDA1,
247  A2, LDA2,
248  V, LDV,
249  WORK, LDWORK);
250 
251  /* W = W * op(T) */
252  cblas_ctrmm(
254  (CBLAS_TRANSPOSE)trans, CblasNonUnit, M2, K,
255  CBLAS_SADDR(zone), T, LDT, WORK, LDWORK);
256 
257  /* A1 = A1 - W */
258  for(j = 0; j < K; j++) {
259  cblas_caxpy(
260  M1, CBLAS_SADDR(mzone),
261  &WORK[LDWORK*j], 1,
262  &A1[LDA1*j], 1);
263  }
264 
265  /* A2 = A2 - W * op(V) */
266  /* W also changes: W = W * V', A2 = A2 - W */
267  CORE_cpamm(
268  PlasmaA2, PlasmaRight, storev,
269  M2, N2, K, L,
270  A1, LDA1,
271  A2, LDA2,
272  V, LDV,
273  WORK, LDWORK);
274  }
275  }
276  else {
277  coreblas_error(3, "Not implemented (Backward / Left or Right)");
279  }
280 
281  return PLASMA_SUCCESS;
282 }
283