PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
core_zlaswp.c
Go to the documentation of this file.
1 
15 #include <lapacke.h>
16 #include "common.h"
17 
18 #define A(m, n) BLKADDR(descA, PLASMA_Complex64_t, m, n)
19 
20 /***************************************************************************/
25 #if defined(PLASMA_HAVE_WEAK)
26 #pragma weak CORE_zlaswp = PCORE_zlaswp
27 #define CORE_zlaswp PCORE_zlaswp
28 #endif
29 void CORE_zlaswp(int N, PLASMA_Complex64_t *A, int LDA, int I1, int I2, int *IPIV, int INC)
30 {
31  LAPACKE_zlaswp_work( LAPACK_COL_MAJOR, N, A, LDA, I1, I2, IPIV, INC );
32 }
33 
34 /***************************************************************************/
37 void QUARK_CORE_zlaswp(Quark *quark, Quark_Task_Flags *task_flags,
38  int n, PLASMA_Complex64_t *A, int lda,
39  int i1, int i2, int *ipiv, int inc)
40 {
43  quark, CORE_zlaswp_quark, task_flags,
44  sizeof(int), &n, VALUE,
45  sizeof(PLASMA_Complex64_t)*lda*n, A, INOUT | LOCALITY,
46  sizeof(int), &lda, VALUE,
47  sizeof(int), &i1, VALUE,
48  sizeof(int), &i2, VALUE,
49  sizeof(int)*n, ipiv, INPUT,
50  sizeof(int), &inc, VALUE,
51  0);
52 }
53 
54 /***************************************************************************/
57 #if defined(PLASMA_HAVE_WEAK)
58 #pragma weak CORE_zlaswp_quark = PCORE_zlaswp_quark
59 #define CORE_zlaswp_quark PCORE_zlaswp_quark
60 #endif
62 {
63  int n, lda, i1, i2, inc;
64  int *ipiv;
66 
67  quark_unpack_args_7(quark, n, A, lda, i1, i2, ipiv, inc);
68  LAPACKE_zlaswp_work(LAPACK_COL_MAJOR, n, A, lda, i1, i2, ipiv, inc );
69 }
70 
71 /***************************************************************************/
74 void QUARK_CORE_zlaswp_f2(Quark *quark, Quark_Task_Flags *task_flags,
75  int n, PLASMA_Complex64_t *A, int lda,
76  int i1, int i2, int *ipiv, int inc,
77  PLASMA_Complex64_t *fake1, int szefake1, int flag1,
78  PLASMA_Complex64_t *fake2, int szefake2, int flag2)
79 {
82  quark, CORE_zlaswp_f2_quark, task_flags,
83  sizeof(int), &n, VALUE,
84  sizeof(PLASMA_Complex64_t)*lda*n, A, INOUT | LOCALITY,
85  sizeof(int), &lda, VALUE,
86  sizeof(int), &i1, VALUE,
87  sizeof(int), &i2, VALUE,
88  sizeof(int)*n, ipiv, INPUT,
89  sizeof(int), &inc, VALUE,
90  sizeof(PLASMA_Complex64_t)*szefake1, fake1, flag1,
91  sizeof(PLASMA_Complex64_t)*szefake2, fake2, flag2,
92  0);
93 }
94 
95 /***************************************************************************/
98 #if defined(PLASMA_HAVE_WEAK)
99 #pragma weak CORE_zlaswp_f2_quark = PCORE_zlaswp_f2_quark
100 #define CORE_zlaswp_f2_quark PCORE_zlaswp_f2_quark
101 #endif
103 {
104  int n, lda, i1, i2, inc;
105  int *ipiv;
107  void *fake1, *fake2;
108 
109  quark_unpack_args_9(quark, n, A, lda, i1, i2, ipiv, inc, fake1, fake2);
110  LAPACKE_zlaswp_work(LAPACK_COL_MAJOR, n, A, lda, i1, i2, ipiv, inc );
111 }
112 
113 /***************************************************************************/
143 #if defined(PLASMA_HAVE_WEAK)
144 #pragma weak CORE_zlaswp_ontile = PCORE_zlaswp_ontile
145 #define CORE_zlaswp_ontile PCORE_zlaswp_ontile
146 #endif
147 int CORE_zlaswp_ontile(PLASMA_desc descA, int i1, int i2, int *ipiv, int inc)
148 {
149  int i, j, ip, it;
150  PLASMA_Complex64_t *A1;
151  int lda1, lda2;
152 
153  /* Change i1 to C notation */
154  i1--;
155  if ( descA.nt > 1 ) {
156  coreblas_error(1, "Illegal value of descA.nt");
157  return -1;
158  }
159  if ( i1 < 0 ) {
160  coreblas_error(2, "Illegal value of i1");
161  return -2;
162  }
163  if ( (i2 < i1) || (i2 > descA.m) ) {
164  coreblas_error(3, "Illegal value of i2");
165  return -3;
166  }
167  if ( ! ( (i2 - i1 - i1%descA.mb -1) < descA.mb ) ) {
168  coreblas_error(2, "Illegal value of i1,i2. They have to be part of the same block.");
169  return -3;
170  }
171 
172  it = i1 / descA.mb;
173  if (inc > 0) {
174  A1 = A(it, 0);
175  lda1 = BLKLDD(descA, 0);
176 
177  for (j = i1; j < i2; ++j, ipiv+=inc) {
178  ip = (*ipiv) - descA.i - 1;
179  if ( ip != j )
180  {
181  it = ip / descA.mb;
182  i = ip % descA.mb;
183  lda2 = BLKLDD(descA, it);
184  cblas_zswap(descA.n, A1 + j, lda1,
185  A(it, 0) + i, lda2 );
186  }
187  }
188  }
189  else
190  {
191  A1 = A(it, 0);
192  lda1 = BLKLDD(descA, descA.mt-1);
193 
194  i1--;
195  ipiv = &ipiv[(1-i2)*inc];
196  for (j = i2-1; j > i1; --j, ipiv+=inc) {
197  ip = (*ipiv) - descA.i - 1;
198  if ( ip != j )
199  {
200  it = ip / descA.mb;
201  i = ip % descA.mb;
202  lda2 = BLKLDD(descA, it);
203  cblas_zswap(descA.n, A1 + j, lda1,
204  A(it, 0) + i, lda2 );
205  }
206  }
207  }
208 
209  return PLASMA_SUCCESS;
210 }
211 /***************************************************************************/
215  PLASMA_desc descA, PLASMA_Complex64_t *Aij,
216  int i1, int i2, int *ipiv, int inc, PLASMA_Complex64_t *fakepanel)
217 {
220  quark, CORE_zlaswp_ontile_quark, task_flags,
221  sizeof(PLASMA_desc), &descA, VALUE,
222  sizeof(PLASMA_Complex64_t)*1, Aij, INOUT | LOCALITY,
223  sizeof(int), &i1, VALUE,
224  sizeof(int), &i2, VALUE,
225  sizeof(int)*(i2-i1+1)*abs(inc), ipiv, INPUT,
226  sizeof(int), &inc, VALUE,
227  sizeof(PLASMA_Complex64_t)*1, fakepanel, INOUT,
228  0);
229 }
230 
231 /***************************************************************************/
234 #if defined(PLASMA_HAVE_WEAK)
235 #pragma weak CORE_zlaswp_ontile_quark = PCORE_zlaswp_ontile_quark
236 #define CORE_zlaswp_ontile_quark PCORE_zlaswp_ontile_quark
237 #endif
239 {
240  int i1, i2, inc;
241  int *ipiv;
242  PLASMA_Complex64_t *A, *fake;
243  PLASMA_desc descA;
244 
245  quark_unpack_args_7(quark, descA, A, i1, i2, ipiv, inc, fake);
246  CORE_zlaswp_ontile(descA, i1, i2, ipiv, inc);
247 }
248 
249 /***************************************************************************/
253  PLASMA_desc descA, PLASMA_Complex64_t *Aij,
254  int i1, int i2, int *ipiv, int inc,
255  PLASMA_Complex64_t *fake1, int szefake1, int flag1,
256  PLASMA_Complex64_t *fake2, int szefake2, int flag2)
257 {
260  quark, CORE_zlaswp_ontile_f2_quark, task_flags,
261  sizeof(PLASMA_desc), &descA, VALUE,
262  sizeof(PLASMA_Complex64_t)*1, Aij, INOUT | LOCALITY,
263  sizeof(int), &i1, VALUE,
264  sizeof(int), &i2, VALUE,
265  sizeof(int)*(i2-i1+1)*abs(inc), ipiv, INPUT,
266  sizeof(int), &inc, VALUE,
267  sizeof(PLASMA_Complex64_t)*szefake1, fake1, flag1,
268  sizeof(PLASMA_Complex64_t)*szefake2, fake2, flag2,
269  0);
270 }
271 
272 /***************************************************************************/
275 #if defined(PLASMA_HAVE_WEAK)
276 #pragma weak CORE_zlaswp_ontile_f2_quark = PCORE_zlaswp_ontile_f2_quark
277 #define CORE_zlaswp_ontile_f2_quark PCORE_zlaswp_ontile_f2_quark
278 #endif
280 {
281  int i1, i2, inc;
282  int *ipiv;
284  PLASMA_desc descA;
285  void *fake1, *fake2;
286 
287  quark_unpack_args_8(quark, descA, A, i1, i2, ipiv, inc, fake1, fake2);
288  CORE_zlaswp_ontile(descA, i1, i2, ipiv, inc);
289 }
290 
291 /***************************************************************************/
321 #if defined(PLASMA_HAVE_WEAK)
322 #pragma weak CORE_zswptr_ontile = PCORE_zswptr_ontile
323 #define CORE_zswptr_ontile PCORE_zswptr_ontile
324 #endif
325 int CORE_zswptr_ontile(PLASMA_desc descA, int i1, int i2, int *ipiv, int inc,
326  PLASMA_Complex64_t *Akk, int ldak)
327 {
328  PLASMA_Complex64_t zone = 1.0;
329  int lda;
330  int m = descA.mt == 1 ? descA.m : descA.mb;
331 
332  if ( descA.nt > 1 ) {
333  coreblas_error(1, "Illegal value of descA.nt");
334  return -1;
335  }
336  if ( i1 < 1 ) {
337  coreblas_error(2, "Illegal value of i1");
338  return -2;
339  }
340  if ( (i2 < i1) || (i2 > m) ) {
341  coreblas_error(3, "Illegal value of i2");
342  return -3;
343  }
344 
345  CORE_zlaswp_ontile(descA, i1, i2, ipiv, inc);
346 
347  lda = BLKLDD(descA, 0);
350  m, descA.n, CBLAS_SADDR(zone),
351  Akk, ldak,
352  A(0, 0), lda );
353 
354  return PLASMA_SUCCESS;
355 }
356 /***************************************************************************/
360  PLASMA_desc descA, PLASMA_Complex64_t *Aij,
361  int i1, int i2, int *ipiv, int inc,
362  PLASMA_Complex64_t *Akk, int ldak)
363 {
366  quark, CORE_zswptr_ontile_quark, task_flags,
367  sizeof(PLASMA_desc), &descA, VALUE,
368  sizeof(PLASMA_Complex64_t)*1, Aij, INOUT | LOCALITY,
369  sizeof(int), &i1, VALUE,
370  sizeof(int), &i2, VALUE,
371  sizeof(int)*(i2-i1+1)*abs(inc), ipiv, INPUT,
372  sizeof(int), &inc, VALUE,
373  sizeof(PLASMA_Complex64_t)*ldak, Akk, INPUT,
374  sizeof(int), &ldak, VALUE,
375  0);
376 }
377 
378 /***************************************************************************/
381 #if defined(PLASMA_HAVE_WEAK)
382 #pragma weak CORE_zswptr_ontile_quark = PCORE_zswptr_ontile_quark
383 #define CORE_zswptr_ontile_quark PCORE_zswptr_ontile_quark
384 #endif
386 {
387  int i1, i2, inc, ldak;
388  int *ipiv;
389  PLASMA_Complex64_t *A, *Akk;
390  PLASMA_desc descA;
391 
392  quark_unpack_args_8(quark, descA, A, i1, i2, ipiv, inc, Akk, ldak);
393  CORE_zswptr_ontile(descA, i1, i2, ipiv, inc, Akk, ldak);
394 }
395 
396 /***************************************************************************/
426 #if defined(PLASMA_HAVE_WEAK)
427 #pragma weak CORE_zlaswpc_ontile = PCORE_zlaswpc_ontile
428 #define CORE_zlaswpc_ontile PCORE_zlaswpc_ontile
429 #endif
430 int CORE_zlaswpc_ontile(PLASMA_desc descA, int i1, int i2, int *ipiv, int inc)
431 {
432  int i, j, ip, it;
433  PLASMA_Complex64_t *A1;
434  int lda;
435 
436  if ( descA.mt > 1 ) {
437  coreblas_error(1, "Illegal value of descA.mt");
438  return -1;
439  }
440  if ( i1 < 1 ) {
441  coreblas_error(2, "Illegal value of i1");
442  return -2;
443  }
444  if ( (i2 < i1) || (i2 > descA.n) ) {
445  coreblas_error(3, "Illegal value of i2");
446  return -3;
447  }
448  if ( ! ( (i2 - i1 - i1%descA.nb -1) < descA.nb ) ) {
449  coreblas_error(2, "Illegal value of i1,i2. They have to be part of the same block.");
450  return -3;
451  }
452 
453  lda = BLKLDD(descA, 0);
454 
455  it = i1 / descA.nb;
456  if (inc > 0) {
457  A1 = A(0, it);
458 
459  for (j = i1-1; j < i2; ++j, ipiv+=inc) {
460  ip = (*ipiv) - descA.j - 1;
461  if ( ip != j )
462  {
463  it = ip / descA.nb;
464  i = ip % descA.nb;
465  cblas_zswap(descA.m, A1 + j*lda, 1,
466  A(0, it) + i*lda, 1 );
467  }
468  }
469  }
470  else
471  {
472  A1 = A(0, it);
473  i1 -= 2;
474  ipiv = &ipiv[(1-i2)*inc];
475  for (j = i2-1; j > i1; --j, ipiv+=inc) {
476  ip = (*ipiv) - descA.j - 1;
477  if ( ip != j )
478  {
479  it = ip / descA.nb;
480  i = ip % descA.nb;
481  cblas_zswap(descA.m, A1 + j*lda, 1,
482  A(0, it) + i*lda, 1 );
483  }
484  }
485  }
486 
487  return PLASMA_SUCCESS;
488 }
489 /***************************************************************************/
493  PLASMA_desc descA, PLASMA_Complex64_t *Aij,
494  int i1, int i2, int *ipiv, int inc, PLASMA_Complex64_t *fakepanel)
495 {
498  quark, CORE_zlaswpc_ontile_quark, task_flags,
499  sizeof(PLASMA_desc), &descA, VALUE,
500  sizeof(PLASMA_Complex64_t)*1, Aij, INOUT | LOCALITY,
501  sizeof(int), &i1, VALUE,
502  sizeof(int), &i2, VALUE,
503  sizeof(int)*(i2-i1+1)*abs(inc), ipiv, INPUT,
504  sizeof(int), &inc, VALUE,
505  sizeof(PLASMA_Complex64_t)*1, fakepanel, INOUT,
506  0);
507 }
508 
509 /***************************************************************************/
512 #if defined(PLASMA_HAVE_WEAK)
513 #pragma weak CORE_zlaswpc_ontile_quark = PCORE_zlaswpc_ontile_quark
514 #define CORE_zlaswpc_ontile_quark PCORE_zlaswpc_ontile_quark
515 #endif
517 {
518  int i1, i2, inc;
519  int *ipiv;
520  PLASMA_Complex64_t *A, *fake;
521  PLASMA_desc descA;
522 
523  quark_unpack_args_7(quark, descA, A, i1, i2, ipiv, inc, fake);
524  CORE_zlaswpc_ontile(descA, i1, i2, ipiv, inc);
525 }
526