PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
core_cshift.c
Go to the documentation of this file.
1 
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "common.h"
#include "quark.h"
24 
62 #if defined(PLASMA_HAVE_WEAK)
63 #pragma weak CORE_cshiftw = PCORE_cshiftw
64 #define CORE_cshiftw PCORE_cshiftw
65 #endif
66 void CORE_cshiftw(int s, int cl, int m, int n, int L, PLASMA_Complex32_t *A, PLASMA_Complex32_t *W) {
67  int64_t k, k1;
68  int i, j, q, kL, k1L;
69 
70  q = m * n - 1;
71  k = s;
72 
73  if( cl != 0 ) {
74  for (i=1; i<cl; i++) {
75  k1 = (k * m) % (int64_t)q;
76 
77  /* A(k*L:k*L+L-1) = A(k1*L:k1*L+L-1) */
78  kL = k *L;
79  k1L = k1*L;
80 
81  for(j=0; j<L; j++) {
82  A[kL+j] = A[k1L+j];
83  }
84  k = k1;
85  }
86  }
87  else {
88  while (1) {
89  k1 = (k * m) % (int64_t)q;
90  if( k1 == s )
91  break;
92 
93  /* A(k*L:k*L+L-1) = A(k1*L:k1*L+L-1) */
94  kL = k *L;
95  k1L = k1*L;
96  for (j=0; j<L; j++) {
97  A[kL+j] = A[k1L+j];
98  }
99  k = k1;
100  }
101  }
102  memcpy(&(A[k*L]), W, L*sizeof(PLASMA_Complex32_t));
103 }
104 
105 /***************************************************************************/
108 void QUARK_CORE_cshiftw(Quark *quark, Quark_Task_Flags *task_flags,
109  int s, int cl, int m, int n, int L, PLASMA_Complex32_t *A, PLASMA_Complex32_t *W)
110 {
112  QUARK_Insert_Task(quark, CORE_cshiftw_quark, task_flags,
113  sizeof(int), &s, VALUE,
114  sizeof(int), &cl, VALUE,
115  sizeof(int), &m, VALUE,
116  sizeof(int), &n, VALUE,
117  sizeof(int), &L, VALUE,
118  sizeof(PLASMA_Complex32_t)*m*n*L, A, INOUT,
119  sizeof(PLASMA_Complex32_t)*L, W, INPUT,
120  0);
121 }
122 
123 /***************************************************************************/
126 #if defined(PLASMA_HAVE_WEAK)
127 #pragma weak CORE_cshiftw_quark = PCORE_cshiftw_quark
128 #define CORE_cshiftw_quark PCORE_cshiftw_quark
129 #endif
131 {
132  int s;
133  int cl;
134  int m;
135  int n;
136  int L;
139 
140  quark_unpack_args_7(quark, s, cl, m, n, L, A, W);
141  CORE_cshiftw(s, cl, m, n, L, A, W);
142 }
143 
171 #if defined(PLASMA_HAVE_WEAK)
172 #pragma weak CORE_cshift = PCORE_cshift
173 #define CORE_cshift PCORE_cshift
174 #endif
175 void CORE_cshift(int s, int m, int n, int L, PLASMA_Complex32_t *A) {
177 
178  W = (PLASMA_Complex32_t*)malloc(L * sizeof(PLASMA_Complex32_t));
179  memcpy(W, &(A[s*L]), L*sizeof(PLASMA_Complex32_t));
180  CORE_cshiftw(s, 0, m, n, L, A, W);
181  free(W);
182 }
183 
184 /***************************************************************************/
187 void QUARK_CORE_cshift(Quark *quark, Quark_Task_Flags *task_flags,
188  int s, int m, int n, int L, PLASMA_Complex32_t *A)
189 {
191  QUARK_Insert_Task(quark, CORE_cshift_quark, task_flags,
192  sizeof(int), &s, VALUE,
193  sizeof(int), &m, VALUE,
194  sizeof(int), &n, VALUE,
195  sizeof(int), &L, VALUE,
196  sizeof(PLASMA_Complex32_t)*m*n*L, A, INOUT | GATHERV,
197  sizeof(PLASMA_Complex32_t)*L, NULL, SCRATCH,
198  0);
199 }
200 
201 /***************************************************************************/
204 #if defined(PLASMA_HAVE_WEAK)
205 #pragma weak CORE_cshift_quark = PCORE_cshift_quark
206 #define CORE_cshift_quark PCORE_cshift_quark
207 #endif
209 {
210  int s;
211  int m;
212  int n;
213  int L;
216 
217  quark_unpack_args_6(quark, s, m, n, L, A, W);
218  memcpy(W, &(A[s*L]), L*sizeof(PLASMA_Complex32_t));
219  CORE_cshiftw(s, 0, m, n, L, A, W);
220 }
221