PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
pztile.c
Go to the documentation of this file.
1 
16 #include "common.h"
17 #include "auxiliary.h"
18 #include "tile.h"
19 #include "quark.h"
20 
/*
 * AF77(m, n): address of the element of the column-major array Af77 located
 * at row A.mb*m and column A.nb*n, i.e. the top-left corner of block (m, n).
 * The macro is not self-contained: it uses Af77, lda and A from the scope
 * where it is expanded. The column offset is evaluated in 64-bit arithmetic.
 * NOTE(review): the row offset A.mb*(m) is multiplied in int and only then
 * cast to int64_t - confirm that product can never exceed INT_MAX.
 */
21 #define AF77(m, n) &(Af77[ ((int64_t)A.nb*(int64_t)lda*(int64_t)(n)) + (int64_t)(A.mb*(m)) ])
/*
 * ABDL(m, n): expands to BLKADDR(A, PLASMA_Complex64_t, m, n); uses A from
 * the expansion scope. Presumably the address of tile (m, n) inside the tile
 * storage described by A - confirm against the BLKADDR definition.
 */
22 #define ABDL(m, n) BLKADDR(A, PLASMA_Complex64_t, m, n)
23 
24 void CORE_ztile_zero_quark(Quark* quark);
25 
26 /***************************************************************************/
/*
 * Copies the column-major array Af77 (leading dimension lda) into the tile
 * storage described by A, one tile at a time, using CORE_zlacpy.
 *
 * Arguments are obtained through plasma_unpack_args_5 in the order
 * Af77, lda, A, sequence, request. The routine returns without doing any
 * work when sequence->status is not PLASMA_SUCCESS.
 *
 * Work distribution: tiles are numbered column by column (linear index
 * n*A.mt + m). This caller starts at index PLASMA_RANK and advances by
 * PLASMA_SIZE after every tile (presumably the calling thread's rank and
 * the number of participating threads - confirm).
 *
 * NOTE(review): the signature (listing lines 27-29) is absent from this
 * extract; presumably
 *     void plasma_pzlapack_to_tile(plasma_context_t *plasma)
 * - confirm against the upstream file.
 */
30 {
31  PLASMA_Complex64_t *Af77;
32  int lda;
33  PLASMA_desc A;
34  PLASMA_sequence *sequence;
35  PLASMA_request *request;
36 
37  PLASMA_Complex64_t *f77;
38  PLASMA_Complex64_t *bdl;
39 
40  int X1, Y1;
41  int X2, Y2;
42  int n, m, ldt;
43  int next_m;
44  int next_n;
45 
46  plasma_unpack_args_5(Af77, lda, A, sequence, request);
47  if (sequence->status != PLASMA_SUCCESS)
48  return;
49 
    /* Turn the starting linear index PLASMA_RANK into tile coordinates (m, n). */
50  n = 0;
51  m = PLASMA_RANK;
52  while (m >= A.mt && n < A.nt) {
53  n++;
54  m = m-A.mt;
55  }
56 
57  while (n < A.nt) {
    /* Compute the coordinates of the tile handled on the next iteration. */
58  next_m = m;
59  next_n = n;
60 
61  next_m += PLASMA_SIZE;
62  while (next_m >= A.mt && next_n < A.nt) {
63  next_n++;
64  next_m = next_m-A.mt;
65  }
66 
    /*
     * [X1, X2) is the column range and [Y1, Y2) the row range to copy inside
     * this tile. Only the first tile row/column can start past 0 and only the
     * last tile row/column can end before A.mb / A.nb.
     */
67  X1 = n == 0 ? A.j%A.nb : 0;
68  X2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
69  Y1 = m == 0 ? A.i%A.mb : 0;
70  Y2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;
71 
72  f77 = AF77(m, n);
73  bdl = ABDL(m, n);
74  ldt = BLKLDD(A, m);
    /*
     * Listing line 75 (the callee) was missing from the extract; it is
     * restored here to match the mirrored tile-to-LAPACK routine in this
     * file, which calls CORE_zlacpy with the same argument shape.
     *
     * Fix: the offset into the tile uses the tile's own leading dimension
     * ldt (the stride passed for bdl), not lda. With lda the destination
     * address was wrong whenever X1 != 0.
     */
75  CORE_zlacpy(
76  PlasmaUpperLower, (Y2-Y1), (X2-X1),
77  &(f77[X1*lda+Y1]), lda,
78  &(bdl[X1*ldt+Y1]), ldt);
79 
80  m = next_m;
81  n = next_n;
82  }
83 }
84 
85 /***************************************************************************/
/*
 * Task-based variant of the LAPACK-layout to tile-layout copy: loops over
 * every tile (m, n) of A and issues one call per tile through
 * plasma->quark, tagged with the sequence's quark_sequence via
 * QUARK_Task_Flag_Set. Returns without submitting anything when
 * sequence->status is not PLASMA_SUCCESS.
 *
 * NOTE(review): several listing lines are absent from this extract:
 *   - 86-88: the start of the signature. The body uses Af77 (through AF77),
 *     lda and A, so they are presumably the leading parameters; the name is
 *     presumably plasma_pzlapack_to_tile_quark - confirm upstream.
 *   - 93 and 97: presumably the declarations of `plasma` and `task_flags`,
 *     both of which are used below.
 *   - 116: the name of the function being called with the argument list
 *     below, presumably QUARK_CORE_zlacpy( - confirm upstream.
 */
89  PLASMA_sequence *sequence, PLASMA_request *request)
90 {
91  PLASMA_Complex64_t *f77;
92  PLASMA_Complex64_t *bdl;
94  int X1, Y1;
95  int X2, Y2;
96  int n, m, ldt;
98 
99  plasma = plasma_context_self();
100  if (sequence->status != PLASMA_SUCCESS)
101  return;
102  QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
103 
104  for (m = 0; m < A.mt; m++)
105  {
    /* Leading dimension of the tiles in tile row m; invariant over n. */
106  ldt = BLKLDD(A, m);
107  for (n = 0; n < A.nt; n++)
108  {
    /*
     * [X1, X2) x [Y1, Y2) is the column/row window to copy inside tile
     * (m, n); it is narrower than the full tile only on the border tiles.
     */
109  X1 = n == 0 ? A.j%A.nb : 0;
110  Y1 = m == 0 ? A.i%A.mb : 0;
111  X2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
112  Y2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;
113 
114  f77 = AF77(m, n);
115  bdl = ABDL(m, n);
    /*
     * Fix: the offset into the tile uses ldt, the stride passed for bdl,
     * instead of lda; the old offset was wrong whenever X1 != 0.
     */
117  plasma->quark, &task_flags,
118  PlasmaUpperLower, (Y2-Y1), (X2-X1), A.mb,
119  &(f77[X1*lda+Y1]), lda,
120  &(bdl[X1*ldt+Y1]), ldt);
121  }
122  }
123 }
124 
125 /***************************************************************************/
/*
 * Copies the tile storage described by A back into the column-major array
 * Af77 (leading dimension lda), one tile at a time, using CORE_zlacpy.
 *
 * Arguments are obtained through plasma_unpack_args_5 in the order
 * A, Af77, lda, sequence, request. The routine returns without doing any
 * work when sequence->status is not PLASMA_SUCCESS.
 *
 * Work distribution: tiles are numbered column by column (linear index
 * n*A.mt + m). This caller starts at index PLASMA_RANK and advances by
 * PLASMA_SIZE after every tile (presumably the calling thread's rank and
 * the number of participating threads - confirm).
 *
 * NOTE(review): the signature (listing lines 126-128) is absent from this
 * extract; presumably
 *     void plasma_pztile_to_lapack(plasma_context_t *plasma)
 * - confirm against the upstream file.
 */
129 {
130  PLASMA_desc A;
131  PLASMA_Complex64_t *Af77;
132  int lda;
133  PLASMA_sequence *sequence;
134  PLASMA_request *request;
135 
136  PLASMA_Complex64_t *f77;
137  PLASMA_Complex64_t *bdl;
138 
139  int X1, Y1;
140  int X2, Y2;
141  int n, m, ldt;
142  int next_m;
143  int next_n;
144 
145  plasma_unpack_args_5(A, Af77, lda, sequence, request);
146  if (sequence->status != PLASMA_SUCCESS)
147  return;
148 
    /* Turn the starting linear index PLASMA_RANK into tile coordinates (m, n). */
149  n = 0;
150  m = PLASMA_RANK;
151  while (m >= A.mt && n < A.nt) {
152  n++;
153  m = m-A.mt;
154  }
155 
156  while (n < A.nt) {
    /* Compute the coordinates of the tile handled on the next iteration. */
157  next_m = m;
158  next_n = n;
159 
160  next_m += PLASMA_SIZE;
161  while (next_m >= A.mt && next_n < A.nt) {
162  next_n++;
163  next_m = next_m-A.mt;
164  }
165 
    /*
     * [X1, X2) is the column range and [Y1, Y2) the row range to copy inside
     * this tile; narrower than the full tile only on the border tiles.
     */
166  X1 = n == 0 ? A.j%A.nb : 0;
167  Y1 = m == 0 ? A.i%A.mb : 0;
168  X2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
169  Y2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;
170 
171  f77 = AF77(m, n);
172  bdl = ABDL(m, n);
173  ldt = BLKLDD(A, m);
    /*
     * Fix: the offset into the source tile uses ldt, the stride passed for
     * bdl, instead of lda; the old offset was wrong whenever X1 != 0.
     */
174  CORE_zlacpy(
175  PlasmaUpperLower, (Y2-Y1), (X2-X1),
176  &(bdl[X1*ldt+Y1]), ldt,
177  &(f77[X1*lda+Y1]), lda);
178 
179  m = next_m;
180  n = next_n;
181  }
182 }
183 
184 /***************************************************************************/
/*
 * Task-based variant of the tile-layout to LAPACK-layout copy: loops over
 * every tile (m, n) of A and issues one call per tile through
 * plasma->quark, tagged with the sequence's quark_sequence via
 * QUARK_Task_Flag_Set. Returns without submitting anything when
 * sequence->status is not PLASMA_SUCCESS.
 *
 * NOTE(review): several listing lines are absent from this extract:
 *   - 185-187: the start of the signature. The body uses A, Af77 (through
 *     AF77) and lda, so they are presumably the leading parameters; the name
 *     is presumably plasma_pztile_to_lapack_quark - confirm upstream.
 *   - 192 and 196: presumably the declarations of `plasma` and `task_flags`,
 *     both of which are used below.
 *   - 215: the name of the function being called with the argument list
 *     below, presumably QUARK_CORE_zlacpy( - confirm upstream.
 */
188  PLASMA_sequence *sequence, PLASMA_request *request)
189 {
190  PLASMA_Complex64_t *f77;
191  PLASMA_Complex64_t *bdl;
193  int X1, Y1;
194  int X2, Y2;
195  int n, m, ldt;
197 
198  plasma = plasma_context_self();
199  if (sequence->status != PLASMA_SUCCESS)
200  return;
201  QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
202 
203  for (m = 0; m < A.mt; m++)
204  {
    /* Leading dimension of the tiles in tile row m; invariant over n. */
205  ldt = BLKLDD(A, m);
206  for (n = 0; n < A.nt; n++)
207  {
    /*
     * [X1, X2) x [Y1, Y2) is the column/row window to copy inside tile
     * (m, n); it is narrower than the full tile only on the border tiles.
     */
208  X1 = n == 0 ? A.j%A.nb : 0;
209  Y1 = m == 0 ? A.i%A.mb : 0;
210  X2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
211  Y2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;
212 
213  f77 = AF77(m, n);
214  bdl = ABDL(m, n);
    /*
     * Fix: the offset into the source tile uses ldt, the stride passed for
     * bdl, instead of lda; the old offset was wrong whenever X1 != 0.
     */
216  plasma->quark, &task_flags,
217  PlasmaUpperLower, (Y2-Y1), (X2-X1), A.mb,
218  &(bdl[X1*ldt+Y1]), ldt,
219  &(f77[X1*lda+Y1]), lda);
220  }
221  }
222 }
223 
224 /***************************************************************************/
/*
 * Sets to zero the elements of the tile storage described by A, one tile at
 * a time.
 *
 * Arguments are obtained through plasma_unpack_args_3 in the order
 * A, sequence, request. The routine returns without doing any work when
 * sequence->status is not PLASMA_SUCCESS.
 *
 * Work distribution: tiles are numbered column by column (linear index
 * n*A.mt + m). This caller starts at index PLASMA_RANK and advances by
 * PLASMA_SIZE after every tile (presumably the calling thread's rank and
 * the number of participating threads - confirm).
 *
 * NOTE(review): the signature (listing lines 225-227) is absent from this
 * extract; presumably
 *     void plasma_pztile_zero(plasma_context_t *plasma)
 * - confirm against the upstream file.
 */
228 {
229  PLASMA_desc A;
230  PLASMA_sequence *sequence;
231  PLASMA_request *request;
232 
233  PLASMA_Complex64_t *bdl;
234  int x, y;
235  int X1, Y1;
236  int X2, Y2;
237  int n, m, ldt;
238  int next_m;
239  int next_n;
240 
241  plasma_unpack_args_3(A, sequence, request);
242  if (sequence->status != PLASMA_SUCCESS)
243  return;
244 
    /* Turn the starting linear index PLASMA_RANK into tile coordinates (m, n). */
245  n = 0;
246  m = PLASMA_RANK;
247  while (m >= A.mt && n < A.nt) {
248  n++;
249  m = m-A.mt;
250  }
251 
252  while (n < A.nt) {
    /* Compute the coordinates of the tile handled on the next iteration. */
253  next_m = m;
254  next_n = n;
255 
256  next_m += PLASMA_SIZE;
257  while (next_m >= A.mt && next_n < A.nt) {
258  next_n++;
259  next_m = next_m-A.mt;
260  }
261 
    /*
     * [X1, X2) is the column range and [Y1, Y2) the row range to clear inside
     * this tile; narrower than the full tile only on the border tiles.
     */
262  X1 = n == 0 ? A.j%A.nb : 0;
263  Y1 = m == 0 ? A.i%A.mb : 0;
264  X2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
265  Y2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;
266 
267  bdl = ABDL(m, n);
268  ldt = BLKLDD(A, m);
    /* Column-major walk of the window: column x, row y, tile stride ldt. */
269  for (x = X1; x < X2; x++)
270  for (y = Y1; y < Y2; y++)
271  bdl[ldt*x+y] = 0.0;
272 
273  m = next_m;
274  n = next_n;
275  }
276 }
277 
278 /***************************************************************************/
/*
 * Task-based variant of the tile zeroing: loops over every tile (m, n) of A
 * and inserts one CORE_ztile_zero_quark task per tile into plasma->quark,
 * tagged with the sequence's quark_sequence via QUARK_Task_Flag_Set.
 * Returns without inserting anything when sequence->status is not
 * PLASMA_SUCCESS.
 *
 * NOTE(review): several listing lines are absent from this extract:
 *   - 279-281: the signature. The body uses A and sequence, so they are
 *     presumably parameters; the name is presumably
 *     plasma_pztile_zero_quark - confirm upstream.
 *   - 284 and 288: presumably the declarations of `plasma` and `task_flags`,
 *     both of which are used below.
 */
282 {
283  PLASMA_Complex64_t *bdl;
285  int X1, Y1;
286  int X2, Y2;
287  int n, m, ldt;
289 
290  plasma = plasma_context_self();
291  if (sequence->status != PLASMA_SUCCESS)
292  return;
293  QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
294 
295  for (m = 0; m < A.mt; m++)
296  {
    /* Leading dimension of the tiles in tile row m; invariant over n. */
297  ldt = BLKLDD(A, m);
298  for (n = 0; n < A.nt; n++)
299  {
    /*
     * [X1, X2) x [Y1, Y2) is the column/row window to clear inside tile
     * (m, n); it is narrower than the full tile only on the border tiles.
     */
300  X1 = n == 0 ? A.j%A.nb : 0;
301  Y1 = m == 0 ? A.i%A.mb : 0;
302  X2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
303  Y2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;
304 
305  bdl = ABDL(m, n);
    /*
     * Task arguments, in the order the task body unpacks them:
     * X1, X2, Y1, Y2 (flag VALUE), the tile pointer bdl with a size of
     * A.bsiz complex elements (flags OUTPUT | LOCALITY), then ldt (VALUE).
     * The trailing 0 ends the argument list.
     */
306  QUARK_Insert_Task(plasma->quark, CORE_ztile_zero_quark, &task_flags,
307  sizeof(int), &X1, VALUE,
308  sizeof(int), &X2, VALUE,
309  sizeof(int), &Y1, VALUE,
310  sizeof(int), &Y2, VALUE,
311  sizeof(PLASMA_Complex64_t)*A.bsiz, bdl, OUTPUT | LOCALITY,
312  sizeof(int), &ldt, VALUE,
313  0);
314  }
315  }
316 }
317 
318 /***************************************************************************/
/*
 * Task body that clears a rectangular window of one tile: unpacks
 * X1, X2, Y1, Y2, A and lda from the quark handle and writes 0.0 to
 * A[lda*x + y] for every column x in [X1, X2) and row y in [Y1, Y2).
 * Here lda is the stride between consecutive columns of the tile.
 *
 * NOTE(review): two parts of this definition are absent from the extract:
 *   - listing lines 319-321, the signature; presumably
 *     void CORE_ztile_zero_quark(Quark* quark), matching the prototype
 *     declared near the top of this file.
 *   - listing line 327, the declaration of A; presumably
 *     PLASMA_Complex64_t *A - confirm upstream.
 */
322 {
323  int X1;
324  int X2;
325  int Y1;
326  int Y2;
328  int lda;
329 
330  int x, y;
331 
    /* Unpack order matches the order in which the task arguments were inserted. */
332  quark_unpack_args_6(quark, X1, X2, Y1, Y2, A, lda);
333 
334  for (x = X1; x < X2; x++)
335  for (y = Y1; y < Y2; y++)
336  A[lda*x+y] = 0.0;
337 }