PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
pzlacpy.c
Go to the documentation of this file.
1 
16 #include "common.h"
17 
/*
 * Tile accessors: A(m,n) and B(m,n) expand to BLKADDR on the descriptor of the
 * same name with element type PLASMA_Complex64_t (presumably the address of
 * tile (m,n) -- confirm against the BLKADDR definition in common.h).
 */
18 #define A(m,n) BLKADDR(A, PLASMA_Complex64_t, m, n)
19 #define B(m,n) BLKADDR(B, PLASMA_Complex64_t, m, n)
20 /***************************************************************************/
24 {
 /*
  * Static-scheduling worker: walks the tiles of A selected by uplo and hands
  * each tile of A, together with the matching tile of B, to CORE_zlacpy
  * (presumably a LAPACK zlacpy-style copy of A into B -- confirm).
  * The walk starts at tile index PLASMA_RANK and advances PLASMA_SIZE tiles
  * per step (presumably the caller's thread rank and the thread count --
  * confirm in common.h).
  *
  * NOTE(review): this listing lost lines during extraction (the embedded
  * numbers skip 21-23, 25, 67 and 139). The function signature, the
  * declaration of uplo, the opening of the first tile call and one argument
  * of the last tile call are not visible here; restore them from the
  * upstream file before compiling.
  */
26  PLASMA_desc A;
27  PLASMA_desc B;
28  PLASMA_sequence *sequence;
29  PLASMA_request *request;
30 
 /* X, Y: row and column counts of the current tile; (m, n): current tile
  * indices; (next_m, next_n): tile handled on the following iteration. */
31  int X, Y;
32  int m, n;
33  int next_m;
34  int next_n;
35  int ldam, ldbm;
36 
 /* Fetch the five call arguments, then do nothing if the sequence status is
  * anything other than PLASMA_SUCCESS. */
37  plasma_unpack_args_5(uplo, A, B, sequence, request);
38  if (sequence->status != PLASMA_SUCCESS)
39  return;
40 
41  switch (uplo) {
42  /*
43  * PlasmaUpper
44  */
45  case PlasmaUpper:
 /* Row-by-row walk over tiles on or above the diagonal: row m begins at
  * column m, so wrapping to the next row adds m back onto n. */
 /* NOTE(review): unlike the wrap loop inside the main loop below, this one
  * has no m < A.mt bound. Once m >= A.nt, n can no longer fall below A.nt,
  * so a PLASMA_RANK beyond the number of upper tiles looks like it only
  * ends through signed overflow -- confirm and consider adding the bound. */
46  m = 0;
47  n = PLASMA_RANK;
48  while (n >= A.nt) {
49  m++;
50  n = n - A.nt + m;
51  }
52 
53  while (m < A.mt) {
54  next_m = m;
55  next_n = n;
56 
 /* Pre-compute the tile for the next iteration, PLASMA_SIZE tiles ahead. */
57  next_n += PLASMA_SIZE;
58  while (next_n >= A.nt && next_m < A.mt) {
59  next_m++;
60  next_n = next_n - A.nt + next_m;
61  }
62 
 /* The last tile row/column takes what is left of A.m / A.n; every other
  * tile is a full A.mb x A.nb. */
63  X = m == A.mt-1 ? A.m-m*A.mb : A.mb;
64  Y = n == A.nt-1 ? A.n-n*A.nb : A.nb;
65  ldam = BLKLDD(A, m);
66  ldbm = BLKLDD(B, m);
 /* NOTE(review): the line that opens this call (embedded line 67) is missing
  * from the listing. Only the diagonal tile is passed with uplo;
  * off-diagonal tiles are passed as PlasmaUpperLower. */
68  m == n ? uplo : PlasmaUpperLower,
69  X, Y,
70  A(m, n), ldam,
71  B(m, n), ldbm);
72 
73  n = next_n;
74  m = next_m;
75  }
76  break;
77  /*
78  * PlasmaLower
79  */
80  case PlasmaLower:
 /* Column-by-column walk over tiles on or below the diagonal: column n
  * begins at row n, so wrapping to the next column adds n back onto m. */
 /* NOTE(review): same missing bound as the PlasmaUpper start-up loop; here
  * n is not checked against A.nt -- confirm. */
81  n = 0;
82  m = PLASMA_RANK;
83  while (m >= A.mt) {
84  n++;
85  m = m - A.mt + n;
86  }
87 
88  while (n < A.nt) {
89  next_m = m;
90  next_n = n;
91 
 /* Pre-compute the tile for the next iteration, PLASMA_SIZE tiles ahead. */
92  next_m += PLASMA_SIZE;
93  while (next_m >= A.mt && next_n < A.nt) {
94  next_n++;
95  next_m = next_m - A.mt + next_n;
96  }
97 
98  X = m == A.mt-1 ? A.m-m*A.mb : A.mb;
99  Y = n == A.nt-1 ? A.n-n*A.nb : A.nb;
100  ldam = BLKLDD(A, m);
101  ldbm = BLKLDD(B, m);
 /* Diagonal tile keeps uplo; every other tile is passed as PlasmaUpperLower. */
102  CORE_zlacpy(
103  m == n ? uplo : PlasmaUpperLower,
104  X, Y,
105  A(m, n), ldam,
106  B(m, n), ldbm);
107 
108  n = next_n;
109  m = next_m;
110  }
111  break;
112  /*
113  * PlasmaUpperLower
114  */
115  case PlasmaUpperLower:
116  default:
 /* Column-by-column walk over every tile; each wrap subtracts a full column
  * of A.mt tiles, so this start-up loop always terminates. */
117  n = 0;
118  m = PLASMA_RANK;
119  while (m >= A.mt) {
120  n++;
121  m = m - A.mt;
122  }
123 
124  while (n < A.nt) {
125  next_m = m;
126  next_n = n;
127 
128  next_m += PLASMA_SIZE;
129  while (next_m >= A.mt && next_n < A.nt) {
130  next_n++;
131  next_m = next_m - A.mt;
132  }
133 
134  X = m == A.mt-1 ? A.m-m*A.mb : A.mb;
135  Y = n == A.nt-1 ? A.n-n*A.nb : A.nb;
136  ldam = BLKLDD(A, m);
137  ldbm = BLKLDD(B, m);
 /* NOTE(review): the first argument of this call (embedded line 139) is
  * missing from the listing; the two calls above pass the uplo selector in
  * that position -- restore from the upstream file. */
138  CORE_zlacpy(
140  X, Y,
141  A(m, n), ldam,
142  B(m, n), ldbm);
143 
144  n = next_n;
145  m = next_m;
146  }
147  break;
148  }
149 }
150 /***************************************************************************/
154  PLASMA_sequence *sequence, PLASMA_request *request)
155 {
 /*
  * Dynamic-scheduling variant: loops over the tiles of A selected by uplo and
  * issues one call per tile whose arguments begin with plasma->quark and
  * &task_flags, followed by the uplo selector, the tile sizes, A.mb and the
  * A/B tile addresses with their leading dimensions.
  *
  * NOTE(review): this listing lost lines during extraction (the embedded
  * numbers skip 151-153, 156-157, 179, 188, 190, 207, 216, 218, 236 and
  * 238). The first line of the signature, the declarations of plasma and
  * task_flags, the opening line of every per-tile call, and the selector
  * argument of the off-diagonal and full-matrix calls are not visible here;
  * restore them from the upstream file before compiling.
  */
158 
 /* X, Y: row and column counts of the current tile; (m, n): tile indices. */
159  int X, Y;
160  int m, n;
161  int ldam, ldbm;
162 
 /* Look up the calling context, bail out if the sequence status is anything
  * other than PLASMA_SUCCESS, then tag task_flags with the sequence's
  * quark_sequence under the TASK_SEQUENCE key. */
163  plasma = plasma_context_self();
164  if (sequence->status != PLASMA_SUCCESS)
165  return;
166  QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
167 
168  switch (uplo) {
169  /*
170  * PlasmaUpper
171  */
172  case PlasmaUpper:
173  for (m = 0; m < A.mt; m++) {
 /* The last tile row takes what is left of A.m; other rows are A.mb tall. */
174  X = m == A.mt-1 ? A.m-m*A.mb : A.mb;
175  ldam = BLKLDD(A, m);
176  ldbm = BLKLDD(B, m);
 /* Diagonal tile, present only when row m has a column m; passed with
  * PlasmaUpper. */
177  if (m < A.nt) {
178  Y = m == A.nt-1 ? A.n-m*A.nb : A.nb;
180  plasma->quark, &task_flags,
181  PlasmaUpper,
182  X, Y, A.mb,
183  A(m, m), ldam,
184  B(m, m), ldbm);
185  }
 /* Tiles strictly to the right of the diagonal in row m. */
186  for (n = m+1; n < A.nt; n++) {
187  Y = n == A.nt-1 ? A.n-n*A.nb : A.nb;
189  plasma->quark, &task_flags,
191  X, Y, A.mb,
192  A(m, n), ldam,
193  B(m, n), ldbm);
194  }
195  }
196  break;
197  /*
198  * PlasmaLower
199  */
200  case PlasmaLower:
201  for (m = 0; m < A.mt; m++) {
202  X = m == A.mt-1 ? A.m-m*A.mb : A.mb;
203  ldam = BLKLDD(A, m);
204  ldbm = BLKLDD(B, m);
 /* Diagonal tile, present only when row m has a column m; passed with
  * PlasmaLower. */
205  if (m < A.nt) {
206  Y = m == A.nt-1 ? A.n-m*A.nb : A.nb;
208  plasma->quark, &task_flags,
209  PlasmaLower,
210  X, Y, A.mb,
211  A(m, m), ldam,
212  B(m, m), ldbm);
213  }
 /* Tiles strictly to the left of the diagonal in row m; min() caps the
  * range at A.nt for rows that lie below the last tile column. */
214  for (n = 0; n < min(m, A.nt); n++) {
215  Y = n == A.nt-1 ? A.n-n*A.nb : A.nb;
217  plasma->quark, &task_flags,
219  X, Y, A.mb,
220  A(m, n), ldam,
221  B(m, n), ldbm);
222  }
223  }
224  break;
225  /*
226  * PlasmaUpperLower
227  */
228  case PlasmaUpperLower:
229  default:
 /* Full matrix: every tile (m, n) gets one call. */
230  for (m = 0; m < A.mt; m++) {
231  X = m == A.mt-1 ? A.m-m*A.mb : A.mb;
232  ldam = BLKLDD(A, m);
233  ldbm = BLKLDD(B, m);
234  for (n = 0; n < A.nt; n++) {
235  Y = n == A.nt-1 ? A.n-n*A.nb : A.nb;
237  plasma->quark, &task_flags,
239  X, Y, A.mb,
240  A(m, n), ldam,
241  B(m, n), ldbm);
242  }
243  }
244  }
245 }