PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
pdlange.c
Go to the documentation of this file.
1 
16 #include <stdlib.h>
17 #include <math.h>
18 #include "common.h"
19 
20 #define A(m, n, i, j, ldt) (BLKADDR(A, double, m, n)+((j)*(ldt)+(i)))
21 /***************************************************************************/
25 {
27  PLASMA_desc A;
28  double *work;
29  double *result;
30  PLASMA_sequence *sequence;
31  PLASMA_request *request;
32 
33  int m, n;
34  int next_m;
35  int next_n;
36  int ldam;
37  int step, lrank;
38  int X, X1, X2, Y, Y1, Y2;
39 
40  double* lwork;
41  double normtmp, normtmp2;
42 
43  plasma_unpack_args_6(norm, A, work, result, sequence, request);
44  *result = 0.0;
45 
46  if (PLASMA_RANK == 0)
47  memset(work, 0, PLASMA_SIZE*sizeof(double));
48  ss_init(PLASMA_SIZE, 1, 0);
49 
50  switch (norm) {
51  /*
52  * PlasmaMaxNorm
53  */
54  case PlasmaMaxNorm:
55  n = 0;
56  m = PLASMA_RANK;
57  while (m >= A.mt && n < A.nt) {
58  n++;
59  m = m-A.mt;
60  }
61 
62  while (n < A.nt) {
63  next_m = m;
64  next_n = n;
65 
66  next_m += PLASMA_SIZE;
67  while (next_m >= A.mt && next_n < A.nt) {
68  next_n++;
69  next_m = next_m-A.mt;
70  }
71 
72  X1 = m == 0 ? A.i %A.mb : 0;
73  X2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;
74  X = X2 - X1;
75 
76  Y1 = n == 0 ? A.j %A.nb : 0;
77  Y2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
78  Y = Y2 - Y1;
79 
80  ldam = BLKLDD(A, m);
81  CORE_dlange(PlasmaMaxNorm, X, Y, A(m, n, X1, Y1, ldam), ldam, NULL, &normtmp);
82 
83  if (normtmp > work[PLASMA_RANK])
84  work[PLASMA_RANK] = normtmp;
85 
86  m = next_m;
87  n = next_n;
88  }
89  ss_cond_set(PLASMA_RANK, 0, 1);
90  break;
91  /*
92  * PlasmaOneNorm
93  */
94  case PlasmaOneNorm:
95 
96  n = PLASMA_RANK;
97  normtmp2 = 0.0;
98  lwork = (double*)plasma_private_alloc(plasma, A.nb, PlasmaRealDouble);
99 
100  while (n < A.nt) {
101  Y1 = n == 0 ? A.j %A.nb : 0;
102  Y2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
103  Y = Y2 - Y1;
104  memset(lwork, 0, A.nb*sizeof(double));
105  for (m = 0; m < A.mt; m++) {
106  X1 = m == 0 ? A.i %A.mb : 0;
107  X2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;
108  X = X2 - X1;
109 
110  ldam = BLKLDD(A, m);
111  CORE_dasum(
113  X, Y,
114  A(m, n, X1, Y1, ldam), ldam,
115  lwork);
116  }
117  CORE_dlange(PlasmaMaxNorm, Y, 1, lwork, 1, NULL, &normtmp);
118  if (normtmp > normtmp2)
119  normtmp2 = normtmp;
120 
121  n += PLASMA_SIZE;
122  }
123  work[PLASMA_RANK] = normtmp2;
124  ss_cond_set(PLASMA_RANK, 0, 1);
125  plasma_private_free(plasma, lwork);
126  break;
127  /*
128  * PlasmaInfNorm
129  */
130  case PlasmaInfNorm:
131  m = PLASMA_RANK;
132  normtmp2 = 0.0;
133  lwork = (double*)plasma_private_alloc(plasma, A.mb, PlasmaRealDouble);
134 
135  while (m < A.mt) {
136  X1 = m == 0 ? A.i %A.mb : 0;
137  X2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;
138  X = X2 - X1;
139 
140  ldam = BLKLDD(A, m);
141  memset(lwork, 0, A.mb*sizeof(double));
142 
143  for (n = 0; n < A.nt; n++) {
144  Y1 = n == 0 ? A.j %A.nb : 0;
145  Y2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
146  Y = Y2 - Y1;
147  CORE_dasum(
149  X, Y,
150  A(m, n, X1, Y1, ldam), ldam,
151  lwork);
152  }
153  CORE_dlange(PlasmaMaxNorm, X, 1, lwork, 1, NULL, &normtmp);
154  if (normtmp > normtmp2)
155  normtmp2 = normtmp;
156 
157  m += PLASMA_SIZE;
158  }
159  work[PLASMA_RANK] = normtmp2;
160  ss_cond_set(PLASMA_RANK, 0, 1);
161  plasma_private_free(plasma, lwork);
162  break;
163  /*
164  * PlasmaFrobeniusNorm - not implemented
165  */
166  case PlasmaFrobeniusNorm:
167  default:;
168  }
169 
170  if (norm != PlasmaFrobeniusNorm) {
171  step = 1;
172  lrank = PLASMA_RANK;
173  while ( (lrank%2 == 0) && (PLASMA_RANK+step < PLASMA_SIZE) ) {
174  ss_cond_wait(PLASMA_RANK+step, 0, step);
175  work[PLASMA_RANK] = max(work[PLASMA_RANK], work[PLASMA_RANK+step]);
176  lrank = lrank >> 1;
177  step = step << 1;
178  ss_cond_set(PLASMA_RANK, 0, step);
179  }
180  if (PLASMA_RANK > 0) {
181  while( lrank != 0 ) {
182  if (lrank%2 == 1) {
183  ss_cond_set(PLASMA_RANK, 0, step);
184  lrank = 0;
185  } else {
186  lrank = lrank >> 1;
187  step = step << 1;
188  ss_cond_set(PLASMA_RANK, 0, step);
189  }
190  }
191  }
192 
193  if (PLASMA_RANK == 0)
194  *result = work[0];
195  }
196  ss_finalize();
197 }
198 
199 /***************************************************************************/
202 void plasma_pdlange_quark(PLASMA_enum norm, PLASMA_desc A, double *work, double *result,
203  PLASMA_sequence *sequence, PLASMA_request *request)
204 {
207 
208  int X, X1, X2, Y, Y1, Y2;
209  int ldam;
210  int m, n;
211  int szeW;
212  double* lwork;
213 
214  plasma = plasma_context_self();
215  if (sequence->status != PLASMA_SUCCESS)
216  return;
217  QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
218 
219  *result = 0.0;
220  switch ( norm ) {
221  /*
222  * PlasmaMaxNorm
223  */
224  case PlasmaMaxNorm:
225  szeW = A.mt*A.nt;
226  lwork = (double*)plasma_shared_alloc(plasma, szeW, PlasmaRealDouble);
227  memset(lwork, 0, szeW*sizeof(double));
228  for(m = 0; m < A.mt; m++) {
229  X1 = m == 0 ? A.i %A.mb : 0;
230  X2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;
231  X = X2 - X1;
232  ldam = BLKLDD(A, m);
233  for(n = 0; n < A.nt; n++) {
234  Y1 = n == 0 ? A.j %A.nb : 0;
235  Y2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
236  Y = Y2 - Y1;
238  plasma->quark, &task_flags,
239  PlasmaMaxNorm, X, Y,
240  A(m, n, X1, Y1, ldam), ldam, ldam*Y,
241  0, &(lwork[A.mt*n+m]),
242  lwork, szeW);
243  }
244  }
246  plasma->quark, &task_flags,
247  PlasmaMaxNorm, A.mt, A.nt,
248  lwork, A.mt, szeW,
249  0, result);
250 
251  QUARK_CORE_free(plasma->quark, &task_flags, lwork, szeW*sizeof(double));
252  break;
253 
254  /*
255  * PlasmaOneNorm
256  */
257  case PlasmaOneNorm:
258  lwork = (double*)plasma_shared_alloc(plasma, (A.n+1), PlasmaRealDouble);
259  memset(lwork, 0, (A.n+1)*sizeof(double));
260  for(m = 0; m < A.mt; m++) {
261  X1 = m == 0 ? A.i %A.mb : 0;
262  X2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;
263  X = X2 - X1;
264  ldam = BLKLDD(A, m);
265  for(n = 0; n < A.nt; n++) {
266  Y1 = n == 0 ? A.j %A.nb : 0;
267  Y2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
268  Y = Y2 - Y1;
270  plasma->quark, &task_flags,
272  A(m, n, X1, Y1, ldam), ldam, ldam*Y,
273  &(lwork[n*A.nb+1]), A.nb,
274  lwork, A.n);
275  }
276  }
278  plasma->quark, &task_flags,
279  PlasmaMaxNorm, A.n+1, 1,
280  lwork, 1, A.n+1,
281  0, result);
282 
283  QUARK_CORE_free(plasma->quark, &task_flags, lwork, (A.n+1)*sizeof(double));
284  break;
285  /*
286  * PlasmaInfNorm
287  */
288  case PlasmaInfNorm:
289  lwork = (double*)plasma_shared_alloc(plasma, (A.m+1), PlasmaRealDouble);
290  memset(lwork, 0, (A.m+1)*sizeof(double));
291  for(m = 0; m < A.mt; m++) {
292  X1 = m == 0 ? A.i %A.mb : 0;
293  X2 = m == A.mt-1 ? (A.i+A.m-1)%A.mb+1 : A.mb;
294  X = X2 - X1;
295  ldam = BLKLDD(A, m);
296  for(n = 0; n < A.nt; n++) {
297  Y1 = n == 0 ? A.j %A.nb : 0;
298  Y2 = n == A.nt-1 ? (A.j+A.n-1)%A.nb+1 : A.nb;
299  Y = Y2 - Y1;
301  plasma->quark, &task_flags,
303  A(m, n, X1, Y1, ldam), ldam, ldam*Y,
304  &(lwork[m*A.mb+1]), A.mb,
305  lwork, A.m);
306  }
307  }
309  plasma->quark, &task_flags,
310  PlasmaMaxNorm, A.m+1, 1,
311  lwork, 1, A.m+1,
312  0, result);
313 
314  QUARK_CORE_free(plasma->quark, &task_flags, lwork, (A.m+1)*sizeof(double));
315  break;
316  /*
317  * PlasmaFrobeniusNorm - not implemented
318  */
319  case PlasmaFrobeniusNorm:
320  default:;
321  }
322 }