PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
pdpack.c
Go to the documentation of this file.
1 
21 #include <stdlib.h>
22 #include <sys/types.h>
23 #include "common.h"
24 
66 {
67  double *A, *W, *Wl;
68  PLASMA_sequence *sequence;
69  PLASMA_request *request;
70  int m, n, m0;
71  int i, m1, size, rank, start, end, bs, mod;
72 
73  plasma_unpack_args_6(m, n, A, m0, sequence, request);
74  if (sequence->status != PLASMA_SUCCESS)
75  return;
76 
77  /* Quick return */
78  if ( n <= 1 )
79  return;
80 
81  m1 = m - m0;
82 
83  size = PLASMA_SIZE;
84  rank = PLASMA_RANK;
85 
86  mod = (n-1) % size;
87  bs = (n-1) / size;
88  start = rank * bs;
89  if ( rank < mod ) {
90  bs++;
91  }
92  start += min( mod, rank );
93 
94  W = (double*)plasma_private_alloc(plasma, (m0*bs), PlasmaRealDouble);
95  Wl = (double*)plasma_private_alloc(plasma, m1, PlasmaRealDouble);
96 
97  /* Save leftover pieces that are otherwise going to be overwritten */
98  CORE_dlacpy( PlasmaUpperLower, m0, bs, &(A[(int64_t)start*m+m1]), m, W, m0 );
99 
100  /* Pack A */
101  end = ((n-1) / size) * size + 1;
102  for(i=rank+1; i<end; i+=size) {
103  memcpy( Wl, &(A[i*m]), m1*sizeof(double));
104  plasma_barrier(plasma);
105  memcpy( &(A[i*m1]), Wl, m1*sizeof(double));
106  }
107 
108  if ( rank < (n - end)) {
109  i = end + rank;
110  memcpy( Wl, &(A[i*m]), m1*sizeof(double));
111  plasma_barrier(plasma);
112  memcpy( &(A[i*m1]), Wl, m1*sizeof(double));
113  }
114  else
115  plasma_barrier(plasma);
116 
117  /* Restore leftover pieces */
118  CORE_dlacpy( PlasmaUpperLower, m0, bs, W, m0, &(A[(int64_t)m1*n+start*m0]), m0 );
119 
120  plasma_private_free(plasma, W);
121  plasma_private_free(plasma, Wl);
122 }
123 
124 
167 {
168  double *A, *W, *Wl;
169  PLASMA_sequence *sequence;
170  PLASMA_request *request;
171  int m, n, m0;
172  int i, m1, size, rank, start, end, bs, mod;
173 
174  plasma_unpack_args_6(m, n, A, m0, sequence, request);
175  if (sequence->status != PLASMA_SUCCESS)
176  return;
177 
178  /* Quick return */
179  if ( n <= 1 )
180  return;
181 
182  m1 = m - m0;
183 
184  size = PLASMA_SIZE;
185  rank = PLASMA_RANK;
186 
187  mod = (n-1) % size;
188  bs = (n-1) / size;
189  start = rank * bs;
190  if ( rank < mod ) {
191  bs++;
192  }
193  start += min( mod, rank );
194 
195  W = (double*)plasma_private_alloc(plasma, (m0*bs), PlasmaRealDouble);
196  Wl = (double*)plasma_private_alloc(plasma, m1, PlasmaRealDouble);
197 
198  /* Save leftover pieces that are otherwise going to be overwritten */
199  CORE_dlacpy( PlasmaUpperLower, m0, bs, &(A[(int64_t)m1*n+start*m0]), m0, W, m0 );
200 
201  /* Unpack A */
202  end = ((n-1) % size) ;
203  for(i=n-1-rank; i>end; i-=size) {
204  memcpy( Wl, &(A[i*m1]), m1*sizeof(double));
205  plasma_barrier(plasma);
206  memcpy( &(A[i*m]), Wl, m1*sizeof(double));
207  }
208 
209  if ( rank < end ) {
210  i = rank+1;
211  memcpy( Wl, &(A[i*m1]), m1*sizeof(double));
212  plasma_barrier(plasma);
213  memcpy( &(A[i*m]), Wl, m1*sizeof(double));
214  }
215  else
216  plasma_barrier(plasma);
217 
218  /* Restore leftover pieces */
219  CORE_dlacpy( PlasmaUpperLower, m0, bs, W, m0, &(A[(int64_t)start*m+m1]), m );
220 
221  plasma_private_free(plasma, W);
222  plasma_private_free(plasma, Wl);
223 }