PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
sgecfi.c
Go to the documentation of this file.
1 
21 #include <stdlib.h>
22 #include <sys/types.h>
23 #include "common.h"
24 #include "sgecfi2.h"
25 
69 int PLASMA_sgecfi(int m, int n, float *A,
70  PLASMA_enum f_in, int imb, int inb,
71  PLASMA_enum f_out, int omb, int onb)
72 {
74  PLASMA_sequence *sequence = NULL;
76  int status;
77 
78  plasma = plasma_context_self();
79  if (plasma == NULL) {
80  plasma_fatal_error(__func__, "PLASMA not initialized");
82  }
83 
84  plasma_sequence_create(plasma, &sequence);
85 
86  PLASMA_sgecfi_Async( m, n, A,
87  f_in, imb, inb,
88  f_out, omb, onb,
89  sequence, &request);
91  status = sequence->status;
92  plasma_sequence_destroy(plasma, sequence);
93 
94  return status;
95 }
96 
97 
148 int PLASMA_sgecfi_Async(int m, int n, float *A,
149  PLASMA_enum f_in, int imb, int inb,
150  PLASMA_enum f_out, int omb, int onb,
151  PLASMA_sequence *sequence, PLASMA_request *request)
152 {
154  float *W = NULL;
155  int im1, in1, om1, on1;
156  size_t A11, A21, A12, A22;
157 
158  /* Check Plasma context */
159  plasma = plasma_context_self();
160  if (plasma == NULL) {
161  plasma_fatal_error(__func__, "PLASMA not initialized");
163  }
164 
165  /* Check arguments */
166  if( ( f_in != PlasmaCM ) && ( f_in != PlasmaRM )
167  && ( f_in != PlasmaCCRB ) && ( f_in != PlasmaRRRB )
168  && ( f_in != PlasmaCRRB ) && ( f_in != PlasmaRCRB ) )
169  {
170  plasma_error(__func__, "Input format unknown");
171  return -4;
172  }
173  if( ( f_out != PlasmaCM ) && ( f_out != PlasmaRM )
174  && ( f_out != PlasmaCCRB ) && ( f_out != PlasmaRRRB )
175  && ( f_out != PlasmaCRRB ) && ( f_out != PlasmaRCRB ) )
176  {
177  plasma_error(__func__, "Input format unknown");
178  return -7;
179  }
180 
181  /* quick return */
182  if( (f_in == f_out) && ( (f_in == PlasmaCM) || (f_in == PlasmaRM))
183  && (imb == omb) && ( inb == onb ) ) {
184  return PLASMA_SUCCESS;
185  }
186 
187  if ( (f_in == PlasmaCM) || (f_in == PlasmaRM) )
188  {
189  if ( (f_out == PlasmaCM) || (f_out == PlasmaRM) ){
190  imb = omb = PLASMA_NB;
191  inb = onb = PLASMA_NB;
192  } else {
193  imb = omb;
194  inb = onb;
195  }
196  }
197  else if ( (f_out == PlasmaCM) || (f_out == PlasmaRM) )
198  {
199  omb = imb;
200  onb = inb;
201  }
202 
203  /* calculate number of full blocks */
204  im1 = (m / imb) * imb;
205  in1 = (n / inb) * inb;
206  om1 = (m / omb) * omb;
207  on1 = (n / onb) * onb;
208 
209  /* separate the four submatrices A11, A12, A21, A22 */
210  if( f_in == PlasmaCM ) {
211  if( om1 < m ) {
213  int, m,
214  int, on1,
215  float*, A,
216  int, (m-om1),
217  PLASMA_sequence*, sequence,
218  PLASMA_request*, request);
219  if ( on1 < n) {
221  int, m,
222  int, (n-on1),
223  float*, &(A[m*on1]),
224  int, (m-om1),
225  PLASMA_sequence*, sequence,
226  PLASMA_request*, request);
227  }
228  }
229  }
230  else if ( f_in == PlasmaRM ) {
231  if( on1 < n ) {
233  int, n,
234  int, om1,
235  float*, A,
236  int, (n-on1),
237  PLASMA_sequence*, sequence,
238  PLASMA_request*, request);
239  if( om1 < m ) {
241  int, n,
242  int, (m-om1),
243  float*, &(A[n*om1]),
244  int, (n-on1),
245  PLASMA_sequence*, sequence,
246  PLASMA_request*, request);
247  }
248  }
249  }
250 
251  /* blocked format to blocked format conversion with different block sizes */
252  if( (f_in != PlasmaCM) && (f_in != PlasmaRM) &&
253  (f_out != PlasmaCM) && (f_out != PlasmaRM) ) {
254  if( (imb != omb) || (inb != onb) ) {
255  if( (f_in == PlasmaRRRB) || (f_out == PlasmaRRRB ) ) {
256  PLASMA_sgecfi_Async(m, n, A, f_in, imb, inb, PlasmaRM, 1, 1 , sequence, request);
257  PLASMA_sgecfi_Async(m, n, A, PlasmaRM, 1, 1, f_out, omb, onb, sequence, request);
258  }
259  else {
260  PLASMA_sgecfi_Async(m, n, A, f_in, imb, inb, PlasmaCM, 1, 1 , sequence, request);
261  PLASMA_sgecfi_Async(m, n, A, PlasmaCM, 1, 1, f_out, omb, onb, sequence, request);
262  }
263  return PLASMA_SUCCESS;
264  }
265  }
266 
267  if( (f_in == PlasmaCM) || (f_in == PlasmaCCRB) || (f_in == PlasmaCRRB) )
268  {
269  A11 = 0;
270  A21 = im1*in1;
271  A12 = m *in1;
272  A22 = m *in1 + im1*(n-in1);
273  }
274  else
275  {
276  A11 = 0;
277  A12 = im1*in1;
278  A21 = im1*n;
279  A22 = im1*n + in1*(m-im1);
280  }
281 
282  switch ( f_in ) {
283  case PlasmaCM :
284  switch ( f_out ) {
285  case PlasmaCM : break;
286  case PlasmaCCRB : ipt_call(cm2ccrb, om1, on1, omb, onb); break;
287  case PlasmaCRRB : ipt_call(cm2crrb, om1, on1, omb, onb); break;
288  case PlasmaRCRB : ipt_call(cm2rcrb, om1, on1, omb, onb); break;
289  case PlasmaRRRB : ipt_call(cm2rrrb, om1, on1, omb, onb); break;
290  case PlasmaRM : ipt_call(cm2rm, om1, on1, omb, onb); break;
291  default: ;
292  }
293  break;
294  case PlasmaCCRB:
295  switch ( f_out ) {
296  case PlasmaCM : ipt_call(ccrb2cm, im1, in1, imb, inb); break;
297  case PlasmaCCRB : break;
298  case PlasmaCRRB : ipt_cal2(ccrb2crrb, im1, in1, imb, inb); break;
299  case PlasmaRCRB : ipt_call(ccrb2rcrb, im1, in1, imb, inb); break;
300  case PlasmaRRRB : ipt_call(ccrb2rrrb, im1, in1, imb, inb); break;
301  case PlasmaRM : ipt_call(ccrb2rm, im1, in1, imb, inb); break;
302  default: ;
303  }
304  break;
305  case PlasmaCRRB:
306  switch ( f_out ) {
307  case PlasmaCM : ipt_call(crrb2cm, im1, in1, imb, inb); break;
308  case PlasmaCCRB : ipt_cal2(crrb2ccrb, im1, in1, imb, inb); break;
309  case PlasmaCRRB : break;
310  case PlasmaRCRB : ipt_call(crrb2rcrb, im1, in1, imb, inb); break;
311  case PlasmaRRRB : ipt_call(crrb2rrrb, im1, in1, imb, inb); break;
312  case PlasmaRM : ipt_call(crrb2rm, im1, in1, imb, inb); break;
313  default: ;
314  }
315  break;
316  case PlasmaRCRB:
317  switch ( f_out ) {
318  case PlasmaCM : ipt_call(rcrb2cm, im1, in1, imb, inb); break;
319  case PlasmaCCRB : ipt_call(rcrb2ccrb, im1, in1, imb, inb); break;
320  case PlasmaCRRB : ipt_call(rcrb2crrb, im1, in1, imb, inb); break;
321  case PlasmaRCRB : break;
322  case PlasmaRRRB : ipt_cal2(rcrb2rrrb, im1, in1, imb, inb); break;
323  case PlasmaRM : ipt_call(rcrb2rm, im1, in1, imb, inb); break;
324  default: ;
325  }
326  break;
327  case PlasmaRRRB:
328  switch ( f_out ) {
329  case PlasmaCM : ipt_call(rrrb2cm, im1, in1, imb, inb); break;
330  case PlasmaCCRB : ipt_call(rrrb2ccrb, im1, in1, imb, inb); break;
331  case PlasmaCRRB : ipt_call(rrrb2crrb, im1, in1, imb, inb); break;
332  case PlasmaRCRB : ipt_cal2(rrrb2rcrb, im1, in1, imb, inb); break;
333  case PlasmaRRRB : break;
334  case PlasmaRM : ipt_call(rrrb2rm, im1, in1, imb, inb); break;
335  default: ;
336  }
337  break;
338  case PlasmaRM:
339  switch ( f_out ) {
340  case PlasmaCM : ipt_call(rm2cm, om1, on1, omb, onb); break;
341  case PlasmaCCRB : ipt_call(rm2ccrb, om1, on1, omb, onb); break;
342  case PlasmaCRRB : ipt_call(rm2crrb, om1, on1, omb, onb); break;
343  case PlasmaRCRB : ipt_call(rm2rcrb, om1, on1, omb, onb); break;
344  case PlasmaRRRB : ipt_call(rm2rrrb, om1, on1, omb, onb); break;
345  case PlasmaRM : break;
346  default: ;
347  }
348  break;
349  default: ;
350  }
351 
352  /* reorder block */
353  if( (f_out == PlasmaCM) || (f_out == PlasmaCCRB) || (f_out == PlasmaCRRB) )
354  {
355  /* We need to swap A21 and A12 */
356  if ( A21 > A12 ) {
357  size_t sze1 = A21-A12;
358  size_t sze2 = A22-A21;
359 
360  QUARK_Barrier(plasma->quark);
361  //plasma_malloc(W, max( in1, on1), float);
362  W = (float*)malloc( max( sze1, sze2 ) * sizeof(float) );
363  CORE_sswpab(0, sze1, sze2, &(A[A12]), W);
364  free(W);
365  }
366  }
367  else {
368  /* We need to swap A21 and A12 */
369  if ( A12 > A21 ) {
370  size_t sze1 = A12-A21;
371  size_t sze2 = A22-A12;
372 
373  QUARK_Barrier(plasma->quark);
374  //plasma_malloc(W, max( in1, on1), float);
375  W = (float*)malloc( max( sze1, sze2 ) * sizeof(float) );
376  CORE_sswpab(0, sze1, sze2, &(A[A21]), W);
377  free(W);
378  }
379  }
380 
381  /* unseparate if output is not blocked */
382  if( f_out == PlasmaCM ) {
383  if( im1 < m ) {
385  int, m,
386  int, in1,
387  float*, A,
388  int, (m-im1),
389  PLASMA_sequence*, sequence,
390  PLASMA_request*, request);
391  if ( in1 < n) {
393  int, m,
394  int, (n-in1),
395  float*, &(A[m*in1]),
396  int, (m-im1),
397  PLASMA_sequence*, sequence,
398  PLASMA_request*, request);
399  }
400  }
401  }
402  else if( f_out == PlasmaRM ) {
403  if( in1 < n ) {
405  int, n,
406  int, im1,
407  float*, A,
408  int, (n-in1),
409  PLASMA_sequence*, sequence,
410  PLASMA_request*, request);
411  if( im1 < m ) {
413  int, n,
414  int, (m-im1),
415  float*, &(A[n*im1]),
416  int, (n-in1),
417  PLASMA_sequence*, sequence,
418  PLASMA_request*, request);
419  }
420  }
421  }
422 
423  return PLASMA_SUCCESS;
424 }