PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
sgetri.c
Go to the documentation of this file.
1 
15 #include "common.h"
16 
17 /***************************************************************************/
/*
 * PLASMA_sgetri - driver that takes a matrix as a plain array (A, LDA) and
 * runs PLASMA_sgetri_Tile_Async() on a tile descriptor built from it.
 *
 * Steps visible in this listing:
 *   1. fetch the PLASMA context and validate N and LDA;
 *   2. call plasma_tune(PLASMA_FUNC_SGESV, N, N, 0) and read NB from PLASMA_NB;
 *   3. create a sequence and build descA from A with plasma_sooplap2tile()
 *      or plasma_siplap2tile();
 *   4. call PLASMA_sgetri_Tile_Async(&descA, IPIV, &descW, sequence, &request);
 *   5. hand descA and A to plasma_sooptile2lap() or plasma_siptile2lap(),
 *      free descW, destroy the sequence.
 *
 * Parameters:
 *   N    - must be >= 0; N == 0 returns PLASMA_SUCCESS before any work.
 *   A    - array handed to the lap2tile / tile2lap helpers together with LDA.
 *   LDA  - must be >= max(1, N).
 *   IPIV - forwarded unchanged to PLASMA_sgetri_Tile_Async().
 *
 * Returns:
 *   -1 if N < 0, -3 if LDA < max(1, N), the plasma_tune() status if tuning
 *   fails, PLASMA_SUCCESS on the N == 0 quick return, otherwise the value of
 *   sequence->status read just before the sequence is destroyed.
 *
 * NOTE(review): the numbering embedded in this listing skips several lines
 * inside this function, so some statements are not visible here: the
 * declarations of `plasma` and `request`, whatever follows the fatal-error
 * call in the NULL-context branch, the `if` that opens each `} else {` pair,
 * and the statement under "Allocate workspace". Confirm against the original
 * file before relying on this text.
 */
63 int PLASMA_sgetri(int N,
64  float *A, int LDA,
65  int *IPIV)
66 {
67  int NB;
68  int status;
    /* NOTE(review): `plasma` and `request` are used below but their
       declarations do not appear in this listing. */
70  PLASMA_sequence *sequence = NULL;
72  PLASMA_desc descA;
73  PLASMA_desc descW;
74 
75  plasma = plasma_context_self();
76  if (plasma == NULL) {
77  plasma_fatal_error("PLASMA_sgetri", "PLASMA not initialized");
    /* NOTE(review): no return statement is visible in this branch. */
79  }
80  /* Check input arguments */
81  if (N < 0) {
82  plasma_error("PLASMA_sgetri", "illegal value of N");
83  return -1;
84  }
85  if (LDA < max(1, N)) {
86  plasma_error("PLASMA_sgetri", "illegal value of LDA");
87  return -3;
88  }
89  /* Quick return */
    /* N < 0 was rejected above, so this only fires for N == 0. */
90  if (max(N, 0) == 0)
91  return PLASMA_SUCCESS;
92 
93  /* Tune NB depending on M, N & NRHS; Set NBNB */
    /* The tuning entry used is PLASMA_FUNC_SGESV, called with (N, N, 0). */
94  status = plasma_tune(PLASMA_FUNC_SGESV, N, N, 0);
95  if (status != PLASMA_SUCCESS) {
96  plasma_error("PLASMA_sgetri", "plasma_tune() failed");
97  return status;
98  }
99 
100  /* Set NB from PLASMA_NB (read after plasma_tune() has succeeded) */
101  NB = PLASMA_NB;
102 
103  plasma_sequence_create(plasma, &sequence);
104 
    /* Build descA from A using NB x NB tiles.
       NOTE(review): the condition selecting between the two helpers is not
       visible in this listing. */
106  plasma_sooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, plasma_desc_mat_free(&(descA)) );
107  } else {
108  plasma_siplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N);
109  }
110 
111  /* Allocate workspace */
    /* NOTE(review): descW is passed on and freed below, but the statement
       that sets it up is not visible in this listing. */
113 
114  /* Call the tile interface */
115  PLASMA_sgetri_Tile_Async(&descA, IPIV, &descW, sequence, &request);
116 
    /* Hand descA and A to the tile2lap helper; only the first branch frees
       descA. */
118  plasma_sooptile2lap( descA, A, NB, NB, LDA, N );
120  plasma_desc_mat_free(&descA);
121  } else {
122  plasma_siptile2lap( descA, A, NB, NB, LDA, N );
124  }
125  plasma_desc_mat_free(&(descW));
126 
    /* Read the status before the sequence is destroyed. */
127  status = sequence->status;
128  plasma_sequence_destroy(plasma, sequence);
129  return status;
130 }
131 
132 /***************************************************************************/
176 {
    /*
     * Wrapper that creates its own sequence, calls
     * PLASMA_sgetri_Tile_Async(A, IPIV, &descW, sequence, &request), frees
     * descW, and returns sequence->status.
     *
     * NOTE(review): the signature line of this function is missing from the
     * listing; the error message below names it "PLASMA_sgetri_Tile", and the
     * body uses parameters `A` and `IPIV`. Also not visible here: the
     * declarations of `plasma` and `request`, whatever follows the
     * fatal-error call in the NULL-context branch, the statement under
     * "Allocate workspace", and one statement between the async call and the
     * descW free. Confirm against the original file.
     */
178  PLASMA_sequence *sequence = NULL;
180  PLASMA_desc descW;
181  int status;
182 
183  plasma = plasma_context_self();
184  if (plasma == NULL) {
185  plasma_fatal_error("PLASMA_sgetri_Tile", "PLASMA not initialized");
187  }
188  plasma_sequence_create(plasma, &sequence);
189 
190  /* Allocate workspace */
    /* NOTE(review): descW is used below but no visible statement sets it up. */
192 
193  PLASMA_sgetri_Tile_Async(A, IPIV, &descW, sequence, &request);
195  plasma_desc_mat_free(&(descW));
196 
    /* Read the status before the sequence is destroyed. */
197  status = sequence->status;
198  plasma_sequence_destroy(plasma, sequence);
199  return status;
200 }
201 
202 /***************************************************************************/
234  PLASMA_sequence *sequence, PLASMA_request *request)
235 {
    /*
     * Validates its arguments, then issues a series of plasma_dynamic_call_*
     * invocations on descA (and descW) tied to `sequence` and `request`.
     *
     * Order of checks as written:
     *   - *A and *W are copied into local descriptors first, so both pointers
     *     are dereferenced before any validation;
     *   - NULL context, NULL sequence and NULL request are reported through
     *     plasma_fatal_error(); the sequence and request cases return
     *     PLASMA_ERR_UNALLOCATED;
     *   - a sequence whose status is not PLASMA_SUCCESS fails the request with
     *     PLASMA_ERR_SEQUENCE_FLUSHED;
     *   - descA must pass plasma_desc_check() and have nb == mb, otherwise the
     *     request fails with PLASMA_ERR_ILLEGAL_VALUE (descW gets no such
     *     check in the visible code);
     *   - descA.m <= 0 returns PLASMA_SUCCESS without scheduling anything.
     *
     * Returns PLASMA_SUCCESS once the calls have been issued.
     *
     * NOTE(review): only the last line of the parameter list is visible; the
     * function name is taken from the error strings below, and the body uses
     * parameters `A`, `W` and `IPIV`. The declaration of `plasma`, whatever
     * follows the fatal-error call in the NULL-context branch, and the
     * opening lines of several plasma_dynamic_call_* invocations are missing
     * from this listing. Confirm against the original file.
     */
236  PLASMA_desc descA = *A;
237  PLASMA_desc descW = *W;
239 
240  plasma = plasma_context_self();
241  if (plasma == NULL) {
242  plasma_fatal_error("PLASMA_sgetri_Tile_Async", "PLASMA not initialized");
244  }
245  if (sequence == NULL) {
246  plasma_fatal_error("PLASMA_sgetri_Tile_Async", "NULL sequence");
247  return PLASMA_ERR_UNALLOCATED;
248  }
249  if (request == NULL) {
250  plasma_fatal_error("PLASMA_sgetri_Tile_Async", "NULL request");
251  return PLASMA_ERR_UNALLOCATED;
252  }
253  /* Check sequence status */
254  if (sequence->status == PLASMA_SUCCESS)
255  request->status = PLASMA_SUCCESS;
256  else
257  return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);
258 
259  /* Check descriptors for correctness */
260  if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
261  plasma_error("PLASMA_sgetri_Tile_Async", "invalid descriptor");
262  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
263  }
264  /* Check input arguments */
265  if (descA.nb != descA.mb) {
266  plasma_error("PLASMA_sgetri_Tile_Async", "only square tiles supported");
267  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
268  }
269  /* Quick return */
270  if (max(descA.m, 0) == 0)
271  return PLASMA_SUCCESS;
272 
    /* First call: plasma_pstrtri on descA.
       NOTE(review): the call is named _5 but only three type/value pairs are
       visible; presumably two leading pairs are missing from the listing. */
273  plasma_dynamic_call_5(plasma_pstrtri,
276  PLASMA_desc, descA,
277  PLASMA_sequence*, sequence,
278  PLASMA_request*, request);
279 
    /* Second call: takes a float 1.0, descA and the workspace descW.
       NOTE(review): the head of this call (macro name, callee and any leading
       arguments) is not visible in this listing. */
285  float, (float) 1.0,
286  PLASMA_desc, descA,
287  PLASMA_desc, descW,
288  PLASMA_sequence*, sequence,
289  PLASMA_request*, request);
290 
291  /* No need for barrier tile2row because of previous dependencies */
292 
293  /* swap */
    /* plasma_pslaswpc on descA with IPIV and an int argument of -1.
       NOTE(review): the macro-name line of this call is not visible. */
295  plasma_pslaswpc,
296  PLASMA_desc, descA,
297  int *, IPIV,
298  int, -1,
299  PLASMA_sequence*, sequence,
300  PLASMA_request*, request);
301 
    /* Final call: plasma_psbarrier_row2tl on descA.
       NOTE(review): the macro-name line of this call is not visible. */
303  plasma_psbarrier_row2tl,
304  PLASMA_desc, descA,
305  PLASMA_sequence*, sequence,
306  PLASMA_request*, request);
307 
308  return PLASMA_SUCCESS;
309 }
310 
312 {
    /*
     * Sets up the descriptor *W through plasma_sdesc_alloc(), using tile
     * sizes A->mb x A->nb and extents A->m and A->nb taken from A, then
     * returns PLASMA_SUCCESS.
     *
     * NOTE(review): the signature line of this function is missing from the
     * listing; the body uses parameters `A` and `W`, both dereferenced
     * without a NULL check. The trailing plasma_desc_mat_free( W ) argument
     * is presumably the cleanup run by plasma_sdesc_alloc on failure --
     * confirm against the macro definition.
     */
313  plasma_sdesc_alloc( *W, A->mb, A->nb, A->m, A->nb, 0, 0, A->m, A->nb,
314  plasma_desc_mat_free( W ));
315  return PLASMA_SUCCESS;
316 }