PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
zgetrs.c
Go to the documentation of this file.
1 
16 #include "common.h"
17 
18 /***************************************************************************/
/*
 * PLASMA_zgetrs - entry point taking column-major (LAPACK-style) arrays
 * A (leading dimension LDA) and B (leading dimension LDB) plus a pivot
 * vector IPIV. Presumably solves op(A) * X = B from an existing LU
 * factorization, as the LAPACK "getrs" name suggests - confirm against
 * the full function header, which is not part of this listing.
 *
 * Visible behavior:
 *   - validates the arguments and returns the negated position of the
 *     first offending one: -1 (trans), -2 (N), -3 (NRHS), -5 (LDA),
 *     -8 (LDB);
 *   - returns PLASMA_SUCCESS immediately when min(N, NRHS) == 0;
 *   - calls plasma_tune() and returns its status if it fails;
 *   - creates a sequence, builds the tile descriptors descA/descB from
 *     A and B, calls PLASMA_zgetrs_Tile_Async(), converts back, and
 *     returns sequence->status after destroying the sequence.
 *
 * NOTE(review): this listing skips source lines 79, 81, 87, 128, 139,
 * 141 and 147 (see the embedded line numbers). As a result `plasma` and
 * `request` are used without a visible declaration, the NULL-context
 * branch shows no return, and both `} else {` lines have no visible
 * opening `if`. Recover those lines from the real zgetrs.c before
 * relying on this text.
 */
72 int PLASMA_zgetrs(PLASMA_enum trans, int N, int NRHS,
73  PLASMA_Complex64_t *A, int LDA,
74  int *IPIV,
75  PLASMA_Complex64_t *B, int LDB)
76 {
77  int NB;
78  int status;
80  PLASMA_sequence *sequence = NULL;
82  PLASMA_desc descA, descB;
83 
84  plasma = plasma_context_self();
85  if (plasma == NULL) {
86  plasma_fatal_error("PLASMA_zgetrs", "PLASMA not initialized");
    /* NOTE(review): listing line 87 is missing here; presumably it
       returns an error code - confirm. As shown, nothing leaves the
       function when plasma is NULL. */
88  }
89  /* Check input arguments */
90  if ( (trans != PlasmaNoTrans) &&
91  (trans != PlasmaTrans) &&
92  (trans != PlasmaConjTrans)) {
93  plasma_error("PLASMA_zgetrs", "illegal value of trans");
94  return -1;
95  }
96  if (N < 0) {
97  plasma_error("PLASMA_zgetrs", "illegal value of N");
98  return -2;
99  }
100  if (NRHS < 0) {
101  plasma_error("PLASMA_zgetrs", "illegal value of NRHS");
102  return -3;
103  }
    /* Both leading dimensions must be at least max(1, N). */
104  if (LDA < max(1, N)) {
105  plasma_error("PLASMA_zgetrs", "illegal value of LDA");
106  return -5;
107  }
108  if (LDB < max(1, N)) {
109  plasma_error("PLASMA_zgetrs", "illegal value of LDB");
110  return -8;
111  }
112  /* Quick return */
113  if (min(N, NRHS) == 0)
114  return PLASMA_SUCCESS;
115 
116  /* Tune NB & IB depending on N & NRHS; Set NBNBSIZE */
    /* Note: the tuning key passed below is PLASMA_FUNC_ZGESV. */
117  status = plasma_tune(PLASMA_FUNC_ZGESV, N, N, NRHS);
118  if (status != PLASMA_SUCCESS) {
119  plasma_error("PLASMA_zgetrs", "plasma_tune() failed");
120  return status;
121  }
122 
123  /* Read the tile size NB from PLASMA_NB */
124  NB = PLASMA_NB;
125 
126  plasma_sequence_create(plasma, &sequence);
127 
    /* NOTE(review): listing line 128 is missing; it presumably opens the
       `if` that the `} else {` below closes. The two branches use the
       "oop" and "ip" variants of the lap2tile helpers respectively. */
129  plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N , plasma_desc_mat_free(&(descA)) );
    /* The trailing argument for descB releases both descA and descB;
       presumably it is cleanup run by the helper on failure - confirm in
       the helper's definition. */
130  plasma_zooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
131  } else {
132  plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N );
133  plasma_ziplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
134  }
135 
136  /* Call the tile interface */
137  PLASMA_zgetrs_Tile_Async(trans, &descA, IPIV, &descB, sequence, &request);
138 
    /* NOTE(review): listing lines 139, 141 and 147 are missing around
       the conversion back to the caller's layout. In the first branch
       only descB is converted back and both descriptors are freed; in
       the second branch both descA and descB are converted. */
140  plasma_zooptile2lap( descB, B, NB, NB, LDB, NRHS );
142  plasma_desc_mat_free(&descA);
143  plasma_desc_mat_free(&descB);
144  } else {
145  plasma_ziptile2lap( descA, A, NB, NB, LDA, N );
146  plasma_ziptile2lap( descB, B, NB, NB, LDB, NRHS );
148  }
149 
    /* Capture the status before the sequence is destroyed. */
150  status = sequence->status;
151  plasma_sequence_destroy(plasma, sequence);
152  return status;
153 }
154 
155 /***************************************************************************/
200 {
    /*
     * Body of what appears to be PLASMA_zgetrs_Tile (name taken from the
     * error string below; the signature is not part of this listing).
     * It creates a local sequence, forwards trans, A, IPIV and B to
     * PLASMA_zgetrs_Tile_Async() together with that sequence, and
     * returns the sequence's status after destroying it.
     *
     * NOTE(review): listing lines 201, 203, 209 and 213 are missing, so
     * `plasma` and `request` have no visible declaration, the
     * NULL-context branch shows no return, and whatever statement sat
     * between the Async call and the status read is not shown.
     */
202  PLASMA_sequence *sequence = NULL;
204  int status;
205 
206  plasma = plasma_context_self();
207  if (plasma == NULL) {
208  plasma_fatal_error("PLASMA_zgetrs_Tile", "PLASMA not initialized");
210  }
211  plasma_sequence_create(plasma, &sequence);
212  PLASMA_zgetrs_Tile_Async(trans, A, IPIV, B, sequence, &request);
    /* Capture the status before the sequence is destroyed. */
214  status = sequence->status;
215  plasma_sequence_destroy(plasma, sequence);
216  return status;
217 }
218 
219 /***************************************************************************/
249  PLASMA_sequence *sequence, PLASMA_request *request)
250 {
    /*
     * Presumably the body of PLASMA_zgetrs_Tile_Async: the visible tail
     * of the signature (sequence, request) and the names used below
     * (trans, A, IPIV, B) match the six-argument calls made elsewhere in
     * this file. Only the end of the signature is in this listing -
     * confirm against the real source.
     *
     * Visible behavior, in order:
     *   1. copies *A and *B into the local descriptors descA / descB;
     *   2. rejects a NULL sequence or NULL request with
     *      PLASMA_ERR_UNALLOCATED;
     *   3. if the sequence already carries a non-success status, fails
     *      the request with PLASMA_ERR_SEQUENCE_FLUSHED; otherwise marks
     *      the request as PLASMA_SUCCESS;
     *   4. validates both descriptors with plasma_desc_check() and
     *      requires square tiles (nb == mb) for each;
     *   5. branches on trans and issues the runtime calls whose argument
     *      lists appear below;
     *   6. returns PLASMA_SUCCESS.
     *
     * NOTE(review): many listing lines are missing (253, 258, 296, 303,
     * 311-316, 323-328, 336-339, 341, 348-351, 353, 360, 367, 375).
     * Every argument list in the trans branches has lost its opening
     * call line, and four of them have also lost the name of the routine
     * being invoked. `plasma` has no visible declaration either.
     */
251  PLASMA_desc descA = *A;
252  PLASMA_desc descB = *B;
254 
255  plasma = plasma_context_self();
256  if (plasma == NULL) {
    /* NOTE(review): the messages in this function say
       "PLASMA_zgetrs_Tile" even though this looks like the _Async
       variant; listing line 258 (presumably a return) is missing. */
257  plasma_fatal_error("PLASMA_zgetrs_Tile", "PLASMA not initialized");
259  }
260  if (sequence == NULL) {
261  plasma_fatal_error("PLASMA_zgetrs_Tile", "NULL sequence");
262  return PLASMA_ERR_UNALLOCATED;
263  }
264  if (request == NULL) {
265  plasma_fatal_error("PLASMA_zgetrs_Tile", "NULL request");
266  return PLASMA_ERR_UNALLOCATED;
267  }
268  /* Check sequence status */
269  if (sequence->status == PLASMA_SUCCESS)
270  request->status = PLASMA_SUCCESS;
271  else
272  return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);
273 
274  /* Check descriptors for correctness */
275  if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
276  plasma_error("PLASMA_zgetrs_Tile", "invalid first descriptor");
277  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
278  }
279  if (plasma_desc_check(&descB) != PLASMA_SUCCESS) {
    /* NOTE(review): descB is the second descriptor checked here, but
       the message calls it the "third" - confirm the intended wording. */
280  plasma_error("PLASMA_zgetrs_Tile", "invalid third descriptor");
281  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
282  }
283  /* Check input arguments */
284  if (descA.nb != descA.mb || descB.nb != descB.mb) {
285  plasma_error("PLASMA_zgetrs_Tile", "only square tiles supported");
286  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
287  }
288  /* Quick return */
    /* The quick-return test below is disabled (commented out). */
289 /*
290  if (min(N, NRHS) == 0)
291  return PLASMA_SUCCESS;
292 */
293 
294  if ( trans == PlasmaNoTrans )
295  {
    /* No-transpose branch: plasma_pzbarrier_tl2pnl on descB, then
       plasma_pzlaswp on descB with IPIV and increment 1, followed by two
       further calls on (descA, descB) whose routine names are not in
       this listing. */
297  plasma_pzbarrier_tl2pnl,
298  PLASMA_desc, descB,
299  PLASMA_sequence*, sequence,
300  PLASMA_request*, request);
301 
302  /* swap */
304  plasma_pzlaswp,
305  PLASMA_desc, descB,
306  int *, IPIV,
307  int, 1,
308  PLASMA_sequence*, sequence,
309  PLASMA_request*, request);
310 
    /* NOTE(review): listing lines 311-316 (call opener, routine name and
       leading arguments) are missing. */
317  PLASMA_Complex64_t, 1.0,
318  PLASMA_desc, descA,
319  PLASMA_desc, descB,
320  PLASMA_sequence*, sequence,
321  PLASMA_request*, request);
322 
    /* NOTE(review): listing lines 323-328 are missing, as above. */
329  PLASMA_Complex64_t, 1.0,
330  PLASMA_desc, descA,
331  PLASMA_desc, descB,
332  PLASMA_sequence*, sequence,
333  PLASMA_request*, request);
334  }
335  else {
    /* Transpose / conjugate-transpose branch: two calls on
       (descA, descB) that receive `trans` (routine names not in this
       listing), then plasma_pzbarrier_tl2pnl, plasma_pzlaswp with
       increment -1, and finally plasma_pzbarrier_pnl2tl on descB. */
340  PLASMA_enum, trans,
342  PLASMA_Complex64_t, 1.0,
343  PLASMA_desc, descA,
344  PLASMA_desc, descB,
345  PLASMA_sequence*, sequence,
346  PLASMA_request*, request);
347 
352  PLASMA_enum, trans,
354  PLASMA_Complex64_t, 1.0,
355  PLASMA_desc, descA,
356  PLASMA_desc, descB,
357  PLASMA_sequence*, sequence,
358  PLASMA_request*, request);
359 
361  plasma_pzbarrier_tl2pnl,
362  PLASMA_desc, descB,
363  PLASMA_sequence*, sequence,
364  PLASMA_request*, request);
365 
366  /* swap */
368  plasma_pzlaswp,
369  PLASMA_desc, descB,
370  int *, IPIV,
371  int, -1,
372  PLASMA_sequence*, sequence,
373  PLASMA_request*, request);
374 
376  plasma_pzbarrier_pnl2tl,
377  PLASMA_desc, descB,
378  PLASMA_sequence*, sequence,
379  PLASMA_request*, request);
380  }
    /* Reached on every path that passed the checks above; the value is
       PLASMA_SUCCESS regardless of which branch ran. */
381  return PLASMA_SUCCESS;
382 }