PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
sgelqs.c
Go to the documentation of this file.
1 
15 #include "common.h"
16 
17 /***************************************************************************/
67 int PLASMA_sgelqs(int M, int N, int NRHS,
68  float *A, int LDA,
69  float *T,
70  float *B, int LDB)
71 {
72  int NB, IB, IBNB, MT, NT;
73  int status;
75  PLASMA_sequence *sequence = NULL;
77  PLASMA_desc descA, descB, descT;
78 
79  plasma = plasma_context_self();
80  if (plasma == NULL) {
81  plasma_fatal_error("PLASMA_sgelqs", "PLASMA not initialized");
83  }
84  /* Check input arguments */
85  if (M < 0) {
86  plasma_error("PLASMA_sgelqs", "illegal value of M");
87  return -1;
88  }
89  if (N < 0 || M > N) {
90  plasma_error("PLASMA_sgelqs", "illegal value of N");
91  return -2;
92  }
93  if (NRHS < 0) {
94  plasma_error("PLASMA_sgelqs", "illegal value of N");
95  return -3;
96  }
97  if (LDA < max(1, M)) {
98  plasma_error("PLASMA_sgelqs", "illegal value of LDA");
99  return -5;
100  }
101  if (LDB < max(1, max(1, N))) {
102  plasma_error("PLASMA_sgelqs", "illegal value of LDB");
103  return -8;
104  }
105  /* Quick return */
106  if (min(M, min(N, NRHS)) == 0) {
107  return PLASMA_SUCCESS;
108  }
109 
110  /* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */
111  status = plasma_tune(PLASMA_FUNC_SGELS, M, N, NRHS);
112  if (status != PLASMA_SUCCESS) {
113  plasma_error("PLASMA_sgelqs", "plasma_tune() failed");
114  return status;
115  }
116 
117  /* Set MT, NT & NTRHS */
118  NB = PLASMA_NB;
119  IB = PLASMA_IB;
120  IBNB = IB*NB;
121  MT = (M%NB==0) ? (M/NB) : (M/NB+1);
122  NT = (N%NB==0) ? (N/NB) : (N/NB+1);
123 
124  plasma_sequence_create(plasma, &sequence);
125 
126  if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) {
127  descT = plasma_desc_init(
129  IB, NB, IBNB,
130  MT*IB, NT*NB, 0, 0, MT*IB, NT*NB);
131  }
132  else {
133  /* Double the size of T to accomodate the tree reduction phase */
134  descT = plasma_desc_init(
136  IB, NB, IBNB,
137  MT*IB, 2*NT*NB, 0, 0, MT*IB, 2*NT*NB);
138  }
139  descT.mat = T;
140 
142  plasma_sooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N , plasma_desc_mat_free(&(descA)) );
143  plasma_sooplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
144  } else {
145  plasma_siplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, N );
146  plasma_siplap2tile( descB, B, NB, NB, LDB, NRHS, 0, 0, N, NRHS);
147  }
148 
149  /* Call the tile interface */
150  PLASMA_sgelqs_Tile_Async(&descA, &descT, &descB, sequence, &request);
151 
153  plasma_sooptile2lap( descA, A, NB, NB, LDA, N );
154  plasma_sooptile2lap( descB, B, NB, NB, LDB, NRHS );
156  plasma_desc_mat_free(&descA);
157  plasma_desc_mat_free(&descB);
158  } else {
159  plasma_siptile2lap( descA, A, NB, NB, LDA, N );
160  plasma_siptile2lap( descB, B, NB, NB, LDB, NRHS );
162  }
163 
164  status = sequence->status;
165  plasma_sequence_destroy(plasma, sequence);
166  return status;
167 }
168 
169 /***************************************************************************/
208 {
210  PLASMA_sequence *sequence = NULL;
212  int status;
213 
214  plasma = plasma_context_self();
215  if (plasma == NULL) {
216  plasma_fatal_error("PLASMA_sgelqs_Tile", "PLASMA not initialized");
218  }
219  plasma_sequence_create(plasma, &sequence);
220  PLASMA_sgelqs_Tile_Async(A, T, B, sequence, &request);
222  status = sequence->status;
223  plasma_sequence_destroy(plasma, sequence);
224  return status;
225 }
226 
227 /***************************************************************************/
257  PLASMA_sequence *sequence, PLASMA_request *request)
258 {
259  PLASMA_desc descA = *A;
260  PLASMA_desc descT = *T;
261  PLASMA_desc descB = *B;
263 
264  plasma = plasma_context_self();
265  if (plasma == NULL) {
266  plasma_fatal_error("PLASMA_sgelqs_Tile", "PLASMA not initialized");
268  }
269  if (sequence == NULL) {
270  plasma_fatal_error("PLASMA_sgelqs_Tile", "NULL sequence");
271  return PLASMA_ERR_UNALLOCATED;
272  }
273  if (request == NULL) {
274  plasma_fatal_error("PLASMA_sgelqs_Tile", "NULL request");
275  return PLASMA_ERR_UNALLOCATED;
276  }
277  /* Check sequence status */
278  if (sequence->status == PLASMA_SUCCESS)
279  request->status = PLASMA_SUCCESS;
280  else
281  return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);
282 
283  /* Check descriptors for correctness */
284  if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
285  plasma_error("PLASMA_sgelqs_Tile", "invalid first descriptor");
286  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
287  }
288  if (plasma_desc_check(&descT) != PLASMA_SUCCESS) {
289  plasma_error("PLASMA_sgelqs_Tile", "invalid second descriptor");
290  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
291  }
292  if (plasma_desc_check(&descB) != PLASMA_SUCCESS) {
293  plasma_error("PLASMA_sgelqs_Tile", "invalid third descriptor");
294  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
295  }
296  /* Check input arguments */
297  if (descA.nb != descA.mb || descB.nb != descB.mb) {
298  plasma_error("PLASMA_sgelqs_Tile", "only square tiles supported");
299  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
300  }
301  /* Quick return */
302 /*
303  if (min(M, min(N, NRHS)) == 0) {
304  return PLASMA_SUCCESS;
305  }
306 */
308  PLASMA_desc, plasma_desc_submatrix(descB, descA.m, 0, descA.n-descA.m, descB.n),
309  PLASMA_sequence*, sequence,
310  PLASMA_request*, request);
311 
317  float, 1.0,
318  PLASMA_desc, plasma_desc_submatrix(descA, 0, 0, descA.m, descA.m),
319  PLASMA_desc, plasma_desc_submatrix(descB, 0, 0, descA.m, descB.n),
320  PLASMA_sequence*, sequence,
321  PLASMA_request*, request);
322 
323  if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) {
327  PLASMA_desc, descA,
328  PLASMA_desc, descB,
329  PLASMA_desc, descT,
330  PLASMA_sequence*, sequence,
331  PLASMA_request*, request);
332  }
333  else {
334  plasma_dynamic_call_8(plasma_psormlqrh,
337  PLASMA_desc, descA,
338  PLASMA_desc, descB,
339  PLASMA_desc, descT,
341  PLASMA_sequence*, sequence,
342  PLASMA_request*, request);
343  }
344 
345  return PLASMA_SUCCESS;
346 }