PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
dorglq.c
Go to the documentation of this file.
1 
16 #include "common.h"
17 
18 /***************************************************************************/
68 int PLASMA_dorglq(int M, int N, int K,
69  double *A, int LDA,
70  double *T,
71  double *Q, int LDQ)
72 {
73  int NB, IB, IBNB, KT, NT;
74  int status;
76  PLASMA_sequence *sequence = NULL;
78  PLASMA_desc descA, descQ, descT;
79 
80  plasma = plasma_context_self();
81  if (plasma == NULL) {
82  plasma_fatal_error("PLASMA_dorglq", "PLASMA not initialized");
84  }
85  if (M < 0) {
86  plasma_error("PLASMA_dorglq", "illegal value of M");
87  return -1;
88  }
89  if (N < M) {
90  plasma_error("PLASMA_dorglq", "illegal value of N");
91  return -2;
92  }
93  if (K < 0 || K > M) {
94  plasma_error("PLASMA_dorglq", "illegal value of K");
95  return -3;
96  }
97  if (LDA < max(1, M)) {
98  plasma_error("PLASMA_dorglq", "illegal value of LDA");
99  return -5;
100  }
101  if (LDQ < max(1, M)) {
102  plasma_error("PLASMA_dorglq", "illegal value of LDQ");
103  return -8;
104  }
105  /* Quick return - currently NOT equivalent to LAPACK's:
106  * CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDQ ) */
107  if (min(M, min(N, K)) == 0)
108  return PLASMA_SUCCESS;
109 
110  /* Tune NB & IB depending on M, N & NRHS; Set NBNB */
111  status = plasma_tune(PLASMA_FUNC_DGELS, M, N, 0);
112  if (status != PLASMA_SUCCESS) {
113  plasma_error("PLASMA_dorglq", "plasma_tune() failed");
114  return status;
115  }
116 
117  /* Set MT & NT */
118  NB = PLASMA_NB;
119  IB = PLASMA_IB;
120  IBNB = IB*NB;
121  NT = (N%NB==0) ? (N/NB) : (N/NB+1);
122  KT = (K%NB==0) ? (K/NB) : (K/NB+1);
123 
124  plasma_sequence_create(plasma, &sequence);
125 
126  if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) {
127  descT = plasma_desc_init(
129  IB, NB, IBNB,
130  KT*IB, NT*NB, 0, 0, KT*IB, NT*NB);
131  }
132  else {
133  /* Double the size of T to accomodate the tree reduction phase */
134  descT = plasma_desc_init(
136  IB, NB, IBNB,
137  KT*IB, 2*NT*NB, 0, 0, KT*IB, 2*NT*NB);
138  }
139  descT.mat = T;
140 
142  plasma_dooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, K, N, plasma_desc_mat_free(&(descA)) );
143  plasma_dooplap2tile( descQ, Q, NB, NB, LDQ, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descQ)));
144  } else {
145  plasma_diplap2tile( descA, A, NB, NB, LDA, N, 0, 0, K, N);
146  plasma_diplap2tile( descQ, Q, NB, NB, LDQ, N, 0, 0, M, N);
147  }
148 
149  /* Call the tile interface */
150  PLASMA_dorglq_Tile_Async(&descA, &descT, &descQ, sequence, &request);
151 
153  plasma_dooptile2lap( descQ, Q, NB, NB, LDQ, N );
155  plasma_desc_mat_free(&descA);
156  plasma_desc_mat_free(&descQ);
157  } else {
158  plasma_diptile2lap( descA, A, NB, NB, LDA, N );
159  plasma_diptile2lap( descQ, Q, NB, NB, LDQ, N );
161  }
162 
163  status = sequence->status;
164  plasma_sequence_destroy(plasma, sequence);
165  return status;
166 }
167 
168 /***************************************************************************/
203 {
205  PLASMA_sequence *sequence = NULL;
207  int status;
208 
209  plasma = plasma_context_self();
210  if (plasma == NULL) {
211  plasma_fatal_error("PLASMA_dorglq_Tile", "PLASMA not initialized");
213  }
214  plasma_sequence_create(plasma, &sequence);
215  PLASMA_dorglq_Tile_Async(A, T, B, sequence, &request);
217  status = sequence->status;
218  plasma_sequence_destroy(plasma, sequence);
219  return status;
220 }
221 
222 /***************************************************************************/
250  PLASMA_sequence *sequence, PLASMA_request *request)
251 {
252  PLASMA_desc descA = *A;
253  PLASMA_desc descT = *T;
254  PLASMA_desc descQ = *Q;
256 
257  plasma = plasma_context_self();
258  if (plasma == NULL) {
259  plasma_fatal_error("PLASMA_dorglq_Tile", "PLASMA not initialized");
261  }
262  if (sequence == NULL) {
263  plasma_fatal_error("PLASMA_dorglq_Tile", "NULL sequence");
264  return PLASMA_ERR_UNALLOCATED;
265  }
266  if (request == NULL) {
267  plasma_fatal_error("PLASMA_dorglq_Tile", "NULL request");
268  return PLASMA_ERR_UNALLOCATED;
269  }
270  /* Check sequence status */
271  if (sequence->status == PLASMA_SUCCESS)
272  request->status = PLASMA_SUCCESS;
273  else
274  return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);
275 
276  /* Check descriptors for correctness */
277  if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
278  plasma_error("PLASMA_dorglq_Tile", "invalid first descriptor");
279  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
280  }
281  if (plasma_desc_check(&descT) != PLASMA_SUCCESS) {
282  plasma_error("PLASMA_dorglq_Tile", "invalid second descriptor");
283  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
284  }
285  if (plasma_desc_check(&descQ) != PLASMA_SUCCESS) {
286  plasma_error("PLASMA_dorglq_Tile", "invalid third descriptor");
287  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
288  }
289  /* Check input arguments */
290  if (descA.nb != descA.mb || descQ.nb != descQ.mb) {
291  plasma_error("PLASMA_dorglq_Tile", "only square tiles supported");
292  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
293  }
294  /* Quick return - currently NOT equivalent to LAPACK's:
295  * CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, Q, LDQ ) */
296 /*
297  if (min(M, N) == 0)
298  return PLASMA_SUCCESS;
299 */
300  if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) {
302  PLASMA_desc, descA,
303  PLASMA_desc, descQ,
304  PLASMA_desc, descT,
305  PLASMA_sequence*, sequence,
306  PLASMA_request*, request);
307  }
308  else {
309  plasma_dynamic_call_6(plasma_pdorglqrh,
310  PLASMA_desc, descA,
311  PLASMA_desc, descQ,
312  PLASMA_desc, descT,
314  PLASMA_sequence*, sequence,
315  PLASMA_request*, request);
316  }
317 
318  return PLASMA_SUCCESS;
319 }