PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
zunmqr.c
Go to the documentation of this file.
1 
16 #include "common.h"
17 
18 /***************************************************************************/
/*
 * PLASMA_zunmqr - entry point taking plain arrays A and B with leading
 * dimensions LDA and LDB.  It validates the arguments, builds tile
 * descriptors for A, B and T, invokes the tile interface, converts the
 * result back and returns the status recorded in the sequence.
 *
 * Return values visible in this block:
 *   -1  illegal side      -2  illegal trans    -3  M < 0
 *   -4  N < 0             -5  K < 0 or K > Am
 *   -7  LDA < max(1, Am)  -10 LDB < max(1, M)
 *   PLASMA_SUCCESS on the quick-return path (min(M, N, K) == 0),
 *   the plasma_tune() status if tuning fails,
 *   otherwise sequence->status after the computation.
 *
 * NOTE(review): this text is an extract in which every line carries its
 * listing number and several listing lines are absent (87, 92, 94, 100,
 * 161, 168, 174, 183, 186, 188, 194).  `plasma`, `T` and `request` are used
 * below but their declarations are not visible here; the -10 code for LDB
 * suggests one more parameter sits between LDA and B - confirm against the
 * full file.
 */
85 int PLASMA_zunmqr(PLASMA_enum side, PLASMA_enum trans, int M, int N, int K,
86  PLASMA_Complex64_t *A, int LDA,
88  PLASMA_Complex64_t *B, int LDB)
89 {
90  int NB, IB, IBNB, Am, MT, KT;
91  int status;
93  PLASMA_sequence *sequence = NULL;
95  PLASMA_desc descA, descB, descT;
96 
97  plasma = plasma_context_self();
98  if (plasma == NULL) {
99  plasma_fatal_error("PLASMA_zunmqr", "PLASMA not initialized");
/* NOTE(review): listing line 100 is not visible; presumably an error return
 * belongs here - confirm. */
101  }
102 
/* Am is the row count used for A: M when applying from the left, N otherwise.
 * It is computed before `side` is validated; for an invalid side Am is N, but
 * the side check below returns before Am is read. */
103  if ( side == PlasmaLeft ) {
104  Am = M;
105  } else {
106  Am = N;
107  }
108 
109  /* Check input arguments */
110  if ((side != PlasmaLeft) && (side != PlasmaRight)) {
111  plasma_error("PLASMA_zunmqr", "illegal value of side");
112  return -1;
113  }
114  if ((trans != PlasmaConjTrans) && (trans != PlasmaNoTrans)){
115  plasma_error("PLASMA_zunmqr", "illegal value of trans");
116  return -2;
117  }
118  if (M < 0) {
119  plasma_error("PLASMA_zunmqr", "illegal value of M");
120  return -3;
121  }
122  if (N < 0) {
123  plasma_error("PLASMA_zunmqr", "illegal value of N");
124  return -4;
125  }
/* K is bounded by Am, i.e. by M or N depending on side. */
126  if ( (K < 0) || (K > Am) ) {
127  plasma_error("PLASMA_zunmqr", "illegal value of K");
128  return -5;
129  }
130  if ( LDA < max(1, Am) ) {
131  plasma_error("PLASMA_zunmqr", "illegal value of LDA");
132  return -7;
133  }
/* LDB is always checked against M, independent of side. */
134  if (LDB < max(1, M)) {
135  plasma_error("PLASMA_zunmqr", "illegal value of LDB");
136  return -10;
137  }
138  /* Quick return - currently NOT equivalent to LAPACK's:
139  * CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) */
140  if (min(M, min(N, K)) == 0)
141  return PLASMA_SUCCESS;
142 
143  /* Tune NB & IB depending on M, K & N; Set NBNB */
/* The tuning call is made with the PLASMA_FUNC_ZGELS tag. */
144  status = plasma_tune(PLASMA_FUNC_ZGELS, M, K, N);
145  if (status != PLASMA_SUCCESS) {
146  plasma_error("PLASMA_zunmqr", "plasma_tune() failed");
147  return status;
148  }
149 
150  /* Set NB, IB, IBNB, MT & KT */
151  NB = PLASMA_NB;
152  IB = PLASMA_IB;
153  IBNB = IB*NB;
/* MT and KT are Am/NB and K/NB rounded up to whole tiles. */
154  MT = (Am%NB==0) ? (Am/NB) : (Am/NB+1);
155  KT = (K%NB==0) ? (K /NB) : (K /NB+1);
156 
/* The return value of plasma_sequence_create() is not checked here. */
157  plasma_sequence_create(plasma, &sequence);
158 
/* Describe T with IB x NB tiles; the two branches differ only in the second
 * dimension (KT*NB versus 2*KT*NB).  The first argument line of each
 * plasma_desc_init() call (listing lines 161 and 168) is not visible. */
159  if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) {
160  descT = plasma_desc_init(
162  IB, NB, IBNB,
163  MT*IB, KT*NB, 0, 0, MT*IB, KT*NB);
164  }
165  else {
166  /* Double the size of T to accommodate the tree reduction phase */
167  descT = plasma_desc_init(
169  IB, NB, IBNB,
170  MT*IB, 2*KT*NB, 0, 0, MT*IB, 2*KT*NB);
171  }
/* descT points directly at the caller's T storage. */
172  descT.mat = T;
173 
/* NOTE(review): the `if (...) {` that opens this if/else (listing line 174)
 * is not visible.  The first branch uses the plasma_zoop* conversions and
 * passes a cleanup expression as the last argument (for B it frees both
 * descA and descB); the else branch uses the plasma_zip* conversions.
 * Presumably these are the out-of-place and in-place layout translations -
 * confirm against the macro definitions. */
175  plasma_zooplap2tile( descA, A, NB, NB, LDA, K, 0, 0, Am, K, plasma_desc_mat_free(&(descA)) );
176  plasma_zooplap2tile( descB, B, NB, NB, LDB, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descB)));
177  } else {
178  plasma_ziplap2tile( descA, A, NB, NB, LDA, K, 0, 0, Am, K);
179  plasma_ziplap2tile( descB, B, NB, NB, LDB, N, 0, 0, M, N);
180  }
181 
182  /* Call the tile interface */
/* NOTE(review): the callee name (listing line 183) is not visible; only the
 * argument list of the call remains. */
184  side, trans, &descA, &descT, &descB, sequence, &request);
185 
/* NOTE(review): the `if (...) {` opening this if/else (listing line 186) is
 * not visible.  In the first branch only B is converted back before both
 * descriptor buffers are freed; in the else branch both A and B are
 * converted back. */
187  plasma_zooptile2lap( descB, B, NB, NB, LDB, N );
189  plasma_desc_mat_free(&descA);
190  plasma_desc_mat_free(&descB);
191  } else {
192  plasma_ziptile2lap( descA, A, NB, NB, LDA, K );
193  plasma_ziptile2lap( descB, B, NB, NB, LDB, N );
195  }
196 
/* Read the status before the sequence is destroyed. */
197  status = sequence->status;
198  plasma_sequence_destroy(plasma, sequence);
199  return status;
200 }
201 
202 /***************************************************************************/
252 {
/*
 * Body of a wrapper (presumably PLASMA_zunmqr_Tile, going by the error string
 * below - its signature is not visible in this extract).  It creates a
 * private sequence, forwards side, trans, A, T and B to
 * PLASMA_zunmqr_Tile_Async(), and returns the status stored in that sequence.
 *
 * NOTE(review): listing lines 253, 255, 261 and 265 are missing here; the
 * declarations of `plasma` and `request` are not visible.
 */
254  PLASMA_sequence *sequence = NULL;
256  int status;
257 
258  plasma = plasma_context_self();
259  if (plasma == NULL) {
260  plasma_fatal_error("PLASMA_zunmqr_Tile", "PLASMA not initialized");
/* NOTE(review): listing line 261 is not visible; presumably an error return
 * belongs here - confirm. */
262  }
/* The return values of plasma_sequence_create() and of the async call are
 * not checked; the outcome is read from sequence->status below. */
263  plasma_sequence_create(plasma, &sequence);
264  PLASMA_zunmqr_Tile_Async(side, trans, A, T, B, sequence, &request);
/* NOTE(review): listing line 265 is not visible; if a synchronization call
 * belongs there, the status read below depends on it - confirm. */
266  status = sequence->status;
267  plasma_sequence_destroy(plasma, sequence);
268  return status;
269 }
270 
271 /***************************************************************************/
300  PLASMA_sequence *sequence, PLASMA_request *request)
301 {
/*
 * Body of the asynchronous tile routine (presumably
 * PLASMA_zunmqr_Tile_Async, given the call made by the wrapper above - only
 * the last line of its signature is visible).  It validates the sequence,
 * request, descriptors, side and trans, then dispatches the computation
 * according to plasma->householder.
 *
 * Returns PLASMA_ERR_UNALLOCATED for a NULL sequence or request, the result
 * of plasma_request_fail() for a flushed sequence or an illegal argument,
 * and PLASMA_SUCCESS once the work has been dispatched.
 *
 * NOTE(review): listing lines 305, 310, 359, 369 and 386 are missing here;
 * the declaration of `plasma` is not visible.  The error strings below say
 * "PLASMA_zunmqr_Tile" rather than naming the async routine.
 */
/* Work on by-value copies of the caller's descriptors. */
302  PLASMA_desc descA = *A;
303  PLASMA_desc descT = *T;
304  PLASMA_desc descB = *B;
306 
307  plasma = plasma_context_self();
308  if (plasma == NULL) {
309  plasma_fatal_error("PLASMA_zunmqr_Tile", "PLASMA not initialized");
/* NOTE(review): listing line 310 is not visible; presumably an error return
 * belongs here - confirm. */
311  }
312  if (sequence == NULL) {
313  plasma_fatal_error("PLASMA_zunmqr_Tile", "NULL sequence");
314  return PLASMA_ERR_UNALLOCATED;
315  }
316  if (request == NULL) {
317  plasma_fatal_error("PLASMA_zunmqr_Tile", "NULL request");
318  return PLASMA_ERR_UNALLOCATED;
319  }
320  /* Check sequence status */
/* A sequence that already holds an error fails this request immediately. */
321  if (sequence->status == PLASMA_SUCCESS)
322  request->status = PLASMA_SUCCESS;
323  else
324  return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);
325 
326  /* Check descriptors for correctness */
327  if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
328  plasma_error("PLASMA_zunmqr_Tile", "invalid first descriptor");
329  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
330  }
331  if (plasma_desc_check(&descT) != PLASMA_SUCCESS) {
332  plasma_error("PLASMA_zunmqr_Tile", "invalid second descriptor");
333  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
334  }
335  if (plasma_desc_check(&descB) != PLASMA_SUCCESS) {
336  plasma_error("PLASMA_zunmqr_Tile", "invalid third descriptor");
337  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
338  }
339  /* Check input arguments */
/* Only A and B are required to have square tiles; T is not tested here. */
340  if (descA.nb != descA.mb || descB.nb != descB.mb) {
341  plasma_error("PLASMA_zunmqr_Tile", "only square tiles supported");
342  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
343  }
/* Unlike the checks above, an illegal side or trans fails the request
 * without a plasma_error() message in the visible lines. */
344  if ((side != PlasmaLeft) && (side != PlasmaRight)) {
345  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
346  }
347  if ((trans != PlasmaConjTrans) && (trans != PlasmaNoTrans)){
348  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
349  }
/* The quick return below is commented out, so there is no early exit for
 * empty problems in this routine. */
350  /* Quick return - currently NOT equivalent to LAPACK's:
351  * CALL DLASET( 'Full', MAX( M, N ), NRHS, ZERO, ZERO, B, LDB ) */
352 /*
353  if (min(M, min(N, K)) == 0)
354  return PLASMA_SUCCESS;
355 */
/* Dispatch.  With flat Householder, the (ConjTrans, Left) case is issued
 * through a different call than every other side/trans combination; the
 * first line of both calls (listing lines 359 and 369) is not visible, so
 * the callees cannot be named from here. */
356  if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) {
357  if ( (trans == PlasmaConjTrans) &&
358  (side == PlasmaLeft) ) {
360  PLASMA_enum, side,
361  PLASMA_enum, trans,
362  PLASMA_desc, descA,
363  PLASMA_desc, descB,
364  PLASMA_desc, descT,
365  PLASMA_sequence*, sequence,
366  PLASMA_request*, request);
367  }
368  else {
370  PLASMA_enum, side,
371  PLASMA_enum, trans,
372  PLASMA_desc, descA,
373  PLASMA_desc, descB,
374  PLASMA_desc, descT,
375  PLASMA_sequence*, sequence,
376  PLASMA_request*, request);
377  }
378  }
379  else {
/* Any other householder setting goes through plasma_pzunmqrrh.
 * NOTE(review): only seven type/value pairs are visible for this
 * plasma_dynamic_call_8; listing line 386 is missing from the extract. */
380  plasma_dynamic_call_8(plasma_pzunmqrrh,
381  PLASMA_enum, side,
382  PLASMA_enum, trans,
383  PLASMA_desc, descA,
384  PLASMA_desc, descB,
385  PLASMA_desc, descT,
387  PLASMA_sequence*, sequence,
388  PLASMA_request*, request);
389  }
390 
/* PLASMA_SUCCESS is returned unconditionally after dispatch; nothing in the
 * visible lines inspects the outcome of the dispatched call. */
391  return PLASMA_SUCCESS;
392 }