PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
zungqr.c
Go to the documentation of this file.
1 
16 #include "common.h"
17 
18 /***************************************************************************/
/*
 * PLASMA_zungqr - LAPACK-layout entry point of the zungqr driver.
 *
 * Checks the scalar arguments, derives the tile counts, builds the T
 * descriptor, translates A and Q into descriptors, runs
 * PLASMA_zungqr_Tile_Async() and returns the status stored in the sequence.
 * Presumably the PLASMA counterpart of LAPACK's ZUNGQR (forming Q from a QR
 * factorization) -- the numerical meaning is not verifiable from this
 * listing, TODO confirm.
 *
 * Return values visible here:
 *   -1  M < 0
 *   -2  N < 0 or N > M
 *   -3  K < 0 or K > N
 *   -5  LDA < max(1, M)
 *   -8  LDQ < max(1, M)
 *   PLASMA_SUCCESS       when min(M, N, K) == 0 (quick return)
 *   plasma_tune() status when tuning fails
 *   sequence->status     otherwise
 *
 * NOTE(review): this is a numbered listing with gaps -- listing lines 70,
 * 75, 77, 83, 126, 133, 139, 150, 152 and 158 are absent.  As a result
 * `plasma`, `T` and `request` are used without a visible declaration, the
 * first argument of each plasma_desc_init() call is not shown, and the `if`
 * heads of the two `} else {` branches are not shown.  Confirm every such
 * spot against the real zungqr.c before relying on these notes.
 */
68 int PLASMA_zungqr(int M, int N, int K,
69  PLASMA_Complex64_t *A, int LDA,
 /* NOTE(review): the error codes jump from -5 (LDA) to -8 (LDQ); presumably
    a parameter on absent listing line 70 (the `T` used below) sits between
    LDA and Q -- TODO confirm. */
71  PLASMA_Complex64_t *Q, int LDQ)
72 {
73  int NB, IB, IBNB, MT, KT;
74  int status;
76  PLASMA_sequence *sequence = NULL;
78  PLASMA_desc descA, descQ, descT;
79 
80  plasma = plasma_context_self();
81  if (plasma == NULL) {
82  plasma_fatal_error("PLASMA_zungqr", "PLASMA not initialized");
 /* NOTE(review): no return is visible here; listing line 83 is absent and
    presumably holds it. */
84  }
 /* Argument checks: each failure reports through plasma_error() and returns
    a distinct negative code. */
85  if (M < 0) {
86  plasma_error("PLASMA_zungqr", "illegal value of M");
87  return -1;
88  }
89  if (N < 0 || N > M) {
90  plasma_error("PLASMA_zungqr", "illegal value of N");
91  return -2;
92  }
93  if (K < 0 || K > N) {
94  plasma_error("PLASMA_zungqr", "illegal value of K");
95  return -3;
96  }
97  if (LDA < max(1, M)) {
98  plasma_error("PLASMA_zungqr", "illegal value of LDA");
99  return -5;
100  }
101  if (LDQ < max(1, M)) {
102  plasma_error("PLASMA_zungqr", "illegal value of LDQ");
103  return -8;
104  }
 /* Quick return: any zero dimension means nothing is done and no sequence
    is created. */
105  if (min(M, min(N, K)) == 0)
106  return PLASMA_SUCCESS;
107 
108  /* Tune NB & IB depending on M & N; IBNB is derived from them below */
109  status = plasma_tune(PLASMA_FUNC_ZGELS, M, N, 0);
110  if (status != PLASMA_SUCCESS) {
111  plasma_error("PLASMA_zungqr", "plasma_tune() failed");
112  return status;
113  }
114 
115  /* Set MT & KT */
116  NB = PLASMA_NB;
117  IB = PLASMA_IB;
118  IBNB = IB*NB;
 /* MT and KT are M/NB and K/NB rounded up. */
119  MT = (M%NB==0) ? (M/NB) : (M/NB+1);
120  KT = (K%NB==0) ? (K/NB) : (K/NB+1);
121 
 /* NOTE(review): the result of plasma_sequence_create() is not checked in
    the visible code. */
122  plasma_sequence_create(plasma, &sequence);
123 
 /* descT is sized MT*IB by KT*NB with IB-by-NB tiles; the non-flat
    householder mode doubles the column extent. */
124  if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) {
125  descT = plasma_desc_init(
127  IB, NB, IBNB,
128  MT*IB, KT*NB, 0, 0, MT*IB, KT*NB);
129  }
130  else {
131  /* Double the size of T to accommodate the tree reduction phase */
132  descT = plasma_desc_init(
134  IB, NB, IBNB,
135  MT*IB, 2*KT*NB, 0, 0, MT*IB, 2*KT*NB);
136  }
 /* descT wraps the T buffer directly; no copy of T is made here. */
137  descT.mat = T;
138 
 /* NOTE(review): the condition choosing between the "oop" and "ip" macro
    pairs is on absent listing line 139.  Judging by the names these are
    out-of-place and in-place LAPACK-to-tile translations -- TODO confirm.
    The last macro argument of the "oop" form looks like cleanup code for a
    failure path (frees descA, then descA and descQ) -- TODO confirm. */
140  plasma_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, K, plasma_desc_mat_free(&(descA)) );
141  plasma_zooplap2tile( descQ, Q, NB, NB, LDQ, N, 0, 0, M, N, plasma_desc_mat_free(&(descA)); plasma_desc_mat_free(&(descQ)));
142  } else {
143  plasma_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, M, K);
144  plasma_ziplap2tile( descQ, Q, NB, NB, LDQ, N, 0, 0, M, N);
145  }
146 
147  /* Call the tile interface */
 /* The return value of the async call is ignored; the outcome is read from
    sequence->status below. */
148  PLASMA_zungqr_Tile_Async(&descA, &descT, &descQ, sequence, &request);
149 
 /* NOTE(review): listing lines 150, 152 and 158 are absent (the branch head
    and presumably synchronisation calls).  In the first branch only Q is
    translated back and both descriptor buffers are freed; in the second
    both A and Q are translated back and nothing is freed. */
151  plasma_zooptile2lap( descQ, Q, NB, NB, LDQ, N );
153  plasma_desc_mat_free(&descA);
154  plasma_desc_mat_free(&descQ);
155  } else {
156  plasma_ziptile2lap( descA, A, NB, NB, LDA, K );
157  plasma_ziptile2lap( descQ, Q, NB, NB, LDQ, N );
159  }
160 
 /* Capture the status before the sequence is destroyed. */
161  status = sequence->status;
162  plasma_sequence_destroy(plasma, sequence);
163  return status;
164 }
165 
166 /***************************************************************************/
201 {
 /*
  * Wrapper that runs PLASMA_zungqr_Tile_Async() inside a sequence it creates
  * and destroys itself, then returns that sequence's status.
  *
  * NOTE(review): the signature of this function is not present in this
  * listing (the lines before listing line 201 are absent).  The name is
  * presumably PLASMA_zungqr_Tile, going by the error message below, and the
  * parameters are presumably the A, T and Q passed on to the async call --
  * TODO confirm.  Listing lines 202, 204, 210 and 214 are absent too, so
  * `plasma` and `request` have no visible declaration and the NULL-context
  * branch has no visible return.
  */
203  PLASMA_sequence *sequence = NULL;
205  int status;
206 
207  plasma = plasma_context_self();
208  if (plasma == NULL) {
209  plasma_fatal_error("PLASMA_zungqr_Tile", "PLASMA not initialized");
211  }
 /* NOTE(review): the results of plasma_sequence_create() and of the async
    call are not checked; only sequence->status is consulted. */
212  plasma_sequence_create(plasma, &sequence);
213  PLASMA_zungqr_Tile_Async(A, T, Q, sequence, &request);
 /* Capture the status before the sequence is destroyed. */
215  status = sequence->status;
216  plasma_sequence_destroy(plasma, sequence);
217  return status;
218 }
219 
220 /***************************************************************************/
248  PLASMA_sequence *sequence, PLASMA_request *request)
249 {
 /*
  * Validates the sequence, the request and the three descriptors, then
  * dispatches on plasma->householder.  Returns PLASMA_SUCCESS once the
  * dispatch has been issued; the visible failure paths return either
  * PLASMA_ERR_UNALLOCATED or the value of plasma_request_fail().
  *
  * NOTE(review): only the tail of the signature is visible.  The name is
  * presumably PLASMA_zungqr_Tile_Async (it is called with five arguments of
  * this shape elsewhere in the file) and A, T, Q are presumably
  * PLASMA_desc pointers, since they are dereferenced into PLASMA_desc
  * locals -- TODO confirm.  Listing lines 253, 258, 298, 306 and 310 are
  * absent: `plasma` has no visible declaration, the NULL-context branch has
  * no visible return, and the heads of both dispatch calls are missing.
  * All diagnostics here are tagged "PLASMA_zungqr_Tile".
  */
 /* Work on by-value copies of the caller's descriptors. */
250  PLASMA_desc descA = *A;
251  PLASMA_desc descT = *T;
252  PLASMA_desc descQ = *Q;
254 
255  plasma = plasma_context_self();
256  if (plasma == NULL) {
257  plasma_fatal_error("PLASMA_zungqr_Tile", "PLASMA not initialized");
259  }
 /* Without a sequence or request there is nowhere to record a failure, so
    these two cases return an error code directly. */
260  if (sequence == NULL) {
261  plasma_fatal_error("PLASMA_zungqr_Tile", "NULL sequence");
262  return PLASMA_ERR_UNALLOCATED;
263  }
264  if (request == NULL) {
265  plasma_fatal_error("PLASMA_zungqr_Tile", "NULL request");
266  return PLASMA_ERR_UNALLOCATED;
267  }
268  /* Check sequence status */
 /* A sequence whose status is not PLASMA_SUCCESS is refused with
    PLASMA_ERR_SEQUENCE_FLUSHED. */
269  if (sequence->status == PLASMA_SUCCESS)
270  request->status = PLASMA_SUCCESS;
271  else
272  return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);
273 
274  /* Check descriptors for correctness */
275  if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
276  plasma_error("PLASMA_zungqr_Tile", "invalid first descriptor");
277  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
278  }
279  if (plasma_desc_check(&descT) != PLASMA_SUCCESS) {
280  plasma_error("PLASMA_zungqr_Tile", "invalid second descriptor");
281  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
282  }
283  if (plasma_desc_check(&descQ) != PLASMA_SUCCESS) {
284  plasma_error("PLASMA_zungqr_Tile", "invalid third descriptor");
285  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
286  }
287  /* Check input arguments */
 /* Only A and Q are required to have square tiles; descT is not tested
    here. */
288  if (descA.nb != descA.mb || descQ.nb != descQ.mb) {
289  plasma_error("PLASMA_zungqr_Tile", "only square tiles supported");
290  return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
291  }
292  /* Quick return */
 /* NOTE(review): the quick return below is commented out, and it refers to
    an N that is not declared in the visible part of this function. */
293 /*
294  if (N <= 0)
295  return PLASMA_SUCCESS;
296 */
 /* NOTE(review): the heads of both dispatch calls (listing lines 298 and
    306) are absent; only their type/value argument pairs remain.  The
    second call has one more argument pair on absent listing line 310.
    Which routine each branch launches cannot be told from here. */
297  if (plasma->householder == PLASMA_FLAT_HOUSEHOLDER) {
299  PLASMA_desc, descA,
300  PLASMA_desc, descQ,
301  PLASMA_desc, descT,
302  PLASMA_sequence*, sequence,
303  PLASMA_request*, request);
304  }
305  else {
307  PLASMA_desc, descA,
308  PLASMA_desc, descQ,
309  PLASMA_desc, descT,
311  PLASMA_sequence*, sequence,
312  PLASMA_request*, request);
313  }
314 
 /* Unconditional success once the dispatch has been issued; later failures
    are presumably reported through sequence/request -- TODO confirm. */
315  return PLASMA_SUCCESS;
316 }