PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
pzunmqr.c
Go to the documentation of this file.
1 
18 #include "common.h"
19 
/*
 * Tile-access shorthands. A(m,n), B(m,n) and T(m,n) each expand to
 * BLKADDR(<desc>, PLASMA_Complex64_t, m, n), applied to whichever variable
 * named A, B or T is in scope where the macro is used.
 * BLKADDR is not defined in this listing; presumably it yields a
 * PLASMA_Complex64_t pointer to tile (m,n) -- confirm against its definition.
 */
20 #define A(m,n) BLKADDR(A, PLASMA_Complex64_t, m, n)
21 #define B(m,n) BLKADDR(B, PLASMA_Complex64_t, m, n)
22 #define T(m,n) BLKADDR(T, PLASMA_Complex64_t, m, n)
23 /***************************************************************************/
/*
 * Body of the first routine in this file. Its signature line and the
 * declarations of `side`, `trans` and `plasma` are on lines missing from this
 * listing (the embedded numbers jump 23 -> 27 -> 30), so they are not
 * documented here.
 *
 * What the visible code does:
 *  - unpacks (side, trans, A, B, T, sequence, request) with
 *    plasma_unpack_args_7;
 *  - applies no kernel if the sequence status is not PLASMA_SUCCESS, if
 *    side != PlasmaLeft, or if trans != PlasmaConjTrans;
 *  - otherwise walks tile coordinates (k, m, n), calling CORE_zunmqr when
 *    m == k and CORE_ztsmqr when m != k, each call bracketed by
 *    ss_cond_wait / ss_cond_set.
 */
27 {
30  PLASMA_desc A;
31  PLASMA_desc B;
32  PLASMA_desc T;
33  PLASMA_sequence *sequence;
34  PLASMA_request *request;
35 
    /* (k, m, n) is the current step / tile row / tile column; next_* holds its successor. */
36  int k, m, n;
37  int next_k;
38  int next_m;
39  int next_n;
40  int ldak, ldbk, ldam, ldbm;
41  int tempkm, tempnn, tempkmin, tempmm;
42  int minMT, minM;
43  int ib = PLASMA_IB;
44  PLASMA_Complex64_t *work;
45 
46  plasma_unpack_args_7(side, trans, A, B, T, sequence, request);
47  if (sequence->status != PLASMA_SUCCESS)
48  return;
49 
    /*
     * Only side == PlasmaLeft with trans == PlasmaConjTrans is processed by this
     * routine. Each rejecting branch has one line missing from this listing
     * (51 and 55), so whatever is reported before returning is not visible.
     */
50  if (side != PlasmaLeft) {
52  return;
53  }
54  if (trans != PlasmaConjTrans) {
56  return;
57  }
58 
    /*
     * Scratch buffer requested with size ib*T.nb and type tag T.dtyp; it is
     * handed to both kernels below.
     * NOTE(review): the returned pointer is not checked in the visible code.
     */
59  work = (PLASMA_Complex64_t*)plasma_private_alloc(plasma, ib*T.nb, T.dtyp);
    /*
     * Initialise the ss_* progress state for a B.mt x B.nt grid with value -1.
     * The waits below use k-1, which is -1 at k == 0 -- presumably so the first
     * step never blocks; confirm against the ss_cond_wait definition.
     */
60  ss_init(B.mt, B.nt, -1);
61 
    /* minM = min(A.m, A.n); minMT is the matching A.nt / A.mt. */
62  if (A.m > A.n) {
63  minM = A.n;
64  minMT = A.nt;
65  } else {
66  minM = A.m;
67  minMT = A.mt;
68  }
69 
    /*
     * Starting tile: PLASMA_RANK is used as a linear column index, reduced
     * into [0, B.nt) by repeated subtraction; every subtraction advances the
     * step k. The tile row starts on the diagonal (m = k).
     */
70  k = 0;
71  n = PLASMA_RANK;
72  while (n >= B.nt) {
73  k++;
74  n = n-B.nt;
75  }
76  m = k;
77 
78  while (k < minMT && n < B.nt) {
    /*
     * Compute the successor of (k, m, n) up front: move one tile row down; once
     * the row index reaches A.mt, jump PLASMA_SIZE columns ahead, wrapping over
     * B.nt (each wrap bumps k, capped by minMT), and restart at row m = k.
     * NOTE(review): the row limit here is A.mt while the edge-size tests below
     * use B.mt -- presumably equal for this case; confirm.
     */
79  next_n = n;
80  next_m = m;
81  next_k = k;
82 
83  next_m++;
84  if (next_m == A.mt) {
85  next_n += PLASMA_SIZE;
86  while (next_n >= B.nt && next_k < minMT) {
87  next_k++;
88  next_n = next_n-B.nt;
89  }
90  next_m = next_k;
91  }
92 
    /* Last tile in a dimension gets the remainder size; all others the full tile size. */
93  tempkmin = k == minMT-1 ? minM-k*A.nb : A.nb;
94  tempkm = k == B.mt-1 ? B.m-k*B.mb : B.mb;
95  tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
96  tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
97 
    /* BLKLDD values for tile rows k and m of A and B, passed next to the tile pointers. */
98  ldak = BLKLDD(A, k);
99  ldbk = BLKLDD(B, k);
100  ldam = BLKLDD(A, m);
101  ldbm = BLKLDD(B, m);
102 
    /*
     * Diagonal row (m == k): wait on entry (k, n) with value k-1, run
     * CORE_zunmqr with A(k,k) and T(k,k) against B(k,n), then set entry (k, n)
     * to k. `work` is passed together with T.nb here.
     */
103  if (m == k) {
104  ss_cond_wait(k, n, k-1);
105  CORE_zunmqr(
106  side, trans,
107  tempkm, tempnn, tempkmin, ib,
108  A(k, k), ldak,
109  T(k, k), T.mb,
110  B(k, n), ldbk,
111  work, T.nb);
112  ss_cond_set(k, n, k);
113  }
    /*
     * Row below the diagonal: wait on entry (m, n) with value k-1, run
     * CORE_ztsmqr on the tile pair B(k,n) / B(m,n) with A(m,k) and T(m,k), then
     * set entry (m, n) to k. Only (m, n) is waited on; B(k,n) is not.
     * Note the first size argument is A.mb and `work` is passed together with
     * ib (not T.nb as in the branch above).
     */
114  else {
115  ss_cond_wait(m, n, k-1);
116  CORE_ztsmqr(
117  side, trans,
118  A.mb, tempnn, tempmm, tempnn, tempkmin, ib,
119  B(k, n), ldbk,
120  B(m, n), ldbm,
121  A(m, k), ldam,
122  T(m, k), T.mb,
123  work, ib);
124  ss_cond_set(m, n, k);
125  }
    /* Advance to the successor computed at the top of the iteration. */
126  n = next_n;
127  m = next_m;
128  k = next_k;
129  }
    /* Release the scratch buffer, then finalize the ss_* state. */
130  plasma_private_free(plasma, work);
131  ss_finalize();
132 }
133 
134 /***************************************************************************/
139  PLASMA_sequence *sequence, PLASMA_request *request)
140 {
    /*
     * Body of the second routine in this file. The start of its signature and
     * the declarations of `plasma` and `task_flags` are on lines missing from
     * this listing (the embedded numbers jump 134 -> 139 and 140 -> 143); only
     * the trailing `sequence` / `request` parameters are visible. `side`,
     * `trans`, `A`, `B` and `T` are used below and presumably come from the
     * missing part of the parameter list.
     *
     * Every call inside the loops has lost its first line (the callee name) in
     * this listing: 175, 189, 216, 228, 255, 268, 289, 303. Only the argument
     * lists remain. Since each starts with `plasma->quark, &task_flags`, these
     * are presumably QUARK task submissions -- confirm against the original
     * file.
     *
     * Four cases, selected by `side` and `trans`:
     *   side == PlasmaLeft, trans == PlasmaConjTrans : k ascending
     *   side == PlasmaLeft, any other trans          : k descending
     *   side != PlasmaLeft, trans == PlasmaConjTrans : k descending
     *   side != PlasmaLeft, any other trans          : k ascending
     * In every case, step k issues one call per tile using A(k,k) / T(k,k) and
     * one call per tile pair using A(.,k) / T(.,k); which group is issued first
     * depends on the direction of the k loop.
     */
143 
144  int k, m, n;
145  int ldak, ldbk, ldam, ldan, ldbm;
146  int tempkm, tempnn, tempkmin, tempmm, tempkn;
147  int ib, minMT, minM;
148 
149  plasma = plasma_context_self();
    /* Nothing is submitted if the sequence is already in a non-success state. */
150  if (sequence->status != PLASMA_SUCCESS)
151  return;
    /* Attach the sequence's quark_sequence to task_flags under TASK_SEQUENCE. */
152  QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
153 
154  ib = PLASMA_IB;
    /* minM = min(A.m, A.n); minMT is the matching A.nt / A.mt. */
155  if (A.m > A.n) {
156  minM = A.n;
157  minMT = A.nt;
158  } else {
159  minM = A.m;
160  minMT = A.mt;
161  }
162 
163  /*
164  * PlasmaLeft / PlasmaConjTrans
165  */
166  if (side == PlasmaLeft ) {
167  if (trans == PlasmaConjTrans) {
168  for (k = 0; k < minMT; k++) {
    /* Last tile in a dimension gets the remainder size; all others the full tile size. */
169  tempkm = k == B.mt-1 ? B.m-k*B.mb : B.mb;
170  tempkmin = k == minMT-1 ? minM-k*A.nb : A.nb;
171  ldak = BLKLDD(A, k);
172  ldbk = BLKLDD(B, k);
    /* First, for every tile column n: call using A(k,k), T(k,k) on B(k,n). */
173  for (n = 0; n < B.nt; n++) {
174  tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
    /* callee name missing from this listing (line 175) */
176  plasma->quark, &task_flags,
177  side, trans,
178  tempkm, tempnn, tempkmin, ib, T.nb,
179  A(k, k), ldak,
180  T(k, k), T.mb,
181  B(k, n), ldbk);
182  }
    /* Then, for every row m below k and every column n: call on the pair B(k,n) / B(m,n). */
183  for (m = k+1; m < B.mt; m++) {
184  tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
185  ldam = BLKLDD(A, m);
186  ldbm = BLKLDD(B, m);
187  for (n = 0; n < B.nt; n++) {
188  tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
    /* callee name missing from this listing (line 189) */
190  plasma->quark, &task_flags,
191  side, trans,
192  B.mb, tempnn, tempmm, tempnn, tempkmin, ib, T.nb,
193  B(k, n), ldbk,
194  B(m, n), ldbm,
195  A(m, k), ldam,
196  T(m, k), T.mb);
197  }
198  }
199  }
200  }
201  /*
202  * PlasmaLeft / PlasmaNoTrans
203  */
    /* Reached for any trans value other than PlasmaConjTrans. */
204  else {
    /* Reverse of the branch above: k descends, rows m descend, and the pair calls come before the A(k,k) calls. */
205  for (k = minMT-1; k >= 0; k--) {
206  tempkm = k == B.mt-1 ? B.m-k*B.mb : B.mb;
207  tempkmin = k == minMT-1 ? minM-k*A.nb : A.nb;
208  ldak = BLKLDD(A, k);
209  ldbk = BLKLDD(B, k);
210  for (m = B.mt-1; m > k; m--) {
211  tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
212  ldam = BLKLDD(A, m);
213  ldbm = BLKLDD(B, m);
214  for (n = 0; n < B.nt; n++) {
215  tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
    /* callee name missing from this listing (line 216) */
217  plasma->quark, &task_flags,
218  side, trans,
219  B.mb, tempnn, tempmm, tempnn, tempkmin, ib, T.nb,
220  B(k, n), ldbk,
221  B(m, n), ldbm,
222  A(m, k), ldam,
223  T(m, k), T.mb);
224  }
225  }
226  for (n = 0; n < B.nt; n++) {
227  tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
    /* callee name missing from this listing (line 228) */
229  plasma->quark, &task_flags,
230  side, trans,
231  tempkm, tempnn, tempkmin, ib, T.nb,
232  A(k, k), ldak,
233  T(k, k), T.mb,
234  B(k, n), ldbk);
235  }
236  }
237  }
238  }
239  /*
240  * PlasmaRight / PlasmaConjTrans
241  */
    /* Reached for any side value other than PlasmaLeft. Here k indexes a tile column of B. */
242  else {
243  if (trans == PlasmaConjTrans) {
244  for (k = minMT-1; k >= 0; k--) {
245  tempkn = k == B.nt-1 ? B.n-k*B.nb : B.nb;
246  tempkmin = k == minMT-1 ? minM-k*A.nb : A.nb;
247  ldak = BLKLDD(A, k);
    /* NOTE(review): ldbk is assigned here but none of the visible argument lists in this branch use it. */
248  ldbk = BLKLDD(B, k);
    /* Columns n to the right of k, descending; for each tile row m: call on the pair B(m,k) / B(m,n). */
249  for (n = B.nt-1; n > k; n--) {
250  tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
251  ldan = BLKLDD(A, n);
252  for (m = 0; m < B.mt; m++) {
253  tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
254  ldbm = BLKLDD(B, m);
    /* callee name missing from this listing (line 255) */
256  plasma->quark, &task_flags,
257  side, trans,
258  tempmm, B.nb, tempmm, tempnn, tempkmin, ib, T.nb,
259  B(m, k), ldbm,
260  B(m, n), ldbm,
261  A(n, k), ldan,
262  T(n, k), T.mb);
263  }
264  }
    /* Then, for every tile row m: call using A(k,k), T(k,k) on B(m,k). */
265  for (m = 0; m < B.mt; m++) {
266  tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
267  ldbm = BLKLDD(B, m);
    /* callee name missing from this listing (line 268) */
269  plasma->quark, &task_flags,
270  side, trans,
271  tempmm, tempkn, tempkmin, ib, T.nb,
272  A(k, k), ldak,
273  T(k, k), T.mb,
274  B(m, k), ldbm);
275  }
276  }
277  }
278  /*
279  * PlasmaRight / PlasmaNoTrans
280  */
    /* Reached for any trans value other than PlasmaConjTrans. */
281  else {
    /* Reverse of the branch above: k ascends, the A(k,k) calls come first, then columns n > k ascending. */
282  for (k = 0; k < minMT; k++) {
283  tempkn = k == B.nt-1 ? B.n-k*B.nb : B.nb;
284  tempkmin = k == minMT-1 ? minM-k*A.nb : A.nb;
285  ldak = BLKLDD(A, k);
286  for (m = 0; m < B.mt; m++) {
287  tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
288  ldbm = BLKLDD(B, m);
    /* callee name missing from this listing (line 289) */
290  plasma->quark, &task_flags,
291  side, trans,
292  tempmm, tempkn, tempkmin, ib, T.nb,
293  A(k, k), ldak,
294  T(k, k), T.mb,
295  B(m, k), ldbm);
296  }
297  for (n = k+1; n < B.nt; n++) {
298  tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
299  ldan = BLKLDD(A, n);
300  for (m = 0; m < B.mt; m++) {
301  tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
302  ldbm = BLKLDD(B, m);
    /* callee name missing from this listing (line 303) */
304  plasma->quark, &task_flags,
305  side, trans,
306  tempmm, B.nb, tempmm, tempnn, tempkmin, ib, T.nb,
307  B(m, k), ldbm,
308  B(m, n), ldbm,
309  A(n, k), ldan,
310  T(n, k), T.mb);
311  }
312  }
313  }
314  }
315  }
316 }