PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
pzunmlqrh.c
Go to the documentation of this file.
1 
17 #include "common.h"
18 
/*
 * Tile accessors. Each one expands to BLKADDR applied to the identifier of
 * the same name (A, B or T), with element type PLASMA_Complex64_t and the
 * tile coordinates (m, n). The identifiers A, B and T are not macro
 * parameters, so they must be in scope wherever the macros are expanded.
 */
19 #define A(m,n) BLKADDR(A, PLASMA_Complex64_t, (m), (n))
20 #define B(m,n) BLKADDR(B, PLASMA_Complex64_t, (m), (n))
21 #define T(m,n) BLKADDR(T, PLASMA_Complex64_t, (m), (n))
/*
 * T2 addresses the same identifier T as T(), but with the second tile
 * coordinate shifted by A.nt.
 */
22 #define T2(m,n) BLKADDR(T, PLASMA_Complex64_t, (m), (n)+A.nt)
23 /***************************************************************************/
/*
 * NOTE(review): this listing is incomplete. The embedded numbering jumps
 * 23 -> 29 and 30 -> 33, and skips one number before every argument list
 * that begins with "plasma->quark, &task_flags". The first lines of the
 * signature, some declarations (plasma, task_flags and BS are used but not
 * declared in the visible text) and the name of every kernel call are
 * therefore missing. Restore them from the original file before compiling.
 *
 * What the visible code does: it selects one of four loop nests from
 * side (PlasmaLeft or not) and trans (PlasmaNoTrans or not). Each nest walks
 * k over the first K = min(A.mt, A.nt) tile rows of A and, for each k,
 * holds three groups of calls:
 *   1. one call per domain head N = k, k+BS, ... using A(k, N) and T(k, N);
 *   2. calls pairing the head tile of B with every other tile of the same
 *      BS-wide domain, using A(k, m) and T(k, m) (or index n on the right);
 *   3. calls pairing tiles of B at distance RD = BS, 2 BS, 4 BS, ... using
 *      A(k, N+RD) and T2(k, N+RD).
 * Left/NoTrans and Right/ConjTrans run k upward with the groups in the order
 * 1, 2, 3. The other two nests run k downward with the groups in the order
 * 3, 2, 1 and with the RD and in-domain loops reversed.
 */
29  PLASMA_sequence *sequence, PLASMA_request *request)
30 {
33 
34  int k, m, n;
35  int K, N, RD, lastRD;
36  int ldaN, ldak;
37  int ldbN, ldbm, ldbNRD;
38  int tempNn, tempkm, tempnn, tempmm, tempNRDn, tempkmin;
39  int ib;
40 
41  plasma = plasma_context_self();
/* Leave without queuing anything when the sequence status is not PLASMA_SUCCESS. */
42  if (sequence->status != PLASMA_SUCCESS)
43  return;
/* Store the sequence's quark_sequence in task_flags under the TASK_SEQUENCE key. */
44  QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
45 
/* ib is forwarded unchanged in every visible argument list. */
46  ib = PLASMA_IB;
/* K bounds the k loops: the smaller of A's tile-row and tile-column counts. */
47  K = min(A.mt, A.nt);
48 
49  if (side == PlasmaLeft ) {
50  if (trans == PlasmaNoTrans) {
51  /*
52  * PlasmaLeft / PlasmaNoTrans
53  */
54  for (k = 0; k < K; k++) {
/* Height of tile row k of A: A.mb, or the remainder for the last tile row. */
55  tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
56  ldak = BLKLDD(A, k);
/* Group 1 and 2: one domain of up to BS tile columns of A per iteration. */
57  for (N = k; N < A.nt; N += BS) {
/* Width of tile column N of A: A.nb, or the remainder for the last one. */
58  tempNn = N == A.nt-1 ? A.n-N*A.nb : A.nb;
59  tempkmin = min(tempkm,tempNn);
/* NOTE(review): ldaN is assigned here but no visible line of this branch reads it. */
60  ldaN = BLKLDD(A, N);
61  ldbN = BLKLDD(B, N);
/* Group 1: domain head A(k, N) / T(k, N) against every tile of B's tile row N. */
62  for (n = 0; n < B.nt; n++) {
63  tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
65  plasma->quark, &task_flags,
66  side, trans,
67  tempNn, tempnn,
68  tempkmin, ib, T.nb,
69  A(k, N), ldak,
70  T(k, N), T.mb,
71  B(N, n), ldbN);
72  }
/* Group 2: remaining tiles m of the domain, each paired with the head tile row N of B. */
73  for (m = N+1; m < min(N+BS, A.nt); m++) {
74  tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
75  ldbm = BLKLDD(B, m);
/*
 * NOTE(review): the first size argument below is B.nb, whereas the mirrored
 * call in the PlasmaLeft / PlasmaConjTrans branch passes B.mb. Confirm which
 * one is intended; they only agree when B's tiles are square.
 */
76  for (n = 0; n < B.nt; n++) {
77  tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
79  plasma->quark, &task_flags,
80  side, trans,
81  B.nb, tempnn, tempmm, tempnn,
82  tempkm, ib, T.nb,
83  B(N, n), ldbN,
84  B(m, n), ldbm,
85  A(k, m), ldak,
86  T(k, m), T.mb);
87  }
88  }
89  }
/* Group 3: pair domain heads at distance RD, doubling RD on every level. */
90  for (RD = BS; RD < A.nt-k; RD *= 2) {
91  for (N = k; N+RD < A.nt; N += 2*RD) {
/* Width of tile column N+RD of A, with the remainder for the last one. */
92  tempNRDn = N+RD == A.nt-1 ? A.n-(N+RD)*A.nb : A.nb;
93  ldbN = BLKLDD(B, N );
94  ldbNRD = BLKLDD(B, N+RD);
95  for (n = 0; n < B.nt; n++) {
96  tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
98  plasma->quark, &task_flags,
99  side, trans,
100  B.mb, tempnn, tempNRDn, tempnn,
101  tempkm, ib, T.nb,
102  B (N, n), ldbN,
103  B (N+RD, n), ldbNRD,
104  A (k, N+RD), ldak,
105  T2(k, N+RD), T.mb);
106  }
107  }
108  }
109  }
110  } else {
111  /*
112  * PlasmaLeft / PlasmaConjTrans
113  */
/* Same three groups as the NoTrans branch, visited in the opposite order. */
114  for (k = K-1; k >= 0; k--) {
115  tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
116  ldak = BLKLDD(A, k);
/*
 * Find the largest RD in BS, 2 BS, 4 BS, ... that is still below A.nt-k.
 * lastRD stays 0 when there is none; the descending loop is then skipped,
 * assuming BS is positive (BS is not defined in the visible text).
 */
117  lastRD = 0;
118  for (RD = BS; RD < A.nt-k; RD *= 2)
119  lastRD = RD;
/* Group 3 first, from the widest pairing distance down to BS. */
120  for (RD = lastRD; RD >= BS; RD /= 2) {
121  for (N = k; N+RD < A.nt; N += 2*RD) {
122  tempNRDn = N+RD == A.nt-1 ? A.n-(N+RD)*A.nb : A.nb;
123  ldbN = BLKLDD(B, N );
124  ldbNRD = BLKLDD(B, N+RD);
/*
 * NOTE(review): the first size argument below is B.nb, whereas the matching
 * call in the PlasmaLeft / PlasmaNoTrans branch passes B.mb. Confirm which
 * one is intended.
 */
125  for (n = 0; n < B.nt; n++) {
126  tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
128  plasma->quark, &task_flags,
129  side, trans,
130  B.nb, tempnn, tempNRDn, tempnn,
131  tempkm, ib, T.nb,
132  B (N, n), ldbN,
133  B (N+RD, n), ldbNRD,
134  A (k, N+RD), ldak,
135  T2(k, N+RD), T.mb);
136  }
137  }
138  }
139  for (N = k; N < A.nt; N += BS) {
140  tempNn = N == A.nt-1 ? A.n-N*A.nb : A.nb;
141  tempkmin = min(tempkm,tempNn);
/* NOTE(review): ldaN is assigned here but no visible line of this branch reads it. */
142  ldaN = BLKLDD(A, N);
143  ldbN = BLKLDD(B, N);
/* Group 2: in-domain tiles, from the last tile of the domain back to N+1. */
144  for (m = min(N+BS, A.nt)-1; m > N; m--) {
145  tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
146  ldbm = BLKLDD(B, m);
147  for (n = 0; n < B.nt; n++) {
148  tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
150  plasma->quark, &task_flags,
151  side, trans,
152  B.mb, tempnn, tempmm, tempnn,
153  tempkm, ib, T.nb,
154  B(N, n), ldbN,
155  B(m, n), ldbm,
156  A(k, m), ldak,
157  T(k, m), T.mb);
158  }
159  }
/* Group 1 last: domain head A(k, N) / T(k, N) against B's tile row N. */
160  for (n = 0; n < B.nt; n++) {
161  tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
163  plasma->quark, &task_flags,
164  side, trans,
165  tempNn, tempnn,
166  tempkmin, ib, T.nb,
167  A(k, N), ldak,
168  T(k, N), T.mb,
169  B(N, n), ldbN);
170  }
171  }
172  }
173 
174  }
175  } else {
176  if (trans == PlasmaNoTrans) {
177  /*
178  * PlasmaRight / PlasmaNoTrans
179  */
/*
 * Right-side branches index B by (m, tile column) instead of (tile row, n):
 * the inner loop runs over B's tile rows m and both B operands share ldbm.
 */
180  for (k = K-1; k >= 0; k--) {
181  tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
182  ldak = BLKLDD(A, k);
/* Largest pairing distance below A.nt-k, or 0 when there is none. */
183  lastRD = 0;
184  for (RD = BS; RD < A.nt-k; RD *= 2)
185  lastRD = RD;
/* Group 3 first, from the widest pairing distance down to BS. */
186  for (RD = lastRD; RD >= BS; RD /= 2) {
187  for (N = k; N+RD < A.nt; N += 2*RD) {
188  tempNRDn = N+RD == A.nt-1 ? A.n-(N+RD)*A.nb : A.nb;
189  for (m = 0; m < B.mt; m++) {
190  ldbm = BLKLDD(B, m);
191  tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
193  plasma->quark, &task_flags,
194  side, trans,
195  tempmm, B.nb, tempmm, tempNRDn,
196  tempkm, ib, T.nb,
197  B (m, N ), ldbm,
198  B (m, N+RD), ldbm,
199  A (k, N+RD), ldak,
200  T2(k, N+RD), T.mb);
201  }
202  }
203  }
204  for (N = k; N < A.nt; N += BS) {
205  tempNn = N == A.nt-1 ? A.n-N*A.nb : A.nb;
206  tempkmin = min(tempkm,tempNn);
/* Group 2: in-domain tile columns, from the last one of the domain back to N+1. */
207  for (n = min(N+BS, A.nt)-1; n > N; n--) {
208  tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
209  for (m = 0; m < B.mt; m++) {
210  tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
211  ldbm = BLKLDD(B, m);
213  plasma->quark, &task_flags,
214  side, trans,
215  tempmm, B.nb, tempmm, tempnn,
216  tempkm, ib, T.nb,
217  B(m, N), ldbm,
218  B(m, n), ldbm,
219  A(k, n), ldak,
220  T(k, n), T.mb);
221  }
222  }
/* Group 1 last: domain head A(k, N) / T(k, N) against B's tile column N. */
223  for (m = 0; m < B.mt; m++) {
224  tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
225  ldbm = BLKLDD(B, m);
227  plasma->quark, &task_flags,
228  side, trans,
229  tempmm, tempNn,
230  tempkmin, ib, T.nb,
231  A(k, N), ldak,
232  T(k, N), T.mb,
233  B(m, N), ldbm);
234  }
235  }
236  }
237  } else {
238  /*
239  * PlasmaRight / PlasmaConjTrans
240  */
241  for (k = 0; k < K; k++) {
242  tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
243  ldak = BLKLDD(A, k);
244  for (N = k; N < A.nt; N += BS) {
245  tempNn = N == A.nt-1 ? A.n-N*A.nb : A.nb;
246  tempkmin = min(tempkm,tempNn);
247  ldaN = BLKLDD(A, N);
/*
 * Group 1: domain head A(k, N) / T(k, N) against B's tile column N.
 * NOTE(review): A(k, N) is passed with ldaN = BLKLDD(A, N) here, while the
 * three other branches pass the same tile with ldak = BLKLDD(A, k). Confirm
 * which leading dimension is intended.
 */
248  for (m = 0; m < B.mt; m++) {
249  ldbm = BLKLDD(B, m);
250  tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
252  plasma->quark, &task_flags,
253  side, trans,
254  tempmm, tempNn,
255  tempkmin, ib, T.nb,
256  A(k, N), ldaN,
257  T(k, N), T.mb,
258  B(m, N), ldbm);
259  }
/* Group 2: remaining tile columns n of the domain, each paired with column N of B. */
260  for (n = N+1; n < min(N+BS, A.nt); n++) {
261  tempnn = n == B.nt-1 ? B.n-n*B.nb : B.nb;
/*
 * NOTE(review): the second size argument below is tempNn, whereas the
 * mirrored call in the PlasmaRight / PlasmaNoTrans branch passes B.nb.
 * Confirm which one is intended.
 */
262  for (m = 0; m < B.mt; m++) {
263  tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
264  ldbm = BLKLDD(B, m);
266  plasma->quark, &task_flags,
267  side, trans,
268  tempmm, tempNn, tempmm, tempnn,
269  tempkm, ib, T.nb,
270  B(m, N), ldbm,
271  B(m, n), ldbm,
272  A(k, n), ldak,
273  T(k, n), T.mb);
274  }
275  }
276  }
/* Group 3: pair domain heads at distance RD, doubling RD on every level. */
277  for (RD = BS; RD < A.nt-k; RD *= 2) {
278  for (N = k; N+RD < A.nt; N += 2*RD) {
279  tempNRDn = N+RD == A.nt-1 ? A.n-(N+RD)*A.nb : A.nb;
280  for (m = 0; m < B.mt; m++) {
281  tempmm = m == B.mt-1 ? B.m-m*B.mb : B.mb;
282  ldbm = BLKLDD(B, m);
284  plasma->quark, &task_flags,
285  side, trans,
286  tempmm, B.nb, tempmm, tempNRDn,
287  tempkm, ib, T.nb,
288  B (m, N ), ldbm,
289  B (m, N+RD), ldbm,
290  A (k, N+RD), ldak,
291  T2(k, N+RD), T.mb);
292  }
293  }
294  }
295  }
296  }
297  }
298 }