PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
psorglqrh.c
Go to the documentation of this file.
1 
15 #include "common.h"
16 
/*
 * Tile accessors: each macro forwards to BLKADDR with element type float
 * and the given (m, n) tile indices on the descriptor of the same name.
 * BLKADDR itself is defined elsewhere; presumably it yields the address of
 * tile (m, n) -- TODO confirm against its definition.
 */
17 #define A(m,n) BLKADDR(A, float, (m), (n))
18 #define Q(m,n) BLKADDR(Q, float, (m), (n))
19 #define T(m,n) BLKADDR(T, float, (m), (n))
/*
 * T2 addresses the same descriptor T as T(), but with the column index
 * shifted by A.nt. It therefore needs a descriptor named A in scope at the
 * point of use, in addition to T.
 */
20 #define T2(m,n) BLKADDR(T, float, (m), (n)+(A.nt))
21 /***************************************************************************/
/*
 * NOTE(review): this text is a rendered listing with gaps. The embedded
 * listing numbers jump from 21 to 26, so the line carrying the function
 * name and the first parameters is not visible here. The body uses
 * descriptors named A and Q; presumably they are the missing leading
 * parameters -- confirm against the real file.
 *
 * Visible parameters:
 *   T        - descriptor addressed through the T() and T2() macros.
 *   BS       - step used for the column-tile loops below. No check for
 *              BS >= 1 is visible; see the notes on the RD and N loops.
 *   sequence - its status is tested on entry and its quark_sequence is
 *              attached to task_flags.
 *   request  - not referenced anywhere in the visible code.
 *
 * Returns nothing. If sequence->status is not PLASMA_SUCCESS on entry, the
 * function returns before doing any other work.
 */
26  PLASMA_desc T, int BS,
27  PLASMA_sequence *sequence, PLASMA_request *request)
28 {
/* NOTE(review): listing lines 29-30 are absent; `plasma` and `task_flags`
 * are used below but their declarations are not visible here. */
31 
32  int k, m, n;
33  int K, N, RD, lastRD;
34  int ldak;
35  int ldqm;
36  int tempkm, tempkmin, tempNn, tempnn, tempmm, tempNRDn;
37  int ib;
38 
39  plasma = plasma_context_self();
/* Do nothing when the sequence is already in a non-success state. */
40  if (sequence->status != PLASMA_SUCCESS)
41  return;
42  QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
43 
44  ib = PLASMA_IB;
/* Number of k iterations: the smaller of A's tile-row and tile-column counts. */
45  K = min(A.mt, A.nt);
46 
/* Walk the tile rows of A from the last (K-1) down to the first. */
47  for (k = K-1; k >= 0; k--) {
/* Row count of tile row k: the remainder for the last tile row, A.mb otherwise. */
48  tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
49  ldak = BLKLDD(A, k);
/* lastRD becomes the largest value in BS, 2*BS, 4*BS, ... that is still
 * below A.nt-k, or stays 0 when BS itself is not below A.nt-k.
 * NOTE(review): with BS <= 0 this loop would not advance toward its bound;
 * presumably callers guarantee BS >= 1 -- confirm. */
50  lastRD = 0;
51  for (RD = BS; RD < A.nt-k; RD *= 2)
52  lastRD = RD;
/* Halve the distance RD from lastRD down to BS. For each RD, pair column
 * tile N with column tile N+RD, with N starting at k and stepping by 2*RD. */
53  for (RD = lastRD; RD >= BS; RD /= 2) {
54  for (N = k; N+RD < A.nt; N += 2*RD) {
/* Column count of tile column N+RD (remainder if it is the last column tile). */
55  tempNRDn = N+RD == A.nt-1 ? A.n-(N+RD)*A.nb : A.nb;
/* One call per tile row m of Q. */
56  for (m = 0; m < Q.mt; m++) {
57  tempmm = m == Q.mt-1 ? Q.m-m*Q.mb : Q.mb;
58  ldqm = BLKLDD(Q, m );
/* NOTE(review): listing lines 59 and 61 are absent, so the name of the
 * routine being called and part of its argument list are not visible.
 * The visible arguments reference tiles Q(m,N), Q(m,N+RD), A(k,N+RD) and
 * T2(k,N+RD), i.e. the A.nt-shifted part of T. */
60  plasma->quark, &task_flags,
62  tempmm, Q.nb, tempmm, tempNRDn,
63  tempkm, ib, T.nb,
64  Q (m, N ), ldqm,
65  Q (m, N+RD), ldqm,
66  A (k, N+RD), ldak,
67  T2(k, N+RD), T.mb);
68  }
69  }
70  }
/* Process column tiles in groups of BS, starting at column tile k.
 * NOTE(review): N advances by BS, so BS <= 0 would never reach A.nt. */
71  for (N = k; N < A.nt; N += BS) {
72  tempNn = N == A.nt-1 ? A.n-N*A.nb : A.nb;
73  tempkmin = min(tempkm, tempNn);
/* Inside the group, visit column tiles from the last one (capped at
 * A.nt-1) down to N+1; column tile N itself is handled after this loop. */
74  for (n = min(N+BS, A.nt)-1; n > N; n--) {
/* NOTE(review): the last-tile test uses Q.nt/Q.n/Q.nb although n is bounded
 * by A.nt above; presumably A and Q share the same column tiling -- confirm. */
75  tempnn = n == Q.nt-1 ? Q.n-n*Q.nb : Q.nb;
76 
77  for (m = 0; m < Q.mt; m++) {
78  tempmm = m == Q.mt-1 ? Q.m-m*Q.mb : Q.mb;
79  ldqm = BLKLDD(Q, m);
/* NOTE(review): listing lines 80 and 82 are absent (call name and part of
 * the argument list). Visible arguments reference Q(m,N), Q(m,n), A(k,n)
 * and T(k,n). */
81  plasma->quark, &task_flags,
83  tempmm, Q.nb, tempmm, tempnn,
84  tempkm, ib, T.nb,
85  Q(m, N), ldqm,
86  Q(m, n), ldqm,
87  A(k, n), ldak,
88  T(k, n), T.mb);
89  }
90  }
/* Final step for this group: one call per tile row m of Q on column tile N. */
91  for (m = 0; m < Q.mt; m++) {
92  tempmm = m == Q.mt-1 ? Q.m-m*Q.mb : Q.mb;
93  ldqm = BLKLDD(Q, m);
/* NOTE(review): listing lines 94 and 96 are absent (call name and part of
 * the argument list). Visible arguments reference A(k,N), T(k,N) and
 * Q(m,N), with tempkmin = min(tempkm, tempNn) among the size arguments. */
95  plasma->quark, &task_flags,
97  tempmm, tempNn,
98  tempkmin, ib, T.nb,
99  A(k, N), ldak,
100  T(k, N), T.mb,
101  Q(m, N), ldqm);
102  }
103  }
104  }
105 }