PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
pcherbt.c
Go to the documentation of this file.
1 
16 #include "common.h"
17 
/*
 * Tile accessors used throughout this file: A(m,n) and T(m,n) expand to
 * BLKADDR on the descriptors named A and T respectively, with element type
 * PLASMA_Complex32_t and tile indices (m, n).
 * NOTE(review): BLKADDR is defined elsewhere; presumably it yields the
 * address of tile (m, n) -- confirm against common.h.
 */
18 #define A(m,n) BLKADDR(A, PLASMA_Complex32_t, m, n)
19 #define T(m,n) BLKADDR(T, PLASMA_Complex32_t, m, n)
20 /***************************************************************************/
25  PLASMA_sequence *sequence, PLASMA_request *request)
26 {
/*
 * NOTE(review): this listing is incomplete. The embedded line numbers skip
 * several source lines (e.g. 24, 27-28, 46, 53, 55, 65, 67, ...), so the
 * first line of the signature, the declarations of `plasma` and
 * `task_flags`, the name of every call that takes `plasma->quark` and some
 * argument lines are not visible here. The comments below describe only
 * what the visible loops and arguments show.
 *
 * Overview of the visible logic:
 *  - returns immediately when sequence->status is not PLASMA_SUCCESS;
 *  - otherwise iterates k over 0 .. A.nt-2 and, for each k, issues a series
 *    of calls on tiles of A and T (presumably QUARK task insertions, given
 *    the `plasma->quark, &task_flags` leading arguments -- confirm);
 *  - uplo == PlasmaLower works on tiles at or below tile row k+1 in tile
 *    columns >= k; any other uplo value takes the second branch, which
 *    mirrors the indices (tile row k, tile columns >= k+1).
 *  - `request` is not referenced anywhere in the visible code.
 */
29 
30  int k, m, n, i, j;
31  int ldak, ldam, ldan, ldaj, ldai;
32  int tempkn, tempmm, tempnn, tempjj;
33  int ib;
34 
35  plasma = plasma_context_self();
/* Nothing is issued unless the sequence is still in the PLASMA_SUCCESS state. */
36  if (sequence->status != PLASMA_SUCCESS)
37  return;
38 
/* Attach this sequence's quark_sequence to the flags passed to every call below. */
39  QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
40 
41  ib = PLASMA_IB;
42  if (uplo == PlasmaLower) {
/* One iteration per tile column k, excluding the last tile column. */
43  for (k = 0; k < A.nt-1; k++){
/* Extent of tile k+1: the last tile gets the remainder A.n-(k+1)*A.nb, every other tile A.nb. */
44  tempkn = k+1 == A.nt-1 ? A.n-(k+1)*A.nb : A.nb;
45  ldak = BLKLDD(A, k+1);
/* NOTE(review): callee name missing from this view; the call takes tiles A(k+1,k) and T(k+1,k). */
47  plasma->quark, &task_flags,
48  tempkn, A.nb, ib, T.nb,
49  A(k+1, k), ldak,
50  T(k+1, k), T.mb);
51 
52  /* LEFT and RIGHT on the symmetric diagonal block */
/* Takes A(k+1,k) and T(k+1,k) together with the diagonal tile A(k+1,k+1). */
54  plasma->quark, &task_flags,
56  tempkn, tempkn, ib, T.nb,
57  A(k+1, k), ldak,
58  T(k+1, k), T.mb,
59  A(k+1, k+1), ldak);
60 
61  /* RIGHT on the remaining tiles until the bottom */
/* Tiles A(m,k+1) for m = k+2 .. A.mt-1; tempmm is A.mb except for the last tile row, which gets the remainder. */
62  for (m = k+2; m < A.mt ; m++) {
63  tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
64  ldam = BLKLDD(A, m);
66  plasma->quark, &task_flags,
68  tempmm, A.nb, tempkn, ib, T.nb,
69  A(k+1, k), ldak,
70  T(k+1, k), T.mb,
71  A(m , k+1), ldam);
72  }
73 
/* Second sweep over the same m range: each m first pairs A(k+1,k) with A(m,k) and T(m,k), */
/* then issues the LEFT, RIGHT and LEFT->RIGHT calls below, all of which take A(m,k) and T(m,k). */
74  for (m = k+2; m < A.mt; m++) {
75  tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
76  ldam = BLKLDD(A, m);
78  plasma->quark, &task_flags,
79  tempmm, A.nb, ib, T.nb,
80  A(k+1, k), ldak,
81  A(m , k), ldam,
82  T(m , k), T.mb);
83 
84  /* LEFT */
/* Tile pairs A(i,k+1) / A(m,i) for i strictly between k+1 and m. */
85  for (i = k+2; i < m; i++) {
86  ldai = BLKLDD(A, i);
88  plasma->quark, &task_flags,
90  A.mb, A.nb, tempmm, A.nb, A.nb, ib, T.nb,
91  A(i, k+1), ldai,
92  A(m, i), ldam,
93  A(m, k), ldam,
94  T(m, k), T.mb);
95  }
96 
97  /* RIGHT */
/* Tile pairs A(j,k+1) / A(j,m) for the tile rows below m (j = m+1 .. A.mt-1). */
98  for (j = m+1; j < A.mt ; j++) {
99  tempjj = j == A.mt-1 ? A.m-j*A.mb : A.mb;
100  ldaj = BLKLDD(A, j);
102  plasma->quark, &task_flags,
104  tempjj, A.nb, tempjj, tempmm, A.nb, ib, T.nb,
105  A(j, k+1), ldaj,
106  A(j, m), ldaj,
107  A(m, k), ldam,
108  T(m, k), T.mb);
109  }
110 
111  /* LEFT->RIGHT */
/* Takes the three tiles A(k+1,k+1), A(m,k+1) and A(m,m) together with A(m,k) and T(m,k). */
113  plasma->quark, &task_flags,
114  A.nb, A.nb, tempmm, A.nb, tempmm, tempmm, A.nb, ib, T.nb,
115  A(k+1, k+1), ldak,
116  A(m , k+1), ldam,
117  A(m , m), ldam,
118  A(m , k), ldam,
119  T(m , k), T.mb);
120  }
121  }
122  }
123  else {
/*
 * Mirror of the branch above with row/column tile indices swapped.
 * NOTE(review): tiles in tile row k (A(k,k+1), A(k,n)) are followed by A.nb
 * where the lower branch passes a BLKLDD(...) value -- confirm this is intended.
 */
124  for (k = 0; k < A.nt-1; k++){
125  tempkn = k+1 == A.nt-1 ? A.n-(k+1)*A.nb : A.nb;
126  ldak = BLKLDD(A, k+1);
/* NOTE(review): callee name missing from this view; the call takes tiles A(k,k+1) and T(k,k+1). */
128  plasma->quark, &task_flags,
129  A.nb, tempkn, ib, T.nb,
130  A(k, k+1), A.nb,
131  T(k, k+1), T.mb);
132 
133  /* RIGHT and LEFT on the symmetric diagonal block */
135  plasma->quark, &task_flags,
136  PlasmaUpper,
137  tempkn, tempkn, ib, T.nb,
138  A(k, k+1), A.nb,
139  T(k, k+1), T.mb,
140  A(k+1, k+1), ldak);
141 
142  /* LEFT on the remaining tiles of tile row k+1 (tile columns k+2 .. A.nt-1) */
143  for (n = k+2; n < A.nt ; n++) {
144  tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;
146  plasma->quark, &task_flags,
148  A.nb, tempnn, tempkn, ib, T.nb,
149  A(k, k+1), A.nb,
150  T(k, k+1), T.mb,
151  A(k+1, n), ldak);
152  }
153 
/* Second sweep over the same n range: each n first pairs A(k,k+1) with A(k,n) and T(k,n), */
/* then issues the RIGHT, LEFT and RIGHT->LEFT calls below, all of which take A(k,n) and T(k,n). */
154  for (n = k+2; n < A.nt; n++) {
155  tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;
156  ldan = BLKLDD(A, n);
158  plasma->quark, &task_flags,
159  A.nb, tempnn, ib, T.nb,
160  A(k, k+1), A.nb,
161  A(k, n), A.nb,
162  T(k, n), T.mb);
163 
164  /* RIGHT */
/* Tile pairs A(k+1,i) / A(i,n) for i strictly between k+1 and n. */
165  for (i = k+2; i < n; i++) {
166  ldai = BLKLDD(A, i);
168  plasma->quark, &task_flags,
170  A.mb, A.nb, A.nb, tempnn, A.nb, ib, T.nb,
171  A(k+1, i), ldak,
172  A(i, n), ldai,
173  A(k, n), A.nb,
174  T(k, n), T.mb);
175  }
176 
177  /* LEFT */
/* Tile pairs A(k+1,j) / A(n,j) for the tile columns after n (j = n+1 .. A.nt-1). */
178  for (j = n+1; j < A.nt ; j++) {
179  tempjj = j == A.nt-1 ? A.n-j*A.nb : A.nb;
180  ldaj = BLKLDD(A, j);
182  plasma->quark, &task_flags,
184  A.nb, tempjj, tempnn, tempjj, A.nb, ib, T.nb,
185  A(k+1, j), ldak,
186  A(n, j), ldan,
187  A(k, n), A.nb,
188  T(k, n), T.mb);
189  }
190 
191  /* RIGHT->LEFT */
/* Takes the three tiles A(k+1,k+1), A(k+1,n) and A(n,n) together with A(k,n) and T(k,n). */
193  plasma->quark, &task_flags,
194  A.nb, A.nb, A.nb, tempnn, tempnn, tempnn, A.nb, ib, T.nb,
195  A(k+1, k+1), ldak,
196  A(k+1, n), ldak,
197  A(n , n), ldan,
198  A(k , n), A.nb,
199  T(k , n), T.mb);
200  }
201  }
202  }
203 }