/* Defined with no value, so it can only act as a presence/absence switch.
 * NOTE(review): the code that tests this symbol is not visible here - confirm
 * which paths it selects. */
23 #define PARALLEL_KERNEL
/* Shorthand that forwards to BLKADDR with element type double.
 * Presumably yields the address of tile (m, n) of descriptor A - confirm
 * against BLKADDR's definition. */
24 #define A(m,n) BLKADDR(A, double, m, n)
/* Address of the IPIV entry at index A.mb * k; both factors are widened to
 * int64_t before the multiplication. */
25 #define IPIV(k) &(IPIV[(int64_t)A.mb*(int64_t)(k)])
/* Makes the "_rl_" name an alias for plasma_pdgetrf_rectil_quark. */
27 #define plasma_pdgetrf_rectil_rl_quark plasma_pdgetrf_rectil_quark
36 int tempk, tempm, tempkm, tempkn, tempmm, tempnn;
41 double zone = (double)1.0;
42 double mzone = (double)-1.0;
46 int panel_thread_count;
61 for (k = 0; k <
min(A.
mt, A.
nt); k++)
65 tempkm = k == A.
mt-1 ? tempm : A.
mb;
66 tempkn = k == A.
nt-1 ? A.
n-k*A.
nb : A.
nb;
71 while ( ((panel_thread_count * 4 * A.
mb) > tempm)
72 && (panel_thread_count > 1) ) {
80 plasma->
quark, &task_flagsP,
82 A(k, k), A.
mb*A.
nb, IPIV(k),
83 sequence, request, 1, tempk,
86 fakedep = (
void *)(intptr_t)(k+1);
87 for (n = k+1; n < A.
nt; n++)
93 tempnn = n == A.
nt-1 ? A.
n-n*A.
nb : A.
nb;
95 plasma->
quark, &task_flagsU,
97 A(k, n), 1, tempkm,
IPIV(k), 1,
102 tempmm = m == A.
mt-1 ? A.
m-m*A.
mb : A.
mb;
106 plasma->
quark, &task_flagsU,
108 tempmm, tempnn, A.
nb, A.
mb,
109 mzone, A(m, k), ldam,
111 zone,
A(m, n), ldam);
113 for (m = k+2; m < A.
mt; m++)
115 tempmm = m == A.
mt-1 ? A.
m-m*A.
mb : A.
mb;
119 plasma->
quark, &task_flagsU,
121 tempmm, tempnn, A.
nb, A.
mb,
122 mzone, A(m, k), ldam,
134 for (k = 0; k <
min(A.
mt, A.
nt); k++)
139 tempkm = k == A.
mt-1 ? tempm : A.
mb;
140 tempkn = k == A.
nt-1 ? A.
n - k * A.
nb : A.
nb;
141 mintmp =
min(tempkm, tempkn);
148 fakedep = (
void*)(intptr_t)k;
149 for (n = 0; n < k; n++)
151 tempnn = n == A.
nt-1 ? A.
n-n*A.
nb : A.
nb;
153 plasma->
quark, &task_flagsU,
155 A(k, n), 1, mintmp,
IPIV(k), 1,
171 int tempkm, tempkn, tempmm, tempnn;
177 double zone = (double)1.0;
178 double mzone = (double)-1.0;
182 int panel_thread_count;
197 fakedep = (
void*)(intptr_t)1;
198 for (n = 0; n < A.
nt; n++)
200 tempnn = n == A.
nt-1 ? A.
n-n*A.
nb : A.
nb;
205 for (k = 0; k <
min(A.
mt, n); k++)
209 tempkm = k == A.
mt-1 ? tempm : A.
mb;
213 plasma->
quark, &task_flagsU,
215 A(k, n), 1, tempkm,
IPIV(k), 1,
220 tempmm = m == A.
mt-1 ? A.
m-m*A.
mb : A.
mb;
224 plasma->
quark, &task_flagsU,
226 tempmm, tempnn, A.
nb, A.
mb,
227 mzone, A(m, k), ldam,
229 zone,
A(m, n), ldam);
231 fakedep = (
void*)(intptr_t)k;
232 for (m = k+2; m < A.
mt; m++)
234 tempmm = m == A.
mt-1 ? A.
m-m*A.
mb : A.
mb;
238 plasma->
quark, &task_flagsU,
240 tempmm, tempnn, A.
nb, A.
mb,
241 mzone, A(m, k), ldam,
254 tempm = A.
m - k * A.
mb;
255 tempkn = k == A.
nt-1 ? A.
n-k*A.
nb : A.
nb;
258 while ( ((panel_thread_count * 4 * A.
mb) > tempm)
259 && (panel_thread_count > 1) ) {
260 panel_thread_count--;
265 plasma->
quark, &task_flagsP,
267 IPIV(k), sequence, request,
268 1, A.
mb*k, panel_thread_count );
273 for (k = 0; k <
min(A.
mt, A.
nt); k++)
277 tempkm = k == A.
mt-1 ? tempm : A.
mb;
280 fakedep = (
void*)(intptr_t)k;
281 for (n = 0; n < k; n++)
286 tempnn = n == A.
nt-1 ? A.
n-n*A.
nb : A.
nb;
288 plasma->
quark, &task_flagsU,
290 A(k, n), 1, tempkm,
IPIV(k), 1,