/*
 * Addressing shorthands used by the loops below.  Each macro forwards to
 * BLKADDR (defined outside this excerpt), passing the object named A, B or T,
 * the element type PLASMA_Complex64_t, and the two parenthesized arguments
 * (m) and (n).
 * NOTE(review): BLKADDR presumably yields the address of tile (m, n) of the
 * given object -- confirm against its definition.
 *
 * These are function-like macros, so the preprocessor expands them only when
 * the name is immediately followed by '('.  Plain member accesses such as
 * A.nt or B.mt are untouched and still refer to the variables A and B.
 */
19 #define A(m,n) BLKADDR(A, PLASMA_Complex64_t, (m), (n))
20 #define B(m,n) BLKADDR(B, PLASMA_Complex64_t, (m), (n))
21 #define T(m,n) BLKADDR(T, PLASMA_Complex64_t, (m), (n))
/* Same object as T(m,n), but the second argument is shifted by A.nt. */
22 #define T2(m,n) BLKADDR(T, PLASMA_Complex64_t, (m), (n)+A.nt)
37 int ldbN, ldbm, ldbNRD;
38 int tempNn, tempkm, tempnn, tempmm, tempNRDn, tempkmin;
54 for (k = 0; k < K; k++) {
55 tempkm = k == A.
mt-1 ? A.
m-k*A.
mb : A.
mb;
57 for (N = k; N < A.
nt; N += BS) {
58 tempNn = N == A.
nt-1 ? A.
n-N*A.
nb : A.
nb;
59 tempkmin =
min(tempkm,tempNn);
62 for (n = 0; n < B.
nt; n++) {
63 tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
65 plasma->
quark, &task_flags,
73 for (m = N+1; m <
min(N+BS, A.
nt); m++) {
74 tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
76 for (n = 0; n < B.
nt; n++) {
77 tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
79 plasma->
quark, &task_flags,
81 B.
nb, tempnn, tempmm, tempnn,
90 for (RD = BS; RD < A.
nt-k; RD *= 2) {
91 for (N = k; N+RD < A.
nt; N += 2*RD) {
92 tempNRDn = N+RD == A.
nt-1 ? A.
n-(N+RD)*A.
nb : A.
nb;
95 for (n = 0; n < B.
nt; n++) {
96 tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
98 plasma->
quark, &task_flags,
100 B.
mb, tempnn, tempNRDn, tempnn,
114 for (k = K-1; k >= 0; k--) {
115 tempkm = k == A.
mt-1 ? A.
m-k*A.
mb : A.
mb;
118 for (RD = BS; RD < A.
nt-k; RD *= 2)
120 for (RD = lastRD; RD >= BS; RD /= 2) {
121 for (N = k; N+RD < A.
nt; N += 2*RD) {
122 tempNRDn = N+RD == A.
nt-1 ? A.
n-(N+RD)*A.
nb : A.
nb;
125 for (n = 0; n < B.
nt; n++) {
126 tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
128 plasma->
quark, &task_flags,
130 B.
nb, tempnn, tempNRDn, tempnn,
139 for (N = k; N < A.
nt; N += BS) {
140 tempNn = N == A.
nt-1 ? A.
n-N*A.
nb : A.
nb;
141 tempkmin =
min(tempkm,tempNn);
144 for (m =
min(N+BS, A.
nt)-1; m > N; m--) {
145 tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
147 for (n = 0; n < B.
nt; n++) {
148 tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
150 plasma->
quark, &task_flags,
152 B.
mb, tempnn, tempmm, tempnn,
160 for (n = 0; n < B.
nt; n++) {
161 tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
163 plasma->
quark, &task_flags,
180 for (k = K-1; k >= 0; k--) {
181 tempkm = k == A.
mt-1 ? A.
m-k*A.
mb : A.
mb;
184 for (RD = BS; RD < A.
nt-k; RD *= 2)
186 for (RD = lastRD; RD >= BS; RD /= 2) {
187 for (N = k; N+RD < A.
nt; N += 2*RD) {
188 tempNRDn = N+RD == A.
nt-1 ? A.
n-(N+RD)*A.
nb : A.
nb;
189 for (m = 0; m < B.
mt; m++) {
191 tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
193 plasma->
quark, &task_flags,
195 tempmm, B.
nb, tempmm, tempNRDn,
204 for (N = k; N < A.
nt; N += BS) {
205 tempNn = N == A.
nt-1 ? A.
n-N*A.
nb : A.
nb;
206 tempkmin =
min(tempkm,tempNn);
207 for (n =
min(N+BS, A.
nt)-1; n > N; n--) {
208 tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
209 for (m = 0; m < B.
mt; m++) {
210 tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
213 plasma->
quark, &task_flags,
215 tempmm, B.
nb, tempmm, tempnn,
223 for (m = 0; m < B.
mt; m++) {
224 tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
227 plasma->
quark, &task_flags,
241 for (k = 0; k < K; k++) {
242 tempkm = k == A.
mt-1 ? A.
m-k*A.
mb : A.
mb;
244 for (N = k; N < A.
nt; N += BS) {
245 tempNn = N == A.
nt-1 ? A.
n-N*A.
nb : A.
nb;
246 tempkmin =
min(tempkm,tempNn);
248 for (m = 0; m < B.
mt; m++) {
250 tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
252 plasma->
quark, &task_flags,
260 for (n = N+1; n <
min(N+BS, A.
nt); n++) {
261 tempnn = n == B.
nt-1 ? B.
n-n*B.
nb : B.
nb;
262 for (m = 0; m < B.
mt; m++) {
263 tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
266 plasma->
quark, &task_flags,
268 tempmm, tempNn, tempmm, tempnn,
277 for (RD = BS; RD < A.
nt-k; RD *= 2) {
278 for (N = k; N+RD < A.
nt; N += 2*RD) {
279 tempNRDn = N+RD == A.
nt-1 ? A.
n-(N+RD)*A.
nb : A.
nb;
280 for (m = 0; m < B.
mt; m++) {
281 tempmm = m == B.
mt-1 ? B.
m-m*B.
mb : B.
mb;
284 plasma->
quark, &task_flags,
286 tempmm, B.
nb, tempmm, tempNRDn,