PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
pzgetrf_incpiv.c
Go to the documentation of this file.
1 
17 #include "common.h"
18 
/* Tile accessors: A(m,n) and L(m,n) forward the descriptor of the same name,
 * the element type PLASMA_Complex64_t and the tile coordinates to BLKADDR
 * (presumably yielding the address of tile (m,n) -- BLKADDR is defined
 * elsewhere, confirm there). */
19 #define A(m,n) BLKADDR(A, PLASMA_Complex64_t, m, n)
20 #define L(m,n) BLKADDR(L, PLASMA_Complex64_t, m, n)
/* Address of the IPIV entry at offset A.mb * (m + A.mt * n); every factor is
 * widened to int64_t before the index is computed. */
21 #define IPIV(m,n) &(IPIV[(int64_t)A.mb*((int64_t)(m)+(int64_t)A.mt*(int64_t)(n))])
22 /***************************************************************************/
26 {
    /*
     * Worker loop coordinated through the ss_* primitives (ss_init,
     * ss_cond_wait, ss_cond_set, ss_abort, ss_aborted, ss_finalize).
     * Presumably the statically scheduled tile LU with incremental pivoting
     * (the file is pzgetrf_incpiv.c) -- confirm against the full source.
     *
     * NOTE(review): this extract lacks the function signature (listing
     * line 25) and the callee names on listing lines 83 and 95; only the
     * argument lists of those two calls are visible below.
     */
27  PLASMA_desc A;
28  PLASMA_desc L;
29  int *IPIV;
30  PLASMA_sequence *sequence;
31  PLASMA_request *request;
32 
33  int k, m, n;
34  int next_k;
35  int next_m;
36  int next_n;
37  int ldak, ldam;
38  int info;
39  int tempkn, tempkm, tempmm, tempnn;
40  int ib = PLASMA_IB;
41  PLASMA_Complex64_t *work;
42 
    /* Fetch the five arguments through plasma_unpack_args_5 and do nothing
     * when the sequence is already in a non-success state. */
43  plasma_unpack_args_5(A, L, IPIV, sequence, request);
44  if (sequence->status != PLASMA_SUCCESS)
45  return;
    /* Scratch buffer requested with size ib*L.nb and type tag L.dtyp; it is
     * released just before ss_finalize() at the end of the function.
     * NOTE(review): the returned pointer is not checked before it is passed
     * as `work` further down. */
46  work = (PLASMA_Complex64_t*)plasma_private_alloc(plasma, ib*L.nb, L.dtyp);
    /* presumably sets up an A.mt x A.nt progress table with every entry at
     * -1 -- ss_init is defined elsewhere, confirm there */
47  ss_init(A.mt, A.nt, -1);
48 
    /* Starting position of this worker: column n = PLASMA_RANK at step
     * k = 0. While n lies past the last tile column, advance to the next
     * step and fold the overflow back in, shifted by k. The first row
     * visited is the diagonal one (m = k). */
49  k = 0;
50  n = PLASMA_RANK;
51  while (n >= A.nt) {
52  k++;
53  n = n-A.nt+k;
54  }
55  m = k;
56 
    /* Runs until the steps are exhausted, the column index leaves the
     * matrix, or ss_aborted() reports an abort. */
57  while (k < min(A.mt, A.nt) && n < A.nt && !ss_aborted()) {
    /* Work out the position that follows (k, m, n): go down the rows of the
     * current column first; past the last row, jump PLASMA_SIZE columns
     * ahead, rolling over into later steps as needed, and restart on row
     * next_k. */
58  next_n = n;
59  next_m = m;
60  next_k = k;
61 
62  next_m++;
63  if (next_m == A.mt) {
64  next_n += PLASMA_SIZE;
65  while (next_n >= A.nt && next_k < min(A.mt, A.nt)) {
66  next_k++;
67  next_n = next_n-A.nt+next_k;
68  }
69  next_m = next_k;
70  }
71 
    /* Tile extents: the last tile row / column takes what is left of
     * A.m / A.n, every other tile is a full A.mb / A.nb. */
72  tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
73  tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
74  tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
75  tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;
76 
77  ldak = BLKLDD(A, k);
78  ldam = BLKLDD(A, m);
79 
    /* n == k: this worker is on the panel column of step k. */
80  if (n == k) {
81  if (m == k) {
    /* Diagonal tile: wait until entry (k,k) has reached step k-1, run the
     * kernel, then publish step k for (k,k).
     * NOTE(review): the callee name (listing line 83) is missing from this
     * extract; the lines below are its arguments. */
82  ss_cond_wait(k, k, k-1);
84  tempkm, tempkn, ib,
85  A(k, k), ldak,
86  IPIV(k, k), &info);
    /* A non-zero info is reported only when m is the last tile row; the
     * value handed to plasma_request_fail is info shifted by A.nb*k, and
     * ss_abort() is raised alongside it. */
87  if (info != 0 && m == A.mt-1) {
88  plasma_request_fail(sequence, request, info + A.nb*k);
89  ss_abort();
90  }
91  ss_cond_set(k, k, k);
92  }
93  else {
    /* Sub-diagonal panel tile (m,k): wait for step k-1 on (m,k), run the
     * kernel on A(k,k), A(m,k), L(m,k) and IPIV(m,k) using `work`, then
     * publish step k for (m,k).
     * NOTE(review): the callee name (listing line 95) is missing from this
     * extract; the lines below are its arguments. */
94  ss_cond_wait(m, k, k-1);
96  tempmm, tempkn, ib, A.nb,
97  A(k, k), ldak,
98  A(m, k), ldam,
99  L(m, k), L.mb,
100  IPIV(m, k),
101  work, L.nb, &info);
    /* Same reporting rule as for the diagonal tile. */
102  if (info != 0 && m == A.mt-1) {
103  plasma_request_fail(sequence, request, info + A.nb*k);
104  ss_abort();
105  }
106  ss_cond_set(m, k, k);
107  }
108  }
109  else {
    /* n != k: update of a tile in column n using the panel of step k. */
110  if (m == k) {
    /* Row k of column n: needs (k,k) at step k and (k,n) at step k-1.
     * No ss_cond_set follows in this branch. */
111  ss_cond_wait(k, k, k);
112  ss_cond_wait(k, n, k-1);
113  CORE_zgessm(
114  tempkm, tempnn, tempkm, ib,
115  IPIV(k, k),
116  A(k, k), ldak,
117  A(k, n), ldak);
118  }
119  else {
    /* Tile (m,n) below row k: needs (m,k) at step k and (m,n) at step k-1;
     * publishes step k for (m,n) once CORE_zssssm returns. */
120  ss_cond_wait(m, k, k);
121  ss_cond_wait(m, n, k-1);
122  CORE_zssssm(
123  A.nb, tempnn, tempmm, tempnn, A.nb, ib,
124  A(k, n), ldak,
125  A(m, n), ldam,
126  L(m, k), L.mb,
127  A(m, k), ldam,
128  IPIV(m, k));
129  ss_cond_set(m, n, k);
130  }
131  }
    /* Move on to the position computed at the top of the iteration. */
132  n = next_n;
133  m = next_m;
134  k = next_k;
135  }
136  plasma_private_free(plasma, work);
137  ss_finalize();
138 }
139 
140 /***************************************************************************/
144  PLASMA_sequence *sequence, PLASMA_request *request)
145 {
    /*
     * Variant that routes its work through QUARK: each argument list below
     * starts with plasma->quark and &task_flags.
     *
     * NOTE(review): this extract lacks listing lines 143 (start of the
     * signature), 146-147 (presumably the declarations of `plasma` and
     * `task_flags`, both used below), and 164, 173, 183, 195 (the names of
     * the four callees). Only their argument lists are visible.
     */
148 
149  int k, m, n;
150  int ldak, ldam;
151  int tempkm, tempkn, tempmm, tempnn;
152  int ib;
153 
    /* Pick up the calling context; do nothing when the sequence is already
     * in a non-success state. */
154  plasma = plasma_context_self();
155  if (sequence->status != PLASMA_SUCCESS)
156  return;
    /* Attach the sequence's quark_sequence to task_flags under the
     * TASK_SEQUENCE key. */
157  QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
158 
159  ib = PLASMA_IB;
    /* One iteration per step k, up to min(A.mt, A.nt). */
160  for (k = 0; k < min(A.mt, A.nt); k++) {
    /* Extents of the diagonal tile: the last tile row / column takes what
     * is left of A.m / A.n. */
161  tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
162  tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
163  ldak = BLKLDD(A, k);
    /* Call on the diagonal tile A(k,k) with IPIV(k,k); the trailing
     * arguments are the flag (k == A.mt-1) and the offset A.nb*k.
     * NOTE(review): callee name (listing line 164) not visible. */
165  plasma->quark, &task_flags,
166  tempkm, tempkn, ib, L.nb,
167  A(k, k), ldak, IPIV(k, k),
168  sequence, request,
169  k == A.mt-1, A.nb*k);
170 
    /* For every column n to the right of k: call on A(k,n) using
     * IPIV(k,k) and A(k,k).
     * NOTE(review): callee name (listing line 173) not visible. */
171  for (n = k+1; n < A.nt; n++) {
172  tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;
174  plasma->quark, &task_flags,
175  tempkm, tempnn, tempkm, ib, L.nb,
176  IPIV(k, k),
177  A(k, k), ldak,
178  A(k, n), ldak);
179  }
    /* For every row m below k: call on A(k,k), A(m,k), L(m,k) and
     * IPIV(m,k); the trailing arguments are the flag (m == A.mt-1) and the
     * offset A.nb*k.
     * NOTE(review): callee name (listing line 183) not visible. */
180  for (m = k+1; m < A.mt; m++) {
181  tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
182  ldam = BLKLDD(A, m);
184  plasma->quark, &task_flags,
185  tempmm, tempkn, ib, L.nb,
186  A(k, k), ldak,
187  A(m, k), ldam,
188  L(m, k), L.mb,
189  IPIV(m, k),
190  sequence, request,
191  m == A.mt-1, A.nb*k);
192 
    /* Then, for every column n to the right of k: call on A(k,n) and
     * A(m,n) using L(m,k), A(m,k) and IPIV(m,k).
     * NOTE(review): callee name (listing line 195) not visible. */
193  for (n = k+1; n < A.nt; n++) {
194  tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;
196  plasma->quark, &task_flags,
197  A.nb, tempnn, tempmm, tempnn, A.nb, ib, L.nb,
198  A(k, n), ldak,
199  A(m, n), ldam,
200  L(m, k), L.mb,
201  A(m, k), ldam,
202  IPIV(m, k));
203  }
204  }
205  }
206 }