PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
pcgelqf.c
Go to the documentation of this file.
1 
17 #include "common.h"
18 
/*
 * Tile-address shorthands: A(m,n) and T(m,n) expand to BLKADDR() applied to
 * the identifiers A and T respectively, with element type PLASMA_Complex32_t
 * and tile coordinates (m, n). They therefore require variables named A and T
 * to be in scope wherever they are used.
 */
19 #define A(m,n) BLKADDR(A, PLASMA_Complex32_t, m, n)
20 #define T(m,n) BLKADDR(T, PLASMA_Complex32_t, m, n)
21 /***************************************************************************/
/*
 * Static-scheduling worker body for the tile LQ factorization in this file.
 *
 * NOTE(review): this listing is missing the function's signature line
 * (listing line 24) and the lines that open the two kernel calls in the
 * "m == k" branch (listing lines 81 and 90), as well as the first argument
 * line of the CORE_cunmlq / CORE_ctsmlq calls (listing lines 104 and 115).
 * The comments below describe only what the visible lines show.
 *
 * Visible behaviour:
 *  - unpacks A, T, sequence and request from the context;
 *  - returns immediately (before allocating anything) when the sequence
 *    status is not PLASMA_SUCCESS;
 *  - walks (k, m, n) tile coordinates, starting from a row derived from
 *    PLASMA_RANK and advancing rows by PLASMA_SIZE, and applies one kernel per
 *    visited tile, ordered through ss_cond_wait / ss_cond_set;
 *  - frees the two private work buffers and calls ss_finalize() on exit.
 */
25 {
26  PLASMA_desc A;
27  PLASMA_desc T;
28  PLASMA_sequence *sequence;
29  PLASMA_request *request;
30 
31  int k, m, n;
32  int next_k;
33  int next_m;
34  int next_n;
35  int ldak, ldam;
36  int tempkm, tempkn, tempmm, tempnn;
37  int ib = PLASMA_IB;
38  PLASMA_Complex32_t *work, *tau;
39 
40  plasma_unpack_args_4(A, T, sequence, request);
/* Bail out before allocating so there is nothing to release on this path. */
41  if (sequence->status != PLASMA_SUCCESS)
42  return;
/* Private scratch buffers: ib*T.nb entries for work, A.nb entries for tau
 * (presumably counted in elements of the given dtyp -- TODO confirm). */
43  work = (PLASMA_Complex32_t*)plasma_private_alloc(plasma, ib*T.nb, T.dtyp);
44  tau = (PLASMA_Complex32_t*)plasma_private_alloc(plasma, A.nb, A.dtyp);
/* NOTE(review): presumably sets up an A.mt x A.nt progress table with every
 * entry at -1; the waits below compare against k-1, which is -1 for k == 0. */
45  ss_init(A.mt, A.nt, -1);
46 
/* Starting tile: begin at row PLASMA_RANK; whenever the row index runs past
 * the last tile row, move to the next step k and wrap the row to m-A.mt+k. */
47  k = 0;
48  m = PLASMA_RANK;
49  while (m >= A.mt) {
50  k++;
51  m = m-A.mt+k;
52  }
53  n = k;
54 
55  while (k < min(A.mt, A.nt) && m < A.mt) {
/* Pre-compute the coordinates of the tile to process after this one. */
56  next_m = m;
57  next_n = n;
58  next_k = k;
59 
/* Advance along the tile row first; once the row is exhausted, jump
 * PLASMA_SIZE rows ahead, wrapping into later steps k as needed, and restart
 * the column at the new step's diagonal. */
60  next_n++;
61  if (next_n == A.nt) {
62  next_m += PLASMA_SIZE;
63  while (next_m >= A.mt && next_k < min(A.nt, A.mt)) {
64  next_k++;
65  next_m = next_m-A.mt+next_k;
66  }
67  next_n = next_k;
68  }
69 
/* Tile extents: the last tile row/column takes the remainder
 * (A.m - i*A.mb, A.n - j*A.nb); every other tile is a full A.mb / A.nb. */
70  tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
71  tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
72  tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
73  tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;
74 
/* Leading dimensions of tile rows k and m, as reported by BLKLDD. */
75  ldak = BLKLDD(A, k);
76  ldam = BLKLDD(A, m);
77 
78  if (m == k) {
79  if (n == k) {
/* Diagonal tile (k,k): wait until its progress value is k-1, run the kernel
 * whose call-opening line is missing from this listing on A(k,k) / T(k,k),
 * then publish progress k for (k,k). */
80  ss_cond_wait(k, k, k-1);
82  tempkm, tempkn, ib,
83  A(k, k), ldak,
84  T(k, k), T.mb,
85  tau, work);
86  ss_cond_set(k, k, k);
87  }
88  else {
/* Tile (k,n) right of the diagonal in row k: wait for (k,n) to reach k-1, run
 * the kernel (call-opening line missing from this listing) on A(k,k), A(k,n)
 * and T(k,n), then publish progress k for (k,n). */
89  ss_cond_wait(k, n, k-1);
91  tempkm, tempnn, ib,
92  A(k, k), ldak,
93  A(k, n), ldak,
94  T(k, n), T.mb,
95  tau, work);
96  ss_cond_set(k, n, k);
97  }
98  }
99  else {
100  if (n == k) {
/* Tile (m,k) in column k: needs (k,k) at progress k and (m,k) at progress
 * k-1, then CORE_cunmlq is called with A(k,k), T(k,k) and A(m,k). No
 * ss_cond_set follows in this branch. */
101  ss_cond_wait(k, k, k);
102  ss_cond_wait(m, k, k-1);
103  CORE_cunmlq(
105  tempmm, tempkn, tempkn, ib,
106  A(k, k), ldak,
107  T(k, k), T.mb,
108  A(m, k), ldam,
109  work, T.nb);
110  }
111  else {
/* Remaining tile (m,n): needs (k,n) at progress k and (m,n) at progress k-1,
 * then CORE_ctsmlq is called with A(m,k), A(m,n), A(k,n) and T(k,n), and
 * progress k is published for (m,n). */
112  ss_cond_wait(k, n, k);
113  ss_cond_wait(m, n, k-1);
114  CORE_ctsmlq(
116  tempmm, A.nb, tempmm, tempnn, A.nb, ib,
117  A(m, k), ldam,
118  A(m, n), ldam,
119  A(k, n), ldak,
120  T(k, n), T.mb,
121  work, T.nb);
122  ss_cond_set(m, n, k);
123  }
124  }
/* Move on to the tile computed at the top of this iteration. */
125  m = next_m;
126  n = next_n;
127  k = next_k;
128  }
/* Release the scratch buffers obtained above and tear down the ss_* state. */
129  plasma_private_free(plasma, work);
130  plasma_private_free(plasma, tau);
131  ss_finalize();
132 }
133 
134 /***************************************************************************/
/*
 * Dynamic-scheduling variant of the tile LQ factorization in this file: it
 * loops over every (k, m, n) tile combination and passes plasma->quark and
 * &task_flags to one call per tile.
 *
 * NOTE(review): this listing is missing the start of the signature (listing
 * lines 135-137), two lines at the top of the body (140-141; presumably the
 * declarations of `plasma` and `task_flags`, which are used below but not
 * declared in the visible lines), and the call-opening / first-argument lines
 * of all four task submissions (158, 167, 169, 177, 187, 189). Only the tail
 * of the parameter list -- sequence and request -- is visible; A and T are
 * used as descriptors in the body.
 */
138  PLASMA_sequence *sequence, PLASMA_request *request)
139 {
142 
143  int k, m, n;
144  int ldak, ldam;
145  int tempkm, tempkn, tempmm, tempnn;
146  int ib;
147 
148  plasma = plasma_context_self();
/* Nothing is submitted when the sequence has already failed. */
149  if (sequence->status != PLASMA_SUCCESS)
150  return;
/* Tag the task flags with this sequence's QUARK sequence handle. */
151  QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
152 
153  ib = PLASMA_IB;
154  for (k = 0; k < min(A.mt, A.nt); k++) {
/* Extents of diagonal tile (k,k): the last tile row/column takes the
 * remainder, every other tile is a full A.mb / A.nb. */
155  tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
156  tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
157  ldak = BLKLDD(A, k);
/* Task on the diagonal tile: operands A(k,k) and T(k,k). */
159  plasma->quark, &task_flags,
160  tempkm, tempkn, ib, T.nb,
161  A(k, k), ldak,
162  T(k, k), T.mb);
163 
/* One task per tile below the diagonal in column k: operands A(k,k), T(k,k)
 * and A(m,k). */
164  for (m = k+1; m < A.mt; m++) {
165  tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
166  ldam = BLKLDD(A, m);
168  plasma->quark, &task_flags,
170  tempmm, tempkn, tempkn, ib, T.nb,
171  A(k, k), ldak,
172  T(k, k), T.mb,
173  A(m, k), ldam);
174  }
/* For every tile column to the right of the diagonal: first a task on
 * A(k,k), A(k,n) and T(k,n), then one task per lower tile row m on A(m,k),
 * A(m,n), A(k,n) and T(k,n). */
175  for (n = k+1; n < A.nt; n++) {
176  tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;
178  plasma->quark, &task_flags,
179  tempkm, tempnn, ib, T.nb,
180  A(k, k), ldak,
181  A(k, n), ldak,
182  T(k, n), T.mb);
183 
184  for (m = k+1; m < A.mt; m++) {
185  tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
186  ldam = BLKLDD(A, m);
/* NOTE(review): the fifth size argument here is A.mb, whereas the
 * CORE_ctsmlq call earlier in this file passes A.nb in the same position.
 * The two agree only when A.mb == A.nb -- confirm which is intended. */
188  plasma->quark, &task_flags,
190  tempmm, A.nb, tempmm, tempnn, A.mb, ib, T.nb,
191  A(m, k), ldam,
192  A(m, n), ldam,
193  A(k, n), ldak,
194  T(k, n), T.mb);
195  }
196  }
197  }
198 }