PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
psgeqrf.c
Go to the documentation of this file.
1 
17 #include "common.h"
18 
/* Shorthands that forward to BLKADDR with element type float for the
 * descriptors named A and T that are in scope at the point of use.
 * Presumably each yields the address of tile (m, n) -- confirm against the
 * definition of BLKADDR, which is not visible in this listing. */
19 #define A(m,n) BLKADDR(A, float, m, n)
20 #define T(m,n) BLKADDR(T, float, m, n)
21 /***************************************************************************/
25 {
    /*
     * Statically scheduled tile sweep over descriptor A with companion
     * descriptor T. Presumably the tile QR factorization (the file is
     * psgeqrf.c and the visible kernels are CORE_sormqr / CORE_stsmqr) --
     * confirm against the function header.
     *
     * NOTE(review): the signature (listing lines 22-24) is absent from this
     * view; `plasma`, used below, is presumably the function parameter.
     *
     * Each worker starts from PLASMA_RANK, walks tiles (m, n) for successive
     * steps k, and advances n by PLASMA_SIZE when a column is exhausted.
     * Ordering between workers is expressed through the ss_cond_wait /
     * ss_cond_set calls on (row, column, step) triples.
     */
26  PLASMA_desc A;
27  PLASMA_desc T;
28  PLASMA_sequence *sequence;
29  PLASMA_request *request;
30 
31  int k, m, n;
32  int next_k;
33  int next_m;
34  int next_n;
35  int ldak, ldam;
36  int tempkm, tempkn, tempnn, tempmm;
37  int ib = PLASMA_IB;
38  float *work, *tau;
39 
    /* request is unpacked here but not referenced again in the visible body. */
40  plasma_unpack_args_4(A, T, sequence, request);
    /* Do nothing if the sequence is already in a non-success state. */
41  if (sequence->status != PLASMA_SUCCESS)
42  return;
    /* Per-call scratch buffers, sized ib*T.nb and A.nb.
     * NOTE(review): the returned pointers are used without a visible NULL
     * check -- confirm plasma_private_alloc cannot return NULL. */
43  work = (float*)plasma_private_alloc(plasma, ib*T.nb, T.dtyp);
44  tau = (float*)plasma_private_alloc(plasma, A.nb, A.dtyp);
    /* Presumably sets up an A.mt x A.nt progress table with every entry at
     * -1, matching the k-1 waits issued at step k = 0 -- confirm. */
45  ss_init(A.mt, A.nt, -1);
46 
    /* Locate this worker's first (k, n): start at column PLASMA_RANK and,
     * while that is past the last tile column, move to the next step k and
     * wrap the column index to n - A.nt + k. */
47  k = 0;
48  n = PLASMA_RANK;
49  while (n >= A.nt) {
50  k++;
51  n = n-A.nt+k;
52  }
    /* The first tile row handled at step k is row k itself. */
53  m = k;
54 
55  while (k < min(A.mt, A.nt) && n < A.nt) {
    /* Precompute the indices for the following iteration: go one row down;
     * past the last row, jump PLASMA_SIZE columns ahead (wrapping into later
     * steps exactly as in the start-up loop above) and restart at row next_k. */
56  next_n = n;
57  next_m = m;
58  next_k = k;
59 
60  next_m++;
61  if (next_m == A.mt) {
62  next_n += PLASMA_SIZE;
63  while (next_n >= A.nt && next_k < min(A.mt, A.nt)) {
64  next_k++;
65  next_n = next_n-A.nt+next_k;
66  }
67  next_m = next_k;
68  }
69 
    /* Tile extents: the last tile row/column takes the remainder of A.m/A.n,
     * every other tile is a full A.mb / A.nb. */
70  tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
71  tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
72  tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;
73  tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
74 
75  ldak = BLKLDD(A, k);
76  ldam = BLKLDD(A, m);
77 
    /* n == k: this worker is handling the panel column of step k. */
78  if (n == k) {
79  if (m == k) {
    /* Diagonal tile: wait on (k, k) at step k-1, run the kernel, then publish
     * (k, k) at step k.
     * NOTE(review): listing line 81 (the callee name and opening
     * parenthesis) is absent from this view; only its arguments follow. */
80  ss_cond_wait(k, k, k-1);
82  tempkm, tempkn, ib,
83  A(k, k), ldak,
84  T(k, k), T.mb,
85  tau, work);
86  ss_cond_set(k, k, k);
87  }
88  else {
    /* Sub-diagonal panel tile (m, k): wait on (m, k) at step k-1, run a
     * kernel taking A(k, k) and A(m, k), then publish (m, k) at step k.
     * NOTE(review): listing line 90 (the callee name) is absent here. */
89  ss_cond_wait(m, k, k-1);
91  tempmm, tempkn, ib,
92  A(k, k), ldak,
93  A(m, k), ldam,
94  T(m, k), T.mb,
95  tau, work);
96  ss_cond_set(m, k, k);
97  }
98  }
99  else {
    /* n != k: update of a tile in column n using results from column k. */
100  if (m == k) {
    /* Needs (k, k) published at step k and (k, n) at step k-1.
     * No ss_cond_set follows this call in the visible code.
     * NOTE(review): listing line 104 (the leading arguments of CORE_sormqr)
     * is absent from this view. */
101  ss_cond_wait(k, k, k);
102  ss_cond_wait(k, n, k-1);
103  CORE_sormqr(
105  tempkm, tempnn, tempkm, ib,
106  A(k, k), ldak,
107  T(k, k), T.mb,
108  A(k, n), ldak,
109  work, T.nb);
110  }
111  else {
    /* Needs (m, k) published at step k and (m, n) at step k-1; publishes
     * (m, n) at step k afterwards.
     * NOTE(review): listing line 115 (the leading arguments of CORE_stsmqr)
     * is absent from this view. */
112  ss_cond_wait(m, k, k);
113  ss_cond_wait(m, n, k-1);
114  CORE_stsmqr(
116  A.nb, tempnn, tempmm, tempnn, A.nb, ib,
117  A(k, n), ldak,
118  A(m, n), ldam,
119  A(m, k), ldam,
120  T(m, k), T.mb,
121  work, ib);
122  ss_cond_set(m, n, k);
123  }
124  }
    /* Commit the indices computed at the top of the iteration. */
125  n = next_n;
126  m = next_m;
127  k = next_k;
128  }
    /* Release the scratch buffers and tear down the ss_* state. */
129  plasma_private_free(plasma, work);
130  plasma_private_free(plasma, tau);
131  ss_finalize();
132 }
133 
134 /***************************************************************************/
138  PLASMA_sequence *sequence, PLASMA_request *request)
139 {
    /*
     * Dynamically scheduled counterpart of the tile sweep above: the same
     * k / n / m loop nest, but each kernel invocation is handed to QUARK
     * (every call receives plasma->quark and &task_flags).
     *
     * NOTE(review): the first signature line (listing lines 135-137) and the
     * declarations on listing lines 140-141 are absent from this view; A and
     * T are presumably the leading parameters, and `plasma` / `task_flags`
     * are presumably declared on the missing lines.
     * request is not referenced in the visible body.
     */
142 
143  int k, m, n;
144  int ldak, ldam;
145  int tempkm, tempkn, tempnn, tempmm;
146  int ib;
147 
148  plasma = plasma_context_self();
    /* Do nothing if the sequence is already in a non-success state. */
149  if (sequence->status != PLASMA_SUCCESS)
150  return;
    /* Attach the sequence's QUARK sequence handle to the task flags used by
     * every call below. */
151  QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
152 
153  ib = PLASMA_IB;
154  for (k = 0; k < min(A.mt, A.nt); k++) {
    /* Extents of diagonal tile (k, k): remainder on the last tile row/column,
     * otherwise a full A.mb / A.nb. */
155  tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
156  tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
157  ldak = BLKLDD(A, k);
    /* Call on the diagonal tile A(k, k) with T(k, k).
     * NOTE(review): listing line 158 (the callee name) is absent here. */
159  plasma->quark, &task_flags,
160  tempkm, tempkn, ib, T.nb,
161  A(k, k), ldak,
162  T(k, k), T.mb);
163 
    /* One call per tile A(k, n) to the right of the diagonal, each also
     * taking A(k, k) and T(k, k).
     * NOTE(review): listing lines 166 and 168 (callee name and leading
     * arguments) are absent here. */
164  for (n = k+1; n < A.nt; n++) {
165  tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;
167  plasma->quark, &task_flags,
169  tempkm, tempnn, tempkm, ib, T.nb,
170  A(k, k), ldak,
171  T(k, k), T.mb,
172  A(k, n), ldak);
173  }
    /* For each tile row m below the diagonal: one call on the pair
     * A(k, k) / A(m, k) with T(m, k), followed by one call per column n > k.
     * NOTE(review): listing line 177 (the callee name) is absent here. */
174  for (m = k+1; m < A.mt; m++) {
175  tempmm = m == A.mt-1 ? A.m-m*A.mb : A.mb;
176  ldam = BLKLDD(A, m);
178  plasma->quark, &task_flags,
179  tempmm, tempkn, ib, T.nb,
180  A(k, k), ldak,
181  A(m, k), ldam,
182  T(m, k), T.mb);
183 
    /* Call on the pair A(k, n) / A(m, n), also taking A(m, k) and T(m, k).
     * NOTE(review): listing lines 186 and 188 (callee name and leading
     * arguments) are absent here. The first size argument is A.mb, whereas
     * the CORE_stsmqr call in the statically scheduled variant passes A.nb
     * in that position -- confirm the two agree for non-square tiles. */
184  for (n = k+1; n < A.nt; n++) {
185  tempnn = n == A.nt-1 ? A.n-n*A.nb : A.nb;
187  plasma->quark, &task_flags,
189  A.mb, tempnn, tempmm, tempnn, A.nb, ib, T.nb,
190  A(k, n), ldak,
191  A(m, n), ldam,
192  A(m, k), ldam,
193  T(m, k), T.mb);
194  }
195  }
196  }
197 }