PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
core_ststrf.c
Go to the documentation of this file.
1 
17 #include "common.h"
18 #include <cblas.h>
19 #include <math.h>
20 
21 /***************************************************************************/
88 #if defined(PLASMA_HAVE_WEAK)
89 #pragma weak CORE_ststrf = PCORE_ststrf
90 #define CORE_ststrf PCORE_ststrf
91 #define CORE_sssssm PCORE_sssssm
92 int CORE_sssssm(int M1, int N1, int M2, int N2, int K, int IB,
93  float *A1, int LDA1,
94  float *A2, int LDA2,
95  float *L1, int LDL1,
96  float *L2, int LDL2,
97  int *IPIV);
98 #endif
99 int CORE_ststrf(int M, int N, int IB, int NB,
100  float *U, int LDU,
101  float *A, int LDA,
102  float *L, int LDL,
103  int *IPIV,
104  float *WORK, int LDWORK,
105  int *INFO)
106 {
107  static float zzero = 0.0;
108  static float mzone =-1.0;
109 
110  float alpha;
111  int i, j, ii, sb;
112  int im, ip;
113 
114  /* Check input arguments */
115  *INFO = 0;
116  if (M < 0) {
117  coreblas_error(1, "Illegal value of M");
118  return -1;
119  }
120  if (N < 0) {
121  coreblas_error(2, "Illegal value of N");
122  return -2;
123  }
124  if (IB < 0) {
125  coreblas_error(3, "Illegal value of IB");
126  return -3;
127  }
128  if ((LDU < max(1,NB)) && (NB > 0)) {
129  coreblas_error(6, "Illegal value of LDU");
130  return -6;
131  }
132  if ((LDA < max(1,M)) && (M > 0)) {
133  coreblas_error(8, "Illegal value of LDA");
134  return -8;
135  }
136  if ((LDL < max(1,IB)) && (IB > 0)) {
137  coreblas_error(10, "Illegal value of LDL");
138  return -10;
139  }
140 
141  /* Quick return */
142  if ((M == 0) || (N == 0) || (IB == 0))
143  return PLASMA_SUCCESS;
144 
145  /* Set L to 0 */
146  memset(L, 0, LDL*N*sizeof(float));
147 
148  ip = 0;
149  for (ii = 0; ii < N; ii += IB) {
150  sb = min(N-ii, IB);
151 
152  for (i = 0; i < sb; i++) {
153  im = cblas_isamax(M, &A[LDA*(ii+i)], 1);
154  IPIV[ip] = ii+i+1;
155 
156  if (fabsf(A[LDA*(ii+i)+im]) > fabsf(U[LDU*(ii+i)+ii+i])) {
157  /*
158  * Swap behind.
159  */
160  cblas_sswap(i, &L[LDL*ii+i], LDL, &WORK[im], LDWORK );
161  /*
162  * Swap ahead.
163  */
164  cblas_sswap(sb-i, &U[LDU*(ii+i)+ii+i], LDU, &A[LDA*(ii+i)+im], LDA );
165  /*
166  * Set IPIV.
167  */
168  IPIV[ip] = NB + im + 1;
169 
170  for (j = 0; j < i; j++) {
171  A[LDA*(ii+j)+im] = zzero;
172  }
173  }
174 
175  if ((*INFO == 0) && (fabsf(A[LDA*(ii+i)+im]) == zzero)
176  && (fabsf(U[LDU*(ii+i)+ii+i]) == zzero)) {
177  *INFO = ii+i+1;
178  }
179 
180  alpha = ((float)1. / U[LDU*(ii+i)+ii+i]);
181  cblas_sscal(M, (alpha), &A[LDA*(ii+i)], 1);
182  cblas_scopy(M, &A[LDA*(ii+i)], 1, &WORK[LDWORK*i], 1);
183  cblas_sger(
184  CblasColMajor, M, sb-i-1,
185  (mzone), &A[LDA*(ii+i)], 1,
186  &U[LDU*(ii+i+1)+ii+i], LDU,
187  &A[LDA*(ii+i+1)], LDA );
188  ip = ip+1;
189  }
190  /*
191  * Apply the subpanel to the rest of the panel.
192  */
193  if(ii+i < N) {
194  for(j = ii; j < ii+sb; j++) {
195  if (IPIV[j] <= NB) {
196  IPIV[j] = IPIV[j] - ii;
197  }
198  }
199 
200  CORE_sssssm(
201  NB, N-(ii+sb), M, N-(ii+sb), sb, sb,
202  &U[LDU*(ii+sb)+ii], LDU,
203  &A[LDA*(ii+sb)], LDA,
204  &L[LDL*ii], LDL,
205  WORK, LDWORK, &IPIV[ii]);
206 
207  for(j = ii; j < ii+sb; j++) {
208  if (IPIV[j] <= NB) {
209  IPIV[j] = IPIV[j] + ii;
210  }
211  }
212  }
213  }
214  return PLASMA_SUCCESS;
215 }
216 
217 /***************************************************************************/
220 void QUARK_CORE_ststrf(Quark *quark, Quark_Task_Flags *task_flags,
221  int m, int n, int ib, int nb,
222  float *U, int ldu,
223  float *A, int lda,
224  float *L, int ldl,
225  int *IPIV,
226  PLASMA_sequence *sequence, PLASMA_request *request,
227  PLASMA_bool check_info, int iinfo)
228 {
230  QUARK_Insert_Task(quark, CORE_ststrf_quark, task_flags,
231  sizeof(int), &m, VALUE,
232  sizeof(int), &n, VALUE,
233  sizeof(int), &ib, VALUE,
234  sizeof(int), &nb, VALUE,
235  sizeof(float)*nb*nb, U, INOUT | QUARK_REGION_D | QUARK_REGION_U,
236  sizeof(int), &ldu, VALUE,
237  sizeof(float)*nb*nb, A, INOUT | LOCALITY,
238  sizeof(int), &lda, VALUE,
239  sizeof(float)*ib*nb, L, OUTPUT,
240  sizeof(int), &ldl, VALUE,
241  sizeof(int)*nb, IPIV, OUTPUT,
242  sizeof(float)*ib*nb, NULL, SCRATCH,
243  sizeof(int), &nb, VALUE,
244  sizeof(PLASMA_sequence*), &sequence, VALUE,
245  sizeof(PLASMA_request*), &request, VALUE,
246  sizeof(PLASMA_bool), &check_info, VALUE,
247  sizeof(int), &iinfo, VALUE,
248  0);
249 }
250 
251 /***************************************************************************/
254 #if defined(PLASMA_HAVE_WEAK)
255 #pragma weak CORE_ststrf_quark = PCORE_ststrf_quark
256 #define CORE_ststrf_quark PCORE_ststrf_quark
257 #endif
259 {
260  int m;
261  int n;
262  int ib;
263  int nb;
264  float *U;
265  int ldu;
266  float *A;
267  int lda;
268  float *L;
269  int ldl;
270  int *IPIV;
271  float *WORK;
272  int ldwork;
273  PLASMA_sequence *sequence;
274  PLASMA_request *request;
275  PLASMA_bool check_info;
276  int iinfo;
277 
278  int info;
279 
280  quark_unpack_args_17(quark, m, n, ib, nb, U, ldu, A, lda, L, ldl, IPIV, WORK, ldwork, sequence, request, check_info, iinfo);
281  CORE_ststrf(m, n, ib, nb, U, ldu, A, lda, L, ldl, IPIV, WORK, ldwork, &info);
282  if (info != PLASMA_SUCCESS && check_info)
283  plasma_sequence_flush(quark, sequence, request, iinfo + info);
284 }