PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
core_ctstrf.c
Go to the documentation of this file.
1 
17 #include "common.h"
18 #include <cblas.h>
19 #include <math.h>
20 
21 /***************************************************************************/
88 #if defined(PLASMA_HAVE_WEAK)
89 #pragma weak CORE_ctstrf = PCORE_ctstrf
90 #define CORE_ctstrf PCORE_ctstrf
91 #define CORE_cssssm PCORE_cssssm
92 int CORE_cssssm(int M1, int N1, int M2, int N2, int K, int IB,
93  PLASMA_Complex32_t *A1, int LDA1,
94  PLASMA_Complex32_t *A2, int LDA2,
95  PLASMA_Complex32_t *L1, int LDL1,
96  PLASMA_Complex32_t *L2, int LDL2,
97  int *IPIV);
98 #endif
99 int CORE_ctstrf(int M, int N, int IB, int NB,
100  PLASMA_Complex32_t *U, int LDU,
101  PLASMA_Complex32_t *A, int LDA,
102  PLASMA_Complex32_t *L, int LDL,
103  int *IPIV,
104  PLASMA_Complex32_t *WORK, int LDWORK,
105  int *INFO)
106 {
107  static PLASMA_Complex32_t zzero = 0.0;
108  static PLASMA_Complex32_t mzone =-1.0;
109 
110  PLASMA_Complex32_t alpha;
111  int i, j, ii, sb;
112  int im, ip;
113 
114  /* Check input arguments */
115  *INFO = 0;
116  if (M < 0) {
117  coreblas_error(1, "Illegal value of M");
118  return -1;
119  }
120  if (N < 0) {
121  coreblas_error(2, "Illegal value of N");
122  return -2;
123  }
124  if (IB < 0) {
125  coreblas_error(3, "Illegal value of IB");
126  return -3;
127  }
128  if ((LDU < max(1,NB)) && (NB > 0)) {
129  coreblas_error(6, "Illegal value of LDU");
130  return -6;
131  }
132  if ((LDA < max(1,M)) && (M > 0)) {
133  coreblas_error(8, "Illegal value of LDA");
134  return -8;
135  }
136  if ((LDL < max(1,IB)) && (IB > 0)) {
137  coreblas_error(10, "Illegal value of LDL");
138  return -10;
139  }
140 
141  /* Quick return */
142  if ((M == 0) || (N == 0) || (IB == 0))
143  return PLASMA_SUCCESS;
144 
145  /* Set L to 0 */
146  memset(L, 0, LDL*N*sizeof(PLASMA_Complex32_t));
147 
148  ip = 0;
149  for (ii = 0; ii < N; ii += IB) {
150  sb = min(N-ii, IB);
151 
152  for (i = 0; i < sb; i++) {
153  im = cblas_icamax(M, &A[LDA*(ii+i)], 1);
154  IPIV[ip] = ii+i+1;
155 
156  if (cabsf(A[LDA*(ii+i)+im]) > cabsf(U[LDU*(ii+i)+ii+i])) {
157  /*
158  * Swap behind.
159  */
160  cblas_cswap(i, &L[LDL*ii+i], LDL, &WORK[im], LDWORK );
161  /*
162  * Swap ahead.
163  */
164  cblas_cswap(sb-i, &U[LDU*(ii+i)+ii+i], LDU, &A[LDA*(ii+i)+im], LDA );
165  /*
166  * Set IPIV.
167  */
168  IPIV[ip] = NB + im + 1;
169 
170  for (j = 0; j < i; j++) {
171  A[LDA*(ii+j)+im] = zzero;
172  }
173  }
174 
175  if ((*INFO == 0) && (cabsf(A[LDA*(ii+i)+im]) == zzero)
176  && (cabsf(U[LDU*(ii+i)+ii+i]) == zzero)) {
177  *INFO = ii+i+1;
178  }
179 
180  alpha = ((PLASMA_Complex32_t)1. / U[LDU*(ii+i)+ii+i]);
181  cblas_cscal(M, CBLAS_SADDR(alpha), &A[LDA*(ii+i)], 1);
182  cblas_ccopy(M, &A[LDA*(ii+i)], 1, &WORK[LDWORK*i], 1);
183  cblas_cgeru(
184  CblasColMajor, M, sb-i-1,
185  CBLAS_SADDR(mzone), &A[LDA*(ii+i)], 1,
186  &U[LDU*(ii+i+1)+ii+i], LDU,
187  &A[LDA*(ii+i+1)], LDA );
188  ip = ip+1;
189  }
190  /*
191  * Apply the subpanel to the rest of the panel.
192  */
193  if(ii+i < N) {
194  for(j = ii; j < ii+sb; j++) {
195  if (IPIV[j] <= NB) {
196  IPIV[j] = IPIV[j] - ii;
197  }
198  }
199 
200  CORE_cssssm(
201  NB, N-(ii+sb), M, N-(ii+sb), sb, sb,
202  &U[LDU*(ii+sb)+ii], LDU,
203  &A[LDA*(ii+sb)], LDA,
204  &L[LDL*ii], LDL,
205  WORK, LDWORK, &IPIV[ii]);
206 
207  for(j = ii; j < ii+sb; j++) {
208  if (IPIV[j] <= NB) {
209  IPIV[j] = IPIV[j] + ii;
210  }
211  }
212  }
213  }
214  return PLASMA_SUCCESS;
215 }
216 
217 /***************************************************************************/
220 void QUARK_CORE_ctstrf(Quark *quark, Quark_Task_Flags *task_flags,
221  int m, int n, int ib, int nb,
222  PLASMA_Complex32_t *U, int ldu,
223  PLASMA_Complex32_t *A, int lda,
224  PLASMA_Complex32_t *L, int ldl,
225  int *IPIV,
226  PLASMA_sequence *sequence, PLASMA_request *request,
227  PLASMA_bool check_info, int iinfo)
228 {
230  QUARK_Insert_Task(quark, CORE_ctstrf_quark, task_flags,
231  sizeof(int), &m, VALUE,
232  sizeof(int), &n, VALUE,
233  sizeof(int), &ib, VALUE,
234  sizeof(int), &nb, VALUE,
236  sizeof(int), &ldu, VALUE,
237  sizeof(PLASMA_Complex32_t)*nb*nb, A, INOUT | LOCALITY,
238  sizeof(int), &lda, VALUE,
239  sizeof(PLASMA_Complex32_t)*ib*nb, L, OUTPUT,
240  sizeof(int), &ldl, VALUE,
241  sizeof(int)*nb, IPIV, OUTPUT,
242  sizeof(PLASMA_Complex32_t)*ib*nb, NULL, SCRATCH,
243  sizeof(int), &nb, VALUE,
244  sizeof(PLASMA_sequence*), &sequence, VALUE,
245  sizeof(PLASMA_request*), &request, VALUE,
246  sizeof(PLASMA_bool), &check_info, VALUE,
247  sizeof(int), &iinfo, VALUE,
248  0);
249 }
250 
251 /***************************************************************************/
254 #if defined(PLASMA_HAVE_WEAK)
255 #pragma weak CORE_ctstrf_quark = PCORE_ctstrf_quark
256 #define CORE_ctstrf_quark PCORE_ctstrf_quark
257 #endif
259 {
260  int m;
261  int n;
262  int ib;
263  int nb;
265  int ldu;
267  int lda;
269  int ldl;
270  int *IPIV;
271  PLASMA_Complex32_t *WORK;
272  int ldwork;
273  PLASMA_sequence *sequence;
274  PLASMA_request *request;
275  PLASMA_bool check_info;
276  int iinfo;
277 
278  int info;
279 
280  quark_unpack_args_17(quark, m, n, ib, nb, U, ldu, A, lda, L, ldl, IPIV, WORK, ldwork, sequence, request, check_info, iinfo);
281  CORE_ctstrf(m, n, ib, nb, U, ldu, A, lda, L, ldl, IPIV, WORK, ldwork, &info);
282  if (info != PLASMA_SUCCESS && check_info)
283  plasma_sequence_flush(quark, sequence, request, iinfo + info);
284 }