PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
core_dtsqrt.c
Go to the documentation of this file.
1 
17 #include <lapacke.h>
18 #include "common.h"
19 #undef COMPLEX
20 #define REAL
21 
22 /***************************************************************************/
84 #if defined(PLASMA_HAVE_WEAK)
85 #pragma weak CORE_dtsqrt = PCORE_dtsqrt
86 #define CORE_dtsqrt PCORE_dtsqrt
87 #define CORE_dtsmqr PCORE_dtsmqr
88 int CORE_dtsmqr(int side, int trans,
89  int M1, int N1, int M2, int N2, int K, int IB,
90  double *A1, int LDA1,
91  double *A2, int LDA2,
92  double *V, int LDV,
93  double *T, int LDT,
94  double *WORK, int LDWORK);
95 #endif
96 
97 int CORE_dtsqrt(int M, int N, int IB,
98  double *A1, int LDA1,
99  double *A2, int LDA2,
100  double *T, int LDT,
101  double *TAU, double *WORK)
102 {
103  static double zone = 1.0;
104  static double zzero = 0.0;
105 
106  double alpha;
107  int i, ii, sb;
108 
109  /* Check input arguments */
110  if (M < 0) {
111  coreblas_error(1, "Illegal value of M");
112  return -1;
113  }
114  if (N < 0) {
115  coreblas_error(2, "Illegal value of N");
116  return -2;
117  }
118  if (IB < 0) {
119  coreblas_error(3, "Illegal value of IB");
120  return -3;
121  }
122  if ((LDA2 < max(1,M)) && (M > 0)) {
123  coreblas_error(8, "Illegal value of LDA2");
124  return -8;
125  }
126 
127  /* Quick return */
128  if ((M == 0) || (N == 0) || (IB == 0))
129  return PLASMA_SUCCESS;
130 
131  for(ii = 0; ii < N; ii += IB) {
132  sb = min(N-ii, IB);
133  for(i = 0; i < sb; i++) {
134  /*
135  * Generate elementary reflector H( II*IB+I ) to annihilate
136  * A( II*IB+I:M, II*IB+I )
137  */
138  LAPACKE_dlarfg_work(M+1, &A1[LDA1*(ii+i)+ii+i], &A2[LDA2*(ii+i)], 1, &TAU[ii+i]);
139 
140  if (ii+i+1 < N) {
141  /*
142  * Apply H( II*IB+I ) to A( II*IB+I:M, II*IB+I+1:II*IB+IB ) from the left
143  */
144  alpha = -(TAU[ii+i]);
145  cblas_dcopy(
146  sb-i-1,
147  &A1[LDA1*(ii+i+1)+(ii+i)], LDA1,
148  WORK, 1);
149 #ifdef COMPLEX
150  LAPACKE_dlacgv_work(sb-i-1, WORK, 1);
151 #endif
152  cblas_dgemv(
154  M, sb-i-1,
155  (zone), &A2[LDA2*(ii+i+1)], LDA2,
156  &A2[LDA2*(ii+i)], 1,
157  (zone), WORK, 1);
158 #ifdef COMPLEX
159  LAPACKE_dlacgv_work(sb-i-1, WORK, 1 );
160 #endif
161  cblas_daxpy(
162  sb-i-1, (alpha),
163  WORK, 1,
164  &A1[LDA1*(ii+i+1)+ii+i], LDA1);
165 #ifdef COMPLEX
166  LAPACKE_dlacgv_work(sb-i-1, WORK, 1 );
167 #endif
168  cblas_dger(
169  CblasColMajor, M, sb-i-1, (alpha),
170  &A2[LDA2*(ii+i)], 1,
171  WORK, 1,
172  &A2[LDA2*(ii+i+1)], LDA2);
173  }
174  /*
175  * Calculate T
176  */
177  alpha = -TAU[ii+i];
178  cblas_dgemv(
180  (alpha), &A2[LDA2*ii], LDA2,
181  &A2[LDA2*(ii+i)], 1,
182  (zzero), &T[LDT*(ii+i)], 1);
183 
184  cblas_dtrmv(
187  &T[LDT*ii], LDT,
188  &T[LDT*(ii+i)], 1);
189 
190  T[LDT*(ii+i)+i] = TAU[ii+i];
191  }
192  if (N > ii+sb) {
193  CORE_dtsmqr(
195  sb, N-(ii+sb), M, N-(ii+sb), IB, IB,
196  &A1[LDA1*(ii+sb)+ii], LDA1,
197  &A2[LDA2*(ii+sb)], LDA2,
198  &A2[LDA2*ii], LDA2,
199  &T[LDT*ii], LDT,
200  WORK, sb);
201  }
202  }
203  return PLASMA_SUCCESS;
204 }
205 
206 /***************************************************************************/
209 void QUARK_CORE_dtsqrt(Quark *quark, Quark_Task_Flags *task_flags,
210  int m, int n, int ib, int nb,
211  double *A1, int lda1,
212  double *A2, int lda2,
213  double *T, int ldt)
214 {
216  QUARK_Insert_Task(quark, CORE_dtsqrt_quark, task_flags,
217  sizeof(int), &m, VALUE,
218  sizeof(int), &n, VALUE,
219  sizeof(int), &ib, VALUE,
220  sizeof(double)*nb*nb, A1, INOUT | QUARK_REGION_D | QUARK_REGION_U,
221  sizeof(int), &lda1, VALUE,
222  sizeof(double)*nb*nb, A2, INOUT | LOCALITY,
223  sizeof(int), &lda2, VALUE,
224  sizeof(double)*ib*nb, T, OUTPUT,
225  sizeof(int), &ldt, VALUE,
226  sizeof(double)*nb, NULL, SCRATCH,
227  sizeof(double)*ib*nb, NULL, SCRATCH,
228  0);
229 }
230 
231 /***************************************************************************/
234 #if defined(PLASMA_HAVE_WEAK)
235 #pragma weak CORE_dtsqrt_quark = PCORE_dtsqrt_quark
236 #define CORE_dtsqrt_quark PCORE_dtsqrt_quark
237 #endif
239 {
240  int m;
241  int n;
242  int ib;
243  double *A1;
244  int lda1;
245  double *A2;
246  int lda2;
247  double *T;
248  int ldt;
249  double *TAU;
250  double *WORK;
251 
252  quark_unpack_args_11(quark, m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK);
253  CORE_dtsqrt(m, n, ib, A1, lda1, A2, lda2, T, ldt, TAU, WORK);
254 }