PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
core_dtsmlq_corner.c
Go to the documentation of this file.
1 
17 #include <lapacke.h>
18 #include "common.h"
19 #undef COMPLEX
20 #define REAL
21 
22 /***************************************************************************/
113 #if defined(PLASMA_HAVE_WEAK)
114 #pragma weak CORE_dtsmlq_corner = PCORE_dtsmlq_corner
115 #define CORE_dtsmlq_corner PCORE_dtsmlq_corner
116 #define CORE_dtsmlq PCORE_dtsmlq
117 int CORE_dtsmlq(int side, int trans,
118  int M1, int N1, int M2, int N2, int K, int IB,
119  double *A1, int LDA1,
120  double *A2, int LDA2,
121  double *V, int LDV,
122  double *T, int LDT,
123  double *WORK, int LDWORK);
124 #endif
125 int CORE_dtsmlq_corner( int m1, int n1, int m2, int n2, int m3, int n3,
126  int k, int ib, int nb,
127  double *A1, int lda1,
128  double *A2, int lda2,
129  double *A3, int lda3,
130  double *V, int ldv,
131  double *T, int ldt,
132  double *WORK, int ldwork)
133 {
136  int i, j;
137 
138  if ( m1 != n1 ) {
139  coreblas_error(1, "Illegal value of M1, N1");
140  return -1;
141  }
142 
143  /* Rebuild the symmetric block: WORK <- A1 */
144  for (i = 0; i < m1; i++)
145  for (j = i; j < n1; j++){
146  *(WORK + i + j*ldwork) = *(A1 + i + j*lda1);
147  if (j > i){
148  *(WORK + j + i*ldwork) = ( *(WORK + i + j*ldwork) );
149  }
150  }
151 
152  /* Copy the transpose of A2: WORK+nb*ldwork <- A2' */
153  for (j = 0; j < n2; j++)
154  for (i = 0; i < m2; i++){
155  *(WORK + j + (i + nb) * ldwork) = ( *(A2 + i + j*lda2) );
156  }
157 
158  side = PlasmaRight;
159  trans = PlasmaTrans;
160 
161  /* Right application on |A1 A2| */
162  CORE_dtsmlq(side, trans, m1, n1, m2, n2, k, ib,
163  WORK, ldwork, A2, lda2,
164  V, ldv, T, ldt,
165  WORK+3*nb*ldwork, ldwork);
166 
167  /* Rebuild the symmetric block: WORK+2*nb*ldwork <- A3 */
168  for (i = 0; i < m3; i++)
169  for (j = i; j < n3; j++){
170  *(WORK + i + (j + 2*nb) * ldwork) = *(A3 + i + j*lda3);
171  if (j > i){
172  *(WORK + j + (i + 2*nb) * ldwork) = ( *(WORK + i + (j + 2*nb) * ldwork) );
173  }
174  }
175 
176  /* Right application on | A2' A3 | */
177  CORE_dtsmlq(side, trans, n2, m2, m3, n3, k, ib,
178  WORK+nb*ldwork, ldwork, WORK+2*nb*ldwork, ldwork,
179  V, ldv, T, ldt,
180  WORK + 3*nb*ldwork, ldwork);
181 
182  side = PlasmaLeft;
183  trans = PlasmaNoTrans;
184 
185  /* Left application on | A1 | */
186  /* | A2' | */
187  CORE_dtsmlq(side, trans, m1, n1, n2, m2, k, ib,
188  WORK, ldwork, WORK+nb*ldwork, ldwork,
189  V, ldv, T, ldt,
190  WORK + 3*nb*ldwork, ldwork);
191 
192  /* Copy back the final result to the upper part of A1 */
193  /* A1 = WORK */
194  for (i = 0; i < m1; i++)
195  for (j = i; j < n1; j++)
196  *(A1 + i + j*lda1) = *(WORK + i + j*ldwork);
197 
198  /* Left application on | A2 | */
199  /* | A3 | */
200  CORE_dtsmlq(side, trans, m2, n2, m3, n3, k, ib,
201  A2, lda2, WORK+2*nb*ldwork, ldwork,
202  V, ldv, T, ldt,
203  WORK + 3*nb*ldwork, ldwork);
204 
205  /* Copy back the final result to the upper part of A3 */
206  /* A3 = WORK+2*nb*ldwork */
207  for (i = 0; i < m3; i++)
208  for (j = i; j < n3; j++)
209  *(A3 + i + j*lda3) = *(WORK + i + (j+ 2*nb) * ldwork);
210 
211  return PLASMA_SUCCESS;
212 }
213 
214 
215 
216 /***************************************************************************/
220  int m1, int n1, int m2, int n2, int m3, int n3, int k, int ib, int nb,
221  double *A1, int lda1,
222  double *A2, int lda2,
223  double *A3, int lda3,
224  double *V, int ldv,
225  double *T, int ldt)
226 {
227  int ldwork = nb;
228 
229  QUARK_Insert_Task(quark, CORE_dtsmlq_corner_quark, task_flags,
230  sizeof(int), &m1, VALUE,
231  sizeof(int), &n1, VALUE,
232  sizeof(int), &m2, VALUE,
233  sizeof(int), &n2, VALUE,
234  sizeof(int), &m3, VALUE,
235  sizeof(int), &n3, VALUE,
236  sizeof(int), &k, VALUE,
237  sizeof(int), &ib, VALUE,
238  sizeof(int), &nb, VALUE,
239  sizeof(double)*nb*nb, A1, INOUT|QUARK_REGION_D|QUARK_REGION_U,
240  sizeof(int), &lda1, VALUE,
241  sizeof(double)*nb*nb, A2, INOUT,
242  sizeof(int), &lda2, VALUE,
243  sizeof(double)*nb*nb, A3, INOUT|QUARK_REGION_D|QUARK_REGION_U,
244  sizeof(int), &lda3, VALUE,
245  sizeof(double)*nb*nb, V, INPUT,
246  sizeof(int), &ldv, VALUE,
247  sizeof(double)*ib*nb, T, INPUT,
248  sizeof(int), &ldt, VALUE,
249  sizeof(double)*4*nb*nb, NULL, SCRATCH,
250  sizeof(int), &ldwork, VALUE,
251  0);
252 }
253 
254 /***************************************************************************/
262 #if defined(PLASMA_HAVE_WEAK)
263 #pragma weak CORE_dtsmlq_corner_quark = PCORE_dtsmlq_corner_quark
264 #define CORE_dtsmlq_corner_quark PCORE_dtsmlq_corner_quark
265 #endif
267 {
268  int m1;
269  int n1;
270  int m2;
271  int n2;
272  int m3;
273  int n3;
274  int k;
275  int ib;
276  int nb;
277  double *A1;
278  int lda1;
279  double *A2;
280  int lda2;
281  double *A3;
282  int lda3;
283  double *V;
284  int ldv;
285  double *T;
286  int ldt;
287  double *WORK;
288  int ldwork;
289 
290  quark_unpack_args_21(quark, m1, n1, m2, n2, m3, n3, k, ib, nb,
291  A1, lda1, A2, lda2, A3, lda3, V, ldv, T, ldt, WORK, ldwork);
292 
293  CORE_dtsmlq_corner(m1, n1, m2, n2, m3, n3, k, ib, nb,
294  A1, lda1, A2, lda2, A3, lda3, V, ldv, T, ldt, WORK, ldwork);
295 
296 }