PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
core_ctsmlq_corner.c
Go to the documentation of this file.
1 
17 #include <lapacke.h>
18 #include "common.h"
19 #undef REAL
20 #define COMPLEX
21 
22 /***************************************************************************/
113 #if defined(PLASMA_HAVE_WEAK)
114 #pragma weak CORE_ctsmlq_corner = PCORE_ctsmlq_corner
115 #define CORE_ctsmlq_corner PCORE_ctsmlq_corner
116 #define CORE_ctsmlq PCORE_ctsmlq
117 int CORE_ctsmlq(int side, int trans,
118  int M1, int N1, int M2, int N2, int K, int IB,
119  PLASMA_Complex32_t *A1, int LDA1,
120  PLASMA_Complex32_t *A2, int LDA2,
121  PLASMA_Complex32_t *V, int LDV,
122  PLASMA_Complex32_t *T, int LDT,
123  PLASMA_Complex32_t *WORK, int LDWORK);
124 #endif
125 int CORE_ctsmlq_corner( int m1, int n1, int m2, int n2, int m3, int n3,
126  int k, int ib, int nb,
127  PLASMA_Complex32_t *A1, int lda1,
128  PLASMA_Complex32_t *A2, int lda2,
129  PLASMA_Complex32_t *A3, int lda3,
130  PLASMA_Complex32_t *V, int ldv,
131  PLASMA_Complex32_t *T, int ldt,
132  PLASMA_Complex32_t *WORK, int ldwork)
133 {
136  int i, j;
137 
138  if ( m1 != n1 ) {
139  coreblas_error(1, "Illegal value of M1, N1");
140  return -1;
141  }
142 
143  /* Rebuild the symmetric block: WORK <- A1 */
144  for (i = 0; i < m1; i++)
145  for (j = i; j < n1; j++){
146  *(WORK + i + j*ldwork) = *(A1 + i + j*lda1);
147  if (j > i){
148  *(WORK + j + i*ldwork) = conjf( *(WORK + i + j*ldwork) );
149  }
150  }
151 
152  /* Copy the transpose of A2: WORK+nb*ldwork <- A2' */
153  for (j = 0; j < n2; j++)
154  for (i = 0; i < m2; i++){
155  *(WORK + j + (i + nb) * ldwork) = conjf( *(A2 + i + j*lda2) );
156  }
157 
158  side = PlasmaRight;
159  trans = PlasmaConjTrans;
160 
161  /* Right application on |A1 A2| */
162  CORE_ctsmlq(side, trans, m1, n1, m2, n2, k, ib,
163  WORK, ldwork, A2, lda2,
164  V, ldv, T, ldt,
165  WORK+3*nb*ldwork, ldwork);
166 
167  /* Rebuild the symmetric block: WORK+2*nb*ldwork <- A3 */
168  for (i = 0; i < m3; i++)
169  for (j = i; j < n3; j++){
170  *(WORK + i + (j + 2*nb) * ldwork) = *(A3 + i + j*lda3);
171  if (j > i){
172  *(WORK + j + (i + 2*nb) * ldwork) = conjf ( *(WORK + i + (j + 2*nb) * ldwork) );
173  }
174  }
175 
176  /* Right application on | A2' A3 | */
177  CORE_ctsmlq(side, trans, n2, m2, m3, n3, k, ib,
178  WORK+nb*ldwork, ldwork, WORK+2*nb*ldwork, ldwork,
179  V, ldv, T, ldt,
180  WORK + 3*nb*ldwork, ldwork);
181 
182  side = PlasmaLeft;
183  trans = PlasmaNoTrans;
184 
185  /* Left application on | A1 | */
186  /* | A2' | */
187  CORE_ctsmlq(side, trans, m1, n1, n2, m2, k, ib,
188  WORK, ldwork, WORK+nb*ldwork, ldwork,
189  V, ldv, T, ldt,
190  WORK + 3*nb*ldwork, ldwork);
191 
192  /* Copy back the final result to the upper part of A1 */
193  /* A1 = WORK */
194  for (i = 0; i < m1; i++)
195  for (j = i; j < n1; j++)
196  *(A1 + i + j*lda1) = *(WORK + i + j*ldwork);
197 
198  /* Left application on | A2 | */
199  /* | A3 | */
200  CORE_ctsmlq(side, trans, m2, n2, m3, n3, k, ib,
201  A2, lda2, WORK+2*nb*ldwork, ldwork,
202  V, ldv, T, ldt,
203  WORK + 3*nb*ldwork, ldwork);
204 
205  /* Copy back the final result to the upper part of A3 */
206  /* A3 = WORK+2*nb*ldwork */
207  for (i = 0; i < m3; i++)
208  for (j = i; j < n3; j++)
209  *(A3 + i + j*lda3) = *(WORK + i + (j+ 2*nb) * ldwork);
210 
211  return PLASMA_SUCCESS;
212 }
213 
214 
215 
216 /***************************************************************************/
220  int m1, int n1, int m2, int n2, int m3, int n3, int k, int ib, int nb,
221  PLASMA_Complex32_t *A1, int lda1,
222  PLASMA_Complex32_t *A2, int lda2,
223  PLASMA_Complex32_t *A3, int lda3,
224  PLASMA_Complex32_t *V, int ldv,
225  PLASMA_Complex32_t *T, int ldt)
226 {
227  int ldwork = nb;
228 
229  QUARK_Insert_Task(quark, CORE_ctsmlq_corner_quark, task_flags,
230  sizeof(int), &m1, VALUE,
231  sizeof(int), &n1, VALUE,
232  sizeof(int), &m2, VALUE,
233  sizeof(int), &n2, VALUE,
234  sizeof(int), &m3, VALUE,
235  sizeof(int), &n3, VALUE,
236  sizeof(int), &k, VALUE,
237  sizeof(int), &ib, VALUE,
238  sizeof(int), &nb, VALUE,
240  sizeof(int), &lda1, VALUE,
241  sizeof(PLASMA_Complex32_t)*nb*nb, A2, INOUT,
242  sizeof(int), &lda2, VALUE,
244  sizeof(int), &lda3, VALUE,
245  sizeof(PLASMA_Complex32_t)*nb*nb, V, INPUT,
246  sizeof(int), &ldv, VALUE,
247  sizeof(PLASMA_Complex32_t)*ib*nb, T, INPUT,
248  sizeof(int), &ldt, VALUE,
249  sizeof(PLASMA_Complex32_t)*4*nb*nb, NULL, SCRATCH,
250  sizeof(int), &ldwork, VALUE,
251  0);
252 }
253 
254 /***************************************************************************/
262 #if defined(PLASMA_HAVE_WEAK)
263 #pragma weak CORE_ctsmlq_corner_quark = PCORE_ctsmlq_corner_quark
264 #define CORE_ctsmlq_corner_quark PCORE_ctsmlq_corner_quark
265 #endif
267 {
268  int m1;
269  int n1;
270  int m2;
271  int n2;
272  int m3;
273  int n3;
274  int k;
275  int ib;
276  int nb;
277  PLASMA_Complex32_t *A1;
278  int lda1;
279  PLASMA_Complex32_t *A2;
280  int lda2;
281  PLASMA_Complex32_t *A3;
282  int lda3;
284  int ldv;
286  int ldt;
287  PLASMA_Complex32_t *WORK;
288  int ldwork;
289 
290  quark_unpack_args_21(quark, m1, n1, m2, n2, m3, n3, k, ib, nb,
291  A1, lda1, A2, lda2, A3, lda3, V, ldv, T, ldt, WORK, ldwork);
292 
293  CORE_ctsmlq_corner(m1, n1, m2, n2, m3, n3, k, ib, nb,
294  A1, lda1, A2, lda2, A3, lda3, V, ldv, T, ldt, WORK, ldwork);
295 
296 }