PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
core_stsmqr_corner.c
Go to the documentation of this file.
1 
17 #include <lapacke.h>
18 #include "common.h"
19 #undef COMPLEX
20 #define REAL
21 
22 /***************************************************************************/
/*
 * Weak-symbol setup, active only when PLASMA_HAVE_WEAK is defined:
 *  - CORE_stsmqr_corner is emitted as a weak alias of PCORE_stsmqr_corner;
 *  - from here on, the names CORE_stsmqr_corner and CORE_stsmqr are rewritten
 *    by the preprocessor to their PCORE_ counterparts.
 */
113 #if defined(PLASMA_HAVE_WEAK)
114 #pragma weak CORE_stsmqr_corner = PCORE_stsmqr_corner
115 #define CORE_stsmqr_corner PCORE_stsmqr_corner
116 #define CORE_stsmqr PCORE_stsmqr
/*
 * Prototype of the kernel called by CORE_stsmqr_corner. Because of the
 * #define just above, this line actually declares PCORE_stsmqr.
 */
117 int CORE_stsmqr(int side, int trans,
118  int M1, int N1, int M2, int N2, int K, int IB,
119  float *A1, int LDA1,
120  float *A2, int LDA2,
121  float *V, int LDV,
122  float *T, int LDT,
123  float *WORK, int LDWORK);
124 #endif
125 int CORE_stsmqr_corner( int m1, int n1, int m2, int n2, int m3, int n3,
126  int k, int ib, int nb,
127  float *A1, int lda1,
128  float *A2, int lda2,
129  float *A3, int lda3,
130  float *V, int ldv,
131  float *T, int ldt,
132  float *WORK, int ldwork)
133 {
134  int i, j;
136 
137  if ( m1 != n1 ) {
138  coreblas_error(1, "Illegal value of M1, N1");
139  return -1;
140  }
141 
142  /* Rebuild the symmetric block: WORK <- A1 */
143  for (j = 0; j < n1; j++)
144  for (i = j; i < m1; i++){
145  *(WORK + i + j*ldwork) = *(A1 + i + j*lda1);
146  if (i > j){
147  *(WORK + j + i*ldwork) = ( *(WORK + i + j*ldwork) );
148  }
149  }
150 
151  /* Copy the transpose of A2: WORK+nb*ldwork <- A2' */
152  for (j = 0; j < n2; j++)
153  for (i = 0; i < m2; i++){
154  *(WORK + j + (i + nb) * ldwork) = ( *(A2 + i + j*lda2) );
155  }
156 
157  side = PlasmaLeft;
158  trans = PlasmaTrans;
159 
160  /* Left application on |A1| */
161  /* |A2| */
162  CORE_stsmqr(side, trans, m1, n1, m2, n2, k, ib,
163  WORK, ldwork, A2, lda2,
164  V, ldv, T, ldt,
165  WORK + 3*nb*ldwork, ldwork);
166 
167  /* Rebuild the symmetric block: WORK+2*nb*ldwork <- A3 */
168  for (j = 0; j < n3; j++)
169  for (i = j; i < m3; i++){
170  *(WORK + i + (j + 2*nb) * ldwork) = *(A3 + i + j*lda3);
171  if (i != j){
172  *(WORK + j + (i + 2*nb) * ldwork) = ( *(WORK + i + (j + 2*nb) * ldwork) );
173  }
174  }
175  /* Left application on | A2'| */
176  /* | A3 | */
177  CORE_stsmqr(side, trans, n2, m2, m3, n3, k, ib,
178  WORK+nb*ldwork, ldwork, WORK+2*nb*ldwork, ldwork,
179  V, ldv, T, ldt,
180  WORK + 3*nb*ldwork, ldwork);
181 
182  side = PlasmaRight;
183  trans = PlasmaNoTrans;
184 
185  /* Right application on | A1 A2' | */
186  CORE_stsmqr(side, trans, m1, n1, n2, m2, k, ib,
187  WORK, ldwork, WORK+nb*ldwork, ldwork,
188  V, ldv, T, ldt,
189  WORK + 3*nb*ldwork, ldwork);
190 
191  /* Copy back the final result to the lower part of A1 */
192  /* A1 = WORK */
193  for (j = 0; j < n1; j++)
194  for (i = j; i < m1; i++)
195  *(A1 + i + j*lda1) = *(WORK + i + j*ldwork);
196 
197  /* Right application on | A2 A3 | */
198  CORE_stsmqr(side, trans, m2, n2, m3, n3, k, ib,
199  A2, lda2, WORK+2*nb*ldwork, ldwork,
200  V, ldv, T, ldt,
201  WORK + 3*nb*ldwork, ldwork);
202 
203  /* Copy back the final result to the lower part of A3 */
204  /* A3 = WORK+2*nb*ldwork */
205  for (j = 0; j < n3; j++)
206  for (i = j; i < m3; i++)
207  *(A3 + i + j*lda3) = *(WORK + i + (j+ 2*nb) * ldwork);
208 
209  return PLASMA_SUCCESS;
210 }
211 /***************************************************************************/
/*
 * Remainder of a task-insertion wrapper: it packs the arguments of
 * CORE_stsmqr_corner and submits CORE_stsmqr_corner_quark as a QUARK task.
 *
 * NOTE(review): the opening line of this definition (return type, function
 * name, and the leading parameters that declare `quark` and `task_flags`) is
 * not present in this listing; restore it from the original source before
 * compiling.
 */
215  int m1, int n1, int m2, int n2, int m3, int n3, int k, int ib, int nb,
216  float *A1, int lda1,
217  float *A2, int lda2,
218  float *A3, int lda3,
219  float *V, int ldv,
220  float *T, int ldt)
221 {
/* Leading dimension passed to the kernel for its scratch buffer. */
222  int ldwork = nb;
223 
/*
 * Arguments are packed as (size, pointer, flag) triples, in exactly the order
 * in which CORE_stsmqr_corner_quark unpacks them (21 entries).
 */
224  QUARK_Insert_Task(quark, CORE_stsmqr_corner_quark, task_flags,
225  sizeof(int), &m1, VALUE,
226  sizeof(int), &n1, VALUE,
227  sizeof(int), &m2, VALUE,
228  sizeof(int), &n2, VALUE,
229  sizeof(int), &m3, VALUE,
230  sizeof(int), &n3, VALUE,
231  sizeof(int), &k, VALUE,
232  sizeof(int), &ib, VALUE,
233  sizeof(int), &nb, VALUE,
/*
 * A1 and A3 carry region flags D and L in addition to INOUT -- presumably
 * "diagonal" and "lower" (confirm against the QUARK documentation). This
 * matches the kernel, which reads and writes only the lower triangle,
 * diagonal included, of those two tiles.
 */
234  sizeof(float)*nb*nb, A1, INOUT|QUARK_REGION_D|QUARK_REGION_L,
235  sizeof(int), &lda1, VALUE,
236  sizeof(float)*nb*nb, A2, INOUT,
237  sizeof(int), &lda2, VALUE,
238  sizeof(float)*nb*nb, A3, INOUT|QUARK_REGION_D|QUARK_REGION_L,
239  sizeof(int), &lda3, VALUE,
240  sizeof(float)*nb*nb, V, INPUT,
241  sizeof(int), &ldv, VALUE,
242  sizeof(float)*ib*nb, T, INPUT,
243  sizeof(int), &ldt, VALUE,
/*
 * Scratch request of 4*nb*nb floats with a NULL pointer: the kernel uses WORK
 * as four panels of nb columns with leading dimension ldwork (= nb here).
 * Presumably QUARK provides the buffer at execution time -- confirm.
 */
244  sizeof(float)*4*nb*nb, NULL, SCRATCH,
245  sizeof(int), &ldwork, VALUE,
/* Final 0: presumably the end-of-list marker for the variadic call -- confirm against the QUARK documentation. */
246  0);
247 }
248 
249 
250 #if defined(PLASMA_HAVE_WEAK)
251 #pragma weak CORE_stsmqr_corner_quark = PCORE_stsmqr_corner_quark
252 #define CORE_stsmqr_corner_quark PCORE_stsmqr_corner_quark
253 #endif
255 {
256  int m1;
257  int n1;
258  int m2;
259  int n2;
260  int m3;
261  int n3;
262  int k;
263  int ib;
264  int nb;
265  float *A1;
266  int lda1;
267  float *A2;
268  int lda2;
269  float *A3;
270  int lda3;
271  float *V;
272  int ldv;
273  float *T;
274  int ldt;
275  float *WORK;
276  int ldwork;
277 
278  quark_unpack_args_21(quark, m1, n1, m2, n2, m3, n3, k, ib, nb,
279  A1, lda1, A2, lda2, A3, lda3, V, ldv, T, ldt, WORK, ldwork);
280  CORE_stsmqr_corner(m1, n1, m2, n2, m3, n3, k, ib, nb,
281  A1, lda1, A2, lda2, A3, lda3, V, ldv, T, ldt, WORK, ldwork);
282 }