PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
core_stsmqr_sytra1.c
Go to the documentation of this file.
1 
18 #include <lapacke.h>
19 #include "common.h"
20 #undef COMPLEX
21 #define REAL
22 
23 /***************************************************************************/
/*
 * Weak-symbol setup, active only when PLASMA_HAVE_WEAK is defined:
 *  - CORE_stsmqr_sytra1 is declared as a weak alias of PCORE_stsmqr_sytra1;
 *  - the names CORE_stsmqr_sytra1 and CORE_stsmqr are then macro-mapped to
 *    their P-prefixed counterparts, so the code that follows defines and
 *    calls the PCORE_* symbols;
 *  - because of that mapping, the prototype below declares PCORE_stsmqr.
 */
115 #if defined(PLASMA_HAVE_WEAK)
116 #pragma weak CORE_stsmqr_sytra1 = PCORE_stsmqr_sytra1
117 #define CORE_stsmqr_sytra1 PCORE_stsmqr_sytra1
118 #define CORE_stsmqr PCORE_stsmqr
119 int CORE_stsmqr(int side, int trans,
120  int M1, int N1, int M2, int N2, int K, int IB,
121  float *A1, int LDA1,
122  float *A2, int LDA2,
123  float *V, int LDV,
124  float *T, int LDT,
125  float *WORK, int LDWORK);
126 #endif
128  int m1, int n1, int m2, int n2,
129  int k, int ib,
130  float *A1, int lda1,
131  float *A2, int lda2,
132  float *V, int ldv,
133  float *T, int ldt,
134  float *WORK, int ldwork)
135 {
/*
 * NOTE(review): the line carrying the return type, the function name and the
 * first parameters (side, trans) is missing from this listing (it jumps from
 * 126 to 128); the name is presumably CORE_stsmqr_sytra1 - confirm against
 * the real source file.
 *
 * What the body does:
 *   1. rejects a non-square A1 (m1 != n1) with error code -3;
 *   2. transposes the m1-by-n1 matrix A1 in place (leading dimension lda1);
 *   3. calls CORE_stsmqr with every argument forwarded unchanged;
 *   4. transposes A1 in place a second time;
 *   5. returns PLASMA_SUCCESS.
 *
 * WORK[0] is used as the swap temporary during both transpositions, so WORK
 * must point to at least one writable float; the same WORK pointer is also
 * handed to CORE_stsmqr together with ldwork.
 */
136  int i, j;
137 
/* A1 must be square, otherwise the element swaps below would be invalid. */
138  if ( (m1 != n1) ) {
139  coreblas_error(3, "Illegal value of M1, N1");
140  return -3;
141  }
142 
143  /* in-place transposition of A1 */
144  for (j = 0; j < n1; j++){
/* Diagonal element assigned to itself: a no-op in this real-precision code
 * (presumably a conjugation in the complex variants - TODO confirm). */
145  A1[j + j*lda1] = (A1[j + j*lda1]);
146 
/* Swap A1(i,j) with A1(j,i) for every i below the diagonal of column j. */
147  for (i = j+1; i < m1; i++){
148  *WORK = *(A1 + i + j*lda1);
149  *(A1 + i + j*lda1) = (*(A1 + j + i*lda1));
150  *(A1 + j + i*lda1) = (*WORK);
151  }
152  }
153 
/* NOTE(review): the return value of CORE_stsmqr is discarded, so this
 * function reports PLASMA_SUCCESS even if that call fails - confirm this is
 * intended. */
154  CORE_stsmqr(side, trans, m1, n1, m2, n2, k, ib, A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork);
155 
/* Same swap loop as above, applied to A1 as left by CORE_stsmqr. */
156  /* in-place transposition of A1 */
157  for (j = 0; j < n1; j++){
158  A1[j + j*lda1] = (A1[j + j*lda1]);
159 
160  for (i = j+1; i < m1; i++){
161  *WORK = *(A1 + i + j*lda1);
162  *(A1 + i + j*lda1) = (*(A1 + j + i*lda1));
163  *(A1 + j + i*lda1) = (*WORK);
164  }
165  }
166 
167  return PLASMA_SUCCESS;
168 }
169 
170 /***************************************************************************/
174  int side, int trans,
175  int m1, int n1, int m2, int n2, int k, int ib, int nb,
176  float *A1, int lda1,
177  float *A2, int lda2,
178  float *V, int ldv,
179  float *T, int ldt)
180 {
/*
 * NOTE(review): the lines carrying the return type, the function name and
 * the leading parameters (the body uses `quark` and `task_flags`) are missing
 * from this listing (it jumps from 170 to 174) - confirm against the real
 * source file.
 *
 * Inserts one QUARK task that runs CORE_stsmqr_sytra1_quark. Eighteen
 * arguments are packed, in this order: side, trans, m1, n1, m2, n2, k, ib,
 * A1, lda1, A2, lda2, V, ldv, T, ldt, <scratch>, ldwork. This order must stay
 * in sync with the quark_unpack_args_18 call in CORE_stsmqr_sytra1_quark.
 *
 * Scalars are passed with the VALUE flag. A1 and A2 are declared INOUT with
 * a size of nb*nb floats, V (nb*nb floats) and T (ib*nb floats) are declared
 * INPUT. The workspace is passed as a NULL pointer of ib*nb floats with the
 * SCRATCH flag (presumably allocated by QUARK at execution time - TODO
 * confirm). The trailing 0 presumably terminates the argument list.
 *
 * NOTE(review): A1 is tagged QUARK_REGION_L|QUARK_REGION_D, while the kernel
 * CORE_stsmqr_sytra1 swaps A1(i,j) with A1(j,i), i.e. touches elements on
 * both sides of the diagonal - confirm the declared regions are sufficient.
 */
/* Leading dimension of the workspace: ib for PlasmaLeft, nb otherwise. */
181  int ldwork = side == PlasmaLeft ? ib : nb;
182 
183  QUARK_Insert_Task(quark, CORE_stsmqr_sytra1_quark, task_flags,
184  sizeof(PLASMA_enum), &side, VALUE,
185  sizeof(PLASMA_enum), &trans, VALUE,
186  sizeof(int), &m1, VALUE,
187  sizeof(int), &n1, VALUE,
188  sizeof(int), &m2, VALUE,
189  sizeof(int), &n2, VALUE,
190  sizeof(int), &k, VALUE,
191  sizeof(int), &ib, VALUE,
192  sizeof(float)*nb*nb, A1, INOUT|QUARK_REGION_L|QUARK_REGION_D,
193  sizeof(int), &lda1, VALUE,
194  sizeof(float)*nb*nb, A2, INOUT,
195  sizeof(int), &lda2, VALUE,
196  sizeof(float)*nb*nb, V, INPUT,
197  sizeof(int), &ldv, VALUE,
198  sizeof(float)*ib*nb, T, INPUT,
199  sizeof(int), &ldt, VALUE,
200  sizeof(float)*ib*nb, NULL, SCRATCH,
201  sizeof(int), &ldwork, VALUE,
202  0);
203 }
204 
205 /***************************************************************************/
/*
 * When PLASMA_HAVE_WEAK is defined, CORE_stsmqr_sytra1_quark is declared as a
 * weak alias of PCORE_stsmqr_sytra1_quark, and the name is macro-mapped so
 * that the definition that follows provides the PCORE_* symbol.
 */
208 #if defined(PLASMA_HAVE_WEAK)
209 #pragma weak CORE_stsmqr_sytra1_quark = PCORE_stsmqr_sytra1_quark
210 #define CORE_stsmqr_sytra1_quark PCORE_stsmqr_sytra1_quark
211 #endif
/*
 * NOTE(review): the line carrying the return type, the function name and the
 * parameter list (the body uses `quark`) is missing from this listing (it
 * jumps from 211 to 213); presumably this is CORE_stsmqr_sytra1_quark, the
 * task body registered by the QUARK_Insert_Task call earlier in this file -
 * confirm against the real source file.
 *
 * Unpacks the 18 task arguments into the locals below and forwards them, in
 * the same order, to CORE_stsmqr_sytra1.
 */
213 {
214  int side;
215  int trans;
216  int m1;
217  int n1;
218  int m2;
219  int n2;
220  int k;
221  int ib;
222  float *A1;
223  int lda1;
224  float *A2;
225  int lda2;
226  float *V;
227  int ldv;
228  float *T;
229  int ldt;
230  float *WORK;
231  int ldwork;
232 
/* The unpack order must match the order in which the arguments were packed
 * by the QUARK_Insert_Task call. */
233  quark_unpack_args_18(quark, side, trans, m1, n1, m2, n2, k, ib, A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork);
/* The status returned by CORE_stsmqr_sytra1 is not inspected here. */
234  CORE_stsmqr_sytra1(side, trans, m1, n1, m2, n2, k, ib, A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork);
235 }