PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
core_cherfb.c
Go to the documentation of this file.
1 
15 #include <lapacke.h>
16 #include "common.h"
17 #undef REAL
18 #define COMPLEX
19 
20 /***************************************************************************/
/*
 * Weak-symbol build: CORE_cherfb is emitted as a weak alias of PCORE_cherfb,
 * and the three kernel names used in this file are redirected to their
 * PCORE_ counterparts.  Because the macros rename CORE_cunmlq / CORE_cunmqr,
 * the prototypes below declare the PCORE_ versions that this file calls.
 *
 * The integer arguments are listed as (M, N, K, IB) for both kernels, which
 * is the order in which CORE_cherfb passes (n, n, k, ib) to them.
 */
#if defined(PLASMA_HAVE_WEAK)
#pragma weak CORE_cherfb = PCORE_cherfb
#define CORE_cherfb PCORE_cherfb
#define CORE_cunmlq PCORE_cunmlq
#define CORE_cunmqr PCORE_cunmqr
int CORE_cunmlq(int side, int trans,
                int M, int N, int K, int IB,
                PLASMA_Complex32_t *V, int LDV,
                PLASMA_Complex32_t *T, int LDT,
                PLASMA_Complex32_t *C, int LDC,
                PLASMA_Complex32_t *WORK, int LDWORK);
int CORE_cunmqr(int side, int trans,
                int M, int N, int K, int IB,
                PLASMA_Complex32_t *V, int LDV,
                PLASMA_Complex32_t *T, int LDT,
                PLASMA_Complex32_t *C, int LDC,
                PLASMA_Complex32_t *WORK, int LDWORK);
#endif
111  int k, int ib, int nb,
112  PLASMA_Complex32_t *A, int lda,
113  PLASMA_Complex32_t *T, int ldt,
114  PLASMA_Complex32_t *C, int ldc,
115  PLASMA_Complex32_t *WORK, int ldwork )
116 {
117  int i, j;
118 
119  if (uplo == PlasmaLower) {
120  /* Rebuild the symmetric block: WORK <- C */
121  for (j = 0; j < n; j++)
122  for (i = j; i < n; i++){
123  *(WORK + i + j * ldwork) = *(C + i + j*ldc);
124  if (i > j){
125  *(WORK + j + i * ldwork) = *(WORK + i + j * ldwork);
126 #ifdef COMPLEX
127  LAPACKE_clacgv_work(1, WORK + j + i * ldwork, ldwork);
128 #endif
129  }
130  }
131 
132  /* Left */
133  CORE_cunmqr(PlasmaLeft, PlasmaConjTrans, n, n, k, ib,
134  A, lda, T, ldt, WORK, ldwork, WORK+nb*ldwork, ldwork);
135  /* Right */
136  CORE_cunmqr(PlasmaRight, PlasmaNoTrans, n, n, k, ib,
137  A, lda, T, ldt, WORK, ldwork, WORK+nb*ldwork, ldwork);
138 
139  /*
140  * Copy back the final result to the lower part of C
141  */
142  /* C = WORK */
143  for (j = 0; j < n; j++)
144  for (i = j; i < n; i++)
145  *(C + i + j*ldc) = *(WORK + i + j * ldwork);
146  }
147  else {
148  /* Rebuild the symmetric block: WORK <- C */
149  for (i = 0; i < n; i++)
150  for (j = i; j < n; j++){
151  *(WORK + i + j * ldwork) = *(C + i + j*ldc);
152  if (j > i){
153  *(WORK + j + i * ldwork) = *(WORK + i + j * ldwork);
154 #ifdef COMPLEX
155  LAPACKE_clacgv_work(1, WORK + j + i * ldwork, ldwork);
156 #endif
157  }
158  }
159 
160  /* Right */
161  CORE_cunmlq(PlasmaRight, PlasmaConjTrans, n, n, k, ib,
162  A, lda, T, ldt, WORK, ldwork, WORK+nb*ldwork, ldwork);
163  /* Left */
164  CORE_cunmlq(PlasmaLeft, PlasmaNoTrans, n, n, k, ib,
165  A, lda, T, ldt, WORK, ldwork, WORK+nb*ldwork, ldwork);
166 
167  /*
168  * Copy back the final result to the upper part of C
169  */
170  /* C = WORK */
171  for (i = 0; i < n; i++)
172  for (j = i; j < n; j++)
173  *(C + i + j*ldc) = *(WORK + i + j * ldwork);
174  }
175  return 0;
176 }
177 
178 
179 /***************************************************************************/
/*
 * QUARK_CORE_cherfb - packs the arguments of a CORE_cherfb call into a task
 * whose task function is CORE_cherfb_quark.
 *
 * Arguments are packed in the order in which CORE_cherfb_quark unpacks them:
 * uplo, n, k, ib, nb, A, lda, T, ldt, C, ldc, WORK, ldwork.
 *
 * NOTE(review): this listing has gaps (its own line numbers 184, 190, 197 and
 * 201 are absent).  The notes below mark each gap; restore the missing lines
 * from the upstream file rather than from these notes.
 */
183 void QUARK_CORE_cherfb(Quark *quark, Quark_Task_Flags *task_flags,
/* NOTE(review): listing line 184 is missing here; the body uses &uplo, so a
 * `uplo` parameter is presumably declared on it. */
185  int n, int k, int ib, int nb,
186  PLASMA_Complex32_t *A, int lda,
187  PLASMA_Complex32_t *T, int ldt,
188  PLASMA_Complex32_t *C, int ldc)
189 {
/* NOTE(review): listing line 190 is missing here; it presumably opens the
 * task-insertion call whose argument list follows. */
191  quark, CORE_cherfb_quark, task_flags,
/* Scalars are passed by value. */
192  sizeof(PLASMA_enum), &uplo, VALUE,
193  sizeof(int), &n, VALUE,
194  sizeof(int), &k, VALUE,
195  sizeof(int), &ib, VALUE,
196  sizeof(int), &nb, VALUE,
/* NOTE(review): listing line 197 is missing here; by position it is the
 * entry for A -- its size and dependency flag are not visible. */
198  sizeof(int), &lda, VALUE,
/* T is declared as a read-only dependency of ib*nb elements. */
199  sizeof(PLASMA_Complex32_t)*ib*nb, T, INPUT,
200  sizeof(int), &ldt, VALUE,
/* NOTE(review): listing line 201 is missing here; by position it is the
 * entry for C -- its size and dependency flag are not visible. */
202  sizeof(int), &ldc, VALUE,
/* Scratch workspace of 2*nb*nb elements; no pointer is supplied (NULL). */
203  sizeof(PLASMA_Complex32_t)*2*nb*nb, NULL, SCRATCH,
/* nb is passed a second time; CORE_cherfb_quark unpacks it as ldwork. */
204  sizeof(int), &nb, VALUE,
/* A trailing 0 ends the argument list. */
205  0);
206 }
207 
208 /***************************************************************************/
211 #if defined(PLASMA_HAVE_WEAK)
212 #pragma weak CORE_cherfb_quark = PCORE_cherfb_quark
213 #define CORE_cherfb_quark PCORE_cherfb_quark
214 #endif
216 {
218  int n;
219  int k;
220  int ib;
221  int nb;
223  int lda;
225  int ldt;
227  int ldc;
228  PLASMA_Complex32_t *WORK;
229  int ldwork;
230 
231  quark_unpack_args_13(quark, uplo, n, k, ib, nb, A, lda, T, ldt, C, ldc, WORK, ldwork);
232  CORE_cherfb(uplo, n, k, ib, nb, A, lda, T, ldt, C, ldc, WORK, ldwork);
233 }