PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
core_slarfx_tbrd.c
Go to the documentation of this file.
1 
15 #include <lapacke.h>
16 #include "common.h"
17 
18 /***************************************************************************/
85 int
87  float V,
88  float TAU,
89  float *C1, int LDC1,
90  float *C2, int LDC2)
91 {
92  float V2, T2, SUM;
93  int j;
94 
95  if (TAU == (float)0.0)
96  return PLASMA_SUCCESS;
97 
98  /*
99  * Special code for 2 x 2 Householder where V1 = I
100  */
101  if(side==PlasmaLeft){
102  V2 = (V);
103  T2 = TAU*(V2);
104  for (j = 0; j < N ; j++, C1+=LDC1 ) {
105  SUM = *C1 + V2 * (*C2);
106  *C1 = *C1 - SUM*TAU;
107  *C2 = *C2 - SUM*T2;
108  C2 += LDC2;
109  }
110  }
111  else {
112  V2 = V;
113  T2 = TAU*(V2);
114  for (j = 0; j < N ; j++, C1++){
115  SUM = *C1 + V2 * (*C2);
116  *C1 = *C1 - SUM*TAU;
117  *C2 = *C2 - SUM*T2;
118  C2++;
119  }
120  }
121 
122  return PLASMA_SUCCESS;
123 }
124 
125 
126 /***************************************************************************/
184 int
186  float V,
187  float TAU,
188  float *C1,
189  float *C2,
190  float *C3)
191 {
192  float T2, SUM, TEMP;
193 
194  /* Quick return */
195  if (TAU == (float)0.0)
196  return PLASMA_SUCCESS;
197 
198  /*
199  * Special code for a diagonal block C1
200  * C2 C3
201  */
202  if(uplo==PlasmaLower) {
203  /*
204  * Do the corner Left then Right (used for the lower case
205  * tridiag) L and R for the 2x2 corner
206  * C(N-1, N-1) C(N-1,N) C1 TEMP
207  * C(N , N-1) C(N ,N) C2 C3
208  * For Left : use (TAU) and V.
209  * For Right: nothing, keep TAU and V.
210  * Left 1 ==> C1
211  * C2
212  */
213  TEMP = (*C2); /* copy C2 here before modifying it. */
214  T2 = (TAU) * V;
215  SUM = *C1 + (V) * (*C2);
216  *C1 = *C1 - SUM * (TAU);
217  *C2 = *C2 - SUM * T2;
218  /* Left 2 ==> TEMP */
219  /* C3 */
220  SUM = TEMP + (V) * (*C3);
221  TEMP = TEMP - SUM * (TAU);
222  *C3 = *C3 - SUM * T2;
223  /* Right 1 ==> C1 TEMP. NB: no need to compute corner (2,2)=TEMP */
224  T2 = TAU * (V);
225  SUM = *C1 + V*TEMP;
226  *C1 = *C1 - SUM*TAU;
227  /* Right 2 ==> C2 C3 */
228  SUM = *C2 + V*(*C3);
229  *C2 = *C2 - SUM*TAU;
230  *C3 = *C3 - SUM*T2;
231  }
232  else {
233  /*
234  * Do the corner Right then Left (used for the upper case tridiag)
235  * C(N-1, N-1) C(N-1,N) C1 C2
236  * C(N , N-1) C(N ,N) TEMP C3
237  * For Left : use TAU and (V).
238  * For Right: use (TAU) and (V).
239  * Right 1 ==> C1 C2
240  */
241  V = (V);
242  TEMP = (*C2); /* copy C2 here before modifying it. */
243  T2 = (TAU) * (V);
244  SUM = *C1 + V * (*C2);
245  *C1 = *C1 - SUM * (TAU);
246  *C2 = *C2 - SUM * T2;
247  /* Right 2 ==> TEMP C3 */
248  SUM = TEMP + V * (*C3);
249  TEMP = TEMP - SUM * (TAU);
250  *C3 = *C3 - SUM * T2;
251  /* Left 1 ==> C1 */
252  /* TEMP. NB: no need to compute corner (2,1)=TEMP */
253  T2 = TAU * V;
254  SUM = *C1 + (V) * TEMP;
255  *C1 = *C1 - SUM * TAU;
256  /* Left 2 ==> C2 */
257  /* C3 */
258  SUM = *C2 + (V) * (*C3);
259  *C2 = *C2 - SUM * TAU;
260  *C3 = *C3 - SUM * T2;
261  }
262 
263  return PLASMA_SUCCESS;
264 }
265 
266 
267 /***************************************************************************/
334 int
336  float *V,
337  float *TAU,
338  float *C1,
339  float *C2,
340  float *C3)
341 {
342  float T2, SUM, TEMP, VIN, TAUIN;
343 
344  /* Quick return */
345  if (*TAU == (float)0.0)
346  return PLASMA_SUCCESS;
347 
348  /*
349  * Special code for a diagonal block C1
350  * C2 C3
351  */
352  if(uplo==PlasmaLower){
353  /*
354  * Do the corner for the lower case BIDIAG ==> Left then will
355  * create a new nnz. eliminate it and modify V TAU and then
356  * Right L and R for the 2x2 corner
357  * C(N-1, N-1) C(N-1,N) C1 TEMP
358  * C(N , N-1) C(N ,N) C2 C3
359  */
360  VIN = *V;
361  TAUIN = (*TAU);
362  /* Left 1 ==> C1 */
363  /* C2 */
364  VIN = (VIN);
365  T2 = TAUIN * (VIN);
366  SUM = *C1 + VIN*(*C2);
367  *C1 = *C1 - SUM*TAUIN;
368  *C2 = *C2 - SUM*T2;
369  /* new nnz at TEMP and update C3 */
370  SUM = VIN * (*C3);
371  TEMP = - SUM * TAUIN;
372  *C3 = *C3 - SUM * T2;
373  /* generate Householder to annihilate the nonzero created at TEMP */
374  *V = TEMP;
375  LAPACKE_slarfg_work( 2, C1, V, 1, TAU);
376  VIN = (*V);
377  TAUIN = (*TAU);
378  /* Right 1 ==> C2 C3 */
379  /* VIN = VIN */
380  T2 = TAUIN * (VIN);
381  SUM = *C2 + VIN*(*C3);
382  *C2 = *C2 - SUM*TAUIN;
383  *C3 = *C3 - SUM*T2;
384  }else if(uplo==PlasmaUpper){
385  /*
386  * Do the corner for the upper case BIDIAG ==> Right then will
387  * create a new nnz. eliminate it and modify V TAU and then
388  * Left
389  * C(N-1, N-1) C(N-1,N) C1 C2
390  * C(N , N-1) C(N ,N) TEMP C3
391  * For Left : use (TAU) and V.
392  * For Right: use (TAU) and (V) as input.
393  */
394  VIN = (*V);
395  TAUIN = (*TAU);
396  /* Right 1 ==> C1 C2 */
397  /* VIN = VIN */
398  T2 = TAUIN*(VIN);
399  SUM = *C1 + VIN*(*C2);
400  *C1 = *C1 - SUM*TAUIN;
401  *C2 = *C2 - SUM*T2;
402  /* new nnz at TEMP and update C3 */
403  SUM = VIN * (*C3);
404  TEMP = - SUM * TAUIN;
405  *C3 = *C3 - SUM * T2;
406  /* generate Householder to annihilate the nonzero created at TEMP */
407  *V = TEMP;
408  LAPACKE_slarfg_work( 2, C1, V, 1, TAU);
409  VIN = *V;
410  TAUIN = (*TAU);
411  /* apply from the Left using the NEW V TAU to the remaining 2 elements [C2 C3] */
412  /* Left 2 ==> C2 */
413  /* C3 */
414  VIN = (VIN);
415  T2 = TAUIN*(VIN);
416  SUM = *C2 + VIN*(*C3);
417  *C2 = *C2 - SUM*TAUIN;
418  *C3 = *C3 - SUM*T2;
419  }
420  return PLASMA_SUCCESS;
421 }
422