PLASMA  2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
core_sgbrce.c
Go to the documentation of this file.
1 
15 #include <lapacke.h>
16 #include "common.h"
17 
18 /***************************************************************************/
68 /***************************************************************************/
72 #define A(_m, _n) (float *)plasma_geteltaddr(A, ((_m)-1), ((_n)-1), eltsize)
73 #define V(_m) &(V[(_m)-1])
74 #define TAU(_m) &(TAU[(_m)-1])
75 int
76 CORE_sgbrce(int uplo, int N,
77  PLASMA_desc *A,
78  float *V,
79  float *TAU,
80  int st,
81  int ed,
82  int eltsize)
83 {
84  int NB, J1, J2, J3, KDM2, len, pt;
85  int len1, len2, t1ed, t2st;
86  int i;
87  static float zzero = 0.0;
88  PLASMA_desc vA=*A;
89 
90  /* Check input arguments */
91  if (N < 0) {
92  coreblas_error(2, "Illegal value of N");
93  return -2;
94  }
95  if (ed <= st) {
96  coreblas_error(6, "Illegal value of st and ed (internal)");
97  return -6;
98  }
99 
100  /* Quick return */
101  if (N == 0)
102  return PLASMA_SUCCESS;
103 
104  NB = A->mb;
105  KDM2 = A->mb-2;
106  if( uplo == PlasmaLower ){
107  /* ========================
108  * LOWER CASE
109  * ========================*/
110  for (i = ed; i >= st+1 ; i--){
111  /* apply Householder from the right. and create newnnz outside the band if J3 < N */
112  J1 = ed+1;
113  J2 = min((i+1+KDM2), N);
114  J3 = min((J2+1), N);
115  len = J3-J1+1;
116  if(J3>J2)*A(J3,(i-1))=zzero;/* could be removed because A is supposed to be band.*/
117 
118  t1ed = (J3/NB)*NB;
119  t2st = max(t1ed+1,J1);
120  len1 = t1ed-J1+1;
121  len2 = J3-t2st+1;
122  if(len1>0)CORE_slarfx2(PlasmaRight, len1, (*V(i)), (*TAU(i)), A(J1, i-1), ELTLDD(vA, J1) , A(J1 , i), ELTLDD(vA, J1) );
123  if(len2>0)CORE_slarfx2(PlasmaRight, len2, (*V(i)), (*TAU(i)), A(t2st,i-1), ELTLDD(vA, t2st), A(t2st, i), ELTLDD(vA, t2st));
124  len = J3-J2;
125  if(len>0){
126  /* generate Householder to annihilate a(j+kd,j-1) within the band */
127  *V(J3) = *A(J3,(i-1));
128  *A(J3,(i-1)) = 0.0;
129  LAPACKE_slarfg_work( 2, A(J2,(i-1)), V(J3), 1, TAU(J3));
130  }
131  }
132  /* APPLY LEFT ON THE REMAINING ELEMENT OF KERNEL 2 */
133  for (i = ed; i >= st+1 ; i--){
134  J2 = min((i+1+KDM2), N);
135  J3 = min((J2+1), N);
136  len = J3-J2;
137  if(len>0){
138  pt = J2;
139  J1 = i;
140  J2 = min(ed,N);
141  t1ed = (J2/NB)*NB;
142  t2st = max(t1ed+1,J1);
143  len1 = t1ed-J1+1;
144  len2 = J2-t2st+1;
145  if(len1>0)CORE_slarfx2(PlasmaLeft, len1 , *V(J3), (*TAU(J3)), A(pt, i ), ELTLDD(vA, pt), A((pt+1), i ), ELTLDD(vA, pt+1) );
146  if(len2>0)CORE_slarfx2(PlasmaLeft, len2 , *V(J3), (*TAU(J3)), A(pt, t2st), ELTLDD(vA, pt), A((pt+1), t2st), ELTLDD(vA, pt+1) );
147  }
148  }
149  } else {
150  /* ========================
151  * UPPER CASE
152  * ========================*/
153  for (i = ed; i >= st+1 ; i--){
154  /* apply Householder from the right. and create newnnz outside the band if J3 < N */
155  J1 = ed+1;
156  J2 = min((i+1+KDM2), N);
157  J3 = min((J2+1), N);
158  len = J3-J1+1;
159  if(J3>J2)*A((i-1), J3)=zzero;
160 
161  t1ed = (J3/NB)*NB;
162  t2st = max(t1ed+1,J1);
163  len1 = t1ed-J1+1;
164  len2 = J3-t2st+1;
165  if(len1>0)CORE_slarfx2(PlasmaLeft, len1 , *V(i), (*TAU(i)), A(i-1, J1 ), ELTLDD(vA, i-1), A(i, J1 ), ELTLDD(vA, i) );
166  if(len2>0)CORE_slarfx2(PlasmaLeft, len2 , *V(i), (*TAU(i)), A(i-1, t2st), ELTLDD(vA, i-1), A(i, t2st), ELTLDD(vA, i) );
167  /* if nonzero element a(j+kd,j-1) has been created outside the band (if index < N) then eliminate it. */
168  len = J3-J2;
169  if(len>0){
170  /* generate Householder to annihilate a(j+kd,j-1) within the band */
171  *V(J3) = *A(i-1, J3);
172  *A(i-1, J3) = 0.0;
173  LAPACKE_slarfg_work( 2, A(i-1, J2), V(J3), 1, TAU(J3));
174  }
175  }
176  /* APPLY RIGHT ON THE REMAINING ELEMENT OF KERNEL 2 */
177  for (i = ed; i >= st+1 ; i--){
178  /* find if there was a nnz created. if yes apply right else nothing to be done. */
179  J2 = min((i+1+KDM2), N);
180  J3 = min((J2+1), N);
181  len = J3-J2;
182  if(len>0){
183  pt = J2;
184  J1 = i;
185  J2 = min(ed,N);
186  t1ed = (J2/NB)*NB;
187  t2st = max(t1ed+1,J1);
188  len1 = t1ed-J1+1;
189  len2 = J2-t2st+1;
190  if(len1>0)CORE_slarfx2(PlasmaRight, len1 , (*V(J3)), (*TAU(J3)), A(i , pt), ELTLDD(vA, i), A(i, pt+1), ELTLDD(vA, i) );
191  if(len2>0)CORE_slarfx2(PlasmaRight, len2 , (*V(J3)), (*TAU(J3)), A(t2st, pt), ELTLDD(vA, t2st), A(t2st, pt+1), ELTLDD(vA, t2st) );
192  }
193  }
194  } /* end of else for the upper case */
195 
196  return PLASMA_SUCCESS;
197 }