PLASMA
2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
Main Page
Modules
Namespaces
Data Structures
Files
File List
Globals
All
Data Structures
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Macros
Groups
core_sgbrce.c
Go to the documentation of this file.
1
15
#include <lapacke.h>
16
#include "
common.h
"
17
18
/***************************************************************************/
68
/***************************************************************************/
72
#define A(_m, _n) (float *)plasma_geteltaddr(A, ((_m)-1), ((_n)-1), eltsize)
73
#define V(_m) &(V[(_m)-1])
74
#define TAU(_m) &(TAU[(_m)-1])
75
int
76
CORE_sgbrce
(
int
uplo
,
int
N,
77
PLASMA_desc
*
A
,
78
float
*
V
,
79
float
*
TAU
,
80
int
st,
81
int
ed,
82
int
eltsize)
83
{
84
int
NB, J1, J2, J3, KDM2, len, pt;
85
int
len1, len2, t1ed, t2st;
86
int
i;
87
static
float
zzero = 0.0;
88
PLASMA_desc
vA=*
A
;
89
90
/* Check input arguments */
91
if
(N < 0) {
92
coreblas_error
(2,
"Illegal value of N"
);
93
return
-2;
94
}
95
if
(ed <= st) {
96
coreblas_error
(6,
"Illegal value of st and ed (internal)"
);
97
return
-6;
98
}
99
100
/* Quick return */
101
if
(N == 0)
102
return
PLASMA_SUCCESS
;
103
104
NB = A->
mb
;
105
KDM2 = A->
mb
-2;
106
if
( uplo ==
PlasmaLower
){
107
/* ========================
108
* LOWER CASE
109
* ========================*/
110
for
(i = ed; i >= st+1 ; i--){
111
/* apply Householder from the right. and create newnnz outside the band if J3 < N */
112
J1 = ed+1;
113
J2 =
min
((i+1+KDM2), N);
114
J3 =
min
((J2+1), N);
115
len = J3-J1+1;
116
if
(J3>J2)*
A
(J3,(i-1))=zzero;
/* could be removed because A is supposed to be band.*/
117
118
t1ed = (J3/NB)*NB;
119
t2st =
max
(t1ed+1,J1);
120
len1 = t1ed-J1+1;
121
len2 = J3-t2st+1;
122
if
(len1>0)
CORE_slarfx2
(
PlasmaRight
, len1, (*
V
(i)), (*
TAU
(i)),
A
(J1, i-1),
ELTLDD
(vA, J1) ,
A
(J1 , i),
ELTLDD
(vA, J1) );
123
if
(len2>0)
CORE_slarfx2
(
PlasmaRight
, len2, (*
V
(i)), (*
TAU
(i)),
A
(t2st,i-1),
ELTLDD
(vA, t2st),
A
(t2st, i),
ELTLDD
(vA, t2st));
124
len = J3-J2;
125
if
(len>0){
126
/* generate Householder to annihilate a(j+kd,j-1) within the band */
127
*
V
(J3) = *
A
(J3,(i-1));
128
*
A
(J3,(i-1)) = 0.0;
129
LAPACKE_slarfg_work( 2,
A
(J2,(i-1)),
V
(J3), 1,
TAU
(J3));
130
}
131
}
132
/* APPLY LEFT ON THE REMAINING ELEMENT OF KERNEL 2 */
133
for
(i = ed; i >= st+1 ; i--){
134
J2 =
min
((i+1+KDM2), N);
135
J3 =
min
((J2+1), N);
136
len = J3-J2;
137
if
(len>0){
138
pt = J2;
139
J1 = i;
140
J2 =
min
(ed,N);
141
t1ed = (J2/NB)*NB;
142
t2st =
max
(t1ed+1,J1);
143
len1 = t1ed-J1+1;
144
len2 = J2-t2st+1;
145
if
(len1>0)
CORE_slarfx2
(
PlasmaLeft
, len1 , *
V
(J3), (*
TAU
(J3)),
A
(pt, i ),
ELTLDD
(vA, pt),
A
((pt+1), i ),
ELTLDD
(vA, pt+1) );
146
if
(len2>0)
CORE_slarfx2
(
PlasmaLeft
, len2 , *
V
(J3), (*
TAU
(J3)),
A
(pt, t2st),
ELTLDD
(vA, pt),
A
((pt+1), t2st),
ELTLDD
(vA, pt+1) );
147
}
148
}
149
}
else
{
150
/* ========================
151
* UPPER CASE
152
* ========================*/
153
for
(i = ed; i >= st+1 ; i--){
154
/* apply Householder from the right. and create newnnz outside the band if J3 < N */
155
J1 = ed+1;
156
J2 =
min
((i+1+KDM2), N);
157
J3 =
min
((J2+1), N);
158
len = J3-J1+1;
159
if
(J3>J2)*
A
((i-1), J3)=zzero;
160
161
t1ed = (J3/NB)*NB;
162
t2st =
max
(t1ed+1,J1);
163
len1 = t1ed-J1+1;
164
len2 = J3-t2st+1;
165
if
(len1>0)
CORE_slarfx2
(
PlasmaLeft
, len1 , *
V
(i), (*
TAU
(i)),
A
(i-1, J1 ),
ELTLDD
(vA, i-1),
A
(i, J1 ),
ELTLDD
(vA, i) );
166
if
(len2>0)
CORE_slarfx2
(
PlasmaLeft
, len2 , *
V
(i), (*
TAU
(i)),
A
(i-1, t2st),
ELTLDD
(vA, i-1),
A
(i, t2st),
ELTLDD
(vA, i) );
167
/* if nonzero element a(j+kd,j-1) has been created outside the band (if index < N) then eliminate it. */
168
len = J3-J2;
169
if
(len>0){
170
/* generate Householder to annihilate a(j+kd,j-1) within the band */
171
*
V
(J3) = *
A
(i-1, J3);
172
*
A
(i-1, J3) = 0.0;
173
LAPACKE_slarfg_work( 2,
A
(i-1, J2),
V
(J3), 1,
TAU
(J3));
174
}
175
}
176
/* APPLY RIGHT ON THE REMAINING ELEMENT OF KERNEL 2 */
177
for
(i = ed; i >= st+1 ; i--){
178
/* find if there was a nnz created. if yes apply right else nothing to be done. */
179
J2 =
min
((i+1+KDM2), N);
180
J3 =
min
((J2+1), N);
181
len = J3-J2;
182
if
(len>0){
183
pt = J2;
184
J1 = i;
185
J2 =
min
(ed,N);
186
t1ed = (J2/NB)*NB;
187
t2st =
max
(t1ed+1,J1);
188
len1 = t1ed-J1+1;
189
len2 = J2-t2st+1;
190
if
(len1>0)
CORE_slarfx2
(
PlasmaRight
, len1 , (*
V
(J3)), (*
TAU
(J3)),
A
(i , pt),
ELTLDD
(vA, i),
A
(i, pt+1),
ELTLDD
(vA, i) );
191
if
(len2>0)
CORE_slarfx2
(
PlasmaRight
, len2 , (*
V
(J3)), (*
TAU
(J3)),
A
(t2st, pt),
ELTLDD
(vA, t2st),
A
(t2st, pt+1),
ELTLDD
(vA, t2st) );
192
}
193
}
194
}
/* end of else for the upper case */
195
196
return
PLASMA_SUCCESS
;
197
}
plasma_2.4.5
core_blas
core_sgbrce.c
Generated on Mon Jul 9 2012 12:45:00 for PLASMA by
1.8.1