PLASMA
2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
Main Page
Modules
Namespaces
Data Structures
Files
File List
Globals
All
Data Structures
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Macros
Groups
pzhegst.c
Go to the documentation of this file.
1
15
#include "
common.h
"
16
17
/* Tile accessor for descriptor A: forwards the tile coordinates to BLKADDR
 * together with the element type.  NOTE(review): BLKADDR is defined outside
 * this file; presumably it yields the address of tile (tile_row, tile_col). */
#define A(tile_row, tile_col) BLKADDR(A, PLASMA_Complex64_t, tile_row, tile_col)
18
/* Tile accessor for descriptor B: forwards the tile coordinates to BLKADDR
 * together with the element type.  NOTE(review): BLKADDR is defined outside
 * this file; presumably it yields the address of tile (tile_row, tile_col). */
#define B(tile_row, tile_col) BLKADDR(B, PLASMA_Complex64_t, tile_row, tile_col)
19
/***************************************************************************/
22
void
plasma_pzhegst_quark
(
PLASMA_enum
itype
,
PLASMA_enum
uplo
,
23
PLASMA_desc
A
,
PLASMA_desc
B
,
24
PLASMA_sequence
*sequence,
PLASMA_request
*request)
25
{
26
plasma_context_t
*
plasma
;
27
Quark_Task_Flags
task_flags =
Quark_Task_Flags_Initializer
;
28
29
int
k;
30
int
ldak, ldbk;
31
int
tempkn;
32
static
double
done = 1.0;
33
static
PLASMA_Complex64_t
zone = 1.0;
34
static
PLASMA_Complex64_t
mzone = -1.0;
35
static
PLASMA_Complex64_t
zhalf = 0.5;
36
static
PLASMA_Complex64_t
mzhalf = -0.5;
37
38
plasma =
plasma_context_self
();
39
if
(sequence->
status
!=
PLASMA_SUCCESS
)
40
return
;
41
42
QUARK_Task_Flag_Set
(&task_flags,
TASK_SEQUENCE
, (intptr_t)sequence->
quark_sequence
);
43
44
if
(itype == 1) {
45
if
(uplo ==
PlasmaLower
) {
46
for
(k = 0; k < A.
nt
; k++){
47
tempkn = k == A.
nt
-1 ? A.
n
-k*A.
nb
: A.
nb
;
48
ldak =
BLKLDD
(A, k);
49
ldbk =
BLKLDD
(B, k);
50
51
QUARK_CORE_zhegst
(
52
plasma->
quark
, &task_flags,
53
itype, uplo, tempkn,
54
A(k, k), ldak,
55
B
(k, k), ldbk,
56
sequence, request, A.
nb
*k);
57
58
if
(k*A.
nb
+tempkn < A.
n
) {
59
plasma_pztrsm_quark
(
60
PlasmaRight
, uplo,
61
PlasmaConjTrans
,
PlasmaNonUnit
,
62
zone,
63
plasma_desc_submatrix
(B, k*B.
nb
, k*B.
nb
, tempkn, tempkn),
64
plasma_desc_submatrix
(A, k*A.
nb
+tempkn, k*A.
nb
, A.
n
-k*A.
nb
-tempkn, tempkn),
65
sequence, request);
66
67
plasma_pzhemm_quark
(
68
PlasmaRight
, uplo, mzhalf,
69
plasma_desc_submatrix
(A, k*A.
nb
, k*A.
nb
, tempkn, tempkn),
70
plasma_desc_submatrix
(B, k*B.
nb
+tempkn, k*B.
nb
, B.
n
-k*B.
nb
-tempkn, tempkn),
71
zone,
72
plasma_desc_submatrix
(A, k*A.
nb
+tempkn, k*A.
nb
, A.
n
-k*A.
nb
-tempkn, tempkn),
73
sequence, request);
74
75
plasma_pzher2k_quark
(
76
uplo,
PlasmaNoTrans
,
77
mzone,
78
plasma_desc_submatrix
(A, k*A.
nb
+tempkn, k*A.
nb
, A.
n
-k*A.
nb
-tempkn, tempkn),
79
plasma_desc_submatrix
(B, k*B.
nb
+tempkn, k*B.
nb
, B.
n
-k*B.
nb
-tempkn, tempkn),
80
done,
81
plasma_desc_submatrix
(A, k*A.
nb
+tempkn, k*A.
nb
+tempkn, A.
n
-k*A.
nb
-tempkn, A.
n
-k*A.
nb
-tempkn),
82
sequence, request);
83
84
plasma_pzhemm_quark
(
85
PlasmaRight
, uplo,
86
mzhalf,
87
plasma_desc_submatrix
(A, k*A.
nb
, k*A.
nb
, tempkn, tempkn),
88
plasma_desc_submatrix
(B, k*B.
nb
+tempkn, k*B.
nb
, B.
n
-k*B.
nb
-tempkn, tempkn),
89
zone,
90
plasma_desc_submatrix
(A, k*A.
nb
+tempkn, k*A.
nb
, A.
n
-k*A.
nb
-tempkn, tempkn),
91
sequence, request);
92
93
plasma_pztrsm_quark
(
94
PlasmaLeft
, uplo,
95
PlasmaNoTrans
,
PlasmaNonUnit
,
96
zone,
97
plasma_desc_submatrix
(B, k*B.
nb
+tempkn, k*B.
nb
+tempkn, tempkn, tempkn),
98
plasma_desc_submatrix
(A, k*A.
nb
+tempkn, k*A.
nb
, A.
n
-k*A.
nb
-tempkn, tempkn),
99
sequence, request);
100
}
101
}
102
}
103
else
{
104
for
(k = 0; k < A.
nt
; k++){
105
tempkn = k == A.
nt
-1 ? A.
n
-k*A.
nb
: A.
nb
;
106
ldak =
BLKLDD
(A, k);
107
ldbk =
BLKLDD
(B, k);
108
QUARK_CORE_zhegst
(
109
plasma->
quark
, &task_flags,
110
itype, uplo, tempkn,
111
A(k, k), ldak,
112
B
(k, k), ldbk,
113
sequence, request, A.
nb
*k);
114
115
if
(k*A.
nb
+tempkn < A.
n
) {
116
plasma_pztrsm_quark
(
117
PlasmaLeft
, uplo,
118
PlasmaConjTrans
,
PlasmaNonUnit
,
119
zone,
120
plasma_desc_submatrix
(B, k*B.
nb
, k*B.
nb
, tempkn, tempkn),
121
plasma_desc_submatrix
(A, k*A.
nb
, k*A.
nb
+tempkn, tempkn, A.
n
-k*A.
nb
-tempkn),
122
sequence, request);
123
124
plasma_pzhemm_quark
(
125
PlasmaLeft
, uplo, mzhalf,
126
plasma_desc_submatrix
(A, k*A.
nb
, k*A.
nb
, tempkn, tempkn),
127
plasma_desc_submatrix
(B, k*B.
nb
, k*B.
nb
+tempkn, tempkn, B.
n
-k*B.
nb
-tempkn),
128
zone,
129
plasma_desc_submatrix
(A, k*A.
nb
, k*A.
nb
+tempkn, tempkn, A.
n
-k*A.
nb
-tempkn),
130
sequence, request);
131
132
plasma_pzher2k_quark
(
133
uplo,
PlasmaConjTrans
,
134
mzone,
135
plasma_desc_submatrix
(A, k*A.
nb
, k*A.
nb
+tempkn, tempkn, A.
n
-k*A.
nb
-tempkn),
136
plasma_desc_submatrix
(B, k*B.
nb
, k*B.
nb
+tempkn, tempkn, B.
n
-k*B.
nb
-tempkn),
137
done,
138
plasma_desc_submatrix
(A, k*A.
nb
+tempkn, k*A.
nb
+tempkn, A.
n
-k*A.
nb
-tempkn, A.
n
-k*A.
nb
-tempkn),
139
sequence, request);
140
141
plasma_pzhemm_quark
(
142
PlasmaLeft
, uplo,
143
mzhalf,
144
plasma_desc_submatrix
(A, k*A.
nb
, k*A.
nb
, tempkn, tempkn),
145
plasma_desc_submatrix
(B, k*B.
nb
, k*B.
nb
+tempkn, tempkn, B.
n
-k*B.
nb
-tempkn),
146
zone,
147
plasma_desc_submatrix
(A, k*A.
nb
, k*A.
nb
+tempkn, tempkn, A.
n
-k*A.
nb
-tempkn),
148
sequence, request);
149
150
plasma_pztrsm_quark
(
151
PlasmaRight
, uplo,
152
PlasmaNoTrans
,
PlasmaNonUnit
,
153
zone,
154
plasma_desc_submatrix
(B, k*B.
nb
+tempkn, k*B.
nb
+tempkn, tempkn, tempkn),
155
plasma_desc_submatrix
(A, k*A.
nb
, k*A.
nb
+tempkn, tempkn, A.
n
-k*A.
nb
-tempkn),
156
sequence, request);
157
}
158
}
159
}
160
}
161
else
{
162
if
(uplo ==
PlasmaLower
) {
163
for
(k = 0; k < A.
nt
; k++){
164
tempkn = k == A.
nt
-1 ? A.
n
-k*A.
nb
: A.
nb
;
165
ldak =
BLKLDD
(A, k);
166
ldbk =
BLKLDD
(B, k);
167
168
plasma_pztrmm_quark
(
169
PlasmaRight
, uplo,
170
PlasmaNoTrans
,
PlasmaNonUnit
,
171
zone,
172
plasma_desc_submatrix
(B, 0, 0, k*B.
nb
, k*B.
nb
),
173
plasma_desc_submatrix
(A, k*A.
nb
, 0, tempkn, k*A.
nb
),
174
sequence, request);
175
176
plasma_pzhemm_quark
(
177
PlasmaLeft
, uplo, zhalf,
178
plasma_desc_submatrix
(A, k*A.
nb
, k*A.
nb
, tempkn, tempkn),
179
plasma_desc_submatrix
(B, k*B.
nb
, 0, tempkn, k*B.
nb
),
180
zone,
181
plasma_desc_submatrix
(A, k*A.
nb
, 0, tempkn, k*A.
nb
),
182
sequence, request);
183
184
plasma_pzher2k_quark
(
185
uplo,
PlasmaConjTrans
,
186
zone,
187
plasma_desc_submatrix
(A, k*A.
nb
, 0, tempkn, k*A.
nb
),
188
plasma_desc_submatrix
(B, k*B.
nb
, 0, tempkn, k*B.
nb
),
189
done,
190
plasma_desc_submatrix
(A, 0, 0, k*A.
nb
, k*A.
nb
),
191
sequence, request);
192
193
plasma_pzhemm_quark
(
194
PlasmaLeft
, uplo, zhalf,
195
plasma_desc_submatrix
(A, k*A.
nb
, k*A.
nb
, tempkn, tempkn),
196
plasma_desc_submatrix
(B, k*B.
nb
, 0, tempkn, k*B.
nb
),
197
zone,
198
plasma_desc_submatrix
(A, k*A.
nb
, 0, tempkn, k*A.
nb
),
199
sequence, request);
200
201
plasma_pztrmm_quark
(
202
PlasmaLeft
, uplo,
203
PlasmaConjTrans
,
PlasmaNonUnit
,
204
zone,
205
plasma_desc_submatrix
(B, k*B.
nb
, k*B.
nb
, tempkn, tempkn),
206
plasma_desc_submatrix
(A, k*A.
nb
, 0, tempkn, k*A.
nb
),
207
sequence, request);
208
209
QUARK_CORE_zhegst
(
210
plasma->
quark
, &task_flags,
211
itype, uplo, tempkn,
212
A(k, k), ldak,
213
B
(k, k), ldbk,
214
sequence, request, A.
nb
*k);
215
}
216
}
217
else
{
218
for
(k = 0; k < A.
nt
; k++){
219
tempkn = k == A.
nt
-1 ? A.
n
-k*A.
nb
: A.
nb
;
220
ldak =
BLKLDD
(A, k);
221
ldbk =
BLKLDD
(B, k);
222
223
plasma_pztrmm_quark
(
224
PlasmaLeft
, uplo,
225
PlasmaNoTrans
,
PlasmaNonUnit
,
226
zone,
227
plasma_desc_submatrix
(B, 0, 0, k*B.
nb
, k*B.
nb
),
228
plasma_desc_submatrix
(A, 0, k*A.
nb
, k*A.
nb
, tempkn),
229
sequence, request);
230
231
plasma_pzhemm_quark
(
232
PlasmaRight
, uplo, zhalf,
233
plasma_desc_submatrix
(A, k*A.
nb
, k*A.
nb
, k*A.
nb
, k*A.
nb
),
234
plasma_desc_submatrix
(B, 0, k*B.
nb
, k*B.
nb
, tempkn),
235
zone,
236
plasma_desc_submatrix
(A, 0, k*A.
nb
, k*A.
nb
, tempkn),
237
sequence, request);
238
239
plasma_pzher2k_quark
(
240
uplo,
PlasmaNoTrans
,
241
zone,
242
plasma_desc_submatrix
(A, 0, k*A.
nb
, k*A.
nb
, tempkn),
243
plasma_desc_submatrix
(B, 0, k*B.
nb
, k*B.
nb
, tempkn),
244
done,
245
plasma_desc_submatrix
(A, 0, 0, k*A.
nb
, k*A.
nb
),
246
sequence, request);
247
248
plasma_pzhemm_quark
(
249
PlasmaRight
, uplo, zhalf,
250
plasma_desc_submatrix
(A, k*A.
nb
, k*A.
nb
, k*A.
nb
, k*A.
nb
),
251
plasma_desc_submatrix
(B, 0, k*B.
nb
, k*B.
nb
, tempkn),
252
zone,
253
plasma_desc_submatrix
(A, 0, k*A.
nb
, k*A.
nb
, tempkn),
254
sequence, request);
255
256
plasma_pztrmm_quark
(
257
PlasmaRight
, uplo,
258
PlasmaConjTrans
,
PlasmaNonUnit
,
259
zone,
260
plasma_desc_submatrix
(B, k*B.
nb
, k*B.
nb
, tempkn, tempkn),
261
plasma_desc_submatrix
(A, 0, k*A.
nb
, k*A.
nb
, tempkn),
262
sequence, request);
263
264
QUARK_CORE_zhegst
(
265
plasma->
quark
, &task_flags,
266
itype, uplo, tempkn,
267
A(k, k), ldak,
268
B
(k, k), ldbk,
269
sequence, request, A.
nb
*k);
270
}
271
}
272
}
273
}
plasma_2.4.5
compute
pzhegst.c
Generated on Mon Jul 9 2012 12:44:56 for PLASMA by
1.8.1