PLASMA
2.4.5
PLASMA - Parallel Linear Algebra for Scalable Multi-core Architectures
Main Page
Modules
Namespaces
Data Structures
Files
File List
Globals
All
Data Structures
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Macros
Groups
pclacpy.c
Go to the documentation of this file.
1
16
#include "
common.h
"
17
18
/* Shorthand: BLKADDR applied to descriptor A with element type
 * PLASMA_Complex32_t for tile indices (m, n). BLKADDR is defined outside this
 * file; presumably it yields the address of tile (m, n) - confirm in common.h.
 * Only the function-like form A(m,n) expands; a bare `A` still names the
 * descriptor variable. */
#define A(m,n) BLKADDR(A, PLASMA_Complex32_t, m, n)
19
/* Shorthand: BLKADDR applied to descriptor B with element type
 * PLASMA_Complex32_t for tile indices (m, n). BLKADDR is defined outside this
 * file; presumably it yields the address of tile (m, n) - confirm in common.h.
 * Only the function-like form B(m,n) expands; a bare `B` still names the
 * descriptor variable. */
#define B(m,n) BLKADDR(B, PLASMA_Complex32_t, m, n)
20
/***************************************************************************/
23
/*
 * Tile-by-tile CORE_clacpy of A into B - static scheduling.
 *
 * Arguments are unpacked from the context via plasma_unpack_args_5:
 *   uplo     - PlasmaUpper, PlasmaLower, or anything else (treated as
 *              PlasmaUpperLower): which tiles of A are processed.
 *   A, B     - source and destination tile descriptors.
 *   sequence - if its status is not PLASMA_SUCCESS on entry, nothing is done.
 *   request  - unpacked but not used here.
 *
 * Work distribution: the selected tiles are enumerated in a fixed linear
 * order (row by row for PlasmaUpper, column by column otherwise). The caller
 * starts at linear index PLASMA_RANK and strides by PLASMA_SIZE, so each
 * participant handles a disjoint subset of tiles.
 *
 * Diagonal tiles (m == n) are passed to CORE_clacpy with the requested uplo;
 * every other tile is passed with PlasmaUpperLower.
 */
void plasma_pclacpy(plasma_context_t *plasma)
{
    PLASMA_enum uplo;
    PLASMA_desc A;
    PLASMA_desc B;
    PLASMA_sequence *sequence;
    PLASMA_request *request;

    int X, Y;           /* rows / columns of the current tile            */
    int m, n;           /* tile row / tile column currently processed    */
    int next_m;         /* tile row of the next tile for this caller     */
    int next_n;         /* tile column of the next tile for this caller  */
    int ldam, ldbm;     /* BLKLDD of tile row m in A and in B            */

    plasma_unpack_args_5(uplo, A, B, sequence, request);
    if (sequence->status != PLASMA_SUCCESS)
        return;

    switch (uplo) {
    /*
     *  PlasmaUpper
     */
    case PlasmaUpper:
        /* Locate the first tile: row m holds columns m..nt-1, so wrapping
         * past the end of a row re-enters the next row at column m.
         * The m < A.mt bound is required: once m runs past the last row
         * that still has tiles, n can no longer drop below A.nt and the
         * loop would otherwise never terminate (e.g. rank >= tile count). */
        m = 0;
        n = PLASMA_RANK;
        while (n >= A.nt && m < A.mt) {
            m++;
            n = n - A.nt + m;
        }

        while (m < A.mt) {
            /* Compute the tile this caller handles after the current one. */
            next_m = m;
            next_n = n;

            next_n += PLASMA_SIZE;
            while (next_n >= A.nt && next_m < A.mt) {
                next_m++;
                next_n = next_n - A.nt + next_m;
            }

            /* Last tile row / column may be partial. */
            X = m == A.mt-1 ? A.m-m*A.mb : A.mb;
            Y = n == A.nt-1 ? A.n-n*A.nb : A.nb;
            ldam = BLKLDD(A, m);
            ldbm = BLKLDD(B, m);
            CORE_clacpy(
                m == n ? uplo : PlasmaUpperLower,
                X, Y,
                A(m, n), ldam,
                B(m, n), ldbm);

            n = next_n;
            m = next_m;
        }
        break;
    /*
     *  PlasmaLower
     */
    case PlasmaLower:
        /* Locate the first tile: column n holds rows n..mt-1, so wrapping
         * past the end of a column re-enters the next column at row n.
         * The n < A.nt bound mirrors the PlasmaUpper case and prevents a
         * non-terminating loop when the rank exceeds the tile count. */
        n = 0;
        m = PLASMA_RANK;
        while (m >= A.mt && n < A.nt) {
            n++;
            m = m - A.mt + n;
        }

        while (n < A.nt) {
            /* Compute the tile this caller handles after the current one. */
            next_m = m;
            next_n = n;

            next_m += PLASMA_SIZE;
            while (next_m >= A.mt && next_n < A.nt) {
                next_n++;
                next_m = next_m - A.mt + next_n;
            }

            /* Last tile row / column may be partial. */
            X = m == A.mt-1 ? A.m-m*A.mb : A.mb;
            Y = n == A.nt-1 ? A.n-n*A.nb : A.nb;
            ldam = BLKLDD(A, m);
            ldbm = BLKLDD(B, m);
            CORE_clacpy(
                m == n ? uplo : PlasmaUpperLower,
                X, Y,
                A(m, n), ldam,
                B(m, n), ldbm);

            n = next_n;
            m = next_m;
        }
        break;
    /*
     *  PlasmaUpperLower
     */
    case PlasmaUpperLower:
    default:
        /* Locate the first tile: every column holds rows 0..mt-1.
         * The n < A.nt bound matches the advance loop below and keeps the
         * search finite even when A.mt is 0. */
        n = 0;
        m = PLASMA_RANK;
        while (m >= A.mt && n < A.nt) {
            n++;
            m = m - A.mt;
        }

        while (n < A.nt) {
            /* Compute the tile this caller handles after the current one. */
            next_m = m;
            next_n = n;

            next_m += PLASMA_SIZE;
            while (next_m >= A.mt && next_n < A.nt) {
                next_n++;
                next_m = next_m - A.mt;
            }

            /* Last tile row / column may be partial. */
            X = m == A.mt-1 ? A.m-m*A.mb : A.mb;
            Y = n == A.nt-1 ? A.n-n*A.nb : A.nb;
            ldam = BLKLDD(A, m);
            ldbm = BLKLDD(B, m);
            CORE_clacpy(
                PlasmaUpperLower,
                X, Y,
                A(m, n), ldam,
                B(m, n), ldbm);

            n = next_n;
            m = next_m;
        }
        break;
    }
}
150
/***************************************************************************/
153
void
plasma_pclacpy_quark
(
PLASMA_enum
uplo
,
PLASMA_desc
A
,
PLASMA_desc
B
,
154
PLASMA_sequence
*sequence,
PLASMA_request
*request)
155
{
156
plasma_context_t
*
plasma
;
157
Quark_Task_Flags
task_flags =
Quark_Task_Flags_Initializer
;
158
159
int
X, Y;
160
int
m, n;
161
int
ldam, ldbm;
162
163
plasma =
plasma_context_self
();
164
if
(sequence->
status
!=
PLASMA_SUCCESS
)
165
return
;
166
QUARK_Task_Flag_Set
(&task_flags,
TASK_SEQUENCE
, (intptr_t)sequence->
quark_sequence
);
167
168
switch
(uplo) {
169
/*
170
* PlasmaUpper
171
*/
172
case
PlasmaUpper
:
173
for
(m = 0; m < A.
mt
; m++) {
174
X = m == A.
mt
-1 ? A.
m
-m*A.
mb
: A.
mb
;
175
ldam =
BLKLDD
(A, m);
176
ldbm =
BLKLDD
(B, m);
177
if
(m < A.
nt
) {
178
Y = m == A.
nt
-1 ? A.
n
-m*A.
nb
: A.
nb
;
179
QUARK_CORE_clacpy
(
180
plasma->
quark
, &task_flags,
181
PlasmaUpper
,
182
X, Y, A.
mb
,
183
A(m, m), ldam,
184
B
(m, m), ldbm);
185
}
186
for
(n = m+1; n < A.
nt
; n++) {
187
Y = n == A.
nt
-1 ? A.
n
-n*A.
nb
: A.
nb
;
188
QUARK_CORE_clacpy
(
189
plasma->
quark
, &task_flags,
190
PlasmaUpperLower
,
191
X, Y, A.
mb
,
192
A(m, n), ldam,
193
B
(m, n), ldbm);
194
}
195
}
196
break
;
197
/*
198
* PlasmaLower
199
*/
200
case
PlasmaLower
:
201
for
(m = 0; m < A.
mt
; m++) {
202
X = m == A.
mt
-1 ? A.
m
-m*A.
mb
: A.
mb
;
203
ldam =
BLKLDD
(A, m);
204
ldbm =
BLKLDD
(B, m);
205
if
(m < A.
nt
) {
206
Y = m == A.
nt
-1 ? A.
n
-m*A.
nb
: A.
nb
;
207
QUARK_CORE_clacpy
(
208
plasma->
quark
, &task_flags,
209
PlasmaLower
,
210
X, Y, A.
mb
,
211
A(m, m), ldam,
212
B
(m, m), ldbm);
213
}
214
for
(n = 0; n <
min
(m, A.
nt
); n++) {
215
Y = n == A.
nt
-1 ? A.
n
-n*A.
nb
: A.
nb
;
216
QUARK_CORE_clacpy
(
217
plasma->
quark
, &task_flags,
218
PlasmaUpperLower
,
219
X, Y, A.
mb
,
220
A(m, n), ldam,
221
B
(m, n), ldbm);
222
}
223
}
224
break
;
225
/*
226
* PlasmaUpperLower
227
*/
228
case
PlasmaUpperLower
:
229
default
:
230
for
(m = 0; m < A.
mt
; m++) {
231
X = m == A.
mt
-1 ? A.
m
-m*A.
mb
: A.
mb
;
232
ldam =
BLKLDD
(A, m);
233
ldbm =
BLKLDD
(B, m);
234
for
(n = 0; n < A.
nt
; n++) {
235
Y = n == A.
nt
-1 ? A.
n
-n*A.
nb
: A.
nb
;
236
QUARK_CORE_clacpy
(
237
plasma->
quark
, &task_flags,
238
PlasmaUpperLower
,
239
X, Y, A.
mb
,
240
A(m, n), ldam,
241
B
(m, n), ldbm);
242
}
243
}
244
}
245
}
plasma_2.4.5
compute
pclacpy.c
Generated on Mon Jul 9 2012 12:44:55 for PLASMA by Doxygen 1.8.1