PAPI 7.1.0.0
Loading...
Searching...
No Matches
vec_nonfma_dp.c File Reference
Include dependency graph for vec_nonfma_dp.c:

Go to the source code of this file.

Functions

static double test_dp_mac_VEC_24 (uint64 iterations, int EventSet, FILE *fp)
 
static double test_dp_mac_VEC_48 (uint64 iterations, int EventSet, FILE *fp)
 
static double test_dp_mac_VEC_96 (uint64 iterations, int EventSet, FILE *fp)
 
static void test_dp_VEC (int instr_per_loop, uint64 iterations, int EventSet, FILE *fp)
 

Function Documentation

◆ test_dp_mac_VEC_24()

static double test_dp_mac_VEC_24 ( uint64  iterations,
int  EventSet,
FILE *  fp 
)
static

Definition at line 35 of file vec_nonfma_dp.c.

35 {
36 register DP_VEC_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
37
38 /* Generate starting data */
39 r0 = SET_VEC_PD(0.01);
40 r1 = SET_VEC_PD(0.02);
41 r2 = SET_VEC_PD(0.03);
42 r3 = SET_VEC_PD(0.04);
43 r4 = SET_VEC_PD(0.05);
44 r5 = SET_VEC_PD(0.06);
45 r6 = SET_VEC_PD(0.07);
46 r7 = SET_VEC_PD(0.08);
47 r8 = SET_VEC_PD(0.09);
48 r9 = SET_VEC_PD(0.10);
49 rA = SET_VEC_PD(0.11);
50 rB = SET_VEC_PD(0.12);
51 rC = SET_VEC_PD(0.13);
52 rD = SET_VEC_PD(0.14);
53 rE = SET_VEC_PD(0.15);
54 rF = SET_VEC_PD(0.16);
55
56 /* Start PAPI counters */
57 if ( PAPI_start( EventSet ) != PAPI_OK ) {
58 return -1;
59 }
60
61 uint64 c = 0;
62 while (c < iterations){
63 size_t i = 0;
64 while (i < 1000){
65 /* The performance critical part */
66
67 r0 = MUL_VEC_PD(r0,rC);
68 r1 = ADD_VEC_PD(r1,rD);
69 r2 = MUL_VEC_PD(r2,rE);
70 r3 = ADD_VEC_PD(r3,rF);
71 r4 = MUL_VEC_PD(r4,rC);
72 r5 = ADD_VEC_PD(r5,rD);
73 r6 = MUL_VEC_PD(r6,rE);
74 r7 = ADD_VEC_PD(r7,rF);
75 r8 = MUL_VEC_PD(r8,rC);
76 r9 = ADD_VEC_PD(r9,rD);
77 rA = MUL_VEC_PD(rA,rE);
78 rB = ADD_VEC_PD(rB,rF);
79
80 r0 = ADD_VEC_PD(r0,rF);
81 r1 = MUL_VEC_PD(r1,rE);
82 r2 = ADD_VEC_PD(r2,rD);
83 r3 = MUL_VEC_PD(r3,rC);
84 r4 = ADD_VEC_PD(r4,rF);
85 r5 = MUL_VEC_PD(r5,rE);
86 r6 = ADD_VEC_PD(r6,rD);
87 r7 = MUL_VEC_PD(r7,rC);
88 r8 = ADD_VEC_PD(r8,rF);
89 r9 = MUL_VEC_PD(r9,rE);
90 rA = ADD_VEC_PD(rA,rD);
91 rB = MUL_VEC_PD(rB,rC);
92
93 i++;
94 }
95 c++;
96 }
97
98 /* Stop PAPI counters */
99 papi_stop_and_print(24, EventSet, fp);
100
101 /* Use data so that compiler does not eliminate it when using -O2 */
102 r0 = ADD_VEC_PD(r0,r1);
103 r2 = ADD_VEC_PD(r2,r3);
104 r4 = ADD_VEC_PD(r4,r5);
105 r6 = ADD_VEC_PD(r6,r7);
106 r8 = ADD_VEC_PD(r8,r9);
107 rA = ADD_VEC_PD(rA,rB);
108
109 r0 = ADD_VEC_PD(r0,r2);
110 r4 = ADD_VEC_PD(r4,r6);
111 r8 = ADD_VEC_PD(r8,rA);
112
113 r0 = ADD_VEC_PD(r0,r4);
114 r0 = ADD_VEC_PD(r0,r8);
115
116 double out = 0;
117 DP_VEC_TYPE temp = r0;
118 out += ((double*)&temp)[0];
119 out += ((double*)&temp)[1];
120
121 return out;
122}
int i
unsigned long long uint64
Definition: cat_arch.h:3
Start counting hardware events in an event set.
#define PAPI_OK
Definition: f90papi.h:73
static int EventSet
Definition: init_fini.c:8
static double c[MATRIX_SIZE][MATRIX_SIZE]
Definition: libmsr_basic.c:40
static FILE * fp
void papi_stop_and_print(long long theory, int EventSet, FILE *fp)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ test_dp_mac_VEC_48()

static double test_dp_mac_VEC_48 ( uint64  iterations,
int  EventSet,
FILE *  fp 
)
static

Definition at line 128 of file vec_nonfma_dp.c.

128 {
129 register DP_VEC_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
130
131 /* Generate starting data */
132 r0 = SET_VEC_PD(0.01);
133 r1 = SET_VEC_PD(0.02);
134 r2 = SET_VEC_PD(0.03);
135 r3 = SET_VEC_PD(0.04);
136 r4 = SET_VEC_PD(0.05);
137 r5 = SET_VEC_PD(0.06);
138 r6 = SET_VEC_PD(0.07);
139 r7 = SET_VEC_PD(0.08);
140 r8 = SET_VEC_PD(0.09);
141 r9 = SET_VEC_PD(0.10);
142 rA = SET_VEC_PD(0.11);
143 rB = SET_VEC_PD(0.12);
144 rC = SET_VEC_PD(0.13);
145 rD = SET_VEC_PD(0.14);
146 rE = SET_VEC_PD(0.15);
147 rF = SET_VEC_PD(0.16);
148
149 /* Start PAPI counters */
150 if ( PAPI_start( EventSet ) != PAPI_OK ) {
151 return -1;
152 }
153
154 uint64 c = 0;
155 while (c < iterations){
156 size_t i = 0;
157 while (i < 1000){
158 /* The performance critical part */
159
160 r0 = MUL_VEC_PD(r0,rC);
161 r1 = ADD_VEC_PD(r1,rD);
162 r2 = MUL_VEC_PD(r2,rE);
163 r3 = ADD_VEC_PD(r3,rF);
164 r4 = MUL_VEC_PD(r4,rC);
165 r5 = ADD_VEC_PD(r5,rD);
166 r6 = MUL_VEC_PD(r6,rE);
167 r7 = ADD_VEC_PD(r7,rF);
168 r8 = MUL_VEC_PD(r8,rC);
169 r9 = ADD_VEC_PD(r9,rD);
170 rA = MUL_VEC_PD(rA,rE);
171 rB = ADD_VEC_PD(rB,rF);
172
173 r0 = ADD_VEC_PD(r0,rF);
174 r1 = MUL_VEC_PD(r1,rE);
175 r2 = ADD_VEC_PD(r2,rD);
176 r3 = MUL_VEC_PD(r3,rC);
177 r4 = ADD_VEC_PD(r4,rF);
178 r5 = MUL_VEC_PD(r5,rE);
179 r6 = ADD_VEC_PD(r6,rD);
180 r7 = MUL_VEC_PD(r7,rC);
181 r8 = ADD_VEC_PD(r8,rF);
182 r9 = MUL_VEC_PD(r9,rE);
183 rA = ADD_VEC_PD(rA,rD);
184 rB = MUL_VEC_PD(rB,rC);
185
186 r0 = MUL_VEC_PD(r0,rC);
187 r1 = ADD_VEC_PD(r1,rD);
188 r2 = MUL_VEC_PD(r2,rE);
189 r3 = ADD_VEC_PD(r3,rF);
190 r4 = MUL_VEC_PD(r4,rC);
191 r5 = ADD_VEC_PD(r5,rD);
192 r6 = MUL_VEC_PD(r6,rE);
193 r7 = ADD_VEC_PD(r7,rF);
194 r8 = MUL_VEC_PD(r8,rC);
195 r9 = ADD_VEC_PD(r9,rD);
196 rA = MUL_VEC_PD(rA,rE);
197 rB = ADD_VEC_PD(rB,rF);
198
199 r0 = ADD_VEC_PD(r0,rF);
200 r1 = MUL_VEC_PD(r1,rE);
201 r2 = ADD_VEC_PD(r2,rD);
202 r3 = MUL_VEC_PD(r3,rC);
203 r4 = ADD_VEC_PD(r4,rF);
204 r5 = MUL_VEC_PD(r5,rE);
205 r6 = ADD_VEC_PD(r6,rD);
206 r7 = MUL_VEC_PD(r7,rC);
207 r8 = ADD_VEC_PD(r8,rF);
208 r9 = MUL_VEC_PD(r9,rE);
209 rA = ADD_VEC_PD(rA,rD);
210 rB = MUL_VEC_PD(rB,rC);
211
212 i++;
213 }
214 c++;
215 }
216
217 /* Stop PAPI counters */
218 papi_stop_and_print(48, EventSet, fp);
219
220 /* Use data so that compiler does not eliminate it when using -O2 */
221 r0 = ADD_VEC_PD(r0,r1);
222 r2 = ADD_VEC_PD(r2,r3);
223 r4 = ADD_VEC_PD(r4,r5);
224 r6 = ADD_VEC_PD(r6,r7);
225 r8 = ADD_VEC_PD(r8,r9);
226 rA = ADD_VEC_PD(rA,rB);
227
228 r0 = ADD_VEC_PD(r0,r2);
229 r4 = ADD_VEC_PD(r4,r6);
230 r8 = ADD_VEC_PD(r8,rA);
231
232 r0 = ADD_VEC_PD(r0,r4);
233 r0 = ADD_VEC_PD(r0,r8);
234
235 double out = 0;
236 DP_VEC_TYPE temp = r0;
237 out += ((double*)&temp)[0];
238 out += ((double*)&temp)[1];
239
240 return out;
241}
Here is the call graph for this function:
Here is the caller graph for this function:

◆ test_dp_mac_VEC_96()

static double test_dp_mac_VEC_96 ( uint64  iterations,
int  EventSet,
FILE *  fp 
)
static

Definition at line 247 of file vec_nonfma_dp.c.

247 {
248 register DP_VEC_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
249
250 /* Generate starting data */
251 r0 = SET_VEC_PD(0.01);
252 r1 = SET_VEC_PD(0.02);
253 r2 = SET_VEC_PD(0.03);
254 r3 = SET_VEC_PD(0.04);
255 r4 = SET_VEC_PD(0.05);
256 r5 = SET_VEC_PD(0.06);
257 r6 = SET_VEC_PD(0.07);
258 r7 = SET_VEC_PD(0.08);
259 r8 = SET_VEC_PD(0.09);
260 r9 = SET_VEC_PD(0.10);
261 rA = SET_VEC_PD(0.11);
262 rB = SET_VEC_PD(0.12);
263 rC = SET_VEC_PD(0.13);
264 rD = SET_VEC_PD(0.14);
265 rE = SET_VEC_PD(0.15);
266 rF = SET_VEC_PD(0.16);
267
268 /* Start PAPI counters */
269 if ( PAPI_start( EventSet ) != PAPI_OK ) {
270 return -1;
271 }
272
273 uint64 c = 0;
274 while (c < iterations){
275 size_t i = 0;
276 while (i < 1000){
277 /* The performance critical part */
278
279 r0 = MUL_VEC_PD(r0,rC);
280 r1 = ADD_VEC_PD(r1,rD);
281 r2 = MUL_VEC_PD(r2,rE);
282 r3 = ADD_VEC_PD(r3,rF);
283 r4 = MUL_VEC_PD(r4,rC);
284 r5 = ADD_VEC_PD(r5,rD);
285 r6 = MUL_VEC_PD(r6,rE);
286 r7 = ADD_VEC_PD(r7,rF);
287 r8 = MUL_VEC_PD(r8,rC);
288 r9 = ADD_VEC_PD(r9,rD);
289 rA = MUL_VEC_PD(rA,rE);
290 rB = ADD_VEC_PD(rB,rF);
291
292 r0 = ADD_VEC_PD(r0,rF);
293 r1 = MUL_VEC_PD(r1,rE);
294 r2 = ADD_VEC_PD(r2,rD);
295 r3 = MUL_VEC_PD(r3,rC);
296 r4 = ADD_VEC_PD(r4,rF);
297 r5 = MUL_VEC_PD(r5,rE);
298 r6 = ADD_VEC_PD(r6,rD);
299 r7 = MUL_VEC_PD(r7,rC);
300 r8 = ADD_VEC_PD(r8,rF);
301 r9 = MUL_VEC_PD(r9,rE);
302 rA = ADD_VEC_PD(rA,rD);
303 rB = MUL_VEC_PD(rB,rC);
304
305 r0 = MUL_VEC_PD(r0,rC);
306 r1 = ADD_VEC_PD(r1,rD);
307 r2 = MUL_VEC_PD(r2,rE);
308 r3 = ADD_VEC_PD(r3,rF);
309 r4 = MUL_VEC_PD(r4,rC);
310 r5 = ADD_VEC_PD(r5,rD);
311 r6 = MUL_VEC_PD(r6,rE);
312 r7 = ADD_VEC_PD(r7,rF);
313 r8 = MUL_VEC_PD(r8,rC);
314 r9 = ADD_VEC_PD(r9,rD);
315 rA = MUL_VEC_PD(rA,rE);
316 rB = ADD_VEC_PD(rB,rF);
317
318 r0 = ADD_VEC_PD(r0,rF);
319 r1 = MUL_VEC_PD(r1,rE);
320 r2 = ADD_VEC_PD(r2,rD);
321 r3 = MUL_VEC_PD(r3,rC);
322 r4 = ADD_VEC_PD(r4,rF);
323 r5 = MUL_VEC_PD(r5,rE);
324 r6 = ADD_VEC_PD(r6,rD);
325 r7 = MUL_VEC_PD(r7,rC);
326 r8 = ADD_VEC_PD(r8,rF);
327 r9 = MUL_VEC_PD(r9,rE);
328 rA = ADD_VEC_PD(rA,rD);
329 rB = MUL_VEC_PD(rB,rC);
330
331 r0 = MUL_VEC_PD(r0,rC);
332 r1 = ADD_VEC_PD(r1,rD);
333 r2 = MUL_VEC_PD(r2,rE);
334 r3 = ADD_VEC_PD(r3,rF);
335 r4 = MUL_VEC_PD(r4,rC);
336 r5 = ADD_VEC_PD(r5,rD);
337 r6 = MUL_VEC_PD(r6,rE);
338 r7 = ADD_VEC_PD(r7,rF);
339 r8 = MUL_VEC_PD(r8,rC);
340 r9 = ADD_VEC_PD(r9,rD);
341 rA = MUL_VEC_PD(rA,rE);
342 rB = ADD_VEC_PD(rB,rF);
343
344 r0 = ADD_VEC_PD(r0,rF);
345 r1 = MUL_VEC_PD(r1,rE);
346 r2 = ADD_VEC_PD(r2,rD);
347 r3 = MUL_VEC_PD(r3,rC);
348 r4 = ADD_VEC_PD(r4,rF);
349 r5 = MUL_VEC_PD(r5,rE);
350 r6 = ADD_VEC_PD(r6,rD);
351 r7 = MUL_VEC_PD(r7,rC);
352 r8 = ADD_VEC_PD(r8,rF);
353 r9 = MUL_VEC_PD(r9,rE);
354 rA = ADD_VEC_PD(rA,rD);
355 rB = MUL_VEC_PD(rB,rC);
356
357 r0 = MUL_VEC_PD(r0,rC);
358 r1 = ADD_VEC_PD(r1,rD);
359 r2 = MUL_VEC_PD(r2,rE);
360 r3 = ADD_VEC_PD(r3,rF);
361 r4 = MUL_VEC_PD(r4,rC);
362 r5 = ADD_VEC_PD(r5,rD);
363 r6 = MUL_VEC_PD(r6,rE);
364 r7 = ADD_VEC_PD(r7,rF);
365 r8 = MUL_VEC_PD(r8,rC);
366 r9 = ADD_VEC_PD(r9,rD);
367 rA = MUL_VEC_PD(rA,rE);
368 rB = ADD_VEC_PD(rB,rF);
369
370 r0 = ADD_VEC_PD(r0,rF);
371 r1 = MUL_VEC_PD(r1,rE);
372 r2 = ADD_VEC_PD(r2,rD);
373 r3 = MUL_VEC_PD(r3,rC);
374 r4 = ADD_VEC_PD(r4,rF);
375 r5 = MUL_VEC_PD(r5,rE);
376 r6 = ADD_VEC_PD(r6,rD);
377 r7 = MUL_VEC_PD(r7,rC);
378 r8 = ADD_VEC_PD(r8,rF);
379 r9 = MUL_VEC_PD(r9,rE);
380 rA = ADD_VEC_PD(rA,rD);
381 rB = MUL_VEC_PD(rB,rC);
382
383 i++;
384 }
385 c++;
386 }
387
388 /* Stop PAPI counters */
389 papi_stop_and_print(96, EventSet, fp);
390
391 /* Use data so that compiler does not eliminate it when using -O2 */
392 r0 = ADD_VEC_PD(r0,r1);
393 r2 = ADD_VEC_PD(r2,r3);
394 r4 = ADD_VEC_PD(r4,r5);
395 r6 = ADD_VEC_PD(r6,r7);
396 r8 = ADD_VEC_PD(r8,r9);
397 rA = ADD_VEC_PD(rA,rB);
398
399 r0 = ADD_VEC_PD(r0,r2);
400 r4 = ADD_VEC_PD(r4,r6);
401 r8 = ADD_VEC_PD(r8,rA);
402
403 r0 = ADD_VEC_PD(r0,r4);
404 r0 = ADD_VEC_PD(r0,r8);
405
406 double out = 0;
407 DP_VEC_TYPE temp = r0;
408 out += ((double*)&temp)[0];
409 out += ((double*)&temp)[1];
410
411 return out;
412}
Here is the call graph for this function:
Here is the caller graph for this function:

◆ test_dp_VEC()

static void test_dp_VEC ( int  instr_per_loop,
uint64  iterations,
int  EventSet,
FILE *  fp 
)
static

Definition at line 415 of file vec_nonfma_dp.c.

416{ /* Dispatch on instr_per_loop: run the matching vector kernel and scalar reference kernel, then compare their results. */
417 double sum = 0.0; /* accumulates the value returned by the vector kernel */
418 double scalar_sum = 0.0; /* accumulates the value returned by the scalar reference kernel */
419
420 if ( instr_per_loop == 24 ) {
421 sum += test_dp_mac_VEC_24( iterations, EventSet, fp ); /* returns -1 if PAPI_start() fails */
422 scalar_sum += test_dp_scalar_VEC_24( iterations ); /* defined outside this file; takes only the iteration count */
423 }
424 else if ( instr_per_loop == 48 ) {
425 sum += test_dp_mac_VEC_48( iterations, EventSet, fp );
426 scalar_sum += test_dp_scalar_VEC_48( iterations );
427 }
428 else if ( instr_per_loop == 96 ) {
429 sum += test_dp_mac_VEC_96( iterations, EventSet, fp );
430 scalar_sum += test_dp_scalar_VEC_96( iterations );
431 } /* any other instr_per_loop runs no kernel: both sums stay 0.0 and the check below passes silently */
432
433 if( sum/2.0 != scalar_sum ) { /* exact floating-point comparison; NOTE(review): the /2.0 presumably accounts for the two lanes the vector kernels add into their return value -- confirm */
434 fprintf(stderr, "Inconsistent FLOP results detected!\n"); /* mismatch is only reported on stderr; nothing is returned to the caller */
435 }
436}
FILE * stderr
static double test_dp_mac_VEC_48(uint64 iterations, int EventSet, FILE *fp)
static double test_dp_mac_VEC_96(uint64 iterations, int EventSet, FILE *fp)
static double test_dp_mac_VEC_24(uint64 iterations, int EventSet, FILE *fp)
Definition: vec_nonfma_dp.c:35
double test_dp_scalar_VEC_48(uint64 iterations)
double test_dp_scalar_VEC_96(uint64 iterations)
double test_dp_scalar_VEC_24(uint64 iterations)
Here is the call graph for this function: