PAPI 7.1.0.0
Loading...
Searching...
No Matches
vec_nonfma_sp.c File Reference
Include dependency graph for vec_nonfma_sp.c:

Go to the source code of this file.

Functions

static float test_sp_mac_VEC_24 (uint64 iterations, int EventSet, FILE *fp)
 
static float test_sp_mac_VEC_48 (uint64 iterations, int EventSet, FILE *fp)
 
static float test_sp_mac_VEC_96 (uint64 iterations, int EventSet, FILE *fp)
 
static void test_sp_VEC (int instr_per_loop, uint64 iterations, int EventSet, FILE *fp)
 

Function Documentation

◆ test_sp_mac_VEC_24()

static float test_sp_mac_VEC_24 ( uint64  iterations,
int  EventSet,
FILE *  fp 
)
static

Definition at line 35 of file vec_nonfma_sp.c.

/*
 * Body of test_sp_mac_VEC_24; the leading numbers are the listing's own
 * line numbers from vec_nonfma_sp.c.
 *
 * Seeds sixteen SP_VEC_TYPE values, starts counting on EventSet, and runs a
 * kernel of 24 MUL_VEC_PS/ADD_VEC_PS operations 1000 times per outer
 * iteration. Afterwards r0..rB are folded into one vector and the first
 * four float elements of that vector are summed.
 *
 * Returns -1 when PAPI_start() does not report PAPI_OK; otherwise the
 * summed float.
 *
 * NOTE(review): fp is not referenced anywhere in the visible listing, and
 * listing line 99 is absent from this extract. The reference list under
 * the listing names papi_stop_and_print(theory, EventSet, fp), so the
 * stop call presumably sits on line 99 -- confirm against the original file.
 */
35 {
36 register SP_VEC_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
37
38 /* Generate starting data: r0..rB are the values updated inside the loop; rC..rF are only ever read as the second operand */
39 r0 = SET_VEC_PS(0.01);
40 r1 = SET_VEC_PS(0.02);
41 r2 = SET_VEC_PS(0.03);
42 r3 = SET_VEC_PS(0.04);
43 r4 = SET_VEC_PS(0.05);
44 r5 = SET_VEC_PS(0.06);
45 r6 = SET_VEC_PS(0.07);
46 r7 = SET_VEC_PS(0.08);
47 r8 = SET_VEC_PS(0.09);
48 r9 = SET_VEC_PS(0.10);
49 rA = SET_VEC_PS(0.11);
50 rB = SET_VEC_PS(0.12);
51 rC = SET_VEC_PS(0.13);
52 rD = SET_VEC_PS(0.14);
53 rE = SET_VEC_PS(0.15);
54 rF = SET_VEC_PS(0.16);
55
56 /* Start PAPI counters; bail out with -1 if they cannot be started */
57 if ( PAPI_start( EventSet ) != PAPI_OK ) {
58 return -1;
59 }
60
61 uint64 c = 0;
62 while (c < iterations){ /* outer loop: one pass per requested iteration */
63 size_t i = 0;
64 while (i < 1000){ /* inner loop: fixed 1000 repetitions of the kernel */
65 /* The performance critical part */
66 /* First group of 12: MUL on even-numbered registers, ADD on odd-numbered ones */
67 r0 = MUL_VEC_PS(r0,rC);
68 r1 = ADD_VEC_PS(r1,rD);
69 r2 = MUL_VEC_PS(r2,rE);
70 r3 = ADD_VEC_PS(r3,rF);
71 r4 = MUL_VEC_PS(r4,rC);
72 r5 = ADD_VEC_PS(r5,rD);
73 r6 = MUL_VEC_PS(r6,rE);
74 r7 = ADD_VEC_PS(r7,rF);
75 r8 = MUL_VEC_PS(r8,rC);
76 r9 = ADD_VEC_PS(r9,rD);
77 rA = MUL_VEC_PS(rA,rE);
78 rB = ADD_VEC_PS(rB,rF);
79 /* Second group of 12: each register gets the opposite operation (ADD <-> MUL) from the first group */
80 r0 = ADD_VEC_PS(r0,rF);
81 r1 = MUL_VEC_PS(r1,rE);
82 r2 = ADD_VEC_PS(r2,rD);
83 r3 = MUL_VEC_PS(r3,rC);
84 r4 = ADD_VEC_PS(r4,rF);
85 r5 = MUL_VEC_PS(r5,rE);
86 r6 = ADD_VEC_PS(r6,rD);
87 r7 = MUL_VEC_PS(r7,rC);
88 r8 = ADD_VEC_PS(r8,rF);
89 r9 = MUL_VEC_PS(r9,rE);
90 rA = ADD_VEC_PS(rA,rD);
91 rB = MUL_VEC_PS(rB,rC);
92
93 i++;
94 }
95 c++;
96 }
97
98 /* Stop PAPI counters */
/* NOTE(review): listing line 99 is missing here; presumably the papi_stop_and_print call -- confirm */
100
101 /* Use data so that compiler does not eliminate it when using -O2 */
/* Pairwise reduction: twelve values -> six -> three -> one (r0) */
102 r0 = ADD_VEC_PS(r0,r1);
103 r2 = ADD_VEC_PS(r2,r3);
104 r4 = ADD_VEC_PS(r4,r5);
105 r6 = ADD_VEC_PS(r6,r7);
106 r8 = ADD_VEC_PS(r8,r9);
107 rA = ADD_VEC_PS(rA,rB);
108
109 r0 = ADD_VEC_PS(r0,r2);
110 r4 = ADD_VEC_PS(r4,r6);
111 r8 = ADD_VEC_PS(r8,rA);
112
113 r0 = ADD_VEC_PS(r0,r4);
114 r0 = ADD_VEC_PS(r0,r8);
115
116 float out = 0;
/* Copy r0 to an addressable local and read it back through a float pointer; only elements 0..3 are summed */
117 SP_VEC_TYPE temp = r0;
118 out += ((float*)&temp)[0];
119 out += ((float*)&temp)[1];
120 out += ((float*)&temp)[2];
121 out += ((float*)&temp)[3];
122
123 return out;
124}
int i
unsigned long long uint64
Definition: cat_arch.h:3
PAPI_start: Start counting hardware events in an event set.
#define PAPI_OK
Definition: f90papi.h:73
static int EventSet
Definition: init_fini.c:8
static double c[MATRIX_SIZE][MATRIX_SIZE]
Definition: libmsr_basic.c:40
static FILE * fp
void papi_stop_and_print(long long theory, int EventSet, FILE *fp)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ test_sp_mac_VEC_48()

static float test_sp_mac_VEC_48 ( uint64  iterations,
int  EventSet,
FILE *  fp 
)
static

Definition at line 130 of file vec_nonfma_sp.c.

/*
 * Body of test_sp_mac_VEC_48; the leading numbers are the listing's own
 * line numbers from vec_nonfma_sp.c.
 *
 * Seeds sixteen SP_VEC_TYPE values, starts counting on EventSet, and runs a
 * kernel of 48 MUL_VEC_PS/ADD_VEC_PS operations (four groups of 12) 1000
 * times per outer iteration. Afterwards r0..rB are folded into one vector
 * and the first four float elements of that vector are summed.
 *
 * Returns -1 when PAPI_start() does not report PAPI_OK; otherwise the
 * summed float.
 *
 * NOTE(review): fp is not referenced anywhere in the visible listing, and
 * listing line 220 is absent from this extract; presumably that line holds
 * the call that stops the counters and uses fp -- confirm against the
 * original file.
 */
130 {
131 register SP_VEC_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
132
133 /* Generate starting data: r0..rB are the values updated inside the loop; rC..rF are only ever read as the second operand */
134 r0 = SET_VEC_PS(0.01);
135 r1 = SET_VEC_PS(0.02);
136 r2 = SET_VEC_PS(0.03);
137 r3 = SET_VEC_PS(0.04);
138 r4 = SET_VEC_PS(0.05);
139 r5 = SET_VEC_PS(0.06);
140 r6 = SET_VEC_PS(0.07);
141 r7 = SET_VEC_PS(0.08);
142 r8 = SET_VEC_PS(0.09);
143 r9 = SET_VEC_PS(0.10);
144 rA = SET_VEC_PS(0.11);
145 rB = SET_VEC_PS(0.12);
146 rC = SET_VEC_PS(0.13);
147 rD = SET_VEC_PS(0.14);
148 rE = SET_VEC_PS(0.15);
149 rF = SET_VEC_PS(0.16);
150
151 /* Start PAPI counters; bail out with -1 if they cannot be started */
152 if ( PAPI_start( EventSet ) != PAPI_OK ) {
153 return -1;
154 }
155
156 uint64 c = 0;
157 while (c < iterations){ /* outer loop: one pass per requested iteration */
158 size_t i = 0;
159 while (i < 1000){ /* inner loop: fixed 1000 repetitions of the kernel */
160 /* The performance critical part */
161 /* Group 1 of 4: MUL on even-numbered registers, ADD on odd-numbered ones */
162 r0 = MUL_VEC_PS(r0,rC);
163 r1 = ADD_VEC_PS(r1,rD);
164 r2 = MUL_VEC_PS(r2,rE);
165 r3 = ADD_VEC_PS(r3,rF);
166 r4 = MUL_VEC_PS(r4,rC);
167 r5 = ADD_VEC_PS(r5,rD);
168 r6 = MUL_VEC_PS(r6,rE);
169 r7 = ADD_VEC_PS(r7,rF);
170 r8 = MUL_VEC_PS(r8,rC);
171 r9 = ADD_VEC_PS(r9,rD);
172 rA = MUL_VEC_PS(rA,rE);
173 rB = ADD_VEC_PS(rB,rF);
174 /* Group 2 of 4: each register gets the opposite operation (ADD <-> MUL) from group 1 */
175 r0 = ADD_VEC_PS(r0,rF);
176 r1 = MUL_VEC_PS(r1,rE);
177 r2 = ADD_VEC_PS(r2,rD);
178 r3 = MUL_VEC_PS(r3,rC);
179 r4 = ADD_VEC_PS(r4,rF);
180 r5 = MUL_VEC_PS(r5,rE);
181 r6 = ADD_VEC_PS(r6,rD);
182 r7 = MUL_VEC_PS(r7,rC);
183 r8 = ADD_VEC_PS(r8,rF);
184 r9 = MUL_VEC_PS(r9,rE);
185 rA = ADD_VEC_PS(rA,rD);
186 rB = MUL_VEC_PS(rB,rC);
187 /* Group 3 of 4: same statements as group 1 */
188 r0 = MUL_VEC_PS(r0,rC);
189 r1 = ADD_VEC_PS(r1,rD);
190 r2 = MUL_VEC_PS(r2,rE);
191 r3 = ADD_VEC_PS(r3,rF);
192 r4 = MUL_VEC_PS(r4,rC);
193 r5 = ADD_VEC_PS(r5,rD);
194 r6 = MUL_VEC_PS(r6,rE);
195 r7 = ADD_VEC_PS(r7,rF);
196 r8 = MUL_VEC_PS(r8,rC);
197 r9 = ADD_VEC_PS(r9,rD);
198 rA = MUL_VEC_PS(rA,rE);
199 rB = ADD_VEC_PS(rB,rF);
200 /* Group 4 of 4: same statements as group 2 */
201 r0 = ADD_VEC_PS(r0,rF);
202 r1 = MUL_VEC_PS(r1,rE);
203 r2 = ADD_VEC_PS(r2,rD);
204 r3 = MUL_VEC_PS(r3,rC);
205 r4 = ADD_VEC_PS(r4,rF);
206 r5 = MUL_VEC_PS(r5,rE);
207 r6 = ADD_VEC_PS(r6,rD);
208 r7 = MUL_VEC_PS(r7,rC);
209 r8 = ADD_VEC_PS(r8,rF);
210 r9 = MUL_VEC_PS(r9,rE);
211 rA = ADD_VEC_PS(rA,rD);
212 rB = MUL_VEC_PS(rB,rC);
213
214 i++;
215 }
216 c++;
217 }
218
219 /* Stop PAPI counters */
/* NOTE(review): listing line 220 is missing here; presumably the call that stops the counters -- confirm */
221
222 /* Use data so that compiler does not eliminate it when using -O2 */
/* Pairwise reduction: twelve values -> six -> three -> one (r0) */
223 r0 = ADD_VEC_PS(r0,r1);
224 r2 = ADD_VEC_PS(r2,r3);
225 r4 = ADD_VEC_PS(r4,r5);
226 r6 = ADD_VEC_PS(r6,r7);
227 r8 = ADD_VEC_PS(r8,r9);
228 rA = ADD_VEC_PS(rA,rB);
229
230 r0 = ADD_VEC_PS(r0,r2);
231 r4 = ADD_VEC_PS(r4,r6);
232 r8 = ADD_VEC_PS(r8,rA);
233
234 r0 = ADD_VEC_PS(r0,r4);
235 r0 = ADD_VEC_PS(r0,r8);
236
237 float out = 0;
/* Copy r0 to an addressable local and read it back through a float pointer; only elements 0..3 are summed */
238 SP_VEC_TYPE temp = r0;
239 out += ((float*)&temp)[0];
240 out += ((float*)&temp)[1];
241 out += ((float*)&temp)[2];
242 out += ((float*)&temp)[3];
243
244 return out;
245}
Here is the call graph for this function:
Here is the caller graph for this function:

◆ test_sp_mac_VEC_96()

static float test_sp_mac_VEC_96 ( uint64  iterations,
int  EventSet,
FILE *  fp 
)
static

Definition at line 251 of file vec_nonfma_sp.c.

/*
 * Body of test_sp_mac_VEC_96; the leading numbers are the listing's own
 * line numbers from vec_nonfma_sp.c.
 *
 * Seeds sixteen SP_VEC_TYPE values, starts counting on EventSet, and runs a
 * kernel of 96 MUL_VEC_PS/ADD_VEC_PS operations (eight groups of 12) 1000
 * times per outer iteration. Afterwards r0..rB are folded into one vector
 * and the first four float elements of that vector are summed.
 *
 * Returns -1 when PAPI_start() does not report PAPI_OK; otherwise the
 * summed float.
 *
 * NOTE(review): fp is not referenced anywhere in the visible listing, and
 * listing line 393 is absent from this extract; presumably that line holds
 * the call that stops the counters and uses fp -- confirm against the
 * original file.
 */
251 {
252 register SP_VEC_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
253
254 /* Generate starting data: r0..rB are the values updated inside the loop; rC..rF are only ever read as the second operand */
255 r0 = SET_VEC_PS(0.01);
256 r1 = SET_VEC_PS(0.02);
257 r2 = SET_VEC_PS(0.03);
258 r3 = SET_VEC_PS(0.04);
259 r4 = SET_VEC_PS(0.05);
260 r5 = SET_VEC_PS(0.06);
261 r6 = SET_VEC_PS(0.07);
262 r7 = SET_VEC_PS(0.08);
263 r8 = SET_VEC_PS(0.09);
264 r9 = SET_VEC_PS(0.10);
265 rA = SET_VEC_PS(0.11);
266 rB = SET_VEC_PS(0.12);
267 rC = SET_VEC_PS(0.13);
268 rD = SET_VEC_PS(0.14);
269 rE = SET_VEC_PS(0.15);
270 rF = SET_VEC_PS(0.16);
271
272 /* Start PAPI counters; bail out with -1 if they cannot be started */
273 if ( PAPI_start( EventSet ) != PAPI_OK ) {
274 return -1;
275 }
276
277 uint64 c = 0;
278 while (c < iterations){ /* outer loop: one pass per requested iteration */
279 size_t i = 0;
280 while (i < 1000){ /* inner loop: fixed 1000 repetitions of the kernel */
281 /* The performance critical part */
282 /* Group 1 of 8: MUL on even-numbered registers, ADD on odd-numbered ones */
283 r0 = MUL_VEC_PS(r0,rC);
284 r1 = ADD_VEC_PS(r1,rD);
285 r2 = MUL_VEC_PS(r2,rE);
286 r3 = ADD_VEC_PS(r3,rF);
287 r4 = MUL_VEC_PS(r4,rC);
288 r5 = ADD_VEC_PS(r5,rD);
289 r6 = MUL_VEC_PS(r6,rE);
290 r7 = ADD_VEC_PS(r7,rF);
291 r8 = MUL_VEC_PS(r8,rC);
292 r9 = ADD_VEC_PS(r9,rD);
293 rA = MUL_VEC_PS(rA,rE);
294 rB = ADD_VEC_PS(rB,rF);
295 /* Group 2 of 8: each register gets the opposite operation (ADD <-> MUL) from group 1 */
296 r0 = ADD_VEC_PS(r0,rF);
297 r1 = MUL_VEC_PS(r1,rE);
298 r2 = ADD_VEC_PS(r2,rD);
299 r3 = MUL_VEC_PS(r3,rC);
300 r4 = ADD_VEC_PS(r4,rF);
301 r5 = MUL_VEC_PS(r5,rE);
302 r6 = ADD_VEC_PS(r6,rD);
303 r7 = MUL_VEC_PS(r7,rC);
304 r8 = ADD_VEC_PS(r8,rF);
305 r9 = MUL_VEC_PS(r9,rE);
306 rA = ADD_VEC_PS(rA,rD);
307 rB = MUL_VEC_PS(rB,rC);
308 /* Group 3 of 8: same statements as group 1 */
309 r0 = MUL_VEC_PS(r0,rC);
310 r1 = ADD_VEC_PS(r1,rD);
311 r2 = MUL_VEC_PS(r2,rE);
312 r3 = ADD_VEC_PS(r3,rF);
313 r4 = MUL_VEC_PS(r4,rC);
314 r5 = ADD_VEC_PS(r5,rD);
315 r6 = MUL_VEC_PS(r6,rE);
316 r7 = ADD_VEC_PS(r7,rF);
317 r8 = MUL_VEC_PS(r8,rC);
318 r9 = ADD_VEC_PS(r9,rD);
319 rA = MUL_VEC_PS(rA,rE);
320 rB = ADD_VEC_PS(rB,rF);
321 /* Group 4 of 8: same statements as group 2 */
322 r0 = ADD_VEC_PS(r0,rF);
323 r1 = MUL_VEC_PS(r1,rE);
324 r2 = ADD_VEC_PS(r2,rD);
325 r3 = MUL_VEC_PS(r3,rC);
326 r4 = ADD_VEC_PS(r4,rF);
327 r5 = MUL_VEC_PS(r5,rE);
328 r6 = ADD_VEC_PS(r6,rD);
329 r7 = MUL_VEC_PS(r7,rC);
330 r8 = ADD_VEC_PS(r8,rF);
331 r9 = MUL_VEC_PS(r9,rE);
332 rA = ADD_VEC_PS(rA,rD);
333 rB = MUL_VEC_PS(rB,rC);
334 /* Group 5 of 8: same statements as group 1 */
335 r0 = MUL_VEC_PS(r0,rC);
336 r1 = ADD_VEC_PS(r1,rD);
337 r2 = MUL_VEC_PS(r2,rE);
338 r3 = ADD_VEC_PS(r3,rF);
339 r4 = MUL_VEC_PS(r4,rC);
340 r5 = ADD_VEC_PS(r5,rD);
341 r6 = MUL_VEC_PS(r6,rE);
342 r7 = ADD_VEC_PS(r7,rF);
343 r8 = MUL_VEC_PS(r8,rC);
344 r9 = ADD_VEC_PS(r9,rD);
345 rA = MUL_VEC_PS(rA,rE);
346 rB = ADD_VEC_PS(rB,rF);
347 /* Group 6 of 8: same statements as group 2 */
348 r0 = ADD_VEC_PS(r0,rF);
349 r1 = MUL_VEC_PS(r1,rE);
350 r2 = ADD_VEC_PS(r2,rD);
351 r3 = MUL_VEC_PS(r3,rC);
352 r4 = ADD_VEC_PS(r4,rF);
353 r5 = MUL_VEC_PS(r5,rE);
354 r6 = ADD_VEC_PS(r6,rD);
355 r7 = MUL_VEC_PS(r7,rC);
356 r8 = ADD_VEC_PS(r8,rF);
357 r9 = MUL_VEC_PS(r9,rE);
358 rA = ADD_VEC_PS(rA,rD);
359 rB = MUL_VEC_PS(rB,rC);
360 /* Group 7 of 8: same statements as group 1 */
361 r0 = MUL_VEC_PS(r0,rC);
362 r1 = ADD_VEC_PS(r1,rD);
363 r2 = MUL_VEC_PS(r2,rE);
364 r3 = ADD_VEC_PS(r3,rF);
365 r4 = MUL_VEC_PS(r4,rC);
366 r5 = ADD_VEC_PS(r5,rD);
367 r6 = MUL_VEC_PS(r6,rE);
368 r7 = ADD_VEC_PS(r7,rF);
369 r8 = MUL_VEC_PS(r8,rC);
370 r9 = ADD_VEC_PS(r9,rD);
371 rA = MUL_VEC_PS(rA,rE);
372 rB = ADD_VEC_PS(rB,rF);
373 /* Group 8 of 8: same statements as group 2 */
374 r0 = ADD_VEC_PS(r0,rF);
375 r1 = MUL_VEC_PS(r1,rE);
376 r2 = ADD_VEC_PS(r2,rD);
377 r3 = MUL_VEC_PS(r3,rC);
378 r4 = ADD_VEC_PS(r4,rF);
379 r5 = MUL_VEC_PS(r5,rE);
380 r6 = ADD_VEC_PS(r6,rD);
381 r7 = MUL_VEC_PS(r7,rC);
382 r8 = ADD_VEC_PS(r8,rF);
383 r9 = MUL_VEC_PS(r9,rE);
384 rA = ADD_VEC_PS(rA,rD);
385 rB = MUL_VEC_PS(rB,rC);
386
387 i++;
388 }
389 c++;
390 }
391
392 /* Stop PAPI counters */
/* NOTE(review): listing line 393 is missing here; presumably the call that stops the counters -- confirm */
394
395 /* Use data so that compiler does not eliminate it when using -O2 */
/* Pairwise reduction: twelve values -> six -> three -> one (r0) */
396 r0 = ADD_VEC_PS(r0,r1);
397 r2 = ADD_VEC_PS(r2,r3);
398 r4 = ADD_VEC_PS(r4,r5);
399 r6 = ADD_VEC_PS(r6,r7);
400 r8 = ADD_VEC_PS(r8,r9);
401 rA = ADD_VEC_PS(rA,rB);
402
403 r0 = ADD_VEC_PS(r0,r2);
404 r4 = ADD_VEC_PS(r4,r6);
405 r8 = ADD_VEC_PS(r8,rA);
406
407 r0 = ADD_VEC_PS(r0,r4);
408 r0 = ADD_VEC_PS(r0,r8);
409
410 float out = 0;
/* Copy r0 to an addressable local and read it back through a float pointer; only elements 0..3 are summed */
411 SP_VEC_TYPE temp = r0;
412 out += ((float*)&temp)[0];
413 out += ((float*)&temp)[1];
414 out += ((float*)&temp)[2];
415 out += ((float*)&temp)[3];
416
417 return out;
418}
Here is the call graph for this function:
Here is the caller graph for this function:

◆ test_sp_VEC()

static void test_sp_VEC ( int  instr_per_loop,
uint64  iterations,
int  EventSet,
FILE *  fp 
)
static

Definition at line 421 of file vec_nonfma_sp.c.

/*
 * Body of test_sp_VEC; the leading numbers are the listing's own line
 * numbers from vec_nonfma_sp.c.
 *
 * Selects the vector kernel and the matching test_sp_scalar_VEC_* routine
 * by instr_per_loop (24, 48 or 96), then compares the two results and
 * prints a warning to stderr when they differ. Returns nothing.
 *
 * Any other instr_per_loop value runs no kernel: both sums stay 0.0, the
 * comparison passes, and nothing is reported.
 *
 * NOTE(review): the test_sp_scalar_VEC_* bodies are not visible here;
 * presumably they repeat the same arithmetic on a single float -- confirm.
 */
422{
423 float sum = 0.0;
424 float scalar_sum = 0.0;
425
426 if ( instr_per_loop == 24 ) {
427 sum += test_sp_mac_VEC_24( iterations, EventSet, fp );
428 scalar_sum += test_sp_scalar_VEC_24( iterations );
429 }
430 else if ( instr_per_loop == 48 ) {
431 sum += test_sp_mac_VEC_48( iterations, EventSet, fp );
432 scalar_sum += test_sp_scalar_VEC_48( iterations );
433 }
434 else if ( instr_per_loop == 96 ) {
435 sum += test_sp_mac_VEC_96( iterations, EventSet, fp );
436 scalar_sum += test_sp_scalar_VEC_96( iterations );
437 }
438
/* The vector kernels return the sum of four float elements, hence the division by 4.0.
   This is an exact floating-point inequality test with no tolerance; a kernel that
   returned -1 (PAPI_start failure) is also compared here like any other value. */
439 if( sum/4.0 != scalar_sum ) {
440 fprintf(stderr, "Inconsistent FLOP results detected!\n");
441 }
442}
FILE * stderr
static float test_sp_mac_VEC_96(uint64 iterations, int EventSet, FILE *fp)
static float test_sp_mac_VEC_48(uint64 iterations, int EventSet, FILE *fp)
static float test_sp_mac_VEC_24(uint64 iterations, int EventSet, FILE *fp)
Definition: vec_nonfma_sp.c:35
float test_sp_scalar_VEC_96(uint64 iterations)
float test_sp_scalar_VEC_24(uint64 iterations)
float test_sp_scalar_VEC_48(uint64 iterations)
Here is the call graph for this function: