PAPI 7.1.0.0
Loading...
Searching...
No Matches
vec_fma_sp.c File Reference
Include dependency graph for vec_fma_sp.c:

Go to the source code of this file.

Functions

static float test_sp_mac_VEC_FMA_12 (uint64 iterations, int EventSet, FILE *fp)
 
static float test_sp_mac_VEC_FMA_24 (uint64 iterations, int EventSet, FILE *fp)
 
static float test_sp_mac_VEC_FMA_48 (uint64 iterations, int EventSet, FILE *fp)
 
static void test_sp_VEC_FMA (int instr_per_loop, uint64 iterations, int EventSet, FILE *fp)
 

Function Documentation

◆ test_sp_mac_VEC_FMA_12()

static float test_sp_mac_VEC_FMA_12 ( uint64  iterations,
int  EventSet,
FILE *  fp 
)
static

Definition at line 35 of file vec_fma_sp.c.

35 {
36 register SP_VEC_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
37 /* Kernel with 12 vector FMA calls per inner-loop pass; returns the sum of the first four float elements of the reduced result, or -1 if PAPI_start fails. */
38 /* Generate starting data: every register is initialized from a distinct constant (0.01 .. 0.16) */
39 r0 = SET_VEC_PS(0.01);
40 r1 = SET_VEC_PS(0.02);
41 r2 = SET_VEC_PS(0.03);
42 r3 = SET_VEC_PS(0.04);
43 r4 = SET_VEC_PS(0.05);
44 r5 = SET_VEC_PS(0.06);
45 r6 = SET_VEC_PS(0.07);
46 r7 = SET_VEC_PS(0.08);
47 r8 = SET_VEC_PS(0.09);
48 r9 = SET_VEC_PS(0.10);
49 rA = SET_VEC_PS(0.11);
50 rB = SET_VEC_PS(0.12);
51 rC = SET_VEC_PS(0.13);
52 rD = SET_VEC_PS(0.14);
53 rE = SET_VEC_PS(0.15);
54 rF = SET_VEC_PS(0.16);
55
56 /* Start PAPI counters; bail out with -1 before running the kernel if they cannot be started */
57 if ( PAPI_start( EventSet ) != PAPI_OK ) {
58 return -1;
59 }
60
61 uint64 c = 0;
62 while (c < iterations){
63 size_t i = 0;
64 while (i < 1000){
65
66 /* The performance critical part: 12 FMA calls; r0-r5 are overwritten, r7-rF are only read */
67 r0 = FMA_VEC_PS(r0,r7,r9);
68 r1 = FMA_VEC_PS(r1,r8,rA);
69 r2 = FMA_VEC_PS(r2,r9,rB);
70 r3 = FMA_VEC_PS(r3,rA,rC);
71 r4 = FMA_VEC_PS(r4,rB,rD);
72 r5 = FMA_VEC_PS(r5,rC,rE);
73
74 r0 = FMA_VEC_PS(r0,rD,rF);
75 r1 = FMA_VEC_PS(r1,rC,rE);
76 r2 = FMA_VEC_PS(r2,rB,rD);
77 r3 = FMA_VEC_PS(r3,rA,rC);
78 r4 = FMA_VEC_PS(r4,r9,rB);
79 r5 = FMA_VEC_PS(r5,r8,rA);
80
81 i++;
82 }
83 c++;
84 }
85
86 /* Stop PAPI counters via papi_stop_and_print (defined elsewhere); theory argument assumed to be the 12 FMAs per pass -- TODO confirm against upstream */
87 papi_stop_and_print(12, EventSet, fp);
88
89 /* Use data so that compiler does not eliminate it when using -O2 */
90 r0 = ADD_VEC_PS(r0,r1);
91 r2 = ADD_VEC_PS(r2,r3);
92 r4 = ADD_VEC_PS(r4,r5);
93 /* r6 is never written inside the loop, so it still holds its starting value here */
94 r0 = ADD_VEC_PS(r0,r6);
95 r2 = ADD_VEC_PS(r2,r4);
96
97 r0 = ADD_VEC_PS(r0,r2);
98 /* Only the first four float elements of the result vector are accumulated into the return value */
99 float out = 0;
100 SP_VEC_TYPE temp = r0;
101 out += ((float*)&temp)[0];
102 out += ((float*)&temp)[1];
103 out += ((float*)&temp)[2];
104 out += ((float*)&temp)[3];
105
106 return out;
107}
int i
unsigned long long uint64
Definition: cat_arch.h:3
PAPI_start: Start counting hardware events in an event set.
#define PAPI_OK
Definition: f90papi.h:73
static int EventSet
Definition: init_fini.c:8
static double c[MATRIX_SIZE][MATRIX_SIZE]
Definition: libmsr_basic.c:40
static FILE * fp
void papi_stop_and_print(long long theory, int EventSet, FILE *fp)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ test_sp_mac_VEC_FMA_24()

static float test_sp_mac_VEC_FMA_24 ( uint64  iterations,
int  EventSet,
FILE *  fp 
)
static

Definition at line 113 of file vec_fma_sp.c.

113 {
114 register SP_VEC_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
115 /* Kernel with 24 vector FMA calls per inner-loop pass; returns the sum of the first four float elements of the reduced result, or -1 if PAPI_start fails. */
116 /* Generate starting data: every register is initialized from a distinct constant (0.01 .. 0.16) */
117 r0 = SET_VEC_PS(0.01);
118 r1 = SET_VEC_PS(0.02);
119 r2 = SET_VEC_PS(0.03);
120 r3 = SET_VEC_PS(0.04);
121 r4 = SET_VEC_PS(0.05);
122 r5 = SET_VEC_PS(0.06);
123 r6 = SET_VEC_PS(0.07);
124 r7 = SET_VEC_PS(0.08);
125 r8 = SET_VEC_PS(0.09);
126 r9 = SET_VEC_PS(0.10);
127 rA = SET_VEC_PS(0.11);
128 rB = SET_VEC_PS(0.12);
129 rC = SET_VEC_PS(0.13);
130 rD = SET_VEC_PS(0.14);
131 rE = SET_VEC_PS(0.15);
132 rF = SET_VEC_PS(0.16);
133
134 /* Start PAPI counters; bail out with -1 before running the kernel if they cannot be started */
135 if ( PAPI_start( EventSet ) != PAPI_OK ) {
136 return -1;
137 }
138
139 uint64 c = 0;
140 while (c < iterations){
141 size_t i = 0;
142 while (i < 1000){
143
144 /* The performance critical part: 24 FMA calls (the 12-call pattern twice); r0-r5 are overwritten, r7-rF are only read */
145 r0 = FMA_VEC_PS(r0,r7,r9);
146 r1 = FMA_VEC_PS(r1,r8,rA);
147 r2 = FMA_VEC_PS(r2,r9,rB);
148 r3 = FMA_VEC_PS(r3,rA,rC);
149 r4 = FMA_VEC_PS(r4,rB,rD);
150 r5 = FMA_VEC_PS(r5,rC,rE);
151
152 r0 = FMA_VEC_PS(r0,rD,rF);
153 r1 = FMA_VEC_PS(r1,rC,rE);
154 r2 = FMA_VEC_PS(r2,rB,rD);
155 r3 = FMA_VEC_PS(r3,rA,rC);
156 r4 = FMA_VEC_PS(r4,r9,rB);
157 r5 = FMA_VEC_PS(r5,r8,rA);
158
159 r0 = FMA_VEC_PS(r0,r7,r9);
160 r1 = FMA_VEC_PS(r1,r8,rA);
161 r2 = FMA_VEC_PS(r2,r9,rB);
162 r3 = FMA_VEC_PS(r3,rA,rC);
163 r4 = FMA_VEC_PS(r4,rB,rD);
164 r5 = FMA_VEC_PS(r5,rC,rE);
165
166 r0 = FMA_VEC_PS(r0,rD,rF);
167 r1 = FMA_VEC_PS(r1,rC,rE);
168 r2 = FMA_VEC_PS(r2,rB,rD);
169 r3 = FMA_VEC_PS(r3,rA,rC);
170 r4 = FMA_VEC_PS(r4,r9,rB);
171 r5 = FMA_VEC_PS(r5,r8,rA);
172
173 i++;
174 }
175 c++;
176 }
177
178 /* Stop PAPI counters via papi_stop_and_print (defined elsewhere); theory argument assumed to be the 24 FMAs per pass -- TODO confirm against upstream */
179 papi_stop_and_print(24, EventSet, fp);
180
181 /* Use data so that compiler does not eliminate it when using -O2 */
182 r0 = ADD_VEC_PS(r0,r1);
183 r2 = ADD_VEC_PS(r2,r3);
184 r4 = ADD_VEC_PS(r4,r5);
185 /* r6 is never written inside the loop, so it still holds its starting value here */
186 r0 = ADD_VEC_PS(r0,r6);
187 r2 = ADD_VEC_PS(r2,r4);
188
189 r0 = ADD_VEC_PS(r0,r2);
190 /* Only the first four float elements of the result vector are accumulated into the return value */
191 float out = 0;
192 SP_VEC_TYPE temp = r0;
193 out += ((float*)&temp)[0];
194 out += ((float*)&temp)[1];
195 out += ((float*)&temp)[2];
196 out += ((float*)&temp)[3];
197
198 return out;
199}
Here is the call graph for this function:
Here is the caller graph for this function:

◆ test_sp_mac_VEC_FMA_48()

static float test_sp_mac_VEC_FMA_48 ( uint64  iterations,
int  EventSet,
FILE *  fp 
)
static

Definition at line 205 of file vec_fma_sp.c.

205 {
206 register SP_VEC_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
207 /* Kernel with 48 vector FMA calls per inner-loop pass; returns the sum of the first four float elements of the reduced result, or -1 if PAPI_start fails. */
208 /* Generate starting data: every register is initialized from a distinct constant (0.01 .. 0.16) */
209 r0 = SET_VEC_PS(0.01);
210 r1 = SET_VEC_PS(0.02);
211 r2 = SET_VEC_PS(0.03);
212 r3 = SET_VEC_PS(0.04);
213 r4 = SET_VEC_PS(0.05);
214 r5 = SET_VEC_PS(0.06);
215 r6 = SET_VEC_PS(0.07);
216 r7 = SET_VEC_PS(0.08);
217 r8 = SET_VEC_PS(0.09);
218 r9 = SET_VEC_PS(0.10);
219 rA = SET_VEC_PS(0.11);
220 rB = SET_VEC_PS(0.12);
221 rC = SET_VEC_PS(0.13);
222 rD = SET_VEC_PS(0.14);
223 rE = SET_VEC_PS(0.15);
224 rF = SET_VEC_PS(0.16);
225
226 /* Start PAPI counters; bail out with -1 before running the kernel if they cannot be started */
227 if ( PAPI_start( EventSet ) != PAPI_OK ) {
228 return -1;
229 }
230
231 uint64 c = 0;
232 while (c < iterations){
233 size_t i = 0;
234 while (i < 1000){
235
236 /* The performance critical part: 48 FMA calls (the 12-call pattern four times); r0-r5 are overwritten, r7-rF are only read */
237 r0 = FMA_VEC_PS(r0,r7,r9);
238 r1 = FMA_VEC_PS(r1,r8,rA);
239 r2 = FMA_VEC_PS(r2,r9,rB);
240 r3 = FMA_VEC_PS(r3,rA,rC);
241 r4 = FMA_VEC_PS(r4,rB,rD);
242 r5 = FMA_VEC_PS(r5,rC,rE);
243
244 r0 = FMA_VEC_PS(r0,rD,rF);
245 r1 = FMA_VEC_PS(r1,rC,rE);
246 r2 = FMA_VEC_PS(r2,rB,rD);
247 r3 = FMA_VEC_PS(r3,rA,rC);
248 r4 = FMA_VEC_PS(r4,r9,rB);
249 r5 = FMA_VEC_PS(r5,r8,rA);
250
251 r0 = FMA_VEC_PS(r0,r7,r9);
252 r1 = FMA_VEC_PS(r1,r8,rA);
253 r2 = FMA_VEC_PS(r2,r9,rB);
254 r3 = FMA_VEC_PS(r3,rA,rC);
255 r4 = FMA_VEC_PS(r4,rB,rD);
256 r5 = FMA_VEC_PS(r5,rC,rE);
257
258 r0 = FMA_VEC_PS(r0,rD,rF);
259 r1 = FMA_VEC_PS(r1,rC,rE);
260 r2 = FMA_VEC_PS(r2,rB,rD);
261 r3 = FMA_VEC_PS(r3,rA,rC);
262 r4 = FMA_VEC_PS(r4,r9,rB);
263 r5 = FMA_VEC_PS(r5,r8,rA);
264
265 r0 = FMA_VEC_PS(r0,r7,r9);
266 r1 = FMA_VEC_PS(r1,r8,rA);
267 r2 = FMA_VEC_PS(r2,r9,rB);
268 r3 = FMA_VEC_PS(r3,rA,rC);
269 r4 = FMA_VEC_PS(r4,rB,rD);
270 r5 = FMA_VEC_PS(r5,rC,rE);
271
272 r0 = FMA_VEC_PS(r0,rD,rF);
273 r1 = FMA_VEC_PS(r1,rC,rE);
274 r2 = FMA_VEC_PS(r2,rB,rD);
275 r3 = FMA_VEC_PS(r3,rA,rC);
276 r4 = FMA_VEC_PS(r4,r9,rB);
277 r5 = FMA_VEC_PS(r5,r8,rA);
278
279 r0 = FMA_VEC_PS(r0,r7,r9);
280 r1 = FMA_VEC_PS(r1,r8,rA);
281 r2 = FMA_VEC_PS(r2,r9,rB);
282 r3 = FMA_VEC_PS(r3,rA,rC);
283 r4 = FMA_VEC_PS(r4,rB,rD);
284 r5 = FMA_VEC_PS(r5,rC,rE);
285
286 r0 = FMA_VEC_PS(r0,rD,rF);
287 r1 = FMA_VEC_PS(r1,rC,rE);
288 r2 = FMA_VEC_PS(r2,rB,rD);
289 r3 = FMA_VEC_PS(r3,rA,rC);
290 r4 = FMA_VEC_PS(r4,r9,rB);
291 r5 = FMA_VEC_PS(r5,r8,rA);
292
293 i++;
294 }
295 c++;
296 }
297
298 /* Stop PAPI counters via papi_stop_and_print (defined elsewhere); theory argument assumed to be the 48 FMAs per pass -- TODO confirm against upstream */
299 papi_stop_and_print(48, EventSet, fp);
300
301 /* Use data so that compiler does not eliminate it when using -O2 */
302 r0 = ADD_VEC_PS(r0,r1);
303 r2 = ADD_VEC_PS(r2,r3);
304 r4 = ADD_VEC_PS(r4,r5);
305 /* r6 is never written inside the loop, so it still holds its starting value here */
306 r0 = ADD_VEC_PS(r0,r6);
307 r2 = ADD_VEC_PS(r2,r4);
308
309 r0 = ADD_VEC_PS(r0,r2);
310 /* Only the first four float elements of the result vector are accumulated into the return value */
311 float out = 0;
312 SP_VEC_TYPE temp = r0;
313 out += ((float*)&temp)[0];
314 out += ((float*)&temp)[1];
315 out += ((float*)&temp)[2];
316 out += ((float*)&temp)[3];
317
318 return out;
319}
Here is the call graph for this function:
Here is the caller graph for this function:

◆ test_sp_VEC_FMA()

static void test_sp_VEC_FMA ( int  instr_per_loop,
uint64  iterations,
int  EventSet,
FILE *  fp 
)
static

Definition at line 322 of file vec_fma_sp.c.

323{
324 float sum = 0.0;
325 float scalar_sum = 0.0;
326 /* Dispatch on instr_per_loop (12, 24 or 48): run the vector kernel first, then the scalar kernel of the same size; any other value leaves both sums at 0. */
327 if ( instr_per_loop == 12 ) {
328 sum += test_sp_mac_VEC_FMA_12( iterations, EventSet, fp );
329 scalar_sum += test_sp_scalar_VEC_FMA_12( iterations );
330 }
331 else if ( instr_per_loop == 24 ) {
332 sum += test_sp_mac_VEC_FMA_24( iterations, EventSet, fp );
333 scalar_sum += test_sp_scalar_VEC_FMA_24( iterations );
334 }
335 else if ( instr_per_loop == 48 ) {
336 sum += test_sp_mac_VEC_FMA_48( iterations, EventSet, fp );
337 scalar_sum += test_sp_scalar_VEC_FMA_48( iterations );
338 }
339 /* The vector kernels return the sum of four float elements, hence the division by 4; the comparison with the scalar result is exact and only produces a warning on stderr. */
340 if( sum/4.0 != scalar_sum ) {
341 fprintf(stderr, "FMA: Inconsistent FLOP results detected!\n");
342 }
343}
FILE * stderr
static float test_sp_mac_VEC_FMA_24(uint64 iterations, int EventSet, FILE *fp)
Definition: vec_fma_sp.c:113
static float test_sp_mac_VEC_FMA_48(uint64 iterations, int EventSet, FILE *fp)
Definition: vec_fma_sp.c:205
static float test_sp_mac_VEC_FMA_12(uint64 iterations, int EventSet, FILE *fp)
Definition: vec_fma_sp.c:35
float test_sp_scalar_VEC_FMA_12(uint64 iterations)
float test_sp_scalar_VEC_FMA_48(uint64 iterations)
float test_sp_scalar_VEC_FMA_24(uint64 iterations)
Here is the call graph for this function: