PAPI 7.1.0.0
Loading...
Searching...
No Matches
vec_fma_dp.c File Reference
Include dependency graph for vec_fma_dp.c:

Go to the source code of this file.

Functions

static double test_dp_mac_VEC_FMA_12 (uint64 iterations, int EventSet, FILE *fp)
 
static double test_dp_mac_VEC_FMA_24 (uint64 iterations, int EventSet, FILE *fp)
 
static double test_dp_mac_VEC_FMA_48 (uint64 iterations, int EventSet, FILE *fp)
 
static void test_dp_VEC_FMA (int instr_per_loop, uint64 iterations, int EventSet, FILE *fp)
 

Function Documentation

◆ test_dp_mac_VEC_FMA_12()

static double test_dp_mac_VEC_FMA_12 ( uint64  iterations,
int  EventSet,
FILE *  fp 
)
static

Definition at line 35 of file vec_fma_dp.c.

35 {
36 register DP_VEC_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
37
38 /* Generate starting data */
39 r0 = SET_VEC_PD(0.01);
40 r1 = SET_VEC_PD(0.02);
41 r2 = SET_VEC_PD(0.03);
42 r3 = SET_VEC_PD(0.04);
43 r4 = SET_VEC_PD(0.05);
44 r5 = SET_VEC_PD(0.06);
45 r6 = SET_VEC_PD(0.07);
46 r7 = SET_VEC_PD(0.08);
47 r8 = SET_VEC_PD(0.09);
48 r9 = SET_VEC_PD(0.10);
49 rA = SET_VEC_PD(0.11);
50 rB = SET_VEC_PD(0.12);
51 rC = SET_VEC_PD(0.13);
52 rD = SET_VEC_PD(0.14);
53 rE = SET_VEC_PD(0.15);
54 rF = SET_VEC_PD(0.16);
55
56 /* Start PAPI counters */
57 if ( PAPI_start( EventSet ) != PAPI_OK ) {
58 return -1;
59 }
60
61 uint64 c = 0;
62 while (c < iterations){
63 size_t i = 0;
64 while (i < 1000){
65
66 /* The performance critical part */
67 r0 = FMA_VEC_PD(r0,r7,r9);
68 r1 = FMA_VEC_PD(r1,r8,rA);
69 r2 = FMA_VEC_PD(r2,r9,rB);
70 r3 = FMA_VEC_PD(r3,rA,rC);
71 r4 = FMA_VEC_PD(r4,rB,rD);
72 r5 = FMA_VEC_PD(r5,rC,rE);
73
74 r0 = FMA_VEC_PD(r0,rD,rF);
75 r1 = FMA_VEC_PD(r1,rC,rE);
76 r2 = FMA_VEC_PD(r2,rB,rD);
77 r3 = FMA_VEC_PD(r3,rA,rC);
78 r4 = FMA_VEC_PD(r4,r9,rB);
79 r5 = FMA_VEC_PD(r5,r8,rA);
80
81 i++;
82 }
83 c++;
84 }
85
86 /* Stop PAPI counters */
87 papi_stop_and_print(12, EventSet, fp);
88
89 /* Use data so that compiler does not eliminate it when using -O2 */
90 r0 = ADD_VEC_PD(r0,r1);
91 r2 = ADD_VEC_PD(r2,r3);
92 r4 = ADD_VEC_PD(r4,r5);
93
94 r0 = ADD_VEC_PD(r0,r6);
95 r2 = ADD_VEC_PD(r2,r4);
96
97 r0 = ADD_VEC_PD(r0,r2);
98
99 double out = 0;
100 DP_VEC_TYPE temp = r0;
101 out += ((double*)&temp)[0];
102 out += ((double*)&temp)[1];
103
104 return out;
105}
int i
unsigned long long uint64
Definition: cat_arch.h:3
PAPI_start
Start counting hardware events in an event set.
#define PAPI_OK
Definition: f90papi.h:73
static int EventSet
Definition: init_fini.c:8
static double c[MATRIX_SIZE][MATRIX_SIZE]
Definition: libmsr_basic.c:40
static FILE * fp
void papi_stop_and_print(long long theory, int EventSet, FILE *fp)
Here is the call graph for this function:
Here is the caller graph for this function:

◆ test_dp_mac_VEC_FMA_24()

static double test_dp_mac_VEC_FMA_24 ( uint64  iterations,
int  EventSet,
FILE *  fp 
)
static

Definition at line 111 of file vec_fma_dp.c.

111 {
112 register DP_VEC_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
113
114 /* Generate starting data */
115 r0 = SET_VEC_PD(0.01);
116 r1 = SET_VEC_PD(0.02);
117 r2 = SET_VEC_PD(0.03);
118 r3 = SET_VEC_PD(0.04);
119 r4 = SET_VEC_PD(0.05);
120 r5 = SET_VEC_PD(0.06);
121 r6 = SET_VEC_PD(0.07);
122 r7 = SET_VEC_PD(0.08);
123 r8 = SET_VEC_PD(0.09);
124 r9 = SET_VEC_PD(0.10);
125 rA = SET_VEC_PD(0.11);
126 rB = SET_VEC_PD(0.12);
127 rC = SET_VEC_PD(0.13);
128 rD = SET_VEC_PD(0.14);
129 rE = SET_VEC_PD(0.15);
130 rF = SET_VEC_PD(0.16);
131
132 /* Start PAPI counters */
133 if ( PAPI_start( EventSet ) != PAPI_OK ) {
134 return -1;
135 }
136
137 uint64 c = 0;
138 while (c < iterations){
139 size_t i = 0;
140 while (i < 1000){
141
142 /* The performance critical part */
143 r0 = FMA_VEC_PD(r0,r7,r9);
144 r1 = FMA_VEC_PD(r1,r8,rA);
145 r2 = FMA_VEC_PD(r2,r9,rB);
146 r3 = FMA_VEC_PD(r3,rA,rC);
147 r4 = FMA_VEC_PD(r4,rB,rD);
148 r5 = FMA_VEC_PD(r5,rC,rE);
149
150 r0 = FMA_VEC_PD(r0,rD,rF);
151 r1 = FMA_VEC_PD(r1,rC,rE);
152 r2 = FMA_VEC_PD(r2,rB,rD);
153 r3 = FMA_VEC_PD(r3,rA,rC);
154 r4 = FMA_VEC_PD(r4,r9,rB);
155 r5 = FMA_VEC_PD(r5,r8,rA);
156
157 r0 = FMA_VEC_PD(r0,r7,r9);
158 r1 = FMA_VEC_PD(r1,r8,rA);
159 r2 = FMA_VEC_PD(r2,r9,rB);
160 r3 = FMA_VEC_PD(r3,rA,rC);
161 r4 = FMA_VEC_PD(r4,rB,rD);
162 r5 = FMA_VEC_PD(r5,rC,rE);
163
164 r0 = FMA_VEC_PD(r0,rD,rF);
165 r1 = FMA_VEC_PD(r1,rC,rE);
166 r2 = FMA_VEC_PD(r2,rB,rD);
167 r3 = FMA_VEC_PD(r3,rA,rC);
168 r4 = FMA_VEC_PD(r4,r9,rB);
169 r5 = FMA_VEC_PD(r5,r8,rA);
170
171 i++;
172 }
173 c++;
174 }
175
176 /* Stop PAPI counters */
177 papi_stop_and_print(24, EventSet, fp);
178
179 /* Use data so that compiler does not eliminate it when using -O2 */
180 r0 = ADD_VEC_PD(r0,r1);
181 r2 = ADD_VEC_PD(r2,r3);
182 r4 = ADD_VEC_PD(r4,r5);
183
184 r0 = ADD_VEC_PD(r0,r6);
185 r2 = ADD_VEC_PD(r2,r4);
186
187 r0 = ADD_VEC_PD(r0,r2);
188
189 double out = 0;
190 DP_VEC_TYPE temp = r0;
191 out += ((double*)&temp)[0];
192 out += ((double*)&temp)[1];
193
194 return out;
195}
Here is the call graph for this function:
Here is the caller graph for this function:

◆ test_dp_mac_VEC_FMA_48()

static double test_dp_mac_VEC_FMA_48 ( uint64  iterations,
int  EventSet,
FILE *  fp 
)
static

Definition at line 201 of file vec_fma_dp.c.

201 {
202 register DP_VEC_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
203
204 /* Generate starting data */
205 r0 = SET_VEC_PD(0.01);
206 r1 = SET_VEC_PD(0.02);
207 r2 = SET_VEC_PD(0.03);
208 r3 = SET_VEC_PD(0.04);
209 r4 = SET_VEC_PD(0.05);
210 r5 = SET_VEC_PD(0.06);
211 r6 = SET_VEC_PD(0.07);
212 r7 = SET_VEC_PD(0.08);
213 r8 = SET_VEC_PD(0.09);
214 r9 = SET_VEC_PD(0.10);
215 rA = SET_VEC_PD(0.11);
216 rB = SET_VEC_PD(0.12);
217 rC = SET_VEC_PD(0.13);
218 rD = SET_VEC_PD(0.14);
219 rE = SET_VEC_PD(0.15);
220 rF = SET_VEC_PD(0.16);
221
222 /* Start PAPI counters */
223 if ( PAPI_start( EventSet ) != PAPI_OK ) {
224 return -1;
225 }
226
227 uint64 c = 0;
228 while (c < iterations){
229 size_t i = 0;
230 while (i < 1000){
231
232 /* The performance critical part */
233 r0 = FMA_VEC_PD(r0,r7,r9);
234 r1 = FMA_VEC_PD(r1,r8,rA);
235 r2 = FMA_VEC_PD(r2,r9,rB);
236 r3 = FMA_VEC_PD(r3,rA,rC);
237 r4 = FMA_VEC_PD(r4,rB,rD);
238 r5 = FMA_VEC_PD(r5,rC,rE);
239
240 r0 = FMA_VEC_PD(r0,rD,rF);
241 r1 = FMA_VEC_PD(r1,rC,rE);
242 r2 = FMA_VEC_PD(r2,rB,rD);
243 r3 = FMA_VEC_PD(r3,rA,rC);
244 r4 = FMA_VEC_PD(r4,r9,rB);
245 r5 = FMA_VEC_PD(r5,r8,rA);
246
247 r0 = FMA_VEC_PD(r0,r7,r9);
248 r1 = FMA_VEC_PD(r1,r8,rA);
249 r2 = FMA_VEC_PD(r2,r9,rB);
250 r3 = FMA_VEC_PD(r3,rA,rC);
251 r4 = FMA_VEC_PD(r4,rB,rD);
252 r5 = FMA_VEC_PD(r5,rC,rE);
253
254 r0 = FMA_VEC_PD(r0,rD,rF);
255 r1 = FMA_VEC_PD(r1,rC,rE);
256 r2 = FMA_VEC_PD(r2,rB,rD);
257 r3 = FMA_VEC_PD(r3,rA,rC);
258 r4 = FMA_VEC_PD(r4,r9,rB);
259 r5 = FMA_VEC_PD(r5,r8,rA);
260
261 r0 = FMA_VEC_PD(r0,r7,r9);
262 r1 = FMA_VEC_PD(r1,r8,rA);
263 r2 = FMA_VEC_PD(r2,r9,rB);
264 r3 = FMA_VEC_PD(r3,rA,rC);
265 r4 = FMA_VEC_PD(r4,rB,rD);
266 r5 = FMA_VEC_PD(r5,rC,rE);
267
268 r0 = FMA_VEC_PD(r0,rD,rF);
269 r1 = FMA_VEC_PD(r1,rC,rE);
270 r2 = FMA_VEC_PD(r2,rB,rD);
271 r3 = FMA_VEC_PD(r3,rA,rC);
272 r4 = FMA_VEC_PD(r4,r9,rB);
273 r5 = FMA_VEC_PD(r5,r8,rA);
274
275 r0 = FMA_VEC_PD(r0,r7,r9);
276 r1 = FMA_VEC_PD(r1,r8,rA);
277 r2 = FMA_VEC_PD(r2,r9,rB);
278 r3 = FMA_VEC_PD(r3,rA,rC);
279 r4 = FMA_VEC_PD(r4,rB,rD);
280 r5 = FMA_VEC_PD(r5,rC,rE);
281
282 r0 = FMA_VEC_PD(r0,rD,rF);
283 r1 = FMA_VEC_PD(r1,rC,rE);
284 r2 = FMA_VEC_PD(r2,rB,rD);
285 r3 = FMA_VEC_PD(r3,rA,rC);
286 r4 = FMA_VEC_PD(r4,r9,rB);
287 r5 = FMA_VEC_PD(r5,r8,rA);
288
289 i++;
290 }
291 c++;
292 }
293
294 /* Stop PAPI counters */
295 papi_stop_and_print(48, EventSet, fp);
296
297 /* Use data so that compiler does not eliminate it when using -O2 */
298 r0 = ADD_VEC_PD(r0,r1);
299 r2 = ADD_VEC_PD(r2,r3);
300 r4 = ADD_VEC_PD(r4,r5);
301
302 r0 = ADD_VEC_PD(r0,r6);
303 r2 = ADD_VEC_PD(r2,r4);
304
305 r0 = ADD_VEC_PD(r0,r2);
306
307 double out = 0;
308 DP_VEC_TYPE temp = r0;
309 out += ((double*)&temp)[0];
310 out += ((double*)&temp)[1];
311
312 return out;
313}
Here is the call graph for this function:
Here is the caller graph for this function:

◆ test_dp_VEC_FMA()

static void test_dp_VEC_FMA ( int  instr_per_loop,
uint64  iterations,
int  EventSet,
FILE *  fp 
)
static

Definition at line 316 of file vec_fma_dp.c.

317{
318 double sum = 0.0;
319 double scalar_sum = 0.0;
320
321 if ( instr_per_loop == 12 ) {
322 sum += test_dp_mac_VEC_FMA_12( iterations, EventSet, fp );
323 scalar_sum += test_dp_scalar_VEC_FMA_12( iterations );
324 }
325 else if ( instr_per_loop == 24 ) {
326 sum += test_dp_mac_VEC_FMA_24( iterations, EventSet, fp );
327 scalar_sum += test_dp_scalar_VEC_FMA_24( iterations );
328 }
329 else if ( instr_per_loop == 48 ) {
330 sum += test_dp_mac_VEC_FMA_48( iterations, EventSet, fp );
331 scalar_sum += test_dp_scalar_VEC_FMA_48( iterations );
332 }
333
334 if( sum/2.0 != scalar_sum ) {
335 fprintf(stderr, "FMA: Inconsistent FLOP results detected!\n");
336 }
337}
FILE * stderr
static double test_dp_mac_VEC_FMA_48(uint64 iterations, int EventSet, FILE *fp)
Definition: vec_fma_dp.c:201
static double test_dp_mac_VEC_FMA_12(uint64 iterations, int EventSet, FILE *fp)
Definition: vec_fma_dp.c:35
static double test_dp_mac_VEC_FMA_24(uint64 iterations, int EventSet, FILE *fp)
Definition: vec_fma_dp.c:111
double test_dp_scalar_VEC_FMA_24(uint64 iterations)
double test_dp_scalar_VEC_FMA_12(uint64 iterations)
double test_dp_scalar_VEC_FMA_48(uint64 iterations)
Here is the call graph for this function: