PAPI 7.1.0.0
Loading...
Searching...
No Matches
instructions.c File Reference
Include dependency graph for instructions.c:

Go to the source code of this file.

Macros

#define FADD_BLOCK()   {f64_01 += f64_00; f64_02 += f64_01; f64_03 += f64_02; f64_00 += f64_03;}
 
#define F64_ADDS(_X)   {f64_00 += _X; f64_01 += _X; f64_02 += _X; f64_03 += _X; f64_04 += _X; f64_05 += _X; f64_06 += _X; f64_07 += _X; f64_08 += _X; f64_09 += _X; f64_10 += _X; f64_11 += _X;}
 
#define BUFFER_SIZE   512+2
 
#define BUFFER_SIZE   512+4
 
#define BUFFER_SIZE   512+8
 
#define BUFFER_SIZE   512+2
 
#define BUFFER_SIZE   512+4
 
#define BUFFER_SIZE   512+8
 
#define FSUB_BLOCK()   {f64_01 -= f64_00; f64_02 -= f64_01; f64_03 -= f64_02; f64_00 -= f64_03;}
 
#define F64_SUBS(_X)   {f64_00 -= _X; f64_01 -= _X; f64_02 -= _X; f64_03 -= _X; f64_04 -= _X; f64_05 -= _X; f64_06 -= _X; f64_07 -= _X; f64_08 -= _X; f64_09 -= _X; f64_10 -= _X; f64_11 -= _X;}
 
#define FMUL_BLOCK()   {f64_01 *= f64_00; f64_02 *= f64_01; f64_03 *= f64_02; f64_00 *= f64_03;}
 
#define F64_MULS(_X)   {f64_00 *= _X; f64_01 *= _X; f64_02 *= _X; f64_03 *= _X; f64_04 *= _X; f64_05 *= _X; f64_06 *= _X; f64_07 *= _X; f64_08 *= _X; f64_09 *= _X; f64_10 *= _X; f64_11 *= _X;}
 
#define FDIV_BLOCK()   {f64_01 /= f64_00; f64_02 /= f64_01; f64_03 /= f64_02; f64_00 /= f64_03;}
 
#define F64_DIVS(_X)   {f64_00 /= _X; f64_01 /= _X; f64_02 /= _X; f64_03 /= _X; f64_04 /= _X; f64_05 /= _X; f64_06 /= _X; f64_07 /= _X; f64_08 /= _X; f64_09 /= _X; f64_10 /= _X; f64_11 /= _X;}
 
#define BUFFER_SIZE   256
 
#define BUFFER_SIZE   (256+1)
 
#define BUFFER_SIZE   (256+8)
 
#define BUFFER_SIZE   256
 

Functions

void test_int_add (int p, int M, int N, int EventSet, FILE *fp)
 
void test_f64_add (int p, int M, int N, int EventSet, FILE *fp)
 
void test_f64_add_max (int p, int M, int N, int EventSet, FILE *fp)
 
void test_f64_add_DVEC128 (int p, int M, int N, int EventSet, FILE *fp)
 
void test_f64_add_DVEC256 (int p, int M, int N, int EventSet, FILE *fp)
 
void test_f64_add_DVEC512 (int p, int M, int N, int EventSet, FILE *fp)
 
void test_f64_add_SVEC128 (int p, int M, int N, int EventSet, FILE *fp)
 
void test_f64_add_SVEC256 (int p, int M, int N, int EventSet, FILE *fp)
 
void test_f64_add_SVEC512 (int p, int M, int N, int EventSet, FILE *fp)
 
void test_f64_sub (int p, int M, int N, int EventSet, FILE *fp)
 
void test_f64_sub_max (int p, int M, int N, int EventSet, FILE *fp)
 
void test_f64_mul (int p, int M, int N, int EventSet, FILE *fp)
 
void test_f64_mul_max (int p, int M, int N, int EventSet, FILE *fp)
 
void test_f64_div (int p, int M, int N, int EventSet, FILE *fp)
 
void test_f64_div_max (int p, int M, int N, int EventSet, FILE *fp)
 
void test_mem_ops_serial_RO (int p, int M, int N, int EventSet, FILE *fp)
 
void test_mem_ops_serial_RW (int p, int M, int N, int EventSet, FILE *fp)
 
void test_mem_ops_parallel_RO (int p, int M, int N, int EventSet, FILE *fp)
 
void test_mem_ops_parallel_WO (int p, int M, int N, int EventSet, FILE *fp)
 
void instr_test (int EventSet, FILE *fp)
 
void instr_driver (char *papi_event_name, hw_desc_t *hw_desc, char *outdir)
 

Variables

int sum_i32 =0
 
float sum_f32 =0.0
 
double sum_f64 =0.0
 

Macro Definition Documentation

◆ BUFFER_SIZE [1/10]

#define BUFFER_SIZE   512+2

◆ BUFFER_SIZE [2/10]

#define BUFFER_SIZE   512+4

◆ BUFFER_SIZE [3/10]

#define BUFFER_SIZE   512+8

◆ BUFFER_SIZE [4/10]

#define BUFFER_SIZE   512+2

◆ BUFFER_SIZE [5/10]

#define BUFFER_SIZE   512+4

◆ BUFFER_SIZE [6/10]

#define BUFFER_SIZE   512+8

◆ BUFFER_SIZE [7/10]

#define BUFFER_SIZE   256

◆ BUFFER_SIZE [8/10]

#define BUFFER_SIZE   (256+1)

◆ BUFFER_SIZE [9/10]

#define BUFFER_SIZE   (256+8)

◆ BUFFER_SIZE [10/10]

#define BUFFER_SIZE   256

◆ F64_ADDS

#define F64_ADDS (   _X)    {f64_00 += _X; f64_01 += _X; f64_02 += _X; f64_03 += _X; f64_04 += _X; f64_05 += _X; f64_06 += _X; f64_07 += _X; f64_08 += _X; f64_09 += _X; f64_10 += _X; f64_11 += _X;}

◆ F64_DIVS

#define F64_DIVS (   _X)    {f64_00 /= _X; f64_01 /= _X; f64_02 /= _X; f64_03 /= _X; f64_04 /= _X; f64_05 /= _X; f64_06 /= _X; f64_07 /= _X; f64_08 /= _X; f64_09 /= _X; f64_10 /= _X; f64_11 /= _X;}

◆ F64_MULS

#define F64_MULS (   _X)    {f64_00 *= _X; f64_01 *= _X; f64_02 *= _X; f64_03 *= _X; f64_04 *= _X; f64_05 *= _X; f64_06 *= _X; f64_07 *= _X; f64_08 *= _X; f64_09 *= _X; f64_10 *= _X; f64_11 *= _X;}

◆ F64_SUBS

#define F64_SUBS (   _X)    {f64_00 -= _X; f64_01 -= _X; f64_02 -= _X; f64_03 -= _X; f64_04 -= _X; f64_05 -= _X; f64_06 -= _X; f64_07 -= _X; f64_08 -= _X; f64_09 -= _X; f64_10 -= _X; f64_11 -= _X;}

◆ FADD_BLOCK

#define FADD_BLOCK ( )    {f64_01 += f64_00; f64_02 += f64_01; f64_03 += f64_02; f64_00 += f64_03;}

◆ FDIV_BLOCK

#define FDIV_BLOCK ( )    {f64_01 /= f64_00; f64_02 /= f64_01; f64_03 /= f64_02; f64_00 /= f64_03;}

◆ FMUL_BLOCK

#define FMUL_BLOCK ( )    {f64_01 *= f64_00; f64_02 *= f64_01; f64_03 *= f64_02; f64_00 *= f64_03;}

◆ FSUB_BLOCK

#define FSUB_BLOCK ( )    {f64_01 -= f64_00; f64_02 -= f64_01; f64_03 -= f64_02; f64_00 -= f64_03;}

Function Documentation

◆ instr_driver()

void instr_driver ( char *  papi_event_name,
hw_desc_t *  hw_desc,
char *  outdir 
)

Definition at line 1308 of file instructions.c.

1309{
1310 int retval = PAPI_OK;
1311 int EventSet = PAPI_NULL;
1312 FILE* ofp_papi;
1313 const char *sufx = ".instr";
1314 char *papiFileName;
1315
1316 (void)hw_desc;
1317
1318 int l = strlen(outdir)+strlen(papi_event_name)+strlen(sufx);
1319 if (NULL == (papiFileName = (char *)calloc( 1+l, sizeof(char)))) {
1320 return;
1321 }
1322 if (l != (sprintf(papiFileName, "%s%s%s", outdir, papi_event_name, sufx))) {
1323 goto error0;
1324 }
1325 if (NULL == (ofp_papi = fopen(papiFileName,"w"))) {
1326 fprintf(stderr, "Failed to open file %s.\n", papiFileName);
1327 goto error0;
1328 }
1329
1330 retval = PAPI_create_eventset( &EventSet );
1331 if (retval != PAPI_OK ){
1332 goto error1;
1333 }
1334
1335 retval = PAPI_add_named_event( EventSet, papi_event_name );
1336 if (retval != PAPI_OK ){
1337 goto error1;
1338 }
1339
1340 retval = PAPI_OK;
1341
1342 instr_test(EventSet, ofp_papi);
1343
1344 retval = PAPI_cleanup_eventset( EventSet );
1345 if (retval != PAPI_OK ){
1346 goto error1;
1347 }
1348 retval = PAPI_destroy_eventset( &EventSet );
1349 if (retval != PAPI_OK ){
1350 goto error1;
1351 }
1352
1353error1:
1354 fclose(ofp_papi);
1355error0:
1356 free(papiFileName);
1357 return;
1358}
add PAPI preset or native hardware event by name to an EventSet
Empty and destroy an EventSet.
Create a new empty PAPI EventSet.
Empty and destroy an EventSet.
#define PAPI_OK
Definition: f90papi.h:73
#define PAPI_NULL
Definition: f90papi.h:78
static int EventSet
Definition: init_fini.c:8
void instr_test(int EventSet, FILE *fp)
FILE * stderr
int fclose(FILE *__stream)
int retval
Definition: zero_fork.c:53
Here is the call graph for this function:
Here is the caller graph for this function:

◆ instr_test()

void instr_test ( int  EventSet,
FILE *  fp 
)

Definition at line 1086 of file instructions.c.

1086 {
1087 int i, j, M, N;
1088 int minM=64, minN=64;
1089 double f[4] = {1.0, 1.1892, 1.4142, 1.6818};
1090 int p = (int)getpid();
1091
1093 fprintf(fp, "# (((2+3)*N)+3)*M\n");
1094 for(i=16; i<50; i*=2){
1095 for(j=0; j<4; j++){
1096 M = (int)(i*f[j]*minM);
1097 N = (int)(i*f[j]*minN);
1099 }
1100 }
1101 fprintf(fp, "\n");
1102
1104 fprintf(fp, "# (((2+3)*N)+3)*M\n");
1105 for(i=16; i<50; i*=2){
1106 for(j=0; j<4; j++){
1107 M = (int)(i*f[j]*minM);
1108 N = (int)(i*f[j]*minN);
1110 }
1111 }
1112 fprintf(fp, "\n");
1113
1115 fprintf(fp, "# (((9+3)*N)+3)*M\n");
1116 for(i=16; i<50; i*=2){
1117 for(j=0; j<4; j++){
1118 M = (int)(i*f[j]*minM);
1119 N = (int)(i*f[j]*minN);
1121 }
1122 }
1123 fprintf(fp, "\n");
1124
1126 fprintf(fp, "# (((2+3)*N)+3)*M\n");
1127 for(i=16; i<50; i*=2){
1128 for(j=0; j<4; j++){
1129 M = (int)(i*f[j]*minM);
1130 N = (int)(i*f[j]*minN);
1132 }
1133 }
1134 fprintf(fp, "\n");
1135
1137 fprintf(fp, "# (((50.0+3)*N)+3)*M\n");
1138 for(i=16; i<50; i*=2){
1139 for(j=0; j<4; j++){
1140 M = (int)(i*f[j]*minM);
1141 N = (int)(i*f[j]*minN);
1142 test_int_add(p, M, N, EventSet, fp);
1143 }
1144 }
1145 fprintf(fp, "\n");
1146
1148 fprintf(fp, "# (((40+3)*N)+3)*M\n");
1149 for(i=16; i<50; i*=2){
1150 for(j=0; j<4; j++){
1151 M = (int)(i*f[j]*minM);
1152 N = (int)(i*f[j]*minN);
1153 test_f64_add(p, M, N, EventSet, fp);
1154 }
1155 }
1156 fprintf(fp, "\n");
1157
1159 fprintf(fp, "# (((12.0*3+5)*N)+3)*M\n");
1160 for(i=16; i<50; i*=2){
1161 for(j=0; j<4; j++){
1162 M = (int)(i*f[j]*minM);
1163 N = (int)(i*f[j]*minN);
1164 test_f64_add_max(p, M, N, EventSet, fp);
1165 }
1166 }
1167 fprintf(fp, "\n");
1168
1170 fprintf(fp, "# (((40+3)*N)+3)*M\n");
1171 for(i=16; i<50; i*=2){
1172 for(j=0; j<4; j++){
1173 M = (int)(i*f[j]*minM);
1174 N = (int)(i*f[j]*minN);
1175 test_f64_sub(p, M, N, EventSet, fp);
1176 }
1177 }
1178 fprintf(fp, "\n");
1179
1181 fprintf(fp, "# (((12.0*3+5)*N)+3)*M\n");
1182 for(i=16; i<50; i*=2){
1183 for(j=0; j<4; j++){
1184 M = (int)(i*f[j]*minM);
1185 N = (int)(i*f[j]*minN);
1186 test_f64_sub_max(p, M, N, EventSet, fp);
1187 }
1188 }
1189 fprintf(fp, "\n");
1190
1192 fprintf(fp, "# (((40+3)*N)+3)*M\n");
1193 for(i=16; i<50; i*=2){
1194 for(j=0; j<4; j++){
1195 M = (int)(i*f[j]*minM);
1196 N = (int)(i*f[j]*minN);
1197 test_f64_mul(p, M, N, EventSet, fp);
1198 }
1199 }
1200 fprintf(fp, "\n");
1201
1203 fprintf(fp, "# (((12.0*3+5)*N)+3)*M\n");
1204 for(i=16; i<50; i*=2){
1205 for(j=0; j<4; j++){
1206 M = (int)(i*f[j]*minM);
1207 N = (int)(i*f[j]*minN);
1208 test_f64_mul_max(p, M, N, EventSet, fp);
1209 }
1210 }
1211 fprintf(fp, "\n");
1212
1214 fprintf(fp, "# (((40+3)*N)+3)*M\n");
1215 for(i=16; i<50; i*=2){
1216 for(j=0; j<4; j++){
1217 M = (int)(i*f[j]*minM/4);
1218 N = (int)(i*f[j]*minN/4);
1219 test_f64_div(p, M, N, EventSet, fp);
1220 }
1221 }
1222 fprintf(fp, "\n");
1223
1225 fprintf(fp, "# (((12.0*3+5)*N)+3)*M\n");
1226 for(i=16; i<50; i*=2){
1227 for(j=0; j<4; j++){
1228 M = (int)(i*f[j]*minM/4);
1229 N = (int)(i*f[j]*minN/4);
1230 test_f64_div_max(p, M, N, EventSet, fp);
1231 }
1232 }
1233 fprintf(fp, "\n");
1234
1236 fprintf(fp, "# (((2+3)*N)+3)*M\n");
1237 for(i=16; i<50; i*=2){
1238 for(j=0; j<4; j++){
1239 M = (int)(i*f[j]*minM*2);
1240 N = (int)(i*f[j]*minN*2);
1242 }
1243 }
1244 fprintf(fp, "\n");
1245
1247 fprintf(fp, "# (((2+3)*N)+3)*M\n");
1248 for(i=16; i<50; i*=2){
1249 for(j=0; j<4; j++){
1250 M = (int)(i*f[j]*minM*2);
1251 N = (int)(i*f[j]*minN*2);
1253 }
1254 }
1255 fprintf(fp, "\n");
1256
1258 fprintf(fp, "# (((2+3)*N)+3)*M\n");
1259 for(i=16; i<50; i*=2){
1260 for(j=0; j<4; j++){
1261 M = (int)(i*f[j]*minM*2);
1262 N = (int)(i*f[j]*minN*2);
1264 }
1265 }
1266 fprintf(fp, "\n");
1267
1269 fprintf(fp, "# (((2+3)*N)+3)*M\n");
1270 for(i=16; i<50; i*=2){
1271 for(j=0; j<4; j++){
1272 M = (int)(i*f[j]*minM*2);
1273 N = (int)(i*f[j]*minN*2);
1275 }
1276 }
1277 fprintf(fp, "\n");
1278
1280 fprintf(fp, "# (((2+3)*N)+3)*M\n");
1281 for(i=16; i<50; i*=2){
1282 for(j=0; j<4; j++){
1283 M = (int)(i*f[j]*minM*2);
1284 N = (int)(i*f[j]*minN*2);
1286 }
1287 }
1288 fprintf(fp, "\n");
1289
1291 fprintf(fp, "# (((2+3)*N)+3)*M\n");
1292 for(i=16; i<50; i*=2){
1293 for(j=0; j<4; j++){
1294 M = (int)(i*f[j]*minM*2);
1295 N = (int)(i*f[j]*minN*2);
1297 }
1298 }
1299 fprintf(fp, "\n");
1300
1301 if( sum_i32 == 12345 && sum_f64 == 12.345)
1302 fprintf(stderr, "Side-effect to disable dead code elimination by the compiler. Please ignore.\n");
1303}
int i
#define N
Definition: byte_profile.c:32
double f(double a)
Definition: cpi.c:23
int sum_i32
Definition: instructions.c:12
void test_f64_add_DVEC512(int p, int M, int N, int EventSet, FILE *fp)
Definition: instructions.c:336
void test_f64_div(int p, int M, int N, int EventSet, FILE *fp)
Definition: instructions.c:772
void test_mem_ops_parallel_WO(int p, int M, int N, int EventSet, FILE *fp)
void test_mem_ops_parallel_RO(int p, int M, int N, int EventSet, FILE *fp)
Definition: instructions.c:979
void test_f64_add(int p, int M, int N, int EventSet, FILE *fp)
Definition: instructions.c:121
void test_f64_add_DVEC128(int p, int M, int N, int EventSet, FILE *fp)
Definition: instructions.c:244
double sum_f64
Definition: instructions.c:14
void test_f64_add_SVEC256(int p, int M, int N, int EventSet, FILE *fp)
Definition: instructions.c:431
void test_f64_mul(int p, int M, int N, int EventSet, FILE *fp)
Definition: instructions.c:649
void test_mem_ops_serial_RO(int p, int M, int N, int EventSet, FILE *fp)
Definition: instructions.c:895
void test_f64_add_SVEC512(int p, int M, int N, int EventSet, FILE *fp)
Definition: instructions.c:477
void test_f64_add_SVEC128(int p, int M, int N, int EventSet, FILE *fp)
Definition: instructions.c:385
void test_f64_sub_max(int p, int M, int N, int EventSet, FILE *fp)
Definition: instructions.c:579
void test_f64_add_DVEC256(int p, int M, int N, int EventSet, FILE *fp)
Definition: instructions.c:290
void test_f64_div_max(int p, int M, int N, int EventSet, FILE *fp)
Definition: instructions.c:825
void test_int_add(int p, int M, int N, int EventSet, FILE *fp)
Definition: instructions.c:16
void test_f64_sub(int p, int M, int N, int EventSet, FILE *fp)
Definition: instructions.c:526
void test_f64_add_max(int p, int M, int N, int EventSet, FILE *fp)
Definition: instructions.c:174
void test_f64_mul_max(int p, int M, int N, int EventSet, FILE *fp)
Definition: instructions.c:702
void test_mem_ops_serial_RW(int p, int M, int N, int EventSet, FILE *fp)
Definition: instructions.c:937
static FILE * fp
int
Definition: sde_internal.h:89
Here is the call graph for this function:
Here is the caller graph for this function:

◆ test_f64_add()

void test_f64_add ( int  p,
int  M,
int  N,
int  EventSet,
FILE *  fp 
)

Definition at line 121 of file instructions.c.

121 {
122 int ret;
123 long long int ev_values[2];
124 double f64_00, f64_01, f64_02, f64_03;
125
126 /* Initialize the variables with values that the compiler cannot guess. */
127 f64_00 = (double)p/1.02;
128 f64_01 = -(double)p/1.03;
129 f64_02 = (double)p/1.04;
130 f64_03 = -(double)p/1.05;
131
132 // Start the counters.
133 ret = PAPI_start(EventSet);
134 if ( PAPI_OK != ret ) {
135 fprintf(stderr, "PAPI_start() error: %s\n", PAPI_strerror(ret));
136 // If we can't measure events, no need to run the kernel.
137 goto clean_up;
138 }
139
140#define FADD_BLOCK() {f64_01 += f64_00; f64_02 += f64_01; f64_03 += f64_02; f64_00 += f64_03;}
141
142 for(int i=0; i<M; i++){
143 for(int j=0; j<N; j++){
144 FADD_BLOCK();
145 FADD_BLOCK();
146 FADD_BLOCK();
147 FADD_BLOCK();
148 FADD_BLOCK();
149 FADD_BLOCK();
150 FADD_BLOCK();
151 FADD_BLOCK();
152 FADD_BLOCK();
153 FADD_BLOCK();
154 }
155 }
156
157 ret = PAPI_stop(EventSet, ev_values);
158 if ( PAPI_OK != ret ) {
159 fprintf(stderr, "PAPI_stop() error: %s\n", PAPI_strerror(ret));
160 // If we can't measure events, no need to print anything.
161 goto clean_up;
162 }
163 long long int fp_op_count = 40LL*N*M; // 10 FADD_BLOCK() expansions x 4 additions each = 40 FP operations per inner-loop iteration.
164 fprintf(fp, "%d %lld # FP_ADD_count: %lld (%.3lf)\n", N, ev_values[0], fp_op_count, (double)ev_values[0]/fp_op_count);
165
166 sum_f64 += f64_00 + f64_01 + f64_02 + f64_03;
167
168clean_up:
169
170 return;
171}
Start counting hardware events in an event set.
Stop counting hardware events in an event set.
Returns a string describing the PAPI error code.
#define FADD_BLOCK()
Here is the caller graph for this function:

◆ test_f64_add_DVEC128()

void test_f64_add_DVEC128 ( int  p,
int  M,
int  N,
int  EventSet,
FILE *  fp 
)

Definition at line 244 of file instructions.c.

244 {
245 int ret;
246 long long int ev_values[2];
247#undef BUFFER_SIZE
248#define BUFFER_SIZE 512+2
249 double a[BUFFER_SIZE];
250 double b[BUFFER_SIZE];
251
252 /* Initialize the arrays with values that the compiler cannot guess. */
253 for(int i=0; i<BUFFER_SIZE; i++){
254 a[i] = p/(i+1.2);
255 b[i] = -p/(i+1.3);
256 }
257
258 // Start the counters.
259 ret = PAPI_start(EventSet);
260 if ( PAPI_OK != ret ) {
261 fprintf(stderr, "PAPI_start() error: %s\n", PAPI_strerror(ret));
262 // If we can't measure events, no need to run the kernel.
263 goto clean_up;
264 }
265
266 long long int UB=1LL*M*N/(BUFFER_SIZE-2);
267 for(int i=0; i<UB; i++){
268 for(int j=2; j<BUFFER_SIZE; j++){
269 a[j] = a[j-2] + b[j];
270 }
271 }
272
273 ret = PAPI_stop(EventSet, ev_values);
274 if ( PAPI_OK != ret ) {
275 fprintf(stderr, "PAPI_stop() error: %s\n", PAPI_strerror(ret));
276 // If we can't measure events, no need to print anything.
277 goto clean_up;
278 }
279 fprintf(fp, "%d %lld # FP_ADD_DVEC128_count: %lld (%.3lf)\n", N, ev_values[0], N*M/2LL, (double)ev_values[0]/(N*M/2.0));
280
281 for(int j=0; j<BUFFER_SIZE; j++){
282 sum_f64 += a[j];
283 }
284
285clean_up:
286
287 return;
288}
#define BUFFER_SIZE
static double a[MATRIX_SIZE][MATRIX_SIZE]
Definition: libmsr_basic.c:38
static double b[MATRIX_SIZE][MATRIX_SIZE]
Definition: libmsr_basic.c:39
Here is the caller graph for this function:

◆ test_f64_add_DVEC256()

void test_f64_add_DVEC256 ( int  p,
int  M,
int  N,
int  EventSet,
FILE *  fp 
)

Definition at line 290 of file instructions.c.

290 {
291 int ret;
292 long long int ev_values[2];
293#undef BUFFER_SIZE
294#define BUFFER_SIZE 512+4
295 double a[BUFFER_SIZE];
296 double b[BUFFER_SIZE];
297
298 /* Initialize the arrays with values that the compiler cannot guess. */
299 for(int i=0; i<BUFFER_SIZE; i++){
300 a[i] = p/(i+1.2);
301 b[i] = -p/(i+1.3);
302 }
303
304 // Start the counters.
305 ret = PAPI_start(EventSet);
306 if ( PAPI_OK != ret ) {
307 fprintf(stderr, "PAPI_start() error: %s\n", PAPI_strerror(ret));
308 // If we can't measure events, no need to run the kernel.
309 goto clean_up;
310 }
311
312 long long int UB=1LL*M*N/(BUFFER_SIZE-4);
313 for(int i=0; i<UB; i++){
314 for(int j=4; j<BUFFER_SIZE; j++){
315 a[j] = a[j-4] + b[j];
316 }
317 }
318
319 ret = PAPI_stop(EventSet, ev_values);
320 if ( PAPI_OK != ret ) {
321 fprintf(stderr, "PAPI_stop() error: %s\n", PAPI_strerror(ret));
322 // If we can't measure events, no need to print anything.
323 goto clean_up;
324 }
325 fprintf(fp, "%d %lld # FP_ADD_DVEC256_count: %lld (%.3lf)\n", N, ev_values[0], N*M/4LL, (double)ev_values[0]/(N*M/4.0));
326
327 for(int j=0; j<BUFFER_SIZE; j++){
328 sum_f64 += a[j];
329 }
330
331clean_up:
332
333 return;
334}
Here is the caller graph for this function:

◆ test_f64_add_DVEC512()

void test_f64_add_DVEC512 ( int  p,
int  M,
int  N,
int  EventSet,
FILE *  fp 
)

Definition at line 336 of file instructions.c.

336 {
337 int ret;
338 long long int ev_values[2];
339#undef BUFFER_SIZE
340#define BUFFER_SIZE 512+8
341 double a[BUFFER_SIZE];
342 double b[BUFFER_SIZE];
343
344 /* Initialize the arrays with values that the compiler cannot guess. */
345 for(int i=0; i<BUFFER_SIZE; i++){
346 a[i] = p/(i+1.2);
347 b[i] = -p/(i+1.3);
348 }
349
350 // Start the counters.
351 ret = PAPI_start(EventSet);
352 if ( PAPI_OK != ret ) {
353 fprintf(stderr, "PAPI_start() error: %s\n", PAPI_strerror(ret));
354 // If we can't measure events, no need to run the kernel.
355 goto clean_up;
356 }
357
358 long long int UB=1LL*M*N/(BUFFER_SIZE-8);
359 for(int i=0; i<UB; i++){
360 for(int j=8; j<BUFFER_SIZE; j++){
361 a[j] = a[j-8] + b[j];
362 }
363 }
364
365 ret = PAPI_stop(EventSet, ev_values);
366 if ( PAPI_OK != ret ) {
367 fprintf(stderr, "PAPI_stop() error: %s\n", PAPI_strerror(ret));
368 // If we can't measure events, no need to print anything.
369 goto clean_up;
370 }
371 fprintf(fp, "%d %lld # FP_ADD_DVEC512_count: %lld (%.3lf)\n", N, ev_values[0], N*M/8LL, (double)ev_values[0]/(N*M/8.0));
372
373 for(int j=0; j<BUFFER_SIZE; j++){
374 sum_f64 += a[j];
375 }
376
377clean_up:
378
379 return;
380}
Here is the caller graph for this function:

◆ test_f64_add_max()

void test_f64_add_max ( int  p,
int  M,
int  N,
int  EventSet,
FILE *  fp 
)

Definition at line 174 of file instructions.c.

174 {
175 int ret;
176 long long int ev_values[2];
177 double f64_00, f64_01, f64_02, f64_03, f64_04, f64_05, f64_06, f64_07;
178 double f64_08, f64_09, f64_10, f64_11;
179 double f64_100, f64_101, f64_102;
180
181 /* Initialize the variables with values that the compiler cannot guess. */
182 f64_00 = p/1.2;
183 f64_01 = -p/1.3;
184 f64_02 = p/1.4;
185 f64_03 = -p/1.5;
186 f64_04 = p/1.6;
187 f64_05 = -p/1.7;
188 f64_06 = p/1.8;
189 f64_07 = -p/1.9;
190 f64_08 = p/2.0;
191 f64_09 = -p/2.1;
192 f64_10 = p/2.2;
193 f64_11 = -p/2.3;
194
195 f64_100 = 0.00100;
196 f64_101 = -0.00101;
197 f64_102 = 0.00102;
198
199 // Start the counters.
200 ret = PAPI_start(EventSet);
201 if ( PAPI_OK != ret ) {
202 fprintf(stderr, "PAPI_start() error: %s\n", PAPI_strerror(ret));
203 // If we can't measure events, no need to run the kernel.
204 goto clean_up;
205 }
206
207 if( p == 123456 ){
208 p /= 2;
209 f64_100 *= 1.045;
210 f64_101 *= 1.054;
211 f64_102 *= 1.067;
212 }
213
214#define F64_ADDS(_X) {f64_00 += _X; f64_01 += _X; f64_02 += _X; f64_03 += _X; f64_04 += _X; f64_05 += _X; f64_06 += _X; f64_07 += _X; f64_08 += _X; f64_09 += _X; f64_10 += _X; f64_11 += _X;}
215
216 for(int i=0; i<M; i++){
217 for(int j=0; j<N; j++){
218 F64_ADDS(f64_100); F64_ADDS(f64_101); F64_ADDS(f64_102);
219 if( p < 4 ){
220 F64_ADDS(f64_00); F64_ADDS(f64_01); F64_ADDS(f64_02);
221 }
222 }
223 }
224
225 ret = PAPI_stop(EventSet, ev_values);
226 if ( PAPI_OK != ret ) {
227 fprintf(stderr, "PAPI_stop() error: %s\n", PAPI_strerror(ret));
228 // If we can't measure events, no need to print anything.
229 goto clean_up;
230 }
231 fprintf(fp, "%d %lld # FP_ADD_count_ILP12: %lld (%.3lf)\n", N, ev_values[0], 12LL*3LL*N*M, (double)ev_values[0]/(12.0*3.0*N*M));
232
233 sum_f64 += f64_00 + f64_01 + f64_02 + f64_03 + f64_04 + f64_05 + f64_06 + f64_07;
234 sum_f64 += f64_08 + f64_09 + f64_10 + f64_11;
235
236clean_up:
237
238 return;
239}
#define F64_ADDS(_X)
Here is the caller graph for this function:

◆ test_f64_add_SVEC128()

void test_f64_add_SVEC128 ( int  p,
int  M,
int  N,
int  EventSet,
FILE *  fp 
)

Definition at line 385 of file instructions.c.

385 {
386 int ret;
387 long long int ev_values[2];
388#undef BUFFER_SIZE
389#define BUFFER_SIZE 512+2
390 float a[BUFFER_SIZE];
391 float b[BUFFER_SIZE];
392
393 /* Initialize the arrays with values that the compiler cannot guess. */
394 for(int i=0; i<BUFFER_SIZE; i++){
395 a[i] = p/(i+1.2);
396 b[i] = -p/(i+1.3);
397 }
398
399 // Start the counters.
400 ret = PAPI_start(EventSet);
401 if ( PAPI_OK != ret ) {
402 fprintf(stderr, "PAPI_start() error: %s\n", PAPI_strerror(ret));
403 // If we can't measure events, no need to run the kernel.
404 goto clean_up;
405 }
406
407 long long int UB=1LL*M*N/(BUFFER_SIZE-2);
408 for(int i=0; i<UB; i++){
409 for(int j=2; j<BUFFER_SIZE; j++){
410 a[j] = a[j-2] + b[j];
411 }
412 }
413
414 ret = PAPI_stop(EventSet, ev_values);
415 if ( PAPI_OK != ret ) {
416 fprintf(stderr, "PAPI_stop() error: %s\n", PAPI_strerror(ret));
417 // If we can't measure events, no need to print anything.
418 goto clean_up;
419 }
420 fprintf(fp, "%d %lld # FP_ADD_SVEC128_count: %lld (%.3lf)\n", N, ev_values[0], N*M/2LL, (double)ev_values[0]/(N*M/2.0));
421
422 for(int j=0; j<BUFFER_SIZE; j++){
423 sum_f64 += (float)a[j];
424 }
425
426clean_up:
427
428 return;
429}
Here is the caller graph for this function:

◆ test_f64_add_SVEC256()

void test_f64_add_SVEC256 ( int  p,
int  M,
int  N,
int  EventSet,
FILE *  fp 
)

Definition at line 431 of file instructions.c.

431 {
432 int ret;
433 long long int ev_values[2];
434#undef BUFFER_SIZE
435#define BUFFER_SIZE 512+4
436 float a[BUFFER_SIZE];
437 float b[BUFFER_SIZE];
438
439 /* Initialize the arrays with values that the compiler cannot guess. */
440 for(int i=0; i<BUFFER_SIZE; i++){
441 a[i] = p/(i+1.2);
442 b[i] = -p/(i+1.3);
443 }
444
445 // Start the counters.
446 ret = PAPI_start(EventSet);
447 if ( PAPI_OK != ret ) {
448 fprintf(stderr, "PAPI_start() error: %s\n", PAPI_strerror(ret));
449 // If we can't measure events, no need to run the kernel.
450 goto clean_up;
451 }
452
453 long long int UB=1LL*M*N/(BUFFER_SIZE-4);
454 for(int i=0; i<UB; i++){
455 for(int j=4; j<BUFFER_SIZE; j++){
456 a[j] = a[j-4] + b[j];
457 }
458 }
459
460 ret = PAPI_stop(EventSet, ev_values);
461 if ( PAPI_OK != ret ) {
462 fprintf(stderr, "PAPI_stop() error: %s\n", PAPI_strerror(ret));
463 // If we can't measure events, no need to print anything.
464 goto clean_up;
465 }
466 fprintf(fp, "%d %lld # FP_ADD_SVEC256_count: %lld (%.3lf)\n", N, ev_values[0], N*M/4LL, (double)ev_values[0]/(N*M/4.0));
467
468 for(int j=0; j<BUFFER_SIZE; j++){
469 sum_f64 += (float)a[j];
470 }
471
472clean_up:
473
474 return;
475}
Here is the caller graph for this function:

◆ test_f64_add_SVEC512()

void test_f64_add_SVEC512 ( int  p,
int  M,
int  N,
int  EventSet,
FILE *  fp 
)

Definition at line 477 of file instructions.c.

477 {
478 int ret;
479 long long int ev_values[2];
480#undef BUFFER_SIZE
481#define BUFFER_SIZE 512+8
482 float a[BUFFER_SIZE];
483 float b[BUFFER_SIZE];
484
485 /* Initialize the arrays with values that the compiler cannot guess. */
486 for(int i=0; i<BUFFER_SIZE; i++){
487 a[i] = p/(i+1.2);
488 b[i] = -p/(i+1.3);
489 }
490
491 // Start the counters.
492 ret = PAPI_start(EventSet);
493 if ( PAPI_OK != ret ) {
494 fprintf(stderr, "PAPI_start() error: %s\n", PAPI_strerror(ret));
495 // If we can't measure events, no need to run the kernel.
496 goto clean_up;
497 }
498
499 long long int UB=1LL*M*N/(BUFFER_SIZE-8);
500 for(int i=0; i<UB; i++){
501 for(int j=8; j<BUFFER_SIZE; j++){
502 a[j] = a[j-8] + b[j];
503 }
504 }
505
506 ret = PAPI_stop(EventSet, ev_values);
507 if ( PAPI_OK != ret ) {
508 fprintf(stderr, "PAPI_stop() error: %s\n", PAPI_strerror(ret));
509 // If we can't measure events, no need to print anything.
510 goto clean_up;
511 }
512 fprintf(fp, "%d %lld # FP_ADD_SVEC512_count: %lld (%.3lf)\n", N, ev_values[0], N*M/8LL, (double)ev_values[0]/(N*M/8.0));
513
514 for(int j=0; j<BUFFER_SIZE; j++){
515 sum_f64 += (float)a[j];
516 }
517
518clean_up:
519
520 return;
521}
Here is the caller graph for this function:

◆ test_f64_div()

void test_f64_div ( int  p,
int  M,
int  N,
int  EventSet,
FILE *  fp 
)

Definition at line 772 of file instructions.c.

772 {
773 int ret;
774 long long int ev_values[2];
775 double f64_00, f64_01, f64_02, f64_03;
776
777 /* Initialize the variables with values that the compiler cannot guess. */
778 f64_00 = 1.0 + 1.0/(1000.1*(double)p);
779 f64_01 = 1.0 + 1.0/(1000.2*(double)p);
780 f64_02 = 1.0 + 1.0/(1000.3*(double)p);
781 f64_03 = 1.0 + 1.0/(1000.4*(double)p);
782
783 // Start the counters.
784 ret = PAPI_start(EventSet);
785 if ( PAPI_OK != ret ) {
786 fprintf(stderr, "PAPI_start() error: %s\n", PAPI_strerror(ret));
787 // If we can't measure events, no need to run the kernel.
788 goto clean_up;
789 }
790
791#define FDIV_BLOCK() {f64_01 /= f64_00; f64_02 /= f64_01; f64_03 /= f64_02; f64_00 /= f64_03;}
792
793 for(int i=0; i<M; i++){
794 for(int j=0; j<N; j++){
795 FDIV_BLOCK();
796 FDIV_BLOCK();
797 FDIV_BLOCK();
798 FDIV_BLOCK();
799 FDIV_BLOCK();
800 FDIV_BLOCK();
801 FDIV_BLOCK();
802 FDIV_BLOCK();
803 FDIV_BLOCK();
804 FDIV_BLOCK();
805 }
806 }
807
808 ret = PAPI_stop(EventSet, ev_values);
809 if ( PAPI_OK != ret ) {
810 fprintf(stderr, "PAPI_stop() error: %s\n", PAPI_strerror(ret));
811 // If we can't measure events, no need to print anything.
812 goto clean_up;
813 }
814 long long int fp_op_count = 40LL*N*M; // 10 FDIV_BLOCK() expansions x 4 divisions each = 40 FP operations per inner-loop iteration.
815 fprintf(fp, "%d %lld # FP_DIV_count: %lld (%.3lf)\n", N, ev_values[0], fp_op_count, (double)ev_values[0]/fp_op_count);
816
817 sum_f64 += f64_00 + f64_01 + f64_02 + f64_03;
818
819clean_up:
820
821 return;
822}
#define FDIV_BLOCK()
Here is the caller graph for this function:

◆ test_f64_div_max()

void test_f64_div_max ( int  p,
int  M,
int  N,
int  EventSet,
FILE *  fp 
)

Definition at line 825 of file instructions.c.

825 {
826 int ret;
827 long long int ev_values[2];
828 double f64_00, f64_01, f64_02, f64_03, f64_04, f64_05, f64_06, f64_07;
829 double f64_08, f64_09, f64_10, f64_11;
830 double f64_100, f64_101, f64_102;
831
832 /* Initialize the variables with values that the compiler cannot guess. */
833 f64_00 = p/431.2;
834 f64_01 = -p/431.3;
835 f64_02 = p/431.4;
836 f64_03 = -p/431.5;
837 f64_04 = p/431.6;
838 f64_05 = -p/431.7;
839 f64_06 = p/431.8;
840 f64_07 = -p/431.9;
841 f64_08 = p/432.0;
842 f64_09 = -p/432.1;
843 f64_10 = p/432.2;
844 f64_11 = -p/432.3;
845
846 f64_100 = 1.00001;
847 f64_101 = -1.00002;
848 f64_102 = 1.00003;
849
850 // Start the counters.
851 ret = PAPI_start(EventSet);
852 if ( PAPI_OK != ret ) {
853 fprintf(stderr, "PAPI_start() error: %s\n", PAPI_strerror(ret));
854 // If we can't measure events, no need to run the kernel.
855 goto clean_up;
856 }
857
858 if( p == 123456 ){
859 p /= 2;
860 f64_100 *= 1.0045;
861 f64_101 *= 1.0054;
862 f64_102 *= 1.0067;
863 }
864
865#define F64_DIVS(_X) {f64_00 /= _X; f64_01 /= _X; f64_02 /= _X; f64_03 /= _X; f64_04 /= _X; f64_05 /= _X; f64_06 /= _X; f64_07 /= _X; f64_08 /= _X; f64_09 /= _X; f64_10 /= _X; f64_11 /= _X;}
866
867 for(int i=0; i<M; i++){
868 for(int j=0; j<N; j++){
869 F64_DIVS(f64_100); F64_DIVS(f64_101); F64_DIVS(f64_102);
870 if( p < 4 ){
871 F64_DIVS(f64_00); F64_DIVS(f64_01); F64_DIVS(f64_02);
872 }
873 }
874 }
875
876 ret = PAPI_stop(EventSet, ev_values);
877 if ( PAPI_OK != ret ) {
878 fprintf(stderr, "PAPI_stop() error: %s\n", PAPI_strerror(ret));
879 // If we can't measure events, no need to print anything.
880 goto clean_up;
881 }
882 fprintf(fp, "%d %lld # FP_DIV_count_ILP12: %lld (%.3lf)\n", N, ev_values[0], 12LL*3LL*N*M, (double)ev_values[0]/(12.0*3.0*N*M));
883
884 sum_f64 += f64_00 + f64_01 + f64_02 + f64_03 + f64_04 + f64_05 + f64_06 + f64_07;
885 sum_f64 += f64_08 + f64_09 + f64_10 + f64_11;
886
887clean_up:
888
889 return;
890}
#define F64_DIVS(_X)
Here is the caller graph for this function:

◆ test_f64_mul()

void test_f64_mul ( int  p,
int  M,
int  N,
int  EventSet,
FILE *  fp 
)

Definition at line 649 of file instructions.c.

649 {
650 int ret;
651 long long int ev_values[2];
652 double f64_00, f64_01, f64_02, f64_03;
653
654 /* Initialize the variables with values that the compiler cannot guess. */
655 f64_00 = (double)p/1.02;
656 f64_01 = 1.03/(double)p;
657 f64_02 = (double)p/1.04;
658 f64_03 = 1.05/(double)p;
659
660 // Start the counters.
661 ret = PAPI_start(EventSet);
662 if ( PAPI_OK != ret ) {
663 fprintf(stderr, "PAPI_start() error: %s\n", PAPI_strerror(ret));
664 // If we can't measure events, no need to run the kernel.
665 goto clean_up;
666 }
667
668#define FMUL_BLOCK() {f64_01 *= f64_00; f64_02 *= f64_01; f64_03 *= f64_02; f64_00 *= f64_03;}
669
670 for(int i=0; i<M; i++){
671 for(int j=0; j<N; j++){
672 FMUL_BLOCK();
673 FMUL_BLOCK();
674 FMUL_BLOCK();
675 FMUL_BLOCK();
676 FMUL_BLOCK();
677 FMUL_BLOCK();
678 FMUL_BLOCK();
679 FMUL_BLOCK();
680 FMUL_BLOCK();
681 FMUL_BLOCK();
682 }
683 }
684
685 ret = PAPI_stop(EventSet, ev_values);
686 if ( PAPI_OK != ret ) {
687 fprintf(stderr, "PAPI_stop() error: %s\n", PAPI_strerror(ret));
688 // If we can't measure events, no need to print anything.
689 goto clean_up;
690 }
691 long long int fp_op_count = 40LL*N*M; // 10 FMUL_BLOCK() expansions x 4 multiplications each = 40 FP operations per inner-loop iteration.
692 fprintf(fp, "%d %lld # FP_MUL_count: %lld (%.3lf)\n", N, ev_values[0], fp_op_count, (double)ev_values[0]/fp_op_count);
693
694 sum_f64 += f64_00 + f64_01 + f64_02 + f64_03;
695
696clean_up:
697
698 return;
699}
#define FMUL_BLOCK()
Here is the caller graph for this function:

◆ test_f64_mul_max()

void test_f64_mul_max ( int  p,
int  M,
int  N,
int  EventSet,
FILE *  fp 
)

Definition at line 702 of file instructions.c.

702 {
703 int ret;
704 long long int ev_values[2];
705 double f64_00, f64_01, f64_02, f64_03, f64_04, f64_05, f64_06, f64_07;
706 double f64_08, f64_09, f64_10, f64_11;
707 double f64_100, f64_101, f64_102;
708
709 /* Initialize the variables with values that the compiler cannot guess. */
710 f64_00 = p/431.2;
711 f64_01 = -p/431.3;
712 f64_02 = p/431.4;
713 f64_03 = -p/431.5;
714 f64_04 = p/431.6;
715 f64_05 = -p/431.7;
716 f64_06 = p/431.8;
717 f64_07 = -p/431.9;
718 f64_08 = p/432.0;
719 f64_09 = -p/432.1;
720 f64_10 = p/432.2;
721 f64_11 = -p/432.3;
722
723 f64_100 = 1.00001;
724 f64_101 = -1.00002;
725 f64_102 = 1.00003;
726
727 // Start the counters.
728 ret = PAPI_start(EventSet);
729 if ( PAPI_OK != ret ) {
730 fprintf(stderr, "PAPI_start() error: %s\n", PAPI_strerror(ret));
731 // If we can't measure events, no need to run the kernel.
732 goto clean_up;
733 }
734
735 if( p == 123456 ){
736 p /= 2;
737 f64_100 *= 1.0045;
738 f64_101 *= 1.0054;
739 f64_102 *= 1.0067;
740 }
741
742#define F64_MULS(_X) {f64_00 *= _X; f64_01 *= _X; f64_02 *= _X; f64_03 *= _X; f64_04 *= _X; f64_05 *= _X; f64_06 *= _X; f64_07 *= _X; f64_08 *= _X; f64_09 *= _X; f64_10 *= _X; f64_11 *= _X;}
743
744 for(int i=0; i<M; i++){
745 for(int j=0; j<N; j++){
746 F64_MULS(f64_100); F64_MULS(f64_101); F64_MULS(f64_102);
747 if( p < 4 ){
748 F64_MULS(f64_00); F64_MULS(f64_01); F64_MULS(f64_02);
749 }
750 }
751 }
752
753 ret = PAPI_stop(EventSet, ev_values);
754 if ( PAPI_OK != ret ) {
755 fprintf(stderr, "PAPI_stop() error: %s\n", PAPI_strerror(ret));
756 // If we can't measure events, no need to print anything.
757 goto clean_up;
758 }
759 fprintf(fp, "%d %lld # FP_MUL_count_ILP12: %lld (%.3lf)\n", N, ev_values[0], 12LL*3LL*N*M, (double)ev_values[0]/(12.0*3.0*N*M));
760
761 sum_f64 += f64_00 + f64_01 + f64_02 + f64_03 + f64_04 + f64_05 + f64_06 + f64_07;
762 sum_f64 += f64_08 + f64_09 + f64_10 + f64_11;
763
764clean_up:
765
766 return;
767}
#define F64_MULS(_X)
Here is the caller graph for this function:

◆ test_f64_sub()

void test_f64_sub ( int  p,
int  M,
int  N,
int  EventSet,
FILE *  fp 
)

Definition at line 526 of file instructions.c.

526 {
527 int ret;
528 long long int ev_values[2];
529 double f64_00, f64_01, f64_02, f64_03;
530
531 /* Initialize the variables with values that the compiler cannot guess. */
532 f64_00 = (double)p/1.02;
533 f64_01 = -(double)p/1.03;
534 f64_02 = (double)p/1.04;
535 f64_03 = -(double)p/1.05;
536
537 // Start the counters.
538 ret = PAPI_start(EventSet);
539 if ( PAPI_OK != ret ) {
540 fprintf(stderr, "PAPI_start() error: %s\n", PAPI_strerror(ret));
541 // If we can't measure events, no need to run the kernel.
542 goto clean_up;
543 }
544
545#define FSUB_BLOCK() {f64_01 -= f64_00; f64_02 -= f64_01; f64_03 -= f64_02; f64_00 -= f64_03;}
546
547 for(int i=0; i<M; i++){
548 for(int j=0; j<N; j++){
549 FSUB_BLOCK();
550 FSUB_BLOCK();
551 FSUB_BLOCK();
552 FSUB_BLOCK();
553 FSUB_BLOCK();
554 FSUB_BLOCK();
555 FSUB_BLOCK();
556 FSUB_BLOCK();
557 FSUB_BLOCK();
558 FSUB_BLOCK();
559 }
560 }
561
562 ret = PAPI_stop(EventSet, ev_values);
563 if ( PAPI_OK != ret ) {
564 fprintf(stderr, "PAPI_stop() error: %s\n", PAPI_strerror(ret));
565 // If we can't measure events, no need to print anything.
566 goto clean_up;
567 }
568 long long int fp_op_count = 40LL*N*M; // 10 FSUB_BLOCK()s x 4 subtractions each = 40 FP operations per inner iteration.
569 fprintf(fp, "%d %lld # FP_SUB_count: %lld (%.3lf)\n", N, ev_values[0], fp_op_count, (double)ev_values[0]/fp_op_count);
570
571 sum_f64 += f64_00 + f64_01 + f64_02 + f64_03;
572
573clean_up:
574
575 return;
576}
#define FSUB_BLOCK()
Here is the caller graph for this function:

◆ test_f64_sub_max()

void test_f64_sub_max ( int  p,
int  M,
int  N,
int  EventSet,
FILE *  fp 
)

Definition at line 579 of file instructions.c.

579 {
580 int ret;
581 long long int ev_values[2];
582 double f64_00, f64_01, f64_02, f64_03, f64_04, f64_05, f64_06, f64_07;
583 double f64_08, f64_09, f64_10, f64_11;
584 double f64_100, f64_101, f64_102;
585
586 /* Initialize the variables with values that the compiler cannot guess. */
587 f64_00 = p/1.2;
588 f64_01 = -p/1.3;
589 f64_02 = p/1.4;
590 f64_03 = -p/1.5;
591 f64_04 = p/1.6;
592 f64_05 = -p/1.7;
593 f64_06 = p/1.8;
594 f64_07 = -p/1.9;
595 f64_08 = p/2.0;
596 f64_09 = -p/2.1;
597 f64_10 = p/2.2;
598 f64_11 = -p/2.3;
599
600 f64_100 = 0.00100;
601 f64_101 = -0.00101;
602 f64_102 = 0.00102;
603
604 // Start the counters.
605 ret = PAPI_start(EventSet);
606 if ( PAPI_OK != ret ) {
607 fprintf(stderr, "PAPI_start() error: %s\n", PAPI_strerror(ret));
608 // If we can't measure events, no need to run the kernel.
609 goto clean_up;
610 }
611
612 if( p == 123456 ){
613 p /= 2;
614 f64_100 *= 1.045;
615 f64_101 *= 1.054;
616 f64_102 *= 1.067;
617 }
618
619#define F64_SUBS(_X) {f64_00 -= _X; f64_01 -= _X; f64_02 -= _X; f64_03 -= _X; f64_04 -= _X; f64_05 -= _X; f64_06 -= _X; f64_07 -= _X; f64_08 -= _X; f64_09 -= _X; f64_10 -= _X; f64_11 -= _X;}
620
621 for(int i=0; i<M; i++){
622 for(int j=0; j<N; j++){
623 F64_SUBS(f64_100); F64_SUBS(f64_101); F64_SUBS(f64_102);
624 if( p < 4 ){
625 F64_SUBS(f64_00); F64_SUBS(f64_01); F64_SUBS(f64_02);
626 }
627 }
628 }
629
630 ret = PAPI_stop(EventSet, ev_values);
631 if ( PAPI_OK != ret ) {
632 fprintf(stderr, "PAPI_stop() error: %s\n", PAPI_strerror(ret));
633 // If we can't measure events, no need to print anything.
634 goto clean_up;
635 }
636 fprintf(fp, "%d %lld # FP_SUB_count_ILP12: %lld (%.3lf)\n", N, ev_values[0], 12LL*3LL*N*M, (double)ev_values[0]/(12.0*3.0*N*M));
637
638 sum_f64 += f64_00 + f64_01 + f64_02 + f64_03 + f64_04 + f64_05 + f64_06 + f64_07;
639 sum_f64 += f64_08 + f64_09 + f64_10 + f64_11;
640
641clean_up:
642
643 return;
644}
#define F64_SUBS(_X)
Here is the caller graph for this function:

◆ test_int_add()

void test_int_add ( int  p,
int  M,
int  N,
int  EventSet,
FILE *  fp 
)

Definition at line 16 of file instructions.c.

16 {
17 int ret;
18 long long int ev_values[2];
19 int i32_00, i32_01, i32_02, i32_03, i32_04, i32_05, i32_06, i32_07, i32_08, i32_09;
20
21 /* Initialize the variables with values that the compiler cannot guess. */
22 i32_00 = p/2;
23 i32_01 = -p/3;
24 i32_02 = p/4;
25 i32_03 = -p/5;
26 i32_04 = p/6;
27 i32_05 = -p/7;
28 i32_06 = p/8;
29 i32_07 = -p/9;
30 i32_08 = p/10;
31 i32_09 = -p/11;
32
33 // Start the counters.
34 ret = PAPI_start(EventSet);
35 if ( PAPI_OK != ret ) {
36 fprintf(stderr, "PAPI_start() error: %s\n", PAPI_strerror(ret));
37 // If we can't measure events, no need to run the kernel.
38 goto clean_up;
39 }
40
41 for(int i=0; i<M; i++){
42 for(int j=0; j<N; j++){
43
44 i32_01 += i32_00;
45 i32_02 += i32_01;
46 i32_03 += i32_02;
47 i32_04 += i32_03;
48 i32_05 += i32_04;
49 i32_06 += i32_05;
50 i32_07 += i32_06;
51 i32_08 += i32_07;
52 i32_09 += i32_08;
53 i32_00 += i32_09;
54
55 i32_01 += i32_00;
56 i32_02 += i32_01;
57 i32_03 += i32_02;
58 i32_04 += i32_03;
59 i32_05 += i32_04;
60 i32_06 += i32_05;
61 i32_07 += i32_06;
62 i32_08 += i32_07;
63 i32_09 += i32_08;
64 i32_00 += i32_09;
65
66 i32_01 += i32_00;
67 i32_02 += i32_01;
68 i32_03 += i32_02;
69 i32_04 += i32_03;
70 i32_05 += i32_04;
71 i32_06 += i32_05;
72 i32_07 += i32_06;
73 i32_08 += i32_07;
74 i32_09 += i32_08;
75 i32_00 += i32_09;
76
77 i32_01 += i32_00;
78 i32_02 += i32_01;
79 i32_03 += i32_02;
80 i32_04 += i32_03;
81 i32_05 += i32_04;
82 i32_06 += i32_05;
83 i32_07 += i32_06;
84 i32_08 += i32_07;
85 i32_09 += i32_08;
86 i32_00 += i32_09;
87
88 i32_01 += i32_00;
89 i32_02 += i32_01;
90 i32_03 += i32_02;
91 i32_04 += i32_03;
92 i32_05 += i32_04;
93 i32_06 += i32_05;
94 i32_07 += i32_06;
95 i32_08 += i32_07;
96 i32_09 += i32_08;
97 i32_00 += i32_09;
98
99 }
100 }
101
102 ret = PAPI_stop(EventSet, ev_values);
103 if ( PAPI_OK != ret ) {
104 fprintf(stderr, "PAPI_stop() error: %s\n", PAPI_strerror(ret));
105 // If we can't measure events, no need to print anything.
106 goto clean_up;
107 }
108 fprintf(fp, "%d %lld # INT_ADD_count: %lld (%.3lf)\n", N, ev_values[0], 50LL*N*M, ev_values[0]/(50.0*N*M));
109
110 sum_i32 += i32_00 + i32_01 + i32_02 + i32_03 + i32_04 + i32_05 + i32_06 + i32_07 + i32_08 + i32_09;
111
112clean_up:
113
114 return;
115}
Here is the caller graph for this function:

◆ test_mem_ops_parallel_RO()

void test_mem_ops_parallel_RO ( int  p,
int  M,
int  N,
int  EventSet,
FILE *  fp 
)

Definition at line 979 of file instructions.c.

979 {
980 int i, ret;
981 long long int ev_values[2];
982 int c0, c1, c2, c3;
983#undef BUFFER_SIZE
984#define BUFFER_SIZE (256+8)
985 int buffer[BUFFER_SIZE];
986
987 /* Initialize the buffer with values that the compiler cannot guess. */
988 for(i=0; i<BUFFER_SIZE; i++){
989 buffer[i] = p/(223+i);
990 }
991
992 /* Initialize the variables with values that the compiler cannot guess. */
993 c0 = (int)((5+p)/(12+1));
994 c1 = (int)((7+p)/(12+2));
995 c2 = (int)((11+p)/(12+3));
996 c3 = (int)((13+p)/(12+4));
997
998 // Start the counters.
999 ret = PAPI_start(EventSet);
1000 if ( PAPI_OK != ret ) {
1001 fprintf(stderr, "PAPI_start() error: %s\n", PAPI_strerror(ret));
1002 // If we can't measure events, no need to run the kernel.
1003 goto clean_up;
1004 }
1005
1006 for(int i=0; i<M; i++){
1007 // compute some junk value.
1008 uintptr_t base = i*(c0+c1)/(c2+c3+1);
1009 for(int j=0; j<N; j++){
1010 uintptr_t offset = (base+j)%(BUFFER_SIZE-8);
1011
1012 c0 += buffer[offset+1];
1013 c1 += buffer[offset+2];
1014 c2 += buffer[offset+3];
1015 c3 += buffer[offset+4];
1016
1017 c0 += buffer[offset+5];
1018 c1 += buffer[offset+6];
1019 c2 += buffer[offset+7];
1020 c3 += buffer[offset+8];
1021
1022 }
1023 }
1024
1025 ret = PAPI_stop(EventSet, ev_values);
1026 if ( PAPI_OK != ret ) {
1027 fprintf(stderr, "PAPI_stop() error: %s\n", PAPI_strerror(ret));
1028 // If we can't measure events, no need to print anything.
1029 goto clean_up;
1030 }
1031 fprintf(fp, "%d %lld # MEM_OPS_RO_count(par): %lld (%.3lf)\n", N, ev_values[0], 8LL*N*M, ev_values[0]/(8.0*N*M));
1032
1033 sum_i32 += c0+c1+c2+c3;
1034
1035clean_up:
1036
1037 return;
1038}
Here is the caller graph for this function:

◆ test_mem_ops_parallel_WO()

void test_mem_ops_parallel_WO ( int  p,
int  M,
int  N,
int  EventSet,
FILE *  fp 
)

Definition at line 1040 of file instructions.c.

1040 {
1041 int i, ret;
1042 long long int ev_values[2];
1043 int sum=0;
1044#undef BUFFER_SIZE
1045#define BUFFER_SIZE 256
1046 int buffer[BUFFER_SIZE];
1047
1048 /* Initialize the buffer with values that the compiler cannot guess. */
1049 for(i=0; i<BUFFER_SIZE; i++){
1050 buffer[i] = p/(1223+i);
1051 sum += buffer[i];
1052 }
1053
1054 // Start the counters.
1055 ret = PAPI_start(EventSet);
1056 if ( PAPI_OK != ret ) {
1057 fprintf(stderr, "PAPI_start() error: %s\n", PAPI_strerror(ret));
1058 // If we can't measure events, no need to run the kernel.
1059 goto clean_up;
1060 }
1061
1062 for(int i=0; i<M; i++){
1063 for(int j=0; j<N; j++){
1064 buffer[j%BUFFER_SIZE] = sum;
1065 }
1066 }
1067
1068 ret = PAPI_stop(EventSet, ev_values);
1069 if ( PAPI_OK != ret ) {
1070 fprintf(stderr, "PAPI_stop() error: %s\n", PAPI_strerror(ret));
1071 // If we can't measure events, no need to print anything.
1072 goto clean_up;
1073 }
1074 fprintf(fp, "%d %lld # MEM_OPS_WO_count(par): %lld (%.3lf)\n", N, ev_values[0], 1LL*N*M, ev_values[0]/(1.0*N*M));
1075
1076 sum_i32 += buffer[0] + buffer[BUFFER_SIZE/2] + buffer[BUFFER_SIZE-1];
1077
1078clean_up:
1079
1080 return;
1081}
Here is the caller graph for this function:

◆ test_mem_ops_serial_RO()

void test_mem_ops_serial_RO ( int  p,
int  M,
int  N,
int  EventSet,
FILE *  fp 
)

Definition at line 895 of file instructions.c.

895 {
896 int i, ret;
897 long long int ev_values[2];
898#undef BUFFER_SIZE
899#define BUFFER_SIZE 256
900 int buffer[BUFFER_SIZE];
901
902 /* Initialize the buffer with values that the compiler cannot guess. */
903 for(i=0; i<BUFFER_SIZE; i++){
904 buffer[i] = p/(1223+i);
905 }
906
907 // Start the counters.
908 ret = PAPI_start(EventSet);
909 if ( PAPI_OK != ret ) {
910 fprintf(stderr, "PAPI_start() error: %s\n", PAPI_strerror(ret));
911 // If we can't measure events, no need to run the kernel.
912 goto clean_up;
913 }
914
915 uintptr_t index = buffer[0];
916 for(int i=0; i<M; i++){
917 for(int j=0; j<N; j++){
918 index = buffer[index%BUFFER_SIZE];
919 }
920 }
921
922 ret = PAPI_stop(EventSet, ev_values);
923 if ( PAPI_OK != ret ) {
924 fprintf(stderr, "PAPI_stop() error: %s\n", PAPI_strerror(ret));
925 // If we can't measure events, no need to print anything.
926 goto clean_up;
927 }
928 fprintf(fp, "%d %lld # MEM_OPS_RO_count: %lld (%.3lf)\n", N, ev_values[0], 1LL*N*M, ev_values[0]/(1.0*N*M));
929
930 sum_i32 += index;
931
932clean_up:
933
934 return;
935}
Here is the caller graph for this function:

◆ test_mem_ops_serial_RW()

void test_mem_ops_serial_RW ( int  p,
int  M,
int  N,
int  EventSet,
FILE *  fp 
)

Definition at line 937 of file instructions.c.

937 {
938 int i, ret;
939 long long int ev_values[2];
940#undef BUFFER_SIZE
941#define BUFFER_SIZE (256+1)
942 int buffer[BUFFER_SIZE];
943
944 /* Initialize the buffer with values that the compiler cannot guess. */
945 for(i=0; i<BUFFER_SIZE; i++){
946 buffer[i] = p/(1223+i);
947 }
948
949 // Start the counters.
950 ret = PAPI_start(EventSet);
951 if ( PAPI_OK != ret ) {
952 fprintf(stderr, "PAPI_start() error: %s\n", PAPI_strerror(ret));
953 // If we can't measure events, no need to run the kernel.
954 goto clean_up;
955 }
956
957 for(int i=0; i<M; i++){
958 for(int j=0; j<N; j++){
959 uintptr_t index = j%(BUFFER_SIZE-1);
960 buffer[index+1] += buffer[index];
961 }
962 }
963
964 ret = PAPI_stop(EventSet, ev_values);
965 if ( PAPI_OK != ret ) {
966 fprintf(stderr, "PAPI_stop() error: %s\n", PAPI_strerror(ret));
967 // If we can't measure events, no need to print anything.
968 goto clean_up;
969 }
970 fprintf(fp, "%d %lld # MEM_OPS_RW_count: %lld (%.3lf)\n", N, ev_values[0], 2LL*N*M, ev_values[0]/(2.0*N*M));
971
972 sum_i32 += buffer[0] + buffer[BUFFER_SIZE/2] + buffer[BUFFER_SIZE-1];
973
974clean_up:
975
976 return;
977}
Here is the caller graph for this function:

Variable Documentation

◆ sum_f32

float sum_f32 =0.0

Definition at line 13 of file instructions.c.

◆ sum_f64

double sum_f64 =0.0

Definition at line 14 of file instructions.c.

◆ sum_i32

int sum_i32 =0

Definition at line 12 of file instructions.c.