PAPI 7.1.0.0
Loading...
Searching...
No Matches
timing_kernels.c
Go to the documentation of this file.
1#include <inttypes.h>
2#include <unistd.h>
3#include <sys/time.h>
4#include <assert.h>
5#include <math.h>
6#include <papi.h>
7#include <omp.h>
8#include "prepareArray.h"
9#include "timing_kernels.h"
10
// Scratch variables referenced by the do_work macro in the header file.
// They are volatile so that accesses to them cannot be optimized away.
volatile double x;
volatile double y;

// Defined in another translation unit.
extern int is_core;

// Name of the event handed to PAPI_add_named_event(); NULL until it is set.
char *eventname = NULL;
16
17run_output_t probeBufferSize(long long active_buf_len, long long line_size, float pageCountPerBlock, int pattern, uintptr_t **v, uintptr_t *rslt, int latency_only, int mode, int ONT){
18 int _papi_eventset = PAPI_NULL;
19 int retval, buffer = 0, status = 0;
20 int error_line = -1, error_type = PAPI_OK;
21 register uintptr_t *p = NULL;
22 register uintptr_t p_prime;
23 long long count, pageSize, blockSize;
24 long long int counter[ONT];
25 run_output_t out;
26 out.status = 0;
27
28 assert( sizeof(int) >= 4 );
29
30 x = (double)*rslt;
31 x = floor(1.3*x/(1.4*x+1.8));
32 y = x*3.97;
33 if( x > 0 || y > 0 )
34 printf("WARNING: x=%lf y=%lf\n",x,y);
35
36 // Make no fewer accesses than we would for a buffer of size 128KB.
37 long long countMax;
38 long long unsigned threshold = 128*1024;
39 if( active_buf_len*sizeof(uintptr_t) > threshold )
40 countMax = 64LL*((long long)(active_buf_len/line_size));
41 else
42 countMax = 64LL*((long long)(threshold/line_size));
43
44 // Get the size of a page of memory.
45 pageSize = sysconf(_SC_PAGESIZE)/sizeof(uintptr_t);
46 if( pageSize <= 0 ){
47 fprintf(stderr,"Cannot determine pagesize, sysconf() returned an error code.\n");
48 out.status = -1;
49 return out;
50 }
51
52 // Compute the size of a block in the pointer chain and create the pointer chain.
53 blockSize = (long long)(pageCountPerBlock*(float)pageSize);
54 #pragma omp parallel reduction(+:status) default(shared)
55 {
56 int idx = omp_get_thread_num();
57
58 status += prepareArray(v[idx], active_buf_len, line_size, blockSize, pattern);
59 }
60
61 // Start of threaded benchmark.
62 #pragma omp parallel private(p,count,retval) reduction(+:buffer) reduction(+:status) firstprivate(_papi_eventset) default(shared)
63 {
64 int idx = omp_get_thread_num();
65 int thdStatus = 0;
66 double divisor = 1.0;
67 double time1=0, time2=0, dt, factor;
68
69 // Initialize the result to a value indicating an error.
70 // If no error occurs, it will be overwritten.
71 if ( !latency_only ) {
72 out.counter[idx] = -1;
73 }
74
75 // We will use "p" even after the epilogue, so let's set
76 // it here in case an error occurs.
77 p = &v[idx][0];
78 count = countMax;
79
80 if ( !latency_only && (is_core || 0 == idx) ) {
81 retval = PAPI_create_eventset( &_papi_eventset );
82 if (retval != PAPI_OK ){
83 error_type = retval;
84 error_line = __LINE__;
85 thdStatus = -1;
86 // If we can't measure events, no need to run the kernel.
87 goto skip_epilogue;
88 }
89
90 retval = PAPI_add_named_event( _papi_eventset, eventname );
91 if (retval != PAPI_OK ){
92 error_type = retval;
93 error_line = __LINE__;
94 thdStatus = -1;
95 // If we can't measure events, no need to run the kernel.
96 goto clean_up;
97 }
98
99 // Start the counters.
100 retval = PAPI_start(_papi_eventset);
101 if ( PAPI_OK != retval ) {
102 error_type = retval;
103 error_line = __LINE__;
104 thdStatus = -1;
105 // If we can't measure events, no need to run the kernel.
106 goto clean_up;
107 }
108 }
109
110 // Start the actual test.
111
112 // Micro-kernel for memory reading.
113 if( CACHE_READ_ONLY == mode || latency_only )
114 {
115 if( latency_only ) time1 = getticks();
116 while(count > 0){
117 N_128;
118 count -= 128;
119 }
120 if( latency_only ) time2 = getticks();
121 }
122 // Micro-kernel for memory writing.
123 else
124 {
125 while(count > 0){
126 NW_128;
127 count -= 128;
128 }
129 }
130
131 if ( !latency_only && (is_core || 0 == idx) ) {
132 // Stop the counters.
133 retval = PAPI_stop(_papi_eventset, &counter[idx]);
134 if ( PAPI_OK != retval ) {
135 error_type = retval;
136 error_line = __LINE__;
137 thdStatus = -1;
138 goto clean_up;
139 }
140
141 // Get the average event count per access in pointer chase.
142 // If it is not a core event, get average count per thread.
143 divisor = 1.0*countMax;
144 if( !is_core && 0 == idx )
145 divisor *= ONT;
146
147 out.counter[idx] = (1.0*counter[idx])/divisor;
148
149clean_up:
150 retval = PAPI_cleanup_eventset(_papi_eventset);
151 if (retval != PAPI_OK ){
152 error_type = retval;
153 error_line = __LINE__;
154 thdStatus = -1;
155 }
156
157 retval = PAPI_destroy_eventset(&_papi_eventset);
158 if (retval != PAPI_OK ){
159 error_type = retval;
160 error_line = __LINE__;
161 thdStatus = -1;
162 }
163
164 }else{
165 // Compute the duration of the pointer chase.
166 dt = elapsed(time2, time1);
167
168 // Convert time into nanoseconds.
169 factor = 1000.0;
170
171 // Number of accesses per pointer chase.
172 factor /= (1.0*countMax);
173
174 // Get the average nanoseconds per access.
175 out.dt[idx] = dt*factor;
176 }
177
178skip_epilogue:
179 buffer += (uintptr_t)p+(uintptr_t)(x+y);
180 status += thdStatus;
181 }
182
183 // Get the collective status.
184 if(status < 0) {
185 error_handler(error_type, error_line);
186 out.status = -1;
187 }
188
189 // Prevent compiler optimization.
190 *rslt = buffer;
191
192 return out;
193}
194
195void error_handler(int e, int line){
196 int idx;
197 const char *errors[26] = {
198 "No error",
199 "Invalid argument",
200 "Insufficient memory",
201 "A System/C library call failed",
202 "Not supported by component",
203 "Access to the counters was lost or interrupted",
204 "Internal error, please send mail to the developers",
205 "Event does not exist",
206 "Event exists, but cannot be counted due to counter resource limitations",
207 "EventSet is currently not running",
208 "EventSet is currently counting",
209 "No such EventSet Available",
210 "Event in argument is not a valid preset",
211 "Hardware does not support performance counters",
212 "Unknown error code",
213 "Permission level does not permit operation",
214 "PAPI hasn't been initialized yet",
215 "Component Index isn't set",
216 "Not supported",
217 "Not implemented",
218 "Buffer size exceeded",
219 "EventSet domain is not supported for the operation",
220 "Invalid or missing event attributes",
221 "Too many events or attributes",
222 "Bad combination of features",
223 "Component containing event is disabled"
224 };
225
226 idx = -e;
227 if(idx >= 26 || idx < 0 )
228 idx = 15;
229
230 if( NULL != eventname )
231 fprintf(stderr,"\nError \"%s\" occured at line %d when processing event %s.\n", errors[idx], line, eventname);
232 else
233 fprintf(stderr,"\nError \"%s\" occured at line %d.\n", errors[idx], line);
234
235}
static double elapsed(double t1, double t0)
Definition: caches.h:55
static double getticks(void)
Definition: caches.h:46
static long count
add PAPI preset or native hardware event by name to an EventSet
Empty and destroy an EventSet.
Create a new empty PAPI EventSet.
Empty and destroy an EventSet.
Start counting hardware events in an event set.
Stop counting hardware events in an event set.
#define PAPI_OK
Definition: f90papi.h:73
#define PAPI_NULL
Definition: f90papi.h:78
static int threshold
Return codes and api definitions.
FILE * stderr
int prepareArray(uintptr_t *array, long long len, long long stride, long long secSize, int pattern)
Definition: prepareArray.c:18
long long int long long
Definition: sde_internal.h:85
double counter[MAXTHREADS]
Definition: caches.h:42
double dt[MAXTHREADS]
Definition: caches.h:41
int status
Definition: caches.h:43
run_output_t probeBufferSize(long long active_buf_len, long long line_size, float pageCountPerBlock, int pattern, uintptr_t **v, uintptr_t *rslt, int latency_only, int mode, int ONT)
volatile double y
char * eventname
volatile double x
void error_handler(int e, int line)
int is_core
Definition: dcache.c:15
#define NW_128
#define CACHE_READ_ONLY
#define N_128
int retval
Definition: zero_fork.c:53