22 int status, evtCode, test_cnt = 0;
25 char *sufx, *papiFileName;
37 sufx = strdup(
".data.writes");
39 sufx = strdup(
".data.reads");
42 int l = strlen(params.
outputdir)+strlen(papi_event_name)+strlen(sufx);
43 papiFileName = (
char *)calloc( 1+l,
sizeof(
char) );
45 fprintf(
stderr,
"Unable to allocate memory. Skipping event %s.\n", papi_event_name);
48 if (l != (sprintf(papiFileName,
"%s%s%s", params.
outputdir, papi_event_name, sufx))) {
49 fprintf(
stderr,
"sprintf error. Skipping event %s.\n", papi_event_name);
52 if (NULL == (ofp_papi = fopen(papiFileName,
"w"))) {
53 fprintf(
stderr,
"Unable to open file %s. Skipping event %s.\n", papiFileName, papi_event_name);
66 for(pattern = 3; pattern <= 4; ++pattern)
68 for(
f = 1;
f <= 2;
f *= 2)
70 stride = cache_line*
f;
74 for(ppb = (
float)hw_desc->
maxPPB; ppb >= 16; ppb *= 16.0/(hw_desc->
maxPPB))
78 printf(
"%3d%%\b\b\b\b",(100*test_cnt++)/6);
81 status =
d_cache_test(pattern, params.
max_iter, hw_desc, stride, ppb, papi_event_name, latency_only, mode, ofp_papi);
90 printf(
"%3d%%\b\b\b\b",(100*test_cnt++)/6);
93 status =
d_cache_test(pattern, params.
max_iter, hw_desc, stride, ppb, papi_event_name, latency_only, mode, ofp_papi);
104 for(
i=0;
i<strlen(
"Total:100% Current test:100%");
i++) putchar(
'\b');
118int d_cache_test(
int pattern,
int max_iter,
hw_desc_t *hw_desc,
long long stride_in_bytes,
float pages_per_block,
char* papi_event_name,
int latency_only,
int mode, FILE* ofp){
121 double ***rslts, *sorted_rslts;
122 double ***counter, *sorted_counter;
123 int status=0, guessCount, ONT;
125 min_size = 2*1024/
sizeof(uintptr_t);
126 max_size = 1024*1024*1024/
sizeof(uintptr_t);
137 for(j=0; j<numCaches; ++j) {
144 double factor = pow((
double)
FACTOR, ((
double)(num_pts-1))/((
double)num_pts));
152 rslts = (
double ***)malloc(max_iter*
sizeof(
double **));
153 for(
i=0;
i<max_iter; ++
i){
154 rslts[
i] = (
double **)malloc(guessCount*
sizeof(
double*));
155 for(j=0; j<guessCount; ++j){
156 rslts[
i][j] = (
double *)malloc(ONT*
sizeof(
double));
159 sorted_rslts = (
double *)malloc(max_iter*
sizeof(
double));
162 counter = (
double ***)malloc(max_iter*
sizeof(
double **));
163 for(
i=0;
i<max_iter; ++
i){
164 counter[
i] = (
double **)malloc(guessCount*
sizeof(
double*));
165 for(j=0; j<guessCount; ++j){
166 counter[
i][j] = (
double *)malloc(ONT*
sizeof(
double));
169 sorted_counter = (
double *)malloc(max_iter*
sizeof(
double));
172 values = (
long long *)malloc(guessCount*
sizeof(
long long));
177 for(
i=0;
i<max_iter; ++
i){
178 status =
varyBufferSizes(
values, rslts[
i], counter[
i], hw_desc, stride_in_bytes, pages_per_block, pattern, latency_only, mode, ONT);
184 fprintf(ofp,
"# PTRN=%d, STRIDE=%lld, PPB=%f, ThreadCount=%d\n", pattern, stride_in_bytes, pages_per_block, ONT);
188 for(j=0; j<guessCount; ++j){
189 fprintf(ofp,
"%lld",
values[j]);
190 for(k=0; k<ONT; ++k){
191 for(
i=0;
i<max_iter; ++
i){
192 sorted_rslts[
i] = rslts[
i][j][k];
194 qsort(sorted_rslts, max_iter,
sizeof(
double),
compar_lf);
195 fprintf(ofp,
" %.4lf", sorted_rslts[0]);
202 for(j=0; j<guessCount; ++j){
203 fprintf(ofp,
"%lld",
values[j]);
204 for(k=0; k<ONT; ++k){
205 for(
i=0;
i<max_iter; ++
i){
206 sorted_counter[
i] = counter[
i][j][k];
208 qsort(sorted_counter, max_iter,
sizeof(
double),
compar_lf);
209 fprintf(ofp,
" %lf", sorted_counter[0]);
216 for(
i=0;
i<max_iter; ++
i){
217 for(j=0; j<guessCount; ++j){
227 free(sorted_counter);
234int varyBufferSizes(
long long *
values,
double **rslts,
double **counter,
hw_desc_t *hw_desc,
long long stride_in_bytes,
float pages_per_block,
int pattern,
int latency_only,
int mode,
int ONT){
237 long long active_buf_len;
241 long long stride = stride_in_bytes/
sizeof(uintptr_t);
243 uintptr_t rslt=42, *v[ONT], *ptr[ONT];
246 #pragma omp parallel private(i) reduction(+:rslt) default(shared)
248 int idx = omp_get_thread_num();
250 ptr[idx] = (uintptr_t *)malloc( (2LL*
max_size+stride)*
sizeof(uintptr_t) );
252 fprintf(
stderr,
"Error: cannot allocate space for experiment.\n");
259 v[idx] = (uintptr_t *)(stride_in_bytes*(((uintptr_t)ptr[idx]+stride_in_bytes)/stride_in_bytes));
273 out =
probeBufferSize(16LL*stride, stride, pages_per_block, pattern, v, &rslt, latency_only, mode, ONT);
282 out =
probeBufferSize(active_buf_len, stride, pages_per_block, pattern, v, &rslt, latency_only, mode, ONT);
285 for(k = 0; k < ONT; ++k) {
286 rslts[cnt][k] = out.
dt[k];
287 counter[cnt][k] = out.
counter[k];
289 values[cnt++] = ONT*
sizeof(uintptr_t)*active_buf_len;
291 out =
probeBufferSize((
long long)((
double)active_buf_len*1.25), stride, pages_per_block, pattern, v, &rslt, latency_only, mode, ONT);
294 for(k = 0; k < ONT; ++k) {
295 rslts[cnt][k] = out.dt[k];
296 counter[cnt][k] = out.counter[k];
298 values[cnt++] = ONT*
sizeof(uintptr_t)*((
long long)((double)active_buf_len*1.25));
300 out =
probeBufferSize((
long long)((
double)active_buf_len*1.5), stride, pages_per_block, pattern, v, &rslt, latency_only, mode, ONT);
303 for(k = 0; k < ONT; ++k) {
304 rslts[cnt][k] = out.dt[k];
305 counter[cnt][k] = out.counter[k];
307 values[cnt++] = ONT*
sizeof(uintptr_t)*((
long long)((double)active_buf_len*1.5));
309 out =
probeBufferSize((
long long)((
double)active_buf_len*1.75), stride, pages_per_block, pattern, v, &rslt, latency_only, mode, ONT);
312 for(k = 0; k < ONT; ++k) {
313 rslts[cnt][k] = out.dt[k];
314 counter[cnt][k] = out.counter[k];
316 values[cnt++] = ONT*
sizeof(uintptr_t)*((
long long)((double)active_buf_len*1.75));
321 int numHier = numCaches+1;
322 int llc_idx = numCaches-1;
323 int len = 0, ptsToNextCache, tmpIdx = 0;
324 long long currCacheSize, nextCacheSize;
328 for(j=0; j<numCaches; ++j) {
334 if( NULL == (bufSizes = (
long long *)calloc(len,
sizeof(
long long))) )
339 for(j=0; j<numHier; ++j) {
354 if( llc_idx+1 == j ) {
366 for(k = 1; k < ptsToNextCache; ++k) {
367 f = pow(((
double)nextCacheSize)/currCacheSize, ((
double)k)/ptsToNextCache);
368 bufSizes[tmpIdx+k-1] =
f*currCacheSize;
371 if( llc_idx+1 == j ) {
379 for(j=0; j<len; j++){
380 active_buf_len = bufSizes[j]/
sizeof(uintptr_t);
381 out =
probeBufferSize(active_buf_len, stride, pages_per_block, pattern, v, &rslt, latency_only, mode, ONT);
384 for(k = 0; k < ONT; ++k) {
385 rslts[cnt][k] = out.
dt[k];
386 counter[cnt][k] = out.
counter[k];
388 values[cnt++] = bufSizes[j];
395 for(j=0; j<ONT; ++j){
402 for(j=0; j<ONT; ++j){
412 #pragma omp parallel default(shared)
414 if(!omp_get_thread_num()) {
415 threadNum = omp_get_num_threads();
434 if( NULL == hw_desc ) {
441 fprintf(ofp,
" L%d:%lld",
i+1, sz);
450 int *pinnings = NULL;
456 pinnings = (
int *)malloc(ONT*
sizeof(
int));
457 if( NULL == pinnings ) {
458 fprintf(
stderr,
"Error: cannot allocate space for experiment.\n");
462 #pragma omp parallel default(shared)
464 int idx = omp_get_thread_num();
466 pinnings[idx] = sched_getcpu();
469 fprintf(ofp,
"# Core:");
470 for(k=0; k<ONT; ++k) {
471 fprintf(ofp,
" %d", pinnings[k]);
int compar_lf(const void *a, const void *b)
Convert a name to a numeric hardware event code.
return component an event belongs to
int varyBufferSizes(long long *values, double **rslts, double **counter, hw_desc_t *hw_desc, long long stride_in_bytes, float pages_per_block, int pattern, int latency_only, int mode, int ONT)
static void print_core_affinities(FILE *ofp)
int d_cache_test(int pattern, int max_iter, hw_desc_t *hw_desc, long long stride_in_bytes, float pages_per_block, char *papi_event_name, int latency_only, int mode, FILE *ofp)
static void print_cache_sizes(FILE *ofp_papi, hw_desc_t *hw_desc)
static void print_header(FILE *ofp_papi, hw_desc_t *hw_desc)
void d_cache_driver(char *papi_event_name, cat_params_t params, hw_desc_t *hw_desc, int latency_only, int mode)
static long long values[NUM_EVENTS]
Return codes and api definitions.
int fclose(FILE *__stream)
int split[_MAX_SUPPORTED_CACHE_LEVELS]
int pts_per_reg[_MAX_SUPPORTED_CACHE_LEVELS]
long long dcache_line_size[_MAX_SUPPORTED_CACHE_LEVELS]
long long dcache_size[_MAX_SUPPORTED_CACHE_LEVELS]
double counter[MAXTHREADS]
run_output_t probeBufferSize(long long active_buf_len, long long line_size, float pageCountPerBlock, int pattern, uintptr_t **v, uintptr_t *rslt, int latency_only, int mode, int ONT)
void error_handler(int e, int line)