234 {
236 int j, k, cnt;
237 long long active_buf_len;
238 int allocErr = 0;
240
241 long long stride = stride_in_bytes/sizeof(uintptr_t);
242
243 uintptr_t rslt=42, *v[ONT], *ptr[ONT];
244
245
246 #pragma omp parallel private(i) reduction(+:rslt) default(shared)
247 {
248 int idx = omp_get_thread_num();
249
250 ptr[idx] = (uintptr_t *)malloc( (2LL*
max_size+stride)*
sizeof(uintptr_t) );
251 if( !ptr[idx] ){
252 fprintf(
stderr,
"Error: cannot allocate space for experiment.\n");
253 #pragma omp critical
254 {
255 allocErr = -1;
256 }
257 }else{
258
259 v[idx] = (uintptr_t *)(stride_in_bytes*(((uintptr_t)ptr[idx]+stride_in_bytes)/stride_in_bytes));
260
261
264 }
265 }
266 }
267 if(allocErr != 0)
268 {
269 goto error;
270 }
271
272
273 out =
probeBufferSize(16LL*stride, stride, pages_per_block, pattern, v, &rslt, latency_only, mode, ONT);
275 goto error;
276
277
279 cnt = 0;
280
282 out =
probeBufferSize(active_buf_len, stride, pages_per_block, pattern, v, &rslt, latency_only, mode, ONT);
284 goto error;
285 for(k = 0; k < ONT; ++k) {
286 rslts[cnt][k] = out.
dt[k];
287 counter[cnt][k] = out.
counter[k];
288 }
289 values[cnt++] = ONT*
sizeof(uintptr_t)*active_buf_len;
290
291 out =
probeBufferSize((
long long)((
double)active_buf_len*1.25), stride, pages_per_block, pattern, v, &rslt, latency_only, mode, ONT);
292 if(out.status != 0)
293 goto error;
294 for(k = 0; k < ONT; ++k) {
295 rslts[cnt][k] = out.dt[k];
296 counter[cnt][k] = out.counter[k];
297 }
298 values[cnt++] = ONT*
sizeof(uintptr_t)*((
long long)((double)active_buf_len*1.25));
299
300 out =
probeBufferSize((
long long)((
double)active_buf_len*1.5), stride, pages_per_block, pattern, v, &rslt, latency_only, mode, ONT);
301 if(out.status != 0)
302 goto error;
303 for(k = 0; k < ONT; ++k) {
304 rslts[cnt][k] = out.dt[k];
305 counter[cnt][k] = out.counter[k];
306 }
307 values[cnt++] = ONT*
sizeof(uintptr_t)*((
long long)((double)active_buf_len*1.5));
308
309 out =
probeBufferSize((
long long)((
double)active_buf_len*1.75), stride, pages_per_block, pattern, v, &rslt, latency_only, mode, ONT);
310 if(out.status != 0)
311 goto error;
312 for(k = 0; k < ONT; ++k) {
313 rslts[cnt][k] = out.dt[k];
314 counter[cnt][k] = out.counter[k];
315 }
316 values[cnt++] = ONT*
sizeof(uintptr_t)*((
long long)((double)active_buf_len*1.75));
317 }
318 }else{
321 int numHier = numCaches+1;
322 int llc_idx = numCaches-1;
323 int len = 0, ptsToNextCache, tmpIdx = 0;
324 long long currCacheSize, nextCacheSize;
325 long long *bufSizes;
326
327
328 for(j=0; j<numCaches; ++j) {
330 }
332
333
334 if( NULL == (bufSizes = (long long *)calloc(len, sizeof(long long))) )
335 goto error;
336
337
338 tmpIdx = 0;
339 for(j=0; j<numHier; ++j) {
340
341
342
343
344 if( 0 == j ) {
346 } else {
348 }
349
350
351
352
353
354 if( llc_idx+1 == j ) {
357 } else {
360 }
361
362
363
364
365
366 for(k = 1; k < ptsToNextCache; ++k) {
367 f = pow(((
double)nextCacheSize)/currCacheSize, ((
double)k)/ptsToNextCache);
368 bufSizes[tmpIdx+k-1] =
f*currCacheSize;
369 }
370
371 if( llc_idx+1 == j ) {
373 } else {
375 }
376 }
377
378 cnt=0;
379 for(j=0; j<len; j++){
380 active_buf_len = bufSizes[j]/sizeof(uintptr_t);
381 out =
probeBufferSize(active_buf_len, stride, pages_per_block, pattern, v, &rslt, latency_only, mode, ONT);
382 if(out.status != 0)
383 goto error;
384 for(k = 0; k < ONT; ++k) {
385 rslts[cnt][k] = out.dt[k];
386 counter[cnt][k] = out.counter[k];
387 }
388 values[cnt++] = bufSizes[j];
389 }
390
391 free(bufSizes);
392 }
393
394
395 for(j=0; j<ONT; ++j){
396 free(ptr[j]);
397 }
398 return 0;
399
400error:
401
402 for(j=0; j<ONT; ++j){
403 free(ptr[j]);
404 }
405 return -1;
406}
/* Integer array with one slot per _MAX_SUPPORTED_CACHE_LEVELS entry.
 * NOTE(review): nothing in the surrounding code reads or writes it;
 * confirm its meaning at the real definition. */
int split[_MAX_SUPPORTED_CACHE_LEVELS]
/* Array of MAXTHREADS doubles.
 * NOTE(review): presumably the `counter` member of run_output_t, which the
 * driver reads as out.counter[k] for k in [0, ONT) -- confirm against the
 * struct definition. */
double counter[MAXTHREADS]
/*
 * Runs one measurement for a given buffer size and returns the result by value.
 *
 * What the call sites in this file establish:
 *   active_buf_len    - buffer length; callers pass byte sizes divided by
 *                       sizeof(uintptr_t), i.e. a count of uintptr_t elements.
 *   line_size         - callers pass `stride`, which is
 *                       stride_in_bytes / sizeof(uintptr_t).
 *   pageCountPerBlock - callers pass `pages_per_block`.
 *   pattern           - forwarded unchanged by callers.
 *   v                 - array of per-thread buffer pointers (one per OpenMP
 *                       thread), each rounded up to a stride_in_bytes boundary.
 *   rslt              - address of a caller-owned uintptr_t.
 *                       NOTE(review): presumably an accumulator that keeps the
 *                       memory accesses from being optimized away -- confirm.
 *   latency_only,
 *   mode              - forwarded unchanged by callers.
 *   ONT               - callers loop k over [0, ONT) when reading the result,
 *                       so it bounds the valid entries of dt[] and counter[].
 *
 * Returns a run_output_t. Callers treat a non-zero `status` as failure and
 * otherwise read `dt[k]` and `counter[k]` for each k in [0, ONT).
 */
run_output_t probeBufferSize(long long active_buf_len, long long line_size, float pageCountPerBlock, int pattern, uintptr_t **v, uintptr_t *rslt, int latency_only, int mode, int ONT)