35#pragma weak pthread_once
37#define verbose_fprintf \
38 if (verbosity == 1) fprintf
41#define PAPIHL_NUM_OF_COMPONENTS 10
42#define PAPIHL_NUM_OF_EVENTS_PER_COMPONENT 10
44#define PAPIHL_ACTIVE 1
45#define PAPIHL_DEACTIVATED 0
48#define PAPIHL_MAX_STACK_SIZE 10
72typedef struct local_components
107typedef struct regions
230 if ( getenv(
"PAPI_HL_VERBOSE") != NULL ) {
264 if ( getenv(
"PAPI_MULTIPLEX") != NULL ) {
287 if ( pthread_once ) {
309 HLDBG(
"Counter: %s\n", counter);
314 HLDBG(
"Counter %s does not exist\n", counter);
319 HLDBG(
"Cannot add counter %s\n", counter);
337 if ( getenv(
"OMPI_COMM_WORLD_RANK") != NULL )
338 rank = atoi(getenv(
"OMPI_COMM_WORLD_RANK"));
339 else if ( getenv(
"ALPS_APP_PE") != NULL )
340 rank = atoi(getenv(
"ALPS_APP_PE"));
341 else if ( getenv(
"PMI_RANK") != NULL )
342 rank = atoi(getenv(
"PMI_RANK"));
343 else if ( getenv(
"SLURM_PROCID") != NULL )
344 rank = atoi(getenv(
"SLURM_PROCID"));
351 char *out = str, *put = str;
352 for(; *str !=
'\0'; ++str) {
357 while (*str ==
' ' && *(str + 1) ==
' ')
369 HLDBG(
"Default events\n");
370 char *default_events[] = {
373 int num_of_defaults =
sizeof(default_events) /
sizeof(
char*);
381 for (
i = 0;
i < num_of_defaults;
i++ ) {
389 if ( strcmp(default_events[
i],
"PAPI_FP_OPS") == 0 ) {
397 if ( strcmp(default_events[
i],
"PAPI_FP_INS") == 0 ) {
411 char* user_events_copy;
412 const char *separator;
413 int num_of_req_events = 1;
414 int req_event_index = 0;
415 const char *position = NULL;
418 HLDBG(
"User events: %s\n", user_events);
419 user_events_copy = strdup(user_events);
420 if ( user_events_copy == NULL )
424 if ( strlen( user_events_copy ) > 0 )
427 position = user_events_copy;
429 while ( *position ) {
430 if ( strchr( separator, *position ) ) {
439 free(user_events_copy);
444 token = strtok( user_events_copy, separator );
446 if ( req_event_index >= num_of_req_events ){
448 free(user_events_copy);
453 free(user_events_copy);
456 token = strtok( NULL, separator );
462 free(user_events_copy);
482 if ( getenv(
"PAPI_MULTIPLEX") != NULL ) {
485 if ( component_id == 0 ) {
554 verbose_fprintf(
stdout,
"Advice: Use papi_event_chooser to obtain an appropriate event set for this component or set PAPI_MULTIPLEX=1.\n");
572 int component_id = -1;
574 bool component_exists =
false;
575 short event_type = 0;
577 HLDBG(
"Create components\n");
584 const char sep =
'=';
590 if ( strcmp(ret,
"=instant") == 0 )
598 if ( (strcmp(ret,
"=instant") == 0) || (strcmp(ret,
"=delta") == 0) )
622 if (
components[j].component_id == component_id ) {
623 component_exists =
true;
628 component_exists =
false;
633 if (
false == component_exists ) {
690 }
else if ( getenv(
"PAPI_EVENTS") != NULL ) {
691 char *user_events_from_env = strdup( getenv(
"PAPI_EVENTS") );
692 if ( user_events_from_env == NULL )
695 if ( strlen( user_events_from_env ) == 0 ) {
697 free(user_events_from_env);
703 free(user_events_from_env);
706 free(user_events_from_env);
750 if ( getenv(
"PAPI_MULTIPLEX") != NULL ) {
839 if ( ( new_node = malloc(
sizeof(
reads_t)) ) == NULL )
841 new_node->
next = NULL;
842 new_node->
prev = NULL;
845 if ( *head_node == NULL ) {
846 *head_node = new_node;
849 (*head_node)->
prev = new_node;
850 new_node->
next = *head_node;
851 *head_node = new_node;
917 int extended_total_num_events;
923 new_node = malloc(
sizeof(
regions_t) + extended_total_num_events *
sizeof(
value_t));
924 if ( new_node == NULL )
926 new_node->
region = (
char *)malloc((strlen(region) + 1) *
sizeof(char));
927 if ( new_node->
region == NULL ) {
932 new_node->
next = NULL;
933 new_node->
prev = NULL;
937 strcpy(new_node->
region, region);
938 for (
i = 0;
i < extended_total_num_events;
i++ ) {
943 if ( *head_node == NULL ) {
944 *head_node = new_node;
947 (*head_node)->
prev = new_node;
948 new_node->
next = *head_node;
949 *head_node = new_node;
958 while ( find_node != NULL ) {
962 find_node = find_node->
next;
971 if ( new_node == NULL )
974 new_node->
value = NULL;
982 find_node->
key = tid;
984 if ( found != NULL ) {
1002 if ( current_thread_node == NULL ) {
1018 if ( current_region_node == NULL ) {
1114 if ( (
tmp = strdup(dir) ) == NULL )
1120 if ( stat(dir, &
buf) == 0 && S_ISREG(
buf.st_mode) ) {
1121 verbose_fprintf(
stdout,
"PAPI-HL Error: Name conflict with measurement directory and existing file.\n");
1125 if(
tmp[len - 1] ==
'/')
1127 for(p =
tmp + 1; *p; p++)
1152 char *output_prefix = NULL;
1153 if ( getenv(
"PAPI_OUTPUT_DIRECTORY") != NULL ) {
1154 if ( ( output_prefix = strdup( getenv(
"PAPI_OUTPUT_DIRECTORY") ) ) == NULL )
1157 if ( ( output_prefix = strdup( getcwd(NULL,0) ) ) == NULL )
1163 free(output_prefix);
1176 char *new_absolute_output_file_path = NULL;
1177 if ( ( new_absolute_output_file_path = (
char *)malloc((strlen(
absolute_output_file_path) + 64) *
sizeof(
char)) ) == NULL ) {
1178 free(output_prefix);
1184 time_t t = time(NULL);
1185 struct tm tm = *localtime(&t);
1187 sprintf(m_time,
"%d%02d%02d-%02d%02d%02d", tm.tm_year+1900, tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec);
1191 uintmax_t current_unix_time = (uintmax_t)t;
1192 uintmax_t unix_time_from_old_directory =
buf.st_mtime;
1198 if ( unix_time_from_old_directory < current_unix_time ) {
1202 verbose_fprintf(
stdout,
"If you use MPI, another process may have already renamed the directory.\n");
1206 free(new_absolute_output_file_path);
1208 free(output_prefix);
1219 for (
i = 0;
i < width; ++
i )
1229 fprintf(
f,
"\"event_definitions\":{");
1237 const char *event_type =
"delta";
1239 event_type =
"instant";
1245 fprintf(
f,
"\"component\":\"%s\",", cmpinfo->
name);
1247 fprintf(
f,
"\"type\":\"%s\"", event_type);
1262 char **all_event_names = NULL;
1263 int *all_event_types = NULL;
1264 int extended_total_num_events;
1269 all_event_names = (
char**)malloc(extended_total_num_events *
sizeof(
char*));
1270 all_event_names[0] =
"cycles";
1271 all_event_names[1] =
"real_time_nsec";
1273 all_event_types = (
int*)malloc(extended_total_num_events *
sizeof(
int));
1274 all_event_types[0] = 0;
1275 all_event_types[1] = 0;
1283 all_event_types[cmp_iter] = 0;
1285 all_event_types[cmp_iter] = 1;
1290 for ( j = 0; j < extended_total_num_events; j++ ) {
1298 while ( read_node->
next != NULL ) {
1299 read_node = read_node->
next;
1303 fprintf(
f,
"\"%s\":{", all_event_names[j]);
1308 while ( read_node != NULL ) {
1310 fprintf(
f,
"\"read_%d\":\"%lld\"", read_cnt,read_node->
value);
1312 read_node = read_node->
prev;
1314 if ( read_node == NULL ) {
1317 if ( j < extended_total_num_events - 1 )
1328 if ( j < ( extended_total_num_events - 1 ) )
1333 free(all_event_names);
1334 free(all_event_types);
1343 while ( regions->
next != NULL ) {
1344 regions = regions->
next;
1348 while (regions != NULL) {
1355 fprintf(
f,
"\"name\":\"%s\",", regions->
region);
1361 regions = regions->
prev;
1363 if (regions == NULL ) {
1376 fprintf(
f,
"\"threads\":{");
1379 for (
i = 0;
i < threads_num;
i++ )
1381 HLDBG(
"Thread ID:%lu\n", tids[
i]);
1384 if ( thread_node != NULL ) {
1388 fprintf(
f,
"\"%d\":{",
i);
1391 fprintf(
f,
"\"regions\":{");
1399 if (
i < threads_num - 1 ) {
1413 return ( *(
int*)
a - *(
int*)
b );
1422 if ( ( *tids = malloc( *(threads_num) *
sizeof(
unsigned long) ) ) == NULL ) {
1439 bool beautifier =
true;
1454 fprintf(
f,
"\"cpu_info\":\"%s\",", cpu_info);
1457 fprintf(
f,
"\"max_cpu_rate_mhz\":\"%d\",", hwinfo->
cpu_max_mhz);
1459 fprintf(
f,
"\"min_cpu_rate_mhz\":\"%d\",", hwinfo->
cpu_min_mhz);
1477 printf(
"\n\nPAPI-HL Output:\n");
1478 FILE* output_file = fopen(path,
"r");
1479 int c = fgetc(output_file);
1483 c = fgetc(output_file);
1525 srandom( time(NULL) + getpid() );
1526 rank = random() % 1000000;
1529 int unique_output_file_created = 0;
1530 char *final_absolute_output_file_path = NULL;
1535 if ( ( final_absolute_output_file_path = (
char *)malloc((strlen(
absolute_output_file_path) + 20) *
sizeof(
char)) ) == NULL ) {
1538 free(final_absolute_output_file_path);
1543 while ( unique_output_file_created == 0 ) {
1547 fd =
open(final_absolute_output_file_path, O_WRONLY|O_APPEND|O_CREAT|O_NONBLOCK, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH);
1551 free(final_absolute_output_file_path);
1555 struct flock filelock;
1556 filelock.l_type = F_WRLCK;
1557 filelock.l_start = 0;
1558 filelock.l_whence = SEEK_SET;
1561 if ( fcntl(fd, F_SETLK, &filelock) == 0 ) {
1562 unique_output_file_created = 1;
1566 FILE *
fp = fdopen(fd,
"w");
1570 unsigned long *tids = NULL;
1574 free(final_absolute_output_file_path);
1583 if ( getenv(
"PAPI_REPORT") != NULL ) {
1589 free(final_absolute_output_file_path);
1590 fcntl(fd, F_UNLCK, &filelock);
1593 fcntl(fd, F_UNLCK, &filelock);
1602 free(final_absolute_output_file_path);
1640 int extended_total_num_events;
1651 while ( region != NULL ) {
1655 for (
i = 0;
i < extended_total_num_events;
i++ ) {
1656 reads_t *read_node = region->values[
i].read_values;
1658 while ( read_node != NULL ) {
1659 read_node_tmp = read_node;
1660 read_node = read_node->
next;
1661 free(read_node_tmp);
1666 region = region->next;
1690 int i, num_of_threads;
1715 HLDBG(
"Number of registered threads: %d.\n", num_of_threads);
1728 HLDBG(
"PAPI-HL shutdown!\n");
1730 verbose_fprintf(
stdout,
"PAPI-HL Warning: Could not call PAPI_shutdown() since some threads still have running event sets.\n");
static const char * event_names[2]
int open(const char *pathname, int flags, mode_t mode)
add PAPI preset or native hardware event to an event set
Assign a component index to an existing but empty EventSet.
Empty and destroy an EventSet.
Create a new empty PAPI EventSet.
Empty and destroy an EventSet.
Convert a name to a numeric hardware event code.
get information about a specific software component
get information about the system hardware
Get the multiplexing status of specified event set.
Get real time counter value in nanoseconds.
Read performance events inside of a region and store the difference to the corresponding beginning of...
Read performance events at the beginning of a region.
Read performance events at the end of a region and store the difference to the corresponding beginnin...
initialize the PAPI library.
List the registered thread ids.
Initialize multiplex support in the PAPI library.
Stop a running event set of a rate function.
Read hardware counters with a timestamp.
Read hardware counters from an event set.
Convert a standard event set to a multiplexed event set.
Finish using PAPI and free all related resources.
Start counting hardware events in an event set.
Stop counting hardware events in an event set.
Get the thread identifier of the current thread.
Initialize thread support in the PAPI library.
char event_name[2][PAPI_MAX_STR_LEN]
volatile int buf[CACHE_FLUSH_BUFFER_SIZE_INTS]
char events[MAX_EVENTS][BUFSIZ]
static long long values[NUM_EVENTS]
static double a[MATRIX_SIZE][MATRIX_SIZE]
static double b[MATRIX_SIZE][MATRIX_SIZE]
static double c[MATRIX_SIZE][MATRIX_SIZE]
int multi_thread(int argc, char *argv[])
Return codes and api definitions.
#define PAPI_VERSION_REVISION(x)
#define PAPI_VERSION_MAJOR(x)
#define PAPI_COMPONENT_INDEX(a)
#define PAPI_VERSION_INCREMENT(x)
#define PAPI_VERSION_MINOR(x)
#define HLDBG(format, args...)
int rename(const char *__old, const char *__new) __attribute__((__nothrow__
int fclose(FILE *__stream)
static void _internal_hl_clean_up_global_data()
static int _internal_hl_mkdir(const char *dir)
static int _internal_hl_checkCounter(char *counter)
static int _internal_hl_read_events(const char *events)
int _internal_PAPI_hl_set_events(const char *events)
static int _internal_hl_region_id_push()
THREAD_LOCAL_STORAGE_KEYWORD unsigned int _local_region_id_stack[PAPIHL_MAX_STACK_SIZE]
static void _internal_hl_json_definitions(FILE *f, bool beautifier)
static int _internal_hl_read_user_events()
static int _internal_hl_add_values_to_region(regions_t *node, enum region_type reg_typ)
char ** requested_event_names
#define PAPIHL_DEACTIVATED
static int _internal_hl_store_counters(unsigned long tid, const char *region, enum region_type reg_typ)
static void _internal_hl_json_region_events(FILE *f, bool beautifier, regions_t *regions)
static int output_counter
static int _internal_get_sorted_thread_list(unsigned long **tids, int *threads_num)
#define PAPIHL_MAX_STACK_SIZE
static int _internal_hl_create_global_binary_tree()
static int _internal_hl_determine_default_events()
static void _internal_hl_json_regions(FILE *f, bool beautifier, threads_t *thread_node)
static regions_t * _internal_hl_insert_region_node(regions_t **head_node, const char *region)
THREAD_LOCAL_STORAGE_KEYWORD unsigned int _local_region_begin_cnt
#define PAPIHL_NUM_OF_COMPONENTS
THREAD_LOCAL_STORAGE_KEYWORD unsigned int _local_region_end_cnt
static int _internal_hl_create_event_sets()
static int _internal_hl_read_counters()
int _internal_PAPI_hl_init()
static int region_begin_cnt
static threads_t * _internal_hl_find_thread_node(unsigned long tid)
static void _internal_hl_json_line_break_and_indent(FILE *f, bool b, int width)
static int _internal_hl_add_event_to_component(char *event_name, int event, short event_type, components_t *component)
static int _internal_hl_region_id_stack_peak()
int compar(const void *l, const void *r)
static int _internal_hl_read_and_store_counters(const char *region, enum region_type reg_typ)
static int _internal_hl_new_component(int component_id, components_t *component)
THREAD_LOCAL_STORAGE_KEYWORD int _local_region_id_top
components_t * components
static void _internal_hl_write_json_file(FILE *f, unsigned long *tids, int threads_num)
static int _internal_hl_determine_output_path()
#define PAPIHL_NUM_OF_EVENTS_PER_COMPONENT
static char * _internal_hl_remove_spaces(char *str, int mode)
static regions_t * _internal_hl_find_region_node(regions_t *head_node, const char *region)
int _internal_PAPI_hl_finalize()
static void _internal_hl_write_output()
binary_tree_t * binary_tree
static int _internal_hl_create_components()
static int _internal_hl_start_counters()
static int _internal_hl_check_for_clean_thread_states()
unsigned long master_thread_id
static void _internal_hl_library_init(void)
int num_of_requested_events
THREAD_LOCAL_STORAGE_KEYWORD local_components_t * _local_components
static void _internal_hl_clean_up_all(bool deactivate)
static int _internal_hl_cmpfunc(const void *a, const void *b)
static int _internal_hl_determine_rank()
static void _internal_hl_clean_up_local_data()
static void _internal_hl_json_threads(FILE *f, bool beautifier, unsigned long *tids, int threads_num)
static threads_t * _internal_hl_insert_thread_node(unsigned long tid)
THREAD_LOCAL_STORAGE_KEYWORD volatile bool _local_state
static reads_t * _internal_hl_insert_read_node(reads_t **head_node)
static char * absolute_output_file_path
int max_num_of_components
static int region_end_cnt
void _internal_PAPI_hl_print_output()
static int _internal_hl_region_id_pop()
static void _internal_hl_onetime_library_init(void)
int num_of_cleaned_threads
THREAD_LOCAL_STORAGE_KEYWORD long_long _local_cycles
static void _internal_hl_read_json_file(const char *path)
int _internal_PAPI_hl_cleanup_thread()
THREAD_LOCAL_STORAGE_KEYWORD int _papi_hl_events_running
THREAD_LOCAL_STORAGE_KEYWORD int _papi_rate_events_running
papi_mdi_t _papi_hwi_system_info
EventSetInfo_t ** dataSlotArray
char name[PAPI_MAX_STR_LEN]
char model_string[PAPI_MAX_STR_LEN]
DynamicArray_t global_eventset_map
unsigned long _papi_gettid(void)
unsigned long _papi_getpid(void)
inline_static int _papi_hwi_lock(int lck)
#define THREAD_LOCAL_STORAGE_KEYWORD
inline_static int _papi_hwi_unlock(int lck)