44#define INFINIBAND_MAX_COUNTERS 256
47typedef struct infiniband_register
59typedef struct _ib_device_type
63 struct _ib_device_type *
next;
66typedef struct _ib_counter_type
72 struct _ib_counter_type *
next;
78typedef struct _infiniband_native_event_entry
89typedef struct _infiniband_control_state
99typedef struct _infiniband_context
134 PAPIERROR(
"cannot allocate memory for event description");
137 len = strlen(input_str);
146 if (strstr(input_str,
"rx_atomic_requests")) {
148 "Number of received ATOMIC requests for the associated Queue Pairs",
151 else if (strstr(input_str,
"out_of_buffer")) {
153 "Number of drops which occurred due to lack of Work Queue Entry for the associated Queue Pairs",
156 else if (strstr(input_str,
"out_of_sequence")) {
158 "Number of out of sequence packets received",
161 else if (strstr(input_str,
"lifespan")) {
163 "Maximum sampling period of the counters in milliseconds",
166 else if (strstr(input_str,
"rx_read_requests")) {
168 "Number of received READ requests for the associated Queue Pairs",
171 else if (strstr(input_str,
"rx_write_requests")) {
173 "Number of received WRITE requests for the associated Queue Pairs",
176 else if (strstr(input_str,
"port_rcv_data")) {
178 "Total number of data octets, divided by 4 (lanes), received on all Virtual Lanes. "
179 "Multiply this by 4 to get bytes",
182 else if (strstr(input_str,
"port_rcv_packets")) {
184 "Total number of packets received on all Virtual Lanes from this port, including packets containing errors",
187 else if (strstr(input_str,
"port_multicast_rcv_packets") || strstr(input_str,
"multicast_rcv_packets")) {
189 "Total number of multicast packets, including multicast packets containing errors",
192 else if (strstr(input_str,
"port_unicast_rcv_packets") || strstr(input_str,
"unicast_rcv_packets")) {
194 "Total number of unicast packets, including unicast packets containing errors",
197 else if (strstr(input_str,
"port_xmit_data")) {
199 "Total number of data octets, divided by 4 (lanes), transmitted on all Virtual Lanes. "
200 "Multiply this by 4 to get bytes",
203 else if (strstr(input_str,
"port_xmit_packets")) {
205 "Total number of packets transmitted on all Virtual Lanes from this port, including packets containing errors",
208 else if (strstr(input_str,
"port_rcv_switch_relay_errors")) {
210 "Total number of packets received on port that were discarded"
211 " because they could not be forwarded by switch relay",
214 else if (strstr(input_str,
"port_rcv_errors")) {
216 "Total number of packets containing an error that were received on the port",
219 else if (strstr(input_str,
"port_rcv_constraint_errors")) {
221 "Total number of packets received on the switch physical port that are discarded",
224 else if (strstr(input_str,
"local_link_integrity_errors")) {
226 "Number of times the count of local physical errors exceeded threshold",
229 else if (strstr(input_str,
"port_xmit_wait")) {
231 "Number of ticks during which port had data to transmit but no data was sent during the entire tick",
234 else if (strstr(input_str,
"port_multicast_xmit_packets") || strstr(input_str,
"multicast_xmit_packets")) {
236 "Total number of multicast packets transmitted on all VLs from port,"
237 " including multicast packets containing errors",
240 else if (strstr(input_str,
"port_unicast_xmit_packets") || strstr(input_str,
"unicast_xmit_packets")) {
242 "Total number of unicast packets transmitted on all VLs from port,"
243 " including unicast packets containing errors",
246 else if (strstr(input_str,
"port_xmit_discards")) {
248 "Total number of outbound packets discarded by the port because it is down or congested",
251 else if (strstr(input_str,
"port_xmit_constraint_errors")) {
253 "Total number of packets not transmitted from the switch physical port",
256 else if (strstr(input_str,
"port_rcv_remote_physical_errors")) {
258 "Total number of packets marked with EBP (End of Bad Packet) delimiter received on the port",
261 else if (strstr(input_str,
"symbol_error")) {
263 "Total number of minor link errors detected on one or more physical lanes",
266 else if (strstr(input_str,
"VL15_dropped")) {
268 "Number of incoming VL15 packets (can include management packets) dropped due to resource limitations of the port",
271 else if (strstr(input_str,
"link_error_recovery")) {
273 "Total number of times the Port Training state machine has successfully completed the link error recovery process",
276 else if (strstr(input_str,
"link_downed")) {
278 "Total number of times the Port Training state machine has failed link error recovery process and downed the link",
284 input_str, (extended ?
"free-running 64bit counter" :
285 "overflowing, auto-resetting counter"));
286 desc[0] = toupper(desc[0]);
287 for (
i=0 ;
i<len ; ++
i)
302 PAPIERROR(
"cannot allocate memory for new IB device structure");
310 PAPIERROR(
"cannot allocate memory for device internal fields");
327 PAPIERROR(
"cannot allocate memory for new IB counter structure");
337 PAPIERROR(
"cannot allocate memory for counter internal fields");
355 char counters_path[128];
359 snprintf(counters_path,
sizeof(counters_path),
"%s/%s/ports/%d/counters_ext",
362 cnt_dir = opendir(counters_path);
363 if (cnt_dir == NULL) {
366 SUBDBG(
"cannot open counters directory `%s'\n", counters_path);
368 snprintf(counters_path,
sizeof(counters_path),
"%s/%s/ports/%d/%scounters",
371 cnt_dir = opendir(counters_path);
375 snprintf(counters_path,
sizeof(counters_path),
"%s/%s/ports/%d/counters",
377 cnt_dir = opendir(counters_path);
380 if (cnt_dir == NULL) {
381 SUBDBG(
"cannot open counters directory `%s'\n", counters_path);
385 struct dirent *ev_ent;
387 while ((ev_ent = readdir(cnt_dir)) != NULL) {
388 char *ev_name = ev_ent->d_name;
389 long long value = -1;
390 char event_path[FILENAME_MAX];
393 if (ev_name[0] ==
'.')
397 snprintf(event_path,
sizeof(event_path),
"%s/%s", counters_path, ev_name);
398 if (pscanf(event_path,
"%lld", &value) != 1) {
399 SUBDBG(
"cannot read value for event '%s'\n", ev_name);
407 int fixed_extended = extended;
408 if ( !strcmp(
"port_rcv_data", ev_name)
409 || !strcmp(
"port_rcv_packets", ev_name)
410 || !strcmp(
"port_xmit_data", ev_name))
411 fixed_extended = 3-extended;
414 snprintf(counter_name,
sizeof(counter_name),
"%s_%d%s:%s",
418 SUBDBG(
"Added new counter `%s'\n", counter_name);
438 if (ib_dir == NULL) {
446 struct dirent *hca_ent;
447 while ((hca_ent = readdir(ib_dir)) != NULL) {
448 char *hca = hca_ent->d_name;
449 char ports_path[FILENAME_MAX];
450 DIR *ports_dir = NULL;
455 snprintf(ports_path,
sizeof(ports_path),
"%s/%s/ports",
ib_dir_path, hca);
456 ports_dir = opendir(ports_path);
457 if (ports_dir == NULL) {
458 SUBDBG(
"cannot open `%s'\n", ports_path);
462 struct dirent *port_ent;
463 while ((port_ent = readdir(ports_dir)) != NULL) {
464 int port = atoi(port_ent->d_name);
470 char state_path[FILENAME_MAX];
471 snprintf(state_path,
sizeof(state_path),
"%s/%s/ports/%d/state",
ib_dir_path, hca, port);
472 if (pscanf(state_path,
"%d", &
state) != 1) {
473 SUBDBG(
"cannot read state of IB HCA `%s' port %d\n", hca, port);
478 SUBDBG(
"skipping inactive IB HCA `%s', port %d, state %d\n", hca, port,
state);
483 SUBDBG(
"Found IB device `%s', port %d\n", hca, port);
493 if (ports_dir != NULL)
544 char ev_file[FILENAME_MAX];
545 char counters_path[FILENAME_MAX];
547 long long value = 0ll;
550 if (
iter->extended == 1 ||
iter->extended == 2 ) {
553 snprintf(counters_path,
sizeof(counters_path),
"%s/%s/ports/%d/counters%s",
556 cnt_dir = opendir(counters_path);
557 if (cnt_dir == NULL) {
560 snprintf(counters_path,
sizeof(counters_path),
"%s/%s/ports/%d/%scounters",
563 cnt_dir = opendir(counters_path);
568 snprintf(counters_path,
sizeof(counters_path),
"%s/%s/ports/%d/counters",
570 cnt_dir = opendir(counters_path);
578 snprintf(ev_file, strlen(counters_path) + strlen(
iter->file_name) + 2,
"%s/%s",
579 counters_path,
iter->file_name);
581 if (pscanf(ev_file,
"%lld", &value) != 1) {
582 PAPIERROR(
"cannot read value for counter '%s'\n",
iter->name);
585 SUBDBG(
"Counter '%s': %lld\n",
iter->name, value);
612 free(
iter->dev_name);
726 if (temp < context->start_value[
i]) {
727 SUBDBG(
"Wraparound!\nstart:\t%#016x\ttemp:\t%#016x",
884 *EventCode = *EventCode + 1;
903 int index = EventCode;
918 int index = EventCode;
929 int index = EventCode;
944 strncpy(info->
units,
"\0", 1);
959 .
name =
"infiniband",
960 .short_name =
"infiniband",
962 .description =
"Linux Infiniband statistics using the sysfs interface",
972 .fast_real_timer = 0,
973 .fast_virtual_timer = 0,
975 .attach_must_ptrace = 0,
get real time counter value in microseconds
struct papi_vectors * _papi_hwd[]
#define PAPI_HUGE_STR_LEN
char events[MAX_EVENTS][BUFSIZ]
static long iter[MAX_THREADS]
static infiniband_native_event_entry_t * infiniband_native_events
static int find_ib_device_events(ib_device_t *dev, int extended)
static int _infiniband_ntv_code_to_info(unsigned int EventCode, PAPI_event_info_t *info)
static int _infiniband_stop(hwd_context_t *ctx, hwd_control_state_t *ctl)
#define INFINIBAND_MAX_COUNTERS
static int _infiniband_ctl(hwd_context_t *ctx, int code, _papi_int_option_t *option)
static int _infiniband_shutdown_thread(hwd_context_t *ctx)
static int _infiniband_shutdown_component(void)
static void deallocate_infiniband_resources()
static int _infiniband_init_control_state(hwd_control_state_t *ctl)
static int _infiniband_ntv_enum_events(unsigned int *EventCode, int modifier)
static int _infiniband_ntv_code_to_name(unsigned int EventCode, char *name, int len)
static int _infiniband_set_domain(hwd_control_state_t *ctl, int domain)
static int _infiniband_reset(hwd_context_t *ctx, hwd_control_state_t *ctl)
static int find_ib_devices()
static ib_counter_t * root_counter
static int _infiniband_init_thread(hwd_context_t *ctx)
static char * make_ib_event_description(const char *input_str, int extended)
static const char * ib_dir_path
static long long read_ib_counter_value(int index)
static ib_device_t * add_ib_device(const char *name, int port)
static int _infiniband_ntv_code_to_descr(unsigned int EventCode, char *name, int len)
static int _infiniband_init_component(int cidx)
static int _infiniband_read(hwd_context_t *ctx, hwd_control_state_t *ctl, long_long **events, int flags)
static ib_counter_t * add_ib_counter(const char *name, const char *file_name, int extended, ib_device_t *device)
papi_vector_t _infiniband_vector
static int _infiniband_update_control_state(hwd_control_state_t *ctl, NativeInfo_t *native, int count, hwd_context_t *ctx)
static ib_device_t * root_device
static int _infiniband_start(hwd_context_t *ctx, hwd_control_state_t *ctl)
#define PAPI_NATIVE_AND_MASK
Return codes and api definitions.
#define SUBDBG(format, args...)
void PAPIERROR(char *format,...)
#define papi_calloc(a, b)
char name[PAPI_MAX_STR_LEN]
char disabled_reason[PAPI_HUGE_STR_LEN]
char units[PAPI_MIN_STR_LEN]
char symbol[PAPI_HUGE_STR_LEN]
char long_descr[PAPI_HUGE_STR_LEN]
struct _ib_counter_type * next
struct _ib_device_type * next
infiniband_control_state_t state
long long start_value[INFINIBAND_MAX_COUNTERS]
long long counts[INFINIBAND_MAX_COUNTERS]
int being_measured[INFINIBAND_MAX_COUNTERS]
int need_difference[INFINIBAND_MAX_COUNTERS]
infiniband_register_t resources
PAPI_component_info_t cmp_info