PAPI 7.1.0.0
Loading...
Searching...
No Matches
linux-rocm-smi.c
Go to the documentation of this file.
1//-----------------------------------------------------------------------------
2// @file linux-rocm-smi.c
3//
4// @ingroup rocm_components
5//
6// @brief This implements a PAPI component that enables PAPI-C to access
7// hardware system management controls for AMD ROCM GPU devices through the
8// rocm_smi library.
9//
10// The open source software license for PAPI conforms to the BSD License
11// template.
12//-----------------------------------------------------------------------------
13
14#include <stdlib.h>
15#include <stdio.h>
16#include <string.h>
17#include <assert.h>
18
19#include "papi.h"
20#include "papi_internal.h"
21#include "papi_vector.h"
22#include "papi_memory.h"
23#include "extras.h"
24#include "rocs.h"
25
26typedef struct {
28 int state;
31
32typedef struct {
33 unsigned int *events_id;
36 rocs_ctx_t rocs_ctx;
38
39extern unsigned int _rocm_smi_lock;
41
42static int _rocm_smi_init_private(void);
43
44static int
46{
50}
51
52static int
54{
55 rocmsmi_context_t *rocmsmi_ctx = (rocmsmi_context_t *) ctx;
56 memset(rocmsmi_ctx, 0, sizeof(*rocmsmi_ctx));
57 rocmsmi_ctx->initialized = 1;
59 return PAPI_OK;
60}
61
62
63static int
65{
70
72 "Not initialized. Access component events to initialize it.");
74
75 return PAPI_EDELAY_INIT;
76}
77
// Counts the events that rocs_evt_enum() can enumerate: one call with
// PAPI_ENUM_FIRST, then repeated calls with PAPI_ENUM_EVENTS for as long as
// PAPI_OK is returned. *count is only incremented here, never reset, so the
// caller has to zero it beforehand. Always returns PAPI_OK.
// NOTE(review): the signature line (listing line 79) is missing from this
// extract; the use of *count and the symbol index entry
// "evt_get_count(int *count)" suggest that is this function -- confirm.
78static int
80{
81 unsigned int event_code = 0;
82
83 if (rocs_evt_enum(&event_code, PAPI_ENUM_FIRST) == PAPI_OK) {
84 ++(*count);
85 }
86
    // event_code is advanced in place by each enumeration step.
87 while (rocs_evt_enum(&event_code, PAPI_ENUM_EVENTS) == PAPI_OK) {
88 ++(*count);
89 }
90
91 return PAPI_OK;
92}
93
// Initialization routine: calls rocs_init(); on failure copies the last rocs
// error string into _rocm_smi_vector.cmp_info.disabled_reason and leaves through
// fn_fail, otherwise counts the available events with evt_get_count().
// Returns papi_errno.
// NOTE(review): listing lines 95, 99, 101-102, 108, 117-119 and 122-124 are
// missing from this extract (including the signature), so the early-exit
// condition, any locking, and what is done with `count` are not visible here.
// Presumably this is _rocm_smi_init_private(void) -- confirm against the source.
94static int
96{
97 int papi_errno = PAPI_OK;
98
100
103 goto fn_exit;
104 }
105
106 papi_errno = rocs_init();
107 if (papi_errno != PAPI_OK) {
109 const char *error_str;
    // NOTE(review): the return value of rocs_err_get_last() is ignored; if it
    // can fail without writing error_str, the sprintf below reads an
    // uninitialized pointer -- confirm.
110 rocs_err_get_last(&error_str);
    // NOTE(review): sprintf is unbounded while disabled_reason is a fixed-size
    // char[PAPI_HUGE_STR_LEN]; snprintf with sizeof(disabled_reason) would be
    // the safer call.
111 sprintf(_rocm_smi_vector.cmp_info.disabled_reason, "%s", error_str);
112 goto fn_fail;
113 }
114
115 int count = 0;
116 papi_errno = evt_get_count(&count);
120
121 fn_exit:
125 return papi_errno;
    // fn_fail performs no extra cleanup in the visible lines; it only rejoins fn_exit.
126 fn_fail:
127 goto fn_exit;
128}
129
130
131static int
133{
135}
136
139
// Rebuilds the control state's event list from nativeInfo/nativeCount.
// Steps: run _rocm_smi_check_n_initialize() and return its code if it is not
// PAPI_OK; refuse with PAPI_EMISC while the context state has
// ROCS_EVENTS_RUNNING set; copy the events with update_native_events(); finally
// return whatever try_open_events() reports.
// NOTE(review): the signature line (listing line 141) is missing from this
// extract; the names used in the body (ctrl, nativeInfo, nativeCount, ctx) match
// the symbol index entry for _rocm_smi_update_control_state -- confirm.
140static int
142{
143 int papi_errno =_rocm_smi_check_n_initialize();
144 if (papi_errno != PAPI_OK) {
145 return papi_errno;
146 }
147
148 rocmsmi_control_t *rocmsmi_ctl = (rocmsmi_control_t *) ctrl;
149 rocmsmi_context_t *rocmsmi_ctx = (rocmsmi_context_t *) ctx;
150
    // The event list must not change while counters are running.
151 if (rocmsmi_ctx->state & ROCS_EVENTS_RUNNING) {
152 return PAPI_EMISC;
153 }
154
155 papi_errno = update_native_events(rocmsmi_ctl, nativeInfo, nativeCount);
156 if (papi_errno != PAPI_OK) {
157 return papi_errno;
158 }
159
160 return try_open_events(rocmsmi_ctl);
161}
162
// Replaces ctl->events_id with a freshly allocated array holding the ni_event
// code of each of the ntv_count entries in ntv_info, and records the entry's
// index in ni_position. The previous events_id array is freed.
// Returns PAPI_OK, or PAPI_ENOMEM when papi_calloc() returns NULL.
// NOTE(review): the signature line (listing line 164) is missing from this
// extract; the names used (ctl, ntv_info, ntv_count) fit the symbol index entry
// "update_native_events(rocmsmi_control_t *, NativeInfo_t *, int)" -- confirm.
163int
165{
166 int papi_errno = PAPI_OK;
167
168 unsigned int *events = papi_calloc(ntv_count, sizeof(*events));
169 if (events == NULL) {
170 papi_errno = PAPI_ENOMEM;
171 goto fn_fail;
172 }
173
174 int i;
175 for (i = 0; i < ntv_count; ++i) {
176 events[i] = ntv_info[i].ni_event;
177 ntv_info[i].ni_position = i;
178 }
179
    // Ownership of the new array moves to the control state.
180 papi_free(ctl->events_id);
181 ctl->events_id = events;
182 ctl->num_events = ntv_count;
183
184 fn_exit:
185 return papi_errno;
186 fn_fail:
    // Only num_events is reset on failure; the old events_id pointer is left
    // in place (neither freed nor cleared).
187 ctl->num_events = 0;
188 goto fn_exit;
189}
190
191int
193{
194 return PAPI_OK;
195}
196
// Opens a rocs context for the control state's events and starts it.
// Returns PAPI_EMISC if the context state already has ROCS_EVENTS_OPENED set,
// the error from rocs_ctx_open()/rocs_ctx_start() if either fails, else PAPI_OK.
// On success the state ends up as OPENED | RUNNING; if the start step fails the
// just-opened rocs context is closed again and the state is cleared to 0.
// NOTE(review): the signature line (listing line 198) is missing from this
// extract; the body uses ctx and ctrl and the symbol index lists
// _rocm_smi_start(hwd_context_t *ctx, hwd_control_state_t *ctrl) -- confirm.
197static int
199{
200 int papi_errno = PAPI_OK;
201 rocmsmi_context_t *rocmsmi_ctx = (rocmsmi_context_t *) ctx;
202 rocmsmi_control_t *rocmsmi_ctl = (rocmsmi_control_t *) ctrl;
203
204 if (rocmsmi_ctx->state & ROCS_EVENTS_OPENED) {
205 return PAPI_EMISC;
206 }
207
208 papi_errno = rocs_ctx_open(rocmsmi_ctl->events_id, rocmsmi_ctl->num_events, &rocmsmi_ctl->rocs_ctx);
209 if (papi_errno != PAPI_OK) {
210 return papi_errno;
211 }
212
    // Plain assignment: any other state bits are discarded at this point.
213 rocmsmi_ctx->state = ROCS_EVENTS_OPENED;
214
215 papi_errno = rocs_ctx_start(rocmsmi_ctl->rocs_ctx);
216 if (papi_errno != PAPI_OK) {
217 goto fn_fail;
218 }
219
220 rocmsmi_ctx->state |= ROCS_EVENTS_RUNNING;
221
222 fn_exit:
223 return papi_errno;
224 fn_fail:
    // Undo the open; the result of rocs_ctx_close() is not checked so the
    // original start error is what gets returned.
225 if (rocmsmi_ctx->state & ROCS_EVENTS_OPENED) {
226 rocs_ctx_close(rocmsmi_ctl->rocs_ctx);
227 }
228 rocmsmi_ctx->state = 0;
229 goto fn_exit;
230}
231
// Reads the current counter values of the control state's rocs context.
// ctx    : cast to rocmsmi_context_t; only its state flags are inspected.
// ctrl   : cast to rocmsmi_control_t; supplies the rocs context handle.
// values : passed through unchanged to rocs_ctx_read().
// flags  : unused.
// Returns PAPI_EMISC when ROCS_EVENTS_RUNNING is not set in the context state,
// otherwise the return value of rocs_ctx_read().
232static int
233_rocm_smi_read(hwd_context_t *ctx, hwd_control_state_t *ctrl, long long **values, int flags __attribute__((unused)))
234{
235 rocmsmi_context_t *rocmsmi_ctx = (rocmsmi_context_t *) ctx;
236 rocmsmi_control_t *rocmsmi_ctl = (rocmsmi_control_t *) ctrl;
237
238 if (!(rocmsmi_ctx->state & ROCS_EVENTS_RUNNING)) {
239 return PAPI_EMISC;
240 }
241
242 return rocs_ctx_read(rocmsmi_ctl->rocs_ctx, values);
243}
244
// Forwards `values` to rocs_ctx_write() for the control state's rocs context.
// Returns PAPI_EMISC when ROCS_EVENTS_RUNNING is not set in the context state,
// otherwise the return value of rocs_ctx_write().
// NOTE(review): the signature line (listing line 246) is missing from this
// extract; the names used (ctx, ctrl, values) match the symbol index entry for
// _rocm_smi_write -- confirm.
245static int
247{
248 rocmsmi_context_t *rocmsmi_ctx = (rocmsmi_context_t *) ctx;
249 rocmsmi_control_t *rocmsmi_ctl = (rocmsmi_control_t *) ctrl;
250
251 if (!(rocmsmi_ctx->state & ROCS_EVENTS_RUNNING)) {
252 return PAPI_EMISC;
253 }
254
255 return rocs_ctx_write(rocmsmi_ctl->rocs_ctx, values);
256}
257
// Stops and then closes the control state's rocs context.
// Returns PAPI_EMISC when ROCS_EVENTS_RUNNING is not set, the error from
// rocs_ctx_stop() if stopping fails, otherwise the result of rocs_ctx_close().
// NOTE(review): the signature line (listing line 259) is missing from this
// extract; the body uses ctx and ctrl and the symbol index lists
// _rocm_smi_stop(hwd_context_t *ctx, hwd_control_state_t *ctrl) -- confirm.
258static int
260{
261 int papi_errno = PAPI_OK;
262 rocmsmi_context_t *rocmsmi_ctx = (rocmsmi_context_t *) ctx;
263 rocmsmi_control_t *rocmsmi_ctl = (rocmsmi_control_t *) ctrl;
264
265 if (!(rocmsmi_ctx->state & ROCS_EVENTS_RUNNING)) {
266 return PAPI_EMISC;
267 }
268
269 papi_errno = rocs_ctx_stop(rocmsmi_ctl->rocs_ctx);
    // If stopping fails the function returns here: the rocs context is not
    // closed and the state flags are left as they were.
270 if (papi_errno != PAPI_OK) {
271 return papi_errno;
272 }
273
274 rocmsmi_ctx->state &= ~ROCS_EVENTS_RUNNING;
275
276 papi_errno = rocs_ctx_close(rocmsmi_ctl->rocs_ctx);
277
    // State and handle are cleared whether or not the close succeeded.
278 rocmsmi_ctx->state = 0;
279 rocmsmi_ctl->rocs_ctx = NULL;
280
281 return papi_errno;
282}
283
// Releases the event-id array held by the control state and zeroes num_events.
// Returns PAPI_EMISC, without freeing anything, while rocs_ctx is still
// non-NULL; otherwise PAPI_OK.
// NOTE(review): the signature line (listing line 285) is missing from this
// extract; the body uses only ctrl and the symbol index lists
// _rocm_smi_cleanup_eventset(hwd_control_state_t *ctrl) -- confirm.
284static int
286{
287 rocmsmi_control_t *rocmsmi_ctl = (rocmsmi_control_t *) ctrl;
288
289 if (rocmsmi_ctl->rocs_ctx != NULL) {
290 return PAPI_EMISC;
291 }
292
293 papi_free(rocmsmi_ctl->events_id);
294 rocmsmi_ctl->events_id = NULL;
295 rocmsmi_ctl->num_events = 0;
296
297 return PAPI_OK;
298}
299
// Clears the `state` and `initialized` fields of the context and returns PAPI_OK.
// NOTE(review): the signature line (listing line 301) is missing from this
// extract; the body uses only ctx and the symbol index lists
// _rocm_smi_shutdown_thread(hwd_context_t *ctx) -- confirm.
300static int
302{
303 rocmsmi_context_t *rocmsmi_ctx = (rocmsmi_context_t *) ctx;
304 rocmsmi_ctx->state = 0;
305 rocmsmi_ctx->initialized = 0;
306 return PAPI_OK;
307}
308
309static int
311{
313 return PAPI_EMISC;
314 }
315
317 return PAPI_EMISC;
318 }
319
320 int papi_errno = rocs_shutdown();
322 return papi_errno;
323}
324
// Forwards to rocs_ctx_reset() for the control state's rocs context.
// Returns PAPI_EMISC when ROCS_EVENTS_RUNNING is not set in the context state,
// otherwise the return value of rocs_ctx_reset().
// NOTE(review): the signature line (listing line 326) is missing from this
// extract; the body uses ctx and ctrl and the symbol index lists
// _rocm_smi_reset(hwd_context_t *ctx, hwd_control_state_t *ctrl) -- confirm.
325static int
327{
328 rocmsmi_context_t *rocmsmi_ctx = (rocmsmi_context_t *) ctx;
329 rocmsmi_control_t *rocmsmi_ctl = (rocmsmi_control_t *) ctrl;
330
331 if (!(rocmsmi_ctx->state & ROCS_EVENTS_RUNNING)) {
332 return PAPI_EMISC;
333 }
334
335 return rocs_ctx_reset(rocmsmi_ctl->rocs_ctx);
336}
337
// Control hook with no effect: every parameter is marked unused and the
// function unconditionally returns PAPI_OK.
338static int
339_rocm_smi_ctrl(hwd_context_t *ctx __attribute__((unused)), int code __attribute__((unused)), _papi_int_option_t *option __attribute__((unused)))
340{
341 return PAPI_OK;
342}
343
344static int
346{
347 return PAPI_OK;
348}
349
// Enumerates native events by delegating to rocs_evt_enum().
// EventCode : in/out event code, passed straight through.
// modifier  : enumeration modifier, passed straight through.
// _rocm_smi_check_n_initialize() is called first; if it does not return
// PAPI_OK, that code is returned and rocs_evt_enum() is not called.
350static int
351_rocm_smi_ntv_enum_events(unsigned int *EventCode, int modifier)
352{
353 int papi_errno =_rocm_smi_check_n_initialize();
354 if (papi_errno != PAPI_OK) {
355 return papi_errno;
356 }
357 return rocs_evt_enum(EventCode, modifier);
358}
359
// Translates an event code to its name by delegating to rocs_evt_code_to_name().
// EventCode : event code to look up.
// name, len : output buffer and its length, passed straight through.
// _rocm_smi_check_n_initialize() is called first; if it does not return
// PAPI_OK, that code is returned and the lookup is skipped.
360static int
361_rocm_smi_ntv_code_to_name(unsigned int EventCode, char *name, int len)
362{
363 int papi_errno =_rocm_smi_check_n_initialize();
364 if (papi_errno != PAPI_OK) {
365 return papi_errno;
366 }
367 return rocs_evt_code_to_name(EventCode, name, len);
368}
369
// Translates an event name to its code by delegating to rocs_evt_name_to_code().
// name      : event name to look up.
// EventCode : output location, passed straight through.
// _rocm_smi_check_n_initialize() is called first; if it does not return
// PAPI_OK, that code is returned and the lookup is skipped.
370static int
371_rocm_smi_ntv_name_to_code(const char *name, unsigned int *EventCode)
372{
373 int papi_errno =_rocm_smi_check_n_initialize();
374 if (papi_errno != PAPI_OK) {
375 return papi_errno;
376 }
377 return rocs_evt_name_to_code(name, EventCode);
378}
379
// Fetches an event's description by delegating to rocs_evt_code_to_descr().
// EventCode : event code to look up.
// desc, len : output buffer and its length, passed straight through.
// _rocm_smi_check_n_initialize() is called first; if it does not return
// PAPI_OK, that code is returned and the lookup is skipped.
380static int
381_rocm_smi_ntv_code_to_descr(unsigned int EventCode, char *desc, int len)
382{
383 int papi_errno =_rocm_smi_check_n_initialize();
384 if (papi_errno != PAPI_OK) {
385 return papi_errno;
386 }
387 return rocs_evt_code_to_descr(EventCode, desc, len);
388}
389
390
392 .cmp_info = {
393 .name = "rocm_smi",
394 .short_name = "rocm_smi",
395 .version = "2.0",
396 .description = "AMD GPU System Management Interface via rocm_smi_lib",
397 .default_domain = PAPI_DOM_USER,
398 .default_granularity = PAPI_GRN_THR,
399 .available_granularities = PAPI_GRN_THR,
400 .hardware_intr_sig = PAPI_INT_SIGNAL,
401 .fast_real_timer = 0,
402 .fast_virtual_timer = 0,
403 .attach = 0,
404 .attach_must_ptrace = 0,
405 .available_domains = PAPI_DOM_USER | PAPI_DOM_KERNEL,
406 .initialized = 0,
407 },
408 .size = {
409 .context = sizeof(rocmsmi_context_t),
410 .control_state = sizeof(rocmsmi_control_t),
411 .reg_value = 1,
412 .reg_alloc = 1,
413 },
414 .start = _rocm_smi_start,
415 .stop = _rocm_smi_stop,
416 .read = _rocm_smi_read,
417 .write = _rocm_smi_write,
418 .reset = _rocm_smi_reset,
419 .cleanup_eventset = _rocm_smi_cleanup_eventset,
420 .init_component = _rocm_smi_init_component,
421 .init_thread = _rocm_smi_init_thread,
422 .init_control_state = _rocm_smi_init_control_state,
423 .update_control_state = _rocm_smi_update_control_state,
424 .ctl = _rocm_smi_ctrl,
425 .set_domain = _rocm_smi_set_domain,
426 .ntv_enum_events = _rocm_smi_ntv_enum_events,
427 .ntv_code_to_name = _rocm_smi_ntv_code_to_name,
428 .ntv_name_to_code = _rocm_smi_ntv_name_to_code,
429 .ntv_code_to_descr = _rocm_smi_ntv_code_to_descr,
430 .shutdown_thread = _rocm_smi_shutdown_thread,
431 .shutdown_component = _rocm_smi_shutdown_component,
432};
int i
static long count
Lock one of two mutex variables defined in papi.h.
Unlock one of the mutex variables defined in papi.h.
#define PAPI_DOM_USER
Definition: f90papi.h:174
#define PAPI_ENUM_EVENTS
Definition: f90papi.h:224
#define PAPI_OK
Definition: f90papi.h:73
#define PAPI_ENUM_FIRST
Definition: f90papi.h:85
#define PAPI_NUM_LOCK
Definition: f90papi.h:80
#define PAPI_DOM_KERNEL
Definition: f90papi.h:254
#define PAPI_EDELAY_INIT
Definition: f90papi.h:271
#define PAPI_EMISC
Definition: f90papi.h:122
#define PAPI_ENOMEM
Definition: f90papi.h:16
#define PAPI_GRN_THR
Definition: f90papi.h:265
char events[MAX_EVENTS][BUFSIZ]
static long long values[NUM_EVENTS]
Definition: init_fini.c:10
static int _rocm_smi_shutdown_thread(hwd_context_t *ctx)
static int update_native_events(rocmsmi_control_t *, NativeInfo_t *, int)
static int _rocm_smi_update_control_state(hwd_control_state_t *ctrl, NativeInfo_t *nativeInfo, int nativeCount, hwd_context_t *ctx)
static int _rocm_smi_reset(hwd_context_t *ctx, hwd_control_state_t *ctrl)
static int _rocm_smi_cleanup_eventset(hwd_control_state_t *ctrl)
static int try_open_events(rocmsmi_control_t *)
static int _rocm_smi_stop(hwd_context_t *ctx, hwd_control_state_t *ctrl)
static int _rocm_smi_init_control_state(hwd_control_state_t *ctrl __attribute__((unused)))
static int evt_get_count(int *count)
static int _rocm_smi_write(hwd_context_t *ctx, hwd_control_state_t *ctrl, long long *values)
papi_vector_t _rocm_smi_vector
static int _rocm_smi_start(hwd_context_t *ctx, hwd_control_state_t *ctrl)
static int _rocm_smi_check_n_initialize(void)
static int _rocm_smi_read(hwd_context_t *ctx, hwd_control_state_t *ctrl, long long **values, int flags __attribute__((unused)))
static int _rocm_smi_init_component(int cidx)
static int _rocm_smi_shutdown_component(void)
static int _rocm_smi_ntv_code_to_name(unsigned int EventCode, char *name, int len)
static int _rocm_smi_ntv_name_to_code(const char *name, unsigned int *EventCode)
static int _rocm_smi_init_private(void)
static int _rocm_smi_ntv_code_to_descr(unsigned int EventCode, char *desc, int len)
static int _rocm_smi_ntv_enum_events(unsigned int *EventCode, int modifier)
static int _rocm_smi_init_thread(hwd_context_t *ctx)
unsigned int _rocm_smi_lock
Definition: rocs.c:11
static int _rocm_smi_ctrl(hwd_context_t *ctx __attribute__((unused)), int code __attribute__((unused)), _papi_int_option_t *option __attribute__((unused)))
static int _rocm_smi_set_domain(hwd_control_state_t *ctrl __attribute__((unused)), int domain __attribute__((unused)))
unsigned long AO_t __attribute__((__aligned__(4)))
Definition: m68k.h:21
Return codes and api definitions.
#define COMPONENT_LOCK
Definition: papi_internal.h:90
#define PAPI_INT_SIGNAL
Definition: papi_internal.h:52
#define NUM_INNER_LOCK
Definition: papi_lock.h:17
#define papi_calloc(a, b)
Definition: papi_memory.h:37
#define papi_free(a)
Definition: papi_memory.h:35
static int cidx
int rocs_ctx_write(rocs_ctx_t rocs_ctx, long long *counts)
Definition: rocs.c:624
int rocs_err_get_last(const char **err_string)
Definition: rocs.c:450
int rocs_evt_code_to_descr(unsigned int event_code, char *descr, int len)
Definition: rocs.c:410
int rocs_evt_name_to_code(const char *name, unsigned int *event_code)
Definition: rocs.c:421
int rocs_ctx_close(rocs_ctx_t rocs_ctx)
Definition: rocs.c:519
const char * name
Definition: rocs.c:225
int rocs_init(void)
Definition: rocs.c:333
int rocs_shutdown(void)
Definition: rocs.c:655
int rocs_ctx_read(rocs_ctx_t rocs_ctx, long long **counts)
Definition: rocs.c:605
int rocs_ctx_start(rocs_ctx_t rocs_ctx)
Definition: rocs.c:546
int rocs_ctx_reset(rocs_ctx_t rocs_ctx)
Definition: rocs.c:642
int rocs_ctx_open(unsigned int *events_id, int num_events, rocs_ctx_t *rocs_ctx)
Definition: rocs.c:461
int rocs_ctx_stop(rocs_ctx_t rocs_ctx)
Definition: rocs.c:580
int rocs_evt_code_to_name(unsigned int event_code, char *name, int len)
Definition: rocs.c:440
int rocs_evt_enum(unsigned int *event_code, int modifier)
Definition: rocs.c:384
#define ROCS_EVENTS_OPENED
Definition: rocs.h:4
#define ROCS_EVENTS_RUNNING
Definition: rocs.h:5
char name[PAPI_MAX_STR_LEN]
Definition: papi.h:627
char disabled_reason[PAPI_HUGE_STR_LEN]
Definition: papi.h:634
PAPI_component_info_t cmp_info
Definition: papi_vector.h:20
rocs_ctx_t rocs_ctx
unsigned int * events_id