18#define dprint(format, arg...)
32typedef struct _barrier {
47typedef struct _pfms_thread {
72 r = pthread_mutex_init(&
b->mutex, NULL);
73 if (r == -1)
return -1;
75 r = pthread_cond_init(&
b->cond, NULL);
76 if (r == -1)
return -1;
89 r = pthread_mutex_unlock(&
b->mutex);
90 dprint(
"free barrier mutex r=%d\n", r);
102 pthread_mutex_lock(&
b->mutex);
104 pthread_testcancel();
106 if (--
b->counter == 0) {
118 pthread_cond_broadcast(&
b->cond);
121 generation =
b->generation;
123 pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &oldstate);
125 while (
b->counter !=
b->max && generation ==
b->generation) {
126 pthread_cond_wait(&
b->cond, &
b->mutex);
129 pthread_setcancelstate(oldstate, NULL);
131 pthread_mutex_unlock(&
b->mutex);
133 pthread_cleanup_pop(0);
153 pid = syscall(__NR_gettid);
155 size =
ncpus *
sizeof(uint64_t);
157 mask = calloc(1, size);
159 dprint(
"CPU%u: cannot allocate bitvector\n", cpu);
162 mask[cpu>>6] = 1ULL << (cpu & 63);
164 ret = syscall(__NR_sched_setaffinity,
pid, size, mask);
175 uint32_t mycpu = (uint32_t)k;
183 memset(&load_args, 0,
sizeof(load_args));
188 dprint(
"CPU%u wthread created and pinned ret=%d\n", mycpu, ret);
193 dprint(
"CPU%u waiting for cmd\n", mycpu);
211 ret = fd < 0 ? -1 : 0;
212 dprint(
"CPU%u CMD_CTX ret=%d errno=%d fd=%d\n", mycpu, ret,
errno, fd);
217 dprint(
"CPU%u CMD_LOAD ret=%d errno=%d fd=%d\n", mycpu, ret,
errno, fd);
221 dprint(
"CPU%u CMD_UNLOAD ret=%d errno=%d fd=%d\n", mycpu, ret,
errno, fd);
225 dprint(
"CPU%u CMD_START ret=%d errno=%d fd=%d\n", mycpu, ret,
errno, fd);
229 dprint(
"CPU%u CMD_STOP ret=%d errno=%d fd=%d\n", mycpu, ret,
errno, fd);
233 dprint(
"CPU%u CMD_WPMCS ret=%d errno=%d fd=%d\n", mycpu, ret,
errno, fd);
237 dprint(
"CPU%u CMD_WPMDS ret=%d errno=%d fd=%d\n", mycpu, ret,
errno, fd);
241 dprint(
"CPU%u CMD_RPMDS ret=%d errno=%d fd=%d\n", mycpu, ret,
errno, fd);
244 dprint(
"CPU%u CMD_CLOSE fd=%d\n", mycpu, fd);
253 dprint(
"CPU%u td->ret=%d\n", mycpu, ret);
264 sem_init(&
tds[cpu].cmd_sem, 0, 0);
266 ret = pthread_create(&
tds[cpu].tid,
283 for(k=0, cpu = 0; k < n; k++, cpu+= 64) {
285 for(
i=0; v && i < 63; i++, v>>=1, cpu++) {
286 if ((v & 0x1) &&
tds[cpu].tid == 0) {
294 dprint(
"cannot create wthread on CPU%u\n", cpu);
302 printf(
"cpu_t=%zu thread=%zu session_t=%zu\n",
307 ncpus = (uint32_t)sysconf(_SC_NPROCESSORS_ONLN);
309 dprint(
"cannot retrieve number of online processors\n");
320 dprint(
"cannot allocate thread descriptors\n");
335 if (cpu_list == NULL || n == 0 || ctx == NULL || desc == NULL) {
336 dprint(
"invalid parameters\n");
341 dprint(
"only works for system wide\n");
351 for(k=0, cpu = 0; k < n; k++, cpu+=64) {
353 for(
i=0; v && i < 63; i++, v>>=1, cpu++) {
356 dprint(
"unavailable CPU%u\n", cpu);
367 s = calloc(1,
sizeof(*
s));
369 dprint(
"cannot allocate %u contexts\n", num);
374 printf(
"%u-way session\n", num);
381 dprint(
"cannot init barrier\n");
392 goto error_free_unlock;
397 for(k=0, cpu = 0; k < n; k++, cpu += 64) {
399 for(
i=0; v && i < 63; i++, v>>=1, cpu++) {
401 if (
tds[cpu].barrier) {
402 dprint(
"CPU%u already managing a session\n", cpu);
403 goto error_free_unlock;
413 for(k=0, cpu = 0; k < n; k++, cpu += 64) {
415 for(
i=0; v && i < 63; i++, v>>=1, cpu++) {
420 sem_post(&
tds[cpu].cmd_sem);
431 for(k=0; k <
ncpus; k++) {
432 if (
tds[k].barrier == &
s->barrier) {
442 for(k=0; k <
ncpus; k++) {
443 if (
tds[k].barrier == &
s->barrier) {
444 if (
tds[k].ret == 0) {
446 sem_post(&
tds[k].cmd_sem);
455 if (ret == 0) *desc =
s;
475 dprint(
"invalid parameters\n");
481 dprint(
"invalid session content 0 CPUS\n");
487 for(k=0; k <
ncpus; k++) {
488 if (
tds[k].barrier == &
s->barrier) {
490 sem_post(&
tds[k].cmd_sem);
501 for(k=0; k <
ncpus; k++) {
502 if (
tds[k].barrier == &
s->barrier) {
505 dprint(
"failure on CPU%u\n", k);
515 for(k=0; k <
ncpus; k++) {
516 if (
tds[k].barrier == &
s->barrier) {
517 if (
tds[k].ret == 0) {
519 sem_post(&
tds[k].cmd_sem);
535 dprint(
"invalid parameters\n");
541 dprint(
"invalid session content 0 CPUS\n");
547 for(k=0; k <
ncpus; k++) {
548 if (
tds[k].barrier == &
s->barrier) {
552 sem_post(&
tds[k].cmd_sem);
562 for(k=0; k <
ncpus; k++) {
563 if (
tds[k].barrier == &
s->barrier) {
566 dprint(
"failure on CPU%zu\n", k);
615 dprint(
"invalid parameters\n");
621 dprint(
"invalid session content 0 CPUS\n");
625 for(k=0; k <
ncpus; k++) {
626 if (
tds[k].barrier == &
s->barrier) {
628 sem_post(&
tds[k].cmd_sem);
639 for(k=0; k <
ncpus; k++) {
640 if (
tds[k].barrier == &
s->barrier) {
642 dprint(
"failure on CPU%zu\n", k);
663 uint32_t k, pmds_per_cpu;
667 dprint(
"invalid parameters\n");
673 dprint(
"invalid session content 0 CPUS\n");
677 dprint(
"invalid number of pfarg_pmd_t provided, must be multiple of %u\n",
s->ncpus);
680 pmds_per_cpu = n /
s->ncpus;
682 dprint(
"n=%u ncpus=%u per_cpu=%u\n", n,
s->ncpus, pmds_per_cpu);
684 for(k=0; k <
ncpus; k++) {
685 if (
tds[k].barrier == &
s->barrier) {
689 sem_post(&
tds[k].cmd_sem);
690 pmds += pmds_per_cpu;
700 for(k=0; k <
ncpus; k++) {
701 if (
tds[k].barrier == &
s->barrier) {
704 dprint(
"failure on CPU%u\n", k);
721#define NUM_PMCS PFMLIB_MAX_PMCS
722#define NUM_PMDS PFMLIB_MAX_PMDS
751main(
int argc,
char **argv)
760 unsigned int num_counters;
761 uint32_t
i, j, k, l,
ncpus, npmds;
775 name = malloc(len+1);
777 fatal_error(
"cannot allocate memory for event name\n");
779 memset(&ctx, 0,
sizeof(ctx));
780 memset(pc, 0,
sizeof(pc));
781 memset(&inp,0,
sizeof(inp));
782 memset(&outp,0,
sizeof(outp));
784 cpu_list = argc > 1 ? strtoul(argv[1], NULL, 0) : 0x3;
798 if (
i > num_counters) {
800 printf(
"too many events provided (max=%d events), using first %d event(s)\n", num_counters,
i);
831 for(l=0, k = 0; l <
ncpus; l++) {
849 ret =
pfms_create(&cpu_list, 1, &ctx, NULL, &desc);
909 for(j=0, k= 0; j <
ncpus; j++) {
912 printf(
"CPU%-3d PMD%u %20"PRIu64
" %s\n",
static double b[MATRIX_SIZE][MATRIX_SIZE]
static double c[MATRIX_SIZE][MATRIX_SIZE]
#define PFM_FL_SYSTEM_WIDE
int pfms_write_pmcs(void *desc, pfarg_pmc_t *pmcs, uint32_t n)
static pfms_thread_t * tds
static int pin_cpu(uint32_t cpu)
static void cleanup_barrier(void *arg)
int pfms_unload(void *desc)
int pfms_write_pmds(void *desc, pfarg_pmd_t *pmds, uint32_t n)
static int create_wthreads(uint64_t *cpu_list, uint32_t n)
int pfms_create(uint64_t *cpu_list, size_t n, pfarg_ctx_t *ctx, pfms_ovfl_t *ovfl, void **desc)
static int barrier_init(barrier_t *b, uint32_t count)
int pfms_stop(void *desc)
int pfms_load(void *desc)
int pfms_close(void *desc)
static int create_one_wthread(int cpu)
int pfms_initialize(void)
int pfms_read_pmds(void *desc, pfarg_pmd_t *pmds, uint32_t n)
static int barrier_wait(barrier_t *b)
#define dprint(format, arg...)
static int __pfms_do_simple_cmd(pfms_cmd_t cmd, void *desc, void *data, uint32_t n)
int pfms_start(void *desc)
static pthread_mutex_t tds_lock
static void pfms_thread_mainloop(void *arg)
int(* pfms_ovfl_t)(pfarg_msg_t *msg)
unsigned long AO_t __attribute__((__aligned__(4)))
unsigned long int pthread_t
os_err_t pfm_stop(int fd)
os_err_t pfm_write_pmds(int fd, pfarg_pmd_t *pmds, int count)
os_err_t pfm_unload_context(int fd)
os_err_t pfm_write_pmcs(int fd, pfarg_pmc_t *pmcs, int count)
os_err_t pfm_start(int fd, pfarg_start_t *start)
os_err_t pfm_create_context(pfarg_ctx_t *ctx, char *smpl_name, void *smpl_arg, size_t smpl_size)
os_err_t pfm_load_context(int fd, pfarg_load_t *load)
os_err_t pfm_read_pmds(int fd, pfarg_pmd_t *pmds, int count)
pfm_err_t pfm_get_inst_retired_event(pfmlib_event_t *e)
char * pfm_strerror(int code)
pfm_err_t pfm_dispatch_events(pfmlib_input_param_t *p, void *model_in, pfmlib_output_param_t *q, void *model_out)
pfm_err_t pfm_get_full_event_name(pfmlib_event_t *e, char *name, size_t maxlen)
pfm_err_t pfm_get_cycle_event(pfmlib_event_t *e)
pfm_err_t pfm_initialize(void)
pfm_err_t pfm_get_max_event_name_len(size_t *len)
#define PFMLIB_PFP_SYSTEMWIDE
pfm_err_t pfm_get_num_counters(unsigned int *num)
pfmlib_reg_t pfp_pmcs[PFMLIB_MAX_PMCS]
unsigned int pfp_pmc_count
unsigned long long reg_value
static void fatal_error(char *fmt,...) __attribute__((noreturn))
static uint32_t popcount(uint64_t c)