PAPI 7.1.0.0
Loading...
Searching...
No Matches
extras.c
Go to the documentation of this file.
1/****************************/
2/* THIS IS OPEN SOURCE CODE */
3/****************************/
4
5/*
6* File: extras.c
7* Author: Philip Mucci
8* mucci@cs.utk.edu
9* Mods: dan terpstra
10* terpstra@cs.utk.edu
11* Mods: Haihang You
12* you@cs.utk.edu
13* Mods: Kevin London
14* london@cs.utk.edu
15* Mods: Maynard Johnson
16* maynardj@us.ibm.com
17*/
18
19/* This file contains portable routines to do things that we wish the
20vendors did in the kernel extensions or performance libraries. */
21
22#include "papi.h"
23#include "papi_internal.h"
24#include "papi_vector.h"
25#include "papi_memory.h"
26#include "extras.h"
27#include "threads.h"
28
29#if (!defined(HAVE_FFSLL) || defined(__bgp__))
30int ffsll( long long lli );
31#else
32#include <string.h>
33#endif
34
35/****************/
36/* BEGIN LOCALS */
37/****************/
38
/* Generator state consumed and overwritten by random_ushort(); its initial
   value is the DEADBEEF constant. */
39static unsigned int _rnum = DEADBEEF;
40
41/**************/
42/* END LOCALS */
43/**************/
44
45inline_static unsigned short
47{
48 return ( unsigned short ) ( _rnum = 1664525 * _rnum + 1013904223 );
49}
50
51
52/* compute the amount by which to increment the bucket.
53 value is the current value of the bucket
54 this routine is used by all three profiling cases
55 it is inlined for speed
56*/
58profil_increment( long long value,
59 int flags, long long excess, long long threshold )
60{
61 int increment = 1;
62
63 if ( flags == PAPI_PROFIL_POSIX ) {
64 return ( 1 );
65 }
66
67 if ( flags & PAPI_PROFIL_RANDOM ) {
68 if ( random_ushort( ) <= ( USHRT_MAX / 4 ) )
69 return ( 0 );
70 }
71
72 if ( flags & PAPI_PROFIL_COMPRESS ) {
73 /* We're likely to ignore the sample if buf[address] gets big. */
74 if ( random_ushort( ) < value ) {
75 return ( 0 );
76 }
77 }
78
79 if ( flags & PAPI_PROFIL_WEIGHTED ) { /* Increment is between 1 and 255 */
80 if ( excess <= ( long long ) 1 )
81 increment = 1;
82 else if ( excess > threshold )
83 increment = 255;
84 else {
85 threshold = threshold / ( long long ) 255;
86 increment = ( int ) ( excess / threshold );
87 }
88 }
89 return ( increment );
90}
91
92
93static void
95 int flags, long long excess, long long threshold )
96{
97 unsigned short *buf16;
98 unsigned int *buf32;
99 unsigned long long *buf64;
100 unsigned long indx;
101 unsigned long long lloffset;
102
103 /* SPECIAL CASE: if starting address is 0 and scale factor is 2
104 then all counts go into first bin.
105 */
106 if ( ( prof->pr_off == 0 ) && ( prof->pr_scale == 0x2 ) )
107 indx = 0;
108 else {
109 /* compute the profile buffer offset by:
110 - subtracting the profiling base address from the pc address
111 - multiplying by the scaling factor
112 - dividing by max scale (65536, or 2^^16)
113 - dividing by implicit 2 (2^^1 for a total of 2^^17), for even addresses
114 NOTE: 131072 is a valid scale value. It produces byte resolution of addresses
115 */
116 lloffset =
117 ( unsigned long long ) ( ( address - prof->pr_off ) *
118 prof->pr_scale );
119 indx = ( unsigned long ) ( lloffset >> 17 );
120 }
121
122 /* confirm addresses within specified range */
123 if ( address >= prof->pr_off ) {
124 /* test first for 16-bit buckets; this should be the fast case */
125 if ( flags & PAPI_PROFIL_BUCKET_16 ) {
126 if ( ( indx * sizeof ( short ) ) < prof->pr_size ) {
127 buf16 = (unsigned short *) prof->pr_base;
128 buf16[indx] =
129 ( unsigned short ) ( ( unsigned short ) buf16[indx] +
130 profil_increment( buf16[indx], flags,
131 excess,
132 threshold ) );
133 PRFDBG( "posix_profil_16() bucket %lu = %u\n", indx,
134 buf16[indx] );
135 }
136 }
137 /* next, look for the 32-bit case */
138 else if ( flags & PAPI_PROFIL_BUCKET_32 ) {
139 if ( ( indx * sizeof ( int ) ) < prof->pr_size ) {
140 buf32 = (unsigned int *) prof->pr_base;
141 buf32[indx] = ( unsigned int ) buf32[indx] +
142 ( unsigned int ) profil_increment( buf32[indx], flags,
143 excess, threshold );
144 PRFDBG( "posix_profil_32() bucket %lu = %u\n", indx,
145 buf32[indx] );
146 }
147 }
148 /* finally, fall through to the 64-bit case */
149 else {
150 if ( ( indx * sizeof ( long long ) ) < prof->pr_size ) {
151 buf64 = (unsigned long long *) prof->pr_base;
152 buf64[indx] = ( unsigned long long ) buf64[indx] +
153 ( unsigned long long ) profil_increment( ( long long )
154 buf64[indx], flags,
155 excess,
156 threshold );
157 PRFDBG( "posix_profil_64() bucket %lu = %lld\n", indx,
158 buf64[indx] );
159 }
160 }
161 }
162}
163
164void
166 long long over, int profile_index )
167{
168 EventSetProfileInfo_t *profile = &ESI->profile;
169 PAPI_sprofil_t *sprof;
170 vptr_t offset = 0;
171 vptr_t best_offset = 0;
172 int count;
173 int best_index = -1;
174 int i;
175
176 PRFDBG( "handled IP %p\n", pc );
177
178 sprof = profile->prof[profile_index];
179 count = profile->count[profile_index];
180
181 for ( i = 0; i < count; i++ ) {
182 offset = sprof[i].pr_off;
183 if ( ( offset < pc ) && ( offset > best_offset ) ) {
184 best_index = i;
185 best_offset = offset;
186 }
187 }
188
189 if ( best_index == -1 )
190 best_index = 0;
191
192 posix_profil( pc, &sprof[best_index], profile->flags, over,
193 profile->threshold[profile_index] );
194}
195
196/* if isHardware is true, then the processor is using hardware overflow,
197 else it is using software overflow. Use this parameter instead of
198 _papi_hwi_system_info.supports_hw_overflow is in CRAY some processors
199 may use hardware overflow, some may use software overflow.
200
201 overflow_bit: if the component can get the overflow bit when overflow
202 occurs, then this should be passed by the component;
203
204 If both genOverflowBit and isHardwareSupport are true, that means
205 the component doesn't know how to get the overflow bit from the
206 kernel directly, so we generate the overflow bit in this function
207 since this function can access the ESI->overflow struct;
208 (The component can only set genOverflowBit parameter to true if the
209 hardware doesn't support multiple hardware overflow. If the
210 component supports multiple hardware overflow and you don't know how
211 to get the overflow bit, then I don't know how to deal with this
212 situation).
213*/
214
215int
216_papi_hwi_dispatch_overflow_signal( void *papiContext, vptr_t address,
217 int *isHardware, long long overflow_bit,
218 int genOverflowBit, ThreadInfo_t ** t,
219 int cidx )
220{
221 int retval, event_counter, i, overflow_flag, pos;
222 int papi_index, j;
223 int profile_index = 0;
224 long long overflow_vector;
225
226 long long temp[_papi_hwd[cidx]->cmp_info.num_cntrs], over;
227 long long latest = 0;
229 EventSetInfo_t *ESI;
230 _papi_hwi_context_t *ctx = ( _papi_hwi_context_t * ) papiContext;
231
232 OVFDBG( "enter\n" );
233
234 if ( *t )
235 thread = *t;
236 else
238
239 if ( thread != NULL ) {
240 ESI = thread->running_eventset[cidx];
241
242 if ( ( ESI == NULL ) || ( ( ESI->state & PAPI_OVERFLOWING ) == 0 ) ) {
243 OVFDBG( "Either no eventset or eventset not set to overflow.\n" );
244#ifdef ANY_THREAD_GETS_SIGNAL
246#endif
247 return ( PAPI_OK );
248 }
249
250 if ( ESI->CmpIdx != cidx )
251 return ( PAPI_ENOCMP );
252
253 if ( ESI->master != thread ) {
255 ( "eventset->thread %p vs. current thread %p mismatch",
256 ESI->master, thread );
257 return ( PAPI_EBUG );
258 }
259
260 if ( isHardware ) {
262 ESI->state |= PAPI_PAUSED;
263 *isHardware = 1;
264 } else
265 *isHardware = 0;
266 }
267 /* Get the latest counter value */
268 event_counter = ESI->overflow.event_counter;
269
270 overflow_flag = 0;
271 overflow_vector = 0;
272
273 if ( !( ESI->overflow.flags & PAPI_OVERFLOW_HARDWARE ) ) {
274 retval = _papi_hwi_read( thread->context[cidx], ESI, ESI->sw_stop );
275 if ( retval < PAPI_OK )
276 return ( retval );
277 for ( i = 0; i < event_counter; i++ ) {
278 papi_index = ESI->overflow.EventIndex[i];
279 latest = ESI->sw_stop[papi_index];
280 temp[i] = -1;
281
282 if ( latest >= ( long long ) ESI->overflow.deadline[i] ) {
283 OVFDBG
284 ( "dispatch_overflow() latest %lld, deadline %lld, threshold %d\n",
285 latest, ESI->overflow.deadline[i],
286 ESI->overflow.threshold[i] );
287 pos = ESI->EventInfoArray[papi_index].pos[0];
288 overflow_vector ^= ( long long ) 1 << pos;
289 temp[i] = latest - ESI->overflow.deadline[i];
290 overflow_flag = 1;
291 /* adjust the deadline */
292 ESI->overflow.deadline[i] =
293 latest + ESI->overflow.threshold[i];
294 }
295 }
296 } else if ( genOverflowBit ) {
297 /* we had assumed the overflow event can't be derived event */
298 papi_index = ESI->overflow.EventIndex[0];
299
300 /* suppose the pos is the same as the counter number
301 * (this is not true in Itanium, but itanium doesn't
302 * need us to generate the overflow bit
303 */
304 pos = ESI->EventInfoArray[papi_index].pos[0];
305 overflow_vector = ( long long ) 1 << pos;
306 } else
307 overflow_vector = overflow_bit;
308
309 if ( ( ESI->overflow.flags & PAPI_OVERFLOW_HARDWARE ) || overflow_flag ) {
310 if ( ESI->state & PAPI_PROFILING ) {
311 int k = 0;
312 while ( overflow_vector ) {
313 i = ffsll( overflow_vector ) - 1;
314 for ( j = 0; j < event_counter; j++ ) {
315 papi_index = ESI->overflow.EventIndex[j];
316 /* This loop is here ONLY because Pentium 4 can have tagged *
317 * events that contain more than one counter without being *
318 * derived. You've gotta scan all terms to make sure you *
319 * find the one to profile. */
320 for ( k = 0, pos = 0; k < PAPI_EVENTS_IN_DERIVED_EVENT && pos >= 0;
321 k++ ) {
322 pos = ESI->EventInfoArray[papi_index].pos[k];
323 if ( i == pos ) {
324 profile_index = j;
325 goto foundit;
326 }
327 }
328 }
329 if ( j == event_counter ) {
331 ( "BUG! overflow_vector is 0, dropping interrupt" );
332 return ( PAPI_EBUG );
333 }
334
335 foundit:
336 if ( ( ESI->overflow.flags & PAPI_OVERFLOW_HARDWARE ) )
337 over = 0;
338 else
339 over = temp[profile_index];
340 _papi_hwi_dispatch_profile( ESI, address, over,
341 profile_index );
342 overflow_vector ^= ( long long ) 1 << i;
343 }
344 /* do not use overflow_vector after this place */
345 } else {
346 ESI->overflow.handler( ESI->EventSetIndex, ( void * ) address,
347 overflow_vector, ctx->ucontext );
348 }
349 }
350 ESI->state &= ~( PAPI_PAUSED );
351 }
352#ifdef ANY_THREAD_GETS_SIGNAL
353 else {
354 OVFDBG( "I haven't been noticed by PAPI before\n" );
356 }
357#endif
358 return ( PAPI_OK );
359}
360
361#include <sys/time.h>
362#include <errno.h>
363#include <string.h>
364
366
367int
368_papi_hwi_start_timer( int timer, int signal, int ns )
369{
370 struct itimerval value;
371 int us = ns / 1000;
372
373 if ( us == 0 )
374 us = 1;
375
376#ifdef ANY_THREAD_GETS_SIGNAL
378 if ( ( _papi_hwi_using_signal[signal] - 1 ) ) {
379 INTDBG( "itimer already installed\n" );
381 return ( PAPI_OK );
382 }
384#else
385 ( void ) signal; /*unused */
386#endif
387
388 value.it_interval.tv_sec = 0;
389 value.it_interval.tv_usec = us;
390 value.it_value.tv_sec = 0;
391 value.it_value.tv_usec = us;
392
393 INTDBG( "Installing itimer %d, with %d us interval\n", timer, us );
394 if ( setitimer( timer, &value, NULL ) < 0 ) {
395 PAPIERROR( "setitimer errno %d", errno );
396 return ( PAPI_ESYS );
397 }
398
399 return ( PAPI_OK );
400}
401
402int
403_papi_hwi_start_signal( int signal, int need_context, int cidx )
404{
405 struct sigaction action;
406
409 if ( _papi_hwi_using_signal[signal] - 1 ) {
410 INTDBG( "_papi_hwi_using_signal is now %d\n",
413 return ( PAPI_OK );
414 }
415
416 memset( &action, 0x00, sizeof ( struct sigaction ) );
417 action.sa_flags = SA_RESTART;
418 action.sa_sigaction =
419 ( void ( * )( int, siginfo_t *, void * ) ) _papi_hwd[cidx]->
420 dispatch_timer;
421 if ( need_context )
422#if (defined(_BGL) /*|| defined (__bgp__)*/)
423 action.sa_flags |= SIGPWR;
424#else
425 action.sa_flags |= SA_SIGINFO;
426#endif
427
428 INTDBG( "installing signal handler\n" );
429 if ( sigaction( signal, &action, NULL ) < 0 ) {
430 PAPIERROR( "sigaction errno %d", errno );
432 return ( PAPI_ESYS );
433 }
434
435 INTDBG( "_papi_hwi_using_signal[%d] is now %d.\n", signal,
438
439 return ( PAPI_OK );
440}
441
442int
444{
446 if ( --_papi_hwi_using_signal[signal] == 0 ) {
447 INTDBG( "removing signal handler\n" );
448 if ( sigaction( signal, NULL, NULL ) == -1 ) {
449 PAPIERROR( "sigaction errno %d", errno );
451 return ( PAPI_ESYS );
452 }
453 }
454
455 INTDBG( "_papi_hwi_using_signal[%d] is now %d\n", signal,
458
459 return ( PAPI_OK );
460}
461
462int
464{
465 struct itimerval value;
466
467#ifdef ANY_THREAD_GETS_SIGNAL
469 if ( _papi_hwi_using_signal[signal] > 1 ) {
470 INTDBG( "itimer in use by another thread\n" );
472 return ( PAPI_OK );
473 }
475#else
476 ( void ) signal; /*unused */
477#endif
478
479 value.it_interval.tv_sec = 0;
480 value.it_interval.tv_usec = 0;
481 value.it_value.tv_sec = 0;
482 value.it_value.tv_usec = 0;
483
484 INTDBG( "turning off timer\n" );
485 if ( setitimer( timer, &value, NULL ) == -1 ) {
486 PAPIERROR( "setitimer errno %d", errno );
487 return PAPI_ESYS;
488 }
489
490 return PAPI_OK;
491}
492
493
494
495#if (!defined(HAVE_FFSLL) || defined(__bgp__))
496/* find the first set bit in long long */
497
/* Fallback for platforms without ffsll().

   lli - value to scan

   Returns the 1-based position of the least significant set bit of lli
   (1 for bit 0), or 0 when no bit is set.  The scan works on the unsigned
   representation, so negative inputs are handled without shifting a signed
   value.
*/
int
ffsll( long long lli )
{
    unsigned long long bits = ( unsigned long long ) lli;
    int position = 1;

    if ( bits == 0 )
        return 0;

    /* bits is non-zero, so a set bit is reached before it is shifted out */
    while ( ( bits & 1ULL ) == 0 ) {
        bits >>= 1;
        position++;
    }
    return position;
}
519#endif
int i
int errno
static long count
struct papi_vectors * _papi_hwd[]
inline_static unsigned short random_ushort(void)
Definition: extras.c:46
int _papi_hwi_using_signal[PAPI_NSIG]
Definition: extras.c:365
int _papi_hwi_stop_timer(int timer, int signal)
Definition: extras.c:463
int _papi_hwi_start_signal(int signal, int need_context, int cidx)
Definition: extras.c:403
inline_static int profil_increment(long long value, int flags, long long excess, long long threshold)
Definition: extras.c:58
static unsigned int _rnum
Definition: extras.c:39
int _papi_hwi_stop_signal(int signal)
Definition: extras.c:443
int _papi_hwi_dispatch_overflow_signal(void *papiContext, vptr_t address, int *isHardware, long long overflow_bit, int genOverflowBit, ThreadInfo_t **t, int cidx)
Definition: extras.c:216
void _papi_hwi_dispatch_profile(EventSetInfo_t *ESI, vptr_t pc, long long over, int profile_index)
Definition: extras.c:165
int _papi_hwi_start_timer(int timer, int signal, int ns)
Definition: extras.c:368
static void posix_profil(vptr_t address, PAPI_sprofil_t *prof, int flags, long long excess, long long threshold)
Definition: extras.c:94
#define PAPI_EBUG
Definition: f90papi.h:176
#define PAPI_PROFIL_BUCKET_32
Definition: f90papi.h:248
#define PAPI_OK
Definition: f90papi.h:73
#define PAPI_PROFIL_WEIGHTED
Definition: f90papi.h:167
#define PAPI_PROFILING
Definition: f90papi.h:150
#define PAPI_PROFIL_POSIX
Definition: f90papi.h:44
#define PAPI_PROFIL_BUCKET_16
Definition: f90papi.h:144
#define PAPI_PAUSED
Definition: f90papi.h:25
#define PAPI_ESYS
Definition: f90papi.h:136
#define PAPI_ENOCMP
Definition: f90papi.h:79
#define PAPI_OVERFLOWING
Definition: f90papi.h:240
#define PAPI_OVERFLOW_HARDWARE
Definition: f90papi.h:157
#define PAPI_PROFIL_COMPRESS
Definition: f90papi.h:53
#define PAPI_PROFIL_RANDOM
Definition: f90papi.h:143
static int threshold
void * thread(void *arg)
Definition: kufrin.c:38
Return codes and api definitions.
void * vptr_t
Definition: papi.h:576
#define INTDBG(format, args...)
Definition: papi_debug.h:66
unsigned long int(* _papi_hwi_thread_id_fn)(void)
Definition: threads.c:42
#define OVFDBG(format, args...)
Definition: papi_debug.h:69
#define PRFDBG(format, args...)
Definition: papi_debug.h:70
__sighandler_t signal(int __sig, __sighandler_t __handler) __attribute__((__nothrow__
int sigaction(int __sig, const struct sigaction *__restrict __act, struct sigaction *__restrict __oact) __attribute__((__nothrow__
void PAPIERROR(char *format,...)
int _papi_hwi_read(hwd_context_t *context, EventSetInfo_t *ESI, long long *values)
#define inline_static
#define PAPI_NSIG
Definition: papi_internal.h:59
#define DEADBEEF
Definition: papi_internal.h:26
#define INTERNAL_LOCK
Definition: papi_internal.h:85
static int cidx
int
Definition: sde_internal.h:89
long long int long long
Definition: sde_internal.h:85
static void action(void *arg, int regno, const char *name, uint8_t bits)
int pos[PAPI_EVENTS_IN_DERIVED_EVENT]
EventSetProfileInfo_t profile
long long * sw_stop
struct _ThreadInfo * master
EventInfo_t * EventInfoArray
EventSetOverflowInfo_t overflow
PAPI_overflow_handler_t handler
PAPI_sprofil_t ** prof
vptr_t pr_off
Definition: papi.h:582
unsigned pr_size
Definition: papi.h:581
void * pr_base
Definition: papi.h:580
unsigned pr_scale
Definition: papi.h:583
hwd_ucontext_t * ucontext
inline_static ThreadInfo_t * _papi_hwi_lookup_thread(int custom_tid)
Definition: threads.h:97
inline_static int _papi_hwi_lock(int lck)
Definition: threads.h:69
int _papi_hwi_broadcast_signal(unsigned int mytid)
inline_static int _papi_hwi_unlock(int lck)
Definition: threads.h:83
int retval
Definition: zero_fork.c:53