PAPI 7.1.0.0
atomic_ops.h
/*
 * Copyright (c) 2003-2011 Hewlett-Packard Development Company, L.P.
 * Copyright (c) 2008-2022 Ivan Maidanski
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#ifndef AO_ATOMIC_OPS_H
#define AO_ATOMIC_OPS_H

#include "atomic_ops/ao_version.h"
                        /* Define version numbers here to allow */
                        /* test on build machines for cross-builds. */

#include <assert.h>
#include <stddef.h>

/* We define various atomic operations on memory in a */
/* machine-specific way. Unfortunately, this is complicated */
/* by the fact that these may or may not be combined with */
/* various memory barriers. Thus the actual operations we */
/* define have the form AO_<atomic-op>_<barrier>, for all */
/* plausible combinations of <atomic-op> and <barrier>. */
/* This of course results in a mild combinatorial explosion. */
/* To deal with it, we try to generate derived */
/* definitions for as many of the combinations as we can, as */
/* automatically as possible. */
/* */
/* Our assumption throughout is that the programmer will */
/* specify the least demanding operation and memory barrier */
/* that will guarantee correctness for the implementation. */
/* Our job is to find the least expensive way to implement it */
/* on the applicable hardware. In many cases that will */
/* involve, for example, a stronger memory barrier, or a */
/* combination of hardware primitives. */
/* */
/* Conventions: */
/* "plain" atomic operations are not guaranteed to include */
/* a barrier. The suffix in the name specifies the barrier */
/* type. Suffixes are: */
/* _release: Earlier operations may not be delayed past it. */
/* _acquire: Later operations may not move ahead of it. */
/* _read: Subsequent reads must follow this operation and */
/*        preceding reads. */
/* _write: Earlier writes precede both this operation and */
/*         later writes. */
/* _full: Ordered with respect to both earlier and later memory */
/*        operations. */
/* _release_write: Ordered with respect to earlier writes. */
/* _acquire_read: Ordered with respect to later reads. */
/* */
/* Currently we try to define the following atomic memory */
/* operations, in combination with the above barriers: */
/*   AO_nop */
/*   AO_load */
/*   AO_store */
/*   AO_test_and_set (binary) */
/*   AO_fetch_and_add */
/*   AO_fetch_and_add1 */
/*   AO_fetch_and_sub1 */
/*   AO_and */
/*   AO_or */
/*   AO_xor */
/*   AO_compare_and_swap */
/*   AO_fetch_compare_and_swap */
/* */
/* Note that atomicity guarantees are valid only if both */
/* readers and writers use AO_ operations to access the */
/* shared value, while ordering constraints are intended to */
/* apply to all memory operations. If a location can potentially */
/* be accessed simultaneously from multiple threads, and one of */
/* those accesses may be a write access, then all such */
/* accesses to that location should be through AO_ primitives. */
/* However if AO_ operations enforce sufficient ordering to */
/* ensure that a location x cannot be accessed concurrently, */
/* or can only be read concurrently, then x can be accessed */
/* via ordinary references and assignments. */
/* */
/* AO_compare_and_swap takes an address and an expected old */
/* value and a new value, and returns an int. Non-zero result */
/* indicates that it succeeded. */
/* AO_fetch_compare_and_swap takes an address and an expected */
/* old value and a new value, and returns the real old value. */
/* The operation succeeded if and only if the expected old */
/* value matches the old value returned. */
/* */
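/* An illustrative client-code sketch: an atomic increment built */
/* from the fetch-CAS primitive, assuming the platform provides */
/* it (see the AO_HAVE_op feature-test macros described below); */
/* "counter" is a placeholder name: */
/*   volatile AO_t counter; */
/*   AO_t old; */
/*   do { */
/*     old = counter; */
/*   } while (AO_fetch_compare_and_swap(&counter, old, old + 1) != old); */
/* The loop retries until the returned value equals the expected */
/* old value, i.e. until the swap has actually taken place. */
/* */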
/* Test_and_set takes an address, atomically replaces it by */
/* AO_TS_SET, and returns the prior value. */
/* An AO_TS_t location can be reset with the */
/* AO_CLEAR macro, which normally uses AO_store_release. */
/* AO_fetch_and_add takes an address and an AO_t increment */
/* value. The AO_fetch_and_add1 and AO_fetch_and_sub1 variants */
/* are provided, since they allow faster implementations on */
/* some hardware. AO_and, AO_or, AO_xor do atomically and, or, */
/* xor (respectively) an AO_t value into a memory location, */
/* but do not provide access to the original. */
/* */
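/* An illustrative client-code sketch of a minimal spin lock built */
/* from these primitives, assuming AO_test_and_set_acquire is */
/* available (check AO_HAVE_test_and_set_acquire); "my_lock" is a */
/* placeholder name: */
/*   static AO_TS_t my_lock = AO_TS_INITIALIZER; */
/*   ... */
/*   while (AO_test_and_set_acquire(&my_lock) != AO_TS_CLEAR) */
/*     ;                     (spin: another thread holds the lock) */
/*   ... critical section ... */
/*   AO_CLEAR(&my_lock);     (release, normally via AO_store_release) */
/* */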
/* We expect this list to grow slowly over time. */
/* */
/* Note that AO_nop_full is a full memory barrier. */
/* */
/* Note that if some data is initialized with */
/*   data.x = ...; data.y = ...; ... */
/*   AO_store_release_write(&data_is_initialized, 1) */
/* then data is guaranteed to be initialized after the test */
/*   if (AO_load_acquire_read(&data_is_initialized)) ... */
/* succeeds. Furthermore, this should generate near-optimal */
/* code on all common platforms. */
/* */
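/* A slightly fuller sketch of that publication pattern (client */
/* code; the struct, field and variable names are placeholders): */
/*   struct point { int x; int y; } data; */
/*   volatile AO_t data_is_initialized = 0; */
/*   writer:  data.x = 17; data.y = 42; */
/*            AO_store_release_write(&data_is_initialized, 1); */
/*   reader:  if (AO_load_acquire_read(&data_is_initialized)) */
/*              ... data.x and data.y may now be read safely ... */
/* */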
/* All operations operate on unsigned AO_t, which */
/* is the natural word size, and usually unsigned long. */
/* It is possible to check whether a particular operation op */
/* is available on a particular platform by checking whether */
/* AO_HAVE_op is defined. We make heavy use of these macros */
/* internally. */

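/* For example, client code can prefer the cheaper primitive and */
/* fall back to a compare-and-swap loop where it is missing (an */
/* illustrative sketch; "incr" is a placeholder name): */
/*   static void incr(volatile AO_t *p) */
/*   { */
/*   # if defined(AO_HAVE_fetch_and_add1) */
/*       AO_fetch_and_add1(p); */
/*   # elif defined(AO_HAVE_fetch_compare_and_swap) */
/*       AO_t old; */
/*       do { old = *p; } */
/*       while (AO_fetch_compare_and_swap(p, old, old + 1) != old); */
/*   # else */
/*   #   error No usable atomic increment on this platform */
/*   # endif */
/*   } */
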
/* The rest of this file basically has three sections: */
/* */
/* Some utility and default definitions. */
/* */
/* The architecture dependent section: */
/* This defines atomic operations that have direct hardware */
/* support on a particular platform, mostly by including the */
/* appropriate compiler- and hardware-dependent file. */
/* */
/* The synthesis section: */
/* This tries to define other atomic operations in terms of */
/* those that are explicitly available on the platform. */
/* This section is hardware independent. */
/* We make no attempt to synthesize operations in ways that */
/* effectively introduce locks, except for the debugging/demo */
/* pthread-based implementation at the beginning. A more */
/* realistic implementation that falls back to locks could be */
/* added as a higher layer. But that would sacrifice */
/* usability from signal handlers. */
/* The synthesis section is implemented almost entirely in */
/* atomic_ops/generalize.h. */

/* Some common defaults. Overridden for some architectures. */
#define AO_t size_t

/* The test_and_set primitive returns an AO_TS_VAL_t value. */
/* AO_TS_t is the type of an in-memory test-and-set location. */

#define AO_TS_INITIALIZER ((AO_TS_t)AO_TS_CLEAR)

/* Convenient internal macro to test version of GCC. */
#if defined(__GNUC__) && defined(__GNUC_MINOR__)
# define AO_GNUC_PREREQ(major, minor) \
    ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((major) << 16) + (minor))
#else
# define AO_GNUC_PREREQ(major, minor) 0 /* false */
#endif

/* Convenient internal macro to test version of Clang. */
#if defined(__clang__) && defined(__clang_major__)
# define AO_CLANG_PREREQ(major, minor) \
    ((__clang_major__ << 16) + __clang_minor__ >= ((major) << 16) + (minor))
#else
# define AO_CLANG_PREREQ(major, minor) 0 /* false */
#endif

/* Platform-dependent stuff: */
#if (defined(__GNUC__) || defined(_MSC_VER) || defined(__INTEL_COMPILER) \
     || defined(__DMC__) || defined(__WATCOMC__)) && !defined(AO_NO_INLINE)
# define AO_INLINE static __inline
#elif defined(__sun) && !defined(AO_NO_INLINE)
# define AO_INLINE static inline
#else
# define AO_INLINE static
#endif

#if AO_GNUC_PREREQ(3, 0) && !defined(LINT2)
# define AO_EXPECT_FALSE(expr) __builtin_expect(expr, 0)
    /* Equivalent to (expr) but predict that usually (expr) == 0. */
#else
# define AO_EXPECT_FALSE(expr) (expr)
#endif /* !__GNUC__ */
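
/* A usage sketch for AO_EXPECT_FALSE (illustrative; "p" is a */
/* placeholder): wrap a condition that is expected to be false on */
/* the fast path, e.g. */
/*   if (AO_EXPECT_FALSE(p == NULL)) return; */
/* so that compilers supporting __builtin_expect lay out the */
/* common path first; elsewhere it degrades to a plain (expr). */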

#if defined(__has_feature)
  /* __has_feature() is supported. */
# if __has_feature(address_sanitizer)
#   define AO_ADDRESS_SANITIZER
# endif
# if __has_feature(memory_sanitizer)
#   define AO_MEMORY_SANITIZER
# endif
# if __has_feature(thread_sanitizer)
#   define AO_THREAD_SANITIZER
# endif
#else
# ifdef __SANITIZE_ADDRESS__
    /* GCC v4.8+ */
#   define AO_ADDRESS_SANITIZER
# endif
#endif /* !__has_feature */

#ifndef AO_ATTR_NO_SANITIZE_MEMORY
# ifndef AO_MEMORY_SANITIZER
#   define AO_ATTR_NO_SANITIZE_MEMORY /* empty */
# elif AO_CLANG_PREREQ(3, 8)
#   define AO_ATTR_NO_SANITIZE_MEMORY __attribute__((no_sanitize("memory")))
# else
#   define AO_ATTR_NO_SANITIZE_MEMORY __attribute__((no_sanitize_memory))
# endif
#endif /* !AO_ATTR_NO_SANITIZE_MEMORY */

#ifndef AO_ATTR_NO_SANITIZE_THREAD
# ifndef AO_THREAD_SANITIZER
#   define AO_ATTR_NO_SANITIZE_THREAD /* empty */
# elif AO_CLANG_PREREQ(3, 8)
#   define AO_ATTR_NO_SANITIZE_THREAD __attribute__((no_sanitize("thread")))
# else
#   define AO_ATTR_NO_SANITIZE_THREAD __attribute__((no_sanitize_thread))
# endif
#endif /* !AO_ATTR_NO_SANITIZE_THREAD */

#if (AO_GNUC_PREREQ(7, 5) || __STDC_VERSION__ >= 201112L) && !defined(LINT2)
# define AO_ALIGNOF_SUPPORTED 1
#endif

#if defined(AO_DLL) && !defined(AO_API)
# ifdef AO_BUILD
#   if defined(__CEGCC__) || (defined(__MINGW32__) && !defined(__cplusplus))
#     define AO_API __declspec(dllexport)
#   elif defined(_MSC_VER) || defined(__BORLANDC__) || defined(__CYGWIN__) \
         || defined(__DMC__) || defined(__MINGW32__) || defined(__WATCOMC__)
#     define AO_API extern __declspec(dllexport)
#   endif
# else
#   if defined(_MSC_VER) || defined(__BORLANDC__) || defined(__CEGCC__) \
       || defined(__CYGWIN__) || defined(__DMC__)
#     define AO_API __declspec(dllimport)
#   elif defined(__MINGW32_DELAY_LOAD__)
#     define AO_API __declspec(dllexport)
#   elif defined(__MINGW32__) || defined(__WATCOMC__)
#     define AO_API extern __declspec(dllimport)
#   endif
# endif
#endif /* AO_DLL */

#ifndef AO_API
# define AO_API extern
#endif

#ifdef AO_ALIGNOF_SUPPORTED
# define AO_ASSERT_ADDR_ALIGNED(addr) \
    assert(((size_t)(addr) & (__alignof__(*(addr)) - 1)) == 0)
#else
# define AO_ASSERT_ADDR_ALIGNED(addr) \
    assert(((size_t)(addr) & (sizeof(*(addr)) - 1)) == 0)
#endif /* !AO_ALIGNOF_SUPPORTED */

#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
# define AO_compiler_barrier() __asm__ __volatile__("" : : : "memory")
#elif defined(_MSC_VER) || defined(__DMC__) || defined(__BORLANDC__) \
      || defined(__WATCOMC__)
# if defined(_AMD64_) || defined(_M_X64) || _MSC_VER >= 1400
#   if defined(_WIN32_WCE)
/* #     include <cmnintrin.h> */
#   elif defined(_MSC_VER)
#     include <intrin.h>
#   endif
#   pragma intrinsic(_ReadWriteBarrier)
#   define AO_compiler_barrier() _ReadWriteBarrier()
        /* We assume this does not generate a fence instruction. */
        /* The documentation is a bit unclear. */
# else
#   define AO_compiler_barrier() __asm { }
        /* The preceding implementation may be preferable here too. */
        /* But the documentation warns about VC++ 2003 and earlier. */
# endif
#elif defined(__INTEL_COMPILER)
# define AO_compiler_barrier() __memory_barrier()
        /* FIXME: Too strong? IA64-only? */
#elif defined(_HPUX_SOURCE)
# if defined(__ia64)
#   include <machine/sys/inline.h>
#   define AO_compiler_barrier() _Asm_sched_fence()
# else
    /* FIXME - We do not know how to do this. This is a guess. */
    /* And probably a bad one. */
    static volatile int AO_barrier_dummy;
#   define AO_compiler_barrier() (void)(AO_barrier_dummy = AO_barrier_dummy)
# endif
#else
  /* We conjecture that the following usually gives us the right */
  /* semantics or an error. */
# define AO_compiler_barrier() asm("")
#endif
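
/* A compiler-only barrier suffices where only compiler reordering */
/* matters, for example between a thread and a signal handler */
/* running in that same thread (an illustrative sketch; the */
/* variable names are placeholders): */
/*   ready_data = compute_value(); */
/*   AO_compiler_barrier();      (keep the two stores in order) */
/*   handler_may_look_now = 1; */
/* Ordering with respect to other processors still requires the */
/* AO_ barrier variants described above (e.g. AO_nop_full). */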

#if defined(AO_USE_PTHREAD_DEFS)
#endif /* AO_USE_PTHREAD_DEFS */

#if (defined(__CC_ARM) || defined(__ARMCC__)) && !defined(__GNUC__) \
    && !defined(AO_USE_PTHREAD_DEFS)
# define AO_GENERALIZE_TWICE
#endif

#if defined(__GNUC__) && !defined(AO_USE_PTHREAD_DEFS) \
    && !defined(__INTEL_COMPILER)
# if defined(__i386__)
    /* We don't define AO_USE_SYNC_CAS_BUILTIN for x86 here because */
    /* it might require specifying additional options (like -march) */
    /* or additional link libraries (if -march is not specified). */
# elif defined(__x86_64__)
#   if AO_GNUC_PREREQ(4, 2) && !defined(AO_USE_SYNC_CAS_BUILTIN)
      /* It is safe to use __sync CAS built-in on this architecture. */
#     define AO_USE_SYNC_CAS_BUILTIN
#   endif
# elif defined(__ia64__)
#   define AO_GENERALIZE_TWICE
# elif defined(__hppa__)
#   define AO_CAN_EMUL_CAS
# elif defined(__alpha__)
#   define AO_GENERALIZE_TWICE
# elif defined(__s390__)
# elif defined(__sparc__)
#   define AO_CAN_EMUL_CAS
# elif defined(__m68k__)
# elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) \
      || defined(__powerpc64__) || defined(__ppc64__) || defined(_ARCH_PPC)
# elif defined(__aarch64__)
#   define AO_CAN_EMUL_CAS
# elif defined(__arm__)
#   define AO_CAN_EMUL_CAS
# elif defined(__cris__) || defined(CRIS)
#   define AO_CAN_EMUL_CAS
#   define AO_GENERALIZE_TWICE
# elif defined(__mips__)
# elif defined(__sh__) || defined(SH4)
#   define AO_CAN_EMUL_CAS
# elif defined(__avr32__)
# elif defined(__e2k__)
# elif defined(__hexagon__)
# elif defined(__nios2__)
#   define AO_CAN_EMUL_CAS
# elif defined(__riscv)
# elif defined(__tile__)
# else /* etc. */
# endif
#endif /* __GNUC__ && !AO_USE_PTHREAD_DEFS */

#if (defined(__IBMC__) || defined(__IBMCPP__)) && !defined(__GNUC__) \
    && !defined(AO_USE_PTHREAD_DEFS)
# if defined(__powerpc__) || defined(__powerpc) || defined(__ppc__) \
     || defined(__PPC__) || defined(_M_PPC) || defined(_ARCH_PPC) \
     || defined(_ARCH_PWR)
#   define AO_GENERALIZE_TWICE
# endif
#endif

#if defined(__INTEL_COMPILER) && !defined(AO_USE_PTHREAD_DEFS)
# if defined(__ia64__)
#   define AO_GENERALIZE_TWICE
# endif
# if defined(__GNUC__)
    /* Intel Compiler in GCC compatible mode */
#   if defined(__i386__)
#   endif /* __i386__ */
#   if defined(__x86_64__)
#     if (__INTEL_COMPILER > 1110) && !defined(AO_USE_SYNC_CAS_BUILTIN)
#       define AO_USE_SYNC_CAS_BUILTIN
#     endif
#   endif /* __x86_64__ */
# endif
#endif

#if defined(_HPUX_SOURCE) && !defined(__GNUC__) && !defined(AO_USE_PTHREAD_DEFS)
# if defined(__ia64)
#   define AO_GENERALIZE_TWICE
# else
#   define AO_CAN_EMUL_CAS
# endif
#endif

#if defined(_MSC_VER) || defined(__DMC__) || defined(__BORLANDC__) \
    || (defined(__WATCOMC__) && defined(__NT__))
# if defined(_AMD64_) || defined(_M_X64)
# elif defined(_M_ARM64)
# elif defined(_M_IX86) || defined(x86)
# elif defined(_M_ARM) || defined(ARM) || defined(_ARM_)
#   define AO_GENERALIZE_TWICE
# endif
#endif

#if defined(__sun) && !defined(__GNUC__) && !defined(AO_USE_PTHREAD_DEFS)
  /* Note: use -DAO_USE_PTHREAD_DEFS if Sun CC does not handle inline asm. */
# if defined(__i386) || defined(__x86_64) || defined(__amd64)
# endif
#endif

#if !defined(__GNUC__) && (defined(sparc) || defined(__sparc)) \
    && !defined(AO_USE_PTHREAD_DEFS)
# define AO_CAN_EMUL_CAS
#endif

#if (defined(AO_REQUIRE_CAS) && !defined(AO_HAVE_compare_and_swap) \
    && !defined(AO_HAVE_fetch_compare_and_swap) \
    && !defined(AO_HAVE_compare_and_swap_full) \
    && !defined(AO_HAVE_fetch_compare_and_swap_full) \
    && !defined(AO_HAVE_compare_and_swap_acquire) \
    && !defined(AO_HAVE_fetch_compare_and_swap_acquire)) || defined(CPPCHECK)
# if defined(AO_CAN_EMUL_CAS)
# elif !defined(CPPCHECK)
#   error Cannot implement AO_compare_and_swap_full on this architecture.
# endif
#endif /* AO_REQUIRE_CAS && !AO_HAVE_compare_and_swap ... */
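
/* Client code that needs compare-and-swap unconditionally can */
/* request it up front (an illustrative sketch of that usage): */
/*   #define AO_REQUIRE_CAS */
/*   #include "atomic_ops.h" */
/* One of the compare-and-swap variants tested above is then */
/* guaranteed to be available (possibly emulated), or compilation */
/* stops at the #error above. */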

/* The most common way to clear a test-and-set location */
/* at the end of a critical section. */
#if defined(AO_AO_TS_T) && !defined(AO_HAVE_CLEAR)
# define AO_CLEAR(addr) AO_store_release((AO_TS_t *)(addr), AO_TS_CLEAR)
# define AO_HAVE_CLEAR
#endif
#if defined(AO_CHAR_TS_T) && !defined(AO_HAVE_CLEAR)
# define AO_CLEAR(addr) AO_char_store_release((AO_TS_t *)(addr), AO_TS_CLEAR)
# define AO_HAVE_CLEAR
#endif

/* The generalization section. */
#if !defined(AO_GENERALIZE_TWICE) && defined(AO_CAN_EMUL_CAS) \
    && !defined(AO_HAVE_compare_and_swap_full) \
    && !defined(AO_HAVE_fetch_compare_and_swap_full)
# define AO_GENERALIZE_TWICE
#endif

/* Theoretically we should repeatedly include atomic_ops/generalize.h. */
/* In fact, we observe that this converges after a small fixed number */
/* of iterations, usually one. */
#include "atomic_ops/generalize.h"

#if !defined(AO_GENERALIZE_TWICE) \
    && defined(AO_HAVE_compare_double_and_swap_double) \
    && (!defined(AO_HAVE_double_load) || !defined(AO_HAVE_double_store))
# define AO_GENERALIZE_TWICE
#endif

#ifdef AO_T_IS_INT
  /* Included after the first generalization pass. */
# ifndef AO_GENERALIZE_TWICE
    /* Always generalize again. */
#   define AO_GENERALIZE_TWICE
# endif
#endif /* AO_T_IS_INT */

#ifdef AO_GENERALIZE_TWICE
# include "atomic_ops/generalize.h"
#endif

/* For compatibility with version 0.4 and earlier */
#define AO_TS_T AO_TS_t
#define AO_T AO_t
#define AO_TS_VAL AO_TS_VAL_t

#endif /* !AO_ATOMIC_OPS_H */