x86_64.h
/*
 * Copyright (c) 2003-2011 Hewlett-Packard Development Company, L.P.
 * Copyright (c) 2009-2021 Ivan Maidanski
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "../all_aligned_atomic_load_store.h"

/* Real X86 implementations appear */
/* to enforce ordering between memory operations, EXCEPT that a later */
/* read can pass earlier writes, presumably due to the visible */
/* presence of store buffers. */
/* We ignore the fact that the official specs */
/* seem to be much weaker (and arguably too weak to be usable). */
#include "../ordered_except_wr.h"

#ifndef AO_ASSUME_WINDOWS98
  /* CAS is always available */
# define AO_ASSUME_WINDOWS98
#endif
#ifndef AO_USE_INTERLOCKED_INTRINSICS
# define AO_USE_INTERLOCKED_INTRINSICS
#endif
#include "common32_defs.h"

#ifdef AO_ASM_X64_AVAILABLE

#if _MSC_VER < 1800
  AO_INLINE unsigned char
  AO_char_fetch_and_add_full(volatile unsigned char *p, unsigned char incr)
  {
    __asm
    {
      mov al, incr
      mov rbx, p
      lock xadd byte ptr [rbx], al
    }
  }
# define AO_HAVE_char_fetch_and_add_full

  AO_INLINE unsigned short
  AO_short_fetch_and_add_full(volatile unsigned short *p, unsigned short incr)
  {
    __asm
    {
      mov ax, incr
      mov rbx, p
      lock xadd word ptr [rbx], ax
    }
  }
# define AO_HAVE_short_fetch_and_add_full
#endif /* _MSC_VER < 1800 */

  /* As far as we can tell, the lfence and sfence instructions are not */
  /* currently needed or useful for cached memory accesses. */

  AO_INLINE void
  AO_nop_full(void)
  {
    /* Note: "mfence" (SSE2) is supported on all x86_64/amd64 chips. */
    __asm { mfence }
  }
# define AO_HAVE_nop_full

# ifndef AO_HAVE_test_and_set_full
#   include "../test_and_set_t_is_char.h"

    AO_INLINE AO_TS_VAL_t
    AO_test_and_set_full(volatile AO_TS_t *addr)
    {
      __asm
      {
        mov rax,AO_TS_SET ;
        mov rbx,addr ;
        xchg byte ptr [rbx],al ;
      }
    }
#   define AO_HAVE_test_and_set_full
# endif

#endif /* AO_ASM_X64_AVAILABLE */

#ifndef AO_HAVE_test_and_set_full
# include "../test_and_set_t_is_ao_t.h"
  /* AO_test_and_set_full() is emulated using word-wide CAS. */
#endif

#ifdef AO_CMPXCHG16B_AVAILABLE

# if _MSC_VER >= 1500
#   include "../standard_ao_double_t.h"
#   pragma intrinsic (_InterlockedCompareExchange128)

    AO_INLINE int
    AO_compare_double_and_swap_double_full(volatile AO_double_t *addr,
                                           AO_t old_val1, AO_t old_val2,
                                           AO_t new_val1, AO_t new_val2)
    {
      __int64 comparandResult[2];

      AO_ASSERT_ADDR_ALIGNED(addr);
      comparandResult[0] = old_val1; /* low */
      comparandResult[1] = old_val2; /* high */
      return _InterlockedCompareExchange128((volatile __int64 *)addr,
                                            new_val2 /* high */,
                                            new_val1 /* low */,
                                            comparandResult);
    }
#   define AO_HAVE_compare_double_and_swap_double_full

# elif defined(AO_ASM_X64_AVAILABLE)
#   include "../standard_ao_double_t.h"

    /* If there is no intrinsic _InterlockedCompareExchange128 then we */
    /* need basically what's given below. */
    AO_INLINE int
    AO_compare_double_and_swap_double_full(volatile AO_double_t *addr,
                                           AO_t old_val1, AO_t old_val2,
                                           AO_t new_val1, AO_t new_val2)
    {
      __asm
      {
        mov rdx,QWORD PTR [old_val2] ;
        mov rax,QWORD PTR [old_val1] ;
        mov rcx,QWORD PTR [new_val2] ;
        mov rbx,QWORD PTR [new_val1] ;
        lock cmpxchg16b [addr] ;
        setz rax ;
      }
    }
#   define AO_HAVE_compare_double_and_swap_double_full
# endif /* AO_ASM_X64_AVAILABLE && (_MSC_VER < 1500) */

#endif /* AO_CMPXCHG16B_AVAILABLE */
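
The sketches below are usage notes, not part of the header itself. First, the store-buffer reordering documented above ordered_except_wr.h (a later read may pass an earlier write) is exactly what AO_nop_full(), mfence on this target, exists to suppress. A minimal Dekker-style sketch, assuming <atomic_ops.h> from libatomic_ops; the flag array and try_enter() helper are hypothetical:

    #include <atomic_ops.h>

    static volatile AO_t flag[2];     /* hypothetical intent-to-enter flags */

    int try_enter(int self)
    {
      AO_store(&flag[self], 1);
      AO_nop_full();                  /* full barrier: keeps the load below from
                                         passing the store above via the store
                                         buffer */
      if (AO_load(&flag[1 - self]) != 0) {
        AO_store(&flag[self], 0);     /* contention: back off */
        return 0;
      }
      return 1;                       /* entered; caller later clears flag[self] */
    }

Without the barrier, both threads could buffer their stores, each read the other's flag as 0, and enter together.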
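The char/short fetch-and-add primitives above atomically add incr and return the value the location held before the addition. A small sketch with a hypothetical 8-bit counter:

    #include <atomic_ops.h>

    static volatile unsigned char hits;   /* hypothetical shared counter */

    unsigned char count_hit(void)
    {
      /* Atomically increments hits and returns its previous value. */
      return AO_char_fetch_and_add_full(&hits, 1);
    }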
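AO_test_and_set_full() atomically exchanges AO_TS_SET into the byte and reports the prior value, the classic spinlock building block. A sketch using the library's AO_TS_INITIALIZER and AO_CLEAR(); the with_lock() wrapper is hypothetical:

    #include <atomic_ops.h>

    static AO_TS_t lock = AO_TS_INITIALIZER;

    void with_lock(void (*fn)(void))
    {
      while (AO_test_and_set_full(&lock) == AO_TS_SET)
        ;                   /* spin: another thread holds the lock */
      fn();                 /* critical section */
      AO_CLEAR(&lock);      /* release: clearing store with barrier */
    }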
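Finally, AO_compare_double_and_swap_double_full() compares and swaps a 16-byte-aligned AO_double_t (via the intrinsic or lock cmpxchg16b) and returns nonzero on success. A sketch of the usual version-counter idiom against ABA, assuming the AO_val1/AO_val2 accessors from standard_ao_double_t.h; the cell variable and publish() helper are hypothetical:

    #include <atomic_ops.h>

    /* AO_val1 holds the payload, AO_val2 a version counter bumped on
       every update so a reused payload value cannot be mistaken for an
       unchanged one. */
    static volatile AO_double_t cell;

    void publish(AO_t new_payload)
    {
      AO_t old_payload, old_version;

      do {
        old_payload = cell.AO_val1;   /* a torn read only causes a retry */
        old_version = cell.AO_val2;
      } while (!AO_compare_double_and_swap_double_full(&cell,
                                                       old_payload, old_version,
                                                       new_payload,
                                                       old_version + 1));
    }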