PAPI 7.1.0.0
Loading...
Searching...
No Matches
gcc/powerpc.h
Go to the documentation of this file.
1/*
2 * Copyright (c) 1991-1994 by Xerox Corporation. All rights reserved.
3 * Copyright (c) 1996-1999 by Silicon Graphics. All rights reserved.
4 * Copyright (c) 2003-2011 Hewlett-Packard Development Company, L.P.
5 *
6 *
7 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
8 * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
9 *
10 * Permission is hereby granted to use or copy this program
11 * for any purpose, provided the above notices are retained on all copies.
12 * Permission to modify the code and to distribute modified code is granted,
13 * provided the above notices are retained, and a notice that the code was
14 * modified is included with the above copyright notice.
15 *
16 */
17
18/* Memory model documented at http://www-106.ibm.com/developerworks/ */
19/* eserver/articles/archguide.html and (clearer) */
20/* http://www-106.ibm.com/developerworks/eserver/articles/powerpc.html. */
21/* There appears to be no implicit ordering between any kind of */
22/* independent memory references. */
23
24/* TODO: Implement double-wide operations if available. */
25
26#if (AO_GNUC_PREREQ(4, 8) || AO_CLANG_PREREQ(3, 8)) \
27 && !defined(AO_DISABLE_GCC_ATOMICS)
28 /* Probably, it could be enabled even for earlier gcc/clang versions. */
29
30 /* TODO: As of clang-3.8.1, it emits lwsync in AO_load_acquire */
31 /* (i.e., the code is less efficient than the one given below). */
32
33# include "generic.h"
34
35#else /* AO_DISABLE_GCC_ATOMICS */
36
37/* Architecture enforces some ordering based on control dependence. */
38/* I don't know if that could help. */
39/* Data-dependent loads are always ordered. */
40/* Based on the above references, eieio is intended for use on */
41/* uncached memory, which we don't support. It does not order loads */
42/* from cached memory. */
43
44#include "../all_aligned_atomic_load_store.h"
45
46#include "../test_and_set_t_is_ao_t.h"
47 /* There seems to be no byte equivalent of lwarx, so this */
48 /* may really be what we want, at least in the 32-bit case. */
49
50AO_INLINE void
52{
53 __asm__ __volatile__("sync" : : : "memory");
54}
55#define AO_HAVE_nop_full
56
57/* lwsync apparently works for everything but a StoreLoad barrier. */
58AO_INLINE void
60{
61#ifdef __NO_LWSYNC__
62 __asm__ __volatile__("sync" : : : "memory");
63#else
64 __asm__ __volatile__("lwsync" : : : "memory");
65#endif
66}
67
/* Both the write barrier and the read barrier are mapped onto          */
/* AO_lwsync().                                                         */
#define AO_nop_write()  AO_lwsync()
#define AO_HAVE_nop_write

#define AO_nop_read()   AO_lwsync()
#define AO_HAVE_nop_read
73
/* Instruction mnemonics (and the clobber used by AO_load_acquire),     */
/* chosen separately for 64-bit and 32-bit targets.                     */
#if defined(__powerpc64__) || defined(__ppc64__) || defined(__64BIT__)
  /* 64-bit target: use the doubleword forms (ld instead of lwz).       */
# define AO_PPC_LD              "ld"
# define AO_PPC_LxARX           "ldarx"
# define AO_PPC_CMPx            "cmpd"
# define AO_PPC_STxCXd          "stdcx."
# define AO_PPC_LOAD_CLOBBER    "cr0"
#else
  /* 32-bit target: use the word forms.                                 */
# define AO_PPC_LD              "lwz"
# define AO_PPC_LxARX           "lwarx"
# define AO_PPC_CMPx            "cmpw"
# define AO_PPC_STxCXd          "stwcx."
# define AO_PPC_LOAD_CLOBBER    "cc"
        /* FIXME: gcc should be asked to allocate one of the condition  */
        /* registers, but the "y" constraint always produced            */
        /* "impossible constraint" when tried.                          */
# define AO_T_IS_INT
#endif
92
/* AO_PPC_L(label) emits the definition of a local assembler label;     */
/* AO_PPC_BR_A(labelBF, addr) yields the branch target, either as a     */
/* label reference (labelBF) or as an explicit address expression       */
/* (addr).                                                              */
#if defined(_AIX)
  /* Labels are not supported on AIX, so no label is emitted and the    */
  /* branch uses the explicit address.  ppc64 has the same instruction  */
  /* size as the 32-bit one, so one set of offsets serves both.         */
# define AO_PPC_L(label)                /* empty */
# define AO_PPC_BR_A(labelBF, addr)     addr
#else
  /* Elsewhere: define the label and branch to it by name.              */
# define AO_PPC_L(label)                label ": "
# define AO_PPC_BR_A(labelBF, addr)     labelBF
#endif
102
103/* We explicitly specify load_acquire, since it is important, and can */
104/* be implemented relatively cheaply. It could be implemented */
105/* with an ordinary load followed by a lwsync. But the general wisdom */
106/* seems to be that a data dependent branch followed by an isync is */
107/* cheaper. And the documentation is fairly explicit that this also */
108/* has acquire semantics. */
110AO_load_acquire(const volatile AO_t *addr)
111{
112 AO_t result;
113
114 __asm__ __volatile__ (
115 AO_PPC_LD "%U1%X1 %0,%1\n"
116 "cmpw %0,%0\n"
117 "bne- " AO_PPC_BR_A("1f", "$+4") "\n"
118 AO_PPC_L("1") "isync\n"
119 : "=r" (result)
120 : "m"(*addr) : "memory", AO_PPC_LOAD_CLOBBER);
121 return result;
122}
123#define AO_HAVE_load_acquire
124
125/* We explicitly specify store_release, since it relies */
126/* on the fact that lwsync is also a LoadStore barrier. */
127AO_INLINE void
128AO_store_release(volatile AO_t *addr, AO_t value)
129{
130 AO_lwsync();
131 *addr = value;
132}
133#define AO_HAVE_store_release
134
135#ifndef AO_PREFER_GENERALIZED
136/* This is similar to the code in the garbage collector. Deleting */
137/* this and having it synthesized from compare_and_swap would probably */
138/* only cost us a load immediate instruction. */
140AO_test_and_set(volatile AO_TS_t *addr) {
141 /* TODO: And we should be using smaller objects anyway. */
142 AO_t oldval;
143 AO_t temp = 1; /* locked value */
144
145 __asm__ __volatile__(
146 AO_PPC_L("1") AO_PPC_LxARX " %0,0,%1\n"
147 /* load and reserve */
148 AO_PPC_CMPx "i %0, 0\n" /* if load is */
149 "bne " AO_PPC_BR_A("2f", "$+12") "\n"
150 /* non-zero, return already set */
151 AO_PPC_STxCXd " %2,0,%1\n" /* else store conditional */
152 "bne- " AO_PPC_BR_A("1b", "$-16") "\n"
153 /* retry if lost reservation */
154 AO_PPC_L("2") "\n" /* oldval is zero if we set */
155 : "=&r"(oldval)
156 : "r"(addr), "r"(temp)
157 : "memory", "cr0");
158 return (AO_TS_VAL_t)oldval;
159}
160#define AO_HAVE_test_and_set
161
165 AO_lwsync();
166 return result;
167}
168#define AO_HAVE_test_and_set_acquire
169
172 AO_lwsync();
173 return AO_test_and_set(addr);
174}
175#define AO_HAVE_test_and_set_release
176
180 AO_lwsync();
181 result = AO_test_and_set(addr);
182 AO_lwsync();
183 return result;
184}
185#define AO_HAVE_test_and_set_full
186#endif /* !AO_PREFER_GENERALIZED */
187
188#ifndef AO_GENERALIZE_ASM_BOOL_CAS
189
190 AO_INLINE int
191 AO_compare_and_swap(volatile AO_t *addr, AO_t old, AO_t new_val)
192 {
193 AO_t oldval;
194 int result = 0;
195
196 __asm__ __volatile__(
197 AO_PPC_L("1") AO_PPC_LxARX " %0,0,%2\n" /* load and reserve */
198 AO_PPC_CMPx " %0, %4\n" /* if load is not equal to */
199 "bne " AO_PPC_BR_A("2f", "$+16") "\n" /* old, fail */
200 AO_PPC_STxCXd " %3,0,%2\n" /* else store conditional */
201 "bne- " AO_PPC_BR_A("1b", "$-16") "\n"
202 /* retry if lost reservation */
203 "li %1,1\n" /* result = 1; */
204 AO_PPC_L("2") "\n"
205 : "=&r"(oldval), "=&r"(result)
206 : "r"(addr), "r"(new_val), "r"(old), "1"(result)
207 : "memory", "cr0");
208 return result;
209 }
210# define AO_HAVE_compare_and_swap
211
212 AO_INLINE int
213 AO_compare_and_swap_acquire(volatile AO_t *addr, AO_t old, AO_t new_val)
214 {
215 int result = AO_compare_and_swap(addr, old, new_val);
216 AO_lwsync();
217 return result;
218 }
219# define AO_HAVE_compare_and_swap_acquire
220
221 AO_INLINE int
222 AO_compare_and_swap_release(volatile AO_t *addr, AO_t old, AO_t new_val)
223 {
224 AO_lwsync();
225 return AO_compare_and_swap(addr, old, new_val);
226 }
227# define AO_HAVE_compare_and_swap_release
228
229 AO_INLINE int
230 AO_compare_and_swap_full(volatile AO_t *addr, AO_t old, AO_t new_val)
231 {
232 int result;
233 AO_lwsync();
234 result = AO_compare_and_swap(addr, old, new_val);
235 if (result)
236 AO_lwsync();
237 return result;
238 }
239# define AO_HAVE_compare_and_swap_full
240
241#endif /* !AO_GENERALIZE_ASM_BOOL_CAS */
242
244AO_fetch_compare_and_swap(volatile AO_t *addr, AO_t old_val, AO_t new_val)
245{
246 AO_t fetched_val;
247
248 __asm__ __volatile__(
249 AO_PPC_L("1") AO_PPC_LxARX " %0,0,%1\n" /* load and reserve */
250 AO_PPC_CMPx " %0, %3\n" /* if load is not equal to */
251 "bne " AO_PPC_BR_A("2f", "$+12") "\n" /* old_val, fail */
252 AO_PPC_STxCXd " %2,0,%1\n" /* else store conditional */
253 "bne- " AO_PPC_BR_A("1b", "$-16") "\n"
254 /* retry if lost reservation */
255 AO_PPC_L("2") "\n"
256 : "=&r"(fetched_val)
257 : "r"(addr), "r"(new_val), "r"(old_val)
258 : "memory", "cr0");
259 return fetched_val;
260}
261#define AO_HAVE_fetch_compare_and_swap
262
265 AO_t new_val)
266{
267 AO_t result = AO_fetch_compare_and_swap(addr, old_val, new_val);
268 AO_lwsync();
269 return result;
270}
271#define AO_HAVE_fetch_compare_and_swap_acquire
272
275 AO_t new_val)
276{
277 AO_lwsync();
278 return AO_fetch_compare_and_swap(addr, old_val, new_val);
279}
280#define AO_HAVE_fetch_compare_and_swap_release
281
284 AO_t new_val)
285{
286 AO_t result;
287 AO_lwsync();
288 result = AO_fetch_compare_and_swap(addr, old_val, new_val);
289 if (result == old_val)
290 AO_lwsync();
291 return result;
292}
293#define AO_HAVE_fetch_compare_and_swap_full
294
295#ifndef AO_PREFER_GENERALIZED
297AO_fetch_and_add(volatile AO_t *addr, AO_t incr) {
298 AO_t oldval;
299 AO_t newval;
300
301 __asm__ __volatile__(
302 AO_PPC_L("1") AO_PPC_LxARX " %0,0,%2\n" /* load and reserve */
303 "add %1,%0,%3\n" /* increment */
304 AO_PPC_STxCXd " %1,0,%2\n" /* store conditional */
305 "bne- " AO_PPC_BR_A("1b", "$-12") "\n"
306 /* retry if lost reservation */
307 : "=&r"(oldval), "=&r"(newval)
308 : "r"(addr), "r"(incr)
309 : "memory", "cr0");
310 return oldval;
311}
312#define AO_HAVE_fetch_and_add
313
315AO_fetch_and_add_acquire(volatile AO_t *addr, AO_t incr) {
316 AO_t result = AO_fetch_and_add(addr, incr);
317 AO_lwsync();
318 return result;
319}
320#define AO_HAVE_fetch_and_add_acquire
321
323AO_fetch_and_add_release(volatile AO_t *addr, AO_t incr) {
324 AO_lwsync();
325 return AO_fetch_and_add(addr, incr);
326}
327#define AO_HAVE_fetch_and_add_release
328
330AO_fetch_and_add_full(volatile AO_t *addr, AO_t incr) {
331 AO_t result;
332 AO_lwsync();
333 result = AO_fetch_and_add(addr, incr);
334 AO_lwsync();
335 return result;
336}
337#define AO_HAVE_fetch_and_add_full
338#endif /* !AO_PREFER_GENERALIZED */
339
340#undef AO_PPC_BR_A
341#undef AO_PPC_CMPx
342#undef AO_PPC_L
343#undef AO_PPC_LD
344#undef AO_PPC_LOAD_CLOBBER
345#undef AO_PPC_LxARX
346#undef AO_PPC_STxCXd
347
348#endif /* AO_DISABLE_GCC_ATOMICS */
volatile int result
#define AO_t
Definition: atomic_ops.h:156
#define AO_INLINE
Definition: atomic_ops.h:186
#define AO_fetch_compare_and_swap_full(addr, old, newval)
Definition: emul_cas.h:61
#define AO_TS_t
Definition: gcc/hppa.h:39
#define AO_TS_VAL_t
Definition: gcc/hppa.h:44
AO_INLINE AO_TS_VAL_t AO_test_and_set_acquire(volatile AO_TS_t *addr)
Definition: gcc/powerpc.h:163
AO_INLINE AO_t AO_fetch_and_add_release(volatile AO_t *addr, AO_t incr)
Definition: gcc/powerpc.h:323
AO_INLINE AO_t AO_fetch_and_add(volatile AO_t *addr, AO_t incr)
Definition: gcc/powerpc.h:297
AO_INLINE AO_t AO_fetch_compare_and_swap_release(volatile AO_t *addr, AO_t old_val, AO_t new_val)
Definition: gcc/powerpc.h:274
AO_INLINE int AO_compare_and_swap(volatile AO_t *addr, AO_t old, AO_t new_val)
Definition: gcc/powerpc.h:191
#define AO_PPC_STxCXd
Definition: gcc/powerpc.h:85
#define AO_PPC_LxARX
Definition: gcc/powerpc.h:83
AO_INLINE AO_t AO_fetch_and_add_acquire(volatile AO_t *addr, AO_t incr)
Definition: gcc/powerpc.h:315
AO_INLINE int AO_compare_and_swap_full(volatile AO_t *addr, AO_t old, AO_t new_val)
Definition: gcc/powerpc.h:230
AO_INLINE AO_t AO_fetch_compare_and_swap(volatile AO_t *addr, AO_t old_val, AO_t new_val)
Definition: gcc/powerpc.h:244
AO_INLINE void AO_lwsync(void)
Definition: gcc/powerpc.h:59
AO_INLINE void AO_store_release(volatile AO_t *addr, AO_t value)
Definition: gcc/powerpc.h:128
AO_INLINE int AO_compare_and_swap_acquire(volatile AO_t *addr, AO_t old, AO_t new_val)
Definition: gcc/powerpc.h:213
AO_INLINE AO_TS_VAL_t AO_test_and_set(volatile AO_TS_t *addr)
Definition: gcc/powerpc.h:140
#define AO_PPC_LD
Definition: gcc/powerpc.h:82
#define AO_PPC_CMPx
Definition: gcc/powerpc.h:84
AO_INLINE AO_TS_VAL_t AO_test_and_set_full(volatile AO_TS_t *addr)
Definition: gcc/powerpc.h:178
AO_INLINE AO_TS_VAL_t AO_test_and_set_release(volatile AO_TS_t *addr)
Definition: gcc/powerpc.h:171
AO_INLINE AO_t AO_fetch_and_add_full(volatile AO_t *addr, AO_t incr)
Definition: gcc/powerpc.h:330
#define AO_PPC_LOAD_CLOBBER
Definition: gcc/powerpc.h:86
#define AO_PPC_L(label)
Definition: gcc/powerpc.h:99
AO_INLINE int AO_compare_and_swap_release(volatile AO_t *addr, AO_t old, AO_t new_val)
Definition: gcc/powerpc.h:222
#define AO_PPC_BR_A(labelBF, addr)
Definition: gcc/powerpc.h:100
AO_INLINE AO_t AO_fetch_compare_and_swap_acquire(volatile AO_t *addr, AO_t old_val, AO_t new_val)
Definition: gcc/powerpc.h:264
AO_INLINE AO_t AO_load_acquire(const volatile AO_t *addr)
Definition: gcc/powerpc.h:110
AO_INLINE void AO_nop_full(void)
Definition: gcc/powerpc.h:51