DYT/Tool/OpenSceneGraph-3.6.5/include/server/port/atomics/arch-ppc.h
/*-------------------------------------------------------------------------
*
* arch-ppc.h
* Atomic operations considerations specific to PowerPC
*
* Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* NOTES:
*
* src/include/port/atomics/arch-ppc.h
*
*-------------------------------------------------------------------------
*/
#if defined(__GNUC__)
/*
 * lwsync orders loads with respect to each other, and similarly with stores.
 * What it does not do is keep a store from being reordered past a subsequent
 * load, so sync must be used for a full memory barrier.
 */
#define pg_memory_barrier_impl() __asm__ __volatile__ ("sync" : : : "memory")
#define pg_read_barrier_impl() __asm__ __volatile__ ("lwsync" : : : "memory")
#define pg_write_barrier_impl() __asm__ __volatile__ ("lwsync" : : : "memory")
#endif
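
/*
 * Illustrative sketch (editorial addition, not part of the upstream file):
 * the store-load case that lwsync does not order.  In a Dekker-style
 * handshake each side stores its own flag and then loads the other side's;
 * only the full barrier (sync) keeps the load from being satisfied before
 * the store is visible to the other side.  All names here are hypothetical.
 */
#if 0							/* example only, not compiled */
static volatile uint32 example_flag_a;
static volatile uint32 example_flag_b;

static inline bool
example_try_enter_a(void)
{
	example_flag_a = 1;			/* announce intent */
	pg_memory_barrier_impl();	/* store-load ordering needs sync, not lwsync */
	return example_flag_b == 0;	/* proceed only if B has not announced */
}
#endif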
#define PG_HAVE_ATOMIC_U32_SUPPORT
typedef struct pg_atomic_uint32
{
	volatile uint32 value;
} pg_atomic_uint32;
/* 64-bit atomics are only supported in 64-bit mode */
#if SIZEOF_VOID_P >= 8
#define PG_HAVE_ATOMIC_U64_SUPPORT
typedef struct pg_atomic_uint64
{
	volatile uint64 value pg_attribute_aligned(8);
} pg_atomic_uint64;
#endif
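
/*
 * Illustrative sketch (editorial addition): these structs are not meant to
 * be touched directly; callers go through the generic wrappers declared in
 * port/atomics.h (pg_atomic_init_u32(), pg_atomic_fetch_add_u32(), ...),
 * which route to the _impl functions below.  Hypothetical usage:
 */
#if 0							/* example only, not compiled */
static pg_atomic_uint32 example_counter;

static inline void
example_setup(void)
{
	pg_atomic_init_u32(&example_counter, 0);	/* generic wrapper, defined later */
}

static inline uint32
example_bump(void)
{
	/* returns the value the counter held before the increment */
	return pg_atomic_fetch_add_u32(&example_counter, 1);
}
#endif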
/*
* This mimics gcc __atomic_compare_exchange_n(..., __ATOMIC_SEQ_CST), but
* code generation differs at the end. __atomic_compare_exchange_n():
* 100: isync
* 104: mfcr r3
* 108: rlwinm r3,r3,3,31,31
* 10c: bne 120 <.eb+0x10>
* 110: clrldi r3,r3,63
* 114: addi r1,r1,112
* 118: blr
* 11c: nop
* 120: clrldi r3,r3,63
* 124: stw r9,0(r4)
* 128: addi r1,r1,112
* 12c: blr
*
* This:
* f0: isync
* f4: mfcr r9
* f8: rldicl. r3,r9,35,63
* fc: bne 104 <.eb>
* 100: stw r10,0(r4)
* 104: addi r1,r1,112
* 108: blr
*
* This implementation may or may not have materially different performance.
* It's not exploiting the fact that cr0 still holds the relevant comparison
* bits, set during the __asm__. One could fix that by moving more code into
* the __asm__. (That would remove the freedom to eliminate dead stores when
* the caller ignores "expected", but few callers do.)
*
* Recognizing constant "newval" would be superfluous, because there's no
* immediate-operand version of stwcx.
*/
#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
static inline bool
pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
									uint32 *expected, uint32 newval)
{
	uint32		found;
	uint32		condition_register;
	bool		ret;

#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
	if (__builtin_constant_p(*expected) &&
		(int32) *expected <= PG_INT16_MAX &&
		(int32) *expected >= PG_INT16_MIN)
		__asm__ __volatile__(
			" sync \n"
			" lwarx %0,0,%5,1 \n"
			" cmpwi %0,%3 \n"
			" bne $+12 \n"		/* branch to lwsync */
			" stwcx. %4,0,%5 \n"
			" bne $-16 \n"		/* branch to lwarx */
			" lwsync \n"
			" mfcr %1 \n"
			: "=&r"(found), "=r"(condition_register), "+m"(ptr->value)
			: "i"(*expected), "r"(newval), "r"(&ptr->value)
			: "memory", "cc");
	else
#endif
		__asm__ __volatile__(
			" sync \n"
			" lwarx %0,0,%5,1 \n"
			" cmpw %0,%3 \n"
			" bne $+12 \n"		/* branch to lwsync */
			" stwcx. %4,0,%5 \n"
			" bne $-16 \n"		/* branch to lwarx */
			" lwsync \n"
			" mfcr %1 \n"
			: "=&r"(found), "=r"(condition_register), "+m"(ptr->value)
			: "r"(*expected), "r"(newval), "r"(&ptr->value)
			: "memory", "cc");

	ret = (condition_register >> 29) & 1;	/* test eq bit of cr0 */
	if (!ret)
		*expected = found;
	return ret;
}
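
/*
 * Illustrative sketch (editorial addition): the CAS contract above returns
 * true on success; on failure it writes the value actually found back into
 * *expected, so a retry loop does not need a separate re-read.  Real callers
 * use the pg_atomic_compare_exchange_u32() wrapper from port/atomics.h; the
 * _impl is called directly here only to keep the example self-contained.
 */
#if 0							/* example only, not compiled */
static inline void
example_store_max(volatile pg_atomic_uint32 *target, uint32 candidate)
{
	uint32		cur = target->value;	/* unsynchronized first read is fine */

	while (cur < candidate)
	{
		if (pg_atomic_compare_exchange_u32_impl(target, &cur, candidate))
			break;				/* installed candidate */
		/* on failure, cur now holds the latest value; loop re-checks it */
	}
}
#endif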
/*
 * This mirrors gcc __sync_fetch_and_add().
 *
 * Like tas(), use constraint "=&b" for the value fed to addi, so that r0 is
 * never allocated: addi treats an rA operand of r0 as the constant 0 rather
 * than the register's contents.
 */
#define PG_HAVE_ATOMIC_FETCH_ADD_U32
static inline uint32
pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
{
	uint32		_t;
	uint32		res;

#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
	if (__builtin_constant_p(add_) &&
		add_ <= PG_INT16_MAX && add_ >= PG_INT16_MIN)
		__asm__ __volatile__(
			" sync \n"
			" lwarx %1,0,%4,1 \n"
			" addi %0,%1,%3 \n"
			" stwcx. %0,0,%4 \n"
			" bne $-12 \n"		/* branch to lwarx */
			" lwsync \n"
			: "=&r"(_t), "=&b"(res), "+m"(ptr->value)
			: "i"(add_), "r"(&ptr->value)
			: "memory", "cc");
	else
#endif
		__asm__ __volatile__(
			" sync \n"
			" lwarx %1,0,%4,1 \n"
			" add %0,%1,%3 \n"
			" stwcx. %0,0,%4 \n"
			" bne $-12 \n"		/* branch to lwarx */
			" lwsync \n"
			: "=&r"(_t), "=&r"(res), "+m"(ptr->value)
			: "r"(add_), "r"(&ptr->value)
			: "memory", "cc");

	return res;
}
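
/*
 * Illustrative sketch (editorial addition): fetch-add returns the value the
 * counter held before the addition, which is what makes it usable as a
 * ticket/sequence generator; a negative addend gives a fetch-and-subtract.
 * All names here are hypothetical.
 */
#if 0							/* example only, not compiled */
static inline uint32
example_take_ticket(volatile pg_atomic_uint32 *next_ticket)
{
	/* each caller gets a distinct, monotonically assigned ticket number */
	return pg_atomic_fetch_add_u32_impl(next_ticket, 1);
}

static inline uint32
example_release(volatile pg_atomic_uint32 *refcount)
{
	/* returns the count before the decrement; 1 means "last reference" */
	return pg_atomic_fetch_add_u32_impl(refcount, -1);
}
#endif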
#ifdef PG_HAVE_ATOMIC_U64_SUPPORT
#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
static inline bool
pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
									uint64 *expected, uint64 newval)
{
	uint64		found;
	uint32		condition_register;
	bool		ret;

	/* Like u32, but s/lwarx/ldarx/; s/stwcx/stdcx/; s/cmpw/cmpd/ */
#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
	if (__builtin_constant_p(*expected) &&
		(int64) *expected <= PG_INT16_MAX &&
		(int64) *expected >= PG_INT16_MIN)
		__asm__ __volatile__(
			" sync \n"
			" ldarx %0,0,%5,1 \n"
			" cmpdi %0,%3 \n"
			" bne $+12 \n"		/* branch to lwsync */
			" stdcx. %4,0,%5 \n"
			" bne $-16 \n"		/* branch to ldarx */
			" lwsync \n"
			" mfcr %1 \n"
			: "=&r"(found), "=r"(condition_register), "+m"(ptr->value)
			: "i"(*expected), "r"(newval), "r"(&ptr->value)
			: "memory", "cc");
	else
#endif
		__asm__ __volatile__(
			" sync \n"
			" ldarx %0,0,%5,1 \n"
			" cmpd %0,%3 \n"
			" bne $+12 \n"		/* branch to lwsync */
			" stdcx. %4,0,%5 \n"
			" bne $-16 \n"		/* branch to ldarx */
			" lwsync \n"
			" mfcr %1 \n"
			: "=&r"(found), "=r"(condition_register), "+m"(ptr->value)
			: "r"(*expected), "r"(newval), "r"(&ptr->value)
			: "memory", "cc");

	ret = (condition_register >> 29) & 1;	/* test eq bit of cr0 */
	if (!ret)
		*expected = found;
	return ret;
}
#define PG_HAVE_ATOMIC_FETCH_ADD_U64
static inline uint64
pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
{
	uint64		_t;
	uint64		res;

	/* Like u32, but s/lwarx/ldarx/; s/stwcx/stdcx/ */
#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
	if (__builtin_constant_p(add_) &&
		add_ <= PG_INT16_MAX && add_ >= PG_INT16_MIN)
		__asm__ __volatile__(
			" sync \n"
			" ldarx %1,0,%4,1 \n"
			" addi %0,%1,%3 \n"
			" stdcx. %0,0,%4 \n"
			" bne $-12 \n"		/* branch to ldarx */
			" lwsync \n"
			: "=&r"(_t), "=&b"(res), "+m"(ptr->value)
			: "i"(add_), "r"(&ptr->value)
			: "memory", "cc");
	else
#endif
		__asm__ __volatile__(
			" sync \n"
			" ldarx %1,0,%4,1 \n"
			" add %0,%1,%3 \n"
			" stdcx. %0,0,%4 \n"
			" bne $-12 \n"		/* branch to ldarx */
			" lwsync \n"
			: "=&r"(_t), "=&r"(res), "+m"(ptr->value)
			: "r"(add_), "r"(&ptr->value)
			: "memory", "cc");

	return res;
}
#endif /* PG_HAVE_ATOMIC_U64_SUPPORT */
/* Per the architecture manual, aligned doubleword accesses have single-copy atomicity. */
#define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY
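
/*
 * Illustrative sketch (editorial addition): single-copy atomicity means an
 * aligned 64-bit load can never observe a torn value, so an unlocked read
 * like the one below is safe on this platform (it provides no ordering,
 * only non-tearing).  The generic layer in port/atomics/generic.h can rely
 * on this property for plain 64-bit reads instead of falling back to a
 * compare-exchange.
 */
#if 0							/* example only, not compiled */
static inline uint64
example_read_u64_unlocked(volatile pg_atomic_uint64 *ptr)
{
	/* aligned (pg_attribute_aligned(8)) doubleword load: never torn */
	return ptr->value;
}
#endif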