/*-------------------------------------------------------------------------
 *
 * arch-ppc.h
 *	  Atomic operations considerations specific to PowerPC
 *
 * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * NOTES:
 *
 * src/include/port/atomics/arch-ppc.h
 *
 *-------------------------------------------------------------------------
 */

#if defined(__GNUC__)

/*
 * lwsync orders loads with respect to each other, and similarly with stores.
 * But a load can be performed before a subsequent store, so sync must be used
 * for a full memory barrier.
 */
#define pg_memory_barrier_impl()	__asm__ __volatile__ ("sync" : : : "memory")
#define pg_read_barrier_impl()		__asm__ __volatile__ ("lwsync" : : : "memory")
#define pg_write_barrier_impl()	__asm__ __volatile__ ("lwsync" : : : "memory")
#endif
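
/*
 * To see why the full barrier needs sync rather than lwsync, consider the
 * classic store-buffering pattern (a sketch, not part of this header):
 *
 *	CPU A:  x = 1;  barrier;  r1 = y;
 *	CPU B:  y = 1;  barrier;  r2 = x;
 *
 * With lwsync as the barrier, a load may still be satisfied ahead of an
 * earlier store, so r1 == 0 && r2 == 0 is a permitted outcome.  With sync it
 * is not: at least one of r1, r2 observes 1.
 */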

#define PG_HAVE_ATOMIC_U32_SUPPORT
typedef struct pg_atomic_uint32
{
	volatile uint32 value;
} pg_atomic_uint32;
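
/*
 * A note on the shape: wrapping the value in a struct keeps callers from
 * operating on it directly by accident, and the volatile qualifier forces
 * the compiler to emit real memory accesses wherever generic code touches
 * ptr->value.
 */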

/* 64-bit atomics are only supported in 64-bit mode */
#if SIZEOF_VOID_P >= 8
#define PG_HAVE_ATOMIC_U64_SUPPORT
typedef struct pg_atomic_uint64
{
	volatile uint64 value pg_attribute_aligned(8);
} pg_atomic_uint64;
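
/*
 * The explicit 8-byte alignment matters: ldarx/stdcx. below require a
 * naturally aligned doubleword, as does the single-copy atomicity claimed
 * at the end of this file.
 */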

#endif

/*
 * This mimics gcc __atomic_compare_exchange_n(..., __ATOMIC_SEQ_CST), but
 * code generation differs at the end.  __atomic_compare_exchange_n():
 *	100:	isync
 *	104:	mfcr    r3
 *	108:	rlwinm  r3,r3,3,31,31
 *	10c:	bne     120 <.eb+0x10>
 *	110:	clrldi  r3,r3,63
 *	114:	addi    r1,r1,112
 *	118:	blr
 *	11c:	nop
 *	120:	clrldi  r3,r3,63
 *	124:	stw     r9,0(r4)
 *	128:	addi    r1,r1,112
 *	12c:	blr
 *
 * This:
 *	 f0:	isync
 *	 f4:	mfcr    r9
 *	 f8:	rldicl. r3,r9,35,63
 *	 fc:	bne     104 <.eb>
 *	100:	stw     r10,0(r4)
 *	104:	addi    r1,r1,112
 *	108:	blr
 *
 * This implementation may or may not have materially different performance.
 * It's not exploiting the fact that cr0 still holds the relevant comparison
 * bits, set during the __asm__.  One could fix that by moving more code into
 * the __asm__.  (That would remove the freedom to eliminate dead stores when
 * the caller ignores "expected", but few callers do.)
 *
 * Recognizing constant "newval" would be superfluous, because there's no
 * immediate-operand version of stwcx.
 */
#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
static inline bool
pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
									uint32 *expected, uint32 newval)
{
	uint32		found;
	uint32		condition_register;
	bool		ret;
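
	/*
	 * The "i"-constraint path below folds a compile-time-constant "expected"
	 * into cmpwi, whose immediate field (like all D-form immediates) holds a
	 * signed 16-bit value; hence the PG_INT16_MIN..PG_INT16_MAX guard.
	 */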
#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
	if (__builtin_constant_p(*expected) &&
		(int32) *expected <= PG_INT16_MAX &&
		(int32) *expected >= PG_INT16_MIN)
		__asm__ __volatile__(
			"	sync				\n"
			"	lwarx	%0,0,%5,1	\n"
			"	cmpwi	%0,%3		\n"
			"	bne		$+12		\n" /* branch to lwsync */
			"	stwcx.	%4,0,%5		\n"
			"	bne		$-16		\n" /* branch to lwarx */
			"	lwsync				\n"
			"	mfcr	%1			\n"
			:	"=&r"(found), "=r"(condition_register), "+m"(ptr->value)
			:	"i"(*expected), "r"(newval), "r"(&ptr->value)
			:	"memory", "cc");
	else
#endif
		__asm__ __volatile__(
			"	sync				\n"
			"	lwarx	%0,0,%5,1	\n"
			"	cmpw	%0,%3		\n"
			"	bne		$+12		\n" /* branch to lwsync */
			"	stwcx.	%4,0,%5		\n"
			"	bne		$-16		\n" /* branch to lwarx */
			"	lwsync				\n"
			"	mfcr	%1			\n"
			:	"=&r"(found), "=r"(condition_register), "+m"(ptr->value)
			:	"r"(*expected), "r"(newval), "r"(&ptr->value)
			:	"memory", "cc");
	ret = (condition_register >> 29) & 1;	/* test eq bit of cr0 */
	if (!ret)
		*expected = found;
	return ret;
}
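
/*
 * A usage sketch (via the public wrappers in atomics.h, not part of this
 * file): callers build read-modify-write loops on top of compare-exchange,
 * e.g.
 *
 *	uint32 old = pg_atomic_read_u32(&v);
 *	while (!pg_atomic_compare_exchange_u32(&v, &old, old | flag))
 *		;
 *
 * On failure, "expected" is overwritten with the value actually found, so
 * the loop needs no explicit re-read.
 */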

/*
 * This mirrors gcc __sync_fetch_and_add().
 *
 * Like tas(), use constraint "=&b" to avoid allocating r0.  (In "addi
 * rT,rA,SI", an rA of r0 reads as the constant 0, not the register, so the
 * register holding the fetched value must not land in r0.)
 */
#define PG_HAVE_ATOMIC_FETCH_ADD_U32
static inline uint32
pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
{
	uint32		_t;
	uint32		res;

#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
	if (__builtin_constant_p(add_) &&
		add_ <= PG_INT16_MAX && add_ >= PG_INT16_MIN)
		__asm__ __volatile__(
			"	sync				\n"
			"	lwarx	%1,0,%4,1	\n"
			"	addi	%0,%1,%3	\n"
			"	stwcx.	%0,0,%4		\n"
			"	bne		$-12		\n" /* branch to lwarx */
			"	lwsync				\n"
			:	"=&r"(_t), "=&b"(res), "+m"(ptr->value)
			:	"i"(add_), "r"(&ptr->value)
			:	"memory", "cc");
	else
#endif
		__asm__ __volatile__(
			"	sync				\n"
			"	lwarx	%1,0,%4,1	\n"
			"	add		%0,%1,%3	\n"
			"	stwcx.	%0,0,%4		\n"
			"	bne		$-12		\n" /* branch to lwarx */
			"	lwsync				\n"
			:	"=&r"(_t), "=&r"(res), "+m"(ptr->value)
			:	"r"(add_), "r"(&ptr->value)
			:	"memory", "cc");

	return res;
}
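
/*
 * A usage sketch (again via the wrappers in atomics.h, with a hypothetical
 * counter "next_ticket"): the returned value is the one fetched *before*
 * the addition, e.g.
 *
 *	uint32 ticket = pg_atomic_fetch_add_u32(&next_ticket, 1);
 *
 * hands each caller a distinct ticket number.
 */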

#ifdef PG_HAVE_ATOMIC_U64_SUPPORT

#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
static inline bool
pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
									uint64 *expected, uint64 newval)
{
	uint64		found;
	uint32		condition_register;
	bool		ret;

	/* Like u32, but s/lwarx/ldarx/; s/stwcx/stdcx/; s/cmpw/cmpd/ */
#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
	if (__builtin_constant_p(*expected) &&
		(int64) *expected <= PG_INT16_MAX &&
		(int64) *expected >= PG_INT16_MIN)
		__asm__ __volatile__(
			"	sync				\n"
			"	ldarx	%0,0,%5,1	\n"
			"	cmpdi	%0,%3		\n"
			"	bne		$+12		\n" /* branch to lwsync */
			"	stdcx.	%4,0,%5		\n"
			"	bne		$-16		\n" /* branch to ldarx */
			"	lwsync				\n"
			"	mfcr	%1			\n"
			:	"=&r"(found), "=r"(condition_register), "+m"(ptr->value)
			:	"i"(*expected), "r"(newval), "r"(&ptr->value)
			:	"memory", "cc");
	else
#endif
		__asm__ __volatile__(
			"	sync				\n"
			"	ldarx	%0,0,%5,1	\n"
			"	cmpd	%0,%3		\n"
			"	bne		$+12		\n" /* branch to lwsync */
			"	stdcx.	%4,0,%5		\n"
			"	bne		$-16		\n" /* branch to ldarx */
			"	lwsync				\n"
			"	mfcr	%1			\n"
			:	"=&r"(found), "=r"(condition_register), "+m"(ptr->value)
			:	"r"(*expected), "r"(newval), "r"(&ptr->value)
			:	"memory", "cc");

	ret = (condition_register >> 29) & 1;	/* test eq bit of cr0 */
	if (!ret)
		*expected = found;
	return ret;
}

#define PG_HAVE_ATOMIC_FETCH_ADD_U64
static inline uint64
pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
{
	uint64		_t;
	uint64		res;

	/* Like u32, but s/lwarx/ldarx/; s/stwcx/stdcx/ */
#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
	if (__builtin_constant_p(add_) &&
		add_ <= PG_INT16_MAX && add_ >= PG_INT16_MIN)
		__asm__ __volatile__(
			"	sync				\n"
			"	ldarx	%1,0,%4,1	\n"
			"	addi	%0,%1,%3	\n"
			"	stdcx.	%0,0,%4		\n"
			"	bne		$-12		\n" /* branch to ldarx */
			"	lwsync				\n"
			:	"=&r"(_t), "=&b"(res), "+m"(ptr->value)
			:	"i"(add_), "r"(&ptr->value)
			:	"memory", "cc");
	else
#endif
		__asm__ __volatile__(
			"	sync				\n"
			"	ldarx	%1,0,%4,1	\n"
			"	add		%0,%1,%3	\n"
			"	stdcx.	%0,0,%4		\n"
			"	bne		$-12		\n" /* branch to ldarx */
			"	lwsync				\n"
			:	"=&r"(_t), "=&r"(res), "+m"(ptr->value)
			:	"r"(add_), "r"(&ptr->value)
			:	"memory", "cc");

	return res;
}

#endif							/* PG_HAVE_ATOMIC_U64_SUPPORT */

/*
 * Per the architecture manual, aligned doubleword accesses have single-copy
 * atomicity.
 */
#define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY
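
/*
 * With this defined, the generic atomics layer can implement 64-bit read
 * and write as plain loads and stores of the aligned value rather than
 * emulating them with compare-exchange.
 */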