git: bfffb66e7f3c - main - Reapply libcxxrt atomics cleanup commit, preparing for upstream fix
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Sun, 20 Mar 2022 21:35:58 UTC
The branch main has been updated by dim: URL: https://cgit.FreeBSD.org/src/commit/?id=bfffb66e7f3cf9766e45a23080200c18535c2b85 commit bfffb66e7f3cf9766e45a23080200c18535c2b85 Author: Dimitry Andric <dim@FreeBSD.org> AuthorDate: 2022-03-20 21:19:27 +0000 Commit: Dimitry Andric <dim@FreeBSD.org> CommitDate: 2022-03-20 21:25:35 +0000 Reapply libcxxrt atomics cleanup commit, preparing for upstream fix This reapplies upstream commit fd484be, as there is a follow-up fix for the possible hangs in __cxa_guard_acquire() on i386: Atomics cleanup (#11) We need to test exception specifiers but they're gone in C++17 so compile the tests with an older version of the standard. Rewrite the guard logic to be more idiomatic C++ and more comprehensible and make sure that atomics are used where necessary. Obtained from: https://github.com/libcxxrt/libcxxrt/commit/fd484be Fixes: 56aaed388b0a MFC after: 2 weeks --- contrib/libcxxrt/atomic.h | 110 ++++++++++--- contrib/libcxxrt/exception.cc | 21 +-- contrib/libcxxrt/guard.cc | 368 ++++++++++++++++++++++++++++++------------ contrib/libcxxrt/memory.cc | 7 +- 4 files changed, 374 insertions(+), 132 deletions(-) diff --git a/contrib/libcxxrt/atomic.h b/contrib/libcxxrt/atomic.h index 131ca9f57798..701d05337cf1 100644 --- a/contrib/libcxxrt/atomic.h +++ b/contrib/libcxxrt/atomic.h @@ -1,30 +1,102 @@ #ifndef __has_builtin -#define __has_builtin(x) 0 +# define __has_builtin(x) 0 #endif #ifndef __has_feature -#define __has_feature(x) 0 +# define __has_feature(x) 0 +#endif +#ifndef __has_extension +# define __has_extension(x) 0 +#endif + +#if !__has_extension(c_atomic) +# define _Atomic(T) T #endif -/** - * Swap macro that enforces a happens-before relationship with a corresponding - * ATOMIC_LOAD. 
- */ #if __has_builtin(__c11_atomic_exchange) -#define ATOMIC_SWAP(addr, val)\ - __c11_atomic_exchange(reinterpret_cast<_Atomic(__typeof__(val))*>(addr), val, __ATOMIC_ACQ_REL) -#elif __has_builtin(__sync_swap) -#define ATOMIC_SWAP(addr, val)\ - __sync_swap(addr, val) +# define ATOMIC_BUILTIN(name) __c11_atomic_##name #else -#define ATOMIC_SWAP(addr, val)\ - __sync_lock_test_and_set(addr, val) +# define ATOMIC_BUILTIN(name) __atomic_##name##_n #endif -#if __has_builtin(__c11_atomic_load) -#define ATOMIC_LOAD(addr)\ - __c11_atomic_load(reinterpret_cast<_Atomic(__typeof__(*addr))*>(addr), __ATOMIC_ACQUIRE) +namespace +{ + /** + * C++11 memory orders. We only need a subset of them. + */ + enum memory_order + { + /** + * Acquire order. + */ + acquire = __ATOMIC_ACQUIRE, + + /** + * Release order. + */ + release = __ATOMIC_RELEASE, + + /** + * Sequentially consistent memory ordering. + */ + seqcst = __ATOMIC_SEQ_CST + }; + + /** + * Atomic, implements a subset of `std::atomic`. + */ + template<typename T> + class atomic + { + /** + * The underlying value. Use C11 atomic qualification if available. + */ + _Atomic(T) val; + + public: + /** + * Constructor, takes a value. + */ + atomic(T init) : val(init) {} + + /** + * Atomically load with the specified memory order. + */ + T load(memory_order order = memory_order::seqcst) + { + return ATOMIC_BUILTIN(load)(&val, order); + } + + /** + * Atomically store with the specified memory order. + */ + void store(T v, memory_order order = memory_order::seqcst) + { + return ATOMIC_BUILTIN(store)(&val, v, order); + } + + /** + * Atomically exchange with the specified memory order. + */ + T exchange(T v, memory_order order = memory_order::seqcst) + { + return ATOMIC_BUILTIN(exchange)(&val, v, order); + } + + /** + * Atomically compare and exchange with the specified memory order. 
+ */ + bool compare_exchange(T & expected, + T desired, + memory_order order = memory_order::seqcst) + { +#if __has_builtin(__c11_atomic_compare_exchange_strong) + return __c11_atomic_compare_exchange_strong( + &val, &expected, desired, order, order); #else -#define ATOMIC_LOAD(addr)\ - (__sync_synchronize(), *addr) + return __atomic_compare_exchange_n( + &val, &expected, desired, true, order, order); #endif - + } + }; +} // namespace +#undef ATOMIC_BUILTIN diff --git a/contrib/libcxxrt/exception.cc b/contrib/libcxxrt/exception.cc index 0fb26ddb4ed2..2f1dc4030ba4 100644 --- a/contrib/libcxxrt/exception.cc +++ b/contrib/libcxxrt/exception.cc @@ -1,5 +1,6 @@ /* * Copyright 2010-2011 PathScale, Inc. All rights reserved. + * Copyright 2021 David Chisnall. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -289,9 +290,9 @@ using namespace ABI_NAMESPACE; /** The global termination handler. */ -static terminate_handler terminateHandler = abort; +static atomic<terminate_handler> terminateHandler = abort; /** The global unexpected exception handler. */ -static unexpected_handler unexpectedHandler = std::terminate; +static atomic<unexpected_handler> unexpectedHandler = std::terminate; /** Key used for thread-local data. 
*/ static pthread_key_t eh_key; @@ -744,12 +745,12 @@ static void throw_exception(__cxa_exception *ex) ex->unexpectedHandler = info->unexpectedHandler; if (0 == ex->unexpectedHandler) { - ex->unexpectedHandler = unexpectedHandler; + ex->unexpectedHandler = unexpectedHandler.load(); } ex->terminateHandler = info->terminateHandler; if (0 == ex->terminateHandler) { - ex->terminateHandler = terminateHandler; + ex->terminateHandler = terminateHandler.load(); } info->globals.uncaughtExceptions++; @@ -1449,7 +1450,7 @@ namespace std { if (thread_local_handlers) { return pathscale::set_unexpected(f); } - return ATOMIC_SWAP(&unexpectedHandler, f); + return unexpectedHandler.exchange(f); } /** * Sets the function that is called to terminate the program. @@ -1458,7 +1459,7 @@ namespace std { if (thread_local_handlers) { return pathscale::set_terminate(f); } - return ATOMIC_SWAP(&terminateHandler, f); + return terminateHandler.exchange(f); } /** * Terminates the program, calling a custom terminate implementation if @@ -1474,7 +1475,7 @@ namespace std // return. abort(); } - terminateHandler(); + terminateHandler.load()(); } /** * Called when an unexpected exception is encountered (i.e. an exception @@ -1491,7 +1492,7 @@ namespace std // return. abort(); } - unexpectedHandler(); + unexpectedHandler.load()(); } /** * Returns whether there are any exceptions currently being thrown that @@ -1521,7 +1522,7 @@ namespace std { return info->unexpectedHandler; } - return ATOMIC_LOAD(&unexpectedHandler); + return unexpectedHandler.load(); } /** * Returns the current terminate handler. 
@@ -1533,7 +1534,7 @@ namespace std { return info->terminateHandler; } - return ATOMIC_LOAD(&terminateHandler); + return terminateHandler.load(); } } #if defined(__arm__) && !defined(__ARM_DWARF_EH__) diff --git a/contrib/libcxxrt/guard.cc b/contrib/libcxxrt/guard.cc index 34d294cf7432..515992563a10 100644 --- a/contrib/libcxxrt/guard.cc +++ b/contrib/libcxxrt/guard.cc @@ -1,5 +1,6 @@ -/* +/* * Copyright 2010-2012 PathScale, Inc. All rights reserved. + * Copyright 2021 David Chisnall. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -10,7 +11,7 @@ * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. - * + * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -38,126 +39,305 @@ * value as a low-overhead lock. Because statics (in most sane code) are * accessed far more times than they are initialised, this lock implementation * is heavily optimised towards the case where the static has already been - * initialised. + * initialised. 
*/ +#include "atomic.h" +#include <assert.h> +#include <pthread.h> #include <stdint.h> #include <stdlib.h> -#include <stdio.h> -#include <pthread.h> -#include <assert.h> -#include "atomic.h" // Older GCC doesn't define __LITTLE_ENDIAN__ #ifndef __LITTLE_ENDIAN__ - // If __BYTE_ORDER__ is defined, use that instead +// If __BYTE_ORDER__ is defined, use that instead # ifdef __BYTE_ORDER__ # if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ # define __LITTLE_ENDIAN__ # endif - // x86 and ARM are the most common little-endian CPUs, so let's have a - // special case for them (ARM is already special cased). Assume everything - // else is big endian. +// x86 and ARM are the most common little-endian CPUs, so let's have a +// special case for them (ARM is already special cased). Assume everything +// else is big endian. # elif defined(__x86_64) || defined(__i386) # define __LITTLE_ENDIAN__ # endif #endif - /* - * The least significant bit of the guard variable indicates that the object - * has been initialised, the most significant bit is used for a spinlock. + * The Itanium C++ ABI defines guard words that are 64-bit (32-bit on AArch32) + * values with one bit defined to indicate that the guarded variable is initialised and + * another bit to indicate that it's currently locked (initialisation in + * progress). The bit to use depends on the byte order of the target. + * + * On many 32-bit platforms, 64-bit atomics are unavailable (or slow) and so we + * treat the two halves of the 64-bit word as independent values. */ +namespace +{ + /** + * The state of the guard variable when an attempt is made to lock it. + */ + enum class GuardState + { + /** + * The lock is not held but is not needed because initialisation is + * done. + */ + InitDone, + + /** + * Initialisation is not done but the lock is held by the caller. + */ + InitLockSucceeded, + + /** + * Attempting to acquire the lock failed. 
+ */ + InitLockFailed + }; + + /** + * Class encapsulating a single atomic word being used to represent the + * guard. The word size is defined by the type of `GuardWord`. The bit + * used to indicate the locked state is `1<<LockedBit`, the bit used to + * indicate the initialised state is `1<<InitBit`. + */ + template<typename GuardWord, int LockedBit, int InitBit> + struct SingleWordGuard + { + /** + * The value indicating that the lock bit is set (and no other bits). + */ + static constexpr GuardWord locked = static_cast<GuardWord>(1) + << LockedBit; + + /** + * The value indicating that the initialised bit is set (and all other + * bits are zero). + */ + static constexpr GuardWord initialised = static_cast<GuardWord>(1) + << InitBit; + + /** + * The guard variable. + */ + atomic<GuardWord> val; + + public: + /** + * Release the lock and set the initialised state. In the single-word + * implementation here, these are both done by a single store. + */ + void unlock(bool isInitialised) + { + val.store(isInitialised ? initialised : 0, memory_order::release); +#ifndef NDEBUG + GuardWord init_state = initialised; + assert(*reinterpret_cast<uint8_t*>(&init_state) != 0); +#endif + } + + /** + * Try to acquire the lock. This has a tri-state return, indicating + * either that the lock was acquired, it wasn't acquired because it was + * contended, or it wasn't acquired because the guarded variable is + * already initialised. + */ + GuardState try_lock() + { + GuardWord old = 0; + // Try to acquire the lock, assuming that we are in the state where + // the lock is not held and the variable is not initialised (so the + // expected value is 0). + if (val.compare_exchange(old, locked)) + { + return GuardState::InitLockSucceeded; + } + // If the CAS failed and the old value indicates that this is + // initialised, return that initialisation is done and skip further + // retries. + if (old == initialised) + { + return GuardState::InitDone; + } + // Otherwise, report failure. 
+ return GuardState::InitLockFailed; + } + + /** + * Check whether the guard indicates that the variable is initialised. + */ + bool is_initialised() + { + return (val.load(memory_order::acquire) & initialised) == + initialised; + } + }; + + /** + * Class encapsulating using two 32-bit atomic values to represent a 64-bit + * guard variable. + */ + template<int LockedBit, int InitBit> + class DoubleWordGuard + { + /** + * The value of `lock_word` when the lock is held. + */ + static constexpr uint32_t locked = static_cast<uint32_t>(1) + << LockedBit; + + /** + * The value of `init_word` when the guarded variable is initialised. + */ + static constexpr uint32_t initialised = static_cast<uint32_t>(1) + << InitBit; + + /** + * The word used for the initialised flag. This is always the first + * word irrespective of endian because the generated code compares the + * first byte in memory against 0. + */ + atomic<uint32_t> init_word; + + /** + * The word used for the lock. + */ + atomic<uint32_t> lock_word; + + public: + /** + * Try to acquire the lock. This has a tri-state return, indicating + * either that the lock was acquired, it wasn't acquired because it was + * contended, or it wasn't acquired because the guarded variable is + * already initialised. + */ + GuardState try_lock() + { + uint32_t old = 0; + // Try to acquire the lock + if (lock_word.compare_exchange(old, locked)) + { + // If we succeeded, check if initialisation has happened. In + // this version, we don't have atomic manipulation of both the + // lock and initialised bits together. Instead, we have an + // ordering rule that the initialised bit is only ever updated + // with the lock held. + if (is_initialised()) + { + // If another thread did manage to initialise this, release + // the lock and notify the caller that initialisation is + // done. 
+ lock_word.store(initialised, memory_order::release); + return GuardState::InitDone; + } + return GuardState::InitLockSucceeded; + } + return GuardState::InitLockFailed; + } + + /** + * Set the initialised state and release the lock. In this + * implementation, this is ordered, not atomic: the initialise bit is + * set while the lock is held. + */ + void unlock(bool isInitialised) + { + init_word.store(isInitialised ? initialised : 0, + memory_order::release); + lock_word.store(0, memory_order::release); + assert((*reinterpret_cast<uint8_t*>(this) != 0) == isInitialised); + } + + /** + * Return whether the guarded variable is initialised. + */ + bool is_initialised() + { + return (init_word.load(memory_order::acquire) & initialised) == + initialised; + } + }; + + // Check that the two implementations are the correct size. + static_assert(sizeof(SingleWordGuard<uint32_t, 31, 0>) == sizeof(uint32_t), + "Single-word 32-bit guard must be 32 bits"); + static_assert(sizeof(SingleWordGuard<uint64_t, 63, 0>) == sizeof(uint64_t), + "Single-word 64-bit guard must be 64 bits"); + static_assert(sizeof(DoubleWordGuard<31, 0>) == sizeof(uint64_t), + "Double-word guard must be 64 bits"); + #ifdef __arm__ -// ARM ABI - 32-bit guards. -typedef uint32_t guard_t; -typedef uint32_t guard_lock_t; -static const uint32_t LOCKED = static_cast<guard_t>(1) << 31; -static const uint32_t INITIALISED = 1; -#define LOCK_PART(guard) (guard) -#define INIT_PART(guard) (guard) + /** + * The Arm PCS defines a variant of the Itanium ABI with 32-bit lock words. + */ + using Guard = SingleWordGuard<uint32_t, 31, 0>; #elif defined(_LP64) -typedef uint64_t guard_t; -typedef uint64_t guard_lock_t; # if defined(__LITTLE_ENDIAN__) -static const guard_t LOCKED = static_cast<guard_t>(1) << 63; -static const guard_t INITIALISED = 1; + /** + * On little-endian 64-bit platforms the guard word is a single 64-bit + * atomic with the lock in the high bit and the initialised flag in the low + * bit. 
+ */ + using Guard = SingleWordGuard<uint64_t, 63, 0>; # else -static const guard_t LOCKED = 1; -static const guard_t INITIALISED = static_cast<guard_t>(1) << 56; + /** + * On big-endian 64-bit platforms, the guard word is a single 64-bit atomic + * with the lock in the low bit and the initialised bit in the highest + * byte. + */ + using Guard = SingleWordGuard<uint64_t, 0, 56>; # endif -#define LOCK_PART(guard) (guard) -#define INIT_PART(guard) (guard) #else -typedef uint32_t guard_lock_t; # if defined(__LITTLE_ENDIAN__) -typedef struct { - uint32_t init_half; - uint32_t lock_half; -} guard_t; -static const uint32_t LOCKED = static_cast<guard_lock_t>(1) << 31; -static const uint32_t INITIALISED = 1; + /** + * 32-bit platforms use the same layout as 64-bit. + */ + using Guard = DoubleWordGuard<31, 0>; # else -typedef struct { - uint32_t init_half; - uint32_t lock_half; -} guard_t; -static_assert(sizeof(guard_t) == sizeof(uint64_t), ""); -static const uint32_t LOCKED = 1; -static const uint32_t INITIALISED = static_cast<guard_lock_t>(1) << 24; + /** + * 32-bit platforms use the same layout as 64-bit. + */ + using Guard = DoubleWordGuard<0, 24>; # endif -#define LOCK_PART(guard) (&(guard)->lock_half) -#define INIT_PART(guard) (&(guard)->init_half) #endif -static const guard_lock_t INITIAL = 0; + +} // namespace /** * Acquires a lock on a guard, returning 0 if the object has already been * initialised, and 1 if it has not. If the object is already constructed then * this function just needs to read a byte from memory and return. */ -extern "C" int __cxa_guard_acquire(volatile guard_t *guard_object) +extern "C" int __cxa_guard_acquire(Guard *guard_object) { - guard_lock_t old; - // Not an atomic read, doesn't establish a happens-before relationship, but - // if one is already established and we end up seeing an initialised state - // then it's a fast path, otherwise we'll do something more expensive than - // this test anyway... 
- if (INITIALISED == *INIT_PART(guard_object)) + // Check if this is already initialised. If so, we don't have to do + // anything. + if (guard_object->is_initialised()) + { return 0; - // Spin trying to do the initialisation + } + // Spin trying to acquire the lock. If we fail to acquire the lock the + // first time then another thread will *probably* initialise it, but if the + // constructor throws an exception then we may have to try again in this + // thread. for (;;) { - // Loop trying to move the value of the guard from 0 (not - // locked, not initialised) to the locked-uninitialised - // position. - old = __sync_val_compare_and_swap(LOCK_PART(guard_object), - INITIAL, LOCKED); - if (old == INITIAL) { - // Lock obtained. If lock and init bit are - // in separate words, check for init race. - if (INIT_PART(guard_object) == LOCK_PART(guard_object)) - return 1; - if (INITIALISED != *INIT_PART(guard_object)) + // Try to acquire the lock. + switch (guard_object->try_lock()) + { + // If we failed to acquire the lock but another thread has + // initialised the lock while we were waiting, return immediately + // indicating that initialisation is not required. + case GuardState::InitDone: + return 0; + // If we acquired the lock, return immediately to start + // initialisation. + case GuardState::InitLockSucceeded: return 1; - - // No need for a memory barrier here, - // see first comment. - *LOCK_PART(guard_object) = INITIAL; - return 0; + // If we didn't acquire the lock, pause and retry. + case GuardState::InitLockFailed: + break; } - // If lock and init bit are in the same word, check again - // if we are done. - if (INIT_PART(guard_object) == LOCK_PART(guard_object) && - old == INITIALISED) - return 0; - - assert(old == LOCKED); - // Another thread holds the lock. - // If lock and init bit are in different words, check - // if we are done before yielding and looping. 
- if (INIT_PART(guard_object) != LOCK_PART(guard_object) && - INITIALISED == *INIT_PART(guard_object)) - return 0; sched_yield(); } } @@ -166,28 +346,16 @@ extern "C" int __cxa_guard_acquire(volatile guard_t *guard_object) * Releases the lock without marking the object as initialised. This function * is called if initialising a static causes an exception to be thrown. */ -extern "C" void __cxa_guard_abort(volatile guard_t *guard_object) +extern "C" void __cxa_guard_abort(Guard *guard_object) { - __attribute__((unused)) - bool reset = __sync_bool_compare_and_swap(LOCK_PART(guard_object), - LOCKED, INITIAL); - assert(reset); + guard_object->unlock(false); } + /** * Releases the guard and marks the object as initialised. This function is * called after successful initialisation of a static. */ -extern "C" void __cxa_guard_release(volatile guard_t *guard_object) +extern "C" void __cxa_guard_release(Guard *guard_object) { - guard_lock_t old; - if (INIT_PART(guard_object) == LOCK_PART(guard_object)) - old = LOCKED; - else - old = INITIAL; - __attribute__((unused)) - bool reset = __sync_bool_compare_and_swap(INIT_PART(guard_object), - old, INITIALISED); - assert(reset); - if (INIT_PART(guard_object) != LOCK_PART(guard_object)) - *LOCK_PART(guard_object) = INITIAL; + guard_object->unlock(true); } diff --git a/contrib/libcxxrt/memory.cc b/contrib/libcxxrt/memory.cc index 6dd43a5b897e..7beb048ae914 100644 --- a/contrib/libcxxrt/memory.cc +++ b/contrib/libcxxrt/memory.cc @@ -51,7 +51,7 @@ typedef void (*new_handler)(); * The function to call when allocation fails. By default, there is no * handler and a bad allocation exception is thrown if an allocation fails. 
*/ -static new_handler new_handl; +static atomic<new_handler> new_handl{nullptr}; namespace std { @@ -61,12 +61,13 @@ namespace std __attribute__((weak)) new_handler set_new_handler(new_handler handler) { - return ATOMIC_SWAP(&new_handl, handler); + return new_handl.exchange(handler); } + __attribute__((weak)) new_handler get_new_handler(void) { - return ATOMIC_LOAD(&new_handl); + return new_handl.load(); } }