git: 2548237983c4 - main - Revert upstream libcxxrt commit which can cause hangs on i386
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Sat, 19 Mar 2022 19:48:39 UTC
The branch main has been updated by dim: URL: https://cgit.FreeBSD.org/src/commit/?id=2548237983c47c61601b883bb9d6c9a803ef8ce4 commit 2548237983c47c61601b883bb9d6c9a803ef8ce4 Author: Dimitry Andric <dim@FreeBSD.org> AuthorDate: 2022-03-19 19:46:28 +0000 Commit: Dimitry Andric <dim@FreeBSD.org> CommitDate: 2022-03-19 19:47:29 +0000 Revert upstream libcxxrt commit which can cause hangs on i386 This reverts upstream commit fd484be: Atomics cleanup (#11) We need to test exception specifiers but they're gone in C++17 so compile the tests with an older version of the standard. Rewrite the guard logic to be more idiomatic C++ and more comprehensible and make sure that atomics are used where necessary. It looks like there are some corner cases in the i386 and/or 32-bit atomics handling, which can make __cxa_guard_acquire() hang in certain situations. Reported by: antoine Obtained from: https://github.com/libcxxrt/libcxxrt/commit/fd484be Fixes: 56aaed388b0a MFC after: 2 weeks --- contrib/libcxxrt/atomic.h | 110 +++---------- contrib/libcxxrt/exception.cc | 21 ++- contrib/libcxxrt/guard.cc | 368 ++++++++++++------------------------------ contrib/libcxxrt/memory.cc | 7 +- 4 files changed, 132 insertions(+), 374 deletions(-) diff --git a/contrib/libcxxrt/atomic.h b/contrib/libcxxrt/atomic.h index 701d05337cf1..131ca9f57798 100644 --- a/contrib/libcxxrt/atomic.h +++ b/contrib/libcxxrt/atomic.h @@ -1,102 +1,30 @@ #ifndef __has_builtin -# define __has_builtin(x) 0 +#define __has_builtin(x) 0 #endif #ifndef __has_feature -# define __has_feature(x) 0 -#endif -#ifndef __has_extension -# define __has_extension(x) 0 -#endif - -#if !__has_extension(c_atomic) -# define _Atomic(T) T +#define __has_feature(x) 0 #endif +/** + * Swap macro that enforces a happens-before relationship with a corresponding + * ATOMIC_LOAD. + */ #if __has_builtin(__c11_atomic_exchange) -# define ATOMIC_BUILTIN(name) __c11_atomic_##name +#define ATOMIC_SWAP(addr, val)\ + __c11_atomic_exchange(reinterpret_cast<_Atomic(__typeof__(val))*>(addr), val, __ATOMIC_ACQ_REL) +#elif __has_builtin(__sync_swap) +#define ATOMIC_SWAP(addr, val)\ + __sync_swap(addr, val) #else -# define ATOMIC_BUILTIN(name) __atomic_##name##_n +#define ATOMIC_SWAP(addr, val)\ + __sync_lock_test_and_set(addr, val) #endif -namespace -{ - /** - * C++11 memory orders. We only need a subset of them. - */ - enum memory_order - { - /** - * Acquire order. - */ - acquire = __ATOMIC_ACQUIRE, - - /** - * Release order. - */ - release = __ATOMIC_RELEASE, - - /** - * Sequentially consistent memory ordering. - */ - seqcst = __ATOMIC_SEQ_CST - }; - - /** - * Atomic, implements a subset of `std::atomic`. - */ - template<typename T> - class atomic - { - /** - * The underlying value. Use C11 atomic qualification if available. - */ - _Atomic(T) val; - - public: - /** - * Constructor, takes a value. - */ - atomic(T init) : val(init) {} - - /** - * Atomically load with the specified memory order. - */ - T load(memory_order order = memory_order::seqcst) - { - return ATOMIC_BUILTIN(load)(&val, order); - } - - /** - * Atomically store with the specified memory order. - */ - void store(T v, memory_order order = memory_order::seqcst) - { - return ATOMIC_BUILTIN(store)(&val, v, order); - } - - /** - * Atomically exchange with the specified memory order. - */ - T exchange(T v, memory_order order = memory_order::seqcst) - { - return ATOMIC_BUILTIN(exchange)(&val, v, order); - } - - /** - * Atomically exchange with the specified memory order. - */ - bool compare_exchange(T & expected, - T desired, - memory_order order = memory_order::seqcst) - { -#if __has_builtin(__c11_atomic_compare_exchange_strong) - return __c11_atomic_compare_exchange_strong( - &val, &expected, desired, order, order); +#if __has_builtin(__c11_atomic_load) +#define ATOMIC_LOAD(addr)\ + __c11_atomic_load(reinterpret_cast<_Atomic(__typeof__(*addr))*>(addr), __ATOMIC_ACQUIRE) #else - return __atomic_compare_exchange_n( - &val, &expected, desired, true, order, order); +#define ATOMIC_LOAD(addr)\ + (__sync_synchronize(), *addr) #endif - } - }; -} // namespace -#undef ATOMIC_BUILTIN + diff --git a/contrib/libcxxrt/exception.cc b/contrib/libcxxrt/exception.cc index 2f1dc4030ba4..0fb26ddb4ed2 100644 --- a/contrib/libcxxrt/exception.cc +++ b/contrib/libcxxrt/exception.cc @@ -1,6 +1,5 @@ /* * Copyright 2010-2011 PathScale, Inc. All rights reserved. - * Copyright 2021 David Chisnall. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -290,9 +289,9 @@ using namespace ABI_NAMESPACE; /** The global termination handler. */ -static atomic<terminate_handler> terminateHandler = abort; +static terminate_handler terminateHandler = abort; /** The global unexpected exception handler. */ -static atomic<unexpected_handler> unexpectedHandler = std::terminate; +static unexpected_handler unexpectedHandler = std::terminate; /** Key used for thread-local data. */ static pthread_key_t eh_key; @@ -745,12 +744,12 @@ static void throw_exception(__cxa_exception *ex) ex->unexpectedHandler = info->unexpectedHandler; if (0 == ex->unexpectedHandler) { - ex->unexpectedHandler = unexpectedHandler.load(); + ex->unexpectedHandler = unexpectedHandler; } ex->terminateHandler = info->terminateHandler; if (0 == ex->terminateHandler) { - ex->terminateHandler = terminateHandler.load(); + ex->terminateHandler = terminateHandler; } info->globals.uncaughtExceptions++; @@ -1450,7 +1449,7 @@ namespace std { if (thread_local_handlers) { return pathscale::set_unexpected(f); } - return unexpectedHandler.exchange(f); + return ATOMIC_SWAP(&unexpectedHandler, f); } /** * Sets the function that is called to terminate the program. @@ -1459,7 +1458,7 @@ namespace std { if (thread_local_handlers) { return pathscale::set_terminate(f); } - return terminateHandler.exchange(f); + return ATOMIC_SWAP(&terminateHandler, f); } /** * Terminates the program, calling a custom terminate implementation if @@ -1475,7 +1474,7 @@ namespace std // return. abort(); } - terminateHandler.load()(); + terminateHandler(); } /** * Called when an unexpected exception is encountered (i.e. an exception @@ -1492,7 +1491,7 @@ namespace std // return. abort(); } - unexpectedHandler.load()(); + unexpectedHandler(); } /** * Returns whether there are any exceptions currently being thrown that @@ -1522,7 +1521,7 @@ namespace std { return info->unexpectedHandler; } - return unexpectedHandler.load(); + return ATOMIC_LOAD(&unexpectedHandler); } /** * Returns the current terminate handler. @@ -1534,7 +1533,7 @@ namespace std { return info->terminateHandler; } - return terminateHandler.load(); + return ATOMIC_LOAD(&terminateHandler); } } #if defined(__arm__) && !defined(__ARM_DWARF_EH__) diff --git a/contrib/libcxxrt/guard.cc b/contrib/libcxxrt/guard.cc index 515992563a10..34d294cf7432 100644 --- a/contrib/libcxxrt/guard.cc +++ b/contrib/libcxxrt/guard.cc @@ -1,6 +1,5 @@ -/* +/* * Copyright 2010-2012 PathScale, Inc. All rights reserved. - * Copyright 2021 David Chisnall. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -11,7 +10,7 @@ * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. - * + * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR @@ -39,305 +38,126 @@ * value as a low-overhead lock. Because statics (in most sane code) are * accessed far more times than they are initialised, this lock implementation * is heavily optimised towards the case where the static has already been - * initialised. + * initialised. */ -#include "atomic.h" -#include <assert.h> -#include <pthread.h> #include <stdint.h> #include <stdlib.h> +#include <stdio.h> +#include <pthread.h> +#include <assert.h> +#include "atomic.h" // Older GCC doesn't define __LITTLE_ENDIAN__ #ifndef __LITTLE_ENDIAN__ -// If __BYTE_ORDER__ is defined, use that instead + // If __BYTE_ORDER__ is defined, use that instead # ifdef __BYTE_ORDER__ # if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ # define __LITTLE_ENDIAN__ # endif -// x86 and ARM are the most common little-endian CPUs, so let's have a -// special case for them (ARM is already special cased). Assume everything -// else is big endian. + // x86 and ARM are the most common little-endian CPUs, so let's have a + // special case for them (ARM is already special cased). Assume everything + // else is big endian. # elif defined(__x86_64) || defined(__i386) # define __LITTLE_ENDIAN__ # endif #endif + /* - * The Itanium C++ ABI defines guard words that are 64-bit (32-bit on AArch32) - * values with one bit defined to indicate that the guarded variable is and - * another bit to indicate that it's currently locked (initialisation in - * progress). The bit to use depends on the byte order of the target. - * - * On many 32-bit platforms, 64-bit atomics are unavailable (or slow) and so we - * treat the two halves of the 64-bit word as independent values and + * The least significant bit of the guard variable indicates that the object + * has been initialised, the most significant bit is used for a spinlock. */ -namespace -{ - /** - * The state of the guard variable when an attempt is made to lock it. - */ - enum class GuardState - { - /** - * The lock is not held but is not needed because initialisation is - * one. - */ - InitDone, - - /** - * Initialisation is not done but the lock is held by the caller. - */ - InitLockSucceeded, - - /** - * Attempting to acquire the lock failed. - */ - InitLockFailed - }; - - /** - * Class encapsulating a single atomic word being used to represent the - * guard. The word size is defined by the type of `GuardWord`. The bit - * used to indicate the locked state is `1<<LockedBit`, the bit used to - * indicate the initialised state is `1<<InitBit`. - */ - template<typename GuardWord, int LockedBit, int InitBit> - struct SingleWordGuard - { - /** - * The value indicating that the lock bit is set (and no other bits). - */ - static constexpr GuardWord locked = static_cast<GuardWord>(1) - << LockedBit; - - /** - * The value indicating that the initialised bit is set (and all other - * bits are zero). - */ - static constexpr GuardWord initialised = static_cast<GuardWord>(1) - << InitBit; - - /** - * The guard variable. - */ - atomic<GuardWord> val; - - public: - /** - * Release the lock and set the initialised state. In the single-word - * implementation here, these are both done by a single store. - */ - void unlock(bool isInitialised) - { - val.store(isInitialised ? initialised : 0, memory_order::release); -#ifndef NDEBUG - GuardWord init_state = initialised; - assert(*reinterpret_cast<uint8_t*>(&init_state) != 0); -#endif - } - - /** - * Try to acquire the lock. This has a tri-state return, indicating - * either that the lock was acquired, it wasn't acquired because it was - * contended, or it wasn't acquired because the guarded variable is - * already initialised. - */ - GuardState try_lock() - { - GuardWord old = 0; - // Try to acquire the lock, assuming that we are in the state where - // the lock is not held and the variable is not initialised (so the - // expected value is 0). - if (val.compare_exchange(old, locked)) - { - return GuardState::InitLockSucceeded; - } - // If the CAS failed and the old value indicates that this is - // initialised, return that initialisation is done and skip further - // retries. - if (old == initialised) - { - return GuardState::InitDone; - } - // Otherwise, report failure. - return GuardState::InitLockFailed; - } - - /** - * Check whether the guard indicates that the variable is initialised. - */ - bool is_initialised() - { - return (val.load(memory_order::acquire) & initialised) == - initialised; - } - }; - - /** - * Class encapsulating using two 32-bit atomic values to represent a 64-bit - * guard variable. - */ - template<int LockedBit, int InitBit> - class DoubleWordGuard - { - /** - * The value of `lock_word` when the lock is held. - */ - static constexpr uint32_t locked = static_cast<uint32_t>(1) - << LockedBit; - - /** - * The value of `init_word` when the guarded variable is initialised. - */ - static constexpr uint32_t initialised = static_cast<uint32_t>(1) - << InitBit; - - /** - * The word used for the initialised flag. This is always the first - * word irrespective of endian because the generated code compares the - * first byte in memory against 0. - */ - atomic<uint32_t> init_word; - - /** - * The word used for the lock. - */ - atomic<uint32_t> lock_word; - - public: - /** - * Try to acquire the lock. This has a tri-state return, indicating - * either that the lock was acquired, it wasn't acquired because it was - * contended, or it wasn't acquired because the guarded variable is - * already initialised. - */ - GuardState try_lock() - { - uint32_t old = 0; - // Try to acquire the lock - if (lock_word.compare_exchange(old, locked)) - { - // If we succeeded, check if initialisation has happened. In - // this version, we don't have atomic manipulation of both the - // lock and initialised bits together. Instead, we have an - // ordering rule that the initialised bit is only ever updated - // with the lock held. - if (is_initialised()) - { - // If another thread did manage to initialise this, release - // the lock and notify the caller that initialisation is - // done. - lock_word.store(initialised, memory_order::release); - return GuardState::InitDone; - } - return GuardState::InitLockSucceeded; - } - return GuardState::InitLockFailed; - } - - /** - * Set the initialised state and release the lock. In this - * implementation, this is ordered, not atomic: the initialise bit is - * set while the lock is held. - */ - void unlock(bool isInitialised) - { - init_word.store(isInitialised ? initialised : 0, - memory_order::release); - lock_word.store(0, memory_order::release); - assert((*reinterpret_cast<uint8_t*>(this) != 0) == isInitialised); - } - - /** - * Return whether the guarded variable is initialised. - */ - bool is_initialised() - { - return (init_word.load(memory_order::acquire) & initialised) == - initialised; - } - }; - - // Check that the two implementations are the correct size. - static_assert(sizeof(SingleWordGuard<uint32_t, 31, 0>) == sizeof(uint32_t), - "Single-word 32-bit guard must be 32 bits"); - static_assert(sizeof(SingleWordGuard<uint64_t, 63, 0>) == sizeof(uint64_t), - "Single-word 64-bit guard must be 64 bits"); - static_assert(sizeof(DoubleWordGuard<31, 0>) == sizeof(uint64_t), - "Double-word guard must be 64 bits"); - #ifdef __arm__ - /** - * The Arm PCS defines a variant of the Itanium ABI with 32-bit lock words. - */ - using Guard = SingleWordGuard<uint32_t, 31, 0>; +// ARM ABI - 32-bit guards. +typedef uint32_t guard_t; +typedef uint32_t guard_lock_t; +static const uint32_t LOCKED = static_cast<guard_t>(1) << 31; +static const uint32_t INITIALISED = 1; +#define LOCK_PART(guard) (guard) +#define INIT_PART(guard) (guard) #elif defined(_LP64) +typedef uint64_t guard_t; +typedef uint64_t guard_lock_t; # if defined(__LITTLE_ENDIAN__) - /** - * On little-endian 64-bit platforms the guard word is a single 64-bit - * atomic with the lock in the high bit and the initialised flag in the low - * bit. - */ - using Guard = SingleWordGuard<uint64_t, 63, 0>; +static const guard_t LOCKED = static_cast<guard_t>(1) << 63; +static const guard_t INITIALISED = 1; # else - /** - * On bit-endian 64-bit platforms, the guard word is a single 64-bit atomic - * with the lock in the low bit and the initialised bit in the highest - * byte. - */ - using Guard = SingleWordGuard<uint64_t, 0, 56>; +static const guard_t LOCKED = 1; +static const guard_t INITIALISED = static_cast<guard_t>(1) << 56; # endif +#define LOCK_PART(guard) (guard) +#define INIT_PART(guard) (guard) #else +typedef uint32_t guard_lock_t; # if defined(__LITTLE_ENDIAN__) - /** - * 32-bit platforms use the same layout as 64-bit. - */ - using Guard = DoubleWordGuard<31, 0>; +typedef struct { + uint32_t init_half; + uint32_t lock_half; +} guard_t; +static const uint32_t LOCKED = static_cast<guard_lock_t>(1) << 31; +static const uint32_t INITIALISED = 1; # else - /** - * 32-bit platforms use the same layout as 64-bit. - */ - using Guard = DoubleWordGuard<0, 24>; +typedef struct { + uint32_t init_half; + uint32_t lock_half; +} guard_t; +static_assert(sizeof(guard_t) == sizeof(uint64_t), ""); +static const uint32_t LOCKED = 1; +static const uint32_t INITIALISED = static_cast<guard_lock_t>(1) << 24; # endif +#define LOCK_PART(guard) (&(guard)->lock_half) +#define INIT_PART(guard) (&(guard)->init_half) #endif - -} // namespace +static const guard_lock_t INITIAL = 0; /** * Acquires a lock on a guard, returning 0 if the object has already been * initialised, and 1 if it has not. If the object is already constructed then * this function just needs to read a byte from memory and return. */ -extern "C" int __cxa_guard_acquire(Guard *guard_object) +extern "C" int __cxa_guard_acquire(volatile guard_t *guard_object) { - // Check if this is already initialised. If so, we don't have to do - // anything. - if (guard_object->is_initialised()) - { + guard_lock_t old; + // Not an atomic read, doesn't establish a happens-before relationship, but + // if one is already established and we end up seeing an initialised state + // then it's a fast path, otherwise we'll do something more expensive than + // this test anyway... + if (INITIALISED == *INIT_PART(guard_object)) return 0; - } - // Spin trying to acquire the lock. If we fail to acquire the lock the - // first time then another thread will *probably* initialise it, but if the - // constructor throws an exception then we may have to try again in this - // thread. + // Spin trying to do the initialisation for (;;) { - // Try to acquire the lock. - switch (guard_object->try_lock()) - { - // If we failed to acquire the lock but another thread has - // initialised the lock while we were waiting, return immediately - // indicating that initialisation is not required. - case GuardState::InitDone: - return 0; - // If we acquired the lock, return immediately to start - // initialisation. - case GuardState::InitLockSucceeded: + // Loop trying to move the value of the guard from 0 (not + // locked, not initialised) to the locked-uninitialised + // position. + old = __sync_val_compare_and_swap(LOCK_PART(guard_object), + INITIAL, LOCKED); + if (old == INITIAL) { + // Lock obtained. If lock and init bit are + // in separate words, check for init race. + if (INIT_PART(guard_object) == LOCK_PART(guard_object)) + return 1; + if (INITIALISED != *INIT_PART(guard_object)) return 1; - // If we didn't acquire the lock, pause and retry. - case GuardState::InitLockFailed: - break; + + // No need for a memory barrier here, + // see first comment. + *LOCK_PART(guard_object) = INITIAL; + return 0; } + // If lock and init bit are in the same word, check again + // if we are done. + if (INIT_PART(guard_object) == LOCK_PART(guard_object) && + old == INITIALISED) + return 0; + + assert(old == LOCKED); + // Another thread holds the lock. + // If lock and init bit are in different words, check + // if we are done before yielding and looping. + if (INIT_PART(guard_object) != LOCK_PART(guard_object) && + INITIALISED == *INIT_PART(guard_object)) + return 0; sched_yield(); } } @@ -346,16 +166,28 @@ extern "C" int __cxa_guard_acquire(Guard *guard_object) * Releases the lock without marking the object as initialised. This function * is called if initialising a static causes an exception to be thrown. */ -extern "C" void __cxa_guard_abort(Guard *guard_object) +extern "C" void __cxa_guard_abort(volatile guard_t *guard_object) { - guard_object->unlock(false); + __attribute__((unused)) + bool reset = __sync_bool_compare_and_swap(LOCK_PART(guard_object), + LOCKED, INITIAL); + assert(reset); } - /** * Releases the guard and marks the object as initialised. This function is * called after successful initialisation of a static. */ -extern "C" void __cxa_guard_release(Guard *guard_object) +extern "C" void __cxa_guard_release(volatile guard_t *guard_object) { - guard_object->unlock(true); + guard_lock_t old; + if (INIT_PART(guard_object) == LOCK_PART(guard_object)) + old = LOCKED; + else + old = INITIAL; + __attribute__((unused)) + bool reset = __sync_bool_compare_and_swap(INIT_PART(guard_object), + old, INITIALISED); + assert(reset); + if (INIT_PART(guard_object) != LOCK_PART(guard_object)) + *LOCK_PART(guard_object) = INITIAL; } diff --git a/contrib/libcxxrt/memory.cc b/contrib/libcxxrt/memory.cc index 7beb048ae914..6dd43a5b897e 100644 --- a/contrib/libcxxrt/memory.cc +++ b/contrib/libcxxrt/memory.cc @@ -51,7 +51,7 @@ typedef void (*new_handler)(); * The function to call when allocation fails. By default, there is no * handler and a bad allocation exception is thrown if an allocation fails. */ -static atomic<new_handler> new_handl{nullptr}; +static new_handler new_handl; namespace std { @@ -61,13 +61,12 @@ namespace std __attribute__((weak)) new_handler set_new_handler(new_handler handler) { - return new_handl.exchange(handler); + return ATOMIC_SWAP(&new_handl, handler); } - __attribute__((weak)) new_handler get_new_handler(void) { - return new_handl.load(); + return ATOMIC_LOAD(&new_handl); } }