git: 0c0fe8a6ff8b - stable/12 - Provide generic sub-word atomic *cmpset

From: Kyle Evans <kevans_at_FreeBSD.org>
Date: Thu, 07 Oct 2021 03:28:09 UTC
The branch stable/12 has been updated by kevans:

URL: https://cgit.FreeBSD.org/src/commit/?id=0c0fe8a6ff8b01018aa2c0f5a93d4dd52da0dfcd

commit 0c0fe8a6ff8b01018aa2c0f5a93d4dd52da0dfcd
Author:     Kyle Evans <kevans@FreeBSD.org>
AuthorDate: 2019-10-02 17:06:28 +0000
Commit:     Kyle Evans <kevans@FreeBSD.org>
CommitDate: 2021-10-07 03:27:26 +0000

    Provide generic sub-word atomic *cmpset
    
    Provide *cmpset_{8,16} as wrappers around atomic_fcmpset_32. Initial users
    will be mips and sparc64, and perhaps parts of powerpc.
    
    These are not for general consumption; machine/atomic.h should include this
    header as needed to provide atomic_{,f}cmpset_{8,16} and machine/atomic.h
    should provide acq_ and rel_ variants.
    
    (cherry picked from commit b6c5d1ef76cdcfe2103fd2ed8ef8f97e1c740edc)
---
 sys/sys/_atomic_subword.h | 168 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 168 insertions(+)

diff --git a/sys/sys/_atomic_subword.h b/sys/sys/_atomic_subword.h
new file mode 100644
index 000000000000..fd03ea6a6aed
--- /dev/null
+++ b/sys/sys/_atomic_subword.h
@@ -0,0 +1,168 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2019 Kyle Evans <kevans@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+#ifndef _SYS__ATOMIC_SUBWORD_H_
+#define	_SYS__ATOMIC_SUBWORD_H_
+
+/*
+ * This header is specifically for platforms that either have no way to do
+ * sub-word atomic operations or simply do not provide them.  The emulation
+ * here is not ideal, as it requires a little more effort to make sure our
+ * atomic operations are failing because of the bits of the word we're trying
+ * to write rather than the rest of the word.
+ */
+#ifndef _MACHINE_ATOMIC_H_
+#error do not include this header, use machine/atomic.h
+#endif
+
+#include <machine/endian.h>
+
+#ifndef NBBY
+#define	NBBY	8
+#endif
+
+/*
+ * Helpers for locating a sub-word operand inside its containing 32-bit word.
+ *
+ * _ATOMIC_WORD_ALIGNED(p) rounds p down to a 4-byte boundary and yields the
+ * result as a uint32_t pointer.  The whole expansion is parenthesized so
+ * that it behaves as a single operand wherever it is used.
+ *
+ * _ATOMIC_BYTE_SHIFT(p) and _ATOMIC_HWORD_SHIFT(p) give the number of bits
+ * by which a byte or half-word value must be shifted left to line up with
+ * the storage at p inside that word.  The result depends on p's offset in
+ * the word (p % 4) and on the machine byte order.  A half-word at offset 3
+ * would straddle two words and is not handled by these macros.
+ */
+#define	_ATOMIC_WORD_ALIGNED(p)		\
+    ((uint32_t *)((__uintptr_t)(p) - ((__uintptr_t)(p) % 4)))
+
+#if _BYTE_ORDER == _BIG_ENDIAN
+/* Big-endian: the lowest address holds the most significant bits. */
+#define	_ATOMIC_BYTE_SHIFT(p)		\
+    ((3 - ((__uintptr_t)(p) % 4)) * NBBY)
+
+#define	_ATOMIC_HWORD_SHIFT(p)		\
+    ((2 - ((__uintptr_t)(p) % 4)) * NBBY)
+#else
+/* Otherwise the byte offset maps directly onto the bit offset. */
+#define	_ATOMIC_BYTE_SHIFT(p)		\
+    (((__uintptr_t)(p) % 4) * NBBY)
+
+#define	_ATOMIC_HWORD_SHIFT(p)		\
+    (((__uintptr_t)(p) % 4) * NBBY)
+#endif
+
+/*
+ * Compare-and-set restricted to the bits of *addr selected by mask.  Both
+ * old and val must already be shifted into position under mask by the
+ * caller.
+ *
+ * The update is attempted on the whole 32-bit word via atomic_fcmpset_32().
+ * A failed attempt only ends the operation when the masked bits no longer
+ * match old.  If the word-sized operation fails while those bits still
+ * match (for instance because a neighbouring part of the word changed), the
+ * rest of the word is sampled again and the attempt is repeated.  Ideally a
+ * single pass is enough.
+ *
+ * Returns the result of the last atomic_fcmpset_32() call.
+ */
+static __inline int
+_atomic_cmpset_masked_word(uint32_t *addr, uint32_t old, uint32_t val,
+    uint32_t mask)
+{
+	const uint32_t want = old;
+	uint32_t cur;
+	int ok;
+
+	for (;;) {
+		/*
+		 * Build the expected word: the caller's bits under the mask,
+		 * whatever is currently stored everywhere else.
+		 */
+		cur = (*addr & ~mask) | want;
+		ok = atomic_fcmpset_32(addr, &cur, (cur & ~mask) | val);
+		if (ok != 0)
+			break;
+
+		/* Give up only if the bits we care about really differ. */
+		if ((cur & mask) != want)
+			break;
+	}
+
+	return (ok);
+}
+
+/*
+ * Single-attempt masked compare-and-set.  On entry *old holds the expected
+ * value of the bits selected by mask, and val the replacement bits; both
+ * must already be shifted into position by the caller.
+ *
+ * atomic(9) documents that fcmpset_* may fail spuriously on ll/sc
+ * architectures (the sc can fail because of parallel writes or for other
+ * reasons), so callers already have to cope with retrying.  We rely on that
+ * and make exactly one attempt on the full word rather than looping when
+ * only the unrelated bits have changed.
+ *
+ * Returns the result of atomic_fcmpset_32(); *old is left holding the full
+ * word passed to (and possibly rewritten by) that call.
+ */
+static __inline int
+_atomic_fcmpset_masked_word(uint32_t *addr, uint32_t *old, uint32_t val,
+    uint32_t mask)
+{
+	const uint32_t others = ~mask;
+
+	/* Fill in the bits outside the mask from the word's current value. */
+	*old |= *addr & others;
+	return (atomic_fcmpset_32(addr, old, val | (*old & others)));
+}
+
+/*
+ * 8-bit compare-and-set, emulated on the 32-bit word that contains addr.
+ *
+ * old and val are moved to the byte's position within the word and handed,
+ * together with a mask covering just that byte, to
+ * _atomic_cmpset_masked_word().  Returns that helper's result.
+ */
+static __inline int
+atomic_cmpset_8(__volatile uint8_t *addr, uint8_t old, uint8_t val)
+{
+	int shift;
+
+	shift = _ATOMIC_BYTE_SHIFT(addr);
+
+	/*
+	 * Widen to uint32_t before shifting.  The operands would otherwise
+	 * be promoted to int, and shifting a set bit into the sign bit
+	 * (e.g. 0xff << 24) is undefined behavior.
+	 */
+	return (_atomic_cmpset_masked_word(_ATOMIC_WORD_ALIGNED(addr),
+	    (uint32_t)old << shift, (uint32_t)val << shift,
+	    (uint32_t)0xff << shift));
+}
+
+/*
+ * 8-bit fcmpset, emulated on the 32-bit word that contains addr.
+ *
+ * *old supplies the expected byte and val the replacement.  Both are moved
+ * to the byte's position within the word and passed, with a mask covering
+ * just that byte, to _atomic_fcmpset_masked_word().  When that helper
+ * returns zero, *old is refreshed with the byte extracted from the word it
+ * handed back.  Returns the helper's result.
+ */
+static __inline int
+atomic_fcmpset_8(__volatile uint8_t *addr, uint8_t *old, uint8_t val)
+{
+	int ret, shift;
+	uint32_t wold;
+
+	shift = _ATOMIC_BYTE_SHIFT(addr);
+
+	/*
+	 * Widen to uint32_t before shifting.  The operands would otherwise
+	 * be promoted to int, and shifting a set bit into the sign bit
+	 * (e.g. 0xff << 24) is undefined behavior.
+	 */
+	wold = (uint32_t)*old << shift;
+	ret = _atomic_fcmpset_masked_word(_ATOMIC_WORD_ALIGNED(addr),
+	    &wold, (uint32_t)val << shift, (uint32_t)0xff << shift);
+	if (ret == 0)
+		*old = (wold >> shift) & 0xff;
+	return (ret);
+}
+
+/*
+ * 16-bit compare-and-set, emulated on the 32-bit word that contains addr.
+ *
+ * old and val are moved to the half-word's position within the word and
+ * handed, together with a mask covering just that half-word, to
+ * _atomic_cmpset_masked_word().  Returns that helper's result.
+ */
+static __inline int
+atomic_cmpset_16(__volatile uint16_t *addr, uint16_t old, uint16_t val)
+{
+	int shift;
+
+	shift = _ATOMIC_HWORD_SHIFT(addr);
+
+	/*
+	 * Widen to uint32_t before shifting.  The operands would otherwise
+	 * be promoted to int, and shifting a set bit into the sign bit
+	 * (e.g. 0xffff << 16) is undefined behavior.
+	 */
+	return (_atomic_cmpset_masked_word(_ATOMIC_WORD_ALIGNED(addr),
+	    (uint32_t)old << shift, (uint32_t)val << shift,
+	    (uint32_t)0xffff << shift));
+}
+
+/*
+ * 16-bit fcmpset, emulated on the 32-bit word that contains addr.
+ *
+ * *old supplies the expected half-word and val the replacement.  Both are
+ * moved to the half-word's position within the word and passed, with a mask
+ * covering just that half-word, to _atomic_fcmpset_masked_word().  When
+ * that helper returns zero, *old is refreshed with the half-word extracted
+ * from the word it handed back.  Returns the helper's result.
+ */
+static __inline int
+atomic_fcmpset_16(__volatile uint16_t *addr, uint16_t *old, uint16_t val)
+{
+	int ret, shift;
+	uint32_t wold;
+
+	shift = _ATOMIC_HWORD_SHIFT(addr);
+
+	/*
+	 * Widen to uint32_t before shifting.  The operands would otherwise
+	 * be promoted to int, and shifting a set bit into the sign bit
+	 * (e.g. 0xffff << 16) is undefined behavior.
+	 */
+	wold = (uint32_t)*old << shift;
+	ret = _atomic_fcmpset_masked_word(_ATOMIC_WORD_ALIGNED(addr),
+	    &wold, (uint32_t)val << shift, (uint32_t)0xffff << shift);
+	if (ret == 0)
+		*old = (wold >> shift) & 0xffff;
+	return (ret);
+}
+
+#undef _ATOMIC_WORD_ALIGNED
+#undef _ATOMIC_BYTE_SHIFT
+#undef _ATOMIC_HWORD_SHIFT
+
+#endif	/* _SYS__ATOMIC_SUBWORD_H_ */