git: 2329393c615c - main - amd64: Use __seg_gs to implement per-CPU data accesses.

From: John Baldwin <jhb_at_FreeBSD.org>
Date: Fri, 07 Jul 2023 20:07:31 UTC
The branch main has been updated by jhb:

URL: https://cgit.FreeBSD.org/src/commit/?id=2329393c615cf61c19b0d7bd1ca18663e02789ce

commit 2329393c615cf61c19b0d7bd1ca18663e02789ce
Author:     John Baldwin <jhb@FreeBSD.org>
AuthorDate: 2023-07-07 20:06:55 +0000
Commit:     John Baldwin <jhb@FreeBSD.org>
CommitDate: 2023-07-07 20:06:55 +0000

    amd64: Use __seg_gs to implement per-CPU data accesses.
    
    This makes use of the alternate address space support in both GCC and
    clang to access per-CPU data as accesses relative to GS:.  The
    original motivation for this is that it quiets verbose warnings from
    GCC 12.  However, this version is also much easier to read and
    allows the compiler to generate better code (e.g. the compiler can
    use a GS: memory operand directly in other instructions such as IMUL
    and CMP rather than always MOVing to a temporary register).
    
    The one caveat is that the current approach is very inefficient at -O0
    since the compiler expects to load the 0 base offset from a global
    variable instead of assuming it is 0 (even with the const).
    
    Reviewed by:    kib
    Differential Revision:  https://reviews.freebsd.org/D40647
---
 sys/amd64/include/pcpu.h | 69 +++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 63 insertions(+), 6 deletions(-)

diff --git a/sys/amd64/include/pcpu.h b/sys/amd64/include/pcpu.h
index 523cf105e1c7..dc78a07b83e9 100644
--- a/sys/amd64/include/pcpu.h
+++ b/sys/amd64/include/pcpu.h
@@ -113,18 +113,74 @@ _Static_assert(sizeof(struct monitorbuf) == 128, "2x cache line");
 #define MONITOR_STOPSTATE_RUNNING	0
 #define MONITOR_STOPSTATE_STOPPED	1
 
-/*
- * Evaluates to the byte offset of the per-cpu variable name.
- */
-#define	__pcpu_offset(name)						\
-	__offsetof(struct pcpu, name)
-
 /*
  * Evaluates to the type of the per-cpu variable name.
  */
 #define	__pcpu_type(name)						\
 	__typeof(((struct pcpu *)0)->name)
 
+#ifdef __SEG_GS
+#define	get_pcpu() __extension__ ({					\
+	static struct pcpu __seg_gs *__pc = 0;				\
+									\
+	__pc->pc_prvspace;						\
+})
+
+/*
+ * Evaluates to the address of the per-cpu variable name.
+ */
+#define	__PCPU_PTR(name) __extension__ ({				\
+	struct pcpu *__pc = get_pcpu();					\
+									\
+	&__pc->name;							\
+})
+
+/*
+ * Evaluates to the value of the per-cpu variable name.
+ */
+#define	__PCPU_GET(name) __extension__ ({				\
+	static struct pcpu __seg_gs *__pc = 0;				\
+									\
+	__pc->name;							\
+})
+
+/*
+ * Adds the value to the per-cpu counter name.  The implementation
+ * must be atomic with respect to interrupts.
+ */
+#define	__PCPU_ADD(name, val) do {					\
+	static struct pcpu __seg_gs *__pc = 0;				\
+	__pcpu_type(name) __val;					\
+									\
+	__val = (val);							\
+	if (sizeof(__val) == 1 || sizeof(__val) == 2 ||			\
+	    sizeof(__val) == 4 || sizeof(__val) == 8) {			\
+		__pc->name += __val;					\
+	} else								\
+		*__PCPU_PTR(name) += __val;				\
+} while (0)
+
+/*
+ * Sets the value of the per-cpu variable name to value val.
+ */
+#define	__PCPU_SET(name, val) do {					\
+	static struct pcpu __seg_gs *__pc = 0;				\
+	__pcpu_type(name) __val;					\
+									\
+	__val = (val);							\
+	if (sizeof(__val) == 1 || sizeof(__val) == 2 ||			\
+	    sizeof(__val) == 4 || sizeof(__val) == 8) {			\
+		__pc->name = __val;					\
+	} else								\
+		*__PCPU_PTR(name) = __val;				\
+} while (0)
+#else /* !__SEG_GS */
+/*
+ * Evaluates to the byte offset of the per-cpu variable name.
+ */
+#define	__pcpu_offset(name)						\
+	__offsetof(struct pcpu, name)
+
 /*
  * Evaluates to the address of the per-cpu variable name.
  */
@@ -210,6 +266,7 @@ _Static_assert(sizeof(struct monitorbuf) == 128, "2x cache line");
 	    : "m" (*(struct pcpu *)(__pcpu_offset(pc_prvspace))));	\
 	__pc;								\
 })
+#endif /* !__SEG_GS */
 
 #define	PCPU_GET(member)	__PCPU_GET(pc_ ## member)
 #define	PCPU_ADD(member, val)	__PCPU_ADD(pc_ ## member, val)