[PATCH] amd64/pcpu.h: Use Clang builtins for clarity when referencing thread's pcpu
Meyer, Conrad
conrad.meyer at isilon.com
Fri Mar 14 18:31:28 UTC 2014
We can efficiently reference thread-local pcpu members via the %gs
register with Clang-annotated C code, in place of inline GNU assembly.
Motivations:
- Use C in lieu of inline assembly for clarity
- Clang's static analyser may be better able to understand PCPU_*
macros using the C constructs rather than inline assembly
(unverified)
Sponsored by: EMC/Isilon storage division
Signed-off-by: Conrad Meyer <conrad.meyer at isilon.com>
Reviewed-by: Max Laier <mlaier at FreeBSD.org>
---
This is more of a "what do you think?" than a pull request. It seems like using
annotated C instead of asm is nice (in particular, Clang detects casts from
pointers typed with one segment to another, or to an unsegmented type). On the
other hand, this is code that doesn't change frequently, and we may still need to
support GCC for some time. So adding a second, parallel implementation just
doubles the room for bugs.
Open questions:
- How long is GCC intended to be supported as a compiler?
- How atomic does PCPU_INC() need to be? It looks like it updates cpu-local
counters; as long as it's a single asm instruction, should it be fine
w.r.t. interrupts? The existing implementation does NOT use the 'lock; ' prefix.
See the following simple example:
$ cat gstest.c
#include <sys/cdefs.h>
#include <stdint.h>
#define GS_RELATIVE __attribute__((address_space(256)))
/* Two-member example structure used only by this demonstration. */
struct pcpu {
void *curthread;	/* first member */
struct pcpu *self;	/* member loaded by __curpcpu(); %gs:0x8 in the objdump output below */
};
/*
 * Return the value stored in the 'self' member of the GS-relative
 * struct pcpu.
 *
 * The byte offset of 'self' is cast to a pointer whose pointee is
 * qualified with GS_RELATIVE (address_space(256)), so dereferencing it
 * compiles to a single %gs-relative load, as the objdump output that
 * follows shows.
 */
struct pcpu *
__curpcpu(void)
{
/* The pointed-to slot (of type 'volatile struct pcpu *') carries the GS_RELATIVE qualifier. */
volatile struct pcpu * GS_RELATIVE *res =
(volatile struct pcpu * GS_RELATIVE *)
__offsetof(struct pcpu, self);
/* Load the pointer stored at that GS-relative offset. */
return (*res);
}
$ clang -Wall -Wextra -O1 -c gstest.c
$ objdump -d gstest.o
gstest.o: file format elf64-x86-64
Disassembly of section .text:
0000000000000000 <__curpcpu>:
0: 65 48 8b 04 25 08 00 mov %gs:0x8,%rax
7: 00 00
9: c3 retq
Support has been present since at least April 9, 2009 (when
documentation of the feature was first added to
LanguageExtensions.html). So all Clang versions in BSD core (BSD9,
BSD10) should support the feature.
---
sys/amd64/include/pcpu.h | 98 +++++++++++++++++++++++++++++++++++++++++-------
1 file changed, 84 insertions(+), 14 deletions(-)
diff --git a/sys/amd64/include/pcpu.h b/sys/amd64/include/pcpu.h
index fe898e9..68892fc 100644
--- a/sys/amd64/include/pcpu.h
+++ b/sys/amd64/include/pcpu.h
@@ -81,7 +81,7 @@ extern struct pcpu *pcpup;
#define PCPU_PTR(member) (&pcpup->pc_ ## member)
#define PCPU_SET(member, val) (pcpup->pc_ ## member = (val))
-#elif defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE___TYPEOF)
+#elif defined(__clang__) || (defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE___TYPEOF))
/*
* Evaluates to the byte offset of the per-cpu variable name.
@@ -95,6 +95,80 @@ extern struct pcpu *pcpup;
#define __pcpu_type(name) \
__typeof(((struct pcpu *)0)->name)
+#if defined(__clang__)
+
+#define __GS_RELATIVE __attribute__((address_space(256)))
+
+/*
+ * Evaluates to the address of the per-cpu variable name.
+ */
+#define __PCPU_PTR(name) __extension__ ({ \
+ volatile __pcpu_type(name) __GS_RELATIVE *__p; \
+ \
+ __p = (volatile __pcpu_type(name) __GS_RELATIVE *)__pcpu_offset(name); \
+ __p; \
+})
+
+#define __PCPU_PTRX(name) __extension__ ({ \
+ volatile __pcpu_type(pc_prvspace) __GS_RELATIVE *__p; \
+ __pcpu_type(name) *__mp; \
+ \
+ __p = __PCPU_PTR(pc_prvspace); \
+ __mp = &(*__p)->name; \
+ __mp; \
+})
+#define PCPU_PTR(member) __PCPU_PTRX(pc_ ## member)
+
+/*
+ * Evaluates to the value of the per-cpu variable name.
+ */
+#define __PCPU_GET(name) __extension__ ({ \
+ *__PCPU_PTR(name); \
+})
+
+/*
+ * Adds the value to the per-cpu counter name. The implementation
+ * must be atomic with respect to interrupts.
+ */
+#define __PCPU_ADD(name, val) do { \
+ __pcpu_type(name) __val; \
+ volatile __pcpu_type(name) __GS_RELATIVE *__ptr; \
+ \
+ __val = (val); \
+ __ptr = __PCPU_PTR(name); \
+ *__ptr += __val; \
+} while (0)
+
+/*
+ * Increments the value of the per-cpu counter name. The implementation
+ * must be atomic with respect to interrupts.
+ */
+#define __PCPU_INC(name) __PCPU_ADD(name, 1)
+
+/*
+ * Sets the value of the per-cpu variable name to value val.
+ */
+#define __PCPU_SET(name, val) do { \
+ __pcpu_type(name) __val; \
+ volatile __pcpu_type(name) __GS_RELATIVE *__ptr; \
+ \
+ __val = (val); \
+ __ptr = __PCPU_PTR(name); \
+ *__ptr = __val; \
+} while (0)
+
+#define curthread __extension__ ({ \
+ *((volatile __pcpu_type(pc_curthread) __GS_RELATIVE *) \
+ __pcpu_offset(pc_curthread)); \
+})
+
+#define curpcb __extension__ ({ \
+ *((volatile __pcpu_type(pc_curpcb) __GS_RELATIVE *) \
+ __pcpu_offset(pc_curpcb)); \
+})
+
+#else /* !__clang__ */
+
/*
* Evaluates to the address of the per-cpu variable name.
*/
@@ -200,17 +274,7 @@ extern struct pcpu *pcpup;
} \
}
-#define PCPU_GET(member) __PCPU_GET(pc_ ## member)
-#define PCPU_ADD(member, val) __PCPU_ADD(pc_ ## member, val)
-#define PCPU_INC(member) __PCPU_INC(pc_ ## member)
-#define PCPU_PTR(member) __PCPU_PTR(pc_ ## member)
-#define PCPU_SET(member, val) __PCPU_SET(pc_ ## member, val)
-
#define OFFSETOF_CURTHREAD 0
-#ifdef __clang__
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wnull-dereference"
-#endif
static __inline __pure2 struct thread *
__curthread(void)
{
@@ -220,9 +284,6 @@ __curthread(void)
: "m" (*(char *)OFFSETOF_CURTHREAD));
return (td);
}
-#ifdef __clang__
-#pragma clang diagnostic pop
-#endif
#define curthread (__curthread())
#define OFFSETOF_CURPCB 32
@@ -236,6 +297,15 @@ __curpcb(void)
}
#define curpcb (__curpcb())
+#define PCPU_PTR(member) __PCPU_PTR(pc_ ## member)
+
+#endif /* __clang__ */
+
+#define PCPU_GET(member) __PCPU_GET(pc_ ## member)
+#define PCPU_ADD(member, val) __PCPU_ADD(pc_ ## member, val)
+#define PCPU_INC(member) __PCPU_INC(pc_ ## member)
+#define PCPU_SET(member, val) __PCPU_SET(pc_ ## member, val)
+
#define IS_BSP() (PCPU_GET(cpuid) == 0)
#else /* !lint || defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE___TYPEOF) */
--
1.8.5.3
More information about the freebsd-hackers
mailing list