git: 9c0b759bf9b5 - main - x86 atomics: use lock prefix unconditionally
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Fri, 04 Feb 2022 12:01:45 UTC
The branch main has been updated by kib: URL: https://cgit.FreeBSD.org/src/commit/?id=9c0b759bf9b520537616d026f21a0a98d70acd11 commit 9c0b759bf9b520537616d026f21a0a98d70acd11 Author: Konstantin Belousov <kib@FreeBSD.org> AuthorDate: 2022-02-03 09:51:36 +0000 Commit: Konstantin Belousov <kib@FreeBSD.org> CommitDate: 2022-02-04 12:01:39 +0000 x86 atomics: use lock prefix unconditionally Atomics have significant other use besides providing in-system primitives for safe memory updates. They are used for implementing communication with out of system software or hardware following some protocols. For instance, even UP kernel might require a protocol using atomics to communicate with the software-emulated device on SMP hypervisor. Or real hardware might need atomic accesses as part of the proper management protocol. Another point is that UP configurations on x86 are extinct, so slight performance hit by unconditionally use proper atomics is not important. It is compensated by less code clutter, which in fact improves the UP/i386 lifetime expectations. Requested by: Elliott Mitchell <ehem+freebsd@m5p.com> Reviewed by: Elliott Mitchell, imp, jhb, markj, royger Sponsored by: The FreeBSD Foundation MFC after: 1 week Differential revision: https://reviews.freebsd.org/D34153 --- sys/amd64/include/atomic.h | 67 +++++++++++----------------------------------- sys/i386/include/atomic.h | 52 +++++++++++------------------------ 2 files changed, 32 insertions(+), 87 deletions(-) diff --git a/sys/amd64/include/atomic.h b/sys/amd64/include/atomic.h index ed80c426add2..f0191970e8cc 100644 --- a/sys/amd64/include/atomic.h +++ b/sys/amd64/include/atomic.h @@ -143,16 +143,10 @@ void atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v) #else /* !__GNUCLIKE_ASM */ /* - * For userland, always use lock prefixes so that the binaries will run - * on both SMP and !SMP systems. - */ -#if defined(SMP) || !defined(_KERNEL) || defined(KLD_MODULE) -#define MPLOCKED "lock ; " -#else -#define MPLOCKED -#endif - -/* + * Always use lock prefixes. The result is slighly less optimal for + * UP systems, but it matters less now, and sometimes UP is emulated + * over SMP. + * * The assembly is volatilized to avoid code chunk removal by the compiler. * GCC aggressively reorders operations and memory clobbering is necessary * in order to avoid that for memory barriers. @@ -161,7 +155,7 @@ void atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v) static __inline void \ atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\ { \ - __asm __volatile(MPLOCKED OP \ + __asm __volatile("lock; " OP \ : "+m" (*p) \ : CONS (V) \ : "cc"); \ @@ -170,7 +164,7 @@ atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\ static __inline void \ atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\ { \ - __asm __volatile(MPLOCKED OP \ + __asm __volatile("lock; " OP \ : "+m" (*p) \ : CONS (V) \ : "memory", "cc"); \ @@ -199,8 +193,7 @@ atomic_cmpset_##TYPE(volatile u_##TYPE *dst, u_##TYPE expect, u_##TYPE src) \ u_char res; \ \ __asm __volatile( \ - " " MPLOCKED " " \ - " cmpxchg %3,%1 ; " \ + " lock; cmpxchg %3,%1 ; " \ "# atomic_cmpset_" #TYPE " " \ : "=@cce" (res), /* 0 */ \ "+m" (*dst), /* 1 */ \ @@ -216,8 +209,7 @@ atomic_fcmpset_##TYPE(volatile u_##TYPE *dst, u_##TYPE *expect, u_##TYPE src) \ u_char res; \ \ __asm __volatile( \ - " " MPLOCKED " " \ - " cmpxchg %3,%1 ; " \ + " lock; cmpxchg %3,%1 ; " \ "# atomic_fcmpset_" #TYPE " " \ : "=@cce" (res), /* 0 */ \ "+m" (*dst), /* 1 */ \ @@ -241,8 +233,7 @@ atomic_fetchadd_int(volatile u_int *p, u_int v) { __asm __volatile( - " " MPLOCKED " " - " xaddl %0,%1 ; " + " lock; xaddl %0,%1 ; " "# atomic_fetchadd_int" : "+r" (v), /* 0 */ "+m" (*p) /* 1 */ @@ -259,8 +250,7 @@ atomic_fetchadd_long(volatile u_long *p, u_long v) { __asm __volatile( - " " MPLOCKED " " - " xaddq %0,%1 ; " + " lock; xaddq %0,%1 ; " "# atomic_fetchadd_long" : "+r" (v), /* 0 */ "+m" (*p) /* 1 */ @@ -274,8 +264,7 @@ atomic_testandset_int(volatile u_int *p, u_int v) u_char res; __asm __volatile( - " " MPLOCKED " " - " btsl %2,%1 ; " + " lock; btsl %2,%1 ; " "# atomic_testandset_int" : "=@ccc" (res), /* 0 */ "+m" (*p) /* 1 */ @@ -290,8 +279,7 @@ atomic_testandset_long(volatile u_long *p, u_int v) u_char res; __asm __volatile( - " " MPLOCKED " " - " btsq %2,%1 ; " + " lock; btsq %2,%1 ; " "# atomic_testandset_long" : "=@ccc" (res), /* 0 */ "+m" (*p) /* 1 */ @@ -306,8 +294,7 @@ atomic_testandclear_int(volatile u_int *p, u_int v) u_char res; __asm __volatile( - " " MPLOCKED " " - " btrl %2,%1 ; " + " lock; btrl %2,%1 ; " "# atomic_testandclear_int" : "=@ccc" (res), /* 0 */ "+m" (*p) /* 1 */ @@ -322,8 +309,7 @@ atomic_testandclear_long(volatile u_long *p, u_int v) u_char res; __asm __volatile( - " " MPLOCKED " " - " btrq %2,%1 ; " + " lock; btrq %2,%1 ; " "# atomic_testandclear_long" : "=@ccc" (res), /* 0 */ "+m" (*p) /* 1 */ @@ -344,39 +330,18 @@ atomic_testandclear_long(volatile u_long *p, u_int v) * special address for "mem". In the kernel, we use a private per-cpu * cache line. In user space, we use a word in the stack's red zone * (-8(%rsp)). - * - * For UP kernels, however, the memory of the single processor is - * always consistent, so we only need to stop the compiler from - * reordering accesses in a way that violates the semantics of acquire - * and release. */ -#if defined(_KERNEL) - -#if defined(SMP) || defined(KLD_MODULE) static __inline void __storeload_barrier(void) { - +#if defined(_KERNEL) __asm __volatile("lock; addl $0,%%gs:%0" : "+m" (*(u_int *)OFFSETOF_MONITORBUF) : : "memory", "cc"); -} -#else /* _KERNEL && UP */ -static __inline void -__storeload_barrier(void) -{ - - __compiler_membar(); -} -#endif /* SMP */ #else /* !_KERNEL */ -static __inline void -__storeload_barrier(void) -{ - __asm __volatile("lock; addl $0,-8(%%rsp)" : : : "memory", "cc"); -} #endif /* _KERNEL*/ +} #define ATOMIC_LOAD(TYPE) \ static __inline u_##TYPE \ diff --git a/sys/i386/include/atomic.h b/sys/i386/include/atomic.h index c65a7ac83a93..2e72de935c23 100644 --- a/sys/i386/include/atomic.h +++ b/sys/i386/include/atomic.h @@ -141,16 +141,10 @@ void atomic_subtract_64(volatile uint64_t *, uint64_t); #else /* !__GNUCLIKE_ASM */ /* - * For userland, always use lock prefixes so that the binaries will run - * on both SMP and !SMP systems. - */ -#if defined(SMP) || !defined(_KERNEL) || defined(KLD_MODULE) -#define MPLOCKED "lock ; " -#else -#define MPLOCKED -#endif - -/* + * Always use lock prefixes. The result is slighly less optimal for + * UP systems, but it matters less now, and sometimes UP is emulated + * over SMP. + * * The assembly is volatilized to avoid code chunk removal by the compiler. * GCC aggressively reorders operations and memory clobbering is necessary * in order to avoid that for memory barriers. @@ -159,7 +153,7 @@ void atomic_subtract_64(volatile uint64_t *, uint64_t); static __inline void \ atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\ { \ - __asm __volatile(MPLOCKED OP \ + __asm __volatile("lock; " OP \ : "+m" (*p) \ : CONS (V) \ : "cc"); \ @@ -168,7 +162,7 @@ atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\ static __inline void \ atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\ { \ - __asm __volatile(MPLOCKED OP \ + __asm __volatile("lock; " OP \ : "+m" (*p) \ : CONS (V) \ : "memory", "cc"); \ @@ -197,8 +191,7 @@ atomic_cmpset_##TYPE(volatile u_##TYPE *dst, u_##TYPE expect, u_##TYPE src) \ u_char res; \ \ __asm __volatile( \ - " " MPLOCKED " " \ - " cmpxchg %3,%1 ; " \ + " lock; cmpxchg %3,%1 ; " \ " sete %0 ; " \ "# atomic_cmpset_" #TYPE " " \ : "=q" (res), /* 0 */ \ @@ -215,8 +208,7 @@ atomic_fcmpset_##TYPE(volatile u_##TYPE *dst, u_##TYPE *expect, u_##TYPE src) \ u_char res; \ \ __asm __volatile( \ - " " MPLOCKED " " \ - " cmpxchg %3,%1 ; " \ + " lock; cmpxchg %3,%1 ; " \ " sete %0 ; " \ "# atomic_fcmpset_" #TYPE " " \ : "=q" (res), /* 0 */ \ @@ -240,8 +232,7 @@ atomic_fetchadd_int(volatile u_int *p, u_int v) { __asm __volatile( - " " MPLOCKED " " - " xaddl %0,%1 ; " + " lock; xaddl %0,%1 ; " "# atomic_fetchadd_int" : "+r" (v), /* 0 */ "+m" (*p) /* 1 */ @@ -255,8 +246,7 @@ atomic_testandset_int(volatile u_int *p, u_int v) u_char res; __asm __volatile( - " " MPLOCKED " " - " btsl %2,%1 ; " + " lock; btsl %2,%1 ; " " setc %0 ; " "# atomic_testandset_int" : "=q" (res), /* 0 */ @@ -272,8 +262,7 @@ atomic_testandclear_int(volatile u_int *p, u_int v) u_char res; __asm __volatile( - " " MPLOCKED " " - " btrl %2,%1 ; " + " lock; btrl %2,%1 ; " " setc %0 ; " "# atomic_testandclear_int" : "=q" (res), /* 0 */ @@ -303,11 +292,7 @@ atomic_testandclear_int(volatile u_int *p, u_int v) */ #if defined(_KERNEL) -#if defined(SMP) || defined(KLD_MODULE) #define __storeload_barrier() __mbk() -#else /* _KERNEL && UP */ -#define __storeload_barrier() __compiler_membar() -#endif /* SMP */ #else /* !_KERNEL */ #define __storeload_barrier() __mbu() #endif /* _KERNEL*/ @@ -484,8 +469,7 @@ atomic_cmpset_64_i586(volatile uint64_t *dst, uint64_t expect, uint64_t src) u_char res; __asm __volatile( - " " MPLOCKED " " - " cmpxchg8b %1 ; " + " lock; cmpxchg8b %1 ; " " sete %0" : "=q" (res), /* 0 */ "+m" (*dst), /* 1 */ @@ -502,8 +486,7 @@ atomic_fcmpset_64_i586(volatile uint64_t *dst, uint64_t *expect, uint64_t src) u_char res; __asm __volatile( - " " MPLOCKED " " - " cmpxchg8b %1 ; " + " lock; cmpxchg8b %1 ; " " sete %0" : "=q" (res), /* 0 */ "+m" (*dst), /* 1 */ @@ -522,8 +505,7 @@ atomic_load_acq_64_i586(volatile uint64_t *p) __asm __volatile( " movl %%ebx,%%eax ; " " movl %%ecx,%%edx ; " - " " MPLOCKED " " - " cmpxchg8b %1" + " lock; cmpxchg8b %1" : "=&A" (res), /* 0 */ "+m" (*p) /* 1 */ : : "memory", "cc"); @@ -538,8 +520,7 @@ atomic_store_rel_64_i586(volatile uint64_t *p, uint64_t v) " movl %%eax,%%ebx ; " " movl %%edx,%%ecx ; " "1: " - " " MPLOCKED " " - " cmpxchg8b %0 ; " + " lock; cmpxchg8b %0 ; " " jne 1b" : "+m" (*p), /* 0 */ "+A" (v) /* 1 */ @@ -554,8 +535,7 @@ atomic_swap_64_i586(volatile uint64_t *p, uint64_t v) " movl %%eax,%%ebx ; " " movl %%edx,%%ecx ; " "1: " - " " MPLOCKED " " - " cmpxchg8b %0 ; " + " lock; cmpxchg8b %0 ; " " jne 1b" : "+m" (*p), /* 0 */ "+A" (v) /* 1 */