From e6c7d10a97c959e72ce312e51f7a27fc577a0333 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Fri, 11 Feb 2022 21:13:51 +0000 Subject: [PATCH 01/49] sparc: Fix membar_sync with LDSTUB. membar_sync is required to be a full sequential consistency barrier, equivalent to MEMBAR #StoreStore|LoadStore|StoreLoad|LoadLoad on sparcv9. LDSTUB and SWAP are the only pre-v9 instructions that do this and SWAP doesn't exist on all v7 hardware, so use LDSTUB. Note: I'm having a hard time nailing down a reference for the ordering implied by LDSTUB and SWAP. I'm _pretty sure_ SWAP has to imply store-load ordering since the SPARCv8 manual recommends it for Dekker's algorithm (which notoriously requires store-load ordering), and the formal memory model treats LDSTUB and SWAP the same for ordering. But the v8 and v9 manuals aren't clear. GCC issues STBAR and LDSTUB, but (a) I don't see why STBAR is necessary here, (b) STBAR doesn't exist on v7 so it'd be a pain to use, and (c) from what I've heard (although again it's hard to nail down authoritative references here) all actual SPARC hardware is TSO or SC anyway so STBAR is a noop in all the silicon anyway. Either way, certainly this is better than what we had before, which was nothing implying ordering at all, just a store! --- .../lib/libc/arch/sparc/atomic/membar_ops.S | 36 ++++++++++++++----- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/common/lib/libc/arch/sparc/atomic/membar_ops.S b/common/lib/libc/arch/sparc/atomic/membar_ops.S index a534ef8dc436..9a56fec1a1dc 100644 --- a/common/lib/libc/arch/sparc/atomic/membar_ops.S +++ b/common/lib/libc/arch/sparc/atomic/membar_ops.S @@ -31,25 +31,43 @@ #include "atomic_op_asm.h" +#ifdef _KERNEL_OPT +#include "opt_multiprocessor.h" +#endif + .text -/* These assume Total Store Order (TSO) */ +/* + * These assume Total Store Order (TSO), which may reorder + * store-before-load but nothing else. Hence, only membar_sync must + * issue anything -- specifically, an LDSTUB, which (along with SWAP) + * is the only instruction that implies a sequential consistency + * barrier. + * + * If we ran with Partial Store Order (PSO), we would also need to + * issue STBAR for membar_exit (load/store-before-store) and + * membar_producer (store-before-store). + */ -ENTRY(_membar_producer) +ENTRY(_membar_consumer) retl nop +END(_membar_consumer) -ENTRY(_membar_consumer) - add %sp, -112, %sp - st %g0, [%sp+100] +ENTRY(_membar_sync) retl - sub %sp, -112, %sp +#if !defined(_KERNEL) || defined(MULTIPROCESSOR) + ldstub [%sp - 4], %g0 +#else + nop +#endif +END(_membar_sync) -ATOMIC_OP_ALIAS(membar_producer,_membar_producer) +ATOMIC_OP_ALIAS(membar_producer,_membar_consumer) +STRONG_ALIAS(_membar_producer,_membar_consumer) ATOMIC_OP_ALIAS(membar_consumer,_membar_consumer) ATOMIC_OP_ALIAS(membar_enter,_membar_consumer) STRONG_ALIAS(_membar_enter,_membar_consumer) ATOMIC_OP_ALIAS(membar_exit,_membar_consumer) STRONG_ALIAS(_membar_exit,_membar_consumer) -ATOMIC_OP_ALIAS(membar_sync,_membar_consumer) -STRONG_ALIAS(_membar_sync,_membar_consumer) +ATOMIC_OP_ALIAS(membar_sync,_membar_sync) From 38a80a96615db640dbc1cfec86ce64a391a71ec5 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Fri, 11 Feb 2022 21:29:26 +0000 Subject: [PATCH 02/49] sparc64: Fix membar_sync by issuing membar #StoreLoad. In TSO this is the only memory barrier ever needed, and somehow we got this wrong and instead issued an unnecessary membar #LoadLoad -- not needed even in PSO let alone in TSO. 
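As a concrete illustration of why membar_sync is the one barrier that cannot
be elided under TSO, here is a minimal store-buffering sketch in C -- a
simplified Dekker-style entry protocol.  The flag array and the
critical-section comments are purely illustrative, not code from this change:

	#include <sys/atomic.h>

	volatile unsigned want[2];	/* hypothetical per-CPU flags */

	/* CPU 0 */
	want[0] = 1;
	membar_sync();	/* order the store to want[0] before the load of want[1] */
	if (want[1] == 0) {
		/* may enter the critical section */
	}

	/* CPU 1 */
	want[1] = 1;
	membar_sync();	/* order the store to want[1] before the load of want[0] */
	if (want[0] == 0) {
		/* may enter the critical section */
	}

Without the barriers, TSO allows both CPUs to read 0 from the other's flag
and both to enter the critical section at once.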
XXX Apparently we may run userland programs with PSO or RMO, in which case all of these membars need fixing: PSO RMO membar_consumer nop membar #LoadLoad membar_producer membar #StoreStore membar #StoreStore membar_enter nop membar #LoadLoad|LoadStore membar_exit membar #StoreStore membar #LoadStore|StoreStore membar_sync membar #StoreLoad|StoreStore membar #...everything... But at least this fixes the TSO case in which we run the kernel. Also I'm not sure there's any non-TSO hardware out there in practice. --- .../lib/libc/arch/sparc64/atomic/membar_ops.S | 40 +++++++++++++++---- 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/common/lib/libc/arch/sparc64/atomic/membar_ops.S b/common/lib/libc/arch/sparc64/atomic/membar_ops.S index 2090aaf669f6..9413a8385336 100644 --- a/common/lib/libc/arch/sparc64/atomic/membar_ops.S +++ b/common/lib/libc/arch/sparc64/atomic/membar_ops.S @@ -33,22 +33,48 @@ .text -/* These assume Total Store Order (TSO) */ +/* + * These assume Total Store Order (TSO), which may reorder + * store-before-load but nothing else. Hence, only membar_sync must + * issue anything -- namely, membar #StoreLoad. + * + * If we ran with Partial Store Order (PSO), we would also need to + * issue membar #StoreStore for membar_exit (load/store-before-store) + * and membar_producer (store-before-store). + */ -ENTRY(_membar_producer) +ENTRY(_membar_consumer) retl nop +END(_membar_consumer) -ENTRY(_membar_consumer) - membar #LoadLoad +ENTRY(_membar_sync) + /* + * Some SPARC CPUs have errata with MEMBAR in the delay slot of + * a branch, such as the UltraSPARC-IIi: + * + * `Apparently, the deadlock is most easily caused if the + * delay slot of the JMPL is a MEMBAR #Sync, or any + * instruction that synchronizes on the load or store + * buffers being empty.' + * + * UltraSPARC-IIi User's Manual, Part No. 805-0087-01, Sun + * Microsystems, October 1997, Appendix K.2 `Errata + * Created by UltraSPARC-I', Erratum 51, p. 476. + * https://www.oracle.com/technetwork/server-storage/sun-sparc-enterprise/documentation/sparc-2i-usersmanual-2516677.pdf#page=518 + * + * So let's avoid doing that. + */ + membar #StoreLoad retl nop +END(_membar_sync) -ATOMIC_OP_ALIAS(membar_producer,_membar_producer) +ATOMIC_OP_ALIAS(membar_producer,_membar_consumer) +STRONG_ALIAS(_membar_producer,_membar_consumer) ATOMIC_OP_ALIAS(membar_consumer,_membar_consumer) ATOMIC_OP_ALIAS(membar_enter,_membar_consumer) STRONG_ALIAS(_membar_enter,_membar_consumer) ATOMIC_OP_ALIAS(membar_exit,_membar_consumer) STRONG_ALIAS(_membar_exit,_membar_consumer) -ATOMIC_OP_ALIAS(membar_sync,_membar_consumer) -STRONG_ALIAS(_membar_sync,_membar_consumer) +ATOMIC_OP_ALIAS(membar_sync,_membar_sync) From 5cf7a1c46e2dc956ebabc2f105df8ef33982de80 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Wed, 30 Mar 2022 20:33:03 +0000 Subject: [PATCH 03/49] x86: Every load is a load-acquire, so membar_consumer is a noop. lfence is only needed for MD logic, such as operations on I/O memory rather than normal cacheable memory, or special instructions like RDTSC -- never for MI synchronization between threads/CPUs. No need for hot-patching to do lfence here. (The x86_lfence function might reasonably be patched on i386 to do lfence for MD logic, but it isn't now and this doesn't change that.) 
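For illustration only, the sort of MD use where LFENCE still earns its keep
is ordering a time-stamp read.  This is a hypothetical sketch, not something
this change adds or relies on, and it assumes a CPU on which LFENCE is
dispatch-serializing:

	#include <stdint.h>

	static inline uint64_t
	rdtsc_ordered(void)
	{
		uint32_t lo, hi;

		/* MD barrier: keep RDTSC from issuing ahead of prior instructions. */
		__asm volatile("lfence" ::: "memory");
		__asm volatile("rdtsc" : "=a"(lo), "=d"(hi));
		return ((uint64_t)hi << 32) | lo;
	}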
--- common/lib/libc/arch/i386/atomic/atomic.S | 17 ++++---------- common/lib/libc/arch/x86_64/atomic/atomic.S | 17 ++++---------- sys/arch/amd64/include/frameasm.h | 3 +-- sys/arch/i386/include/frameasm.h | 9 ++++--- sys/arch/x86/x86/patch.c | 26 +++++++-------------- 5 files changed, 23 insertions(+), 49 deletions(-) diff --git a/common/lib/libc/arch/i386/atomic/atomic.S b/common/lib/libc/arch/i386/atomic/atomic.S index d1fcef92a4fe..8c494ca5f587 100644 --- a/common/lib/libc/arch/i386/atomic/atomic.S +++ b/common/lib/libc/arch/i386/atomic/atomic.S @@ -46,12 +46,10 @@ #include "opt_xen.h" #include #define LOCK HOTPATCH(HP_NAME_NOLOCK, 1); lock -#define HOTPATCH_SSE2_LFENCE HOTPATCH(HP_NAME_SSE2_LFENCE, 7); #define HOTPATCH_SSE2_MFENCE HOTPATCH(HP_NAME_SSE2_MFENCE, 7); #define HOTPATCH_CAS_64 HOTPATCH(HP_NAME_CAS_64, 49); #else #define LOCK lock -#define HOTPATCH_SSE2_LFENCE /* nothing */ #define HOTPATCH_SSE2_MFENCE /* nothing */ #define HOTPATCH_CAS_64 /* nothing */ #endif @@ -181,10 +179,11 @@ ENTRY(_atomic_cas_32_ni) END(_atomic_cas_32_ni) ENTRY(_membar_consumer) - HOTPATCH_SSE2_LFENCE - /* 7 bytes of instructions */ - LOCK - addl $0, -4(%esp) + /* + * Every load from normal memory is a load-acquire on x86, so + * there is never any need for explicit barriers to order + * load-before-anything. + */ ret END(_membar_consumer) @@ -396,12 +395,6 @@ STRONG_ALIAS(_membar_exit,_membar_producer) #ifdef _HARDKERNEL .section .rodata -LABEL(sse2_lfence) - lfence - ret - nop; nop; nop; -LABEL(sse2_lfence_end) - LABEL(sse2_mfence) mfence ret diff --git a/common/lib/libc/arch/x86_64/atomic/atomic.S b/common/lib/libc/arch/x86_64/atomic/atomic.S index 0206a746a8f2..a483aa98de1d 100644 --- a/common/lib/libc/arch/x86_64/atomic/atomic.S +++ b/common/lib/libc/arch/x86_64/atomic/atomic.S @@ -41,11 +41,9 @@ #ifdef _HARDKERNEL #include #define LOCK HOTPATCH(HP_NAME_NOLOCK, 1); lock -#define HOTPATCH_SSE2_LFENCE HOTPATCH(HP_NAME_SSE2_LFENCE, 8); #define HOTPATCH_SSE2_MFENCE HOTPATCH(HP_NAME_SSE2_MFENCE, 8); #else #define LOCK lock -#define HOTPATCH_SSE2_LFENCE /* nothing */ #define HOTPATCH_SSE2_MFENCE /* nothing */ #endif @@ -256,10 +254,11 @@ END(_atomic_cas_64_ni) /* memory barriers */ ENTRY(_membar_consumer) - HOTPATCH_SSE2_LFENCE - /* 8 bytes of instructions */ - LOCK - addq $0, -8(%rsp) + /* + * Every load from normal memory is a load-acquire on x86, so + * there is never any need for explicit barriers to order + * load-before-anything. 
+ */ ret END(_membar_consumer) @@ -419,12 +418,6 @@ STRONG_ALIAS(_membar_exit,_membar_producer) #ifdef _HARDKERNEL .section .rodata -LABEL(sse2_lfence) - lfence - ret - nop; nop; nop; nop; -LABEL(sse2_lfence_end) - LABEL(sse2_mfence) mfence ret diff --git a/sys/arch/amd64/include/frameasm.h b/sys/arch/amd64/include/frameasm.h index bbd30dd78e57..e82077dd8e03 100644 --- a/sys/arch/amd64/include/frameasm.h +++ b/sys/arch/amd64/include/frameasm.h @@ -63,8 +63,7 @@ #define HP_NAME_SVS_ENTER_NMI 11 #define HP_NAME_SVS_LEAVE_NMI 12 #define HP_NAME_MDS_LEAVE 13 -#define HP_NAME_SSE2_LFENCE 14 -#define HP_NAME_SSE2_MFENCE 15 +#define HP_NAME_SSE2_MFENCE 14 #define HOTPATCH(name, size) \ 123: ; \ diff --git a/sys/arch/i386/include/frameasm.h b/sys/arch/i386/include/frameasm.h index f24d05b164d8..3467fa521046 100644 --- a/sys/arch/i386/include/frameasm.h +++ b/sys/arch/i386/include/frameasm.h @@ -48,11 +48,10 @@ #define HP_NAME_STAC 2 #define HP_NAME_NOLOCK 3 #define HP_NAME_RETFENCE 4 -#define HP_NAME_SSE2_LFENCE 5 -#define HP_NAME_SSE2_MFENCE 6 -#define HP_NAME_CAS_64 7 -#define HP_NAME_SPLLOWER 8 -#define HP_NAME_MUTEX_EXIT 9 +#define HP_NAME_SSE2_MFENCE 5 +#define HP_NAME_CAS_64 6 +#define HP_NAME_SPLLOWER 7 +#define HP_NAME_MUTEX_EXIT 8 #define HOTPATCH(name, size) \ 123: ; \ diff --git a/sys/arch/x86/x86/patch.c b/sys/arch/x86/x86/patch.c index 4b91b67dc668..69efb230b05c 100644 --- a/sys/arch/x86/x86/patch.c +++ b/sys/arch/x86/x86/patch.c @@ -117,19 +117,6 @@ static const struct x86_hotpatch_descriptor hp_nolock_desc = { }; __link_set_add_rodata(x86_hotpatch_descriptors, hp_nolock_desc); -/* Use LFENCE if available, part of SSE2. */ -extern uint8_t sse2_lfence, sse2_lfence_end; -static const struct x86_hotpatch_source hp_sse2_lfence_source = { - .saddr = &sse2_lfence, - .eaddr = &sse2_lfence_end -}; -static const struct x86_hotpatch_descriptor hp_sse2_lfence_desc = { - .name = HP_NAME_SSE2_LFENCE, - .nsrc = 1, - .srcs = { &hp_sse2_lfence_source } -}; -__link_set_add_rodata(x86_hotpatch_descriptors, hp_sse2_lfence_desc); - /* Use MFENCE if available, part of SSE2. */ extern uint8_t sse2_mfence, sse2_mfence_end; static const struct x86_hotpatch_source hp_sse2_mfence_source = { @@ -342,12 +329,15 @@ x86_patch(bool early) if (!early && (cpu_feature[0] & CPUID_SSE2) != 0) { /* - * Faster memory barriers. We do not need to patch - * membar_producer to use SFENCE because on x86 - * ordinary non-temporal stores are always issued in - * program order to main memory and to other CPUs. + * Faster memory barriers. The only barrier x86 ever + * requires for MI synchronization between CPUs is + * MFENCE for store-before-load ordering; all other + * ordering is guaranteed already -- every load is a + * load-acquire and every store is a store-release. + * + * LFENCE and SFENCE are relevant only for MD logic + * involving I/O devices or non-temporal stores. */ - x86_hotpatch(HP_NAME_SSE2_LFENCE, 0); x86_hotpatch(HP_NAME_SSE2_MFENCE, 0); } From eca7e21fbb9357cb4f97eef2462b5a07cb960c84 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Wed, 30 Mar 2022 20:55:02 +0000 Subject: [PATCH 04/49] x86: Omit needless store in membar_producer/exit. On x86, every store is a store-release, so there is no need for any barrier. But this wasn't a barrier anyway; it was just a store, which was redundant with the store of the return address to the stack implied by CALL even if issuing a store made a difference. 
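To make the MI contract concrete, this is the pairing that membar_producer
and membar_consumer exist to order; data, ready, and use() are hypothetical
names, not code from this change.  On x86, per this and the previous change,
both calls now compile to bare returns -- the hardware already orders
store-before-store and load-before-load -- while the calls themselves still
act as compiler barriers:

	#include <sys/atomic.h>

	extern void use(int);	/* hypothetical consumer of the payload */

	int data;		/* hypothetical payload */
	volatile int ready;	/* hypothetical flag */

	/* producer */
	data = 42;
	membar_producer();	/* store data before store ready */
	ready = 1;

	/* consumer */
	if (ready) {
		membar_consumer();	/* load ready before load data */
		use(data);
	}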
--- common/lib/libc/arch/i386/atomic/atomic.S | 7 +++++-- common/lib/libc/arch/x86_64/atomic/atomic.S | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/common/lib/libc/arch/i386/atomic/atomic.S b/common/lib/libc/arch/i386/atomic/atomic.S index 8c494ca5f587..43582643103a 100644 --- a/common/lib/libc/arch/i386/atomic/atomic.S +++ b/common/lib/libc/arch/i386/atomic/atomic.S @@ -188,8 +188,11 @@ ENTRY(_membar_consumer) END(_membar_consumer) ENTRY(_membar_producer) - /* A store is enough */ - movl $0, -4(%esp) + /* + * Every store to normal memory is a store-release on x86, so + * there is never any need for explicit barriers to order + * anything-before-store. + */ ret END(_membar_producer) diff --git a/common/lib/libc/arch/x86_64/atomic/atomic.S b/common/lib/libc/arch/x86_64/atomic/atomic.S index a483aa98de1d..02554cd00edd 100644 --- a/common/lib/libc/arch/x86_64/atomic/atomic.S +++ b/common/lib/libc/arch/x86_64/atomic/atomic.S @@ -263,8 +263,11 @@ ENTRY(_membar_consumer) END(_membar_consumer) ENTRY(_membar_producer) - /* A store is enough */ - movq $0, -8(%rsp) + /* + * Every store to normal memory is a store-release on x86, so + * there is never any need for explicit barriers to order + * anything-before-store. + */ ret END(_membar_producer) From 5d2a99d07a13577c7b8d7ef60603d7459067ff5b Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Wed, 30 Mar 2022 21:08:23 +0000 Subject: [PATCH 05/49] x86: Add a note on membar_sync and mfence. --- common/lib/libc/arch/i386/atomic/atomic.S | 6 ++++++ common/lib/libc/arch/x86_64/atomic/atomic.S | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/common/lib/libc/arch/i386/atomic/atomic.S b/common/lib/libc/arch/i386/atomic/atomic.S index 43582643103a..988cf5775b35 100644 --- a/common/lib/libc/arch/i386/atomic/atomic.S +++ b/common/lib/libc/arch/i386/atomic/atomic.S @@ -197,6 +197,12 @@ ENTRY(_membar_producer) END(_membar_producer) ENTRY(_membar_sync) + /* + * MFENCE, or a serializing instruction like a locked addq, + * is necessary to order store-before-load. Every other + * ordering -- load-before-anything, anything-before-store -- + * is already guaranteed without explicit barriers. + */ HOTPATCH_SSE2_MFENCE /* 7 bytes of instructions */ LOCK diff --git a/common/lib/libc/arch/x86_64/atomic/atomic.S b/common/lib/libc/arch/x86_64/atomic/atomic.S index 02554cd00edd..a940e7abfff6 100644 --- a/common/lib/libc/arch/x86_64/atomic/atomic.S +++ b/common/lib/libc/arch/x86_64/atomic/atomic.S @@ -272,6 +272,12 @@ ENTRY(_membar_producer) END(_membar_producer) ENTRY(_membar_sync) + /* + * MFENCE, or a serializing instruction like a locked addq, + * is necessary to order store-before-load. Every other + * ordering -- load-before-anything, anything-before-store -- + * is already guaranteed without explicit barriers. + */ HOTPATCH_SSE2_MFENCE /* 8 bytes of instructions */ LOCK From a385e4207e555a4133c6397f14a6425925cf8ce0 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Wed, 30 Mar 2022 22:02:54 +0000 Subject: [PATCH 06/49] aarch64/membar_ops: Fix wrong symbol end. 
--- common/lib/libc/arch/aarch64/atomic/membar_ops.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/lib/libc/arch/aarch64/atomic/membar_ops.S b/common/lib/libc/arch/aarch64/atomic/membar_ops.S index 898104d6ebbc..8b9009dcc39f 100644 --- a/common/lib/libc/arch/aarch64/atomic/membar_ops.S +++ b/common/lib/libc/arch/aarch64/atomic/membar_ops.S @@ -42,7 +42,7 @@ STRONG_ALIAS(_membar_write,_membar_producer) ENTRY_NP(_membar_consumer) dmb ishld ret -END(_membar_producer) +END(_membar_consumer) ATOMIC_OP_ALIAS(membar_consumer,_membar_consumer) ATOMIC_OP_ALIAS(membar_read,_membar_consumer) STRONG_ALIAS(_membar_read,_membar_consumer) From f3d53b83723a1a4de14c46f6f78438df9dd99a1a Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Thu, 31 Mar 2022 00:01:51 +0000 Subject: [PATCH 07/49] sparc/membar_ops: Upgrade membar_enter from R/RW to RW/RW. This will be deprecated soon but let's avoid leaving rakes to trip on with it arising from disagreement over the documentation (W/RW) and implementation and usage (R/RW). --- common/lib/libc/arch/sparc/atomic/membar_ops.S | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/common/lib/libc/arch/sparc/atomic/membar_ops.S b/common/lib/libc/arch/sparc/atomic/membar_ops.S index 9a56fec1a1dc..a1c780bf1089 100644 --- a/common/lib/libc/arch/sparc/atomic/membar_ops.S +++ b/common/lib/libc/arch/sparc/atomic/membar_ops.S @@ -39,10 +39,10 @@ /* * These assume Total Store Order (TSO), which may reorder - * store-before-load but nothing else. Hence, only membar_sync must - * issue anything -- specifically, an LDSTUB, which (along with SWAP) - * is the only instruction that implies a sequential consistency - * barrier. + * store-before-load but nothing else. Hence, only membar_sync (and + * its deprecated alias membar_enter) must issue anything -- + * specifically, an LDSTUB, which (along with SWAP) is the only + * instruction that implies a sequential consistency barrier. * * If we ran with Partial Store Order (PSO), we would also need to * issue STBAR for membar_exit (load/store-before-store) and @@ -66,8 +66,8 @@ END(_membar_sync) ATOMIC_OP_ALIAS(membar_producer,_membar_consumer) STRONG_ALIAS(_membar_producer,_membar_consumer) ATOMIC_OP_ALIAS(membar_consumer,_membar_consumer) -ATOMIC_OP_ALIAS(membar_enter,_membar_consumer) -STRONG_ALIAS(_membar_enter,_membar_consumer) +ATOMIC_OP_ALIAS(membar_enter,_membar_sync) +STRONG_ALIAS(_membar_enter,_membar_sync) ATOMIC_OP_ALIAS(membar_exit,_membar_consumer) STRONG_ALIAS(_membar_exit,_membar_consumer) ATOMIC_OP_ALIAS(membar_sync,_membar_sync) From 68d782f7e30692238d21bcae815bf4a56cbac138 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Thu, 31 Mar 2022 00:03:59 +0000 Subject: [PATCH 08/49] sparc64/membar_ops: Upgrade membar_enter from R/RW to RW/RW. This will be deprecated soon but let's avoid leaving rakes to trip on with it arising from disagreement over the documentation (W/RW) and implementation and usage (R/RW). --- common/lib/libc/arch/sparc64/atomic/membar_ops.S | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/common/lib/libc/arch/sparc64/atomic/membar_ops.S b/common/lib/libc/arch/sparc64/atomic/membar_ops.S index 9413a8385336..4f26b9e2d0bc 100644 --- a/common/lib/libc/arch/sparc64/atomic/membar_ops.S +++ b/common/lib/libc/arch/sparc64/atomic/membar_ops.S @@ -35,8 +35,9 @@ /* * These assume Total Store Order (TSO), which may reorder - * store-before-load but nothing else. 
Hence, only membar_sync must - * issue anything -- namely, membar #StoreLoad. + * store-before-load but nothing else. Hence, only membar_sync (and + * its deprecated alias membar_enter) must issue anything -- namely, + * membar #StoreLoad. * * If we ran with Partial Store Order (PSO), we would also need to * issue membar #StoreStore for membar_exit (load/store-before-store) @@ -73,8 +74,8 @@ END(_membar_sync) ATOMIC_OP_ALIAS(membar_producer,_membar_consumer) STRONG_ALIAS(_membar_producer,_membar_consumer) ATOMIC_OP_ALIAS(membar_consumer,_membar_consumer) -ATOMIC_OP_ALIAS(membar_enter,_membar_consumer) -STRONG_ALIAS(_membar_enter,_membar_consumer) +ATOMIC_OP_ALIAS(membar_enter,_membar_sync) +STRONG_ALIAS(_membar_enter,_membar_sync) ATOMIC_OP_ALIAS(membar_exit,_membar_consumer) STRONG_ALIAS(_membar_exit,_membar_consumer) ATOMIC_OP_ALIAS(membar_sync,_membar_sync) From 6e5237f5dca2bc354317f648b8968b16cd60f449 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Thu, 31 Mar 2022 00:32:08 +0000 Subject: [PATCH 09/49] i386/membar_ops: Upgrade membar_enter from R/RW to RW/RW. This will be deprecated soon but let's avoid leaving rakes to trip on with it arising from disagreement over the documentation (W/RW) and implementation and usage (R/RW). --- common/lib/libc/arch/i386/atomic/atomic.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/common/lib/libc/arch/i386/atomic/atomic.S b/common/lib/libc/arch/i386/atomic/atomic.S index 988cf5775b35..2fad744d36c3 100644 --- a/common/lib/libc/arch/i386/atomic/atomic.S +++ b/common/lib/libc/arch/i386/atomic/atomic.S @@ -342,7 +342,7 @@ ALIAS(__sync_val_compare_and_swap_8,_atomic_cas_64) ALIAS(membar_consumer,_membar_consumer) ALIAS(membar_producer,_membar_producer) -ALIAS(membar_enter,_membar_consumer) +ALIAS(membar_enter,_membar_sync) ALIAS(membar_exit,_membar_producer) ALIAS(membar_sync,_membar_sync) @@ -398,7 +398,7 @@ STRONG_ALIAS(_atomic_cas_uint_ni,_atomic_cas_32_ni) STRONG_ALIAS(_atomic_cas_ulong_ni,_atomic_cas_32_ni) STRONG_ALIAS(_atomic_cas_ptr_ni,_atomic_cas_32_ni) -STRONG_ALIAS(_membar_enter,_membar_consumer) +STRONG_ALIAS(_membar_enter,_membar_sync) STRONG_ALIAS(_membar_exit,_membar_producer) #ifdef _HARDKERNEL From a4f5167d5fcf50d3469768647751fcce3cbb8fb5 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Thu, 31 Mar 2022 00:34:17 +0000 Subject: [PATCH 10/49] x86_64/membar_ops: Upgrade membar_enter from R/RW to RW/RW. This will be deprecated soon but let's avoid leaving rakes to trip on with it arising from disagreement over the documentation (W/RW) and implementation and usage (R/RW). 
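The case where the two readings actually diverge is a barrier following an
atomic read/modify/write, e.g. acquiring a test-and-set lock.  The lock
structure below is hypothetical, shown only to pin down the pattern:

	#include <sys/atomic.h>

	struct tas_lock {
		volatile unsigned locked;	/* hypothetical */
	};

	void
	tas_acquire(struct tas_lock *lk)
	{
		while (atomic_swap_uint(&lk->locked, 1) != 0)
			continue;	/* spin until the old value was 0 */
		membar_enter();		/* RW/RW now: covers both the W/RW the
					 * documentation promised and the R/RW
					 * that callers actually relied on */
	}

	/* the caller may then load and store the data lk protects */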
--- common/lib/libc/arch/x86_64/atomic/atomic.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/common/lib/libc/arch/x86_64/atomic/atomic.S b/common/lib/libc/arch/x86_64/atomic/atomic.S index a940e7abfff6..a76015b91874 100644 --- a/common/lib/libc/arch/x86_64/atomic/atomic.S +++ b/common/lib/libc/arch/x86_64/atomic/atomic.S @@ -365,7 +365,7 @@ ALIAS(atomic_cas_ptr_ni,_atomic_cas_64_ni) ALIAS(membar_consumer,_membar_consumer) ALIAS(membar_producer,_membar_producer) -ALIAS(membar_enter,_membar_consumer) +ALIAS(membar_enter,_membar_sync) ALIAS(membar_exit,_membar_producer) ALIAS(membar_sync,_membar_sync) @@ -421,7 +421,7 @@ STRONG_ALIAS(_atomic_cas_uint_ni,_atomic_cas_32_ni) STRONG_ALIAS(_atomic_cas_ulong_ni,_atomic_cas_64_ni) STRONG_ALIAS(_atomic_cas_ptr_ni,_atomic_cas_64_ni) -STRONG_ALIAS(_membar_enter,_membar_consumer) +STRONG_ALIAS(_membar_enter,_membar_sync) STRONG_ALIAS(_membar_exit,_membar_producer) #ifdef _HARDKERNEL From f57b09ce38e99bbd27eaa1984ff9c0a40e777155 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Thu, 31 Mar 2022 00:35:43 +0000 Subject: [PATCH 11/49] riscv/membar_ops: Upgrade membar_enter from W/RW to RW/RW. This will be deprecated soon but let's avoid leaving rakes to trip on with it arising from disagreement over the documentation (W/RW) and implementation and usage (R/RW). --- common/lib/libc/arch/riscv/atomic/membar_ops.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/lib/libc/arch/riscv/atomic/membar_ops.S b/common/lib/libc/arch/riscv/atomic/membar_ops.S index 03d303ab4293..20aa64af3449 100644 --- a/common/lib/libc/arch/riscv/atomic/membar_ops.S +++ b/common/lib/libc/arch/riscv/atomic/membar_ops.S @@ -39,7 +39,7 @@ ATOMIC_OP_ALIAS(membar_sync,_membar_sync) CRT_ALIAS(__sync_synchronize,_membar_sync) ENTRY_NP(_membar_enter) - fence w,rw + fence rw,rw ret END(_membar_enter) ATOMIC_OP_ALIAS(membar_enter,_membar_enter) From df5f1407eb831c41a94ea623d01a75bf13ccfa67 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Fri, 11 Feb 2022 21:03:08 +0000 Subject: [PATCH 12/49] Introduce membar_acquire/release. Deprecate membar_enter/exit. The names membar_enter/exit were unclear, and the documentation of membar_enter has disagreed with the implementations on sparc, powerpc, and even x86(!) for the entire time it has been in NetBSD. The terms `acquire' and `release' are ubiquitous in the literature today, and have been adopted in the C and C++ standards to mean load-before-load/store and load/store-before-store, respectively, which are exactly the orderings required by acquiring and releasing a mutex, as well as other useful applications like decrementing a reference count and then freeing the underlying object if it went to zero. Originally I proposed changing one word in the documentation for membar_enter to make it load-before-load/store instead of store-before-load/store, i.e., to make it an acquire barrier. I proposed this on the grounds that (a) all implementations guarantee load-before-load/store, (b) some implementations fail to guarantee store-before-load/store, and (c) all uses in-tree assume load-before-load/store. I verified parts (a) and (b) (except, for (a), powerpc didn't even guarantee load-before-load/store -- isync isn't necessarily enough; need lwsync in general -- but it _almost_ did, and it certainly didn't guarantee store-before-load/store). 
Part (c) might not be correct, however: under the mistaken assumption that atomic-r/m/w then membar-w/rw is equivalent to atomic-r/m/w then membar-r/rw, I only audited the cases of membar_enter that _aren't_ immediately after an atomic-r/m/w. All of those cases assume load-before-load/store. But my assumption was wrong -- there are cases of atomic-r/m/w then membar-w/rw that would be broken by changing to atomic-r/m/w then membar-r/rw: https://mail-index.netbsd.org/tech-kern/2022/03/29/msg028044.html Furthermore, the name membar_enter has been adopted in other places like OpenBSD where it actually does follow the documentation and guarantee store-before-load/store, even if that order is not useful. So the name membar_enter currently lives in a bad place where it means either of two things -- r/rw or w/rw. With this change, we deprecate membar_enter/exit, introduce membar_acquire/release as better names for the useful pair (r/rw and rw/w), and make sure the implementation of membar_enter guarantees both what was documented _and_ what was implemented, making it an alias for membar_sync. While here, rework all of the membar_* definitions and aliases. The new logic follows a rule to make it easier to audit: membar_X is defined as an alias for membar_Y iff membar_X is no stronger than membar_Y. The `no stronger than' relation is (the transitive closure of): - membar_consumer (r/r) is no stronger than membar_acquire (r/rw) - membar_producer (w/w) is no stronger than membar_release (rw/w) - membar_acquire (r/rw) is no stronger than membar_sync (rw/rw) - membar_release (rw/w) is no stronger than membar_sync (rw/rw) And, for the deprecated membars: - membar_enter (w/rw) is no stronger than membar_sync (rw/rw) - membar_exit (rw/w) is no stronger than membar_release (rw/w) (membar_exit is identical to membar_release, but the name is deprecated.) Finally, while here, annotate some of the instructions with their semantics. For powerpc, leave an essay with citations on the unfortunate but -- as far as I can tell -- necessary decision to use lwsync, not isync, for membar_acquire and membar_consumer. Also add membar(3) and atomic(3) man page links. 
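Concretely, the canonical pairing with the new names looks like this
(adapted from the membar_ops(3) example updated below; obj, refcnt, and
mumblefrotz are the man page's illustrative names):

	/* thread A -- drop a reference */
	obj->state.mumblefrotz = 42;
	membar_release();	/* load/store-before-store */
	atomic_dec_uint(&obj->refcnt);

	/* thread B -- wait for references to drain, then tear down */
	while (atomic_cas_uint(&obj->refcnt, 0, -1) != 0)
		continue;
	membar_acquire();	/* load-before-load/store */
	KASSERT(valid(&obj->state));
	obj->state.mumblefrotz--;

If the load in thread B's atomic_cas_uint observes the store from thread A's
atomic_dec_uint that brought the count to zero, then everything A did before
membar_release is guaranteed to happen before everything B does after
membar_acquire.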
--- .../lib/libc/arch/aarch64/atomic/membar_ops.S | 14 +- .../lib/libc/arch/alpha/atomic/membar_ops.S | 25 ++-- common/lib/libc/arch/arm/atomic/membar_ops.S | 8 +- common/lib/libc/arch/hppa/atomic/membar_ops.S | 36 ++--- common/lib/libc/arch/i386/atomic/atomic.S | 22 +-- common/lib/libc/arch/ia64/atomic/atomic.S | 12 ++ common/lib/libc/arch/mips/atomic/membar_ops.S | 28 +++- common/lib/libc/arch/or1k/atomic/membar_ops.S | 17 ++- .../lib/libc/arch/powerpc/atomic/membar_ops.S | 115 +++++++++++++-- .../lib/libc/arch/riscv/atomic/membar_ops.S | 12 ++ .../lib/libc/arch/sparc/atomic/membar_ops.S | 26 ++-- .../lib/libc/arch/sparc64/atomic/membar_ops.S | 24 ++-- common/lib/libc/arch/x86_64/atomic/atomic.S | 22 +-- common/lib/libc/atomic/membar_ops_nop.c | 6 + distrib/sets/lists/comp/mi | 12 ++ lib/libc/atomic/Makefile.inc | 4 + lib/libc/atomic/membar_ops.3 | 135 ++++++++++-------- share/man/man9/atomic_loadstore.9 | 34 ++--- sys/sys/atomic.h | 10 +- tests/lib/libc/membar/t_dekker.c | 4 - tests/lib/libc/membar/t_spinlock.c | 4 - 21 files changed, 387 insertions(+), 183 deletions(-) diff --git a/common/lib/libc/arch/aarch64/atomic/membar_ops.S b/common/lib/libc/arch/aarch64/atomic/membar_ops.S index 8b9009dcc39f..1cac3fa47fd2 100644 --- a/common/lib/libc/arch/aarch64/atomic/membar_ops.S +++ b/common/lib/libc/arch/aarch64/atomic/membar_ops.S @@ -32,19 +32,21 @@ #include "atomic_op_asm.h" ENTRY_NP(_membar_producer) - dmb ishst + dmb ishst /* store-before-store */ ret END(_membar_producer) ATOMIC_OP_ALIAS(membar_producer,_membar_producer) ATOMIC_OP_ALIAS(membar_write,_membar_producer) STRONG_ALIAS(_membar_write,_membar_producer) -ENTRY_NP(_membar_consumer) - dmb ishld +ENTRY_NP(_membar_acquire) + dmb ishld /* load-before-load/store */ ret -END(_membar_consumer) -ATOMIC_OP_ALIAS(membar_consumer,_membar_consumer) +END(_membar_acquire) +ATOMIC_OP_ALIAS(membar_acquire,_membar_acquire) +ATOMIC_OP_ALIAS(membar_consumer,_membar_acquire) ATOMIC_OP_ALIAS(membar_read,_membar_consumer) +STRONG_ALIAS(_membar_consumer,_membar_acquire) STRONG_ALIAS(_membar_read,_membar_consumer) ENTRY_NP(_membar_sync) @@ -52,8 +54,10 @@ ENTRY_NP(_membar_sync) ret END(_membar_sync) ATOMIC_OP_ALIAS(membar_sync,_membar_sync) +ATOMIC_OP_ALIAS(membar_release,_membar_sync) ATOMIC_OP_ALIAS(membar_enter,_membar_sync) ATOMIC_OP_ALIAS(membar_exit,_membar_sync) STRONG_ALIAS(__sync_synchronize,_membar_sync) +STRONG_ALIAS(_membar_release,_membar_sync) STRONG_ALIAS(_membar_enter,_membar_sync) STRONG_ALIAS(_membar_exit,_membar_sync) diff --git a/common/lib/libc/arch/alpha/atomic/membar_ops.S b/common/lib/libc/arch/alpha/atomic/membar_ops.S index 7ec1b011bfb4..5a1b0bb4f343 100644 --- a/common/lib/libc/arch/alpha/atomic/membar_ops.S +++ b/common/lib/libc/arch/alpha/atomic/membar_ops.S @@ -42,45 +42,50 @@ LEAF(_membar_producer, 0) RET nop - END(_membar_producer) +END(_membar_producer) EXPORT(_membar_producer_end) LEAF(_membar_sync, 0) RET nop - END(_membar_sync) +END(_membar_sync) EXPORT(_membar_sync_end) LEAF(_membar_producer_mp, 0) - wmb + wmb /* store-before-store */ RET - END(_membar_producer_mp) +END(_membar_producer_mp) EXPORT(_membar_producer_mp_end) LEAF(_membar_sync_mp, 0) - mb + mb /* load/store-before-load/store */ RET - END(_membar_sync_mp) +END(_membar_sync_mp) EXPORT(_membar_sync_mp_end) #else /* _KERNEL */ LEAF(_membar_producer, 0) - mb + mb /* load/store-before-load/store */ RET - END(_membar_producer) +END(_membar_producer) EXPORT(_membar_producer_end) LEAF(_membar_sync, 0) - mb + mb /* load/store-before-load/store */ RET - 
END(_membar_sync) +END(_membar_sync) EXPORT(_membar_sync_end) #endif /* _KERNEL */ ATOMIC_OP_ALIAS(membar_producer,_membar_producer) ATOMIC_OP_ALIAS(membar_sync,_membar_sync) + +ATOMIC_OP_ALIAS(membar_acquire,_membar_sync) +STRONG_ALIAS(_membar_acquire,_membar_sync) +ATOMIC_OP_ALIAS(membar_release,_membar_sync) +STRONG_ALIAS(_membar_release,_membar_sync) ATOMIC_OP_ALIAS(membar_enter,_membar_sync) STRONG_ALIAS(_membar_enter,_membar_sync) ATOMIC_OP_ALIAS(membar_exit,_membar_sync) diff --git a/common/lib/libc/arch/arm/atomic/membar_ops.S b/common/lib/libc/arch/arm/atomic/membar_ops.S index 08efd74d38fc..710eee16558e 100644 --- a/common/lib/libc/arch/arm/atomic/membar_ops.S +++ b/common/lib/libc/arch/arm/atomic/membar_ops.S @@ -33,7 +33,7 @@ #if defined(_ARM_ARCH_6) ENTRY_NP(_membar_producer) - DMBST + DMBST /* store-before-store */ RET END(_membar_producer) ATOMIC_OP_ALIAS(membar_producer,_membar_producer) @@ -41,15 +41,19 @@ ATOMIC_OP_ALIAS(membar_write,_membar_producer) STRONG_ALIAS(_membar_write,_membar_producer) ENTRY_NP(_membar_sync) - DMB + DMB /* load/store-before-load/store */ RET END(_membar_sync) ATOMIC_OP_ALIAS(membar_sync,_membar_sync) +ATOMIC_OP_ALIAS(membar_acquire,_membar_sync) +ATOMIC_OP_ALIAS(membar_release,_membar_sync) ATOMIC_OP_ALIAS(membar_enter,_membar_sync) ATOMIC_OP_ALIAS(membar_exit,_membar_sync) ATOMIC_OP_ALIAS(membar_consumer,_membar_sync) ATOMIC_OP_ALIAS(membar_read,_membar_sync) STRONG_ALIAS(__sync_synchronize,_membar_sync) +STRONG_ALIAS(_membar_acquire,_membar_sync) +STRONG_ALIAS(_membar_release,_membar_sync) STRONG_ALIAS(_membar_enter,_membar_sync) STRONG_ALIAS(_membar_exit,_membar_sync) STRONG_ALIAS(_membar_consumer,_membar_sync) diff --git a/common/lib/libc/arch/hppa/atomic/membar_ops.S b/common/lib/libc/arch/hppa/atomic/membar_ops.S index a8b5414478c7..a55dad7d5012 100644 --- a/common/lib/libc/arch/hppa/atomic/membar_ops.S +++ b/common/lib/libc/arch/hppa/atomic/membar_ops.S @@ -35,7 +35,7 @@ RCSID("$NetBSD: membar_ops.S,v 1.1 2011/01/17 07:40:21 skrll Exp $") .text -LEAF_ENTRY(_membar_consumer) +LEAF_ENTRY(_membar_sync) sync nop nop @@ -44,24 +44,18 @@ LEAF_ENTRY(_membar_consumer) nop bv %r0(%rp) nop -EXIT(_membar_consumer) +EXIT(_membar_sync) +ATOMIC_OP_ALIAS(membar_sync,_membar_sync) -LEAF_ENTRY(_membar_producer) - sync - nop - nop - nop - nop - nop - bv %r0(%rp) - nop -EXIT(_membar_producer) - -ATOMIC_OP_ALIAS(membar_producer,_membar_producer) -ATOMIC_OP_ALIAS(membar_consumer,_membar_consumer) -ATOMIC_OP_ALIAS(membar_enter,_membar_consumer) -STRONG_ALIAS(_membar_enter,_membar_consumer) -ATOMIC_OP_ALIAS(membar_exit,_membar_producer) -STRONG_ALIAS(_membar_exit,_membar_producer) -ATOMIC_OP_ALIAS(membar_sync,_membar_producer) -STRONG_ALIAS(_membar_sync,_membar_producer) +ATOMIC_OP_ALIAS(membar_producer,_membar_sync) +STRONG_ALIAS(_membar_producer,_membar_sync) +ATOMIC_OP_ALIAS(membar_consumer,_membar_sync) +STRONG_ALIAS(_membar_consumer,_membar_sync) +ATOMIC_OP_ALIAS(membar_acquire,_membar_sync) +STRONG_ALIAS(_membar_acquire,_membar_sync) +ATOMIC_OP_ALIAS(membar_release,_membar_sync) +STRONG_ALIAS(_membar_release,_membar_sync) +ATOMIC_OP_ALIAS(membar_enter,_membar_sync) +STRONG_ALIAS(_membar_enter,_membar_sync) +ATOMIC_OP_ALIAS(membar_exit,_membar_sync) +STRONG_ALIAS(_membar_exit,_membar_sync) diff --git a/common/lib/libc/arch/i386/atomic/atomic.S b/common/lib/libc/arch/i386/atomic/atomic.S index 2fad744d36c3..d363f53f5118 100644 --- a/common/lib/libc/arch/i386/atomic/atomic.S +++ b/common/lib/libc/arch/i386/atomic/atomic.S @@ -178,23 +178,23 @@ 
ENTRY(_atomic_cas_32_ni) ret END(_atomic_cas_32_ni) -ENTRY(_membar_consumer) +ENTRY(_membar_acquire) /* * Every load from normal memory is a load-acquire on x86, so * there is never any need for explicit barriers to order * load-before-anything. */ ret -END(_membar_consumer) +END(_membar_acquire) -ENTRY(_membar_producer) +ENTRY(_membar_release) /* * Every store to normal memory is a store-release on x86, so * there is never any need for explicit barriers to order * anything-before-store. */ ret -END(_membar_producer) +END(_membar_release) ENTRY(_membar_sync) /* @@ -340,10 +340,14 @@ ALIAS(atomic_cas_64_ni,_atomic_cas_64) ALIAS(__sync_val_compare_and_swap_8,_atomic_cas_64) #endif /* __HAVE_ATOMIC64_OPS || _KERNEL */ -ALIAS(membar_consumer,_membar_consumer) -ALIAS(membar_producer,_membar_producer) +ALIAS(membar_acquire,_membar_acquire) +ALIAS(membar_release,_membar_release) +ALIAS(membar_sync,_membar_sync) + +ALIAS(membar_consumer,_membar_acquire) +ALIAS(membar_producer,_membar_release) ALIAS(membar_enter,_membar_sync) -ALIAS(membar_exit,_membar_producer) +ALIAS(membar_exit,_membar_release) ALIAS(membar_sync,_membar_sync) STRONG_ALIAS(_atomic_add_int,_atomic_add_32) @@ -398,8 +402,10 @@ STRONG_ALIAS(_atomic_cas_uint_ni,_atomic_cas_32_ni) STRONG_ALIAS(_atomic_cas_ulong_ni,_atomic_cas_32_ni) STRONG_ALIAS(_atomic_cas_ptr_ni,_atomic_cas_32_ni) +STRONG_ALIAS(_membar_consumer,_membar_acquire) +STRONG_ALIAS(_membar_producer,_membar_release) STRONG_ALIAS(_membar_enter,_membar_sync) -STRONG_ALIAS(_membar_exit,_membar_producer) +STRONG_ALIAS(_membar_exit,_membar_release) #ifdef _HARDKERNEL .section .rodata diff --git a/common/lib/libc/arch/ia64/atomic/atomic.S b/common/lib/libc/arch/ia64/atomic/atomic.S index 9c4d78b13c6a..9872b666d0b8 100644 --- a/common/lib/libc/arch/ia64/atomic/atomic.S +++ b/common/lib/libc/arch/ia64/atomic/atomic.S @@ -117,6 +117,16 @@ ENTRY(_membar_producer,0) br.ret.sptk rp END(_membar_producer) +ENTRY(_membar_acquire,0) + mf + br.ret.sptk rp +END(_membar_acquire) + +ENTRY(_membar_release,0) + mf + br.ret.sptk rp +END(_membar_release) + ENTRY(_membar_enter,0) mf br.ret.sptk rp @@ -213,6 +223,8 @@ ALIAS(atomic_cas_ptr_ni,_atomic_cas_64) ALIAS(membar_consumer,_membar_consumer) ALIAS(membar_producer,_membar_producer) +ALIAS(membar_acquire,_membar_acquire) +ALIAS(membar_release,_membar_release) ALIAS(membar_enter,_membar_enter) ALIAS(membar_exit,_membar_exit) ALIAS(membar_sync,_membar_sync) diff --git a/common/lib/libc/arch/mips/atomic/membar_ops.S b/common/lib/libc/arch/mips/atomic/membar_ops.S index 950536856082..dae62c17d1d6 100644 --- a/common/lib/libc/arch/mips/atomic/membar_ops.S +++ b/common/lib/libc/arch/mips/atomic/membar_ops.S @@ -40,22 +40,40 @@ LEAF(_membar_sync) END(_membar_sync) #ifdef __OCTEON__ -LEAF(_membar_producer) +LEAF(_membar_release) + /* + * syncw is documented as ordering store-before-store in + * + * Cavium OCTEON III CN78XX Hardware Reference Manual, + * CN78XX-HM-0.99E, September 2014. + * + * It's unclear from the documentation the architecture + * guarantees load-before-store ordering without barriers, but + * this code assumes it does. If that assumption is wrong, we + * can only use syncw for membar_producer -- membar_release has + * to use the full sync. 
+ */ j ra syncw -END(_membar_producer) +END(_membar_release) #endif ATOMIC_OP_ALIAS(membar_sync,_membar_sync) +ATOMIC_OP_ALIAS(membar_acquire,_membar_sync) +STRONG_ALIAS(_membar_acquire,_membar_sync) ATOMIC_OP_ALIAS(membar_enter,_membar_sync) STRONG_ALIAS(_membar_enter,_membar_sync) #ifdef __OCTEON__ -ATOMIC_OP_ALIAS(membar_exit,_membar_producer) -STRONG_ALIAS(_membar_exit,_membar_producer) -STRONG_ALIAS(membar_producer,_membar_producer) +ATOMIC_OP_ALIAS(membar_exit,_membar_release) +STRONG_ALIAS(_membar_exit,_membar_release) +ATOMIC_OP_ALIAS(membar_release,_membar_release) +ATOMIC_OP_ALIAS(membar_producer,_membar_release) +STRONG_ALIAS(_membar_producer,_membar_release) #else ATOMIC_OP_ALIAS(membar_exit,_membar_sync) STRONG_ALIAS(_membar_exit,_membar_sync) +ATOMIC_OP_ALIAS(membar_release,_membar_sync) +STRONG_ALIAS(_membar_release,_membar_sync) ATOMIC_OP_ALIAS(membar_producer,_membar_sync) STRONG_ALIAS(_membar_producer,_membar_sync) #endif diff --git a/common/lib/libc/arch/or1k/atomic/membar_ops.S b/common/lib/libc/arch/or1k/atomic/membar_ops.S index 3ce1b187005a..3bd07573b14c 100644 --- a/common/lib/libc/arch/or1k/atomic/membar_ops.S +++ b/common/lib/libc/arch/or1k/atomic/membar_ops.S @@ -31,27 +31,26 @@ #include "atomic_op_asm.h" -ENTRY_NP(_membar_producer) - l.msync - l.jr lr - l.nop -END(_membar_producer) -ATOMIC_OP_ALIAS(membar_producer,_membar_producer) -ATOMIC_OP_ALIAS(membar_write,_membar_producer) -STRONG_ALIAS(_membar_write,_membar_producer) - ENTRY_NP(_membar_sync) l.msync l.jr lr l.nop END(_membar_sync) ATOMIC_OP_ALIAS(membar_sync,_membar_sync) +ATOMIC_OP_ALIAS(membar_acquire,_membar_sync) +ATOMIC_OP_ALIAS(membar_release,_membar_sync) ATOMIC_OP_ALIAS(membar_enter,_membar_sync) ATOMIC_OP_ALIAS(membar_exit,_membar_sync) ATOMIC_OP_ALIAS(membar_consumer,_membar_sync) ATOMIC_OP_ALIAS(membar_read,_membar_sync) +ATOMIC_OP_ALIAS(membar_producer,_membar_sync) +ATOMIC_OP_ALIAS(membar_write,_membar_sync) CRT_ALIAS(__sync_synchronize,_membar_sync) +STRONG_ALIAS(_membar_acquire,_membar_sync) +STRONG_ALIAS(_membar_release,_membar_sync) STRONG_ALIAS(_membar_enter,_membar_sync) STRONG_ALIAS(_membar_exit,_membar_sync) STRONG_ALIAS(_membar_consumer,_membar_sync) STRONG_ALIAS(_membar_read,_membar_sync) +STRONG_ALIAS(_membar_producer,_membar_sync) +STRONG_ALIAS(_membar_write,_membar_sync) diff --git a/common/lib/libc/arch/powerpc/atomic/membar_ops.S b/common/lib/libc/arch/powerpc/atomic/membar_ops.S index 387c887a3340..d254e7dc2d37 100644 --- a/common/lib/libc/arch/powerpc/atomic/membar_ops.S +++ b/common/lib/libc/arch/powerpc/atomic/membar_ops.S @@ -34,23 +34,108 @@ __RCSID("$NetBSD: membar_ops.S,v 1.4 2011/01/15 07:31:11 matt Exp $") .text -/* These assume Total Store Order (TSO) */ -ENTRY(_membar_consumer) - isync +ENTRY(_membar_acquire) + /* + * It is tempting to use isync to order load-before-load/store. + * However, isync orders prior loads only if their value flows + * into a control-flow dependency prior to the isync: + * + * `[I]f an isync follows a conditional Branch instruction + * that depends on the value returned by a preceding Load + * instruction, the load on which the Branch depends is + * performed before any loads caused by instructions + * following the isync. This applies even if the effects + * of the ``dependency'' are independent of the value + * loaded (e.g., the value is compared to itself and the + * Branch tests the EQ bit in the selected CR field), and + * even if the branch target is the sequentially next + * instruction.' 
+ * + * --PowerPC Virtual Environment Architecture, Book II, + * Version 2.01, December 2003, 1.7.1 `Storage Access + * Ordering', p. 7. + * + * We are required here, however, to order _all_ prior loads, + * even if they do not flow into any control flow dependency. + * For example: + * + * x = *p; + * membar_acquire(); + * if (x) goto foo; + * + * This can't be implemented by: + * + * lwz x, p + * isync + * cmpwi x, 0 + * bne foo + * + * isync doesn't work here because there's no conditional + * dependency on x between the lwz x, p and the isync. + * + * isync would only work if it followed the branch: + * + * lwz x, p + * isync + * cmpwi x, 0 + * bne foo + * ... + * foo: isync + * ... + * + * lwsync orders everything except store-before-load, so it + * serves here -- see below in membar_release in lwsync. + * Except we can't use it on booke, so use sync for now. + */ + sync blr -END(_membar_consumer) +END(_membar_acquire) +ATOMIC_OP_ALIAS(membar_acquire,_membar_acquire) -ENTRY(_membar_producer) +ENTRY(_membar_release) + /* + * `The memory barrier provides an ordering function for + * the storage accesses caused by Load, Store, and dcbz + * instructions that are executed by the processor + * executing the [lwsync] instruction and for which the + * specified storage location is in storage that is + * Memory Coherence Required and is neither Write Through + * Required nor Caching Inhibited. The applicable pairs + * are all pairs a_i, b_j of such accesses except those + * in which a_i is an access caused by a Store or dcbz + * instruction and b_j is an access caused by a Load + * instruction.' + * + * --PowerPC Virtual Environment Architecture, Book II, + * Version 2.01, December 2003, 3.3.3 `Memory Barrier + * Instructions', p. 25. + * + * In brief, lwsync is an acquire-release barrier -- it orders + * load-before-load/store and load/store-before-store, but not + * store-before-load. Except we can't use it on booke, so use + * sync for now. + */ sync blr -END(_membar_producer) - -ATOMIC_OP_ALIAS(membar_producer,_membar_producer) -ATOMIC_OP_ALIAS(membar_consumer,_membar_consumer) -ATOMIC_OP_ALIAS(membar_enter,_membar_consumer) -STRONG_ALIAS(_membar_enter,_membar_consumer) -ATOMIC_OP_ALIAS(membar_exit,_membar_producer) -STRONG_ALIAS(_membar_exit,_membar_producer) -ATOMIC_OP_ALIAS(membar_sync,_membar_producer) -STRONG_ALIAS(_membar_sync,_membar_producer) +END(_membar_release) +ATOMIC_OP_ALIAS(membar_release,_membar_release) + +ENTRY(_membar_sync) + /* + * sync, or `heavyweight sync', is a full sequential + * consistency barrier. 
+ */ + sync + blr +END(_membar_sync) +ATOMIC_OP_ALIAS(membar_sync,_membar_sync) + +ATOMIC_OP_ALIAS(membar_producer,_membar_release) +STRONG_ALIAS(_membar_producer,_membar_release) +ATOMIC_OP_ALIAS(membar_consumer,_membar_acquire) +STRONG_ALIAS(_membar_consumer,_membar_acquire) +ATOMIC_OP_ALIAS(membar_enter,_membar_sync) +STRONG_ALIAS(_membar_enter,_membar_sync) +ATOMIC_OP_ALIAS(membar_exit,_membar_release) +STRONG_ALIAS(_membar_exit,_membar_release) diff --git a/common/lib/libc/arch/riscv/atomic/membar_ops.S b/common/lib/libc/arch/riscv/atomic/membar_ops.S index 20aa64af3449..43cb54a98556 100644 --- a/common/lib/libc/arch/riscv/atomic/membar_ops.S +++ b/common/lib/libc/arch/riscv/atomic/membar_ops.S @@ -38,6 +38,18 @@ END(_membar_sync) ATOMIC_OP_ALIAS(membar_sync,_membar_sync) CRT_ALIAS(__sync_synchronize,_membar_sync) +ENTRY_NP(_membar_acquire) + fence r,rw + ret +END(_membar_acquire) +ATOMIC_OP_ALIAS(membar_acquire,_membar_acquire) + +ENTRY_NP(_membar_release) + fence rw,w + ret +END(_membar_release) +ATOMIC_OP_ALIAS(membar_release,_membar_release) + ENTRY_NP(_membar_enter) fence rw,rw ret diff --git a/common/lib/libc/arch/sparc/atomic/membar_ops.S b/common/lib/libc/arch/sparc/atomic/membar_ops.S index a1c780bf1089..780cde6ba905 100644 --- a/common/lib/libc/arch/sparc/atomic/membar_ops.S +++ b/common/lib/libc/arch/sparc/atomic/membar_ops.S @@ -45,14 +45,21 @@ * instruction that implies a sequential consistency barrier. * * If we ran with Partial Store Order (PSO), we would also need to - * issue STBAR for membar_exit (load/store-before-store) and + * issue STBAR for membar_release (load/store-before-store) and * membar_producer (store-before-store). */ -ENTRY(_membar_consumer) +ENTRY(_membar_acquire) retl nop -END(_membar_consumer) +END(_membar_acquire) +ATOMIC_OP_ALIAS(membar_acquire,_membar_acquire) + +ENTRY(_membar_release) + retl + nop +END(_membar_release) +ATOMIC_OP_ALIAS(membar_release,_membar_release) ENTRY(_membar_sync) retl @@ -62,12 +69,13 @@ ENTRY(_membar_sync) nop #endif END(_membar_sync) +ATOMIC_OP_ALIAS(membar_sync,_membar_sync) -ATOMIC_OP_ALIAS(membar_producer,_membar_consumer) -STRONG_ALIAS(_membar_producer,_membar_consumer) -ATOMIC_OP_ALIAS(membar_consumer,_membar_consumer) +ATOMIC_OP_ALIAS(membar_producer,_membar_release) +STRONG_ALIAS(_membar_producer,_membar_release) +ATOMIC_OP_ALIAS(membar_consumer,_membar_acquire) +STRONG_ALIAS(_membar_consumer,_membar_acquire) ATOMIC_OP_ALIAS(membar_enter,_membar_sync) STRONG_ALIAS(_membar_enter,_membar_sync) -ATOMIC_OP_ALIAS(membar_exit,_membar_consumer) -STRONG_ALIAS(_membar_exit,_membar_consumer) -ATOMIC_OP_ALIAS(membar_sync,_membar_sync) +ATOMIC_OP_ALIAS(membar_exit,_membar_release) +STRONG_ALIAS(_membar_exit,_membar_release) diff --git a/common/lib/libc/arch/sparc64/atomic/membar_ops.S b/common/lib/libc/arch/sparc64/atomic/membar_ops.S index 4f26b9e2d0bc..23b74c82f15b 100644 --- a/common/lib/libc/arch/sparc64/atomic/membar_ops.S +++ b/common/lib/libc/arch/sparc64/atomic/membar_ops.S @@ -44,10 +44,17 @@ * and membar_producer (store-before-store). 
*/ -ENTRY(_membar_consumer) +ENTRY(_membar_acquire) retl nop -END(_membar_consumer) +END(_membar_acquire) +ATOMIC_OP_ALIAS(membar_acquire,_membar_acquire) + +ENTRY(_membar_release) + retl + nop +END(_membar_release) +ATOMIC_OP_ALIAS(membar_release,_membar_release) ENTRY(_membar_sync) /* @@ -70,12 +77,13 @@ ENTRY(_membar_sync) retl nop END(_membar_sync) +ATOMIC_OP_ALIAS(membar_sync,_membar_sync) -ATOMIC_OP_ALIAS(membar_producer,_membar_consumer) -STRONG_ALIAS(_membar_producer,_membar_consumer) -ATOMIC_OP_ALIAS(membar_consumer,_membar_consumer) +ATOMIC_OP_ALIAS(membar_producer,_membar_release) +STRONG_ALIAS(_membar_producer,_membar_release) +ATOMIC_OP_ALIAS(membar_consumer,_membar_acquire) +STRONG_ALIAS(_membar_consumer,_membar_acquire) ATOMIC_OP_ALIAS(membar_enter,_membar_sync) STRONG_ALIAS(_membar_enter,_membar_sync) -ATOMIC_OP_ALIAS(membar_exit,_membar_consumer) -STRONG_ALIAS(_membar_exit,_membar_consumer) -ATOMIC_OP_ALIAS(membar_sync,_membar_sync) +ATOMIC_OP_ALIAS(membar_exit,_membar_release) +STRONG_ALIAS(_membar_exit,_membar_release) diff --git a/common/lib/libc/arch/x86_64/atomic/atomic.S b/common/lib/libc/arch/x86_64/atomic/atomic.S index a76015b91874..58149dfda2f5 100644 --- a/common/lib/libc/arch/x86_64/atomic/atomic.S +++ b/common/lib/libc/arch/x86_64/atomic/atomic.S @@ -253,23 +253,23 @@ END(_atomic_cas_64_ni) /* memory barriers */ -ENTRY(_membar_consumer) +ENTRY(_membar_acquire) /* * Every load from normal memory is a load-acquire on x86, so * there is never any need for explicit barriers to order * load-before-anything. */ ret -END(_membar_consumer) +END(_membar_acquire) -ENTRY(_membar_producer) +ENTRY(_membar_release) /* * Every store to normal memory is a store-release on x86, so * there is never any need for explicit barriers to order * anything-before-store. 
*/ ret -END(_membar_producer) +END(_membar_release) ENTRY(_membar_sync) /* @@ -363,10 +363,14 @@ ALIAS(atomic_cas_uint_ni,_atomic_cas_32_ni) ALIAS(atomic_cas_ulong_ni,_atomic_cas_64_ni) ALIAS(atomic_cas_ptr_ni,_atomic_cas_64_ni) -ALIAS(membar_consumer,_membar_consumer) -ALIAS(membar_producer,_membar_producer) +ALIAS(membar_acquire,_membar_acquire) +ALIAS(membar_release,_membar_release) +ALIAS(membar_sync,_membar_sync) + +ALIAS(membar_consumer,_membar_acquire) +ALIAS(membar_producer,_membar_release) ALIAS(membar_enter,_membar_sync) -ALIAS(membar_exit,_membar_producer) +ALIAS(membar_exit,_membar_release) ALIAS(membar_sync,_membar_sync) STRONG_ALIAS(_atomic_add_int,_atomic_add_32) @@ -421,8 +425,10 @@ STRONG_ALIAS(_atomic_cas_uint_ni,_atomic_cas_32_ni) STRONG_ALIAS(_atomic_cas_ulong_ni,_atomic_cas_64_ni) STRONG_ALIAS(_atomic_cas_ptr_ni,_atomic_cas_64_ni) +STRONG_ALIAS(_membar_consumer,_membar_acquire) +STRONG_ALIAS(_membar_producer,_membar_release) STRONG_ALIAS(_membar_enter,_membar_sync) -STRONG_ALIAS(_membar_exit,_membar_producer) +STRONG_ALIAS(_membar_exit,_membar_release) #ifdef _HARDKERNEL .section .rodata diff --git a/common/lib/libc/atomic/membar_ops_nop.c b/common/lib/libc/atomic/membar_ops_nop.c index 1d71089ed2b5..5c882c1315cc 100644 --- a/common/lib/libc/atomic/membar_ops_nop.c +++ b/common/lib/libc/atomic/membar_ops_nop.c @@ -40,6 +40,12 @@ membar_sync(void) /* nothing */ } +#undef membar_acquire +atomic_op_alias(membar_acquire,_membar_sync) +__strong_alias(_membar_acquire,_membar_sync) +#undef membar_release +atomic_op_alias(membar_release,_membar_sync) +__strong_alias(_membar_release,_membar_sync) #undef membar_enter atomic_op_alias(membar_enter,_membar_sync) __strong_alias(_membar_enter,_membar_sync) diff --git a/distrib/sets/lists/comp/mi b/distrib/sets/lists/comp/mi index a5e424fae75c..5dbacb400226 100644 --- a/distrib/sets/lists/comp/mi +++ b/distrib/sets/lists/comp/mi @@ -6307,6 +6307,7 @@ ./usr/share/man/cat3/atoi.0 comp-c-catman .cat ./usr/share/man/cat3/atol.0 comp-c-catman .cat ./usr/share/man/cat3/atoll.0 comp-c-catman .cat +./usr/share/man/cat3/atomic.0 comp-c-catman .cat ./usr/share/man/cat3/atomic_add.0 comp-c-catman .cat ./usr/share/man/cat3/atomic_add_32.0 comp-c-catman .cat ./usr/share/man/cat3/atomic_add_32_nv.0 comp-c-catman .cat @@ -8913,12 +8914,15 @@ ./usr/share/man/cat3/md5.0 comp-c-catman .cat ./usr/share/man/cat3/mdc2.0 comp-obsolete obsolete ./usr/share/man/cat3/mdoc.0 comp-obsolete obsolete +./usr/share/man/cat3/membar.0 comp-c-catman .cat +./usr/share/man/cat3/membar_acquire.0 comp-c-catman .cat ./usr/share/man/cat3/membar_consumer.0 comp-c-catman .cat ./usr/share/man/cat3/membar_datadep_consumer.0 comp-c-catman .cat ./usr/share/man/cat3/membar_enter.0 comp-c-catman .cat ./usr/share/man/cat3/membar_exit.0 comp-c-catman .cat ./usr/share/man/cat3/membar_ops.0 comp-c-catman .cat ./usr/share/man/cat3/membar_producer.0 comp-c-catman .cat +./usr/share/man/cat3/membar_release.0 comp-c-catman .cat ./usr/share/man/cat3/membar_sync.0 comp-c-catman .cat ./usr/share/man/cat3/memccpy.0 comp-c-catman .cat ./usr/share/man/cat3/memchr.0 comp-c-catman .cat @@ -14668,6 +14672,7 @@ ./usr/share/man/html3/atoi.html comp-c-htmlman html ./usr/share/man/html3/atol.html comp-c-htmlman html ./usr/share/man/html3/atoll.html comp-c-htmlman html +./usr/share/man/html3/atomic.html comp-c-htmlman html ./usr/share/man/html3/atomic_add.html comp-c-htmlman html ./usr/share/man/html3/atomic_add_32.html comp-c-htmlman html ./usr/share/man/html3/atomic_add_32_nv.html comp-c-htmlman html 
@@ -17195,12 +17200,15 @@ ./usr/share/man/html3/md4.html comp-c-htmlman html ./usr/share/man/html3/md5.html comp-c-htmlman html ./usr/share/man/html3/mdoc.html comp-obsolete obsolete +./usr/share/man/html3/membar.html comp-c-htmlman html +./usr/share/man/html3/membar_acquire.html comp-c-htmlman html ./usr/share/man/html3/membar_consumer.html comp-c-htmlman html ./usr/share/man/html3/membar_datadep_consumer.html comp-c-htmlman html ./usr/share/man/html3/membar_enter.html comp-c-htmlman html ./usr/share/man/html3/membar_exit.html comp-c-htmlman html ./usr/share/man/html3/membar_ops.html comp-c-htmlman html ./usr/share/man/html3/membar_producer.html comp-c-htmlman html +./usr/share/man/html3/membar_release.html comp-c-htmlman html ./usr/share/man/html3/membar_sync.html comp-c-htmlman html ./usr/share/man/html3/memccpy.html comp-c-htmlman html ./usr/share/man/html3/memchr.html comp-c-htmlman html @@ -22866,6 +22874,7 @@ ./usr/share/man/man3/atoi.3 comp-c-man .man ./usr/share/man/man3/atol.3 comp-c-man .man ./usr/share/man/man3/atoll.3 comp-c-man .man +./usr/share/man/man3/atomic.3 comp-c-man .man ./usr/share/man/man3/atomic_add.3 comp-c-man .man ./usr/share/man/man3/atomic_add_32.3 comp-c-man .man ./usr/share/man/man3/atomic_add_32_nv.3 comp-c-man .man @@ -25484,12 +25493,15 @@ ./usr/share/man/man3/md5.3 comp-c-man .man ./usr/share/man/man3/mdc2.3 comp-obsolete obsolete ./usr/share/man/man3/mdoc.3 comp-obsolete obsolete +./usr/share/man/man3/membar.3 comp-c-man .man +./usr/share/man/man3/membar_acquire.3 comp-c-man .man ./usr/share/man/man3/membar_consumer.3 comp-c-man .man ./usr/share/man/man3/membar_datadep_consumer.3 comp-c-man .man ./usr/share/man/man3/membar_enter.3 comp-c-man .man ./usr/share/man/man3/membar_exit.3 comp-c-man .man ./usr/share/man/man3/membar_ops.3 comp-c-man .man ./usr/share/man/man3/membar_producer.3 comp-c-man .man +./usr/share/man/man3/membar_release.3 comp-c-man .man ./usr/share/man/man3/membar_sync.3 comp-c-man .man ./usr/share/man/man3/memccpy.3 comp-c-man .man ./usr/share/man/man3/memchr.3 comp-c-man .man diff --git a/lib/libc/atomic/Makefile.inc b/lib/libc/atomic/Makefile.inc index 55af586964ed..b3389ba5a81e 100644 --- a/lib/libc/atomic/Makefile.inc +++ b/lib/libc/atomic/Makefile.inc @@ -75,6 +75,10 @@ MLINKS+=atomic_add.3 atomic_add_32.3 \ atomic_swap.3 atomic_swap_ulong.3 \ atomic_swap.3 atomic_swap_ptr.3 \ atomic_swap.3 atomic_swap_64.3 \ + atomic_ops.3 atomic.3 \ + membar_ops.3 membar.3 \ + membar_ops.3 membar_acquire.3 \ + membar_ops.3 membar_release.3 \ membar_ops.3 membar_enter.3 \ membar_ops.3 membar_exit.3 \ membar_ops.3 membar_producer.3 \ diff --git a/lib/libc/atomic/membar_ops.3 b/lib/libc/atomic/membar_ops.3 index a6b26ccef121..5d2ca5917542 100644 --- a/lib/libc/atomic/membar_ops.3 +++ b/lib/libc/atomic/membar_ops.3 @@ -27,13 +27,13 @@ .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGE. .\" -.Dd September 2, 2020 +.Dd March 30, 2022 .Dt MEMBAR_OPS 3 .Os .Sh NAME .Nm membar_ops , -.Nm membar_enter , -.Nm membar_exit , +.Nm membar_acquire , +.Nm membar_release , .Nm membar_producer , .Nm membar_consumer , .Nm membar_datadep_consumer , @@ -45,9 +45,9 @@ .In sys/atomic.h .\" .Ft void -.Fn membar_enter "void" +.Fn membar_acquire "void" .Ft void -.Fn membar_exit "void" +.Fn membar_release "void" .Ft void .Fn membar_producer "void" .Ft void @@ -65,9 +65,9 @@ relaxed load and store order. 
.Pp In general, memory barriers must come in pairs \(em a barrier on one CPU, such as -.Fn membar_exit , +.Fn membar_release , must pair with a barrier on another CPU, such as -.Fn membar_enter , +.Fn membar_acquire , in order to synchronize anything between the two CPUs. Code using .Nm @@ -92,40 +92,25 @@ not just C11 atomic operations on .Vt _Atomic\^ Ns -qualified objects. .Bl -tag -width abcd -.It Fn membar_enter -Any store preceding -.Fn membar_enter +.It Fn membar_acquire +Any load preceding +.Fn membar_acquire will happen before all memory operations following it. .Pp -An atomic read/modify/write operation -.Pq Xr atomic_ops 3 -followed by a -.Fn membar_enter +A load followed by a +.Fn membar_acquire implies a .Em load-acquire operation in the language of C11. +.Fn membar_acquire +should only be used after atomic read/modify/write, such as +.Xr atomic_cas_uint 3 . +For regular loads, instead of +.Li "x = *p; membar_acquire()" , +you should use +.Li "x = atomic_load_acquire(p)" . .Pp -.Sy WARNING : -A load followed by -.Fn membar_enter -.Em does not -imply a -.Em load-acquire -operation, even though -.Fn membar_exit -followed by a store implies a -.Em store-release -operation; the symmetry of these names and asymmetry of the semantics -is a historical mistake. -In the -.Nx -kernel, you can use -.Xr atomic_load_acquire 9 -for a -.Em load-acquire -operation without any atomic read/modify/write. -.Pp -.Fn membar_enter +.Fn membar_acquire is typically used in code that implements locking primitives to ensure that a lock protects its data, and is typically paired with .Fn membar_exit ; @@ -150,14 +135,14 @@ you should use .Pp .Fn membar_exit is typically paired with -.Fn membar_enter , +.Fn membar_acquire , and is typically used in code that implements locking or reference counting primitives. Releasing a lock or reference count should use .Fn membar_exit , and acquiring a lock or handling an object after draining references should use -.Fn membar_enter , +.Fn membar_acquire , so that whatever happened before releasing will also have happened before acquiring. For example: @@ -174,7 +159,7 @@ atomic_dec_uint(&obj->refcnt); */ while (atomic_cas_uint(&obj->refcnt, 0, -1) != 0) continue; -membar_enter(); +membar_acquire(); KASSERT(valid(&obj->state)); obj->state.mumblefrotz--; .Ed @@ -190,7 +175,7 @@ in thread A setting the reference count to zero, everything in thread A before the .Fn membar_exit is guaranteed to happen before everything in thread B after the -.Fn membar_enter , +.Fn membar_acquire , as if the machine had sequentially executed: .Bd -literal -offset abcdefgh obj->state.mumblefrotz = 42; /* from thread A */ @@ -204,7 +189,7 @@ obj->state.mumblefrotz--; followed by a store, serving as a .Em store-release operation, may also be paired with a subsequent load followed by -.Fn membar_sync , +.Fn membar_acquire , serving as the corresponding .Em load-acquire operation. @@ -337,24 +322,48 @@ in C11. is typically paired with .Fn membar_sync . .Pp -A load followed by -.Fn membar_sync , -serving as a -.Em load-acquire -operation, may also be paired with a prior -.Fn membar_exit -followed by a store, serving as the corresponding -.Em store-release -operation. -However, you should use -.Xr atomic_load_acquire 9 -instead of -.No load-then- Ns Fn membar_sync -if it is a regular load, or -.Fn membar_enter -instead of .Fn membar_sync -if the load is in an atomic read/modify/write operation. 
+is typically not needed except in exotic synchronization schemes like +Dekker's algorithm that require store-before-load ordering. +If you are tempted to reach for it, see if there is another way to do +what you're trying to do first. +.El +.Sh DEPRECATED MEMORY BARRIERS +The following memory barriers are deprecated. +They were imported from Solaris, which describes them as providing +ordering relative to +.Sq lock acquisition , +but the documentation in +.Nx +disagreed with the implementation and use on the semantics. +.Bl -tag -width abcd +.It Fn membar_enter +Originally documented as store-before-load/store, this was instead +implemented as load-before-load/store on some platforms, which is what +essentially all uses relied on. +Now this is implemented as an alias for +.Fn membar_sync +everywhere, meaning a full load/store-before-load/store sequential +consistency barrier, in order to guarantee what the documentation +claimed +.Em and +what the implementation actually did. +.Pp +New code should use +.Fn membar_acquire +for load-before-load/store ordering, which is what most uses need, or +.Fn membar_sync +for store-before-load/store ordering, which typically only appears in +exotic synchronization schemes like Dekker's algorithm. +.It Fn membar_exit +Alias for +.Fn membar_release . +This was originally meant to be paired with +.Fn membar_enter . +.Pp +New code should use +.Fn membar_release +instead. .El .Sh SEE ALSO .Xr atomic_ops 3 , @@ -366,7 +375,19 @@ The .Nm membar_ops functions first appeared in .Nx 5.0 . +.Pp The data-dependent load barrier, .Fn membar_datadep_consumer , first appeared in .Nx 7.0 . +.Pp +The +.Fn membar_acquire +and +.Fn membar_release +functions first appeared, and the +.Fn membar_enter +and +.Fn membar_exit +functions were deprecated, in +.Nx 10.0 . diff --git a/share/man/man9/atomic_loadstore.9 b/share/man/man9/atomic_loadstore.9 index 762524c12406..dd2b8f84f552 100644 --- a/share/man/man9/atomic_loadstore.9 +++ b/share/man/man9/atomic_loadstore.9 @@ -27,7 +27,7 @@ .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGE. .\" -.Dd November 25, 2019 +.Dd February 11, 2022 .Dt ATOMIC_LOADSTORE 9 .Os .Sh NAME @@ -648,7 +648,7 @@ is an atomic read/modify/write operation in .Xr atomic_ops 3 , then .Bd -literal - membar_exit(); + membar_release(); atomic_r/m/w(obj, ...); .Ed .Pp @@ -657,32 +657,34 @@ functions like a release operation on and .Bd -literal atomic_r/m/w(obj, ...); - membar_enter(); + membar_acquire(); .Ed .Pp functions like a acquire operation on .Va obj . .Pp -.Sy WARNING : -The combination of -.Fn atomic_load_relaxed -and -.Xr membar_enter 3 -.Em does not -make an acquire operation; only read/modify/write atomics may be -combined with -.Xr membar_enter 3 -this way. -.Pp On architectures where .Dv __HAVE_ATOMIC_AS_MEMBAR is defined, all the .Xr atomic_ops 3 imply release and acquire operations, so the -.Xr membar_enter 3 +.Xr membar_acquire 3 and -.Xr membar_exit 3 +.Xr membar_release 3 are redundant. +.Pp +The combination of +.Fn atomic_load_relaxed +and +.Xr membar_acquire 3 +in that order is equivalent to +.Fn atomic_load_acquire , +and the combination of +.Xr membar_release 3 +and +.Fn atomic_store_relaxed +in that order is equivalent to +.Fn atomic_store_release . .Sh EXAMPLES Maintaining lossy counters. 
These may lose some counts, because the read/modify/write cycle as a diff --git a/sys/sys/atomic.h b/sys/sys/atomic.h index 916861c468c2..76b8ed841d65 100644 --- a/sys/sys/atomic.h +++ b/sys/sys/atomic.h @@ -181,12 +181,18 @@ uint8_t atomic_cas_8(volatile uint8_t *, uint8_t, uint8_t); /* * Memory barrier operations */ -void membar_enter(void); -void membar_exit(void); +void membar_acquire(void); +void membar_release(void); void membar_producer(void); void membar_consumer(void); void membar_sync(void); +/* + * Deprecated memory barriers + */ +void membar_enter(void); +void membar_exit(void); + #ifdef __HAVE_MEMBAR_DATADEP_CONSUMER void membar_datadep_consumer(void); #else diff --git a/tests/lib/libc/membar/t_dekker.c b/tests/lib/libc/membar/t_dekker.c index f1f2656099e8..c8ea984c7970 100644 --- a/tests/lib/libc/membar/t_dekker.c +++ b/tests/lib/libc/membar/t_dekker.c @@ -44,10 +44,6 @@ __RCSID("$NetBSD$"); #include #include -/* XXX */ -#define membar_acquire() membar_enter() -#define membar_release() membar_exit() - #ifdef BROKEN_SYNC #undef membar_sync #define membar_sync() asm volatile("" ::: "memory") diff --git a/tests/lib/libc/membar/t_spinlock.c b/tests/lib/libc/membar/t_spinlock.c index 7addd4e557cb..1fbdf1775288 100644 --- a/tests/lib/libc/membar/t_spinlock.c +++ b/tests/lib/libc/membar/t_spinlock.c @@ -44,10 +44,6 @@ __RCSID("$NetBSD$"); #include #include -/* XXX */ -#define membar_acquire() membar_enter() -#define membar_release() membar_exit() - #ifdef BROKEN_ACQUIRE #undef membar_acquire #define membar_acquire() asm volatile("" ::: "memory") From df2433d15ef6d2c33296b69c73ca7820f7ffad57 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Fri, 11 Feb 2022 21:09:44 +0000 Subject: [PATCH 13/49] atomic_loadstore(9): Use membar_acquire/release. --- sys/sys/atomic.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/sys/sys/atomic.h b/sys/sys/atomic.h index 76b8ed841d65..8b9690dd4745 100644 --- a/sys/sys/atomic.h +++ b/sys/sys/atomic.h @@ -454,18 +454,12 @@ void kcsan_atomic_store(volatile void *, const void *, int); __END_ATOMIC_LOAD(__al_val); \ }) -/* - * We want {loads}-before-{loads,stores}. It is tempting to use - * membar_enter, but that provides {stores}-before-{loads,stores}, - * which may not help. So we must use membar_sync, which does the - * slightly stronger {loads,stores}-before-{loads,stores}. - */ #define atomic_load_acquire(p) \ ({ \ const volatile __typeof__(*(p)) *__al_ptr = (p); \ __ATOMIC_PTR_CHECK(__al_ptr); \ __BEGIN_ATOMIC_LOAD(__al_ptr, __al_val); \ - membar_sync(); \ + membar_acquire(); \ __END_ATOMIC_LOAD(__al_val); \ }) @@ -482,7 +476,7 @@ void kcsan_atomic_store(volatile void *, const void *, int); volatile __typeof__(*(p)) *__as_ptr = (p); \ __typeof__(*(p)) __as_val = (v); \ __ATOMIC_PTR_CHECK(__as_ptr); \ - membar_exit(); \ + membar_release(); \ __DO_ATOMIC_STORE(__as_ptr, __as_val); \ }) From 9349128d2315211f708cbbfc90fc1cf3cc38fec4 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Sun, 27 Feb 2022 19:43:11 +0000 Subject: [PATCH 14/49] mips/cavium: Insert appropriate membars around IPIs. 
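The pattern here is the usual send/receive pairing: release before
posting the IPI request, acquire after observing it.  Condensed from
the octeon_intr.c changes below (a sketch, not a literal copy of the
code):

	/* octeon_send_ipi */
	membar_release();
	atomic_or_64(&ci->ci_request_ipis, ipi_mask);
	membar_release();
	mips3_sd(cpu->cpu_mbox_set, ipi_mask);

	/* octeon_ipi_intr */
	if ((atomic_load_relaxed(&ci->ci_request_ipis) & ipi_mask) == 0)
		return 0;
	membar_acquire();
	/* ...process the request... */

That way everything the sender did before requesting the IPI
happens-before everything the handler does after seeing the request.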
--- sys/arch/mips/cavium/octeon_intr.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sys/arch/mips/cavium/octeon_intr.c b/sys/arch/mips/cavium/octeon_intr.c index 2fa9a89e0331..d259f48141ae 100644 --- a/sys/arch/mips/cavium/octeon_intr.c +++ b/sys/arch/mips/cavium/octeon_intr.c @@ -548,6 +548,7 @@ octeon_ipi_intr(void *arg) ipi_mask &= mips3_ld(cpu->cpu_mbox_set); if (ipi_mask == 0) return 0; + membar_acquire(); mips3_sd(cpu->cpu_mbox_clr, ipi_mask); @@ -566,8 +567,9 @@ octeon_ipi_intr(void *arg) #endif /* if the request is clear, it was previously processed */ - if ((ci->ci_request_ipis & ipi_mask) == 0) + if ((atomic_load_relaxed(&ci->ci_request_ipis) & ipi_mask) == 0) return 0; + membar_acquire(); atomic_or_64(&ci->ci_active_ipis, ipi_mask); atomic_and_64(&ci->ci_request_ipis, ~ipi_mask); @@ -600,8 +602,10 @@ octeon_send_ipi(struct cpu_info *ci, int req) const u_int ipi_shift = ipi_prio[req] == IPL_SCHED ? 16 : 0; const uint32_t ipi_mask = __BIT(req + ipi_shift); + membar_release(); atomic_or_64(&ci->ci_request_ipis, ipi_mask); + membar_release(); mips3_sd(cpu->cpu_mbox_set, ipi_mask); return 0; From bb9012b0f2748e1d9e9fffbb868bc8893a8a90f6 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Wed, 2 Mar 2022 18:18:27 +0000 Subject: [PATCH 15/49] mips/rmixl: Insert appropriate membars around IPIs. --- sys/arch/mips/rmi/rmixl_intr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sys/arch/mips/rmi/rmixl_intr.c b/sys/arch/mips/rmi/rmixl_intr.c index 3e9b49731bdf..6b49ab96e72b 100644 --- a/sys/arch/mips/rmi/rmixl_intr.c +++ b/sys/arch/mips/rmi/rmixl_intr.c @@ -967,6 +967,7 @@ rmixl_send_ipi(struct cpu_info *ci, int tag) | (RMIXL_INTRVEC_IPI + tag); mutex_enter(&rmixl_ipi_lock); + membar_release(); atomic_or_64(&ci->ci_request_ipis, req); RMIXL_PICREG_WRITE(RMIXL_PIC_IPIBASE, r); mutex_exit(&rmixl_ipi_lock); @@ -984,8 +985,9 @@ rmixl_ipi_intr(void *arg) KASSERT((uintptr_t)arg < NIPIS); /* if the request is clear, it was previously processed */ - if ((ci->ci_request_ipis & ipi_mask) == 0) + if ((atomic_load_relaxed(&ci->ci_request_ipis) & ipi_mask) == 0) return 0; + membar_acquire(); atomic_or_64(&ci->ci_active_ipis, ipi_mask); atomic_and_64(&ci->ci_request_ipis, ~ipi_mask); From 961655f915028fd850429f9ab41ea1c2b4ed1433 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Thu, 31 Mar 2022 01:21:08 +0000 Subject: [PATCH 16/49] audio(4): Use membar_acquire, not membar_enter. Cheaper and adequate to make an atomic_swap into a load-acquire. --- sys/dev/audio/audio.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sys/dev/audio/audio.c b/sys/dev/audio/audio.c index 424762986e1b..9a59f1873a12 100644 --- a/sys/dev/audio/audio.c +++ b/sys/dev/audio/audio.c @@ -316,7 +316,7 @@ audio_mlog_flush(void) /* Nothing to do if already in use ? */ if (atomic_swap_32(&mlog_inuse, 1) == 1) return; - membar_enter(); + membar_acquire(); int rpage = mlog_wpage; mlog_wpage ^= 1; @@ -353,7 +353,7 @@ audio_mlog_printf(const char *fmt, ...) mlog_drop++; return; } - membar_enter(); + membar_acquire(); va_start(ap, fmt); len = vsnprintf( @@ -1684,7 +1684,7 @@ audio_track_lock_tryenter(audio_track_t *track) if (atomic_swap_uint(&track->lock, 1) != 0) return false; - membar_enter(); + membar_acquire(); return true; } From 460d8c960ae542f4331957bb95ba5c56f74df51e Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Thu, 31 Mar 2022 01:24:14 +0000 Subject: [PATCH 17/49] alpha: Omit needless membar in pmap_reference. 
If the pmap is published enough for us to obtain a reference to it then there's no membar needed. If it's not then something else is wrong and we can't use pmap_reference here anyway. Membars are needed only on the destruction side to make sure all use, by any thread, happens-before all freeing in the last user thread. --- sys/arch/alpha/alpha/pmap.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sys/arch/alpha/alpha/pmap.c b/sys/arch/alpha/alpha/pmap.c index 181a74a7add9..f56a83053607 100644 --- a/sys/arch/alpha/alpha/pmap.c +++ b/sys/arch/alpha/alpha/pmap.c @@ -1706,7 +1706,6 @@ pmap_reference(pmap_t pmap) newcount = atomic_inc_uint_nv(&pmap->pm_count); KASSERT(newcount != 0); - PMAP_MP(membar_enter()); } /* From 6b9247338aa28647942f8a50b119771cef529f43 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Thu, 31 Mar 2022 01:47:19 +0000 Subject: [PATCH 18/49] sys: Use membar_release/acquire around reference drop. This just goes through my recent reference count membar audit and changes membar_exit to membar_release and membar_enter to membar_acquire -- this should make everything cheaper on most CPUs without hurting correctness, because membar_acquire is generally cheaper than membar_enter. --- sys/arch/aarch64/aarch64/pmap.c | 4 ++-- sys/arch/alpha/alpha/pmap.c | 4 ++-- sys/arch/arm/arm32/pmap.c | 4 ++-- sys/arch/hppa/hppa/pmap.c | 4 ++-- sys/arch/ia64/ia64/pmap.c | 6 +++--- sys/arch/powerpc/oea/pmap.c | 4 ++-- sys/arch/sparc/sparc/pmap.c | 4 ++-- sys/arch/sparc64/sparc64/pmap.c | 4 ++-- sys/dev/hyperv/vmbus.c | 4 ++-- sys/dev/marvell/mvxpsec.c | 4 ++-- sys/dev/scsipi/atapiconf.c | 4 ++-- sys/dev/scsipi/scsiconf.c | 4 ++-- sys/dev/scsipi/scsipi_base.c | 4 ++-- sys/external/bsd/drm2/linux/linux_stop_machine.c | 4 ++-- sys/kern/kern_auth.c | 4 ++-- sys/kern/kern_exec.c | 4 ++-- sys/kern/kern_mutex_obj.c | 4 ++-- sys/kern/kern_resource.c | 4 ++-- sys/kern/kern_rwlock_obj.c | 4 ++-- sys/kern/kern_sig.c | 4 ++-- sys/kern/subr_kcpuset.c | 4 ++-- sys/kern/sys_futex.c | 4 ++-- sys/kern/uipc_mbuf.c | 4 ++-- sys/kern/vfs_cwd.c | 4 ++-- sys/kern/vfs_mount.c | 4 ++-- sys/kern/vfs_vnode.c | 14 +++++++------- sys/kern/vfs_wapbl.c | 4 ++-- sys/net/if.c | 4 ++-- sys/net/npf/npf_nat.c | 4 ++-- sys/net/npf/npf_rproc.c | 4 ++-- sys/net/npf/npf_tableset.c | 4 ++-- sys/uvm/pmap/pmap.c | 4 ++-- sys/uvm/uvm_aobj.c | 4 ++-- sys/uvm/uvm_map.c | 4 ++-- 34 files changed, 74 insertions(+), 74 deletions(-) diff --git a/sys/arch/aarch64/aarch64/pmap.c b/sys/arch/aarch64/aarch64/pmap.c index 2815303953cd..9f5a3cdf9210 100644 --- a/sys/arch/aarch64/aarch64/pmap.c +++ b/sys/arch/aarch64/aarch64/pmap.c @@ -1563,11 +1563,11 @@ pmap_destroy(struct pmap *pm) if (pm == pmap_kernel()) panic("cannot destroy kernel pmap"); - membar_exit(); + membar_release(); refcnt = atomic_dec_uint_nv(&pm->pm_refcnt); if (refcnt > 0) return; - membar_enter(); + membar_acquire(); KASSERT(LIST_EMPTY(&pm->pm_pvlist)); pmap_tlb_asid_release_all(pm); diff --git a/sys/arch/alpha/alpha/pmap.c b/sys/arch/alpha/alpha/pmap.c index f56a83053607..4e7eac1859cf 100644 --- a/sys/arch/alpha/alpha/pmap.c +++ b/sys/arch/alpha/alpha/pmap.c @@ -1659,11 +1659,11 @@ pmap_destroy(pmap_t pmap) printf("pmap_destroy(%p)\n", pmap); #endif - PMAP_MP(membar_exit()); + PMAP_MP(membar_release()); KASSERT(atomic_load_relaxed(&pmap->pm_count) > 0); if (atomic_dec_uint_nv(&pmap->pm_count) > 0) return; - PMAP_MP(membar_enter()); + PMAP_MP(membar_acquire()); pt_entry_t *lev1map = pmap_lev1map(pmap); diff --git a/sys/arch/arm/arm32/pmap.c b/sys/arch/arm/arm32/pmap.c index 
1622a5ffe0ba..510293c37eec 100644 --- a/sys/arch/arm/arm32/pmap.c +++ b/sys/arch/arm/arm32/pmap.c @@ -5275,7 +5275,7 @@ pmap_destroy(pmap_t pm) /* * Drop reference count */ - membar_exit(); + membar_release(); if (atomic_dec_uint_nv(&pm->pm_refs) > 0) { #ifndef ARM_MMU_EXTENDED if (pmap_is_current(pm)) { @@ -5286,7 +5286,7 @@ pmap_destroy(pmap_t pm) #endif return; } - membar_enter(); + membar_acquire(); /* * reference count is zero, free pmap resources and then free pmap. diff --git a/sys/arch/hppa/hppa/pmap.c b/sys/arch/hppa/hppa/pmap.c index 6f70350f9653..fbf1a5536990 100644 --- a/sys/arch/hppa/hppa/pmap.c +++ b/sys/arch/hppa/hppa/pmap.c @@ -1249,10 +1249,10 @@ pmap_destroy(pmap_t pmap) off_t off; #endif - membar_exit(); + membar_release(); if (atomic_dec_uint_nv(&pmap->pm_obj.uo_refs) > 0) return; - membar_enter(); + membar_acquire(); #ifdef DIAGNOSTIC uvm_page_array_init(&a, &pmap->pm_obj, 0); diff --git a/sys/arch/ia64/ia64/pmap.c b/sys/arch/ia64/ia64/pmap.c index 145dbca4362e..09722466364c 100644 --- a/sys/arch/ia64/ia64/pmap.c +++ b/sys/arch/ia64/ia64/pmap.c @@ -1516,11 +1516,11 @@ pmap_destroy(pmap_t pmap) UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist); UVMHIST_LOG(maphist, "(pm=%p)", pmap, 0, 0, 0); - - membar_exit(); + + membar_release(); if (atomic_dec_64_nv(&pmap->pm_refcount) > 0) return; - membar_enter(); + membar_acquire(); KASSERT(pmap->pm_stats.resident_count == 0); KASSERT(pmap->pm_stats.wired_count == 0); diff --git a/sys/arch/powerpc/oea/pmap.c b/sys/arch/powerpc/oea/pmap.c index 3de683b61731..44e4faa59d77 100644 --- a/sys/arch/powerpc/oea/pmap.c +++ b/sys/arch/powerpc/oea/pmap.c @@ -1230,9 +1230,9 @@ pmap_reference(pmap_t pm) void pmap_destroy(pmap_t pm) { - membar_exit(); + membar_release(); if (atomic_dec_uint_nv(&pm->pm_refs) == 0) { - membar_enter(); + membar_acquire(); pmap_release(pm); pool_put(&pmap_pool, pm); } diff --git a/sys/arch/sparc/sparc/pmap.c b/sys/arch/sparc/sparc/pmap.c index 0619ca46f64d..e5a27c6ce849 100644 --- a/sys/arch/sparc/sparc/pmap.c +++ b/sys/arch/sparc/sparc/pmap.c @@ -4465,9 +4465,9 @@ pmap_destroy(struct pmap *pm) { DPRINTF(PDB_DESTROY, "pmap_destroy[%d](%p)", cpu_number(), pm); - membar_exit(); + membar_release(); if (atomic_dec_uint_nv(&pm->pm_refcount) == 0) { - membar_enter(); + membar_acquire(); pmap_quiet_check(pm); pool_cache_put(&pmap_cache, pm); } diff --git a/sys/arch/sparc64/sparc64/pmap.c b/sys/arch/sparc64/sparc64/pmap.c index b684114d236e..4a47886f10a7 100644 --- a/sys/arch/sparc64/sparc64/pmap.c +++ b/sys/arch/sparc64/sparc64/pmap.c @@ -1510,11 +1510,11 @@ pmap_destroy(struct pmap *pm) #endif struct vm_page *pg; - membar_exit(); + membar_release(); if ((int)atomic_dec_uint_nv(&pm->pm_refs) > 0) { return; } - membar_enter(); + membar_acquire(); DPRINTF(PDB_DESTROY, ("pmap_destroy: freeing pmap %p\n", pm)); #ifdef MULTIPROCESSOR CPUSET_CLEAR(pmap_cpus_active); diff --git a/sys/dev/hyperv/vmbus.c b/sys/dev/hyperv/vmbus.c index 4e47e8dd73b7..606a6a4dfa0c 100644 --- a/sys/dev/hyperv/vmbus.c +++ b/sys/dev/hyperv/vmbus.c @@ -1452,10 +1452,10 @@ vmbus_channel_detach(struct vmbus_channel *ch) KASSERTMSG(ch->ch_refs > 0, "channel%u: invalid refcnt %d", ch->ch_id, ch->ch_refs); - membar_exit(); + membar_release(); refs = atomic_dec_uint_nv(&ch->ch_refs); if (refs == 0) { - membar_enter(); + membar_acquire(); /* Detach the target channel. 
*/ vmbus_devq_enqueue(ch->ch_sc, VMBUS_DEV_TYPE_DETACH, ch); } diff --git a/sys/dev/marvell/mvxpsec.c b/sys/dev/marvell/mvxpsec.c index c1459c67d578..4ca7d33b0cb6 100644 --- a/sys/dev/marvell/mvxpsec.c +++ b/sys/dev/marvell/mvxpsec.c @@ -1551,10 +1551,10 @@ mvxpsec_session_unref(struct mvxpsec_session *mv_s) { uint32_t refs; - membar_exit(); + membar_release(); refs = atomic_dec_32_nv(&mv_s->refs); if (refs == 0) { - membar_enter(); + membar_acquire(); pool_cache_put(mv_s->sc->sc_session_pool, mv_s); } } diff --git a/sys/dev/scsipi/atapiconf.c b/sys/dev/scsipi/atapiconf.c index c51bcb24d01e..2a40abb958ae 100644 --- a/sys/dev/scsipi/atapiconf.c +++ b/sys/dev/scsipi/atapiconf.c @@ -219,9 +219,9 @@ atapibusdetach(device_t self, int flags) cv_destroy(&chan->chan_cv_comp); cv_destroy(&chan->chan_cv_thr); - membar_exit(); + membar_release(); if (atomic_dec_uint_nv(&chan_running(chan)) == 0) { - membar_enter(); + membar_acquire(); mutex_destroy(chan_mtx(chan)); } diff --git a/sys/dev/scsipi/scsiconf.c b/sys/dev/scsipi/scsiconf.c index 88349c54a3e4..bc44eedc3a12 100644 --- a/sys/dev/scsipi/scsiconf.c +++ b/sys/dev/scsipi/scsiconf.c @@ -365,9 +365,9 @@ scsibusdetach(device_t self, int flags) cv_destroy(&chan->chan_cv_comp); cv_destroy(&chan->chan_cv_thr); - membar_exit(); + membar_release(); if (atomic_dec_uint_nv(&chan_running(chan)) == 0) { - membar_enter(); + membar_acquire(); mutex_destroy(chan_mtx(chan)); } diff --git a/sys/dev/scsipi/scsipi_base.c b/sys/dev/scsipi/scsipi_base.c index 3fc49c9a2caa..a889eb22c109 100644 --- a/sys/dev/scsipi/scsipi_base.c +++ b/sys/dev/scsipi/scsipi_base.c @@ -2733,10 +2733,10 @@ void scsipi_adapter_delref(struct scsipi_adapter *adapt) { - membar_exit(); + membar_release(); if (atomic_dec_uint_nv(&adapt->adapt_refcnt) == 0 && adapt->adapt_enable != NULL) { - membar_enter(); + membar_acquire(); scsipi_adapter_lock(adapt); (void) scsipi_adapter_enable(adapt, 0); scsipi_adapter_unlock(adapt); diff --git a/sys/external/bsd/drm2/linux/linux_stop_machine.c b/sys/external/bsd/drm2/linux/linux_stop_machine.c index 7d22bb63ac28..530bfcb82208 100644 --- a/sys/external/bsd/drm2/linux/linux_stop_machine.c +++ b/sys/external/bsd/drm2/linux/linux_stop_machine.c @@ -59,13 +59,13 @@ stop_machine_xcall(void *a, void *b) s = splhigh(); /* Note that we're ready, and see whether we're the chosen one. */ - membar_exit(); + membar_release(); if (atomic_dec_uint_nv(&S->ncpu) != 0) { while (!atomic_load_acquire(&S->done)) SPINLOCK_BACKOFF_HOOK; goto out; } - membar_enter(); + membar_acquire(); /* It's time. Call the callback. 
*/ S->callback(S->cookie); diff --git a/sys/kern/kern_auth.c b/sys/kern/kern_auth.c index 6e7a51152ac2..4679f0dfcd3b 100644 --- a/sys/kern/kern_auth.c +++ b/sys/kern/kern_auth.c @@ -145,12 +145,12 @@ kauth_cred_free(kauth_cred_t cred) ASSERT_SLEEPABLE(); #ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_exit(); + membar_release(); #endif if (atomic_dec_uint_nv(&cred->cr_refcnt) > 0) return; #ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_enter(); + membar_acquire(); #endif kauth_cred_hook(cred, KAUTH_CRED_FREE, NULL, NULL); diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index 8325d3fa4d68..5fcc9bc89a7d 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -2138,10 +2138,10 @@ static void spawn_exec_data_release(struct spawn_exec_data *data) { - membar_exit(); + membar_release(); if (atomic_dec_32_nv(&data->sed_refcnt) != 0) return; - membar_enter(); + membar_acquire(); cv_destroy(&data->sed_cv_child_ready); mutex_destroy(&data->sed_mtx_child); diff --git a/sys/kern/kern_mutex_obj.c b/sys/kern/kern_mutex_obj.c index 92fcde00e450..5339b3a42e20 100644 --- a/sys/kern/kern_mutex_obj.c +++ b/sys/kern/kern_mutex_obj.c @@ -158,13 +158,13 @@ mutex_obj_free(kmutex_t *lock) __func__, mo, mo->mo_refcnt); #ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_exit(); + membar_release(); #endif if (atomic_dec_uint_nv(&mo->mo_refcnt) > 0) { return false; } #ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_enter(); + membar_acquire(); #endif mutex_destroy(&mo->mo_lock); pool_cache_put(mutex_obj_cache, mo); diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c index fae9c5ac2e35..47d75988eb78 100644 --- a/sys/kern/kern_resource.c +++ b/sys/kern/kern_resource.c @@ -793,11 +793,11 @@ lim_free(struct plimit *lim) struct plimit *sv_lim; do { - membar_exit(); + membar_release(); if (atomic_dec_uint_nv(&lim->pl_refcnt) > 0) { return; } - membar_enter(); + membar_acquire(); if (lim->pl_corename != defcorename) { kmem_free(lim->pl_corename, lim->pl_cnlen); } diff --git a/sys/kern/kern_rwlock_obj.c b/sys/kern/kern_rwlock_obj.c index 52fa08784d12..f76b75294683 100644 --- a/sys/kern/kern_rwlock_obj.c +++ b/sys/kern/kern_rwlock_obj.c @@ -148,13 +148,13 @@ rw_obj_free(krwlock_t *lock) KASSERT(ro->ro_refcnt > 0); #ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_exit(); + membar_release(); #endif if (atomic_dec_uint_nv(&ro->ro_refcnt) > 0) { return false; } #ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_enter(); + membar_acquire(); #endif rw_destroy(&ro->ro_lock); pool_cache_put(rw_obj_cache, ro); diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c index 1526f28c94cf..3e5d48cbb4d9 100644 --- a/sys/kern/kern_sig.c +++ b/sys/kern/kern_sig.c @@ -311,9 +311,9 @@ void sigactsfree(struct sigacts *ps) { - membar_exit(); + membar_release(); if (atomic_dec_uint_nv(&ps->sa_refcnt) == 0) { - membar_enter(); + membar_acquire(); mutex_destroy(&ps->sa_mutex); pool_cache_put(sigacts_cache, ps); } diff --git a/sys/kern/subr_kcpuset.c b/sys/kern/subr_kcpuset.c index 63547abb3e53..506b2a3fd5d5 100644 --- a/sys/kern/subr_kcpuset.c +++ b/sys/kern/subr_kcpuset.c @@ -262,11 +262,11 @@ kcpuset_unuse(kcpuset_t *kcp, kcpuset_t **lst) KASSERT(kc_initialised); KASSERT(kc->kc_refcnt > 0); - membar_exit(); + membar_release(); if (atomic_dec_uint_nv(&kc->kc_refcnt) != 0) { return; } - membar_enter(); + membar_acquire(); KASSERT(kc->kc_next == NULL); if (lst == NULL) { kcpuset_destroy(kcp); diff --git a/sys/kern/sys_futex.c b/sys/kern/sys_futex.c index debd9ba33bfc..00f352a13bda 100644 --- a/sys/kern/sys_futex.c +++ b/sys/kern/sys_futex.c @@ -538,7 +538,7 @@ 
futex_rele(struct futex *f) if (refcnt == 1) goto trylast; #ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_exit(); + membar_release(); #endif } while (atomic_cas_ulong(&f->fx_refcnt, refcnt, refcnt - 1) != refcnt); return; @@ -547,7 +547,7 @@ trylast: mutex_enter(&futex_tab.lock); if (atomic_dec_ulong_nv(&f->fx_refcnt) == 0) { #ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_enter(); + membar_acquire(); #endif if (f->fx_on_tree) { if (__predict_false(f->fx_shared)) diff --git a/sys/kern/uipc_mbuf.c b/sys/kern/uipc_mbuf.c index 49ea7b7c2f7c..6018838772a0 100644 --- a/sys/kern/uipc_mbuf.c +++ b/sys/kern/uipc_mbuf.c @@ -1915,7 +1915,7 @@ m_ext_free(struct mbuf *m) refcnt = m->m_ext.ext_refcnt = 0; } else { #ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_exit(); + membar_release(); #endif refcnt = atomic_dec_uint_nv(&m->m_ext.ext_refcnt); } @@ -1934,7 +1934,7 @@ m_ext_free(struct mbuf *m) * dropping the last reference */ #ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_enter(); + membar_acquire(); #endif if (!embedded) { m->m_ext.ext_refcnt++; /* XXX */ diff --git a/sys/kern/vfs_cwd.c b/sys/kern/vfs_cwd.c index 446315157022..7aea7ed8aa19 100644 --- a/sys/kern/vfs_cwd.c +++ b/sys/kern/vfs_cwd.c @@ -138,10 +138,10 @@ void cwdfree(struct cwdinfo *cwdi) { - membar_exit(); + membar_release(); if (atomic_dec_uint_nv(&cwdi->cwdi_refcnt) > 0) return; - membar_enter(); + membar_acquire(); vrele(cwdi->cwdi_cdir); if (cwdi->cwdi_rdir) diff --git a/sys/kern/vfs_mount.c b/sys/kern/vfs_mount.c index 8ab0165fb50a..b2e8b9d3d585 100644 --- a/sys/kern/vfs_mount.c +++ b/sys/kern/vfs_mount.c @@ -284,13 +284,13 @@ vfs_rele(struct mount *mp) { #ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_exit(); + membar_release(); #endif if (__predict_true((int)atomic_dec_uint_nv(&mp->mnt_refcnt) > 0)) { return; } #ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_enter(); + membar_acquire(); #endif /* diff --git a/sys/kern/vfs_vnode.c b/sys/kern/vfs_vnode.c index dc6c3336e2dc..06f24a8bce79 100644 --- a/sys/kern/vfs_vnode.c +++ b/sys/kern/vfs_vnode.c @@ -355,7 +355,7 @@ vstate_assert_change(vnode_t *vp, enum vnode_state from, enum vnode_state to, /* Open/close the gate for vcache_tryvget(). */ if (to == VS_LOADED) { #ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_exit(); + membar_release(); #endif atomic_or_uint(&vp->v_usecount, VUSECOUNT_GATE); } else { @@ -398,10 +398,10 @@ vstate_change(vnode_t *vp, enum vnode_state from, enum vnode_state to) { vnode_impl_t *vip = VNODE_TO_VIMPL(vp); - /* Open/close the gate for vcache_tryvget(). */ + /* Open/close the gate for vcache_tryvget(). 
*/ if (to == VS_LOADED) { #ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_exit(); + membar_release(); #endif atomic_or_uint(&vp->v_usecount, VUSECOUNT_GATE); } else { @@ -736,7 +736,7 @@ vtryrele(vnode_t *vp) u_int use, next; #ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_exit(); + membar_release(); #endif for (use = atomic_load_relaxed(&vp->v_usecount);; use = next) { if (__predict_false((use & VUSECOUNT_MASK) == 1)) { @@ -836,7 +836,7 @@ retry: } } #ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_enter(); + membar_acquire(); #endif if (vrefcnt(vp) <= 0 || vp->v_writecount != 0) { vnpanic(vp, "%s: bad ref count", __func__); @@ -1003,7 +1003,7 @@ out: } } #ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_enter(); + membar_acquire(); #endif if (VSTATE_GET(vp) == VS_RECLAIMED && vp->v_holdcnt == 0) { @@ -1476,7 +1476,7 @@ vcache_tryvget(vnode_t *vp) use, (use + 1) | VUSECOUNT_VGET); if (__predict_true(next == use)) { #ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_enter(); + membar_acquire(); #endif return 0; } diff --git a/sys/kern/vfs_wapbl.c b/sys/kern/vfs_wapbl.c index f5e8f5ca29a5..974c3a9e6595 100644 --- a/sys/kern/vfs_wapbl.c +++ b/sys/kern/vfs_wapbl.c @@ -2273,9 +2273,9 @@ wapbl_inodetrk_free(struct wapbl *wl) /* XXX this KASSERT needs locking/mutex analysis */ KASSERT(wl->wl_inohashcnt == 0); hashdone(wl->wl_inohash, HASH_LIST, wl->wl_inohashmask); - membar_exit(); + membar_release(); if (atomic_dec_uint_nv(&wapbl_ino_pool_refcount) == 0) { - membar_enter(); + membar_acquire(); pool_destroy(&wapbl_ino_pool); } } diff --git a/sys/net/if.c b/sys/net/if.c index f2965841919a..72569f3c9658 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -1820,12 +1820,12 @@ ifafree(struct ifaddr *ifa) KASSERTMSG(ifa->ifa_refcnt > 0, "ifa_refcnt=%d", ifa->ifa_refcnt); #ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_exit(); + membar_release(); #endif if (atomic_dec_uint_nv(&ifa->ifa_refcnt) != 0) return; #ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_enter(); + membar_acquire(); #endif free(ifa, M_IFADDR); } diff --git a/sys/net/npf/npf_nat.c b/sys/net/npf/npf_nat.c index d6711165dcf2..bce297033904 100644 --- a/sys/net/npf/npf_nat.c +++ b/sys/net/npf/npf_nat.c @@ -280,13 +280,13 @@ npf_natpolicy_release(npf_natpolicy_t *np) KASSERT(atomic_load_relaxed(&np->n_refcnt) > 0); #ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_exit(); + membar_release(); #endif if (atomic_dec_uint_nv(&np->n_refcnt) != 0) { return; } #ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_enter(); + membar_acquire(); #endif KASSERT(LIST_EMPTY(&np->n_nat_list)); mutex_destroy(&np->n_lock); diff --git a/sys/net/npf/npf_rproc.c b/sys/net/npf/npf_rproc.c index ddf2a622f8d6..65221225fa91 100644 --- a/sys/net/npf/npf_rproc.c +++ b/sys/net/npf/npf_rproc.c @@ -331,13 +331,13 @@ npf_rproc_release(npf_rproc_t *rp) KASSERT(atomic_load_relaxed(&rp->rp_refcnt) > 0); #ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_exit(); + membar_release(); #endif if (atomic_dec_uint_nv(&rp->rp_refcnt) != 0) { return; } #ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_enter(); + membar_acquire(); #endif /* XXXintr */ for (unsigned i = 0; i < rp->rp_ext_count; i++) { diff --git a/sys/net/npf/npf_tableset.c b/sys/net/npf/npf_tableset.c index 2a6ac9f5db9a..12b997559ec1 100644 --- a/sys/net/npf/npf_tableset.c +++ b/sys/net/npf/npf_tableset.c @@ -160,10 +160,10 @@ npf_tableset_destroy(npf_tableset_t *ts) if (t == NULL) continue; - membar_exit(); + membar_release(); if (atomic_dec_uint_nv(&t->t_refcnt) > 0) continue; - membar_enter(); + membar_acquire(); npf_table_destroy(t); } kmem_free(ts, NPF_TABLESET_SIZE(ts->ts_nitems)); diff --git 
a/sys/uvm/pmap/pmap.c b/sys/uvm/pmap/pmap.c index 19847997a03c..5983b2dea21b 100644 --- a/sys/uvm/pmap/pmap.c +++ b/sys/uvm/pmap/pmap.c @@ -690,13 +690,13 @@ pmap_destroy(pmap_t pmap) UVMHIST_FUNC(__func__); UVMHIST_CALLARGS(pmaphist, "(pmap=%#jx)", (uintptr_t)pmap, 0, 0, 0); - membar_exit(); + membar_release(); if (atomic_dec_uint_nv(&pmap->pm_count) > 0) { PMAP_COUNT(dereference); UVMHIST_LOG(pmaphist, " <-- done (deref)", 0, 0, 0, 0); return; } - membar_enter(); + membar_acquire(); PMAP_COUNT(destroy); KASSERT(pmap->pm_count == 0); diff --git a/sys/uvm/uvm_aobj.c b/sys/uvm/uvm_aobj.c index 6841b2761684..6829a026aa43 100644 --- a/sys/uvm/uvm_aobj.c +++ b/sys/uvm/uvm_aobj.c @@ -605,14 +605,14 @@ uao_detach(struct uvm_object *uobj) UVMHIST_LOG(maphist," (uobj=%#jx) ref=%jd", (uintptr_t)uobj, uobj->uo_refs, 0, 0); #ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_exit(); + membar_release(); #endif if (atomic_dec_uint_nv(&uobj->uo_refs) > 0) { UVMHIST_LOG(maphist, "<- done (rc>0)", 0,0,0,0); return; } #ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_enter(); + membar_acquire(); #endif /* diff --git a/sys/uvm/uvm_map.c b/sys/uvm/uvm_map.c index 15d7ef8d7e30..3c76e8a99fe6 100644 --- a/sys/uvm/uvm_map.c +++ b/sys/uvm/uvm_map.c @@ -4235,10 +4235,10 @@ uvmspace_free(struct vmspace *vm) UVMHIST_CALLARGS(maphist,"(vm=%#jx) ref=%jd", (uintptr_t)vm, vm->vm_refcnt, 0, 0); - membar_exit(); + membar_release(); if (atomic_dec_uint_nv(&vm->vm_refcnt) > 0) return; - membar_enter(); + membar_acquire(); /* * at this point, there should be no other references to the map. From e7a3693d02be3c8e4718fb008b5f5e9a9612c410 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Thu, 31 Mar 2022 01:55:08 +0000 Subject: [PATCH 19/49] libc/atomic: Fix membars in __atomic_load/store_* stubs. - membar_enter/exit ordering was backwards. - membar_enter doesn't make any sense for load anyway. - Switch to membar_release for store and membar_acquire for load. The only sensible orderings for a simple load or store are acquire or release, respectively, or sequential consistency. This never provided correct sequential consistency before -- we should really make it conditional on memmodel but I don't know offhand what the values of memmodel might be and this is at least better than before. --- common/lib/libc/atomic/atomic_load.c | 3 +-- common/lib/libc/atomic/atomic_store.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/common/lib/libc/atomic/atomic_load.c b/common/lib/libc/atomic/atomic_load.c index a93bc0ee4ea0..c19d2074ba96 100644 --- a/common/lib/libc/atomic/atomic_load.c +++ b/common/lib/libc/atomic/atomic_load.c @@ -40,9 +40,8 @@ uint ## b ## _t \ __atomic_load_ ## n(const volatile void *ptr, int memmodel) \ { \ uint## b ##_t val; \ - membar_enter(); \ val = *(const volatile uint ## b ## _t *)ptr; \ - membar_exit(); \ + membar_acquire(); \ return val; \ } diff --git a/common/lib/libc/atomic/atomic_store.c b/common/lib/libc/atomic/atomic_store.c index 474bb43b8ba9..cf53ae83010c 100644 --- a/common/lib/libc/atomic/atomic_store.c +++ b/common/lib/libc/atomic/atomic_store.c @@ -40,9 +40,8 @@ void \ __atomic_store_ ## n(volatile void *ptr, uint ## b ## _t val, \ int memmodel) \ { \ - membar_enter(); \ + membar_release(); \ *(volatile uint ## b ## _t *)ptr = val; \ - membar_exit(); \ } atomic_store_n(1, 8) From d41e042c398e34e60fa37d1e083ced1cfae24c2f Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Thu, 31 Mar 2022 02:02:03 +0000 Subject: [PATCH 20/49] rtld: Convert membar_exit/enter to membar_release/acquire. 
These are basic CAS-based locking primitives needing release and acquire semantics, nothing fancy here -- except the membar_sync parts which are questionable but not relevant to the present audit. --- libexec/ld.elf_so/rtld.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/libexec/ld.elf_so/rtld.c b/libexec/ld.elf_so/rtld.c index 59ed3c4530b2..f08664529bb8 100644 --- a/libexec/ld.elf_so/rtld.c +++ b/libexec/ld.elf_so/rtld.c @@ -1682,7 +1682,7 @@ _rtld_shared_enter(void) /* Yes, so increment use counter */ if (atomic_cas_uint(&_rtld_mutex, cur, cur + 1) != cur) continue; - membar_enter(); + membar_acquire(); return; } /* @@ -1728,7 +1728,7 @@ _rtld_shared_exit(void) * Wakeup LWPs waiting for an exclusive lock if this is the last * LWP on the shared lock. */ - membar_exit(); + membar_release(); if (atomic_dec_uint_nv(&_rtld_mutex)) return; membar_sync(); @@ -1750,7 +1750,7 @@ _rtld_exclusive_enter(sigset_t *mask) for (;;) { if (atomic_cas_uint(&_rtld_mutex, 0, locked_value) == 0) { - membar_enter(); + membar_acquire(); break; } waiter = atomic_swap_uint(&_rtld_waiter_exclusive, self); @@ -1774,7 +1774,7 @@ _rtld_exclusive_exit(sigset_t *mask) { lwpid_t waiter; - membar_exit(); + membar_release(); _rtld_mutex = 0; membar_sync(); if ((waiter = _rtld_waiter_exclusive) != 0) From 1bd9922fa9d1318e4fd1c88a7bd42b8801129255 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Thu, 31 Mar 2022 02:04:35 +0000 Subject: [PATCH 21/49] alpha: Convert cpu_iccb_send from membar_exit to membar_release. XXX Maybe this should really use alpha_mb, since it's not writing to normal MI-type memory so technically the membr_* semantics doesn't apply? --- sys/arch/alpha/alpha/cpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/arch/alpha/alpha/cpu.c b/sys/arch/alpha/alpha/cpu.c index 1eaa3a768f43..18fce4d6915a 100644 --- a/sys/arch/alpha/alpha/cpu.c +++ b/sys/arch/alpha/alpha/cpu.c @@ -887,7 +887,7 @@ cpu_iccb_send(long cpu_id, const char *msg) */ strcpy(pcsp->pcs_iccb.iccb_rxbuf, msg); pcsp->pcs_iccb.iccb_rxlen = strlen(msg); - membar_exit(); + membar_release(); atomic_or_ulong(&hwrpb->rpb_rxrdy, cpumask); /* Wait for the message to be received. */ From 7e71f84283acace6f36b34d425323f707c39bad0 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Thu, 31 Mar 2022 02:05:19 +0000 Subject: [PATCH 22/49] alpha: Convert ipifuncs.c to membar_release/acquire. --- sys/arch/alpha/alpha/ipifuncs.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sys/arch/alpha/alpha/ipifuncs.c b/sys/arch/alpha/alpha/ipifuncs.c index 39a049020166..79d28595b21f 100644 --- a/sys/arch/alpha/alpha/ipifuncs.c +++ b/sys/arch/alpha/alpha/ipifuncs.c @@ -127,10 +127,11 @@ alpha_ipi_process(struct cpu_info *ci, struct trapframe *framep) while ((pending_ipis = atomic_swap_ulong(&ci->ci_ipis, 0)) != 0) { /* - * Ensure the atomic swap is globally visible before - * we do any of the work. + * Ensure everything before setting ci_ipis + * happens-before everything after swapping ci_ipis so + * we're not working on stale inputs. */ - membar_enter(); + membar_acquire(); sc->sc_evcnt_ipi.ev_count++; @@ -159,7 +160,7 @@ alpha_send_ipi(u_long const cpu_id, u_long const ipimask) * alpha_send_ipi() have completed before informing * the CPU of the work we are asking it to do. 
*/ - membar_exit(); + membar_release(); atomic_or_ulong(&cpu_info[cpu_id]->ci_ipis, ipimask); /* From 160bd3610689f0e821a9651faf4df0ee9ea722f1 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Thu, 31 Mar 2022 02:06:15 +0000 Subject: [PATCH 23/49] hppa: Convert ipiuncs.c to membar_release/acquire. --- sys/arch/hppa/hppa/ipifuncs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sys/arch/hppa/hppa/ipifuncs.c b/sys/arch/hppa/hppa/ipifuncs.c index 35dab6f50381..774b1ce15d57 100644 --- a/sys/arch/hppa/hppa/ipifuncs.c +++ b/sys/arch/hppa/hppa/ipifuncs.c @@ -83,7 +83,7 @@ hppa_ipi_intr(void *arg) /* Handle an IPI. */ ipi_pending = atomic_swap_ulong(&ci->ci_ipi, 0); - membar_enter(); /* matches membar_exit in xc_send_ipi, cpu_ipi */ + membar_acquire(); /* matches membar_release in xc_send_ipi, cpu_ipi */ KASSERT(ipi_pending); @@ -169,7 +169,7 @@ xc_send_ipi(struct cpu_info *ci) KASSERT(kpreempt_disabled()); KASSERT(curcpu() != ci); - membar_exit(); /* matches membar_enter in hppa_ipi_intr */ + membar_release(); /* matches membar_acquire in hppa_ipi_intr */ if (ci) { /* Unicast: remote CPU. */ hppa_ipi_send(ci, HPPA_IPI_XCALL); @@ -185,7 +185,7 @@ cpu_ipi(struct cpu_info *ci) KASSERT(kpreempt_disabled()); KASSERT(curcpu() != ci); - membar_exit(); /* matches membar_enter in hppa_ipi_intr */ + membar_release(); /* matches membar_acquire in hppa_ipi_intr */ if (ci) { /* Unicast: remote CPU. */ hppa_ipi_send(ci, HPPA_IPI_GENERIC); From 126c19d1984bff5d9c75d9af35b5bce02e2bf43e Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Thu, 31 Mar 2022 02:07:00 +0000 Subject: [PATCH 24/49] mips: Convert lock.h to membar_release/acquire. --- sys/arch/mips/include/lock.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sys/arch/mips/include/lock.h b/sys/arch/mips/include/lock.h index 200f95bb88e5..092863823177 100644 --- a/sys/arch/mips/include/lock.h +++ b/sys/arch/mips/include/lock.h @@ -114,7 +114,7 @@ __cpu_simple_lock_try(__cpu_simple_lock_t *lp) * Successful _atomic_cas_uint functions as a load-acquire -- * on MP systems, it issues sync after the LL/SC CAS succeeds; * on non-MP systems every load is a load-acquire so it's moot. - * This pairs with the membar_exit and store sequence in + * This pairs with the membar_release and store sequence in * __cpu_simple_unlock that functions as a store-release * operation. * @@ -153,14 +153,14 @@ __cpu_simple_unlock(__cpu_simple_lock_t *lp) { /* - * The membar_exit and then store functions as a store-release - * operation that pairs with the load-acquire operation in - * successful __cpu_simple_lock_try. + * The membar_release and then store functions as a + * store-release operation that pairs with the load-acquire + * operation in successful __cpu_simple_lock_try. * * Can't use atomic_store_release here because that's not * available in userland at the moment. */ - membar_exit(); + membar_release(); *lp = __SIMPLELOCK_UNLOCKED; #ifdef _MIPS_ARCH_OCTEONP From d1b67fe4517c5a5b78858f79007ecbde706168bd Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Thu, 31 Mar 2022 02:09:11 +0000 Subject: [PATCH 25/49] linux: Convert various API shims to use membar_release/acquire. 
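The representative case is the tasklet run lock in linux_tasklet.c,
which is a plain acquire/release pairing.  Condensed from the diff
below (a sketch, not the literal code):

	/* tasklet_trylock: CAS TASKLET_RUNNING into tl_state, then */
	membar_acquire();	/* was membar_enter */

	/* tasklet_unlock */
	membar_release();	/* was membar_exit */
	atomic_and_uint(&tasklet->tl_state, ~TASKLET_RUNNING);

The smp_mb__before_atomic/smp_mb__after_atomic shims in asm/barrier.h
follow the same logic: before-atomic becomes a release barrier,
after-atomic becomes an acquire barrier.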
--- sys/external/bsd/common/include/asm/barrier.h | 4 +-- .../bsd/common/include/linux/compiler.h | 2 +- sys/external/bsd/common/linux/linux_tasklet.c | 28 +++++++++---------- sys/external/bsd/common/linux/linux_work.c | 4 +-- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/sys/external/bsd/common/include/asm/barrier.h b/sys/external/bsd/common/include/asm/barrier.h index de0405ae2b45..d6aa0523d726 100644 --- a/sys/external/bsd/common/include/asm/barrier.h +++ b/sys/external/bsd/common/include/asm/barrier.h @@ -68,8 +68,8 @@ #endif #if defined(MULTIPROCESSOR) && !defined(__HAVE_ATOMIC_AS_MEMBAR) -# define smp_mb__before_atomic() membar_exit() -# define smp_mb__after_atomic() membar_sync() /* XXX acquire */ +# define smp_mb__before_atomic() membar_release() +# define smp_mb__after_atomic() membar_acquire() #else # define smp_mb__before_atomic() __insn_barrier() # define smp_mb__after_atomic() __insn_barrier() diff --git a/sys/external/bsd/common/include/linux/compiler.h b/sys/external/bsd/common/include/linux/compiler.h index 5f4e8cd2abad..c0b288e7ddec 100644 --- a/sys/external/bsd/common/include/linux/compiler.h +++ b/sys/external/bsd/common/include/linux/compiler.h @@ -80,7 +80,7 @@ #define smp_store_release(X, V) do { \ typeof(X) __smp_store_release_tmp = (V); \ - membar_exit(); \ + membar_release(); \ (X) = __write_once_tmp; \ } while (0) diff --git a/sys/external/bsd/common/linux/linux_tasklet.c b/sys/external/bsd/common/linux/linux_tasklet.c index 5288a01c1543..4ccf9231f722 100644 --- a/sys/external/bsd/common/linux/linux_tasklet.c +++ b/sys/external/bsd/common/linux/linux_tasklet.c @@ -245,7 +245,7 @@ tasklet_softintr(void *cookie) /* * Check whether it's currently disabled. * - * Pairs with membar_exit in __tasklet_enable. + * Pairs with membar_release in __tasklet_enable. */ if (atomic_load_acquire(&tasklet->tl_disablecount)) { /* @@ -394,9 +394,9 @@ tasklet_disable_nosync(struct tasklet_struct *tasklet) KASSERT(disablecount < UINT_MAX); KASSERT(disablecount != 0); - /* Pairs with membar_exit in __tasklet_enable. */ + /* Pairs with membar_release in __tasklet_enable. */ #ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_enter(); + membar_acquire(); #endif } @@ -514,9 +514,9 @@ tasklet_trylock(struct tasklet_struct *tasklet) } while (atomic_cas_uint(&tasklet->tl_state, state, state | TASKLET_RUNNING) != state); - /* Pairs with membar_exit in tasklet_unlock. */ + /* Pairs with membar_release in tasklet_unlock. */ #ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_enter(); + membar_acquire(); #endif return true; @@ -536,11 +536,11 @@ tasklet_unlock(struct tasklet_struct *tasklet) KASSERT(atomic_load_relaxed(&tasklet->tl_state) & TASKLET_RUNNING); /* - * Pairs with membar_enter in tasklet_trylock and with + * Pairs with membar_acquire in tasklet_trylock and with * atomic_load_acquire in tasklet_unlock_wait. */ #ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_exit(); + membar_release(); #endif atomic_and_uint(&tasklet->tl_state, ~TASKLET_RUNNING); } @@ -556,7 +556,7 @@ void tasklet_unlock_wait(const struct tasklet_struct *tasklet) { - /* Pairs with membar_exit in tasklet_unlock. */ + /* Pairs with membar_release in tasklet_unlock. */ while (atomic_load_acquire(&tasklet->tl_state) & TASKLET_RUNNING) SPINLOCK_BACKOFF_HOOK; } @@ -589,9 +589,9 @@ __tasklet_disable_sync_once(struct tasklet_struct *tasklet) KASSERT(disablecount < UINT_MAX); KASSERT(disablecount != 0); - /* Pairs with membar_exit in __tasklet_enable_sync_once. */ + /* Pairs with membar_release in __tasklet_enable_sync_once. 
*/ #ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_enter(); + membar_acquire(); #endif /* @@ -613,9 +613,9 @@ __tasklet_enable_sync_once(struct tasklet_struct *tasklet) { unsigned int disablecount; - /* Pairs with membar_enter in __tasklet_disable_sync_once. */ + /* Pairs with membar_acquire in __tasklet_disable_sync_once. */ #ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_exit(); + membar_release(); #endif /* Decrement the disable count. */ @@ -681,10 +681,10 @@ __tasklet_enable(struct tasklet_struct *tasklet) * decrementing the disable count. * * Pairs with atomic_load_acquire in tasklet_softintr and with - * membar_enter in tasklet_disable. + * membar_acquire in tasklet_disable. */ #ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_exit(); + membar_release(); #endif /* Decrement the disable count. */ diff --git a/sys/external/bsd/common/linux/linux_work.c b/sys/external/bsd/common/linux/linux_work.c index 978dd4c1e780..016947c10e9e 100644 --- a/sys/external/bsd/common/linux/linux_work.c +++ b/sys/external/bsd/common/linux/linux_work.c @@ -639,7 +639,7 @@ acquire_work(struct work_struct *work, struct workqueue_struct *wq) owner0); KASSERT(work_queue(work) == wq); - membar_enter(); + membar_acquire(); SDT_PROBE2(sdt, linux, work, acquire, work, wq); return true; } @@ -660,7 +660,7 @@ release_work(struct work_struct *work, struct workqueue_struct *wq) KASSERT(mutex_owned(&wq->wq_lock)); SDT_PROBE2(sdt, linux, work, release, work, wq); - membar_exit(); + membar_release(); /* * Non-interlocked r/m/w is safe here because nobody else can From 510ed7dff71505920e0319c2522f624b411a3791 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Thu, 31 Mar 2022 02:13:23 +0000 Subject: [PATCH 26/49] linux/kref: Fix memory barriers and use membar_release/acquire. --- sys/external/bsd/drm2/include/linux/kref.h | 23 +++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/sys/external/bsd/drm2/include/linux/kref.h b/sys/external/bsd/drm2/include/linux/kref.h index 00619bfad828..93fc01c950e4 100644 --- a/sys/external/bsd/drm2/include/linux/kref.h +++ b/sys/external/bsd/drm2/include/linux/kref.h @@ -58,10 +58,6 @@ kref_get(struct kref *kref) atomic_inc_uint_nv(&kref->kr_count); KASSERTMSG((count > 1), "getting released kref"); - -#ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_enter(); -#endif } static inline bool @@ -76,10 +72,6 @@ kref_get_unless_zero(struct kref *kref) } while (atomic_cas_uint(&kref->kr_count, count, (count + 1)) != count); -#ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_enter(); -#endif - return true; } @@ -89,7 +81,7 @@ kref_sub(struct kref *kref, unsigned int count, void (*release)(struct kref *)) unsigned int old, new; #ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_exit(); + membar_release(); #endif do { @@ -100,6 +92,9 @@ kref_sub(struct kref *kref, unsigned int count, void (*release)(struct kref *)) } while (atomic_cas_uint(&kref->kr_count, old, new) != old); if (new == 0) { +#ifndef __HAVE_ATOMIC_AS_MEMBAR + membar_acquire(); +#endif (*release)(kref); return 1; } @@ -114,7 +109,7 @@ kref_put_lock(struct kref *kref, void (*release)(struct kref *), unsigned int old, new; #ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_exit(); + membar_release(); #endif do { @@ -123,6 +118,9 @@ kref_put_lock(struct kref *kref, void (*release)(struct kref *), if (old == 1) { spin_lock(interlock); if (atomic_add_int_nv(&kref->kr_count, -1) == 0) { +#ifndef __HAVE_ATOMIC_AS_MEMBAR + membar_acquire(); +#endif (*release)(kref); return 1; } @@ -149,7 +147,7 @@ kref_put_mutex(struct kref *kref, void (*release)(struct kref *), 
unsigned int old, new; #ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_exit(); + membar_release(); #endif do { @@ -158,6 +156,9 @@ kref_put_mutex(struct kref *kref, void (*release)(struct kref *), if (old == 1) { mutex_lock(interlock); if (atomic_add_int_nv(&kref->kr_count, -1) == 0) { +#ifndef __HAVE_ATOMIC_AS_MEMBAR + membar_acquire(); +#endif (*release)(kref); return 1; } From faa10555568b65e89b671341399b4bcaced6a149 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Thu, 31 Mar 2022 02:13:44 +0000 Subject: [PATCH 27/49] linux/llist: Use membar_release and membar_datadep_consumer. No need for membar_acquire here! Loads are all data-dependent. --- sys/external/bsd/drm2/include/linux/llist.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sys/external/bsd/drm2/include/linux/llist.h b/sys/external/bsd/drm2/include/linux/llist.h index ac096e7d300b..4190392d2ee7 100644 --- a/sys/external/bsd/drm2/include/linux/llist.h +++ b/sys/external/bsd/drm2/include/linux/llist.h @@ -70,7 +70,7 @@ llist_add(struct llist_node *node, struct llist_head *head) do { first = head->first; node->next = first; - membar_exit(); + membar_release(); } while (atomic_cas_ptr(&head->first, first, node) != first); return first == NULL; @@ -96,7 +96,7 @@ llist_del_all(struct llist_head *head) struct llist_node *first; first = atomic_swap_ptr(&head->first, NULL); - membar_enter(); + membar_datadep_consumer(); return first; } From dda20596b57c71620d3bc72eab55cb4c35118cdd Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Thu, 31 Mar 2022 02:17:23 +0000 Subject: [PATCH 28/49] linux/ratelimit: Convert to membar_acquire and atomic_store_release. Simplify while here: atomic_swap is enough, no need for atomic_cas. (Maybe drm'll run faster on sparcv8 this way...!) --- sys/external/bsd/drm2/include/linux/ratelimit.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sys/external/bsd/drm2/include/linux/ratelimit.h b/sys/external/bsd/drm2/include/linux/ratelimit.h index 4628f71ec472..cd9a617d9310 100644 --- a/sys/external/bsd/drm2/include/linux/ratelimit.h +++ b/sys/external/bsd/drm2/include/linux/ratelimit.h @@ -86,14 +86,13 @@ __ratelimit(struct ratelimit_state *r) { int ok; - if (atomic_cas_uint(&r->rl_lock, 0, 1)) { + if (atomic_swap_uint(&r->rl_lock, 1)) { ok = false; goto out; } - membar_enter(); + membar_acquire(); ok = ppsratecheck(&r->rl_lasttime, &r->rl_curpps, r->rl_maxpps); - membar_exit(); - r->rl_lock = 0; + atomic_store_release(&r->rl_lock, 0); out: if (!ok) atomic_store_relaxed(&r->missed, 1); From 093ddf770f2dc8ebd1c690ea15b8d68b5a4f67da Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Thu, 31 Mar 2022 02:20:23 +0000 Subject: [PATCH 29/49] drm: Convert membar_enter/exit stragglers to membar_acquire/release. 
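These are all the usual patterns.  For example, the reference drop in
dma_buf_put wants release before the decrement and acquire before
tearing the object down on the last reference; condensed from the diff
below:

	membar_release();
	if (atomic_dec_uint_nv(&dmabuf->db_refcnt) != 0)
		return;
	membar_acquire();
	/* ...last reference: destroy... */

dma_fence_context_alloc and dma_fence_chain_walk are the analogous
lock-acquire and pointer-publish cases.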
--- sys/external/bsd/drm2/linux/linux_dma_buf.c | 4 ++-- sys/external/bsd/drm2/linux/linux_dma_fence.c | 4 ++-- sys/external/bsd/drm2/linux/linux_dma_fence_chain.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sys/external/bsd/drm2/linux/linux_dma_buf.c b/sys/external/bsd/drm2/linux/linux_dma_buf.c index 4acccf3b490b..e7c3fc1ea16d 100644 --- a/sys/external/bsd/drm2/linux/linux_dma_buf.c +++ b/sys/external/bsd/drm2/linux/linux_dma_buf.c @@ -160,10 +160,10 @@ void dma_buf_put(struct dma_buf *dmabuf) { - membar_exit(); + membar_release(); if (atomic_dec_uint_nv(&dmabuf->db_refcnt) != 0) return; - membar_enter(); + membar_acquire(); dma_resv_poll_fini(&dmabuf->db_resv_poll); mutex_destroy(&dmabuf->db_lock); diff --git a/sys/external/bsd/drm2/linux/linux_dma_fence.c b/sys/external/bsd/drm2/linux/linux_dma_fence.c index 9f1a2ad96da4..09626f9f7a25 100644 --- a/sys/external/bsd/drm2/linux/linux_dma_fence.c +++ b/sys/external/bsd/drm2/linux/linux_dma_fence.c @@ -245,9 +245,9 @@ dma_fence_context_alloc(unsigned n) } S; uint64_t c; - while (__predict_false(atomic_cas_uint(&S.lock, 0, 1) != 0)) + while (__predict_false(atomic_swap_uint(&S.lock, 1))) SPINLOCK_BACKOFF_HOOK; - membar_enter(); + membar_acquire(); c = S.context; S.context += n; atomic_store_release(&S.lock, 0); diff --git a/sys/external/bsd/drm2/linux/linux_dma_fence_chain.c b/sys/external/bsd/drm2/linux/linux_dma_fence_chain.c index 2e1ae92efb90..7c1dda93894e 100644 --- a/sys/external/bsd/drm2/linux/linux_dma_fence_chain.c +++ b/sys/external/bsd/drm2/linux/linux_dma_fence_chain.c @@ -262,7 +262,7 @@ dma_fence_chain_walk(struct dma_fence *fence) break; splice = NULL; } - membar_exit(); /* pairs with dma_fence_get_rcu_safe */ + membar_release(); /* pairs with dma_fence_get_rcu_safe */ if (atomic_cas_ptr(&chain->dfc_prev, prev, splice) == prev) dma_fence_put(prev); /* transferred to splice */ else From df89bbad4692500e7c089e131a134fe2ff9bc5e1 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Thu, 31 Mar 2022 02:22:02 +0000 Subject: [PATCH 30/49] ena: Fix rmb/wmb/mb to match Linux on x86 and aarch64. This is not right but it's not worse than it was before. --- sys/external/bsd/ena-com/ena_plat.h | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/sys/external/bsd/ena-com/ena_plat.h b/sys/external/bsd/ena-com/ena_plat.h index b280a2adb224..28c41b04aba1 100644 --- a/sys/external/bsd/ena-com/ena_plat.h +++ b/sys/external/bsd/ena-com/ena_plat.h @@ -378,8 +378,23 @@ void prefetch(void *x) #include "ena_defs/ena_includes.h" -#define rmb() membar_enter() -#define wmb() membar_exit() +/* + * XXX This is not really right. Need to adjust the driver to use + * bus_space_barrier or bus_dmamap_sync. + */ +#if defined(__i386__) || defined(__x86_64__) +#include +#define rmb() x86_lfence() +#define wmb() x86_sfence() +#define mb() x86_mfence() +#elif defined(__aarch64__) +#define rmb() __asm __volatile("dsb ld" ::: "memory") +#define wmb() __asm __volatile("dsb st" ::: "memory") +#define mb() __asm __volatile("dsb sy" ::: "memory") +#else +#define rmb() membar_acquire() +#define wmb() membar_release() #define mb() membar_sync() +#endif #endif /* ENA_PLAT_H_ */ From 8e47ff84adf99f13ee3be9f7db55554b743f2e76 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Thu, 31 Mar 2022 02:36:19 +0000 Subject: [PATCH 31/49] if_shmem(4): Use membar_acquire/release for lock acquire/release. 
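shmif_lockbus takes the bus lock with an atomic r/m/w and
shmif_unlockbus drops it with a plain swap back to LOCK_UNLOCKED, so
acquire on the way in and release on the way out are exactly the
barriers wanted.  Roughly (condensed from the diff below):

	/* shmif_lockbus: once the r/m/w wins the lock */
	membar_acquire();	/* was membar_enter */

	/* shmif_unlockbus */
	membar_release();	/* was membar_exit */
	old = atomic_swap_32(&busmem->shm_lock, LOCK_UNLOCKED);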
--- sys/rump/net/lib/libshmif/if_shmem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sys/rump/net/lib/libshmif/if_shmem.c b/sys/rump/net/lib/libshmif/if_shmem.c index 173aebc8e49b..39487337d38f 100644 --- a/sys/rump/net/lib/libshmif/if_shmem.c +++ b/sys/rump/net/lib/libshmif/if_shmem.c @@ -144,7 +144,7 @@ shmif_lockbus(struct shmif_mem *busmem) } continue; } - membar_enter(); + membar_acquire(); } static void @@ -152,7 +152,7 @@ shmif_unlockbus(struct shmif_mem *busmem) { unsigned int old __diagused; - membar_exit(); + membar_release(); old = atomic_swap_32(&busmem->shm_lock, LOCK_UNLOCKED); KASSERT(old == LOCK_LOCKED); } From 47c270d040d03d0e8cdd300f33743c758ce79dd1 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Thu, 31 Mar 2022 02:37:01 +0000 Subject: [PATCH 32/49] rumpkern/scheduler: Use membar_release. ...but add an XXX comment asking for clarity on what it pairs with. --- sys/rump/librump/rumpkern/scheduler.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/rump/librump/rumpkern/scheduler.c b/sys/rump/librump/rumpkern/scheduler.c index 12dfde2f1765..577d6ea4986e 100644 --- a/sys/rump/librump/rumpkern/scheduler.c +++ b/sys/rump/librump/rumpkern/scheduler.c @@ -473,7 +473,7 @@ rump_unschedule_cpu1(struct lwp *l, void *interlock) if (interlock == rcpu->rcpu_mtx) rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx); else - membar_exit(); + membar_release(); /* XXX what does this pair with? */ /* Release the CPU. */ old = atomic_swap_ptr(&rcpu->rcpu_prevlwp, l); From 8d072f205f6d92a7bed20a68a635b634a69c3af3 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Thu, 31 Mar 2022 02:37:26 +0000 Subject: [PATCH 33/49] rumpkern/sleepq: Convert membar_exit/store to atomic_store_release. --- sys/rump/librump/rumpkern/sleepq.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sys/rump/librump/rumpkern/sleepq.c b/sys/rump/librump/rumpkern/sleepq.c index 4d1ded236e88..5fe92a5ae55d 100644 --- a/sys/rump/librump/rumpkern/sleepq.c +++ b/sys/rump/librump/rumpkern/sleepq.c @@ -163,7 +163,6 @@ lwp_unlock_to(struct lwp *l, kmutex_t *new) KASSERT(mutex_owned(l->l_mutex)); old = l->l_mutex; - membar_exit(); - l->l_mutex = new; + atomic_store_release(&l->l_mutex, new); mutex_spin_exit(old); } From 7f0677197a91283edec4190c040df32f2da751a1 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Thu, 31 Mar 2022 02:37:59 +0000 Subject: [PATCH 34/49] kern: Set l_mutex with atomic_store_release. Nix membar_exit. --- sys/kern/kern_lwp.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sys/kern/kern_lwp.c b/sys/kern/kern_lwp.c index 9af79c9bec01..273a44a6dbe3 100644 --- a/sys/kern/kern_lwp.c +++ b/sys/kern/kern_lwp.c @@ -1567,8 +1567,7 @@ lwp_setlock(struct lwp *l, kmutex_t *mtx) KASSERT(mutex_owned(oldmtx)); - membar_exit(); - l->l_mutex = mtx; + atomic_store_release(&l->l_mutex, mtx); return oldmtx; } @@ -1584,8 +1583,7 @@ lwp_unlock_to(struct lwp *l, kmutex_t *mtx) KASSERT(lwp_locked(l, NULL)); old = l->l_mutex; - membar_exit(); - l->l_mutex = mtx; + atomic_store_release(&l->l_mutex, mtx); mutex_spin_exit(old); } From fc38b190131dffcc9ebb859ecef28c1c360514f6 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Thu, 31 Mar 2022 02:39:38 +0000 Subject: [PATCH 35/49] vfs(9): Add XXX comment about unclear membar_enter. 
--- sys/kern/vfs_vnode.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sys/kern/vfs_vnode.c b/sys/kern/vfs_vnode.c index 06f24a8bce79..f7cc9cc48d34 100644 --- a/sys/kern/vfs_vnode.c +++ b/sys/kern/vfs_vnode.c @@ -273,6 +273,8 @@ _vstate_assert(vnode_t *vp, enum vnode_state state, const char *func, int line, /* * Prevent predictive loads from the CPU, but check the state * without loooking first. + * + * XXX what does this pair with? */ membar_enter(); if (state == VS_ACTIVE && refcnt > 0 && From 07c73e384bf896225c43f65ec89cc0e0a9e03960 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Thu, 31 Mar 2022 02:39:59 +0000 Subject: [PATCH 36/49] mutex(9): Convert to membar_acquire/release. Except for setting the waiters bit -- not sure if this is actually required to be store-before-load/store. Seems unlikely -- surely we'd have seen some serious bug by now if not, because membar_enter has failed to guarantee that on x86! -- but I'm leaving it for now until I have time to think enough about it to be sure one way or another. --- sys/kern/kern_mutex.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sys/kern/kern_mutex.c b/sys/kern/kern_mutex.c index e94157ba77e7..57a546c5e172 100644 --- a/sys/kern/kern_mutex.c +++ b/sys/kern/kern_mutex.c @@ -169,10 +169,12 @@ do { \ */ #ifdef __HAVE_ATOMIC_AS_MEMBAR #define MUTEX_MEMBAR_ENTER() -#define MUTEX_MEMBAR_EXIT() +#define MUTEX_MEMBAR_ACQUIRE() +#define MUTEX_MEMBAR_RELEASE() #else #define MUTEX_MEMBAR_ENTER() membar_enter() -#define MUTEX_MEMBAR_EXIT() membar_exit() +#define MUTEX_MEMBAR_ACQUIRE() membar_acquire() +#define MUTEX_MEMBAR_RELEASE() membar_release() #endif /* @@ -238,7 +240,7 @@ MUTEX_ACQUIRE(kmutex_t *mtx, uintptr_t curthread) MUTEX_INHERITDEBUG(oldown, mtx->mtx_owner); MUTEX_INHERITDEBUG(newown, oldown); rv = MUTEX_CAS(&mtx->mtx_owner, oldown, newown); - MUTEX_MEMBAR_ENTER(); + MUTEX_MEMBAR_ACQUIRE(); return rv; } @@ -256,7 +258,7 @@ MUTEX_RELEASE(kmutex_t *mtx) { uintptr_t newown; - MUTEX_MEMBAR_EXIT(); + MUTEX_MEMBAR_RELEASE(); newown = 0; MUTEX_INHERITDEBUG(newown, mtx->mtx_owner); mtx->mtx_owner = newown; From f3e75e4a542479f338355abcd90e39e7e96a0118 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Thu, 31 Mar 2022 02:41:01 +0000 Subject: [PATCH 37/49] rwlock(9): Convert to membar_acquire/release. Leave an XXX comment where I suspect there might be a missing membar -- to be audited when I have more time to think! --- sys/kern/kern_rwlock.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/sys/kern/kern_rwlock.c b/sys/kern/kern_rwlock.c index 14b6dd46cd42..48a82ddb12a0 100644 --- a/sys/kern/kern_rwlock.c +++ b/sys/kern/kern_rwlock.c @@ -101,12 +101,12 @@ do { \ * Memory barriers. */ #ifdef __HAVE_ATOMIC_AS_MEMBAR -#define RW_MEMBAR_ENTER() -#define RW_MEMBAR_EXIT() +#define RW_MEMBAR_ACQUIRE() +#define RW_MEMBAR_RELEASE() #define RW_MEMBAR_PRODUCER() #else -#define RW_MEMBAR_ENTER() membar_enter() -#define RW_MEMBAR_EXIT() membar_exit() +#define RW_MEMBAR_ACQUIRE() membar_acquire() +#define RW_MEMBAR_RELEASE() membar_release() #define RW_MEMBAR_PRODUCER() membar_producer() #endif @@ -344,7 +344,7 @@ rw_vector_enter(krwlock_t *rw, const krw_t op) ~RW_WRITE_WANTED); if (__predict_true(next == owner)) { /* Got it! */ - RW_MEMBAR_ENTER(); + RW_MEMBAR_ACQUIRE(); break; } @@ -396,6 +396,7 @@ rw_vector_enter(krwlock_t *rw, const krw_t op) continue; } next = rw_cas(rw, owner, owner | set_wait); + /* XXX membar? 
*/ if (__predict_false(next != owner)) { turnstile_exit(rw); owner = next; @@ -471,7 +472,7 @@ rw_vector_exit(krwlock_t *rw) * proceed to do direct handoff if there are waiters, and if the * lock would become unowned. */ - RW_MEMBAR_EXIT(); + RW_MEMBAR_RELEASE(); for (;;) { newown = (owner - decr); if ((newown & (RW_THREAD | RW_HAS_WAITERS)) == RW_HAS_WAITERS) @@ -585,7 +586,7 @@ rw_vector_tryenter(krwlock_t *rw, const krw_t op) RW_ASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) || (op == RW_READER && RW_COUNT(rw) != 0)); - RW_MEMBAR_ENTER(); + RW_MEMBAR_ACQUIRE(); return 1; } From f1ea03c9f47eb0d8e31c33e8e7f9f73ea5f9ab03 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Thu, 31 Mar 2022 02:41:33 +0000 Subject: [PATCH 38/49] callout(9): Convert membar_exit/store to atomic_store_release. Leave an XXX comment to make it clearer what this pairs with. --- sys/kern/kern_timeout.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sys/kern/kern_timeout.c b/sys/kern/kern_timeout.c index cc3277f9bebe..aea725515a72 100644 --- a/sys/kern/kern_timeout.c +++ b/sys/kern/kern_timeout.c @@ -593,9 +593,11 @@ callout_bind(callout_t *cs, struct cpu_info *ci) * structure, as we don't hold the new CPU's lock. * Issue memory barrier to prevent accesses being * reordered. + * + * XXX Pairs with acquire barrier implied by + * mutex_enter in callout_lock? Unclear... */ - membar_exit(); - c->c_cpu = cc; + atomic_store_release(&c->c_cpu, cc); } mutex_spin_exit(lock); } From 7c80e5e06995bffd7ccce28bda173ee37d467cc1 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Thu, 31 Mar 2022 02:42:06 +0000 Subject: [PATCH 39/49] ucas(9): Convert membar_exit to membar_release. --- sys/kern/subr_copy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sys/kern/subr_copy.c b/sys/kern/subr_copy.c index 0ba7e40c54a2..a6604b81b7d1 100644 --- a/sys/kern/subr_copy.c +++ b/sys/kern/subr_copy.c @@ -412,7 +412,7 @@ ucas_critical_cpu_gate(void *arg __unused) * the following atomic_dec_uint into a store-release. */ #ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_exit(); + membar_release(); #endif atomic_dec_uint(&ucas_critical_pausing_cpus); @@ -449,7 +449,7 @@ ucas_critical_wait(void) * happen before the ucas -- no buffered stores in other CPUs * can clobber it later on, for instance. * - * Matches membar_exit/atomic_dec_uint (store-release) in + * Matches membar_release/atomic_dec_uint (store-release) in * ucas_critical_cpu_gate. */ while (atomic_load_acquire(&ucas_critical_pausing_cpus) > 0) { From f4b43aed417204659f08b66fd8bd4949227b3cb9 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Thu, 31 Mar 2022 02:42:27 +0000 Subject: [PATCH 40/49] ipi(9): Convert membar_exit/enter to membar_release/acquire. No store-before-load ordering needed here, just need to ensure that in A, ipi_send, ipi receive, B, memory operations in A happen-before memory operations in B. --- sys/kern/subr_ipi.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sys/kern/subr_ipi.c b/sys/kern/subr_ipi.c index 3d76d764a55c..1598df1cbf79 100644 --- a/sys/kern/subr_ipi.c +++ b/sys/kern/subr_ipi.c @@ -190,7 +190,7 @@ ipi_mark_pending(u_int ipi_id, struct cpu_info *ci) /* Mark as pending and return true if not previously marked. 
*/ if ((atomic_load_acquire(&ci->ci_ipipend[i]) & bitm) == 0) { #ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_exit(); + membar_release(); #endif atomic_or_32(&ci->ci_ipipend[i], bitm); return true; @@ -265,7 +265,7 @@ ipi_trigger_broadcast(u_int ipi_id, bool skip_self) /* * put_msg: insert message into the mailbox. * - * Caller is responsible for issuing membar_exit first. + * Caller is responsible for issuing membar_release first. */ static inline void put_msg(ipi_mbox_t *mbox, ipi_msg_t *msg) @@ -304,7 +304,7 @@ ipi_cpu_handler(void) } pending = atomic_swap_32(&ci->ci_ipipend[i], 0); #ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_enter(); + membar_acquire(); #endif while ((bit = ffs(pending)) != 0) { const u_int ipi_id = (i << IPI_BITW_SHIFT) | --bit; @@ -342,7 +342,7 @@ ipi_msg_cpu_handler(void *arg __unused) /* Ack the request. */ #ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_exit(); + membar_release(); #endif atomic_dec_uint(&msg->_pending); } @@ -365,7 +365,7 @@ ipi_unicast(ipi_msg_t *msg, struct cpu_info *ci) msg->_pending = 1; #ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_exit(); + membar_release(); #endif put_msg(&ipi_mboxes[id], msg); @@ -391,7 +391,7 @@ ipi_multicast(ipi_msg_t *msg, const kcpuset_t *target) local = !!kcpuset_isset(target, cpu_index(self)); msg->_pending = kcpuset_countset(target) - local; #ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_exit(); + membar_release(); #endif for (CPU_INFO_FOREACH(cii, ci)) { @@ -429,7 +429,7 @@ ipi_broadcast(ipi_msg_t *msg, bool skip_self) msg->_pending = ncpu - 1; #ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_exit(); + membar_release(); #endif /* Broadcast IPIs for remote CPUs. */ From dc48eaaade83040a4ec76d940c63d73b5cd0cdb4 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Thu, 31 Mar 2022 02:43:35 +0000 Subject: [PATCH 41/49] pool(9): Convert membar_exit to membar_release. --- sys/kern/subr_pool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/kern/subr_pool.c b/sys/kern/subr_pool.c index 1d991f6d9f1a..5f46a3d5e09b 100644 --- a/sys/kern/subr_pool.c +++ b/sys/kern/subr_pool.c @@ -2621,7 +2621,7 @@ pool_pcg_put(pcg_t *volatile *head, pcg_t *pcg) } pcg->pcg_next = o; #ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_exit(); + membar_release(); #endif n = atomic_cas_ptr(head, o, pcg); if (o == n) { From 17a4a100cb2eeb05ee05e555af9cb84c0203d219 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Thu, 31 Mar 2022 02:43:52 +0000 Subject: [PATCH 42/49] thmap(9): Convert membar_exit to membar_release. --- sys/kern/subr_thmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/kern/subr_thmap.c b/sys/kern/subr_thmap.c index 6aceb05c0b0a..8ee27cd74a60 100644 --- a/sys/kern/subr_thmap.c +++ b/sys/kern/subr_thmap.c @@ -121,7 +121,7 @@ THMAP_RCSID("$NetBSD: subr_thmap.c,v 1.10 2022/02/13 19:20:33 riastradh Exp $"); */ #ifdef _KERNEL #define ASSERT KASSERT -#define atomic_thread_fence(x) membar_exit() /* only used for release order */ +#define atomic_thread_fence(x) membar_release() /* only used for release order */ #define atomic_compare_exchange_weak_explicit_32(p, e, n, m1, m2) \ (atomic_cas_32((p), *(e), (n)) == *(e)) #define atomic_compare_exchange_weak_explicit_ptr(p, e, n, m1, m2) \ From da99423e4cc4d178a92442048411ef5ebf02ac06 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Thu, 31 Mar 2022 02:44:04 +0000 Subject: [PATCH 43/49] futex(9): Convert membar_enter/exit to membar_acquire/release. No functional change -- this is just in an illustrative comment! 
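For comparison, the acquire/release discipline in that comment maps onto C11 atomics roughly as follows. This is only a sketch of the uncontended path (no futex wait/wake, no waiters bit), not anything the kernel uses:

	#include <stdatomic.h>

	static atomic_uint lock;	/* 0 = free, 1 = held */

	static void
	lock_acquire_sketch(void)
	{
		unsigned v;

		do {
			v = 0;	/* only attempt the 0 -> 1 transition */
		} while (!atomic_compare_exchange_weak_explicit(&lock,
		    &v, 1, memory_order_acquire, memory_order_relaxed));
		/* acquire on the winning CAS = membar_acquire after it */
	}

	static void
	lock_release_sketch(void)
	{
		/* release on the unlocking store = membar_release before it */
		atomic_store_explicit(&lock, 0, memory_order_release);
	}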
--- sys/kern/sys_futex.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sys/kern/sys_futex.c b/sys/kern/sys_futex.c index 00f352a13bda..fed462d80ba0 100644 --- a/sys/kern/sys_futex.c +++ b/sys/kern/sys_futex.c @@ -63,13 +63,13 @@ __KERNEL_RCSID(0, "$NetBSD: sys_futex.c,v 1.16 2022/03/12 15:32:32 riastradh Exp * continue; * } * } while (atomic_cas_uint(&lock, v, v & ~1) != v); - * membar_enter(); + * membar_acquire(); * * ... * * // Release the lock. Optimistically assume there are * // no waiters first until demonstrated otherwise. - * membar_exit(); + * membar_release(); * if (atomic_cas_uint(&lock, 1, 0) != 1) { * // There may be waiters. * v = atomic_swap_uint(&lock, 0); From 111bce93de7ddf13a71ad6b5f80c92b725aff7b3 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Thu, 31 Mar 2022 02:44:36 +0000 Subject: [PATCH 44/49] select(9): Use membar_acquire/release and atomic_store_release. No store-before-load ordering here -- this was obviously always intended to be load-before-load/store all along. --- sys/kern/sys_select.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sys/kern/sys_select.c b/sys/kern/sys_select.c index 8e23ef4114ef..891984d67304 100644 --- a/sys/kern/sys_select.c +++ b/sys/kern/sys_select.c @@ -282,7 +282,7 @@ sel_do_scan(const char *opname, void *fds, const int nf, const size_t ni, l->l_selflag = SEL_SCANNING; } ncoll = sc->sc_ncoll; - membar_exit(); + membar_release(); if (opname == selop_select) { error = selscan((char *)fds, nf, ni, retval); @@ -653,7 +653,7 @@ selrecord(lwp_t *selector, struct selinfo *sip) * barrier to ensure that we access sel_lwp (above) before * other fields - this guards against a call to selclear(). */ - membar_enter(); + membar_acquire(); sip->sel_lwp = selector; SLIST_INSERT_HEAD(&selector->l_selwait, sip, sel_chain); /* Copy the argument, which is for selnotify(). */ @@ -872,9 +872,8 @@ selclear(void) * globally visible. */ next = SLIST_NEXT(sip, sel_chain); - membar_exit(); /* Release the record for another named waiter to use. */ - sip->sel_lwp = NULL; + atomic_store_release(&sip->sel_lwp, NULL); } mutex_spin_exit(lock); } From d731cd3c3a8db7f179f1e52ccf9b02e1ab8227a1 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Thu, 31 Mar 2022 02:45:29 +0000 Subject: [PATCH 45/49] unix(4): Convert membar_exit to membar_release. Leave an XXX comment here because I don't know what this pairs with, but (deprecated) membar_exit and (new) membar_release have identical semantics so this can't break anything. --- sys/kern/uipc_usrreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index ac7cf5428d26..a51e82970868 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -291,7 +291,7 @@ unp_setpeerlocks(struct socket *so, struct socket *so2) lock = unp->unp_streamlock; unp->unp_streamlock = NULL; mutex_obj_hold(lock); - membar_exit(); + membar_release(); /* XXX what does this pair with? */ /* * possible race if lock is not held - see comment in * uipc_usrreq(PRU_ACCEPT). From 5788052254df618cfb3913105da3e5f8f4520a79 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Wed, 6 Apr 2022 21:10:56 +0000 Subject: [PATCH 46/49] powerpc: Rework store-store and release/acquire membars. - For userland code, use the cheapest instruction encoding that has the same meaning on classic powerpc and booke powerpc. 
This means avoiding lwsync -- only classic sync (L=0), which on booke encodes msync with the same semantics, and classic eieio, which on booke encodes mbar 0 with stronger semantics. lwsync will execute in userland but only by trapping to the kernel for emulation, which is no doubt more expensive than just issuing a full sync. - For kernel code, use classic powerpc or booke powerpc barrier instructions according to what the kernel is being built for. . On classic powerpc, take advantage of lwsync (acq/rel, i.e. r/rw and rw/w) for membar_acquire/release and eieio (w/w) for membar_producer. . On booke, take advantage of mbar 1 (w/w) for membar_producer. . For !MULTIPROCESSOR, skip the instructions altogether. Keep this organized by macros in machine/asm.h. For now, this imposes a performance penalty on classic powerpc by using sync where lwsync would work. We could fix this with an ld.so.conf for classic powerpc membars, to use lwsync where possible. Put notes and references in membar_ops.S for future editors and auditors. --- .../lib/libc/arch/powerpc/atomic/membar_ops.S | 199 +++++++++++------- sys/arch/powerpc/include/asm.h | 35 +++ sys/arch/powerpc/powerpc/lock_stubs.S | 9 - 3 files changed, 154 insertions(+), 89 deletions(-) diff --git a/common/lib/libc/arch/powerpc/atomic/membar_ops.S b/common/lib/libc/arch/powerpc/atomic/membar_ops.S index d254e7dc2d37..4af5ad38da66 100644 --- a/common/lib/libc/arch/powerpc/atomic/membar_ops.S +++ b/common/lib/libc/arch/powerpc/atomic/membar_ops.S @@ -33,106 +33,145 @@ __RCSID("$NetBSD: membar_ops.S,v 1.4 2011/01/15 07:31:11 matt Exp $") +/* + * Classic PowerPC and Book E have slightly different synchronization + * instructions, with different meanings for overlapping encodings: + * + * 21:30 L/MO Classic PowerPC Book E + * 598 L=0 sync or hwsync, rw/rw msync, rw/rw + * 598 L=1 lwsync, r/rw and rw/w (undefined) + * 854 MO=0 eieio, w/w mbar 0, rw/rw + * 854 MO=1 (undefined) mbar 1, w/w (e500/e200 only?) + * + * References: + * + * (Classic PowerPC) PowerPC Virtual Environment Architecture, + * Book II, Version 2.01, December 2003, IBM. + * https://archive.org/details/bitsavers_ibmpowerpcvironmentArchitectureBookIIVer2.01200312_275529 + * + * Book E: Enhanced PowerPC Architecture, Version 1.0, May 7, + * 2002, Freescale Semiconductor. + * https://www.nxp.com/docs/en/user-guide/BOOK_EUM.pdf + * + * EREF: A Programmer's Reference Manual for Freescale Power + * Architecture Processors, EREF_RM, Rev. 1 (EIS 2.1), 06/2014, + * Freescale Semiconductor. + * https://www.nxp.com/files-static/32bit/doc/ref_manual/EREF_RM.pdf + * + * PowerPC e500 Core Family Reference Manual, E500CORERM, Rev. 1, + * 4/2005, Freescale Semiconductor. + * https://www.nxp.com/docs/en/reference-manual/E500CORERM.pdf + * + * There is an erratum in AN3441 about mbar 1 in e500 CPUs: + * + * Coherency and Synchronization Requirements for PowerQUICC III, + * Application Note AN3441, Rev. 1, 12/2007, Freescale + * Semiconductor. + * https://www.nxp.com/docs/en/application-note/AN3441.pdf + * + * However, it is only applicable to store-load ordering on + * cache-inhibited guarded memory for memory-mapped I/O devices, not + * regular memory that membar_* is applicable to: + * + * `mbar MO=1 fails to properly order caching-inhibited guarded + * loads with respect to caching-inhibited guarded stores. For + * guaranteed store-load ordering to cache-inhibited guarded + * memory, mbar with MO=0 or msync is required.
The failure is + * limited to cache-inhibited loads bypassing the mbar MO=1 + * barrier. The mbar MO=1 instruction correctly enforces + * ordering of cache-inhibited stores to cache-inhibited stores, + * and the ordering of cacheable stores to cacheable stores. + * + * For kernel code, we can pick classic or Book E because we use + * different kernels for the two, but the same userland has to work on + * both so for userland we limit ourselves to sync. + * + * It is tempting to use isync to order load-before-load/store. + * However, isync orders prior loads only if their value flows into a + * control-flow dependency prior to the isync: + * + * `[I]f an isync follows a conditional Branch instruction that + * depends on the value returned by a preceding Load instruction, + * the load on which the Branch depends is performed before any + * loads caused by instructions following the isync. This applies + * even if the effects of the ``dependency'' are independent of + * the value loaded (e.g., the value is compared to itself and + * the Branch tests the EQ bit in the selected CR field), and + * even if the branch target is the sequentially next + * instruction.' + * + * --PowerPC Virtual Environment Architecture, Book II, Version + * 2.01, December 2003, 1.7.1 `Storage Access Ordering', p. 7. + * + * membar_acquire, however, must order _all_ prior loads, even if they + * do not flow into any control flow dependency preceding the isync. + * For example: + * + * x = *p; + * membar_acquire(); + * if (x) goto foo; + * + * This can't be implemented by: + * + * lwz x, p + * isync + * cmpwi cc, x, 0 + * bne cc, foo + * + * isync doesn't work here because there's no conditional dependency on + * x between the lwz x, p and the isync. The ordering would be + * guaranteed by + * + * lwz x, p + * cmpwi cc, x, 0 + * bne cc, foo + * isync + * ... + * foo: isync + * + * But it would require some compiler support to ensure there is a + * branch between the load and the isync. Making this connection with + * membar_acquire requires a clever compiler to notice the control + * flow. atomic_load_acquire is easier because it has the load as part + * of the operation, so compilers will usually implement x = + * atomic_load_acquire(p) as + * + * lwz x, p + * cmpw cc, x, x + * bne- cc, 1f + * 1: isync + */ + .text ENTRY(_membar_acquire) - /* - * It is tempting to use isync to order load-before-load/store. - * However, isync orders prior loads only if their value flows - * into a control-flow dependency prior to the isync: - * - * `[I]f an isync follows a conditional Branch instruction - * that depends on the value returned by a preceding Load - * instruction, the load on which the Branch depends is - * performed before any loads caused by instructions - * following the isync. This applies even if the effects - * of the ``dependency'' are independent of the value - * loaded (e.g., the value is compared to itself and the - * Branch tests the EQ bit in the selected CR field), and - * even if the branch target is the sequentially next - * instruction.' - * - * --PowerPC Virtual Environment Architecture, Book II, - * Version 2.01, December 2003, 1.7.1 `Storage Access - * Ordering', p. 7. - * - * We are required here, however, to order _all_ prior loads, - * even if they do not flow into any control flow dependency. 
- * For example: - * - * x = *p; - * membar_acquire(); - * if (x) goto foo; - * - * This can't be implemented by: - * - * lwz x, p - * isync - * cmpwi x, 0 - * bne foo - * - * isync doesn't work here because there's no conditional - * dependency on x between the lwz x, p and the isync. - * - * isync would only work if it followed the branch: - * - * lwz x, p - * isync - * cmpwi x, 0 - * bne foo - * ... - * foo: isync - * ... - * - * lwsync orders everything except store-before-load, so it - * serves here -- see below in membar_release in lwsync. - * Except we can't use it on booke, so use sync for now. - */ - sync + LWSYNC /* acq/rel -- r/rw _and_ rw/w (but not w/r) */ blr END(_membar_acquire) ATOMIC_OP_ALIAS(membar_acquire,_membar_acquire) ENTRY(_membar_release) - /* - * `The memory barrier provides an ordering function for - * the storage accesses caused by Load, Store, and dcbz - * instructions that are executed by the processor - * executing the [lwsync] instruction and for which the - * specified storage location is in storage that is - * Memory Coherence Required and is neither Write Through - * Required nor Caching Inhibited. The applicable pairs - * are all pairs a_i, b_j of such accesses except those - * in which a_i is an access caused by a Store or dcbz - * instruction and b_j is an access caused by a Load - * instruction.' - * - * --PowerPC Virtual Environment Architecture, Book II, - * Version 2.01, December 2003, 3.3.3 `Memory Barrier - * Instructions', p. 25. - * - * In brief, lwsync is an acquire-release barrier -- it orders - * load-before-load/store and load/store-before-store, but not - * store-before-load. Except we can't use it on booke, so use - * sync for now. - */ - sync + LWSYNC /* acq/rel -- r/rw _and_ rw/w (but not w/r) */ blr END(_membar_release) ATOMIC_OP_ALIAS(membar_release,_membar_release) +ENTRY(_membar_producer) + SYNC_STORE /* w/w (and, with eieio, unrelated I/O ordering) */ + blr +END(_membar_producer) +ATOMIC_OP_ALIAS(membar_producer,_membar_producer) + ENTRY(_membar_sync) /* * sync, or `heavyweight sync', is a full sequential - * consistency barrier. + * consistency barrier (rw/rw). */ - sync + SYNC blr END(_membar_sync) ATOMIC_OP_ALIAS(membar_sync,_membar_sync) -ATOMIC_OP_ALIAS(membar_producer,_membar_release) -STRONG_ALIAS(_membar_producer,_membar_release) ATOMIC_OP_ALIAS(membar_consumer,_membar_acquire) STRONG_ALIAS(_membar_consumer,_membar_acquire) ATOMIC_OP_ALIAS(membar_enter,_membar_sync) diff --git a/sys/arch/powerpc/include/asm.h b/sys/arch/powerpc/include/asm.h index 9b61a80e270f..165dc12d771f 100644 --- a/sys/arch/powerpc/include/asm.h +++ b/sys/arch/powerpc/include/asm.h @@ -34,6 +34,11 @@ #ifndef _PPC_ASM_H_ #define _PPC_ASM_H_ +#ifdef _KERNEL_OPT +#include "opt_multiprocessor.h" +#include "opt_ppcarch.h" +#endif + #ifdef _LP64 /* ppc64 is always PIC, r2 is always the TOC */ @@ -450,4 +455,34 @@ y: .quad .##y,.TOC.@tocbase,0; \ #define IBM405_ERRATA77_SYNC /* nothing */ #endif +#if defined(_KERNEL) && !defined(MULTIPROCESSOR) + +#define ISYNC /* nothing */ +#define SYNC /* nothing */ +#define LWSYNC /* nothing */ +#define SYNC_STORE /* nothing */ + +#else /* !_KERNEL, or _KERNEL && MULTIPROCESSOR */ + +/* + * See common/lib/libc/powerpc/arch/membar_ops.S for notes and + * references. 
+ */ + +#define ISYNC isync +#define SYNC sync + +#if !defined(_KERNEL) +#define LWSYNC sync /* rw/rw */ +#define SYNC_STORE eieio /* cheaper w/w on classic, rw/rw on booke */ +#elif defined(PPC_BOOKE) +#define LWSYNC msync /* rw/rw; no cheaper r/rw or rw/w barrier */ +#define SYNC_STORE mbar 1 /* w/w, same as classic eieio */ +#else /* _KERNEL && !PPC_BOOKE */ +#define LWSYNC lwsync /* acq/rel (r/rw and rw/w) */ +#define SYNC_STORE eieio /* w/w (plus I/O ordering) */ +#endif + +#endif + #endif /* !_PPC_ASM_H_ */ diff --git a/sys/arch/powerpc/powerpc/lock_stubs.S b/sys/arch/powerpc/powerpc/lock_stubs.S index 1a63a5b90bdf..62cc593916f1 100644 --- a/sys/arch/powerpc/powerpc/lock_stubs.S +++ b/sys/arch/powerpc/powerpc/lock_stubs.S @@ -34,16 +34,7 @@ #include "assym.h" #ifdef _KERNEL_OPT -#include "opt_multiprocessor.h" #include "opt_lockdebug.h" -#endif - -#if defined(MULTIPROCESSOR) -#define ISYNC isync -#define SYNC sync -#else -#define ISYNC /* nothing */ -#define SYNC /* nothing */ #endif .text From 9e2708dfaa93f6532860680b47d3252b2bf35278 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Wed, 6 Apr 2022 21:15:21 +0000 Subject: [PATCH 47/49] powerpc: Use lwsync, not sync, for mutex_exit. This just needs to be a store-release so that the body of the mutex section happens before releasing the lock. lwsync is a release/acquire barrier (load/store-before-store and load-before-load/store), so issuing lwsync-then-store suffices here. (On booke this will be msync anyway, a full sequential consistency barrier same as classic powerpc sync, so no change to semantics or performance on booke.) --- sys/arch/powerpc/powerpc/lock_stubs.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sys/arch/powerpc/powerpc/lock_stubs.S b/sys/arch/powerpc/powerpc/lock_stubs.S index 62cc593916f1..c6cc52bb7db2 100644 --- a/sys/arch/powerpc/powerpc/lock_stubs.S +++ b/sys/arch/powerpc/powerpc/lock_stubs.S @@ -79,7 +79,7 @@ ENTRY(mutex_enter) * void mutex_exit(kmutex_t *); */ ENTRY(mutex_exit) - SYNC + LWSYNC li %r7,0 1: lptrarx %r10,0,%r3 @@ -163,7 +163,7 @@ ENTRY(rw_tryenter) */ ENTRY(rw_exit) ldptr %r9,RW_OWNER(%r3) - SYNC + LWSYNC andi. %r0,%r9,RW_WRITE_LOCKED bne- 1f From a498c50a25a312b45b9ee585464d0e127e9e77ce Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Thu, 31 Mar 2022 12:06:30 +0000 Subject: [PATCH 48/49] mips/cavium: Take advantage of Octeon's guaranteed r/rw ordering. --- common/lib/libc/arch/mips/atomic/membar_ops.S | 94 +++++++++++++------ sys/arch/mips/include/asm.h | 40 +++++++- 2 files changed, 100 insertions(+), 34 deletions(-) diff --git a/common/lib/libc/arch/mips/atomic/membar_ops.S b/common/lib/libc/arch/mips/atomic/membar_ops.S index dae62c17d1d6..a1e2440791d8 100644 --- a/common/lib/libc/arch/mips/atomic/membar_ops.S +++ b/common/lib/libc/arch/mips/atomic/membar_ops.S @@ -38,44 +38,80 @@ LEAF(_membar_sync) j ra BDSYNC END(_membar_sync) +ATOMIC_OP_ALIAS(membar_sync,_membar_sync) + +STRONG_ALIAS(_membar_enter,_membar_sync) +ATOMIC_OP_ALIAS(membar_enter,_membar_sync) #ifdef __OCTEON__ + +/* + * cnMIPS guarantees load-before-load/store ordering without any + * barriers. So the only barriers we need are store-before-load (sync) + * and store-before-store (syncw, i.e., sync 4). See Table 2-32 + * `Execution Ordering Rules' on p. 
104 of Cavium OCTEON III CN78XX + * Hardware Reference Manual, CN78XX-HM-0.99E, September 2014: + * + * First Operation DLD [load instruction to a physical + * address that is L2/DRAM] + * Second Operation Any + * Execution Ordering Comments + * + * The second operation cannot appear to execute before + * the first (DLD) operation, regardless of the presence + * or absence of SYNC* instructions. + * + * Note: I'm not sure if this applies to earlier cnMIPS -- can't find + * it in the Cavium Networks OCTEON Plus CN50XX Hardware Reference + * Manual CN50XX-HM-0.99E, July 2008. Experimentally, on an erlite3 + * (Cavium Octeon CN5020-500), I can easily detect reordering of + * store-before-store and store-before-load, but I haven't been able to + * detect any reordering of load-before-load or load-before-store. + * + * Note: On early cnMIPS (CN3xxx), there is an erratum which sometimes + * requires issuing two syncw's in a row. I don't know the details -- + * don't have documentation -- and in Linux it is only used for I/O + * purposes. + * + * Currently we don't build kernels that work on both Octeon and + * non-Octeon MIPS CPUs, so none of this is done with binary patching. + * For userlands we could use a separate shared library on Octeon with + * ld.so.conf to override the symbols with cheaper definitions, but we + * don't do that now. + */ + +LEAF(_membar_acquire) + j ra + nop +END(_membar_acquire) +ATOMIC_OP_ALIAS(membar_acquire,_membar_acquire) + +STRONG_ALIAS(_membar_consumer,_membar_acquire) +ATOMIC_OP_ALIAS(membar_consumer,_membar_acquire) + LEAF(_membar_release) - /* - * syncw is documented as ordering store-before-store in - * - * Cavium OCTEON III CN78XX Hardware Reference Manual, - * CN78XX-HM-0.99E, September 2014. - * - * It's unclear from the documentation the architecture - * guarantees load-before-store ordering without barriers, but - * this code assumes it does. If that assumption is wrong, we - * can only use syncw for membar_producer -- membar_release has - * to use the full sync. 
- */ j ra syncw END(_membar_release) -#endif +ATOMIC_OP_ALIAS(membar_release,_membar_release) -ATOMIC_OP_ALIAS(membar_sync,_membar_sync) -ATOMIC_OP_ALIAS(membar_acquire,_membar_sync) -STRONG_ALIAS(_membar_acquire,_membar_sync) -ATOMIC_OP_ALIAS(membar_enter,_membar_sync) -STRONG_ALIAS(_membar_enter,_membar_sync) -#ifdef __OCTEON__ -ATOMIC_OP_ALIAS(membar_exit,_membar_release) STRONG_ALIAS(_membar_exit,_membar_release) -ATOMIC_OP_ALIAS(membar_release,_membar_release) -ATOMIC_OP_ALIAS(membar_producer,_membar_release) +ATOMIC_OP_ALIAS(membar_exit,_membar_release) + STRONG_ALIAS(_membar_producer,_membar_release) -#else -ATOMIC_OP_ALIAS(membar_exit,_membar_sync) -STRONG_ALIAS(_membar_exit,_membar_sync) -ATOMIC_OP_ALIAS(membar_release,_membar_sync) +ATOMIC_OP_ALIAS(membar_producer,_membar_release) + +#else /* !__OCTEON__ */ + +STRONG_ALIAS(_membar_acquire,_membar_sync) +ATOMIC_OP_ALIAS(membar_acquire,_membar_sync) STRONG_ALIAS(_membar_release,_membar_sync) -ATOMIC_OP_ALIAS(membar_producer,_membar_sync) +ATOMIC_OP_ALIAS(membar_release,_membar_sync) +STRONG_ALIAS(_membar_exit,_membar_sync) +ATOMIC_OP_ALIAS(membar_exit,_membar_sync) +STRONG_ALIAS(_membar_consumer,_membar_sync) +ATOMIC_OP_ALIAS(membar_consumer,_membar_sync) STRONG_ALIAS(_membar_producer,_membar_sync) +ATOMIC_OP_ALIAS(membar_producer,_membar_sync) + #endif -ATOMIC_OP_ALIAS(membar_consumer,_membar_sync) -STRONG_ALIAS(_membar_consumer,_membar_sync) diff --git a/sys/arch/mips/include/asm.h b/sys/arch/mips/include/asm.h index 3103118b1d71..3d0eda76b6a1 100644 --- a/sys/arch/mips/include/asm.h +++ b/sys/arch/mips/include/asm.h @@ -572,12 +572,42 @@ _C_LABEL(x): /* compiler define */ #if defined(__OCTEON__) - /* early cnMIPS have erratum which means 2 */ -#define LLSCSYNC sync 4; sync 4 +/* + * cnMIPS guarantees load-before-load/store ordering without any + * barriers. So the only barriers we need are store-before-load (sync) + * and store-before-store (syncw, i.e., sync 4). See Table 2-32 + * `Execution Ordering Rules' on p. 104 of Cavium OCTEON III CN78XX + * Hardware Reference Manual, CN78XX-HM-0.99E, September 2014: + * + * First Operation DLD [load instruction to a physical + * address that is L2/DRAM] + * Second Operation Any + * Execution Ordering Comments + * + * The second operation cannot appear to execute before + * the first (DLD) operation, regardless of the presence + * or absence of SYNC* instructions. + * + * Note: I'm not sure if this applies to earlier cnMIPS -- can't find + * it in the Cavium Networks OCTEON Plus CN50XX Hardware Reference + * Manual CN50XX-HM-0.99E, July 2008. + * + * Except cnMIPS also has a quirk where the store buffer can get + * clogged and we need to apply a plunger to it _after_ releasing a + * lock or else other CPUs may spin for hundreds of thousands of cycles + * before they see the lock is released. So we also have the quirky + * SYNC_PLUNGER barrier as syncw. + * + * Note: On early cnMIPS (CN3xxx), there is an erratum which sometimes + * requires issuing two syncw's in a row. I don't know the details -- + * don't have documentation -- and in Linux it is only used for I/O + * purposes. 
+ */ +#define LLSCSYNC /* nothing */ #define BDSYNC sync -#define BDSYNC_ACQ sync -#define SYNC_ACQ sync -#define SYNC_REL sync +#define BDSYNC_ACQ nop +#define SYNC_ACQ /* nothing */ +#define SYNC_REL sync 4 #define BDSYNC_PLUNGER sync 4 #define SYNC_PLUNGER sync 4 #elif __mips >= 3 || !defined(__mips_o32) From 11ee9f36b21313f833babb0a72c92de3319da057 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Thu, 24 Feb 2022 15:53:11 +0000 Subject: [PATCH 49/49] WIP: kern_descrip.c: Membar audit. --- sys/kern/kern_descrip.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index 1336ea9b8010..a1fa3bcdbdeb 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -392,6 +392,9 @@ fd_getfile(unsigned fd) * Multi threaded: issue a memory barrier to ensure that we * acquire the file pointer _after_ adding a reference. If * no memory barrier, we could fetch a stale pointer. + * + * XXX Unclear what stale pointer this refers to... I + * don't think this membar is actually necessary. */ atomic_inc_uint(&ff->ff_refcnt); #ifndef __HAVE_ATOMIC_AS_MEMBAR @@ -451,7 +454,7 @@ fd_putfile(unsigned fd) * CPU. */ #ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_exit(); + membar_release(); #endif /* @@ -602,6 +605,9 @@ fd_close(unsigned fd) * waiting for other users of the file to drain. Release * our reference, and wake up the closer. */ +#ifndef __HAVE_ATOMIC_AS_MEMBAR + membar_release(); +#endif atomic_dec_uint(&ff->ff_refcnt); cv_broadcast(&ff->ff_closing); mutex_exit(&fdp->fd_lock); @@ -637,7 +643,7 @@ fd_close(unsigned fd) } else { /* Multi threaded. */ #ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_producer(); + membar_release(); #endif refcnt = atomic_dec_uint_nv(&ff->ff_refcnt); } @@ -683,7 +689,13 @@ fd_close(unsigned fd) cv_wait(&ff->ff_closing, &fdp->fd_lock); } atomic_and_uint(&ff->ff_refcnt, ~FR_CLOSING); +#ifndef __HAVE_ATOMIC_AS_MEMBAR + membar_acquire(); +#endif } else { +#ifndef __HAVE_ATOMIC_AS_MEMBAR + membar_acquire(); +#endif /* If no references, there must be no knotes. */ KASSERT(SLIST_EMPTY(&ff->ff_knlist)); } @@ -1532,10 +1544,13 @@ fd_free(void) KASSERT(fdp->fd_dtbuiltin.dt_link == NULL); #ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_exit(); + membar_release(); #endif if (atomic_dec_uint_nv(&fdp->fd_refcnt) > 0) return; +#ifndef __HAVE_ATOMIC_AS_MEMBAR + membar_acquire(); +#endif /* * Close any files that the process holds open.
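The idiom this audit keeps applying is the usual reference-count handoff: release before the decrement, so the dropper's prior writes are visible to whichever thread ends up freeing the object, and acquire once the count is seen to hit zero, so the freeing thread sees every other dropper's writes before teardown. Schematically -- a sketch with placeholder names (struct obj, obj_destroy), not literal kernel code:

	void
	obj_release(struct obj *obj)
	{

	#ifndef __HAVE_ATOMIC_AS_MEMBAR
		membar_release();	/* our writes happen before the decrement */
	#endif
		if (atomic_dec_uint_nv(&obj->refcnt) > 0)
			return;
	#ifndef __HAVE_ATOMIC_AS_MEMBAR
		membar_acquire();	/* the final decrement happens before teardown */
	#endif
		obj_destroy(obj);
	}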