diff -r b370ec80dbb8 common/lib/libc/arch/i386/atomic/atomic.S --- a/common/lib/libc/arch/i386/atomic/atomic.S Sun Aug 03 08:22:49 2025 +0000 +++ b/common/lib/libc/arch/i386/atomic/atomic.S Sun Aug 31 21:57:08 2025 +0000 @@ -212,7 +212,8 @@ ENTRY(_membar_sync) * https://shipilev.net/blog/2014/on-the-fence-with-dependencies/ * https://www.agner.org/optimize/instruction_tables.pdf * - * Sync with xen_mb in sys/arch/i386/i386/cpufunc.S. + * Sync with paravirt_membar_sync in + * sys/arch/i386/i386/cpufunc.S. */ LOCK addl $0, -4(%esp) diff -r b370ec80dbb8 common/lib/libc/arch/sparc64/atomic/membar_ops.S --- a/common/lib/libc/arch/sparc64/atomic/membar_ops.S Sun Aug 03 08:22:49 2025 +0000 +++ b/common/lib/libc/arch/sparc64/atomic/membar_ops.S Sun Aug 31 21:57:08 2025 +0000 @@ -72,6 +72,9 @@ ENTRY(_membar_sync) * https://www.oracle.com/technetwork/server-storage/sun-sparc-enterprise/documentation/sparc-2i-usersmanual-2516677.pdf#page=518 * * So let's avoid doing that. + * + * Sync with paravirt_membar_sync in + * sys/arch/sparc64/sparc64/locore.s. */ membar #StoreLoad retl diff -r b370ec80dbb8 common/lib/libc/arch/x86_64/atomic/atomic.S --- a/common/lib/libc/arch/x86_64/atomic/atomic.S Sun Aug 03 08:22:49 2025 +0000 +++ b/common/lib/libc/arch/x86_64/atomic/atomic.S Sun Aug 31 21:57:08 2025 +0000 @@ -287,7 +287,8 @@ ENTRY(_membar_sync) * https://shipilev.net/blog/2014/on-the-fence-with-dependencies/ * https://www.agner.org/optimize/instruction_tables.pdf * - * Sync with xen_mb in sys/arch/amd64/amd64/cpufunc.S. + * Sync with paravirt_membar_sync in + * sys/arch/amd64/amd64/cpufunc.S. */ LOCK addq $0, -8(%rsp) diff -r b370ec80dbb8 distrib/sets/lists/comp/mi --- a/distrib/sets/lists/comp/mi Sun Aug 03 08:22:49 2025 +0000 +++ b/distrib/sets/lists/comp/mi Sun Aug 31 21:57:08 2025 +0000 @@ -12925,6 +12925,7 @@ ./usr/share/man/cat9/optstr_get.0 comp-sys-catman .cat ./usr/share/man/cat9/p_find.0 comp-obsolete obsolete ./usr/share/man/cat9/panic.0 comp-sys-catman .cat +./usr/share/man/cat9/paravirt_membar_sync.0 comp-sys-catman .cat ./usr/share/man/cat9/pathbuf.0 comp-sys-catman .cat ./usr/share/man/cat9/pci.0 comp-sys-catman .cat ./usr/share/man/cat9/pci_conf_hook.0 comp-sys-catman .cat @@ -21789,6 +21790,7 @@ ./usr/share/man/html9/optstr_get.html comp-sys-htmlman html ./usr/share/man/html9/p_find.html comp-obsolete obsolete ./usr/share/man/html9/panic.html comp-sys-htmlman html +./usr/share/man/html9/paravirt_membar_sync.html comp-sys-htmlman html ./usr/share/man/html9/pathbuf.html comp-sys-htmlman html ./usr/share/man/html9/pci.html comp-sys-htmlman html ./usr/share/man/html9/pci_conf_hook.html comp-sys-htmlman html @@ -30810,6 +30812,7 @@ ./usr/share/man/man9/optstr_get.9 comp-sys-man .man ./usr/share/man/man9/p_find.9 comp-obsolete obsolete ./usr/share/man/man9/panic.9 comp-sys-man .man +./usr/share/man/man9/paravirt_membar_sync.9 comp-sys-man .man ./usr/share/man/man9/pathbuf.9 comp-sys-man .man ./usr/share/man/man9/pci.9 comp-sys-man .man ./usr/share/man/man9/pci_conf_hook.9 comp-sys-man .man diff -r b370ec80dbb8 share/man/man9/Makefile --- a/share/man/man9/Makefile Sun Aug 03 08:22:49 2025 +0000 +++ b/share/man/man9/Makefile Sun Aug 31 21:57:08 2025 +0000 @@ -41,7 +41,12 @@ MAN= accept_filter.9 accf_data.9 accf_ht microseq.9 microtime.9 microuptime.9 mi_switch.9 module.9 \ mstohz.9 mutex.9 m_tag.9 namecache.9 \ namei.9 nullop.9 opencrypto.9 optstr.9 \ - panic.9 pathbuf.9 pci.9 pci_configure_bus.9 pci_intr.9 \ + panic.9 \ + paravirt_membar_sync.9 \ + pathbuf.9 \ + pci.9 \ + pci_configure_bus.9 \ + 
pci_intr.9 \ pci_msi.9 pckbport.9 pcmcia.9 pcq.9 pcu.9 \ percpu.9 pfil.9 physio.9 pktqueue.9 pmap.9 pmatch.9 pmf.9 pool.9 \ pool_cache.9 portfeatures.9 powerhook_establish.9 ppsratecheck.9 \ diff -r b370ec80dbb8 share/man/man9/paravirt_membar_sync.9 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/share/man/man9/paravirt_membar_sync.9 Sun Aug 31 21:57:08 2025 +0000 @@ -0,0 +1,140 @@ +.\" $NetBSD$ +.\" +.\" Copyright (c) 2025 The NetBSD Foundation +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS +.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. +.\" +.Dd August 31, 2025 +.Dt PARAVIRT_MEMBAR_SYNC 9 +.Os +.Sh NAME +.Nm paravirt_membar_sync +.Nd memory barrier for paravirtualized device drivers +.Sh SYNOPSIS +.In sys/paravirt_membar.h +.Ft void +.Fn paravirt_membar_sync "void" +.Sh DESCRIPTION +The +.Nm +function issues a store-before-load barrier for coordination with a +paravirtualized device. +.Pp +This function has the same ordering semantics as +.Xr membar_sync 3 , +but +.Xr membar_sync 3 +can only coordinate with other CPUs that +.Nx +is running on. +In a virtual machine, +.Nx +may be running on a single +.Em virtual +CPU, and patch +.Xr membar_sync 3 +to be a no-op, while the host side of a paravirtualized device may be +running on a different +.Em physical +CPU requiring a barrier that +.Xr membar_sync 3 +does not issue. +.Sh EXAMPLES +Submit a request to the host device, and notify the host to process +it\(embut elide the notification, which is expensive, if the host is +already reading requests anyway: +.Bd -literal + /* + * Write the request into the ring buffer. + */ + memcpy(cputodev_ring->buffer[sc->sc_cputodev_idx], request, + sizeof(*request)); + + /* + * Ensure the request has been written before we publish it to + * the host device side. + */ + bus_dmamap_sync(dmat, dmap, ..., BUS_DMASYNC_PREWRITE); + + /* + * Publish the request to the host device side. + */ + cputodev_ring->header->producer_tail = ++sc->sc_cputodev_idx; + + /* + * Ensure we have published it _before_ we check whether the + * host needs notification. 
+ */ + paravirt_membar_sync(); + + if (cputodev_ring->header->needs_notification) + notify_host(); +.Ed +.Pp +Enable interrupts from the host and check whether any were pending +while interrupts were disabled: +.Bd -literal + /* + * Tell the host device to deliver interrupts after this + * point. + */ + devtocpu_ring->header->needs_notification = true; + + /* + * Ensure we have requested interrupts _before_ we check + * whether we missed any notifications. + */ + paravirt_membar_sync(); + + /* + * Check whether there were any pending notifications while + * interrupts were blocked. If not, stop here. + */ + idx = devtocpu_ring->header->producer_idx; + if (sc->sc_devtocpu_idx == idx) + return; + + /* + * Ensure we read the index of the producer ring _before_ we + * read any content. + */ + bus_dmamap_sync(dmat, dmap, ..., BUS_DMASYNC_POSTREAD); + + /* + * Process the notifications. + */ + while (sc->sc_devtocpu_idx != idx) { + struct buffer *buf = + devtocpu_ring->buffer[sc->sc_devtocpu_idx]; + process_notification(buf); + sc->sc_devtocpu_idx++; + sc->sc_devtocpu_idx %= ringlen; + } +.Ed +.Sh SEE ALSO +.Xr membar_ops 3 , +.Xr bus_dma 9 , +.Xr bus_space 9 +.Sh HISTORY +The +.Nm +function first appeared in +.Nx 12.0 . diff -r b370ec80dbb8 sys/arch/alpha/alpha/locore.s --- a/sys/arch/alpha/alpha/locore.s Sun Aug 03 08:22:49 2025 +0000 +++ b/sys/arch/alpha/alpha/locore.s Sun Aug 31 21:57:08 2025 +0000 @@ -1524,3 +1524,18 @@ LEAF(alpha_write_fpcr, 1); f30save = 0; lda sp, framesz(sp) RET END(alpha_write_fpcr) + +LEAF(paravirt_membar_sync, 0) + /* + * Store-before-load ordering with respect to matching logic + * on the hypervisor side. + * + * This is the same as membar_sync, but without hotpatching + * away the MB instruction on uniprocessor boots -- because + * under virtualization, we still have to coordinate with a + * `device' backed by a hypervisor that is potentially on + * another physical CPU even if we observe only one virtual CPU + * as the guest. + */ + mb +END(paravirt_membar_sync) diff -r b370ec80dbb8 sys/arch/amd64/amd64/cpufunc.S --- a/sys/arch/amd64/amd64/cpufunc.S Sun Aug 03 08:22:49 2025 +0000 +++ b/sys/arch/amd64/amd64/cpufunc.S Sun Aug 31 21:57:08 2025 +0000 @@ -61,17 +61,17 @@ ENTRY(x86_mfence) ret END(x86_mfence) -#ifdef XEN -ENTRY(xen_mb) +ENTRY(paravirt_membar_sync) /* * Store-before-load ordering with respect to matching logic * on the hypervisor side. * * This is the same as membar_sync, but without hotpatching * away the LOCK prefix on uniprocessor boots -- because under - * Xen, we still have to coordinate with a `device' backed by a - * hypervisor that is potentially on another physical CPU even - * if we observe only one virtual CPU as the guest. + * virtualization, we still have to coordinate with a `device' + * backed by a hypervisor that is potentially on another + * physical CPU even if we observe only one virtual CPU as the + * guest.
* * See common/lib/libc/arch/x86_64/atomic/atomic.S for * rationale and keep this in sync with the implementation @@ -80,7 +80,10 @@ ENTRY(xen_mb) lock addq $0,-8(%rsp) ret -END(xen_mb) +END(paravirt_membar_sync) + +#ifdef XEN +STRONG_ALIAS(xen_mb,paravirt_membar_sync) #endif /* XEN */ #ifdef KDTRACE_HOOKS diff -r b370ec80dbb8 sys/arch/arm/arm/cpu_subr.c --- a/sys/arch/arm/arm/cpu_subr.c Sun Aug 03 08:22:49 2025 +0000 +++ b/sys/arch/arm/arm/cpu_subr.c Sun Aug 31 21:57:08 2025 +0000 @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -145,3 +146,33 @@ cpu_clr_mbox(int cpuindex) } #endif + +#if defined _ARM_ARCH_6 || defined _ARM_ARCH_7 /* see below regarding armv<6 */ +void +paravirt_membar_sync(void) +{ + + /* + * Store-before-load ordering with respect to matching logic + * on the hypervisor side. + * + * This is the same as membar_sync, but guaranteed never to be + * conditionalized or hotpatched away even on uniprocessor + * builds and boots -- because under virtualization, we still + * have to coordinate with a `device' backed by a hypervisor + * that is potentially on another physical CPU even if we + * observe only one virtual CPU as the guest. + * + * Prior to armv6, there was no data memory barrier + * instruction. Such CPUs presumably don't exist in + * multiprocessor configurations. But what if we're running a + * _kernel_ built for a uniprocessor armv5 CPU, as a virtual + * machine guest of a _host_ with a newer multiprocessor CPU? + * How do we enforce store-before-load ordering for a + * paravirtualized device driver, coordinating with host + * software `device' potentially on another CPU? You'll have + * to answer that before you can use virtio drivers! + */ + dmb(ish); +} +#endif /* defined _ARM_ARCH_6 || defined _ARM_ARCH_7 */ diff -r b370ec80dbb8 sys/arch/hppa/hppa/support.S --- a/sys/arch/hppa/hppa/support.S Sun Aug 03 08:22:49 2025 +0000 +++ b/sys/arch/hppa/hppa/support.S Sun Aug 31 21:57:08 2025 +0000 @@ -304,3 +304,18 @@ LEAF_ENTRY(longjmp) ldi 1, %ret0 EXIT(longjmp) +LEAF_ENTRY(paravirt_membar_sync) + /* + * Store-before-load ordering with respect to matching logic + * on the hypervisor side. + * + * This is the same as membar_sync, but guaranteed never to be + * conditionalized or hotpatched away even on uniprocessor + * builds and boots -- because under virtualization, we still + * have to coordinate with a `device' backed by a hypervisor + * that is potentially on another physical CPU even if we + * observe only one virtual CPU as the guest. + */ + bv %r0(%rp) + sync +EXIT(paravirt_membar_sync) diff -r b370ec80dbb8 sys/arch/i386/i386/cpufunc.S --- a/sys/arch/i386/i386/cpufunc.S Sun Aug 03 08:22:49 2025 +0000 +++ b/sys/arch/i386/i386/cpufunc.S Sun Aug 31 21:57:08 2025 +0000 @@ -67,17 +67,17 @@ ENTRY(x86_mfence) ret END(x86_mfence) -#ifdef XEN -ENTRY(xen_mb) +ENTRY(paravirt_membar_sync) /* * Store-before-load ordering with respect to matching logic * on the hypervisor side. * * This is the same as membar_sync, but without hotpatching * away the LOCK prefix on uniprocessor boots -- because under - * Xen, we still have to coordinate with a `device' backed by a - * hypervisor that is potentially on another physical CPU even - * if we observe only one virtual CPU as the guest. + * virtualization, we still have to coordinate with a `device' + * backed by a hypervisor that is potentially on another + * physical CPU even if we observe only one virtual CPU as the + * guest. 
* * See common/lib/libc/arch/i386/atomic/atomic.S for * rationale and keep this in sync with the implementation @@ -86,7 +86,10 @@ ENTRY(xen_mb) lock addl $0,-4(%esp) ret -END(xen_mb) +END(paravirt_membar_sync) + +#ifdef XEN +STRONG_ALIAS(xen_mb,paravirt_membar_sync) #endif /* XEN */ #ifdef KDTRACE_HOOKS diff -r b370ec80dbb8 sys/arch/mips/mips/cpu_subr.c --- a/sys/arch/mips/mips/cpu_subr.c Sun Aug 03 08:22:49 2025 +0000 +++ b/sys/arch/mips/mips/cpu_subr.c Sun Aug 31 21:57:08 2025 +0000 @@ -49,6 +49,7 @@ #include #include #include +#include #include #include #include @@ -1195,3 +1196,33 @@ cpuwatch_clr(cpu_watchpoint_t *cwp) } #endif /* (MIPS32 + MIPS32R2 + MIPS64 + MIPS64R2) > 0 */ + +#if MIPS32 + 0 > 0 /* see below regarding MIPS-I/II/III */ +void +paravirt_membar_sync(void) +{ + + /* + * Store-before-load ordering with respect to matching logic + * on the hypervisor side. + * + * This is the same as membar_sync, but guaranteed never to be + * conditionalized or hotpatched away even on uniprocessor + * builds and boots -- because under virtualization, we still + * have to coordinate with a `device' backed by a hypervisor + * that is potentially on another physical CPU even if we + * observe only one virtual CPU as the guest. + * + * Prior to MIPS32, there was no SYNC instruction. Such CPUs + * presumably don't exist in multiprocessor configurations. + * But what if we're running a _kernel_ built for a + * uniprocessor MIPS-I/II/III CPU (pre-MIPS32), as a virtual + * machine guest of a _host_ with a newer multiprocessor CPU? + * How do we enforce store-before-load ordering for a + * paravirtualized device driver, coordinating with host + * software `device' potentially on another CPU? You'll have + * to answer that before you can use virtio drivers! + */ + __asm volatile("sync"); +} +#endif diff -r b370ec80dbb8 sys/arch/riscv/riscv/cpu_subr.c --- a/sys/arch/riscv/riscv/cpu_subr.c Sun Aug 03 08:22:49 2025 +0000 +++ b/sys/arch/riscv/riscv/cpu_subr.c Sun Aug 31 21:57:08 2025 +0000 @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -428,3 +429,21 @@ cpu_ipi(struct cpu_info *ci) } #endif + +void +paravirt_membar_sync(void) +{ + + /* + * Store-before-load ordering with respect to matching logic + * on the hypervisor side. + * + * This is the same as membar_sync, but guaranteed never to be + * conditionalized or hotpatched away even on uniprocessor + * builds and boots -- because under virtualization, we still + * have to coordinate with a `device' backed by a hypervisor + * that is potentially on another physical CPU even if we + * observe only one virtual CPU as the guest. + */ + __asm volatile("fence rw,rw"); +} diff -r b370ec80dbb8 sys/arch/sparc/sparc/locore.s --- a/sys/arch/sparc/sparc/locore.s Sun Aug 03 08:22:49 2025 +0000 +++ b/sys/arch/sparc/sparc/locore.s Sun Aug 31 21:57:08 2025 +0000 @@ -6001,6 +6001,21 @@ Lpanic_spunout: .asciz "cpu%d: stuck on lock@%x" _ALIGN +ENTRY(paravirt_membar_sync) + /* + * Store-before-load ordering with respect to matching logic + * on the hypervisor side. + * + * This is the same as membar_sync, but without + * conditionalizing away the LDSTUB instruction on uniprocessor + * builds -- because under virtualization, we still have to + * coordinate with a `device' backed by a hypervisor that is + * potentially on another physical CPU even if we observe only + * one virtual CPU as the guest. 
+ */ + ldstub [%sp - 4], %g0 /* makeshift store-before-load barrier */ +END(paravirt_membar_sync) + #if defined(KGDB) || defined(DDB) || defined(DIAGNOSTIC) /* * Write all windows (user or otherwise), except the current one. diff -r b370ec80dbb8 sys/arch/sparc64/sparc64/locore.s --- a/sys/arch/sparc64/sparc64/locore.s Sun Aug 03 08:22:49 2025 +0000 +++ b/sys/arch/sparc64/sparc64/locore.s Sun Aug 31 21:57:08 2025 +0000 @@ -7948,6 +7948,26 @@ ENTRY(sparc64_ipi_ccall) #endif +ENTRY(paravirt_membar_sync) + /* + * Store-before-load ordering with respect to matching logic + * on the hypervisor side. + * + * This is the same as membar_sync, but without patching or + * conditionalizing away the MEMBAR instruction on uniprocessor + * builds or boots -- because under virtualization, we still + * have to coordinate with a `device' backed by a hypervisor + * that is potentially on another physical CPU even if we + * observe only one virtual CPU as the guest. + * + * See common/lib/libc/arch/sparc64/atomic/membar_ops.S for why + * we avoid using the delay slot and keep this in sync with the + * implementation of membar_sync there. + */ + membar #StoreLoad + retl + nop +END(paravirt_membar_sync) .data _ALIGN diff -r b370ec80dbb8 sys/arch/virt68k/virt68k/locore.s --- a/sys/arch/virt68k/virt68k/locore.s Sun Aug 03 08:22:49 2025 +0000 +++ b/sys/arch/virt68k/virt68k/locore.s Sun Aug 31 21:57:08 2025 +0000 @@ -598,6 +598,27 @@ ENTRY(ecacheon) ENTRY(ecacheoff) rts +ENTRY(paravirt_membar_sync) + /* + * Store-before-load ordering with respect to matching logic + * on the hypervisor side. + * + * This is the same as membar_sync, but guaranteed never to be + * conditionalized or hotpatched away even on uniprocessor + * builds and boots -- because under virtualization, we still + * have to coordinate with a `device' backed by a hypervisor + * that is potentially on another physical CPU even if we + * observe only one virtual CPU as the guest. + * + * I don't see an obvious ordering-only instruction in the m68k + * instruction set, but qemu implements CAS with + * store-before-load ordering, so this should work for virtio. + */ + clrl %d0 + casl %d0,%d0,%sp@ + rts +END(paravirt_membar_sync) + /* * Misc. global variables. 
*/ diff -r b370ec80dbb8 sys/dev/hyperv/vmbus.c --- a/sys/dev/hyperv/vmbus.c Sun Aug 03 08:22:49 2025 +0000 +++ b/sys/dev/hyperv/vmbus.c Sun Aug 31 21:57:08 2025 +0000 @@ -50,6 +50,7 @@ #include #include #include +#include #include @@ -791,6 +792,7 @@ vmbus_message_proc(void *arg, struct cpu msg = (struct vmbus_message *)sc->sc_percpu[cpu_index(ci)].simp + VMBUS_SINT_MESSAGE; + /* XXX bus_dmamap_sync(POSTREAD|POSTWRITE) on msg_type */ if (__predict_false(msg->msg_type != HYPERV_MSGTYPE_NONE)) { if (__predict_true(!cold)) softint_schedule_cpu(sc->sc_msg_sih, ci); @@ -813,9 +815,12 @@ vmbus_message_softintr(void *arg) for (;;) { msg = (struct vmbus_message *)sc->sc_percpu[cpu].simp + VMBUS_SINT_MESSAGE; + /* XXX bus_dmamap_sync(POSTREAD|POSTWRITE) on msg_type */ if (msg->msg_type == HYPERV_MSGTYPE_NONE) break; + /* XXX bus_dmamap_sync(POSTREAD) on msg_data */ + hdr = (struct vmbus_chanmsg_hdr *)msg->msg_data; type = hdr->chm_type; if (type >= VMBUS_CHANMSG_COUNT) { @@ -831,10 +836,22 @@ vmbus_message_softintr(void *arg) } } + /* XXX bus_dmamap_sync(PREREAD) on msg_data */ + msg->msg_type = HYPERV_MSGTYPE_NONE; - membar_sync(); + /* XXX bus_dmamap_sync(PREWRITE|PREREAD) on msg_type */ + + /* + * Ensure we tell the host that this message is done + * before we check whether the host told us there are + * more pending. + */ + paravirt_membar_sync(); + + /* XXX bus_dmamap_sync(POSTREAD) on msg_flags */ if (msg->msg_flags & VMBUS_MSGFLAG_PENDING) hyperv_send_eom(); + /* XXX bus_dmamap_sync(PREREAD) on msg_flags */ } } @@ -1655,8 +1672,10 @@ static __inline void vmbus_ring_avail(struct vmbus_ring_data *rd, uint32_t *towrite, uint32_t *toread) { + /* XXX bus_dmamap_sync(POSTREAD) on br_rindex/br_windex */ uint32_t ridx = rd->rd_ring->br_rindex; uint32_t widx = rd->rd_ring->br_windex; + /* XXX bus_dmamap_sync(PREREAD) on br_rindex/br_windex */ uint32_t r, w; if (widx >= ridx) @@ -1674,7 +1693,9 @@ static bool vmbus_ring_is_empty(struct vmbus_ring_data *rd) { + /* XXX bus_dmamap_sync(POSTREAD) on br_rindex/br_windex */ return rd->rd_ring->br_rindex == rd->rd_ring->br_windex; + /* XXX bus_dmamap_sync(PREREAD) on br_rindex/br_windex */ } static int @@ -1698,15 +1719,27 @@ vmbus_ring_write(struct vmbus_ring_data oprod = wrd->rd_prod; + /* XXX bus_dmamap_sync(POSTWRITE) on ring data */ + for (i = 0; i < iov_cnt; i++) vmbus_ring_put(wrd, iov[i].iov_base, iov[i].iov_len); indices = (uint64_t)oprod << 32; vmbus_ring_put(wrd, (uint8_t *)&indices, sizeof(indices)); - membar_sync(); + /* XXX bus_dmamap_sync(PREWRITE) on ring data */ + + membar_sync(); /* XXX bus_dmamap_sync(POSTWRITE) on br_windex */ wrd->rd_ring->br_windex = wrd->rd_prod; - membar_sync(); + /* XXX bus_dmamap_sync(PREWRITE) on br_windex */ + + /* + * Ensure we publish the producer index _before_ we check + * whether the host needs to be notified. 
+ */ + paravirt_membar_sync(); + + /* XXX bus_dmamap_sync(POSTREAD) on br_rindex */ /* Signal when the ring transitions from being empty to non-empty */ if (wrd->rd_ring->br_imask == 0 && @@ -1715,6 +1748,8 @@ vmbus_ring_write(struct vmbus_ring_data else *needsig = 0; + + /* XXX bus_dmamap_sync(PREREAD) on br_rindex */ + return 0; } @@ -1874,6 +1909,8 @@ vmbus_ring_read(struct vmbus_ring_data * return EAGAIN; } + + /* XXX bus_dmamap_sync(POSTREAD) on ring data */ + if (offset) { rrd->rd_cons += offset; if (rrd->rd_cons >= rrd->rd_dsize) @@ -1883,8 +1920,11 @@ vmbus_ring_read(struct vmbus_ring_data * vmbus_ring_get(rrd, (uint8_t *)data, datalen, 0); vmbus_ring_get(rrd, (uint8_t *)&indices, sizeof(indices), 0); - membar_sync(); + /* XXX bus_dmamap_sync(PREREAD) on ring data */ + + membar_sync(); /* XXX bus_dmamap_sync(POSTWRITE) on br_rindex */ rrd->rd_ring->br_rindex = rrd->rd_cons; + /* XXX bus_dmamap_sync(PREWRITE) on br_rindex */ return 0; } @@ -1931,18 +1971,18 @@ static inline void vmbus_ring_mask(struct vmbus_ring_data *rd) { - membar_sync(); + membar_sync(); /* XXX bus_dmamap_sync(POSTWRITE) on br_imask */ rd->rd_ring->br_imask = 1; - membar_sync(); + membar_sync(); /* XXX bus_dmamap_sync(PREWRITE) on br_imask */ } static inline void vmbus_ring_unmask(struct vmbus_ring_data *rd) { - membar_sync(); + membar_sync(); /* XXX bus_dmamap_sync(POSTWRITE) on br_imask */ rd->rd_ring->br_imask = 0; - membar_sync(); + membar_sync(); /* XXX bus_dmamap_sync(PREWRITE) on br_imask */ } void @@ -1962,6 +2002,14 @@ vmbus_channel_unpause(struct vmbus_chann atomic_and_ulong(&ch->ch_sc->sc_evtmask[ch->ch_id / VMBUS_EVTFLAG_LEN], ~__BIT(ch->ch_id % VMBUS_EVTFLAG_LEN)); vmbus_ring_unmask(&ch->ch_rrd); + + /* + * Ensure we announce to the host side that we are accepting + * interrupts _before_ we check whether any pending events had + * come over the ring while we weren't accepting interrupts. + */ + paravirt_membar_sync(); + vmbus_ring_avail(&ch->ch_rrd, NULL, &avail); return avail; diff -r b370ec80dbb8 sys/dev/pci/virtio.c --- a/sys/dev/pci/virtio.c Sun Aug 03 08:22:49 2025 +0000 +++ b/sys/dev/pci/virtio.c Sun Aug 31 21:57:08 2025 +0000 @@ -38,6 +38,7 @@ #include #include #include +#include <sys/paravirt_membar.h> #define VIRTIO_PRIVATE @@ -708,6 +709,13 @@ virtio_start_vq_intr(struct virtio_softc } vq->vq_queued++; + /* + * Ensure we announce to the host side that we are accepting + * interrupts _before_ we check whether any pending events had + * come over the queue while we weren't accepting interrupts. + */ + paravirt_membar_sync(); + vq_sync_uring_header(sc, vq, BUS_DMASYNC_POSTREAD); if (vq->vq_used_idx == virtio_rw16(sc, vq->vq_used->idx)) return 0; @@ -1252,6 +1260,12 @@ notify: vq_sync_aring_header(sc, vq, BUS_DMASYNC_PREWRITE); vq->vq_queued++; + /* + * Ensure we publish the avail idx _before_ we check whether + * the host needs to be notified. + */ + paravirt_membar_sync(); + if (sc->sc_active_features & VIRTIO_F_RING_EVENT_IDX) { vq_sync_uring_avail(sc, vq, BUS_DMASYNC_POSTREAD); t = virtio_rw16(sc, *vq->vq_avail_event) + 1; diff -r b370ec80dbb8 sys/sys/paravirt_membar.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sys/sys/paravirt_membar.h Sun Aug 31 21:57:08 2025 +0000 @@ -0,0 +1,34 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2025 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1.
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _SYS_PARAVIRT_MEMBAR_H_ +#define _SYS_PARAVIRT_MEMBAR_H_ + +void paravirt_membar_sync(void); + +#endif /* _SYS_PARAVIRT_MEMBAR_H_ */
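Note for ports not touched by this patch: before the virtio or Hyper-V drivers above can be used on such a port, it has to supply its own paravirt_membar_sync that is never conditionalized or hotpatched away. The following is a minimal illustrative sketch only, not part of the patch; the file placement mirrors the arm/mips/riscv implementations above, and the instruction shown is AArch64's dmb ish, chosen purely as an example of a full barrier rather than as a claim about any particular port in the tree.

/*
 * Illustrative sketch -- not part of this patch.  A new port would
 * provide something along these lines in its machine-dependent code,
 * following the arm, mips, and riscv implementations above.
 */
#include <sys/types.h>
#include <sys/paravirt_membar.h>

void
paravirt_membar_sync(void)
{

	/*
	 * Store-before-load ordering with respect to matching logic
	 * on the hypervisor side.  Unlike membar_sync, this must not
	 * be conditionalized or hotpatched away on uniprocessor
	 * builds or boots.
	 */
	__asm volatile("dmb ish" ::: "memory");	/* full barrier, AArch64 */
}

The barrier instruction is the same one the port's membar_sync would execute on a multiprocessor boot; the only difference is that it is always compiled in and always executed.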