From bad477606ac2639e49239cd1748fb802e52c9020 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Wed, 12 Jan 2022 00:46:40 +0000 Subject: [PATCH 1/5] driver(9): devsw_detach never fails. Make it return void. Prune a whole lotta dead branches as a result of this. (Some logic calling this is also wrong for other reasons; devsw_detach is final -- you should never have any reason to decide to roll it back. To be cleaned up in subsequent commits...) --- external/cddl/osnet/dev/dtrace/dtrace_modevent.c | 4 +--- external/cddl/osnet/dev/fbt/fbt.c | 3 ++- external/cddl/osnet/dev/sdt/sdt.c | 3 ++- .../cddl/osnet/dist/uts/common/fs/zfs/zfs_ioctl.c | 2 +- share/man/man9/devsw_attach.9 | 15 +++++++++------ sys/coda/coda_psdev.c | 2 +- sys/dev/ccd.c | 2 +- sys/dev/clockctl.c | 8 +++----- sys/dev/hdaudio/hdaudio.c | 6 +----- sys/dev/i2c/i2c.c | 7 ++----- sys/dev/pad/pad.c | 4 +--- sys/dev/raidframe/rf_netbsdkintf.c | 11 +---------- sys/dev/sysmon/sysmon.c | 2 +- sys/dev/tprof/tprof.c | 8 +------- sys/dist/pf/net/pf_ioctl.c | 3 ++- sys/external/bsd/ipf/netinet/ip_fil_netbsd.c | 2 +- sys/fs/autofs/autofs_vfsops.c | 4 +--- sys/kern/kern_drvctl.c | 9 ++------- sys/kern/subr_devsw.c | 3 +-- sys/modules/examples/pollpal/pollpal.c | 3 ++- sys/net/if_tap.c | 5 +---- sys/net/if_tun.c | 9 +-------- sys/rump/dev/lib/libbpf/bpf_component.c | 3 +-- sys/rump/dev/lib/libdrvctl/drvctl_component.c | 4 +--- sys/sys/conf.h | 2 +- 25 files changed, 41 insertions(+), 83 deletions(-) diff --git a/external/cddl/osnet/dev/dtrace/dtrace_modevent.c b/external/cddl/osnet/dev/dtrace/dtrace_modevent.c index cc0f8103fb98..f1338840125a 100644 --- a/external/cddl/osnet/dev/dtrace/dtrace_modevent.c +++ b/external/cddl/osnet/dev/dtrace/dtrace_modevent.c @@ -42,9 +42,7 @@ dtrace_modcmd(modcmd_t cmd, void *data) return error; case MODULE_CMD_FINI: - error = devsw_detach(NULL, &dtrace_cdevsw); - if (error != 0) - return error; + devsw_detach(NULL, &dtrace_cdevsw); error = dtrace_unload(); if (error != 0) { diff --git a/external/cddl/osnet/dev/fbt/fbt.c b/external/cddl/osnet/dev/fbt/fbt.c index b367c2155292..46dd7c1f7f06 100644 --- a/external/cddl/osnet/dev/fbt/fbt.c +++ b/external/cddl/osnet/dev/fbt/fbt.c @@ -1329,7 +1329,8 @@ dtrace_fbt_modcmd(modcmd_t cmd, void *data) error = fbt_unload(); if (error != 0) return error; - return devsw_detach(NULL, &fbt_cdevsw); + devsw_detach(NULL, &fbt_cdevsw); + return 0; case MODULE_CMD_AUTOUNLOAD: return EBUSY; default: diff --git a/external/cddl/osnet/dev/sdt/sdt.c b/external/cddl/osnet/dev/sdt/sdt.c index c3ad129f8284..5a41270a2917 100644 --- a/external/cddl/osnet/dev/sdt/sdt.c +++ b/external/cddl/osnet/dev/sdt/sdt.c @@ -562,7 +562,8 @@ dtrace_sdt_modcmd(modcmd_t cmd, void *data) error = sdt_unload(); if (error != 0) return error; - return devsw_detach(NULL, &sdt_cdevsw); + devsw_detach(NULL, &sdt_cdevsw); + return 0; case MODULE_CMD_AUTOUNLOAD: return EBUSY; default: diff --git a/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_ioctl.c b/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_ioctl.c index 9e19cd1dc0c3..d74d8c71e54d 100644 --- a/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_ioctl.c +++ b/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_ioctl.c @@ -7231,7 +7231,7 @@ zfs_modcmd(modcmd_t cmd, void *arg) if (error) return error; - (void) devsw_detach(&zfs_bdevsw, &zfs_cdevsw); + devsw_detach(&zfs_bdevsw, &zfs_cdevsw); attacherr: zfs_sysctl_fini(); diff --git a/share/man/man9/devsw_attach.9 b/share/man/man9/devsw_attach.9 index cf862be5846a..6ffc51957a3f 100644 --- a/share/man/man9/devsw_attach.9 +++ b/share/man/man9/devsw_attach.9 @@ -27,7 +27,7 @@ .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGE. .\" -.Dd April 30, 2017 +.Dd January 11, 2022 .Dt DEVSW 9 .Os .Sh NAME @@ -49,7 +49,7 @@ .Fa "const struct cdevsw *cdev" .Fa "devmajor_t *cmajor" .Fc -.Ft int +.Ft void .Fo devsw_detach .Fa "const struct bdevsw *bdev" .Fa "const struct cdevsw *cdev" @@ -130,6 +130,11 @@ and structures. .Fn devsw_detach should be called before a loaded device driver is unloaded. +The caller must ensure that there are no open instances of the device, +and that the device's +.Fn d_open +function will fail, before calling. +Fn devsw_detach . .Pp The .Fn bdevsw_lookup @@ -155,10 +160,8 @@ or .Sh RETURN VALUES Upon successful completion, .Fn devsw_attach -and -.Fn devsw_detach -return 0. -Otherwise they return an error value. +returns 0. +Otherwise it returns an error value. .Pp In case of failure, .Fn bdevsw_lookup diff --git a/sys/coda/coda_psdev.c b/sys/coda/coda_psdev.c index cede16da3f53..7f531f03fe56 100644 --- a/sys/coda/coda_psdev.c +++ b/sys/coda/coda_psdev.c @@ -758,7 +758,7 @@ vcoda_modcmd(modcmd_t cmd, void *arg) if (VC_OPEN(vcp)) return EBUSY; } - return devsw_detach(NULL, &vcoda_cdevsw); + devsw_detach(NULL, &vcoda_cdevsw); } #endif break; diff --git a/sys/dev/ccd.c b/sys/dev/ccd.c index 05945f9a67ba..2283bc0346da 100644 --- a/sys/dev/ccd.c +++ b/sys/dev/ccd.c @@ -1710,7 +1710,7 @@ ccd_modcmd(modcmd_t cmd, void *arg) error = EBUSY; } else { mutex_exit(&ccd_lock); - error = devsw_detach(&ccd_bdevsw, &ccd_cdevsw); + devsw_detach(&ccd_bdevsw, &ccd_cdevsw); ccddetach(); } #endif diff --git a/sys/dev/clockctl.c b/sys/dev/clockctl.c index 0da5e7765fe8..9685c0f129f6 100644 --- a/sys/dev/clockctl.c +++ b/sys/dev/clockctl.c @@ -182,14 +182,12 @@ clockctl_modcmd(modcmd_t cmd, void *data) return EBUSY; } #ifdef _MODULE - error = devsw_detach(NULL, &clockctl_cdevsw); + devsw_detach(NULL, &clockctl_cdevsw); #endif mutex_exit(&clockctl_mtx); - if (error == 0) { - kauth_unlisten_scope(clockctl_listener); - mutex_destroy(&clockctl_mtx); - } + kauth_unlisten_scope(clockctl_listener); + mutex_destroy(&clockctl_mtx); break; default: diff --git a/sys/dev/hdaudio/hdaudio.c b/sys/dev/hdaudio/hdaudio.c index d39ff2db6cde..5c7874778e22 100644 --- a/sys/dev/hdaudio/hdaudio.c +++ b/sys/dev/hdaudio/hdaudio.c @@ -1636,11 +1636,7 @@ hdaudio_modcmd(modcmd_t cmd, void *opaque) error = config_cfdriver_detach(&hdaudio_cd); if (error) break; - error = devsw_detach(NULL, &hdaudio_cdevsw); - if (error) { - config_cfdriver_attach(&hdaudio_cd); - break; - } + devsw_detach(NULL, &hdaudio_cdevsw); #endif break; default: diff --git a/sys/dev/i2c/i2c.c b/sys/dev/i2c/i2c.c index 322f6ad3f199..b56b3e235112 100644 --- a/sys/dev/i2c/i2c.c +++ b/sys/dev/i2c/i2c.c @@ -904,7 +904,7 @@ iic_modcmd(modcmd_t cmd, void *opaque) if (error) { aprint_error("%s: unable to init component\n", iic_cd.cd_name); - (void)devsw_detach(NULL, &iic_cdevsw); + devsw_detach(NULL, &iic_cdevsw); } mutex_exit(&iic_mtx); #endif @@ -922,10 +922,7 @@ iic_modcmd(modcmd_t cmd, void *opaque) mutex_exit(&iic_mtx); break; } - error = devsw_detach(NULL, &iic_cdevsw); - if (error != 0) - config_init_component(cfdriver_ioconf_iic, - cfattach_ioconf_iic, cfdata_ioconf_iic); + devsw_detach(NULL, &iic_cdevsw); #endif mutex_exit(&iic_mtx); break; diff --git a/sys/dev/pad/pad.c b/sys/dev/pad/pad.c index fe0b429cf386..a779f1f71b8d 100644 --- a/sys/dev/pad/pad.c +++ b/sys/dev/pad/pad.c @@ -777,9 +777,7 @@ pad_modcmd(modcmd_t cmd, void *arg) case MODULE_CMD_FINI: #ifdef _MODULE - error = devsw_detach(NULL, &pad_cdevsw); - if (error) - break; + devsw_detach(NULL, &pad_cdevsw); error = config_fini_component(cfdriver_ioconf_pad, pad_cfattach, cfdata_ioconf_pad); diff --git a/sys/dev/raidframe/rf_netbsdkintf.c b/sys/dev/raidframe/rf_netbsdkintf.c index d1bda3553e03..87439aa70bfb 100644 --- a/sys/dev/raidframe/rf_netbsdkintf.c +++ b/sys/dev/raidframe/rf_netbsdkintf.c @@ -4088,16 +4088,7 @@ raid_modcmd_fini(void) return error; } #endif - error = devsw_detach(&raid_bdevsw, &raid_cdevsw); - if (error != 0) { - aprint_error("%s: cannot detach devsw\n",__func__); -#ifdef _MODULE - config_cfdriver_attach(&raid_cd); -#endif - config_cfattach_attach(raid_cd.cd_name, &raid_ca); - mutex_exit(&raid_lock); - return error; - } + devsw_detach(&raid_bdevsw, &raid_cdevsw); rf_BootRaidframe(false); #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0) rf_destroy_mutex2(rf_sparet_wait_mutex); diff --git a/sys/dev/sysmon/sysmon.c b/sys/dev/sysmon/sysmon.c index 46aedaad7337..fd2993f0c180 100644 --- a/sys/dev/sysmon/sysmon.c +++ b/sys/dev/sysmon/sysmon.c @@ -356,7 +356,7 @@ sysmon_fini(void) if (error == 0) { mutex_enter(&sysmon_minor_mtx); sm_is_attached = false; - error = devsw_detach(NULL, &sysmon_cdevsw); + devsw_detach(NULL, &sysmon_cdevsw); mutex_exit(&sysmon_minor_mtx); } #endif diff --git a/sys/dev/tprof/tprof.c b/sys/dev/tprof/tprof.c index b069a5b7df5d..136fd190ad14 100644 --- a/sys/dev/tprof/tprof.c +++ b/sys/dev/tprof/tprof.c @@ -768,13 +768,7 @@ tprof_modcmd(modcmd_t cmd, void *arg) case MODULE_CMD_FINI: #if defined(_MODULE) - { - int error; - error = devsw_detach(NULL, &tprof_cdevsw); - if (error) { - return error; - } - } + devsw_detach(NULL, &tprof_cdevsw); #endif /* defined(_MODULE) */ tprof_driver_fini(); return 0; diff --git a/sys/dist/pf/net/pf_ioctl.c b/sys/dist/pf/net/pf_ioctl.c index 94bfb70a411d..e4c13be698f8 100644 --- a/sys/dist/pf/net/pf_ioctl.c +++ b/sys/dist/pf/net/pf_ioctl.c @@ -3459,7 +3459,8 @@ pf_modcmd(modcmd_t cmd, void *opaque) } else { pfdetach(); pflogdetach(); - return devsw_detach(NULL, &pf_cdevsw); + devsw_detach(NULL, &pf_cdevsw); + return 0; } default: return ENOTTY; diff --git a/sys/external/bsd/ipf/netinet/ip_fil_netbsd.c b/sys/external/bsd/ipf/netinet/ip_fil_netbsd.c index d0c4ca95097c..bb4e0706cc9b 100644 --- a/sys/external/bsd/ipf/netinet/ip_fil_netbsd.c +++ b/sys/external/bsd/ipf/netinet/ip_fil_netbsd.c @@ -2256,7 +2256,7 @@ ipl_fini(void *opaque) { #ifdef _MODULE - (void)devsw_detach(NULL, &ipl_cdevsw); + devsw_detach(NULL, &ipl_cdevsw); #endif /* diff --git a/sys/fs/autofs/autofs_vfsops.c b/sys/fs/autofs/autofs_vfsops.c index fbd6eafe6532..1204d1f9b6d3 100644 --- a/sys/fs/autofs/autofs_vfsops.c +++ b/sys/fs/autofs/autofs_vfsops.c @@ -496,9 +496,7 @@ autofs_modcmd(modcmd_t cmd, void *arg) } mutex_exit(&autofs_softc->sc_lock); - error = devsw_detach(NULL, &autofs_cdevsw); - if (error) - break; + devsw_detach(NULL, &autofs_cdevsw); #endif error = vfs_detach(&autofs_vfsops); if (error) diff --git a/sys/kern/kern_drvctl.c b/sys/kern/kern_drvctl.c index 37f4730b2512..8a4156f8a0aa 100644 --- a/sys/kern/kern_drvctl.c +++ b/sys/kern/kern_drvctl.c @@ -665,15 +665,10 @@ drvctl_modcmd(modcmd_t cmd, void *arg) devmon_insert_vec = saved_insert_vec; saved_insert_vec = NULL; #ifdef _MODULE - error = devsw_detach(NULL, &drvctl_cdevsw); - if (error != 0) { - saved_insert_vec = devmon_insert_vec; - devmon_insert_vec = devmon_insert; - } + devsw_detach(NULL, &drvctl_cdevsw); #endif mutex_exit(&drvctl_lock); - if (error == 0) - drvctl_fini(); + drvctl_fini(); break; default: diff --git a/sys/kern/subr_devsw.c b/sys/kern/subr_devsw.c index 1a0f721fdd65..b95a09d3e6b6 100644 --- a/sys/kern/subr_devsw.c +++ b/sys/kern/subr_devsw.c @@ -343,14 +343,13 @@ devsw_detach_locked(const struct bdevsw *bdev, const struct cdevsw *cdev) } } -int +void devsw_detach(const struct bdevsw *bdev, const struct cdevsw *cdev) { mutex_enter(&device_lock); devsw_detach_locked(bdev, cdev); mutex_exit(&device_lock); - return 0; } /* diff --git a/sys/modules/examples/pollpal/pollpal.c b/sys/modules/examples/pollpal/pollpal.c index b76e0733699b..d8ddc73450e0 100644 --- a/sys/modules/examples/pollpal/pollpal.c +++ b/sys/modules/examples/pollpal/pollpal.c @@ -311,7 +311,8 @@ pollpal_modcmd(modcmd_t cmd, void *arg __unused) case MODULE_CMD_FINI: if (pollpal_nopen != 0) return EBUSY; - return devsw_detach(NULL, &pollpal_cdevsw); + devsw_detach(NULL, &pollpal_cdevsw); + return 0; default: return ENOTTY; } diff --git a/sys/net/if_tap.c b/sys/net/if_tap.c index 0b57ad4a711c..314f4647707c 100644 --- a/sys/net/if_tap.c +++ b/sys/net/if_tap.c @@ -256,9 +256,7 @@ tapdetach(void) if_clone_detach(&tap_cloners); #ifdef _MODULE - error = devsw_detach(NULL, &tap_cdevsw); - if (error != 0) - goto out2; + devsw_detach(NULL, &tap_cdevsw); #endif if (tap_count != 0) { @@ -277,7 +275,6 @@ tapdetach(void) out1: #ifdef _MODULE devsw_attach("tap", NULL, &tap_bmajor, &tap_cdevsw, &tap_cmajor); - out2: #endif if_clone_attach(&tap_cloners); diff --git a/sys/net/if_tun.c b/sys/net/if_tun.c index 4f533a8f08d1..f4e5b6d86d43 100644 --- a/sys/net/if_tun.c +++ b/sys/net/if_tun.c @@ -142,17 +142,10 @@ tuninit(void) static int tundetach(void) { -#ifdef _MODULE - int error; -#endif if_clone_detach(&tun_cloner); #ifdef _MODULE - error = devsw_detach(NULL, &tun_cdevsw); - if (error != 0) { - if_clone_attach(&tun_cloner); - return error; - } + devsw_detach(NULL, &tun_cdevsw); #endif if (!LIST_EMPTY(&tun_softc_list) || !LIST_EMPTY(&tunz_softc_list)) { diff --git a/sys/rump/dev/lib/libbpf/bpf_component.c b/sys/rump/dev/lib/libbpf/bpf_component.c index 05807d371d40..d41d1987afe8 100644 --- a/sys/rump/dev/lib/libbpf/bpf_component.c +++ b/sys/rump/dev/lib/libbpf/bpf_component.c @@ -50,6 +50,5 @@ RUMP_COMPONENT(RUMP_COMPONENT_NET) panic("bpf devsw attach failed: %d", error); if ((error = rump_vfs_makeonedevnode(S_IFCHR, "/dev/bpf", cmaj, 0)) !=0) panic("cannot create bpf device nodes: %d", error); - if ((error = devsw_detach(NULL, &bpf_cdevsw)) != 0) - panic("cannot detach bpf devsw: %d", error); + devsw_detach(NULL, &bpf_cdevsw); } diff --git a/sys/rump/dev/lib/libdrvctl/drvctl_component.c b/sys/rump/dev/lib/libdrvctl/drvctl_component.c index e2e79f45f9de..ac4e103fdb9c 100644 --- a/sys/rump/dev/lib/libdrvctl/drvctl_component.c +++ b/sys/rump/dev/lib/libdrvctl/drvctl_component.c @@ -51,7 +51,5 @@ RUMP_COMPONENT(RUMP_COMPONENT_DEV) if ( error !=0) panic("cannot create drvctl device node: %d", error); - error = devsw_detach(NULL, &drvctl_cdevsw); - if (error != 0) - panic("cannot detach drvctl devsw: %d", error); + devsw_detach(NULL, &drvctl_cdevsw); } diff --git a/sys/sys/conf.h b/sys/sys/conf.h index 081631d2111f..ba46976a924b 100644 --- a/sys/sys/conf.h +++ b/sys/sys/conf.h @@ -104,7 +104,7 @@ extern kmutex_t device_lock; int devsw_attach(const char *, const struct bdevsw *, devmajor_t *, const struct cdevsw *, devmajor_t *); -int devsw_detach(const struct bdevsw *, const struct cdevsw *); +void devsw_detach(const struct bdevsw *, const struct cdevsw *); const struct bdevsw *bdevsw_lookup(dev_t); const struct cdevsw *cdevsw_lookup(dev_t); devmajor_t bdevsw_lookup_major(const struct bdevsw *); From 8b29cf64eb49228095d0dfdb679eb91988a5a1d2 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Wed, 12 Jan 2022 01:05:24 +0000 Subject: [PATCH 2/5] driver(9): Fix synchronization of devsw_attach/lookup/detach. (`dev' means either `bdev' or `cdev' for brevity here, e.g. in `devsw_lookup' (bdevsw_lookup, cdevsw_lookup), `dev_open' (bdev_open, cdev_open), `maxdevsws', &c., except for `devsw_attach' and `devsw_detach' which are taken literally.) - Use atomic_store_release and atomic_load_consume for devsw and tables and their entries, which are read unlocked and thus require memory barriers to ensure ordering between initialization in devsw_attach and use in dev_lookup. - Use pserialize(9) and localcount(9) to synchronize dev_open and devsw_detach. => Driver must ensure d_open fails and all open instances have been closed by the time it calls devsw_detach. => Bonus: dev_open is no longer globally serialized through device_lock. - Use atomic_store_release and atomic_load_acquire for max_devsws, which is used in conditionals in the new devsw_lookup_acquire. => It is safe to use atomic_load_relaxed in devsw_lookup because the caller must guarantee the entry is stable, so any increase of max_devsws must have already happened. => devsw_lookup and devsw_lookup_acquire assume that max_devsws never goes down. If you change this you must find some way to adapt the users, preferably without adding much overhead so that devsw operations are cheap. This change introduces an auxiliary table devswref mapping device majors to localcounts of opens in progress. The auxiliary table only occupies one pointer's worth of memory in a monolithic kernel, and is allocated on the fly for dynamically loaded modules. We could ask the module itself to reserve storage for it, but I don't see much value in that, and it would require some changes to the ABI and to config(8). --- sys/kern/subr_devsw.c | 285 +++++++++++++++++++++++++++++++++++------- 1 file changed, 238 insertions(+), 47 deletions(-) diff --git a/sys/kern/subr_devsw.c b/sys/kern/subr_devsw.c index b95a09d3e6b6..1145bf83a08c 100644 --- a/sys/kern/subr_devsw.c +++ b/sys/kern/subr_devsw.c @@ -85,6 +85,9 @@ __KERNEL_RCSID(0, "$NetBSD: subr_devsw.c,v 1.38 2017/11/07 18:35:57 christos Exp #include #include #include +#include +#include +#include #ifdef DEVSW_DEBUG #define DPRINTF(x) printf x @@ -97,12 +100,22 @@ __KERNEL_RCSID(0, "$NetBSD: subr_devsw.c,v 1.38 2017/11/07 18:35:57 christos Exp #define CDEVSW_SIZE (sizeof(struct cdevsw *)) #define DEVSWCONV_SIZE (sizeof(struct devsw_conv)) +struct devswref { + struct localcount dr_lc; +}; + +/* XXX bdevsw, cdevsw, max_bdevsws, and max_cdevsws should be volatile */ extern const struct bdevsw **bdevsw, *bdevsw0[]; extern const struct cdevsw **cdevsw, *cdevsw0[]; extern struct devsw_conv *devsw_conv, devsw_conv0[]; extern const int sys_bdevsws, sys_cdevsws; extern int max_bdevsws, max_cdevsws, max_devsw_convs; +static struct devswref *cdevswref; +static struct devswref *bdevswref; +static struct pserialize *devsw_psz; +static kcondvar_t devsw_cv; + static int bdevsw_attach(const struct bdevsw *, devmajor_t *); static int cdevsw_attach(const struct cdevsw *, devmajor_t *); static void devsw_detach_locked(const struct bdevsw *, const struct cdevsw *); @@ -118,6 +131,9 @@ devsw_init(void) KASSERT(sys_bdevsws < MAXDEVSW - 1); KASSERT(sys_cdevsws < MAXDEVSW - 1); mutex_init(&device_lock, MUTEX_DEFAULT, IPL_NONE); + + devsw_psz = pserialize_create(); + cv_init(&devsw_cv, "devsw"); } int @@ -160,13 +176,17 @@ devsw_attach(const char *devname, } if (bdev != NULL) - bdevsw[*bmajor] = bdev; - cdevsw[*cmajor] = cdev; + atomic_store_release(&bdevsw[*bmajor], bdev); + atomic_store_release(&cdevsw[*cmajor], cdev); mutex_exit(&device_lock); return (0); } + /* + * XXX This should allocate what it needs up front so we never + * need to flail around trying to unwind. + */ error = bdevsw_attach(bdev, bmajor); if (error != 0) goto fail; @@ -224,7 +244,8 @@ devsw_attach(const char *devname, static int bdevsw_attach(const struct bdevsw *devsw, devmajor_t *devmajor) { - const struct bdevsw **newptr; + const struct bdevsw **newbdevsw = NULL; + struct devswref *newbdevswref = NULL; devmajor_t bmajor; int i; @@ -255,18 +276,34 @@ bdevsw_attach(const struct bdevsw *devsw, devmajor_t *devmajor) if (*devmajor >= max_bdevsws) { KASSERT(bdevsw == bdevsw0); - newptr = kmem_zalloc(MAXDEVSW * BDEVSW_SIZE, KM_NOSLEEP); - if (newptr == NULL) + newbdevsw = kmem_zalloc(MAXDEVSW * sizeof(newbdevsw[0]), + KM_NOSLEEP); + newbdevswref = kmem_zalloc(((MAXDEVSW - sys_bdevsws) * + sizeof(newbdevsw[0])), + KM_NOSLEEP); + if (newbdevsw == NULL || newbdevswref == NULL) { + if (newbdevsw) { + kmem_free(newbdevsw, + MAXDEVSW * sizeof(newbdevsw[0])); + } + if (newbdevswref) { + kmem_free(newbdevswref, + ((MAXDEVSW - sys_bdevsws) * + sizeof(newbdevswref[0]))); + } return (ENOMEM); - memcpy(newptr, bdevsw, max_bdevsws * BDEVSW_SIZE); - bdevsw = newptr; - max_bdevsws = MAXDEVSW; + } + memcpy(newbdevsw, bdevsw, max_bdevsws * BDEVSW_SIZE); + atomic_store_release(&bdevsw, newbdevsw); + atomic_store_release(&bdevswref, newbdevswref); + atomic_store_release(&max_bdevsws, MAXDEVSW); } if (bdevsw[*devmajor] != NULL) return (EEXIST); - bdevsw[*devmajor] = devsw; + atomic_store_release(&bdevsw[*devmajor], devsw); + localcount_init(&bdevswref[*devmajor - sys_bdevsws].dr_lc); return (0); } @@ -274,7 +311,8 @@ bdevsw_attach(const struct bdevsw *devsw, devmajor_t *devmajor) static int cdevsw_attach(const struct cdevsw *devsw, devmajor_t *devmajor) { - const struct cdevsw **newptr; + const struct cdevsw **newcdevsw = NULL; + struct devswref *newcdevswref = NULL; devmajor_t cmajor; int i; @@ -302,18 +340,34 @@ cdevsw_attach(const struct cdevsw *devsw, devmajor_t *devmajor) if (*devmajor >= max_cdevsws) { KASSERT(cdevsw == cdevsw0); - newptr = kmem_zalloc(MAXDEVSW * CDEVSW_SIZE, KM_NOSLEEP); - if (newptr == NULL) + newcdevsw = kmem_zalloc(MAXDEVSW * sizeof(newcdevsw[0]), + KM_NOSLEEP); + newcdevswref = kmem_zalloc(((MAXDEVSW - sys_cdevsws) * + sizeof(newcdevswref[0])), + KM_NOSLEEP); + if (newcdevsw == NULL || newcdevswref == NULL) { + if (newcdevsw) { + kmem_free(newcdevsw, + MAXDEVSW * sizeof(newcdevsw[0])); + } + if (newcdevswref) { + kmem_free(newcdevswref, + ((MAXDEVSW - sys_cdevsws) * + sizeof(newcdevswref[0]))); + } return (ENOMEM); - memcpy(newptr, cdevsw, max_cdevsws * CDEVSW_SIZE); - cdevsw = newptr; - max_cdevsws = MAXDEVSW; + } + memcpy(newcdevsw, cdevsw, max_cdevsws * sizeof(cdevsw[0])); + atomic_store_release(&cdevsw, newcdevsw); + atomic_store_release(&cdevswref, newcdevswref); + atomic_store_release(&max_cdevsws, MAXDEVSW); } if (cdevsw[*devmajor] != NULL) return (EEXIST); - cdevsw[*devmajor] = devsw; + atomic_store_release(&cdevsw[*devmajor], devsw); + localcount_init(&cdevswref[*devmajor - sys_cdevsws].dr_lc); return (0); } @@ -321,25 +375,56 @@ cdevsw_attach(const struct cdevsw *devsw, devmajor_t *devmajor) static void devsw_detach_locked(const struct bdevsw *bdev, const struct cdevsw *cdev) { - int i; + int bi, ci; KASSERT(mutex_owned(&device_lock)); + /* Prevent new references. */ if (bdev != NULL) { - for (i = 0 ; i < max_bdevsws ; i++) { - if (bdevsw[i] != bdev) + for (bi = 0; bi < max_bdevsws; bi++) { + if (bdevsw[bi] != bdev) continue; - bdevsw[i] = NULL; + atomic_store_relaxed(&bdevsw[bi], NULL); break; } + KASSERT(bi < max_bdevsws); } if (cdev != NULL) { - for (i = 0 ; i < max_cdevsws ; i++) { - if (cdevsw[i] != cdev) + for (ci = 0; ci < max_cdevsws; ci++) { + if (cdevsw[ci] != cdev) continue; - cdevsw[i] = NULL; + atomic_store_relaxed(&cdevsw[ci], NULL); break; } + KASSERT(ci < max_cdevsws); + } + + if (bdev == NULL && cdev == NULL) /* XXX possible? */ + return; + + /* + * Wait for all bdevsw_lookup_acquire, cdevsw_lookup_acquire + * calls to notice that the devsw is gone. + */ + pserialize_perform(devsw_psz); + + /* + * Wait for all references to drain. It is the caller's + * responsibility to ensure that at this point, there are no + * extant open instances and all new d_open calls will fail. + * + * Note that localcount_drain may release and reacquire + * device_lock. + */ + if (bdev != NULL) { + localcount_drain(&bdevswref[bi - sys_bdevsws].dr_lc, + &devsw_cv, &device_lock); + localcount_fini(&bdevswref[bi - sys_bdevsws].dr_lc); + } + if (cdev != NULL) { + localcount_drain(&cdevswref[ci - sys_cdevsws].dr_lc, + &devsw_cv, &device_lock); + localcount_fini(&cdevswref[ci - sys_cdevsws].dr_lc); } } @@ -365,10 +450,63 @@ bdevsw_lookup(dev_t dev) if (dev == NODEV) return (NULL); bmajor = major(dev); - if (bmajor < 0 || bmajor >= max_bdevsws) + if (bmajor < 0 || bmajor >= atomic_load_relaxed(&max_bdevsws)) return (NULL); - return (bdevsw[bmajor]); + return atomic_load_consume(&bdevsw)[bmajor]; +} + +static const struct bdevsw * +bdevsw_lookup_acquire(dev_t dev, struct localcount **lcp) +{ + devmajor_t bmajor; + const struct bdevsw *bdev = NULL, *const *curbdevsw; + struct devswref *curbdevswref; + int s; + + if (dev == NODEV) + return NULL; + bmajor = major(dev); + if (bmajor < 0) + return NULL; + + /* + * If it's statically linked into the kernel, no need for + * localcount reference -- it will never go away until the + * machine reboots. + */ + if (bmajor < sys_bdevsws) { + *lcp = NULL; + return bdevsw0[bmajor]; + } + + s = pserialize_read_enter(); + + /* + * max_bdevsws never goes down, so it is safe to rely on this + * condition without any locking for the array access below. + */ + if (bmajor >= atomic_load_acquire(&max_bdevsws)) + goto out; + curbdevsw = atomic_load_consume(&bdevsw); + if ((bdev = atomic_load_consume(&curbdevsw[bmajor])) == NULL) + goto out; + + curbdevswref = atomic_load_consume(&bdevswref); + *lcp = &curbdevswref[bmajor - sys_bdevsws].dr_lc; + localcount_acquire(*lcp); + +out: pserialize_read_exit(s); + return bdev; +} + +static void +bdevsw_release(const struct bdevsw *bdev, struct localcount *lc) +{ + + if (lc == NULL) + return; + localcount_release(lc, &devsw_cv, &device_lock); } /* @@ -384,10 +522,63 @@ cdevsw_lookup(dev_t dev) if (dev == NODEV) return (NULL); cmajor = major(dev); - if (cmajor < 0 || cmajor >= max_cdevsws) + if (cmajor < 0 || cmajor >= atomic_load_relaxed(&max_cdevsws)) return (NULL); - return (cdevsw[cmajor]); + return atomic_load_consume(&cdevsw)[cmajor]; +} + +static const struct cdevsw * +cdevsw_lookup_acquire(dev_t dev, struct localcount **lcp) +{ + devmajor_t cmajor; + const struct cdevsw *cdev = NULL, *const *curcdevsw; + struct devswref *curcdevswref; + int s; + + if (dev == NODEV) + return NULL; + cmajor = major(dev); + if (cmajor < 0) + return NULL; + + /* + * If it's statically linked into the kernel, no need for + * localcount reference -- it will never go away until the + * machine reboots. + */ + if (cmajor < sys_cdevsws) { + *lcp = NULL; + return cdevsw0[cmajor]; + } + + s = pserialize_read_enter(); + + /* + * max_cdevsws never goes down, so it is safe to rely on this + * condition without any locking for the array access below. + */ + if (cmajor >= atomic_load_acquire(&max_cdevsws)) + goto out; + curcdevsw = atomic_load_consume(&cdevsw); + if ((cdev = atomic_load_consume(&curcdevsw[cmajor])) == NULL) + goto out; + + curcdevswref = atomic_load_consume(&cdevswref); + *lcp = &curcdevswref[cmajor - sys_cdevsws].dr_lc; + localcount_acquire(*lcp); + +out: pserialize_read_exit(s); + return cdev; +} + +static void +cdevsw_release(const struct cdevsw *cdev, struct localcount *lc) +{ + + if (lc == NULL) + return; + localcount_release(lc, &devsw_cv, &device_lock); } /* @@ -399,10 +590,13 @@ cdevsw_lookup(dev_t dev) devmajor_t bdevsw_lookup_major(const struct bdevsw *bdev) { - devmajor_t bmajor; + const struct bdevsw *const *curbdevsw; + devmajor_t bmajor, bmax; - for (bmajor = 0 ; bmajor < max_bdevsws ; bmajor++) { - if (bdevsw[bmajor] == bdev) + bmax = atomic_load_acquire(&max_bdevsws); + curbdevsw = atomic_load_consume(&bdevsw); + for (bmajor = 0; bmajor < bmax; bmajor++) { + if (atomic_load_relaxed(&curbdevsw[bmajor]) == bdev) return (bmajor); } @@ -418,10 +612,13 @@ bdevsw_lookup_major(const struct bdevsw *bdev) devmajor_t cdevsw_lookup_major(const struct cdevsw *cdev) { - devmajor_t cmajor; + const struct cdevsw *const *curcdevsw; + devmajor_t cmajor, cmax; - for (cmajor = 0 ; cmajor < max_cdevsws ; cmajor++) { - if (cdevsw[cmajor] == cdev) + cmax = atomic_load_acquire(&max_cdevsws); + curcdevsw = atomic_load_consume(&cdevsw); + for (cmajor = 0; cmajor < cmax; cmajor++) { + if (atomic_load_relaxed(&curcdevsw[cmajor]) == cdev) return (cmajor); } @@ -696,15 +893,10 @@ int bdev_open(dev_t dev, int flag, int devtype, lwp_t *l) { const struct bdevsw *d; + struct localcount *lc; int rv, mpflag; - /* - * For open we need to lock, in order to synchronize - * with attach/detach. - */ - mutex_enter(&device_lock); - d = bdevsw_lookup(dev); - mutex_exit(&device_lock); + d = bdevsw_lookup_acquire(dev, &lc); if (d == NULL) return ENXIO; @@ -712,6 +904,8 @@ bdev_open(dev_t dev, int flag, int devtype, lwp_t *l) rv = (*d->d_open)(dev, flag, devtype, l); DEV_UNLOCK(d); + bdevsw_release(d, lc); + return rv; } @@ -854,15 +1048,10 @@ int cdev_open(dev_t dev, int flag, int devtype, lwp_t *l) { const struct cdevsw *d; + struct localcount *lc; int rv, mpflag; - /* - * For open we need to lock, in order to synchronize - * with attach/detach. - */ - mutex_enter(&device_lock); - d = cdevsw_lookup(dev); - mutex_exit(&device_lock); + d = cdevsw_lookup_acquire(dev, &lc); if (d == NULL) return ENXIO; @@ -870,6 +1059,8 @@ cdev_open(dev_t dev, int flag, int devtype, lwp_t *l) rv = (*d->d_open)(dev, flag, devtype, l); DEV_UNLOCK(d); + cdevsw_release(d, lc); + return rv; } From 08f6cb89eee9884176f2d71e8a3e7feec9b35e33 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Wed, 12 Jan 2022 14:04:39 +0000 Subject: [PATCH 3/5] dk(4): Omit redundant microoptimization around cv_broadcast. cv_broadcast already has a fast path for the no-waiter case. --- sys/dev/dkwedge/dk.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/sys/dev/dkwedge/dk.c b/sys/dev/dkwedge/dk.c index 2f788cf507cf..171e31e2a304 100644 --- a/sys/dev/dkwedge/dk.c +++ b/sys/dev/dkwedge/dk.c @@ -89,12 +89,9 @@ struct dkwedge_softc { kmutex_t sc_iolock; kcondvar_t sc_dkdrn; u_int sc_iopend; /* I/Os pending */ - int sc_flags; /* flags (sc_iolock) */ int sc_mode; /* parent open mode */ }; -#define DK_F_WAIT_DRAIN 0x0001 /* waiting for I/O to drain */ - static void dkstart(struct dkwedge_softc *); static void dkiodone(struct buf *); static void dkrestart(void *); @@ -194,10 +191,8 @@ dkwedge_wait_drain(struct dkwedge_softc *sc) { mutex_enter(&sc->sc_iolock); - while (sc->sc_iopend != 0) { - sc->sc_flags |= DK_F_WAIT_DRAIN; + while (sc->sc_iopend != 0) cv_wait(&sc->sc_dkdrn, &sc->sc_iolock); - } mutex_exit(&sc->sc_iolock); } @@ -1341,11 +1336,8 @@ dkstart(struct dkwedge_softc *sc) while ((bp = bufq_peek(sc->sc_bufq)) != NULL) { if (sc->sc_state != DKW_STATE_RUNNING) { (void) bufq_get(sc->sc_bufq); - if (sc->sc_iopend-- == 1 && - (sc->sc_flags & DK_F_WAIT_DRAIN) != 0) { - sc->sc_flags &= ~DK_F_WAIT_DRAIN; + if (--sc->sc_iopend == 0) cv_broadcast(&sc->sc_dkdrn); - } mutex_exit(&sc->sc_iolock); bp->b_error = ENXIO; bp->b_resid = bp->b_bcount; @@ -1430,10 +1422,8 @@ dkiodone(struct buf *bp) putiobuf(bp); mutex_enter(&sc->sc_iolock); - if (sc->sc_iopend-- == 1 && (sc->sc_flags & DK_F_WAIT_DRAIN) != 0) { - sc->sc_flags &= ~DK_F_WAIT_DRAIN; + if (--sc->sc_iopend == 0) cv_broadcast(&sc->sc_dkdrn); - } disk_unbusy(&sc->sc_dk, obp->b_bcount - obp->b_resid, obp->b_flags & B_READ); From 7361e5d87180670db9fd7fcd7461acc7e75ed81d Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Wed, 12 Jan 2022 14:07:35 +0000 Subject: [PATCH 4/5] autoconf(9): New localcount-based device instance references. device_lookup_acquire looks up an autoconf device instance, if found, and acquires a reference the caller must release with device_release. If detach is in progress, device_lookup_acquire waits until it completes; while references are held, detach is blocked. The reference is meant to be held across opening a device in the short term, and then to be passed off to a longer-term reference that can be broken explicitly by detach -- usually a device special vnode, which is broken by vdevgone in the driver's detach function *_detach. Sleeping while holding a reference is allowed, but the caller shouldn't sleep on anything that is only cancelled by *_detach, because *_detach doesn't run until the reference is released. The purpose of device_lookup_acquire is to let *_detach reliably determine whether there are any open instances of the device before it commits to detaching, without needing the kernel lock. Subsequent changes to subr_devsw.c will make bdev_open and cdev_open take a reference to an autoconf instance automatically so there are no logic changes needed in the *_open devsw functions. --- sys/kern/subr_autoconf.c | 197 ++++++++++++++++++++++++++++++++++++--- sys/sys/device.h | 6 ++ 2 files changed, 191 insertions(+), 12 deletions(-) diff --git a/sys/kern/subr_autoconf.c b/sys/kern/subr_autoconf.c index 54391c6baea6..05f3def7eb6e 100644 --- a/sys/kern/subr_autoconf.c +++ b/sys/kern/subr_autoconf.c @@ -108,6 +108,7 @@ __KERNEL_RCSID(0, "$NetBSD: subr_autoconf.c,v 1.291 2021/12/31 14:19:57 riastrad #include #include #include +#include #include @@ -1453,6 +1454,9 @@ config_devdelete(device_t dev) if (dg->dg_devs != NULL) kmem_free(dg->dg_devs, sizeof(device_t) * dg->dg_ndevs); + localcount_fini(dev->dv_localcount); + kmem_free(dev->dv_localcount, sizeof(*dev->dv_localcount)); + cv_destroy(&dvl->dvl_cv); mutex_destroy(&dvl->dvl_mtx); @@ -1604,6 +1608,10 @@ config_devalloc(const device_t parent, const cfdata_t cf, "device-parent", device_xname(parent)); } + dev->dv_localcount = kmem_zalloc(sizeof(*dev->dv_localcount), + KM_SLEEP); + localcount_init(dev->dv_localcount); + if (dev->dv_cfdriver->cd_attrs != NULL) config_add_attrib_dict(dev); @@ -1755,8 +1763,38 @@ config_attach_internal(device_t parent, cfdata_t cf, void *aux, cfprint_t print, /* Let userland know */ devmon_report_device(dev, true); + /* + * Prevent detach until the driver's attach function, and all + * deferred actions, have finished. + */ config_pending_incr(dev); + + /* + * Allow the current thread to acquire references to the + * device, and nobody else until we're done attaching. + */ + mutex_enter(&config_misc_lock); + KASSERT(dev->dv_attaching == NULL); + dev->dv_attaching = curlwp; + mutex_exit(&config_misc_lock); + + /* Call the driver's attach function. */ (*dev->dv_cfattach->ca_attach)(parent, dev, aux); + + /* + * Allow other threads to acquire references to the device now + * that the driver's attach function is done. + */ + mutex_enter(&config_misc_lock); + KASSERT(dev->dv_attaching == curlwp); + dev->dv_attaching = NULL; + cv_broadcast(&config_misc_cv); + mutex_exit(&config_misc_lock); + + /* + * Synchronous parts of attach are done. Allow detach, unless + * the driver's attach function scheduled deferred actions. + */ config_pending_decr(dev); mutex_enter(&config_misc_lock); @@ -1822,8 +1860,38 @@ config_attach_pseudo(cfdata_t cf) /* Let userland know */ devmon_report_device(dev, true); + /* + * Prevent detach until the driver's attach function, and all + * deferred actions, have finished. + */ config_pending_incr(dev); + + /* + * Allow the current thread to acquire references to the + * device, and nobody else until we're done attaching. + */ + mutex_enter(&config_misc_lock); + KASSERT(dev->dv_attaching == NULL); + dev->dv_attaching = curlwp; + mutex_exit(&config_misc_lock); + + /* Call the driver's attach function. */ (*dev->dv_cfattach->ca_attach)(ROOT, dev, NULL); + + /* + * Allow other threads to acquire references to the device now + * that the driver's attach function is done. + */ + mutex_enter(&config_misc_lock); + KASSERT(dev->dv_attaching == curlwp); + dev->dv_attaching = NULL; + cv_broadcast(&config_misc_cv); + mutex_exit(&config_misc_lock); + + /* + * Synchronous parts of attach are done. Allow detach, unless + * the driver's attach function scheduled deferred actions. + */ config_pending_decr(dev); config_process_deferred(&deferred_config_queue, dev); @@ -1872,24 +1940,49 @@ config_dump_garbage(struct devicelist *garbage) static int config_detach_enter(device_t dev) { - int error; + int error = 0; mutex_enter(&config_misc_lock); - for (;;) { - if (dev->dv_pending == 0 && dev->dv_detaching == NULL) { - dev->dv_detaching = curlwp; - error = 0; - break; - } + + /* + * Wait until attach has fully completed, and until any + * concurrent detach (e.g., drvctl racing with USB event + * thread) has completed. + * + * Caller must hold alldevs_nread or alldevs_nwrite (e.g., via + * deviter) to ensure the winner of the race doesn't free the + * device leading the loser of the race into use-after-free. + * + * XXX Not all callers do this! + */ + while (dev->dv_pending || dev->dv_detaching) { KASSERTMSG(dev->dv_detaching != curlwp, "recursively detaching %s", device_xname(dev)); error = cv_wait_sig(&config_misc_cv, &config_misc_lock); if (error) - break; + goto out; } - KASSERT(error || dev->dv_detaching == curlwp); - mutex_exit(&config_misc_lock); + /* + * Attach has completed, and no other concurrent detach is + * running. Claim the device for detaching. This will cause + * all new attempts to acquire references to block. + */ + KASSERT(dev->dv_attaching == NULL); + KASSERT(dev->dv_detaching == NULL); + dev->dv_detaching = curlwp; + + /* + * Wait for all device_lookup_acquire references -- mostly, for + * all attempts to open the device -- to drain. There may + * still be open instances of the device after this point, but + * all new attempts to acquire references will block until + * dv_detaching clears. + */ + localcount_drain(dev->dv_localcount, + &config_misc_cv, &config_misc_lock); + +out: mutex_exit(&config_misc_lock); return error; } @@ -1980,9 +2073,18 @@ config_detach(device_t dev, int flags) */ if (rv == 0) dev->dv_flags &= ~DVF_ACTIVE; - else if ((flags & DETACH_FORCE) == 0) + else if ((flags & DETACH_FORCE) == 0) { + /* + * Detach failed -- likely EBUSY. Reset the + * localcount. At this point there can be no + * references held, and no new references acquired -- + * calls to device_lookup_acquire are held up on + * dv_detaching until config_detach_exit. + */ + localcount_fini(dev->dv_localcount); + localcount_init(dev->dv_localcount); goto out; - else { + } else { panic("config_detach: forced detach of %s failed (%d)", device_xname(dev), rv); } @@ -2498,6 +2600,14 @@ config_alldevs_exit(struct alldevs_foray *af) * device_lookup: * * Look up a device instance for a given driver. + * + * Caller is responsible for ensuring the device's state is + * stable, either by holding a reference already obtained with + * device_lookup_acquire or by otherwise ensuring the device is + * attached and can't be detached (e.g., holding an open device + * node and ensuring *_detach calls vdevgone). + * + * XXX Find a way to assert this. */ device_t device_lookup(cfdriver_t cd, int unit) @@ -2526,6 +2636,69 @@ device_lookup_private(cfdriver_t cd, int unit) return device_private(device_lookup(cd, unit)); } +/* + * device_lookup_acquire: + * + * Look up a device instance for a given driver, and return a + * reference to it that must be released by device_release. + * + * => If the device is still attaching, blocks until *_attach has + * returned. + * + * => If the device is detaching, blocks until *_detach has + * returned. May succeed or fail in that case, depending on + * whether *_detach has backed out (EBUSY) or committed to + * detaching. + */ +device_t +device_lookup_acquire(cfdriver_t cd, int unit) +{ + device_t dv; + + /* XXX This should have a pserialized fast path -- TBD. */ + mutex_enter(&config_misc_lock); + mutex_enter(&alldevs_lock); +retry: if (unit < 0 || unit >= cd->cd_ndevs || + (dv = cd->cd_devs[unit]) == NULL || + dv->dv_del_gen != 0) { + dv = NULL; + } else { + /* + * Wait for the device to stabilize, if attaching or + * detaching. Either way we must wait for *_attach or + * *_detach to complete, and either way we must retry: + * even if detaching, *_detach might fail (EBUSY) so + * the device may still be there. + */ + if ((dv->dv_attaching != NULL && dv->dv_attaching != curlwp) || + dv->dv_detaching != NULL) { + mutex_exit(&alldevs_lock); + cv_wait(&config_misc_cv, &config_misc_lock); + mutex_enter(&alldevs_lock); + goto retry; + } + localcount_acquire(dv->dv_localcount); + } + mutex_exit(&alldevs_lock); + mutex_exit(&config_misc_lock); + + return dv; +} + +/* + * device_release: + * + * Release a reference to a device acquired with + * device_lookup_acquire. + */ +void +device_release(device_t dv) +{ + + localcount_release(dv->dv_localcount, + &config_misc_cv, &config_misc_lock); +} + /* * device_find_by_xname: * diff --git a/sys/sys/device.h b/sys/sys/device.h index 99940ab1563e..76d9f8be5ad6 100644 --- a/sys/sys/device.h +++ b/sys/sys/device.h @@ -274,10 +274,12 @@ struct device { void *dv_private; /* this device's private storage */ int *dv_locators; /* our actual locators (optional) */ prop_dictionary_t dv_properties;/* properties dictionary */ + struct localcount *dv_localcount;/* reference count */ int dv_pending; /* config_pending count */ TAILQ_ENTRY(device) dv_pending_list; + struct lwp *dv_attaching; /* thread not yet finished in attach */ struct lwp *dv_detaching; /* detach lock (config_misc_lock/cv) */ size_t dv_activity_count; @@ -651,6 +653,10 @@ void null_childdetached(device_t, device_t); device_t device_lookup(cfdriver_t, int); void *device_lookup_private(cfdriver_t, int); + +device_t device_lookup_acquire(cfdriver_t, int); +void device_release(device_t); + void device_register(device_t, void *); void device_register_post_config(device_t, void *); From 2f8d346eb09d2cacdb815cf23aaa5c7628e6a7b3 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Wed, 12 Jan 2022 14:18:57 +0000 Subject: [PATCH 5/5] driver(9): New devsw members d_cfdriver, d_devtounit. If set, then bdev_open/cdev_open will use d_devtounit to map the dev_t to an autoconf instance (e.g., /dev/wd0a -> wd0) and hold a reference with device_lookup_acquire across the call to d_open. This guarantees that the autoconf instance cannot be detached while the devsw's d_open function is trying to open it (and also that the autoconf instance has finished *_attach before anyone can open it). Of course, if the underlying hardware has gone away, there will be I/O errors, but this avoids software synchronization bugs between open and detach for drivers that opt into it. It's up to the driver and bus to figure out how to deal with I/O errors from operations on hardware that has gone away while the software hasn't finished notifying everything that it's gone yet. --- sys/kern/subr_devsw.c | 49 +++++++++++++++++++++++++++++++++++++++++-- sys/sys/conf.h | 4 ++++ 2 files changed, 51 insertions(+), 2 deletions(-) diff --git a/sys/kern/subr_devsw.c b/sys/kern/subr_devsw.c index 1145bf83a08c..c610f8f430c9 100644 --- a/sys/kern/subr_devsw.c +++ b/sys/kern/subr_devsw.c @@ -88,6 +88,7 @@ __KERNEL_RCSID(0, "$NetBSD: subr_devsw.c,v 1.38 2017/11/07 18:35:57 christos Exp #include #include #include +#include #ifdef DEVSW_DEBUG #define DPRINTF(x) printf x @@ -894,16 +895,38 @@ bdev_open(dev_t dev, int flag, int devtype, lwp_t *l) { const struct bdevsw *d; struct localcount *lc; - int rv, mpflag; + device_t dv = NULL/*XXXGCC*/; + int unit, rv, mpflag; d = bdevsw_lookup_acquire(dev, &lc); if (d == NULL) return ENXIO; + if (d->d_devtounit) { + /* + * If the device node corresponds to an autoconf device + * instance, acquire a reference to it so that during + * d_open, device_lookup is stable. + * + * XXX This should also arrange to instantiate cloning + * pseudo-devices if appropriate, but that requires + * reviewing them all to find and verify a common + * pattern. + */ + if ((unit = (*d->d_devtounit)(dev)) == -1) + return ENXIO; + if ((dv = device_lookup_acquire(d->d_cfdriver, unit)) == NULL) + return ENXIO; + } + DEV_LOCK(d); rv = (*d->d_open)(dev, flag, devtype, l); DEV_UNLOCK(d); + if (d->d_devtounit) { + device_release(dv); + } + bdevsw_release(d, lc); return rv; @@ -1049,16 +1072,38 @@ cdev_open(dev_t dev, int flag, int devtype, lwp_t *l) { const struct cdevsw *d; struct localcount *lc; - int rv, mpflag; + device_t dv = NULL/*XXXGCC*/; + int unit, rv, mpflag; d = cdevsw_lookup_acquire(dev, &lc); if (d == NULL) return ENXIO; + if (d->d_devtounit) { + /* + * If the device node corresponds to an autoconf device + * instance, acquire a reference to it so that during + * d_open, device_lookup is stable. + * + * XXX This should also arrange to instantiate cloning + * pseudo-devices if appropriate, but that requires + * reviewing them all to find and verify a common + * pattern. + */ + if ((unit = (*d->d_devtounit)(dev)) == -1) + return ENXIO; + if ((dv = device_lookup_acquire(d->d_cfdriver, unit)) == NULL) + return ENXIO; + } + DEV_LOCK(d); rv = (*d->d_open)(dev, flag, devtype, l); DEV_UNLOCK(d); + if (d->d_devtounit) { + device_release(dv); + } + cdevsw_release(d, lc); return rv; diff --git a/sys/sys/conf.h b/sys/sys/conf.h index ba46976a924b..12cfd3bf89a6 100644 --- a/sys/sys/conf.h +++ b/sys/sys/conf.h @@ -76,6 +76,8 @@ struct bdevsw { int (*d_dump)(dev_t, daddr_t, void *, size_t); int (*d_psize)(dev_t); int (*d_discard)(dev_t, off_t, off_t); + int (*d_devtounit)(dev_t); + struct cfdriver *d_cfdriver; int d_flag; }; @@ -94,6 +96,8 @@ struct cdevsw { paddr_t (*d_mmap)(dev_t, off_t, int); int (*d_kqfilter)(dev_t, struct knote *); int (*d_discard)(dev_t, off_t, off_t); + int (*d_devtounit)(dev_t); + struct cfdriver *d_cfdriver; int d_flag; };