diff --git a/external/cddl/osnet/dev/dtrace/dtrace_modevent.c b/external/cddl/osnet/dev/dtrace/dtrace_modevent.c index cc0f8103fb98..f1338840125a 100644 --- a/external/cddl/osnet/dev/dtrace/dtrace_modevent.c +++ b/external/cddl/osnet/dev/dtrace/dtrace_modevent.c @@ -42,9 +42,7 @@ dtrace_modcmd(modcmd_t cmd, void *data) return error; case MODULE_CMD_FINI: - error = devsw_detach(NULL, &dtrace_cdevsw); - if (error != 0) - return error; + devsw_detach(NULL, &dtrace_cdevsw); error = dtrace_unload(); if (error != 0) { diff --git a/external/cddl/osnet/dev/fbt/fbt.c b/external/cddl/osnet/dev/fbt/fbt.c index b367c2155292..46dd7c1f7f06 100644 --- a/external/cddl/osnet/dev/fbt/fbt.c +++ b/external/cddl/osnet/dev/fbt/fbt.c @@ -1329,7 +1329,8 @@ dtrace_fbt_modcmd(modcmd_t cmd, void *data) error = fbt_unload(); if (error != 0) return error; - return devsw_detach(NULL, &fbt_cdevsw); + devsw_detach(NULL, &fbt_cdevsw); + return 0; case MODULE_CMD_AUTOUNLOAD: return EBUSY; default: diff --git a/external/cddl/osnet/dev/sdt/sdt.c b/external/cddl/osnet/dev/sdt/sdt.c index c3ad129f8284..5a41270a2917 100644 --- a/external/cddl/osnet/dev/sdt/sdt.c +++ b/external/cddl/osnet/dev/sdt/sdt.c @@ -562,7 +562,8 @@ dtrace_sdt_modcmd(modcmd_t cmd, void *data) error = sdt_unload(); if (error != 0) return error; - return devsw_detach(NULL, &sdt_cdevsw); + devsw_detach(NULL, &sdt_cdevsw); + return 0; case MODULE_CMD_AUTOUNLOAD: return EBUSY; default: diff --git a/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_ioctl.c b/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_ioctl.c index 9e19cd1dc0c3..d74d8c71e54d 100644 --- a/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_ioctl.c +++ b/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_ioctl.c @@ -7231,7 +7231,7 @@ zfs_modcmd(modcmd_t cmd, void *arg) if (error) return error; - (void) devsw_detach(&zfs_bdevsw, &zfs_cdevsw); + devsw_detach(&zfs_bdevsw, &zfs_cdevsw); attacherr: zfs_sysctl_fini(); diff --git a/share/man/man9/devsw_attach.9 b/share/man/man9/devsw_attach.9 index cf862be5846a..6ffc51957a3f 100644 --- a/share/man/man9/devsw_attach.9 +++ b/share/man/man9/devsw_attach.9 @@ -27,7 +27,7 @@ .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGE. .\" -.Dd April 30, 2017 +.Dd January 11, 2022 .Dt DEVSW 9 .Os .Sh NAME @@ -49,7 +49,7 @@ .Fa "const struct cdevsw *cdev" .Fa "devmajor_t *cmajor" .Fc -.Ft int +.Ft void .Fo devsw_detach .Fa "const struct bdevsw *bdev" .Fa "const struct cdevsw *cdev" @@ -130,6 +130,11 @@ and structures. .Fn devsw_detach should be called before a loaded device driver is unloaded. +The caller must ensure that there are no open instances of the device, +and that the device's +.Fn d_open +function will fail, before calling +.Fn devsw_detach . .Pp The .Fn bdevsw_lookup @@ -155,10 +160,8 @@ or .Sh RETURN VALUES Upon successful completion, .Fn devsw_attach -and -.Fn devsw_detach -return 0. -Otherwise they return an error value. +returns 0. +Otherwise it returns an error value.
.Pp In case of failure, .Fn bdevsw_lookup diff --git a/sys/coda/coda_psdev.c b/sys/coda/coda_psdev.c index cede16da3f53..7f531f03fe56 100644 --- a/sys/coda/coda_psdev.c +++ b/sys/coda/coda_psdev.c @@ -758,7 +758,7 @@ vcoda_modcmd(modcmd_t cmd, void *arg) if (VC_OPEN(vcp)) return EBUSY; } - return devsw_detach(NULL, &vcoda_cdevsw); + devsw_detach(NULL, &vcoda_cdevsw); } #endif break; diff --git a/sys/dev/ccd.c b/sys/dev/ccd.c index 05945f9a67ba..2283bc0346da 100644 --- a/sys/dev/ccd.c +++ b/sys/dev/ccd.c @@ -1710,7 +1710,7 @@ ccd_modcmd(modcmd_t cmd, void *arg) error = EBUSY; } else { mutex_exit(&ccd_lock); - error = devsw_detach(&ccd_bdevsw, &ccd_cdevsw); + devsw_detach(&ccd_bdevsw, &ccd_cdevsw); ccddetach(); } #endif diff --git a/sys/dev/clockctl.c b/sys/dev/clockctl.c index 0da5e7765fe8..9685c0f129f6 100644 --- a/sys/dev/clockctl.c +++ b/sys/dev/clockctl.c @@ -182,14 +182,12 @@ clockctl_modcmd(modcmd_t cmd, void *data) return EBUSY; } #ifdef _MODULE - error = devsw_detach(NULL, &clockctl_cdevsw); + devsw_detach(NULL, &clockctl_cdevsw); #endif mutex_exit(&clockctl_mtx); - if (error == 0) { - kauth_unlisten_scope(clockctl_listener); - mutex_destroy(&clockctl_mtx); - } + kauth_unlisten_scope(clockctl_listener); + mutex_destroy(&clockctl_mtx); break; default: diff --git a/sys/dev/hdaudio/hdaudio.c b/sys/dev/hdaudio/hdaudio.c index d39ff2db6cde..5c7874778e22 100644 --- a/sys/dev/hdaudio/hdaudio.c +++ b/sys/dev/hdaudio/hdaudio.c @@ -1636,11 +1636,7 @@ hdaudio_modcmd(modcmd_t cmd, void *opaque) error = config_cfdriver_detach(&hdaudio_cd); if (error) break; - error = devsw_detach(NULL, &hdaudio_cdevsw); - if (error) { - config_cfdriver_attach(&hdaudio_cd); - break; - } + devsw_detach(NULL, &hdaudio_cdevsw); #endif break; default: diff --git a/sys/dev/i2c/i2c.c b/sys/dev/i2c/i2c.c index 6f2c0c6a9698..36a3e87d5316 100644 --- a/sys/dev/i2c/i2c.c +++ b/sys/dev/i2c/i2c.c @@ -942,7 +942,7 @@ iic_modcmd(modcmd_t cmd, void *opaque) if (error) { aprint_error("%s: unable to init component\n", iic_cd.cd_name); - (void)devsw_detach(NULL, &iic_cdevsw); + devsw_detach(NULL, &iic_cdevsw); } mutex_exit(&iic_mtx); #endif @@ -960,10 +960,7 @@ iic_modcmd(modcmd_t cmd, void *opaque) mutex_exit(&iic_mtx); break; } - error = devsw_detach(NULL, &iic_cdevsw); - if (error != 0) - config_init_component(cfdriver_ioconf_iic, - cfattach_ioconf_iic, cfdata_ioconf_iic); + devsw_detach(NULL, &iic_cdevsw); #endif mutex_exit(&iic_mtx); break; diff --git a/sys/dev/pad/pad.c b/sys/dev/pad/pad.c index fe0b429cf386..a779f1f71b8d 100644 --- a/sys/dev/pad/pad.c +++ b/sys/dev/pad/pad.c @@ -777,9 +777,7 @@ pad_modcmd(modcmd_t cmd, void *arg) case MODULE_CMD_FINI: #ifdef _MODULE - error = devsw_detach(NULL, &pad_cdevsw); - if (error) - break; + devsw_detach(NULL, &pad_cdevsw); error = config_fini_component(cfdriver_ioconf_pad, pad_cfattach, cfdata_ioconf_pad); diff --git a/sys/dev/raidframe/rf_netbsdkintf.c b/sys/dev/raidframe/rf_netbsdkintf.c index d1bda3553e03..87439aa70bfb 100644 --- a/sys/dev/raidframe/rf_netbsdkintf.c +++ b/sys/dev/raidframe/rf_netbsdkintf.c @@ -4088,16 +4088,7 @@ raid_modcmd_fini(void) return error; } #endif - error = devsw_detach(&raid_bdevsw, &raid_cdevsw); - if (error != 0) { - aprint_error("%s: cannot detach devsw\n",__func__); -#ifdef _MODULE - config_cfdriver_attach(&raid_cd); -#endif - config_cfattach_attach(raid_cd.cd_name, &raid_ca); - mutex_exit(&raid_lock); - return error; - } + devsw_detach(&raid_bdevsw, &raid_cdevsw); rf_BootRaidframe(false); #if (RF_INCLUDE_PARITY_DECLUSTERING_DS > 0) 
rf_destroy_mutex2(rf_sparet_wait_mutex); diff --git a/sys/dev/sysmon/sysmon.c b/sys/dev/sysmon/sysmon.c index 46aedaad7337..fd2993f0c180 100644 --- a/sys/dev/sysmon/sysmon.c +++ b/sys/dev/sysmon/sysmon.c @@ -356,7 +356,7 @@ sysmon_fini(void) if (error == 0) { mutex_enter(&sysmon_minor_mtx); sm_is_attached = false; - error = devsw_detach(NULL, &sysmon_cdevsw); + devsw_detach(NULL, &sysmon_cdevsw); mutex_exit(&sysmon_minor_mtx); } #endif diff --git a/sys/dev/tprof/tprof.c b/sys/dev/tprof/tprof.c index b069a5b7df5d..136fd190ad14 100644 --- a/sys/dev/tprof/tprof.c +++ b/sys/dev/tprof/tprof.c @@ -768,13 +768,7 @@ tprof_modcmd(modcmd_t cmd, void *arg) case MODULE_CMD_FINI: #if defined(_MODULE) - { - int error; - error = devsw_detach(NULL, &tprof_cdevsw); - if (error) { - return error; - } - } + devsw_detach(NULL, &tprof_cdevsw); #endif /* defined(_MODULE) */ tprof_driver_fini(); return 0; diff --git a/sys/dist/pf/net/pf_ioctl.c b/sys/dist/pf/net/pf_ioctl.c index 94bfb70a411d..e4c13be698f8 100644 --- a/sys/dist/pf/net/pf_ioctl.c +++ b/sys/dist/pf/net/pf_ioctl.c @@ -3459,7 +3459,8 @@ pf_modcmd(modcmd_t cmd, void *opaque) } else { pfdetach(); pflogdetach(); - return devsw_detach(NULL, &pf_cdevsw); + devsw_detach(NULL, &pf_cdevsw); + return 0; } default: return ENOTTY; diff --git a/sys/external/bsd/ipf/netinet/ip_fil_netbsd.c b/sys/external/bsd/ipf/netinet/ip_fil_netbsd.c index d0c4ca95097c..bb4e0706cc9b 100644 --- a/sys/external/bsd/ipf/netinet/ip_fil_netbsd.c +++ b/sys/external/bsd/ipf/netinet/ip_fil_netbsd.c @@ -2256,7 +2256,7 @@ ipl_fini(void *opaque) { #ifdef _MODULE - (void)devsw_detach(NULL, &ipl_cdevsw); + devsw_detach(NULL, &ipl_cdevsw); #endif /* diff --git a/sys/fs/autofs/autofs_vfsops.c b/sys/fs/autofs/autofs_vfsops.c index fbd6eafe6532..1204d1f9b6d3 100644 --- a/sys/fs/autofs/autofs_vfsops.c +++ b/sys/fs/autofs/autofs_vfsops.c @@ -496,9 +496,7 @@ autofs_modcmd(modcmd_t cmd, void *arg) } mutex_exit(&autofs_softc->sc_lock); - error = devsw_detach(NULL, &autofs_cdevsw); - if (error) - break; + devsw_detach(NULL, &autofs_cdevsw); #endif error = vfs_detach(&autofs_vfsops); if (error) diff --git a/sys/kern/kern_drvctl.c b/sys/kern/kern_drvctl.c index 37f4730b2512..8a4156f8a0aa 100644 --- a/sys/kern/kern_drvctl.c +++ b/sys/kern/kern_drvctl.c @@ -665,15 +665,10 @@ drvctl_modcmd(modcmd_t cmd, void *arg) devmon_insert_vec = saved_insert_vec; saved_insert_vec = NULL; #ifdef _MODULE - error = devsw_detach(NULL, &drvctl_cdevsw); - if (error != 0) { - saved_insert_vec = devmon_insert_vec; - devmon_insert_vec = devmon_insert; - } + devsw_detach(NULL, &drvctl_cdevsw); #endif mutex_exit(&drvctl_lock); - if (error == 0) - drvctl_fini(); + drvctl_fini(); break; default: diff --git a/sys/kern/subr_autoconf.c b/sys/kern/subr_autoconf.c index 8439ae8c6a00..8c0b66556e0f 100644 --- a/sys/kern/subr_autoconf.c +++ b/sys/kern/subr_autoconf.c @@ -108,6 +108,7 @@ __KERNEL_RCSID(0, "$NetBSD: subr_autoconf.c,v 1.291 2021/12/31 14:19:57 riastrad #include #include #include +#include <sys/localcount.h> #include @@ -1453,6 +1454,9 @@ config_devdelete(device_t dev) if (dg->dg_devs != NULL) kmem_free(dg->dg_devs, sizeof(device_t) * dg->dg_ndevs); + localcount_fini(dev->dv_localcount); + kmem_free(dev->dv_localcount, sizeof(*dev->dv_localcount)); + cv_destroy(&dvl->dvl_cv); mutex_destroy(&dvl->dvl_mtx); @@ -1556,6 +1560,7 @@ config_devalloc(const device_t parent, const cfdata_t cf, dev->dv_activity_handlers = NULL; dev->dv_private = dev_private; dev->dv_flags = ca->ca_flags; /* inherit flags from class */ + dev->dv_attaching = curlwp; myunit
= config_unit_alloc(dev, cd, cf); if (myunit == -1) { @@ -1604,6 +1609,10 @@ config_devalloc(const device_t parent, const cfdata_t cf, "device-parent", device_xname(parent)); } + dev->dv_localcount = kmem_zalloc(sizeof(*dev->dv_localcount), + KM_SLEEP); + localcount_init(dev->dv_localcount); + if (dev->dv_cfdriver->cd_attrs != NULL) config_add_attrib_dict(dev); @@ -1755,8 +1764,29 @@ config_attach_internal(device_t parent, cfdata_t cf, void *aux, cfprint_t print, /* Let userland know */ devmon_report_device(dev, true); + /* + * Prevent detach until the driver's attach function, and all + * deferred actions, have finished. + */ config_pending_incr(dev); + + /* Call the driver's attach function. */ (*dev->dv_cfattach->ca_attach)(parent, dev, aux); + + /* + * Allow other threads to acquire references to the device now + * that the driver's attach function is done. + */ + mutex_enter(&config_misc_lock); + KASSERT(dev->dv_attaching == curlwp); + dev->dv_attaching = NULL; + cv_broadcast(&config_misc_cv); + mutex_exit(&config_misc_lock); + + /* + * Synchronous parts of attach are done. Allow detach, unless + * the driver's attach function scheduled deferred actions. + */ config_pending_decr(dev); mutex_enter(&config_misc_lock); @@ -1822,8 +1852,29 @@ config_attach_pseudo(cfdata_t cf) /* Let userland know */ devmon_report_device(dev, true); + /* + * Prevent detach until the driver's attach function, and all + * deferred actions, have finished. + */ config_pending_incr(dev); + + /* Call the driver's attach function. */ (*dev->dv_cfattach->ca_attach)(ROOT, dev, NULL); + + /* + * Allow other threads to acquire references to the device now + * that the driver's attach function is done. + */ + mutex_enter(&config_misc_lock); + KASSERT(dev->dv_attaching == curlwp); + dev->dv_attaching = NULL; + cv_broadcast(&config_misc_cv); + mutex_exit(&config_misc_lock); + + /* + * Synchronous parts of attach are done. Allow detach, unless + * the driver's attach function scheduled deferred actions. + */ config_pending_decr(dev); config_process_deferred(&deferred_config_queue, dev); @@ -1872,24 +1923,39 @@ config_dump_garbage(struct devicelist *garbage) static int config_detach_enter(device_t dev) { - int error; + int error = 0; mutex_enter(&config_misc_lock); - for (;;) { - if (dev->dv_pending == 0 && dev->dv_detaching == NULL) { - dev->dv_detaching = curlwp; - error = 0; - break; - } + + /* + * Wait until attach has fully completed, and until any + * concurrent detach (e.g., drvctl racing with USB event + * thread) has completed. + * + * Caller must hold alldevs_nread or alldevs_nwrite (e.g., via + * deviter) to ensure the winner of the race doesn't free the + * device leading the loser of the race into use-after-free. + * + * XXX Not all callers do this! + */ + while (dev->dv_pending || dev->dv_detaching) { KASSERTMSG(dev->dv_detaching != curlwp, "recursively detaching %s", device_xname(dev)); error = cv_wait_sig(&config_misc_cv, &config_misc_lock); if (error) - break; + goto out; } - KASSERT(error || dev->dv_detaching == curlwp); - mutex_exit(&config_misc_lock); + /* + * Attach has completed, and no other concurrent detach is + * running. Claim the device for detaching. This will cause + * all new attempts to acquire references to block. 
+ */ + KASSERT(dev->dv_attaching == NULL); + KASSERT(dev->dv_detaching == NULL); + dev->dv_detaching = curlwp; + +out: mutex_exit(&config_misc_lock); return error; } @@ -1980,9 +2046,10 @@ config_detach(device_t dev, int flags) */ if (rv == 0) dev->dv_flags &= ~DVF_ACTIVE; - else if ((flags & DETACH_FORCE) == 0) + else if ((flags & DETACH_FORCE) == 0) { + /* Detach failed -- likely EBUSY. */ goto out; - else { + } else { panic("config_detach: forced detach of %s failed (%d)", device_xname(dev), rv); } @@ -1991,6 +2058,19 @@ config_detach(device_t dev, int flags) * The device has now been successfully detached. */ + /* + * Wait for all device_lookup_acquire references -- mostly, for + * all attempts to open the device -- to drain. It is the + * responsibility of .ca_detach to ensure anything with open + * references will be interrupted and release them promptly, + * not block indefinitely. All new attempts to acquire + * references will block until dv_detaching clears. + */ + mutex_enter(&config_misc_lock); + localcount_drain(dev->dv_localcount, + &config_misc_cv, &config_misc_lock); + mutex_exit(&config_misc_lock); + /* Let userland know */ devmon_report_device(dev, false); @@ -2498,6 +2578,14 @@ config_alldevs_exit(struct alldevs_foray *af) * device_lookup: * * Look up a device instance for a given driver. + * + * Caller is responsible for ensuring the device's state is + * stable, either by holding a reference already obtained with + * device_lookup_acquire or by otherwise ensuring the device is + * attached and can't be detached (e.g., holding an open device + * node and ensuring *_detach calls vdevgone). + * + * XXX Find a way to assert this. */ device_t device_lookup(cfdriver_t cd, int unit) @@ -2526,6 +2614,69 @@ device_lookup_private(cfdriver_t cd, int unit) return device_private(device_lookup(cd, unit)); } +/* + * device_lookup_acquire: + * + * Look up a device instance for a given driver, and return a + * reference to it that must be released by device_release. + * + * => If the device is still attaching, blocks until *_attach has + * returned. + * + * => If the device is detaching, blocks until *_detach has + * returned. May succeed or fail in that case, depending on + * whether *_detach has backed out (EBUSY) or committed to + * detaching. + */ +device_t +device_lookup_acquire(cfdriver_t cd, int unit) +{ + device_t dv; + + /* XXX This should have a pserialized fast path -- TBD. */ + mutex_enter(&config_misc_lock); + mutex_enter(&alldevs_lock); +retry: if (unit < 0 || unit >= cd->cd_ndevs || + (dv = cd->cd_devs[unit]) == NULL || + dv->dv_del_gen != 0) { + dv = NULL; + } else { + /* + * Wait for the device to stabilize, if attaching or + * detaching. Either way we must wait for *_attach or + * *_detach to complete, and either way we must retry: + * even if detaching, *_detach might fail (EBUSY) so + * the device may still be there. + */ + if ((dv->dv_attaching != NULL && dv->dv_attaching != curlwp) || + dv->dv_detaching != NULL) { + mutex_exit(&alldevs_lock); + cv_wait(&config_misc_cv, &config_misc_lock); + mutex_enter(&alldevs_lock); + goto retry; + } + localcount_acquire(dv->dv_localcount); + } + mutex_exit(&alldevs_lock); + mutex_exit(&config_misc_lock); + + return dv; +} + +/* + * device_release: + * + * Release a reference to a device acquired with + * device_lookup_acquire. 
*/ +void +device_release(device_t dv) +{ + + localcount_release(dv->dv_localcount, + &config_misc_cv, &config_misc_lock); +} + /* * device_find_by_xname: * diff --git a/sys/kern/subr_devsw.c b/sys/kern/subr_devsw.c index 1a0f721fdd65..8b55187b32c1 100644 --- a/sys/kern/subr_devsw.c +++ b/sys/kern/subr_devsw.c @@ -85,6 +85,11 @@ __KERNEL_RCSID(0, "$NetBSD: subr_devsw.c,v 1.38 2017/11/07 18:35:57 christos Exp #include #include #include +#include <sys/atomic.h> +#include <sys/device.h> +#include <sys/localcount.h> +#include <sys/pserialize.h> +#include <sys/xcall.h> #ifdef DEVSW_DEBUG #define DPRINTF(x) printf x @@ -97,12 +102,22 @@ __KERNEL_RCSID(0, "$NetBSD: subr_devsw.c,v 1.38 2017/11/07 18:35:57 christos Exp #define CDEVSW_SIZE (sizeof(struct cdevsw *)) #define DEVSWCONV_SIZE (sizeof(struct devsw_conv)) +struct devswref { + struct localcount *dr_lc; + bool dr_dynamic; +}; + +/* XXX bdevsw, cdevsw, max_bdevsws, and max_cdevsws should be volatile */ extern const struct bdevsw **bdevsw, *bdevsw0[]; extern const struct cdevsw **cdevsw, *cdevsw0[]; extern struct devsw_conv *devsw_conv, devsw_conv0[]; extern const int sys_bdevsws, sys_cdevsws; extern int max_bdevsws, max_cdevsws, max_devsw_convs; +static struct devswref *cdevswref; +static struct devswref *bdevswref; +static kcondvar_t devsw_cv; + static int bdevsw_attach(const struct bdevsw *, devmajor_t *); static int cdevsw_attach(const struct cdevsw *, devmajor_t *); static void devsw_detach_locked(const struct bdevsw *, const struct cdevsw *); @@ -118,6 +133,8 @@ devsw_init(void) KASSERT(sys_bdevsws < MAXDEVSW - 1); KASSERT(sys_cdevsws < MAXDEVSW - 1); mutex_init(&device_lock, MUTEX_DEFAULT, IPL_NONE); + + cv_init(&devsw_cv, "devsw"); } int @@ -158,15 +175,12 @@ devsw_attach(const char *devname, error = EEXIST; goto fail; } - - if (bdev != NULL) - bdevsw[*bmajor] = bdev; - cdevsw[*cmajor] = cdev; - - mutex_exit(&device_lock); - return (0); } + /* + * XXX This should allocate what it needs up front so we never + * need to flail around trying to unwind. + */ error = bdevsw_attach(bdev, bmajor); if (error != 0) goto fail; @@ -176,6 +190,13 @@ goto fail; } + /* + * If we already found a conv, we're done. Otherwise, find an + * empty slot or extend the table.
+ */ + if (i < max_devsw_convs) { + mutex_exit(&device_lock); + return 0; + } + for (i = 0 ; i < max_devsw_convs ; i++) { if (devsw_conv[i].d_name == NULL) break; } @@ -224,7 +245,9 @@ devsw_attach(const char *devname, static int bdevsw_attach(const struct bdevsw *devsw, devmajor_t *devmajor) { - const struct bdevsw **newptr; + const struct bdevsw **newbdevsw = NULL; + struct devswref *newbdevswref = NULL; + struct localcount *lc; devmajor_t bmajor; int i; @@ -253,20 +276,35 @@ bdevsw_attach(const struct bdevsw *devsw, devmajor_t *devmajor) return (ENOMEM); } + if (bdevswref == NULL) { + newbdevswref = kmem_zalloc(MAXDEVSW * sizeof(newbdevswref[0]), + KM_NOSLEEP); + if (newbdevswref == NULL) + return ENOMEM; + atomic_store_release(&bdevswref, newbdevswref); + } + if (*devmajor >= max_bdevsws) { KASSERT(bdevsw == bdevsw0); - newptr = kmem_zalloc(MAXDEVSW * BDEVSW_SIZE, KM_NOSLEEP); - if (newptr == NULL) - return (ENOMEM); - memcpy(newptr, bdevsw, max_bdevsws * BDEVSW_SIZE); - bdevsw = newptr; - max_bdevsws = MAXDEVSW; + newbdevsw = kmem_zalloc(MAXDEVSW * sizeof(newbdevsw[0]), + KM_NOSLEEP); + if (newbdevsw == NULL) + return ENOMEM; + memcpy(newbdevsw, bdevsw, max_bdevsws * sizeof(bdevsw[0])); + atomic_store_release(&bdevsw, newbdevsw); + atomic_store_release(&max_bdevsws, MAXDEVSW); } if (bdevsw[*devmajor] != NULL) return (EEXIST); - bdevsw[*devmajor] = devsw; + KASSERT(bdevswref[*devmajor].dr_lc == NULL); + lc = kmem_zalloc(sizeof(*lc), KM_SLEEP); + localcount_init(lc); + bdevswref[*devmajor].dr_lc = lc; + bdevswref[*devmajor].dr_dynamic = true; + + atomic_store_release(&bdevsw[*devmajor], devsw); return (0); } @@ -274,7 +312,9 @@ bdevsw_attach(const struct bdevsw *devsw, devmajor_t *devmajor) static int cdevsw_attach(const struct cdevsw *devsw, devmajor_t *devmajor) { - const struct cdevsw **newptr; + const struct cdevsw **newcdevsw = NULL; + struct devswref *newcdevswref = NULL; + struct localcount *lc; devmajor_t cmajor; int i; @@ -300,20 +340,35 @@ cdevsw_attach(const struct cdevsw *devsw, devmajor_t *devmajor) return (ENOMEM); } + if (cdevswref == NULL) { + newcdevswref = kmem_zalloc(MAXDEVSW * sizeof(newcdevswref[0]), + KM_NOSLEEP); + if (newcdevswref == NULL) + return ENOMEM; + atomic_store_release(&cdevswref, newcdevswref); + } + if (*devmajor >= max_cdevsws) { KASSERT(cdevsw == cdevsw0); - newptr = kmem_zalloc(MAXDEVSW * CDEVSW_SIZE, KM_NOSLEEP); - if (newptr == NULL) - return (ENOMEM); - memcpy(newptr, cdevsw, max_cdevsws * CDEVSW_SIZE); - cdevsw = newptr; - max_cdevsws = MAXDEVSW; + newcdevsw = kmem_zalloc(MAXDEVSW * sizeof(newcdevsw[0]), + KM_NOSLEEP); + if (newcdevsw == NULL) + return ENOMEM; + memcpy(newcdevsw, cdevsw, max_cdevsws * sizeof(cdevsw[0])); + atomic_store_release(&cdevsw, newcdevsw); + atomic_store_release(&max_cdevsws, MAXDEVSW); } if (cdevsw[*devmajor] != NULL) return (EEXIST); - cdevsw[*devmajor] = devsw; + KASSERT(cdevswref[*devmajor].dr_lc == NULL); + lc = kmem_zalloc(sizeof(*lc), KM_SLEEP); + localcount_init(lc); + cdevswref[*devmajor].dr_lc = lc; + cdevswref[*devmajor].dr_dynamic = true; + + atomic_store_release(&cdevsw[*devmajor], devsw); return (0); } @@ -321,36 +376,75 @@ cdevsw_attach(const struct cdevsw *devsw, devmajor_t *devmajor) static void devsw_detach_locked(const struct bdevsw *bdev, const struct cdevsw *cdev) { - int i; + int bi, ci = -1/*XXXGCC*/; KASSERT(mutex_owned(&device_lock)); + /* Prevent new references.
*/ if (bdev != NULL) { - for (i = 0 ; i < max_bdevsws ; i++) { - if (bdevsw[i] != bdev) + for (bi = 0; bi < max_bdevsws; bi++) { + if (bdevsw[bi] != bdev) continue; - bdevsw[i] = NULL; + atomic_store_relaxed(&bdevsw[bi], NULL); break; } + KASSERT(bi < max_bdevsws); } if (cdev != NULL) { - for (i = 0 ; i < max_cdevsws ; i++) { - if (cdevsw[i] != cdev) + for (ci = 0; ci < max_cdevsws; ci++) { + if (cdevsw[ci] != cdev) continue; - cdevsw[i] = NULL; + atomic_store_relaxed(&cdevsw[ci], NULL); break; } + KASSERT(ci < max_cdevsws); + } + + if (bdev == NULL && cdev == NULL) /* XXX possible? */ + return; + + /* + * Wait for all bdevsw_lookup_acquire, cdevsw_lookup_acquire + * calls to notice that the devsw is gone. + * + * XXX Can't use pserialize_perform here because devsw_init is + * too early for pserialize_create(). + */ + xc_barrier(0); + + /* + * Wait for all references to drain. It is the caller's + * responsibility to ensure that at this point, there are no + * extant open instances and all new d_open calls will fail. + * + * Note that localcount_drain may release and reacquire + * device_lock. + */ + if (bdev != NULL) { + localcount_drain(bdevswref[bi].dr_lc, + &devsw_cv, &device_lock); + localcount_fini(bdevswref[bi].dr_lc); + kmem_free(bdevswref[bi].dr_lc, sizeof(*bdevswref[bi].dr_lc)); + bdevswref[bi].dr_lc = NULL; + bdevswref[bi].dr_dynamic = false; + } + if (cdev != NULL) { + localcount_drain(cdevswref[ci].dr_lc, + &devsw_cv, &device_lock); + localcount_fini(cdevswref[ci].dr_lc); + kmem_free(cdevswref[ci].dr_lc, sizeof(*cdevswref[ci].dr_lc)); + cdevswref[ci].dr_lc = NULL; + cdevswref[ci].dr_dynamic = false; } } -int +void devsw_detach(const struct bdevsw *bdev, const struct cdevsw *cdev) { mutex_enter(&device_lock); devsw_detach_locked(bdev, cdev); mutex_exit(&device_lock); - return 0; } /* @@ -366,10 +460,60 @@ bdevsw_lookup(dev_t dev) if (dev == NODEV) return (NULL); bmajor = major(dev); - if (bmajor < 0 || bmajor >= max_bdevsws) + if (bmajor < 0 || bmajor >= atomic_load_relaxed(&max_bdevsws)) return (NULL); - return (bdevsw[bmajor]); + return atomic_load_consume(&bdevsw)[bmajor]; +} + +static const struct bdevsw * +bdevsw_lookup_acquire(dev_t dev, struct localcount **lcp) +{ + devmajor_t bmajor; + const struct bdevsw *bdev = NULL, *const *curbdevsw; + struct devswref *curbdevswref; + int s; + + if (dev == NODEV) + return NULL; + bmajor = major(dev); + if (bmajor < 0) + return NULL; + + s = pserialize_read_enter(); + + /* + * max_bdevsws never goes down, so it is safe to rely on this + * condition without any locking for the array access below. + * Test sys_bdevsws first so we can avoid the memory barrier in + * that case. 
+ */ + if (bmajor >= sys_bdevsws && + bmajor >= atomic_load_acquire(&max_bdevsws)) + goto out; + curbdevsw = atomic_load_consume(&bdevsw); + if ((bdev = atomic_load_consume(&curbdevsw[bmajor])) == NULL) + goto out; + + curbdevswref = atomic_load_consume(&bdevswref); + if (curbdevswref == NULL || !curbdevswref[bmajor].dr_dynamic) { + *lcp = NULL; + } else { + *lcp = curbdevswref[bmajor].dr_lc; + localcount_acquire(*lcp); + } + +out: pserialize_read_exit(s); + return bdev; +} + +static void +bdevsw_release(const struct bdevsw *bdev, struct localcount *lc) +{ + + if (lc == NULL) + return; + localcount_release(lc, &devsw_cv, &device_lock); } /* @@ -385,10 +529,60 @@ cdevsw_lookup(dev_t dev) if (dev == NODEV) return (NULL); cmajor = major(dev); - if (cmajor < 0 || cmajor >= max_cdevsws) + if (cmajor < 0 || cmajor >= atomic_load_relaxed(&max_cdevsws)) return (NULL); - return (cdevsw[cmajor]); + return atomic_load_consume(&cdevsw)[cmajor]; +} + +static const struct cdevsw * +cdevsw_lookup_acquire(dev_t dev, struct localcount **lcp) +{ + devmajor_t cmajor; + const struct cdevsw *cdev = NULL, *const *curcdevsw; + struct devswref *curcdevswref; + int s; + + if (dev == NODEV) + return NULL; + cmajor = major(dev); + if (cmajor < 0) + return NULL; + + s = pserialize_read_enter(); + + /* + * max_cdevsws never goes down, so it is safe to rely on this + * condition without any locking for the array access below. + * Test sys_cdevsws first so we can avoid the memory barrier in + * that case. + */ + if (cmajor >= sys_cdevsws && + cmajor >= atomic_load_acquire(&max_cdevsws)) + goto out; + curcdevsw = atomic_load_consume(&cdevsw); + if ((cdev = atomic_load_consume(&curcdevsw[cmajor])) == NULL) + goto out; + + curcdevswref = atomic_load_consume(&cdevswref); + if (curcdevswref == NULL || !curcdevswref[cmajor].dr_dynamic) { + *lcp = NULL; + } else { + *lcp = curcdevswref[cmajor].dr_lc; + localcount_acquire(*lcp); + } + +out: pserialize_read_exit(s); + return cdev; +} + +static void +cdevsw_release(const struct cdevsw *cdev, struct localcount *lc) +{ + + if (lc == NULL) + return; + localcount_release(lc, &devsw_cv, &device_lock); } /* @@ -400,10 +594,13 @@ cdevsw_lookup(dev_t dev) devmajor_t bdevsw_lookup_major(const struct bdevsw *bdev) { - devmajor_t bmajor; + const struct bdevsw *const *curbdevsw; + devmajor_t bmajor, bmax; - for (bmajor = 0 ; bmajor < max_bdevsws ; bmajor++) { - if (bdevsw[bmajor] == bdev) + bmax = atomic_load_acquire(&max_bdevsws); + curbdevsw = atomic_load_consume(&bdevsw); + for (bmajor = 0; bmajor < bmax; bmajor++) { + if (atomic_load_relaxed(&curbdevsw[bmajor]) == bdev) return (bmajor); } @@ -419,10 +616,13 @@ bdevsw_lookup_major(const struct bdevsw *bdev) devmajor_t cdevsw_lookup_major(const struct cdevsw *cdev) { - devmajor_t cmajor; + const struct cdevsw *const *curcdevsw; + devmajor_t cmajor, cmax; - for (cmajor = 0 ; cmajor < max_cdevsws ; cmajor++) { - if (cdevsw[cmajor] == cdev) + cmax = atomic_load_acquire(&max_cdevsws); + curcdevsw = atomic_load_consume(&cdevsw); + for (cmajor = 0; cmajor < cmax; cmajor++) { + if (atomic_load_relaxed(&curcdevsw[cmajor]) == cdev) return (cmajor); } @@ -697,22 +897,41 @@ int bdev_open(dev_t dev, int flag, int devtype, lwp_t *l) { const struct bdevsw *d; - int rv, mpflag; + struct localcount *lc; + device_t dv = NULL/*XXXGCC*/; + int unit, rv, mpflag; - /* - * For open we need to lock, in order to synchronize - * with attach/detach. 
- */ - mutex_enter(&device_lock); - d = bdevsw_lookup(dev); - mutex_exit(&device_lock); + d = bdevsw_lookup_acquire(dev, &lc); if (d == NULL) return ENXIO; + if (d->d_devtounit) { + /* + * If the device node corresponds to an autoconf device + * instance, acquire a reference to it so that during + * d_open, device_lookup is stable. + * + * XXX This should also arrange to instantiate cloning + * pseudo-devices if appropriate, but that requires + * reviewing them all to find and verify a common + * pattern. + */ + if ((unit = (*d->d_devtounit)(dev)) == -1) + return ENXIO; + if ((dv = device_lookup_acquire(d->d_cfdriver, unit)) == NULL) + return ENXIO; + } + DEV_LOCK(d); rv = (*d->d_open)(dev, flag, devtype, l); DEV_UNLOCK(d); + if (d->d_devtounit) { + device_release(dv); + } + + bdevsw_release(d, lc); + return rv; } @@ -855,22 +1074,41 @@ int cdev_open(dev_t dev, int flag, int devtype, lwp_t *l) { const struct cdevsw *d; - int rv, mpflag; + struct localcount *lc; + device_t dv = NULL/*XXXGCC*/; + int unit, rv, mpflag; - /* - * For open we need to lock, in order to synchronize - * with attach/detach. - */ - mutex_enter(&device_lock); - d = cdevsw_lookup(dev); - mutex_exit(&device_lock); + d = cdevsw_lookup_acquire(dev, &lc); if (d == NULL) return ENXIO; + if (d->d_devtounit) { + /* + * If the device node corresponds to an autoconf device + * instance, acquire a reference to it so that during + * d_open, device_lookup is stable. + * + * XXX This should also arrange to instantiate cloning + * pseudo-devices if appropriate, but that requires + * reviewing them all to find and verify a common + * pattern. + */ + if ((unit = (*d->d_devtounit)(dev)) == -1) + return ENXIO; + if ((dv = device_lookup_acquire(d->d_cfdriver, unit)) == NULL) + return ENXIO; + } + DEV_LOCK(d); rv = (*d->d_open)(dev, flag, devtype, l); DEV_UNLOCK(d); + if (d->d_devtounit) { + device_release(dv); + } + + cdevsw_release(d, lc); + return rv; } @@ -1063,3 +1301,18 @@ nommap(dev_t dev, off_t off, int prot) return (paddr_t)-1; } + +/* + * dev_minor_unit(dev) + * + * Returns minor(dev) as an int. Intended for use with struct + * bdevsw, cdevsw::d_devtounit for drivers whose /dev nodes are + * implemented by reference to an autoconf instance with the minor + * number. + */ +int +dev_minor_unit(dev_t dev) +{ + + return minor(dev); +} diff --git a/sys/kern/subr_disk.c b/sys/kern/subr_disk.c index da664f920382..41218421db57 100644 --- a/sys/kern/subr_disk.c +++ b/sys/kern/subr_disk.c @@ -728,3 +728,10 @@ disk_set_info(device_t dev, struct disk *dk, const char *type) if (odisk_info) prop_object_release(odisk_info); } + +int +disklabel_dev_unit(dev_t dev) +{ + + return DISKUNIT(dev); +} diff --git a/sys/miscfs/specfs/spec_vnops.c b/sys/miscfs/specfs/spec_vnops.c index b4bc4c34ab03..987cd5969d95 100644 --- a/sys/miscfs/specfs/spec_vnops.c +++ b/sys/miscfs/specfs/spec_vnops.c @@ -81,10 +81,19 @@ __KERNEL_RCSID(0, "$NetBSD: spec_vnops.c,v 1.183 2021/07/18 23:57:14 dholland Ex #include #include #include +#include <sys/atomic.h> #include #include +/* + * Lock order: + * + * vnode lock + * -> device_lock + * -> struct vnode::v_interlock + */ + /* symbolic sleep message strings for devices */ const char devopn[] = "devopn"; const char devio[] = "devio"; @@ -165,6 +174,7 @@ const struct vnodeopv_desc spec_vnodeop_opv_desc = { &spec_vnodeop_p, spec_vnodeop_entries }; static kauth_listener_t rawio_listener; +static struct kcondvar specfs_iocv; /* Returns true if vnode is /dev/mem or /dev/kmem. */
bool @@ -210,6 +220,123 @@ spec_init(void) rawio_listener = kauth_listen_scope(KAUTH_SCOPE_DEVICE, rawio_listener_cb, NULL); + cv_init(&specfs_iocv, "specio"); +} + +/* + * spec_io_enter(vp, &sn, &dev) + * + * Enter an operation that may not hold vp's vnode lock or an + * fstrans on vp's mount. Until spec_io_exit, the vnode will not + * be revoked. + * + * On success, set sn to the specnode pointer and dev to the dev_t + * number and return zero. Caller must later call spec_io_exit + * when done. + * + * On failure, return ENXIO -- the device has been revoked and no + * longer exists. + */ +static int +spec_io_enter(struct vnode *vp, struct specnode **snp, dev_t *devp) +{ + dev_t dev; + struct specnode *sn; + unsigned iocnt; + int error = 0; + + mutex_enter(vp->v_interlock); + + /* + * Extract all the info we need from the vnode, unless the + * vnode has already been reclaimed. This can happen if the + * underlying device has been removed and all the device nodes + * for it have been revoked. The caller may not hold a vnode + * lock or fstrans to prevent this from happening before it has + * had an opportunity to notice the vnode is dead. + */ + if (vdead_check(vp, VDEAD_NOWAIT) != 0 || + (sn = vp->v_specnode) == NULL || + (dev = vp->v_rdev) == NODEV) { + error = ENXIO; + goto out; + } + + /* + * Notify spec_node_revoke that we are doing an I/O operation + * which may not be bracketed by fstrans(9) and thus is not + * blocked by vfs suspension. + * + * We could hold this reference with psref(9) instead, but we + * already have to take the interlock for vdead_check, so + * there's not much more cost here to another atomic operation. + */ + iocnt = atomic_inc_uint_nv(&sn->sn_iocnt); + CTASSERT(MAXLWP < UINT_MAX); + KASSERT(iocnt < UINT_MAX); + + /* Success! */ + *snp = sn; + *devp = dev; + error = 0; + +out: mutex_exit(vp->v_interlock); + return error; +} + +/* + * spec_io_exit(vp, sn) + * + * Exit an operation entered with a successful spec_io_enter -- + * allow concurrent spec_node_revoke to proceed. The argument sn + * must match the struct specnode pointer returned by spec_io_enter + * for vp. + */ +static void +spec_io_exit(struct vnode *vp, struct specnode *sn) +{ + unsigned iocnt; + + KASSERT(vp->v_specnode == sn); + + /* + * We are done. Notify spec_node_revoke if appropriate. The + * transition of 1 -> 0 must happen under device_lock so + * spec_node_revoke doesn't miss a wakeup. + */ + do { + iocnt = atomic_load_relaxed(&sn->sn_iocnt); + if (iocnt == 1) { + mutex_enter(&device_lock); + if (atomic_dec_uint_nv(&sn->sn_iocnt) == 0) + cv_broadcast(&specfs_iocv); + mutex_exit(&device_lock); + break; + } + } while (atomic_cas_uint(&sn->sn_iocnt, iocnt, iocnt - 1) != iocnt); +} + +/* + * spec_io_drain(vp, sn) + * + * Wait for all existing spec_io_enter/exit sections to complete. + * Caller must ensure spec_io_enter will fail at this point. + */ +static void +spec_io_drain(struct vnode *vp, struct specnode *sn) +{ + + /* + * I/O at the same time as closing is unlikely -- it often + * indicates an application bug.
+ */ + if (__predict_true(atomic_load_relaxed(&sn->sn_iocnt) == 0)) + return; + + mutex_enter(&device_lock); + while (atomic_load_relaxed(&sn->sn_iocnt) != 0) + cv_wait(&specfs_iocv, &device_lock); + mutex_exit(&device_lock); } /* @@ -250,6 +377,7 @@ spec_node_init(vnode_t *vp, dev_t rdev) sd->sd_refcnt = 1; sd->sd_opencnt = 0; sd->sd_bdevvp = NULL; + sd->sd_opened = false; sn->sn_dev = sd; sd = NULL; } else { @@ -261,6 +389,7 @@ spec_node_init(vnode_t *vp, dev_t rdev) sn->sn_opencnt = 0; sn->sn_rdev = rdev; sn->sn_gone = false; + sn->sn_iocnt = 0; vp->v_specnode = sn; vp->v_specnext = *vpp; *vpp = vp; @@ -373,6 +502,8 @@ spec_node_setmountedfs(vnode_t *devvp, struct mount *mp) KASSERT(devvp->v_type == VBLK); KASSERT(devvp->v_specnode->sn_dev->sd_mountpoint == NULL || mp == NULL); + KASSERT(devvp->v_specnode->sn_opencnt); + devvp->v_specnode->sn_dev->sd_mountpoint = mp; if (mp == NULL) return; @@ -415,6 +546,14 @@ spec_node_revoke(vnode_t *vp) KASSERT(sn->sn_opencnt == 0); } mutex_exit(&device_lock); + + /* + * Wait for all other devsw operations to drain. After this + * point, no bdev/cdev_* can be active for this specnode. + * Note: We drain even if we witnessed sn->sn_opencnt == 0, + * because there may have been a concurrent VOP_CLOSE. + */ + spec_io_drain(vp, sn); } /* @@ -435,6 +574,7 @@ spec_node_destroy(vnode_t *vp) KASSERT(vp->v_type == VBLK || vp->v_type == VCHR); KASSERT(vp->v_specnode != NULL); KASSERT(sn->sn_opencnt == 0); + KASSERT(sn->sn_iocnt == 0); mutex_enter(&device_lock); /* Remove from the hash and destroy the node. */ @@ -498,26 +638,28 @@ spec_open(void *v) int a_mode; kauth_cred_t a_cred; } */ *ap = v; - struct lwp *l; - struct vnode *vp; - dev_t dev; + struct lwp *l = curlwp; + struct vnode *vp = ap->a_vp; + dev_t dev, dev1; int error; enum kauth_device_req req; - specnode_t *sn; + specnode_t *sn, *sn1; specdev_t *sd; spec_ioctl_t ioctl; - u_int gen; - const char *name; + u_int gen = 0; + const char *name = NULL; + bool needclose = false; struct partinfo pi; - - l = curlwp; - vp = ap->a_vp; + + KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE || + (vp->v_vflag & VV_LOCKSWORK) == 0); + KASSERTMSG(vp->v_type == VBLK || vp->v_type == VCHR, "type=%d", + vp->v_type); + dev = vp->v_rdev; sn = vp->v_specnode; sd = sn->sn_dev; - name = NULL; - gen = 0; - + /* * Don't allow open if fs is mounted -nodev. */ @@ -535,28 +677,101 @@ spec_open(void *v) req = KAUTH_REQ_DEVICE_RAWIO_SPEC_READ; break; } + error = kauth_authorize_device_spec(ap->a_cred, req, vp); + if (error != 0) + return (error); + /* + * Acquire an open reference -- as long as we hold onto it, and + * the vnode isn't revoked, it can't be closed. + */ + mutex_enter(&device_lock); + KASSERT(!sn->sn_gone); switch (vp->v_type) { case VCHR: - error = kauth_authorize_device_spec(ap->a_cred, req, vp); - if (error != 0) - return (error); - /* * Character devices can accept opens from multiple * vnodes. */ - mutex_enter(&device_lock); - if (sn->sn_gone) { - mutex_exit(&device_lock); - return (EBADF); - } sd->sd_opencnt++; sn->sn_opencnt++; - mutex_exit(&device_lock); + break; + case VBLK: + /* + * For block devices, permit only one open. The buffer + * cache cannot remain self-consistent with multiple + * vnodes holding a block device open. + * + * Treat zero opencnt with non-NULL mountpoint as open. + * This may happen after forced detach of a mounted device. 
+ */ + if (sd->sd_opencnt != 0 || sd->sd_mountpoint != NULL) { + error = EBUSY; + break; + } + KASSERTMSG(sn->sn_opencnt == 0, "%u", sn->sn_opencnt); + sn->sn_opencnt = 1; + sd->sd_opencnt = 1; + sd->sd_bdevvp = vp; + break; + default: + panic("invalid specfs vnode type: %d", vp->v_type); + } + mutex_exit(&device_lock); + if (error) + return error; + + /* + * Set VV_ISTTY if this is a tty cdev. + * + * XXX This does the wrong thing if the module has to be + * autoloaded. We should maybe set this after autoloading + * modules and calling .d_open successfully, except (a) we need + * the vnode lock to touch it, and (b) once we acquire the + * vnode lock again, the vnode may have been revoked, and + * deadfs's dead_read needs VV_ISTTY to be already set in order + * to return the right answer. So this needs some additional + * synchronization to be made to work correctly with tty driver + * module autoload. For now, let's just hope it doesn't cause + * too much trouble for a tty from an autoloaded driver module + * to fail with EIO instead of returning EOF. + */ + if (vp->v_type == VCHR) { if (cdev_type(dev) == D_TTY) vp->v_vflag |= VV_ISTTY; - VOP_UNLOCK(vp); + } + + /* + * Because the opening the device may block indefinitely, + * e.g. when opening a tty, and loading a module may cross into + * many other subsystems, we must not hold the vnode lock while + * calling .d_open, so release it now and reacquire it when + * done. + * + * Take an I/O reference so that any concurrent + * spec_node_revoke will wait for us to finish calling .d_open. + * The vnode can't be dead at this point because we have it + * locked. Note that if revoked, the driver will interrupt + * .d_open before spec_node_revoke starts waiting for I/O to + * drain so this doesn't deadlock. + */ + VOP_UNLOCK(vp); + error = spec_io_enter(vp, &sn1, &dev1); + if (error) { + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); + return error; + } + KASSERT(sn1 == sn); + KASSERT(dev1 == dev); + + /* + * Open the device. If .d_open returns ENXIO (device not + * configured), the driver may not be loaded, so try + * autoloading a module and then try .d_open again if anything + * got loaded. + */ + switch (vp->v_type) { + case VCHR: do { const struct cdevsw *cdev; @@ -579,36 +794,9 @@ spec_open(void *v) /* Try to autoload device module */ (void) module_autoload(name, MODULE_CLASS_DRIVER); } while (gen != module_gen); - - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); break; case VBLK: - error = kauth_authorize_device_spec(ap->a_cred, req, vp); - if (error != 0) - return (error); - - /* - * For block devices, permit only one open. The buffer - * cache cannot remain self-consistent with multiple - * vnodes holding a block device open. - * - * Treat zero opencnt with non-NULL mountpoint as open. - * This may happen after forced detach of a mounted device. 
- */ - mutex_enter(&device_lock); - if (sn->sn_gone) { - mutex_exit(&device_lock); - return (EBADF); - } - if (sd->sd_opencnt != 0 || sd->sd_mountpoint != NULL) { - mutex_exit(&device_lock); - return EBUSY; - } - sn->sn_opencnt = 1; - sd->sd_opencnt = 1; - sd->sd_bdevvp = vp; - mutex_exit(&device_lock); do { const struct bdevsw *bdev; @@ -628,49 +816,118 @@ spec_open(void *v) if ((name = bdevsw_getname(major(dev))) == NULL) break; - VOP_UNLOCK(vp); - /* Try to autoload device module */ (void) module_autoload(name, MODULE_CLASS_DRIVER); - - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); } while (gen != module_gen); - break; - case VNON: - case VLNK: - case VDIR: - case VREG: - case VBAD: - case VFIFO: - case VSOCK: default: - return 0; + __unreachable(); } + /* + * Release the I/O reference now that we have called .d_open, + * and reacquire the vnode lock. At this point, the device may + * have been revoked, so we must tread carefully -- can't touch + * sn or sd until we verify the vnode is not dead. + */ + spec_io_exit(vp, sn); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); + + /* + * If it has been revoked since we released the vnode lock and + * reacquired it, then spec_node_revoke has closed it, and we + * must fail with EBADF without touching sn or sd which may be + * freed at this point. + * + * Otherwise, if opening it failed, back out and release the + * open reference. If it was ever successfully opened and we + * got the last reference this way, it's now our job to close + * it. This might happen in the following scenario: + * + * Thread 1 Thread 2 + * VOP_OPEN + * ... + * .d_open -> 0 (success) + * acquire vnode lock + * do stuff VOP_OPEN + * release vnode lock ... + * .d_open -> EBUSY + * VOP_CLOSE + * acquire vnode lock + * --sd_opencnt != 0 + * => no .d_close + * release vnode lock + * acquire vnode lock + * --sd_opencnt == 0 + * + * We can't resolve this by making spec_close wait for .d_open + * to complete before examining sd_opencnt, because .d_open can + * hang indefinitely, e.g. for a tty. + */ mutex_enter(&device_lock); - if (sn->sn_gone) { + mutex_enter(vp->v_interlock); + if (vdead_check(vp, VDEAD_NOWAIT) != 0) { if (error == 0) error = EBADF; - } else if (error != 0) { - sd->sd_opencnt--; - sn->sn_opencnt--; - if (vp->v_type == VBLK) - sd->sd_bdevvp = NULL; - + } else { + KASSERT(!sn->sn_gone); + if (error != 0) { + if (--sd->sd_opencnt == 0 && sd->sd_opened) { + needclose = true; + sd->sd_opened = false; + } + sn->sn_opencnt--; + if (vp->v_type == VBLK) + sd->sd_bdevvp = NULL; + } else { + sd->sd_opened = true; + } } + mutex_exit(vp->v_interlock); mutex_exit(&device_lock); - if (cdev_type(dev) != D_DISK || error != 0) + /* + * If this open failed, but the device was previously opened, + * and another thread concurrently closed the vnode while we + * were in the middle of reopening it, the other thread will + * see sd_opencnt > 0 and thus decide not to call .d_close -- + * it is now our responsibility to do so. + * + * XXX The flags passed to VOP_CLOSE here are wrong, but + * drivers can't rely on FREAD|FWRITE anyway -- e.g., consider + * a device opened by thread 0 with O_READ, then opened by + * thread 1 with O_WRITE, then closed by thread 0, and finally + * closed by thread 1; the last .d_close call will have FWRITE + * but not FREAD. We should just eliminate the FREAD/FWRITE + * parameter to .d_close altogether. + */ + if (needclose) { + KASSERT(error); + VOP_CLOSE(vp, FNONBLOCK, NOCRED); + } + + /* If anything went wrong, we're done. 
*/ + if (error) return error; - - ioctl = vp->v_type == VCHR ? cdev_ioctl : bdev_ioctl; - error = (*ioctl)(vp->v_rdev, DIOCGPARTINFO, &pi, FREAD, curlwp); - if (error == 0) - uvm_vnp_setsize(vp, (voff_t)pi.pi_secsize * pi.pi_size); + /* + * For disk devices, automagically set the vnode size to the + * partition size, if we can. This applies to block devices + * and character devices alike -- every block device must have + * a corresponding character device. And if the module is + * loaded it will remain loaded until we're done here (it is + * forbidden to devsw_detach until closed). So it is safe to + * query cdev_type unconditionally here. + */ + if (cdev_type(dev) == D_DISK) { + ioctl = vp->v_type == VCHR ? cdev_ioctl : bdev_ioctl; + if ((*ioctl)(dev, DIOCGPARTINFO, &pi, FREAD, curlwp) == 0) + uvm_vnp_setsize(vp, + (voff_t)pi.pi_secsize * pi.pi_size); + } + /* Success! */ return 0; } @@ -690,6 +947,8 @@ spec_read(void *v) struct vnode *vp = ap->a_vp; struct uio *uio = ap->a_uio; struct lwp *l = curlwp; + struct specnode *sn; + dev_t dev; struct buf *bp; daddr_t bn; int bsize, bscale; @@ -712,9 +971,27 @@ spec_read(void *v) switch (vp->v_type) { case VCHR: + /* + * Release the lock while we sleep -- possibly + * indefinitely, if this is, e.g., a tty -- in + * cdev_read, so we don't hold up everything else that + * might want access to the vnode. + * + * But before we issue the read, take an I/O reference + * to the specnode so close will know when we're done + * writing. Note that the moment we release the lock, + * the vnode's identity may change; hence spec_io_enter + * may fail, and the caller may have a dead vnode on + * their hands, if the file system on which vp lived + * has been unmounted. + */ VOP_UNLOCK(vp); - error = cdev_read(vp->v_rdev, uio, ap->a_ioflag); - vn_lock(vp, LK_SHARED | LK_RETRY); + error = spec_io_enter(vp, &sn, &dev); + if (error) + goto out; + error = cdev_read(dev, uio, ap->a_ioflag); + spec_io_exit(vp, sn); +out: vn_lock(vp, LK_SHARED | LK_RETRY); return (error); case VBLK: @@ -791,6 +1068,8 @@ spec_write(void *v) struct vnode *vp = ap->a_vp; struct uio *uio = ap->a_uio; struct lwp *l = curlwp; + struct specnode *sn; + dev_t dev; struct buf *bp; daddr_t bn; int bsize, bscale; @@ -806,9 +1085,27 @@ spec_write(void *v) switch (vp->v_type) { case VCHR: + /* + * Release the lock while we sleep -- possibly + * indefinitely, if this is, e.g., a tty -- in + * cdev_write, so we don't hold up everything else that + * might want access to the vnode. + * + * But before we issue the write, take an I/O reference + * to the specnode so close will know when we're done + * writing. Note that the moment we release the lock, + * the vnode's identity may change; hence spec_io_enter + * may fail, and the caller may have a dead vnode on + * their hands, if the file system on which vp lived + * has been unmounted. 
+ */ VOP_UNLOCK(vp); - error = cdev_write(vp->v_rdev, uio, ap->a_ioflag); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); + error = spec_io_enter(vp, &sn, &dev); + if (error) + goto out; + error = cdev_write(dev, uio, ap->a_ioflag); + spec_io_exit(vp, sn); +out: vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); return (error); case VBLK: @@ -866,21 +1163,13 @@ spec_fdiscard(void *v) off_t a_pos; off_t a_len; } */ *ap = v; - struct vnode *vp; + struct vnode *vp = ap->a_vp; dev_t dev; - vp = ap->a_vp; - dev = NODEV; - - mutex_enter(vp->v_interlock); - if (vdead_check(vp, VDEAD_NOWAIT) == 0 && vp->v_specnode != NULL) { - dev = vp->v_rdev; - } - mutex_exit(vp->v_interlock); + KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE || + (vp->v_vflag & VV_LOCKSWORK) == 0); - if (dev == NODEV) { - return ENXIO; - } + dev = vp->v_rdev; switch (vp->v_type) { case VCHR: @@ -909,40 +1198,32 @@ spec_ioctl(void *v) int a_fflag; kauth_cred_t a_cred; } */ *ap = v; - struct vnode *vp; + struct vnode *vp = ap->a_vp; + struct specnode *sn; dev_t dev; + int error; - /* - * Extract all the info we need from the vnode, taking care to - * avoid a race with VOP_REVOKE(). - */ - - vp = ap->a_vp; - dev = NODEV; - mutex_enter(vp->v_interlock); - if (vdead_check(vp, VDEAD_NOWAIT) == 0 && vp->v_specnode) { - dev = vp->v_rdev; - } - mutex_exit(vp->v_interlock); - if (dev == NODEV) { - return ENXIO; - } + error = spec_io_enter(vp, &sn, &dev); + if (error) + return error; switch (vp->v_type) { - case VCHR: - return cdev_ioctl(dev, ap->a_command, ap->a_data, + error = cdev_ioctl(dev, ap->a_command, ap->a_data, ap->a_fflag, curlwp); - + break; case VBLK: KASSERT(vp == vp->v_specnode->sn_dev->sd_bdevvp); - return bdev_ioctl(dev, ap->a_command, ap->a_data, + error = bdev_ioctl(dev, ap->a_command, ap->a_data, ap->a_fflag, curlwp); - + break; default: panic("spec_ioctl"); /* NOTREACHED */ } + + spec_io_exit(vp, sn); + return error; } /* ARGSUSED */ @@ -953,33 +1234,25 @@ spec_poll(void *v) struct vnode *a_vp; int a_events; } */ *ap = v; - struct vnode *vp; + struct vnode *vp = ap->a_vp; + struct specnode *sn; dev_t dev; + int revents; - /* - * Extract all the info we need from the vnode, taking care to - * avoid a race with VOP_REVOKE(). - */ - - vp = ap->a_vp; - dev = NODEV; - mutex_enter(vp->v_interlock); - if (vdead_check(vp, VDEAD_NOWAIT) == 0 && vp->v_specnode) { - dev = vp->v_rdev; - } - mutex_exit(vp->v_interlock); - if (dev == NODEV) { + if (spec_io_enter(vp, &sn, &dev) != 0) return POLLERR; - } switch (vp->v_type) { - case VCHR: - return cdev_poll(dev, ap->a_events, curlwp); - + revents = cdev_poll(dev, ap->a_events, curlwp); + break; default: - return (genfs_poll(v)); + revents = genfs_poll(v); + break; } + + spec_io_exit(vp, sn); + return revents; } /* ARGSUSED */ @@ -990,20 +1263,30 @@ spec_kqfilter(void *v) struct vnode *a_vp; struct proc *a_kn; } */ *ap = v; + struct vnode *vp = ap->a_vp; + struct specnode *sn; dev_t dev; + int error; - switch (ap->a_vp->v_type) { + error = spec_io_enter(vp, &sn, &dev); + if (error) + return error; + switch (vp->v_type) { case VCHR: - dev = ap->a_vp->v_rdev; - return cdev_kqfilter(dev, ap->a_kn); + error = cdev_kqfilter(dev, ap->a_kn); + break; default: /* * Block devices don't support kqfilter, and refuse it * for any other files (like those vflush()ed) too. 
*/ - return (EOPNOTSUPP); + error = EOPNOTSUPP; + break; } + + spec_io_exit(vp, sn); + return error; } /* @@ -1018,11 +1301,19 @@ kauth_cred_t a_cred; } */ *ap = v; struct vnode *vp = ap->a_vp; + struct specnode *sn; + dev_t dev; + int error; KASSERT(vp->v_type == VBLK); - if (bdev_type(vp->v_rdev) != D_DISK) - return EINVAL; + error = spec_io_enter(vp, &sn, &dev); + if (error) + return error; + + error = bdev_type(dev) == D_DISK ? 0 : EINVAL; + + spec_io_exit(vp, sn); - return 0; + return error; } @@ -1067,27 +1358,14 @@ spec_strategy(void *v) } */ *ap = v; struct vnode *vp = ap->a_vp; struct buf *bp = ap->a_bp; + struct specnode *sn = NULL; dev_t dev; int error; - dev = NODEV; - - /* - * Extract all the info we need from the vnode, taking care to - * avoid a race with VOP_REVOKE(). - */ - - mutex_enter(vp->v_interlock); - if (vdead_check(vp, VDEAD_NOWAIT) == 0 && vp->v_specnode != NULL) { - KASSERT(vp == vp->v_specnode->sn_dev->sd_bdevvp); - dev = vp->v_rdev; - } - mutex_exit(vp->v_interlock); - - if (dev == NODEV) { - error = ENXIO; + error = spec_io_enter(vp, &sn, &dev); + if (error) goto out; - } + bp->b_dev = dev; if (!(bp->b_flags & B_READ)) { @@ -1107,13 +1385,15 @@ } bdev_strategy(bp); - return 0; - -out: - bp->b_error = error; - bp->b_resid = bp->b_bcount; - biodone(bp); + error = 0; +out: if (sn) + spec_io_exit(vp, sn); + if (error) { + bp->b_error = error; + bp->b_resid = bp->b_bcount; + biodone(bp); + } return error; } @@ -1139,6 +1419,9 @@ spec_reclaim(void *v) } */ *ap = v; struct vnode *vp = ap->a_vp; + KASSERT(vp->v_specnode->sn_opencnt == 0); + KASSERT(vp->v_specnode->sn_iocnt == 0); + VOP_UNLOCK(vp); KASSERT(vp->v_mount == dead_rootmount); @@ -1182,14 +1465,18 @@ spec_close(void *v) } */ *ap = v; struct vnode *vp = ap->a_vp; struct session *sess; - dev_t dev = vp->v_rdev; + dev_t dev, dev1; int flags = ap->a_fflag; int mode, error, count; - specnode_t *sn; + specnode_t *sn, *sn1; specdev_t *sd; + KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE || + (vp->v_vflag & VV_LOCKSWORK) == 0); + mutex_enter(vp->v_interlock); sn = vp->v_specnode; + dev = vp->v_rdev; sd = sn->sn_dev; /* * If we're going away soon, make this non-blocking. @@ -1269,14 +1556,44 @@ panic("spec_close: not special"); } + /* + * Decrement the open reference count of this node and the + * device. For block devices, the open reference count must be + * 1 at this point. If the device's open reference count goes + * to zero, we're the last one out so get the lights. + * + * We may find --sd->sd_opencnt gives zero, and yet + * sd->sd_opened is false. This happens if the vnode is + * revoked at the same time as it is being opened, which can + * happen when opening a tty blocks indefinitely. In that + * case, we still must call close -- it is the job of close to + * interrupt the open. Either way, the device will be no + * longer opened, so we have to clear sd->sd_opened; subsequent + * opens will have responsibility for issuing close. + * + * This has the side effect that the sequence of opens might + * happen out of order -- we might end up doing open, open, + * close, close, instead of open, close, open, close. This is + * unavoidable with the current devsw API, where open is + * allowed to block and close must be able to run concurrently + * to interrupt it. It is the driver's responsibility to + * ensure that close is idempotent so that this works. Drivers + * requiring per-open state and exact 1:1 correspondence + * between open and close can use fd_clone.
+ */ mutex_enter(&device_lock); sn->sn_opencnt--; count = --sd->sd_opencnt; - if (vp->v_type == VBLK) + if (vp->v_type == VBLK) { + KASSERTMSG(count == 0, "block device with %u opens", + count + 1); sd->sd_bdevvp = NULL; + } + if (count == 0) + sd->sd_opened = false; mutex_exit(&device_lock); - if (count != 0 && (vp->v_type != VCHR || !(cdev_flags(dev) & D_MCLOSE))) + if (count != 0) return 0; /* @@ -1284,16 +1601,28 @@ spec_close(void *v) * might end up sleeping for someone else who wants our queues. They * won't get them if we hold the vnode locked. */ - if (!(flags & FNONBLOCK)) + if (!(flags & FNONBLOCK)) { + /* + * Take an I/O reference while we hold the vnode lock. + * This ensures that spec_node_revoke waits for the + * last close. + */ + error = spec_io_enter(vp, &sn1, &dev1); + KASSERTMSG(error == 0, "error=%d", error); + KASSERT(sn1 == sn); + KASSERT(dev1 == dev); VOP_UNLOCK(vp); + } if (vp->v_type == VBLK) error = bdev_close(dev, flags, mode, curlwp); else error = cdev_close(dev, flags, mode, curlwp); - if (!(flags & FNONBLOCK)) + if (!(flags & FNONBLOCK)) { + spec_io_exit(vp, sn1); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); + } return (error); } diff --git a/sys/miscfs/specfs/specdev.h b/sys/miscfs/specfs/specdev.h index 621c103e80b1..1abb52964b50 100644 --- a/sys/miscfs/specfs/specdev.h +++ b/sys/miscfs/specfs/specdev.h @@ -69,6 +69,7 @@ typedef struct specnode { u_int sn_opencnt; dev_t sn_rdev; bool sn_gone; + volatile u_int sn_iocnt; } specnode_t; typedef struct specdev { @@ -78,6 +79,7 @@ typedef struct specdev { u_int sd_opencnt; u_int sd_refcnt; dev_t sd_rdev; + bool sd_opened; } specdev_t; /* diff --git a/sys/modules/examples/pollpal/pollpal.c b/sys/modules/examples/pollpal/pollpal.c index b76e0733699b..d8ddc73450e0 100644 --- a/sys/modules/examples/pollpal/pollpal.c +++ b/sys/modules/examples/pollpal/pollpal.c @@ -311,7 +311,8 @@ pollpal_modcmd(modcmd_t cmd, void *arg __unused) case MODULE_CMD_FINI: if (pollpal_nopen != 0) return EBUSY; - return devsw_detach(NULL, &pollpal_cdevsw); + devsw_detach(NULL, &pollpal_cdevsw); + return 0; default: return ENOTTY; } diff --git a/sys/net/if_tap.c b/sys/net/if_tap.c index 0b57ad4a711c..314f4647707c 100644 --- a/sys/net/if_tap.c +++ b/sys/net/if_tap.c @@ -256,9 +256,7 @@ tapdetach(void) if_clone_detach(&tap_cloners); #ifdef _MODULE - error = devsw_detach(NULL, &tap_cdevsw); - if (error != 0) - goto out2; + devsw_detach(NULL, &tap_cdevsw); #endif if (tap_count != 0) { @@ -277,7 +275,6 @@ tapdetach(void) out1: #ifdef _MODULE devsw_attach("tap", NULL, &tap_bmajor, &tap_cdevsw, &tap_cmajor); - out2: #endif if_clone_attach(&tap_cloners); diff --git a/sys/net/if_tun.c b/sys/net/if_tun.c index 4f533a8f08d1..f4e5b6d86d43 100644 --- a/sys/net/if_tun.c +++ b/sys/net/if_tun.c @@ -142,17 +142,10 @@ tuninit(void) static int tundetach(void) { -#ifdef _MODULE - int error; -#endif if_clone_detach(&tun_cloner); #ifdef _MODULE - error = devsw_detach(NULL, &tun_cdevsw); - if (error != 0) { - if_clone_attach(&tun_cloner); - return error; - } + devsw_detach(NULL, &tun_cdevsw); #endif if (!LIST_EMPTY(&tun_softc_list) || !LIST_EMPTY(&tunz_softc_list)) { diff --git a/sys/rump/dev/lib/libbpf/bpf_component.c b/sys/rump/dev/lib/libbpf/bpf_component.c index 05807d371d40..d41d1987afe8 100644 --- a/sys/rump/dev/lib/libbpf/bpf_component.c +++ b/sys/rump/dev/lib/libbpf/bpf_component.c @@ -50,6 +50,5 @@ RUMP_COMPONENT(RUMP_COMPONENT_NET) panic("bpf devsw attach failed: %d", error); if ((error = rump_vfs_makeonedevnode(S_IFCHR, "/dev/bpf", cmaj, 0)) !=0) panic("cannot 
create bpf device nodes: %d", error); - if ((error = devsw_detach(NULL, &bpf_cdevsw)) != 0) - panic("cannot detach bpf devsw: %d", error); + devsw_detach(NULL, &bpf_cdevsw); } diff --git a/sys/rump/dev/lib/libdrvctl/drvctl_component.c b/sys/rump/dev/lib/libdrvctl/drvctl_component.c index e2e79f45f9de..ac4e103fdb9c 100644 --- a/sys/rump/dev/lib/libdrvctl/drvctl_component.c +++ b/sys/rump/dev/lib/libdrvctl/drvctl_component.c @@ -51,7 +51,5 @@ RUMP_COMPONENT(RUMP_COMPONENT_DEV) if ( error !=0) panic("cannot create drvctl device node: %d", error); - error = devsw_detach(NULL, &drvctl_cdevsw); - if (error != 0) - panic("cannot detach drvctl devsw: %d", error); + devsw_detach(NULL, &drvctl_cdevsw); } diff --git a/sys/sys/conf.h b/sys/sys/conf.h index 081631d2111f..16dd87e5480c 100644 --- a/sys/sys/conf.h +++ b/sys/sys/conf.h @@ -63,7 +63,7 @@ struct vnode; #define D_TYPEMASK 0x00ff #define D_MPSAFE 0x0100 #define D_NEGOFFSAFE 0x0200 -#define D_MCLOSE 0x0400 +#define D_UNUSED0 0x0400 /* was D_MCLOSE */ /* * Block device switch table @@ -76,6 +76,8 @@ struct bdevsw { int (*d_dump)(dev_t, daddr_t, void *, size_t); int (*d_psize)(dev_t); int (*d_discard)(dev_t, off_t, off_t); + int (*d_devtounit)(dev_t); + struct cfdriver *d_cfdriver; int d_flag; }; @@ -94,6 +96,8 @@ struct cdevsw { paddr_t (*d_mmap)(dev_t, off_t, int); int (*d_kqfilter)(dev_t, struct knote *); int (*d_discard)(dev_t, off_t, off_t); + int (*d_devtounit)(dev_t); + struct cfdriver *d_cfdriver; int d_flag; }; @@ -104,7 +108,7 @@ extern kmutex_t device_lock; int devsw_attach(const char *, const struct bdevsw *, devmajor_t *, const struct cdevsw *, devmajor_t *); -int devsw_detach(const struct bdevsw *, const struct cdevsw *); +void devsw_detach(const struct bdevsw *, const struct cdevsw *); const struct bdevsw *bdevsw_lookup(dev_t); const struct cdevsw *cdevsw_lookup(dev_t); devmajor_t bdevsw_lookup_major(const struct bdevsw *); @@ -276,6 +280,7 @@ devmajor_t devsw_name2blk(const char *, char *, size_t); devmajor_t devsw_name2chr(const char *, char *, size_t); dev_t devsw_chr2blk(dev_t); dev_t devsw_blk2chr(dev_t); +int dev_minor_unit(dev_t); void mm_init(void); #endif /* _KERNEL */ diff --git a/sys/sys/device.h b/sys/sys/device.h index 3bd4a6c3abf7..e685419d4925 100644 --- a/sys/sys/device.h +++ b/sys/sys/device.h @@ -274,10 +274,12 @@ struct device { void *dv_private; /* this device's private storage */ int *dv_locators; /* our actual locators (optional) */ prop_dictionary_t dv_properties;/* properties dictionary */ + struct localcount *dv_localcount;/* reference count */ int dv_pending; /* config_pending count */ TAILQ_ENTRY(device) dv_pending_list; + struct lwp *dv_attaching; /* thread not yet finished in attach */ struct lwp *dv_detaching; /* detach lock (config_misc_lock/cv) */ size_t dv_activity_count; @@ -651,6 +653,10 @@ void null_childdetached(device_t, device_t); device_t device_lookup(cfdriver_t, int); void *device_lookup_private(cfdriver_t, int); + +device_t device_lookup_acquire(cfdriver_t, int); +void device_release(device_t); + void device_register(device_t, void *); void device_register_post_config(device_t, void *); diff --git a/sys/sys/disklabel.h b/sys/sys/disklabel.h index 4e94b8671332..853cdbe668a3 100644 --- a/sys/sys/disklabel.h +++ b/sys/sys/disklabel.h @@ -509,6 +509,7 @@ const char *convertdisklabel(struct disklabel *, void (*)(struct buf *), int bounds_check_with_label(struct disk *, struct buf *, int); int bounds_check_with_mediasize(struct buf *, int, uint64_t); const char *getfstypename(int); +int 
disklabel_dev_unit(dev_t); #endif #endif /* _LOCORE */
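
Reviewer's note, not part of the patch: a minimal sketch of how a modular driver adopts the interface changed above. The driver name "mydev", its cfdriver mydev_cd, and its entry points are invented for illustration; dev_minor_unit(), the d_devtounit/d_cfdriver members of struct cdevsw, and the void-returning devsw_detach() contract are the ones this patch introduces.

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/device.h>
#include <sys/module.h>

MODULE(MODULE_CLASS_DRIVER, mydev, NULL);

extern struct cfdriver mydev_cd;	/* hypothetical autoconf driver */

dev_type_open(mydev_open);		/* hypothetical; defined elsewhere */
dev_type_close(mydev_close);
dev_type_ioctl(mydev_ioctl);

static const struct cdevsw mydev_cdevsw = {
	.d_open = mydev_open,
	.d_close = mydev_close,
	.d_read = noread,
	.d_write = nowrite,
	.d_ioctl = mydev_ioctl,
	.d_stop = nostop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	/* New members: let cdev_open pin the autoconf instance across d_open. */
	.d_devtounit = dev_minor_unit,	/* minor number is the autoconf unit */
	.d_cfdriver = &mydev_cd,
	.d_flag = D_OTHER | D_MPSAFE,
};

static int
mydev_modcmd(modcmd_t cmd, void *arg)
{
	devmajor_t bmajor = -1, cmajor = -1;

	switch (cmd) {
	case MODULE_CMD_INIT:
		return devsw_attach("mydev", NULL, &bmajor,
		    &mydev_cdevsw, &cmajor);
	case MODULE_CMD_FINI:
		/*
		 * Caller's obligation per the updated devsw(9): no open
		 * instances remain and any new d_open call fails.  Given
		 * that, devsw_detach waits for in-flight lookups to drain
		 * and cannot fail, so it now returns void.
		 */
		devsw_detach(NULL, &mydev_cdevsw);
		return 0;
	default:
		return ENOTTY;
	}
}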
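A companion sketch, under the same hypothetical names, of the new autoconf API added in subr_autoconf.c: device_lookup_acquire/device_release bracket a section during which the instance cannot finish detaching, because config_detach drains dv_localcount before completing.

#include <sys/param.h>
#include <sys/device.h>
#include <sys/errno.h>

extern struct cfdriver mydev_cd;	/* hypothetical, as above */

struct mydev_softc {
	device_t	sc_dev;
	int		sc_state;	/* illustrative per-unit state */
};

static int
mydev_poke(int unit)
{
	device_t dv;
	struct mydev_softc *sc;

	/*
	 * Blocks until a concurrent attach or detach of this unit has
	 * settled; returns NULL if the unit does not exist or has been
	 * detached.
	 */
	dv = device_lookup_acquire(&mydev_cd, unit);
	if (dv == NULL)
		return ENXIO;
	sc = device_private(dv);

	sc->sc_state++;		/* safe: our reference holds off detach */

	/* Drop the reference so config_detach's localcount_drain can proceed. */
	device_release(dv);
	return 0;
}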