diff --git a/lib/libp2k/p2k.c b/lib/libp2k/p2k.c index 45ae7f78ab59..9942ea8b8368 100644 --- a/lib/libp2k/p2k.c +++ b/lib/libp2k/p2k.c @@ -789,7 +789,7 @@ do_makenode(struct puffs_usermount *pu, struct p2k_node *p2n_dir, struct p2k_node *p2n; struct componentname *cn; struct vattr *va_x; - struct vnode *vp; + struct vnode *vp = NULL; int rv; p2n = malloc(sizeof(*p2n)); diff --git a/sbin/fsck_lfs/pass1.c b/sbin/fsck_lfs/pass1.c index d816f8e5094c..48919f55989e 100644 --- a/sbin/fsck_lfs/pass1.c +++ b/sbin/fsck_lfs/pass1.c @@ -307,7 +307,7 @@ checkinode(ino_t inumber, struct inodesc * idesc) */ if (lfs_dino_getnlink(fs, dp) <= 0) { LFS_IENTRY(ifp, fs, inumber, bp); - if (lfs_if_getnextfree(fs, ifp) == LFS_ORPHAN_NEXTFREE) { + if (lfs_if_getnextfree(fs, ifp) == LFS_ORPHAN_NEXTFREE(fs)) { statemap[inumber] = (mode == LFS_IFDIR ? DCLEAR : FCLEAR); /* Add this to our list of orphans */ zlnp = emalloc(sizeof *zlnp); diff --git a/sys/arch/i386/stand/efiboot/bootx64/Makefile b/sys/arch/i386/stand/efiboot/bootx64/Makefile index 828dd693aa3a..03006b4c3be5 100644 --- a/sys/arch/i386/stand/efiboot/bootx64/Makefile +++ b/sys/arch/i386/stand/efiboot/bootx64/Makefile @@ -9,4 +9,9 @@ EXTRA_SOURCES= efibootx64.c startprog64.S multiboot64.S COPTS+= -mno-red-zone CPPFLAGS+= -DEFI_FUNCTION_WRAPPER +# Follow the suit of Makefile.kern.inc; needed for the lfs64 union +# accessors -- they don't actually dereference the resulting pointer, +# just use it for type-checking. +CWARNFLAGS.clang+= -Wno-error=address-of-packed-member + .include "${.CURDIR}/../Makefile.efiboot" diff --git a/sys/dev/ccd.c b/sys/dev/ccd.c index c0c73bae696f..a72334bb8d7e 100644 --- a/sys/dev/ccd.c +++ b/sys/dev/ccd.c @@ -1,4 +1,4 @@ -/* $NetBSD: ccd.c,v 1.179 2019/03/27 19:13:34 martin Exp $ */ +/* $NetBSD: ccd.c,v 1.180 2019/08/07 00:38:01 pgoyette Exp $ */ /*- * Copyright (c) 1996, 1997, 1998, 1999, 2007, 2009 The NetBSD Foundation, Inc. @@ -88,7 +88,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: ccd.c,v 1.179 2019/03/27 19:13:34 martin Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ccd.c,v 1.180 2019/08/07 00:38:01 pgoyette Exp $"); #include #include @@ -216,10 +216,6 @@ static void printiinfo(struct ccdiinfo *); static LIST_HEAD(, ccd_softc) ccds = LIST_HEAD_INITIALIZER(ccds); static kmutex_t ccd_lock; -#ifdef _MODULE -static struct sysctllog *ccd_clog; -#endif - SYSCTL_SETUP_PROTO(sysctl_kern_ccd_setup); static struct ccd_softc * @@ -1681,7 +1677,6 @@ ccd_modcmd(modcmd_t cmd, void *arg) error = devsw_attach("ccd", &ccd_bdevsw, &bmajor, &ccd_cdevsw, &cmajor); - sysctl_kern_ccd_setup(&ccd_clog); #endif break; @@ -1696,7 +1691,6 @@ ccd_modcmd(modcmd_t cmd, void *arg) error = devsw_detach(&ccd_bdevsw, &ccd_cdevsw); ccddetach(); } - sysctl_teardown(&ccd_clog); #endif break; diff --git a/sys/dev/iscsi/iscsi_main.c b/sys/dev/iscsi/iscsi_main.c index 45189b96fa7d..c10a7e49d03c 100644 --- a/sys/dev/iscsi/iscsi_main.c +++ b/sys/dev/iscsi/iscsi_main.c @@ -1,4 +1,4 @@ -/* $NetBSD: iscsi_main.c,v 1.30 2019/07/13 17:06:00 mlelstv Exp $ */ +/* $NetBSD: iscsi_main.c,v 1.31 2019/08/07 00:38:02 pgoyette Exp $ */ /*- * Copyright (c) 2004,2005,2006,2011 The NetBSD Foundation, Inc. @@ -677,7 +677,6 @@ iscsi_modcmd(modcmd_t cmd, void *arg) #ifdef _MODULE devmajor_t cmajor = NODEVMAJOR, bmajor = NODEVMAJOR; int error; - static struct sysctllog *clog; #endif switch (cmd) { @@ -723,8 +722,6 @@ iscsi_modcmd(modcmd_t cmd, void *arg) config_cfdriver_detach(&iscsi_cd); return ENXIO; } - - sysctl_iscsi_setup(&clog); #endif return 0; break; @@ -735,8 +732,6 @@ iscsi_modcmd(modcmd_t cmd, void *arg) if (error) return error; - sysctl_teardown(&clog); - config_cfattach_detach(iscsi_cd.cd_name, &iscsi_ca); config_cfdriver_detach(&iscsi_cd); devsw_detach(NULL, &iscsi_cdevsw); diff --git a/sys/dev/usb/usbnet.c b/sys/dev/usb/usbnet.c index 28afa57c717f..e47184c99e6d 100644 --- a/sys/dev/usb/usbnet.c +++ b/sys/dev/usb/usbnet.c @@ -1,4 +1,4 @@ -/* $NetBSD: usbnet.c,v 1.25.2.4 2019/12/17 12:55:10 martin Exp $ */ +/* $NetBSD: usbnet.c,v 1.7 2019/08/07 00:38:02 pgoyette Exp $ */ /* * Copyright (c) 2019 Matthew R. Green @@ -33,7 +33,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: usbnet.c,v 1.25.2.4 2019/12/17 12:55:10 martin Exp $"); +__KERNEL_RCSID(0, "$NetBSD: usbnet.c,v 1.7 2019/08/07 00:38:02 pgoyette Exp $"); #include #include diff --git a/sys/kern/kern_module.c b/sys/kern/kern_module.c index ad23ede9f987..0d7a644c5cc9 100644 --- a/sys/kern/kern_module.c +++ b/sys/kern/kern_module.c @@ -1,4 +1,4 @@ -/* $NetBSD: kern_module.c,v 1.136 2019/06/19 15:01:01 pgoyette Exp $ */ +/* $NetBSD: kern_module.c,v 1.138 2019/08/08 18:08:41 pgoyette Exp $ */ /*- * Copyright (c) 2008 The NetBSD Foundation, Inc. @@ -34,7 +34,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: kern_module.c,v 1.136 2019/06/19 15:01:01 pgoyette Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_module.c,v 1.138 2019/08/08 18:08:41 pgoyette Exp $"); #define _MODULE_INTERNAL @@ -54,6 +54,7 @@ __KERNEL_RCSID(0, "$NetBSD: kern_module.c,v 1.136 2019/06/19 15:01:01 pgoyette E #include #include #include +#include #include @@ -946,6 +947,108 @@ module_do_builtin(const module_t *pmod, const char *name, module_t **modp, return error; } +/* + * module_load_sysctl + * + * Check to see if a non-builtin module has any SYSCTL_SETUP() routine(s) + * registered. If so, call it (them). + */ + +static void +module_load_sysctl(module_t *mod) +{ + void (**ls_funcp)(struct sysctllog **); + void *ls_start; + size_t ls_size, count; + int error; + + /* + * Built-in modules don't have a mod_kobj so we cannot search + * for their link_set_sysctl_funcs + */ + if (mod->mod_source == MODULE_SOURCE_KERNEL) + return; + + error = kobj_find_section(mod->mod_kobj, "link_set_sysctl_funcs", + &ls_start, &ls_size); + if (error == 0) { + count = ls_size / sizeof(ls_start); + ls_funcp = ls_start; + while (count--) { + (**ls_funcp)(&mod->mod_sysctllog); + ls_funcp++; + } + } +} + +/* + * module_load_evcnt + * + * Check to see if a non-builtin module has any static evcnt's defined; + * if so, attach them. + */ + +static void +module_load_evcnt(module_t *mod) +{ + struct evcnt * const *ls_evp; + void *ls_start; + size_t ls_size, count; + int error; + + /* + * Built-in modules' static evcnt stuff will be handled + * automatically as part of general kernel initialization + */ + if (mod->mod_source == MODULE_SOURCE_KERNEL) + return; + + error = kobj_find_section(mod->mod_kobj, "link_set_evcnts", + &ls_start, &ls_size); + if (error == 0) { + count = ls_size / sizeof(*ls_evp); + ls_evp = ls_start; + while (count--) { + evcnt_attach_static(*ls_evp); + ls_evp++; + } + } +} + +/* + * module_unload_evcnt + * + * Check to see if a non-builtin module has any static evcnt's defined; + * if so, detach them. + */ + +static void +module_unload_evcnt(module_t *mod) +{ + struct evcnt * const *ls_evp; + void *ls_start; + size_t ls_size, count; + int error; + + /* + * Built-in modules' static evcnt stuff will be handled + * automatically as part of general kernel initialization + */ + if (mod->mod_source == MODULE_SOURCE_KERNEL) + return; + + error = kobj_find_section(mod->mod_kobj, "link_set_evcnts", + &ls_start, &ls_size); + if (error == 0) { + count = ls_size / sizeof(*ls_evp); + ls_evp = ls_start; + while (count--) { + evcnt_detach(*ls_evp); + ls_evp++; + } + } +} + /* * module_do_load: * @@ -1265,6 +1368,9 @@ module_do_load(const char *name, bool isdep, int flags, goto fail1; } + module_load_sysctl(mod); /* Set-up module's sysctl if any */ + module_load_evcnt(mod); /* Attach any static evcnt needed */ + /* * Good, the module loaded successfully. Put it onto the * list and add references to its requisite modules. @@ -1344,9 +1450,20 @@ module_do_unload(const char *name, bool load_requires_force) prev_active = module_active; module_active = mod; module_callback_unload(mod); + + /* + * If there were any registered SYSCTL_SETUP funcs, make sure + * we release the sysctl entries + */ + if (mod->mod_sysctllog) { + sysctl_teardown(&mod->mod_sysctllog); + } + module_unload_evcnt(mod); error = (*mod->mod_info->mi_modcmd)(MODULE_CMD_FINI, NULL); module_active = prev_active; if (error != 0) { + module_load_sysctl(mod); /* re-enable sysctl stuff */ + module_load_evcnt(mod); /* and reenable evcnts */ module_print("cannot unload module `%s' error=%d", name, error); return error; diff --git a/sys/kern/sysv_ipc.c b/sys/kern/sysv_ipc.c index 9198464cc3d0..37b4b6358642 100644 --- a/sys/kern/sysv_ipc.c +++ b/sys/kern/sysv_ipc.c @@ -1,4 +1,4 @@ -/* $NetBSD: sysv_ipc.c,v 1.39 2019/04/10 10:03:50 pgoyette Exp $ */ +/* $NetBSD: sysv_ipc.c,v 1.40 2019/08/07 00:38:02 pgoyette Exp $ */ /*- * Copyright (c) 1998, 2007 The NetBSD Foundation, Inc. @@ -30,7 +30,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: sysv_ipc.c,v 1.39 2019/04/10 10:03:50 pgoyette Exp $"); +__KERNEL_RCSID(0, "$NetBSD: sysv_ipc.c,v 1.40 2019/08/07 00:38:02 pgoyette Exp $"); #ifdef _KERNEL_OPT #include "opt_sysv.h" @@ -132,8 +132,6 @@ MODULE(MODULE_CLASS_EXEC, sysv_ipc, NULL); SYSCTL_SETUP_PROTO(sysctl_ipc_setup); -static struct sysctllog *sysctl_sysvipc_clog = NULL; - static const struct syscall_package sysvipc_syscalls[] = { #if defined(SYSVSHM) { SYS___shmctl50, 0, (sy_call_t *)sys___shmctl50 }, @@ -180,12 +178,12 @@ sysv_ipc_modcmd(modcmd_t cmd, void *arg) * sysctl data */ #ifdef SYSVSHM - error = shminit(&sysctl_sysvipc_clog); + error = shminit(); if (error != 0) return error; #endif #ifdef SYSVSEM - error = seminit(&sysctl_sysvipc_clog); + error = seminit(); if (error != 0) { #ifdef SYSVSHM shmfini(); @@ -194,7 +192,7 @@ sysv_ipc_modcmd(modcmd_t cmd, void *arg) } #endif #ifdef SYSVMSG - error = msginit(&sysctl_sysvipc_clog); + error = msginit(); if (error != 0) { #ifdef SYSVSEM semfini(); @@ -205,11 +203,6 @@ sysv_ipc_modcmd(modcmd_t cmd, void *arg) return error; } #endif - -#ifdef _MODULE - /* Set up the common sysctl tree */ - sysctl_ipc_setup(&sysctl_sysvipc_clog); -#endif break; case MODULE_CMD_FINI: /* @@ -228,7 +221,7 @@ sysv_ipc_modcmd(modcmd_t cmd, void *arg) #ifdef SYSVSEM if (semfini()) { #ifdef SYSVSHM - shminit(NULL); + shminit(); #endif return EBUSY; } @@ -236,20 +229,14 @@ sysv_ipc_modcmd(modcmd_t cmd, void *arg) #ifdef SYSVMSG if (msgfini()) { #ifdef SYSVSEM - seminit(NULL); + seminit(); #endif #ifdef SYSVSHM - shminit(NULL); + shminit(); #endif return EBUSY; } #endif - -#ifdef _MODULE - /* Remove the sysctl sub-trees */ - sysctl_teardown(&sysctl_sysvipc_clog); -#endif - /* Unlink the system calls. */ error = syscall_disestablish(NULL, sysvipc_syscalls); if (error) diff --git a/sys/kern/sysv_msg.c b/sys/kern/sysv_msg.c index 691b1dfc95fd..89e231540131 100644 --- a/sys/kern/sysv_msg.c +++ b/sys/kern/sysv_msg.c @@ -1,4 +1,4 @@ -/* $NetBSD: sysv_msg.c,v 1.74.4.1 2019/10/15 19:05:38 martin Exp $ */ +/* $NetBSD: sysv_msg.c,v 1.75 2019/08/07 00:38:02 pgoyette Exp $ */ /*- * Copyright (c) 1999, 2006, 2007 The NetBSD Foundation, Inc. @@ -50,7 +50,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: sysv_msg.c,v 1.74.4.1 2019/10/15 19:05:38 martin Exp $"); +__KERNEL_RCSID(0, "$NetBSD: sysv_msg.c,v 1.75 2019/08/07 00:38:02 pgoyette Exp $"); #ifdef _KERNEL_OPT #include "opt_sysv.h" @@ -94,7 +94,7 @@ extern int kern_has_sysvmsg; SYSCTL_SETUP_PROTO(sysctl_ipc_msg_setup); int -msginit(struct sysctllog **clog) +msginit(void) { int i, sz; vaddr_t v; @@ -167,10 +167,6 @@ msginit(struct sysctllog **clog) kern_has_sysvmsg = 1; -#ifdef _MODULE - if (clog) - sysctl_ipc_msg_setup(clog); -#endif return 0; } diff --git a/sys/kern/sysv_sem.c b/sys/kern/sysv_sem.c index 269da37012c0..24b44301158d 100644 --- a/sys/kern/sysv_sem.c +++ b/sys/kern/sysv_sem.c @@ -1,4 +1,4 @@ -/* $NetBSD: sysv_sem.c,v 1.97 2019/04/10 10:03:50 pgoyette Exp $ */ +/* $NetBSD: sysv_sem.c,v 1.98 2019/08/07 00:38:02 pgoyette Exp $ */ /*- * Copyright (c) 1999, 2007 The NetBSD Foundation, Inc. @@ -39,7 +39,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: sysv_sem.c,v 1.97 2019/04/10 10:03:50 pgoyette Exp $"); +__KERNEL_RCSID(0, "$NetBSD: sysv_sem.c,v 1.98 2019/08/07 00:38:02 pgoyette Exp $"); #ifdef _KERNEL_OPT #include "opt_sysv.h" @@ -102,7 +102,7 @@ static ONCE_DECL(exithook_control); static int seminit_exithook(void); int -seminit(struct sysctllog **clog) +seminit(void) { int i, sz; vaddr_t v; @@ -145,10 +145,6 @@ seminit(struct sysctllog **clog) kern_has_sysvsem = 1; -#ifdef _MODULE - if (clog) - sysctl_ipc_sem_setup(clog); -#endif return 0; } diff --git a/sys/kern/sysv_shm.c b/sys/kern/sysv_shm.c index 73bf0df57254..b62db267a3c1 100644 --- a/sys/kern/sysv_shm.c +++ b/sys/kern/sysv_shm.c @@ -1,4 +1,4 @@ -/* $NetBSD: sysv_shm.c,v 1.135.2.4 2019/10/10 17:23:45 martin Exp $ */ +/* $NetBSD: sysv_shm.c,v 1.137 2019/08/07 00:38:02 pgoyette Exp $ */ /*- * Copyright (c) 1999, 2007 The NetBSD Foundation, Inc. @@ -61,7 +61,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: sysv_shm.c,v 1.135.2.4 2019/10/10 17:23:45 martin Exp $"); +__KERNEL_RCSID(0, "$NetBSD: sysv_shm.c,v 1.137 2019/08/07 00:38:02 pgoyette Exp $"); #ifdef _KERNEL_OPT #include "opt_sysv.h" @@ -938,7 +938,7 @@ shmrealloc(int newshmni) } int -shminit(struct sysctllog **clog) +shminit(void) { vaddr_t v; size_t sz; @@ -983,10 +983,6 @@ shminit(struct sysctllog **clog) uvm_shmexit = shmexit; uvm_shmfork = shmfork; -#ifdef _MODULE - if (clog) - sysctl_ipc_shm_setup(clog); -#endif return 0; } diff --git a/sys/kern/vfs_vnode.c b/sys/kern/vfs_vnode.c index 35be3e0f5818..6ebf57081d83 100644 --- a/sys/kern/vfs_vnode.c +++ b/sys/kern/vfs_vnode.c @@ -764,7 +764,6 @@ vrelel(vnode_t *vp, int flags) if (VSTATE_GET(vp) == VS_RECLAIMED) { VOP_UNLOCK(vp); } else { - VSTATE_CHANGE(vp, VS_LOADED, VS_BLOCKED); mutex_exit(vp->v_interlock); /* @@ -780,7 +779,6 @@ vrelel(vnode_t *vp, int flags) if (!recycle) VOP_UNLOCK(vp); mutex_enter(vp->v_interlock); - VSTATE_CHANGE(vp, VS_BLOCKED, VS_LOADED); if (!recycle) { if (vtryrele(vp)) { mutex_exit(vp->v_interlock); diff --git a/sys/miscfs/genfs/layer_vfsops.c b/sys/miscfs/genfs/layer_vfsops.c index bcf10c9e05a7..113d5390a65b 100644 --- a/sys/miscfs/genfs/layer_vfsops.c +++ b/sys/miscfs/genfs/layer_vfsops.c @@ -1,4 +1,4 @@ -/* $NetBSD: layer_vfsops.c,v 1.51 2017/06/04 08:02:26 hannken Exp $ */ +/* $NetBSD: layer_vfsops.c,v 1.52 2019/08/07 00:38:02 pgoyette Exp $ */ /* * Copyright (c) 1999 National Aeronautics & Space Administration @@ -74,7 +74,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: layer_vfsops.c,v 1.51 2017/06/04 08:02:26 hannken Exp $"); +__KERNEL_RCSID(0, "$NetBSD: layer_vfsops.c,v 1.52 2019/08/07 00:38:02 pgoyette Exp $"); #include #include @@ -97,20 +97,11 @@ MODULE(MODULE_CLASS_MISC, layerfs, NULL); static int layerfs_modcmd(modcmd_t cmd, void *arg) { -#ifdef _MODULE - static struct sysctllog *layerfs_clog = NULL; -#endif switch (cmd) { case MODULE_CMD_INIT: -#ifdef _MODULE - sysctl_vfs_layerfs_setup(&layerfs_clog); -#endif return 0; case MODULE_CMD_FINI: -#ifdef _MODULE - sysctl_teardown(&layerfs_clog); -#endif return 0; default: return ENOTTY; diff --git a/sys/rump/fs/lib/liblfs/Makefile b/sys/rump/fs/lib/liblfs/Makefile index 2750eec8e0ee..172d396de5ed 100644 --- a/sys/rump/fs/lib/liblfs/Makefile +++ b/sys/rump/fs/lib/liblfs/Makefile @@ -21,5 +21,10 @@ CFLAGS+= -DLFS_KERNEL_RFW COPTS.lfs_inode.c+=-O0 .endif +# Follow the suit of Makefile.kern.inc; needed for the lfs64 union +# accessors -- they don't actually dereference the resulting pointer, +# just use it for type-checking. +CWARNFLAGS.clang+= -Wno-error=address-of-packed-member + .include .include diff --git a/sys/sys/module.h b/sys/sys/module.h index b60d046137b9..19417ec30c17 100644 --- a/sys/sys/module.h +++ b/sys/sys/module.h @@ -1,4 +1,4 @@ -/* $NetBSD: module.h,v 1.46 2019/04/08 11:32:49 pgoyette Exp $ */ +/* $NetBSD: module.h,v 1.47 2019/08/07 00:38:02 pgoyette Exp $ */ /*- * Copyright (c) 2008 The NetBSD Foundation, Inc. @@ -83,6 +83,9 @@ typedef struct modinfo { } const modinfo_t; /* Per module information, maintained by kern_module.c */ + +struct sysctllog; + typedef struct module { u_int mod_refcnt; int mod_flags; @@ -97,6 +100,7 @@ typedef struct module { modsrc_t mod_source; time_t mod_autotime; specificdata_reference mod_sdref; + struct sysctllog *mod_sysctllog; } module_t; /* diff --git a/sys/sys/msg.h b/sys/sys/msg.h index f661a5c065e9..436f4a1c69d4 100644 --- a/sys/sys/msg.h +++ b/sys/sys/msg.h @@ -1,4 +1,4 @@ -/* $NetBSD: msg.h,v 1.27 2019/04/10 10:03:50 pgoyette Exp $ */ +/* $NetBSD: msg.h,v 1.28 2019/08/07 00:38:02 pgoyette Exp $ */ /*- * Copyright (c) 1999, 2007 The NetBSD Foundation, Inc. @@ -209,7 +209,7 @@ __END_DECLS struct proc; -int msginit(struct sysctllog **); +int msginit(void); int msgfini(void); int msgctl1(struct lwp *, int, int, struct msqid_ds *); int msgsnd1(struct lwp *, int, const char *, size_t, int, size_t, diff --git a/sys/sys/sem.h b/sys/sys/sem.h index a4ce2c2093ff..ece89e3b22e6 100644 --- a/sys/sys/sem.h +++ b/sys/sys/sem.h @@ -1,4 +1,4 @@ -/* $NetBSD: sem.h,v 1.33 2019/04/10 10:03:50 pgoyette Exp $ */ +/* $NetBSD: sem.h,v 1.34 2019/08/07 00:38:02 pgoyette Exp $ */ /*- * Copyright (c) 1999 The NetBSD Foundation, Inc. @@ -222,7 +222,7 @@ int semconfig(int); #endif __END_DECLS #else -int seminit(struct sysctllog **); +int seminit(void); int semfini(void); void semexit(struct proc *, void *); diff --git a/sys/sys/shm.h b/sys/sys/shm.h index c6c429390f88..ff2c60a1abf3 100644 --- a/sys/sys/shm.h +++ b/sys/sys/shm.h @@ -1,4 +1,4 @@ -/* $NetBSD: shm.h,v 1.52.4.1 2019/09/13 06:25:26 martin Exp $ */ +/* $NetBSD: shm.h,v 1.53 2019/08/07 00:38:02 pgoyette Exp $ */ /*- * Copyright (c) 1999 The NetBSD Foundation, Inc. @@ -172,7 +172,7 @@ extern int shm_nused; struct vmspace; -int shminit(struct sysctllog **); +int shminit(void); int shmfini(void); void shmfork(struct vmspace *, struct vmspace *); void shmexit(struct vmspace *); diff --git a/sys/ufs/lfs/lfs.h b/sys/ufs/lfs/lfs.h index 56e04d738206..b5819761708a 100644 --- a/sys/ufs/lfs/lfs.h +++ b/sys/ufs/lfs/lfs.h @@ -355,19 +355,22 @@ struct lfs_dirheader32 { uint8_t dh_type; /* file type, see below */ uint8_t dh_namlen; /* length of string in d_name */ }; +__CTASSERT(sizeof(struct lfs_dirheader32) == 8); struct lfs_dirheader64 { - uint32_t dh_inoA; /* inode number of entry */ - uint32_t dh_inoB; /* inode number of entry */ + uint64_t dh_ino; /* inode number of entry */ uint16_t dh_reclen; /* length of this record */ uint8_t dh_type; /* file type, see below */ uint8_t dh_namlen; /* length of string in d_name */ -}; +} __aligned(4) __packed; +__CTASSERT(sizeof(struct lfs_dirheader64) == 12); union lfs_dirheader { struct lfs_dirheader64 u_64; struct lfs_dirheader32 u_32; }; +__CTASSERT(__alignof(union lfs_dirheader) == __alignof(struct lfs_dirheader64)); +__CTASSERT(__alignof(union lfs_dirheader) == __alignof(struct lfs_dirheader32)); typedef union lfs_dirheader LFS_DIRHEADER; @@ -381,6 +384,7 @@ struct lfs_dirtemplate32 { struct lfs_dirheader32 dotdot_header; char dotdot_name[4]; /* ditto */ }; +__CTASSERT(sizeof(struct lfs_dirtemplate32) == 2*(8 + 4)); struct lfs_dirtemplate64 { struct lfs_dirheader64 dot_header; @@ -388,6 +392,7 @@ struct lfs_dirtemplate64 { struct lfs_dirheader64 dotdot_header; char dotdot_name[4]; /* ditto */ }; +__CTASSERT(sizeof(struct lfs_dirtemplate64) == 2*(12 + 4)); union lfs_dirtemplate { struct lfs_dirtemplate64 u_64; @@ -408,6 +413,7 @@ struct lfs_odirtemplate { uint16_t dotdot_namlen; char dotdot_name[4]; /* ditto */ }; +__CTASSERT(sizeof(struct lfs_odirtemplate) == 2*(8 + 4)); #endif /* @@ -441,6 +447,7 @@ struct lfs32_dinode { uint32_t di_gid; /* 116: File group. */ uint64_t di_modrev; /* 120: i_modrev for NFSv4 */ }; +__CTASSERT(sizeof(struct lfs32_dinode) == 128); struct lfs64_dinode { uint16_t di_mode; /* 0: IFMT, permissions; see below. */ @@ -469,11 +476,14 @@ struct lfs64_dinode { uint64_t di_inumber; /* 240: Inode number */ uint64_t di_spare[1]; /* 248: Reserved; currently unused */ }; +__CTASSERT(sizeof(struct lfs64_dinode) == 256); union lfs_dinode { struct lfs64_dinode u_64; struct lfs32_dinode u_32; }; +__CTASSERT(__alignof(union lfs_dinode) == __alignof(struct lfs64_dinode)); +__CTASSERT(__alignof(union lfs_dinode) == __alignof(struct lfs32_dinode)); /* * The di_db fields may be overlaid with other information for @@ -529,6 +539,7 @@ struct segusage { uint32_t su_flags; /* 12: segment flags */ uint64_t su_lastmod; /* 16: last modified timestamp */ }; +__CTASSERT(sizeof(struct segusage) == 24); typedef struct segusage_v1 SEGUSE_V1; struct segusage_v1 { @@ -538,6 +549,7 @@ struct segusage_v1 { uint16_t su_ninos; /* 10: number of inode blocks in seg */ uint32_t su_flags; /* 12: segment flags */ }; +__CTASSERT(sizeof(struct segusage_v1) == 16); /* * On-disk file information. One per file with data blocks in the segment. @@ -554,7 +566,8 @@ struct finfo64 { uint64_t fi_ino; /* inode number */ uint32_t fi_lastlength; /* length of last block in array */ uint32_t fi_pad; /* unused */ -}; +} __aligned(4) __packed; +__CTASSERT(sizeof(struct finfo64) == 24); typedef struct finfo32 FINFO32; struct finfo32 { @@ -563,11 +576,14 @@ struct finfo32 { uint32_t fi_ino; /* inode number */ uint32_t fi_lastlength; /* length of last block in array */ }; +__CTASSERT(sizeof(struct finfo32) == 16); typedef union finfo { struct finfo64 u_64; struct finfo32 u_32; } FINFO; +__CTASSERT(__alignof(union finfo) == __alignof(struct finfo64)); +__CTASSERT(__alignof(union finfo) == __alignof(struct finfo32)); /* * inode info (part of the segment summary) @@ -579,16 +595,20 @@ typedef union finfo { typedef struct iinfo64 { uint64_t ii_block; /* block number */ -} IINFO64; +} __aligned(4) __packed IINFO64; +__CTASSERT(sizeof(struct iinfo64) == 8); typedef struct iinfo32 { uint32_t ii_block; /* block number */ } IINFO32; +__CTASSERT(sizeof(struct iinfo32) == 4); typedef union iinfo { struct iinfo64 u_64; struct iinfo32 u_32; } IINFO; +__CTASSERT(__alignof(union iinfo) == __alignof(struct iinfo64)); +__CTASSERT(__alignof(union iinfo) == __alignof(struct iinfo32)); /* * Index file inode entries. @@ -596,8 +616,9 @@ typedef union iinfo { /* magic value for daddrs */ #define LFS_UNUSED_DADDR 0 /* out-of-band daddr */ -/* magic value for if_nextfree */ -#define LFS_ORPHAN_NEXTFREE (~(uint32_t)0) /* indicate orphaned file */ +/* magic value for if_nextfree -- indicate orphaned file */ +#define LFS_ORPHAN_NEXTFREE(fs) \ + ((fs)->lfs_is64 ? ~(uint64_t)0 : ~(uint32_t)0) typedef struct ifile64 IFILE64; struct ifile64 { @@ -606,7 +627,8 @@ struct ifile64 { uint64_t if_atime_sec; /* Last access time, seconds */ int64_t if_daddr; /* inode disk address */ uint64_t if_nextfree; /* next-unallocated inode */ -}; +} __aligned(4) __packed; +__CTASSERT(sizeof(struct ifile64) == 32); typedef struct ifile32 IFILE32; struct ifile32 { @@ -616,6 +638,7 @@ struct ifile32 { uint32_t if_atime_sec; /* Last access time, seconds */ uint32_t if_atime_nsec; /* and nanoseconds */ }; +__CTASSERT(sizeof(struct ifile32) == 20); typedef struct ifile_v1 IFILE_V1; struct ifile_v1 { @@ -627,6 +650,7 @@ struct ifile_v1 { struct timespec if_atime; /* Last access time */ #endif }; +__CTASSERT(sizeof(struct ifile_v1) == 12); /* * Note: struct ifile_v1 is often handled by accessing the first three @@ -638,6 +662,9 @@ typedef union ifile { struct ifile32 u_32; struct ifile_v1 u_v1; } IFILE; +__CTASSERT(__alignof(union ifile) == __alignof(struct ifile64)); +__CTASSERT(__alignof(union ifile) == __alignof(struct ifile32)); +__CTASSERT(__alignof(union ifile) == __alignof(struct ifile_v1)); /* * Cleaner information structure. This resides in the ifile and is used @@ -656,6 +683,7 @@ typedef struct _cleanerinfo32 { uint32_t free_tail; /* 20: tail of the inode free list */ uint32_t flags; /* 24: status word from the kernel */ } CLEANERINFO32; +__CTASSERT(sizeof(struct _cleanerinfo32) == 28); typedef struct _cleanerinfo64 { uint32_t clean; /* 0: number of clean segments */ @@ -666,13 +694,16 @@ typedef struct _cleanerinfo64 { uint64_t free_tail; /* 32: tail of the inode free list */ uint32_t flags; /* 40: status word from the kernel */ uint32_t pad; /* 44: must be 64-bit aligned */ -} CLEANERINFO64; +} __aligned(4) __packed CLEANERINFO64; +__CTASSERT(sizeof(struct _cleanerinfo64) == 48); /* this must not go to disk directly of course */ typedef union _cleanerinfo { CLEANERINFO32 u_32; CLEANERINFO64 u_64; } CLEANERINFO; +__CTASSERT(__alignof(union _cleanerinfo) == __alignof(struct _cleanerinfo32)); +__CTASSERT(__alignof(union _cleanerinfo) == __alignof(struct _cleanerinfo64)); /* * On-disk segment summary information @@ -704,6 +735,7 @@ struct segsum_v1 { uint16_t ss_pad; /* 26: extra space */ /* FINFO's and inode daddr's... */ }; +__CTASSERT(sizeof(struct segsum_v1) == 28); typedef struct segsum32 SEGSUM32; struct segsum32 { @@ -720,7 +752,8 @@ struct segsum32 { uint64_t ss_serial; /* 32: serial number */ uint64_t ss_create; /* 40: time stamp */ /* FINFO's and inode daddr's... */ -}; +} __aligned(4) __packed; +__CTASSERT(sizeof(struct segsum32) == 48); typedef struct segsum64 SEGSUM64; struct segsum64 { @@ -737,7 +770,8 @@ struct segsum64 { uint64_t ss_serial; /* 40: serial number */ uint64_t ss_create; /* 48: time stamp */ /* FINFO's and inode daddr's... */ -}; +} __aligned(4) __packed; +__CTASSERT(sizeof(struct segsum64) == 56); typedef union segsum SEGSUM; union segsum { @@ -745,7 +779,9 @@ union segsum { struct segsum32 u_32; struct segsum_v1 u_v1; }; - +__CTASSERT(__alignof(union segsum) == __alignof(struct segsum64)); +__CTASSERT(__alignof(union segsum) == __alignof(struct segsum32)); +__CTASSERT(__alignof(union segsum) == __alignof(struct segsum_v1)); /* * On-disk super block. @@ -934,6 +970,8 @@ struct dlfs64 { uint32_t dlfs_cksum; /* 508: checksum for superblock checking */ }; +__CTASSERT(__alignof(struct dlfs) == __alignof(struct dlfs64)); + /* Type used for the inode bitmap */ typedef uint32_t lfs_bm_t; diff --git a/sys/ufs/lfs/lfs_accessors.h b/sys/ufs/lfs/lfs_accessors.h index b4accd6c358a..f058ace2359d 100644 --- a/sys/ufs/lfs/lfs_accessors.h +++ b/sys/ufs/lfs/lfs_accessors.h @@ -274,17 +274,7 @@ static __inline uint64_t lfs_dir_getino(const STRUCT_LFS *fs, const LFS_DIRHEADER *dh) { if (fs->lfs_is64) { - uint64_t ino; - - /* - * XXX we can probably write this in a way that's both - * still legal and generates better code. - */ - memcpy(&ino, &dh->u_64.dh_inoA, sizeof(dh->u_64.dh_inoA)); - memcpy((char *)&ino + sizeof(dh->u_64.dh_inoA), - &dh->u_64.dh_inoB, - sizeof(dh->u_64.dh_inoB)); - return LFS_SWAP_uint64_t(fs, ino); + return LFS_SWAP_uint64_t(fs, dh->u_64.dh_ino); } else { return LFS_SWAP_uint32_t(fs, dh->u_32.dh_ino); } @@ -331,16 +321,7 @@ static __inline void lfs_dir_setino(STRUCT_LFS *fs, LFS_DIRHEADER *dh, uint64_t ino) { if (fs->lfs_is64) { - - ino = LFS_SWAP_uint64_t(fs, ino); - /* - * XXX we can probably write this in a way that's both - * still legal and generates better code. - */ - memcpy(&dh->u_64.dh_inoA, &ino, sizeof(dh->u_64.dh_inoA)); - memcpy(&dh->u_64.dh_inoB, - (char *)&ino + sizeof(dh->u_64.dh_inoA), - sizeof(dh->u_64.dh_inoB)); + dh->u_64.dh_ino = LFS_SWAP_uint64_t(fs, ino); } else { dh->u_32.dh_ino = LFS_SWAP_uint32_t(fs, ino); } diff --git a/sys/ufs/lfs/lfs_alloc.c b/sys/ufs/lfs/lfs_alloc.c index f8ca60104574..bd4e4e8b4468 100644 --- a/sys/ufs/lfs/lfs_alloc.c +++ b/sys/ufs/lfs/lfs_alloc.c @@ -705,16 +705,16 @@ lfs_vfree(struct vnode *vp, ino_t ino, int mode) * Takes the segmenet lock. */ void -lfs_order_freelist(struct lfs *fs) +lfs_order_freelist(struct lfs *fs, ino_t **orphanp, size_t *norphanp) { CLEANERINFO *cip; IFILE *ifp = NULL; struct buf *bp; ino_t ino, firstino, lastino, maxino; -#ifdef notyet - struct vnode *vp; -#endif - + ino_t *orphan = NULL; + size_t norphan = 0; + size_t norphan_alloc = 0; + ASSERT_NO_SEGLOCK(fs); lfs_seglock(fs, SEGM_PROT); @@ -745,7 +745,6 @@ lfs_order_freelist(struct lfs *fs) if (ino == LFS_UNUSED_INUM || ino == LFS_IFILE_INUM) continue; -#ifdef notyet /* * Address orphaned files. * @@ -757,39 +756,26 @@ lfs_order_freelist(struct lfs *fs) * but presumably it doesn't work... not sure what * happens to such files currently. -- dholland 20160806 */ - if (lfs_if_getnextfree(fs, ifp) == LFS_ORPHAN_NEXTFREE && - VFS_VGET(fs->lfs_ivnode->v_mount, ino, &vp) == 0) { - unsigned segno; - - /* get the segment the inode in on disk */ - segno = lfs_dtosn(fs, lfs_if_getdaddr(fs, ifp)); - - /* truncate the inode */ - lfs_truncate(vp, 0, 0, NOCRED); - vput(vp); - - /* load the segment summary */ - LFS_SEGENTRY(sup, fs, segno, bp); - /* update the number of bytes in the segment */ - KASSERT(sup->su_nbytes >= DINOSIZE(fs)); - sup->su_nbytes -= DINOSIZE(fs); - /* write the segment summary */ - LFS_WRITESEGENTRY(sup, fs, segno, bp); - - /* Drop the on-disk address */ - lfs_if_setdaddr(fs, ifp, LFS_UNUSED_DADDR); - /* write the ifile entry */ - LFS_BWRITE_LOG(bp); - - /* - * and reload it (XXX: why? I guess - * LFS_BWRITE_LOG drops it...) - */ - LFS_IENTRY(ifp, fs, ino, bp); - - /* Fall through to next if block */ + if (lfs_if_getnextfree(fs, ifp) == LFS_ORPHAN_NEXTFREE(fs)) { + if (orphan == NULL) { + norphan_alloc = 32; /* XXX pulled from arse */ + orphan = kmem_zalloc(sizeof(orphan[0]) * + norphan_alloc, KM_SLEEP); + } else if (norphan == norphan_alloc) { + ino_t *orphan_new; + if (norphan_alloc >= 4096) + norphan_alloc += 4096; + else + norphan_alloc *= 2; + orphan_new = kmem_zalloc(sizeof(orphan[0]) * + norphan_alloc, KM_SLEEP); + memcpy(orphan_new, orphan, sizeof(orphan[0]) * + norphan); + kmem_free(orphan, sizeof(orphan[0]) * norphan); + orphan = orphan_new; + } + orphan[norphan++] = ino; } -#endif if (lfs_if_getdaddr(fs, ifp) == LFS_UNUSED_DADDR) { @@ -836,6 +822,22 @@ lfs_order_freelist(struct lfs *fs) /* done */ lfs_segunlock(fs); + + /* + * Shrink the array of orphans so we don't have to carry around + * the allocation size. + */ + if (norphan < norphan_alloc) { + ino_t *orphan_new = kmem_alloc(sizeof(orphan[0]) * norphan, + KM_SLEEP); + memcpy(orphan_new, orphan, sizeof(orphan[0]) * norphan); + kmem_free(orphan, sizeof(orphan[0]) * norphan_alloc); + orphan = orphan_new; + norphan_alloc = norphan; + } + + *orphanp = orphan; + *norphanp = norphan; } /* @@ -851,6 +853,84 @@ lfs_orphan(struct lfs *fs, ino_t ino) struct buf *bp; LFS_IENTRY(ifp, fs, ino, bp); - lfs_if_setnextfree(fs, ifp, LFS_ORPHAN_NEXTFREE); + lfs_if_setnextfree(fs, ifp, LFS_ORPHAN_NEXTFREE(fs)); LFS_BWRITE_LOG(bp); } + +/* + * Free orphans discovered during mount. This is a separate stage + * because it requires fs->lfs_suflags to be set up, which is not done + * by the time we run lfs_order_freelist. It's possible that we could + * run lfs_order_freelist later (i.e., set up fs->lfs_suflags sooner) + * but that requires more thought than I can put into this at the + * moment. + */ +void +lfs_free_orphans(struct lfs *fs, ino_t *orphan, size_t norphan) +{ + size_t i; + + for (i = 0; i < norphan; i++) { + ino_t ino = orphan[i]; + unsigned segno; + struct vnode *vp; + struct inode *ip; + struct buf *bp; + IFILE *ifp; + SEGUSE *sup; + int error; + + /* Get the segment the inode is in on disk. */ + LFS_IENTRY(ifp, fs, ino, bp); + segno = lfs_dtosn(fs, lfs_if_getdaddr(fs, ifp)); + brelse(bp, 0); + + /* + * Try to get the vnode. If we can't, tough -- hope + * you have backups! + */ + error = VFS_VGET(fs->lfs_ivnode->v_mount, ino, &vp); + if (error) { + printf("orphan %jd vget error %d\n", (intmax_t)ino, + error); + continue; + } + + /* + * Sanity-check the inode. + * + * XXX What to do if it is still referenced? + */ + ip = VTOI(vp); + if (ip->i_nlink != 0) + printf("orphan %jd nlink %d\n", (intmax_t)ino, + ip->i_nlink); + + /* + * Truncate the inode, to free any blocks allocated for + * it, and release it, to free the inode number. + * + * XXX Isn't it redundant to truncate? Won't vput do + * that for us? + */ + error = lfs_truncate(vp, 0, 0, NOCRED); + if (error) + printf("orphan %jd truncate error %d", (intmax_t)ino, + error); + vput(vp); + + /* Update the number of bytes in the segment summary. */ + LFS_SEGENTRY(sup, fs, segno, bp); + KASSERT(sup->su_nbytes >= DINOSIZE(fs)); + sup->su_nbytes -= DINOSIZE(fs); + LFS_WRITESEGENTRY(sup, fs, segno, bp); + + /* Drop the on-disk address. */ + LFS_IENTRY(ifp, fs, ino, bp); + lfs_if_setdaddr(fs, ifp, LFS_UNUSED_DADDR); + LFS_BWRITE_LOG(bp); + } + + if (orphan) + kmem_free(orphan, sizeof(orphan[0]) * norphan); +} diff --git a/sys/ufs/lfs/lfs_balloc.c b/sys/ufs/lfs/lfs_balloc.c index 3513e17388d5..fb8c4d5c0336 100644 --- a/sys/ufs/lfs/lfs_balloc.c +++ b/sys/ufs/lfs/lfs_balloc.c @@ -660,9 +660,10 @@ lfs_register_block(struct vnode *vp, daddr_t lbn) static void lfs_do_deregister(struct lfs *fs, struct inode *ip, struct lbnentry *lbp) { + + KASSERT(mutex_owned(&lfs_lock)); ASSERT_MAYBE_SEGLOCK(fs); - mutex_enter(&lfs_lock); --ip->i_lfs_nbtree; SPLAY_REMOVE(lfs_splay, &ip->i_lfs_lbtree, lbp); if (fs->lfs_favail > lfs_btofsb(fs, (1 << lfs_sb_getbshift(fs)))) @@ -671,9 +672,12 @@ lfs_do_deregister(struct lfs *fs, struct inode *ip, struct lbnentry *lbp) if (locked_fakequeue_count > 0) --locked_fakequeue_count; lfs_subsys_pages -= lfs_sb_getbsize(fs) >> PAGE_SHIFT; - mutex_exit(&lfs_lock); + mutex_exit(&lfs_lock); pool_put(&lfs_lbnentry_pool, lbp); + mutex_enter(&lfs_lock); + + KASSERT(mutex_owned(&lfs_lock)); } void @@ -690,19 +694,18 @@ lfs_deregister_block(struct vnode *vp, daddr_t lbn) if (lbn < 0 || vp->v_type != VREG || ip->i_number == LFS_IFILE_INUM) return; + mutex_enter(&lfs_lock); fs = ip->i_lfs; tmp.lbn = lbn; - lbp = SPLAY_FIND(lfs_splay, &ip->i_lfs_lbtree, &tmp); - if (lbp == NULL) - return; - - lfs_do_deregister(fs, ip, lbp); + if ((lbp = SPLAY_FIND(lfs_splay, &ip->i_lfs_lbtree, &tmp)) != NULL) + lfs_do_deregister(fs, ip, lbp); + mutex_exit(&lfs_lock); } void lfs_deregister_all(struct vnode *vp) { - struct lbnentry *lbp, *nlbp; + struct lbnentry *lbp; struct lfs_splay *hd; struct lfs *fs; struct inode *ip; @@ -711,8 +714,8 @@ lfs_deregister_all(struct vnode *vp) fs = ip->i_lfs; hd = &ip->i_lfs_lbtree; - for (lbp = SPLAY_MIN(lfs_splay, hd); lbp != NULL; lbp = nlbp) { - nlbp = SPLAY_NEXT(lfs_splay, hd, lbp); + mutex_enter(&lfs_lock); + while ((lbp = SPLAY_MIN(lfs_splay, hd)) != NULL) lfs_do_deregister(fs, ip, lbp); - } + mutex_exit(&lfs_lock); } diff --git a/sys/ufs/lfs/lfs_bio.c b/sys/ufs/lfs/lfs_bio.c index f2972c009827..f23c6736cd54 100644 --- a/sys/ufs/lfs/lfs_bio.c +++ b/sys/ufs/lfs/lfs_bio.c @@ -653,9 +653,14 @@ lfs_check(struct vnode *vp, daddr_t blkno, int flags) /* If there are too many pending dirops, we have to flush them. */ if (fs->lfs_dirvcount > LFS_MAX_FSDIROP(fs) || lfs_dirvcount > LFS_MAX_DIROP || fs->lfs_diropwait > 0) { + KASSERT(fs->lfs_dirops == 0); + fs->lfs_writer++; mutex_exit(&lfs_lock); lfs_flush_dirops(fs); mutex_enter(&lfs_lock); + if (--fs->lfs_writer == 0) + cv_broadcast(&fs->lfs_diropscv); + KASSERT(fs->lfs_dirops == 0); } else if (locked_queue_count + INOCOUNT(fs) > LFS_MAX_BUFS || locked_queue_bytes + INOBYTES(fs) > LFS_MAX_BYTES || lfs_subsys_pages > LFS_MAX_PAGES || diff --git a/sys/ufs/lfs/lfs_debug.c b/sys/ufs/lfs/lfs_debug.c index a9484c611cd4..516a693e42f1 100644 --- a/sys/ufs/lfs/lfs_debug.c +++ b/sys/ufs/lfs/lfs_debug.c @@ -84,16 +84,12 @@ struct lfs_log_entry lfs_log[LFS_LOGLENGTH]; int lfs_bwrite_log(struct buf *bp, const char *file, int line) { - struct vop_bwrite_args a; - - a.a_desc = VDESC(vop_bwrite); - a.a_bp = bp; if (!(bp->b_flags & B_GATHERED) && !(bp->b_oflags & BO_DELWRI)) { LFS_ENTER_LOG("write", file, line, bp->b_lblkno, bp->b_flags, curproc->p_pid); } - return (VCALL(bp->b_vp, VOFFSET(vop_bwrite), &a)); + return VOP_BWRITE(bp->b_vp, bp); } void diff --git a/sys/ufs/lfs/lfs_extern.h b/sys/ufs/lfs/lfs_extern.h index 0e2724d78968..e757076b4b70 100644 --- a/sys/ufs/lfs/lfs_extern.h +++ b/sys/ufs/lfs/lfs_extern.h @@ -127,9 +127,10 @@ extern kcondvar_t locked_queue_cv; int lfs_valloc(struct vnode *, int, kauth_cred_t, ino_t *, int *); int lfs_valloc_fixed(struct lfs *, ino_t, int); int lfs_vfree(struct vnode *, ino_t, int); -void lfs_order_freelist(struct lfs *); +void lfs_order_freelist(struct lfs *, ino_t **, size_t *); int lfs_extend_ifile(struct lfs *, kauth_cred_t); void lfs_orphan(struct lfs *, ino_t); +void lfs_free_orphans(struct lfs *, ino_t *, size_t); /* lfs_balloc.c */ int lfs_balloc(struct vnode *, off_t, int, kauth_cred_t, int, struct buf **); @@ -210,7 +211,8 @@ void lfs_free(struct lfs *, void *, int); int lfs_seglock(struct lfs *, unsigned long); void lfs_segunlock(struct lfs *); void lfs_segunlock_relock(struct lfs *); -int lfs_writer_enter(struct lfs *, const char *); +void lfs_writer_enter(struct lfs *, const char *); +int lfs_writer_tryenter(struct lfs *); void lfs_writer_leave(struct lfs *); void lfs_wakeup_cleaner(struct lfs *); diff --git a/sys/ufs/lfs/lfs_inode.c b/sys/ufs/lfs/lfs_inode.c index 0266de39f7d9..9dd98afc7d42 100644 --- a/sys/ufs/lfs/lfs_inode.c +++ b/sys/ufs/lfs/lfs_inode.c @@ -133,6 +133,7 @@ lfs_update(struct vnode *vp, const struct timespec *acc, struct inode *ip; struct lfs *fs = VFSTOULFS(vp->v_mount)->um_lfs; int flags; + int error; ASSERT_NO_SEGLOCK(fs); if (vp->v_mount->mnt_flag & MNT_RDONLY) @@ -175,7 +176,7 @@ lfs_update(struct vnode *vp, const struct timespec *acc, vp->v_iflag | vp->v_vflag | vp->v_uflag, ip->i_state)); if (fs->lfs_dirops == 0) - lfs_flush_fs(fs, SEGM_SYNC); + break; else mtsleep(&fs->lfs_writer, PRIBIO+1, "lfs_fsync", 0, &lfs_lock); @@ -183,8 +184,18 @@ lfs_update(struct vnode *vp, const struct timespec *acc, twice? */ } --fs->lfs_diropwait; + fs->lfs_writer++; + if (vp->v_uflag & VU_DIROP) { + KASSERT(fs->lfs_dirops == 0); + lfs_flush_fs(fs, SEGM_SYNC); + } + mutex_exit(&lfs_lock); + error = lfs_vflush(vp); + mutex_enter(&lfs_lock); + if (--fs->lfs_writer == 0) + cv_broadcast(&fs->lfs_diropscv); mutex_exit(&lfs_lock); - return lfs_vflush(vp); + return error; } return 0; } diff --git a/sys/ufs/lfs/lfs_inode.h b/sys/ufs/lfs/lfs_inode.h index e0d0404d226e..384f6f09990d 100644 --- a/sys/ufs/lfs/lfs_inode.h +++ b/sys/ufs/lfs/lfs_inode.h @@ -123,6 +123,7 @@ struct inode { /* unused 0x0400 */ /* was FFS-only IN_SPACECOUNTED */ #define IN_PAGING 0x1000 /* LFS: file is on paging queue */ #define IN_CDIROP 0x4000 /* LFS: dirop completed pending i/o */ +#define IN_MARKER 0x00010000 /* LFS: marker inode for iteration */ /* XXX this is missing some of the flags */ #define IN_ALLMOD (IN_MODIFIED|IN_ACCESS|IN_CHANGE|IN_UPDATE|IN_MODIFY|IN_ACCESSED|IN_CLEANING) diff --git a/sys/ufs/lfs/lfs_pages.c b/sys/ufs/lfs/lfs_pages.c index 479d950d567b..25ae245915fa 100644 --- a/sys/ufs/lfs/lfs_pages.c +++ b/sys/ufs/lfs/lfs_pages.c @@ -710,29 +710,30 @@ retry: (vp->v_uflag & VU_DIROP)) { DLOG((DLOG_PAGE, "lfs_putpages: flushing VU_DIROP\n")); - lfs_writer_enter(fs, "ppdirop"); + /* + * NB: lfs_flush_fs can recursively call lfs_putpages, + * but it won't reach this branch because it passes + * PGO_LOCKED. + */ - /* Note if we hold the vnode locked */ - if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE) - { - DLOG((DLOG_PAGE, "lfs_putpages: dirop inode already locked\n")); - } else { - DLOG((DLOG_PAGE, "lfs_putpages: dirop inode not locked\n")); - } mutex_exit(vp->v_interlock); - mutex_enter(&lfs_lock); lfs_flush_fs(fs, sync ? SEGM_SYNC : 0); mutex_exit(&lfs_lock); - mutex_enter(vp->v_interlock); - lfs_writer_leave(fs); /* * The flush will have cleaned out this vnode as well, * no need to do more to it. * XXX then why are we falling through and continuing? */ + + /* + * XXX State may have changed while we dropped the + * lock; start over just in case. The above comment + * suggests this should maybe instead be goto out. + */ + goto retry; } /* diff --git a/sys/ufs/lfs/lfs_rename.c b/sys/ufs/lfs/lfs_rename.c index 866a97a1b634..a1e730c89a8e 100644 --- a/sys/ufs/lfs/lfs_rename.c +++ b/sys/ufs/lfs/lfs_rename.c @@ -1061,6 +1061,9 @@ lfs_gro_rename(struct mount *mp, kauth_cred_t cred, fdvp, fcnp, fde, fvp, tdvp, tcnp, tde, tvp); + if (tvp && VTOI(tvp)->i_nlink == 0) + lfs_orphan(VTOI(tvp)->i_lfs, VTOI(tvp)->i_number); + UNMARK_VNODE(fdvp); UNMARK_VNODE(fvp); UNMARK_VNODE(tdvp); diff --git a/sys/ufs/lfs/lfs_segment.c b/sys/ufs/lfs/lfs_segment.c index 54207304bf04..341b862c1f0f 100644 --- a/sys/ufs/lfs/lfs_segment.c +++ b/sys/ufs/lfs/lfs_segment.c @@ -399,7 +399,7 @@ lfs_vflush(struct vnode *vp) * still not done with this vnode. * XXX we can do better than this. */ - KDASSERT(ip->i_number != LFS_IFILE_INUM); + KASSERT(ip->i_number != LFS_IFILE_INUM); lfs_writeinode(fs, sp, ip); mutex_enter(&lfs_lock); LFS_SET_UINO(ip, IN_MODIFIED); @@ -490,7 +490,7 @@ lfs_writevnodes_selector(void *cl, struct vnode *vp) KASSERT(mutex_owned(vp->v_interlock)); ip = VTOI(vp); - if (ip == NULL || vp->v_type == VNON) + if (ip == NULL || vp->v_type == VNON || ip->i_nlink <= 0) return false; if ((op == VN_DIROP && !(vp->v_uflag & VU_DIROP)) || (op != VN_DIROP && op != VN_CLEAN && (vp->v_uflag & VU_DIROP))) { @@ -624,6 +624,15 @@ lfs_segwrite(struct mount *mp, int flags) */ do_ckp = LFS_SHOULD_CHECKPOINT(fs, flags); + /* + * If we know we're gonna need the writer lock, take it now to + * preserve the lock order lfs_writer -> lfs_seglock. + */ + if (do_ckp) { + lfs_writer_enter(fs, "ckpwriter"); + writer_set = 1; + } + /* We can't do a partial write and checkpoint at the same time. */ if (do_ckp) flags &= ~SEGM_SINGLE; @@ -653,11 +662,10 @@ lfs_segwrite(struct mount *mp, int flags) break; } - if (do_ckp || fs->lfs_dirops == 0) { - if (!writer_set) { - lfs_writer_enter(fs, "lfs writer"); - writer_set = 1; - } + if (do_ckp || + (writer_set = lfs_writer_tryenter(fs)) != 0) { + KASSERT(writer_set); + KASSERT(fs->lfs_writer); error = lfs_writevnodes(fs, mp, sp, VN_DIROP); if (um_error == 0) um_error = error; diff --git a/sys/ufs/lfs/lfs_subr.c b/sys/ufs/lfs/lfs_subr.c index 9a91e784736a..596fae5acb4e 100644 --- a/sys/ufs/lfs/lfs_subr.c +++ b/sys/ufs/lfs/lfs_subr.c @@ -340,7 +340,7 @@ static void lfs_unmark_dirop(struct lfs *); static void lfs_unmark_dirop(struct lfs *fs) { - struct inode *ip, *nip; + struct inode *ip, *marker; struct vnode *vp; int doit; @@ -349,13 +349,26 @@ lfs_unmark_dirop(struct lfs *fs) doit = !(fs->lfs_flags & LFS_UNDIROP); if (doit) fs->lfs_flags |= LFS_UNDIROP; - if (!doit) { - mutex_exit(&lfs_lock); + mutex_exit(&lfs_lock); + + if (!doit) return; - } - for (ip = TAILQ_FIRST(&fs->lfs_dchainhd); ip != NULL; ip = nip) { - nip = TAILQ_NEXT(ip, i_lfs_dchain); + marker = pool_get(&lfs_inode_pool, PR_WAITOK); + KASSERT(fs != NULL); + memset(marker, 0, sizeof(*marker)); + marker->inode_ext.lfs = pool_get(&lfs_inoext_pool, PR_WAITOK); + memset(marker->inode_ext.lfs, 0, sizeof(*marker->inode_ext.lfs)); + marker->i_state |= IN_MARKER; + + mutex_enter(&lfs_lock); + TAILQ_INSERT_HEAD(&fs->lfs_dchainhd, marker, i_lfs_dchain); + while ((ip = TAILQ_NEXT(marker, i_lfs_dchain)) != NULL) { + TAILQ_REMOVE(&fs->lfs_dchainhd, marker, i_lfs_dchain); + TAILQ_INSERT_AFTER(&fs->lfs_dchainhd, ip, marker, + i_lfs_dchain); + if (ip->i_state & IN_MARKER) + continue; vp = ITOV(ip); if ((ip->i_state & (IN_ADIROP | IN_CDIROP)) == IN_CDIROP) { --lfs_dirvcount; @@ -371,10 +384,13 @@ lfs_unmark_dirop(struct lfs *fs) ip->i_state &= ~IN_CDIROP; } } - + TAILQ_REMOVE(&fs->lfs_dchainhd, marker, i_lfs_dchain); fs->lfs_flags &= ~LFS_UNDIROP; wakeup(&fs->lfs_flags); mutex_exit(&lfs_lock); + + pool_put(&lfs_inoext_pool, marker->inode_ext.lfs); + pool_put(&lfs_inode_pool, marker); } static void @@ -539,6 +555,7 @@ lfs_segunlock(struct lfs *fs) lfs_unmark_dirop(fs); } else { --fs->lfs_seglock; + KASSERT(fs->lfs_seglock != 0); mutex_exit(&lfs_lock); } } @@ -548,12 +565,12 @@ lfs_segunlock(struct lfs *fs) * * No simple_locks are held when we enter and none are held when we return. */ -int +void lfs_writer_enter(struct lfs *fs, const char *wmesg) { - int error = 0; + int error __diagused; - ASSERT_MAYBE_SEGLOCK(fs); + ASSERT_NO_SEGLOCK(fs); mutex_enter(&lfs_lock); /* disallow dirops during flush */ @@ -563,15 +580,26 @@ lfs_writer_enter(struct lfs *fs, const char *wmesg) ++fs->lfs_diropwait; error = mtsleep(&fs->lfs_writer, PRIBIO+1, wmesg, 0, &lfs_lock); + KASSERT(error == 0); --fs->lfs_diropwait; } - if (error) - fs->lfs_writer--; + mutex_exit(&lfs_lock); +} +int +lfs_writer_tryenter(struct lfs *fs) +{ + int writer_set; + + ASSERT_MAYBE_SEGLOCK(fs); + mutex_enter(&lfs_lock); + writer_set = (fs->lfs_dirops == 0); + if (writer_set) + fs->lfs_writer++; mutex_exit(&lfs_lock); - return error; + return writer_set; } void diff --git a/sys/ufs/lfs/lfs_vfsops.c b/sys/ufs/lfs/lfs_vfsops.c index a2ac2d16d89e..5e2e4aa039d0 100644 --- a/sys/ufs/lfs/lfs_vfsops.c +++ b/sys/ufs/lfs/lfs_vfsops.c @@ -120,6 +120,7 @@ MODULE(MODULE_CLASS_VFS, lfs, NULL); static int lfs_gop_write(struct vnode *, struct vm_page **, int, int); static int lfs_mountfs(struct vnode *, struct mount *, struct lwp *); +static int lfs_flushfiles(struct mount *, int); static struct sysctllog *lfs_sysctl_log; @@ -355,6 +356,7 @@ lfs_modcmd(modcmd_t cmd, void *arg) break; } lfs_sysctl_setup(&lfs_sysctl_log); + cv_init(&lfs_allclean_wakeup, "segment"); break; case MODULE_CMD_FINI: error = vfs_detach(&lfs_vfsops); @@ -362,6 +364,7 @@ lfs_modcmd(modcmd_t cmd, void *arg) break; syscall_disestablish(NULL, lfs_syscalls); sysctl_teardown(&lfs_sysctl_log); + cv_destroy(&lfs_allclean_wakeup); break; default: error = ENOTTY; @@ -755,23 +758,18 @@ lfs_mount(struct mount *mp, const char *path, void *data, size_t *data_len) ump = VFSTOULFS(mp); fs = ump->um_lfs; - if (fs->lfs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) { + if (!fs->lfs_ronly && (mp->mnt_iflag & IMNT_WANTRDONLY)) { /* * Changing from read/write to read-only. - * XXX: shouldn't we sync here? or does vfs do that? */ -#ifdef LFS_QUOTA2 - /* XXX: quotas should remain on when readonly */ - if (fs->lfs_use_quota2) { - error = lfsquota2_umount(mp, 0); - if (error) { - return error; - } - } -#endif - } - - if (fs->lfs_ronly && (mp->mnt_iflag & IMNT_WANTRDWR)) { + int flags = WRITECLOSE; + if (mp->mnt_flag & MNT_FORCE) + flags |= FORCECLOSE; + error = lfs_flushfiles(mp, flags); + if (error) + return error; + fs->lfs_ronly = 1; + } else if (fs->lfs_ronly && (mp->mnt_iflag & IMNT_WANTRDWR)) { /* * Changing from read-only to read/write. * Note in the superblocks that we're writing. @@ -805,8 +803,9 @@ lfs_mount(struct mount *mp, const char *path, void *data, size_t *data_len) lfs_writesuper(fs, lfs_sb_getsboff(fs, 1)); } } + if (args->fspec == NULL) - return EINVAL; + return 0; } error = set_statvfs_info(path, UIO_USERSPACE, args->fspec, @@ -860,7 +859,6 @@ lfs_checkmagic(struct lfs *fs) int lfs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l) { - static bool lfs_mounted_once = false; struct lfs *primarysb, *altsb, *thesb; struct buf *primarybuf, *altbuf; struct lfs *fs; @@ -872,6 +870,8 @@ lfs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l) CLEANERINFO *cip; SEGUSE *sup; daddr_t sb_addr; + ino_t *orphan; + size_t norphan; cred = l ? l->l_cred : NOCRED; @@ -1094,12 +1094,6 @@ lfs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l) cv_init(&fs->lfs_stopcv, "lfsstop"); cv_init(&fs->lfs_nextsegsleep, "segment"); - /* Initialize values for all LFS mounts */ - if (!lfs_mounted_once) { - cv_init(&lfs_allclean_wakeup, "segment"); - lfs_mounted_once = true; - } - /* Set the file system readonly/modify bits. */ fs->lfs_ronly = ronly; if (ronly == 0) @@ -1137,6 +1131,7 @@ lfs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l) mp->mnt_stat.f_iosize = lfs_sb_getbsize(fs); mp->mnt_flag |= MNT_LOCAL; mp->mnt_fs_bshift = lfs_sb_getbshift(fs); + mp->mnt_iflag |= IMNT_CAN_RWTORO; if (fs->um_maxsymlinklen > 0) mp->mnt_iflag |= IMNT_DTYPE; else @@ -1169,8 +1164,8 @@ lfs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l) fs->lfs_ivnode = vp; vref(vp); - /* Set up inode bitmap and order free list */ - lfs_order_freelist(fs); + /* Set up inode bitmap, order free list, and gather orphans. */ + lfs_order_freelist(fs, &orphan, &norphan); /* Set up segment usage flags for the autocleaner. */ fs->lfs_nactive = 0; @@ -1209,6 +1204,9 @@ lfs_mountfs(struct vnode *devvp, struct mount *mp, struct lwp *l) brelse(bp, 0); } + /* Free the orphans we discovered while ordering the freelist. */ + lfs_free_orphans(fs, orphan, norphan); + /* * XXX: if the fs has quotas, quotas should be on even if * readonly. Otherwise you can't query the quota info! @@ -1328,22 +1326,72 @@ out: int lfs_unmount(struct mount *mp, int mntflags) { - struct lwp *l = curlwp; struct ulfsmount *ump; struct lfs *fs; - int error, flags, ronly; - vnode_t *vp; + int error, ronly; + + ump = VFSTOULFS(mp); + fs = ump->um_lfs; + + error = lfs_flushfiles(mp, mntflags & MNT_FORCE ? FORCECLOSE : 0); + if (error) + return error; + + /* Finish with the Ifile, now that we're done with it */ + vgone(fs->lfs_ivnode); + + ronly = !fs->lfs_ronly; + if (fs->lfs_devvp->v_type != VBAD) + spec_node_setmountedfs(fs->lfs_devvp, NULL); + vn_lock(fs->lfs_devvp, LK_EXCLUSIVE | LK_RETRY); + error = VOP_CLOSE(fs->lfs_devvp, + ronly ? FREAD : FREAD|FWRITE, NOCRED); + vput(fs->lfs_devvp); + + /* Complain about page leakage */ + if (fs->lfs_pages > 0) + printf("lfs_unmount: still claim %d pages (%d in subsystem)\n", + fs->lfs_pages, lfs_subsys_pages); + + /* Free per-mount data structures */ + free(fs->lfs_ino_bitmap, M_SEGMENT); + free(fs->lfs_suflags[0], M_SEGMENT); + free(fs->lfs_suflags[1], M_SEGMENT); + free(fs->lfs_suflags, M_SEGMENT); + lfs_free_resblks(fs); + cv_destroy(&fs->lfs_sleeperscv); + cv_destroy(&fs->lfs_diropscv); + cv_destroy(&fs->lfs_stopcv); + cv_destroy(&fs->lfs_nextsegsleep); + + rw_destroy(&fs->lfs_fraglock); + rw_destroy(&fs->lfs_iflock); + + kmem_free(fs, sizeof(struct lfs)); + kmem_free(ump, sizeof(*ump)); - flags = 0; - if (mntflags & MNT_FORCE) - flags |= FORCECLOSE; + mp->mnt_data = NULL; + mp->mnt_flag &= ~MNT_LOCAL; + return (error); +} + +static int +lfs_flushfiles(struct mount *mp, int flags) +{ + struct lwp *l = curlwp; + struct ulfsmount *ump; + struct lfs *fs; + struct vnode *vp; + int error; ump = VFSTOULFS(mp); fs = ump->um_lfs; /* Two checkpoints */ - lfs_segwrite(mp, SEGM_CKP | SEGM_SYNC); - lfs_segwrite(mp, SEGM_CKP | SEGM_SYNC); + if (!fs->lfs_ronly) { + lfs_segwrite(mp, SEGM_CKP | SEGM_SYNC); + lfs_segwrite(mp, SEGM_CKP | SEGM_SYNC); + } /* wake up the cleaner so it can die */ /* XXX: shouldn't this be *after* the error cases below? */ @@ -1383,51 +1431,18 @@ lfs_unmount(struct mount *mp, int mntflags) mutex_exit(vp->v_interlock); /* Explicitly write the superblock, to update serial and pflags */ - lfs_sb_setpflags(fs, lfs_sb_getpflags(fs) | LFS_PF_CLEAN); - lfs_writesuper(fs, lfs_sb_getsboff(fs, 0)); - lfs_writesuper(fs, lfs_sb_getsboff(fs, 1)); + if (!fs->lfs_ronly) { + lfs_sb_setpflags(fs, lfs_sb_getpflags(fs) | LFS_PF_CLEAN); + lfs_writesuper(fs, lfs_sb_getsboff(fs, 0)); + lfs_writesuper(fs, lfs_sb_getsboff(fs, 1)); + } mutex_enter(&lfs_lock); while (fs->lfs_iocount) mtsleep(&fs->lfs_iocount, PRIBIO + 1, "lfs_umount", 0, &lfs_lock); mutex_exit(&lfs_lock); - /* Finish with the Ifile, now that we're done with it */ - vgone(fs->lfs_ivnode); - - ronly = !fs->lfs_ronly; - if (fs->lfs_devvp->v_type != VBAD) - spec_node_setmountedfs(fs->lfs_devvp, NULL); - vn_lock(fs->lfs_devvp, LK_EXCLUSIVE | LK_RETRY); - error = VOP_CLOSE(fs->lfs_devvp, - ronly ? FREAD : FREAD|FWRITE, NOCRED); - vput(fs->lfs_devvp); - - /* Complain about page leakage */ - if (fs->lfs_pages > 0) - printf("lfs_unmount: still claim %d pages (%d in subsystem)\n", - fs->lfs_pages, lfs_subsys_pages); - - /* Free per-mount data structures */ - free(fs->lfs_ino_bitmap, M_SEGMENT); - free(fs->lfs_suflags[0], M_SEGMENT); - free(fs->lfs_suflags[1], M_SEGMENT); - free(fs->lfs_suflags, M_SEGMENT); - lfs_free_resblks(fs); - cv_destroy(&fs->lfs_sleeperscv); - cv_destroy(&fs->lfs_diropscv); - cv_destroy(&fs->lfs_stopcv); - cv_destroy(&fs->lfs_nextsegsleep); - - rw_destroy(&fs->lfs_fraglock); - rw_destroy(&fs->lfs_iflock); - - kmem_free(fs, sizeof(struct lfs)); - kmem_free(ump, sizeof(*ump)); - - mp->mnt_data = NULL; - mp->mnt_flag &= ~MNT_LOCAL; - return (error); + return 0; } /* diff --git a/sys/ufs/lfs/lfs_vnops.c b/sys/ufs/lfs/lfs_vnops.c index 137c65397c21..719da6296093 100644 --- a/sys/ufs/lfs/lfs_vnops.c +++ b/sys/ufs/lfs/lfs_vnops.c @@ -1602,7 +1602,7 @@ lfs_strategy(void *v) int lfs_flush_dirops(struct lfs *fs) { - struct inode *ip, *nip; + struct inode *ip, *marker; struct vnode *vp; extern int lfs_dostats; /* XXX this does not belong here */ struct segment *sp; @@ -1611,7 +1611,8 @@ lfs_flush_dirops(struct lfs *fs) int error = 0; ASSERT_MAYBE_SEGLOCK(fs); - KASSERT(fs->lfs_nadirop == 0); + KASSERT(fs->lfs_nadirop == 0); /* stable during lfs_writer */ + KASSERT(fs->lfs_dirops == 0); /* stable during lfs_writer */ if (fs->lfs_ronly) return EROFS; @@ -1626,6 +1627,12 @@ lfs_flush_dirops(struct lfs *fs) if (lfs_dostats) ++lfs_stats.flush_invoked; + marker = pool_get(&lfs_inode_pool, PR_WAITOK); + memset(marker, 0, sizeof(*marker)); + marker->inode_ext.lfs = pool_get(&lfs_inoext_pool, PR_WAITOK); + memset(marker->inode_ext.lfs, 0, sizeof(*marker->inode_ext.lfs)); + marker->i_state = IN_MARKER; + lfs_imtime(fs); lfs_seglock(fs, flags); sp = fs->lfs_sp; @@ -1644,15 +1651,41 @@ lfs_flush_dirops(struct lfs *fs) * */ mutex_enter(&lfs_lock); - for (ip = TAILQ_FIRST(&fs->lfs_dchainhd); ip != NULL; ip = nip) { - nip = TAILQ_NEXT(ip, i_lfs_dchain); - mutex_exit(&lfs_lock); + KASSERT(fs->lfs_writer); + TAILQ_INSERT_HEAD(&fs->lfs_dchainhd, marker, i_lfs_dchain); + while ((ip = TAILQ_NEXT(marker, i_lfs_dchain)) != NULL) { + TAILQ_REMOVE(&fs->lfs_dchainhd, marker, i_lfs_dchain); + TAILQ_INSERT_AFTER(&fs->lfs_dchainhd, ip, marker, + i_lfs_dchain); + if (ip->i_state & IN_MARKER) + continue; vp = ITOV(ip); - mutex_enter(vp->v_interlock); + /* + * Prevent the vnode from going away if it's just been + * put out in the segment and lfs_unmark_dirop is about + * to release it. While it is on the list it is always + * referenced, so it cannot be reclaimed until we + * release it. + */ + vref(vp); + + /* + * Since we hold lfs_writer, the node can't be in an + * active dirop. Since it's on the list and we hold a + * reference to it, it can't be reclaimed now. + */ KASSERT((ip->i_state & IN_ADIROP) == 0); KASSERT(vp->v_uflag & VU_DIROP); - KASSERT(vdead_check(vp, VDEAD_NOWAIT) == 0); + + /* + * After we release lfs_lock, if we were in the middle + * of writing a segment, lfs_unmark_dirop may end up + * clearing VU_DIROP, and we have no way to stop it. + * That should be OK -- we'll just have less to do + * here. + */ + mutex_exit(&lfs_lock); /* * All writes to directories come from dirops; all @@ -1662,15 +1695,6 @@ lfs_flush_dirops(struct lfs *fs) * directory blocks inodes and file inodes. So we don't * really need to lock. */ - if (vdead_check(vp, VDEAD_NOWAIT) != 0) { - mutex_exit(vp->v_interlock); - mutex_enter(&lfs_lock); - continue; - } - mutex_exit(vp->v_interlock); - /* XXX see below - * waslocked = VOP_ISLOCKED(vp); - */ if (vp->v_type != VREG && ((ip->i_state & IN_ALLMOD) || !VPISEMPTY(vp))) { error = lfs_writefile(fs, sp, vp); @@ -1681,15 +1705,17 @@ lfs_flush_dirops(struct lfs *fs) mutex_exit(&lfs_lock); } if (error && (sp->seg_flags & SEGM_SINGLE)) { + vrele(vp); mutex_enter(&lfs_lock); error = EAGAIN; break; } } - KDASSERT(ip->i_number != LFS_IFILE_INUM); + KASSERT(ip->i_number != LFS_IFILE_INUM); error = lfs_writeinode(fs, sp, ip); - mutex_enter(&lfs_lock); if (error && (sp->seg_flags & SEGM_SINGLE)) { + vrele(vp); + mutex_enter(&lfs_lock); error = EAGAIN; break; } @@ -1702,9 +1728,16 @@ lfs_flush_dirops(struct lfs *fs) * write them. */ /* XXX only for non-directories? --KS */ + mutex_enter(&lfs_lock); LFS_SET_UINO(ip, IN_MODIFIED); + mutex_exit(&lfs_lock); + + vrele(vp); + mutex_enter(&lfs_lock); } + TAILQ_REMOVE(&fs->lfs_dchainhd, marker, i_lfs_dchain); mutex_exit(&lfs_lock); + /* We've written all the dirops there are */ ssp = (SEGSUM *)sp->segsum; lfs_ss_setflags(fs, ssp, lfs_ss_getflags(fs, ssp) & ~(SS_CONT)); @@ -1712,6 +1745,9 @@ lfs_flush_dirops(struct lfs *fs) (void) lfs_writeseg(fs, sp); lfs_segunlock(fs); + pool_put(&lfs_inoext_pool, marker->inode_ext.lfs); + pool_put(&lfs_inode_pool, marker); + return error; } @@ -1732,6 +1768,7 @@ lfs_flush_pchain(struct lfs *fs) int error, error2; ASSERT_NO_SEGLOCK(fs); + KASSERT(fs->lfs_writer); if (fs->lfs_ronly) return EROFS; @@ -1802,7 +1839,7 @@ lfs_flush_pchain(struct lfs *fs) LFS_SET_UINO(ip, IN_MODIFIED); mutex_exit(&lfs_lock); } - KDASSERT(ip->i_number != LFS_IFILE_INUM); + KASSERT(ip->i_number != LFS_IFILE_INUM); error2 = lfs_writeinode(fs, sp, ip); VOP_UNLOCK(vp); diff --git a/sys/ufs/lfs/ulfs_readwrite.c b/sys/ufs/lfs/ulfs_readwrite.c index 12984c77a04f..01805dd18e4f 100644 --- a/sys/ufs/lfs/ulfs_readwrite.c +++ b/sys/ufs/lfs/ulfs_readwrite.c @@ -35,6 +35,8 @@ #include __KERNEL_RCSID(1, "$NetBSD: ulfs_readwrite.c,v 1.25 2019/06/20 00:49:11 christos Exp $"); +#include + #define FS struct lfs #define I_FS i_lfs #define READ lfs_read @@ -278,7 +280,6 @@ WRITE(void *v) KASSERT(vp->v_type == VREG); - async = true; lfs_availwait(fs, lfs_btofsb(fs, uio->uio_resid)); lfs_check(vp, LFS_UNUSED_LBN, 0); @@ -395,7 +396,16 @@ WRITE(void *v) * XXXUBC simplistic async flushing. */ - __USE(async); + unsigned shift = ilog2(lfs_segsize(fs)) - 1; + if (!async && + (oldoff >> shift) != (uio->uio_offset >> shift)) { + mutex_enter(vp->v_interlock); + error = VOP_PUTPAGES(vp, (oldoff >> shift) << shift, + (uio->uio_offset >> shift) << shift, + PGO_CLEANIT | PGO_LAZY); + if (error) + break; + } } if (error == 0 && ioflag & IO_SYNC) { mutex_enter(vp->v_interlock); diff --git a/usr.sbin/dumplfs/dumplfs.c b/usr.sbin/dumplfs/dumplfs.c index 912c4579359b..bc4590dd0773 100644 --- a/usr.sbin/dumplfs/dumplfs.c +++ b/usr.sbin/dumplfs/dumplfs.c @@ -133,7 +133,7 @@ print_ientry(int i, struct lfs *lfsp, IFILE *ip) else printf("%d\tINUSE\t%u\t%8jX\t%s\n", i, version, (intmax_t)daddr, - nextfree == LFS_ORPHAN_NEXTFREE ? "FFFFFFFF" : "-"); + nextfree == LFS_ORPHAN_NEXTFREE(lfsp) ? "orphan" : "-"); } /*