Index: kern/vfs_cache.c
===================================================================
RCS file: /cvsroot/src/sys/kern/vfs_cache.c,v
retrieving revision 1.143
diff -u -p -r1.143 vfs_cache.c
--- kern/vfs_cache.c	16 May 2020 18:31:50 -0000	1.143
+++ kern/vfs_cache.c	24 May 2020 22:39:53 -0000
@@ -582,13 +582,8 @@ cache_lookup(struct vnode *dvp, const ch
 		return hit;
 	}
 	vp = ncp->nc_vp;
-	mutex_enter(vp->v_interlock);
-	rw_exit(&dvi->vi_nc_lock);
-
-	/*
-	 * Unlocked except for the vnode interlock.  Call vcache_tryvget().
-	 */
 	error = vcache_tryvget(vp);
+	rw_exit(&dvi->vi_nc_lock);
 	if (error) {
 		KASSERT(error == EBUSY);
 		/*
@@ -821,9 +816,8 @@ cache_revlookup(struct vnode *vp, struct
 		}
 
 		dvp = ncp->nc_dvp;
-		mutex_enter(dvp->v_interlock);
-		rw_exit(&vi->vi_nc_listlock);
 		error = vcache_tryvget(dvp);
+		rw_exit(&vi->vi_nc_listlock);
 		if (error) {
 			KASSERT(error == EBUSY);
 			if (bufp)
Index: kern/vfs_lookup.c
===================================================================
RCS file: /cvsroot/src/sys/kern/vfs_lookup.c,v
retrieving revision 1.219
diff -u -p -r1.219 vfs_lookup.c
--- kern/vfs_lookup.c	22 Apr 2020 21:35:52 -0000	1.219
+++ kern/vfs_lookup.c	24 May 2020 22:39:53 -0000
@@ -1354,9 +1354,7 @@ lookup_fastforward(struct namei_state *s
 		    foundobj->v_type != VDIR ||
 		    (foundobj->v_type == VDIR &&
 		    foundobj->v_mountedhere != NULL)) {
-			mutex_enter(foundobj->v_interlock);
 			error = vcache_tryvget(foundobj);
-			/* v_interlock now unheld */
 			if (error != 0) {
 				foundobj = NULL;
 				error = EOPNOTSUPP;
@@ -1381,9 +1379,7 @@ lookup_fastforward(struct namei_state *s
 	 * let lookup_once() take care of it.
 	 */
 	if (searchdir != *searchdir_ret) {
-		mutex_enter(searchdir->v_interlock);
 		error2 = vcache_tryvget(searchdir);
-		/* v_interlock now unheld */
 		KASSERT(plock != NULL);
 		rw_exit(plock);
 		if (__predict_true(error2 == 0)) {
Index: kern/vfs_subr.c
===================================================================
RCS file: /cvsroot/src/sys/kern/vfs_subr.c,v
retrieving revision 1.487
diff -u -p -r1.487 vfs_subr.c
--- kern/vfs_subr.c	16 May 2020 18:31:50 -0000	1.487
+++ kern/vfs_subr.c	24 May 2020 22:39:53 -0000
@@ -730,18 +730,15 @@ lazy_sync_vnode(struct vnode *vp)
 	KASSERT(mutex_owned(&syncer_data_lock));
 
 	synced = false;
-	/* We are locking in the wrong direction. */
-	if (mutex_tryenter(vp->v_interlock)) {
+	if (vcache_tryvget(vp) == 0) {
 		mutex_exit(&syncer_data_lock);
-		if (vcache_tryvget(vp) == 0) {
-			if (vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT) == 0) {
-				synced = true;
-				(void) VOP_FSYNC(vp, curlwp->l_cred,
-				    FSYNC_LAZY, 0, 0);
-				vput(vp);
-			} else
-				vrele(vp);
-		}
+		if (vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT) == 0) {
+			synced = true;
+			(void) VOP_FSYNC(vp, curlwp->l_cred,
+			    FSYNC_LAZY, 0, 0);
+			vput(vp);
+		} else
+			vrele(vp);
 		mutex_enter(&syncer_data_lock);
 	}
 	return synced;
Index: kern/vfs_vnode.c
===================================================================
RCS file: /cvsroot/src/sys/kern/vfs_vnode.c,v
retrieving revision 1.122
diff -u -p -r1.122 vfs_vnode.c
--- kern/vfs_vnode.c	18 May 2020 08:27:54 -0000	1.122
+++ kern/vfs_vnode.c	24 May 2020 22:39:53 -0000
@@ -112,7 +112,7 @@
  *	LOADING -> LOADED
  *			Vnode has been initialised in vcache_get() or
  *			vcache_new() and is ready to use.
- *	LOADED -> RECLAIMING
+ *	BLOCKED -> RECLAIMING
  *			Vnode starts disassociation from underlying file
  *			system in vcache_reclaim().
  *	RECLAIMING -> RECLAIMED
@@ -143,15 +143,8 @@
  *	as vput(9), routines.  Common points holding references are e.g.
  *	file openings, current working directory, mount points, etc.
  *
- * Note on v_usecount and its locking
- *
- *	At nearly all points it is known that v_usecount could be zero,
- *	the vnode_t::v_interlock will be held.  To change the count away
- *	from zero, the interlock must be held.  To change from a non-zero
- *	value to zero, again the interlock must be held.
- *
- *	Changing the usecount from a non-zero value to a non-zero value can
- *	safely be done using atomic operations, without the interlock held.
+ *	v_usecount is adjusted with atomic operations, however to change
+ *	from a non-zero value to zero the interlock must also be held.
  */
 
 #include <sys/cdefs.h>
@@ -237,13 +230,20 @@ extern int (**spec_vnodeop_p)(void *);
 extern struct vfsops dead_vfsops;
 
 /*
+ * The high bit of v_usecount is a gate for vcache_tryvget().  It's set
+ * only when the vnode state is LOADED.
+ */
+#define	VUSECOUNT_MASK	0x7fffffff
+#define	VUSECOUNT_GATE	0x80000000
+
+/*
  * Return the current usecount of a vnode.
  */
 inline int
 vrefcnt(struct vnode *vp)
 {
 
-	return atomic_load_relaxed(&vp->v_usecount);
+	return atomic_load_relaxed(&vp->v_usecount) & VUSECOUNT_MASK;
 }
 
 /* Vnode state operations and diagnostics. */
@@ -329,8 +329,8 @@ static void
 vstate_assert_change(vnode_t *vp, enum vnode_state from, enum vnode_state to,
     const char *func, int line)
 {
+	bool gated = (atomic_load_relaxed(&vp->v_usecount) & VUSECOUNT_GATE);
 	vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
-	int refcnt = vrefcnt(vp);
 
 	KASSERTMSG(mutex_owned(vp->v_interlock), "at %s:%d", func, line);
 	if (from == VS_LOADING)
@@ -345,10 +345,15 @@ vstate_assert_change(vnode_t *vp, enum v
 	if (vip->vi_state != from)
 		vnpanic(vp, "from is %s, expected %s at %s:%d\n",
 		    vstate_name(vip->vi_state), vstate_name(from), func, line);
-	if ((from == VS_BLOCKED || to == VS_BLOCKED) && refcnt != 1)
-		vnpanic(vp, "%s to %s with usecount %d at %s:%d",
-		    vstate_name(from), vstate_name(to), refcnt,
-		    func, line);
+	if ((from == VS_LOADED) != gated)
+		vnpanic(vp, "state is %s, gate %d does not match at %s:%d\n",
+		    vstate_name(vip->vi_state), gated, func, line);
+
+	/* Open/close the gate for vcache_tryvget(). */
+	if (to == VS_LOADED)
+		atomic_or_uint(&vp->v_usecount, VUSECOUNT_GATE);
+	else
+		atomic_and_uint(&vp->v_usecount, ~VUSECOUNT_GATE);
 
 	vip->vi_state = to;
 	if (from == VS_LOADING)
@@ -386,6 +391,12 @@ vstate_change(vnode_t *vp, enum vnode_st
 {
 	vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
 
+	/* Open/close the gate for vcache_tryvget(). */
+	if (to == VS_LOADED)
+		atomic_or_uint(&vp->v_usecount, VUSECOUNT_GATE);
+	else
+		atomic_and_uint(&vp->v_usecount, ~VUSECOUNT_GATE);
+
 	vip->vi_state = to;
 	if (from == VS_LOADING)
 		cv_broadcast(&vcache_cv);
@@ -712,10 +723,10 @@ vtryrele(vnode_t *vp)
 	u_int use, next;
 
 	for (use = atomic_load_relaxed(&vp->v_usecount);; use = next) {
-		if (__predict_false(use == 1)) {
+		if (__predict_false((use & VUSECOUNT_MASK) == 1)) {
 			return false;
 		}
-		KASSERT(use > 1);
+		KASSERT((use & VUSECOUNT_MASK) > 1);
 		next = atomic_cas_uint(&vp->v_usecount, use, use - 1);
 		if (__predict_true(next == use)) {
 			return true;
@@ -846,10 +857,8 @@ vrelel(vnode_t *vp, int flags, int lktyp
 		VOP_UNLOCK(vp);
 	} else {
 		/*
-		 * The vnode must not gain another reference while being
-		 * deactivated.  If VOP_INACTIVE() indicates that
-		 * the described file has been deleted, then recycle
-		 * the vnode.
+		 * If VOP_INACTIVE() indicates that the file has been
+		 * deleted, then recycle the vnode.
 		 *
 		 * Note that VOP_INACTIVE() will not drop the vnode lock.
 		 */
@@ -858,12 +867,34 @@ vrelel(vnode_t *vp, int flags, int lktyp
 		VOP_INACTIVE(vp, &recycle);
 		rw_enter(vp->v_uobj.vmobjlock, RW_WRITER);
 		mutex_enter(vp->v_interlock);
-		if (vtryrele(vp)) {
-			VOP_UNLOCK(vp);
-			mutex_exit(vp->v_interlock);
-			rw_exit(vp->v_uobj.vmobjlock);
-			return;
-		}
+
+		for (;;) {
+			/*
+			 * If no longer the last reference, try to shed it.
+			 * On success, drop the interlock last thereby
+			 * preventing the vnode being freed behind us.
+			 */
+			if (vtryrele(vp)) {
+				VOP_UNLOCK(vp);
+				rw_exit(vp->v_uobj.vmobjlock);
+				mutex_exit(vp->v_interlock);
+				return;
+			}
+			/*
+			 * Block new references then check again to see if a
+			 * new reference was acquired in the meantime.  If
+			 * it was, restore the vnode state and try again.
+			 */
+			if (recycle) {
+				VSTATE_CHANGE(vp, VS_LOADED, VS_BLOCKED);
+				if (vrefcnt(vp) != 1) {
+					VSTATE_CHANGE(vp, VS_BLOCKED,
+					    VS_LOADED);
+					continue;
+				}
+			}
+			break;
+		}
 
 		/* Take care of space accounting. */
 		if ((vp->v_iflag & VI_EXECMAP) != 0 &&
@@ -880,7 +911,7 @@ vrelel(vnode_t *vp, int flags, int lktyp
 	 * otherwise just free it.
 	 */
 	if (recycle) {
-		VSTATE_ASSERT(vp, VS_LOADED);
+		VSTATE_ASSERT(vp, VS_BLOCKED);
 		/* vcache_reclaim drops the lock. */
 		vcache_reclaim(vp);
 	} else {
@@ -889,7 +920,7 @@ vrelel(vnode_t *vp, int flags, int lktyp
 		KASSERT(vrefcnt(vp) > 0);
 	}
 
-	if (atomic_dec_uint_nv(&vp->v_usecount) != 0) {
+	if ((atomic_dec_uint_nv(&vp->v_usecount) & VUSECOUNT_MASK) != 0) {
 		/* Gained another reference while being reclaimed. */
 		mutex_exit(vp->v_interlock);
 		return;
@@ -941,7 +972,7 @@ vrele_async(vnode_t *vp)
  * Vnode reference, where a reference is already held by some other
  * object (for example, a file structure).
  *
- * NB: we have lockless code sequences that rely on this not blocking.
+ * NB: lockless code sequences may rely on this not blocking.
  */
 void
 vref(vnode_t *vp)
@@ -1019,14 +1050,8 @@ vrecycle(vnode_t *vp)
 
 	mutex_enter(vp->v_interlock);
 
-	/* Make sure we hold the last reference. */
-	VSTATE_WAIT_STABLE(vp);
-	if (vrefcnt(vp) != 1) {
-		mutex_exit(vp->v_interlock);
-		return false;
-	}
-
 	/* If the vnode is already clean we're done. */
+	VSTATE_WAIT_STABLE(vp);
 	if (VSTATE_GET(vp) != VS_LOADED) {
 		VSTATE_ASSERT(vp, VS_RECLAIMED);
 		vrelel(vp, 0, LK_NONE);
@@ -1035,6 +1060,14 @@ vrecycle(vnode_t *vp)
 
 	/* Prevent further references until the vnode is locked. */
 	VSTATE_CHANGE(vp, VS_LOADED, VS_BLOCKED);
+
+	/* Make sure we hold the last reference. */
+	if (vrefcnt(vp) != 1) {
+		VSTATE_CHANGE(vp, VS_BLOCKED, VS_LOADED);
+		mutex_exit(vp->v_interlock);
+		return false;
+	}
+
 	mutex_exit(vp->v_interlock);
 
 	/*
@@ -1046,9 +1079,8 @@ vrecycle(vnode_t *vp)
 
 	error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_NOWAIT);
 	mutex_enter(vp->v_interlock);
-	VSTATE_CHANGE(vp, VS_BLOCKED, VS_LOADED);
-
 	if (error) {
+		VSTATE_CHANGE(vp, VS_BLOCKED, VS_LOADED);
 		mutex_exit(vp->v_interlock);
 		return false;
 	}
@@ -1143,6 +1175,7 @@ vgone(vnode_t *vp)
 	mutex_enter(vp->v_interlock);
 	VSTATE_WAIT_STABLE(vp);
 	if (VSTATE_GET(vp) == VS_LOADED) {
+		VSTATE_CHANGE(vp, VS_LOADED, VS_BLOCKED);
 		vcache_reclaim(vp);
 		lktype = LK_NONE;
 	}
@@ -1310,30 +1343,24 @@ vcache_free(vnode_impl_t *vip)
 
 /*
  * Try to get an initial reference on this cached vnode.
- * Returns zero on success, ENOENT if the vnode has been reclaimed and
- * EBUSY if the vnode state is unstable.
+ * Returns zero on success or EBUSY if the vnode state is not LOADED.
  *
- * v_interlock locked on entry and unlocked on exit.
+ * NB: lockless code sequences may rely on this not blocking.
  */
 int
 vcache_tryvget(vnode_t *vp)
 {
-	int error = 0;
-
-	KASSERT(mutex_owned(vp->v_interlock));
-
-	if (__predict_false(VSTATE_GET(vp) == VS_RECLAIMED))
-		error = ENOENT;
-	else if (__predict_false(VSTATE_GET(vp) != VS_LOADED))
-		error = EBUSY;
-	else if (vp->v_usecount == 0)
-		vp->v_usecount = 1;
-	else
-		atomic_inc_uint(&vp->v_usecount);
-
-	mutex_exit(vp->v_interlock);
+	u_int use, next;
 
-	return error;
+	for (use = atomic_load_relaxed(&vp->v_usecount);; use = next) {
+		if (__predict_false((use & VUSECOUNT_GATE) == 0)) {
+			return EBUSY;
+		}
+		next = atomic_cas_uint(&vp->v_usecount, use, use + 1);
+		if (__predict_true(next == use)) {
+			return 0;
+		}
+	}
 }
 
 /*
@@ -1363,10 +1390,7 @@ vcache_vget(vnode_t *vp)
 		return ENOENT;
 	}
 	VSTATE_ASSERT(vp, VS_LOADED);
-	if (vp->v_usecount == 0)
-		vp->v_usecount = 1;
-	else
-		atomic_inc_uint(&vp->v_usecount);
+	atomic_inc_uint(&vp->v_usecount);
 	mutex_exit(vp->v_interlock);
 
 	return 0;
@@ -1679,7 +1703,7 @@ vcache_reclaim(vnode_t *vp)
 	 * Prevent the vnode from being recycled or brought into use
 	 * while we clean it out.
 	 */
-	VSTATE_CHANGE(vp, VS_LOADED, VS_RECLAIMING);
+	VSTATE_CHANGE(vp, VS_BLOCKED, VS_RECLAIMING);
 	mutex_exit(vp->v_interlock);
 
 	rw_enter(vp->v_uobj.vmobjlock, RW_WRITER);
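
The heart of the change is the VUSECOUNT_GATE bit: vcache_tryvget() can take a
reference with a plain CAS loop and fail with EBUSY whenever the gate is closed,
while the vnode state machine opens and closes the gate as the vnode enters or
leaves LOADED.  The stand-alone sketch below models that pattern with C11
atomics; the names here (struct obj, try_get, open_gate, close_gate) are
invented for illustration and are not part of the patch or of any NetBSD API.

/*
 * Minimal stand-alone model of the usecount "gate" pattern (C11 atomics).
 * Illustrative only: the kernel code above uses atomic_cas_uint() and the
 * vnode state machine instead of these helpers.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define GATE	0x80000000u	/* set while the object is usable ("LOADED") */
#define MASK	0x7fffffffu	/* the reference count proper */

struct obj {
	atomic_uint usecount;	/* gate bit | reference count */
};

/* Lock-free "tryvget": take a reference only while the gate is open. */
static bool
try_get(struct obj *o)
{
	unsigned use = atomic_load_explicit(&o->usecount, memory_order_relaxed);

	for (;;) {
		if ((use & GATE) == 0)
			return false;	/* like EBUSY: caller must back off */
		/* CAS in the incremented count; on failure 'use' is reloaded. */
		if (atomic_compare_exchange_weak(&o->usecount, &use, use + 1))
			return true;
	}
}

/* Called under the object's lock when entering the usable state. */
static void
open_gate(struct obj *o)
{
	atomic_fetch_or(&o->usecount, GATE);
}

/* Called under the object's lock when leaving the usable state. */
static void
close_gate(struct obj *o)
{
	atomic_fetch_and(&o->usecount, ~GATE);
}

int
main(void)
{
	struct obj o = { .usecount = 1 };	/* one reference, gate closed */

	printf("closed: %d\n", try_get(&o));	/* 0: gate not set */
	open_gate(&o);
	printf("open:   %d\n", try_get(&o));	/* 1: reference taken */
	printf("count:  %u\n", atomic_load(&o.usecount) & MASK);
	close_gate(&o);
	return 0;
}

As in the patch, only acquiring a reference is lock-free; the non-zero to zero
transition still has to be serialised against reclaim, which vrelel() does by
holding v_interlock around vtryrele() and the final decrement.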