/*	$NetBSD$	*/

/*-
 * Copyright (c) 2014 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Taylor R Campbell.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * XXX NOTE NOTE NOTE XXX
 *
 * This code does not actually work.  It is a draft of an idea.  It
 * probably won't even compile, even if you make it include the right
 * header files.
 */

/*
 * Vnode life cycle
 *
 * Vnodes exist in one of seven states:
 *
 * - UNINITIALIZED
 * - INITIALIZING
 * - READY (inactive if usecount = 0, active if usecount > 0)
 * - REVOKED
 * - DEACTIVATING
 * - RECLAIMING
 * - RECLAIMED
 *
 * This is a lot of states, but users of the vnode abstraction don't
 * usually see most of them -- vget returns a READY or REVOKED vnode,
 * and until you vrele, you don't ever see states other than those.
 *
 * - The UNINITIALIZED state is not visible outside the vnode
 *   abstraction except that marker vnodes are always in it.
 *
 * - The INITIALIZING state is visible only from getnewvnode until
 *   vready.
 *
 * - The DEACTIVATING state is visible only in VOP_INACTIVE.
 *
 * - The REVOKED state is indistinguishable from the READY state of a
 *   deadfs vnode.
 *
 * - The RECLAIMING state is visible only in VOP_RECLAIM, and the
 *   decision to reclaim a vnode is final.
 *
 * - The RECLAIMED state is not visible: vget returns ENOENT in this
 *   case.
 *
 *                      (getnewvnode)
 *                            |
 *                            V
 *                    +--------------+   ungetnewvnode
 *                    | INITIALIZING |-------------------------> (vnfree)
 *                    +--------------+
 *                            |
 *                            | vready
 *                            V
 *              +------------------------------+
 *  vget +----> | active (READY, usecount > 0) | <----+
 *       /      +------------------------------+       \
 *      /           /                  \                |
 *     / last vrele /                    \ vrevoke      |
 *    | or vrecycle                       \             |
 *    |     |                              |            |
 *    |     V                              V            |
 *    |  +----------------+   +------------------------+
 *    |  | DEACTIVATING   |   | REVOKED [VOP_INACTIVE, |
 *    |  | [VOP_INACTIVE] |   |  VOP_RECLAIM]          |
 *    |  +----------------+   +------------------------+
 *    |     |          \___________
 *    |     | VOP_INACTIVE says    \  VOP_INACTIVE says reclaim
 *    |     | don't reclaim         |
 *    |     V                       V
 *    |  +---------------+      +---------------+
 *    |  | inactive      |vdrain| RECLAIMING    |
 *    ^--| (READY,       |----->| [VOP_RECLAIM] |
 *       | usecount = 0) |      +---------------+
 *       +---------------+             |
 *                                     |
 *                                     V
 *                               +-----------+
 *                               | RECLAIMED |
 *                               +-----------+
 *                                     |
 *                                     V
 *                                  (vnfree)
 *
 * Usecount is managed with atomics.  Dropping usecount to zero may
 * happen only under vp->v_interlock; all other transitions are allowed
 * in any context.
 */

/*
 * Changes:
 *
 * - Take the vnode lock around vrevoke.  (XXX Non-genfs vnode locks?)
 * - Before vget, do vpreget instead of mutex_enter(vp->v_interlock).
 * - Call vready when a newly published vnode is ready.
 * - If you want the vnode lock after vget, take it yourself.
 */

/*
 * Idea:
 *
 * - VOP_INTERRUPT delivers a signal to whoever holds a vnode's lock.
 *   (May be hairy for LK_SHARED...)  Then `umount -f' can actually
 *   work!  What we really want is to cause pending I/O to fail (with a
 *   negative acknowledgement), but a signal is probably the closest we
 *   can get.
 */

/*
 * Vnode allocation
 */

/*
 * vnalloc: Allocate a vnode.  If mp is nonnull, this is a marker vnode
 * for it; otherwise, it is a normal vnode.  Must be freed with vnfree.
 */
struct vnode *
vnalloc(struct mount *mp)
{
	static const struct vnode zero_vnode;
	struct vnode *vp;

	if (mp == NULL)		/* not a marker */
		vdrain_vnode_created();

	vp = pool_cache_get(vnode_cache, PR_WAITOK);
	KASSERT(vp != NULL);

	*vp = zero_vnode;
	vp->v_state = VS_UNINITIALIZED;
	uvm_obj_init(&vp->v_uobj, &uvm_vnodeops, true, 0);
	cv_init(&vp->v_cv, "vnode");
	LIST_INIT(&vp->v_nclist);
	LIST_INIT(&vp->v_dnclist);

	if (mp == NULL) {
		rw_init(&vp->v_lock);
	} else {
		vp->v_mount = mp;
		vp->v_type = VBAD;
		vp->v_iflag = VI_MARKER;
	}

	return vp;
}

/*
 * vnfree: Free a vnode allocated with vnalloc.
 *
 * - vp must be UNINITIALIZED.
 */
void
vnfree(struct vnode *vp)
{
	bool marker;

	KASSERT(vp->v_state == VS_UNINITIALIZED);
	KASSERT(vp->v_usecount == 0);

	marker = vismarker(vp);
	if (marker) {
		KASSERT(vp->v_type == VBAD);
	} else {
		KASSERT(vp->v_mount == NULL);
		rw_destroy(&vp->v_lock);
	}

	KASSERT(LIST_EMPTY(&vp->v_dnclist));
	KASSERT(LIST_EMPTY(&vp->v_nclist));
	cv_destroy(&vp->v_cv);
	uvm_obj_destroy(&vp->v_uobj, true);

	pool_cache_put(vnode_cache, vp);

	if (!marker)
		vdrain_vnode_destroyed();
}

/*
 * Vnode creation
 */

/*
 * getnewvnode: Create a new vnode.
 */
int
getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *),
    kmutex_t *interlock, struct vnode **vpp)
{
	struct vnode *vp = NULL;
	int error;

	if (mp != NULL) {
		error = vfs_busy(mp, NULL);
		if (error)
			return error;
	}

	vp = vnalloc(NULL);

	vp->v_state = VS_INITIALIZING;
	vp->v_type = VNON;
	vp->v_tag = tag;
	vp->v_op = vops;
	vp->v_data = NULL;
	vp->v_writecount = 0;
	vp->v_holdcnt = 0;

	/* These should be set up by uvm_obj_init in vnalloc.  */
	KASSERT(vp->v_usecount == 0);
	KASSERT(vp->v_uobj.pgops == &uvm_vnodeops);
	KASSERT(vp->v_uobj.uo_npages == 0);
	KASSERT(TAILQ_FIRST(&vp->v_uobj.memq) == NULL);

	vp->v_size = vp->v_writesize = VSIZENOTSET;

	if (interlock) {
		mutex_obj_hold(interlock);
		uvm_obj_setlock(&vp->v_uobj, interlock);
		KASSERT(vp->v_interlock == interlock);
	}

	vfs_insmntque(vp, mp);

	if (mp != NULL) {
		if (ISSET(mp->mnt_iflag, IMNT_MPSAFE))
			vp->v_vflag |= VV_MPSAFE;
	}

	*vpp = vp;
	return 0;
}

/*
 * ungetnewvnode: Undo a getnewvnode before it is initialized.
 *
 * - vp must be INITIALIZING.
 */
void
ungetnewvnode(struct vnode *vp)
{

	KASSERT(vp->v_state == VS_INITIALIZING);
	KASSERT(vp->v_type == VNON);
	KASSERT(vp->v_data == NULL);
	KASSERT(vp->v_writecount == 0);
	KASSERT(vp->v_holdcnt == 0);
	KASSERT(!vismarker(vp));

	vfs_insmntque(vp, NULL);

	mutex_enter(vp->v_interlock);
	vp->v_state = VS_RECLAIMED;
	cv_broadcast(&vp->v_cv);
	while (0 < vp->v_usecount)
		cv_wait(&vp->v_cv, vp->v_interlock);
	KASSERT(vp->v_state == VS_RECLAIMED);
	KASSERT(vp->v_usecount == 0);
	vp->v_state = VS_UNINITIALIZED;
	mutex_exit(vp->v_interlock);

	vnfree(vp);
}

/*
 * vready: Mark a vnode initialized and ready to be used.
 *
 * - vp must be INITIALIZING.
 */
void
vready(struct vnode *vp)
{

	mutex_enter(vp->v_interlock);
	KASSERT(vp->v_state == VS_INITIALIZING);
	KASSERT(!vismarker(vp));
	if (__predict_false(atomic_inc_uint_nv(&vp->v_usecount) == 0))
		vnpanic(vp, "%s: usecount overflow", __func__);
	vp->v_state = VS_READY;
	cv_broadcast(&vp->v_cv);
	mutex_exit(vp->v_interlock);
}
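
/*
 * Example (sketch only, not part of this interface): how a file
 * system's vget-style routine might create and publish a vnode under
 * this scheme -- getnewvnode, fill in the node, publish it, then
 * vready; ungetnewvnode on the error path.  The foofs_* names, the
 * VT_FOOFS tag, and the hash-table helpers are hypothetical.
 *
 *	int
 *	foofs_vget(struct mount *mp, ino_t ino, struct vnode **vpp)
 *	{
 *		struct foofs_node *fnp;
 *		struct vnode *vp;
 *		int error;
 *
 *		error = getnewvnode(VT_FOOFS, mp, foofs_vnodeop_p, NULL, &vp);
 *		if (error)
 *			return error;
 *		error = foofs_loadnode(mp, ino, &fnp);
 *		if (error) {
 *			ungetnewvnode(vp);
 *			return error;
 *		}
 *		vp->v_type = foofs_vtype(fnp);
 *		vp->v_data = fnp;
 *		foofs_hash_insert(mp, ino, vp);		(publish)
 *		vready(vp);				(now visible to vget)
 *		*vpp = vp;
 *		return 0;
 *	}
 */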
/*
 * vpreget: Prepare for vget.  Safe under a lock or in a pserialized
 * reader.  Caller may drop locks or exit the pserialized reader only
 * after this, and must then call vget.
 */
void
vpreget(struct vnode *vp)
{

	if (__predict_false(atomic_inc_uint_nv(&vp->v_usecount) == 0))
		vnpanic(vp, "%s: usecount overflow", __func__);
}

/*
 * vget: Try to get a reference to vp.  If it's currently changing
 * state, wait until it's done.  Caller must have previously called
 * vpreget or vpreget_locked.
 */
int
vget(struct vnode *vp, int flags)
{
	unsigned int usecount;
	int error;

	mutex_enter(vp->v_interlock);
	KASSERT(!vismarker(vp));
	KASSERT(0 < vp->v_usecount);
	KASSERT(vp->v_state != VS_UNINITIALIZED);

	while ((vp->v_state == VS_INITIALIZING) ||
	    (vp->v_state == VS_DEACTIVATING) ||
	    (vp->v_state == VS_RECLAIMING)) {
		if (flags == VGET_NONBLOCK) {
			error = EWOULDBLOCK;
			goto fail;
		} else if (flags == VGET_INTR) {
			error = cv_wait_sig(&vp->v_cv, vp->v_interlock);
			if (error) {
				if ((vp->v_state == VS_READY) ||
				    (vp->v_state == VS_REVOKED) ||
				    (vp->v_state == VS_RECLAIMED)) {
					/*
					 * Interrupted, but we
					 * transitioned to a happy
					 * state at the same time.
					 *
					 * XXX Is it kosher to ignore
					 * the error?  If not, we'll
					 * have to vrele_async.
					 */
					break;
				}
				goto fail;
			}
		} else {
			cv_wait(&vp->v_cv, vp->v_interlock);
		}
	}

	if (vp->v_state == VS_RECLAIMED) {
		error = ENOENT;
		goto fail;
	}

	/* Success!  */
	KASSERT((vp->v_state == VS_READY) || (vp->v_state == VS_REVOKED));
	vremfree(vp);
	mutex_exit(vp->v_interlock);
	return 0;

fail:	KASSERT(vp->v_state != VS_READY);
	KASSERT(vp->v_state != VS_REVOKED);
	usecount = atomic_dec_uint_nv(&vp->v_usecount);
	KASSERT(usecount != UINT_MAX);
	if (vp->v_state == VS_RECLAIMED) {
		/*
		 * If the vnode has been reclaimed, and we held the
		 * last reference to it, signal whoever is responsible
		 * for freeing it that the last attempted reference has
		 * been dropped.
		 */
		if (usecount == 0)
			cv_broadcast(&vp->v_cv);
	}
	mutex_exit(vp->v_interlock);
	return error;
}
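
/*
 * Example (sketch only): looking up a cached vnode under the new
 * vpreget/vget protocol from the `Changes' list above.  vpreget is
 * issued while the data structure holding vp is still stabilized by
 * foofs_hash_lock (a hypothetical lock); vget may then sleep without
 * that lock held.
 *
 *	struct vnode *vp;
 *	int error;
 *
 *	mutex_enter(&foofs_hash_lock);
 *	vp = foofs_hash_lookup(mp, ino);
 *	if (vp != NULL)
 *		vpreget(vp);
 *	mutex_exit(&foofs_hash_lock);
 *
 *	if (vp != NULL) {
 *		error = vget(vp, 0);
 *		if (error == ENOENT)
 *			vp = NULL;	(reclaimed -- create a new vnode)
 *	}
 */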
/*
 * vref: Bump vp's usecount.
 *
 * - vp must be active.  (Otherwise use vget.)
 */
void
vref(struct vnode *vp)
{
	unsigned int usecount;

#ifdef DIAGNOSTIC
	{
		mutex_enter(vp->v_interlock);
		KASSERT((vp->v_state == VS_READY) ||
		    (vp->v_state == VS_REVOKED));
		mutex_exit(vp->v_interlock);
	}
#endif

	usecount = atomic_inc_uint_nv(&vp->v_usecount);
	if (__predict_true(2 <= usecount))
		return;
	if (usecount == 0)
		vnpanic(vp, "%s: usecount overflow", __func__);
	if (usecount == 1)
		vnpanic(vp, "%s: inactive vnode", __func__);
}

/*
 * vrele: Drop vp's usecount.  If it drops to zero, call VOP_INACTIVE
 * and maybe reclaim it.  May take and drop vp->v_interlock.
 *
 * Because this may call VOP_INACTIVE and VOP_RECLAIM synchronously, it
 * may take vp's vnode lock, so caller must not hold any locks that are
 * out of order with vp's vnode lock.  If this is an issue, or if
 * calling VOP_INACTIVE or VOP_RECLAIM is otherwise an issue, use
 * vrele_async instead.
 *
 * - vp must be active.
 */
void
vrele(struct vnode *vp)
{

	vrele_with(vp, &vdeactivate_and_reclaim);
}

/*
 * vrele_async: Drop vp's usecount.  If it drops to zero, schedule the
 * actions of vrele asynchronously.  May take and drop vp->v_interlock.
 *
 * - vp must be active.
 */
void
vrele_async(struct vnode *vp)
{

	vrele_with(vp, &vdeactivate_async);
}

static inline void
vrele_with(struct vnode *vp, void (*deactivate)(struct vnode *))
{

	if (__predict_true(atomic_dec_uint_lock_if_zero(&vp->v_usecount,
		    vp->v_interlock)))
		return;

	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT(vp->v_state == VS_READY);
	KASSERT(!vismarker(vp));

	(*deactivate)(vp);
}
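
/*
 * Example (sketch only): a caller that holds a lock which must not be
 * taken after the vnode lock drops its references with vrele_async, so
 * that VOP_INACTIVE/VOP_RECLAIM never run under that lock.  The foofs
 * cache names are hypothetical.
 *
 *	struct foofs_cachent *fcp;
 *
 *	mutex_enter(&foofs_cache_lock);
 *	while ((fcp = TAILQ_FIRST(&foofs_cache)) != NULL) {
 *		TAILQ_REMOVE(&foofs_cache, fcp, fc_entry);
 *		vrele_async(fcp->fc_vp);	(no vnode locks taken here)
 *	}
 *	mutex_exit(&foofs_cache_lock);
 */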
static bool
vdeactivate(struct vnode *vp)
{
	bool reclaim;

	KASSERT(mutex_owned(vp->v_interlock));

	vp->v_state = VS_DEACTIVATING;
	mutex_exit(vp->v_interlock);

	VOP_LOCK(vp, LK_EXCLUSIVE);	/* XXX This is silly.  */
	VOP_INACTIVE(vp, &reclaim);

	mutex_enter(vp->v_interlock);
	KASSERT(vp->v_state == VS_DEACTIVATING);

	return reclaim;
}

static void
vdeactivate_and_reclaim(struct vnode *vp)
{
	bool reclaim;

	KASSERT(mutex_owned(vp->v_interlock));

	reclaim = vdeactivate(vp);

	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT(vp->v_state == VS_DEACTIVATING);

	/*
	 * If the file system wants it reclaimed, reclaim it now.
	 * Otherwise, put it on a queue to be reclaimed when we want to
	 * chuck some vnodes.
	 */
	if (reclaim) {
		vdestroy(vp);
	} else {
		vp->v_state = VS_READY;
		vaddfree(vp);
		cv_broadcast(&vp->v_cv);
		mutex_exit(vp->v_interlock);
	}
}

static void
vdeactivate_async(struct vnode *vp)
{
	unsigned int usecount;

	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT(vp->v_state == VS_READY);

	/*
	 * Bump the usecount to pretend it's active until the thread
	 * can get to it.
	 */
	do {
		usecount = vp->v_usecount;
		if (__predict_false(usecount != 0)) {
			if (__predict_false(usecount == UINT_MAX))
				vnpanic(vp, "%s: usecount overflow",
				    __func__);
			/*
			 * Someone else got a reference to it and will
			 * release it.
			 */
			mutex_exit(vp->v_interlock);
			return;
		}
	} while (atomic_cas_uint(&vp->v_usecount, 0, 1) != 0);

#if notyet
	workqueue_enqueue(&vrele_wq, &vp->v_rele_work, NULL);
#else
	mutex_enter(&vnode_free_list_lock);
	if (vp->v_freelisthd != NULL)
		TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
	vp->v_freelisthd = &vrele_list;
	TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
	/* XXX Why delay?  */
	/* XXX Use a per-CPU workqueue instead?  */
	if ((vrele_pending == UINT_MAX) ||
	    (++vrele_pending > (desiredvnodes >> 8)))
		cv_signal(&vrele_cv);
	mutex_exit(&vnode_free_list_lock);
#endif

	mutex_exit(vp->v_interlock);
}

static void
vrele_thread(void *arg __unused)
{
	struct vnode *vp;

	for (;;) {
		mutex_enter(&vnode_free_list_lock);
		while (TAILQ_EMPTY(&vrele_list))
			cv_wait(&vrele_cv, &vnode_free_list_lock);
		vp = TAILQ_FIRST(&vrele_list);
		TAILQ_REMOVE(&vrele_list, vp, v_freelist);
		KASSERT(vp->v_freelisthd == &vrele_list);
		vp->v_freelisthd = NULL;
		/* May be zero if we overflowed, but that's OK.  */
		if (vrele_pending)
			vrele_pending--;
		mutex_exit(&vnode_free_list_lock);

		vrele(vp);
	}
}

/*
 * Revocation, reclamation, and destruction
 */

/*
 * vrevoke: Turn vp into a dead vnode, to implement VOP_REVOKE.  Will
 * take and drop vp->v_interlock.  Will drop the vnode lock when it
 * calls VOP_INACTIVE before it calls VOP_RECLAIM.
 *
 * - vp must be active.
 * - vp's vnode lock must be held.
 *
 * NOTE: You must use the same VOP_UNLOCK as deadfs!
 */
void
vrevoke(struct vnode *vp)
{
	enum vtype type;
	dev_t dev;
	struct vnode *vq;

	KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);

	mutex_enter(vp->v_interlock);
	KASSERT(vp->v_state == VS_READY);
	KASSERT(!vismarker(vp));

	/*
	 * We must hold a reference, so even though the usecount may
	 * change without the lock, it can't become inactive.
	 */
	KASSERT(1 <= vp->v_usecount);

	if ((vp->v_type == VCHR) || (vp->v_type == VBLK)) {
		type = vp->v_type;
		dev = vp->v_rdev;

		/*
		 * vrevoke1 drops vp's vnode lock and interlock, so
		 * there is no lock order to worry about between vp and
		 * all other device vnodes vq.
		 */
		vrevoke1(vp);
		while (spec_node_lookup_by_dev(type, dev, &vq) == 0) {
			VOP_LOCK(vq, LK_EXCLUSIVE);
			mutex_enter(vq->v_interlock);
			vrevoke1(vq);
			vrele(vq);
		}
	} else {
		vrevoke1(vp);
	}
}

void
vrevoke1(struct vnode *vp)
{
	bool reclaim __unused;

	KASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE);
	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT(vp->v_state == VS_READY);
	KASSERT(!vismarker(vp));

	/*
	 * Mark the vnode as being revoked, tell the file system it is
	 * inactive, and clean the vnode.  VOP_INACTIVE drops the vnode
	 * lock.
	 */
	vp->v_state = VS_REVOKED;
	mutex_exit(vp->v_interlock);
	VOP_INACTIVE(vp, &reclaim);
	mutex_enter(vp->v_interlock);
	KASSERT(vp->v_state == VS_REVOKED);
	vreclaim(vp);
	KASSERT(vp->v_state == VS_REVOKED);

	vp->v_vflag &= ~VV_ROOT;
	if ((vp->v_type == VBLK) && (spec_node_getmountedfs(vp) != NULL)) {
		vp->v_op = spec_vnodeop_p;
		vp->v_vflag &= ~VV_LOCKSWORK;
	} else {
		vp->v_op = dead_vnodeop_p;
		vp->v_vflag |= VV_LOCKSWORK;
		vp->v_tag = VT_NON;
		KNOTE(&vp->v_klist, NOTE_REVOKE);
	}

	/* Publish v_op before v_state.  */
	membar_producer();
	vp->v_state = VS_READY;
	cv_broadcast(&vp->v_cv);
	mutex_exit(vp->v_interlock);
}
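
/*
 * Example (sketch only): per the `Changes' list above, the caller of
 * vrevoke is now responsible for taking the vnode lock first.  A
 * revoke entry point might therefore look roughly like this; the vnode
 * lock is dropped inside vrevoke (via VOP_INACTIVE), so there is no
 * VOP_UNLOCK afterward.  This is a guess at the intended calling
 * convention, not the real genfs_revoke.
 *
 *	int
 *	foofs_revoke(void *v)
 *	{
 *		struct vop_revoke_args *ap = v;
 *		struct vnode *vp = ap->a_vp;
 *
 *		VOP_LOCK(vp, LK_EXCLUSIVE);
 *		vrevoke(vp);
 *		return 0;
 *	}
 */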
/*
 * vrecycle: Try to reclaim vp.  If we can, either because someone else
 * is revoking it and we can wait for them to finish, or because it is
 * inactive and we can destroy it, then return true.  If we can't
 * reclaim vp, return false.
 */
bool
vrecycle(struct vnode *vp)
{
	unsigned int usecount;

	mutex_enter(vp->v_interlock);
	KASSERT(!vismarker(vp));
	KASSERT(0 < vp->v_usecount);

	/*
	 * If someone revoked it, wait for the revocation to complete
	 * and inform caller vp has been reclaimed.
	 */
	if (vp->v_state == VS_REVOKED) {
		do
			cv_wait(&vp->v_cv, vp->v_interlock);
		while (vp->v_state == VS_REVOKED);
		mutex_exit(vp->v_interlock);
		return true;
	}

	KASSERT(vp->v_state == VS_READY);
	KASSERT(0 < vp->v_usecount);

	usecount = atomic_dec_uint_nv(&vp->v_usecount);
	if (__predict_false(usecount == UINT_MAX))
		vnpanic(vp, "%s: usecount underflow", __func__);
	if (0 < usecount) {
		mutex_exit(vp->v_interlock);
		return false;
	}

	(void)vdeactivate(vp);
	vdestroy(vp);
	return true;
}

/*
 * vdestroy: Reclaim vp, notify vget that this vnode is gone, wait for
 * stragglers, and vnfree vp.
 *
 * - vp must be DEACTIVATING or READY.
 * - vp->v_interlock must be held.
 */
static void
vdestroy(struct vnode *vp)
{

	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT((vp->v_state == VS_DEACTIVATING) ||
	    (vp->v_state == VS_READY));

	/*
	 * Mark the vnode as preparing for destruction, and clean it.
	 * vreclaim drops and retakes the interlock.
	 */
	vp->v_state = VS_RECLAIMING;
	vreclaim(vp);
	KASSERT(vp->v_state == VS_RECLAIMING);

	/* Wait until anyone who tried vget is done.  */
	vp->v_state = VS_RECLAIMED;
	while (0 < vp->v_usecount)
		cv_wait(&vp->v_cv, vp->v_interlock);

	/* We now hold the last reference, so it is OK to free vp.  */
	KASSERT(vp->v_state == VS_RECLAIMED);
	KASSERT(vp->v_usecount == 0);
	vp->v_state = VS_UNINITIALIZED;
	mutex_exit(vp->v_interlock);

	vnfree(vp);
}

/*
 * vreclaim: Someone wants to destroy or revoke vp.  Flush buffers
 * associated with it and call VOP_RECLAIM.  Drops and retakes
 * vp->v_interlock.
 *
 * - vp must be RECLAIMING or REVOKED.
 * - vp->v_interlock must be held.
 */
static void
vreclaim(struct vnode *vp)
{
	int error;

	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT((vp->v_state == VS_RECLAIMING) ||
	    (vp->v_state == VS_REVOKED));

	/* XXX Begin crud cargo-culted from old vclean.  */
	if (ISSET(vp->v_iflag, VI_EXECMAP)) {
		atomic_add_int(&uvmexp.execpages, -vp->v_uobj.uo_npages);
		atomic_add_int(&uvmexp.filepages, vp->v_uobj.uo_npages);
	}
	/* XXX Old vrelel cleared VI_WRMAP; old vclean didn't.  Hmm.  */
	vp->v_iflag &= ~(VI_TEXT | VI_EXECMAP | VI_WRMAP);
	mutex_exit(vp->v_interlock);

	VOP_LOCK(vp, LK_EXCLUSIVE);
	if ((vp->v_type == VBLK) && (spec_node_getmountedfs(vp) != NULL)) {
		/* XXX What now?  */
	} else {
		error = vinvalbuf(vp, V_SAVE, NOCRED, 1, 0, 0);
		if (error) {
			if (wapbl_vphaswapbl(vp))
				WAPBL_DISCARD(wapbl_vptomp(vp));
			error = vinvalbuf(vp, 0, NOCRED, 1, 0, 0);
			KASSERT(error == 0);
		}
		KASSERT(!ISSET(vp->v_iflag, VI_ONWORKLST));

		if ((vp->v_type == VBLK) || (vp->v_type == VCHR))
			spec_node_revoke(vp);
	}
	VOP_UNLOCK(vp);
	/* XXX End crud cargo-culted from old vclean.  */

	VOP_RECLAIM(vp);
	KASSERT(vp->v_data == NULL);
	KASSERT(vp->v_uobj.uo_npages == 0);

	/* XXX Begin crud cargo-culted from old vclean.  */
	if ((vp->v_type == VREG) && (vp->v_ractx != NULL)) {
		uvm_ra_freectx(vp->v_ractx);
		vp->v_ractx = NULL;
	}
	cache_purge(vp);
	/* XXX End crud cargo-culted from old vclean.  */

	/*
	 * Must happen after VOP_RECLAIM.  We lose access to the mount
	 * point after this.
	 */
	vfs_insmntque(vp, NULL);

	mutex_enter(vp->v_interlock);
	KASSERT(!ISSET(vp->v_iflag, VI_ONWORKLST));
	vremfree(vp);
}

/*
 * Vnode lock
 */

/*
 * vn_lock: Lock vp, or fail with ENOENT if it has been revoked.
 */
int
vn_lock(struct vnode *vp, int flags)
{
	int error;

	error = VOP_LOCK(vp, flags);
	if (error)
		return error;

#if 0
	mutex_enter(vp->v_interlock);
	if ((vp->v_state == VS_REVOKED) || (vp->v_op == dead_vnodeop_p)) {
		mutex_exit(vp->v_interlock);
		VOP_UNLOCK(vp);
		return ENOENT;
	}
	mutex_exit(vp->v_interlock);
#else
	/* State transition to REVOKED is prevented by vnode lock.  */
	if (vp->v_state == VS_REVOKED) {
		VOP_UNLOCK(vp);
		return ENOENT;
	}

	/* Read v_state before v_op.  */
	membar_consumer();

	/*
	 * State transition from REVOKED happens after setting v_op, so
	 * if we did not observe the REVOKED state, then either
	 *
	 * (a) nobody has revoked it, and the vnode lock prevents
	 * anyone from doing so now; or
	 *
	 * (b) someone revoked it before we took the vnode lock, in
	 * which case they can transition it from REVOKED to READY with
	 * only the interlock.
	 *
	 * In case (b), they would have set v_op to dead_vnodeop_p
	 * before (with a membar_producer) transitioning from REVOKED
	 * to READY.  So if we read that from v_op now (after a
	 * membar_consumer), we are guaranteed to see the effects of
	 * option (b).
	 */
	if (vp->v_op == dead_vnodeop_p) {
		VOP_UNLOCK(vp);
		return ENOENT;
	}
#endif

	KASSERT(VOP_ISLOCKED(vp) == (flags & (LK_EXCLUSIVE | LK_SHARED)));
	return 0;
}
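
/*
 * Example (sketch only): a typical caller pattern for vn_lock, where
 * ENOENT from a revoked vnode becomes a failure of the whole
 * operation.  The foofs_* names are hypothetical.
 *
 *	int
 *	foofs_do_something(struct vnode *vp)
 *	{
 *		int error;
 *
 *		error = vn_lock(vp, LK_EXCLUSIVE);
 *		if (error)
 *			return error;	(ENOENT if vp was revoked)
 *		error = foofs_operate(vp);
 *		VOP_UNLOCK(vp);
 *		return error;
 *	}
 */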
/*
 * vn_lock_deadok: Like vn_lock, but instead of failing with ENOENT
 * when vp has been revoked, wait for any revocation in progress to
 * complete and then lock the (now dead) vnode.
 */
int
vn_lock_deadok(struct vnode *vp, int flags)
{
	int error;

	error = VOP_LOCK(vp, flags);
	if (error)
		return error;

	if (vp->v_state == VS_REVOKED) {
		mutex_enter(vp->v_interlock);
		VOP_UNLOCK(vp);
		KASSERT(vp->v_state == VS_REVOKED);
		do {
			if (ISSET(flags, LK_NOWAIT)) {
				error = EWOULDBLOCK;
				break;
			} else {
				cv_wait(&vp->v_cv, vp->v_interlock);
			}
		} while (vp->v_state == VS_REVOKED);
		mutex_exit(vp->v_interlock);
		if (error)
			return error;
		error = VOP_LOCK(vp, flags);
		if (error)
			return error;
	}

	KASSERT(VOP_ISLOCKED(vp) == (flags & (LK_EXCLUSIVE | LK_SHARED)));
	return 0;
}

/*
 * Hold counts.  When there are buffers in the kernel (buffer cache or
 * uvm) for a vnode, we would prefer to destroy that vnode later.  The
 * hold count records how many such buffers there are.
 */

/*
 * vholdl: Bump vp's hold count.
 *
 * - vp must be READY or REVOKED.
 * - vp->v_interlock must be held.
 */
void
vholdl(struct vnode *vp)
{

	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT((vp->v_state == VS_READY) || (vp->v_state == VS_REVOKED));
	KASSERT(!vismarker(vp));

	if (vp->v_holdcnt++ == UINT_MAX)
		vnpanic(vp, "vnode hold count overflow");
	if (vp->v_holdcnt == 1)
		vswitchfree(vp, &vnode_free_list, &vnode_hold_list);
}

/*
 * holdrelel: Drop vp's hold count.
 *
 * - vp must be READY or REVOKED.
 * - vp->v_interlock must be held.
 * - vp must not be a marker vnode.
 */
void
holdrelel(struct vnode *vp)
{

	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT((vp->v_state == VS_READY) || (vp->v_state == VS_REVOKED));
	KASSERT(!vismarker(vp));
	KASSERT(0 < vp->v_holdcnt);

	vp->v_holdcnt--;
	if (vp->v_holdcnt == 0)
		vswitchfree(vp, &vnode_hold_list, &vnode_free_list);
}
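
/*
 * Example (sketch only): code that attaches a buffer to a vnode bumps
 * the hold count under the interlock, and drops it again when the
 * buffer goes away, so that the vnode migrates between the free list
 * and the hold list (see below).  The buffer-attach context here is
 * hypothetical.
 *
 *	mutex_enter(vp->v_interlock);
 *	vholdl(vp);			(buffer now references vp)
 *	mutex_exit(vp->v_interlock);
 *	...
 *	mutex_enter(vp->v_interlock);
 *	holdrelel(vp);			(buffer gone)
 *	mutex_exit(vp->v_interlock);
 */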
/*
 * Freelists.  Vnodes that are not actively being used stay cached in
 * case someone wants them soon, but get queued up to be destroyed when
 * the number of vnodes in the system gets too high.  Vnodes not used
 * by buffers in the kernel are on the /free list/, and get destroyed
 * first; vnodes used by buffers in the kernel are on the /hold list/,
 * and get destroyed after everything in the free list.
 *
 * Destruction happens asynchronously, in the vdrain thread.  Each file
 * system's VOP_RECLAIM cannot allocate or otherwise wait for resources
 * that allocating vnodes in any file system may require.  (Yikes!)
 */

/*
 * vaddfree: Add vp to the free list or hold list as appropriate.
 *
 * - vp must be READY.
 * - vp->v_interlock must be held.
 * - vp must not be a marker vnode.
 * - vp must not have already been put on a free list, i.e. it is being
 *   deactivated.
 */
static void
vaddfree(struct vnode *vp)
{

	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT(vp->v_state == VS_READY);
	KASSERT(!vismarker(vp));

	if (0 < vp->v_usecount)
		return;

	mutex_enter(&vnode_free_list_lock);
	KASSERT(vp->v_freelisthd == NULL);
	vp->v_freelisthd = (0 == vp->v_holdcnt?
	    &vnode_free_list : &vnode_hold_list);
	TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
	mutex_exit(&vnode_free_list_lock);
}

/*
 * vswitchfree: Switch vp from the freelist old to the freelist new.
 *
 * - vp->v_interlock must be held.
 */
static void
vswitchfree(struct vnode *vp, struct vnodelst *old, struct vnodelst *new)
{

	KASSERT(mutex_owned(vp->v_interlock));

	/* Don't bother if someone has already snagged it.  */
	if (0 < vp->v_usecount)
		return;

	mutex_enter(&vnode_free_list_lock);
	KASSERT(vp->v_freelisthd == old);
	TAILQ_REMOVE(old, vp, v_freelist);
	vp->v_freelisthd = new;
	TAILQ_INSERT_TAIL(new, vp, v_freelist);
	mutex_exit(&vnode_free_list_lock);
}

/*
 * vremfree: Remove vp from whichever freelist it is on.
 *
 * - vp must be READY, REVOKED, RECLAIMING, or RECLAIMED.
 * - vp->v_interlock must be held.
 * - vp must not be a marker vnode.
 */
static void
vremfree(struct vnode *vp)
{

	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT((vp->v_state == VS_READY) ||
	    (vp->v_state == VS_REVOKED) ||
	    (vp->v_state == VS_RECLAIMING) ||
	    (vp->v_state == VS_RECLAIMED));
	KASSERT(!vismarker(vp));

	/* Don't bother if we never got put on at all.  */
	if (vp->v_freelisthd == NULL)
		return;

	mutex_enter(&vnode_free_list_lock);
	KASSERT(vp->v_freelisthd == (0 == vp->v_holdcnt?
		&vnode_free_list : &vnode_hold_list));
	TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
	vp->v_freelisthd = NULL;
	mutex_exit(&vnode_free_list_lock);
}

static void
vdrain_vnode_created(void)
{

	mutex_enter(&vnode_free_list_lock);
	if (numvnodes == UINT_MAX)
		panic("too many vnodes");
	numvnodes++;
	if ((desiredvnodes + (desiredvnodes/10)) < numvnodes)
		cv_signal(&vdrain_cv);
	mutex_exit(&vnode_free_list_lock);
}

static void
vdrain_vnode_destroyed(void)
{

	mutex_enter(&vnode_free_list_lock);
	numvnodes--;
	mutex_exit(&vnode_free_list_lock);
}

static void
vdrain_thread(void *arg __unused)
{

	for (;;) {
		mutex_enter(&vnode_free_list_lock);
		while (numvnodes < desiredvnodes)
			cv_wait(&vdrain_cv, &vnode_free_list_lock);
		/* vdrain_1 drops vnode_free_list_lock for us.  */
		if (vdrain_1() == EBUSY)
			kpause("vdrain", false, hz, NULL);
	}
}
static int
vdrain_1(void)
{
	static struct vnodelst *freelists[] = {
		&vnode_free_list,
		&vnode_hold_list,
	};
	size_t i;
	struct vnode *vp;
	struct mount *mp;
	int error = ENOENT;

	KASSERT(mutex_owned(&vnode_free_list_lock));

	for (i = 0; i < __arraycount(freelists); i++) {
		if (TAILQ_EMPTY(freelists[i]))
			continue;
		TAILQ_FOREACH(vp, freelists[i], v_freelist) {
			/*
			 * XXX Lock order reversal!  We can get rid of
			 * this by removing vp from the queue before
			 * taking its interlock and putting it back on
			 * the queue if the fstrans can't start.
			 * However, that also requires changing
			 * everything else that manages vnodes on the
			 * freelists to handle the case that vdrain may
			 * have taken the vnode off the freelist and
			 * may be about to put it back on.  That is
			 * more trouble than it is worth to avoid a
			 * single speculative grab of this vnode.
			 */
			if (!mutex_tryenter(vp->v_interlock)) {
				error = EBUSY;
				continue;
			}

			KASSERT(vp->v_state == VS_READY);
			KASSERT(vp->v_usecount == 0);
			KASSERT(vp->v_freelisthd == freelists[i]);

			mp = vp->v_mount;
			if (fstrans_start_nowait(mp, FSTRANS_SHARED) != 0) {
				mutex_exit(vp->v_interlock);
				error = EBUSY;
				continue;
			}

			goto found;
		}
	}

	mutex_exit(&vnode_free_list_lock);
	return error;

found:	TAILQ_REMOVE(freelists[i], vp, v_freelist);
	vp->v_freelisthd = NULL;
	mutex_exit(&vnode_free_list_lock);

	/*
	 * Act as though we had just finished vrele and VOP_INACTIVE by
	 * entering the DEACTIVATING state.
	 */
	vp->v_state = VS_DEACTIVATING;
	vdestroy(vp);
	fstrans_done(mp);
	return 0;
}