From 601039039d8cb9d34ff81d5137a78972d6ac6135 Mon Sep 17 00:00:00 2001
From: Taylor R Campbell <riastradh@NetBSD.org>
Date: Tue, 15 Apr 2014 05:15:27 +0000
Subject: [PATCH 13/14] Fix tmpfs_vnode_get locking and omit needless locking
 dances.

tmpfs_vnode_get no longer takes any vnode lock, so it can be used
with any vnode locks held.  Callers no longer take tn->tn_vlock prior
to tmpfs_vnode_get, and tmpfs_vnode_get no longer allocates under
tn->tn_vlock.

This leaves a race in tmpfs_fhtovp which was there before: once
tmpfs_vnode_get drops tn->tn_vlock before calling vget, nothing
prevents the node from being freed out from under it.  To be fixed
properly in a separate commit.
---
 sys/fs/tmpfs/tmpfs_rename.c | 11 ++++-------
 sys/fs/tmpfs/tmpfs_subr.c   | 29 +++++++++++++----------------
 sys/fs/tmpfs/tmpfs_vfsops.c | 10 +++++++---
 sys/fs/tmpfs/tmpfs_vnops.c  | 42 ++----------------------------------------
 4 files changed, 26 insertions(+), 66 deletions(-)

diff --git a/sys/fs/tmpfs/tmpfs_rename.c b/sys/fs/tmpfs/tmpfs_rename.c
index 9ca9e95..a827d7b 100644
--- a/sys/fs/tmpfs/tmpfs_rename.c
+++ b/sys/fs/tmpfs/tmpfs_rename.c
@@ -439,9 +439,7 @@ tmpfs_gro_lookup(struct mount *mp, struct vnode *dvp,
 	if (dirent == NULL)
 		return ENOENT;
 
-	mutex_enter(&dirent->td_node->tn_vlock);
 	error = tmpfs_vnode_get(mp, dirent->td_node, 0, &vp);
-	/* Note: tmpfs_vnode_get always releases dirent->td_node->tn_vlock.  */
 	if (error)
 		return error;
 	error = vn_lock(vp, LK_EXCLUSIVE);
@@ -496,7 +494,7 @@ tmpfs_gro_genealogy(struct mount *mp, kauth_cred_t cred,
     struct vnode *fdvp, struct vnode *tdvp,
     struct vnode **intermediate_node_ret)
 {
-	struct vnode *vp;
+	struct vnode *vp, *dvp;
 	struct tmpfs_node *dnode;
 	int error;
 
@@ -554,12 +552,11 @@ tmpfs_gro_genealogy(struct mount *mp, kauth_cred_t cred,
 		}
 
 		/* Neither -- keep ascending the family tree.  */
-		mutex_enter(&dnode->tn_vlock);
-		vput(vp);
-		vp = NULL;	/* Just in case, for the kassert above...  */
-		error = tmpfs_vnode_get(mp, dnode, 0, &vp);
+		error = tmpfs_vnode_get(mp, dnode, 0, &dvp);
 		if (error)
 			return error;
+		vput(vp);
+		vp = dvp;
 		error = vn_lock(vp, LK_EXCLUSIVE);
 		if (error) {
 			vrele(vp);
diff --git a/sys/fs/tmpfs/tmpfs_subr.c b/sys/fs/tmpfs/tmpfs_subr.c
index dfcc762..792fd82 100644
--- a/sys/fs/tmpfs/tmpfs_subr.c
+++ b/sys/fs/tmpfs/tmpfs_subr.c
@@ -266,10 +266,7 @@ tmpfs_free_node(tmpfs_mount_t *tmp, tmpfs_node_t *node)
 }
 
 /*
- * tmpfs_vnode_get: allocate or reclaim a vnode for a specified inode.
- *
- * => Must be called with tmpfs_node_t::tn_vlock held.
- * => Returns vnode (*vpp) locked.
+ * tmpfs_vnode_get: get or create a vnode for a specified inode.
  */
 int
 tmpfs_vnode_get(struct mount *mp, tmpfs_node_t *node, int flags, vnode_t **vpp)
@@ -279,20 +276,19 @@ tmpfs_vnode_get(struct mount *mp, tmpfs_node_t *node, int flags, vnode_t **vpp)
 	int error;
 again:
 	/* If there is already a vnode, try to reclaim it. */
+	mutex_enter(&node->tn_vlock);
 	if ((vp = node->tn_vnode) != NULL) {
 		atomic_or_32(&node->tn_gen, TMPFS_RECLAIMING_BIT);
 		mutex_enter(vp->v_interlock);
 		mutex_exit(&node->tn_vlock);
-		error = vget(vp, LK_EXCLUSIVE);
-		if (error == ENOENT) {
-			mutex_enter(&node->tn_vlock);
+		error = vget(vp, 0);
+		if (error == ENOENT)
 			goto again;
-		}
 		atomic_and_32(&node->tn_gen, ~TMPFS_RECLAIMING_BIT);
-		VOP_UNLOCK(vp);
 		*vpp = vp;
 		return error;
 	}
+	mutex_exit(&node->tn_vlock);
 	if (TMPFS_NODE_RECLAIMING(node)) {
 		atomic_and_32(&node->tn_gen, ~TMPFS_RECLAIMING_BIT);
 	}
@@ -308,12 +304,17 @@ again:
 		slock = NULL;
 	}
 	error = getnewvnode(VT_TMPFS, mp, tmpfs_vnodeop_p, slock, 0, &vp);
-	if (error) {
-		mutex_exit(&node->tn_vlock);
+	if (error)
 		return error;
+
+	mutex_enter(&node->tn_vlock);
+	if (node->tn_vnode != NULL) {
+		mutex_exit(&node->tn_vlock);
+		ungetnewvnode(vp);
+		goto again;
 	}
+	mutex_exit(&node->tn_vlock);
 
-	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	vp->v_type = node->tn_type;
 
 	/* Type-specific initialization. */
@@ -341,11 +342,8 @@ again:
 	uvm_vnp_setsize(vp, node->tn_size);
 	vp->v_data = node;
 	node->tn_vnode = vp;
-	mutex_exit(&node->tn_vlock);
 
-	KASSERT(VOP_ISLOCKED(vp));
 	vready(vp);
-	VOP_UNLOCK(vp);
 	*vpp = vp;
 	return 0;
 }
@@ -401,7 +399,6 @@ tmpfs_construct_node(vnode_t *dvp, vnode_t **vpp, struct vattr *vap,
 	}
 
 	/* Get a vnode for the new file. */
-	mutex_enter(&node->tn_vlock);
 	error = tmpfs_vnode_get(dvp->v_mount, node, 0, vpp);
 	if (error) {
 		tmpfs_free_dirent(tmp, de);
diff --git a/sys/fs/tmpfs/tmpfs_vfsops.c b/sys/fs/tmpfs/tmpfs_vfsops.c
index d9fa75d..b6eaf27 100644
--- a/sys/fs/tmpfs/tmpfs_vfsops.c
+++ b/sys/fs/tmpfs/tmpfs_vfsops.c
@@ -264,7 +264,6 @@ tmpfs_root(struct mount *mp, int flags, vnode_t **vpp)
 {
 	tmpfs_node_t *node = VFS_TO_TMPFS(mp)->tm_root;
 
-	mutex_enter(&node->tn_vlock);
 	return tmpfs_vnode_get(mp, node, flags, vpp);
 }
 
@@ -291,7 +290,13 @@ tmpfs_fhtovp(struct mount *mp, struct fid *fhp, int flags, vnode_t **vpp)
 	mutex_enter(&tmp->tm_lock);
 	LIST_FOREACH(node, &tmp->tm_nodes, tn_entries) {
 		if (node->tn_id == tfh.tf_id) {
-			mutex_enter(&node->tn_vlock);
+			/*
+			 * XXX This is not correct: nothing keeps node
+			 * from going away during tmpfs_vnode_get.
+			 * Fixing this requires adding a reference
+			 * count to the node so that tmpfs_free_node
+			 * can wait for tmpfs_fhtovp calls to drain.
+			 */
 			break;
 		}
 	}
@@ -299,7 +304,6 @@ tmpfs_fhtovp(struct mount *mp, struct fid *fhp, int flags, vnode_t **vpp)
 
 	if (node == NULL)
 		return ESTALE;
-	/* Will release the tn_vlock. */
 	if ((error = tmpfs_vnode_get(mp, node, flags, vpp)) != 0)
 		return error;
 	if ((error = vn_lock(*vpp, LK_EXCLUSIVE)) != 0) {
diff --git a/sys/fs/tmpfs/tmpfs_vnops.c b/sys/fs/tmpfs/tmpfs_vnops.c
index 2cc504a..bedc0eb 100644
--- a/sys/fs/tmpfs/tmpfs_vnops.c
+++ b/sys/fs/tmpfs/tmpfs_vnops.c
@@ -171,10 +171,10 @@ tmpfs_lookup(void *v)
 	if (cachefound && *vpp == NULLVP) {
 		/* Negative cache hit. */
 		error = ENOENT;
-		goto out_unlocked;
+		goto out;
 	} else if (cachefound) {
 		error = 0;
-		goto out_unlocked;
+		goto out;
 	}
 
 	/*
@@ -202,32 +202,7 @@ tmpfs_lookup(void *v)
 			error = ENOENT;
 			goto out;
 		}
-
-		/*
-		 * Lock the parent tn_vlock before releasing the vnode lock,
-		 * and thus prevent parent from disappearing.
-		 */
-		mutex_enter(&pnode->tn_vlock);
-		VOP_UNLOCK(dvp);
-
-		/*
-		 * Get a vnode of the '..' entry and re-acquire the lock.
-		 * Release the tn_vlock.
-		 */
 		error = tmpfs_vnode_get(dvp->v_mount, pnode, 0, vpp);
-		if (error) {
-			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
-			goto out;
-		}
-		/* XXX Omit needless lock/unlock.  */
-		error = vn_lock(*vpp, LK_EXCLUSIVE);
-		if (error) {
-			vrele(*vpp);
-			*vpp = NULL;
-			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
-			goto out;
-		}
-		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
 		goto out;
 
 	} else if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
@@ -302,17 +277,7 @@ tmpfs_lookup(void *v)
 	}
 
 	/* Get a vnode for the matching entry. */
-	mutex_enter(&tnode->tn_vlock);
 	error = tmpfs_vnode_get(dvp->v_mount, tnode, 0, vpp);
-	if (error)
-		goto done;
-	/* XXX Omit needless lock/unlock.  */
-	error = vn_lock(*vpp, LK_EXCLUSIVE);
-	if (error) {
-		vrele(*vpp);
-		*vpp = NULL;
-		goto done;
-	}
 done:
 	/*
 	 * Cache the result, unless request was for creation (as it does
@@ -323,9 +288,6 @@ done:
 			    cnp->cn_flags);
 	}
 out:
-	if (error == 0 && *vpp != dvp)
-		VOP_UNLOCK(*vpp);
-out_unlocked:
 	KASSERT(VOP_ISLOCKED(dvp));
 
 	return error;
-- 
1.8.3.1