# HG changeset patch # Parent 3da0173326e1d9dadacc2fec1c90fb37620ceb52 # Parent 59a23b5a52c17b8ecc0f258f86df79556b3aaaca Fix (finally) the rest of PR 47040. Revert the supporting logic in -r1.190 of vfs_lookup.c, and fix the important change to set searchdir = NULL instead of searchdir = foundobj. Then supply the necessary new supporting logic to cope with some new cases where searchdir can be null. This is at the point when lookup_once crosses a mountpoint going down; the idea was to avoid coupling locks across filesystems as that has a number of potentially negative consequences. At this stage of namei, though, it's important to set searchdir to null as this is what is used later on to handle other cases arising from crossing mount points. If you set it to be the same as foundobj, that instead creates the impression that you looked up "/." on the new volume, and that causes odd things to happen in corner cases such as the one appearing in PR 47040. This fix ought to be pulled up to -6 and -7, and it probably could be safely, but given the delicacy of this code and the fact that it's taken me more than three years to find the combination of time and intestinal fortitude to do it, as well as the minor nature of the resulting wrong behavior observed so far, I think we'll let that part go. This change also exposes an annoying corner case: if you cross a mount point and the root directory vnode of the new volume is not a directory but a symlink, we now have no searchdir to follow the symlink relative to. In principle one could hang onto the searchdir from before calling lookup_once and use that, or complexify the interface of lookup_once to hang onto it as desired for this case. Alternatively one could add the necessary null checks to namei_follow and allow only absolute symlinks in this case, as for an absolute symlink one doesn't need the old searchdir. However, given that only broken filesystems have symlinks as their root vnodes, I'm not going to bother. Instead if this happens we'll just fail with ENOTDIR. diff -r 59a23b5a52c1 sys/kern/vfs_lookup.c --- a/sys/kern/vfs_lookup.c Sun Apr 10 21:48:30 2016 -0400 +++ b/sys/kern/vfs_lookup.c Sun Apr 10 23:59:40 2016 -0400 @@ -894,6 +894,13 @@ * Call VOP_LOOKUP for a single lookup; return a new search directory * (used when crossing mountpoints up or searching union mounts down) and * the found object, which for create operations may be NULL on success. + * + * Note that the new search directory may be null, which means the + * searchdir was unlocked and released. This happens in the common case + * when crossing a mount point downwards, in order to avoid coupling + * locks between different file system volumes. Importantly, this can + * happen even if the call fails. (XXX: this is gross and should be + * tidied somehow.) */ static int lookup_once(struct namei_state *state, @@ -1075,39 +1082,48 @@ * Check to see if the vnode has been mounted on; * if so find the root of the mounted file system. */ + KASSERT(searchdir != NULL); while (foundobj->v_type == VDIR && (mp = foundobj->v_mountedhere) != NULL && (cnp->cn_flags & NOCROSSMOUNT) == 0) { + + KASSERT(searchdir != foundobj); + error = vfs_busy(mp, NULL); if (error != 0) { - if (searchdir != foundobj) { - vput(foundobj); - } else { - vrele(foundobj); - } + vput(foundobj); goto done; } - if (searchdir != foundobj) { + if (searchdir != NULL) { VOP_UNLOCK(searchdir); } vput(foundobj); error = VFS_ROOT(mp, &foundobj); vfs_unbusy(mp, false, NULL); if (error) { - vn_lock(searchdir, LK_EXCLUSIVE | LK_RETRY); + if (searchdir != NULL) { + vn_lock(searchdir, LK_EXCLUSIVE | LK_RETRY); + } goto done; } /* - * avoid locking vnodes from two filesystems because it's - * prune to deadlock. eg. when using puffs. - * also, it isn't a good idea to propagate slowness of a - * filesystem up to the root directory. - * for now, only handle the common case. (ie. foundobj is VDIR) + * Avoid locking vnodes from two filesystems because + * it's prone to deadlock, e.g. when using puffs. + * Also, it isn't a good idea to propagate slowness of + * a filesystem up to the root directory. For now, + * only handle the common case, where foundobj is + * VDIR. + * + * In this case set searchdir to null to avoid using + * it again. It is not correct to set searchdir == + * foundobj here as that will confuse the caller. + * (See PR 40740.) */ - if (foundobj->v_type == VDIR) { + if (searchdir == NULL) { + /* already been here once; do nothing further */ + } else if (foundobj->v_type == VDIR) { vrele(searchdir); - *newsearchdir_ret = searchdir = foundobj; - vref(searchdir); + *newsearchdir_ret = searchdir = NULL; } else { VOP_UNLOCK(foundobj); vn_lock(searchdir, LK_EXCLUSIVE | LK_RETRY); @@ -1118,7 +1134,8 @@ *foundobj_ret = foundobj; error = 0; done: - KASSERT(VOP_ISLOCKED(*newsearchdir_ret) == LK_EXCLUSIVE); + KASSERT(*newsearchdir_ret == NULL || + VOP_ISLOCKED(*newsearchdir_ret) == LK_EXCLUSIVE); /* * *foundobj_ret is valid only if error == 0. */ @@ -1179,6 +1196,8 @@ } for (;;) { + KASSERT(searchdir != NULL); + KASSERT(VOP_ISLOCKED(searchdir) == LK_EXCLUSIVE); /* * If the directory we're on is unmounted, bail out. @@ -1214,7 +1233,9 @@ error = lookup_once(state, searchdir, &searchdir, &foundobj); if (error) { - vput(searchdir); + if (searchdir != NULL) { + vput(searchdir); + } ndp->ni_dvp = NULL; ndp->ni_vp = NULL; /* @@ -1238,6 +1259,8 @@ * the code below doesn't have to test for * foundobj == NULL. */ + /* lookup_once can't have dropped the searchdir */ + KASSERT(searchdir != NULL); break; } @@ -1252,6 +1275,28 @@ ndp->ni_next -= state->slashes; if (neverfollow) { error = EINVAL; + } else if (searchdir == NULL) { + /* + * dholland 20160410: lookup_once only + * drops searchdir if it crossed a + * mount point. Therefore, if we get + * here it means we crossed a mount + * point to a mounted filesystem whose + * root vnode is a symlink. In theory + * we could continue at this point by + * using the pre-crossing searchdir + * (e.g. just take out an extra + * reference on it before calling + * lookup_once so we still have it), + * but this will make an ugly mess and + * it should never happen in practice + * as only badly broken filesystems + * have non-directory root vnodes. (I + * have seen this sort of thing with + * NFS occasionally but even then it + * means something's badly wrong.) + */ + error = ENOTDIR; } else { /* * dholland 20110410: if we're at a @@ -1266,7 +1311,9 @@ } if (error) { KASSERT(searchdir != foundobj); - vput(searchdir); + if (searchdir != NULL) { + vput(searchdir); + } vput(foundobj); ndp->ni_dvp = NULL; ndp->ni_vp = NULL; @@ -1283,6 +1330,7 @@ * is the searchdir. */ if (cnp->cn_nameptr[0] == '\0') { + KASSERT(searchdir != NULL); foundobj = searchdir; searchdir = NULL; cnp->cn_flags |= ISLASTCN; @@ -1300,9 +1348,8 @@ */ if ((foundobj->v_type != VDIR) && (cnp->cn_flags & REQUIREDIR)) { - if (searchdir == foundobj) { - vrele(searchdir); - } else { + KASSERT(foundobj != searchdir); + if (searchdir) { vput(searchdir); } vput(foundobj); @@ -1325,7 +1372,7 @@ cnp->cn_nameptr = ndp->ni_next; if (searchdir == foundobj) { vrele(searchdir); - } else { + } else if (searchdir != NULL) { vput(searchdir); } searchdir = foundobj; @@ -1613,7 +1660,7 @@ error = lookup_once(state, startdir, &startdir, &foundobj); if (error == 0 && startdir == foundobj) { vrele(startdir); - } else { + } else if (startdir != NULL) { vput(startdir); } if (error) {