Index: sys/sys/vnode.h =================================================================== RCS file: /cvsroot/src/sys/sys/vnode.h,v retrieving revision 1.246 diff -p -u -2 -r1.246 vnode.h --- sys/sys/vnode.h 24 Mar 2014 13:42:40 -0000 1.246 +++ sys/sys/vnode.h 6 Apr 2014 09:24:13 -0000 @@ -557,4 +557,6 @@ struct vnode * void vnfree(struct vnode *); void vremfree(struct vnode *); +int vcache_lookup(struct mount *, void *, size_t, struct vnode **); +void vcache_remove(struct mount *, void *, size_t); /* see vnsubr(9) */ Index: sys/sys/mount.h =================================================================== RCS file: /cvsroot/src/sys/sys/mount.h,v retrieving revision 1.212 diff -p -u -2 -r1.212 mount.h --- sys/sys/mount.h 5 Mar 2014 09:37:29 -0000 1.212 +++ sys/sys/mount.h 6 Apr 2014 09:24:17 -0000 @@ -221,4 +221,6 @@ struct vfsops { int (*vfs_sync) (struct mount *, int, struct kauth_cred *); int (*vfs_vget) (struct mount *, ino_t, struct vnode **); + int (*vfs_load_node) (struct mount *, struct vnode *, + const void *, size_t, void **); int (*vfs_fhtovp) (struct mount *, struct fid *, struct vnode **); @@ -243,4 +245,6 @@ struct vfsops { /* XXX vget is actually file system internal. 
*/ #define VFS_VGET(MP, INO, VPP) (*(MP)->mnt_op->vfs_vget)(MP, INO, VPP) +#define VFS_LOAD_NODE(MP, VP, KEY, KEY_LEN, NEW_KEY) \ + (*(MP)->mnt_op->vfs_load_node)(MP, VP, KEY, KEY_LEN, NEW_KEY) #define VFS_RENAMELOCK_ENTER(MP) (*(MP)->mnt_op->vfs_renamelock_enter)(MP) @@ -282,4 +286,6 @@ int fsname##_statvfs(struct mount *, str int fsname##_sync(struct mount *, int, struct kauth_cred *); \ int fsname##_vget(struct mount *, ino_t, struct vnode **); \ +int fsname##_load_node(struct mount *, struct vnode *, \ + const void *, size_t, void **); \ int fsname##_fhtovp(struct mount *, struct fid *, struct vnode **); \ int fsname##_vptofh(struct vnode *, struct fid *, size_t *); \ Index: sys/kern/vfs_vnode.c =================================================================== RCS file: /cvsroot/src/sys/kern/vfs_vnode.c,v retrieving revision 1.35 diff -p -u -2 -r1.35 vfs_vnode.c --- sys/kern/vfs_vnode.c 24 Mar 2014 13:42:40 -0000 1.35 +++ sys/kern/vfs_vnode.c 6 Apr 2014 09:24:20 -0000 @@ -148,4 +148,15 @@ __KERNEL_RCSID(0, "$NetBSD: vfs_vnode.c, #define VRELEL_CHANGING_SET 0x0002 /* VI_CHANGING set by caller. 
*/ +struct vcache_key { + struct mount *vk_mount; + void *vk_key; + size_t vk_key_len; +}; +struct vcache_node { + rb_node_t vn_rb_node; + struct vnode *vn_vnode; + struct vcache_key vn_key; +}; + u_int numvnodes __cacheline_aligned; @@ -170,5 +181,10 @@ static int vrele_pending __cacheline_a static int vrele_gen __cacheline_aligned; +static kmutex_t vcache_lock __cacheline_aligned; +static kcondvar_t vcache_cv __cacheline_aligned; +static rb_tree_t vcache_rb_tree __cacheline_aligned; + static int cleanvnode(void); +static void vcache_init(void); static void vclean(vnode_t *); static void vrelel(vnode_t *, int); @@ -201,4 +217,6 @@ vfs_vnode_sysinit(void) TAILQ_INIT(&vrele_list); + vcache_init(); + mutex_init(&vrele_lock, MUTEX_DEFAULT, IPL_NONE); cv_init(&vdrain_cv, "vdrain"); @@ -238,4 +256,10 @@ vnalloc(struct mount *mp) vp->v_iflag = VI_MARKER; } else { + mutex_enter(&vnode_free_list_lock); + numvnodes++; + if (numvnodes > desiredvnodes + desiredvnodes / 10) + cv_signal(&vdrain_cv); + mutex_exit(&vnode_free_list_lock); + rw_init(&vp->v_lock); } @@ -368,9 +392,4 @@ getnewvnode(enum vtagtype tag, struct mo /* Allocate a new vnode. 
*/
-	mutex_enter(&vnode_free_list_lock);
-	numvnodes++;
-	if (numvnodes > desiredvnodes + desiredvnodes / 10)
-		cv_signal(&vdrain_cv);
-	mutex_exit(&vnode_free_list_lock);
 	vp = vnalloc(NULL);
 
@@ -1121,4 +1140,186 @@ vgone(vnode_t *vp)
 }
 
+/*
+ * Compare the key of a cache node against a lookup key.
+ *
+ * Order by mount, then key length, then key bytes.  Only the sign of
+ * each comparison is returned: a pointer or size_t difference truncated
+ * to int can be zero or carry the wrong sign on LP64.
+ */
+static int
+vcache_compare_key(void *context, const void *node, const void *key)
+{
+	const struct vcache_node * const node1 = node;
+	const struct vcache_key *key1 = &node1->vn_key;
+	const struct vcache_key * const key2 = key;
+	const uintptr_t mp1 = (uintptr_t)key1->vk_mount;
+	const uintptr_t mp2 = (uintptr_t)key2->vk_mount;
+
+	if (mp1 != mp2)
+		return mp1 < mp2 ? -1 : 1;
+	if (key1->vk_key_len != key2->vk_key_len)
+		return key1->vk_key_len < key2->vk_key_len ? -1 : 1;
+	return memcmp(key1->vk_key, key2->vk_key, key1->vk_key_len);
+}
+
+/*
+ * Compare two cache nodes by their keys.
+ */
+static int
+vcache_compare_node(void *context, const void *node1, const void *node2)
+{
+	const struct vcache_node * const node = node2;
+	const struct vcache_key * const key2 = &node->vn_key;
+
+	return vcache_compare_key(context, node1, key2);
+}
+
+/*
+ * Initialize the vnode cache: lock, condition variable and tree.
+ */
+static void
+vcache_init(void)
+{
+	static const rb_tree_ops_t vcache_rb_ops = {
+		.rbto_compare_nodes = &vcache_compare_node,
+		.rbto_compare_key = &vcache_compare_key,
+		.rbto_node_offset = offsetof(struct vcache_node, vn_rb_node),
+	};
+
+	mutex_init(&vcache_lock, MUTEX_DEFAULT, IPL_NONE);
+	cv_init(&vcache_cv, "vcache");
+	rb_tree_init(&vcache_rb_tree, &vcache_rb_ops);
+}
+
+/*
+ * Lookup a vnode / fs node pair by key and return it referenced through vpp.
+ *
+ * If the key is not cached, allocate a vnode and let the file system
+ * load the node through VFS_LOAD_NODE().  Threads looking up a key that
+ * is still being loaded wait on vcache_cv and retry.
+ * On failure *vpp is set to NULL and an error number is returned.
+ */
+int
+vcache_lookup(struct mount *mp, void *key, size_t key_len, struct vnode **vpp)
+{
+	int error;
+	void *new_key;
+	struct vnode *vp;
+	struct vcache_key vcache_key;
+	struct vcache_node *node, *new_node;
+
+	/* Always initialize, so *vpp is NULL on every error return. */
+	new_key = NULL;
+	*vpp = NULL;
+	vcache_key.vk_mount = mp;
+	vcache_key.vk_key = key;
+	vcache_key.vk_key_len = key_len;
+
+again:
+	mutex_enter(&vcache_lock);
+	node = rb_tree_find_node(&vcache_rb_tree, &vcache_key);
+
+	/* If found, take a reference or retry. */
+	if (__predict_true(node != NULL && node->vn_vnode != NULL)) {
+		vp = node->vn_vnode;
+		mutex_enter(vp->v_interlock);
+		mutex_exit(&vcache_lock);
+		error = vget(vp, 0);
+		if (error == ENOENT)
+			goto again;
+		if (error == 0)
+			*vpp = vp;
+		KASSERT((error != 0) == (*vpp == NULL));
+		return error;
+	}
+
+	/* If another thread loads this node, wait and retry. */
+	if (node != NULL /* && node->vn_node == NULL */) {
+		cv_wait(&vcache_cv, &vcache_lock);
+		mutex_exit(&vcache_lock);
+		goto again;
+	}
+	mutex_exit(&vcache_lock);
+
+	/* Allocate and initialize a new vcache / vnode pair. */
+	error = vfs_busy(mp, NULL);
+	if (error)
+		return error;
+	new_node = kmem_alloc(sizeof(*new_node), KM_SLEEP);
+	new_node->vn_vnode = NULL;
+	new_node->vn_key = vcache_key;
+	vp = vnalloc(NULL);
+	vp->v_usecount = 1;
+	vp->v_type = VNON;
+	vp->v_size = vp->v_writesize = VSIZENOTSET;
+	mutex_enter(&vcache_lock);
+	node = rb_tree_insert_node(&vcache_rb_tree, new_node);
+	mutex_exit(&vcache_lock);
+
+	/* If another thread beat us inserting this node, retry. */
+	if (node != new_node) {
+		kmem_free(new_node, sizeof(*new_node));
+		vp->v_usecount = 0;
+		vnfree(vp);
+		vfs_unbusy(mp, false, NULL);
+		goto again;
+	}
+
+	/* Load the fs node.  Exclusive as new_node->vn_vnode is NULL. */
+	error = VFS_LOAD_NODE(mp, vp, key, key_len, &new_key);
+	if (error == 0) {
+		KASSERT(new_key != NULL);
+		KASSERT(memcmp(key, new_key, key_len) == 0);
+		KASSERT(vp->v_op != NULL);
+		vfs_insmntque(vp, mp);
+		if ((mp->mnt_iflag & IMNT_MPSAFE) != 0)
+			vp->v_vflag |= VV_MPSAFE;
+		vfs_unbusy(mp, true, NULL);
+
+		/* Finished loading, finalize node and notify waiters. */
+		mutex_enter(&vcache_lock);
+		new_node->vn_key.vk_key = new_key;
+		new_node->vn_vnode = vp;
+		cv_broadcast(&vcache_cv);
+		mutex_exit(&vcache_lock);
+		*vpp = vp;
+		return 0;
+	}
+
+	/* Failed to load the fs node, cleanup and return. */
+	mutex_enter(&vcache_lock);
+	rb_tree_remove_node(&vcache_rb_tree, new_node);
+	cv_broadcast(&vcache_cv);
+	mutex_exit(&vcache_lock);
+	kmem_free(new_node, sizeof(*new_node));
+	vp->v_usecount = 0;
+	vnfree(vp);
+	vfs_unbusy(mp, false, NULL);
+	KASSERT(*vpp == NULL);
+	return error;
+}
+
+/*
+ * Remove a vnode / fs node pair from the cache.
+ * The key must be present in the cache.
+ */
+void
+vcache_remove(struct mount *mp, void *key, size_t key_len)
+{
+	struct vcache_key vcache_key;
+	struct vcache_node *node;
+
+	vcache_key.vk_mount = mp;
+	vcache_key.vk_key = key;
+	vcache_key.vk_key_len = key_len;
+
+	mutex_enter(&vcache_lock);
+	node = rb_tree_find_node(&vcache_rb_tree, &vcache_key);
+	KASSERT(node != NULL);
+	rb_tree_remove_node(&vcache_rb_tree, node);
+	mutex_exit(&vcache_lock);
+	kmem_free(node, sizeof(*node));
+}
+
 /*
  * Update outstanding I/O count and do wakeup if requested.
Index: sys/ufs/ufs/inode.h
===================================================================
RCS file: /cvsroot/src/sys/ufs/ufs/inode.h,v
retrieving revision 1.65
diff -p -u -2 -r1.65 inode.h
--- sys/ufs/ufs/inode.h	9 Jun 2013 17:55:46 -0000	1.65
+++ sys/ufs/ufs/inode.h	6 Apr 2014 09:24:23 -0000
@@ -94,5 +94,4 @@ struct lfs_inode_ext;
 struct inode {
 	struct genfs_node i_gnode;
-	LIST_ENTRY(inode) i_hash;/* Hash chain. */
 	TAILQ_ENTRY(inode) i_nextsnap; /* snapshot file list. */
 	struct vnode *i_vnode;	/* Vnode associated with this inode. 
*/ Index: sys/ufs/ufs/ufs_extern.h =================================================================== RCS file: /cvsroot/src/sys/ufs/ufs/ufs_extern.h,v retrieving revision 1.73 diff -p -u -2 -r1.73 ufs_extern.h --- sys/ufs/ufs/ufs_extern.h 16 Jun 2013 13:33:30 -0000 1.73 +++ sys/ufs/ufs/ufs_extern.h 6 Apr 2014 09:24:26 -0000 @@ -109,13 +109,4 @@ int ufs_bmaparray(struct vnode *, daddr_ int ufs_getlbns(struct vnode *, daddr_t, struct indir *, int *); -/* ufs_ihash.c */ -void ufs_ihashinit(void); -void ufs_ihashreinit(void); -void ufs_ihashdone(void); -struct vnode *ufs_ihashlookup(dev_t, ino_t); -struct vnode *ufs_ihashget(dev_t, ino_t, int); -void ufs_ihashins(struct inode *); -void ufs_ihashrem(struct inode *); - /* ufs_inode.c */ int ufs_reclaim(struct vnode *); @@ -189,4 +180,5 @@ void ufs_done(void); int ufs_start(struct mount *, int); int ufs_root(struct mount *, struct vnode **); +int ufs_vget(struct mount *, ino_t, struct vnode **); int ufs_quotactl(struct mount *, struct quotactl_args *); int ufs_fhtovp(struct mount *, struct ufid *, struct vnode **); @@ -202,5 +194,4 @@ void ufs_gop_markupdate(struct vnode *, __END_DECLS -extern kmutex_t ufs_ihash_lock; extern kmutex_t ufs_hashlock; Index: sys/ufs/ext2fs/ext2fs_lookup.c =================================================================== RCS file: /cvsroot/src/sys/ufs/ext2fs/ext2fs_lookup.c,v retrieving revision 1.74 diff -p -u -2 -r1.74 ext2fs_lookup.c --- sys/ufs/ext2fs/ext2fs_lookup.c 7 Feb 2014 15:29:23 -0000 1.74 +++ sys/ufs/ext2fs/ext2fs_lookup.c 6 Apr 2014 09:24:30 -0000 @@ -280,6 +280,5 @@ ext2fs_lookup(void *v) doff_t endsearch; /* offset to end directory search */ doff_t prevoff; /* prev entry dp->i_offset */ - struct vnode *pdp; /* saved dp during symlink work */ - struct vnode *tdp; /* returned by VFS_VGET */ + struct vnode *tdp; /* returned by vcache_lookup */ doff_t enduseful; /* pointer past last used dir slot */ u_long bmask; /* block offset mask */ @@ -595,9 +594,6 @@ found: tdp = vdp; } 
else { - if (flags & ISDOTDOT) - VOP_UNLOCK(vdp); /* race to get the inode */ - error = VFS_VGET(vdp->v_mount, foundino, &tdp); - if (flags & ISDOTDOT) - vn_lock(vdp, LK_EXCLUSIVE | LK_RETRY); + error = vcache_lookup(vdp->v_mount, + &foundino, sizeof(foundino), &tdp); if (error) return (error); @@ -607,8 +603,5 @@ found: */ if ((error = VOP_ACCESS(vdp, VWRITE, cred)) != 0) { - if (dp->i_number == foundino) - vrele(tdp); - else - vput(tdp); + vrele(tdp); return (error); } @@ -624,13 +617,8 @@ found: VTOI(tdp)->i_uid)); if (error) { - if (dp->i_number == foundino) - vrele(tdp); - else - vput(tdp); + vrele(tdp); return (EPERM); } } - if (tdp != vdp) - VOP_UNLOCK(tdp); *vpp = tdp; return (0); @@ -653,50 +641,18 @@ found: if (dp->i_number == foundino) return (EISDIR); - if (flags & ISDOTDOT) - VOP_UNLOCK(vdp); /* race to get the inode */ - error = VFS_VGET(vdp->v_mount, foundino, &tdp); - if (flags & ISDOTDOT) - vn_lock(vdp, LK_EXCLUSIVE | LK_RETRY); + error = vcache_lookup(vdp->v_mount, + &foundino, sizeof(foundino), &tdp); if (error) return (error); - if (tdp != vdp) - VOP_UNLOCK(tdp); *vpp = tdp; return (0); } - /* - * Step through the translation in the name. We do not `vput' the - * directory because we may need it again if a symbolic link - * is relative to the current directory. Instead we save it - * unlocked as "pdp". We must get the target inode before unlocking - * the directory to insure that the inode will not be removed - * before we get it. We prevent deadlock by always fetching - * inodes from the root, moving down the directory tree. Thus - * when following backward pointers ".." we must unlock the - * parent directory before getting the requested directory. - * There is a potential race condition here if both the current - * and parent directories are removed before the VFS_VGET for the - * inode associated with ".." returns. 
We hope that this occurs - * infrequently since we cannot avoid this race condition without - * implementing a sophisticated deadlock detection algorithm. - * Note also that this simple deadlock detection scheme will not - * work if the file system has any hard links other than ".." - * that point backwards in the directory structure. - */ - pdp = vdp; - if (flags & ISDOTDOT) { - VOP_UNLOCK(pdp); /* race to get the inode */ - error = VFS_VGET(vdp->v_mount, foundino, &tdp); - vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY); - if (error) { - return (error); - } - *vpp = tdp; - } else if (dp->i_number == foundino) { + if (dp->i_number == foundino) { vref(vdp); /* we want ourself, ie "." */ *vpp = vdp; } else { - error = VFS_VGET(vdp->v_mount, foundino, &tdp); + error = vcache_lookup(vdp->v_mount, + &foundino, sizeof(foundino), &tdp); if (error) return (error); @@ -708,6 +664,4 @@ found: */ cache_enter(vdp, *vpp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_flags); - if (*vpp != vdp) - VOP_UNLOCK(*vpp); return 0; } Index: sys/ufs/ext2fs/ext2fs_vfsops.c =================================================================== RCS file: /cvsroot/src/sys/ufs/ext2fs/ext2fs_vfsops.c,v retrieving revision 1.179 diff -p -u -2 -r1.179 ext2fs_vfsops.c --- sys/ufs/ext2fs/ext2fs_vfsops.c 23 Mar 2014 15:21:16 -0000 1.179 +++ sys/ufs/ext2fs/ext2fs_vfsops.c 6 Apr 2014 09:24:33 -0000 @@ -131,5 +131,6 @@ struct vfsops ext2fs_vfsops = { .vfs_statvfs = ext2fs_statvfs, .vfs_sync = ext2fs_sync, - .vfs_vget = ext2fs_vget, + .vfs_vget = ufs_vget, + .vfs_load_node = ext2fs_load_node, .vfs_fhtovp = ext2fs_fhtovp, .vfs_vptofh = ext2fs_vptofh, @@ -942,82 +943,50 @@ ext2fs_sync(struct mount *mp, int waitfo /* - * Look up a EXT2FS dinode number to find its incore vnode, otherwise read it - * in from disk. If it is in core, wait for the lock bit to clear, then - * return the inode locked. Detection and handling of mount points must be - * done by the calling routine. 
+ * Read an inode from disk and initialize this vnode / inode pair. + * Caller assures no other thread will try to load this inode. */ int -ext2fs_vget(struct mount *mp, ino_t ino, struct vnode **vpp) +ext2fs_load_node(struct mount *mp, struct vnode *vp, + const void *key, size_t key_len, void **new_key) { + ino_t ino; struct m_ext2fs *fs; struct inode *ip; struct ufsmount *ump; struct buf *bp; - struct vnode *vp; dev_t dev; int error; void *cp; + KASSERT(key_len == sizeof(ino)); + memcpy(&ino, key, key_len); ump = VFSTOUFS(mp); dev = ump->um_dev; -retry: - if ((*vpp = ufs_ihashget(dev, ino, LK_EXCLUSIVE)) != NULL) - return (0); - - /* Allocate a new vnode/inode. */ - error = getnewvnode(VT_EXT2FS, mp, ext2fs_vnodeop_p, NULL, &vp); - if (error) { - *vpp = NULL; - return (error); - } - ip = pool_get(&ext2fs_inode_pool, PR_WAITOK); - - mutex_enter(&ufs_hashlock); - if ((*vpp = ufs_ihashget(dev, ino, 0)) != NULL) { - mutex_exit(&ufs_hashlock); - ungetnewvnode(vp); - pool_put(&ext2fs_inode_pool, ip); - goto retry; - } + fs = ump->um_e2fs; - vp->v_vflag |= VV_LOCKSWORK; + /* Read in the disk contents for the inode, copy into the inode. */ + error = bread(ump->um_devvp, EXT2_FSBTODB(fs, ino_to_fsba(fs, ino)), + (int)fs->e2fs_bsize, NOCRED, 0, &bp); + if (error) + return error; + /* Allocate and initialize inode. */ + ip = pool_get(&ext2fs_inode_pool, PR_WAITOK); memset(ip, 0, sizeof(struct inode)); + vp->v_tag = VT_EXT2FS; + vp->v_op = ext2fs_vnodeop_p; + vp->v_vflag |= VV_LOCKSWORK; vp->v_data = ip; ip->i_vnode = vp; ip->i_ump = ump; - ip->i_e2fs = fs = ump->um_e2fs; + ip->i_e2fs = fs; ip->i_dev = dev; ip->i_number = ino; ip->i_e2fs_last_lblk = 0; ip->i_e2fs_last_blk = 0; - genfs_node_init(vp, &ext2fs_genfsops); - - /* - * Put it onto its hash chain and lock it so that other requests for - * this inode will block if they arrive while we are sleeping waiting - * for old data structures to be purged or for the contents of the - * disk portion of this inode to be read. 
- */ - - ufs_ihashins(ip); - mutex_exit(&ufs_hashlock); - /* Read in the disk contents for the inode, copy into the inode. */ - error = bread(ump->um_devvp, EXT2_FSBTODB(fs, ino_to_fsba(fs, ino)), - (int)fs->e2fs_bsize, NOCRED, 0, &bp); - if (error) { - - /* - * The inode does not contain anything useful, so it would - * be misleading to leave it on its hash chain. With mode - * still zero, it will be unlinked and returned to the free - * list by vput(). - */ + /* Initialize genfs node. */ + genfs_node_init(vp, &ext2fs_genfsops); - vput(vp); - *vpp = NULL; - return (error); - } cp = (char *)bp->b_data + (ino_to_fsbo(fs, ino) * EXT2_DINODE_SIZE(fs)); ip->i_din.e2fs_din = pool_get(&ext2fs_dinode_pool, PR_WAITOK); @@ -1034,18 +1003,8 @@ retry: } - /* - * Initialize the vnode from the inode, check for aliases. - */ - - error = ext2fs_vinit(mp, ext2fs_specop_p, ext2fs_fifoop_p, &vp); - if (error) { - vput(vp); - *vpp = NULL; - return (error); - } - /* - * Finish inode initialization now that aliasing has been resolved. - */ + /* Initialize the vnode from the inode. */ + ext2fs_vinit(mp, ext2fs_specop_p, ext2fs_fifoop_p, &vp); + /* Finish inode initialization. 
*/ ip->i_devvp = ump->um_devvp; vref(ip->i_devvp); @@ -1064,6 +1023,6 @@ retry: } uvm_vnp_setsize(vp, ext2fs_size(ip)); - *vpp = vp; - return (0); + *new_key = &ip->i_number; + return 0; } Index: sys/ufs/ffs/ffs_vfsops.c =================================================================== RCS file: /cvsroot/src/sys/ufs/ffs/ffs_vfsops.c,v retrieving revision 1.295 diff -p -u -2 -r1.295 ffs_vfsops.c --- sys/ufs/ffs/ffs_vfsops.c 23 Mar 2014 15:21:16 -0000 1.295 +++ sys/ufs/ffs/ffs_vfsops.c 6 Apr 2014 09:24:37 -0000 @@ -142,5 +142,6 @@ struct vfsops ffs_vfsops = { .vfs_statvfs = ffs_statvfs, .vfs_sync = ffs_sync, - .vfs_vget = ffs_vget, + .vfs_vget = ufs_vget, + .vfs_load_node = ffs_load_node, .vfs_fhtovp = ffs_fhtovp, .vfs_vptofh = ffs_vptofh, @@ -1724,59 +1725,41 @@ ffs_sync(struct mount *mp, int waitfor, /* - * Look up a FFS dinode number to find its incore vnode, otherwise read it - * in from disk. If it is in core, wait for the lock bit to clear, then - * return the inode locked. Detection and handling of mount points must be - * done by the calling routine. + * Read an inode from disk and initialize this vnode / inode pair. + * Caller assures no other thread will try to load this inode. */ int -ffs_vget(struct mount *mp, ino_t ino, struct vnode **vpp) +ffs_load_node(struct mount *mp, struct vnode *vp, + const void *key, size_t key_len, void **new_key) { + ino_t ino; struct fs *fs; struct inode *ip; struct ufsmount *ump; struct buf *bp; - struct vnode *vp; dev_t dev; int error; + KASSERT(key_len == sizeof(ino)); + memcpy(&ino, key, key_len); ump = VFSTOUFS(mp); dev = ump->um_dev; + fs = ump->um_fs; - retry: - if ((*vpp = ufs_ihashget(dev, ino, LK_EXCLUSIVE)) != NULL) - return (0); + /* Read in the disk contents for the inode. */ + error = bread(ump->um_devvp, FFS_FSBTODB(fs, ino_to_fsba(fs, ino)), + (int)fs->fs_bsize, NOCRED, 0, &bp); + if (error) + return error; - /* Allocate a new vnode/inode. 
*/ - error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, NULL, &vp); - if (error) { - *vpp = NULL; - return (error); - } + /* Allocate and initialize inode. */ ip = pool_cache_get(ffs_inode_cache, PR_WAITOK); - - /* - * If someone beat us to it, put back the freshly allocated - * vnode/inode pair and retry. - */ - mutex_enter(&ufs_hashlock); - if (ufs_ihashget(dev, ino, 0) != NULL) { - mutex_exit(&ufs_hashlock); - ungetnewvnode(vp); - pool_cache_put(ffs_inode_cache, ip); - goto retry; - } - - vp->v_vflag |= VV_LOCKSWORK; - - /* - * XXX MFS ends up here, too, to allocate an inode. Should we - * XXX create another pool for MFS inodes? - */ - memset(ip, 0, sizeof(struct inode)); + vp->v_tag = VT_UFS; + vp->v_op = ffs_vnodeop_p; + vp->v_vflag |= VV_LOCKSWORK; vp->v_data = ip; ip->i_vnode = vp; ip->i_ump = ump; - ip->i_fs = fs = ump->um_fs; + ip->i_fs = fs; ip->i_dev = dev; ip->i_number = ino; @@ -1785,36 +1768,7 @@ ffs_vget(struct mount *mp, ino_t ino, st #endif - /* - * Initialize genfs node, we might proceed to destroy it in - * error branches. - */ + /* Initialize genfs node. */ genfs_node_init(vp, &ffs_genfsops); - /* - * Put it onto its hash chain and lock it so that other requests for - * this inode will block if they arrive while we are sleeping waiting - * for old data structures to be purged or for the contents of the - * disk portion of this inode to be read. - */ - - ufs_ihashins(ip); - mutex_exit(&ufs_hashlock); - - /* Read in the disk contents for the inode, copy into the inode. */ - error = bread(ump->um_devvp, FFS_FSBTODB(fs, ino_to_fsba(fs, ino)), - (int)fs->fs_bsize, NOCRED, 0, &bp); - if (error) { - - /* - * The inode does not contain anything useful, so it would - * be misleading to leave it on its hash chain. With mode - * still zero, it will be unlinked and returned to the free - * list by vput(). 
- */ - - vput(vp); - *vpp = NULL; - return (error); - } if (ip->i_ump->um_fstype == UFS1) ip->i_din.ffs1_din = pool_cache_get(ffs_dinode1_cache, @@ -1826,15 +1780,8 @@ ffs_vget(struct mount *mp, ino_t ino, st brelse(bp, 0); - /* - * Initialize the vnode from the inode, check for aliases. - * Note that the underlying vnode may have changed. - */ - + /* Initialize the vnode from the inode. */ ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp); - /* - * Finish inode initialization now that aliasing has been resolved. - */ - + /* Finish inode initialization. */ ip->i_devvp = ump->um_devvp; vref(ip->i_devvp); @@ -1850,6 +1797,6 @@ ffs_vget(struct mount *mp, ino_t ino, st } /* XXX */ uvm_vnp_setsize(vp, ip->i_size); - *vpp = vp; - return (0); + *new_key = &ip->i_number; + return 0; } Index: sys/ufs/mfs/mfs_vfsops.c =================================================================== RCS file: /cvsroot/src/sys/ufs/mfs/mfs_vfsops.c,v retrieving revision 1.106 diff -p -u -2 -r1.106 mfs_vfsops.c --- sys/ufs/mfs/mfs_vfsops.c 23 Mar 2014 15:21:17 -0000 1.106 +++ sys/ufs/mfs/mfs_vfsops.c 6 Apr 2014 09:24:41 -0000 @@ -100,5 +100,6 @@ struct vfsops mfs_vfsops = { .vfs_statvfs = mfs_statvfs, .vfs_sync = ffs_sync, - .vfs_vget = ffs_vget, + .vfs_vget = ufs_vget, + .vfs_load_node = ffs_load_node, .vfs_fhtovp = ffs_fhtovp, .vfs_vptofh = ffs_vptofh, Index: sys/ufs/ufs/ufs_inode.c =================================================================== RCS file: /cvsroot/src/sys/ufs/ufs/ufs_inode.c,v retrieving revision 1.89 diff -p -u -2 -r1.89 ufs_inode.c --- sys/ufs/ufs/ufs_inode.c 22 Jan 2013 09:39:18 -0000 1.89 +++ sys/ufs/ufs/ufs_inode.c 6 Apr 2014 09:24:44 -0000 @@ -192,7 +192,7 @@ ufs_reclaim(struct vnode *vp) /* - * Remove the inode from its hash chain. + * Remove the inode from the vnode cache. 
*/ - ufs_ihashrem(ip); + vcache_remove(vp->v_mount, &ip->i_number, sizeof(ip->i_number)); if (ip->i_devvp) { Index: sys/ufs/ufs/ufs_lookup.c =================================================================== RCS file: /cvsroot/src/sys/ufs/ufs/ufs_lookup.c,v retrieving revision 1.129 diff -p -u -2 -r1.129 ufs_lookup.c --- sys/ufs/ufs/ufs_lookup.c 7 Feb 2014 15:29:23 -0000 1.129 +++ sys/ufs/ufs/ufs_lookup.c 6 Apr 2014 09:24:47 -0000 @@ -138,6 +138,5 @@ ufs_lookup(void *v) doff_t endsearch; /* offset to end directory search */ doff_t prevoff; /* previous value of ulr_offset */ - struct vnode *pdp; /* saved dp during symlink work */ - struct vnode *tdp; /* returned by VFS_VGET */ + struct vnode *tdp; /* returned by vcache_lookup */ doff_t enduseful; /* pointer past last used dir slot. used for directory truncation. */ @@ -567,9 +566,6 @@ found: tdp = vdp; } else { - if (flags & ISDOTDOT) - VOP_UNLOCK(vdp); /* race to get the inode */ - error = VFS_VGET(vdp->v_mount, foundino, &tdp); - if (flags & ISDOTDOT) - vn_lock(vdp, LK_EXCLUSIVE | LK_RETRY); + error = vcache_lookup(vdp->v_mount, + &foundino, sizeof(foundino), &tdp); if (error) goto out; @@ -580,8 +576,5 @@ found: error = VOP_ACCESS(vdp, VWRITE, cred); if (error) { - if (dp->i_number == foundino) - vrele(tdp); - else - vput(tdp); + vrele(tdp); goto out; } @@ -597,8 +590,5 @@ found: VTOI(tdp)->i_uid)); if (error) { - if (dp->i_number == foundino) - vrele(tdp); - else - vput(tdp); + vrele(tdp); error = EPERM; goto out; @@ -628,9 +618,6 @@ found: goto out; } - if (flags & ISDOTDOT) - VOP_UNLOCK(vdp); /* race to get the inode */ - error = VFS_VGET(vdp->v_mount, foundino, &tdp); - if (flags & ISDOTDOT) - vn_lock(vdp, LK_EXCLUSIVE | LK_RETRY); + error = vcache_lookup(vdp->v_mount, + &foundino, sizeof(foundino), &tdp); if (error) goto out; @@ -640,37 +627,10 @@ found: } - /* - * Step through the translation in the name. 
We do not `vput' the - * directory because we may need it again if a symbolic link - * is relative to the current directory. Instead we save it - * unlocked as "pdp". We must get the target inode before unlocking - * the directory to insure that the inode will not be removed - * before we get it. We prevent deadlock by always fetching - * inodes from the root, moving down the directory tree. Thus - * when following backward pointers ".." we must unlock the - * parent directory before getting the requested directory. - * There is a potential race condition here if both the current - * and parent directories are removed before the VFS_VGET for the - * inode associated with ".." returns. We hope that this occurs - * infrequently since we cannot avoid this race condition without - * implementing a sophisticated deadlock detection algorithm. - * Note also that this simple deadlock detection scheme will not - * work if the file system has any hard links other than ".." - * that point backwards in the directory structure. - */ - pdp = vdp; - if (flags & ISDOTDOT) { - VOP_UNLOCK(pdp); /* race to get the inode */ - error = VFS_VGET(vdp->v_mount, foundino, &tdp); - vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY); - if (error) { - goto out; - } - *vpp = tdp; - } else if (dp->i_number == foundino) { + if (dp->i_number == foundino) { vref(vdp); /* we want ourself, ie "." 
*/
 		*vpp = vdp;
 	} else {
-		error = VFS_VGET(vdp->v_mount, foundino, &tdp);
+		error = vcache_lookup(vdp->v_mount,
+		    &foundino, sizeof(foundino), &tdp);
 		if (error)
 			goto out;
@@ -685,6 +645,4 @@ found:
 
 out:
-	if (error == 0 && *vpp != vdp)
-		VOP_UNLOCK(*vpp);
 	fstrans_done(vdp->v_mount);
 	return error;
Index: sys/ufs/ufs/ufs_vfsops.c
===================================================================
RCS file: /cvsroot/src/sys/ufs/ufs/ufs_vfsops.c,v
retrieving revision 1.52
diff -p -u -2 -r1.52 ufs_vfsops.c
--- sys/ufs/ufs/ufs_vfsops.c	22 Jan 2013 09:39:18 -0000	1.52
+++ sys/ufs/ufs/ufs_vfsops.c	6 Apr 2014 09:24:51 -0000
@@ -98,4 +98,26 @@ ufs_root(struct mount *mp, struct vnode
 
 /*
+ * Look up and return a vnode/inode pair by inode number.
+ *
+ * On success *vpp is referenced and exclusively locked; on failure
+ * *vpp is set to NULL and an error number is returned.
+ */
+int
+ufs_vget(struct mount *mp, ino_t ino, struct vnode **vpp)
+{
+	int error;
+
+	error = vcache_lookup(mp, &ino, sizeof(ino), vpp);
+	if (error == 0) {
+		error = vn_lock(*vpp, LK_EXCLUSIVE);
+		if (error)
+			vrele(*vpp);
+	}
+	if (error)
+		*vpp = NULL;
+	return error;
+}
+
+/*
  * Do operations associated with quotas
  */
@@ -246,5 +268,4 @@ ufs_init(void)
 	    "ufsdir", NULL, IPL_NONE, NULL, NULL, NULL);
 
-	ufs_ihashinit();
 #if defined(QUOTA) || defined(QUOTA2)
 	dqinit();
@@ -261,5 +282,4 @@ void
 ufs_reinit(void)
 {
-	ufs_ihashreinit();
 #if defined(QUOTA) || defined(QUOTA2)
 	dqreinit();
@@ -276,5 +296,4 @@ ufs_done(void)
 		return;
 
-	ufs_ihashdone();
 #if defined(QUOTA) || defined(QUOTA2)
 	dqdone();
Index: sys/ufs/files.ufs
===================================================================
RCS file: /cvsroot/src/sys/ufs/files.ufs,v
retrieving revision 1.34
diff -p -u -2 -r1.34 files.ufs
--- sys/ufs/files.ufs	18 Mar 2014 18:20:44 -0000	1.34
+++ sys/ufs/files.ufs	6 Apr 2014 09:24:54 -0000
@@ -95,5 +95,4 @@ file	ufs/ufs/ufs_bmap.c		ffs | mfs | ext
 file	ufs/ufs/ufs_dirhash.c		(ffs | mfs | ext2fs | chfs) & ufs_dirhash
 file	ufs/ufs/ufs_extattr.c		(ffs | mfs) & ufs_extattr
-file	ufs/ufs/ufs_ihash.c		ffs | mfs | ext2fs
 file	ufs/ufs/ufs_inode.c		ffs 
| mfs | ext2fs file ufs/ufs/ufs_lookup.c ffs | mfs | ext2fs | chfs Index: sys/modules/ffs/Makefile =================================================================== RCS file: /cvsroot/src/sys/modules/ffs/Makefile,v retrieving revision 1.8 diff -p -u -2 -r1.8 Makefile --- sys/modules/ffs/Makefile 10 May 2012 07:51:34 -0000 1.8 +++ sys/modules/ffs/Makefile 6 Apr 2014 09:24:57 -0000 @@ -11,5 +11,5 @@ CWARNFLAGS.clang= -Wno-conversion .PATH: ${S}/ufs/ufs -SRCS= ufs_bmap.c ufs_dirhash.c ufs_extattr.c ufs_ihash.c ufs_inode.c \ +SRCS= ufs_bmap.c ufs_dirhash.c ufs_extattr.c ufs_inode.c \ ufs_lookup.c ufs_quota.c ufs_quota1.c ufs_quota2.c ufs_rename.c \ ufs_vfsops.c ufs_vnops.c ufs_wapbl.c quota2_subr.c Index: sys/rump/fs/lib/libffs/Makefile =================================================================== RCS file: /cvsroot/src/sys/rump/fs/lib/libffs/Makefile,v retrieving revision 1.14 diff -p -u -2 -r1.14 Makefile --- sys/rump/fs/lib/libffs/Makefile 9 May 2012 00:21:17 -0000 1.14 +++ sys/rump/fs/lib/libffs/Makefile 6 Apr 2014 09:25:00 -0000 @@ -10,5 +10,5 @@ SRCS= ffs_alloc.c ffs_appleufs.c ffs_bal ffs_wapbl.c ffs_quota2.c -SRCS+= ufs_bmap.c ufs_dirhash.c ufs_extattr.c ufs_ihash.c ufs_inode.c \ +SRCS+= ufs_bmap.c ufs_dirhash.c ufs_extattr.c ufs_inode.c \ ufs_lookup.c ufs_rename.c ufs_vfsops.c ufs_vnops.c ufs_wapbl.c \ ufs_quota.c ufs_quota2.c quota2_subr.c Index: usr.bin/vmstat/vmstat.c =================================================================== RCS file: /cvsroot/src/usr.bin/vmstat/vmstat.c,v retrieving revision 1.191 diff -p -u -2 -r1.191 vmstat.c --- usr.bin/vmstat/vmstat.c 19 Feb 2014 20:42:14 -0000 1.191 +++ usr.bin/vmstat/vmstat.c 6 Apr 2014 09:25:03 -0000 @@ -1471,8 +1471,4 @@ struct kernel_hash { HASH_LIST, offsetof(struct buf, b_hash) }, { - "inode cache (ihash)", - X_IHASH, X_IHASHTBL, - HASH_LIST, offsetof(struct inode, i_hash) - }, { "ipv4 address -> interface hash", X_IFADDRHASH, X_IFADDRHASHTBL,