New in-memory field i_syncsize: the maximum file size whose data is guaranteed to be initialized on disk. - Set on-disk inode size from i_syncsize for regular files. - *Don't* increase on-disk inode size before sync. - Update i_syncsize in ffs_fsync. - New invariant: DIP(ip, size) == ip->i_syncsize <= ip->i_size. - i_syncsize applies only to regular files; it is ignored for all other file types. This is a draft idea -- not yet compile-tested; it needs thought first and review second, etc. The goal is to plug the garbage-data-appended-after-write bug by never increasing the on-disk file size until the data has hit the disk, using the syncer's calls to VOP_FSYNC to be notified of that, without requiring any complex partial-ordering memory-barrier cruft hooked into uvm/ubc. Index: sys/ufs/ffs/ffs_inode.c =================================================================== RCS file: /cvsroot/src/sys/ufs/ffs/ffs_inode.c,v retrieving revision 1.123 diff -p -u -r1.123 ffs_inode.c --- sys/ufs/ffs/ffs_inode.c 11 Nov 2016 10:50:16 -0000 1.123 +++ sys/ufs/ffs/ffs_inode.c 12 Mar 2017 13:58:58 -0000 @@ -343,7 +343,8 @@ ffs_truncate(struct vnode *ovp, off_t le genfs_node_wrlock(ovp); oip->i_size = length; - DIP_ASSIGN(oip, size, length); + oip->i_syncsize = MIN(oip->i_syncsize, oip->i_size); + DIP_ASSIGN(oip, size, oip->i_syncsize); uvm_vnp_setsize(ovp, length); /* * Calculate index into inode's block list of @@ -555,7 +556,7 @@ out: * Put back the real size. 
*/ oip->i_size = length; - DIP_ASSIGN(oip, size, length); + DIP_ASSIGN(oip, size, oip->i_syncsize); DIP_ADD(oip, blocks, -blocksreleased); genfs_node_unlock(ovp); oip->i_flag |= IN_CHANGE; Index: sys/ufs/ffs/ffs_vnops.c =================================================================== RCS file: /cvsroot/src/sys/ufs/ffs/ffs_vnops.c,v retrieving revision 1.125 diff -p -u -r1.125 ffs_vnops.c --- sys/ufs/ffs/ffs_vnops.c 25 Jul 2014 08:20:53 -0000 1.125 +++ sys/ufs/ffs/ffs_vnops.c 12 Mar 2017 13:58:59 -0000 @@ -370,6 +370,11 @@ ffs_fsync(void *v) if (error) { goto out; } + if (trunc_page(ap->a_offlo) == 0) { + struct inode *ip = VTOI(vp); + ip->i_syncsize = MIN(ip->i_size, round_page(ap->a_offhi)); + DIP_ASSIGN(ip, size, ip->i_syncsize); + } #ifdef WAPBL KASSERT(vp->v_type == VREG); @@ -541,6 +546,10 @@ ffs_full_fsync(struct vnode *vp, int fla (void)VOP_IOCTL(VTOI(vp)->i_devvp, DIOCCACHESYNC, &i, FWRITE, kauth_cred_get()); } + if (error == 0) { + VTOI(vp)->i_syncsize = VTOI(vp)->i_size; + DIP_ASSIGN(VTOI(vp), size, VTOI(vp)->i_syncsize); + } return error; } Index: sys/ufs/ufs/inode.h =================================================================== RCS file: /cvsroot/src/sys/ufs/ufs/inode.h,v retrieving revision 1.75 diff -p -u -r1.75 inode.h --- sys/ufs/ufs/inode.h 14 Aug 2016 11:31:41 -0000 1.75 +++ sys/ufs/ufs/inode.h 12 Mar 2017 13:58:59 -0000 @@ -147,6 +147,7 @@ struct inode { u_int16_t i_mode; /* IFMT, permissions; see below. */ int16_t i_nlink; /* File link count. */ u_int64_t i_size; /* File byte count. */ + u_int64_t i_syncsize; /* Largest byte count known to be synced. */ u_int32_t i_flags; /* Status flags (chflags). */ int32_t i_gen; /* Generation number. */ u_int32_t i_uid; /* File owner. 
*/ Index: sys/ufs/ufs/ufs_readwrite.c =================================================================== RCS file: /cvsroot/src/sys/ufs/ufs/ufs_readwrite.c,v retrieving revision 1.120 diff -p -u -r1.120 ufs_readwrite.c --- sys/ufs/ufs/ufs_readwrite.c 12 Apr 2015 22:48:38 -0000 1.120 +++ sys/ufs/ufs/ufs_readwrite.c 12 Mar 2017 13:58:59 -0000 @@ -590,7 +590,6 @@ BUFWR(struct vnode *vp, struct uio *uio, break; if (uio->uio_offset + xfersize > ip->i_size) { ip->i_size = uio->uio_offset + xfersize; - DIP_ASSIGN(ip, size, ip->i_size); uvm_vnp_setsize(vp, ip->i_size); extended = 1; } Index: sys/ufs/ufs/ufs_vnops.c =================================================================== RCS file: /cvsroot/src/sys/ufs/ufs/ufs_vnops.c,v retrieving revision 1.234 diff -p -u -r1.234 ufs_vnops.c --- sys/ufs/ufs/ufs_vnops.c 9 Nov 2016 04:12:55 -0000 1.234 +++ sys/ufs/ufs/ufs_vnops.c 12 Mar 2017 13:59:00 -0000 @@ -1875,7 +1875,6 @@ ufs_gop_alloc(struct vnode *vp, off_t of UVMHIST_LOG(ubchist, "vp %p old 0x%x new 0x%x", vp, ip->i_size, off + bsize, 0); ip->i_size = off + bsize; - DIP_ASSIGN(ip, size, ip->i_size); } off += bsize;