Index: kern/vfs_vnode.c
===================================================================
RCS file: /cvsroot/src/sys/kern/vfs_vnode.c,v
retrieving revision 1.113
diff -u -p -r1.113 vfs_vnode.c
--- kern/vfs_vnode.c	27 Feb 2020 22:12:54 -0000	1.113
+++ kern/vfs_vnode.c	17 Mar 2020 22:41:14 -0000
@@ -1678,6 +1678,7 @@ vcache_reclaim(vnode_t *vp)
 		cpu_count(CPU_COUNT_FILEPAGES, vp->v_uobj.uo_npages);
 	}
 	vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP);
+	vp->v_iflag |= VI_DEADCHECK; /* for genfs_getpages() */
 	mutex_exit(vp->v_interlock);
 	rw_exit(vp->v_uobj.vmobjlock);
 
Index: miscfs/genfs/genfs_io.c
===================================================================
RCS file: /cvsroot/src/sys/miscfs/genfs/genfs_io.c,v
retrieving revision 1.94
diff -u -p -r1.94 genfs_io.c
--- miscfs/genfs/genfs_io.c	17 Mar 2020 18:31:38 -0000	1.94
+++ miscfs/genfs/genfs_io.c	17 Mar 2020 22:41:14 -0000
@@ -103,7 +103,7 @@ genfs_getpages(void *v)
 	} */ * const ap = v;
 
 	off_t diskeof, memeof;
-	int i, error, npages;
+	int i, error, npages, iflag;
 	const int flags = ap->a_flags;
 	struct vnode * const vp = ap->a_vp;
 	struct uvm_object * const uobj = &vp->v_uobj;
@@ -125,18 +125,35 @@ genfs_getpages(void *v)
 	KASSERT(vp->v_type == VREG || vp->v_type == VDIR ||
 	    vp->v_type == VLNK || vp->v_type == VBLK);
 
+	/*
+	 * the object must be locked. it can only be a read lock when
+	 * processing a read fault with PGO_LOCKED | PGO_NOBUSY.
+	 */
+
+	KASSERT(rw_lock_held(uobj->vmobjlock));
+	KASSERT(rw_write_held(uobj->vmobjlock) ||
+	    ((~flags & (PGO_LOCKED | PGO_NOBUSY)) == 0 && !memwrite));
+
 #ifdef DIAGNOSTIC
 	if ((flags & PGO_JOURNALLOCKED) && vp->v_mount->mnt_wapbl)
 		WAPBL_JLOCK_ASSERT(vp->v_mount);
 #endif
 
-	mutex_enter(vp->v_interlock);
-	error = vdead_check(vp, VDEAD_NOWAIT);
-	mutex_exit(vp->v_interlock);
-	if (error) {
-		if ((flags & PGO_LOCKED) == 0)
-			rw_exit(uobj->vmobjlock);
-		return error;
+	/*
+	 * check for reclaimed vnode. v_interlock is not held here, but
+	 * VI_DEADCHECK is set with vmobjlock held.
+	 */
+
+	iflag = atomic_load_relaxed(&vp->v_iflag);
+	if (__predict_false((iflag & VI_DEADCHECK) != 0)) {
+		mutex_enter(vp->v_interlock);
+		error = vdead_check(vp, VDEAD_NOWAIT);
+		mutex_exit(vp->v_interlock);
+		if (error) {
+			if ((flags & PGO_LOCKED) == 0)
+				rw_exit(uobj->vmobjlock);
+			return error;
+		}
 	}
 
 startover:
@@ -217,9 +234,11 @@ startover:
 			KASSERT(pg == NULL || pg == PGO_DONTCARE);
 		}
 #endif /* defined(DEBUG) */
-		nfound = uvn_findpages(uobj, origoffset, &npages,
+		nfound = uvn_findpages(uobj, origoffset, &npages,
 		    ap->a_m, NULL,
-		    UFP_NOWAIT|UFP_NOALLOC|(memwrite ? UFP_NORDONLY : 0));
+		    UFP_NOWAIT | UFP_NOALLOC |
+		    (memwrite ? UFP_NORDONLY : 0) |
+		    ((flags & PGO_NOBUSY) != 0 ? UFP_NOBUSY : 0));
 		KASSERT(npages == *ap->a_count);
 		if (nfound == 0) {
 			error = EBUSY;
@@ -230,7 +249,9 @@ startover:
 		 * the file behind us.
 		 */
 		if (!genfs_node_rdtrylock(vp)) {
-			genfs_rel_pages(ap->a_m, npages);
+			if ((flags & PGO_NOBUSY) == 0) {
+				genfs_rel_pages(ap->a_m, npages);
+			}
 			/*
 			 * restore the array.
			 */
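
Note on the two changes above: vcache_reclaim() sets VI_DEADCHECK while it holds both v_interlock and the object's vmobjlock, and genfs_getpages() already holds vmobjlock when it runs, so a relaxed atomic load of v_iflag is enough to decide whether the locked vdead_check() call is needed at all. The common case (a live vnode) no longer touches v_interlock. Below is a minimal userland sketch of that "set the flag under a lock, test it with a relaxed load first" pattern; it is not part of the patch, a single pthread mutex stands in for both kernel locks, and struct obj, obj_lookup(), obj_reclaim() and OBJ_DEADCHECK are invented names.

/*
 * Minimal sketch of the VI_DEADCHECK idea (illustrative names only).
 * The flag is only ever set while the lock is held; a relaxed load that
 * sees it clear lets the fast path skip the lock entirely, and a stale
 * read can only push a caller onto the slow path, never skip a check.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define	OBJ_DEADCHECK	0x01		/* stands in for VI_DEADCHECK */

struct obj {
	pthread_mutex_t	lock;		/* stands in for v_interlock */
	atomic_uint	flags;		/* stands in for v_iflag */
	bool		dead;		/* authoritative state, under lock */
};

static int
obj_lookup(struct obj *o)
{
	unsigned flags;

	/* fast path: flag clear means the object was never reclaimed. */
	flags = atomic_load_explicit(&o->flags, memory_order_relaxed);
	if (__builtin_expect((flags & OBJ_DEADCHECK) != 0, 0)) {
		/* slow path: take the lock and ask the real question. */
		bool dead;

		pthread_mutex_lock(&o->lock);
		dead = o->dead;
		pthread_mutex_unlock(&o->lock);
		if (dead)
			return -1;
	}
	return 0;
}

static void
obj_reclaim(struct obj *o)
{

	/* set the flag only while the lock is held, as vcache_reclaim() does. */
	pthread_mutex_lock(&o->lock);
	o->dead = true;
	atomic_fetch_or_explicit(&o->flags, OBJ_DEADCHECK,
	    memory_order_relaxed);
	pthread_mutex_unlock(&o->lock);
}

int
main(void)
{
	struct obj o = { .lock = PTHREAD_MUTEX_INITIALIZER };

	printf("before reclaim: %d\n", obj_lookup(&o));	/* prints 0 */
	obj_reclaim(&o);
	printf("after reclaim: %d\n", obj_lookup(&o));	/* prints -1 */
	return 0;
}

The flag is sticky: once set it is never cleared for the lifetime of the object, which is what makes the unlocked test safe.
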
Index: nfs/nfs_bio.c
===================================================================
RCS file: /cvsroot/src/sys/nfs/nfs_bio.c,v
retrieving revision 1.194
diff -u -p -r1.194 nfs_bio.c
--- nfs/nfs_bio.c	23 Feb 2020 15:46:41 -0000	1.194
+++ nfs/nfs_bio.c	17 Mar 2020 22:41:14 -0000
@@ -1260,6 +1260,19 @@ nfs_getpages(void *v)
 	bool v3 = NFS_ISV3(vp);
 	bool write = (ap->a_access_type & VM_PROT_WRITE) != 0;
 	bool locked = (ap->a_flags & PGO_LOCKED) != 0;
+	bool nobusy = (ap->a_flags & PGO_NOBUSY);
+
+	/*
+	 * XXX NFS wants to modify the pages below and that can't be done
+	 * with a read lock. We can't upgrade the lock here because it
+	 * would screw up UVM fault processing. Have NFS take the I/O
+	 * path.
+	 */
+	if (locked && rw_lock_op(uobj->vmobjlock) == RW_READER) {
+		*ap->a_count = 0;
+		ap->a_m[ap->a_centeridx] = NULL;
+		return EBUSY;
+	}
 
 	/*
 	 * If we are not locked we are not really using opgs,
@@ -1341,7 +1354,8 @@ nfs_getpages(void *v)
 		 * available and put back original pgs array.
 		 */
 
-		uvm_page_unbusy(pgs, npages);
+		if (nobusy == false)
+			uvm_page_unbusy(pgs, npages);
 		*ap->a_count = 0;
 		memcpy(pgs, opgs, npages *
 		    sizeof(struct vm_pages *));
Index: sys/cpu_data.h
===================================================================
RCS file: /cvsroot/src/sys/sys/cpu_data.h,v
retrieving revision 1.49
diff -u -p -r1.49 cpu_data.h
--- sys/cpu_data.h	15 Jan 2020 17:55:44 -0000	1.49
+++ sys/cpu_data.h	17 Mar 2020 22:41:15 -0000
@@ -93,8 +93,8 @@ enum cpu_count {
 	CPU_COUNT_FILEUNKNOWN,
 	CPU_COUNT_FILECLEAN,
 	CPU_COUNT_FILEDIRTY,
-	CPU_COUNT__UNUSED1,
-	CPU_COUNT__UNUSED2,
+	CPU_COUNT_FLTUP,
+	CPU_COUNT_FLTNOUP,
 	CPU_COUNT_MAX		/* 48 */
 };
 
Index: sys/vnode.h
===================================================================
RCS file: /cvsroot/src/sys/sys/vnode.h,v
retrieving revision 1.293
diff -u -p -r1.293 vnode.h
--- sys/vnode.h	14 Mar 2020 20:45:23 -0000	1.293
+++ sys/vnode.h	17 Mar 2020 22:41:15 -0000
@@ -212,6 +212,7 @@ typedef struct vnode vnode_t;
 #define	VI_WRMAP	0x00000400	/* might have PROT_WRITE u. mappings */
 #define	VI_PAGES	0x00000800	/* UVM object has >0 pages */
 #define	VI_ONWORKLST	0x00004000	/* On syncer work-list */
+#define	VI_DEADCHECK	0x00008000	/* UVM: need to call vdead_check() */
 
 /*
  * The third set are locked by the underlying file system.
@@ -220,7 +221,7 @@ typedef struct vnode vnode_t;
 
 #define	VNODE_FLAGBITS \
     "\20\1ROOT\2SYSTEM\3ISTTY\4MAPPED\5MPSAFE\6LOCKSWORK\11TEXT\12EXECMAP" \
-    "\13WRMAP\14PAGES\17ONWORKLST\31DIROP"
+    "\13WRMAP\14PAGES\17ONWORKLST\20DEADCHECK\31DIROP"
 
 #define	VSIZENOTSET	((voff_t)-1)
 
Index: uvm/uvm_anon.c
===================================================================
RCS file: /cvsroot/src/sys/uvm/uvm_anon.c,v
retrieving revision 1.75
diff -u -p -r1.75 uvm_anon.c
--- uvm/uvm_anon.c	14 Mar 2020 20:23:51 -0000	1.75
+++ uvm/uvm_anon.c	17 Mar 2020 22:41:15 -0000
@@ -336,6 +336,8 @@ uvm_anon_pagein(struct vm_amap *amap, st
 		 * anon was freed.
 		 */
 		return false;
+	case ENOLCK:
+		panic("uvm_anon_pagein");
 	default:
 		return true;
 	}
Index: uvm/uvm_aobj.c
===================================================================
RCS file: /cvsroot/src/sys/uvm/uvm_aobj.c,v
retrieving revision 1.138
diff -u -p -r1.138 uvm_aobj.c
--- uvm/uvm_aobj.c	17 Mar 2020 18:31:39 -0000	1.138
+++ uvm/uvm_aobj.c	17 Mar 2020 22:41:15 -0000
@@ -807,6 +807,16 @@ uao_get(struct uvm_object *uobj, voff_t
 	    (uintptr_t)uobj, offset, flags,0);
 
 	/*
+	 * the object must be locked. it can only be a read lock when
+	 * processing a read fault with PGO_LOCKED | PGO_NOBUSY.
+ */ + + KASSERT(rw_lock_held(uobj->vmobjlock)); + KASSERT(rw_write_held(uobj->vmobjlock) || + ((~flags & (PGO_LOCKED | PGO_NOBUSY)) == 0 && + (access_type & VM_PROT_WRITE) == 0)); + + /* * get number of pages */ @@ -835,10 +845,12 @@ uao_get(struct uvm_object *uobj, voff_t /* * if page is new, attempt to allocate the page, - * zero-fill'd. + * zero-fill'd. we can only do this if busying + * pages, as otherwise the object is read locked. */ - if (ptmp == NULL && uao_find_swslot(uobj, + if ((flags & PGO_NOBUSY) == 0 && ptmp == NULL && + uao_find_swslot(uobj, current_offset >> PAGE_SHIFT) == 0) { ptmp = uao_pagealloc(uobj, current_offset, UVM_FLAG_COLORMATCH|UVM_PGA_ZERO); @@ -870,9 +882,11 @@ uao_get(struct uvm_object *uobj, voff_t KASSERT(uvm_pagegetdirty(ptmp) != UVM_PAGE_STATUS_CLEAN); - /* caller must un-busy this page */ - ptmp->flags |= PG_BUSY; - UVM_PAGE_OWN(ptmp, "uao_get1"); + if ((flags & PGO_NOBUSY) == 0) { + /* caller must un-busy this page */ + ptmp->flags |= PG_BUSY; + UVM_PAGE_OWN(ptmp, "uao_get1"); + } gotpage: pps[lcv] = ptmp; gotpages++; Index: uvm/uvm_extern.h =================================================================== RCS file: /cvsroot/src/sys/uvm/uvm_extern.h,v retrieving revision 1.221 diff -u -p -r1.221 uvm_extern.h --- uvm/uvm_extern.h 23 Feb 2020 15:46:43 -0000 1.221 +++ uvm/uvm_extern.h 17 Mar 2020 22:41:15 -0000 @@ -249,6 +249,7 @@ b\32UNMAP\0\ #define UFP_NORDONLY 0x08 #define UFP_DIRTYONLY 0x10 #define UFP_BACKWARD 0x20 +#define UFP_NOBUSY 0x40 /* * lockflags that control the locking behavior of various functions. @@ -506,6 +507,8 @@ struct uvmexp_sysctl { int64_t fileunknown; int64_t fileclean; int64_t filedirty; + int64_t fltup; + int64_t fltnoup; }; #ifdef _KERNEL Index: uvm/uvm_fault.c =================================================================== RCS file: /cvsroot/src/sys/uvm/uvm_fault.c,v retrieving revision 1.219 diff -u -p -r1.219 uvm_fault.c --- uvm/uvm_fault.c 17 Mar 2020 18:31:39 -0000 1.219 +++ uvm/uvm_fault.c 17 Mar 2020 22:41:15 -0000 @@ -194,7 +194,7 @@ uvmfault_anonflush(struct vm_anon **anon for (lcv = 0; lcv < n; lcv++) { if (anons[lcv] == NULL) continue; - KASSERT(rw_write_held(anons[lcv]->an_lock)); + KASSERT(rw_lock_held(anons[lcv]->an_lock)); pg = anons[lcv]->an_page; if (pg && (pg->flags & PG_BUSY) == 0) { uvm_pagelock(pg); @@ -276,10 +276,11 @@ uvmfault_anonget(struct uvm_faultinfo *u struct vm_anon *anon) { struct vm_page *pg; + krw_t lock_type; int error; UVMHIST_FUNC("uvmfault_anonget"); UVMHIST_CALLED(maphist); - KASSERT(rw_write_held(anon->an_lock)); + KASSERT(rw_lock_held(anon->an_lock)); KASSERT(anon->an_lock == amap->am_lock); /* Increment the counters.*/ @@ -316,6 +317,7 @@ uvmfault_anonget(struct uvm_faultinfo *u * Is page resident? Make sure it is not busy/released. */ + lock_type = rw_lock_op(anon->an_lock); if (pg) { /* @@ -352,9 +354,14 @@ uvmfault_anonget(struct uvm_faultinfo *u } else { #if defined(VMSWAP) /* - * No page, therefore allocate one. + * No page, therefore allocate one. A write lock is + * required for this. If the caller didn't supply + * one, fail now and have them retry. */ + if (lock_type == RW_READER) { + return ENOLCK; + } pg = uvm_pagealloc(NULL, ufi != NULL ? ufi->orig_rvaddr : 0, anon, ufi != NULL ? 
UVM_FLAG_COLORMATCH : 0); @@ -400,7 +407,7 @@ uvmfault_anonget(struct uvm_faultinfo *u locked = uvmfault_relock(ufi); if (locked || we_own) { - rw_enter(anon->an_lock, RW_WRITER); + rw_enter(anon->an_lock, lock_type); } /* @@ -415,6 +422,7 @@ uvmfault_anonget(struct uvm_faultinfo *u */ if (we_own) { + KASSERT(lock_type == RW_WRITER); #if defined(VMSWAP) if (error) { @@ -561,6 +569,11 @@ uvmfault_promote(struct uvm_faultinfo *u } else if (uobjpage != PGO_DONTCARE) { /* object-backed COW */ opg = uobjpage; + if ((uobjpage->flags & PG_BUSY) != 0) { + KASSERT(rw_write_held(opg->uobject->vmobjlock)); + } else { + KASSERT(rw_read_held(opg->uobject->vmobjlock)); + } } else { /* ZFOD */ opg = NULL; @@ -573,10 +586,9 @@ uvmfault_promote(struct uvm_faultinfo *u KASSERT(amap != NULL); KASSERT(uobjpage != NULL); - KASSERT(uobjpage == PGO_DONTCARE || (uobjpage->flags & PG_BUSY) != 0); KASSERT(rw_write_held(amap->am_lock)); KASSERT(oanon == NULL || amap->am_lock == oanon->an_lock); - KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock)); + KASSERT(uobj == NULL || rw_lock_held(uobj->vmobjlock)); if (*spare != NULL) { anon = *spare; @@ -615,7 +627,10 @@ uvmfault_promote(struct uvm_faultinfo *u } /* unlock and fail ... */ - uvm_page_unbusy(&uobjpage, 1); + if (uobjpage != PGO_DONTCARE && + (uobjpage->flags & PG_BUSY) != 0) { + uvm_page_unbusy(&uobjpage, 1); + } uvmfault_unlockall(ufi, amap, uobj); if (!uvm_reclaimable()) { UVMHIST_LOG(maphist, "out of VM", 0,0,0,0); @@ -738,6 +753,12 @@ struct uvm_faultctx { * (or due to the mechanical separation of the function?) */ bool promote; + + /* + * type of lock to acquire on objects in both layers. + */ + krw_t lower_lock_type; + krw_t upper_lock_type; }; static inline int uvm_fault_check( @@ -780,7 +801,7 @@ static inline void uvm_fault_lower_neigh struct uvm_faultinfo *, const struct uvm_faultctx *, vaddr_t, struct vm_page *); static inline int uvm_fault_lower_io( - struct uvm_faultinfo *, const struct uvm_faultctx *, + struct uvm_faultinfo *, struct uvm_faultctx *, struct uvm_object **, struct vm_page **); static inline int uvm_fault_lower_direct( struct uvm_faultinfo *, struct uvm_faultctx *, @@ -814,6 +835,15 @@ uvm_fault_internal(struct vm_map *orig_m /* "wire" fault causes wiring of both mapping and paging */ .wire_mapping = (fault_flag & UVM_FAULT_WIRE) != 0, .wire_paging = (fault_flag & UVM_FAULT_WIRE) != 0, + + /* + * default lock type to acquire on upper & lower layer + * objects: reader. this can be upgraded at any point + * during the fault from read -> write and uvm_faultctx + * changed to match, but is never downgraded write -> read. + */ + .upper_lock_type = RW_READER, + .lower_lock_type = RW_READER, }; const bool maxprot = (fault_flag & UVM_FAULT_MAXPROT) != 0; struct vm_anon *anons_store[UVM_MAXRANGE], **anons; @@ -998,6 +1028,12 @@ uvm_fault_check( flt->cow_now = (flt->access_type & VM_PROT_WRITE) != 0; } + if (flt->wire_paging) { + /* wiring pages requires a write lock. */ + flt->upper_lock_type = RW_WRITER; + flt->lower_lock_type = RW_WRITER; + } + flt->promote = false; /* @@ -1093,18 +1129,45 @@ uvm_fault_check( (uintptr_t)ufi->entry, (uintptr_t)amap, (uintptr_t)uobj, 0); /* - * if we've got an amap, lock it and extract current anons. + * if we've got an amap, guess at the best lock type, then lock it + * and extract current anons. */ if (amap) { - amap_lock(amap, RW_WRITER); +#ifndef DEBUG + /* + * if the amap isn't shared, so go for a writer lock to + * avoid the cost of upgrading the lock later if needed. 
+ * XXX nice for PostgreSQL, but consider threads. + * + * if there's a backing object and handling a write fault, + * assume we're about to COW. + */ + + if ((amap_flags(amap) & AMAP_SHARED) == 0) { + flt->upper_lock_type = RW_WRITER; + } if (uobj != NULL && (flt->access_type & VM_PROT_WRITE) != 0) { + flt->upper_lock_type = RW_WRITER; + } +#endif /* !DEBUG */ + + amap_lock(amap, flt->upper_lock_type); amap_lookups(&ufi->entry->aref, eoff, *ranons, flt->npages); } else { + /* + * no covering amap: if a write fault, we are about to + * dirty the object and that requires a write lock. + */ + + if ((flt->access_type & VM_PROT_WRITE) != 0) { + flt->lower_lock_type = RW_WRITER; + } *ranons = NULL; /* to be safe */ } /* locked: maps(read), amap(if there) */ - KASSERT(amap == NULL || rw_write_held(amap->am_lock)); + KASSERT(amap == NULL || + rw_lock_op(amap->am_lock) == flt->upper_lock_type); /* * for MADV_SEQUENTIAL mappings we want to deactivate the back pages @@ -1147,6 +1210,44 @@ uvm_fault_check( } /* + * uvm_fault_upper_upgrade: upgrade upper lock, reader -> writer + */ + +static inline int +uvm_fault_upper_upgrade(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, + struct vm_amap *amap, struct uvm_object *uobj) +{ + + KASSERT(amap != NULL); + KASSERT(flt->upper_lock_type == rw_lock_op(amap->am_lock)); + + /* + * fast path. + */ + + if (__predict_true(flt->upper_lock_type == RW_WRITER)) { + return 0; + } + + /* + * otherwise try for the upgrade. if we don't get it, unlock + * everything, restart the fault and next time around get a writer + * lock. + */ + + flt->upper_lock_type = RW_WRITER; + if (__predict_false(!rw_tryupgrade(amap->am_lock))) { + uvmfault_unlockall(ufi, amap, uobj); + cpu_count(CPU_COUNT_FLTNOUP, 1); + UVMHIST_LOG(maphist, " !upgrade upper", 0, 0,0,0); + return ERESTART; + } + cpu_count(CPU_COUNT_FLTUP, 1); + KASSERT(flt->upper_lock_type == rw_lock_op(amap->am_lock)); + return 0; +} + +/* * uvm_fault_upper_lookup: look up existing h/w mapping and amap. * * iterate range of interest: @@ -1169,7 +1270,8 @@ uvm_fault_upper_lookup( UVMHIST_FUNC("uvm_fault_upper_lookup"); UVMHIST_CALLED(maphist); /* locked: maps(read), amap(if there) */ - KASSERT(amap == NULL || rw_write_held(amap->am_lock)); + KASSERT(amap == NULL || + rw_lock_op(amap->am_lock) == flt->upper_lock_type); /* * map in the backpages and frontpages we found in the amap in hopes @@ -1199,7 +1301,7 @@ uvm_fault_upper_lookup( } /* - * check for present page and map if possible. re-activate it. + * check for present page and map if possible. */ pages[lcv] = PGO_DONTCARE; @@ -1221,7 +1323,8 @@ uvm_fault_upper_lookup( } /* locked: maps(read), amap(if there) */ - KASSERT(amap == NULL || rw_write_held(amap->am_lock)); + KASSERT(amap == NULL || + rw_lock_op(amap->am_lock) == flt->upper_lock_type); /* (shadowed == true) if there is an anon at the faulting address */ UVMHIST_LOG(maphist, " shadowed=%jd, will_get=%jd", shadowed, (ufi->entry->object.uvm_obj && shadowed != false),0,0); @@ -1254,12 +1357,21 @@ uvm_fault_upper_neighbor( KASSERT(pg->uobject == NULL); KASSERT(pg->uanon != NULL); - KASSERT(rw_write_held(pg->uanon->an_lock)); + KASSERT(rw_lock_op(pg->uanon->an_lock) == flt->upper_lock_type); KASSERT(uvm_pagegetdirty(pg) != UVM_PAGE_STATUS_CLEAN); - uvm_pagelock(pg); - uvm_pageenqueue(pg); - uvm_pageunlock(pg); + /* + * in the read-locked case, it's not possible for this to be a new + * page, therefore it's enqueued already. 
there wasn't a direct + * fault on the page, so avoid the cost of re-enqueuing it unless + * write-locked. + */ + + if (flt->upper_lock_type == RW_WRITER) { + uvm_pagelock(pg); + uvm_pageenqueue(pg); + uvm_pageunlock(pg); + } UVMHIST_LOG(maphist, " MAPPING: n anon: pm=%#jx, va=%#jx, pg=%#jx", (uintptr_t)ufi->orig_map->pmap, currva, (uintptr_t)pg, 0); @@ -1301,7 +1413,7 @@ uvm_fault_upper( UVMHIST_FUNC("uvm_fault_upper"); UVMHIST_CALLED(maphist); /* locked: maps(read), amap, anon */ - KASSERT(rw_write_held(amap->am_lock)); + KASSERT(rw_lock_op(amap->am_lock) == flt->upper_lock_type); KASSERT(anon->an_lock == amap->am_lock); /* @@ -1324,7 +1436,7 @@ uvm_fault_upper( * if the page is on loan from a uvm_object, then anonget will * lock that object for us if it does not fail. */ - + retry: error = uvmfault_anonget(ufi, amap, anon); switch (error) { case 0: @@ -1337,6 +1449,15 @@ uvm_fault_upper( kpause("fltagain1", false, hz/2, NULL); return ERESTART; + case ENOLCK: + /* it needs a write lock: retry */ + error = uvm_fault_upper_upgrade(ufi, flt, amap, NULL); + if (error != 0) { + return error; + } + KASSERT(rw_write_held(amap->am_lock)); + goto retry; + default: return error; } @@ -1348,9 +1469,10 @@ uvm_fault_upper( uobj = anon->an_page->uobject; /* locked by anonget if !NULL */ /* locked: maps(read), amap, anon, uobj(if one) */ - KASSERT(rw_write_held(amap->am_lock)); + KASSERT(rw_lock_op(amap->am_lock) == flt->upper_lock_type); KASSERT(anon->an_lock == amap->am_lock); - KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock)); + KASSERT(uobj == NULL || + rw_lock_op(uobj->vmobjlock) == flt->lower_lock_type); /* * special handling for loaned pages @@ -1425,6 +1547,13 @@ uvm_fault_upper_loan( /* >1 case is already ok */ if (anon->an_ref == 1) { + /* breaking loan requires a write lock. */ + error = uvm_fault_upper_upgrade(ufi, flt, amap, NULL); + if (error != 0) { + return error; + } + KASSERT(rw_write_held(amap->am_lock)); + error = uvm_loanbreak_anon(anon, *ruobj); if (error != 0) { uvmfault_unlockall(ufi, amap, *ruobj); @@ -1453,6 +1582,7 @@ uvm_fault_upper_promote( struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, struct uvm_object *uobj, struct vm_anon *anon) { + struct vm_amap * const amap = ufi->entry->aref.ar_amap; struct vm_anon * const oanon = anon; struct vm_page *pg; int error; @@ -1461,6 +1591,13 @@ uvm_fault_upper_promote( UVMHIST_LOG(maphist, " case 1B: COW fault",0,0,0,0); cpu_count(CPU_COUNT_FLT_ACOW, 1); + /* promoting requires a write lock. 
*/ + error = uvm_fault_upper_upgrade(ufi, flt, amap, NULL); + if (error != 0) { + return error; + } + KASSERT(rw_write_held(amap->am_lock)); + error = uvmfault_promote(ufi, oanon, PGO_DONTCARE, &anon, &flt->anon_spare); switch (error) { @@ -1472,13 +1609,10 @@ uvm_fault_upper_promote( return error; } - KASSERT(anon == NULL || anon->an_lock == oanon->an_lock); + KASSERT(anon->an_lock == oanon->an_lock); + /* uvm_fault_upper_done will activate or enqueue the page */ pg = anon->an_page; - /* uvm_fault_upper_done will activate the page */ - uvm_pagelock(pg); - uvm_pageenqueue(pg); - uvm_pageunlock(pg); pg->flags &= ~(PG_BUSY|PG_FAKE); UVM_PAGE_OWN(pg, NULL); @@ -1532,10 +1666,11 @@ uvm_fault_upper_enter( UVMHIST_FUNC("uvm_fault_upper_enter"); UVMHIST_CALLED(maphist); /* locked: maps(read), amap, oanon, anon(if different from oanon) */ - KASSERT(rw_write_held(amap->am_lock)); + KASSERT(rw_lock_op(amap->am_lock) == flt->upper_lock_type); KASSERT(anon->an_lock == amap->am_lock); KASSERT(oanon->an_lock == amap->am_lock); - KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock)); + KASSERT(uobj == NULL || + rw_lock_op(uobj->vmobjlock) == flt->lower_lock_type); KASSERT(uvm_pagegetdirty(pg) != UVM_PAGE_STATUS_CLEAN); /* @@ -1564,6 +1699,17 @@ uvm_fault_upper_enter( KASSERT(!pmap_extract(pmap, va, NULL)); /* + * ensure that the page is queued in the case that + * we just promoted. + */ + + if (flt->upper_lock_type == RW_WRITER) { + uvm_pagelock(pg); + uvm_pageenqueue(pg); + uvm_pageunlock(pg); + } + + /* * No need to undo what we did; we can simply think of * this as the pmap throwing away the mapping information. * @@ -1633,6 +1779,57 @@ uvm_fault_upper_done( } /* + * uvm_fault_lower_upgrade: upgrade lower lock, reader -> writer + */ + +static inline int +uvm_fault_lower_upgrade(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, + struct vm_amap *amap, struct uvm_object *uobj, struct vm_page *uobjpage) +{ + + KASSERT(uobj != NULL); + KASSERT(flt->lower_lock_type == rw_lock_op(uobj->vmobjlock)); + + /* + * fast path. + */ + + if (__predict_true(flt->lower_lock_type == RW_WRITER)) { + KASSERT(uobjpage == NULL || (uobjpage->flags & PG_BUSY) != 0); + return 0; + } + + /* + * otherwise try for the upgrade. if we don't get it, unlock + * everything, restart the fault and next time around get a writer + * lock. + */ + + flt->lower_lock_type = RW_WRITER; + if (__predict_false(!rw_tryupgrade(uobj->vmobjlock))) { + uvmfault_unlockall(ufi, amap, uobj); + cpu_count(CPU_COUNT_FLTNOUP, 1); + UVMHIST_LOG(maphist, " !upgrade lower", 0, 0,0,0); + return ERESTART; + } + cpu_count(CPU_COUNT_FLTUP, 1); + KASSERT(flt->lower_lock_type == rw_lock_op(uobj->vmobjlock)); + + /* + * finally, if a page was supplied, assert that it's not busy + * (can't be with a reader lock) and then mark it busy now that + * we have a writer lock. + */ + + if (uobjpage != NULL) { + KASSERT((uobjpage->flags & PG_BUSY) == 0); + uobjpage->flags |= PG_BUSY; + UVM_PAGE_OWN(uobjpage, "upgrdlwr"); + } + return 0; +} + +/* * uvm_fault_lower: handle lower fault. * * 1. 
check uobj @@ -1687,9 +1884,19 @@ uvm_fault_lower( * locked: * maps(read), amap(if there), uobj(if !null), uobjpage(if !null) */ - KASSERT(amap == NULL || rw_write_held(amap->am_lock)); - KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock)); - KASSERT(uobjpage == NULL || (uobjpage->flags & PG_BUSY) != 0); + KASSERT(amap == NULL || + rw_lock_op(amap->am_lock) == flt->upper_lock_type); + if (flt->lower_lock_type == RW_WRITER) { + KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock)); + KASSERTMSG(uobjpage == NULL || + (uobjpage->flags & PG_BUSY) != 0, + "page %p should be busy", uobjpage); + } else { + KASSERT(uobj == NULL || rw_read_held(uobj->vmobjlock)); + KASSERTMSG(uobjpage == NULL || + (uobjpage->flags & PG_BUSY) == 0, + "page %p should not be busy", uobjpage); + } /* * note that uobjpage can not be PGO_DONTCARE at this point. we now @@ -1730,9 +1937,15 @@ uvm_fault_lower( * locked: * maps(read), amap(if !null), uobj(if !null), uobjpage(if uobj) */ - KASSERT(amap == NULL || rw_write_held(amap->am_lock)); - KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock)); - KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) != 0); + KASSERT(amap == NULL || + rw_lock_op(amap->am_lock) == flt->upper_lock_type); + if (flt->lower_lock_type == RW_WRITER) { + KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock)); + KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) != 0); + } else { + KASSERT(uobj == NULL || rw_read_held(uobj->vmobjlock)); + KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) == 0); + } /* * notes: @@ -1773,17 +1986,25 @@ uvm_fault_lower_lookup( vaddr_t currva; UVMHIST_FUNC("uvm_fault_lower_lookup"); UVMHIST_CALLED(maphist); - rw_enter(uobj->vmobjlock, RW_WRITER); - /* Locked: maps(read), amap(if there), uobj */ + rw_enter(uobj->vmobjlock, flt->lower_lock_type); + + /* + * Locked: maps(read), amap(if there), uobj + * + * if we have a read lock on the object, do a PGO_NOBUSY get, which + * will return us pages with PG_BUSY clear. if a write lock is held + * pages will be returned with PG_BUSY set. + */ cpu_count(CPU_COUNT_FLTLGET, 1); gotpages = flt->npages; (void) uobj->pgops->pgo_get(uobj, ufi->entry->offset + flt->startva - ufi->entry->start, pages, &gotpages, flt->centeridx, - flt->access_type & MASK(ufi->entry), ufi->entry->advice, PGO_LOCKED); + flt->access_type & MASK(ufi->entry), ufi->entry->advice, + PGO_LOCKED | (flt->lower_lock_type == RW_WRITER ? 0 : PGO_NOBUSY)); - KASSERT(rw_write_held(uobj->vmobjlock)); + KASSERT(rw_lock_op(uobj->vmobjlock) == flt->lower_lock_type); /* * check for pages to map, if we got any @@ -1804,10 +2025,20 @@ uvm_fault_lower_lookup( } KASSERT(curpg->uobject == uobj); + if (flt->lower_lock_type == RW_WRITER) { + KASSERT(rw_write_held(uobj->vmobjlock)); + KASSERTMSG((curpg->flags & PG_BUSY) != 0, + "page %p should be busy", curpg); + } else { + KASSERT(rw_read_held(uobj->vmobjlock)); + KASSERTMSG((curpg->flags & PG_BUSY) == 0, + "page %p should not be busy", curpg); + } + /* * if center page is resident and not PG_BUSY|PG_RELEASED - * then pgo_get made it PG_BUSY for us and gave us a handle - * to it. + * and !PGO_NOBUSY, then pgo_get made it PG_BUSY for us and + * gave us a handle to it. */ if (lcv == flt->centeridx) { @@ -1840,9 +2071,18 @@ uvm_fault_lower_neighbor( * for this. we can just directly enter the pages. */ - uvm_pagelock(pg); - uvm_pageenqueue(pg); - uvm_pageunlock(pg); + /* + * in the read-locked case, it's not possible for this to be a new + * page. it must be cached with the object and enqueued already. 
+ * there wasn't a direct fault on the page, so avoid the cost of + * re-enqueuing it. + */ + + if (flt->lower_lock_type == RW_WRITER) { + uvm_pagelock(pg); + uvm_pageenqueue(pg); + uvm_pageunlock(pg); + } UVMHIST_LOG(maphist, " MAPPING: n obj: pm=%#jx, va=%#jx, pg=%#jx", (uintptr_t)ufi->orig_map->pmap, currva, (uintptr_t)pg, 0); @@ -1859,10 +2099,21 @@ uvm_fault_lower_neighbor( KASSERT((pg->flags & PG_RELEASED) == 0); KASSERT(!UVM_OBJ_IS_CLEAN(pg->uobject) || uvm_pagegetdirty(pg) == UVM_PAGE_STATUS_CLEAN); - pg->flags &= ~(PG_BUSY); - UVM_PAGE_OWN(pg, NULL); - KASSERT(rw_write_held(pg->uobject->vmobjlock)); + /* + * if a write lock was held on the object, the pages have been + * busied. unbusy them now, as we are about to enter and then + * forget about them. + */ + + if (flt->lower_lock_type == RW_WRITER) { + KASSERT((pg->flags & PG_BUSY) != 0); + pg->flags &= ~(PG_BUSY); + UVM_PAGE_OWN(pg, NULL); + } else { + KASSERT((pg->flags & PG_BUSY) == 0); + } + KASSERT(rw_lock_op(pg->uobject->vmobjlock) == flt->lower_lock_type); const vm_prot_t mapprot = readonly ? (flt->enter_prot & ~VM_PROT_WRITE) : @@ -1884,7 +2135,7 @@ uvm_fault_lower_neighbor( static int uvm_fault_lower_io( - struct uvm_faultinfo *ufi, const struct uvm_faultctx *flt, + struct uvm_faultinfo *ufi, struct uvm_faultctx *flt, struct uvm_object **ruobj, struct vm_page **ruobjpage) { struct vm_amap * const amap = ufi->entry->aref.ar_amap; @@ -1907,10 +2158,17 @@ uvm_fault_lower_io( advice = ufi->entry->advice; /* Locked: maps(read), amap(if there), uobj */ + KASSERT(rw_lock_op(uobj->vmobjlock) == flt->lower_lock_type); + + /* Upgrade to a write lock if needed. */ + error = uvm_fault_lower_upgrade(ufi, flt, amap, uobj, NULL); + if (error != 0) { + return error; + } uvmfault_unlockall(ufi, amap, NULL); - /* Locked: uobj */ - KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock)); + /* Locked: uobj(write) */ + KASSERT(rw_write_held(uobj->vmobjlock)); cpu_count(CPU_COUNT_FLTGET, 1); gotpages = 1; @@ -1951,13 +2209,14 @@ uvm_fault_lower_io( locked = uvmfault_relock(ufi); if (locked && amap) - amap_lock(amap, RW_WRITER); + amap_lock(amap, flt->upper_lock_type); /* might be changed */ uobj = pg->uobject; - rw_enter(uobj->vmobjlock, RW_WRITER); + rw_enter(uobj->vmobjlock, flt->lower_lock_type); KASSERT((pg->flags & PG_BUSY) != 0); + KASSERT(flt->lower_lock_type == RW_WRITER); uvm_pagelock(pg); uvm_pageactivate(pg); @@ -2057,7 +2316,11 @@ uvm_fault_lower_direct( } KASSERT(pg == uobjpage); - KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) != 0); + if (flt->lower_lock_type == RW_READER) { + KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) == 0); + } else { + KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) != 0); + } return uvm_fault_lower_enter(ufi, flt, uobj, NULL, pg); } @@ -2077,6 +2340,7 @@ uvm_fault_lower_direct_loan( struct vm_amap * const amap = ufi->entry->aref.ar_amap; struct vm_page *pg; struct vm_page *uobjpage = *ruobjpage; + int error; UVMHIST_FUNC("uvm_fault_lower_direct_loan"); UVMHIST_CALLED(maphist); if (!flt->cow_now) { @@ -2084,7 +2348,16 @@ uvm_fault_lower_direct_loan( /* cap! */ flt->enter_prot = flt->enter_prot & ~VM_PROT_WRITE; } else { - /* write fault: must break the loan here */ + /* + * write fault: must break the loan here. to do this + * we need a write lock on the object. 
+ */ + + error = uvm_fault_lower_upgrade(ufi, flt, amap, uobj, uobjpage); + if (error != 0) { + return error; + } + KASSERT(rw_write_held(uobj->vmobjlock)); pg = uvm_loanbreak(uobjpage); if (pg == NULL) { @@ -2134,6 +2407,13 @@ uvm_fault_lower_promote( KASSERT(amap != NULL); + /* promoting requires a write lock. */ + error = uvm_fault_upper_upgrade(ufi, flt, amap, uobj); + if (error != 0) { + return error; + } + KASSERT(rw_write_held(amap->am_lock)); + /* * If we are going to promote the data to an anon we * allocate a blank anon here and plug it into our amap. @@ -2154,7 +2434,11 @@ uvm_fault_lower_promote( /* * Fill in the data. */ - KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) != 0); + if (flt->lower_lock_type == RW_READER) { + KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) == 0); + } else { + KASSERT(uobj == NULL || (uobjpage->flags & PG_BUSY) != 0); + } if (uobjpage != PGO_DONTCARE) { cpu_count(CPU_COUNT_FLT_PRCOPY, 1); @@ -2176,11 +2460,13 @@ uvm_fault_lower_promote( * since we still hold the object lock. */ - uobjpage->flags &= ~PG_BUSY; - uvm_pagelock(uobjpage); - uvm_pagewakeup(uobjpage); - uvm_pageunlock(uobjpage); - UVM_PAGE_OWN(uobjpage, NULL); + if ((uobjpage->flags & PG_BUSY) != 0) { + uobjpage->flags &= ~PG_BUSY; + uvm_pagelock(uobjpage); + uvm_pagewakeup(uobjpage); + uvm_pageunlock(uobjpage); + UVM_PAGE_OWN(uobjpage, NULL); + } UVMHIST_LOG(maphist, " promote uobjpage %#jx to anon/page %#jx/%#jx", @@ -2223,12 +2509,22 @@ uvm_fault_lower_enter( * maps(read), amap(if !null), uobj(if !null), * anon(if !null), pg(if anon), unlock_uobj(if !null) * + * anon must be write locked (promotion). uobj can be either. + * * Note: pg is either the uobjpage or the new page in the new anon. */ - KASSERT(amap == NULL || rw_write_held(amap->am_lock)); - KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock)); + KASSERT(amap == NULL || + rw_lock_op(amap->am_lock) == flt->upper_lock_type); KASSERT(anon == NULL || anon->an_lock == amap->am_lock); - KASSERT((pg->flags & PG_BUSY) != 0); + if (flt->lower_lock_type == RW_WRITER) { + KASSERT(uobj == NULL || rw_write_held(uobj->vmobjlock)); + KASSERTMSG((pg->flags & PG_BUSY) != 0, + "page %p should be busy", pg); + } else { + KASSERT(uobj == NULL || rw_read_held(uobj->vmobjlock)); + KASSERTMSG(anon != NULL || (pg->flags & PG_BUSY) == 0, + "page %p should not be busy", pg); + } /* * all resources are present. we can now map it in and free our @@ -2265,18 +2561,24 @@ uvm_fault_lower_enter( * we just promoted the page. */ - uvm_pagelock(pg); - uvm_pageenqueue(pg); - uvm_pagewakeup(pg); - uvm_pageunlock(pg); + if (anon != NULL || flt->lower_lock_type == RW_WRITER) { + uvm_pagelock(pg); + uvm_pageenqueue(pg); + uvm_pagewakeup(pg); + uvm_pageunlock(pg); + } else { + KASSERT((pg->flags & PG_BUSY) == 0); + } /* * note that pg can't be PG_RELEASED since we did not drop * the object lock since the last time we checked. */ KASSERT((pg->flags & PG_RELEASED) == 0); - pg->flags &= ~(PG_BUSY|PG_FAKE); - UVM_PAGE_OWN(pg, NULL); + if ((pg->flags & PG_BUSY) != 0) { + pg->flags &= ~(PG_BUSY|PG_FAKE); + UVM_PAGE_OWN(pg, NULL); + } uvmfault_unlockall(ufi, amap, uobj); if (!uvm_reclaimable()) { @@ -2298,11 +2600,13 @@ uvm_fault_lower_enter( * lock since the last time we checked. 
*/ KASSERT((pg->flags & PG_RELEASED) == 0); - uvm_pagelock(pg); - uvm_pagewakeup(pg); - uvm_pageunlock(pg); - pg->flags &= ~(PG_BUSY|PG_FAKE); - UVM_PAGE_OWN(pg, NULL); + if ((pg->flags & PG_BUSY) != 0) { + uvm_pagelock(pg); + uvm_pagewakeup(pg); + uvm_pageunlock(pg); + pg->flags &= ~(PG_BUSY|PG_FAKE); + UVM_PAGE_OWN(pg, NULL); + } pmap_update(ufi->orig_map->pmap); uvmfault_unlockall(ufi, amap, uobj); Index: uvm/uvm_loan.c =================================================================== RCS file: /cvsroot/src/sys/uvm/uvm_loan.c,v retrieving revision 1.98 diff -u -p -r1.98 uvm_loan.c --- uvm/uvm_loan.c 17 Mar 2020 18:31:39 -0000 1.98 +++ uvm/uvm_loan.c 17 Mar 2020 22:41:15 -0000 @@ -391,6 +391,7 @@ uvm_loananon(struct uvm_faultinfo *ufi, if (error) { UVMHIST_LOG(loanhist, "error %jd", error,0,0,0); + KASSERT(error != ENOLCK); /* need to refault (i.e. refresh our lookup) ? */ if (error == ERESTART) { Index: uvm/uvm_map.c =================================================================== RCS file: /cvsroot/src/sys/uvm/uvm_map.c,v retrieving revision 1.374 diff -u -p -r1.374 uvm_map.c --- uvm/uvm_map.c 14 Mar 2020 17:29:53 -0000 1.374 +++ uvm/uvm_map.c 17 Mar 2020 22:41:15 -0000 @@ -2256,7 +2256,7 @@ uvm_unmap_remove(struct vm_map *map, vad * change while in pmap_remove(). */ - uvm_map_lock_entry(entry, RW_WRITER); + uvm_map_lock_entry(entry, RW_READER); pmap_remove(map->pmap, entry->start, entry->end); /* @@ -2831,7 +2831,7 @@ uvm_map_extract(struct vm_map *srcmap, v /* we advance "entry" in the following if statement */ if (flags & UVM_EXTRACT_REMOVE) { - uvm_map_lock_entry(entry, RW_WRITER); + uvm_map_lock_entry(entry, RW_READER); pmap_remove(srcmap->pmap, entry->start, entry->end); uvm_map_unlock_entry(entry); @@ -3063,7 +3063,7 @@ uvm_map_protect(struct vm_map *map, vadd if (current->protection != old_prot) { /* update pmap! 
*/ - uvm_map_lock_entry(current, RW_WRITER); + uvm_map_lock_entry(current, RW_READER); pmap_protect(map->pmap, current->start, current->end, current->protection & MASK(current)); uvm_map_unlock_entry(current); @@ -4406,7 +4406,7 @@ uvm_mapent_forkcopy(struct vm_map *new_m if (old_entry->aref.ar_amap && !UVM_ET_ISNEEDSCOPY(old_entry)) { if (old_entry->max_protection & VM_PROT_WRITE) { - uvm_map_lock_entry(old_entry, RW_WRITER); + uvm_map_lock_entry(old_entry, RW_READER); pmap_protect(old_map->pmap, old_entry->start, old_entry->end, old_entry->protection & ~VM_PROT_WRITE); Index: uvm/uvm_meter.c =================================================================== RCS file: /cvsroot/src/sys/uvm/uvm_meter.c,v retrieving revision 1.74 diff -u -p -r1.74 uvm_meter.c --- uvm/uvm_meter.c 15 Jan 2020 17:55:45 -0000 1.74 +++ uvm/uvm_meter.c 17 Mar 2020 22:41:15 -0000 @@ -186,6 +186,8 @@ sysctl_vm_uvmexp2(SYSCTLFN_ARGS) u.fileunknown = (int)cpu_count_get(CPU_COUNT_FILEUNKNOWN); u.fileclean = (int)cpu_count_get(CPU_COUNT_FILECLEAN); u.filedirty = (int)cpu_count_get(CPU_COUNT_FILEDIRTY); + u.fltup = (int)cpu_count_get(CPU_COUNT_FLTUP); + u.fltnoup = (int)cpu_count_get(CPU_COUNT_FLTNOUP); node = *rnode; node.sysctl_data = &u; Index: uvm/uvm_pager.h =================================================================== RCS file: /cvsroot/src/sys/uvm/uvm_pager.h,v retrieving revision 1.46 diff -u -p -r1.46 uvm_pager.h --- uvm/uvm_pager.h 14 Mar 2020 20:45:23 -0000 1.46 +++ uvm/uvm_pager.h 17 Mar 2020 22:41:15 -0000 @@ -152,6 +152,7 @@ struct uvm_pagerops { #define PGO_JOURNALLOCKED 0x020 /* journal is already locked [get/put] */ #define PGO_LOCKED 0x040 /* fault data structures are locked [get] */ #define PGO_BUSYFAIL 0x080 /* fail if a page is busy [put] */ +#define PGO_NOBUSY 0x100 /* don't busy returned pages (read locked) */ #define PGO_OVERWRITE 0x200 /* pages will be overwritten before unlocked */ #define PGO_PASTEOF 0x400 /* allow allocation of pages past EOF */ #define PGO_NOBLOCKALLOC 0x800 /* backing block allocation is not needed */ Index: uvm/uvm_vnode.c =================================================================== RCS file: /cvsroot/src/sys/uvm/uvm_vnode.c,v retrieving revision 1.110 diff -u -p -r1.110 uvm_vnode.c --- uvm/uvm_vnode.c 14 Mar 2020 20:45:23 -0000 1.110 +++ uvm/uvm_vnode.c 17 Mar 2020 22:41:15 -0000 @@ -287,7 +287,15 @@ uvn_findpage(struct uvm_object *uobj, vo UVMHIST_LOG(ubchist, "vp %#jx off 0x%jx", (uintptr_t)uobj, offset, 0, 0); - KASSERT(rw_write_held(uobj->vmobjlock)); + /* + * NOBUSY must come with NOWAIT and NOALLOC. if NOBUSY is + * specified, this may be called with a reader lock. + */ + + KASSERT(rw_lock_held(uobj->vmobjlock)); + KASSERT((flags & UFP_NOBUSY) == 0 || (flags & UFP_NOWAIT) != 0); + KASSERT((flags & UFP_NOBUSY) == 0 || (flags & UFP_NOALLOC) != 0); + KASSERT((flags & UFP_NOBUSY) != 0 || rw_write_held(uobj->vmobjlock)); if (*pgp != NULL) { UVMHIST_LOG(ubchist, "dontcare", 0,0,0,0); @@ -380,8 +388,10 @@ uvn_findpage(struct uvm_object *uobj, vo } /* mark the page BUSY and we're done. */ - pg->flags |= PG_BUSY; - UVM_PAGE_OWN(pg, "uvn_findpage"); + if ((flags & UFP_NOBUSY) == 0) { + pg->flags |= PG_BUSY; + UVM_PAGE_OWN(pg, "uvn_findpage"); + } UVMHIST_LOG(ubchist, "found %#jx (color %ju)", (uintptr_t)pg, VM_PGCOLOR(pg), 0, 0); uvm_page_array_advance(a);
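
A closing note on the lock-upgrade scheme the uvm_fault.c changes implement: faults now start with RW_READER on the amap and the lower object (see upper_lock_type/lower_lock_type in uvm_faultctx), and uvm_fault_upper_upgrade()/uvm_fault_lower_upgrade() move to RW_WRITER only when the fault turns out to need to modify something (promotion, loan breaking, wiring, pager I/O). The upgrade is attempted with rw_tryupgrade(9); it fails rather than blocks, so on failure everything is unlocked, CPU_COUNT_FLTNOUP is bumped, and ERESTART sends the fault back to the top, where the writer lock is taken up front the next time around. The sketch below is a userland analogue of that restart pattern only, not kernel code: POSIX rwlocks cannot be upgraded in place, so the "try upgrade" step here always takes the failure path, and fault_once(), fault_upgrade() and struct fault_ctx are invented names.

#include <errno.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#ifndef ERESTART
#define	ERESTART	(-3)	/* any sentinel works; this is NetBSD's kernel value */
#endif

struct fault_ctx {
	bool	need_writer;	/* like flt->upper_lock_type == RW_WRITER */
};

static pthread_rwlock_t obj_lock = PTHREAD_RWLOCK_INITIALIZER;
static unsigned long fltnoup;	/* like CPU_COUNT_FLTNOUP */

/*
 * Decide whether we can keep going under the current lock.  Returning
 * ERESTART means "lock already dropped, redo the whole fault".
 */
static int
fault_upgrade(struct fault_ctx *flt)
{

	if (flt->need_writer)
		return 0;		/* fast path: already a writer */

	/* no in-place upgrade here: drop the lock, restart as a writer. */
	flt->need_writer = true;
	pthread_rwlock_unlock(&obj_lock);
	fltnoup++;
	return ERESTART;
}

static int
fault_once(struct fault_ctx *flt, bool will_modify)
{
	int error;

	if (flt->need_writer)
		pthread_rwlock_wrlock(&obj_lock);
	else
		pthread_rwlock_rdlock(&obj_lock);

	if (will_modify) {
		error = fault_upgrade(flt);
		if (error != 0)
			return error;	/* lock already dropped */
	}

	/* ... the actual fault work happens here, under a sufficient lock ... */

	pthread_rwlock_unlock(&obj_lock);
	return 0;
}

int
main(void)
{
	struct fault_ctx flt = { .need_writer = false };
	int error;

	do {
		error = fault_once(&flt, true);
	} while (error == ERESTART);

	printf("faults restarted for upgrade: %lu\n", fltnoup);
	return 0;
}

In the kernel the restart is cheap relative to blocking: rw_tryupgrade(9) deliberately fails instead of waiting, since two readers both waiting to upgrade the same lock would deadlock, and the retried fault simply acquires the writer lock first.
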