Index: external/cddl/osnet/dist/uts/common/fs/zfs/zfs_vnops.c =================================================================== RCS file: /cvsroot/src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_vnops.c,v retrieving revision 1.53 diff -u -p -r1.53 zfs_vnops.c --- external/cddl/osnet/dist/uts/common/fs/zfs/zfs_vnops.c 4 Oct 2019 23:06:19 -0000 1.53 +++ external/cddl/osnet/dist/uts/common/fs/zfs/zfs_vnops.c 9 Dec 2019 22:24:06 -0000 @@ -6081,9 +6081,7 @@ zfs_putapage(vnode_t *vp, page_t **pp, i out_unbusy: mutex_enter(mtx); - mutex_enter(&uvm_pageqlock); uvm_page_unbusy(pp, count); - mutex_exit(&uvm_pageqlock); mutex_exit(mtx); out: Index: sys/arch/amd64/amd64/machdep.c =================================================================== RCS file: /cvsroot/src/sys/arch/amd64/amd64/machdep.c,v retrieving revision 1.342 diff -u -p -r1.342 machdep.c --- sys/arch/amd64/amd64/machdep.c 6 Dec 2019 08:35:21 -0000 1.342 +++ sys/arch/amd64/amd64/machdep.c 9 Dec 2019 22:24:31 -0000 @@ -858,7 +858,7 @@ sparse_dump_mark(void) pfn++) { pg = PHYS_TO_VM_PAGE(ptoa(pfn)); - if (pg->uanon || (pg->pqflags & PQ_FREE) || + if (pg->uanon || (pg->flags & PG_FREE) || (pg->uobject && pg->uobject->pgops)) { p = VM_PAGE_TO_PHYS(pg) / PAGE_SIZE; clrbit(sparse_dump_physmap, p); Index: sys/miscfs/genfs/genfs_io.c =================================================================== RCS file: /cvsroot/src/sys/miscfs/genfs/genfs_io.c,v retrieving revision 1.76 diff -u -p -r1.76 genfs_io.c --- sys/miscfs/genfs/genfs_io.c 6 Oct 2019 05:48:00 -0000 1.76 +++ sys/miscfs/genfs/genfs_io.c 9 Dec 2019 22:24:37 -0000 @@ -79,9 +79,7 @@ genfs_rel_pages(struct vm_page **pgs, un pg->flags |= PG_RELEASED; } } - mutex_enter(&uvm_pageqlock); uvm_page_unbusy(pgs, npages); - mutex_exit(&uvm_pageqlock); } static void @@ -466,7 +464,6 @@ startover: out: UVMHIST_LOG(ubchist, "succeeding, npages %jd", npages,0,0,0); error = 0; - mutex_enter(&uvm_pageqlock); for (i = 0; i < npages; i++) { struct vm_page *pg = pgs[i]; if (pg == NULL) { @@ -498,7 +495,6 @@ out: UVM_PAGE_OWN(pg, NULL); } } - mutex_exit(&uvm_pageqlock); if (memwrite) { genfs_markdirty(vp); } @@ -1201,9 +1197,6 @@ retry: * apply FREE or DEACTIVATE options if requested. */ - if (flags & (PGO_DEACTIVATE|PGO_FREE)) { - mutex_enter(&uvm_pageqlock); - } for (i = 0; i < npages; i++) { tpg = pgs[i]; KASSERT(tpg->uobject == uobj); @@ -1236,9 +1229,6 @@ retry: } } } - if (flags & (PGO_DEACTIVATE|PGO_FREE)) { - mutex_exit(&uvm_pageqlock); - } if (needs_clean) { modified = true; @@ -1646,7 +1636,6 @@ genfs_compat_getpages(void *v) } uvm_pagermapout(kva, npages); mutex_enter(uobj->vmobjlock); - mutex_enter(&uvm_pageqlock); for (i = 0; i < npages; i++) { pg = pgs[i]; if (error && (pg->flags & PG_FAKE) != 0) { @@ -1659,7 +1648,6 @@ genfs_compat_getpages(void *v) if (error) { uvm_page_unbusy(pgs, npages); } - mutex_exit(&uvm_pageqlock); if (error == 0 && memwrite) { genfs_markdirty(vp); } Index: sys/nfs/nfs_bio.c =================================================================== RCS file: /cvsroot/src/sys/nfs/nfs_bio.c,v retrieving revision 1.191 diff -u -p -r1.191 nfs_bio.c --- sys/nfs/nfs_bio.c 15 Jul 2015 03:28:55 -0000 1.191 +++ sys/nfs/nfs_bio.c 9 Dec 2019 22:24:38 -0000 @@ -1340,9 +1340,7 @@ nfs_getpages(void *v) * available and put back original pgs array. 
*/ - mutex_enter(&uvm_pageqlock); uvm_page_unbusy(pgs, npages); - mutex_exit(&uvm_pageqlock); *ap->a_count = 0; memcpy(pgs, opgs, npages * sizeof(struct vm_pages *)); Index: sys/rump/librump/rumpvfs/vm_vfs.c =================================================================== RCS file: /cvsroot/src/sys/rump/librump/rumpvfs/vm_vfs.c,v retrieving revision 1.34 diff -u -p -r1.34 vm_vfs.c --- sys/rump/librump/rumpvfs/vm_vfs.c 18 Oct 2013 19:56:11 -0000 1.34 +++ sys/rump/librump/rumpvfs/vm_vfs.c 9 Dec 2019 22:24:39 -0000 @@ -73,9 +73,7 @@ uvm_aio_aiodone(struct buf *bp) } KASSERT(mutex_owned(uobj->vmobjlock)); - mutex_enter(&uvm_pageqlock); uvm_page_unbusy(pgs, npages); - mutex_exit(&uvm_pageqlock); mutex_exit(uobj->vmobjlock); uvm_pagermapout((vaddr_t)bp->b_data, npages); Index: sys/ufs/lfs/lfs_pages.c =================================================================== RCS file: /cvsroot/src/sys/ufs/lfs/lfs_pages.c,v retrieving revision 1.15 diff -u -p -r1.15 lfs_pages.c --- sys/ufs/lfs/lfs_pages.c 19 Aug 2017 14:22:49 -0000 1.15 +++ sys/ufs/lfs/lfs_pages.c 9 Dec 2019 22:24:39 -0000 @@ -1,7 +1,7 @@ /* $NetBSD: lfs_pages.c,v 1.15 2017/08/19 14:22:49 maya Exp $ */ /*- - * Copyright (c) 1999, 2000, 2001, 2002, 2003 The NetBSD Foundation, Inc. + * Copyright (c) 1999, 2000, 2001, 2002, 2003, 2019 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -375,9 +375,7 @@ check_dirty(struct lfs *fs, struct vnode * Wire the page so that * pdaemon doesn't see it again. */ - mutex_enter(&uvm_pageqlock); uvm_pagewire(pg); - mutex_exit(&uvm_pageqlock); /* Suspended write flag */ pg->flags |= PG_DELWRI; @@ -539,9 +537,7 @@ retry: "lfsput2", 0); mutex_enter(vp->v_interlock); } - mutex_enter(&uvm_pageqlock); uvm_pageactivate(pg); - mutex_exit(&uvm_pageqlock); } ap->a_offlo = blkeof; if (ap->a_offhi > 0 && ap->a_offhi <= ap->a_offlo) { @@ -817,7 +813,6 @@ retry: } busypg = NULL; - KASSERT(!mutex_owned(&uvm_pageqlock)); oreclaim = (ap->a_flags & PGO_RECLAIM); ap->a_flags &= ~PGO_RECLAIM; error = genfs_do_putpages(vp, startoffset, endoffset, Index: sys/ufs/lfs/lfs_vfsops.c =================================================================== RCS file: /cvsroot/src/sys/ufs/lfs/lfs_vfsops.c,v retrieving revision 1.365 diff -u -p -r1.365 lfs_vfsops.c --- sys/ufs/lfs/lfs_vfsops.c 28 May 2019 08:59:35 -0000 1.365 +++ sys/ufs/lfs/lfs_vfsops.c 9 Dec 2019 22:24:39 -0000 @@ -2054,9 +2054,7 @@ lfs_gop_write(struct vnode *vp, struct v pgs[i]->flags |= PG_PAGEOUT; uvm_pageout_start(1); mutex_enter(vp->v_interlock); - mutex_enter(&uvm_pageqlock); uvm_pageunwire(pgs[i]); - mutex_exit(&uvm_pageqlock); mutex_exit(vp->v_interlock); } } @@ -2238,7 +2236,6 @@ lfs_gop_write(struct vnode *vp, struct v pgs[0]->offset, eof, npages)); } - mutex_enter(&uvm_pageqlock); for (i = 0; i < npages; i++) { pg = pgs[i]; @@ -2262,7 +2259,6 @@ lfs_gop_write(struct vnode *vp, struct v } /* uvm_pageunbusy takes care of PG_BUSY, PG_WANTED */ uvm_page_unbusy(pgs, npages); - mutex_exit(&uvm_pageqlock); mutex_exit(vp->v_interlock); return EAGAIN; } Index: sys/ufs/lfs/ulfs_inode.c =================================================================== RCS file: /cvsroot/src/sys/ufs/lfs/ulfs_inode.c,v retrieving revision 1.21 diff -u -p -r1.21 ulfs_inode.c --- sys/ufs/lfs/ulfs_inode.c 28 Oct 2017 00:37:13 -0000 1.21 +++ sys/ufs/lfs/ulfs_inode.c 9 Dec 2019 22:24:39 -0000 @@ -234,7 +234,6 @@ ulfs_balloc_range(struct vnode *vp, off_ GOP_SIZE(vp, off + len, &eob, 0); 
mutex_enter(uobj->vmobjlock); - mutex_enter(&uvm_pageqlock); for (i = 0; i < npages; i++) { KASSERT((pgs[i]->flags & PG_RELEASED) == 0); if (!error) { @@ -246,7 +245,6 @@ ulfs_balloc_range(struct vnode *vp, off_ } uvm_pageactivate(pgs[i]); } - mutex_exit(&uvm_pageqlock); uvm_page_unbusy(pgs, npages); mutex_exit(uobj->vmobjlock); Index: sys/ufs/ufs/ufs_inode.c =================================================================== RCS file: /cvsroot/src/sys/ufs/ufs/ufs_inode.c,v retrieving revision 1.105 diff -u -p -r1.105 ufs_inode.c --- sys/ufs/ufs/ufs_inode.c 10 Dec 2018 20:48:34 -0000 1.105 +++ sys/ufs/ufs/ufs_inode.c 9 Dec 2019 22:24:39 -0000 @@ -270,7 +270,6 @@ ufs_balloc_range(struct vnode *vp, off_t GOP_SIZE(vp, off + len, &eob, 0); mutex_enter(uobj->vmobjlock); - mutex_enter(&uvm_pageqlock); for (i = 0; i < npages; i++) { KASSERT((pgs[i]->flags & PG_RELEASED) == 0); if (!error) { @@ -282,7 +281,6 @@ ufs_balloc_range(struct vnode *vp, off_t } uvm_pageactivate(pgs[i]); } - mutex_exit(&uvm_pageqlock); uvm_page_unbusy(pgs, npages); mutex_exit(uobj->vmobjlock); Index: sys/uvm/uvm.h =================================================================== RCS file: /cvsroot/src/sys/uvm/uvm.h,v retrieving revision 1.69 diff -u -p -r1.69 uvm.h --- sys/uvm/uvm.h 1 Dec 2019 14:40:31 -0000 1.69 +++ sys/uvm/uvm.h 9 Dec 2019 22:24:39 -0000 @@ -123,7 +123,6 @@ extern struct uvm_object *uvm_kernel_obj * locks (made globals for lockstat). */ -extern kmutex_t uvm_pageqlock; /* lock for active/inactive page q */ extern kmutex_t uvm_fpageqlock; /* lock for free page q */ extern kmutex_t uvm_kentry_lock; Index: sys/uvm/uvm_amap.c =================================================================== RCS file: /cvsroot/src/sys/uvm/uvm_amap.c,v retrieving revision 1.110 diff -u -p -r1.110 uvm_amap.c --- sys/uvm/uvm_amap.c 1 Dec 2019 14:24:43 -0000 1.110 +++ sys/uvm/uvm_amap.c 9 Dec 2019 22:24:39 -0000 @@ -966,7 +966,6 @@ amap_copy(struct vm_map *map, struct vm_ * map change until we are done copying all the map entrys. * => XXXCDC: out of memory should cause fork to fail, but there is * currently no easy way to do this (needs fix) - * => page queues must be unlocked (we may lock them) */ void @@ -1070,19 +1069,10 @@ ReStart: * Drop PG_BUSY on new page. Since its owner was locked all * this time - it cannot be PG_RELEASED or PG_WANTED. */ + uvm_pageactivate(npg); npg->flags &= ~(PG_BUSY|PG_FAKE); UVM_PAGE_OWN(npg, NULL); } - /* Activate all pages. Some may be missing because of retry above. */ - mutex_enter(&uvm_pageqlock); - for (lcv = 0 ; lcv < amap->am_nused ; lcv++) { - anon = amap->am_anon[amap->am_slots[lcv]]; - KASSERT(anon->an_lock == amap->am_lock); - if (anon->an_page != NULL) { - uvm_pageactivate(anon->an_page); - } - } - mutex_exit(&uvm_pageqlock); amap_unlock(amap); } Index: sys/uvm/uvm_anon.c =================================================================== RCS file: /cvsroot/src/sys/uvm/uvm_anon.c,v retrieving revision 1.68 diff -u -p -r1.68 uvm_anon.c --- sys/uvm/uvm_anon.c 2 Dec 2019 20:02:02 -0000 1.68 +++ sys/uvm/uvm_anon.c 9 Dec 2019 22:24:39 -0000 @@ -95,7 +95,7 @@ uvm_analloc(void) } /* - * uvm_anon_dispose: break loans and remove pmap mapping + * uvm_anon_dispose: free any resident page or swap resources of anon. * * => anon must be removed from the amap (if anon was in an amap). * => amap must be locked; we may drop and re-acquire the lock here. 
@@ -134,11 +134,11 @@ uvm_anon_dispose(struct vm_anon *anon) */ if (pg->uobject) { - mutex_enter(&uvm_pageqlock); + mutex_enter(&pg->interlock); KASSERT(pg->loan_count > 0); pg->loan_count--; pg->uanon = NULL; - mutex_exit(&uvm_pageqlock); + mutex_exit(&pg->interlock); mutex_exit(pg->uobject->vmobjlock); } else { @@ -160,6 +160,10 @@ uvm_anon_dispose(struct vm_anon *anon) mutex_obj_hold(anon->an_lock); return false; } + uvm_pagefree(pg); + UVMHIST_LOG(maphist, "anon 0x%#jx, page 0x%#jx: " + "freed now!", (uintptr_t)anon, (uintptr_t)pg, + 0, 0); } } @@ -171,6 +175,12 @@ uvm_anon_dispose(struct vm_anon *anon) } #endif + /* + * Free any swap resources, leave a page replacement hint. + */ + + uvm_anon_dropswap(anon); + uvmpdpol_anfree(anon); UVMHIST_LOG(maphist,"<- done!",0,0,0,0); return true; } @@ -184,10 +194,6 @@ void uvm_anon_free(struct vm_anon *anon) { -#if defined(VMSWAP) - /* Free any dangling swap slot. */ - uvm_anon_dropswap(anon); -#endif KASSERT(anon->an_ref == 0); KASSERT(anon->an_lock == NULL); KASSERT(anon->an_page == NULL); @@ -205,54 +211,22 @@ uvm_anon_free(struct vm_anon *anon) void uvm_anon_freelst(struct vm_amap *amap, struct vm_anon *anonlst) { - struct vm_anon *anon; - struct vm_anon **anonp = &anonlst; - struct vm_page *pg; + struct vm_anon *next; UVMHIST_FUNC(__func__); UVMHIST_CALLED(maphist); KASSERT(mutex_owned(amap->am_lock)); - if (anonlst == NULL) { - amap_unlock(amap); - return; - } - - /* Break loans and hardware mappings. Defer release of busy pages. */ - while ((anon = *anonp) != NULL) { - if (!uvm_anon_dispose(anon)) { - /* Do not free this anon. */ - *anonp = anon->an_link; - /* Note: clears an_ref as well. */ - anon->an_link = NULL; - } else { - anonp = &anon->an_link; - } - } - - /* Free pages and leave a page replacement hint. */ - mutex_enter(&uvm_pageqlock); - for (anon = anonlst; anon != NULL; anon = anon->an_link) { - UVMHIST_LOG(maphist, "anon 0x%#jx, page 0x%#jx: " - "releasing now!", (uintptr_t)anon, - (uintptr_t)anon->an_page, 0, 0); - if ((pg = anon->an_page) != NULL) { - uvm_pagefree(pg); - } - uvmpdpol_anfree(anon); - } - mutex_exit(&uvm_pageqlock); - amap_unlock(amap); - - /* Free swap space, pages and vm_anon. */ - while (anonlst) { - anon = anonlst->an_link; + for (; anonlst != NULL; anonlst = next) { + next = anonlst->an_link; /* Note: clears an_ref as well. */ anonlst->an_link = NULL; - anonlst->an_lock = NULL; - uvm_anon_free(anonlst); - anonlst = anon; + if (uvm_anon_dispose(anonlst)) { + anonlst->an_lock = NULL; + uvm_anon_free(anonlst); + } } + amap_unlock(amap); } /* @@ -276,7 +250,6 @@ struct vm_page * uvm_anon_lockloanpg(struct vm_anon *anon) { struct vm_page *pg; - bool locked = false; KASSERT(mutex_owned(anon->an_lock)); @@ -290,37 +263,21 @@ uvm_anon_lockloanpg(struct vm_anon *anon */ while (((pg = anon->an_page) != NULL) && pg->loan_count != 0) { - - /* - * quickly check to see if the page has an object before - * bothering to lock the page queues. this may also produce - * a false positive result, but that's ok because we do a real - * check after that. 
- */ - + mutex_enter(&pg->interlock); if (pg->uobject) { - mutex_enter(&uvm_pageqlock); - if (pg->uobject) { - locked = - mutex_tryenter(pg->uobject->vmobjlock); - } else { - /* object disowned before we got PQ lock */ - locked = true; - } - mutex_exit(&uvm_pageqlock); - /* * if we didn't get a lock (try lock failed), then we * toggle our anon lock and try again */ - if (!locked) { + if (!mutex_tryenter(pg->uobject->vmobjlock)) { /* * someone locking the object has a chance to * lock us right now * * XXX Better than yielding but inadequate. */ + mutex_exit(&pg->interlock); kpause("livelock", false, 1, anon->an_lock); continue; } @@ -331,12 +288,11 @@ uvm_anon_lockloanpg(struct vm_anon *anon * then we have to take the ownership. */ - if (pg->uobject == NULL && (pg->pqflags & PQ_ANON) == 0) { - mutex_enter(&uvm_pageqlock); - pg->pqflags |= PQ_ANON; + if (pg->uobject == NULL && (pg->flags & PG_ANON) == 0) { + pg->flags |= PG_ANON; pg->loan_count--; - mutex_exit(&uvm_pageqlock); } + mutex_exit(&pg->interlock); break; } return pg; @@ -396,12 +352,7 @@ uvm_anon_pagein(struct vm_amap *amap, st * Deactivate the page (to put it on a page queue). */ - mutex_enter(&uvm_pageqlock); - if (pg->wire_count == 0) { - uvm_pagedeactivate(pg); - } - mutex_exit(&uvm_pageqlock); - + uvm_pagedeactivate(pg); if (pg->flags & PG_WANTED) { pg->flags &= ~PG_WANTED; wakeup(pg); @@ -457,9 +408,7 @@ uvm_anon_release(struct vm_anon *anon) KASSERT(pg->loan_count == 0); KASSERT(anon->an_ref == 0); - mutex_enter(&uvm_pageqlock); uvm_pagefree(pg); - mutex_exit(&uvm_pageqlock); KASSERT(anon->an_page == NULL); /* dispose should succeed as no one can reach this anon anymore. */ success = uvm_anon_dispose(anon); Index: sys/uvm/uvm_aobj.c =================================================================== RCS file: /cvsroot/src/sys/uvm/uvm_aobj.c,v retrieving revision 1.130 diff -u -p -r1.130 uvm_aobj.c --- sys/uvm/uvm_aobj.c 1 Dec 2019 20:31:40 -0000 1.130 +++ sys/uvm/uvm_aobj.c 9 Dec 2019 22:24:39 -0000 @@ -612,26 +612,19 @@ uao_detach(struct uvm_object *uobj) * involved in is complete), release any swap resources and free * the page itself. */ - mutex_enter(uobj->vmobjlock); - TAILQ_FOREACH(pg, &uobj->memq, listq.queue) { - pmap_page_protect(pg, VM_PROT_NONE); - } - mutex_enter(&uvm_pageqlock); while ((pg = TAILQ_FIRST(&uobj->memq)) != NULL) { + pmap_page_protect(pg, VM_PROT_NONE); if (pg->flags & PG_BUSY) { pg->flags |= PG_WANTED; - mutex_exit(&uvm_pageqlock); UVM_UNLOCK_AND_WAIT(pg, uobj->vmobjlock, false, "uao_det", 0); mutex_enter(uobj->vmobjlock); - mutex_enter(&uvm_pageqlock); continue; } uao_dropswap(&aobj->u_obj, pg->offset >> PAGE_SHIFT); uvm_pagefree(pg); } - mutex_exit(&uvm_pageqlock); /* * Finally, free the anonymous UVM object itself. @@ -658,9 +651,6 @@ uao_detach(struct uvm_object *uobj) * we can make a complete pass through the object in one go by starting * at the head and working towards the tail (new pages are put in * front of us). - * => NOTE: we are allowed to lock the page queues, so the caller - * must not be holding the lock on them [e.g. 
pagedaemon had - * better not call us with the queues locked] * => we return 0 unless we encountered some sort of I/O error * XXXJRT currently never happens, as we never directly initiate * XXXJRT I/O @@ -803,12 +793,7 @@ uao_put(struct uvm_object *uobj, voff_t case PGO_CLEANIT|PGO_DEACTIVATE: case PGO_DEACTIVATE: deactivate_it: - mutex_enter(&uvm_pageqlock); - /* skip the page if it's wired */ - if (pg->wire_count == 0) { - uvm_pagedeactivate(pg); - } - mutex_exit(&uvm_pageqlock); + uvm_pagedeactivate(pg); break; case PGO_FREE: @@ -833,9 +818,7 @@ uao_put(struct uvm_object *uobj, voff_t */ uao_dropswap(uobj, pg->offset >> PAGE_SHIFT); - mutex_enter(&uvm_pageqlock); uvm_pagefree(pg); - mutex_exit(&uvm_pageqlock); break; default: @@ -921,7 +904,7 @@ uao_get(struct uvm_object *uobj, voff_t if (ptmp) { /* new page */ ptmp->flags &= ~(PG_FAKE); - ptmp->pqflags |= PQ_AOBJ; + ptmp->flags |= PG_AOBJ; goto gotpage; } } @@ -1023,12 +1006,7 @@ gotpage: continue; } - /* - * safe with PQ's unlocked: because we just - * alloc'd the page - */ - - ptmp->pqflags |= PQ_AOBJ; + ptmp->flags |= PG_AOBJ; /* * got new page ready for I/O. break pps while @@ -1128,9 +1106,7 @@ gotpage: uvm_swap_markbad(swslot, 1); } - mutex_enter(&uvm_pageqlock); uvm_pagefree(ptmp); - mutex_exit(&uvm_pageqlock); mutex_exit(uobj->vmobjlock); return error; } @@ -1380,10 +1356,7 @@ uao_pagein_page(struct uvm_aobj *aobj, i /* * make sure it's on a page queue. */ - mutex_enter(&uvm_pageqlock); - if (pg->wire_count == 0) - uvm_pageenqueue(pg); - mutex_exit(&uvm_pageqlock); + uvm_pageenqueue(pg); if (pg->flags & PG_WANTED) { wakeup(pg); Index: sys/uvm/uvm_bio.c =================================================================== RCS file: /cvsroot/src/sys/uvm/uvm_bio.c,v retrieving revision 1.100 diff -u -p -r1.100 uvm_bio.c --- sys/uvm/uvm_bio.c 7 Nov 2019 07:45:14 -0000 1.100 +++ sys/uvm/uvm_bio.c 9 Dec 2019 22:24:39 -0000 @@ -243,9 +243,7 @@ ubc_fault_page(const struct uvm_faultinf } KASSERT((pg->flags & PG_FAKE) == 0); if (pg->flags & PG_RELEASED) { - mutex_enter(&uvm_pageqlock); uvm_pagefree(pg); - mutex_exit(&uvm_pageqlock); return 0; } if (pg->loan_count != 0) { @@ -287,9 +285,7 @@ ubc_fault_page(const struct uvm_faultinf error = pmap_enter(ufi->orig_map->pmap, va, VM_PAGE_TO_PHYS(pg), prot & mask, PMAP_CANFAIL | (access_type & mask)); - mutex_enter(&uvm_pageqlock); uvm_pageactivate(pg); - mutex_exit(&uvm_pageqlock); pg->flags &= ~(PG_BUSY|PG_WANTED); UVM_PAGE_OWN(pg, NULL); @@ -659,7 +655,6 @@ ubc_release(void *va, int flags) } umap->flags &= ~UMAP_PAGES_LOCKED; mutex_enter(uobj->vmobjlock); - mutex_enter(&uvm_pageqlock); for (u_int i = 0; i < npages; i++) { paddr_t pa; bool rv __diagused; @@ -672,7 +667,6 @@ ubc_release(void *va, int flags) KASSERT(pgs[i]->loan_count == 0); uvm_pageactivate(pgs[i]); } - mutex_exit(&uvm_pageqlock); pmap_kremove(umapva, ubc_winsize); pmap_update(pmap_kernel()); uvm_page_unbusy(pgs, npages); @@ -891,7 +885,6 @@ ubc_direct_release(struct uvm_object *uo int flags, struct vm_page **pgs, int npages) { mutex_enter(uobj->vmobjlock); - mutex_enter(&uvm_pageqlock); for (int i = 0; i < npages; i++) { struct vm_page *pg = pgs[i]; @@ -901,8 +894,6 @@ ubc_direct_release(struct uvm_object *uo if (flags & UBC_WRITE) pg->flags &= ~(PG_FAKE|PG_CLEAN); } - mutex_exit(&uvm_pageqlock); - uvm_page_unbusy(pgs, npages); mutex_exit(uobj->vmobjlock); } Index: sys/uvm/uvm_fault.c =================================================================== RCS file: /cvsroot/src/sys/uvm/uvm_fault.c,v retrieving revision 1.211 diff 
-u -p -r1.211 uvm_fault.c --- sys/uvm/uvm_fault.c 1 Dec 2019 14:30:01 -0000 1.211 +++ sys/uvm/uvm_fault.c 9 Dec 2019 22:24:39 -0000 @@ -191,19 +191,15 @@ uvmfault_anonflush(struct vm_anon **anon int lcv; struct vm_page *pg; - mutex_enter(&uvm_pageqlock); for (lcv = 0; lcv < n; lcv++) { if (anons[lcv] == NULL) continue; KASSERT(mutex_owned(anons[lcv]->an_lock)); pg = anons[lcv]->an_page; if (pg && (pg->flags & PG_BUSY) == 0) { - if (pg->wire_count == 0) { - uvm_pagedeactivate(pg); - } + uvm_pagedeactivate(pg); } } - mutex_exit(&uvm_pageqlock); } /* @@ -453,9 +449,7 @@ uvmfault_anonget(struct uvm_faultinfo *u * pmap_page_protect() it. */ - mutex_enter(&uvm_pageqlock); uvm_pagefree(pg); - mutex_exit(&uvm_pageqlock); if (locked) { uvmfault_unlockall(ufi, NULL, NULL); @@ -492,9 +486,7 @@ released: * We have successfully read the page, activate it. */ - mutex_enter(&uvm_pageqlock); uvm_pageactivate(pg); - mutex_exit(&uvm_pageqlock); pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE); UVM_PAGE_OWN(pg, NULL); #else @@ -1272,9 +1264,7 @@ uvm_fault_upper_neighbor( /* locked: amap, anon */ - mutex_enter(&uvm_pageqlock); uvm_pageenqueue(pg); - mutex_exit(&uvm_pageqlock); UVMHIST_LOG(maphist, " MAPPING: n anon: pm=%#jx, va=%#jx, pg=%#jx", (uintptr_t)ufi->orig_map->pmap, currva, (uintptr_t)pg, 0); @@ -1492,9 +1482,8 @@ uvm_fault_upper_promote( KASSERT(anon == NULL || anon->an_lock == oanon->an_lock); pg = anon->an_page; - mutex_enter(&uvm_pageqlock); - uvm_pageenqueue(pg); /* uvm_fault_upper_done will activate the page */ - mutex_exit(&uvm_pageqlock); + /* uvm_fault_upper_done will activate the page */ + uvm_pageenqueue(pg); pg->flags &= ~(PG_BUSY|PG_FAKE); UVM_PAGE_OWN(pg, NULL); @@ -1627,7 +1616,6 @@ uvm_fault_upper_done( * ... update the page queues. */ - mutex_enter(&uvm_pageqlock); if (wire_paging) { uvm_pagewire(pg); @@ -1639,11 +1627,9 @@ uvm_fault_upper_done( */ pg->flags &= ~(PG_CLEAN); - } else { uvm_pageactivate(pg); } - mutex_exit(&uvm_pageqlock); if (wire_paging) { uvm_anon_dropswap(anon); @@ -1863,9 +1849,7 @@ uvm_fault_lower_neighbor( * for this. we can just directly enter the pages. */ - mutex_enter(&uvm_pageqlock); uvm_pageenqueue(pg); - mutex_exit(&uvm_pageqlock); UVMHIST_LOG(maphist, " MAPPING: n obj: pm=%#jx, va=%#jx, pg=%#jx", (uintptr_t)ufi->orig_map->pmap, currva, (uintptr_t)pg, 0); @@ -1984,9 +1968,7 @@ uvm_fault_lower_io( mutex_enter(uobj->vmobjlock); KASSERT((pg->flags & PG_BUSY) != 0); - mutex_enter(&uvm_pageqlock); uvm_pageactivate(pg); - mutex_exit(&uvm_pageqlock); /* locked(locked): maps(read), amap(if !null), uobj, pg */ /* locked(!locked): uobj, pg */ @@ -2288,9 +2270,7 @@ uvm_fault_lower_enter( * we just promoted the page. 
*/ - mutex_enter(&uvm_pageqlock); uvm_pageenqueue(pg); - mutex_exit(&uvm_pageqlock); if (pg->flags & PG_WANTED) wakeup(pg); @@ -2349,10 +2329,9 @@ uvm_fault_lower_done( UVMHIST_FUNC("uvm_fault_lower_done"); UVMHIST_CALLED(maphist); - mutex_enter(&uvm_pageqlock); if (flt->wire_paging) { uvm_pagewire(pg); - if (pg->pqflags & PQ_AOBJ) { + if (pg->flags & PG_AOBJ) { /* * since the now-wired page cannot be paged out, @@ -2368,7 +2347,6 @@ uvm_fault_lower_done( } else { uvm_pageactivate(pg); } - mutex_exit(&uvm_pageqlock); if (dropswap) { uao_dropswap(uobj, pg->offset >> PAGE_SHIFT); @@ -2481,11 +2459,9 @@ uvm_fault_unwire_locked(struct vm_map *m if (entry != oentry) { if (oentry != NULL) { - mutex_exit(&uvm_pageqlock); uvm_map_unlock_entry(oentry); } uvm_map_lock_entry(entry); - mutex_enter(&uvm_pageqlock); oentry = entry; } @@ -2505,7 +2481,6 @@ uvm_fault_unwire_locked(struct vm_map *m } if (oentry != NULL) { - mutex_exit(&uvm_pageqlock); uvm_map_unlock_entry(entry); } } Index: sys/uvm/uvm_init.c =================================================================== RCS file: /cvsroot/src/sys/uvm/uvm_init.c,v retrieving revision 1.50 diff -u -p -r1.50 uvm_init.c --- sys/uvm/uvm_init.c 1 Dec 2019 14:28:01 -0000 1.50 +++ sys/uvm/uvm_init.c 9 Dec 2019 22:24:39 -0000 @@ -64,7 +64,6 @@ const int * const uvmexp_pagemask = &uvm const int * const uvmexp_pageshift = &uvmexp.pageshift; #endif -kmutex_t uvm_pageqlock __cacheline_aligned; kmutex_t uvm_fpageqlock __cacheline_aligned; kmutex_t uvm_kentry_lock __cacheline_aligned; Index: sys/uvm/uvm_km.c =================================================================== RCS file: /cvsroot/src/sys/uvm/uvm_km.c,v retrieving revision 1.150 diff -u -p -r1.150 uvm_km.c --- sys/uvm/uvm_km.c 1 Dec 2019 23:14:47 -0000 1.150 +++ sys/uvm/uvm_km.c 9 Dec 2019 22:24:40 -0000 @@ -474,9 +474,7 @@ uvm_km_pgremove(vaddr_t startva, vaddr_t } uao_dropswap(uobj, curoff >> PAGE_SHIFT); if (pg != NULL) { - mutex_enter(&uvm_pageqlock); uvm_pagefree(pg); - mutex_exit(&uvm_pageqlock); } } mutex_exit(uobj->vmobjlock); Index: sys/uvm/uvm_loan.c =================================================================== RCS file: /cvsroot/src/sys/uvm/uvm_loan.c,v retrieving revision 1.88 diff -u -p -r1.88 uvm_loan.c --- sys/uvm/uvm_loan.c 1 Dec 2019 14:40:31 -0000 1.88 +++ sys/uvm/uvm_loan.c 9 Dec 2019 22:24:40 -0000 @@ -59,7 +59,7 @@ UVMHIST_DEFINE(loanhist); * at the same time. * * loans are tracked by pg->loan_count. an O->A page will have both - * a uvm_object and a vm_anon, but PQ_ANON will not be set. this sort + * a uvm_object and a vm_anon, but PG_ANON will not be set. this sort * of page is considered "owned" by the uvm_object (not the anon). * * each loan of a page to the kernel bumps the pg->wire_count. the @@ -77,15 +77,15 @@ UVMHIST_DEFINE(loanhist); * pages should be freed when the last loan is dropped. in some cases * an anon may "adopt" an orphaned page. * - * locking: to read pg->loan_count either the owner or the page queues + * locking: to read pg->loan_count either the owner or pg->interlock * must be locked. to modify pg->loan_count, both the owner of the page - * and the PQs must be locked. pg->flags is (as always) locked by + * and pg->interlock must be locked. pg->flags is (as always) locked by * the owner of the page. * * note that locking from the "loaned" side is tricky since the object * getting the loaned page has no reference to the page's owner and thus * the owner could "die" at any time. 
in order to prevent the owner - * from dying the page queues should be locked. this forces us to sometimes + * from dying pg->interlock should be locked. this forces us to sometimes * use "try" locking. * * loans are typically broken by the following events: @@ -357,7 +357,7 @@ uvm_loananon(struct uvm_faultinfo *ufi, if (flags & UVM_LOAN_TOANON) { KASSERT(mutex_owned(anon->an_lock)); pg = anon->an_page; - if (pg && (pg->pqflags & PQ_ANON) != 0 && anon->an_ref == 1) { + if (pg && (pg->flags & PG_ANON) != 0 && anon->an_ref == 1) { if (pg->wire_count > 0) { UVMHIST_LOG(loanhist, "->A wired %#jx", (uintptr_t)pg, 0, 0, 0); @@ -412,9 +412,7 @@ uvm_loananon(struct uvm_faultinfo *ufi, */ pg = anon->an_page; - mutex_enter(&uvm_pageqlock); if (pg->wire_count > 0) { - mutex_exit(&uvm_pageqlock); UVMHIST_LOG(loanhist, "->K wired %#jx", (uintptr_t)pg, 0, 0, 0); KASSERT(pg->uobject == NULL); uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, NULL); @@ -423,10 +421,11 @@ uvm_loananon(struct uvm_faultinfo *ufi, if (pg->loan_count == 0) { pmap_page_protect(pg, VM_PROT_READ); } + mutex_enter(&pg->interlock); pg->loan_count++; KASSERT(pg->loan_count > 0); /* detect wrap-around */ + mutex_exit(&pg->interlock); uvm_pageactivate(pg); - mutex_exit(&uvm_pageqlock); **output = pg; (*output)++; @@ -463,9 +462,7 @@ uvm_loanpage(struct vm_page **pgpp, int KASSERT(mutex_owned(pg->uobject->vmobjlock)); KASSERT(pg->flags & PG_BUSY); - mutex_enter(&uvm_pageqlock); if (pg->wire_count > 0) { - mutex_exit(&uvm_pageqlock); UVMHIST_LOG(loanhist, "wired %#jx", (uintptr_t)pg, 0, 0, 0); error = EBUSY; @@ -474,10 +471,11 @@ uvm_loanpage(struct vm_page **pgpp, int if (pg->loan_count == 0) { pmap_page_protect(pg, VM_PROT_READ); } + mutex_enter(&pg->interlock); pg->loan_count++; KASSERT(pg->loan_count > 0); /* detect wrap-around */ + mutex_exit(&pg->interlock); uvm_pageactivate(pg); - mutex_exit(&uvm_pageqlock); } uvm_page_unbusy(pgpp, npages); @@ -576,9 +574,7 @@ reget: slock = pg->uobject->vmobjlock; mutex_enter(slock); - mutex_enter(&uvm_pageqlock); uvm_page_unbusy(&pg, 1); - mutex_exit(&uvm_pageqlock); mutex_exit(slock); } goto reget; @@ -713,15 +709,11 @@ uvm_loanuobj(struct uvm_faultinfo *ufi, wakeup(pg); } if (pg->flags & PG_RELEASED) { - mutex_enter(&uvm_pageqlock); uvm_pagefree(pg); - mutex_exit(&uvm_pageqlock); mutex_exit(uobj->vmobjlock); return (0); } - mutex_enter(&uvm_pageqlock); uvm_pageactivate(pg); - mutex_exit(&uvm_pageqlock); pg->flags &= ~(PG_BUSY|PG_WANTED); UVM_PAGE_OWN(pg, NULL); mutex_exit(uobj->vmobjlock); @@ -779,22 +771,21 @@ uvm_loanuobj(struct uvm_faultinfo *ufi, if (anon == NULL) { goto fail; } - mutex_enter(&uvm_pageqlock); if (pg->wire_count > 0) { - mutex_exit(&uvm_pageqlock); UVMHIST_LOG(loanhist, "wired %#jx", (uintptr_t)pg, 0, 0, 0); goto fail; } if (pg->loan_count == 0) { pmap_page_protect(pg, VM_PROT_READ); } + mutex_enter(&pg->interlock); pg->loan_count++; KASSERT(pg->loan_count > 0); /* detect wrap-around */ pg->uanon = anon; anon->an_page = pg; anon->an_lock = /* TODO: share amap lock */ + mutex_exit(&pg->interlock); uvm_pageactivate(pg); - mutex_exit(&uvm_pageqlock); if (pg->flags & PG_WANTED) { wakeup(pg); } @@ -872,17 +863,15 @@ again: /* got a zero'd page. 
*/ pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE); pg->flags |= PG_RDONLY; - mutex_enter(&uvm_pageqlock); uvm_pageactivate(pg); - mutex_exit(&uvm_pageqlock); UVM_PAGE_OWN(pg, NULL); } if ((flags & UVM_LOAN_TOANON) == 0) { /* loaning to kernel-page */ - mutex_enter(&uvm_pageqlock); + mutex_enter(&pg->interlock); pg->loan_count++; KASSERT(pg->loan_count > 0); /* detect wrap-around */ - mutex_exit(&uvm_pageqlock); + mutex_exit(&pg->interlock); mutex_exit(uvm_loanzero_object.vmobjlock); **output = pg; (*output)++; @@ -920,11 +909,11 @@ again: } anon->an_page = pg; pg->uanon = anon; - mutex_enter(&uvm_pageqlock); + mutex_enter(&pg->interlock); pg->loan_count++; KASSERT(pg->loan_count > 0); /* detect wrap-around */ + mutex_exit(&pg->interlock); uvm_pageactivate(pg); - mutex_exit(&uvm_pageqlock); mutex_exit(&anon->an_lock); mutex_exit(uvm_loanzero_object.vmobjlock); **output = anon; @@ -973,7 +962,6 @@ uvm_unloanpage(struct vm_page **ploans, struct vm_page *pg; kmutex_t *slock; - mutex_enter(&uvm_pageqlock); while (npages-- > 0) { pg = *ploans++; @@ -983,6 +971,7 @@ uvm_unloanpage(struct vm_page **ploans, * so we have to do a try-lock here. */ + mutex_enter(&pg->interlock); slock = NULL; while (pg->uobject != NULL || pg->uanon != NULL) { if (pg->uobject != NULL) { @@ -994,16 +983,16 @@ uvm_unloanpage(struct vm_page **ploans, break; } /* XXX Better than yielding but inadequate. */ - kpause("livelock", false, 1, &uvm_pageqlock); + kpause("livelock", false, 1, &pg->interlock); slock = NULL; } /* * drop our loan. if page is owned by an anon but - * PQ_ANON is not set, the page was loaned to the anon + * PG_ANON is not set, the page was loaned to the anon * from an object which dropped ownership, so resolve * this by turning the anon's loan into real ownership - * (ie. decrement loan_count again and set PQ_ANON). + * (ie. decrement loan_count again and set PG_ANON). * after all this, if there are no loans left, put the * page back a paging queue (if the page is owned by * an anon) or free it (if the page is now unowned). @@ -1012,11 +1001,12 @@ uvm_unloanpage(struct vm_page **ploans, KASSERT(pg->loan_count > 0); pg->loan_count--; if (pg->uobject == NULL && pg->uanon != NULL && - (pg->pqflags & PQ_ANON) == 0) { + (pg->flags & PG_ANON) == 0) { KASSERT(pg->loan_count > 0); pg->loan_count--; - pg->pqflags |= PQ_ANON; + pg->flags |= PG_ANON; } + mutex_exit(&pg->interlock); if (pg->loan_count == 0 && pg->uobject == NULL && pg->uanon == NULL) { KASSERT((pg->flags & PG_BUSY) == 0); @@ -1026,7 +1016,6 @@ uvm_unloanpage(struct vm_page **ploans, mutex_exit(slock); } } - mutex_exit(&uvm_pageqlock); } /* @@ -1075,12 +1064,11 @@ ulz_put(struct uvm_object *uobj, voff_t KASSERT(pg != NULL); KASSERT(TAILQ_NEXT(pg, listq.queue) == NULL); - mutex_enter(&uvm_pageqlock); if (pg->uanon) uvm_pageactivate(pg); - else + else { uvm_pagedequeue(pg); - mutex_exit(&uvm_pageqlock); + } mutex_exit(uobj->vmobjlock); return 0; @@ -1134,7 +1122,7 @@ uvm_loanbreak(struct vm_page *uobjpage) * force a reload of the old page by clearing it from all * pmaps. * transfer dirtiness of the old page to the new page. - * then lock the page queues to rename the pages. + * then lock pg->interlock to rename the pages. */ uvm_pagecopy(uobjpage, pg); /* old -> new */ @@ -1155,12 +1143,11 @@ uvm_loanbreak(struct vm_page *uobjpage) uobjpage->flags &= ~(PG_WANTED|PG_BUSY); UVM_PAGE_OWN(uobjpage, NULL); - mutex_enter(&uvm_pageqlock); - /* * replace uobjpage with new page. 
*/ + mutex_enter(&uobjpage->interlock); uvm_pagereplace(uobjpage, pg); /* @@ -1170,15 +1157,14 @@ uvm_loanbreak(struct vm_page *uobjpage) */ if (uobjpage->uanon == NULL) uvm_pagedequeue(uobjpage); + mutex_exit(&uobjpage->interlock); /* * at this point we have absolutely no * control over uobjpage */ - /* install new page */ uvm_pageactivate(pg); - mutex_exit(&uvm_pageqlock); /* * done! loan is broken and "pg" is @@ -1207,11 +1193,16 @@ uvm_loanbreak_anon(struct vm_anon *anon, /* force reload */ pmap_page_protect(anon->an_page, VM_PROT_NONE); - mutex_enter(&uvm_pageqlock); /* KILL loan */ - + if (pg < anon->an_page) { + mutex_enter(&pg->interlock); + mutex_enter(&anon->an_page->interlock); + } else { + mutex_enter(&anon->an_page->interlock); + mutex_enter(&pg->interlock); + } anon->an_page->uanon = NULL; /* in case we owned */ - anon->an_page->pqflags &= ~PQ_ANON; + anon->an_page->flags &= ~PG_ANON; if (uobj) { /* if we were receiver of loan */ @@ -1224,21 +1215,22 @@ uvm_loanbreak_anon(struct vm_anon *anon, uvm_pagedequeue(anon->an_page); } - if (uobj) { - mutex_exit(uobj->vmobjlock); - } - /* install new page in anon */ anon->an_page = pg; pg->uanon = anon; - pg->pqflags |= PQ_ANON; + pg->flags |= PG_ANON; + mutex_exit(&pg->interlock); + mutex_exit(&anon->an_page->interlock); uvm_pageactivate(pg); - mutex_exit(&uvm_pageqlock); pg->flags &= ~(PG_BUSY|PG_FAKE); UVM_PAGE_OWN(pg, NULL); + if (uobj) { + mutex_exit(uobj->vmobjlock); + } + /* done! */ return 0; Index: sys/uvm/uvm_map.c =================================================================== RCS file: /cvsroot/src/sys/uvm/uvm_map.c,v retrieving revision 1.366 diff -u -p -r1.366 uvm_map.c --- sys/uvm/uvm_map.c 1 Nov 2019 13:04:22 -0000 1.366 +++ sys/uvm/uvm_map.c 9 Dec 2019 22:24:40 -0000 @@ -3944,15 +3944,12 @@ uvm_map_clean(struct vm_map *map, vaddr_ * at all in these cases. 
*/ - mutex_enter(&uvm_pageqlock); if (pg->loan_count != 0 || pg->wire_count != 0) { - mutex_exit(&uvm_pageqlock); continue; } KASSERT(pg->uanon == anon); uvm_pagedeactivate(pg); - mutex_exit(&uvm_pageqlock); continue; case PGO_FREE: Index: sys/uvm/uvm_object.c =================================================================== RCS file: /cvsroot/src/sys/uvm/uvm_object.c,v retrieving revision 1.15 diff -u -p -r1.15 uvm_object.c --- sys/uvm/uvm_object.c 26 Oct 2015 09:02:49 -0000 1.15 +++ sys/uvm/uvm_object.c 9 Dec 2019 22:24:40 -0000 @@ -173,20 +173,18 @@ uvm_obj_wirepages(struct uvm_object *uob pgs[i] = pg; } - if (pgs[i]->pqflags & PQ_AOBJ) { + if (pgs[i]->flags & PG_AOBJ) { pgs[i]->flags &= ~(PG_CLEAN); uao_dropswap(uobj, i); } } /* Wire the pages */ - mutex_enter(&uvm_pageqlock); for (i = 0; i < npages; i++) { uvm_pagewire(pgs[i]); if (list != NULL) TAILQ_INSERT_TAIL(list, pgs[i], pageq.queue); } - mutex_exit(&uvm_pageqlock); /* Unbusy the pages */ uvm_page_unbusy(pgs, npages); @@ -219,7 +217,6 @@ uvm_obj_unwirepages(struct uvm_object *u off_t offset; mutex_enter(uobj->vmobjlock); - mutex_enter(&uvm_pageqlock); for (offset = start; offset < end; offset += PAGE_SIZE) { pg = uvm_pagelookup(uobj, offset); @@ -228,7 +225,6 @@ uvm_obj_unwirepages(struct uvm_object *u uvm_pageunwire(pg); } - mutex_exit(&uvm_pageqlock); mutex_exit(uobj->vmobjlock); } Index: sys/uvm/uvm_page.c =================================================================== RCS file: /cvsroot/src/sys/uvm/uvm_page.c,v retrieving revision 1.200 diff -u -p -r1.200 uvm_page.c --- sys/uvm/uvm_page.c 20 Sep 2019 11:09:43 -0000 1.200 +++ sys/uvm/uvm_page.c 9 Dec 2019 22:24:40 -0000 @@ -201,7 +201,6 @@ const rb_tree_ops_t uvm_page_tree_ops = * uvm_pageinsert: insert a page in the object. * * => caller must lock object - * => caller must lock page queues * => call should have already set pg's object and offset pointers * and bumped the version counter */ @@ -264,7 +263,6 @@ uvm_pageinsert(struct uvm_object *uobj, * uvm_page_remove: remove page from object. * * => caller must lock object - * => caller must lock page queues */ static inline void @@ -346,15 +344,14 @@ uvm_page_init(vaddr_t *kvm_startp, vaddr CTASSERT(sizeof(pagearray->offset) >= sizeof(struct uvm_cpu *)); /* - * init the page queues and page queue locks, except the free - * list; we allocate that later (with the initial vm_page + * init the page queues and free page queue lock, except the + * free list; we allocate that later (with the initial vm_page * structures). 
*/ uvm.cpus[0] = &boot_cpu; curcpu()->ci_data.cpu_uvm = &boot_cpu; uvmpdpol_init(); - mutex_init(&uvm_pageqlock, MUTEX_DRIVER, IPL_NONE); mutex_init(&uvm_fpageqlock, MUTEX_DRIVER, IPL_VM); /* @@ -840,7 +837,7 @@ uvm_pagealloc_pgfl(struct uvm_cpu *ucpu, /* cpu, try1 */ if ((pg = LIST_FIRST((freeq = &pgfl->pgfl_buckets[color].pgfl_queues[try1]))) != NULL) { - KASSERT(pg->pqflags & PQ_FREE); + KASSERT(pg->flags & PG_FREE); KASSERT(try1 == PGFL_ZEROS || !(pg->flags & PG_ZERO)); KASSERT(try1 == PGFL_UNKNOWN || (pg->flags & PG_ZERO)); KASSERT(ucpu == VM_FREE_PAGE_TO_CPU(pg)); @@ -851,7 +848,7 @@ uvm_pagealloc_pgfl(struct uvm_cpu *ucpu, /* global, try1 */ if ((pg = LIST_FIRST((freeq = &gpgfl->pgfl_buckets[color].pgfl_queues[try1]))) != NULL) { - KASSERT(pg->pqflags & PQ_FREE); + KASSERT(pg->flags & PG_FREE); KASSERT(try1 == PGFL_ZEROS || !(pg->flags & PG_ZERO)); KASSERT(try1 == PGFL_UNKNOWN || (pg->flags & PG_ZERO)); KASSERT(ucpu != VM_FREE_PAGE_TO_CPU(pg)); @@ -862,7 +859,7 @@ uvm_pagealloc_pgfl(struct uvm_cpu *ucpu, /* cpu, try2 */ if ((pg = LIST_FIRST((freeq = &pgfl->pgfl_buckets[color].pgfl_queues[try2]))) != NULL) { - KASSERT(pg->pqflags & PQ_FREE); + KASSERT(pg->flags & PG_FREE); KASSERT(try2 == PGFL_ZEROS || !(pg->flags & PG_ZERO)); KASSERT(try2 == PGFL_UNKNOWN || (pg->flags & PG_ZERO)); KASSERT(ucpu == VM_FREE_PAGE_TO_CPU(pg)); @@ -873,7 +870,7 @@ uvm_pagealloc_pgfl(struct uvm_cpu *ucpu, /* global, try2 */ if ((pg = LIST_FIRST((freeq = &gpgfl->pgfl_buckets[color].pgfl_queues[try2]))) != NULL) { - KASSERT(pg->pqflags & PQ_FREE); + KASSERT(pg->flags & PG_FREE); KASSERT(try2 == PGFL_ZEROS || !(pg->flags & PG_ZERO)); KASSERT(try2 == PGFL_UNKNOWN || (pg->flags & PG_ZERO)); KASSERT(ucpu != VM_FREE_PAGE_TO_CPU(pg)); @@ -1059,21 +1056,30 @@ uvm_pagealloc_strat(struct uvm_object *o ucpu->page_idle_zero = vm_page_zero_enable; } } - KASSERT(pg->pqflags == PQ_FREE); + KASSERT((pg->flags & ~(PG_ZERO|PG_FREE)) == 0); + /* + * for now check this - later on we may do lazy dequeue, but need + * to get pageq.queue used only by the pagedaemon policy first. + */ + KASSERT(!uvmpdpol_pageisqueued_p(pg)); + + /* + * assign the page to the object. we don't need to lock the page's + * identity to do this, as the caller holds the objects locked, and + * the page is not on any paging queues at this time. + */ pg->offset = off; pg->uobject = obj; pg->uanon = anon; + KASSERT(uvm_page_locked_p(pg)); pg->flags = PG_BUSY|PG_CLEAN|PG_FAKE; if (anon) { anon->an_page = pg; - pg->pqflags = PQ_ANON; + pg->flags |= PG_ANON; atomic_inc_uint(&uvmexp.anonpages); - } else { - if (obj) { - uvm_pageinsert(obj, pg); - } - pg->pqflags = 0; + } else if (obj) { + uvm_pageinsert(obj, pg); } mutex_spin_exit(&uvm_fpageqlock); @@ -1103,6 +1109,7 @@ uvm_pagealloc_strat(struct uvm_object *o * uvm_pagereplace: replace a page with another * * => object must be locked + * => interlock must be held */ void @@ -1129,6 +1136,7 @@ uvm_pagereplace(struct vm_page *oldpg, s * uvm_pagerealloc: reallocate a page from one object to another * * => both objects must be locked + * => both interlocks must be held */ void @@ -1156,8 +1164,6 @@ uvm_pagerealloc(struct vm_page *pg, stru #ifdef DEBUG /* * check if page is zero-filled - * - * - called with free page queue lock held. */ void uvm_pagezerocheck(struct vm_page *pg) @@ -1196,7 +1202,6 @@ uvm_pagezerocheck(struct vm_page *pg) * => erase page's identity (i.e. 
remove from object) * => put page on free list * => caller must lock owning object (either anon or uvm_object) - * => caller must lock page queues * => assumes all valid mappings of pg are gone */ @@ -1206,7 +1211,7 @@ uvm_pagefree(struct vm_page *pg) struct pgflist *pgfl; struct uvm_cpu *ucpu; int index, color, queue; - bool iszero; + bool iszero, locked; #ifdef DEBUG if (pg->uobject == (void *)0xdeadbeef && @@ -1216,7 +1221,7 @@ uvm_pagefree(struct vm_page *pg) #endif /* DEBUG */ KASSERT((pg->flags & PG_PAGEOUT) == 0); - KASSERT(!(pg->pqflags & PQ_FREE)); + KASSERT(!(pg->flags & PG_FREE)); //KASSERT(mutex_owned(&uvm_pageqlock) || !uvmpdpol_pageisqueued_p(pg)); KASSERT(pg->uobject == NULL || mutex_owned(pg->uobject->vmobjlock)); KASSERT(pg->uobject != NULL || pg->uanon == NULL || @@ -1240,15 +1245,17 @@ uvm_pagefree(struct vm_page *pg) * unbusy the page, and we're done. */ + mutex_enter(&pg->interlock); + locked = true; if (pg->uobject != NULL) { uvm_pageremove(pg->uobject, pg); pg->flags &= ~PG_CLEAN; } else if (pg->uanon != NULL) { - if ((pg->pqflags & PQ_ANON) == 0) { + if ((pg->flags & PG_ANON) == 0) { pg->loan_count--; } else { - pg->pqflags &= ~PQ_ANON; - atomic_dec_uint(&uvmexp.anonpages); + pg->flags &= ~PG_ANON; + atomic_dec_uint(&uvmexp.anonpages); } pg->uanon->an_page = NULL; pg->uanon = NULL; @@ -1262,43 +1269,48 @@ uvm_pagefree(struct vm_page *pg) #endif if (pg->loan_count) { KASSERT(pg->uobject == NULL); + mutex_exit(&pg->interlock); if (pg->uanon == NULL) { - KASSERT(mutex_owned(&uvm_pageqlock)); uvm_pagedequeue(pg); } return; } + } else if (pg->uobject != NULL || pg->uanon != NULL) { + mutex_enter(&pg->interlock); + locked = true; + } else { + locked = false; } /* * remove page from its object or anon. */ - if (pg->uobject != NULL) { uvm_pageremove(pg->uobject, pg); } else if (pg->uanon != NULL) { pg->uanon->an_page = NULL; + pg->uanon = NULL; atomic_dec_uint(&uvmexp.anonpages); } /* - * now remove the page from the queues. - */ - if (uvmpdpol_pageisqueued_p(pg)) { - KASSERT(mutex_owned(&uvm_pageqlock)); - uvm_pagedequeue(pg); - } - - /* * if the page was wired, unwire it now. */ if (pg->wire_count) { pg->wire_count = 0; - uvmexp.wired--; + atomic_dec_uint(&uvmexp.wired); + } + if (locked) { + mutex_exit(&pg->interlock); } /* + * now remove the page from the queues. + */ + uvm_pagedequeue(pg); + + /* * and put on free queue */ @@ -1313,7 +1325,7 @@ uvm_pagefree(struct vm_page *pg) #endif mutex_spin_enter(&uvm_fpageqlock); - pg->pqflags = PQ_FREE; + pg->flags = PG_FREE; #ifdef DEBUG if (iszero) @@ -1348,7 +1360,6 @@ uvm_pagefree(struct vm_page *pg) * => pages must either all belong to the same object, or all belong to anons. * => if pages are object-owned, object must be locked. * => if pages are anon-owned, anons must be locked. - * => caller must lock page queues if pages may be released. * => caller must make sure that anon-owned pages are not PG_RELEASED. */ @@ -1369,6 +1380,7 @@ uvm_page_unbusy(struct vm_page **pgs, in KASSERT(pg->flags & PG_BUSY); KASSERT((pg->flags & PG_PAGEOUT) == 0); if (pg->flags & PG_WANTED) { + /* XXXAD thundering herd problem. 
*/ wakeup(pg); } if (pg->flags & PG_RELEASED) { @@ -1483,8 +1495,8 @@ uvm_pageidlezero(void) LIST_REMOVE(pg, listq.list); /* per-cpu list */ ucpu->pages[PGFL_UNKNOWN]--; uvmexp.free--; - KASSERT(pg->pqflags == PQ_FREE); - pg->pqflags = 0; + KASSERT(pg->flags == PG_FREE); + pg->flags = 0; mutex_spin_exit(&uvm_fpageqlock); #ifdef PMAP_PAGEIDLEZERO if (!PMAP_PAGEIDLEZERO(VM_PAGE_TO_PHYS(pg))) { @@ -1497,7 +1509,7 @@ uvm_pageidlezero(void) */ mutex_spin_enter(&uvm_fpageqlock); - pg->pqflags = PQ_FREE; + pg->flags = PG_FREE; LIST_INSERT_HEAD(&gpgfl->pgfl_buckets[ nextbucket].pgfl_queues[ PGFL_UNKNOWN], pg, pageq.list); @@ -1512,15 +1524,13 @@ uvm_pageidlezero(void) #else pmap_zero_page(VM_PAGE_TO_PHYS(pg)); #endif /* PMAP_PAGEIDLEZERO */ - pg->flags |= PG_ZERO; - if (!mutex_tryenter(&uvm_fpageqlock)) { lcont = true; mutex_spin_enter(&uvm_fpageqlock); } else { lcont = false; } - pg->pqflags = PQ_FREE; + pg->flags = PG_FREE | PG_ZERO; LIST_INSERT_HEAD(&gpgfl->pgfl_buckets[ nextbucket].pgfl_queues[PGFL_ZEROS], pg, pageq.list); @@ -1567,24 +1577,27 @@ uvm_pagelookup(struct uvm_object *obj, v /* * uvm_pagewire: wire the page, thus removing it from the daemon's grasp * - * => caller must lock page queues + * => caller must lock objects */ void uvm_pagewire(struct vm_page *pg) { - KASSERT(mutex_owned(&uvm_pageqlock)); + + KASSERT(uvm_page_locked_p(pg)); #if defined(READAHEAD_STATS) - if ((pg->pqflags & PQ_READAHEAD) != 0) { + if ((pg->flags & PG_READAHEAD) != 0) { uvm_ra_hit.ev_count++; - pg->pqflags &= ~PQ_READAHEAD; + pg->flags &= ~PG_READAHEAD; } #endif /* defined(READAHEAD_STATS) */ if (pg->wire_count == 0) { uvm_pagedequeue(pg); - uvmexp.wired++; + atomic_inc_uint(&uvmexp.wired); } + mutex_enter(&pg->interlock); pg->wire_count++; + mutex_exit(&pg->interlock); KASSERT(pg->wire_count > 0); /* detect wraparound */ } @@ -1592,26 +1605,30 @@ uvm_pagewire(struct vm_page *pg) * uvm_pageunwire: unwire the page. * * => activate if wire count goes to zero. 
- * => caller must lock page queues + * => caller must lock objects */ void uvm_pageunwire(struct vm_page *pg) { - KASSERT(mutex_owned(&uvm_pageqlock)); + + KASSERT(uvm_page_locked_p(pg)); KASSERT(pg->wire_count != 0); + KASSERT(!uvmpdpol_pageisqueued_p(pg)); + mutex_enter(&pg->interlock); pg->wire_count--; + mutex_exit(&pg->interlock); if (pg->wire_count == 0) { uvm_pageactivate(pg); KASSERT(uvmexp.wired != 0); - uvmexp.wired--; + atomic_dec_uint(&uvmexp.wired); } } /* * uvm_pagedeactivate: deactivate page * - * => caller must lock page queues + * => caller must lock objects * => caller must check to make sure page is not wired * => object that page belongs to must be locked (so we can adjust pg->flags) * => caller must clear the reference on the page before calling @@ -1621,65 +1638,64 @@ void uvm_pagedeactivate(struct vm_page *pg) { - KASSERT(mutex_owned(&uvm_pageqlock)); KASSERT(uvm_page_locked_p(pg)); - KASSERT(pg->wire_count != 0 || uvmpdpol_pageisqueued_p(pg)); - uvmpdpol_pagedeactivate(pg); + if (pg->wire_count == 0) { + KASSERT(uvmpdpol_pageisqueued_p(pg)); + uvmpdpol_pagedeactivate(pg); + } } /* * uvm_pageactivate: activate page * - * => caller must lock page queues + * => caller must lock objects */ void uvm_pageactivate(struct vm_page *pg) { - KASSERT(mutex_owned(&uvm_pageqlock)); KASSERT(uvm_page_locked_p(pg)); #if defined(READAHEAD_STATS) - if ((pg->pqflags & PQ_READAHEAD) != 0) { + if ((pg->flags & PG_READAHEAD) != 0) { uvm_ra_hit.ev_count++; - pg->pqflags &= ~PQ_READAHEAD; + pg->flags &= ~PG_READAHEAD; } #endif /* defined(READAHEAD_STATS) */ - if (pg->wire_count != 0) { - return; + if (pg->wire_count == 0) { + uvmpdpol_pageactivate(pg); } - uvmpdpol_pageactivate(pg); } /* * uvm_pagedequeue: remove a page from any paging queue + * + * => caller must lock objects */ - void uvm_pagedequeue(struct vm_page *pg) { + KASSERT(uvm_page_locked_p(pg)); if (uvmpdpol_pageisqueued_p(pg)) { - KASSERT(mutex_owned(&uvm_pageqlock)); + uvmpdpol_pagedequeue(pg); } - - uvmpdpol_pagedequeue(pg); } /* * uvm_pageenqueue: add a page to a paging queue without activating. * used where a page is not really demanded (yet). eg. 
read-ahead + * + * => caller must lock objects */ - void uvm_pageenqueue(struct vm_page *pg) { - KASSERT(mutex_owned(&uvm_pageqlock)); - if (pg->wire_count != 0) { - return; + KASSERT(uvm_page_locked_p(pg)); + if (pg->wire_count == 0 && !uvmpdpol_pageisqueued_p(pg)) { + uvmpdpol_pageenqueue(pg); } - uvmpdpol_pageenqueue(pg); } /* @@ -1806,7 +1822,6 @@ uvm_direct_process(struct vm_page **pgs, */ static const char page_flagbits[] = UVM_PGFLAGBITS; -static const char page_pqflagbits[] = UVM_PQFLAGBITS; void uvm_page_printit(struct vm_page *pg, bool full, @@ -1816,13 +1831,11 @@ uvm_page_printit(struct vm_page *pg, boo struct uvm_object *uobj; struct pgflist *pgl; char pgbuf[128]; - char pqbuf[128]; (*pr)("PAGE %p:\n", pg); snprintb(pgbuf, sizeof(pgbuf), page_flagbits, pg->flags); - snprintb(pqbuf, sizeof(pqbuf), page_pqflagbits, pg->pqflags); - (*pr)(" flags=%s, pqflags=%s, wire_count=%d, pa=0x%lx\n", - pgbuf, pqbuf, pg->wire_count, (long)VM_PAGE_TO_PHYS(pg)); + (*pr)(" flags=%s, pqflags=%x, wire_count=%d, pa=0x%lx\n", + pgbuf, pg->pqflags, pg->wire_count, (long)VM_PAGE_TO_PHYS(pg)); (*pr)(" uobject=%p, uanon=%p, offset=0x%llx loan_count=%d\n", pg->uobject, pg->uanon, (long long)pg->offset, pg->loan_count); #if defined(UVM_PAGE_TRKOWN) @@ -1839,8 +1852,8 @@ uvm_page_printit(struct vm_page *pg, boo return; /* cross-verify object/anon */ - if ((pg->pqflags & PQ_FREE) == 0) { - if (pg->pqflags & PQ_ANON) { + if ((pg->flags & PG_FREE) == 0) { + if (pg->flags & PG_ANON) { if (pg->uanon == NULL || pg->uanon->an_page != pg) (*pr)(" >>> ANON DOES NOT POINT HERE <<< (%p)\n", (pg->uanon) ? pg->uanon->an_page : NULL); @@ -1864,7 +1877,7 @@ uvm_page_printit(struct vm_page *pg, boo } /* cross-verify page queue */ - if (pg->pqflags & PQ_FREE) { + if (pg->flags & PG_FREE) { int fl = uvm_page_lookup_freelist(pg); int color = VM_PGCOLOR_BUCKET(pg); pgl = &uvm.page_free[fl].pgfl_buckets[color].pgfl_queues[ @@ -1888,7 +1901,7 @@ uvm_page_printit(struct vm_page *pg, boo } /* - * uvm_pages_printthem - print a summary of all managed pages + * uvm_page_printall - print a summary of all managed pages */ void @@ -1911,7 +1924,7 @@ uvm_page_printall(void (*pr)(const char pfn++) { pg = PHYS_TO_VM_PAGE(ptoa(pfn)); - (*pr)("%18p %04x %04x %18p %18p", + (*pr)("%18p %04x %08x %18p %18p", pg, pg->flags, pg->pqflags, pg->uobject, pg->uanon); #ifdef UVM_PAGE_TRKOWN Index: sys/uvm/uvm_page.h =================================================================== RCS file: /cvsroot/src/sys/uvm/uvm_page.h,v retrieving revision 1.84 diff -u -p -r1.84 uvm_page.h --- sys/uvm/uvm_page.h 7 Jan 2019 22:48:01 -0000 1.84 +++ sys/uvm/uvm_page.h 9 Dec 2019 22:24:40 -0000 @@ -100,13 +100,19 @@ * * f: free page queue lock, uvm_fpageqlock * o: page owner (uvm_object::vmobjlock, vm_amap::am_lock, vm_anon::an_lock) - * p: page queue lock, uvm_pageqlock - * o,p: o|p for read, o&p for write + * i: vm_page::interlock + * => flags set and cleared only with o&i held can + * safely be tested for with only o held. + * o,i: o|i for read, o&i for write (depends on context - if could be loaned) + * => see uvm_loan.c * w: wired page queue or uvm_pglistalloc: - * => wired page queue: o&p to change, stable from wire to unwire + * => wired page queue: o&i to change, stable from wire to unwire * XXX What about concurrent or nested wire? 
* => uvm_pglistalloc: owned by caller * ?: locked by pmap or assumed page owner's lock + * p: locked by pagedaemon policy module (pdpolicy) + * c: cpu private + * s: stable, does not change * * UVM and pmap(9) may use uvm_page_locked_p() to assert whether the * page owner's lock is acquired. @@ -118,7 +124,7 @@ * => listq.list is entry on per-CPU free page queue * => uanon is unused (or (void *)0xdeadbeef for DEBUG) * => uobject is unused (or (void *)0xdeadbeef for DEBUG) - * => PQ_FREE is set in pqflags + * => PG_FREE is set in flags * o owned by a uvm_object * => pageq.queue is entry on wired page queue, if any * => listq.queue is entry on list of pages in object @@ -129,7 +135,7 @@ * => listq is unused (XXX correct?) * => uanon is owner * => uobject is NULL - * => PQ_ANON is set in pqflags + * => PG_ANON is set in flags * o allocated by uvm_pglistalloc * => pageq.queue is entry on resulting pglist, owned by caller * => listq is unused (XXX correct?) @@ -158,14 +164,16 @@ struct vm_page { LIST_ENTRY(vm_page) list; /* f: CPU free page queue */ } listq; - struct vm_anon *uanon; /* o,p: anon */ - struct uvm_object *uobject; /* o,p: object */ - voff_t offset; /* o,p: offset into object */ + struct vm_anon *uanon; /* o,i: anon */ + struct uvm_object *uobject; /* o,i: object */ + voff_t offset; /* o: offset into object */ uint16_t flags; /* o: object flags */ - uint16_t loan_count; /* o,p: num. active loans */ - uint16_t wire_count; /* p: wired down map refs */ - uint16_t pqflags; /* p: page queue flags */ - paddr_t phys_addr; /* physical address of page */ + uint16_t spare; /* : will be used Dec 2019 */ + uint32_t pqflags; /* p: pdpolicy queue flags */ + uint32_t loan_count; /* o,i: num. active loans */ + uint32_t wire_count; /* o,i: wired down map refs */ + paddr_t phys_addr; /* s: physical address of pg */ + kmutex_t interlock; /* s: lock on identity */ #ifdef __HAVE_VM_PAGE_MD struct vm_page_md mdpage; /* ?: pmap-specific data */ @@ -185,8 +193,10 @@ struct vm_page { * Locking notes: * * PG_, struct vm_page::flags => locked by owner - * PQ_, struct vm_page::pqflags => locked by uvm_pageqlock - * PQ_FREE => additionally locked by uvm_fpageqlock + * PG_AOBJ => additionally locked by vm_page::interlock + * PG_ANON => additionally locked by vm_page::interlock + * PG_FREE => additionally locked by uvm_fpageqlock + * for uvm_pglistalloc() * * Flag descriptions: * @@ -246,36 +256,20 @@ struct vm_page { #define PG_RELEASED 0x0020 #define PG_FAKE 0x0040 #define PG_RDONLY 0x0080 -#define PG_ZERO 0x0100 -#define PG_MARKER 0x0200 - -#define PG_PAGER1 0x1000 /* pager-specific flag */ +#define PG_AOBJ 0x0100 /* page is part of an anonymous + uvm_object */ +#define PG_ANON 0x0200 /* page is part of an anon, rather + than an uvm_object */ +#define PG_SWAPBACKED (PG_ANON|PG_AOBJ) +#define PG_READAHEAD 0x0400 /* read-ahead but not "hit" yet */ +#define PG_FREE 0x0800 /* page is on free list */ +#define PG_MARKER 0x1000 +#define PG_PAGER1 0x2000 /* pager-specific flag */ +#define PG_ZERO 0x4000 #define UVM_PGFLAGBITS \ "\20\1BUSY\2WANTED\3TABLED\4CLEAN\5PAGEOUT\6RELEASED\7FAKE\10RDONLY" \ - "\11ZERO\12MARKER\15PAGER1" - -#define PQ_FREE 0x0001 /* page is on free list */ -#define PQ_ANON 0x0002 /* page is part of an anon, rather - than an uvm_object */ -#define PQ_AOBJ 0x0004 /* page is part of an anonymous - uvm_object */ -#define PQ_SWAPBACKED (PQ_ANON|PQ_AOBJ) -#define PQ_READAHEAD 0x0008 /* read-ahead but has not been "hit" yet */ - -#define PQ_PRIVATE1 0x0100 -#define PQ_PRIVATE2 0x0200 -#define 
PQ_PRIVATE3 0x0400 -#define PQ_PRIVATE4 0x0800 -#define PQ_PRIVATE5 0x1000 -#define PQ_PRIVATE6 0x2000 -#define PQ_PRIVATE7 0x4000 -#define PQ_PRIVATE8 0x8000 - -#define UVM_PQFLAGBITS \ - "\20\1FREE\2ANON\3AOBJ\4READAHEAD" \ - "\11PRIVATE1\12PRIVATE2\13PRIVATE3\14PRIVATE4" \ - "\15PRIVATE5\16PRIVATE6\17PRIVATE7\20PRIVATE8" + "\11AOBJ\12AOBJ\13READAHEAD\14FREE\15MARKER\16PAGER1\17ZERO" /* * physical memory layout structure @@ -362,7 +356,7 @@ int uvm_direct_process(struct vm_page ** #define PHYS_TO_VM_PAGE(pa) uvm_phys_to_vm_page(pa) -#define VM_PAGE_IS_FREE(entry) ((entry)->pqflags & PQ_FREE) +#define VM_PAGE_IS_FREE(entry) ((entry)->flags & PG_FREE) #define VM_FREE_PAGE_TO_CPU(pg) ((struct uvm_cpu *)((uintptr_t)pg->offset)) #ifdef DEBUG Index: sys/uvm/uvm_pager.c =================================================================== RCS file: /cvsroot/src/sys/uvm/uvm_pager.c,v retrieving revision 1.113 diff -u -p -r1.113 uvm_pager.c --- sys/uvm/uvm_pager.c 1 Dec 2019 23:14:47 -0000 1.113 +++ sys/uvm/uvm_pager.c 9 Dec 2019 22:24:40 -0000 @@ -318,12 +318,11 @@ uvm_aio_aiodone_pages(struct vm_page **p uobj = NULL; pg = pgs[0]; swap = (pg->uanon != NULL && pg->uobject == NULL) || - (pg->pqflags & PQ_AOBJ) != 0; + (pg->flags & PG_AOBJ) != 0; if (!swap) { uobj = pg->uobject; slock = uobj->vmobjlock; mutex_enter(slock); - mutex_enter(&uvm_pageqlock); } else { #if defined(VMSWAP) if (error) { @@ -362,7 +361,6 @@ uvm_aio_aiodone_pages(struct vm_page **p slock = pg->uanon->an_lock; } mutex_enter(slock); - mutex_enter(&uvm_pageqlock); anon_disposed = (pg->flags & PG_RELEASED) != 0; KASSERT(!anon_disposed || pg->uobject != NULL || pg->uanon->an_ref == 0); @@ -421,7 +419,7 @@ uvm_aio_aiodone_pages(struct vm_page **p KASSERT(!write); pg->flags &= ~PG_FAKE; #if defined(READAHEAD_STATS) - pg->pqflags |= PQ_READAHEAD; + pg->flags |= PG_READAHEAD; uvm_ra_total.ev_count++; #endif /* defined(READAHEAD_STATS) */ KASSERT((pg->flags & PG_CLEAN) != 0); @@ -437,7 +435,7 @@ uvm_aio_aiodone_pages(struct vm_page **p if (pg->flags & PG_PAGEOUT) { pg->flags &= ~PG_PAGEOUT; pageout_done++; - uvmexp.pdfreed++; + atomic_inc_uint(&uvmexp.pdfreed); pg->flags |= PG_RELEASED; } @@ -448,11 +446,9 @@ uvm_aio_aiodone_pages(struct vm_page **p if (swap) { if (pg->uobject == NULL && anon_disposed) { - mutex_exit(&uvm_pageqlock); uvm_anon_release(pg->uanon); } else { uvm_page_unbusy(&pg, 1); - mutex_exit(&uvm_pageqlock); mutex_exit(slock); } } @@ -461,7 +457,6 @@ uvm_aio_aiodone_pages(struct vm_page **p uvm_pageout_done(pageout_done); if (!swap) { uvm_page_unbusy(pgs, npages); - mutex_exit(&uvm_pageqlock); mutex_exit(slock); } else { #if defined(VMSWAP) @@ -478,7 +473,7 @@ uvm_aio_aiodone_pages(struct vm_page **p else uvm_swap_free(swslot, npages); } - uvmexp.pdpending--; + atomic_dec_uint(&uvmexp.pdpending); #endif /* defined(VMSWAP) */ } } Index: sys/uvm/uvm_pdaemon.c =================================================================== RCS file: /cvsroot/src/sys/uvm/uvm_pdaemon.c,v retrieving revision 1.112 diff -u -p -r1.112 uvm_pdaemon.c --- sys/uvm/uvm_pdaemon.c 1 Dec 2019 14:40:31 -0000 1.112 +++ sys/uvm/uvm_pdaemon.c 9 Dec 2019 22:24:40 -0000 @@ -97,7 +97,7 @@ UVMHIST_DEFINE(pdhist); #define UVMPD_NUMDIRTYREACTS 16 -#define UVMPD_NUMTRYLOCKOWNER 16 +#define UVMPD_NUMTRYLOCKOWNER 128 /* * local prototypes @@ -112,7 +112,7 @@ static void uvmpd_pool_drain_wakeup(void static unsigned int uvm_pagedaemon_waiters; /* State for the pool drainer thread */ -static kmutex_t uvmpd_pool_drain_lock; +static kmutex_t uvmpd_pool_drain_lock 
__cacheline_aligned; static kcondvar_t uvmpd_pool_drain_cv; static bool uvmpd_pool_drain_run = false; @@ -198,7 +198,6 @@ uvm_kick_pdaemon(void) * uvmpd_tune: tune paging parameters * * => called when ever memory is added (or removed?) to the system - * => caller must call with page queues locked */ static void @@ -260,10 +259,8 @@ uvm_pageout(void *arg) */ uvm.pagedaemon_lwp = curlwp; - mutex_enter(&uvm_pageqlock); npages = uvmexp.npages; uvmpd_tune(); - mutex_exit(&uvm_pageqlock); /* * main loop @@ -287,10 +284,9 @@ uvm_pageout(void *arg) } /* - * now lock page queues and recompute inactive count + * now recompute inactive count */ - mutex_enter(&uvm_pageqlock); if (npages != uvmexp.npages || extrapages != uvm_extrapages) { npages = uvmexp.npages; extrapages = uvm_extrapages; @@ -334,12 +330,7 @@ uvm_pageout(void *arg) mutex_spin_exit(&uvm_fpageqlock); /* - * scan done. unlock page queues (the only lock we are holding) - */ - mutex_exit(&uvm_pageqlock); - - /* - * if we don't need free memory, we're done. + * scan done. if we don't need free memory, we're done. */ if (!needsfree && !kmem_va_starved) @@ -377,23 +368,21 @@ void uvm_pageout_start(int npages) { - mutex_spin_enter(&uvm_fpageqlock); - uvmexp.paging += npages; - mutex_spin_exit(&uvm_fpageqlock); + atomic_add_int(&uvmexp.paging, npages); } void uvm_pageout_done(int npages) { - mutex_spin_enter(&uvm_fpageqlock); KASSERT(uvmexp.paging >= npages); - uvmexp.paging -= npages; + atomic_add_int(&uvmexp.paging, -npages); /* * wake up either of pagedaemon or LWPs waiting for it. */ + mutex_spin_enter(&uvm_fpageqlock); if (uvmexp.free <= uvmexp.reserve_kernel) { wakeup(&uvm.pagedaemon); } else { @@ -406,7 +395,7 @@ uvm_pageout_done(int npages) /* * uvmpd_trylockowner: trylock the page's owner. * - * => called with pageq locked. + * => called with page interlock held. * => resolve orphaned O->A loaned page. * => return the locked mutex on success. otherwise, return NULL. */ @@ -415,38 +404,68 @@ kmutex_t * uvmpd_trylockowner(struct vm_page *pg) { struct uvm_object *uobj = pg->uobject; + struct vm_anon *anon = pg->uanon; + int tries, count; + bool running; kmutex_t *slock; - KASSERT(mutex_owned(&uvm_pageqlock)); + KASSERT(mutex_owned(&pg->interlock)); if (uobj != NULL) { slock = uobj->vmobjlock; - } else { - struct vm_anon *anon = pg->uanon; - - KASSERT(anon != NULL); + KASSERTMSG(slock != NULL, "pg %p uobj %p, NULL lock", pg, uobj); + } else if (anon != NULL) { slock = anon->an_lock; - } - - if (!mutex_tryenter(slock)) { + KASSERTMSG(slock != NULL, "pg %p anon %p, NULL lock", pg, anon); + } else { + /* Page may be in state of flux - ignore. */ + mutex_exit(&pg->interlock); return NULL; } - if (uobj == NULL) { - - /* - * set PQ_ANON if it isn't set already. - */ - - if ((pg->pqflags & PQ_ANON) == 0) { - KASSERT(pg->loan_count > 0); - pg->loan_count--; - pg->pqflags |= PQ_ANON; - /* anon now owns it */ + /* + * Now try to lock the objects. We'll try hard, but don't really + * plan on spending more than a millisecond or so here. + */ + tries = (curlwp == uvm.pagedaemon_lwp ? UVMPD_NUMTRYLOCKOWNER : 1); + for (;;) { + if (mutex_tryenter(slock)) { + if (uobj == NULL) { + /* + * set PG_ANON if it isn't set already. 
+ */ + if ((pg->flags & PG_ANON) == 0) { + KASSERT(pg->loan_count > 0); + pg->loan_count--; + pg->flags |= PG_ANON; + /* anon now owns it */ + } + } + mutex_exit(&pg->interlock); + return slock; + } + running = mutex_owner_running(slock); + if (!running || --tries <= 0) { + break; } + count = SPINLOCK_BACKOFF_MAX; + SPINLOCK_BACKOFF(count); } - return slock; + /* + * We didn't get the lock; chances are the very next page on the + * queue also has the same lock, so if the lock owner is not running + * take a breather and allow them to make progress. There could be + * only 1 CPU in the system, or the pagedaemon could have preempted + * the owner in kernel, or any number of other things could be going + * on. + */ + mutex_exit(&pg->interlock); + if (!running && curlwp == uvm.pagedaemon_lwp) { + (void)kpause("pdpglock", false, 1, NULL); + } + uvmexp.pdbusy++; + return NULL; } #if defined(VMSWAP) @@ -497,7 +516,7 @@ swapcluster_add(struct swapcluster *swc, KASSERT(swc->swc_slot != 0); KASSERT(swc->swc_nused < swc->swc_nallocated); - KASSERT((pg->pqflags & PQ_SWAPBACKED) != 0); + KASSERT((pg->flags & PG_SWAPBACKED) != 0); slot = swc->swc_slot + swc->swc_nused; uobj = pg->uobject; @@ -588,12 +607,12 @@ uvmpd_dropswap(struct vm_page *pg) bool result = false; struct vm_anon *anon = pg->uanon; - if ((pg->pqflags & PQ_ANON) && anon->an_swslot) { + if ((pg->flags & PG_ANON) && anon->an_swslot) { uvm_swap_free(anon->an_swslot, 1); anon->an_swslot = 0; pg->flags &= ~PG_CLEAN; result = true; - } else if (pg->pqflags & PQ_AOBJ) { + } else if (pg->flags & PG_AOBJ) { int slot = uao_set_swslot(pg->uobject, pg->offset >> PAGE_SHIFT, 0); if (slot) { @@ -610,6 +629,7 @@ uvmpd_dropswap(struct vm_page *pg) * uvmpd_trydropswap: try to free any swap allocated to this page. * * => return true if a slot is successfully freed. + * => page interlock must be held, and will be dropped. */ bool @@ -619,11 +639,13 @@ uvmpd_trydropswap(struct vm_page *pg) bool result; if ((pg->flags & PG_BUSY) != 0) { + mutex_exit(&pg->interlock); return false; } /* * lock the page's owner. + * this will drop pg->interlock. */ slock = uvmpd_trylockowner(pg); @@ -653,7 +675,6 @@ uvmpd_trydropswap(struct vm_page *pg) * uvmpd_scan_queue: scan an replace candidate list for pages * to clean or free. * - * => called with page queues locked * => we work on meeting our free target by converting inactive pages * into free pages. * => we handle the building of swap-backed clusters @@ -669,7 +690,6 @@ uvmpd_scan_queue(void) struct swapcluster swc; #endif /* defined(VMSWAP) */ int dirtyreacts; - int lockownerfail; kmutex_t *slock; UVMHIST_FUNC("uvmpd_scan_queue"); UVMHIST_CALLED(pdhist); @@ -684,7 +704,6 @@ uvmpd_scan_queue(void) #endif /* defined(VMSWAP) */ dirtyreacts = 0; - lockownerfail = 0; uvmpdpol_scaninit(); while (/* CONSTCOND */ 1) { @@ -704,22 +723,9 @@ uvmpd_scan_queue(void) break; } - p = uvmpdpol_selectvictim(); - if (p == NULL) { - break; - } - KASSERT(uvmpdpol_pageisqueued_p(p)); - KASSERT(p->wire_count == 0); - - /* - * we are below target and have a new page to consider. - */ - - anon = p->uanon; - uobj = p->uobject; - /* - * first we attempt to lock the object that this page + * first we have the pdpolicy select a victim page + * and attempt to lock the object that the page * belongs to. if our attempt fails we skip on to * the next page (no harm done). it is important to * "try" locking the object as we are locking in the @@ -727,31 +733,28 @@ uvmpd_scan_queue(void) * deadlock. 
* * the only time we expect to see an ownerless page - * (i.e. a page with no uobject and !PQ_ANON) is if an + * (i.e. a page with no uobject and !PG_ANON) is if an * anon has loaned a page from a uvm_object and the * uvm_object has dropped the ownership. in that * case, the anon can "take over" the loaned page * and make it its own. */ - slock = uvmpd_trylockowner(p); - if (slock == NULL) { - /* - * yield cpu to make a chance for an LWP holding - * the lock run. otherwise we can busy-loop too long - * if the page queue is filled with a lot of pages - * from few objects. - */ - lockownerfail++; - if (lockownerfail > UVMPD_NUMTRYLOCKOWNER) { - mutex_exit(&uvm_pageqlock); - /* XXX Better than yielding but inadequate. */ - kpause("livelock", false, 1, NULL); - mutex_enter(&uvm_pageqlock); - lockownerfail = 0; - } - continue; + p = uvmpdpol_selectvictim(&slock); + if (p == NULL) { + break; } + KASSERT(uvmpdpol_pageisqueued_p(p)); + KASSERT(uvm_page_locked_p(p)); + KASSERT(p->wire_count == 0); + + /* + * we are below target and have a new page to consider. + */ + + anon = p->uanon; + uobj = p->uobject; + if (p->flags & PG_BUSY) { mutex_exit(slock); uvmexp.pdbusy++; @@ -772,24 +775,22 @@ uvmpd_scan_queue(void) /* - * we now have the object and the page queues locked. + * we now have the object locked. * if the page is not swap-backed, call the object's * pager to flush and free the page. */ #if defined(READAHEAD_STATS) - if ((p->pqflags & PQ_READAHEAD) != 0) { - p->pqflags &= ~PQ_READAHEAD; + if ((p->flags & PG_READAHEAD) != 0) { + p->flags &= ~PG_READAHEAD; uvm_ra_miss.ev_count++; } #endif /* defined(READAHEAD_STATS) */ - if ((p->pqflags & PQ_SWAPBACKED) == 0) { + if ((p->flags & PG_SWAPBACKED) == 0) { KASSERT(uobj != NULL); - mutex_exit(&uvm_pageqlock); (void) (uobj->pgops->pgo_put)(uobj, p->offset, p->offset + PAGE_SIZE, PGO_CLEANIT|PGO_FREE); - mutex_enter(&uvm_pageqlock); continue; } @@ -810,7 +811,7 @@ uvmpd_scan_queue(void) pageidx = p->offset >> PAGE_SHIFT; uvm_pagefree(p); - uvmexp.pdfreed++; + atomic_inc_uint(&uvmexp.pdfreed); /* * for anons, we need to remove the page @@ -877,12 +878,10 @@ uvmpd_scan_queue(void) p->flags |= PG_BUSY; UVM_PAGE_OWN(p, "scan_queue"); - p->flags |= PG_PAGEOUT; - uvm_pagedequeue(p); - uvmexp.pgswapout++; - mutex_exit(&uvm_pageqlock); + + uvm_pagedequeue(p); /* * add the new page to the cluster. @@ -891,7 +890,6 @@ uvmpd_scan_queue(void) if (swapcluster_add(&swc, p)) { p->flags &= ~(PG_BUSY|PG_PAGEOUT); UVM_PAGE_OWN(p, NULL); - mutex_enter(&uvm_pageqlock); dirtyreacts++; uvm_pageactivate(p); mutex_exit(slock); @@ -900,14 +898,13 @@ uvmpd_scan_queue(void) mutex_exit(slock); swapcluster_flush(&swc, false); - mutex_enter(&uvm_pageqlock); /* * the pageout is in progress. bump counters and set up * for the next loop. */ - uvmexp.pdpending++; + atomic_inc_uint(&uvmexp.pdpending); #else /* defined(VMSWAP) */ uvm_pageactivate(p); @@ -916,16 +913,12 @@ uvmpd_scan_queue(void) } #if defined(VMSWAP) - mutex_exit(&uvm_pageqlock); swapcluster_flush(&swc, true); - mutex_enter(&uvm_pageqlock); #endif /* defined(VMSWAP) */ } /* * uvmpd_scan: scan the page queues and attempt to meet our targets. 
- * - * => called with pageq's locked */ static void Index: sys/uvm/uvm_pdpolicy.h =================================================================== RCS file: /cvsroot/src/sys/uvm/uvm_pdpolicy.h,v retrieving revision 1.3 diff -u -p -r1.3 uvm_pdpolicy.h --- sys/uvm/uvm_pdpolicy.h 21 Feb 2007 23:00:14 -0000 1.3 +++ sys/uvm/uvm_pdpolicy.h 9 Dec 2019 22:24:40 -0000 @@ -51,7 +51,7 @@ void uvmpdpol_anfree(struct vm_anon *); void uvmpdpol_tune(void); void uvmpdpol_scaninit(void); -struct vm_page *uvmpdpol_selectvictim(void); +struct vm_page *uvmpdpol_selectvictim(kmutex_t **lock); void uvmpdpol_balancequeue(int); void uvmpdpol_sysctlsetup(void); Index: sys/uvm/uvm_pdpolicy_clock.c =================================================================== RCS file: /cvsroot/src/sys/uvm/uvm_pdpolicy_clock.c,v retrieving revision 1.17 diff -u -p -r1.17 uvm_pdpolicy_clock.c --- sys/uvm/uvm_pdpolicy_clock.c 30 Jan 2012 17:21:52 -0000 1.17 +++ sys/uvm/uvm_pdpolicy_clock.c 9 Dec 2019 22:24:40 -0000 @@ -79,18 +79,23 @@ __KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy #include #include #include +#include #endif /* defined(PDSIM) */ -#define PQ_INACTIVE PQ_PRIVATE1 /* page is in inactive list */ -#define PQ_ACTIVE PQ_PRIVATE2 /* page is in active list */ +#define PQ_TIME 0x3fffffff /* time of last activation */ +#define PQ_INACTIVE 0x40000000 /* page is in inactive list */ +#define PQ_ACTIVE 0x80000000 /* page is in active list */ #if !defined(CLOCK_INACTIVEPCT) #define CLOCK_INACTIVEPCT 33 #endif /* !defined(CLOCK_INACTIVEPCT) */ struct uvmpdpol_globalstate { - struct pglist s_activeq; /* allocated pages, in use */ + kmutex_t lock; /* lock on state */ + /* <= compiler pads here */ + struct pglist s_activeq /* allocated pages, in use */ + __aligned(COHERENCY_UNIT); struct pglist s_inactiveq; /* pages between the clock hands */ int s_active; int s_inactive; @@ -110,7 +115,11 @@ struct uvmpdpol_scanstate { struct vm_page *ss_nextpg; }; -static struct uvmpdpol_globalstate pdpol_state; +static void uvmpdpol_pageactivate_locked(struct vm_page *); +static void uvmpdpol_pagedeactivate_locked(struct vm_page *); +static void uvmpdpol_pagedequeue_locked(struct vm_page *); + +static struct uvmpdpol_globalstate pdpol_state __cacheline_aligned; static struct uvmpdpol_scanstate pdpol_scanstate; PDPOL_EVCNT_DEFINE(reactexec) @@ -144,6 +153,7 @@ uvmpdpol_scaninit(void) * to keep usage within the minimum and maximum usage limits. */ + mutex_enter(&s->lock); t = s->s_active + s->s_inactive + uvmexp.free; anonunder = uvmexp.anonpages <= UVM_PCTPARAM_APPLY(&s->s_anonmin, t); fileunder = uvmexp.filepages <= UVM_PCTPARAM_APPLY(&s->s_filemin, t); @@ -162,17 +172,18 @@ uvmpdpol_scaninit(void) ss->ss_execreact = execreact; ss->ss_first = true; + mutex_exit(&s->lock); } struct vm_page * -uvmpdpol_selectvictim(void) +uvmpdpol_selectvictim(kmutex_t **plock) { + struct uvmpdpol_globalstate *s = &pdpol_state; struct uvmpdpol_scanstate *ss = &pdpol_scanstate; struct vm_page *pg; - kmutex_t *lock; - - KASSERT(mutex_owned(&uvm_pageqlock)); + kmutex_t *lock = NULL; + mutex_enter(&s->lock); while (/* CONSTCOND */ 1) { struct vm_anon *anon; struct uvm_object *uobj; @@ -190,28 +201,24 @@ uvmpdpol_selectvictim(void) break; } ss->ss_nextpg = TAILQ_NEXT(pg, pageq.queue); + KASSERT(pg->wire_count == 0); uvmexp.pdscans++; /* - * move referenced pages back to active queue and - * skip to next page. + * acquire interlock to stablize page identity. 
+ * if we have caught the page in a state of flux + * and it should be dequeued, do it now and then + * move on to the next. */ - - lock = uvmpd_trylockowner(pg); - if (lock != NULL) { - if (pmap_is_referenced(pg)) { - uvmpdpol_pageactivate(pg); - uvmexp.pdreact++; - mutex_exit(lock); - continue; - } - mutex_exit(lock); + mutex_enter(&pg->interlock); + if ((pg->uobject == NULL && pg->uanon == NULL) || + pg->wire_count > 0) { + mutex_exit(&pg->interlock); + uvmpdpol_pagedequeue_locked(pg); + continue; } - anon = pg->uanon; - uobj = pg->uobject; - /* * enforce the minimum thresholds on different * types of memory usage. if reusing the current @@ -219,33 +226,74 @@ uvmpdpol_selectvictim(void) * minimum, reactivate the page instead and move * on to the next page. */ - + anon = pg->uanon; + uobj = pg->uobject; if (uobj && UVM_OBJ_IS_VTEXT(uobj) && ss->ss_execreact) { - uvmpdpol_pageactivate(pg); + mutex_exit(&pg->interlock); + uvmpdpol_pageactivate_locked(pg); PDPOL_EVCNT_INCR(reactexec); continue; } if (uobj && UVM_OBJ_IS_VNODE(uobj) && !UVM_OBJ_IS_VTEXT(uobj) && ss->ss_filereact) { - uvmpdpol_pageactivate(pg); + mutex_exit(&pg->interlock); + uvmpdpol_pageactivate_locked(pg); PDPOL_EVCNT_INCR(reactfile); continue; } if ((anon || UVM_OBJ_IS_AOBJ(uobj)) && ss->ss_anonreact) { - uvmpdpol_pageactivate(pg); + mutex_exit(&pg->interlock); + uvmpdpol_pageactivate_locked(pg); PDPOL_EVCNT_INCR(reactanon); continue; } + /* + * try to lock the object that owns the page. + * + * with the page interlock held, we can drop s->lock, which + * could otherwise serve as a barrier to us getting the + * object locked, because the owner of the object's lock may + * be blocked on s->lock (i.e. a deadlock). + * + * whatever happens, uvmpd_trylockowner() will release the + * interlock. with the interlock dropped we can then + * re-acquire our own lock. the order is: + * + * object -> pdpol -> interlock. + */ + mutex_exit(&s->lock); + lock = uvmpd_trylockowner(pg); + /* pg->interlock now released */ + mutex_enter(&s->lock); + if (lock == NULL) { + /* didn't get it - try the next page. */ + continue; + } + + /* + * move referenced pages back to active queue and skip to + * next page. + */ + if (pmap_is_referenced(pg)) { + uvmpdpol_pageactivate_locked(pg); + uvmexp.pdreact++; + mutex_exit(lock); + continue; + } + + /* we have a potential victim. */ break; } - + mutex_exit(&s->lock); + *plock = lock; return pg; } void uvmpdpol_balancequeue(int swap_shortage) { + struct uvmpdpol_globalstate *s = &pdpol_state; int inactive_shortage; struct vm_page *p, *nextpg; kmutex_t *lock; @@ -255,6 +303,7 @@ uvmpdpol_balancequeue(int swap_shortage) * our inactive target. */ + mutex_enter(&s->lock); inactive_shortage = pdpol_state.s_inactarg - pdpol_state.s_inactive; for (p = TAILQ_FIRST(&pdpol_state.s_activeq); p != NULL && (inactive_shortage > 0 || swap_shortage > 0); @@ -265,10 +314,14 @@ uvmpdpol_balancequeue(int swap_shortage) * if there's a shortage of swap slots, try to free it. */ - if (swap_shortage > 0 && (p->pqflags & PQ_SWAPBACKED) != 0) { + if (swap_shortage > 0 && (p->flags & PG_SWAPBACKED) != 0) { + mutex_enter(&p->interlock); + mutex_exit(&s->lock); if (uvmpd_trydropswap(p)) { swap_shortage--; } + /* p->interlock now released */ + mutex_enter(&s->lock); } /* @@ -279,27 +332,42 @@ uvmpdpol_balancequeue(int swap_shortage) continue; } - /* no need to check wire_count as pg is "active" */ + /* + * acquire interlock to stablize page identity. 
+ * if we have caught the page in a state of flux + * and it should be dequeued, do it now and then + * move on to the next. + */ + mutex_enter(&p->interlock); + if ((p->uobject == NULL && p->uanon == NULL) || + p->wire_count > 0) { + mutex_exit(&p->interlock); + uvmpdpol_pagedequeue_locked(p); + continue; + } + mutex_exit(&s->lock); lock = uvmpd_trylockowner(p); + /* p->interlock now released */ + mutex_enter(&s->lock); if (lock != NULL) { - uvmpdpol_pagedeactivate(p); + uvmpdpol_pagedeactivate_locked(p); uvmexp.pddeact++; inactive_shortage--; mutex_exit(lock); } } + mutex_exit(&s->lock); } -void -uvmpdpol_pagedeactivate(struct vm_page *pg) +static void +uvmpdpol_pagedeactivate_locked(struct vm_page *pg) { KASSERT(uvm_page_locked_p(pg)); - KASSERT(mutex_owned(&uvm_pageqlock)); if (pg->pqflags & PQ_ACTIVE) { TAILQ_REMOVE(&pdpol_state.s_activeq, pg, pageq.queue); - pg->pqflags &= ~PQ_ACTIVE; + pg->pqflags &= ~(PQ_ACTIVE | PQ_TIME); KASSERT(pdpol_state.s_active > 0); pdpol_state.s_active--; } @@ -313,27 +381,49 @@ uvmpdpol_pagedeactivate(struct vm_page * } void -uvmpdpol_pageactivate(struct vm_page *pg) +uvmpdpol_pagedeactivate(struct vm_page *pg) { + struct uvmpdpol_globalstate *s = &pdpol_state; + + mutex_enter(&s->lock); + uvmpdpol_pagedeactivate_locked(pg); + mutex_exit(&s->lock); +} - uvmpdpol_pagedequeue(pg); +static void +uvmpdpol_pageactivate_locked(struct vm_page *pg) +{ + + uvmpdpol_pagedequeue_locked(pg); TAILQ_INSERT_TAIL(&pdpol_state.s_activeq, pg, pageq.queue); - pg->pqflags |= PQ_ACTIVE; + pg->pqflags = PQ_ACTIVE | (hardclock_ticks & PQ_TIME); pdpol_state.s_active++; } void -uvmpdpol_pagedequeue(struct vm_page *pg) +uvmpdpol_pageactivate(struct vm_page *pg) +{ + struct uvmpdpol_globalstate *s = &pdpol_state; + + /* Safety: PQ_ACTIVE clear also tells us if it is not enqueued. */ + if ((pg->pqflags & PQ_ACTIVE) == 0 || + ((hardclock_ticks & PQ_TIME) - (pg->pqflags & PQ_TIME)) > hz) { + mutex_enter(&s->lock); + uvmpdpol_pageactivate_locked(pg); + mutex_exit(&s->lock); + } +} + +static void +uvmpdpol_pagedequeue_locked(struct vm_page *pg) { if (pg->pqflags & PQ_ACTIVE) { - KASSERT(mutex_owned(&uvm_pageqlock)); TAILQ_REMOVE(&pdpol_state.s_activeq, pg, pageq.queue); - pg->pqflags &= ~PQ_ACTIVE; + pg->pqflags &= ~(PQ_ACTIVE | PQ_TIME); KASSERT(pdpol_state.s_active > 0); pdpol_state.s_active--; } else if (pg->pqflags & PQ_INACTIVE) { - KASSERT(mutex_owned(&uvm_pageqlock)); TAILQ_REMOVE(&pdpol_state.s_inactiveq, pg, pageq.queue); pg->pqflags &= ~PQ_INACTIVE; KASSERT(pdpol_state.s_inactive > 0); @@ -342,10 +432,23 @@ uvmpdpol_pagedequeue(struct vm_page *pg) } void +uvmpdpol_pagedequeue(struct vm_page *pg) +{ + struct uvmpdpol_globalstate *s = &pdpol_state; + + mutex_enter(&s->lock); + uvmpdpol_pagedequeue_locked(pg); + mutex_exit(&s->lock); +} + +void uvmpdpol_pageenqueue(struct vm_page *pg) { + struct uvmpdpol_globalstate *s = &pdpol_state; - uvmpdpol_pageactivate(pg); + mutex_enter(&s->lock); + uvmpdpol_pageactivate_locked(pg); + mutex_exit(&s->lock); } void @@ -357,19 +460,23 @@ bool uvmpdpol_pageisqueued_p(struct vm_page *pg) { + /* Safe to test unlocked due to page life-cycle. 
*/ return (pg->pqflags & (PQ_ACTIVE | PQ_INACTIVE)) != 0; } void uvmpdpol_estimatepageable(int *active, int *inactive) { + struct uvmpdpol_globalstate *s = &pdpol_state; + mutex_enter(&s->lock); if (active) { *active = pdpol_state.s_active; } if (inactive) { *inactive = pdpol_state.s_inactive; } + mutex_exit(&s->lock); } #if !defined(PDSIM) @@ -400,6 +507,7 @@ uvmpdpol_init(void) { struct uvmpdpol_globalstate *s = &pdpol_state; + mutex_init(&s->lock, MUTEX_DEFAULT, IPL_NONE); TAILQ_INIT(&s->s_activeq); TAILQ_INIT(&s->s_inactiveq); uvm_pctparam_init(&s->s_inactivepct, CLOCK_INACTIVEPCT, NULL); @@ -420,14 +528,18 @@ bool uvmpdpol_needsscan_p(void) { + /* This must be an unlocked check: can be called from interrupt. */ return pdpol_state.s_inactive < pdpol_state.s_inactarg; } void uvmpdpol_tune(void) { + struct uvmpdpol_globalstate *s = &pdpol_state; + mutex_enter(&s->lock); clock_tune(); + mutex_exit(&s->lock); } #if !defined(PDSIM) Index: sys/uvm/uvm_pdpolicy_clockpro.c =================================================================== RCS file: /cvsroot/src/sys/uvm/uvm_pdpolicy_clockpro.c,v retrieving revision 1.17 diff -u -p -r1.17 uvm_pdpolicy_clockpro.c --- sys/uvm/uvm_pdpolicy_clockpro.c 20 Jun 2011 23:18:58 -0000 1.17 +++ sys/uvm/uvm_pdpolicy_clockpro.c 9 Dec 2019 22:24:41 -0000 @@ -121,16 +121,13 @@ PDPOL_EVCNT_DEFINE(speculativemiss) PDPOL_EVCNT_DEFINE(locksuccess) PDPOL_EVCNT_DEFINE(lockfail) -#define PQ_REFERENCED PQ_PRIVATE1 -#define PQ_HOT PQ_PRIVATE2 -#define PQ_TEST PQ_PRIVATE3 -#define PQ_INITIALREF PQ_PRIVATE4 -#if PQ_PRIVATE6 != PQ_PRIVATE5 * 2 || PQ_PRIVATE7 != PQ_PRIVATE6 * 2 -#error PQ_PRIVATE -#endif -#define PQ_QMASK (PQ_PRIVATE5|PQ_PRIVATE6|PQ_PRIVATE7) -#define PQ_QFACTOR PQ_PRIVATE5 -#define PQ_SPECULATIVE PQ_PRIVATE8 +#define PQ_REFERENCED 0x000000001 +#define PQ_HOT 0x000000002 +#define PQ_TEST 0x000000004 +#define PQ_INITIALREF 0x000000008 +#define PQ_QMASK 0x000000070 +#define PQ_QFACTOR 0x000000010 +#define PQ_SPECULATIVE 0x000000080 #define CLOCKPRO_NOQUEUE 0 #define CLOCKPRO_NEWQ 1 /* small queue to clear initial ref. */ @@ -170,6 +167,7 @@ typedef struct { } pageq_t; struct clockpro_state { + kmutex_t lock; int s_npages; int s_coldtarget; int s_ncold; @@ -203,7 +201,7 @@ clockpro_switchqueue(void) #endif /* !defined(LISTQ) */ -static struct clockpro_state clockpro; +static struct clockpro_state clockpro __cacheline_aligned; static struct clockpro_scanstate { int ss_nscanned; } scanstate; @@ -585,6 +583,8 @@ static void clockpro_reinit(void) { + KASSERT(mutex_owned(&clockpro.lock)); + clockpro_hashinit(uvmexp.npages); } @@ -594,6 +594,7 @@ clockpro_init(void) struct clockpro_state *s = &clockpro; int i; + mutex_init(&s->lock, MUTEX_DEFAULT, IPL_NONE); for (i = 0; i < CLOCKPRO_NQUEUE; i++) { pageq_init(&s->s_q[i]); } @@ -608,6 +609,8 @@ clockpro_tune(void) struct clockpro_state *s = &clockpro; int coldtarget; + KASSERT(mutex_owned(&s->lock)); + #if defined(ADAPTIVE) int coldmax = s->s_npages * CLOCKPRO_COLDPCTMAX / 100; int coldmin = 1; @@ -639,9 +642,26 @@ clockpro_movereferencebit(struct vm_page kmutex_t *lock; bool referenced; + KASSERT(mutex_owned(&clockpro.lock)); KASSERT(!locked || uvm_page_locked_p(pg)); if (!locked) { + /* + * acquire interlock to stablize page identity. + * if we have caught the page in a state of flux + * and it should be dequeued, abort. it will be + * dequeued later. 
+ */ + mutex_enter(&pg->interlock); + if ((pg->uobject == NULL && pg->uanon == NULL) || + pg->wire_count > 0) { + mutex_exit(&pg->interlock); + PDPOL_EVCNT_INCR(lockfail); + return; + } + mutex_exit(&clockpro.lock); /* XXX */ lock = uvmpd_trylockowner(pg); + /* pg->interlock now dropped */ + mutex_enter(&clockpro.lock); /* XXX */ if (lock == NULL) { /* * XXXuvmplock @@ -664,6 +684,8 @@ static void clockpro_clearreferencebit(struct vm_page *pg, bool locked) { + KASSERT(mutex_owned(&clockpro.lock)); + clockpro_movereferencebit(pg, locked); pg->pqflags &= ~PQ_REFERENCED; } @@ -675,6 +697,8 @@ clockpro___newqrotate(int len) pageq_t * const newq = clockpro_queue(s, CLOCKPRO_NEWQ); struct vm_page *pg; + KASSERT(mutex_owned(&s->lock)); + while (pageq_len(newq) > len) { pg = pageq_remove_head(newq); KASSERT(pg != NULL); @@ -693,6 +717,8 @@ clockpro_newqrotate(void) { struct clockpro_state * const s = &clockpro; + KASSERT(mutex_owned(&s->lock)); + check_sanity(); clockpro___newqrotate(s->s_newqlenmax); check_sanity(); @@ -702,6 +728,8 @@ static void clockpro_newqflush(int n) { + KASSERT(mutex_owned(&clockpro.lock)); + check_sanity(); clockpro___newqrotate(n); check_sanity(); @@ -712,6 +740,8 @@ clockpro_newqflushone(void) { struct clockpro_state * const s = &clockpro; + KASSERT(mutex_owned(&s->lock)); + clockpro_newqflush( MAX(pageq_len(clockpro_queue(s, CLOCKPRO_NEWQ)) - 1, 0)); } @@ -725,6 +755,7 @@ clockpro___enqueuetail(struct vm_page *p { struct clockpro_state * const s = &clockpro; + KASSERT(mutex_owned(&s->lock)); KASSERT(clockpro_getq(pg) == CLOCKPRO_NOQUEUE); check_sanity(); @@ -748,7 +779,7 @@ clockpro_pageenqueue(struct vm_page *pg) bool speculative = (pg->pqflags & PQ_SPECULATIVE) != 0; /* XXX */ KASSERT((~pg->pqflags & (PQ_INITIALREF|PQ_SPECULATIVE)) != 0); - KASSERT(mutex_owned(&uvm_pageqlock)); + KASSERT(mutex_owned(&s->lock)); check_sanity(); KASSERT(clockpro_getq(pg) == CLOCKPRO_NOQUEUE); s->s_npages++; @@ -805,6 +836,8 @@ clockpro_pagequeue(struct vm_page *pg) struct clockpro_state * const s = &clockpro; int qidx; + KASSERT(mutex_owned(&s->lock)); + qidx = clockpro_getq(pg); KASSERT(qidx != CLOCKPRO_NOQUEUE); @@ -817,6 +850,8 @@ clockpro_pagedequeue(struct vm_page *pg) struct clockpro_state * const s = &clockpro; pageq_t *q; + KASSERT(mutex_owned(&s->lock)); + KASSERT(s->s_npages > 0); check_sanity(); q = clockpro_pagequeue(pg); @@ -838,6 +873,8 @@ clockpro_pagerequeue(struct vm_page *pg) struct clockpro_state * const s = &clockpro; int qidx; + KASSERT(mutex_owned(&s->lock)); + qidx = clockpro_getq(pg); KASSERT(qidx == CLOCKPRO_HOTQ || qidx == CLOCKPRO_COLDQ); pageq_remove(clockpro_queue(s, qidx), pg); @@ -851,6 +888,8 @@ static void handhot_endtest(struct vm_page *pg) { + KASSERT(mutex_owned(&clockpro.lock)); + KASSERT((pg->pqflags & PQ_HOT) == 0); if ((pg->pqflags & PQ_TEST) != 0) { PDPOL_EVCNT_INCR(hhotcoldtest); @@ -869,6 +908,8 @@ handhot_advance(void) pageq_t *hotq; int hotqlen; + KASSERT(mutex_owned(&s->lock)); + clockpro_tune(); dump("hot called"); @@ -974,6 +1015,8 @@ handcold_advance(void) struct clockpro_state * const s = &clockpro; struct vm_page *pg; + KASSERT(mutex_owned(&s->lock)); + for (;;) { #if defined(LISTQ) pageq_t *listq = clockpro_queue(s, CLOCKPRO_LISTQ); @@ -1089,7 +1132,9 @@ done:; void uvmpdpol_pageactivate(struct vm_page *pg) { + struct clockpro_state * const s = &clockpro; + mutex_enter(&s->lock); if (!uvmpdpol_pageisqueued_p(pg)) { KASSERT((pg->pqflags & PQ_SPECULATIVE) == 0); pg->pqflags |= PQ_INITIALREF; @@ -1102,24 +1147,31 @@ 
uvmpdpol_pageactivate(struct vm_page *pg clockpro_pageenqueue(pg); } pg->pqflags |= PQ_REFERENCED; + mutex_exit(&s->lock); } void uvmpdpol_pagedeactivate(struct vm_page *pg) { + struct clockpro_state * const s = &clockpro; + mutex_enter(&s->lock); clockpro_clearreferencebit(pg, true); + mutex_exit(&s->lock); } void uvmpdpol_pagedequeue(struct vm_page *pg) { + struct clockpro_state * const s = &clockpro; if (!uvmpdpol_pageisqueued_p(pg)) { return; } + mutex_enter(&s->lock); clockpro_pagedequeue(pg); pg->pqflags &= ~(PQ_INITIALREF|PQ_SPECULATIVE); + mutex_exit(&s->lock); } void @@ -1127,12 +1179,16 @@ uvmpdpol_pageenqueue(struct vm_page *pg) { #if 1 + struct clockpro_state * const s = &clockpro; + if (uvmpdpol_pageisqueued_p(pg)) { return; } + mutex_enter(&s->lock); clockpro_clearreferencebit(pg, true); pg->pqflags |= PQ_SPECULATIVE; clockpro_pageenqueue(pg); + mutex_exit(&s->lock); #else uvmpdpol_pageactivate(pg); #endif @@ -1141,11 +1197,14 @@ uvmpdpol_pageenqueue(struct vm_page *pg) void uvmpdpol_anfree(struct vm_anon *an) { + struct clockpro_state * const s = &clockpro; KASSERT(an->an_page == NULL); + mutex_enter(&s->lock); if (nonresident_lookupremove((objid_t)an, 0)) { PDPOL_EVCNT_INCR(nresanonfree); } + mutex_exit(&s->lock); } void @@ -1158,8 +1217,11 @@ uvmpdpol_init(void) void uvmpdpol_reinit(void) { + struct clockpro_state * const s = &clockpro; + mutex_enter(&s->lock); clockpro_reinit(); + mutex_exit(&s->lock); } void @@ -1167,42 +1229,75 @@ uvmpdpol_estimatepageable(int *active, i { struct clockpro_state * const s = &clockpro; + mutex_enter(&s->lock); if (active) { *active = s->s_npages - s->s_ncold; } if (inactive) { *inactive = s->s_ncold; } + mutex_exit(&s->lock); } bool uvmpdpol_pageisqueued_p(struct vm_page *pg) { + /* Unlocked check OK due to page lifecycle. */ return clockpro_getq(pg) != CLOCKPRO_NOQUEUE; } void uvmpdpol_scaninit(void) { + struct clockpro_state * const s = &clockpro; struct clockpro_scanstate * const ss = &scanstate; + mutex_enter(&s->lock); ss->ss_nscanned = 0; + mutex_exit(&s->lock); } struct vm_page * -uvmpdpol_selectvictim(void) +uvmpdpol_selectvictim(kmutex_t **plock) { struct clockpro_state * const s = &clockpro; struct clockpro_scanstate * const ss = &scanstate; struct vm_page *pg; + kmutex_t *lock = NULL; - if (ss->ss_nscanned > s->s_npages) { - DPRINTF("scan too much\n"); - return NULL; - } - pg = handcold_advance(); - ss->ss_nscanned++; + do { + mutex_enter(&s->lock); + if (ss->ss_nscanned > s->s_npages) { + DPRINTF("scan too much\n"); + mutex_exit(&s->lock); + return NULL; + } + pg = handcold_advance(); + if (pg == NULL) { + mutex_exit(&s->lock); + break; + } + ss->ss_nscanned++; + /* + * acquire interlock to stablize page identity. + * if we have caught the page in a state of flux + * and it should be dequeued, do it now and then + * move on to the next. 
+ */ + mutex_enter(&pg->interlock); + if ((pg->uobject == NULL && pg->uanon == NULL) || + pg->wire_count > 0) { + mutex_exit(&pg->interlock); + clockpro_pagedequeue(pg); + pg->pqflags &= ~(PQ_INITIALREF|PQ_SPECULATIVE); + continue; + } + mutex_exit(&s->lock); + lock = uvmpd_trylockowner(pg); + /* pg->interlock now dropped */ + } while (lock == NULL); + *plock = lock; return pg; } @@ -1211,6 +1306,8 @@ clockpro_dropswap(pageq_t *q, int *todo) { struct vm_page *pg; + KASSERT(mutex_owned(&clockpro.lock)); + TAILQ_FOREACH_REVERSE(pg, &q->q_q, pglist, pageq.queue) { if (*todo <= 0) { break; @@ -1218,12 +1315,15 @@ clockpro_dropswap(pageq_t *q, int *todo) if ((pg->pqflags & PQ_HOT) == 0) { continue; } - if ((pg->pqflags & PQ_SWAPBACKED) == 0) { + mutex_enter(&pg->interlock); + if ((pg->flags & PG_SWAPBACKED) == 0) { + mutex_exit(&pg->interlock); continue; } if (uvmpd_trydropswap(pg)) { (*todo)--; } + /* pg->interlock now dropped */ } } @@ -1243,9 +1343,11 @@ uvmpdpol_balancequeue(int swap_shortage) DPRINTF("%s: swap_shortage=%d\n", __func__, swap_shortage); + mutex_enter(&s->lock); clockpro_dropswap(clockpro_queue(s, CLOCKPRO_NEWQ), &todo); clockpro_dropswap(clockpro_queue(s, CLOCKPRO_COLDQ), &todo); clockpro_dropswap(clockpro_queue(s, CLOCKPRO_HOTQ), &todo); + mutex_exit(&s->lock); DPRINTF("%s: done=%d\n", __func__, swap_shortage - todo); } @@ -1255,17 +1357,18 @@ uvmpdpol_needsscan_p(void) { struct clockpro_state * const s = &clockpro; - if (s->s_ncold < s->s_coldtarget) { - return true; - } - return false; + /* This must be an unlocked check: can be called from interrupt. */ + return s->s_ncold < s->s_coldtarget; } void uvmpdpol_tune(void) { + struct clockpro_state * const s = &clockpro; + mutex_enter(&s->lock); clockpro_tune(); + mutex_exit(&s->lock); } #if !defined(PDSIM) Index: sys/uvm/uvm_pglist.c =================================================================== RCS file: /cvsroot/src/sys/uvm/uvm_pglist.c,v retrieving revision 1.72 diff -u -p -r1.72 uvm_pglist.c --- sys/uvm/uvm_pglist.c 13 Nov 2018 10:31:01 -0000 1.72 +++ sys/uvm/uvm_pglist.c 9 Dec 2019 22:24:41 -0000 @@ -108,7 +108,6 @@ uvm_pglist_add(struct vm_page *pg, struc uvmexp.zeropages--; VM_FREE_PAGE_TO_CPU(pg)->pages[pgflidx]--; pg->flags = PG_CLEAN; - pg->pqflags = 0; pg->uobject = NULL; pg->uanon = NULL; TAILQ_INSERT_TAIL(rlist, pg, pageq.queue); @@ -574,7 +573,7 @@ uvm_pglistfree(struct pglist *list) KASSERT(!uvmpdpol_pageisqueued_p(pg)); TAILQ_REMOVE(list, pg, pageq.queue); iszero = (pg->flags & PG_ZERO); - pg->pqflags = PQ_FREE; + pg->flags = (pg->flags & PG_ZERO) | PG_FREE; #ifdef DEBUG pg->uobject = (void *)0xdeadbeef; pg->uanon = (void *)0xdeadbeef; Index: sys/uvm/uvm_physseg.c =================================================================== RCS file: /cvsroot/src/sys/uvm/uvm_physseg.c,v retrieving revision 1.10 diff -u -p -r1.10 uvm_physseg.c --- sys/uvm/uvm_physseg.c 20 Sep 2019 11:09:43 -0000 1.10 +++ sys/uvm/uvm_physseg.c 9 Dec 2019 22:24:41 -0000 @@ -1087,6 +1087,7 @@ uvm_physseg_init_seg(uvm_physseg_t upm, psize_t n; paddr_t paddr; struct uvm_physseg *seg; + struct vm_page *pg; KASSERT(upm != UVM_PHYSSEG_TYPE_INVALID && pgs != NULL); @@ -1107,10 +1108,11 @@ uvm_physseg_init_seg(uvm_physseg_t upm, if (atop(paddr) >= seg->avail_start && atop(paddr) < seg->avail_end) { uvmexp.npages++; - mutex_enter(&uvm_pageqlock); /* add page to free pool */ - uvm_pagefree(&seg->pgs[i]); - mutex_exit(&uvm_pageqlock); + pg = &seg->pgs[i]; + /* Disable LOCKDEBUG: too many and too early. 
*/ + mutex_init(&pg->interlock, MUTEX_NODEBUG, IPL_NONE); + uvm_pagefree(pg); } } }
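
A note for reviewers on the locking protocol the patch introduces. The per-page interlock only stabilizes the page's identity (its uobject/uanon pointers and wire_count) long enough to choose the owner's lock; the owner's lock is then only try-locked from the pagedaemon side, and the interlock is always dropped before uvmpd_trylockowner() returns. The sketch below is a user-space analogue of that protocol, not kernel code: struct page, struct owner and the pthread mutexes are illustrative stand-ins for struct vm_page, uvm_object/vm_anon and kmutex_t.

#include <pthread.h>
#include <stddef.h>

struct owner {				/* stands in for uvm_object / vm_anon */
	pthread_mutex_t lock;
};

struct page {				/* stands in for struct vm_page */
	pthread_mutex_t interlock;	/* stabilizes obj/anon identity */
	struct owner *obj;		/* owning object, or NULL */
	struct owner *anon;		/* owning anon, or NULL */
};

/*
 * Try to lock the owner of pg.  Called with pg->interlock held; the
 * interlock is released in all cases.  Returns the locked owner mutex,
 * or NULL if the page is ownerless or the try-lock failed.
 */
static pthread_mutex_t *
trylockowner(struct page *pg)
{
	pthread_mutex_t *slock;

	if (pg->obj != NULL)
		slock = &pg->obj->lock;
	else if (pg->anon != NULL)
		slock = &pg->anon->lock;
	else {
		/* page is in a state of flux - ignore it */
		pthread_mutex_unlock(&pg->interlock);
		return NULL;
	}
	if (pthread_mutex_trylock(slock) != 0) {
		/* owner is busy; the caller just moves on to the next page */
		pthread_mutex_unlock(&pg->interlock);
		return NULL;
	}
	pthread_mutex_unlock(&pg->interlock);
	return slock;
}

int
main(void)
{
	struct owner obj = { .lock = PTHREAD_MUTEX_INITIALIZER };
	struct page pg = {
		.interlock = PTHREAD_MUTEX_INITIALIZER,
		.obj = &obj,
		.anon = NULL,
	};
	pthread_mutex_t *slock;

	pthread_mutex_lock(&pg.interlock);	/* freeze pg.obj / pg.anon */
	slock = trylockowner(&pg);		/* always drops the interlock */
	if (slock != NULL)
		pthread_mutex_unlock(slock);
	return 0;
}

The kernel version adds a bounded spin (SPINLOCK_BACKOFF while the owner is running, up to UVMPD_NUMTRYLOCKOWNER tries for the pagedaemon) and a one-tick kpause() when the owner is not running, and the pdpolicy scanners drop their own state lock while holding the interlock before calling it, giving the ordering object -> pdpol -> interlock; the unlock ordering shown above is otherwise the same.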
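The clock policy also stops taking its global lock on every reactivation: uvmpdpol_pageactivate() packs the activation time (hardclock_ticks) into the low bits of pqflags (PQ_TIME) and only re-queues a page when it is not already on the active queue or was last activated more than hz ticks ago. The following stand-alone sketch of that throttle is illustrative only; ACTIVE, TIME_MASK, HZ and now_ticks are stand-ins for PQ_ACTIVE, PQ_TIME, hz and hardclock_ticks, and wrap-around falls out the same conservative way (a large apparent delta simply forces a requeue).

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define ACTIVE		0x80000000u	/* stands in for PQ_ACTIVE */
#define TIME_MASK	0x3fffffffu	/* stands in for PQ_TIME */
#define HZ		100		/* clock ticks per second */

static uint32_t now_ticks;		/* stands in for hardclock_ticks */

/* Decide whether the locked requeue work is actually needed. */
static bool
needs_requeue(uint32_t pqflags)
{

	if ((pqflags & ACTIVE) == 0)
		return true;		/* not on the active queue yet */
	return ((now_ticks & TIME_MASK) - (pqflags & TIME_MASK)) > HZ;
}

/* On requeue, record the activation time next to the ACTIVE bit. */
static uint32_t
mark_active(void)
{

	return ACTIVE | (now_ticks & TIME_MASK);
}

int
main(void)
{
	uint32_t pqflags;

	now_ticks = 1000;
	pqflags = mark_active();
	now_ticks = 1050;	/* only 50 ticks later */
	printf("requeue: %d\n", needs_requeue(pqflags));	/* 0 */
	now_ticks = 1200;	/* more than HZ ticks later */
	printf("requeue: %d\n", needs_requeue(pqflags));	/* 1 */
	return 0;
}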
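Several statistics counters that were previously serialized by the page-queue locks (uvmexp.pdfreed, uvmexp.pdpending, uvmexp.paging) become lock-free atomic updates via atomic_inc_uint()/atomic_add_int() in this patch. A minimal C11 equivalent of the before/after pattern, with counter as a hypothetical stand-in for the uvmexp fields:

#include <stdatomic.h>
#include <stdio.h>

static atomic_uint counter;

/* before: mutex_enter(&lock); counter += n; mutex_exit(&lock); */
/* after: a single atomic read-modify-write, no lock held */
static void
counter_add(unsigned n)
{

	atomic_fetch_add_explicit(&counter, n, memory_order_relaxed);
}

int
main(void)
{

	counter_add(1);
	counter_add(3);
	printf("%u\n", atomic_load(&counter));	/* 4 */
	return 0;
}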