diff -rup src/sys/uvm/uvm.h src-uvm/sys/uvm/uvm.h
--- src/sys/uvm/uvm.h	2019-12-23 21:33:00.724563317 +0000
+++ src-uvm/sys/uvm/uvm.h	2019-12-23 20:32:23.596067237 +0000
@@ -78,12 +78,21 @@ struct pgflcache;
  */
 struct uvm_cpu {
+	/* allocator */
 	struct pgflcache *pgflcache[VM_NFREELIST];/* cpu-local cached pages */
 	void *pgflcachemem;		/* pointer to allocated mem */
 	size_t pgflcachememsz;		/* size of allocated memory */
 	u_int pgflcolor;		/* next color to allocate */
 	u_int pgflbucket;		/* where to send our pages */
+
+	/* entropy */
 	krndsource_t rs;		/* entropy source */
+
+	/* pdpolicy: queue of intended page status changes. */
+	struct vm_page **pdq;		/* queue entries */
+	u_int pdqcount;			/* number of entries queued */
+	u_int pdqmax;			/* maximum number entries */
+	int pdqtime;			/* last time queue cleared */
 };
 
 /*
@@ -109,9 +118,6 @@ struct uvm {
 
 	/* aio_done is locked by uvm.pagedaemon_lock and splbio! */
 	TAILQ_HEAD(, buf) aio_done;	/* done async i/o reqs */
-
-	/* per-cpu data */
-	struct uvm_cpu *cpus[MAXCPUS];
 };
 
 /*
diff -rup src/sys/uvm/uvm_extern.h src-uvm/sys/uvm/uvm_extern.h
--- src/sys/uvm/uvm_extern.h	2019-12-23 21:33:00.739144500 +0000
+++ src-uvm/sys/uvm/uvm_extern.h	2019-12-22 21:44:51.257414908 +0000
@@ -639,6 +639,7 @@ int uvm_coredump_walkmap(struct proc *
 int	uvm_coredump_count_segs(struct proc *);
 void	uvm_proc_exit(struct proc *);
 void	uvm_lwp_exit(struct lwp *);
+void	uvm_idle(void);
 void	uvm_init_limits(struct proc *);
 bool	uvm_kernacc(void *, size_t, vm_prot_t);
 __dead void	uvm_scheduler(void);
diff -rup src/sys/uvm/uvm_glue.c src-uvm/sys/uvm/uvm_glue.c
--- src/sys/uvm/uvm_glue.c	2019-12-23 21:33:00.739727884 +0000
+++ src-uvm/sys/uvm/uvm_glue.c	2019-12-23 16:59:45.997805865 +0000
@@ -86,6 +86,7 @@ __KERNEL_RCSID(0, "$NetBSD: uvm_glue.c,v
 
 #include
 #include
+#include
 #include
 
 /*
@@ -516,3 +517,22 @@ uvm_scheduler(void)
 		(void)kpause("uvm", false, hz, NULL);
 	}
 }
+
+/*
+ * uvm_idle: called from the idle loop.
+ */
+
+void
+uvm_idle(void)
+{
+	struct cpu_info *ci = curcpu();
+	struct uvm_cpu *ucpu = ci->ci_data.cpu_uvm;
+
+	KASSERT(kpreempt_disabled());
+
+	if (!ci->ci_want_resched)
+		uvmpdpol_idle(ucpu);
+	if (!ci->ci_want_resched)
+		uvm_pageidlezero();
+
+}
diff -rup src/sys/uvm/uvm_page.c src-uvm/sys/uvm/uvm_page.c
--- src/sys/uvm/uvm_page.c	2019-12-23 21:33:00.749522774 +0000
+++ src-uvm/sys/uvm/uvm_page.c	2019-12-23 20:32:16.700069072 +0000
@@ -328,7 +328,6 @@ uvm_page_init(vaddr_t *kvm_startp, vaddr
 	 * structures).
 	 */
-	uvm.cpus[0] = &boot_cpu;
 	curcpu()->ci_data.cpu_uvm = &boot_cpu;
 	uvmpdpol_init();
 	for (b = 0; b < __arraycount(uvm_freelist_locks); b++) {
@@ -972,17 +978,19 @@ uvm_cpu_attach(struct cpu_info *ci)
 		    KM_SLEEP);
 		ucpu = (struct uvm_cpu *)roundup2((uintptr_t)ucpu, coherency_unit);
-		uvm.cpus[cpu_index(ci)] = ucpu;
 		ci->ci_data.cpu_uvm = ucpu;
+	} else {
+		ucpu = ci->ci_data.cpu_uvm;
 	}
+
+	uvmpdpol_init_cpu(ucpu);
+
 	/*
 	 * Attach RNG source for this CPU's VM events
 	 */
-	rnd_attach_source(&uvm.cpus[cpu_index(ci)]->rs,
-	    ci->ci_data.cpu_name, RND_TYPE_VM,
-	    RND_FLAG_COLLECT_TIME|RND_FLAG_COLLECT_VALUE|
-	    RND_FLAG_ESTIMATE_VALUE);
+	rnd_attach_source(&ucpu->rs, ci->ci_data.cpu_name, RND_TYPE_VM,
+	    RND_FLAG_COLLECT_TIME|RND_FLAG_COLLECT_VALUE|
+	    RND_FLAG_ESTIMATE_VALUE);
 }
 
 /*
diff -rup src/sys/uvm/uvm_page.h src-uvm/sys/uvm/uvm_page.h
--- src/sys/uvm/uvm_page.h	2019-12-23 21:33:00.750295428 +0000
+++ src-uvm/sys/uvm/uvm_page.h	2019-12-21 14:59:29.133085280 +0000
@@ -150,6 +150,7 @@ struct vm_page {
 		 * or uvm_pglistalloc output */
 		LIST_ENTRY(vm_page) list;	/* f: global free page queue */
 	} pageq;
+	TAILQ_ENTRY(vm_page) pdqueue;	/* p: pdpolicy queue */
 	struct vm_anon *uanon;		/* o,i: anon */
 	struct uvm_object *uobject;	/* o,i: object */
 	voff_t offset;			/* o: offset into object */
diff -rup src/sys/uvm/uvm_pdpolicy.h src-uvm/sys/uvm/uvm_pdpolicy.h
--- src/sys/uvm/uvm_pdpolicy.h	2019-12-13 20:10:22.000000000 +0000
+++ src-uvm/sys/uvm/uvm_pdpolicy.h	2019-12-23 16:59:22.739526531 +0000
@@ -37,7 +37,9 @@ struct vm_anon;
  * don't use them directly from outside of /sys/uvm.
  */
 
+void uvmpdpol_idle(struct uvm_cpu *);
 void uvmpdpol_init(void);
+void uvmpdpol_init_cpu(struct uvm_cpu *);
 void uvmpdpol_reinit(void);
 void uvmpdpol_estimatepageable(int *, int *);
 bool uvmpdpol_needsscan_p(void);
 #endif /* !_UVM_PDPOLICY_H_ */
diff -rup src/sys/uvm/uvm_pdpolicy_clock.c src-uvm/sys/uvm/uvm_pdpolicy_clock.c
--- src/sys/uvm/uvm_pdpolicy_clock.c	2019-12-23 19:29:03.000000000 +0000
+++ src-uvm/sys/uvm/uvm_pdpolicy_clock.c	2019-12-24 21:33:56.703815219 +0000
@@ -1,6 +1,35 @@
-/*	$NetBSD: uvm_pdpolicy_clock.c,v 1.22 2019/12/23 19:29:03 ad Exp $	*/
+/*	$NetBSD: uvm_pdpolicy_clock.c,v 1.21 2019/12/21 13:00:25 ad Exp $	*/
 /* NetBSD: uvm_pdaemon.c,v 1.72 2006/01/05 10:47:33 yamt Exp $	*/
 
+/*-
+ * Copyright (c) 2019 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Andrew Doran.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
  * Copyright (c) 1991, 1993, The Regents of the University of California.
@@ -69,12 +98,13 @@
 #else /* defined(PDSIM) */
 
 #include
 __KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clock.c,v 1.22 2019/12/23 19:29:03 ad Exp $");
 
 #include
 #include
 #include
 #include
+#include
 
 #include
 #include
@@ -83,9 +113,28 @@ __KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy
 #endif /* defined(PDSIM) */
 
-#define	PQ_TIME		0xfffffffc	/* time of last activation */
-#define	PQ_INACTIVE	0x00000001	/* page is in inactive list */
-#define	PQ_ACTIVE	0x00000002	/* page is in active list */
+/*
+ * per-CPU queue of pending ("intended") page status changes.
+ * 512 entries makes for 1x 4kB page on _LP64.
+ */
+
+#if !defined(CLOCK_PDQ_SIZE)
+#define	CLOCK_PDQ_SIZE	512
+#endif /* !defined(CLOCK_PDQ_SIZE) */
+
+/*
+ * per-page status flags.
+ */
+
+#define	PQ_TIME		0xfffffff0	/* time of last change */
+#define	PQ_INTENT_0	0x00000000	/* no intent */
+#define	PQ_INTENT_A	0x00000001	/* intend activation */
+#define	PQ_INTENT_I	0x00000002	/* intend deactivation */
+#define	PQ_INTENT_D	0x00000003	/* intend dequeue */
+#define	PQ_INTENT	0x00000003
+#define	PQ_INACTIVE	0x00000004	/* page is on inactive list */
+#define	PQ_ACTIVE	0x00000008	/* page is on active list */
+
 #if !defined(CLOCK_INACTIVEPCT)
 #define	CLOCK_INACTIVEPCT	33
@@ -115,9 +164,14 @@ struct uvmpdpol_scanstate {
 	struct vm_page *ss_nextpg;
 };
 
-static void uvmpdpol_pageactivate_locked(struct vm_page *);
-static void uvmpdpol_pagedeactivate_locked(struct vm_page *);
-static void uvmpdpol_pagedequeue_locked(struct vm_page *);
+static void uvmpdpol_pagereactivate_locked(struct vm_page *);
+static uint32_t uvmpdpol_pageactivate_locked(struct vm_page *, uint32_t);
+static uint32_t uvmpdpol_pagedeactivate_locked(struct vm_page *, uint32_t);
+static uint32_t uvmpdpol_pagedequeue_locked(struct vm_page *, uint32_t);
+
+static bool uvmpdpol_pageintent_realize(struct vm_page *);
+static void uvmpdpol_pageintent_purge(struct uvm_cpu *);
+static void uvmpdpol_pageintent_set(struct vm_page *, uint32_t);
 
 static struct uvmpdpol_globalstate pdpol_state __cacheline_aligned;
 static struct uvmpdpol_scanstate pdpol_scanstate;
@@ -188,6 +242,7 @@ uvmpdpol_selectvictim(kmutex_t **plock)
 	struct uvmpdpol_globalstate *s = &pdpol_state;
 	struct uvmpdpol_scanstate *ss = &pdpol_scanstate;
 	struct vm_page *pg;
+	uint32_t pqflags;
 	kmutex_t *lock;
 
 	mutex_enter(&s->lock);
@@ -207,20 +262,29 @@ uvmpdpol_selectvictim(kmutex_t **plock)
 		if (pg == NULL) {
 			break;
 		}
-		ss->ss_nextpg = TAILQ_NEXT(pg, pageq.queue);
+		ss->ss_nextpg = TAILQ_NEXT(pg, pdqueue);
 		uvmexp.pdscans++;
 
 		/*
+		 * process any pending intent.  if we had to do something,
+		 * the page is likely not on the queue any more, so skip it.
+		 */
+
+		if (uvmpdpol_pageintent_realize(pg)) {
+			continue;
+		}
+
+		/*
 		 * acquire interlock to stablize page identity.
 		 * if we have caught the page in a state of flux
-		 * and it should be dequeued, do it now and then
-		 * move on to the next.
+		 * it will be dequeued soon, so ignore it and move
+		 * on to the next.
 		 */
+
 		mutex_enter(&pg->interlock);
 		if ((pg->uobject == NULL && pg->uanon == NULL) ||
 		    pg->wire_count > 0) {
 			mutex_exit(&pg->interlock);
-			uvmpdpol_pagedequeue_locked(pg);
 			continue;
 		}
@@ -231,24 +295,25 @@ uvmpdpol_selectvictim(kmutex_t **plock)
 		 * minimum, reactivate the page instead and move
 		 * on to the next page.
 		 */
+
 		anon = pg->uanon;
 		uobj = pg->uobject;
 		if (uobj && UVM_OBJ_IS_VTEXT(uobj) && ss->ss_execreact) {
 			mutex_exit(&pg->interlock);
-			uvmpdpol_pageactivate_locked(pg);
+			uvmpdpol_pagereactivate_locked(pg);
 			PDPOL_EVCNT_INCR(reactexec);
 			continue;
 		}
 		if (uobj && UVM_OBJ_IS_VNODE(uobj) && !UVM_OBJ_IS_VTEXT(uobj) &&
 		    ss->ss_filereact) {
 			mutex_exit(&pg->interlock);
-			uvmpdpol_pageactivate_locked(pg);
+			uvmpdpol_pagereactivate_locked(pg);
 			PDPOL_EVCNT_INCR(reactfile);
 			continue;
 		}
 		if ((anon || UVM_OBJ_IS_AOBJ(uobj)) && ss->ss_anonreact) {
 			mutex_exit(&pg->interlock);
-			uvmpdpol_pageactivate_locked(pg);
+			uvmpdpol_pagereactivate_locked(pg);
 			PDPOL_EVCNT_INCR(reactanon);
 			continue;
 		}
@@ -267,6 +332,7 @@ uvmpdpol_selectvictim(kmutex_t **plock)
 		 *
 		 * object -> pdpol -> interlock.
 		 */
+
 		mutex_exit(&s->lock);
 		lock = uvmpd_trylockowner(pg);
 		/* pg->interlock now released */
@@ -280,8 +346,10 @@ uvmpdpol_selectvictim(kmutex_t **plock)
 		 * move referenced pages back to active queue and skip to
 		 * next page.
 		 */
+
 		if (pmap_is_referenced(pg)) {
-			uvmpdpol_pageactivate_locked(pg);
+			pqflags = atomic_load_relaxed(&pg->pqflags);
+			uvmpdpol_pageactivate_locked(pg, pqflags);
 			uvmexp.pdreact++;
 			mutex_exit(lock);
 			continue;
 		}
@@ -301,6 +369,7 @@ uvmpdpol_balancequeue(int swap_shortage)
 	struct uvmpdpol_globalstate *s = &pdpol_state;
 	int inactive_shortage;
 	struct vm_page *p, *nextpg;
+	uint32_t pqflags;
 	kmutex_t *lock;
 
 	/*
@@ -313,7 +382,18 @@ uvmpdpol_balancequeue(int swap_shortage)
 	for (p = TAILQ_FIRST(&pdpol_state.s_activeq);
 	    p != NULL && (inactive_shortage > 0 || swap_shortage > 0);
 	    p = nextpg) {
-		nextpg = TAILQ_NEXT(p, pageq.queue);
+		nextpg = TAILQ_NEXT(p, pdqueue);
+
+		/*
+		 * process any pending intent.  if we had to do something,
+		 * the page is likely not on the queue any more, so skip it.
+		 */
+
+		if (uvmpdpol_pageintent_realize(p)) {
+			inactive_shortage = pdpol_state.s_inactarg -
+			    pdpol_state.s_inactive;
+			continue;
+		}
 
 		/*
 		 * if there's a shortage of swap slots, try to free it.
@@ -340,14 +420,14 @@ uvmpdpol_balancequeue(int swap_shortage)
 		/*
 		 * acquire interlock to stablize page identity.
 		 * if we have caught the page in a state of flux
-		 * and it should be dequeued, do it now and then
-		 * move on to the next.
+		 * it will be dequeued soon, so ignore it and move
+		 * on to the next.
 		 */
+
 		mutex_enter(&p->interlock);
 		if ((p->uobject == NULL && p->uanon == NULL) ||
 		    p->wire_count > 0) {
 			mutex_exit(&p->interlock);
-			uvmpdpol_pagedequeue_locked(p);
 			continue;
 		}
 		mutex_exit(&s->lock);
@@ -355,7 +435,9 @@
 		/* p->interlock now released */
 		mutex_enter(&s->lock);
 		if (lock != NULL) {
-			uvmpdpol_pagedeactivate_locked(p);
+			pmap_clear_reference(p);
+			pqflags = atomic_load_relaxed(&p->pqflags);
+			uvmpdpol_pagedeactivate_locked(p, pqflags);
 			uvmexp.pddeact++;
 			inactive_shortage--;
 			mutex_exit(lock);
@@ -364,96 +446,281 @@
 	mutex_exit(&s->lock);
 }
 
+/*
+ * uvmpdpol_pageintent_realize: take the intended action set on the page,
+ * clear out the intent bits, and if they changed behind us, start over
+ * again.
+ */
+
+static bool
+uvmpdpol_pageintent_realize(struct vm_page *pg)
+{
+	struct uvmpdpol_globalstate *s __diagused = &pdpol_state;
+	uint32_t pqflags, intent;
+	bool processed = false;
+
+	KASSERT(mutex_owned(&s->lock));
+
+	pqflags = atomic_load_relaxed(&pg->pqflags);
+	do {
+		switch (intent = pqflags & PQ_INTENT) {
+		case PQ_INTENT_0:
+			break;
+		case PQ_INTENT_A:
+			pqflags = uvmpdpol_pageactivate_locked(pg, pqflags);
+			processed = true;
+			break;
+		case PQ_INTENT_I:
+			pqflags = uvmpdpol_pagedeactivate_locked(pg, pqflags);
+			processed = true;
+			break;
+		case PQ_INTENT_D:
+			pqflags = uvmpdpol_pagedequeue_locked(pg, pqflags);
+			processed = true;
+			break;
+		}
+	} while (__predict_false(intent != (pqflags & PQ_INTENT)));
+	return processed;
+}
+
+/*
+ * uvmpdpol_pageintent_purge: purge the per-CPU queue of pending page
+ * status changes.
+ */
+
+static void
+uvmpdpol_pageintent_purge(struct uvm_cpu *ucpu)
+{
+	struct uvmpdpol_globalstate *s __diagused = &pdpol_state;
+	int i;
+
+	KASSERT(kpreempt_disabled());
+	KASSERT(mutex_owned(&s->lock));
+
+	for (i = 0; i < ucpu->pdqcount; i++) {
+		(void)uvmpdpol_pageintent_realize(ucpu->pdq[i]);
+	}
+	ucpu->pdqcount = 0;
+	ucpu->pdqtime = hardclock_ticks;
+}
+
+/*
+ * uvmpdpol_pageintent_set: set the intended status for a page, overriding
+ * any prior intent that was set.
+ *
+ * the object owners' lock is held here, so we need only worry about
+ * concurrent activity by the pagedaemon and by uvmpdpol_pageintent_realize()
+ * if the page is already on a queue somewhere else in the system.
+ */
+
 static void
-uvmpdpol_pagedeactivate_locked(struct vm_page *pg)
+uvmpdpol_pageintent_set(struct vm_page *pg, uint32_t intent)
 {
+	struct uvmpdpol_globalstate *s = &pdpol_state;
+	struct uvm_cpu *ucpu;
+	uint32_t o, n, v;
 
 	KASSERT(uvm_page_locked_p(pg));
+	KASSERT((intent & ~PQ_INTENT) == 0);
+
+	/*
+	 * set our intent on the page.  if we find the exact same intent
+	 * already set, then there's nothing more for us to do.
+	 */
+
+	for (o = atomic_load_relaxed(&pg->pqflags);; o = n) {
+		if ((o & PQ_INTENT) == intent) {
+			return;
+		}
+		v = (o & ~PQ_INTENT) | intent;
+		n = atomic_cas_32(&pg->pqflags, o, v);
+		if (n == o) {
+			break;
+		}
+	}
+
+	/*
+	 * if there was an intent set already, then the page is already in a
+	 * queue of pages somewhere (maybe our queue), and the intent is
+	 * sure to be cleared.  otherwise we'll try to add it to our queue
+	 * to process later.
+	 */
+
+	if ((o & PQ_INTENT) != 0) {
+		return;
+	}
+
+	/*
+	 * if our queue isn't full yet, then this is cheap & easy.
+	 */
+
+	kpreempt_disable();
+	ucpu = curcpu()->ci_data.cpu_uvm;
+	if (__predict_true(ucpu->pdqcount < ucpu->pdqmax)) {
+		ucpu->pdq[ucpu->pdqcount++] = pg;
+		kpreempt_enable();
+		return;
+	}
+
+	/*
+	 * the queue is full.  reload ucpu to ensure that we're looking in
+	 * the right place after acquiring the mutex.  if we did switch to
+	 * another CPU, it's no problem - the prior queue will be cleared
+	 * some other time.
+	 */
+
+	kpreempt_enable();
+	mutex_enter(&s->lock);
+	kpreempt_disable();
+	ucpu = curcpu()->ci_data.cpu_uvm;
+	if (ucpu->pdqcount == ucpu->pdqmax) {
+		uvmpdpol_pageintent_purge(ucpu);
+	}
+	ucpu->pdq[ucpu->pdqcount++] = pg;
+	kpreempt_enable();
+	mutex_exit(&s->lock);
+}
+
+/*
+ * uvmpdpol_pagereactivate: this works like uvmpdpol_pageactivate_locked,
+ * but because we are called here without the object/anon locked, we must
+ * pay close attention to see if the owner set a dequeue pending on the
+ * page, and not override and therefore lose that intent.
+ */
+static void
+uvmpdpol_pagereactivate_locked(struct vm_page *pg)
+{
+	uint32_t o, n, v;
+
+	for (o = atomic_load_relaxed(&pg->pqflags);; o = n) {
+		if ((o & PQ_INTENT) == PQ_INTENT_D) {
+			break;
+		}
+		v = (o & ~PQ_INTENT) | PQ_INTENT_A;
+		n = atomic_cas_32(&pg->pqflags, o, v);
+		if (n == o) {
+			break;
+		}
+	}
+	uvmpdpol_pageintent_realize(pg);
+}
 
-	if (pg->pqflags & PQ_ACTIVE) {
-		TAILQ_REMOVE(&pdpol_state.s_activeq, pg, pageq.queue);
-		pg->pqflags &= ~(PQ_ACTIVE | PQ_TIME);
+static uint32_t
+uvmpdpol_pagedeactivate_locked(struct vm_page *pg, uint32_t pqflags)
+{
+
+	if ((pqflags & PQ_ACTIVE) != 0) {
+		TAILQ_REMOVE(&pdpol_state.s_activeq, pg, pdqueue);
 		KASSERT(pdpol_state.s_active > 0);
 		pdpol_state.s_active--;
 	}
-	if ((pg->pqflags & PQ_INACTIVE) == 0) {
-		KASSERT(pg->wire_count == 0);
-		pmap_clear_reference(pg);
-		TAILQ_INSERT_TAIL(&pdpol_state.s_inactiveq, pg, pageq.queue);
-		pg->pqflags |= PQ_INACTIVE;
+	if ((pqflags & PQ_INACTIVE) == 0) {
+		TAILQ_INSERT_TAIL(&pdpol_state.s_inactiveq, pg, pdqueue);
 		pdpol_state.s_inactive++;
 	}
+
+	pqflags = PQ_INACTIVE | (hardclock_ticks & PQ_TIME);
+	return atomic_swap_32(&pg->pqflags, pqflags);
 }
 
 void
 uvmpdpol_pagedeactivate(struct vm_page *pg)
 {
-	struct uvmpdpol_globalstate *s = &pdpol_state;
+	uint32_t pqflags;
 
-	mutex_enter(&s->lock);
-	uvmpdpol_pagedeactivate_locked(pg);
-	mutex_exit(&s->lock);
+	KASSERT(uvm_page_locked_p(pg));
+
+	/*
+	 * it might be useful to defer the pmap_clear_reference(), but it
+	 * can't be done as when it comes time to realize the intent the
+	 * page may no longer be locked.
+	 */
+
+	pmap_clear_reference(pg);
+	pqflags = atomic_load_relaxed(&pg->pqflags);
+	if ((pqflags & PQ_INACTIVE) == 0) {
+		uvmpdpol_pageintent_set(pg, PQ_INTENT_I);
+	}
 }
 
-static void
-uvmpdpol_pageactivate_locked(struct vm_page *pg)
+static uint32_t
+uvmpdpol_pageactivate_locked(struct vm_page *pg, uint32_t pqflags)
 {
 
-	uvmpdpol_pagedequeue_locked(pg);
-	TAILQ_INSERT_TAIL(&pdpol_state.s_activeq, pg, pageq.queue);
-	pg->pqflags = PQ_ACTIVE | (hardclock_ticks & PQ_TIME);
-	pdpol_state.s_active++;
+	if (pqflags & PQ_ACTIVE) {
+		TAILQ_REMOVE(&pdpol_state.s_activeq, pg, pdqueue);
+		TAILQ_INSERT_TAIL(&pdpol_state.s_activeq, pg, pdqueue);
+		KASSERT(pdpol_state.s_active > 0);
+	} else if (pqflags & PQ_INACTIVE) {
+		TAILQ_REMOVE(&pdpol_state.s_inactiveq, pg, pdqueue);
+		TAILQ_INSERT_TAIL(&pdpol_state.s_activeq, pg, pdqueue);
+		KASSERT(pdpol_state.s_inactive > 0);
+		pdpol_state.s_inactive--;
+		pdpol_state.s_active++;
+	} else {
+		TAILQ_INSERT_TAIL(&pdpol_state.s_activeq, pg, pdqueue);
+		pdpol_state.s_active++;
+	}
+
+	pqflags = PQ_ACTIVE | (hardclock_ticks & PQ_TIME);
+	return atomic_swap_32(&pg->pqflags, pqflags);
 }
 
 void
 uvmpdpol_pageactivate(struct vm_page *pg)
 {
-	struct uvmpdpol_globalstate *s = &pdpol_state;
+	uint32_t pqflags;
 
-	/* Safety: PQ_ACTIVE clear also tells us if it is not enqueued. */
-	if ((pg->pqflags & PQ_ACTIVE) == 0 ||
-	    ((hardclock_ticks & PQ_TIME) - (pg->pqflags & PQ_TIME)) >= hz) {
-		mutex_enter(&s->lock);
-		uvmpdpol_pageactivate_locked(pg);
-		mutex_exit(&s->lock);
+	KASSERT(uvm_page_locked_p(pg));
+
+	/*
+	 * PQ_ACTIVE clear also tells us if it is not enqueued.  don't
+	 * touch the page if it was already activated recently (less
+	 * than a second ago).
+	 */
+
+	pqflags = atomic_load_relaxed(&pg->pqflags);
+	if ((pqflags & PQ_ACTIVE) == 0 ||
+	    ((hardclock_ticks & PQ_TIME) - (pqflags & PQ_TIME)) >= hz) {
+		uvmpdpol_pageintent_set(pg, PQ_INTENT_A);
 	}
 }
 
-static void
-uvmpdpol_pagedequeue_locked(struct vm_page *pg)
+static uint32_t
+uvmpdpol_pagedequeue_locked(struct vm_page *pg, uint32_t pqflags)
 {
+	struct uvmpdpol_globalstate *s __diagused = &pdpol_state;
+
+	KASSERT(mutex_owned(&s->lock));
 
-	if (pg->pqflags & PQ_ACTIVE) {
-		TAILQ_REMOVE(&pdpol_state.s_activeq, pg, pageq.queue);
-		pg->pqflags &= ~(PQ_ACTIVE | PQ_TIME);
+	if (pqflags & PQ_ACTIVE) {
+		TAILQ_REMOVE(&pdpol_state.s_activeq, pg, pdqueue);
 		KASSERT(pdpol_state.s_active > 0);
 		pdpol_state.s_active--;
-	} else if (pg->pqflags & PQ_INACTIVE) {
-		TAILQ_REMOVE(&pdpol_state.s_inactiveq, pg, pageq.queue);
-		pg->pqflags &= ~PQ_INACTIVE;
+	} else if (pqflags & PQ_INACTIVE) {
		TAILQ_REMOVE(&pdpol_state.s_inactiveq, pg, pdqueue);
 		KASSERT(pdpol_state.s_inactive > 0);
 		pdpol_state.s_inactive--;
 	}
+
+	pqflags = hardclock_ticks & PQ_TIME;
+	return atomic_swap_32(&pg->pqflags, pqflags);
 }
 
 void
 uvmpdpol_pagedequeue(struct vm_page *pg)
 {
-	struct uvmpdpol_globalstate *s = &pdpol_state;
 
-	mutex_enter(&s->lock);
-	uvmpdpol_pagedequeue_locked(pg);
-	mutex_exit(&s->lock);
+	KASSERT(uvm_page_locked_p(pg));
+	uvmpdpol_pageintent_set(pg, PQ_INTENT_D);
 }
 
 void
 uvmpdpol_pageenqueue(struct vm_page *pg)
 {
-	struct uvmpdpol_globalstate *s = &pdpol_state;
 
-	mutex_enter(&s->lock);
-	uvmpdpol_pageactivate_locked(pg);
-	mutex_exit(&s->lock);
+	KASSERT(uvm_page_locked_p(pg));
+	uvmpdpol_pageintent_set(pg, PQ_INTENT_A);
 }
 
 void
@@ -464,9 +732,14 @@ uvmpdpol_anfree(struct vm_anon *an)
 bool
 uvmpdpol_pageisqueued_p(struct vm_page *pg)
 {
+	uint32_t pqflags;
 
-	/* Safe to test unlocked due to page life-cycle. */
-	return (pg->pqflags & (PQ_ACTIVE | PQ_INACTIVE)) != 0;
+	/*
+	 * safe to test unlocked due to page life-cycle.
+	 */
+
+	pqflags = atomic_load_relaxed(&pg->pqflags);
+	return pqflags & (PQ_ACTIVE | PQ_INACTIVE | PQ_INTENT_A | PQ_INTENT_I);
 }
 
 void
@@ -525,6 +798,16 @@ uvmpdpol_init(void)
 }
 
 void
+uvmpdpol_init_cpu(struct uvm_cpu *ucpu)
+{
+
+	ucpu->pdq =
+	    kmem_alloc(CLOCK_PDQ_SIZE * sizeof(struct vm_page *), KM_SLEEP);
+	ucpu->pdqcount = 0;
+	ucpu->pdqmax = CLOCK_PDQ_SIZE;	/* must match the allocation above */
+}
+
+void
 uvmpdpol_reinit(void)
 {
 }
@@ -533,7 +816,10 @@
 bool
 uvmpdpol_needsscan_p(void)
 {
 
-	/* This must be an unlocked check: can be called from interrupt. */
+	/*
+	 * this must be an unlocked check: can be called from interrupt.
+	 */
+
 	return pdpol_state.s_inactive < pdpol_state.s_inactarg;
 }
 
@@ -547,6 +833,47 @@ uvmpdpol_tune(void)
 	mutex_exit(&s->lock);
 }
 
+/*
+ * uvmpdpol_idle: called from the system idle loop.
+ */
+
+void
+uvmpdpol_idle(struct uvm_cpu *ucpu)
+{
+	struct uvmpdpol_globalstate *s = &pdpol_state;
+
+	KASSERT(kpreempt_disabled());
+
+	/*
+	 * if no pages in the queue, we have nothing to do.
+	 */
+
+	if (ucpu->pdqcount == 0) {
+		ucpu->pdqtime = hardclock_ticks;
+		return;
+	}
+
+	/*
+	 * don't do this more than ~8 times a second as it would needlessly
+	 * exert pressure.
+	 */
+
+	if (hardclock_ticks - ucpu->pdqtime < (hz >> 3)) {
+		return;
+	}
+
+	/*
+	 * the idle LWP can't block, so we have to try for the lock.
+	 * if we do get it, purge the per-CPU pending update queue.
+	 */
+
+	if (mutex_tryenter(&s->lock)) {
+		uvmpdpol_pageintent_purge(ucpu);
+		mutex_exit(&s->lock);
+	}
+}
+
+
 #if !defined(PDSIM)
 
 #include	/* XXX SYSCTL_DESCR */
--- src/sys/kern/kern_idle.c	2019-12-06 21:36:10.000000000 +0000
+++ src-uvm/sys/kern/kern_idle.c	2019-12-22 21:45:02.536173731 +0000
@@ -81,7 +81,7 @@ idle_loop(void *dummy)
 		sched_idle();
 		if (!sched_curcpu_runnable_p()) {
 			if ((spc->spc_flags & SPCF_OFFLINE) == 0) {
-				uvm_pageidlezero();
+				uvm_idle();
 			}
 			if (!sched_curcpu_runnable_p()) {
 				cpu_idle();
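
Editor's illustration, not part of the patch: the sketch below is a stand-alone,
user-space model of the deferral scheme the diff introduces in
uvmpdpol_pageintent_set()/_realize()/_purge() - callers record an "intent" on an
object with a CAS, enqueue the object into a small local queue only on the
transition from no-intent to intent, and a later batch pass applies the queued
intents.  Every name here (struct item, intent_set, intent_realize, INTENT_*,
PDQ_SIZE) is invented for illustration and is not the kernel API, and the
realize step is simplified to a single atomic clear rather than the kernel's
retry loop and lock assertions.

/* Build with: cc -std=c11 intent_sketch.c */
#include <stdatomic.h>
#include <stdio.h>

#define INTENT_NONE	0x0
#define INTENT_ACT	0x1		/* intend activation */
#define INTENT_INACT	0x2		/* intend deactivation */
#define INTENT_MASK	0x3

#define PDQ_SIZE	8		/* queue size in this model only */

struct item {
	_Atomic unsigned flags;		/* low bits hold the pending intent */
	int active;			/* "queue" state, owned by the realizer */
};

static struct item *pdq[PDQ_SIZE];	/* stands in for ucpu->pdq */
static unsigned pdqcount;		/* stands in for ucpu->pdqcount */

/* record an intent; enqueue the item only on a no-intent -> intent change */
static void
intent_set(struct item *it, unsigned intent)
{
	unsigned o, n;

	o = atomic_load_explicit(&it->flags, memory_order_relaxed);
	for (;;) {
		if ((o & INTENT_MASK) == intent)
			return;		/* same intent already recorded */
		n = (o & ~INTENT_MASK) | intent;
		if (atomic_compare_exchange_weak(&it->flags, &o, n))
			break;		/* on failure, o is reloaded */
	}
	if ((o & INTENT_MASK) != INTENT_NONE)
		return;			/* already queued by an earlier intent */
	if (pdqcount < PDQ_SIZE)
		pdq[pdqcount++] = it;
	/* the kernel instead purges a full queue under the global lock */
}

/* apply and clear a pending intent; the kernel does this with s->lock held */
static void
intent_realize(struct item *it)
{
	unsigned o = atomic_fetch_and(&it->flags, ~(unsigned)INTENT_MASK);

	switch (o & INTENT_MASK) {
	case INTENT_ACT:
		it->active = 1;
		break;
	case INTENT_INACT:
		it->active = 0;
		break;
	}
}

int
main(void)
{
	struct item pages[3] = {{ 0, 0 }, { 0, 1 }, { 0, 1 }};
	unsigned i;

	intent_set(&pages[0], INTENT_ACT);
	intent_set(&pages[2], INTENT_INACT);
	intent_set(&pages[2], INTENT_INACT);	/* duplicate: not re-queued */

	for (i = 0; i < pdqcount; i++)		/* the "purge" pass */
		intent_realize(pdq[i]);
	pdqcount = 0;

	for (i = 0; i < 3; i++)
		printf("page %u active=%d\n", i, pages[i].active);
	return 0;
}

The point of the batching, as in the patch, is that setting an intent touches
only the page's own flags word and a per-CPU array, so the global pdpolicy lock
is taken only when a queue fills up, when the pagedaemon scans, or when the
idle loop drains the queue.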