diff -rup src/sys/uvm/uvm.h src-uvm/sys/uvm/uvm.h
--- src/sys/uvm/uvm.h	2019-12-23 21:33:00.724563317 +0000
+++ src-uvm/sys/uvm/uvm.h	2019-12-23 20:32:23.596067237 +0000
@@ -78,12 +78,21 @@ struct pgflcache;
  */
 struct uvm_cpu {
+	/* allocator */
 	struct pgflcache *pgflcache[VM_NFREELIST];/* cpu-local cached pages */
 	void *pgflcachemem;		/* pointer to allocated mem */
 	size_t pgflcachememsz;		/* size of allocated memory */
 	u_int pgflcolor;		/* next color to allocate */
 	u_int pgflbucket;		/* where to send our pages */
+
+	/* entropy */
 	krndsource_t rs;		/* entropy source */
+
+	/* pdpolicy: queue of intended page status changes. */
+	struct vm_page **pdq;		/* queue entries */
+	u_int pdqcount;			/* number of entries queued */
+	u_int pdqmax;			/* maximum number entries */
+	int pdqtime;			/* last time queue cleared */
 };
 
 /*
@@ -109,9 +118,6 @@ struct uvm {
 
 	/* aio_done is locked by uvm.pagedaemon_lock and splbio! */
 	TAILQ_HEAD(, buf) aio_done;	/* done async i/o reqs */
-
-	/* per-cpu data */
-	struct uvm_cpu *cpus[MAXCPUS];
 };
 
 /*
diff -rup src/sys/uvm/uvm_extern.h src-uvm/sys/uvm/uvm_extern.h
--- src/sys/uvm/uvm_extern.h	2019-12-23 21:33:00.739144500 +0000
+++ src-uvm/sys/uvm/uvm_extern.h	2019-12-22 21:44:51.257414908 +0000
@@ -639,6 +639,7 @@ int uvm_coredump_walkmap(struct proc *
 int	uvm_coredump_count_segs(struct proc *);
 void	uvm_proc_exit(struct proc *);
 void	uvm_lwp_exit(struct lwp *);
+void	uvm_idle(void);
 void	uvm_init_limits(struct proc *);
 bool	uvm_kernacc(void *, size_t, vm_prot_t);
 __dead void	uvm_scheduler(void);
diff -rup src/sys/uvm/uvm_glue.c src-uvm/sys/uvm/uvm_glue.c
--- src/sys/uvm/uvm_glue.c	2019-12-23 21:33:00.739727884 +0000
+++ src-uvm/sys/uvm/uvm_glue.c	2019-12-23 16:59:45.997805865 +0000
@@ -86,6 +86,7 @@ __KERNEL_RCSID(0, "$NetBSD: uvm_glue.c,v
 
 #include
 #include
+#include
 #include
 
 /*
@@ -516,3 +517,22 @@ uvm_scheduler(void)
 		(void)kpause("uvm", false, hz, NULL);
 	}
 }
+
+/*
+ * uvm_idle: called from the idle loop.
+ */
+
+void
+uvm_idle(void)
+{
+	struct cpu_info *ci = curcpu();
+	struct uvm_cpu *ucpu = ci->ci_data.cpu_uvm;
+
+	KASSERT(kpreempt_disabled());
+
+	if (!ci->ci_want_resched)
+		uvmpdpol_idle(ucpu);
+	if (!ci->ci_want_resched)
+		uvm_pageidlezero();
+
+}
diff -rup src/sys/uvm/uvm_page.c src-uvm/sys/uvm/uvm_page.c
--- src/sys/uvm/uvm_page.c	2019-12-23 21:33:00.749522774 +0000
+++ src-uvm/sys/uvm/uvm_page.c	2019-12-23 20:32:16.700069072 +0000
@@ -328,7 +328,6 @@ uvm_page_init(vaddr_t *kvm_startp, vaddr
 	 * structures).
 	 */
-	uvm.cpus[0] = &boot_cpu;
 	curcpu()->ci_data.cpu_uvm = &boot_cpu;
 	uvmpdpol_init();
 	for (b = 0; b < __arraycount(uvm_freelist_locks); b++) {
@@ -972,17 +978,19 @@ uvm_cpu_attach(struct cpu_info *ci)
 		    KM_SLEEP);
 		ucpu = (struct uvm_cpu *)roundup2((uintptr_t)ucpu, coherency_unit);
-		uvm.cpus[cpu_index(ci)] = ucpu;
 		ci->ci_data.cpu_uvm = ucpu;
+	} else {
+		ucpu = ci->ci_data.cpu_uvm;
 	}
+
+	uvmpdpol_init_cpu(ucpu);
+
 	/*
 	 * Attach RNG source for this CPU's VM events
 	 */
-	rnd_attach_source(&uvm.cpus[cpu_index(ci)]->rs,
-	    ci->ci_data.cpu_name, RND_TYPE_VM,
-	    RND_FLAG_COLLECT_TIME|RND_FLAG_COLLECT_VALUE|
-	    RND_FLAG_ESTIMATE_VALUE);
+	rnd_attach_source(&ucpu->rs, ci->ci_data.cpu_name, RND_TYPE_VM,
+	    RND_FLAG_COLLECT_TIME|RND_FLAG_COLLECT_VALUE|
+	    RND_FLAG_ESTIMATE_VALUE);
 }
 
 /*
diff -rup src/sys/uvm/uvm_page.h src-uvm/sys/uvm/uvm_page.h
--- src/sys/uvm/uvm_page.h	2019-12-23 21:33:00.750295428 +0000
+++ src-uvm/sys/uvm/uvm_page.h	2019-12-21 14:59:29.133085280 +0000
@@ -150,6 +150,7 @@ struct vm_page {
 		 * or uvm_pglistalloc output */
 		LIST_ENTRY(vm_page) list;	/* f: global free page queue */
 	} pageq;
+	TAILQ_ENTRY(vm_page) pdqueue;	/* p: pdpolicy queue */
 	struct vm_anon *uanon;		/* o,i: anon */
 	struct uvm_object *uobject;	/* o,i: object */
 	voff_t offset;			/* o: offset into object */
diff -rup src/sys/uvm/uvm_pdpolicy.h src-uvm/sys/uvm/uvm_pdpolicy.h
--- src/sys/uvm/uvm_pdpolicy.h	2019-12-13 20:10:22.000000000 +0000
+++ src-uvm/sys/uvm/uvm_pdpolicy.h	2019-12-23 16:59:22.739526531 +0000
@@ -37,7 +37,9 @@ struct vm_anon;
  * don't use them directly from outside of /sys/uvm.
  */
 
+void uvmpdpol_idle(struct uvm_cpu *);
 void uvmpdpol_init(void);
+void uvmpdpol_init_cpu(struct uvm_cpu *);
 void uvmpdpol_reinit(void);
 void uvmpdpol_estimatepageable(int *, int *);
 bool uvmpdpol_needsscan_p(void);
 #endif /* !_UVM_PDPOLICY_H_ */
diff -rup src/sys/uvm/uvm_pdpolicy_clock.c src-uvm/sys/uvm/uvm_pdpolicy_clock.c
--- src/sys/uvm/uvm_pdpolicy_clock.c	2019-12-23 19:29:03.000000000 +0000
+++ src-uvm/sys/uvm/uvm_pdpolicy_clock.c	2019-12-24 21:33:56.703815219 +0000
@@ -1,6 +1,35 @@
-/*	$NetBSD: uvm_pdpolicy_clock.c,v 1.22 2019/12/23 19:29:03 ad Exp $	*/
+/*	$NetBSD: uvm_pdpolicy_clock.c,v 1.21 2019/12/21 13:00:25 ad Exp $	*/
 /* NetBSD: uvm_pdaemon.c,v 1.72 2006/01/05 10:47:33 yamt Exp $	*/
 
+/*-
+ * Copyright (c) 2019 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Andrew Doran.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
  * Copyright (c) 1991, 1993, The Regents of the University of California.
@@ -69,12 +98,13 @@
 #else /* defined(PDSIM) */
 
 #include
 __KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clock.c,v 1.22 2019/12/23 19:29:03 ad Exp $");
 
 #include
 #include
 #include
 #include
+#include
 
 #include
 #include
@@ -83,9 +113,28 @@ __KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy
 #endif /* defined(PDSIM) */
 
-#define	PQ_TIME		0xfffffffc	/* time of last activation */
-#define	PQ_INACTIVE	0x00000001	/* page is in inactive list */
-#define	PQ_ACTIVE	0x00000002	/* page is in active list */
+/*
+ * per-CPU queue of pending ("intended") page status changes.
+ * 512 entries makes for 1x 4kB page on _LP64.
+ */
+
+#if !defined(CLOCK_PDQ_SIZE)
+#define	CLOCK_PDQ_SIZE	512
+#endif /* !defined(CLOCK_PDQ_SIZE) */
+
+/*
+ * per-page status flags.
+ */
+
+#define	PQ_TIME		0xfffffff0	/* time of last change */
+#define	PQ_INTENT_0	0x00000000	/* no intent */
+#define	PQ_INTENT_A	0x00000001	/* intend activation */
+#define	PQ_INTENT_I	0x00000002	/* intend deactivation */
+#define	PQ_INTENT_D	0x00000003	/* intend dequeue */
+#define	PQ_INTENT	0x00000003
+#define	PQ_INACTIVE	0x00000004	/* page is on inactive list */
+#define	PQ_ACTIVE	0x00000008	/* page is on active list */
+
 #if !defined(CLOCK_INACTIVEPCT)
 #define	CLOCK_INACTIVEPCT	33
@@ -115,9 +164,14 @@ struct uvmpdpol_scanstate {
 	struct vm_page *ss_nextpg;
 };
 
-static void uvmpdpol_pageactivate_locked(struct vm_page *);
-static void uvmpdpol_pagedeactivate_locked(struct vm_page *);
-static void uvmpdpol_pagedequeue_locked(struct vm_page *);
+static void uvmpdpol_pagereactivate_locked(struct vm_page *);
+static uint32_t uvmpdpol_pageactivate_locked(struct vm_page *, uint32_t);
+static uint32_t uvmpdpol_pagedeactivate_locked(struct vm_page *, uint32_t);
+static uint32_t uvmpdpol_pagedequeue_locked(struct vm_page *, uint32_t);
+
+static bool uvmpdpol_pageintent_realize(struct vm_page *);
+static void uvmpdpol_pageintent_purge(struct uvm_cpu *);
+static void uvmpdpol_pageintent_set(struct vm_page *, uint32_t);
 
 static struct uvmpdpol_globalstate pdpol_state __cacheline_aligned;
 static struct uvmpdpol_scanstate pdpol_scanstate;
@@ -188,6 +242,7 @@ uvmpdpol_selectvictim(kmutex_t **plock)
 	struct uvmpdpol_globalstate *s = &pdpol_state;
 	struct uvmpdpol_scanstate *ss = &pdpol_scanstate;
 	struct vm_page *pg;
+	uint32_t pqflags;
 	kmutex_t *lock;
 
 	mutex_enter(&s->lock);
@@ -207,20 +262,29 @@ uvmpdpol_selectvictim(kmutex_t **plock)
 		if (pg == NULL) {
 			break;
 		}
-		ss->ss_nextpg = TAILQ_NEXT(pg, pageq.queue);
+		ss->ss_nextpg = TAILQ_NEXT(pg, pdqueue);
 		uvmexp.pdscans++;
 
 		/*
+		 * process any pending intent.  if we had to do something,
+		 * the page is likely not on the queue any more, so skip it.
+		 */
+
+		if (uvmpdpol_pageintent_realize(pg)) {
+			continue;
+		}
+
+		/*
 		 * acquire interlock to stablize page identity.
 		 * if we have caught the page in a state of flux
-		 * and it should be dequeued, do it now and then
-		 * move on to the next.
+		 * it will be dequeued soon, so ignore it and move
+		 * on to the next.
 		 */
+
 		mutex_enter(&pg->interlock);
 		if ((pg->uobject == NULL && pg->uanon == NULL) ||
 		    pg->wire_count > 0) {
 			mutex_exit(&pg->interlock);
-			uvmpdpol_pagedequeue_locked(pg);
 			continue;
 		}
@@ -231,24 +295,25 @@ uvmpdpol_selectvictim(kmutex_t **plock)
 		 * minimum, reactivate the page instead and move
 		 * on to the next page.
 		 */
+
 		anon = pg->uanon;
 		uobj = pg->uobject;
 		if (uobj && UVM_OBJ_IS_VTEXT(uobj) && ss->ss_execreact) {
 			mutex_exit(&pg->interlock);
-			uvmpdpol_pageactivate_locked(pg);
+			uvmpdpol_pagereactivate_locked(pg);
 			PDPOL_EVCNT_INCR(reactexec);
 			continue;
 		}
 		if (uobj && UVM_OBJ_IS_VNODE(uobj) && !UVM_OBJ_IS_VTEXT(uobj) &&
 		    ss->ss_filereact) {
 			mutex_exit(&pg->interlock);
-			uvmpdpol_pageactivate_locked(pg);
+			uvmpdpol_pagereactivate_locked(pg);
 			PDPOL_EVCNT_INCR(reactfile);
 			continue;
 		}
 		if ((anon || UVM_OBJ_IS_AOBJ(uobj)) && ss->ss_anonreact) {
 			mutex_exit(&pg->interlock);
-			uvmpdpol_pageactivate_locked(pg);
+			uvmpdpol_pagereactivate_locked(pg);
 			PDPOL_EVCNT_INCR(reactanon);
 			continue;
 		}
@@ -267,6 +332,7 @@ uvmpdpol_selectvictim(kmutex_t **plock)
 		 *
 		 * object -> pdpol -> interlock.
 		 */
+
 		mutex_exit(&s->lock);
 		lock = uvmpd_trylockowner(pg);
 		/* pg->interlock now released */
@@ -280,8 +346,10 @@ uvmpdpol_selectvictim(kmutex_t **plock)
 		 * move referenced pages back to active queue and skip to
 		 * next page.
 		 */
+
 		if (pmap_is_referenced(pg)) {
-			uvmpdpol_pageactivate_locked(pg);
+			pqflags = atomic_load_relaxed(&pg->pqflags);
+			uvmpdpol_pageactivate_locked(pg, pqflags);
 			uvmexp.pdreact++;
 			mutex_exit(lock);
 			continue;
 		}
@@ -301,6 +369,7 @@ uvmpdpol_balancequeue(int swap_shortage)
 	struct uvmpdpol_globalstate *s = &pdpol_state;
 	int inactive_shortage;
 	struct vm_page *p, *nextpg;
+	uint32_t pqflags;
 	kmutex_t *lock;
 
 	/*
@@ -313,7 +382,18 @@ uvmpdpol_balancequeue(int swap_shortage)
 	for (p = TAILQ_FIRST(&pdpol_state.s_activeq);
 	    p != NULL && (inactive_shortage > 0 || swap_shortage > 0);
 	    p = nextpg) {
-		nextpg = TAILQ_NEXT(p, pageq.queue);
+		nextpg = TAILQ_NEXT(p, pdqueue);
+
+		/*
+		 * process any pending intent.  if we had to do something,
+		 * the page is likely not on the queue any more, so skip it.
+		 */
+
+		if (uvmpdpol_pageintent_realize(p)) {
+			inactive_shortage = pdpol_state.s_inactarg -
+			    pdpol_state.s_inactive;
+			continue;
+		}
 
 		/*
 		 * if there's a shortage of swap slots, try to free it.
@@ -340,14 +420,14 @@ uvmpdpol_balancequeue(int swap_shortage)
 		/*
 		 * acquire interlock to stablize page identity.
 		 * if we have caught the page in a state of flux
-		 * and it should be dequeued, do it now and then
-		 * move on to the next.
+		 * it will be dequeued soon, so ignore it and move
+		 * on to the next.
 		 */
+
 		mutex_enter(&p->interlock);
 		if ((p->uobject == NULL && p->uanon == NULL) ||
 		    p->wire_count > 0) {
 			mutex_exit(&p->interlock);
-			uvmpdpol_pagedequeue_locked(p);
 			continue;
 		}
 		mutex_exit(&s->lock);
@@ -355,7 +435,9 @@
 		/* p->interlock now released */
 		mutex_enter(&s->lock);
 		if (lock != NULL) {
-			uvmpdpol_pagedeactivate_locked(p);
+			pmap_clear_reference(p);
+			pqflags = atomic_load_relaxed(&p->pqflags);
+			uvmpdpol_pagedeactivate_locked(p, pqflags);
 			uvmexp.pddeact++;
 			inactive_shortage--;
 			mutex_exit(lock);
@@ -364,96 +446,281 @@
 	mutex_exit(&s->lock);
 }
 
+/*
+ * uvmpdpol_pageintent_realize: take the intended action set on the page,
+ * clear out the intent bits, and if they changed behind us, start over
+ * again.
+ */
+
+static bool
+uvmpdpol_pageintent_realize(struct vm_page *pg)
+{
+	struct uvmpdpol_globalstate *s __diagused = &pdpol_state;
+	uint32_t pqflags, intent;
+	bool processed = false;
+
+	KASSERT(mutex_owned(&s->lock));
+
+	pqflags = atomic_load_relaxed(&pg->pqflags);
+	do {
+		switch (intent = pqflags & PQ_INTENT) {
+		case PQ_INTENT_0:
+			break;
+		case PQ_INTENT_A:
+			pqflags = uvmpdpol_pageactivate_locked(pg, pqflags);
+			processed = true;
+			break;
+		case PQ_INTENT_I:
+			pqflags = uvmpdpol_pagedeactivate_locked(pg, pqflags);
+			processed = true;
+			break;
+		case PQ_INTENT_D:
+			pqflags = uvmpdpol_pagedequeue_locked(pg, pqflags);
+			processed = true;
+			break;
+		}
+	} while (__predict_false(intent != (pqflags & PQ_INTENT)));
+	return processed;
+}
+
+/*
+ * uvmpdpol_pageintent_purge: purge the per-CPU queue of pending page
+ * status changes.
+ */
+
+static void
+uvmpdpol_pageintent_purge(struct uvm_cpu *ucpu)
+{
+	struct uvmpdpol_globalstate *s __diagused = &pdpol_state;
+	int i;
+
+	KASSERT(kpreempt_disabled());
+	KASSERT(mutex_owned(&s->lock));
+
+	for (i = 0; i < ucpu->pdqcount; i++) {
+		(void)uvmpdpol_pageintent_realize(ucpu->pdq[i]);
+	}
+	ucpu->pdqcount = 0;
+	ucpu->pdqtime = hardclock_ticks;
+}
+
+/*
+ * uvmpdpol_pageintent_set: set the intended status for a page, overriding
+ * any prior intent that was set.
+ *
+ * the object owners' lock is held here, so we need only worry about
+ * concurrent activity by the pagedaemon and by uvmpdpol_pageintent_realize()
+ * if the page is already on a queue somewhere else in the system.
+ */
+
 static void
-uvmpdpol_pagedeactivate_locked(struct vm_page *pg)
+uvmpdpol_pageintent_set(struct vm_page *pg, uint32_t intent)
 {
+	struct uvmpdpol_globalstate *s = &pdpol_state;
+	struct uvm_cpu *ucpu;
+	uint32_t o, n, v;
 
 	KASSERT(uvm_page_locked_p(pg));
+	KASSERT((intent & ~PQ_INTENT) == 0);
+
+	/*
+	 * set our intent on the page.  if we find the exact same intent
+	 * already set, then there's nothing more for us to do.
+	 */
+
+	for (o = atomic_load_relaxed(&pg->pqflags);; o = n) {
+		if ((o & PQ_INTENT) == intent) {
+			return;
+		}
+		v = (o & ~PQ_INTENT) | intent;
+		n = atomic_cas_32(&pg->pqflags, o, v);
+		if (n == o) {
+			break;
+		}
+	}
+
+	/*
+	 * if there was an intent set already, then the page is already in a
+	 * queue of pages somewhere (maybe our queue), and the intent is
+	 * sure to be cleared.  otherwise we'll try to add it to our queue
+	 * to process later.
+	 */
+
+	if ((o & PQ_INTENT) != 0) {
+		return;
+	}
+
+	/*
+	 * if our queue isn't full yet, then this is cheap & easy.
+	 */
+
+	kpreempt_disable();
+	ucpu = curcpu()->ci_data.cpu_uvm;
+	if (__predict_true(ucpu->pdqcount < ucpu->pdqmax)) {
+		ucpu->pdq[ucpu->pdqcount++] = pg;
+		kpreempt_enable();
+		return;
+	}
+
+	/*
+	 * the queue is full.  reload ucpu to ensure that we're looking in
+	 * the right place after acquiring the mutex.  if we did switch to
+	 * another CPU, it's no problem - the prior queue will be cleared
+	 * some other time.
+	 */
+
+	kpreempt_enable();
+	mutex_enter(&s->lock);
+	kpreempt_disable();
+	ucpu = curcpu()->ci_data.cpu_uvm;
+	if (ucpu->pdqcount == ucpu->pdqmax) {
+		uvmpdpol_pageintent_purge(ucpu);
+	}
+	ucpu->pdq[ucpu->pdqcount++] = pg;
+	kpreempt_enable();
+	mutex_exit(&s->lock);
+}
+
+/*
+ * uvmpdpol_pagereactivate: this works like uvmpdpol_pageactivate_locked,
+ * but because we are called here without the object/anon locked, we must
+ * pay close attention to see if the owner set a dequeue pending on the
+ * page, and not override and therefore lose that intent.
+ */
+static void
+uvmpdpol_pagereactivate_locked(struct vm_page *pg)
+{
+	uint32_t o, n, v;
+
+	for (o = atomic_load_relaxed(&pg->pqflags);; o = n) {
+		if ((o & PQ_INTENT) == PQ_INTENT_D) {
+			break;
+		}
+		v = (o & ~PQ_INTENT) | PQ_INTENT_A;
+		n = atomic_cas_32(&pg->pqflags, o, v);
+		if (n == o) {
+			break;
+		}
+	}
+	uvmpdpol_pageintent_realize(pg);
+}
 
-	if (pg->pqflags & PQ_ACTIVE) {
-		TAILQ_REMOVE(&pdpol_state.s_activeq, pg, pageq.queue);
-		pg->pqflags &= ~(PQ_ACTIVE | PQ_TIME);
+static uint32_t
+uvmpdpol_pagedeactivate_locked(struct vm_page *pg, uint32_t pqflags)
+{
+
+	if ((pqflags & PQ_ACTIVE) != 0) {
+		TAILQ_REMOVE(&pdpol_state.s_activeq, pg, pdqueue);
 		KASSERT(pdpol_state.s_active > 0);
 		pdpol_state.s_active--;
 	}
-	if ((pg->pqflags & PQ_INACTIVE) == 0) {
-		KASSERT(pg->wire_count == 0);
-		pmap_clear_reference(pg);
-		TAILQ_INSERT_TAIL(&pdpol_state.s_inactiveq, pg, pageq.queue);
-		pg->pqflags |= PQ_INACTIVE;
+	if ((pqflags & PQ_INACTIVE) == 0) {
+		TAILQ_INSERT_TAIL(&pdpol_state.s_inactiveq, pg, pdqueue);
 		pdpol_state.s_inactive++;
 	}
+
+	pqflags = PQ_INACTIVE | (hardclock_ticks & PQ_TIME);
+	return atomic_swap_32(&pg->pqflags, pqflags);
 }
 
 void
 uvmpdpol_pagedeactivate(struct vm_page *pg)
 {
-	struct uvmpdpol_globalstate *s = &pdpol_state;
+	uint32_t pqflags;
 
-	mutex_enter(&s->lock);
-	uvmpdpol_pagedeactivate_locked(pg);
-	mutex_exit(&s->lock);
+	KASSERT(uvm_page_locked_p(pg));
+
+	/*
+	 * it might be useful to defer the pmap_clear_reference(), but it
+	 * can't be done as when it comes time to realize the intent the
+	 * page may no longer be locked.
+	 */
+
+	pmap_clear_reference(pg);
+	pqflags = atomic_load_relaxed(&pg->pqflags);
+	if ((pqflags & PQ_INACTIVE) == 0) {
+		uvmpdpol_pageintent_set(pg, PQ_INTENT_I);
+	}
 }
 
-static void
-uvmpdpol_pageactivate_locked(struct vm_page *pg)
+static uint32_t
+uvmpdpol_pageactivate_locked(struct vm_page *pg, uint32_t pqflags)
 {
 
-	uvmpdpol_pagedequeue_locked(pg);
-	TAILQ_INSERT_TAIL(&pdpol_state.s_activeq, pg, pageq.queue);
-	pg->pqflags = PQ_ACTIVE | (hardclock_ticks & PQ_TIME);
-	pdpol_state.s_active++;
+	if (pqflags & PQ_ACTIVE) {
+		TAILQ_REMOVE(&pdpol_state.s_activeq, pg, pdqueue);
+		TAILQ_INSERT_TAIL(&pdpol_state.s_activeq, pg, pdqueue);
+		KASSERT(pdpol_state.s_active > 0);
+	} else if (pqflags & PQ_INACTIVE) {
+		TAILQ_REMOVE(&pdpol_state.s_inactiveq, pg, pdqueue);
+		TAILQ_INSERT_TAIL(&pdpol_state.s_activeq, pg, pdqueue);
+		KASSERT(pdpol_state.s_inactive > 0);
+		pdpol_state.s_inactive--;
+		pdpol_state.s_active++;
+	} else {
+		TAILQ_INSERT_TAIL(&pdpol_state.s_activeq, pg, pdqueue);
+		pdpol_state.s_active++;
+	}
+
+	pqflags = PQ_ACTIVE | (hardclock_ticks & PQ_TIME);
+	return atomic_swap_32(&pg->pqflags, pqflags);
 }
 
 void
 uvmpdpol_pageactivate(struct vm_page *pg)
 {
-	struct uvmpdpol_globalstate *s = &pdpol_state;
+	uint32_t pqflags;
 
-	/* Safety: PQ_ACTIVE clear also tells us if it is not enqueued. */
-	if ((pg->pqflags & PQ_ACTIVE) == 0 ||
-	    ((hardclock_ticks & PQ_TIME) - (pg->pqflags & PQ_TIME)) >= hz) {
-		mutex_enter(&s->lock);
-		uvmpdpol_pageactivate_locked(pg);
-		mutex_exit(&s->lock);
+	KASSERT(uvm_page_locked_p(pg));
+
+	/*
+	 * PQ_ACTIVE clear also tells us if it is not enqueued.  don't
+	 * touch the page if it was already activated recently (less
+	 * than a second ago).
+	 */
+
+	pqflags = atomic_load_relaxed(&pg->pqflags);
+	if ((pqflags & PQ_ACTIVE) == 0 ||
+	    ((hardclock_ticks & PQ_TIME) - (pqflags & PQ_TIME)) >= hz) {
+		uvmpdpol_pageintent_set(pg, PQ_INTENT_A);
 	}
 }
 
-static void
-uvmpdpol_pagedequeue_locked(struct vm_page *pg)
+static uint32_t
+uvmpdpol_pagedequeue_locked(struct vm_page *pg, uint32_t pqflags)
 {
+	struct uvmpdpol_globalstate *s __diagused = &pdpol_state;
+
+	KASSERT(mutex_owned(&s->lock));
 
-	if (pg->pqflags & PQ_ACTIVE) {
-		TAILQ_REMOVE(&pdpol_state.s_activeq, pg, pageq.queue);
-		pg->pqflags &= ~(PQ_ACTIVE | PQ_TIME);
+	if (pqflags & PQ_ACTIVE) {
+		TAILQ_REMOVE(&pdpol_state.s_activeq, pg, pdqueue);
 		KASSERT(pdpol_state.s_active > 0);
 		pdpol_state.s_active--;
-	} else if (pg->pqflags & PQ_INACTIVE) {
-		TAILQ_REMOVE(&pdpol_state.s_inactiveq, pg, pageq.queue);
-		pg->pqflags &= ~PQ_INACTIVE;
+	} else if (pqflags & PQ_INACTIVE) {
		TAILQ_REMOVE(&pdpol_state.s_inactiveq, pg, pdqueue);
 		KASSERT(pdpol_state.s_inactive > 0);
 		pdpol_state.s_inactive--;
 	}
+
+	pqflags = hardclock_ticks & PQ_TIME;
+	return atomic_swap_32(&pg->pqflags, pqflags);
 }
 
 void
 uvmpdpol_pagedequeue(struct vm_page *pg)
 {
-	struct uvmpdpol_globalstate *s = &pdpol_state;
 
-	mutex_enter(&s->lock);
-	uvmpdpol_pagedequeue_locked(pg);
-	mutex_exit(&s->lock);
+	KASSERT(uvm_page_locked_p(pg));
+	uvmpdpol_pageintent_set(pg, PQ_INTENT_D);
 }
 
 void
 uvmpdpol_pageenqueue(struct vm_page *pg)
 {
-	struct uvmpdpol_globalstate *s = &pdpol_state;
 
-	mutex_enter(&s->lock);
-	uvmpdpol_pageactivate_locked(pg);
-	mutex_exit(&s->lock);
+	KASSERT(uvm_page_locked_p(pg));
+	uvmpdpol_pageintent_set(pg, PQ_INTENT_A);
 }
 
 void
@@ -464,9 +732,14 @@ uvmpdpol_anfree(struct vm_anon *an)
 bool
 uvmpdpol_pageisqueued_p(struct vm_page *pg)
 {
+	uint32_t pqflags;
 
-	/* Safe to test unlocked due to page life-cycle. */
-	return (pg->pqflags & (PQ_ACTIVE | PQ_INACTIVE)) != 0;
+	/*
+	 * safe to test unlocked due to page life-cycle.
+	 */
+
+	pqflags = atomic_load_relaxed(&pg->pqflags);
+	return pqflags & (PQ_ACTIVE | PQ_INACTIVE | PQ_INTENT_A | PQ_INTENT_I);
 }
 
 void
@@ -525,6 +798,16 @@ uvmpdpol_init(void)
 }
 
 void
+uvmpdpol_init_cpu(struct uvm_cpu *ucpu)
+{
+
+	ucpu->pdq =
+	    kmem_alloc(CLOCK_PDQ_SIZE * sizeof(struct vm_page *), KM_SLEEP);
+	ucpu->pdqcount = 0;
+	ucpu->pdqmax = CLOCK_PDQ_SIZE;	/* must match the allocation above */
+}
+
+void
 uvmpdpol_reinit(void)
 {
 }
@@ -533,7 +816,10 @@
 bool
 uvmpdpol_needsscan_p(void)
 {
 
-	/* This must be an unlocked check: can be called from interrupt. */
+	/*
+	 * this must be an unlocked check: can be called from interrupt.
+	 */
+
 	return pdpol_state.s_inactive < pdpol_state.s_inactarg;
 }
 
@@ -547,6 +833,47 @@ uvmpdpol_tune(void)
 	mutex_exit(&s->lock);
 }
 
+/*
+ * uvmpdpol_idle: called from the system idle loop.
+ */
+
+void
+uvmpdpol_idle(struct uvm_cpu *ucpu)
+{
+	struct uvmpdpol_globalstate *s = &pdpol_state;
+
+	KASSERT(kpreempt_disabled());
+
+	/*
+	 * if no pages in the queue, we have nothing to do.
+	 */
+
+	if (ucpu->pdqcount == 0) {
+		ucpu->pdqtime = hardclock_ticks;
+		return;
+	}
+
+	/*
+	 * don't do this more than ~8 times a second as it would needlessly
+	 * exert pressure.
+	 */
+
+	if (hardclock_ticks - ucpu->pdqtime < (hz >> 3)) {
+		return;
+	}
+
+	/*
+	 * the idle LWP can't block, so we have to try for the lock.
+	 * if we do get it, purge the per-CPU pending update queue.
+	 */
+
+	if (mutex_tryenter(&s->lock)) {
+		uvmpdpol_pageintent_purge(ucpu);
+		mutex_exit(&s->lock);
+	}
+}
+
+
 #if !defined(PDSIM)
 
 #include	/* XXX SYSCTL_DESCR */
--- src/sys/kern/kern_idle.c	2019-12-06 21:36:10.000000000 +0000
+++ src-uvm/sys/kern/kern_idle.c	2019-12-22 21:45:02.536173731 +0000
@@ -81,7 +81,7 @@ idle_loop(void *dummy)
 		sched_idle();
 		if (!sched_curcpu_runnable_p()) {
 			if ((spc->spc_flags & SPCF_OFFLINE) == 0) {
-				uvm_pageidlezero();
+				uvm_idle();
 			}
 			if (!sched_curcpu_runnable_p()) {
 				cpu_idle();
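
Editor's illustration, not part of the patch: the sketch below is a stand-alone,
user-space model of the deferral scheme the diff introduces in
uvmpdpol_pageintent_set()/_realize()/_purge() - callers record an "intent" on an
object with a CAS, enqueue the object into a small local queue only on the
transition from no-intent to intent, and a later batch pass applies the queued
intents.  Every name here (struct item, intent_set, intent_realize, INTENT_*,
PDQ_SIZE) is invented for illustration and is not the kernel API, and the
realize step is simplified to a single atomic clear rather than the kernel's
retry loop and lock assertions.

/* Build with: cc -std=c11 intent_sketch.c */
#include <stdatomic.h>
#include <stdio.h>

#define INTENT_NONE	0x0
#define INTENT_ACT	0x1		/* intend activation */
#define INTENT_INACT	0x2		/* intend deactivation */
#define INTENT_MASK	0x3

#define PDQ_SIZE	8		/* queue size in this model only */

struct item {
	_Atomic unsigned flags;		/* low bits hold the pending intent */
	int active;			/* "queue" state, owned by the realizer */
};

static struct item *pdq[PDQ_SIZE];	/* stands in for ucpu->pdq */
static unsigned pdqcount;		/* stands in for ucpu->pdqcount */

/* record an intent; enqueue the item only on a no-intent -> intent change */
static void
intent_set(struct item *it, unsigned intent)
{
	unsigned o, n;

	o = atomic_load_explicit(&it->flags, memory_order_relaxed);
	for (;;) {
		if ((o & INTENT_MASK) == intent)
			return;		/* same intent already recorded */
		n = (o & ~INTENT_MASK) | intent;
		if (atomic_compare_exchange_weak(&it->flags, &o, n))
			break;		/* on failure, o is reloaded */
	}
	if ((o & INTENT_MASK) != INTENT_NONE)
		return;			/* already queued by an earlier intent */
	if (pdqcount < PDQ_SIZE)
		pdq[pdqcount++] = it;
	/* the kernel instead purges a full queue under the global lock */
}

/* apply and clear a pending intent; the kernel does this with s->lock held */
static void
intent_realize(struct item *it)
{
	unsigned o = atomic_fetch_and(&it->flags, ~(unsigned)INTENT_MASK);

	switch (o & INTENT_MASK) {
	case INTENT_ACT:
		it->active = 1;
		break;
	case INTENT_INACT:
		it->active = 0;
		break;
	}
}

int
main(void)
{
	struct item pages[3] = {{ 0, 0 }, { 0, 1 }, { 0, 1 }};
	unsigned i;

	intent_set(&pages[0], INTENT_ACT);
	intent_set(&pages[2], INTENT_INACT);
	intent_set(&pages[2], INTENT_INACT);	/* duplicate: not re-queued */

	for (i = 0; i < pdqcount; i++)		/* the "purge" pass */
		intent_realize(pdq[i]);
	pdqcount = 0;

	for (i = 0; i < 3; i++)
		printf("page %u active=%d\n", i, pages[i].active);
	return 0;
}

The point of the batching, as in the patch, is that setting an intent touches
only the page's own flags word and a per-CPU array, so the global pdpolicy lock
is taken only when a queue fills up, when the pagedaemon scans, or when the
idle loop drains the queue.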