/*	$NetBSD: uvm_pdpolicy_clockpro.c,v 1.27 2022/04/12 20:27:56 andvar Exp $	*/

/*-
 * Copyright (c)2005, 2006 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * CLOCK-Pro replacement policy:
 *	http://web.cse.ohio-state.edu/hpcs/WWW/HTML/publications/abs05-3.html
 *
 * approximation of the list of non-resident pages using hash:
 *	http://linux-mm.org/ClockProApproximation
 */

/* #define	CLOCKPRO_DEBUG */

#if defined(PDSIM)

#include "pdsim.h"

#else /* defined(PDSIM) */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uvm_pdpolicy_clockpro.c,v 1.27 2022/04/12 20:27:56 andvar Exp $");

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/hash.h>

#include <uvm/uvm.h>
#include <uvm/uvm_pdaemon.h>	/* for uvmpd_trylockowner */
#include <uvm/uvm_pdpolicy.h>
#include <uvm/uvm_pdpolicy_impl.h>

#if ((__STDC_VERSION__ - 0) >= 199901L)
#define	DPRINTF(...)	/* nothing */
#define	WARN(...)	printf(__VA_ARGS__)
#else /* ((__STDC_VERSION__ - 0) >= 199901L) */
#define	DPRINTF(a...)	/* nothing */	/* GCC */
#define	WARN(a...)	printf(a)
#endif /* ((__STDC_VERSION__ - 0) >= 199901L) */

#define	dump(a)		/* nothing */

#undef	USEONCE2
#define	LISTQ
#undef	ADAPTIVE

#endif /* defined(PDSIM) */
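
/*
 * Tunables, in brief: CLOCKPRO_COLDPCT seeds the target percentage of
 * resident pages kept "cold" (see s_coldtargetpct and clockpro_tune),
 * and CLOCKPRO_HASHFACTOR scales the number of buckets used to remember
 * non-resident pages relative to the number of physical pages
 * (see clockpro_hashinit).
 */
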
#if !defined(CLOCKPRO_COLDPCT)
#define	CLOCKPRO_COLDPCT	10
#endif /* !defined(CLOCKPRO_COLDPCT) */

#define	CLOCKPRO_COLDPCTMAX	90

#if !defined(CLOCKPRO_HASHFACTOR)
#define	CLOCKPRO_HASHFACTOR	2
#endif /* !defined(CLOCKPRO_HASHFACTOR) */

#define	CLOCKPRO_NEWQMIN	((1024 * 1024) >> PAGE_SHIFT)	/* XXX */

int clockpro_hashfactor = CLOCKPRO_HASHFACTOR;

PDPOL_EVCNT_DEFINE(nresrecordobj)
PDPOL_EVCNT_DEFINE(nresrecordanon)
PDPOL_EVCNT_DEFINE(nreslookupobj)
PDPOL_EVCNT_DEFINE(nreslookupanon)
PDPOL_EVCNT_DEFINE(nresfoundobj)
PDPOL_EVCNT_DEFINE(nresfoundanon)
PDPOL_EVCNT_DEFINE(nresanonfree)
PDPOL_EVCNT_DEFINE(nresconflict)
PDPOL_EVCNT_DEFINE(nresoverwritten)
PDPOL_EVCNT_DEFINE(nreshandhot)

PDPOL_EVCNT_DEFINE(hhottakeover)
PDPOL_EVCNT_DEFINE(hhotref)
PDPOL_EVCNT_DEFINE(hhotunref)
PDPOL_EVCNT_DEFINE(hhotcold)
PDPOL_EVCNT_DEFINE(hhotcoldtest)

PDPOL_EVCNT_DEFINE(hcoldtakeover)
PDPOL_EVCNT_DEFINE(hcoldref)
PDPOL_EVCNT_DEFINE(hcoldunref)
PDPOL_EVCNT_DEFINE(hcoldreftest)
PDPOL_EVCNT_DEFINE(hcoldunreftest)
PDPOL_EVCNT_DEFINE(hcoldunreftestspeculative)
PDPOL_EVCNT_DEFINE(hcoldhot)

PDPOL_EVCNT_DEFINE(speculativeenqueue)
PDPOL_EVCNT_DEFINE(speculativehit1)
PDPOL_EVCNT_DEFINE(speculativehit2)
PDPOL_EVCNT_DEFINE(speculativemiss)

PDPOL_EVCNT_DEFINE(locksuccess)
PDPOL_EVCNT_DEFINE(lockfail)

#define	PQ_REFERENCED	0x000000010
#define	PQ_HOT		0x000000020
#define	PQ_TEST		0x000000040
#define	PQ_INITIALREF	0x000000080
#define	PQ_QMASK	0x000000700
#define	PQ_QFACTOR	0x000000100
#define	PQ_SPECULATIVE	0x000000800

#define	CLOCKPRO_NOQUEUE	0
#define	CLOCKPRO_NEWQ		1	/* small queue to clear initial ref. */
#if defined(LISTQ)
#define	CLOCKPRO_COLDQ		2
#define	CLOCKPRO_HOTQ		3
#else /* defined(LISTQ) */
#define	CLOCKPRO_COLDQ		(2 + coldqidx)	/* XXX */
#define	CLOCKPRO_HOTQ		(3 - coldqidx)	/* XXX */
#endif /* defined(LISTQ) */
#define	CLOCKPRO_LISTQ		4
#define	CLOCKPRO_NQUEUE		4

static bool	uvmpdpol_pagerealize_locked(struct vm_page *);

static inline void
clockpro_setq(struct vm_page *pg, int qidx)
{

	KASSERT(qidx >= CLOCKPRO_NOQUEUE);
	KASSERT(qidx <= CLOCKPRO_NQUEUE);

	pg->pqflags = (pg->pqflags & ~PQ_QMASK) | (qidx * PQ_QFACTOR);
}

static inline int
clockpro_getq(struct vm_page *pg)
{
	int qidx;

	qidx = (pg->pqflags & PQ_QMASK) / PQ_QFACTOR;
	KASSERT(qidx >= CLOCKPRO_NOQUEUE);
	KASSERT(qidx <= CLOCKPRO_NQUEUE);
	return qidx;
}

typedef struct {
	struct pglist q_q;
	int q_len;
} pageq_t;

struct clockpro_state {
	kmutex_t lock;
	int s_npages;
	int s_coldtarget;
	int s_ncold;

	int s_newqlenmax;
	pageq_t s_q[CLOCKPRO_NQUEUE];

	struct uvm_pctparam s_coldtargetpct;
};

static pageq_t *
clockpro_queue(struct clockpro_state *s, int qidx)
{

	KASSERT(CLOCKPRO_NOQUEUE < qidx);
	KASSERT(qidx <= CLOCKPRO_NQUEUE);

	return &s->s_q[qidx - 1];
}

#if !defined(LISTQ)

static int coldqidx;

static void
clockpro_switchqueue(void)
{

	coldqidx = 1 - coldqidx;
}

#endif /* !defined(LISTQ) */

static struct clockpro_state clockpro __cacheline_aligned;
static struct clockpro_scanstate {
	int ss_nscanned;
} scanstate;

/* ---------------------------------------- */
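
/*
 * Each clockpro queue is a plain TAILQ of vm_pages plus an explicit
 * length counter, so the rotation and tuning code below can check
 * queue lengths cheaply.
 */
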
static void
pageq_init(pageq_t *q)
{

	TAILQ_INIT(&q->q_q);
	q->q_len = 0;
}

static int
pageq_len(const pageq_t *q)
{

	return q->q_len;
}

static struct vm_page *
pageq_first(const pageq_t *q)
{

	return TAILQ_FIRST(&q->q_q);
}

static void
pageq_insert_tail(pageq_t *q, struct vm_page *pg)
{

	TAILQ_INSERT_TAIL(&q->q_q, pg, pdqueue);
	q->q_len++;
}

#if defined(LISTQ)
static void
pageq_insert_head(pageq_t *q, struct vm_page *pg)
{

	TAILQ_INSERT_HEAD(&q->q_q, pg, pdqueue);
	q->q_len++;
}
#endif

static void
pageq_remove(pageq_t *q, struct vm_page *pg)
{

#if 1
	KASSERT(clockpro_queue(&clockpro, clockpro_getq(pg)) == q);
#endif
	KASSERT(q->q_len > 0);
	TAILQ_REMOVE(&q->q_q, pg, pdqueue);
	q->q_len--;
}

static struct vm_page *
pageq_remove_head(pageq_t *q)
{
	struct vm_page *pg;

	pg = TAILQ_FIRST(&q->q_q);
	if (pg == NULL) {
		KASSERT(q->q_len == 0);
		return NULL;
	}
	pageq_remove(q, pg);
	return pg;
}

/* ---------------------------------------- */

static void
clockpro_insert_tail(struct clockpro_state *s, int qidx, struct vm_page *pg)
{
	pageq_t *q = clockpro_queue(s, qidx);

	clockpro_setq(pg, qidx);
	pageq_insert_tail(q, pg);
}

#if defined(LISTQ)
static void
clockpro_insert_head(struct clockpro_state *s, int qidx, struct vm_page *pg)
{
	pageq_t *q = clockpro_queue(s, qidx);

	clockpro_setq(pg, qidx);
	pageq_insert_head(q, pg);
}
#endif

/* ---------------------------------------- */

typedef uint32_t nonres_cookie_t;
#define	NONRES_COOKIE_INVAL	0

typedef uintptr_t objid_t;

/*
 * XXX maybe these hash functions need reconsideration,
 * given that hash distribution is critical here.
 */

static uint32_t
pageidentityhash1(objid_t obj, off_t idx)
{
	uint32_t hash = HASH32_BUF_INIT;

#if 1
	hash = hash32_buf(&idx, sizeof(idx), hash);
	hash = hash32_buf(&obj, sizeof(obj), hash);
#else
	hash = hash32_buf(&obj, sizeof(obj), hash);
	hash = hash32_buf(&idx, sizeof(idx), hash);
#endif
	return hash;
}

static uint32_t
pageidentityhash2(objid_t obj, off_t idx)
{
	uint32_t hash = HASH32_BUF_INIT;

	hash = hash32_buf(&obj, sizeof(obj), hash);
	hash = hash32_buf(&idx, sizeof(idx), hash);
	return hash;
}

static nonres_cookie_t
calccookie(objid_t obj, off_t idx)
{
	uint32_t hash = pageidentityhash2(obj, idx);
	nonres_cookie_t cookie = hash;

	if (__predict_false(cookie == NONRES_COOKIE_INVAL)) {
		cookie++; /* XXX */
	}
	return cookie;
}

#define	BUCKETSIZE	14
struct bucket {
	int cycle;
	int cur;
	nonres_cookie_t pages[BUCKETSIZE];
};
static int cycle_target;
static int cycle_target_frac;

static struct bucket static_bucket;
static struct bucket *buckets = &static_bucket;
static size_t hashsize = 1;

static int coldadj;
#define	COLDTARGET_ADJ(d)	coldadj += (d)

#if defined(PDSIM)

static void *
clockpro_hashalloc(int n)
{
	size_t allocsz = sizeof(*buckets) * n;

	return malloc(allocsz);
}

static void
clockpro_hashfree(void *p, int n)
{

	free(p);
}

#else /* defined(PDSIM) */

static void *
clockpro_hashalloc(int n)
{
	size_t allocsz = round_page(sizeof(*buckets) * n);

	return (void *)uvm_km_alloc(kernel_map, allocsz, 0, UVM_KMF_WIRED);
}

static void
clockpro_hashfree(void *p, int n)
{
	size_t allocsz = round_page(sizeof(*buckets) * n);

	uvm_km_free(kernel_map, (vaddr_t)p, allocsz, UVM_KMF_WIRED);
}

#endif /* defined(PDSIM) */

static void
clockpro_hashinit(uint64_t n)
{
	struct bucket *newbuckets;
	struct bucket *oldbuckets;
	size_t sz;
	size_t oldsz;
	int i;

	sz = howmany(n, BUCKETSIZE);
	sz *= clockpro_hashfactor;
	newbuckets = clockpro_hashalloc(sz);
	if (newbuckets == NULL) {
		panic("%s: allocation failure", __func__);
	}
	for (i = 0; i < sz; i++) {
		struct bucket *b = &newbuckets[i];
		int j;

		b->cycle = cycle_target;
		b->cur = 0;
		for (j = 0; j < BUCKETSIZE; j++) {
			b->pages[j] = NONRES_COOKIE_INVAL;
		}
	}
	/* XXX lock */
	oldbuckets = buckets;
	oldsz = hashsize;
	buckets = newbuckets;
	hashsize = sz;
	/* XXX unlock */
	if (oldbuckets != &static_bucket) {
		clockpro_hashfree(oldbuckets, oldsz);
	}
}
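
/*
 * Non-resident page tracking, in outline: an evicted page is remembered
 * only as a 32-bit cookie (calccookie) stored in a small fixed-size
 * bucket chosen by pageidentityhash1.  nonresident_rotate() lazily
 * invalidates entries as cycle_target advances (driven by the hot hand),
 * which is how the "test period" of non-resident pages expires.
 */
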
static struct bucket *
nonresident_getbucket(objid_t obj, off_t idx)
{
	uint32_t hash;

	hash = pageidentityhash1(obj, idx);
	return &buckets[hash % hashsize];
}

static void
nonresident_rotate(struct bucket *b)
{
	const int target = cycle_target;
	const int cycle = b->cycle;
	int cur;
	int todo;

	todo = target - cycle;
	if (todo >= BUCKETSIZE * 2) {
		todo = (todo % BUCKETSIZE) + BUCKETSIZE;
	}
	cur = b->cur;
	while (todo > 0) {
		if (b->pages[cur] != NONRES_COOKIE_INVAL) {
			PDPOL_EVCNT_INCR(nreshandhot);
			COLDTARGET_ADJ(-1);
		}
		b->pages[cur] = NONRES_COOKIE_INVAL;
		cur++;
		if (cur == BUCKETSIZE) {
			cur = 0;
		}
		todo--;
	}
	b->cycle = target;
	b->cur = cur;
}

static bool
nonresident_lookupremove(objid_t obj, off_t idx)
{
	struct bucket *b = nonresident_getbucket(obj, idx);
	nonres_cookie_t cookie = calccookie(obj, idx);
	int i;

	nonresident_rotate(b);
	for (i = 0; i < BUCKETSIZE; i++) {
		if (b->pages[i] == cookie) {
			b->pages[i] = NONRES_COOKIE_INVAL;
			return true;
		}
	}
	return false;
}

static objid_t
pageobj(struct vm_page *pg)
{
	const void *obj;

	/*
	 * XXX object pointer is often freed and reused for unrelated object.
	 * for vnodes, it would be better to use something like
	 * a hash of fsid/fileid/generation.
	 */

	obj = pg->uobject;
	if (obj == NULL) {
		obj = pg->uanon;
		KASSERT(obj != NULL);
	}
	return (objid_t)obj;
}

static off_t
pageidx(struct vm_page *pg)
{

	KASSERT((pg->offset & PAGE_MASK) == 0);
	return pg->offset >> PAGE_SHIFT;
}

static bool
nonresident_pagelookupremove(struct vm_page *pg)
{
	bool found = nonresident_lookupremove(pageobj(pg), pageidx(pg));

	if (pg->uobject) {
		PDPOL_EVCNT_INCR(nreslookupobj);
	} else {
		PDPOL_EVCNT_INCR(nreslookupanon);
	}
	if (found) {
		if (pg->uobject) {
			PDPOL_EVCNT_INCR(nresfoundobj);
		} else {
			PDPOL_EVCNT_INCR(nresfoundanon);
		}
	}
	return found;
}

static void
nonresident_pagerecord(struct vm_page *pg)
{
	objid_t obj = pageobj(pg);
	off_t idx = pageidx(pg);
	struct bucket *b = nonresident_getbucket(obj, idx);
	nonres_cookie_t cookie = calccookie(obj, idx);

#if defined(DEBUG)
	int i;

	for (i = 0; i < BUCKETSIZE; i++) {
		if (b->pages[i] == cookie) {
			PDPOL_EVCNT_INCR(nresconflict);
		}
	}
#endif /* defined(DEBUG) */

	if (pg->uobject) {
		PDPOL_EVCNT_INCR(nresrecordobj);
	} else {
		PDPOL_EVCNT_INCR(nresrecordanon);
	}
	nonresident_rotate(b);
	if (b->pages[b->cur] != NONRES_COOKIE_INVAL) {
		PDPOL_EVCNT_INCR(nresoverwritten);
		COLDTARGET_ADJ(-1);
	}
	b->pages[b->cur] = cookie;
	b->cur = (b->cur + 1) % BUCKETSIZE;
}

/* ---------------------------------------- */

#if defined(CLOCKPRO_DEBUG)
static void
check_sanity(void)
{
}
#else /* defined(CLOCKPRO_DEBUG) */
#define	check_sanity()	/* nothing */
#endif /* defined(CLOCKPRO_DEBUG) */

static void
clockpro_reinit(void)
{

	KASSERT(mutex_owned(&clockpro.lock));

	clockpro_hashinit(uvmexp.npages);
}

static void
clockpro_init(void)
{
	struct clockpro_state *s = &clockpro;
	int i;

	mutex_init(&s->lock, MUTEX_DEFAULT, IPL_NONE);
	for (i = 0; i < CLOCKPRO_NQUEUE; i++) {
		pageq_init(&s->s_q[i]);
	}
	s->s_newqlenmax = 1;
	s->s_coldtarget = 1;
	uvm_pctparam_init(&s->s_coldtargetpct, CLOCKPRO_COLDPCT, NULL);
}

static void
clockpro_tune(void)
{
	struct clockpro_state *s = &clockpro;
	int coldtarget;

	KASSERT(mutex_owned(&s->lock));

#if defined(ADAPTIVE)
	int coldmax = s->s_npages * CLOCKPRO_COLDPCTMAX / 100;
	int coldmin = 1;

	coldtarget = s->s_coldtarget;
	if (coldtarget + coldadj < coldmin) {
		coldadj = coldmin - coldtarget;
	} else if (coldtarget + coldadj > coldmax) {
		coldadj = coldmax - coldtarget;
	}
	coldtarget += coldadj;
#else /* defined(ADAPTIVE) */
	coldtarget = UVM_PCTPARAM_APPLY(&s->s_coldtargetpct, s->s_npages);
	if (coldtarget < 1) {
		coldtarget = 1;
	}
#endif /* defined(ADAPTIVE) */

	s->s_coldtarget = coldtarget;
	s->s_newqlenmax = coldtarget / 4;
	if (s->s_newqlenmax < CLOCKPRO_NEWQMIN) {
		s->s_newqlenmax = CLOCKPRO_NEWQMIN;
	}
}
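
/*
 * Reference-bit harvesting.  When called without the owner's lock
 * (locked == false), the page's identity is stabilized via pg->interlock
 * and the owner lock is taken with uvmpd_trylockowner(); clockpro.lock is
 * dropped around that step (see the XXX comments), and the attempt is
 * simply abandoned (lockfail event) if the owner can't be locked.
 */
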
static void
clockpro_movereferencebit(struct vm_page *pg, bool locked)
{
	kmutex_t *lock;
	bool referenced;

	KASSERT(mutex_owned(&clockpro.lock));
	KASSERT(!locked || uvm_page_owner_locked_p(pg, false));
	if (!locked) {
		/*
		 * acquire interlock to stabilize page identity.
		 * if we have caught the page in a state of flux
		 * and it should be dequeued, abort.  it will be
		 * dequeued later.
		 */
		mutex_enter(&pg->interlock);
		if ((pg->uobject == NULL && pg->uanon == NULL) ||
		    pg->wire_count > 0) {
			mutex_exit(&pg->interlock);
			PDPOL_EVCNT_INCR(lockfail);
			return;
		}
		mutex_exit(&clockpro.lock);	/* XXX */
		lock = uvmpd_trylockowner(pg);
		/* pg->interlock now dropped */
		mutex_enter(&clockpro.lock);	/* XXX */
		if (lock == NULL) {
			/*
			 * XXXuvmplock
			 */
			PDPOL_EVCNT_INCR(lockfail);
			return;
		}
		PDPOL_EVCNT_INCR(locksuccess);
	}
	referenced = pmap_clear_reference(pg);
	if (!locked) {
		mutex_exit(lock);
	}
	if (referenced) {
		pg->pqflags |= PQ_REFERENCED;
	}
}

static void
clockpro_clearreferencebit(struct vm_page *pg, bool locked)
{

	KASSERT(mutex_owned(&clockpro.lock));

	clockpro_movereferencebit(pg, locked);
	pg->pqflags &= ~PQ_REFERENCED;
}

static void
clockpro___newqrotate(int len)
{
	struct clockpro_state * const s = &clockpro;
	pageq_t * const newq = clockpro_queue(s, CLOCKPRO_NEWQ);
	struct vm_page *pg;

	KASSERT(mutex_owned(&s->lock));

	while (pageq_len(newq) > len) {
		pg = pageq_remove_head(newq);
		KASSERT(pg != NULL);
		KASSERT(clockpro_getq(pg) == CLOCKPRO_NEWQ);
		if ((pg->pqflags & PQ_INITIALREF) != 0) {
			clockpro_clearreferencebit(pg, false);
			pg->pqflags &= ~PQ_INITIALREF;
		}
		/* place at the list head */
		clockpro_insert_tail(s, CLOCKPRO_COLDQ, pg);
	}
}

static void
clockpro_newqrotate(void)
{
	struct clockpro_state * const s = &clockpro;

	KASSERT(mutex_owned(&s->lock));

	check_sanity();
	clockpro___newqrotate(s->s_newqlenmax);
	check_sanity();
}

static void
clockpro_newqflush(int n)
{

	KASSERT(mutex_owned(&clockpro.lock));

	check_sanity();
	clockpro___newqrotate(n);
	check_sanity();
}

static void
clockpro_newqflushone(void)
{
	struct clockpro_state * const s = &clockpro;

	KASSERT(mutex_owned(&s->lock));

	clockpro_newqflush(
	    MAX(pageq_len(clockpro_queue(s, CLOCKPRO_NEWQ)) - 1, 0));
}

/*
 * our "tail" is called "list-head" in the paper.
 */

static void
clockpro___enqueuetail(struct vm_page *pg)
{
	struct clockpro_state * const s = &clockpro;

	KASSERT(mutex_owned(&s->lock));
	KASSERT(clockpro_getq(pg) == CLOCKPRO_NOQUEUE);

	check_sanity();
#if !defined(USEONCE2)
	clockpro_insert_tail(s, CLOCKPRO_NEWQ, pg);
	clockpro_newqrotate();
#else /* !defined(USEONCE2) */
#if defined(LISTQ)
	KASSERT((pg->pqflags & PQ_REFERENCED) == 0);
#endif /* defined(LISTQ) */
	clockpro_insert_tail(s, CLOCKPRO_COLDQ, pg);
#endif /* !defined(USEONCE2) */
	check_sanity();
}
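
/*
 * Page enqueue, in brief: a speculative page always enters cold; a page
 * whose cookie is still in the non-resident hash is treated as having a
 * small reuse distance and enters hot; any other page enters cold with
 * PQ_TEST set, starting its test period.  (This summarizes the
 * non-USEONCE2 branch below.)
 */
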
static void
clockpro_pageenqueue(struct vm_page *pg)
{
	struct clockpro_state * const s = &clockpro;
	bool hot;
	bool speculative = (pg->pqflags & PQ_SPECULATIVE) != 0; /* XXX */

	KASSERT((~pg->pqflags & (PQ_INITIALREF|PQ_SPECULATIVE)) != 0);
	KASSERT(mutex_owned(&s->lock));
	check_sanity();
	KASSERT(clockpro_getq(pg) == CLOCKPRO_NOQUEUE);
	s->s_npages++;
	pg->pqflags &= ~(PQ_HOT|PQ_TEST);
	if (speculative) {
		hot = false;
		PDPOL_EVCNT_INCR(speculativeenqueue);
	} else {
		hot = nonresident_pagelookupremove(pg);
		if (hot) {
			COLDTARGET_ADJ(1);
		}
	}

	/*
	 * consider mmap'ed file:
	 *
	 * - read-ahead enqueues a page.
	 *
	 * - on the following read-ahead hit, the fault handler activates it.
	 *
	 * - finally, the userland code which caused the above fault
	 *   actually accesses the page.  it makes its reference bit set.
	 *
	 * we want to count the above as a single access, rather than
	 * three accesses with short reuse distances.
	 */

#if defined(USEONCE2)
	pg->pqflags &= ~PQ_INITIALREF;
	if (hot) {
		pg->pqflags |= PQ_TEST;
	}
	s->s_ncold++;
	clockpro_clearreferencebit(pg, false);
	clockpro___enqueuetail(pg);
#else /* defined(USEONCE2) */
	if (speculative) {
		s->s_ncold++;
	} else if (hot) {
		pg->pqflags |= PQ_HOT;
	} else {
		pg->pqflags |= PQ_TEST;
		s->s_ncold++;
	}
	clockpro___enqueuetail(pg);
#endif /* defined(USEONCE2) */
	KASSERT(s->s_ncold <= s->s_npages);
}

static pageq_t *
clockpro_pagequeue(struct vm_page *pg)
{
	struct clockpro_state * const s = &clockpro;
	int qidx;

	KASSERT(mutex_owned(&s->lock));

	qidx = clockpro_getq(pg);
	KASSERT(qidx != CLOCKPRO_NOQUEUE);

	return clockpro_queue(s, qidx);
}

static void
clockpro_pagedequeue(struct vm_page *pg)
{
	struct clockpro_state * const s = &clockpro;
	pageq_t *q;

	KASSERT(mutex_owned(&s->lock));

	KASSERT(s->s_npages > 0);
	check_sanity();
	q = clockpro_pagequeue(pg);
	pageq_remove(q, pg);
	check_sanity();
	clockpro_setq(pg, CLOCKPRO_NOQUEUE);
	if ((pg->pqflags & PQ_HOT) == 0) {
		KASSERT(s->s_ncold > 0);
		s->s_ncold--;
	}
	KASSERT(s->s_npages > 0);
	s->s_npages--;
	check_sanity();
}

static void
clockpro_pagerequeue(struct vm_page *pg)
{
	struct clockpro_state * const s = &clockpro;
	int qidx;

	KASSERT(mutex_owned(&s->lock));

	qidx = clockpro_getq(pg);
	KASSERT(qidx == CLOCKPRO_HOTQ || qidx == CLOCKPRO_COLDQ);
	pageq_remove(clockpro_queue(s, qidx), pg);
	check_sanity();
	clockpro_setq(pg, CLOCKPRO_NOQUEUE);

	clockpro___enqueuetail(pg);
}

static void
handhot_endtest(struct vm_page *pg)
{

	KASSERT(mutex_owned(&clockpro.lock));

	KASSERT((pg->pqflags & PQ_HOT) == 0);
	if ((pg->pqflags & PQ_TEST) != 0) {
		PDPOL_EVCNT_INCR(hhotcoldtest);
		COLDTARGET_ADJ(-1);
		pg->pqflags &= ~PQ_TEST;
	} else {
		PDPOL_EVCNT_INCR(hhotcold);
	}
}
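
/*
 * The hot hand: runs only while there are fewer cold pages than
 * s_coldtarget.  It clears reference bits on hot pages, demotes
 * unreferenced hot pages to cold, and advances cycle_target so that
 * remembered non-resident pages eventually fall out of the hash
 * (their test period ends).
 */
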
static void
handhot_advance(void)
{
	struct clockpro_state * const s = &clockpro;
	struct vm_page *pg;
	pageq_t *hotq;
	int hotqlen;

	KASSERT(mutex_owned(&s->lock));

	clockpro_tune();

	dump("hot called");
	if (s->s_ncold >= s->s_coldtarget) {
		return;
	}
	hotq = clockpro_queue(s, CLOCKPRO_HOTQ);
again:
	pg = pageq_first(hotq);
	if (pg == NULL) {
		DPRINTF("%s: HHOT TAKEOVER\n", __func__);
		dump("hhottakeover");
		PDPOL_EVCNT_INCR(hhottakeover);
#if defined(LISTQ)
		while (/* CONSTCOND */ 1) {
			pageq_t *coldq = clockpro_queue(s, CLOCKPRO_COLDQ);

			pg = pageq_first(coldq);
			if (pg == NULL) {
				clockpro_newqflushone();
				pg = pageq_first(coldq);
				if (pg == NULL) {
					WARN("hhot: no page?\n");
					return;
				}
			}
			KASSERT(clockpro_pagequeue(pg) == coldq);
			pageq_remove(coldq, pg);
			check_sanity();
			if ((pg->pqflags & PQ_HOT) == 0) {
				handhot_endtest(pg);
				clockpro_insert_tail(s, CLOCKPRO_LISTQ, pg);
			} else {
				clockpro_insert_head(s, CLOCKPRO_HOTQ, pg);
				break;
			}
		}
#else /* defined(LISTQ) */
		clockpro_newqflush(0); /* XXX XXX */
		clockpro_switchqueue();
		hotq = clockpro_queue(s, CLOCKPRO_HOTQ);
		goto again;
#endif /* defined(LISTQ) */
	}

	KASSERT(clockpro_pagequeue(pg) == hotq);

	/*
	 * terminate test period of nonresident pages by cycling them.
	 */

	cycle_target_frac += BUCKETSIZE;
	hotqlen = pageq_len(hotq);
	while (cycle_target_frac >= hotqlen) {
		cycle_target++;
		cycle_target_frac -= hotqlen;
	}

	if ((pg->pqflags & PQ_HOT) == 0) {
#if defined(LISTQ)
		panic("cold page in hotq: %p", pg);
#else /* defined(LISTQ) */
		handhot_endtest(pg);
		goto next;
#endif /* defined(LISTQ) */
	}
	KASSERT((pg->pqflags & PQ_TEST) == 0);
	KASSERT((pg->pqflags & PQ_INITIALREF) == 0);
	KASSERT((pg->pqflags & PQ_SPECULATIVE) == 0);

	/*
	 * once we met our target,
	 * stop at a hot page so that no cold pages in test period
	 * have larger recency than any hot pages.
	 */

	if (s->s_ncold >= s->s_coldtarget) {
		dump("hot done");
		return;
	}
	clockpro_movereferencebit(pg, false);
	if ((pg->pqflags & PQ_REFERENCED) == 0) {
		PDPOL_EVCNT_INCR(hhotunref);
		uvmexp.pddeact++;
		pg->pqflags &= ~PQ_HOT;
		clockpro.s_ncold++;
		KASSERT(s->s_ncold <= s->s_npages);
	} else {
		PDPOL_EVCNT_INCR(hhotref);
	}
	pg->pqflags &= ~PQ_REFERENCED;
#if !defined(LISTQ)
next:
#endif /* !defined(LISTQ) */
	clockpro_pagerequeue(pg);
	dump("hot");
	goto again;
}
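
/*
 * The cold hand: picks the next eviction candidate from the head of the
 * cold queue.  A referenced cold page in its test period is promoted to
 * hot; a referenced cold page not in a test period gets PQ_TEST set and
 * is requeued; an unreferenced cold page is recorded in the non-resident
 * hash if it was in a test period and is returned to the caller as the
 * victim.
 */
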
static struct vm_page *
handcold_advance(void)
{
	struct clockpro_state * const s = &clockpro;
	struct vm_page *pg;

	KASSERT(mutex_owned(&s->lock));

	for (;;) {
#if defined(LISTQ)
		pageq_t *listq = clockpro_queue(s, CLOCKPRO_LISTQ);
#endif /* defined(LISTQ) */
		pageq_t *coldq;

		clockpro_newqrotate();
		handhot_advance();
#if defined(LISTQ)
		pg = pageq_first(listq);
		if (pg != NULL) {
			KASSERT(clockpro_getq(pg) == CLOCKPRO_LISTQ);
			KASSERT((pg->pqflags & PQ_TEST) == 0);
			KASSERT((pg->pqflags & PQ_HOT) == 0);
			KASSERT((pg->pqflags & PQ_INITIALREF) == 0);
			pageq_remove(listq, pg);
			check_sanity();
			clockpro_insert_head(s, CLOCKPRO_COLDQ, pg); /* XXX */
			goto gotcold;
		}
#endif /* defined(LISTQ) */
		check_sanity();
		coldq = clockpro_queue(s, CLOCKPRO_COLDQ);
		pg = pageq_first(coldq);
		if (pg == NULL) {
			clockpro_newqflushone();
			pg = pageq_first(coldq);
		}
		if (pg == NULL) {
			DPRINTF("%s: HCOLD TAKEOVER\n", __func__);
			dump("hcoldtakeover");
			PDPOL_EVCNT_INCR(hcoldtakeover);
			KASSERT(
			    pageq_len(clockpro_queue(s, CLOCKPRO_NEWQ)) == 0);
#if defined(LISTQ)
			KASSERT(
			    pageq_len(clockpro_queue(s, CLOCKPRO_HOTQ)) == 0);
#else /* defined(LISTQ) */
			clockpro_switchqueue();
			coldq = clockpro_queue(s, CLOCKPRO_COLDQ);
			pg = pageq_first(coldq);
#endif /* defined(LISTQ) */
		}
		if (pg == NULL) {
			WARN("hcold: no page?\n");
			return NULL;
		}
		KASSERT((pg->pqflags & PQ_INITIALREF) == 0);
		if ((pg->pqflags & PQ_HOT) != 0) {
			PDPOL_EVCNT_INCR(hcoldhot);
			pageq_remove(coldq, pg);
			clockpro_insert_tail(s, CLOCKPRO_HOTQ, pg);
			check_sanity();
			KASSERT((pg->pqflags & PQ_TEST) == 0);
			uvmexp.pdscans++;
			continue;
		}
#if defined(LISTQ)
gotcold:
#endif /* defined(LISTQ) */
		KASSERT((pg->pqflags & PQ_HOT) == 0);
		uvmexp.pdscans++;
		clockpro_movereferencebit(pg, false);
		if ((pg->pqflags & PQ_SPECULATIVE) != 0) {
			KASSERT((pg->pqflags & PQ_TEST) == 0);
			if ((pg->pqflags & PQ_REFERENCED) != 0) {
				PDPOL_EVCNT_INCR(speculativehit2);
				pg->pqflags &= ~(PQ_SPECULATIVE|PQ_REFERENCED);
				clockpro_pagedequeue(pg);
				clockpro_pageenqueue(pg);
				continue;
			}
			PDPOL_EVCNT_INCR(speculativemiss);
		}
		switch (pg->pqflags & (PQ_REFERENCED|PQ_TEST)) {
		case PQ_TEST:
			PDPOL_EVCNT_INCR(hcoldunreftest);
			nonresident_pagerecord(pg);
			goto gotit;
		case 0:
			PDPOL_EVCNT_INCR(hcoldunref);
gotit:
			KASSERT(s->s_ncold > 0);
			clockpro_pagerequeue(pg); /* XXX */
			dump("cold done");
			/* XXX "pg" is still in queue */
			handhot_advance();
			goto done;
		case PQ_REFERENCED|PQ_TEST:
			PDPOL_EVCNT_INCR(hcoldreftest);
			s->s_ncold--;
			COLDTARGET_ADJ(1);
			pg->pqflags |= PQ_HOT;
			pg->pqflags &= ~PQ_TEST;
			break;
		case PQ_REFERENCED:
			PDPOL_EVCNT_INCR(hcoldref);
			pg->pqflags |= PQ_TEST;
			break;
		}
		pg->pqflags &= ~PQ_REFERENCED;
		uvmexp.pdreact++;
		/* move to the list head */
		clockpro_pagerequeue(pg);
		dump("cold");
	}
done:;
	return pg;
}

static void
uvmpdpol_pageactivate_locked(struct vm_page *pg)
{

	if (!uvmpdpol_pageisqueued_p(pg)) {
		KASSERT((pg->pqflags & PQ_SPECULATIVE) == 0);
		pg->pqflags |= PQ_INITIALREF;
		clockpro_pageenqueue(pg);
	} else if ((pg->pqflags & PQ_SPECULATIVE)) {
		PDPOL_EVCNT_INCR(speculativehit1);
		pg->pqflags &= ~PQ_SPECULATIVE;
		pg->pqflags |= PQ_INITIALREF;
		clockpro_pagedequeue(pg);
		clockpro_pageenqueue(pg);
	}
	pg->pqflags |= PQ_REFERENCED;
}

void
uvmpdpol_pageactivate(struct vm_page *pg)
{

	uvmpdpol_set_intent(pg, PQ_INTENT_A);
}

static void
uvmpdpol_pagedeactivate_locked(struct vm_page *pg)
{

	clockpro_clearreferencebit(pg, true);
}

void
uvmpdpol_pagedeactivate(struct vm_page *pg)
{

	uvmpdpol_set_intent(pg, PQ_INTENT_I);
}

static void
uvmpdpol_pagedequeue_locked(struct vm_page *pg)
{

	if (!uvmpdpol_pageisqueued_p(pg)) {
		return;
	}
	clockpro_pagedequeue(pg);
	pg->pqflags &= ~(PQ_INITIALREF|PQ_SPECULATIVE);
}

void
uvmpdpol_pagedequeue(struct vm_page *pg)
{

	uvmpdpol_set_intent(pg, PQ_INTENT_D);
}

static void
uvmpdpol_pageenqueue_locked(struct vm_page *pg)
{

#if 1
	if (uvmpdpol_pageisqueued_p(pg)) {
		return;
	}
	clockpro_clearreferencebit(pg, true);
	pg->pqflags |= PQ_SPECULATIVE;
	clockpro_pageenqueue(pg);
#else
	uvmpdpol_pageactivate_locked(pg);
#endif
}

void
uvmpdpol_pageenqueue(struct vm_page *pg)
{

	uvmpdpol_set_intent(pg, PQ_INTENT_D);
}

static bool
uvmpdpol_pagerealize_locked(struct vm_page *pg)
{
	uint32_t pqflags;

	KASSERT(mutex_owned(&clockpro.lock));
	KASSERT(mutex_owned(&pg->interlock));

	/* XXX this needs to be called from elsewhere, like uvmpdpol_clock. */

	pqflags = pg->pqflags;
	pg->pqflags &= ~(PQ_INTENT_SET | PQ_INTENT_QUEUED);
	switch (pqflags & (PQ_INTENT_MASK | PQ_INTENT_SET)) {
	case PQ_INTENT_A | PQ_INTENT_SET:
		uvmpdpol_pageactivate_locked(pg);
		return true;
	case PQ_INTENT_E | PQ_INTENT_SET:
		uvmpdpol_pageenqueue_locked(pg);
		return true;
	case PQ_INTENT_I | PQ_INTENT_SET:
		uvmpdpol_pagedeactivate_locked(pg);
		return true;
	case PQ_INTENT_D | PQ_INTENT_SET:
		uvmpdpol_pagedequeue_locked(pg);
		return true;
	default:
		return false;
	}
}

void
uvmpdpol_pagerealize(struct vm_page *pg)
{
	struct clockpro_state * const s = &clockpro;

	mutex_enter(&s->lock);
	uvmpdpol_pagerealize_locked(pg);
	mutex_exit(&s->lock);
}

void
uvmpdpol_anfree(struct vm_anon *an)
{
	struct clockpro_state * const s = &clockpro;

	KASSERT(an->an_page == NULL);
	mutex_enter(&s->lock);
	if (nonresident_lookupremove((objid_t)an, 0)) {
		PDPOL_EVCNT_INCR(nresanonfree);
	}
	mutex_exit(&s->lock);
}

void
uvmpdpol_init(void)
{

	clockpro_init();
}

void
uvmpdpol_reinit(void)
{
	struct clockpro_state * const s = &clockpro;

	mutex_enter(&s->lock);
	clockpro_reinit();
	mutex_exit(&s->lock);
}

void
uvmpdpol_estimatepageable(int *active, int *inactive)
{
	struct clockpro_state * const s = &clockpro;

	/*
	 * Don't take any locks here.  This can be called from DDB, and in
	 * any case the numbers are stale the instant the lock is dropped,
	 * so it just doesn't matter.
	 */

	if (active) {
		*active = s->s_npages - s->s_ncold;
	}
	if (inactive) {
		*inactive = s->s_ncold;
	}
}

bool
uvmpdpol_pageisqueued_p(struct vm_page *pg)
{

	/* Unlocked check OK due to page lifecycle. */
	return clockpro_getq(pg) != CLOCKPRO_NOQUEUE;
}

bool
uvmpdpol_pageactivate_p(struct vm_page *pg)
{

	/* For now, no heuristic, always receive activations. */
	return true;
}
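
/*
 * Pagedaemon scan interface: handcold_advance() supplies victim
 * candidates; for each one the page identity is stabilized via
 * pg->interlock and the owning object/anon is locked with
 * uvmpd_trylockowner() before the page is handed back to the caller.
 * Pages caught without an owner or with a nonzero wire count are
 * dequeued on the spot and skipped.
 */
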
void
uvmpdpol_scaninit(void)
{
	struct clockpro_state * const s = &clockpro;
	struct clockpro_scanstate * const ss = &scanstate;

	mutex_enter(&s->lock);
	ss->ss_nscanned = 0;
	mutex_exit(&s->lock);
}

void
uvmpdpol_scanfini(void)
{

}

struct vm_page *
uvmpdpol_selectvictim(kmutex_t **plock)
{
	struct clockpro_state * const s = &clockpro;
	struct clockpro_scanstate * const ss = &scanstate;
	struct vm_page *pg;
	kmutex_t *lock = NULL;

	do {
		mutex_enter(&s->lock);
		if (ss->ss_nscanned > s->s_npages) {
			DPRINTF("scan too much\n");
			mutex_exit(&s->lock);
			return NULL;
		}
		pg = handcold_advance();
		if (pg == NULL) {
			mutex_exit(&s->lock);
			break;
		}
		ss->ss_nscanned++;
		/*
		 * acquire interlock to stabilize page identity.
		 * if we have caught the page in a state of flux
		 * and it should be dequeued, do it now and then
		 * move on to the next.
		 */
		mutex_enter(&pg->interlock);
		if ((pg->uobject == NULL && pg->uanon == NULL) ||
		    pg->wire_count > 0) {
			mutex_exit(&pg->interlock);
			clockpro_pagedequeue(pg);
			pg->pqflags &= ~(PQ_INITIALREF|PQ_SPECULATIVE);
			continue;
		}
		mutex_exit(&s->lock);
		lock = uvmpd_trylockowner(pg);
		/* pg->interlock now dropped */
	} while (lock == NULL);
	*plock = lock;
	return pg;
}
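
/*
 * Swap-slot reclamation: when the pagedaemon reports a swap shortage,
 * uvmpdpol_balancequeue() walks the queues and, for hot swap-backed
 * pages whose owner can be locked, releases their swap slots via
 * uvmpd_dropswap() until the shortage is covered.
 */
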
static void
clockpro_dropswap(pageq_t *q, int *todo)
{
	struct vm_page *pg;
	kmutex_t *lock;

	KASSERT(mutex_owned(&clockpro.lock));

	TAILQ_FOREACH_REVERSE(pg, &q->q_q, pglist, pdqueue) {
		if (*todo <= 0) {
			break;
		}
		if ((pg->pqflags & PQ_HOT) == 0) {
			continue;
		}
		mutex_enter(&pg->interlock);
		if ((pg->flags & PG_SWAPBACKED) == 0) {
			mutex_exit(&pg->interlock);
			continue;
		}

		/*
		 * try to lock the object that owns the page.
		 */
		mutex_exit(&clockpro.lock);
		lock = uvmpd_trylockowner(pg);
		/* pg->interlock now released */
		mutex_enter(&clockpro.lock);
		if (lock == NULL) {
			/* didn't get it - try the next page. */
			/* XXXAD lost position in queue */
			continue;
		}

		/*
		 * if there's a shortage of swap slots, try to free it.
		 */
		if ((pg->flags & PG_SWAPBACKED) != 0 &&
		    (pg->flags & PG_BUSY) == 0) {
			if (uvmpd_dropswap(pg)) {
				(*todo)--;
			}
		}
		mutex_exit(lock);
	}
}

void
uvmpdpol_balancequeue(int swap_shortage)
{
	struct clockpro_state * const s = &clockpro;
	int todo = swap_shortage;

	if (todo == 0) {
		return;
	}

	/*
	 * reclaim swap slots from hot pages
	 */

	DPRINTF("%s: swap_shortage=%d\n", __func__, swap_shortage);

	mutex_enter(&s->lock);
	clockpro_dropswap(clockpro_queue(s, CLOCKPRO_NEWQ), &todo);
	clockpro_dropswap(clockpro_queue(s, CLOCKPRO_COLDQ), &todo);
	clockpro_dropswap(clockpro_queue(s, CLOCKPRO_HOTQ), &todo);
	mutex_exit(&s->lock);

	DPRINTF("%s: done=%d\n", __func__, swap_shortage - todo);
}

bool
uvmpdpol_needsscan_p(void)
{
	struct clockpro_state * const s = &clockpro;

	/* This must be an unlocked check: can be called from interrupt. */
	return s->s_ncold < s->s_coldtarget;
}

void
uvmpdpol_tune(void)
{
	struct clockpro_state * const s = &clockpro;

	mutex_enter(&s->lock);
	clockpro_tune();
	mutex_exit(&s->lock);
}

void
uvmpdpol_idle(void)
{

}

#if !defined(PDSIM)

#include <sys/sysctl.h>	/* XXX SYSCTL_DESCR */

void
uvmpdpol_sysctlsetup(void)
{
#if !defined(ADAPTIVE)
	struct clockpro_state * const s = &clockpro;

	uvm_pctparam_createsysctlnode(&s->s_coldtargetpct, "coldtargetpct",
	    SYSCTL_DESCR("Percentage cold target queue of the entire queue"));
#endif /* !defined(ADAPTIVE) */
}

#endif /* !defined(PDSIM) */

#if defined(DDB)

#if 0 /* XXXuvmplock */
#define	_pmap_is_referenced(pg)	pmap_is_referenced(pg)
#else
#define	_pmap_is_referenced(pg)	false
#endif

void clockpro_dump(void);

void
clockpro_dump(void)
{
	struct clockpro_state * const s = &clockpro;
	struct vm_page *pg;
	int ncold, nhot, ntest, nspeculative, ninitialref, nref;
	int newqlen, coldqlen, hotqlen, listqlen;

	newqlen = coldqlen = hotqlen = listqlen = 0;
	printf("npages=%d, ncold=%d, coldtarget=%d, newqlenmax=%d\n",
	    s->s_npages, s->s_ncold, s->s_coldtarget, s->s_newqlenmax);

#define	INITCOUNT()	\
	ncold = nhot = ntest = nspeculative = ninitialref = nref = 0

#define	COUNT(pg)	\
	if ((pg->pqflags & PQ_HOT) != 0) { \
		nhot++; \
	} else { \
		ncold++; \
		if ((pg->pqflags & PQ_TEST) != 0) { \
			ntest++; \
		} \
		if ((pg->pqflags & PQ_SPECULATIVE) != 0) { \
			nspeculative++; \
		} \
		if ((pg->pqflags & PQ_INITIALREF) != 0) { \
			ninitialref++; \
		} else if ((pg->pqflags & PQ_REFERENCED) != 0 || \
		    _pmap_is_referenced(pg)) { \
			nref++; \
		} \
	}

#define	PRINTCOUNT(name)	\
	printf("%s hot=%d, cold=%d, test=%d, speculative=%d, initialref=%d, " \
	    "nref=%d\n", \
	    (name), nhot, ncold, ntest, nspeculative, ninitialref, nref)

	INITCOUNT();
	TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_NEWQ)->q_q, pdqueue) {
		if (clockpro_getq(pg) != CLOCKPRO_NEWQ) {
			printf("newq corrupt %p\n", pg);
		}
		COUNT(pg)
		newqlen++;
	}
	PRINTCOUNT("newq");

	INITCOUNT();
	TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_COLDQ)->q_q, pdqueue) {
		if (clockpro_getq(pg) != CLOCKPRO_COLDQ) {
			printf("coldq corrupt %p\n", pg);
		}
		COUNT(pg)
		coldqlen++;
	}
	PRINTCOUNT("coldq");

	INITCOUNT();
	TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_HOTQ)->q_q, pdqueue) {
		if (clockpro_getq(pg) != CLOCKPRO_HOTQ) {
			printf("hotq corrupt %p\n", pg);
		}
#if defined(LISTQ)
		if ((pg->pqflags & PQ_HOT) == 0) {
			printf("cold page in hotq: %p\n", pg);
		}
#endif /* defined(LISTQ) */
		COUNT(pg)
		hotqlen++;
	}
	PRINTCOUNT("hotq");

	INITCOUNT();
	TAILQ_FOREACH(pg, &clockpro_queue(s, CLOCKPRO_LISTQ)->q_q, pdqueue) {
#if !defined(LISTQ)
		printf("listq %p\n", pg);
#endif /* !defined(LISTQ) */
		if (clockpro_getq(pg) != CLOCKPRO_LISTQ) {
			printf("listq corrupt %p\n", pg);
		}
		COUNT(pg)
		listqlen++;
	}
	PRINTCOUNT("listq");

	printf("newqlen=%d/%d, coldqlen=%d/%d, hotqlen=%d/%d, listqlen=%d/%d\n",
	    newqlen, pageq_len(clockpro_queue(s, CLOCKPRO_NEWQ)),
	    coldqlen, pageq_len(clockpro_queue(s, CLOCKPRO_COLDQ)),
	    hotqlen, pageq_len(clockpro_queue(s, CLOCKPRO_HOTQ)),
	    listqlen, pageq_len(clockpro_queue(s, CLOCKPRO_LISTQ)));
}
#endif /* defined(DDB) */

#if defined(PDSIM)
#if defined(DEBUG)
static void
pdsim_dumpq(int qidx)
{
	struct clockpro_state * const s = &clockpro;
	pageq_t *q = clockpro_queue(s, qidx);
	struct vm_page *pg;

	TAILQ_FOREACH(pg, &q->q_q, pdqueue) {
		DPRINTF(" %" PRIu64 "%s%s%s%s%s%s",
		    pg->offset >> PAGE_SHIFT,
		    (pg->pqflags & PQ_HOT) ? "H" : "",
		    (pg->pqflags & PQ_TEST) ? "T" : "",
		    (pg->pqflags & PQ_REFERENCED) ? "R" : "",
		    _pmap_is_referenced(pg) ? "r" : "",
		    (pg->pqflags & PQ_INITIALREF) ? "I" : "",
		    (pg->pqflags & PQ_SPECULATIVE) ? "S" : ""
		    );
	}
}
#endif /* defined(DEBUG) */

void
pdsim_dump(const char *id)
{
#if defined(DEBUG)
	struct clockpro_state * const s = &clockpro;

	DPRINTF(" %s L(", id);
	pdsim_dumpq(CLOCKPRO_LISTQ);
	DPRINTF(" ) H(");
	pdsim_dumpq(CLOCKPRO_HOTQ);
	DPRINTF(" ) C(");
	pdsim_dumpq(CLOCKPRO_COLDQ);
	DPRINTF(" ) N(");
	pdsim_dumpq(CLOCKPRO_NEWQ);
	DPRINTF(" ) ncold=%d/%d, coldadj=%d\n",
	    s->s_ncold, s->s_coldtarget, coldadj);
#endif /* defined(DEBUG) */
}
#endif /* defined(PDSIM) */