git: 3247bc7cd652 - main - arm64 pmap: per-domain pv chunk list
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Tue, 27 Sep 2022 15:15:22 UTC
The branch main has been updated by andrew: URL: https://cgit.FreeBSD.org/src/commit/?id=3247bc7cd65275ac30f717b9dcd8a295e92e1e1e commit 3247bc7cd65275ac30f717b9dcd8a295e92e1e1e Author: Andrew Turner <andrew@FreeBSD.org> AuthorDate: 2022-08-19 10:50:06 +0000 Commit: Andrew Turner <andrew@FreeBSD.org> CommitDate: 2022-09-27 15:05:52 +0000 arm64 pmap: per-domain pv chunk list As with amd64, use a per-domain pv chunk lock to reduce contention as chunks get created and removed all the time. Sponsored by: The FreeBSD Foundation Differential Revision: https://reviews.freebsd.org/D36307 --- sys/arm64/arm64/pmap.c | 180 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 123 insertions(+), 57 deletions(-) diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c index 0c2f623aa9a3..dcc0c637cc1e 100644 --- a/sys/arm64/arm64/pmap.c +++ b/sys/arm64/arm64/pmap.c @@ -150,6 +150,12 @@ __FBSDID("$FreeBSD$"); #include <machine/md_var.h> #include <machine/pcb.h> +#ifdef NUMA +#define PMAP_MEMDOM MAXMEMDOM +#else +#define PMAP_MEMDOM 1 +#endif + #define PMAP_ASSERT_STAGE1(pmap) MPASS((pmap)->pm_stage == PM_STAGE1) #define PMAP_ASSERT_STAGE2(pmap) MPASS((pmap)->pm_stage == PM_STAGE2) @@ -276,8 +282,28 @@ vm_offset_t kernel_vm_end = 0; /* * Data for the pv entry allocation mechanism. 
*/ -static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks); -static struct mtx pv_chunks_mutex; +#ifdef NUMA +static __inline int +pc_to_domain(struct pv_chunk *pc) +{ + return (vm_phys_domain(DMAP_TO_PHYS((vm_offset_t)pc))); +} +#else +static __inline int +pc_to_domain(struct pv_chunk *pc __unused) +{ + return (0); +} +#endif + +struct pv_chunks_list { + struct mtx pvc_lock; + TAILQ_HEAD(pch, pv_chunk) pvc_list; + int active_reclaims; +} __aligned(CACHE_LINE_SIZE); + +struct pv_chunks_list __exclusive_cache_line pv_chunks[PMAP_MEMDOM]; + static struct rwlock pv_list_locks[NPV_LIST_LOCKS]; static struct md_page *pv_table; static struct md_page pv_dummy; @@ -1324,9 +1350,13 @@ pmap_init(void) } /* - * Initialize the pv chunk list mutex. + * Initialize pv chunk lists. */ - mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF); + for (i = 0; i < PMAP_MEMDOM; i++) { + mtx_init(&pv_chunks[i].pvc_lock, "pmap pv chunk list", NULL, + MTX_DEF); + TAILQ_INIT(&pv_chunks[i].pvc_list); + } /* * Initialize the pool of pv list locks. @@ -2550,8 +2580,9 @@ SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, * exacerbating the shortage of free pv entries. 
*/ static vm_page_t -reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp) +reclaim_pv_chunk_domain(pmap_t locked_pmap, struct rwlock **lockp, int domain) { + struct pv_chunks_list *pvc; struct pv_chunk *pc, *pc_marker, *pc_marker_end; struct pv_chunk_header pc_marker_b, pc_marker_end_b; struct md_page *pvh; @@ -2564,7 +2595,6 @@ reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp) struct spglist free; uint64_t inuse; int bit, field, freed, lvl; - static int active_reclaims = 0; PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED); KASSERT(lockp != NULL, ("reclaim_pv_chunk: lockp is NULL")); @@ -2577,10 +2607,11 @@ reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp) pc_marker = (struct pv_chunk *)&pc_marker_b; pc_marker_end = (struct pv_chunk *)&pc_marker_end_b; - mtx_lock(&pv_chunks_mutex); - active_reclaims++; - TAILQ_INSERT_HEAD(&pv_chunks, pc_marker, pc_lru); - TAILQ_INSERT_TAIL(&pv_chunks, pc_marker_end, pc_lru); + pvc = &pv_chunks[domain]; + mtx_lock(&pvc->pvc_lock); + pvc->active_reclaims++; + TAILQ_INSERT_HEAD(&pvc->pvc_list, pc_marker, pc_lru); + TAILQ_INSERT_TAIL(&pvc->pvc_list, pc_marker_end, pc_lru); while ((pc = TAILQ_NEXT(pc_marker, pc_lru)) != pc_marker_end && SLIST_EMPTY(&free)) { next_pmap = pc->pc_pmap; @@ -2593,11 +2624,11 @@ reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp) */ goto next_chunk; } - mtx_unlock(&pv_chunks_mutex); + mtx_unlock(&pvc->pvc_lock); /* * A pv_chunk can only be removed from the pc_lru list - * when both pv_chunks_mutex is owned and the + * when both pvc->pvc_lock is owned and the * corresponding pmap is locked. 
*/ if (pmap != next_pmap) { @@ -2608,15 +2639,15 @@ reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp) if (pmap > locked_pmap) { RELEASE_PV_LIST_LOCK(lockp); PMAP_LOCK(pmap); - mtx_lock(&pv_chunks_mutex); + mtx_lock(&pvc->pvc_lock); continue; } else if (pmap != locked_pmap) { if (PMAP_TRYLOCK(pmap)) { - mtx_lock(&pv_chunks_mutex); + mtx_lock(&pvc->pvc_lock); continue; } else { pmap = NULL; /* pmap is not locked */ - mtx_lock(&pv_chunks_mutex); + mtx_lock(&pvc->pvc_lock); pc = TAILQ_NEXT(pc_marker, pc_lru); if (pc == NULL || pc->pc_pmap != next_pmap) @@ -2668,7 +2699,7 @@ reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp) } } if (freed == 0) { - mtx_lock(&pv_chunks_mutex); + mtx_lock(&pvc->pvc_lock); goto next_chunk; } /* Every freed mapping is for a 4 KB page. */ @@ -2684,20 +2715,20 @@ reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp) /* Entire chunk is free; return it. */ m_pc = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc)); dump_drop_page(m_pc->phys_addr); - mtx_lock(&pv_chunks_mutex); - TAILQ_REMOVE(&pv_chunks, pc, pc_lru); + mtx_lock(&pvc->pvc_lock); + TAILQ_REMOVE(&pvc->pvc_list, pc, pc_lru); break; } TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); - mtx_lock(&pv_chunks_mutex); + mtx_lock(&pvc->pvc_lock); /* One freed pv entry in locked_pmap is sufficient. */ if (pmap == locked_pmap) break; next_chunk: - TAILQ_REMOVE(&pv_chunks, pc_marker, pc_lru); - TAILQ_INSERT_AFTER(&pv_chunks, pc, pc_marker, pc_lru); - if (active_reclaims == 1 && pmap != NULL) { + TAILQ_REMOVE(&pvc->pvc_list, pc_marker, pc_lru); + TAILQ_INSERT_AFTER(&pvc->pvc_list, pc, pc_marker, pc_lru); + if (pvc->active_reclaims == 1 && pmap != NULL) { /* * Rotate the pv chunks list so that we do not * scan the same pv chunks that could not be @@ -2705,17 +2736,17 @@ next_chunk: * and/or superpage mapping) on every * invocation of reclaim_pv_chunk(). 
*/ - while ((pc = TAILQ_FIRST(&pv_chunks)) != pc_marker) { + while ((pc = TAILQ_FIRST(&pvc->pvc_list)) != pc_marker){ MPASS(pc->pc_pmap != NULL); - TAILQ_REMOVE(&pv_chunks, pc, pc_lru); - TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); + TAILQ_REMOVE(&pvc->pvc_list, pc, pc_lru); + TAILQ_INSERT_TAIL(&pvc->pvc_list, pc, pc_lru); } } } - TAILQ_REMOVE(&pv_chunks, pc_marker, pc_lru); - TAILQ_REMOVE(&pv_chunks, pc_marker_end, pc_lru); - active_reclaims--; - mtx_unlock(&pv_chunks_mutex); + TAILQ_REMOVE(&pvc->pvc_list, pc_marker, pc_lru); + TAILQ_REMOVE(&pvc->pvc_list, pc_marker_end, pc_lru); + pvc->active_reclaims--; + mtx_unlock(&pvc->pvc_lock); if (pmap != NULL && pmap != locked_pmap) PMAP_UNLOCK(pmap); if (m_pc == NULL && !SLIST_EMPTY(&free)) { @@ -2728,6 +2759,23 @@ next_chunk: return (m_pc); } +static vm_page_t +reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp) +{ + vm_page_t m; + int i, domain; + + domain = PCPU_GET(domain); + for (i = 0; i < vm_ndomains; i++) { + m = reclaim_pv_chunk_domain(locked_pmap, lockp, domain); + if (m != NULL) + break; + domain = (domain + 1) % vm_ndomains; + } + + return (m); +} + /* * free the pv_entry back to the free list */ @@ -2776,28 +2824,37 @@ free_pv_chunk_dequeued(struct pv_chunk *pc) static void free_pv_chunk(struct pv_chunk *pc) { - mtx_lock(&pv_chunks_mutex); - TAILQ_REMOVE(&pv_chunks, pc, pc_lru); - mtx_unlock(&pv_chunks_mutex); + struct pv_chunks_list *pvc; + + pvc = &pv_chunks[pc_to_domain(pc)]; + mtx_lock(&pvc->pvc_lock); + TAILQ_REMOVE(&pvc->pvc_list, pc, pc_lru); + mtx_unlock(&pvc->pvc_lock); free_pv_chunk_dequeued(pc); } static void free_pv_chunk_batch(struct pv_chunklist *batch) { + struct pv_chunks_list *pvc; struct pv_chunk *pc, *npc; + int i; - if (TAILQ_EMPTY(batch)) - return; - - mtx_lock(&pv_chunks_mutex); - TAILQ_FOREACH(pc, batch, pc_list) { - TAILQ_REMOVE(&pv_chunks, pc, pc_lru); + for (i = 0; i < vm_ndomains; i++) { + if (TAILQ_EMPTY(&batch[i])) + continue; + pvc = &pv_chunks[i]; + 
mtx_lock(&pvc->pvc_lock); + TAILQ_FOREACH(pc, &batch[i], pc_list) { + TAILQ_REMOVE(&pvc->pvc_list, pc, pc_lru); + } + mtx_unlock(&pvc->pvc_lock); } - mtx_unlock(&pv_chunks_mutex); - TAILQ_FOREACH_SAFE(pc, batch, pc_list, npc) { - free_pv_chunk_dequeued(pc); + for (i = 0; i < vm_ndomains; i++) { + TAILQ_FOREACH_SAFE(pc, &batch[i], pc_list, npc) { + free_pv_chunk_dequeued(pc); + } } } @@ -2812,6 +2869,7 @@ free_pv_chunk_batch(struct pv_chunklist *batch) static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp) { + struct pv_chunks_list *pvc; int bit, field; pv_entry_t pv; struct pv_chunk *pc; @@ -2860,9 +2918,10 @@ retry: pc->pc_pmap = pmap; memcpy(pc->pc_map, pc_freemask, sizeof(pc_freemask)); pc->pc_map[0] &= ~1ul; /* preallocated bit 0 */ - mtx_lock(&pv_chunks_mutex); - TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); - mtx_unlock(&pv_chunks_mutex); + pvc = &pv_chunks[vm_page_domain(m)]; + mtx_lock(&pvc->pvc_lock); + TAILQ_INSERT_TAIL(&pvc->pvc_list, pc, pc_lru); + mtx_unlock(&pvc->pvc_lock); pv = &pc->pc_pventry[0]; TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); PV_STAT(atomic_add_long(&pv_entry_count, 1)); @@ -2879,10 +2938,11 @@ retry: static void reserve_pv_entries(pmap_t pmap, int needed, struct rwlock **lockp) { - struct pch new_tail; + struct pv_chunks_list *pvc; + struct pch new_tail[PMAP_MEMDOM]; struct pv_chunk *pc; vm_page_t m; - int avail, free; + int avail, free, i; bool reclaimed; PMAP_LOCK_ASSERT(pmap, MA_OWNED); @@ -2894,7 +2954,8 @@ reserve_pv_entries(pmap_t pmap, int needed, struct rwlock **lockp) * reclaim_pv_chunk() could recycle one of these chunks. In * contrast, these chunks must be added to the pmap upon allocation. 
*/ - TAILQ_INIT(&new_tail); + for (i = 0; i < PMAP_MEMDOM; i++) + TAILQ_INIT(&new_tail[i]); retry: avail = 0; TAILQ_FOREACH(pc, &pmap->pm_pvchunk, pc_list) { @@ -2921,7 +2982,7 @@ retry: pc->pc_pmap = pmap; memcpy(pc->pc_map, pc_freemask, sizeof(pc_freemask)); TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); - TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru); + TAILQ_INSERT_TAIL(&new_tail[vm_page_domain(m)], pc, pc_lru); PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV)); /* @@ -2932,10 +2993,13 @@ retry: if (reclaimed) goto retry; } - if (!TAILQ_EMPTY(&new_tail)) { - mtx_lock(&pv_chunks_mutex); - TAILQ_CONCAT(&pv_chunks, &new_tail, pc_lru); - mtx_unlock(&pv_chunks_mutex); + for (i = 0; i < vm_ndomains; i++) { + if (TAILQ_EMPTY(&new_tail[i])) + continue; + pvc = &pv_chunks[i]; + mtx_lock(&pvc->pvc_lock); + TAILQ_CONCAT(&pvc->pvc_list, &new_tail[i], pc_lru); + mtx_unlock(&pvc->pvc_lock); } } @@ -5276,7 +5340,7 @@ pmap_remove_pages(pmap_t pmap) pd_entry_t *pde; pt_entry_t *pte, tpte; struct spglist free; - struct pv_chunklist free_chunks; + struct pv_chunklist free_chunks[PMAP_MEMDOM]; vm_page_t m, ml3, mt; pv_entry_t pv; struct md_page *pvh; @@ -5284,12 +5348,13 @@ pmap_remove_pages(pmap_t pmap) struct rwlock *lock; int64_t bit; uint64_t inuse, bitmask; - int allfree, field, freed, idx, lvl; + int allfree, field, freed, i, idx, lvl; vm_paddr_t pa; lock = NULL; - TAILQ_INIT(&free_chunks); + for (i = 0; i < PMAP_MEMDOM; i++) + TAILQ_INIT(&free_chunks[i]); SLIST_INIT(&free); PMAP_LOCK(pmap); TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { @@ -5430,13 +5495,14 @@ pmap_remove_pages(pmap_t pmap) PV_STAT(atomic_subtract_long(&pv_entry_count, freed)); if (allfree) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); - TAILQ_INSERT_TAIL(&free_chunks, pc, pc_list); + TAILQ_INSERT_TAIL(&free_chunks[pc_to_domain(pc)], pc, + pc_list); } } if (lock != NULL) rw_wunlock(lock); pmap_invalidate_all(pmap); - free_pv_chunk_batch(&free_chunks); + free_pv_chunk_batch(free_chunks); 
PMAP_UNLOCK(pmap); vm_page_free_pages_toq(&free, true); }