git: 8f58b693814e - main - vm_grab_pages_unlocked: read all the pages at once
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Tue, 29 Apr 2025 06:33:46 UTC
The branch main has been updated by dougm: URL: https://cgit.FreeBSD.org/src/commit/?id=8f58b693814e06afc51eade99a8c38e4abe9a804 commit 8f58b693814e06afc51eade99a8c38e4abe9a804 Author: Doug Moore <dougm@FreeBSD.org> AuthorDate: 2025-04-29 06:28:32 +0000 Commit: Doug Moore <dougm@FreeBSD.org> CommitDate: 2025-04-29 06:28:32 +0000 vm_grab_pages_unlocked: read all the pages at once Define a function pctrie_lookup_range_unlocked, which looks up consecutive elements in the pctrie, until a count limit is reached or an item is discovered missing, and writes them into the first elements of an array, returning the count of items found. It does not require a lock. It uses an iterator, strictly between smr_enter and smr_exit, when some of the nodes in the pctrie on entry may come to have only one child, but none of them can be freed before exit. Define a vm_radix interface to it for reading page ranges. Change vm_page_grab_pages_unlocked to read all requested pages at once, without relying on the existence of a linked list of pages. Drop smr arguments from pctrie_iter_stride, since there's no smr version in use. Drop _pctrie_iter_lookup, since it isn't needed. Reviewed by: alc, kib, markj Differential Revision: https://reviews.freebsd.org/D47114 --- sys/kern/subr_pctrie.c | 83 +++++++++++++++++++++++++++++++------------------- sys/sys/pctrie.h | 26 ++++++++++++++-- sys/vm/vm_page.c | 24 ++++----------- sys/vm/vm_radix.h | 12 ++++++++ 4 files changed, 93 insertions(+), 52 deletions(-) diff --git a/sys/kern/subr_pctrie.c b/sys/kern/subr_pctrie.c index 03cf3e1e5990..feb29cf82330 100644 --- a/sys/kern/subr_pctrie.c +++ b/sys/kern/subr_pctrie.c @@ -494,7 +494,7 @@ _pctrie_lookup_node(struct pctrie *ptree, struct pctrie_node *node, * search for a value matching 'index'. */ while (node != NULL) { - KASSERT(!powerof2(node->pn_popmap), + KASSERT(access == PCTRIE_SMR || !powerof2(node->pn_popmap), ("%s: freed node in iter path", __func__)); if (!pctrie_keybarr(node, index, &slot)) break; @@ -520,29 +520,20 @@ _pctrie_lookup_node(struct pctrie *ptree, struct pctrie_node *node, } /* - * Returns the value stored at a given index value, possibly NULL. + * Returns the value stored at a given index value, possibly NULL, assuming + * access is externally synchronized by a lock. */ -static __always_inline uint64_t * -_pctrie_iter_lookup(struct pctrie_iter *it, uint64_t index, smr_t smr, - enum pctrie_access access) +uint64_t * +pctrie_iter_lookup(struct pctrie_iter *it, uint64_t index) { struct pctrie_node *node; it->index = index; node = _pctrie_lookup_node(it->ptree, it->node, index, &it->node, - smr, access); + NULL, PCTRIE_LOCKED); return (pctrie_match_value(node, index)); } -/* - * Returns the value stored at a given index value, possibly NULL. - */ -uint64_t * -pctrie_iter_lookup(struct pctrie_iter *it, uint64_t index) -{ - return (_pctrie_iter_lookup(it, index, NULL, PCTRIE_LOCKED)); -} - /* * Insert the val in the trie, starting search with iterator. Return a pointer * to indicate where a new node must be allocated to complete insertion. @@ -581,9 +572,8 @@ pctrie_iter_insert_lookup(struct pctrie_iter *it, uint64_t *val) * Returns the value stored at a fixed offset from the current index value, * possibly NULL. */ -static __always_inline uint64_t * -_pctrie_iter_stride(struct pctrie_iter *it, int stride, smr_t smr, - enum pctrie_access access) +uint64_t * +pctrie_iter_stride(struct pctrie_iter *it, int stride) { uint64_t index = it->index + stride; @@ -594,17 +584,7 @@ _pctrie_iter_stride(struct pctrie_iter *it, int stride, smr_t smr, if ((index < it->limit) != (it->index < it->limit)) return (NULL); - return (_pctrie_iter_lookup(it, index, smr, access)); -} - -/* - * Returns the value stored at a fixed offset from the current index value, - * possibly NULL. - */ -uint64_t * -pctrie_iter_stride(struct pctrie_iter *it, int stride) -{ - return (_pctrie_iter_stride(it, stride, NULL, PCTRIE_LOCKED)); + return (pctrie_iter_lookup(it, index)); } /* @@ -614,7 +594,7 @@ pctrie_iter_stride(struct pctrie_iter *it, int stride) uint64_t * pctrie_iter_next(struct pctrie_iter *it) { - return (_pctrie_iter_stride(it, 1, NULL, PCTRIE_LOCKED)); + return (pctrie_iter_stride(it, 1)); } /* @@ -624,7 +604,48 @@ pctrie_iter_next(struct pctrie_iter *it) uint64_t * pctrie_iter_prev(struct pctrie_iter *it) { - return (_pctrie_iter_stride(it, -1, NULL, PCTRIE_LOCKED)); + return (pctrie_iter_stride(it, -1)); +} + +/* + * Returns the number of contiguous, non-NULL entries read into the value[] + * array, without requiring an external lock. These entries *may* never have + * been in the pctrie all at one time, but for a series of times t0, t1, t2, + * ..., with ti <= t(i+1), value[i] was in the trie at time ti. + */ +int +pctrie_lookup_range_unlocked(struct pctrie *ptree, uint64_t index, + uint64_t *value[], int count, smr_t smr) +{ + struct pctrie_node *parent, *node; + int base, end, i; + + parent = NULL; + smr_enter(smr); + for (i = 0; i < count;) { + node = _pctrie_lookup_node(ptree, parent, index + i, &parent, + smr, PCTRIE_SMR); + value[i] = pctrie_match_value(node, index + i); + if (value[i] == NULL) + break; + ++i; + base = (index + i) % PCTRIE_COUNT; + if (base == 0 || parent == NULL || parent->pn_clev != 0) + continue; + end = MIN(count, i + PCTRIE_COUNT - base); + while (i < end) { + node = pctrie_node_load(&parent->pn_child[base++], + smr, PCTRIE_SMR); + value[i] = pctrie_toval(node); + if (value[i] == NULL) + break; + ++i; + } + if (i < end) + break; + } + smr_exit(smr); + return (i); } /* diff --git a/sys/sys/pctrie.h b/sys/sys/pctrie.h index dac18b58498b..01d9f34f11b9 100644 --- a/sys/sys/pctrie.h +++ b/sys/sys/pctrie.h @@ -83,6 +83,19 @@ name##_PCTRIE_LOOKUP_UNLOCKED(struct pctrie *ptree, uint64_t key) \ return name##_PCTRIE_VAL2PTR(pctrie_lookup_unlocked(ptree, \ key, (smr))); \ } \ + \ +static __inline __unused int \ +name##_PCTRIE_LOOKUP_RANGE_UNLOCKED(struct pctrie *ptree, uint64_t key, \ + struct type *value[], int count) \ +{ \ + uint64_t **data = (uint64_t **)value; \ + \ + count = pctrie_lookup_range_unlocked(ptree, key, data, count, \ + (smr)); \ + for (int i = 0; i < count; i++) \ + value[i] = name##_PCTRIE_NZVAL2PTR(data[i]); \ + return (count); \ +} \ #define PCTRIE_DEFINE(name, type, field, allocfn, freefn) \ \ @@ -94,13 +107,18 @@ CTASSERT(sizeof(((struct type *)0)->field) == sizeof(uint64_t)); \ CTASSERT((__offsetof(struct type, field) & (sizeof(uint32_t) - 1)) == 0); \ \ static __inline struct type * \ -name##_PCTRIE_VAL2PTR(uint64_t *val) \ +name##_PCTRIE_NZVAL2PTR(uint64_t *val) \ { \ + return (struct type *) \ + ((uintptr_t)val - __offsetof(struct type, field)); \ +} \ \ +static __inline struct type * \ +name##_PCTRIE_VAL2PTR(uint64_t *val) \ +{ \ if (val == NULL) \ return (NULL); \ - return (struct type *) \ - ((uintptr_t)val - __offsetof(struct type, field)); \ + return (name##_PCTRIE_NZVAL2PTR(val)); \ } \ \ static __inline uint64_t * \ @@ -365,6 +383,8 @@ void pctrie_insert_node(uint64_t *val, struct pctrie_node *parent, uint64_t *pctrie_lookup(struct pctrie *ptree, uint64_t key); uint64_t *pctrie_lookup_unlocked(struct pctrie *ptree, uint64_t key, smr_t smr); +int pctrie_lookup_range_unlocked(struct pctrie *ptree, + uint64_t index, uint64_t *value[], int count, smr_t smr); uint64_t *pctrie_iter_lookup(struct pctrie_iter *it, uint64_t index); uint64_t *pctrie_iter_stride(struct pctrie_iter *it, int stride); uint64_t *pctrie_iter_next(struct pctrie_iter *it); diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index e2f11eb57100..8dce6feaca09 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -5269,7 +5269,7 @@ vm_page_grab_pages_unlocked(vm_object_t object, vm_pindex_t pindex, { vm_page_t m; int flags; - int i; + int i, num_fetched; KASSERT(count > 0, ("vm_page_grab_pages_unlocked: invalid page count %d", count)); @@ -5281,22 +5281,10 @@ vm_page_grab_pages_unlocked(vm_object_t object, vm_pindex_t pindex, */ flags = allocflags & ~VM_ALLOC_NOBUSY; vm_page_grab_check(flags); - m = NULL; - for (i = 0; i < count; i++, pindex++) { - /* - * We may see a false NULL here because the previous page has - * been removed or just inserted and the list is loaded without - * barriers. Switch to radix to verify. - */ - if (m == NULL || QMD_IS_TRASHED(m) || m->pindex != pindex || - atomic_load_ptr(&m->object) != object) { - /* - * This guarantees the result is instantaneously - * correct. - */ - m = NULL; - } - m = vm_page_acquire_unlocked(object, pindex, m, flags); + num_fetched = vm_radix_lookup_range_unlocked(&object->rtree, pindex, + ma, count); + for (i = 0; i < num_fetched; i++, pindex++) { + m = vm_page_acquire_unlocked(object, pindex, ma[i], flags); if (m == PAGE_NOT_ACQUIRED) return (i); if (m == NULL) @@ -5308,8 +5296,8 @@ vm_page_grab_pages_unlocked(vm_object_t object, vm_pindex_t pindex, } /* m will still be wired or busy according to flags. */ vm_page_grab_release(m, allocflags); + /* vm_page_acquire_unlocked() may not return ma[i]. */ ma[i] = m; - m = TAILQ_NEXT(m, listq); } if (i == count || (allocflags & VM_ALLOC_NOCREAT) != 0) return (i); diff --git a/sys/vm/vm_radix.h b/sys/vm/vm_radix.h index df3639091d58..231075d32754 100644 --- a/sys/vm/vm_radix.h +++ b/sys/vm/vm_radix.h @@ -121,6 +121,18 @@ vm_radix_lookup_unlocked(struct vm_radix *rtree, vm_pindex_t index) return (VM_RADIX_PCTRIE_LOOKUP_UNLOCKED(&rtree->rt_trie, index)); } +/* + * Returns the number of contiguous, non-NULL pages read into the ma[] + * array, without requiring an external lock. + */ +static __inline int +vm_radix_lookup_range_unlocked(struct vm_radix *rtree, vm_pindex_t index, + vm_page_t ma[], int count) +{ + return (VM_RADIX_PCTRIE_LOOKUP_RANGE_UNLOCKED(&rtree->rt_trie, index, + ma, count)); +} + /* * Initialize an iterator for vm_radix. */