svn commit: r365980 - head/sys/vm
D Scott Phillips
scottph at FreeBSD.org
Mon Sep 21 22:22:54 UTC 2020
Author: scottph
Date: Mon Sep 21 22:22:53 2020
New Revision: 365980
URL: https://svnweb.freebsd.org/changeset/base/365980
Log:
vm_reserv: Sparsify the vm_reserv_array when VM_PHYSSEG_SPARSE
On an Ampere Altra system, the physical memory is populated
sparsely within the physical address space, with only about 0.4%
of physical addresses backed by RAM in the range [0, last_pa].
As a result, the vm_reserv_array is oversized by a few
orders of magnitude, wasting roughly 5 GiB on a system with
256 GiB of RAM.
The sparse allocation of vm_reserv_array is enabled by defining
VM_PHYSSEG_SPARSE; the dense allocation is still used on
platforms that define VM_PHYSSEG_DENSE.
Reviewed by: markj, alc, kib
Approved by: scottl (implicit)
MFC after: 1 week
Sponsored by: Ampere Computing, Inc.
Differential Revision: https://reviews.freebsd.org/D26130
Modified:
head/sys/vm/vm_phys.h
head/sys/vm/vm_reserv.c
Modified: head/sys/vm/vm_phys.h
==============================================================================
--- head/sys/vm/vm_phys.h Mon Sep 21 22:22:06 2020 (r365979)
+++ head/sys/vm/vm_phys.h Mon Sep 21 22:22:53 2020 (r365980)
@@ -69,6 +69,9 @@ struct vm_phys_seg {
vm_paddr_t start;
vm_paddr_t end;
vm_page_t first_page;
+#if VM_NRESERVLEVEL > 0
+ vm_reserv_t first_reserv;
+#endif
int domain;
struct vm_freelist (*free_queues)[VM_NFREEPOOL][VM_NFREEORDER_MAX];
};
Modified: head/sys/vm/vm_reserv.c
==============================================================================
--- head/sys/vm/vm_reserv.c Mon Sep 21 22:22:06 2020 (r365979)
+++ head/sys/vm/vm_reserv.c Mon Sep 21 22:22:53 2020 (r365980)
@@ -333,11 +333,17 @@ sysctl_vm_reserv_fullpop(SYSCTL_HANDLER_ARGS)
for (segind = 0; segind < vm_phys_nsegs; segind++) {
seg = &vm_phys_segs[segind];
paddr = roundup2(seg->start, VM_LEVEL_0_SIZE);
+#ifdef VM_PHYSSEG_SPARSE
+ rv = seg->first_reserv + (paddr >> VM_LEVEL_0_SHIFT) -
+ (seg->start >> VM_LEVEL_0_SHIFT);
+#else
+ rv = &vm_reserv_array[paddr >> VM_LEVEL_0_SHIFT];
+#endif
while (paddr + VM_LEVEL_0_SIZE > paddr && paddr +
VM_LEVEL_0_SIZE <= seg->end) {
- rv = &vm_reserv_array[paddr >> VM_LEVEL_0_SHIFT];
fullpop += rv->popcnt == VM_LEVEL_0_NPAGES;
paddr += VM_LEVEL_0_SIZE;
+ rv++;
}
}
return (sysctl_handle_int(oidp, &fullpop, 0, req));
@@ -496,8 +502,15 @@ vm_reserv_depopulate(vm_reserv_t rv, int index)
static __inline vm_reserv_t
vm_reserv_from_page(vm_page_t m)
{
+#ifdef VM_PHYSSEG_SPARSE
+ struct vm_phys_seg *seg;
+ seg = &vm_phys_segs[m->segind];
+ return (seg->first_reserv + (VM_PAGE_TO_PHYS(m) >> VM_LEVEL_0_SHIFT) -
+ (seg->start >> VM_LEVEL_0_SHIFT));
+#else
return (&vm_reserv_array[VM_PAGE_TO_PHYS(m) >> VM_LEVEL_0_SHIFT]);
+#endif
}
/*
@@ -1054,22 +1067,38 @@ vm_reserv_init(void)
struct vm_phys_seg *seg;
struct vm_reserv *rv;
struct vm_reserv_domain *rvd;
+#ifdef VM_PHYSSEG_SPARSE
+ vm_pindex_t used;
+#endif
int i, j, segind;
/*
* Initialize the reservation array. Specifically, initialize the
* "pages" field for every element that has an underlying superpage.
*/
+#ifdef VM_PHYSSEG_SPARSE
+ used = 0;
+#endif
for (segind = 0; segind < vm_phys_nsegs; segind++) {
seg = &vm_phys_segs[segind];
+#ifdef VM_PHYSSEG_SPARSE
+ seg->first_reserv = &vm_reserv_array[used];
+ used += howmany(seg->end, VM_LEVEL_0_SIZE) -
+ seg->start / VM_LEVEL_0_SIZE;
+#else
+ seg->first_reserv =
+ &vm_reserv_array[seg->start >> VM_LEVEL_0_SHIFT];
+#endif
paddr = roundup2(seg->start, VM_LEVEL_0_SIZE);
+ rv = seg->first_reserv + (paddr >> VM_LEVEL_0_SHIFT) -
+ (seg->start >> VM_LEVEL_0_SHIFT);
while (paddr + VM_LEVEL_0_SIZE > paddr && paddr +
VM_LEVEL_0_SIZE <= seg->end) {
- rv = &vm_reserv_array[paddr >> VM_LEVEL_0_SHIFT];
rv->pages = PHYS_TO_VM_PAGE(paddr);
rv->domain = seg->domain;
mtx_init(&rv->lock, "vm reserv", NULL, MTX_DEF);
paddr += VM_LEVEL_0_SIZE;
+ rv++;
}
}
for (i = 0; i < MAXMEMDOM; i++) {
@@ -1400,30 +1429,40 @@ vm_reserv_size(int level)
vm_paddr_t
vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end)
{
- vm_paddr_t new_end, high_water;
+ vm_paddr_t new_end;
+ vm_pindex_t count;
size_t size;
int i;
- high_water = phys_avail[1];
+ count = 0;
for (i = 0; i < vm_phys_nsegs; i++) {
- if (vm_phys_segs[i].end > high_water)
- high_water = vm_phys_segs[i].end;
+#ifdef VM_PHYSSEG_SPARSE
+ count += howmany(vm_phys_segs[i].end, VM_LEVEL_0_SIZE) -
+ vm_phys_segs[i].start / VM_LEVEL_0_SIZE;
+#else
+ count = MAX(count,
+ howmany(vm_phys_segs[i].end, VM_LEVEL_0_SIZE));
+#endif
}
- /* Skip the first chunk. It is already accounted for. */
- for (i = 2; phys_avail[i + 1] != 0; i += 2) {
- if (phys_avail[i + 1] > high_water)
- high_water = phys_avail[i + 1];
+ for (i = 0; phys_avail[i + 1] != 0; i += 2) {
+#ifdef VM_PHYSSEG_SPARSE
+ count += howmany(phys_avail[i + 1], VM_LEVEL_0_SIZE) -
+ phys_avail[i] / VM_LEVEL_0_SIZE;
+#else
+ count = MAX(count,
+ howmany(phys_avail[i + 1], VM_LEVEL_0_SIZE));
+#endif
}
/*
- * Calculate the size (in bytes) of the reservation array. Round up
- * from "high_water" because every small page is mapped to an element
- * in the reservation array based on its physical address. Thus, the
- * number of elements in the reservation array can be greater than the
- * number of superpages.
+ * Calculate the size (in bytes) of the reservation array. Rounding up
+ * for partial superpages at boundaries, as every small page is mapped
+ * to an element in the reservation array based on its physical address.
+ * Thus, the number of elements in the reservation array can be greater
+ * than the number of superpages.
*/
- size = howmany(high_water, VM_LEVEL_0_SIZE) * sizeof(struct vm_reserv);
+ size = count * sizeof(struct vm_reserv);
/*
* Allocate and map the physical memory for the reservation array. The
More information about the svn-src-all
mailing list