svn commit: r365520 - head/sys/vm
Konstantin Belousov
kib at FreeBSD.org
Wed Sep 9 22:02:31 UTC 2020
Author: kib
Date: Wed Sep 9 22:02:30 2020
New Revision: 365520
URL: https://svnweb.freebsd.org/changeset/base/365520
Log:
vm_map: Add a map entry kind that can only be clipped at specific boundary.
The entries and their clip boundaries must be aligned on supported
superpages sizes from pagesizes[]. vm_map operations return Mach
error KERN_INVALID_ARGUMENT, which is usually translated to EINVAL, if
it would require clip not at the boundary.
In other words, entries force preserving virtual addresses superpage
properties.
Reviewed by: markj
Tested by: pho
Sponsored by: The FreeBSD Foundation
MFC after: 1 week
Differential revision: https://reviews.freebsd.org/D24652
Modified:
head/sys/vm/vm_map.c
head/sys/vm/vm_map.h
Modified: head/sys/vm/vm_map.c
==============================================================================
--- head/sys/vm/vm_map.c Wed Sep 9 21:57:55 2020 (r365519)
+++ head/sys/vm/vm_map.c Wed Sep 9 22:02:30 2020 (r365520)
@@ -1554,13 +1554,17 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm_oof
struct ucred *cred;
vm_eflags_t protoeflags;
vm_inherit_t inheritance;
+ u_long bdry;
+ u_int bidx;
VM_MAP_ASSERT_LOCKED(map);
KASSERT(object != kernel_object ||
(cow & MAP_COPY_ON_WRITE) == 0,
("vm_map_insert: kernel object and COW"));
- KASSERT(object == NULL || (cow & MAP_NOFAULT) == 0,
- ("vm_map_insert: paradoxical MAP_NOFAULT request"));
+ KASSERT(object == NULL || (cow & MAP_NOFAULT) == 0 ||
+ (cow & MAP_SPLIT_BOUNDARY_MASK) != 0,
+ ("vm_map_insert: paradoxical MAP_NOFAULT request, obj %p cow %#x",
+ object, cow));
KASSERT((prot & ~max) == 0,
("prot %#x is not subset of max_prot %#x", prot, max));
@@ -1615,6 +1619,17 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm_oof
inheritance = VM_INHERIT_SHARE;
else
inheritance = VM_INHERIT_DEFAULT;
+ if ((cow & MAP_SPLIT_BOUNDARY_MASK) != 0) {
+ /* This magically ignores index 0, for usual page size. */
+ bidx = (cow & MAP_SPLIT_BOUNDARY_MASK) >>
+ MAP_SPLIT_BOUNDARY_SHIFT;
+ if (bidx >= MAXPAGESIZES)
+ return (KERN_INVALID_ARGUMENT);
+ bdry = pagesizes[bidx] - 1;
+ if ((start & bdry) != 0 || (end & bdry) != 0)
+ return (KERN_INVALID_ARGUMENT);
+ protoeflags |= bidx << MAP_ENTRY_SPLIT_BOUNDARY_SHIFT;
+ }
cred = NULL;
if ((cow & (MAP_ACC_NO_CHARGE | MAP_NOFAULT | MAP_CREATE_GUARD)) != 0)
@@ -2342,31 +2357,40 @@ vm_map_entry_clone(vm_map_t map, vm_map_entry_t entry)
* the specified address; if necessary,
* it splits the entry into two.
*/
-static inline void
-vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t start)
+static int
+vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t startaddr)
{
vm_map_entry_t new_entry;
+ int bdry_idx;
if (!map->system_map)
WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
"%s: map %p entry %p start 0x%jx", __func__, map, entry,
- (uintmax_t)start);
+ (uintmax_t)startaddr);
- if (start <= entry->start)
- return;
+ if (startaddr <= entry->start)
+ return (KERN_SUCCESS);
VM_MAP_ASSERT_LOCKED(map);
- KASSERT(entry->end > start && entry->start < start,
+ KASSERT(entry->end > startaddr && entry->start < startaddr,
("%s: invalid clip of entry %p", __func__, entry));
+ bdry_idx = (entry->eflags & MAP_ENTRY_SPLIT_BOUNDARY_MASK) >>
+ MAP_ENTRY_SPLIT_BOUNDARY_SHIFT;
+ if (bdry_idx != 0) {
+ if ((startaddr & (pagesizes[bdry_idx] - 1)) != 0)
+ return (KERN_INVALID_ARGUMENT);
+ }
+
new_entry = vm_map_entry_clone(map, entry);
/*
* Split off the front portion. Insert the new entry BEFORE this one,
* so that this entry has the specified starting address.
*/
- new_entry->end = start;
+ new_entry->end = startaddr;
vm_map_entry_link(map, new_entry);
+ return (KERN_SUCCESS);
}
/*
@@ -2376,11 +2400,12 @@ vm_map_clip_start(vm_map_t map, vm_map_entry_t entry,
* the interior of the entry. Return entry after 'start', and in
* prev_entry set the entry before 'start'.
*/
-static inline vm_map_entry_t
+static int
vm_map_lookup_clip_start(vm_map_t map, vm_offset_t start,
- vm_map_entry_t *prev_entry)
+ vm_map_entry_t *res_entry, vm_map_entry_t *prev_entry)
{
vm_map_entry_t entry;
+ int rv;
if (!map->system_map)
WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
@@ -2389,11 +2414,14 @@ vm_map_lookup_clip_start(vm_map_t map, vm_offset_t sta
if (vm_map_lookup_entry(map, start, prev_entry)) {
entry = *prev_entry;
- vm_map_clip_start(map, entry, start);
+ rv = vm_map_clip_start(map, entry, start);
+ if (rv != KERN_SUCCESS)
+ return (rv);
*prev_entry = vm_map_entry_pred(entry);
} else
entry = vm_map_entry_succ(*prev_entry);
- return (entry);
+ *res_entry = entry;
+ return (KERN_SUCCESS);
}
/*
@@ -2403,31 +2431,41 @@ vm_map_lookup_clip_start(vm_map_t map, vm_offset_t sta
* the specified address; if necessary,
* it splits the entry into two.
*/
-static inline void
-vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t end)
+static int
+vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t endaddr)
{
vm_map_entry_t new_entry;
+ int bdry_idx;
if (!map->system_map)
WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
"%s: map %p entry %p end 0x%jx", __func__, map, entry,
- (uintmax_t)end);
+ (uintmax_t)endaddr);
- if (end >= entry->end)
- return;
+ if (endaddr >= entry->end)
+ return (KERN_SUCCESS);
VM_MAP_ASSERT_LOCKED(map);
- KASSERT(entry->start < end && entry->end > end,
+ KASSERT(entry->start < endaddr && entry->end > endaddr,
("%s: invalid clip of entry %p", __func__, entry));
+ bdry_idx = (entry->eflags & MAP_ENTRY_SPLIT_BOUNDARY_MASK) >>
+ MAP_ENTRY_SPLIT_BOUNDARY_SHIFT;
+ if (bdry_idx != 0) {
+ if ((endaddr & (pagesizes[bdry_idx] - 1)) != 0)
+ return (KERN_INVALID_ARGUMENT);
+ }
+
new_entry = vm_map_entry_clone(map, entry);
/*
* Split off the back portion. Insert the new entry AFTER this one,
* so that this entry has the specified ending address.
*/
- new_entry->start = end;
+ new_entry->start = endaddr;
vm_map_entry_link(map, new_entry);
+
+ return (KERN_SUCCESS);
}
/*
@@ -2469,12 +2507,17 @@ vm_map_submap(
if (vm_map_lookup_entry(map, start, &entry) && entry->end >= end &&
(entry->eflags & MAP_ENTRY_COW) == 0 &&
entry->object.vm_object == NULL) {
- vm_map_clip_start(map, entry, start);
- vm_map_clip_end(map, entry, end);
+ result = vm_map_clip_start(map, entry, start);
+ if (result != KERN_SUCCESS)
+ goto unlock;
+ result = vm_map_clip_end(map, entry, end);
+ if (result != KERN_SUCCESS)
+ goto unlock;
entry->object.sub_map = submap;
entry->eflags |= MAP_ENTRY_IS_SUB_MAP;
result = KERN_SUCCESS;
}
+unlock:
vm_map_unlock(map);
if (result != KERN_SUCCESS) {
@@ -2661,11 +2704,18 @@ again:
* of this loop early and let the next loop simplify the entries, since
* some may now be mergeable.
*/
- rv = KERN_SUCCESS;
- vm_map_clip_start(map, first_entry, start);
+ rv = vm_map_clip_start(map, first_entry, start);
+ if (rv != KERN_SUCCESS) {
+ vm_map_unlock(map);
+ return (rv);
+ }
for (entry = first_entry; entry->start < end;
entry = vm_map_entry_succ(entry)) {
- vm_map_clip_end(map, entry, end);
+ rv = vm_map_clip_end(map, entry, end);
+ if (rv != KERN_SUCCESS) {
+ vm_map_unlock(map);
+ return (rv);
+ }
if (set_max ||
((new_prot & ~entry->protection) & VM_PROT_WRITE) == 0 ||
@@ -2785,6 +2835,7 @@ vm_map_madvise(
int behav)
{
vm_map_entry_t entry, prev_entry;
+ int rv;
bool modify_map;
/*
@@ -2830,13 +2881,22 @@ vm_map_madvise(
* We clip the vm_map_entry so that behavioral changes are
* limited to the specified address range.
*/
- for (entry = vm_map_lookup_clip_start(map, start, &prev_entry);
- entry->start < end;
- prev_entry = entry, entry = vm_map_entry_succ(entry)) {
+ rv = vm_map_lookup_clip_start(map, start, &entry, &prev_entry);
+ if (rv != KERN_SUCCESS) {
+ vm_map_unlock(map);
+ return (vm_mmap_to_errno(rv));
+ }
+
+ for (; entry->start < end; prev_entry = entry,
+ entry = vm_map_entry_succ(entry)) {
if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0)
continue;
- vm_map_clip_end(map, entry, end);
+ rv = vm_map_clip_end(map, entry, end);
+ if (rv != KERN_SUCCESS) {
+ vm_map_unlock(map);
+ return (vm_mmap_to_errno(rv));
+ }
switch (behav) {
case MADV_NORMAL:
@@ -2969,7 +3029,8 @@ int
vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end,
vm_inherit_t new_inheritance)
{
- vm_map_entry_t entry, prev_entry;
+ vm_map_entry_t entry, lentry, prev_entry, start_entry;
+ int rv;
switch (new_inheritance) {
case VM_INHERIT_NONE:
@@ -2984,18 +3045,37 @@ vm_map_inherit(vm_map_t map, vm_offset_t start, vm_off
return (KERN_SUCCESS);
vm_map_lock(map);
VM_MAP_RANGE_CHECK(map, start, end);
- for (entry = vm_map_lookup_clip_start(map, start, &prev_entry);
- entry->start < end;
- prev_entry = entry, entry = vm_map_entry_succ(entry)) {
- vm_map_clip_end(map, entry, end);
+ rv = vm_map_lookup_clip_start(map, start, &start_entry, &prev_entry);
+ if (rv != KERN_SUCCESS)
+ goto unlock;
+ if (vm_map_lookup_entry(map, end - 1, &lentry)) {
+ rv = vm_map_clip_end(map, lentry, end);
+ if (rv != KERN_SUCCESS)
+ goto unlock;
+ }
+ if (new_inheritance == VM_INHERIT_COPY) {
+ for (entry = start_entry; entry->start < end;
+ prev_entry = entry, entry = vm_map_entry_succ(entry)) {
+ if ((entry->eflags & MAP_ENTRY_SPLIT_BOUNDARY_MASK)
+ != 0) {
+ rv = KERN_INVALID_ARGUMENT;
+ goto unlock;
+ }
+ }
+ }
+ for (entry = start_entry; entry->start < end; prev_entry = entry,
+ entry = vm_map_entry_succ(entry)) {
+ KASSERT(entry->end <= end, ("non-clipped entry %p end %jx %jx",
+ entry, (uintmax_t)entry->end, (uintmax_t)end));
if ((entry->eflags & MAP_ENTRY_GUARD) == 0 ||
new_inheritance != VM_INHERIT_ZERO)
entry->inheritance = new_inheritance;
vm_map_try_merge_entries(map, prev_entry, entry);
}
vm_map_try_merge_entries(map, prev_entry, entry);
+unlock:
vm_map_unlock(map);
- return (KERN_SUCCESS);
+ return (rv);
}
/*
@@ -3094,8 +3174,13 @@ vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offs
next_entry : NULL;
continue;
}
- vm_map_clip_start(map, entry, start);
- vm_map_clip_end(map, entry, end);
+ rv = vm_map_clip_start(map, entry, start);
+ if (rv != KERN_SUCCESS)
+ break;
+ rv = vm_map_clip_end(map, entry, end);
+ if (rv != KERN_SUCCESS)
+ break;
+
/*
* Mark the entry in case the map lock is released. (See
* above.)
@@ -3262,8 +3347,8 @@ vm_map_wire_locked(vm_map_t map, vm_offset_t start, vm
{
vm_map_entry_t entry, first_entry, next_entry, prev_entry;
vm_offset_t faddr, saved_end, saved_start;
- u_long npages;
- u_int last_timestamp;
+ u_long incr, npages;
+ u_int bidx, last_timestamp;
int rv;
bool holes_ok, need_wakeup, user_wire;
vm_prot_t prot;
@@ -3301,8 +3386,13 @@ vm_map_wire_locked(vm_map_t map, vm_offset_t start, vm
next_entry : NULL;
continue;
}
- vm_map_clip_start(map, entry, start);
- vm_map_clip_end(map, entry, end);
+ rv = vm_map_clip_start(map, entry, start);
+ if (rv != KERN_SUCCESS)
+ goto done;
+ rv = vm_map_clip_end(map, entry, end);
+ if (rv != KERN_SUCCESS)
+ goto done;
+
/*
* Mark the entry in case the map lock is released. (See
* above.)
@@ -3339,20 +3429,23 @@ vm_map_wire_locked(vm_map_t map, vm_offset_t start, vm
saved_start = entry->start;
saved_end = entry->end;
last_timestamp = map->timestamp;
+ bidx = (entry->eflags & MAP_ENTRY_SPLIT_BOUNDARY_MASK)
+ >> MAP_ENTRY_SPLIT_BOUNDARY_SHIFT;
+ incr = pagesizes[bidx];
vm_map_busy(map);
vm_map_unlock(map);
- faddr = saved_start;
- do {
+ for (faddr = saved_start; faddr < saved_end;
+ faddr += incr) {
/*
* Simulate a fault to get the page and enter
* it into the physical map.
*/
- if ((rv = vm_fault(map, faddr,
- VM_PROT_NONE, VM_FAULT_WIRE, NULL)) !=
- KERN_SUCCESS)
+ rv = vm_fault(map, faddr, VM_PROT_NONE,
+ VM_FAULT_WIRE, NULL);
+ if (rv != KERN_SUCCESS)
break;
- } while ((faddr += PAGE_SIZE) < saved_end);
+ }
vm_map_lock(map);
vm_map_unbusy(map);
if (last_timestamp + 1 != map->timestamp) {
@@ -3427,10 +3520,14 @@ done:
* Moreover, another thread could be simultaneously
* wiring this new mapping entry. Detect these cases
* and skip any entries marked as in transition not by us.
+ *
+ * Another way to get an entry not marked with
+ * MAP_ENTRY_IN_TRANSITION is after failed clipping,
+ * which set rv to KERN_INVALID_ARGUMENT.
*/
if ((entry->eflags & MAP_ENTRY_IN_TRANSITION) == 0 ||
entry->wiring_thread != curthread) {
- KASSERT(holes_ok,
+ KASSERT(holes_ok || rv == KERN_INVALID_ARGUMENT,
("vm_map_wire: !HOLESOK and new/changed entry"));
continue;
}
@@ -3508,6 +3605,7 @@ vm_map_sync(
vm_object_t object;
vm_ooffset_t offset;
unsigned int last_timestamp;
+ int bdry_idx;
boolean_t failed;
vm_map_lock_read(map);
@@ -3519,14 +3617,26 @@ vm_map_sync(
start = first_entry->start;
end = first_entry->end;
}
+
/*
- * Make a first pass to check for user-wired memory and holes.
+ * Make a first pass to check for user-wired memory, holes,
+ * and partial invalidation of largepage mappings.
*/
for (entry = first_entry; entry->start < end; entry = next_entry) {
- if (invalidate &&
- (entry->eflags & MAP_ENTRY_USER_WIRED) != 0) {
- vm_map_unlock_read(map);
- return (KERN_INVALID_ARGUMENT);
+ if (invalidate) {
+ if ((entry->eflags & MAP_ENTRY_USER_WIRED) != 0) {
+ vm_map_unlock_read(map);
+ return (KERN_INVALID_ARGUMENT);
+ }
+ bdry_idx = (entry->eflags &
+ MAP_ENTRY_SPLIT_BOUNDARY_MASK) >>
+ MAP_ENTRY_SPLIT_BOUNDARY_SHIFT;
+ if (bdry_idx != 0 &&
+ ((start & (pagesizes[bdry_idx] - 1)) != 0 ||
+ (end & (pagesizes[bdry_idx] - 1)) != 0)) {
+ vm_map_unlock_read(map);
+ return (KERN_INVALID_ARGUMENT);
+ }
}
next_entry = vm_map_entry_succ(entry);
if (end > entry->end &&
@@ -3703,7 +3813,8 @@ vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry
int
vm_map_delete(vm_map_t map, vm_offset_t start, vm_offset_t end)
{
- vm_map_entry_t entry, next_entry;
+ vm_map_entry_t entry, next_entry, scratch_entry;
+ int rv;
VM_MAP_ASSERT_LOCKED(map);
@@ -3714,8 +3825,10 @@ vm_map_delete(vm_map_t map, vm_offset_t start, vm_offs
* Find the start of the region, and clip it.
* Step through all entries in this region.
*/
- for (entry = vm_map_lookup_clip_start(map, start, &entry);
- entry->start < end; entry = next_entry) {
+ rv = vm_map_lookup_clip_start(map, start, &entry, &scratch_entry);
+ if (rv != KERN_SUCCESS)
+ return (rv);
+ for (; entry->start < end; entry = next_entry) {
/*
* Wait for wiring or unwiring of an entry to complete.
* Also wait for any system wirings to disappear on
@@ -3739,13 +3852,19 @@ vm_map_delete(vm_map_t map, vm_offset_t start, vm_offs
* Specifically, the entry may have been
* clipped, merged, or deleted.
*/
- next_entry = vm_map_lookup_clip_start(map,
- saved_start, &next_entry);
+ rv = vm_map_lookup_clip_start(map, saved_start,
+ &next_entry, &scratch_entry);
+ if (rv != KERN_SUCCESS)
+ break;
} else
next_entry = entry;
continue;
}
- vm_map_clip_end(map, entry, end);
+
+ /* XXXKIB or delete to the upper superpage boundary ? */
+ rv = vm_map_clip_end(map, entry, end);
+ if (rv != KERN_SUCCESS)
+ break;
next_entry = vm_map_entry_succ(entry);
/*
@@ -3775,7 +3894,7 @@ vm_map_delete(vm_map_t map, vm_offset_t start, vm_offs
*/
vm_map_entry_delete(map, entry);
}
- return (KERN_SUCCESS);
+ return (rv);
}
/*
@@ -4219,7 +4338,8 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_c
new_entry->end = old_entry->end;
new_entry->eflags = old_entry->eflags &
~(MAP_ENTRY_USER_WIRED | MAP_ENTRY_IN_TRANSITION |
- MAP_ENTRY_WRITECNT | MAP_ENTRY_VN_EXEC);
+ MAP_ENTRY_WRITECNT | MAP_ENTRY_VN_EXEC |
+ MAP_ENTRY_SPLIT_BOUNDARY_MASK);
new_entry->protection = old_entry->protection;
new_entry->max_protection = old_entry->max_protection;
new_entry->inheritance = VM_INHERIT_ZERO;
Modified: head/sys/vm/vm_map.h
==============================================================================
--- head/sys/vm/vm_map.h Wed Sep 9 21:57:55 2020 (r365519)
+++ head/sys/vm/vm_map.h Wed Sep 9 22:02:30 2020 (r365520)
@@ -149,6 +149,10 @@ struct vm_map_entry {
#define MAP_ENTRY_STACK_GAP_UP 0x00040000
#define MAP_ENTRY_HEADER 0x00080000
+#define MAP_ENTRY_SPLIT_BOUNDARY_MASK 0x00300000
+
+#define MAP_ENTRY_SPLIT_BOUNDARY_SHIFT 20
+
#ifdef _KERNEL
static __inline u_char
vm_map_entry_behavior(vm_map_entry_t entry)
@@ -373,6 +377,9 @@ long vmspace_resident_count(struct vmspace *vmspace);
#define MAP_CREATE_STACK_GAP_UP 0x00010000
#define MAP_CREATE_STACK_GAP_DN 0x00020000
#define MAP_VN_EXEC 0x00040000
+#define MAP_SPLIT_BOUNDARY_MASK 0x00180000
+
+#define MAP_SPLIT_BOUNDARY_SHIFT 19
/*
* vm_fault option flags
More information about the svn-src-all
mailing list