git: 454bc887f250 - main - uipc_shm: Implements fspacectl(2) support
Ka Ho Ng
khng at FreeBSD.org
Thu Aug 12 15:06:30 UTC 2021
The branch main has been updated by khng:
URL: https://cgit.FreeBSD.org/src/commit/?id=454bc887f250ce0bceaabd0ec624d077269d3220
commit 454bc887f250ce0bceaabd0ec624d077269d3220
Author: Ka Ho Ng <khng at FreeBSD.org>
AuthorDate: 2021-08-12 15:01:02 +0000
Commit: Ka Ho Ng <khng at FreeBSD.org>
CommitDate: 2021-08-12 15:04:18 +0000
uipc_shm: Implements fspacectl(2) support
This implements fspacectl(2) support on shared memory objects. The
semantics of SPACECTL_DEALLOC are equivalent to clearing the backing
store and freeing the pages within the affected range. If the call
succeeds, subsequent reads of the affected range return all zeros.
tests/sys/posixshm/posixshm_test.c is expanded to include an
fspacectl(2) functional test.
Sponsored by: The FreeBSD Foundation
Reviewed by: kevans, kib
Differential Revision: https://reviews.freebsd.org/D31490
---
sys/kern/uipc_shm.c | 197 ++++++++++++++++++++++++++++--------
tests/sys/posixshm/posixshm_test.c | 199 +++++++++++++++++++++++++++++++++++++
2 files changed, 354 insertions(+), 42 deletions(-)
diff --git a/sys/kern/uipc_shm.c b/sys/kern/uipc_shm.c
index 58c9f8cec239..16d1e22a898b 100644
--- a/sys/kern/uipc_shm.c
+++ b/sys/kern/uipc_shm.c
@@ -131,6 +131,8 @@ static int shm_dotruncate_locked(struct shmfd *shmfd, off_t length,
void *rl_cookie);
static int shm_copyin_path(struct thread *td, const char *userpath_in,
char **path_out);
+static int shm_deallocate(struct shmfd *shmfd, off_t *offset,
+ off_t *length, int flags);
static fo_rdwr_t shm_read;
static fo_rdwr_t shm_write;
@@ -146,6 +148,7 @@ static fo_mmap_t shm_mmap;
static fo_get_seals_t shm_get_seals;
static fo_add_seals_t shm_add_seals;
static fo_fallocate_t shm_fallocate;
+static fo_fspacectl_t shm_fspacectl;
/* File descriptor operations. */
struct fileops shm_ops = {
@@ -166,6 +169,7 @@ struct fileops shm_ops = {
.fo_get_seals = shm_get_seals,
.fo_add_seals = shm_add_seals,
.fo_fallocate = shm_fallocate,
+ .fo_fspacectl = shm_fspacectl,
.fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE,
};
@@ -626,14 +630,64 @@ out:
return (error);
}
+/*
+ * Zero the byte range [base, end) of the page at index idx in object and
+ * mark that page dirty.  A page that is not resident but that the pager
+ * reports as present is read in first; if no page exists at idx at all,
+ * nothing is done.
+ *
+ * The caller must hold the object write lock.  When 0 is returned the lock
+ * is still held.  When the pager read fails, EIO is returned and the object
+ * lock has ALREADY been dropped here, so the caller must not unlock again.
+ */
+static int
+shm_partial_page_invalidate(vm_object_t object, vm_pindex_t idx, int base,
+ int end)
+{
+ vm_page_t m;
+ int rv;
+
+ VM_OBJECT_ASSERT_WLOCKED(object);
+ KASSERT(base >= 0, ("%s: base %d", __func__, base));
+ KASSERT(end - base <= PAGE_SIZE, ("%s: base %d end %d", __func__, base,
+ end));
+
+retry:
+ /* Look up the page only; VM_ALLOC_NOCREAT means none is created here. */
+ m = vm_page_grab(object, idx, VM_ALLOC_NOCREAT);
+ if (m != NULL) {
+ MPASS(vm_page_all_valid(m));
+ } else if (vm_pager_has_page(object, idx, NULL, NULL)) {
+ m = vm_page_alloc(object, idx,
+ VM_ALLOC_NORMAL | VM_ALLOC_WAITFAIL);
+ /* Allocation failed: start over from the lookup. */
+ if (m == NULL)
+ goto retry;
+ vm_object_pip_add(object, 1);
+ /* The object lock is dropped for the duration of the pager read. */
+ VM_OBJECT_WUNLOCK(object);
+ rv = vm_pager_get_pages(object, &m, 1, NULL, NULL);
+ VM_OBJECT_WLOCK(object);
+ vm_object_pip_wakeup(object);
+ if (rv == VM_PAGER_OK) {
+ /*
+ * Since the page was not resident, and therefore not
+ * recently accessed, immediately enqueue it for
+ * asynchronous laundering. The current operation is
+ * not regarded as an access.
+ */
+ vm_page_launder(m);
+ } else {
+ /* Error return: note that the object lock is released here. */
+ vm_page_free(m);
+ VM_OBJECT_WUNLOCK(object);
+ return (EIO);
+ }
+ }
+ if (m != NULL) {
+ pmap_zero_page_area(m, base, end - base);
+ KASSERT(vm_page_all_valid(m), ("%s: page %p is invalid",
+ __func__, m));
+ vm_page_set_dirty(m);
+ vm_page_xunbusy(m);
+ }
+
+ return (0);
+}
+
static int
shm_dotruncate_locked(struct shmfd *shmfd, off_t length, void *rl_cookie)
{
vm_object_t object;
- vm_page_t m;
- vm_pindex_t idx, nobjsize;
+ vm_pindex_t nobjsize;
vm_ooffset_t delta;
- int base, rv;
+ int base, error;
KASSERT(length >= 0, ("shm_dotruncate: length < 0"));
object = shmfd->shm_object;
@@ -660,45 +714,10 @@ shm_dotruncate_locked(struct shmfd *shmfd, off_t length, void *rl_cookie)
*/
base = length & PAGE_MASK;
if (base != 0) {
- idx = OFF_TO_IDX(length);
-retry:
- m = vm_page_grab(object, idx, VM_ALLOC_NOCREAT);
- if (m != NULL) {
- MPASS(vm_page_all_valid(m));
- } else if (vm_pager_has_page(object, idx, NULL, NULL)) {
- m = vm_page_alloc(object, idx,
- VM_ALLOC_NORMAL | VM_ALLOC_WAITFAIL);
- if (m == NULL)
- goto retry;
- vm_object_pip_add(object, 1);
- VM_OBJECT_WUNLOCK(object);
- rv = vm_pager_get_pages(object, &m, 1, NULL,
- NULL);
- VM_OBJECT_WLOCK(object);
- vm_object_pip_wakeup(object);
- if (rv == VM_PAGER_OK) {
- /*
- * Since the page was not resident,
- * and therefore not recently
- * accessed, immediately enqueue it
- * for asynchronous laundering. The
- * current operation is not regarded
- * as an access.
- */
- vm_page_launder(m);
- } else {
- vm_page_free(m);
- VM_OBJECT_WUNLOCK(object);
- return (EIO);
- }
- }
- if (m != NULL) {
- pmap_zero_page_area(m, base, PAGE_SIZE - base);
- KASSERT(vm_page_all_valid(m),
- ("shm_dotruncate: page %p is invalid", m));
- vm_page_set_dirty(m);
- vm_page_xunbusy(m);
- }
+ error = shm_partial_page_invalidate(object,
+ OFF_TO_IDX(length), base, PAGE_SIZE);
+ if (error)
+ return (error);
}
delta = IDX_TO_OFF(object->size - nobjsize);
@@ -1874,6 +1893,100 @@ shm_get_seals(struct file *fp, int *seals)
return (0);
}
+/*
+ * Deallocate the byte range [*offset, *offset + *length) of a shared memory
+ * object so that it subsequently reads back as zeros.
+ *
+ * Pages lying entirely inside the range are removed from the object; a page
+ * that is only partially covered, at either end of the range, has just the
+ * covered bytes zeroed via shm_partial_page_invalidate().
+ *
+ * On return *offset has been advanced and *length reduced by the number of
+ * bytes processed so far; this is done on the error path as well.
+ *
+ * Returns 0 on success or the error reported by
+ * shm_partial_page_invalidate().  The flags argument is not used.
+ */
+static int
+shm_deallocate(struct shmfd *shmfd, off_t *offset, off_t *length, int flags)
+{
+	vm_object_t object;
+	vm_pindex_t pistart, pi, piend;
+	vm_ooffset_t off, len;
+	int startofs, endofs, end;
+	int error;
+
+	off = *offset;
+	len = *length;
+	KASSERT(off + len <= (vm_ooffset_t)OFF_MAX, ("off + len overflows"));
+	object = shmfd->shm_object;
+	startofs = off & PAGE_MASK;
+	endofs = (off + len) & PAGE_MASK;
+	pistart = OFF_TO_IDX(off);
+	piend = OFF_TO_IDX(off + len);
+	/* Index of the first page that begins at or after off. */
+	pi = OFF_TO_IDX(off + PAGE_MASK);
+	error = 0;
+
+	VM_OBJECT_WLOCK(object);
+
+	/*
+	 * Leading partial page: zero from startofs up to the end of the page,
+	 * or up to the end of the range when it ends inside the same page.
+	 */
+	if (startofs != 0) {
+		end = pistart != piend ? PAGE_SIZE : endofs;
+		error = shm_partial_page_invalidate(object, pistart, startofs,
+		    end);
+		if (error)
+			goto out;
+		off += end - startofs;
+		len -= end - startofs;
+	}
+
+	/* Pages fully covered by the range are dropped outright. */
+	if (pi < piend) {
+		vm_object_page_remove(object, pi, piend, 0);
+		off += IDX_TO_OFF(piend - pi);
+		len -= IDX_TO_OFF(piend - pi);
+	}
+
+	/*
+	 * Trailing partial page.  Testing "pi <= piend" instead of
+	 * "pistart != piend" also handles a page-aligned range shorter than
+	 * one page (pi == pistart == piend), which no other branch covers.
+	 * A range confined to one page with a non-zero start offset was
+	 * already handled above and is skipped here, since in that case
+	 * pi == pistart + 1 > piend.
+	 */
+	if (endofs != 0 && pi <= piend) {
+		error = shm_partial_page_invalidate(object, piend, 0, endofs);
+		if (error)
+			goto out;
+		off += endofs;
+		len -= endofs;
+	}
+
+	VM_OBJECT_WUNLOCK(object);
+out:
+	/*
+	 * The error path jumps here with the object lock already released:
+	 * shm_partial_page_invalidate() unlocks the object before it returns
+	 * an error, so unlocking again would be a double unlock.
+	 */
+	*offset = off;
+	*length = len;
+	return (error);
+}
+
+/*
+ * fo_fspacectl handler for shared memory objects.
+ *
+ * Only SPACECTL_DEALLOC with flags == 0 is accepted.  Any other command, a
+ * negative offset, a non-positive length, or a range whose end would exceed
+ * OFF_MAX is rejected with EINVAL before any lock is taken.  The requested
+ * range is write-locked while the operation runs, and EPERM is returned if
+ * the object has F_SEAL_WRITE set.
+ *
+ * On success *offset and *length are overwritten with the values handed
+ * back by shm_deallocate(); on any failure they are left untouched.
+ * NOTE(review): this means partial progress made by shm_deallocate() before
+ * an error is not reported to the caller -- confirm that this is intended.
+ */
+static int
+shm_fspacectl(struct file *fp, int cmd, off_t *offset, off_t *length, int flags,
+ struct ucred *active_cred, struct thread *td)
+{
+ void *rl_cookie;
+ struct shmfd *shmfd;
+ off_t off, len;
+ int error;
+
+ /* This assumes that the caller already checked for overflow. */
+ error = EINVAL;
+ shmfd = fp->f_data;
+ off = *offset;
+ len = *length;
+
+ /* "len > OFF_MAX - off" rejects ranges whose end would pass OFF_MAX. */
+ if (cmd != SPACECTL_DEALLOC || off < 0 || len <= 0 ||
+ len > OFF_MAX - off || flags != 0)
+ return (EINVAL);
+
+ rl_cookie = rangelock_wlock(&shmfd->shm_rl, off, off + len,
+ &shmfd->shm_mtx);
+ switch (cmd) {
+ case SPACECTL_DEALLOC:
+ /* A write-sealed object must not be modified. */
+ if ((shmfd->shm_seals & F_SEAL_WRITE) != 0) {
+ error = EPERM;
+ break;
+ }
+ error = shm_deallocate(shmfd, &off, &len, flags);
+ if (error != 0)
+ break;
+ *offset = off;
+ *length = len;
+ break;
+ default:
+ /* cmd was validated above, so no other value can reach here. */
+ __assert_unreachable();
+ }
+ rangelock_unlock(&shmfd->shm_rl, rl_cookie, &shmfd->shm_mtx);
+ return (error);
+}
+
+
static int
shm_fallocate(struct file *fp, off_t offset, off_t len, struct thread *td)
{
diff --git a/tests/sys/posixshm/posixshm_test.c b/tests/sys/posixshm/posixshm_test.c
index d1c1b14aef65..eddb1d2d8250 100644
--- a/tests/sys/posixshm/posixshm_test.c
+++ b/tests/sys/posixshm/posixshm_test.c
@@ -2,6 +2,11 @@
* Copyright (c) 2006 Robert N. M. Watson
* All rights reserved.
*
+ * Copyright (c) 2021 The FreeBSD Foundation
+ *
+ * Portions of this software were developed by Ka Ho Ng
+ * under sponsorship from the FreeBSD Foundation.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -173,6 +178,126 @@ verify_object(const char *path, char expected_value)
close(fd);
}
+/* Size, in pages, of the shared memory object used by the fspacectl test. */
+static off_t shm_max_pages = 32;
+/* Byte shm_fill() writes; check_content_dealloc() expects it outside the hole. */
+static const char byte_to_fill = 0x5f;
+
+/*
+ * Overwrite len bytes of fd, starting at offset, with byte_to_fill.
+ * Data is written in chunks of at most PAGE_SIZE bytes.
+ *
+ * Returns 0 on success and 1 if the buffer cannot be allocated or a
+ * pwrite(2) call does not write the full chunk.
+ */
+static int
+shm_fill(int fd, off_t offset, off_t len)
+{
+	char *page;
+	size_t chunk;
+	int rc;
+
+	page = malloc(PAGE_SIZE);
+	if (page == NULL)
+		return (1);
+
+	/* Every chunk carries the same pattern, so fill the buffer once. */
+	memset(page, byte_to_fill, PAGE_SIZE);
+
+	for (rc = 0; len > 0; len -= chunk, offset += chunk) {
+		chunk = len < (off_t)PAGE_SIZE ? len : PAGE_SIZE;
+		if (pwrite(fd, page, chunk, offset) != (ssize_t)chunk) {
+			rc = 1;
+			break;
+		}
+	}
+
+	free(page);
+	return (rc);
+}
+
+/*
+ * Compare len bytes of fd, starting at offset, against the PAGE_SIZE-byte
+ * pattern in expect.  buf is a scratch buffer of at least PAGE_SIZE bytes.
+ *
+ * Returns 0 if every byte matches, 1 on a short read or a mismatch.
+ */
+static int
+check_region_content(int fd, char *buf, const char *expect, off_t offset,
+    off_t len)
+{
+	size_t blen;
+
+	while (len > 0) {
+		blen = len < (off_t)PAGE_SIZE ? len : PAGE_SIZE;
+		if (pread(fd, buf, blen, offset) != (ssize_t)blen)
+			return (1);
+		if (memcmp(buf, expect, blen) != 0)
+			return (1);
+		len -= blen;
+		offset += blen;
+	}
+	return (0);
+}
+
+/*
+ * Verify the contents of fd after a SPACECTL_DEALLOC of
+ * [hole_start, hole_start + hole_len) on an object of shm_sz bytes that was
+ * previously filled with byte_to_fill:
+ *   - the hole (clamped to shm_sz) must read as zeros,
+ *   - everything before and after the hole must still hold byte_to_fill,
+ *   - the object size must still be shm_sz.
+ *
+ * Returns 0 if all checks pass, 1 on allocation failure or a content
+ * mismatch, and -1 if fstat(2) fails or the size differs (the size result
+ * takes precedence over a content mismatch).
+ */
+static int
+check_content_dealloc(int fd, off_t hole_start, off_t hole_len, off_t shm_sz)
+{
+	struct stat statbuf;
+	char *buf, *sblk;
+	off_t hole_end;
+	int error;
+
+	error = 0;
+	buf = malloc(PAGE_SIZE * 2);
+	if (buf == NULL)
+		return (1);
+	/* Second half of the allocation holds the expected pattern. */
+	sblk = buf + PAGE_SIZE;
+
+	/* A hole reaching past the end of the object is clamped to it. */
+	if ((uint64_t)hole_start + hole_len > (uint64_t)shm_sz)
+		hole_len = shm_sz - hole_start;
+	hole_end = hole_start + hole_len;
+
+	/*
+	 * Check hole is zeroed.
+	 */
+	memset(sblk, 0, PAGE_SIZE);
+	if (check_region_content(fd, buf, sblk, hole_start, hole_len) != 0)
+		error = 1;
+
+	memset(sblk, byte_to_fill, PAGE_SIZE);
+
+	/*
+	 * Check file region before hole is still filled.
+	 */
+	if (check_region_content(fd, buf, sblk, 0, hole_start) != 0)
+		error = 1;
+
+	/*
+	 * Check file region after hole is still filled.
+	 */
+	if (check_region_content(fd, buf, sblk, hole_end,
+	    shm_sz - hole_end) != 0)
+		error = 1;
+
+	/*
+	 * Check file size matches with expected file size.  statbuf is only
+	 * examined when fstat(2) succeeded.
+	 */
+	if (fstat(fd, &statbuf) == -1 || statbuf.st_size != shm_sz)
+		error = -1;
+
+	free(buf);
+	return (error);
+}
+
ATF_TC_WITHOUT_HEAD(remap_object);
ATF_TC_BODY(remap_object, tc)
{
@@ -958,6 +1083,79 @@ ATF_TC_BODY(fallocate, tc)
close(fd);
}
+/*
+ * Functional test for fspacectl(2) SPACECTL_DEALLOC on a shared memory
+ * object.  For each case the object is refilled with byte_to_fill, a range
+ * is deallocated, and check_content_dealloc() verifies that exactly that
+ * range reads as zeros while the object size is unchanged.  Page-aligned
+ * and unaligned start offsets are each tried with a range ending inside the
+ * object, at OFF_MAX, and one page past the end of the object.
+ */
+ATF_TC_WITHOUT_HEAD(fspacectl);
+ATF_TC_BODY(fspacectl, tc)
+{
+	struct spacectl_range range;
+	off_t offset, length, shm_sz;
+	int fd, error;
+
+	shm_sz = shm_max_pages << PAGE_SHIFT;
+
+	fd = shm_open("/testtest", O_RDWR|O_CREAT, 0666);
+	ATF_REQUIRE_MSG(fd >= 0, "shm_open failed; errno:%d", errno);
+	/*
+	 * Remove the name right away so that the object does not outlive the
+	 * test, even if a later ATF_REQUIRE aborts it; the open descriptor
+	 * keeps the object usable.
+	 */
+	ATF_CHECK_MSG(shm_unlink("/testtest") == 0,
+	    "shm_unlink failed; errno=%d", errno);
+	ATF_REQUIRE_MSG((error = posix_fallocate(fd, 0, shm_sz)) == 0,
+	    "posix_fallocate failed; error=%d", error);
+
+	/* Aligned fspacectl(fd, SPACECTL_DEALLOC, ...) */
+	ATF_REQUIRE(shm_fill(fd, 0, shm_sz) == 0);
+	range.r_offset = offset = PAGE_SIZE;
+	range.r_len = length = ((shm_max_pages - 1) << PAGE_SHIFT) -
+	    range.r_offset;
+	ATF_CHECK_MSG(fspacectl(fd, SPACECTL_DEALLOC, &range, 0, &range) == 0,
+	    "Aligned fspacectl failed; errno=%d", errno);
+	ATF_CHECK_MSG(check_content_dealloc(fd, offset, length, shm_sz) == 0,
+	    "Aligned fspacectl content checking failed");
+
+	/* Unaligned fspacectl(fd, SPACECTL_DEALLOC, ...) */
+	ATF_REQUIRE(shm_fill(fd, 0, shm_sz) == 0);
+	range.r_offset = offset = 1 << (PAGE_SHIFT - 1);
+	range.r_len = length = ((shm_max_pages - 1) << PAGE_SHIFT) +
+	    (1 << (PAGE_SHIFT - 1)) - offset;
+	ATF_CHECK_MSG(fspacectl(fd, SPACECTL_DEALLOC, &range, 0, &range) == 0,
+	    "Unaligned fspacectl failed; errno=%d", errno);
+	ATF_CHECK_MSG(check_content_dealloc(fd, offset, length, shm_sz) == 0,
+	    "Unaligned fspacectl content checking failed");
+
+	/*
+	 * Aligned fspacectl(fd, SPACECTL_DEALLOC, ...) to OFF_MAX.
+	 * The start must be PAGE_SIZE; PAGE_SHIFT would be an unaligned
+	 * byte offset.
+	 */
+	ATF_REQUIRE(shm_fill(fd, 0, shm_sz) == 0);
+	range.r_offset = offset = PAGE_SIZE;
+	range.r_len = length = OFF_MAX - offset;
+	ATF_CHECK_MSG(fspacectl(fd, SPACECTL_DEALLOC, &range, 0, &range) == 0,
+	    "Aligned fspacectl to OFF_MAX failed; errno=%d", errno);
+	ATF_CHECK_MSG(check_content_dealloc(fd, offset, length, shm_sz) == 0,
+	    "Aligned fspacectl to OFF_MAX content checking failed");
+
+	/* Unaligned fspacectl(fd, SPACECTL_DEALLOC, ...) to OFF_MAX */
+	ATF_REQUIRE(shm_fill(fd, 0, shm_sz) == 0);
+	range.r_offset = offset = 1 << (PAGE_SHIFT - 1);
+	range.r_len = length = OFF_MAX - offset;
+	ATF_CHECK_MSG(fspacectl(fd, SPACECTL_DEALLOC, &range, 0, &range) == 0,
+	    "Unaligned fspacectl to OFF_MAX failed; errno=%d", errno);
+	ATF_CHECK_MSG(check_content_dealloc(fd, offset, length, shm_sz) == 0,
+	    "Unaligned fspacectl to OFF_MAX content checking failed");
+
+	/* Aligned fspacectl(fd, SPACECTL_DEALLOC, ...) past shm_sz */
+	ATF_REQUIRE(shm_fill(fd, 0, shm_sz) == 0);
+	range.r_offset = offset = PAGE_SIZE;
+	range.r_len = length = ((shm_max_pages + 1) << PAGE_SHIFT) - offset;
+	ATF_CHECK_MSG(fspacectl(fd, SPACECTL_DEALLOC, &range, 0, &range) == 0,
+	    "Aligned fspacectl past shm_sz failed; errno=%d", errno);
+	ATF_CHECK_MSG(check_content_dealloc(fd, offset, length, shm_sz) == 0,
+	    "Aligned fspacectl past shm_sz content checking failed");
+
+	/* Unaligned fspacectl(fd, SPACECTL_DEALLOC, ...) past shm_sz */
+	ATF_REQUIRE(shm_fill(fd, 0, shm_sz) == 0);
+	range.r_offset = offset = 1 << (PAGE_SHIFT - 1);
+	range.r_len = length = ((shm_max_pages + 1) << PAGE_SHIFT) - offset;
+	ATF_CHECK_MSG(fspacectl(fd, SPACECTL_DEALLOC, &range, 0, &range) == 0,
+	    "Unaligned fspacectl past shm_sz failed; errno=%d", errno);
+	ATF_CHECK_MSG(check_content_dealloc(fd, offset, length, shm_sz) == 0,
+	    "Unaligned fspacectl past shm_sz content checking failed");
+
+	ATF_REQUIRE(close(fd) == 0);
+}
+
static int
shm_open_large(int psind, int policy, size_t sz)
{
@@ -1704,6 +1902,7 @@ ATF_TP_ADD_TCS(tp)
ATF_TP_ADD_TC(tp, cloexec);
ATF_TP_ADD_TC(tp, mode);
ATF_TP_ADD_TC(tp, fallocate);
+ ATF_TP_ADD_TC(tp, fspacectl);
ATF_TP_ADD_TC(tp, largepage_basic);
ATF_TP_ADD_TC(tp, largepage_config);
ATF_TP_ADD_TC(tp, largepage_mmap);
More information about the dev-commits-src-main
mailing list