svn commit: r359473 - head/sys/kern
Konstantin Belousov
kib at FreeBSD.org
Mon Mar 30 22:22:37 UTC 2020
Author: kib
Date: Mon Mar 30 22:13:32 2020
New Revision: 359473
URL: https://svnweb.freebsd.org/changeset/base/359473
Log:
kern_sendfile.c: fix bugs with handling of busy page states.
- Do not call into a vnode pager while leaving some pages from the
same block as the current run, xbusy. This immediately deadlocks if
pager needs to instantiate the buffer.
- Only relookup bogus pages after io finished, otherwise we might
obliterate the valid pages by out of date disk content. While there,
expand the comment explaining this peculiarity.
- Do not double-unbusy on error. Split unbusy for error case, which
is left in the sendfile_swapin(), from the more properly coded
normal case in sendfile_iodone().
- Add an XXXKIB comment explaining the serious bug in the validation
algorithm, not fixed by this patch series.
PR: 244713
Reviewed by: glebius, markj
Tested by: pho
Sponsored by: The FreeBSD Foundation
MFC after: 1 week
Differential revision: https://reviews.freebsd.org/D24038
Modified:
head/sys/kern/kern_sendfile.c
Modified: head/sys/kern/kern_sendfile.c
==============================================================================
--- head/sys/kern/kern_sendfile.c Mon Mar 30 22:07:11 2020 (r359472)
+++ head/sys/kern/kern_sendfile.c Mon Mar 30 22:13:32 2020 (r359473)
@@ -92,6 +92,7 @@ struct sf_io {
struct socket *so;
struct mbuf *m;
vm_object_t obj;
+ vm_pindex_t pindex0;
#ifdef KERN_TLS
struct ktls_session *tls;
#endif
@@ -270,17 +271,42 @@ sendfile_iowait(struct sf_io *sfio, const char *wmesg)
* I/O completion callback.
*/
static void
-sendfile_iodone(void *arg, vm_page_t *pg, int count, int error)
+sendfile_iodone(void *arg, vm_page_t *pa, int count, int error)
{
struct sf_io *sfio = arg;
struct socket *so;
+ int i;
- for (int i = 0; i < count; i++)
- if (pg[i] != bogus_page)
- vm_page_xunbusy_unchecked(pg[i]);
-
- if (error)
+ if (error != 0) {
sfio->error = error;
+ /*
+ * Restore of the pg[] elements is done by
+ * sendfile_swapin().
+ */
+ } else {
+ /*
+ * Restore the valid page pointers. They are already
+ * unbusied, but still wired. For error != 0 case,
+ * sendfile_swapin() handles unbusy.
+ *
+ * XXXKIB since pages are only wired, and we do not
+ * own the object lock, other users might have
+ * invalidated them in meantime. Similarly, after we
+ * unbusied the swapped-in pages, they can become
+ * invalid under us.
+ */
+ for (i = 0; i < count; i++) {
+ if (pa[i] == bogus_page) {
+ pa[i] = vm_page_relookup(sfio->obj,
+ sfio->pindex0 + i + (sfio->pa - pa));
+ KASSERT(pa[i] != NULL,
+ ("%s: page %p[%d] disappeared",
+ __func__, pa, i));
+ } else {
+ vm_page_xunbusy_unchecked(pa[i]);
+ }
+ }
+ }
if (!refcount_release(&sfio->nios))
return;
@@ -361,11 +387,13 @@ static int
sendfile_swapin(vm_object_t obj, struct sf_io *sfio, int *nios, off_t off,
off_t len, int npages, int rhpages, int flags)
{
- vm_page_t *pa = sfio->pa;
- int grabbed;
+ vm_page_t *pa;
+ int a, count, count1, grabbed, i, j, rv;
+ pa = sfio->pa;
*nios = 0;
flags = (flags & SF_NODISKIO) ? VM_ALLOC_NOWAIT : 0;
+ sfio->pindex0 = OFF_TO_IDX(off);
/*
* First grab all the pages and wire them. Note that we grab
@@ -380,9 +408,7 @@ sendfile_swapin(vm_object_t obj, struct sf_io *sfio, i
rhpages = 0;
}
- for (int i = 0; i < npages;) {
- int j, a, count, rv;
-
+ for (i = 0; i < npages;) {
/* Skip valid pages. */
if (vm_page_is_valid(pa[i], vmoff(i, off) & PAGE_MASK,
xfsize(i, npages, off, len))) {
@@ -422,19 +448,41 @@ sendfile_swapin(vm_object_t obj, struct sf_io *sfio, i
count = min(a + 1, npages - i);
/*
- * We should not pagein into a valid page, thus we first trim
- * any valid pages off the end of request, and substitute
- * to bogus_page those, that are in the middle.
+ * We should not pagein into a valid page because
+ * there might be still unfinished write tracked by
+ * e.g. a buffer, thus we substitute any valid pages
+ * with the bogus one.
+ *
+ * We must not leave around xbusy pages which are not
+ * part of the run passed to vm_pager_getpages(),
+ * otherwise pager might deadlock waiting for the busy
+ * status of the page, e.g. if it constitues the
+ * buffer needed to validate other page.
+ *
+ * First trim the end of the run consisting of the
+ * valid pages, then replace the rest of the valid
+ * with bogus.
*/
+ count1 = count;
for (j = i + count - 1; j > i; j--) {
if (vm_page_is_valid(pa[j], vmoff(j, off) & PAGE_MASK,
xfsize(j, npages, off, len))) {
+ vm_page_xunbusy(pa[j]);
+ SFSTAT_INC(sf_pages_valid);
count--;
- rhpages = 0;
- } else
+ } else {
break;
+ }
}
- for (j = i + 1; j < i + count - 1; j++)
+
+ /*
+ * The last page in the run pa[i + count - 1] is
+ * guaranteed to be invalid by the trim above, so it
+ * is not replaced with bogus, thus -1 in the loop end
+ * condition.
+ */
+ MPASS(pa[i + count - 1]->valid != VM_PAGE_BITS_ALL);
+ for (j = i + 1; j < i + count - 1; j++) {
if (vm_page_is_valid(pa[j], vmoff(j, off) & PAGE_MASK,
xfsize(j, npages, off, len))) {
vm_page_xunbusy(pa[j]);
@@ -442,6 +490,7 @@ sendfile_swapin(vm_object_t obj, struct sf_io *sfio, i
SFSTAT_INC(sf_pages_bogus);
pa[j] = bogus_page;
}
+ }
refcount_acquire(&sfio->nios);
rv = vm_pager_get_pages_async(obj, pa + i, count, NULL,
@@ -453,12 +502,16 @@ sendfile_swapin(vm_object_t obj, struct sf_io *sfio, i
/*
* Perform full pages recovery before returning EIO.
* Pages from 0 to npages are wired.
- * Pages from i to npages are also busied.
* Pages from (i + 1) to (i + count - 1) may be
* substituted to bogus page, and not busied.
+ * Pages from (i + count) to (i + count1 - 1) are
+ * not busied.
+ * Rest of the pages from i to npages are busied.
*/
for (j = 0; j < npages; j++) {
- if (j > i && j < i + count - 1 &&
+ if (j >= i + count && j < i + count1)
+ ;
+ else if (j > i && j < i + count - 1 &&
pa[j] == bogus_page)
pa[j] = vm_page_relookup(obj,
OFF_TO_IDX(vmoff(j, off)));
@@ -477,19 +530,7 @@ sendfile_swapin(vm_object_t obj, struct sf_io *sfio, i
if (i + count == npages)
SFSTAT_ADD(sf_rhpages_read, rhpages);
- /*
- * Restore the valid page pointers. They are already
- * unbusied, but still wired.
- */
- for (j = i + 1; j < i + count - 1; j++)
- if (pa[j] == bogus_page) {
- pa[j] = vm_page_relookup(obj,
- OFF_TO_IDX(vmoff(j, off)));
- KASSERT(pa[j], ("%s: page %p[%d] disappeared",
- __func__, pa, j));
-
- }
- i += count;
+ i += count1;
(*nios)++;
}
More information about the svn-src-head
mailing list