svn commit: r308554 - in stable/11/sys: kern sys ufs/ffs
Konstantin Belousov
kib at FreeBSD.org
Fri Nov 11 20:18:10 UTC 2016
Author: kib
Date: Fri Nov 11 20:18:08 2016
New Revision: 308554
URL: https://svnweb.freebsd.org/changeset/base/308554
Log:
MFC r308026:
Generalize UFS buffer pager.
MFC r308442:
Tweaks for the buffer pager.
Modified:
stable/11/sys/kern/vfs_bio.c
stable/11/sys/sys/buf.h
stable/11/sys/ufs/ffs/ffs_vnops.c
Directory Properties:
stable/11/ (props changed)
Modified: stable/11/sys/kern/vfs_bio.c
==============================================================================
--- stable/11/sys/kern/vfs_bio.c Fri Nov 11 20:09:54 2016 (r308553)
+++ stable/11/sys/kern/vfs_bio.c Fri Nov 11 20:18:08 2016 (r308554)
@@ -75,9 +75,10 @@ __FBSDID("$FreeBSD$");
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
-#include <vm/vm_pageout.h>
-#include <vm/vm_page.h>
#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pageout.h>
+#include <vm/vm_pager.h>
#include <vm/vm_extern.h>
#include <vm/vm_map.h>
#include <vm/swap_pager.h>
@@ -4691,6 +4692,165 @@ bdata2bio(struct buf *bp, struct bio *bi
        }
}

+static int buf_pager_relbuf;
+SYSCTL_INT(_vfs, OID_AUTO, buf_pager_relbuf, CTLFLAG_RWTUN,
+    &buf_pager_relbuf, 0,
+    "Make buffer pager release buffers after reading");
+
+/*
+ * The buffer pager. It uses buffer reads to validate pages.
+ *
+ * In contrast to the generic local pager from vm/vnode_pager.c, this
+ * pager correctly and easily handles volumes where the underlying
+ * device block size is greater than the machine page size. The
+ * buffer cache transparently extends the requested page run to be
+ * aligned at the block boundary, and does the necessary bogus page
+ * replacements in the addends to avoid obliterating already valid
+ * pages.
+ *
+ * The only non-trivial issue is that the exclusive busy state for
+ * pages, which is assumed by the vm_pager_getpages() interface, is
+ * incompatible with the VMIO buffer cache's desire to share-busy the
+ * pages. This function performs a trivial downgrade of the pages'
+ * state before reading buffers, and a less trivial upgrade from the
+ * shared-busy to excl-busy state after the read.
+ */
+int
+vfs_bio_getpages(struct vnode *vp, vm_page_t *ma, int count,
+    int *rbehind, int *rahead, vbg_get_lblkno_t get_lblkno,
+    vbg_get_blksize_t get_blksize)
+{
+        vm_page_t m;
+        vm_object_t object;
+        struct buf *bp;
+        struct mount *mp;
+        daddr_t lbn, lbnp;
+        vm_ooffset_t la, lb, poff, poffe;
+        long bsize;
+        int bo_bs, br_flags, error, i;
+        bool redo, lpart;
+
+        object = vp->v_object;
+        mp = vp->v_mount;
+        la = IDX_TO_OFF(ma[count - 1]->pindex);
+        if (la >= object->un_pager.vnp.vnp_size)
+                return (VM_PAGER_BAD);
+        lpart = la + PAGE_SIZE > object->un_pager.vnp.vnp_size;
+        bo_bs = get_blksize(vp, get_lblkno(vp, IDX_TO_OFF(ma[0]->pindex)));
+        if (rbehind != NULL) {
+                lb = IDX_TO_OFF(ma[0]->pindex);
+                *rbehind = OFF_TO_IDX(lb - rounddown2(lb, bo_bs));
+        }
+        if (rahead != NULL) {
+                *rahead = OFF_TO_IDX(roundup2(la, bo_bs) - la);
+                if (la + IDX_TO_OFF(*rahead) >= object->un_pager.vnp.vnp_size) {
+                        *rahead = OFF_TO_IDX(roundup2(object->un_pager.
+                            vnp.vnp_size, PAGE_SIZE) - la);
+                }
+        }
+        br_flags = (mp != NULL && (mp->mnt_kern_flag & MNTK_UNMAPPED_BUFS)
+            != 0) ? GB_UNMAPPED : 0;
+        VM_OBJECT_WLOCK(object);
+again:
+        for (i = 0; i < count; i++)
+                vm_page_busy_downgrade(ma[i]);
+        VM_OBJECT_WUNLOCK(object);
+
+        lbnp = -1;
+        for (i = 0; i < count; i++) {
+                m = ma[i];
+
+                /*
+                 * Pages are shared busy and the object lock is not
+                 * owned, which together allow for the pages'
+                 * invalidation.  The racy test for validity avoids
+                 * useless creation of the buffer for the most typical
+                 * case when invalidation is not used in redo or for
+                 * parallel read.  The shared->excl upgrade loop at
+                 * the end of the function catches the race in a
+                 * reliable way (protected by the object lock).
+                 */
+                if (m->valid == VM_PAGE_BITS_ALL)
+                        continue;
+
+                poff = IDX_TO_OFF(m->pindex);
+                poffe = MIN(poff + PAGE_SIZE, object->un_pager.vnp.vnp_size);
+                for (; poff < poffe; poff += bsize) {
+                        lbn = get_lblkno(vp, poff);
+                        if (lbn == lbnp)
+                                goto next_page;
+                        lbnp = lbn;
+
+                        bsize = get_blksize(vp, lbn);
+                        error = bread_gb(vp, lbn, bsize, curthread->td_ucred,
+                            br_flags, &bp);
+                        if (error != 0)
+                                goto end_pages;
+                        if (LIST_EMPTY(&bp->b_dep)) {
+                                /*
+                                 * Invalidation clears m->valid, but
+                                 * may leave B_CACHE flag if the
+                                 * buffer existed at the invalidation
+                                 * time.  In this case, recycle the
+                                 * buffer to do the real read on the
+                                 * next bread() after redo.
+                                 *
+                                 * Otherwise B_RELBUF is not strictly
+                                 * necessary; enable it to reduce buf
+                                 * cache pressure.
+                                 */
+                                if (buf_pager_relbuf ||
+                                    m->valid != VM_PAGE_BITS_ALL)
+                                        bp->b_flags |= B_RELBUF;
+
+                                bp->b_flags &= ~B_NOCACHE;
+                                brelse(bp);
+                        } else {
+                                bqrelse(bp);
+                        }
+                }
+                KASSERT(1 /* racy, enable for debugging */ ||
+                    m->valid == VM_PAGE_BITS_ALL || i == count - 1,
+                    ("buf %d %p invalid", i, m));
+                if (i == count - 1 && lpart) {
+                        VM_OBJECT_WLOCK(object);
+                        if (m->valid != 0 &&
+                            m->valid != VM_PAGE_BITS_ALL)
+                                vm_page_zero_invalid(m, TRUE);
+                        VM_OBJECT_WUNLOCK(object);
+                }
+next_page:;
+        }
+end_pages:
+
+        VM_OBJECT_WLOCK(object);
+        redo = false;
+        for (i = 0; i < count; i++) {
+                vm_page_sunbusy(ma[i]);
+                ma[i] = vm_page_grab(object, ma[i]->pindex, VM_ALLOC_NORMAL);
+
+                /*
+                 * Since the pages were only sbusy while neither the
+                 * buffer nor the object lock was held by us, or were
+                 * reallocated while vm_page_grab() slept for busy
+                 * relinquish, they could have been invalidated.
+                 * Recheck the valid bits and re-read as needed.
+                 *
+                 * Note that the last page is made fully valid in the
+                 * read loop, and partial validity for the page at
+                 * index count - 1 could mean that the page was
+                 * invalidated or removed, so we must restart for
+                 * safety as well.
+                 */
+                if (ma[i]->valid != VM_PAGE_BITS_ALL)
+                        redo = true;
+        }
+        if (redo && error == 0)
+                goto again;
+        VM_OBJECT_WUNLOCK(object);
+        return (error != 0 ? VM_PAGER_ERROR : VM_PAGER_OK);
+}
+
#include "opt_ddb.h"
#ifdef DDB
#include <ddb/ddb.h>
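
A worked example of the readbehind/readahead computation above, with
illustrative numbers that are not part of the commit: assume a volume
with bo_bs = 32768 (32K blocks), PAGE_SIZE = 4096, and a single-page
request for the page at byte offset 20480 (pindex 5) of a large file:

    lb = 20480, rounddown2(lb, 32768) = 0
        => *rbehind = OFF_TO_IDX(20480 - 0) = 5 pages
    la = 20480, roundup2(la, 32768) = 32768
        => *rahead = OFF_TO_IDX(32768 - 20480) = 3 pages

That is, the reported run is extended toward the boundaries of the
enclosing 32K block, and the subsequent check clips *rahead so that
readahead is never reported past the rounded-up end of file
(vnp_size).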
Modified: stable/11/sys/sys/buf.h
==============================================================================
--- stable/11/sys/sys/buf.h Fri Nov 11 20:09:54 2016 (r308553)
+++ stable/11/sys/sys/buf.h Fri Nov 11 20:18:08 2016 (r308554)
@@ -68,6 +68,7 @@ extern struct bio_ops {
} bioops;

struct vm_object;
+struct vm_page;

typedef unsigned char b_xflags_t;
@@ -547,6 +548,12 @@ void bpin(struct buf *);
void bunpin(struct buf *);
void bunpin_wait(struct buf *);

+typedef daddr_t (vbg_get_lblkno_t)(struct vnode *, vm_ooffset_t);
+typedef int (vbg_get_blksize_t)(struct vnode *, daddr_t);
+int vfs_bio_getpages(struct vnode *vp, struct vm_page **ma, int count,
+    int *rbehind, int *rahead, vbg_get_lblkno_t get_lblkno,
+    vbg_get_blksize_t get_blksize);
+
#endif /* _KERNEL */
#endif /* !_SYS_BUF_H_ */
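
The two typedefs above are the entire contract a filesystem has to
satisfy to reuse the generalized buffer pager.  As a sketch (assuming
the usual kernel headers and a hypothetical filesystem with a fixed
block size; the myfs_* names and MYFS_BSIZE are invented for
illustration, not part of the commit), a consumer would look like:

    #define MYFS_BSIZE      32768   /* hypothetical fixed block size */

    static daddr_t
    myfs_gbp_getblkno(struct vnode *vp, vm_ooffset_t off)
    {

            /* Logical block number backing byte offset off. */
            return (off / MYFS_BSIZE);
    }

    static int
    myfs_gbp_getblksz(struct vnode *vp, daddr_t lbn)
    {

            /* Every block is full-sized in this hypothetical fs. */
            return (MYFS_BSIZE);
    }

    static int
    myfs_getpages(struct vop_getpages_args *ap)
    {

            return (vfs_bio_getpages(ap->a_vp, ap->a_m, ap->a_count,
                ap->a_rbehind, ap->a_rahead, myfs_gbp_getblkno,
                myfs_gbp_getblksz));
    }

This is exactly the shape of the FFS hookup in ffs_vnops.c below,
where lblkno() and blksize() supply the real geometry; blksize() in
particular accounts for a short last block (a UFS fragment), which is
why the block size callback takes the logical block number.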
Modified: stable/11/sys/ufs/ffs/ffs_vnops.c
==============================================================================
--- stable/11/sys/ufs/ffs/ffs_vnops.c Fri Nov 11 20:09:54 2016 (r308553)
+++ stable/11/sys/ufs/ffs/ffs_vnops.c Fri Nov 11 20:18:08 2016 (r308554)
@@ -87,7 +87,6 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
-#include <vm/vm_pageout.h>
#include <vm/vnode_pager.h>
#include <ufs/ufs/extattr.h>
@@ -1791,160 +1790,33 @@ SYSCTL_DECL(_vfs_ffs);
static int use_buf_pager = 0;
SYSCTL_INT(_vfs_ffs, OID_AUTO, use_buf_pager, CTLFLAG_RWTUN, &use_buf_pager, 0,
    "Always use buffer pager instead of bmap");
-static int buf_pager_relbuf;
-SYSCTL_INT(_vfs_ffs, OID_AUTO, buf_pager_relbuf, CTLFLAG_RWTUN,
-    &buf_pager_relbuf, 0,
-    "Make buffer pager release buffers after reading");
-/*
- * The FFS pager. It uses buffer reads to validate pages.
- *
- * In contrast to the generic local pager from vm/vnode_pager.c, this
- * pager correctly and easily handles volumes where the underlying
- * device block size is greater than the machine page size. The
- * buffer cache transparently extends the requested page run to be
- * aligned at the block boundary, and does the necessary bogus page
- * replacements in the addends to avoid obliterating already valid
- * pages.
- *
- * The only non-trivial issue is that the exclusive busy state for
- * pages, which is assumed by the vm_pager_getpages() interface, is
- * incompatible with the VMIO buffer cache's desire to share-busy the
- * pages. This function performs a trivial downgrade of the pages'
- * state before reading buffers, and a less trivial upgrade from the
- * shared-busy to excl-busy state after the read.
- */
+static daddr_t
+ffs_gbp_getblkno(struct vnode *vp, vm_ooffset_t off)
+{
+
+        return (lblkno(VFSTOUFS(vp->v_mount)->um_fs, off));
+}
+
+static int
+ffs_gbp_getblksz(struct vnode *vp, daddr_t lbn)
+{
+
+        return (blksize(VFSTOUFS(vp->v_mount)->um_fs, VTOI(vp), lbn));
+}
+
static int
ffs_getpages(struct vop_getpages_args *ap)
{
        struct vnode *vp;
-        vm_page_t *ma, m;
-        vm_object_t object;
-        struct buf *bp;
        struct ufsmount *um;
-        ufs_lbn_t lbn, lbnp;
-        vm_ooffset_t la, lb;
-        long bsize;
-        int bo_bs, count, error, i;
-        bool redo, lpart;

        vp = ap->a_vp;
-        ma = ap->a_m;
-        count = ap->a_count;
+        um = VFSTOUFS(vp->v_mount);

-        um = VFSTOUFS(ap->a_vp->v_mount);
-        bo_bs = um->um_devvp->v_bufobj.bo_bsize;
-        if (!use_buf_pager && bo_bs <= PAGE_SIZE)
-                return (vnode_pager_generic_getpages(vp, ma, count,
+        if (!use_buf_pager && um->um_devvp->v_bufobj.bo_bsize <= PAGE_SIZE)
+                return (vnode_pager_generic_getpages(vp, ap->a_m, ap->a_count,
                    ap->a_rbehind, ap->a_rahead, NULL, NULL));
-
-        object = vp->v_object;
-        la = IDX_TO_OFF(ma[count - 1]->pindex);
-        if (la >= object->un_pager.vnp.vnp_size)
-                return (VM_PAGER_BAD);
-        lpart = la + PAGE_SIZE > object->un_pager.vnp.vnp_size;
-        if (ap->a_rbehind != NULL) {
-                lb = IDX_TO_OFF(ma[0]->pindex);
-                *ap->a_rbehind = OFF_TO_IDX(lb - rounddown2(lb, bo_bs));
-        }
-        if (ap->a_rahead != NULL) {
-                *ap->a_rahead = OFF_TO_IDX(roundup2(la, bo_bs) - la);
-                if (la + IDX_TO_OFF(*ap->a_rahead) >=
-                    object->un_pager.vnp.vnp_size) {
-                        *ap->a_rahead = OFF_TO_IDX(roundup2(object->un_pager.
-                            vnp.vnp_size, PAGE_SIZE) - la);
-                }
-        }
-        VM_OBJECT_WLOCK(object);
-again:
-        for (i = 0; i < count; i++)
-                vm_page_busy_downgrade(ma[i]);
-        VM_OBJECT_WUNLOCK(object);
-
-        lbnp = -1;
-        for (i = 0; i < count; i++) {
-                m = ma[i];
-
-                /*
-                 * Pages are shared busy and the object lock is not
-                 * owned, which together allow for the pages'
-                 * invalidation.  The racy test for validity avoids
-                 * useless creation of the buffer for the most typical
-                 * case when invalidation is not used in redo or for
-                 * parallel read.  The shared->excl upgrade loop at
-                 * the end of the function catches the race in a
-                 * reliable way (protected by the object lock).
-                 */
-                if (m->valid == VM_PAGE_BITS_ALL)
-                        continue;
-
-                lbn = lblkno(um->um_fs, IDX_TO_OFF(m->pindex));
-                if (lbn != lbnp) {
-                        bsize = blksize(um->um_fs, VTOI(vp), lbn);
-                        error = bread_gb(vp, lbn, bsize, NOCRED, GB_UNMAPPED,
-                            &bp);
-                        if (error != 0)
-                                break;
-                        KASSERT(1 /* racy, enable for debugging */ ||
-                            m->valid == VM_PAGE_BITS_ALL || i == count - 1,
-                            ("buf %d %p invalid", i, m));
-                        if (i == count - 1 && lpart) {
-                                VM_OBJECT_WLOCK(object);
-                                if (m->valid != 0 &&
-                                    m->valid != VM_PAGE_BITS_ALL)
-                                        vm_page_zero_invalid(m, TRUE);
-                                VM_OBJECT_WUNLOCK(object);
-                        }
-                        if (LIST_EMPTY(&bp->b_dep)) {
-                                /*
-                                 * Invalidation clears m->valid, but
-                                 * may leave B_CACHE flag if the
-                                 * buffer existed at the invalidation
-                                 * time.  In this case, recycle the
-                                 * buffer to do the real read on the
-                                 * next bread() after redo.
-                                 *
-                                 * Otherwise B_RELBUF is not strictly
-                                 * necessary; enable it to reduce buf
-                                 * cache pressure.
-                                 */
-                                if (buf_pager_relbuf ||
-                                    m->valid != VM_PAGE_BITS_ALL)
-                                        bp->b_flags |= B_RELBUF;
-
-                                bp->b_flags &= ~B_NOCACHE;
-                                brelse(bp);
-                        } else {
-                                bqrelse(bp);
-                        }
-                        lbnp = lbn;
-                }
-        }
-
-        VM_OBJECT_WLOCK(object);
-        redo = false;
-        for (i = 0; i < count; i++) {
-                vm_page_sunbusy(ma[i]);
-                ma[i] = vm_page_grab(object, ma[i]->pindex, VM_ALLOC_NORMAL);
-
-                /*
-                 * Since the pages were only sbusy while neither the
-                 * buffer nor the object lock was held by us, or were
-                 * reallocated while vm_page_grab() slept for busy
-                 * relinquish, they could have been invalidated.
-                 * Recheck the valid bits and re-read as needed.
-                 *
-                 * Note that the last page is made fully valid in the
-                 * read loop, and partial validity for the page at
-                 * index count - 1 could mean that the page was
-                 * invalidated or removed, so we must restart for
-                 * safety as well.
-                 */
-                if (ma[i]->valid != VM_PAGE_BITS_ALL)
-                        redo = true;
-        }
-        if (redo && error == 0)
-                goto again;
-        VM_OBJECT_WUNLOCK(object);
-        return (error != 0 ? VM_PAGER_ERROR : VM_PAGER_OK);
+        return (vfs_bio_getpages(vp, ap->a_m, ap->a_count, ap->a_rbehind,
+            ap->a_rahead, ffs_gbp_getblkno, ffs_gbp_getblksz));
}
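
With this MFC the buf_pager_relbuf knob also moves out of the FFS
sysctl tree: what was vfs.ffs.buf_pager_relbuf in stable/11 is now the
filesystem-independent vfs.buf_pager_relbuf, while vfs.ffs.use_buf_pager
remains FFS-specific.  Both are CTLFLAG_RWTUN, so they can be set as
loader tunables or at runtime, e.g. sysctl vfs.buf_pager_relbuf=1.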