svn commit: r330991 - in stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs: . sys
Andriy Gapon
avg at FreeBSD.org
Thu Mar 15 12:05:19 UTC 2018
Author: avg
Date: Thu Mar 15 12:05:17 2018
New Revision: 330991
URL: https://svnweb.freebsd.org/changeset/base/330991
Log:
MFC r329363: read-behind / read-ahead support for zfs_getpages()
Modified:
stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
Directory Properties:
stable/11/ (props changed)
Modified: stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
==============================================================================
--- stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c Thu Mar 15 11:06:04 2018 (r330990)
+++ stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c Thu Mar 15 12:05:17 2018 (r330991)
@@ -1518,6 +1518,188 @@ dmu_write_pages(objset_t *os, uint64_t object, uint64_
dmu_buf_rele_array(dbp, numbufs, FTAG);
return (err);
}
+
+int
+dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count,
+ int *rbehind, int *rahead, int last_size)
+{
+ struct sf_buf *sf;
+ vm_object_t vmobj;
+ vm_page_t m;
+ dmu_buf_t **dbp;
+ dmu_buf_t *db;
+ caddr_t va;
+ int numbufs, i;
+ int bufoff, pgoff, tocpy;
+ int mi, di;
+ int err;
+
+ ASSERT3U(ma[0]->pindex + count - 1, ==, ma[count - 1]->pindex);
+ ASSERT(last_size <= PAGE_SIZE);
+
+ err = dmu_buf_hold_array(os, object, IDX_TO_OFF(ma[0]->pindex),
+ IDX_TO_OFF(count - 1) + last_size, TRUE, FTAG, &numbufs, &dbp);
+ if (err != 0)
+ return (err);
+
+#ifdef DEBUG
+ IMPLY(last_size < PAGE_SIZE, *rahead == 0);
+ if (dbp[0]->db_offset != 0 || numbufs > 1) {
+ for (i = 0; i < numbufs; i++) {
+ ASSERT(ISP2(dbp[i]->db_size));
+ ASSERT((dbp[i]->db_offset % dbp[i]->db_size) == 0);
+ ASSERT3U(dbp[i]->db_size, ==, dbp[0]->db_size);
+ }
+ }
+#endif
+
+ vmobj = ma[0]->object;
+ zfs_vmobject_wlock(vmobj);
+
+ db = dbp[0];
+ for (i = 0; i < *rbehind; i++) {
+ m = vm_page_grab(vmobj, ma[0]->pindex - 1 - i,
+ VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT | VM_ALLOC_NOBUSY);
+ if (m == NULL)
+ break;
+ if (m->valid != 0) {
+ ASSERT3U(m->valid, ==, VM_PAGE_BITS_ALL);
+ break;
+ }
+ ASSERT(m->dirty == 0);
+ ASSERT(!pmap_page_is_mapped(m));
+
+ ASSERT(db->db_size > PAGE_SIZE);
+ bufoff = IDX_TO_OFF(m->pindex) % db->db_size;
+ va = zfs_map_page(m, &sf);
+ bcopy((char *)db->db_data + bufoff, va, PAGESIZE);
+ zfs_unmap_page(sf);
+ m->valid = VM_PAGE_BITS_ALL;
+ vm_page_lock(m);
+ if ((m->busy_lock & VPB_BIT_WAITERS) != 0)
+ vm_page_activate(m);
+ else
+ vm_page_deactivate(m);
+ vm_page_unlock(m);
+ }
+ *rbehind = i;
+
+ bufoff = IDX_TO_OFF(ma[0]->pindex) % db->db_size;
+ pgoff = 0;
+ for (mi = 0, di = 0; mi < count && di < numbufs; ) {
+ if (pgoff == 0) {
+ m = ma[mi];
+ vm_page_assert_xbusied(m);
+ ASSERT(m->valid == 0);
+ ASSERT(m->dirty == 0);
+ ASSERT(!pmap_page_is_mapped(m));
+ va = zfs_map_page(m, &sf);
+ }
+ if (bufoff == 0)
+ db = dbp[di];
+
+ ASSERT3U(IDX_TO_OFF(m->pindex) + pgoff, ==,
+ db->db_offset + bufoff);
+
+ /*
+ * We do not need to clamp the copy size by the file
+ * size as the last block is zero-filled beyond the
+ * end of file anyway.
+ */
+ tocpy = MIN(db->db_size - bufoff, PAGESIZE - pgoff);
+ bcopy((char *)db->db_data + bufoff, va + pgoff, tocpy);
+
+ pgoff += tocpy;
+ ASSERT(pgoff <= PAGESIZE);
+ if (pgoff == PAGESIZE) {
+ zfs_unmap_page(sf);
+ m->valid = VM_PAGE_BITS_ALL;
+ ASSERT(mi < count);
+ mi++;
+ pgoff = 0;
+ }
+
+ bufoff += tocpy;
+ ASSERT(bufoff <= db->db_size);
+ if (bufoff == db->db_size) {
+ ASSERT(di < numbufs);
+ di++;
+ bufoff = 0;
+ }
+ }
+
+#ifdef DEBUG
+ /*
+ * Three possibilities:
+ * - last requested page ends at a buffer boundary and, thus,
+ * all pages and buffers have been iterated;
+ * - all requested pages are filled, but the last buffer
+ * has not been exhausted;
+ * the read-ahead is possible only in this case;
+ * - all buffers have been read, but the last page has not been
+ * fully filled;
+ * this is only possible if the file has only a single buffer
+ * with a size that is not a multiple of the page size.
+ */
+ if (mi == count) {
+ ASSERT(di >= numbufs - 1);
+ IMPLY(*rahead != 0, di == numbufs - 1);
+ IMPLY(*rahead != 0, bufoff != 0);
+ ASSERT(pgoff == 0);
+ }
+ if (di == numbufs) {
+ ASSERT(mi >= count - 1);
+ ASSERT(*rahead == 0);
+ IMPLY(pgoff == 0, mi == count);
+ if (pgoff != 0) {
+ ASSERT(mi == count - 1);
+ ASSERT((dbp[0]->db_size & PAGE_MASK) != 0);
+ }
+ }
+#endif
+ if (pgoff != 0) {
+ bzero(va + pgoff, PAGESIZE - pgoff);
+ zfs_unmap_page(sf);
+ m->valid = VM_PAGE_BITS_ALL;
+ }
+
+ for (i = 0; i < *rahead; i++) {
+ m = vm_page_grab(vmobj, ma[count - 1]->pindex + 1 + i,
+ VM_ALLOC_NORMAL | VM_ALLOC_NOWAIT | VM_ALLOC_NOBUSY);
+ if (m == NULL)
+ break;
+ if (m->valid != 0) {
+ ASSERT3U(m->valid, ==, VM_PAGE_BITS_ALL);
+ break;
+ }
+ ASSERT(m->dirty == 0);
+ ASSERT(!pmap_page_is_mapped(m));
+
+ ASSERT(db->db_size > PAGE_SIZE);
+ bufoff = IDX_TO_OFF(m->pindex) % db->db_size;
+ tocpy = MIN(db->db_size - bufoff, PAGESIZE);
+ va = zfs_map_page(m, &sf);
+ bcopy((char *)db->db_data + bufoff, va, tocpy);
+ if (tocpy < PAGESIZE) {
+ ASSERT(i == *rahead - 1);
+ ASSERT((db->db_size & PAGE_MASK) != 0);
+ bzero(va + tocpy, PAGESIZE - tocpy);
+ }
+ zfs_unmap_page(sf);
+ m->valid = VM_PAGE_BITS_ALL;
+ vm_page_lock(m);
+ if ((m->busy_lock & VPB_BIT_WAITERS) != 0)
+ vm_page_activate(m);
+ else
+ vm_page_deactivate(m);
+ vm_page_unlock(m);
+ }
+ *rahead = i;
+ zfs_vmobject_wunlock(vmobj);
+
+ dmu_buf_rele_array(dbp, numbufs, FTAG);
+ return (0);
+}
#endif /* illumos */
#endif /* _KERNEL */
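For readers tracing the new dmu_read_pages() above: its core is a two-cursor loop that walks the requested pages (mi/pgoff) and the held DMU buffers (di/bufoff) in lockstep, copying whichever span is smaller at each step, so it handles both blocks larger than a page and small files whose single block is smaller than a page. Below is a minimal userland sketch of that loop under assumed simplifications (plain byte arrays instead of vm_page_t/dmu_buf_t, memcpy instead of sf_buf mappings, 4 KB pages); it is not code from the tree.

#include <stddef.h>
#include <string.h>

#define PG	4096			/* stand-in for PAGE_SIZE */

/*
 * Copy "nbufs" source buffers (sizes in bufsz[], starting at byte
 * "bufoff" of the first one) into "npages" destination pages, the way
 * dmu_read_pages() fills the requested pages from the held dbufs.
 */
static void
copy_bufs_to_pages(char *const *pages, int npages,
    char *const *bufs, const size_t *bufsz, int nbufs, size_t bufoff)
{
	int mi = 0, di = 0;
	size_t pgoff = 0, tocpy;

	while (mi < npages && di < nbufs) {
		/* Smaller of what remains in the current buffer and page. */
		tocpy = bufsz[di] - bufoff;
		if (tocpy > PG - pgoff)
			tocpy = PG - pgoff;
		memcpy(pages[mi] + pgoff, bufs[di] + bufoff, tocpy);

		pgoff += tocpy;
		if (pgoff == PG) {		/* page filled, advance page cursor */
			mi++;
			pgoff = 0;
		}
		bufoff += tocpy;
		if (bufoff == bufsz[di]) {	/* buffer drained, advance it */
			di++;
			bufoff = 0;
		}
	}
	/* As in the diff, a partially filled last page is zero-filled. */
	if (pgoff != 0 && mi < npages)
		memset(pages[mi] + pgoff, 0, PG - pgoff);
}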
Modified: stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h
==============================================================================
--- stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h Thu Mar 15 11:06:04 2018 (r330990)
+++ stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dmu.h Thu Mar 15 12:05:17 2018 (r330991)
@@ -753,6 +753,8 @@ int dmu_write_pages(objset_t *os, uint64_t object, uin
#else
int dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset,
uint64_t size, struct vm_page **ppa, dmu_tx_t *tx);
+int dmu_read_pages(objset_t *os, uint64_t object, vm_page_t *ma, int count,
+ int *rbehind, int *rahead, int last_size);
#endif
#endif
struct arc_buf *dmu_request_arcbuf(dmu_buf_t *handle, int size);
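As the new prototype above and the dmu.c change suggest, *rbehind and *rahead are in/out parameters: on entry they carry the maximum number of optional pages the caller allows before and after the required run ma[0..count-1], and on return they carry how many optional pages were actually grabbed and filled; last_size is the number of bytes expected to be valid in the last required page. A caller-side fragment for illustration only (the surrounding variables are assumed, not taken from the tree):

	int rbehind = 8, rahead = 16;		/* optional-page budget */
	int last_size = PAGE_SIZE;		/* valid bytes in the last page */
	int error;

	error = dmu_read_pages(os, object, ma, count,
	    &rbehind, &rahead, last_size);
	/* On success, rbehind/rahead hold the counts actually populated. */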
Modified: stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
==============================================================================
--- stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c Thu Mar 15 11:06:04 2018 (r330990)
+++ stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c Thu Mar 15 12:05:17 2018 (r330991)
@@ -4515,81 +4515,85 @@ zfs_setsecattr(vnode_t *vp, vsecattr_t *vsecp, int fla
}
static int
-zfs_getpages(struct vnode *vp, vm_page_t *m, int count, int *rbehind,
+zfs_getpages(struct vnode *vp, vm_page_t *ma, int count, int *rbehind,
int *rahead)
{
znode_t *zp = VTOZ(vp);
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
objset_t *os = zp->z_zfsvfs->z_os;
- vm_page_t mlast;
+ rl_t *rl;
vm_object_t object;
- caddr_t va;
- struct sf_buf *sf;
- off_t startoff, endoff;
- int i, error;
- vm_pindex_t reqstart, reqend;
- int lsize, size;
+ off_t start, end, obj_size;
+ uint_t blksz;
+ int pgsin_b, pgsin_a;
+ int error;
- object = m[0]->object;
- error = 0;
-
ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(zp);
- zfs_vmobject_wlock(object);
- if (m[count - 1]->valid != 0 && --count == 0) {
- zfs_vmobject_wunlock(object);
- goto out;
+ start = IDX_TO_OFF(ma[0]->pindex);
+ end = IDX_TO_OFF(ma[count - 1]->pindex + 1);
+
+ /*
+ * Lock a range covering all required and optional pages.
+ * Note that we need to handle the case of the block size growing.
+ */
+ for (;;) {
+ blksz = zp->z_blksz;
+ rl = zfs_range_lock(zp, rounddown(start, blksz),
+ roundup(end, blksz) - rounddown(start, blksz), RL_READER);
+ if (blksz == zp->z_blksz)
+ break;
+ zfs_range_unlock(rl);
}
- mlast = m[count - 1];
-
- if (IDX_TO_OFF(mlast->pindex) >=
- object->un_pager.vnp.vnp_size) {
- zfs_vmobject_wunlock(object);
+ object = ma[0]->object;
+ zfs_vmobject_wlock(object);
+ obj_size = object->un_pager.vnp.vnp_size;
+ zfs_vmobject_wunlock(object);
+ if (IDX_TO_OFF(ma[count - 1]->pindex) >= obj_size) {
+ zfs_range_unlock(rl);
ZFS_EXIT(zfsvfs);
return (zfs_vm_pagerret_bad);
}
- PCPU_INC(cnt.v_vnodein);
- PCPU_ADD(cnt.v_vnodepgsin, count);
+ pgsin_b = 0;
+ if (rbehind != NULL) {
+ pgsin_b = OFF_TO_IDX(start - rounddown(start, blksz));
+ pgsin_b = MIN(*rbehind, pgsin_b);
+ }
- lsize = PAGE_SIZE;
- if (IDX_TO_OFF(mlast->pindex) + lsize > object->un_pager.vnp.vnp_size)
- lsize = object->un_pager.vnp.vnp_size -
- IDX_TO_OFF(mlast->pindex);
- zfs_vmobject_wunlock(object);
-
- for (i = 0; i < count; i++) {
- size = PAGE_SIZE;
- if (i == count - 1)
- size = lsize;
- va = zfs_map_page(m[i], &sf);
- error = dmu_read(os, zp->z_id, IDX_TO_OFF(m[i]->pindex),
- size, va, DMU_READ_PREFETCH);
- if (size != PAGE_SIZE)
- bzero(va + size, PAGE_SIZE - size);
- zfs_unmap_page(sf);
- if (error != 0)
- goto out;
+ pgsin_a = 0;
+ if (rahead != NULL) {
+ pgsin_a = OFF_TO_IDX(roundup(end, blksz) - end);
+ if (end + IDX_TO_OFF(pgsin_a) >= obj_size)
+ pgsin_a = OFF_TO_IDX(round_page(obj_size) - end);
+ pgsin_a = MIN(*rahead, pgsin_a);
}
- zfs_vmobject_wlock(object);
- for (i = 0; i < count; i++)
- m[i]->valid = VM_PAGE_BITS_ALL;
- zfs_vmobject_wunlock(object);
+ /*
+ * NB: we need to pass the exact byte size of the data that we expect
+ * to read after accounting for the file size. This is required because
+ * ZFS will panic if we request DMU to read beyond the end of the last
+ * allocated block.
+ */
+ error = dmu_read_pages(os, zp->z_id, ma, count, &pgsin_b, &pgsin_a,
+ MIN(end, obj_size) - (end - PAGE_SIZE));
-out:
+ zfs_range_unlock(rl);
ZFS_ACCESSTIME_STAMP(zfsvfs, zp);
ZFS_EXIT(zfsvfs);
- if (error == 0) {
- if (rbehind)
- *rbehind = 0;
- if (rahead)
- *rahead = 0;
- return (zfs_vm_pagerret_ok);
- } else
+
+ if (error != 0)
return (zfs_vm_pagerret_error);
+
+ PCPU_INC(cnt.v_vnodein);
+ PCPU_ADD(cnt.v_vnodepgsin, count + pgsin_b + pgsin_a);
+ if (rbehind != NULL)
+ *rbehind = pgsin_b;
+ if (rahead != NULL)
+ *rahead = pgsin_a;
+ return (zfs_vm_pagerret_ok);
}
static int
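To make the new read-behind/read-ahead sizing concrete, here is a standalone arithmetic sketch under assumed values (4 KB pages, a 128 KB block size, a file much larger than one block, and a request for the two pages at byte offsets 20 KB and 24 KB); the file-size clamp on pgsin_a is omitted for brevity and the helper macros are simplified stand-ins for the kernel ones.

#include <stdio.h>

#define PGSZ		4096UL
#define OFF_TO_IDX(o)	((int)((o) / PGSZ))
#define ROUNDDOWN(x, y)	(((x) / (y)) * (y))
#define ROUNDUP(x, y)	((((x) + (y) - 1) / (y)) * (y))

int
main(void)
{
	unsigned long blksz = 128 * 1024UL;	/* zp->z_blksz */
	unsigned long start = 20 * 1024UL;	/* IDX_TO_OFF(ma[0]->pindex) */
	unsigned long end = 28 * 1024UL;	/* one past the last requested page */
	int rbehind = 8, rahead = 64;		/* caller's optional-page budget */
	int pgsin_b, pgsin_a;

	/* Pages between the start of the block and the first requested page. */
	pgsin_b = OFF_TO_IDX(start - ROUNDDOWN(start, blksz));
	if (pgsin_b > rbehind)
		pgsin_b = rbehind;

	/* Pages between the end of the request and the end of the block. */
	pgsin_a = OFF_TO_IDX(ROUNDUP(end, blksz) - end);
	if (pgsin_a > rahead)
		pgsin_a = rahead;

	/* Prints: read-behind 5, read-ahead 25. */
	printf("read-behind %d, read-ahead %d\n", pgsin_b, pgsin_a);
	return (0);
}

The point of the change is visible in those numbers: the 128 KB block backing the two requested pages has to be read from the DMU in full anyway, so up to 30 additional resident pages can be populated from it at little extra cost, whereas the old per-page dmu_read() loop always reported zero read-behind and read-ahead pages.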