svn commit: r298105 - head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs

Andriy Gapon avg at FreeBSD.org
Sat Apr 16 07:35:55 UTC 2016


Author: avg
Date: Sat Apr 16 07:35:53 2016
New Revision: 298105
URL: https://svnweb.freebsd.org/changeset/base/298105

Log:
  zfs: enable vn_io_fault support
  
  Note that now we have to account for possible partial writes
  in dmu_write_uio_dbuf().  It seems that on illumos either all or none
  of the data are expected to be written.  But the partial writes are
  quite expected when vn_io_fault support is enabled.
  
  Reviewed by:	kib
  MFC after:	7 weeks
  Differential Revision: https://reviews.freebsd.org/D2790

Modified:
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
  head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c	Sat Apr 16 07:33:42 2016	(r298104)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu.c	Sat Apr 16 07:35:53 2016	(r298105)
@@ -1092,8 +1092,13 @@ dmu_read_uio_dnode(dnode_t *dn, uio_t *u
 			else
 				XUIOSTAT_BUMP(xuiostat_rbuf_copied);
 		} else {
+#ifdef illumos
 			err = uiomove((char *)db->db_data + bufoff, tocpy,
 			    UIO_READ, uio);
+#else
+			err = vn_io_fault_uiomove((char *)db->db_data + bufoff,
+			    tocpy, uio);
+#endif
 		}
 		if (err)
 			break;
@@ -1187,6 +1192,7 @@ dmu_write_uio_dnode(dnode_t *dn, uio_t *
 		else
 			dmu_buf_will_dirty(db, tx);
 
+#ifdef illumos
 		/*
 		 * XXX uiomove could block forever (eg. nfs-backed
 		 * pages).  There needs to be a uiolockdown() function
@@ -1195,6 +1201,10 @@ dmu_write_uio_dnode(dnode_t *dn, uio_t *
 		 */
 		err = uiomove((char *)db->db_data + bufoff, tocpy,
 		    UIO_WRITE, uio);
+#else
+		err = vn_io_fault_uiomove((char *)db->db_data + bufoff, tocpy,
+		    uio);
+#endif
 
 		if (tocpy == db->db_size)
 			dmu_buf_fill_done(db, tx);

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c	Sat Apr 16 07:33:42 2016	(r298104)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c	Sat Apr 16 07:35:53 2016	(r298105)
@@ -1170,6 +1170,7 @@ zfs_domount(vfs_t *vfsp, char *osname)
 	vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED;
 	vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES;
 	vfsp->mnt_kern_flag |= MNTK_EXTENDED_SHARED;
+	vfsp->mnt_kern_flag |= MNTK_NO_IOPF;	/* vn_io_fault can be used */
 
 	/*
 	 * The fsid is 64 bits, composed of an 8-bit fs type, which

Modified: head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c
==============================================================================
--- head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c	Sat Apr 16 07:33:42 2016	(r298104)
+++ head/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c	Sat Apr 16 07:35:53 2016	(r298105)
@@ -656,7 +656,11 @@ mappedread(vnode_t *vp, int nbytes, uio_
 
 			zfs_vmobject_wunlock(obj);
 			va = zfs_map_page(pp, &sf);
+#ifdef illumos
 			error = uiomove(va + off, bytes, UIO_READ, uio);
+#else
+			error = vn_io_fault_uiomove(va + off, bytes, uio);
+#endif
 			zfs_unmap_page(sf);
 			zfs_vmobject_wlock(obj);
 			page_unhold(pp);
@@ -1034,18 +1038,31 @@ zfs_write(vnode_t *vp, uio_t *uio, int i
 			 * holding up the transaction if the data copy hangs
 			 * up on a pagefault (e.g., from an NFS server mapping).
 			 */
+#ifdef illumos
 			size_t cbytes;
+#endif
 
 			abuf = dmu_request_arcbuf(sa_get_db(zp->z_sa_hdl),
 			    max_blksz);
 			ASSERT(abuf != NULL);
 			ASSERT(arc_buf_size(abuf) == max_blksz);
+#ifdef illumos
 			if (error = uiocopy(abuf->b_data, max_blksz,
 			    UIO_WRITE, uio, &cbytes)) {
 				dmu_return_arcbuf(abuf);
 				break;
 			}
 			ASSERT(cbytes == max_blksz);
+#else
+			ssize_t resid = uio->uio_resid;
+			error = vn_io_fault_uiomove(abuf->b_data, max_blksz, uio);
+			if (error != 0) {
+				uio->uio_offset -= resid - uio->uio_resid;
+				uio->uio_resid = resid;
+				dmu_return_arcbuf(abuf);
+				break;
+			}
+#endif
 		}
 
 		/*
@@ -1123,8 +1140,10 @@ zfs_write(vnode_t *vp, uio_t *uio, int i
 				dmu_assign_arcbuf(sa_get_db(zp->z_sa_hdl),
 				    woff, abuf, tx);
 			}
+#ifdef illumos
 			ASSERT(tx_bytes <= uio->uio_resid);
 			uioskip(uio, tx_bytes);
+#endif
 		}
 		if (tx_bytes && vn_has_cached_data(vp)) {
 			update_pages(vp, woff, tx_bytes, zfsvfs->z_os,
@@ -1178,7 +1197,11 @@ zfs_write(vnode_t *vp, uio_t *uio, int i
 		while ((end_size = zp->z_size) < uio->uio_loffset) {
 			(void) atomic_cas_64(&zp->z_size, end_size,
 			    uio->uio_loffset);
+#ifdef illumos
 			ASSERT(error == 0);
+#else
+			ASSERT(error == 0 || error == EFAULT);
+#endif
 		}
 		/*
 		 * If we are replaying and eof is non zero then force
@@ -1188,7 +1211,10 @@ zfs_write(vnode_t *vp, uio_t *uio, int i
 		if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0)
 			zp->z_size = zfsvfs->z_replay_eof;
 
-		error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
+		if (error == 0)
+			error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
+		else
+			(void) sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
 
 		zfs_log_write(zilog, tx, TX_WRITE, zp, woff, tx_bytes, ioflag);
 		dmu_tx_commit(tx);
@@ -1215,6 +1241,17 @@ zfs_write(vnode_t *vp, uio_t *uio, int i
 		return (error);
 	}
 
+#ifdef __FreeBSD__
+	/*
+	 * EFAULT means that at least one page of the source buffer was not
+	 * available.  VFS will re-try remaining I/O upon this error.
+	 */
+	if (error == EFAULT) {
+		ZFS_EXIT(zfsvfs);
+		return (error);
+	}
+#endif
+
 	if (ioflag & (FSYNC | FDSYNC) ||
 	    zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
 		zil_commit(zilog, zp->z_id);


More information about the svn-src-head mailing list