RFC: NFS client patch to reduce sychronous writes
Rick Macklem
rmacklem at uoguelph.ca
Fri Nov 29 23:50:07 UTC 2013
Ok, I've put an updated patch here:
http://people.freebsd.org/~rmacklem/noncontig-write.patch
It now enables the change which allows non-contiguous byte
range writes within the same buffer cache block when a mount
option I called "noncontigwr" is given to a mount point.
(It still disables this if there has been a file lock applied
to the file since it was opened by the process(es) that currently
have the file open and/or mmap()'d.)
If anyone has a suggestion for a better name for the option,
please suggest it.
I didn't do a count for when the non-contiguous writes happen,
since I suspect it will happen on most any mount point that
uses the option (and it seemed to be too NFS specific and
obscure for "mount -v" imho).
So, does this sound reasonable to commit to head?
rick
--- here is the patch, in case you want to look at it ---
--- fs/nfsclient/nfs_clbio.c.orig 2013-08-28 18:45:41.000000000 -0400
+++ fs/nfsclient/nfs_clbio.c 2013-11-28 19:59:30.000000000 -0500
@@ -874,7 +874,7 @@ ncl_write(struct vop_write_args *ap)
struct vattr vattr;
struct nfsmount *nmp = VFSTONFS(vp->v_mount);
daddr_t lbn;
- int bcount;
+ int bcount, noncontig_write, obcount;
int bp_cached, n, on, error = 0, error1;
size_t orig_resid, local_resid;
off_t orig_size, tmp_off;
@@ -1037,7 +1037,15 @@ again:
* unaligned buffer size.
*/
mtx_lock(&np->n_mtx);
- if (uio->uio_offset == np->n_size && n) {
+ if ((np->n_flag & NHASBEENLOCKED) == 0 &&
+ (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0)
+ noncontig_write = 1;
+ else
+ noncontig_write = 0;
+ if ((uio->uio_offset == np->n_size ||
+ (noncontig_write != 0 &&
+ lbn == (np->n_size / biosize) &&
+ uio->uio_offset + n > np->n_size)) && n) {
mtx_unlock(&np->n_mtx);
/*
* Get the buffer (in its pre-append state to maintain
@@ -1045,8 +1053,8 @@ again:
* nfsnode after we have locked the buffer to prevent
* readers from reading garbage.
*/
- bcount = on;
- bp = nfs_getcacheblk(vp, lbn, bcount, td);
+ obcount = np->n_size - (lbn * biosize);
+ bp = nfs_getcacheblk(vp, lbn, obcount, td);
if (bp != NULL) {
long save;
@@ -1058,9 +1066,12 @@ again:
mtx_unlock(&np->n_mtx);
save = bp->b_flags & B_CACHE;
- bcount += n;
+ bcount = on + n;
allocbuf(bp, bcount);
bp->b_flags |= save;
+ if (noncontig_write != 0 && on > obcount)
+ vfs_bio_bzero_buf(bp, obcount, on -
+ obcount);
}
} else {
/*
@@ -1159,19 +1170,23 @@ again:
* area, just update the b_dirtyoff and b_dirtyend,
* otherwise force a write rpc of the old dirty area.
*
+ * If there has been a file lock applied to this file
+ * or vfs.nfs.old_noncontig_writing is set, do the following:
* While it is possible to merge discontiguous writes due to
* our having a B_CACHE buffer ( and thus valid read data
* for the hole), we don't because it could lead to
* significant cache coherency problems with multiple clients,
* especially if locking is implemented later on.
*
- * As an optimization we could theoretically maintain
- * a linked list of discontinuous areas, but we would still
- * have to commit them separately so there isn't much
- * advantage to it except perhaps a bit of asynchronization.
+ * If vfs.nfs.old_noncontig_writing is not set and there has
+ * not been file locking done on this file:
+ * Relax coherency a bit for the sake of performance and
+ * expand the current dirty region to contain the new
+ * write even if it means we mark some non-dirty data as
+ * dirty.
*/
- if (bp->b_dirtyend > 0 &&
+ if (noncontig_write == 0 && bp->b_dirtyend > 0 &&
(on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
if (bwrite(bp) == EINTR) {
error = EINTR;
--- fs/nfsclient/nfsnode.h.orig 2013-11-19 18:17:37.000000000 -0500
+++ fs/nfsclient/nfsnode.h 2013-11-25 21:29:58.000000000 -0500
@@ -157,6 +157,7 @@ struct nfsnode {
#define NLOCKWANT 0x00010000 /* Want the sleep lock */
#define NNOLAYOUT 0x00020000 /* Can't get a layout for this file */
#define NWRITEOPENED 0x00040000 /* Has been opened for writing */
+#define NHASBEENLOCKED 0x00080000 /* Has been file locked. */
/*
* Convert between nfsnode pointers and vnode pointers
--- fs/nfsclient/nfs_clvnops.c.orig 2013-11-19 18:19:42.000000000 -0500
+++ fs/nfsclient/nfs_clvnops.c 2013-11-25 21:32:47.000000000 -0500
@@ -3079,6 +3079,10 @@ nfs_advlock(struct vop_advlock_args *ap)
np->n_change = va.va_filerev;
}
}
+ /* Mark that a file lock has been acquired. */
+ mtx_lock(&np->n_mtx);
+ np->n_flag |= NHASBEENLOCKED;
+ mtx_unlock(&np->n_mtx);
}
NFSVOPUNLOCK(vp, 0);
return (0);
@@ -3098,6 +3102,12 @@ nfs_advlock(struct vop_advlock_args *ap)
error = ENOLCK;
}
}
+ if (error == 0 && ap->a_op == F_SETLK) {
+ /* Mark that a file lock has been acquired. */
+ mtx_lock(&np->n_mtx);
+ np->n_flag |= NHASBEENLOCKED;
+ mtx_unlock(&np->n_mtx);
+ }
}
return (error);
}
--- fs/nfsclient/nfs_clvfsops.c.orig 2013-11-28 20:00:58.000000000 -0500
+++ fs/nfsclient/nfs_clvfsops.c 2013-11-28 20:06:32.000000000 -0500
@@ -719,7 +719,8 @@ static const char *nfs_opts[] = { "from"
"retrans", "acregmin", "acregmax", "acdirmin", "acdirmax", "resvport",
"readahead", "hostname", "timeout", "addr", "fh", "nfsv3", "sec",
"principal", "nfsv4", "gssname", "allgssname", "dirpath", "minorversion",
- "nametimeo", "negnametimeo", "nocto", "pnfs", "wcommitsize",
+ "nametimeo", "negnametimeo", "nocto", "noncontigwr", "pnfs",
+ "wcommitsize",
NULL };
/*
@@ -840,6 +841,8 @@ nfs_mount(struct mount *mp)
args.flags |= NFSMNT_ALLGSSNAME;
if (vfs_getopt(mp->mnt_optnew, "nocto", NULL, NULL) == 0)
args.flags |= NFSMNT_NOCTO;
+ if (vfs_getopt(mp->mnt_optnew, "noncontigwr", NULL, NULL) == 0)
+ args.flags |= NFSMNT_NONCONTIGWR;
if (vfs_getopt(mp->mnt_optnew, "pnfs", NULL, NULL) == 0)
args.flags |= NFSMNT_PNFS;
if (vfs_getopt(mp->mnt_optnew, "readdirsize", (void **)&opt, NULL) == 0) {
@@ -1792,6 +1795,8 @@ void nfscl_retopts(struct nfsmount *nmp,
&blen);
nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NOCTO) != 0, ",nocto", &buf,
&blen);
+ nfscl_printopt(nmp, (nmp->nm_flag & NFSMNT_NONCONTIGWR) != 0,
+ ",noncontigwr", &buf, &blen);
nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
0, ",lockd", &buf, &blen);
nfscl_printopt(nmp, (nmp->nm_flag & (NFSMNT_NOLOCKD | NFSMNT_NFSV4)) ==
--- nfsclient/nfsargs.h.orig 2013-11-28 19:53:56.000000000 -0500
+++ nfsclient/nfsargs.h 2013-11-28 19:56:38.000000000 -0500
@@ -99,5 +99,6 @@ struct nfs_args {
#define NFSMNT_STRICT3530 0x10000000 /* Adhere strictly to RFC3530 */
#define NFSMNT_NOCTO 0x20000000 /* Don't flush attrcache on open */
#define NFSMNT_PNFS 0x40000000 /* Enable pNFS support */
+#define NFSMNT_NONCONTIGWR 0x80000000 /* Enable non-contiguous writes */
#endif
More information about the freebsd-fs
mailing list