git: 03a39a17089a - main - nfscl: Clear out a lot of cruft related to B_DIRECT

From: Rick Macklem <rmacklem_at_FreeBSD.org>
Date: Sun, 28 Apr 2024 00:12:13 UTC
The branch main has been updated by rmacklem:

URL: https://cgit.FreeBSD.org/src/commit/?id=03a39a17089adc1d0e28076670e664dcdebccf73

commit 03a39a17089adc1d0e28076670e664dcdebccf73
Author:     Rick Macklem <rmacklem@FreeBSD.org>
AuthorDate: 2024-04-28 00:10:48 +0000
Commit:     Rick Macklem <rmacklem@FreeBSD.org>
CommitDate: 2024-04-28 00:10:48 +0000

    nfscl: Clear out a lot of cruft related to B_DIRECT
    
    There is only one place in the unpatched sources where B_DIRECT is
    set in the NFS client and this code is never executed. As such, this patch
    removes this code that is never executed, since B_DIRECT should never
    be set.
    
    During a IETF testing event this week, I saw a crash in ncl_doio_directwrite(),
    but this function is only called if B_DIRECT is set.
    I cannot explain how ncl_doio_directwrite() got called, but once this patch
    was applied to the sources, the crash did not recur. This is not surprising,
    since this patch deleted the function.
    
    Reviewed by:    kib, markj
    MFC after:      3 days
    Differential Revision:  https://reviews.freebsd.org/D44980
---
 sys/fs/nfs/nfs_commonport.c     |   1 -
 sys/fs/nfs/nfsport.h            |   2 -
 sys/fs/nfsclient/nfs.h          |   1 -
 sys/fs/nfsclient/nfs_clbio.c    | 237 ++++++++--------------------------------
 sys/fs/nfsclient/nfs_clnfsiod.c |  19 ++--
 sys/fs/nfsclient/nfs_clvnops.c  |  24 +---
 sys/fs/nfsclient/nfsnode.h      |   3 -
 7 files changed, 57 insertions(+), 230 deletions(-)

diff --git a/sys/fs/nfs/nfs_commonport.c b/sys/fs/nfs/nfs_commonport.c
index cfceaf604b13..2db9af5b9ea9 100644
--- a/sys/fs/nfs/nfs_commonport.c
+++ b/sys/fs/nfs/nfs_commonport.c
@@ -121,7 +121,6 @@ MALLOC_DEFINE(M_NEWNFSCLCLIENT, "NFSCL client", "NFSCL Client");
 MALLOC_DEFINE(M_NEWNFSCLLOCKOWNER, "NFSCL lckown", "NFSCL Lock Owner");
 MALLOC_DEFINE(M_NEWNFSCLLOCK, "NFSCL lck", "NFSCL Lock");
 MALLOC_DEFINE(M_NEWNFSV4NODE, "NEWNFSnode", "NFS vnode");
-MALLOC_DEFINE(M_NEWNFSDIRECTIO, "NEWdirectio", "NFS Direct IO buffer");
 MALLOC_DEFINE(M_NEWNFSDIROFF, "NFSCL diroff",
     "NFS directory offset data");
 MALLOC_DEFINE(M_NEWNFSDROLLBACK, "NFSD rollback",
diff --git a/sys/fs/nfs/nfsport.h b/sys/fs/nfs/nfsport.h
index 7e88ccdaffa1..0b16ba9b85a8 100644
--- a/sys/fs/nfs/nfsport.h
+++ b/sys/fs/nfs/nfsport.h
@@ -938,7 +938,6 @@ MALLOC_DECLARE(M_NEWNFSCLLOCKOWNER);
 MALLOC_DECLARE(M_NEWNFSCLLOCK);
 MALLOC_DECLARE(M_NEWNFSDIROFF);
 MALLOC_DECLARE(M_NEWNFSV4NODE);
-MALLOC_DECLARE(M_NEWNFSDIRECTIO);
 MALLOC_DECLARE(M_NEWNFSMNT);
 MALLOC_DECLARE(M_NEWNFSDROLLBACK);
 MALLOC_DECLARE(M_NEWNFSLAYOUT);
@@ -965,7 +964,6 @@ MALLOC_DECLARE(M_NEWNFSDSESSION);
 #define	M_NFSCLLOCK	M_NEWNFSCLLOCK
 #define	M_NFSDIROFF	M_NEWNFSDIROFF
 #define	M_NFSV4NODE	M_NEWNFSV4NODE
-#define	M_NFSDIRECTIO	M_NEWNFSDIRECTIO
 #define	M_NFSDROLLBACK	M_NEWNFSDROLLBACK
 #define	M_NFSLAYOUT	M_NEWNFSLAYOUT
 #define	M_NFSFLAYOUT	M_NEWNFSFLAYOUT
diff --git a/sys/fs/nfsclient/nfs.h b/sys/fs/nfsclient/nfs.h
index aa755a6b5f4d..eeb68a434a6b 100644
--- a/sys/fs/nfsclient/nfs.h
+++ b/sys/fs/nfsclient/nfs.h
@@ -90,7 +90,6 @@ enum nfsiod_state {
  * Function prototypes.
  */
 int ncl_meta_setsize(struct vnode *, struct thread *, u_quad_t);
-void ncl_doio_directwrite(struct buf *);
 int ncl_bioread(struct vnode *, struct uio *, int, struct ucred *);
 int ncl_biowrite(struct vnode *, struct uio *, int, struct ucred *);
 int ncl_vinvalbuf(struct vnode *, int, struct thread *, int);
diff --git a/sys/fs/nfsclient/nfs_clbio.c b/sys/fs/nfsclient/nfs_clbio.c
index 1cf45bb0c924..c691e797aa01 100644
--- a/sys/fs/nfsclient/nfs_clbio.c
+++ b/sys/fs/nfsclient/nfs_clbio.c
@@ -764,144 +764,58 @@ static int
 nfs_directio_write(struct vnode *vp, struct uio *uiop, struct ucred *cred,
     int ioflag)
 {
-	int error;
+	struct uio uio;
+	struct iovec iov;
 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 	struct thread *td = uiop->uio_td;
-	int size;
-	int wsize;
+	int error, iomode, must_commit, size, wsize;
 
+	KASSERT((ioflag & IO_SYNC) != 0, ("nfs_directio_write: not sync"));
 	mtx_lock(&nmp->nm_mtx);
 	wsize = nmp->nm_wsize;
 	mtx_unlock(&nmp->nm_mtx);
-	if (ioflag & IO_SYNC) {
-		int iomode, must_commit;
-		struct uio uio;
-		struct iovec iov;
-do_sync:
-		while (uiop->uio_resid > 0) {
-			size = MIN(uiop->uio_resid, wsize);
-			size = MIN(uiop->uio_iov->iov_len, size);
-			iov.iov_base = uiop->uio_iov->iov_base;
-			iov.iov_len = size;
-			uio.uio_iov = &iov;
-			uio.uio_iovcnt = 1;
-			uio.uio_offset = uiop->uio_offset;
-			uio.uio_resid = size;
-			uio.uio_segflg = uiop->uio_segflg;
-			uio.uio_rw = UIO_WRITE;
-			uio.uio_td = td;
-			iomode = NFSWRITE_FILESYNC;
-			/*
-			 * When doing direct I/O we do not care if the
-			 * server's write verifier has changed, but we
-			 * do not want to update the verifier if it has
-			 * changed, since that hides the change from
-			 * writes being done through the buffer cache.
-			 * By passing must_commit in set to two, the code
-			 * in nfsrpc_writerpc() will not update the
-			 * verifier on the mount point.
-			 */
-			must_commit = 2;
-			error = ncl_writerpc(vp, &uio, cred, &iomode,
-			    &must_commit, 0, ioflag);
-			KASSERT((must_commit == 2),
-			    ("ncl_directio_write: Updated write verifier"));
-			if (error)
-				return (error);
-			if (iomode != NFSWRITE_FILESYNC)
-				printf("nfs_directio_write: Broken server "
-				    "did not reply FILE_SYNC\n");
-			uiop->uio_offset += size;
-			uiop->uio_resid -= size;
-			if (uiop->uio_iov->iov_len <= size) {
-				uiop->uio_iovcnt--;
-				uiop->uio_iov++;
-			} else {
-				uiop->uio_iov->iov_base =
-					(char *)uiop->uio_iov->iov_base + size;
-				uiop->uio_iov->iov_len -= size;
-			}
-		}
-	} else {
-		struct uio *t_uio;
-		struct iovec *t_iov;
-		struct buf *bp;
-
+	while (uiop->uio_resid > 0) {
+		size = MIN(uiop->uio_resid, wsize);
+		size = MIN(uiop->uio_iov->iov_len, size);
+		iov.iov_base = uiop->uio_iov->iov_base;
+		iov.iov_len = size;
+		uio.uio_iov = &iov;
+		uio.uio_iovcnt = 1;
+		uio.uio_offset = uiop->uio_offset;
+		uio.uio_resid = size;
+		uio.uio_segflg = uiop->uio_segflg;
+		uio.uio_rw = UIO_WRITE;
+		uio.uio_td = td;
+		iomode = NFSWRITE_FILESYNC;
 		/*
-		 * Break up the write into blocksize chunks and hand these
-		 * over to nfsiod's for write back.
-		 * Unfortunately, this incurs a copy of the data. Since
-		 * the user could modify the buffer before the write is
-		 * initiated.
-		 *
-		 * The obvious optimization here is that one of the 2 copies
-		 * in the async write path can be eliminated by copying the
-		 * data here directly into mbufs and passing the mbuf chain
-		 * down. But that will require a fair amount of re-working
-		 * of the code and can be done if there's enough interest
-		 * in NFS directio access.
+		 * When doing direct I/O we do not care if the
+		 * server's write verifier has changed, but we
+		 * do not want to update the verifier if it has
+		 * changed, since that hides the change from
+		 * writes being done through the buffer cache.
+		 * By passing must_commit in set to two, the code
+		 * in nfsrpc_writerpc() will not update the
+		 * verifier on the mount point.
 		 */
-		while (uiop->uio_resid > 0) {
-			size = MIN(uiop->uio_resid, wsize);
-			size = MIN(uiop->uio_iov->iov_len, size);
-			bp = uma_zalloc(ncl_pbuf_zone, M_WAITOK);
-			t_uio = malloc(sizeof(struct uio), M_NFSDIRECTIO, M_WAITOK);
-			t_iov = malloc(sizeof(struct iovec), M_NFSDIRECTIO, M_WAITOK);
-			t_iov->iov_base = malloc(size, M_NFSDIRECTIO, M_WAITOK);
-			t_iov->iov_len = size;
-			t_uio->uio_iov = t_iov;
-			t_uio->uio_iovcnt = 1;
-			t_uio->uio_offset = uiop->uio_offset;
-			t_uio->uio_resid = size;
-			t_uio->uio_segflg = UIO_SYSSPACE;
-			t_uio->uio_rw = UIO_WRITE;
-			t_uio->uio_td = td;
-			KASSERT(uiop->uio_segflg == UIO_USERSPACE ||
-			    uiop->uio_segflg == UIO_SYSSPACE,
-			    ("nfs_directio_write: Bad uio_segflg"));
-			if (uiop->uio_segflg == UIO_USERSPACE) {
-				error = copyin(uiop->uio_iov->iov_base,
-				    t_iov->iov_base, size);
-				if (error != 0)
-					goto err_free;
-			} else
-				/*
-				 * UIO_SYSSPACE may never happen, but handle
-				 * it just in case it does.
-				 */
-				bcopy(uiop->uio_iov->iov_base, t_iov->iov_base,
-				    size);
-			bp->b_flags |= B_DIRECT;
-			bp->b_iocmd = BIO_WRITE;
-			if (cred != NOCRED) {
-				crhold(cred);
-				bp->b_wcred = cred;
-			} else
-				bp->b_wcred = NOCRED;
-			bp->b_caller1 = (void *)t_uio;
-			bp->b_vp = vp;
-			error = ncl_asyncio(nmp, bp, NOCRED, td);
-err_free:
-			if (error) {
-				free(t_iov->iov_base, M_NFSDIRECTIO);
-				free(t_iov, M_NFSDIRECTIO);
-				free(t_uio, M_NFSDIRECTIO);
-				bp->b_vp = NULL;
-				uma_zfree(ncl_pbuf_zone, bp);
-				if (error == EINTR)
-					return (error);
-				goto do_sync;
-			}
-			uiop->uio_offset += size;
-			uiop->uio_resid -= size;
-			if (uiop->uio_iov->iov_len <= size) {
-				uiop->uio_iovcnt--;
-				uiop->uio_iov++;
-			} else {
-				uiop->uio_iov->iov_base =
-					(char *)uiop->uio_iov->iov_base + size;
-				uiop->uio_iov->iov_len -= size;
-			}
+		must_commit = 2;
+		error = ncl_writerpc(vp, &uio, cred, &iomode,
+		    &must_commit, 0, ioflag);
+		KASSERT(must_commit == 2,
+		    ("ncl_directio_write: Updated write verifier"));
+		if (error != 0)
+			return (error);
+		if (iomode != NFSWRITE_FILESYNC)
+			printf("nfs_directio_write: Broken server "
+			    "did not reply FILE_SYNC\n");
+		uiop->uio_offset += size;
+		uiop->uio_resid -= size;
+		if (uiop->uio_iov->iov_len <= size) {
+			uiop->uio_iovcnt--;
+			uiop->uio_iov++;
+		} else {
+			uiop->uio_iov->iov_base =
+				(char *)uiop->uio_iov->iov_base + size;
+			uiop->uio_iov->iov_len -= size;
 		}
 	}
 	return (0);
@@ -1467,7 +1381,7 @@ ncl_vinvalbuf(struct vnode *vp, int flags, struct thread *td, int intrflg)
 		nanouptime(&ts);
 		NFSLOCKNODE(np);
 	}
-	if (np->n_directio_asyncwr == 0 && (np->n_flag & NMODIFIED) != 0) {
+	if ((np->n_flag & NMODIFIED) != 0) {
 		np->n_localmodtime = ts;
 		np->n_flag &= ~NMODIFIED;
 	}
@@ -1612,12 +1526,8 @@ again:
 		BUF_KERNPROC(bp);
 		TAILQ_INSERT_TAIL(&nmp->nm_bufq, bp, b_freelist);
 		nmp->nm_bufqlen++;
-		if ((bp->b_flags & B_DIRECT) && bp->b_iocmd == BIO_WRITE) {
-			NFSLOCKNODE(VTONFS(bp->b_vp));
-			VTONFS(bp->b_vp)->n_flag |= NMODIFIED;
-			VTONFS(bp->b_vp)->n_directio_asyncwr++;
-			NFSUNLOCKNODE(VTONFS(bp->b_vp));
-		}
+		KASSERT((bp->b_flags & B_DIRECT) == 0,
+		    ("ncl_asyncio: B_DIRECT set"));
 		NFSUNLOCKIOD();
 		return (0);
 	}
@@ -1632,59 +1542,6 @@ again:
 	return (EIO);
 }
 
-void
-ncl_doio_directwrite(struct buf *bp)
-{
-	int iomode, must_commit;
-	struct uio *uiop = (struct uio *)bp->b_caller1;
-	char *iov_base = uiop->uio_iov->iov_base;
-
-	iomode = NFSWRITE_FILESYNC;
-	uiop->uio_td = NULL; /* NULL since we're in nfsiod */
-	/*
-	 * When doing direct I/O we do not care if the
-	 * server's write verifier has changed, but we
-	 * do not want to update the verifier if it has
-	 * changed, since that hides the change from
-	 * writes being done through the buffer cache.
-	 * By passing must_commit in set to two, the code
-	 * in nfsrpc_writerpc() will not update the
-	 * verifier on the mount point.
-	 */
-	must_commit = 2;
-	ncl_writerpc(bp->b_vp, uiop, bp->b_wcred, &iomode, &must_commit, 0, 0);
-	KASSERT((must_commit == 2), ("ncl_doio_directwrite: Updated write"
-	    " verifier"));
-	if (iomode != NFSWRITE_FILESYNC)
-		printf("ncl_doio_directwrite: Broken server "
-		    "did not reply FILE_SYNC\n");
-	free(iov_base, M_NFSDIRECTIO);
-	free(uiop->uio_iov, M_NFSDIRECTIO);
-	free(uiop, M_NFSDIRECTIO);
-	if ((bp->b_flags & B_DIRECT) && bp->b_iocmd == BIO_WRITE) {
-		struct nfsnode *np = VTONFS(bp->b_vp);
-		NFSLOCKNODE(np);
-		if (NFSHASPNFS(VFSTONFS(bp->b_vp->v_mount))) {
-			/*
-			 * Invalidate the attribute cache, since writes to a DS
-			 * won't update the size attribute.
-			 */
-			np->n_attrstamp = 0;
-		}
-		np->n_directio_asyncwr--;
-		if (np->n_directio_asyncwr == 0) {
-			np->n_flag &= ~NMODIFIED;
-			if ((np->n_flag & NFSYNCWAIT)) {
-				np->n_flag &= ~NFSYNCWAIT;
-				wakeup((caddr_t)&np->n_directio_asyncwr);
-			}
-		}
-		NFSUNLOCKNODE(np);
-	}
-	bp->b_vp = NULL;
-	uma_zfree(ncl_pbuf_zone, bp);
-}
-
 /*
  * Do an I/O operation to/from a cache block. This may be called
  * synchronously or from an nfsiod.
diff --git a/sys/fs/nfsclient/nfs_clnfsiod.c b/sys/fs/nfsclient/nfs_clnfsiod.c
index be3a89a1f159..5e3c5ca0066c 100644
--- a/sys/fs/nfsclient/nfs_clnfsiod.c
+++ b/sys/fs/nfsclient/nfs_clnfsiod.c
@@ -291,17 +291,14 @@ nfssvc_iod(void *instance)
 		    wakeup(&nmp->nm_bufq);
 		}
 		NFSUNLOCKIOD();
-		if (bp->b_flags & B_DIRECT) {
-			KASSERT((bp->b_iocmd == BIO_WRITE), ("nfscvs_iod: BIO_WRITE not set"));
-			(void)ncl_doio_directwrite(bp);
-		} else {
-			if (bp->b_iocmd == BIO_READ)
-				(void) ncl_doio(bp->b_vp, bp, bp->b_rcred,
-				    NULL, 0);
-			else
-				(void) ncl_doio(bp->b_vp, bp, bp->b_wcred,
-				    NULL, 0);
-		}
+		KASSERT((bp->b_flags & B_DIRECT) == 0,
+		    ("nfssvc_iod: B_DIRECT set"));
+		if (bp->b_iocmd == BIO_READ)
+			(void) ncl_doio(bp->b_vp, bp, bp->b_rcred,
+			    NULL, 0);
+		else
+			(void) ncl_doio(bp->b_vp, bp, bp->b_wcred,
+			    NULL, 0);
 		NFSLOCKIOD();
 		/*
 		 * Make sure the nmp hasn't been dismounted as soon as
diff --git a/sys/fs/nfsclient/nfs_clvnops.c b/sys/fs/nfsclient/nfs_clvnops.c
index 85c0ebd7a10f..76a3cdf9281e 100644
--- a/sys/fs/nfsclient/nfs_clvnops.c
+++ b/sys/fs/nfsclient/nfs_clvnops.c
@@ -961,10 +961,6 @@ nfs_close(struct vop_close_args *ap)
 			error = nfscl_maperr(ap->a_td, error, (uid_t)0,
 			    (gid_t)0);
 	}
-	if (newnfs_directio_enable)
-		KASSERT((np->n_directio_asyncwr == 0),
-			("nfs_close: dirty unflushed (%d) directio buffers\n",
-			 np->n_directio_asyncwr));
 	if (newnfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) {
 		NFSLOCKNODE(np);
 		KASSERT((np->n_directio_opens > 0), 
@@ -3188,21 +3184,6 @@ loop:
 		 * Wait for all the async IO requests to drain
 		 */
 		BO_UNLOCK(bo);
-		NFSLOCKNODE(np);
-		while (np->n_directio_asyncwr > 0) {
-			np->n_flag |= NFSYNCWAIT;
-			error = newnfs_msleep(td, &np->n_directio_asyncwr,
-			    &np->n_mtx, slpflag | (PRIBIO + 1), 
-			    "nfsfsync", 0);
-			if (error) {
-				if (newnfs_sigintr(nmp, td)) {
-					NFSUNLOCKNODE(np);
-					error = EINTR;	
-					goto done;
-				}
-			}
-		}
-		NFSUNLOCKNODE(np);
 	} else
 		BO_UNLOCK(bo);
 	if (NFSHASPNFS(nmp)) {
@@ -3220,15 +3201,14 @@ loop:
 		np->n_flag &= ~NWRITEERR;
 	}
   	if (commit && bo->bo_dirty.bv_cnt == 0 &&
-	    bo->bo_numoutput == 0 && np->n_directio_asyncwr == 0)
+	    bo->bo_numoutput == 0)
   		np->n_flag &= ~NMODIFIED;
 	NFSUNLOCKNODE(np);
 done:
 	if (bvec != NULL && bvec != bvec_on_stack)
 		free(bvec, M_TEMP);
 	if (error == 0 && commit != 0 && waitfor == MNT_WAIT &&
-	    (bo->bo_dirty.bv_cnt != 0 || bo->bo_numoutput != 0 ||
-	    np->n_directio_asyncwr != 0)) {
+	    (bo->bo_dirty.bv_cnt != 0 || bo->bo_numoutput != 0)) {
 		if (trycnt++ < 5) {
 			/* try, try again... */
 			passone = 1;
diff --git a/sys/fs/nfsclient/nfsnode.h b/sys/fs/nfsclient/nfsnode.h
index 99b6ae57b1fd..cc1959b7bf79 100644
--- a/sys/fs/nfsclient/nfsnode.h
+++ b/sys/fs/nfsclient/nfsnode.h
@@ -122,7 +122,6 @@ struct nfsnode {
 	short			n_fhsize;	/* size in bytes, of fh */
 	u_int32_t		n_flag;		/* Flag for locking.. */
 	int			n_directio_opens;
-	int                     n_directio_asyncwr;
 	u_int64_t		 n_change;	/* old Change attribute */
 	struct nfsv4node	*n_v4;		/* extra V4 stuff */
 	struct ucred		*n_writecred;	/* Cred. for putpages */
@@ -142,8 +141,6 @@ struct nfsnode {
  * Flags for n_flag
  */
 #define	NDIRCOOKIELK	0x00000001  /* Lock to serialize access to directory cookies */
-#define	NFSYNCWAIT      0x00000002  /* fsync waiting for all directio async
-				  writes to drain */
 #define	NMODIFIED	0x00000004  /* Might have a modified buffer in bio */
 #define	NWRITEERR	0x00000008  /* Flag write errors so close will know */
 #define	NCREATED	0x00000010  /* Opened by nfs_create() */