git: 7430017b9978 - main - fusefs: fix a recurse-on-non-recursive lockmgr panic

From: Alan Somers <asomers_at_FreeBSD.org>
Date: Wed, 06 Oct 2021 20:08:00 UTC
The branch main has been updated by asomers:

URL: https://cgit.FreeBSD.org/src/commit/?id=7430017b9978cae054ed99e5160f739e5ca021d5

commit 7430017b9978cae054ed99e5160f739e5ca021d5
Author:     Alan Somers <asomers@FreeBSD.org>
AuthorDate: 2021-10-02 18:17:36 +0000
Commit:     Alan Somers <asomers@FreeBSD.org>
CommitDate: 2021-10-06 20:07:33 +0000

    fusefs: fix a recurse-on-non-recursive lockmgr panic
    
    fuse_vnop_bmap needs to know the file's size in order to calculate the
    optimum amount of readahead.  If the file's size is unknown, it must ask
    the FUSE server.  But if the file's data was previously cached and the
    server reports that its size has shrunk, fusefs must invalidate the
    cached data.  That's not possible during VOP_BMAP because the buffer
    object is already locked.
    
    Fix the panic by not querying the FUSE server for the file's size during
    VOP_BMAP if we don't need it.  That's also a a slight performance
    optimization.
    
    PR:             256937
    Reported by:    Agata <chogata@moosefs.pro>
    Tested by:      Agata <chogata@moosefs.pro>
    MFC after:      2 weeks
---
 sys/fs/fuse/fuse_vnops.c    | 20 ++++++++---
 tests/sys/fs/fusefs/bmap.cc | 86 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 102 insertions(+), 4 deletions(-)

diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c
index dd8ff0fcc45a..9aafbad990c5 100644
--- a/sys/fs/fuse/fuse_vnops.c
+++ b/sys/fs/fuse/fuse_vnops.c
@@ -516,8 +516,9 @@ fuse_vnop_bmap(struct vop_bmap_args *ap)
 	struct fuse_bmap_in *fbi;
 	struct fuse_bmap_out *fbo;
 	struct fuse_data *data;
+	struct fuse_vnode_data *fvdat = VTOFUD(vp);
 	uint64_t biosize;
-	off_t filesize;
+	off_t fsize;
 	daddr_t lbn = ap->a_bn;
 	daddr_t *pbn = ap->a_bnp;
 	int *runp = ap->a_runp;
@@ -550,10 +551,21 @@ fuse_vnop_bmap(struct vop_bmap_args *ap)
 	 */
 	if (runb != NULL)
 		*runb = MIN(lbn, maxrun);
-	if (runp != NULL) {
-		error = fuse_vnode_size(vp, &filesize, td->td_ucred, td);
+	if (runp != NULL && maxrun == 0)
+		*runp = 0;
+	else if (runp != NULL) {
+		/*
+		 * If the file's size is cached, use that value to calculate
+		 * runp, even if the cache is expired.  runp is only advisory,
+		 * and the risk of getting it wrong is not worth the cost of
+		 * another upcall.
+		 */
+		if (fvdat->cached_attrs.va_size != VNOVAL)
+			fsize = fvdat->cached_attrs.va_size;
+		else
+			error = fuse_vnode_size(vp, &fsize, td->td_ucred, td);
 		if (error == 0)
-			*runp = MIN(MAX(0, filesize / (off_t)biosize - lbn - 1),
+			*runp = MIN(MAX(0, fsize / (off_t)biosize - lbn - 1),
 				    maxrun);
 		else
 			*runp = 0;
diff --git a/tests/sys/fs/fusefs/bmap.cc b/tests/sys/fs/fusefs/bmap.cc
index 3f392a4447f2..c635f4d7e46f 100644
--- a/tests/sys/fs/fusefs/bmap.cc
+++ b/tests/sys/fs/fusefs/bmap.cc
@@ -75,6 +75,8 @@ void expect_lookup(const char *relpath, uint64_t ino, off_t size)
 }
 };
 
+class BmapEof: public Bmap, public WithParamInterface<int> {};
+
 /*
  * Test FUSE_BMAP
  * XXX The FUSE protocol does not include the runp and runb variables, so those
@@ -165,3 +167,87 @@ TEST_F(Bmap, default_)
 
 	leak(fd);
 }
+
+/*
+ * VOP_BMAP should not query the server for the file's size, even if its cached
+ * attributes have expired.
+ * Regression test for https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=256937
+ */
+TEST_P(BmapEof, eof)
+{
+	/*
+	 * Outline:
+	 * 1) lookup the file, setting attr_valid=0
+	 * 2) Read more than one block, causing the kernel to issue VOP_BMAP to
+	 *    plan readahead.
+	 * 3) Nothing should panic
+	 * 4) Repeat the tests, truncating the file after different numbers of
+	 *    GETATTR operations.
+	 */
+	Sequence seq;
+	const off_t filesize = 2 * m_maxbcachebuf;
+	const ino_t ino = 42;
+	mode_t mode = S_IFREG | 0644;
+	void *contents, *buf;
+	int fd;
+	int ngetattrs;
+
+	ngetattrs = GetParam();
+	contents = calloc(1, filesize);
+	FuseTest::expect_lookup(RELPATH, ino, mode, filesize, 1, 0);
+	expect_open(ino, 0, 1);
+	// Depending on ngetattrs, FUSE_READ could be called with either
+	// filesize or filesize / 2 .
+	EXPECT_CALL(*m_mock, process(
+	ResultOf([=](auto in) {
+		return (in.header.opcode == FUSE_READ &&
+			in.header.nodeid == ino &&
+			in.body.read.offset == 0 &&
+			( in.body.read.size == filesize ||
+			  in.body.read.size == filesize / 2));
+		}, Eq(true)),
+		_)
+	).WillOnce(Invoke(ReturnImmediate([=](auto in, auto& out) {
+		size_t osize = in.body.read.size;
+		out.header.len = sizeof(struct fuse_out_header) + osize;
+		bzero(out.body.bytes, osize);
+	})));
+	EXPECT_CALL(*m_mock, process(
+		ResultOf([](auto in) {
+			return (in.header.opcode == FUSE_GETATTR &&
+				in.header.nodeid == ino);
+		}, Eq(true)),
+		_)
+	).Times(Between(ngetattrs - 1, ngetattrs))
+	.InSequence(seq)
+	.WillRepeatedly(Invoke(ReturnImmediate([=](auto i __unused, auto& out) {
+		SET_OUT_HEADER_LEN(out, attr);
+		out.body.attr.attr_valid = 0;
+		out.body.attr.attr.ino = ino;
+		out.body.attr.attr.mode = S_IFREG | 0644;
+		out.body.attr.attr.size = filesize;
+	})));
+	EXPECT_CALL(*m_mock, process(
+		ResultOf([](auto in) {
+			return (in.header.opcode == FUSE_GETATTR &&
+				in.header.nodeid == ino);
+		}, Eq(true)),
+		_)
+	).InSequence(seq)
+	.WillRepeatedly(Invoke(ReturnImmediate([=](auto i __unused, auto& out) {
+		SET_OUT_HEADER_LEN(out, attr);
+		out.body.attr.attr_valid = 0;
+		out.body.attr.attr.ino = ino;
+		out.body.attr.attr.mode = S_IFREG | 0644;
+		out.body.attr.attr.size = filesize / 2;
+	})));
+
+	buf = calloc(1, filesize);
+	fd = open(FULLPATH, O_RDWR);
+	ASSERT_LE(0, fd) << strerror(errno);
+	read(fd, buf, filesize);
+}
+
+INSTANTIATE_TEST_CASE_P(BE, BmapEof,
+	Values(1, 2, 3)
+);