svn commit: r350388 - in projects/fuse2: sys/fs/fuse tests/sys/fs/fusefs
Alan Somers
asomers at FreeBSD.org
Sun Jul 28 15:17:34 UTC 2019
Author: asomers
Date: Sun Jul 28 15:17:32 2019
New Revision: 350388
URL: https://svnweb.freebsd.org/changeset/base/350388
Log:
fusefs: fix panic when writing with O_DIRECT and using writeback cache
When a fusefs file system is mounted using the writeback cache, the cache
may still be bypassed by opening a file with O_DIRECT. When writing with
O_DIRECT, the cache must be invalidated for the affected portion of the
file. Fix some panics caused by inadvertently invalidating too much.
Sponsored by: The FreeBSD Foundation
Modified:
projects/fuse2/sys/fs/fuse/fuse_io.c
projects/fuse2/tests/sys/fs/fusefs/write.cc
Modified: projects/fuse2/sys/fs/fuse/fuse_io.c
==============================================================================
--- projects/fuse2/sys/fs/fuse/fuse_io.c Sun Jul 28 04:02:22 2019 (r350387)
+++ projects/fuse2/sys/fs/fuse/fuse_io.c Sun Jul 28 15:17:32 2019 (r350388)
@@ -119,9 +119,11 @@ SDT_PROVIDER_DECLARE(fusefs);
*/
SDT_PROBE_DEFINE2(fusefs, , io, trace, "int", "char*");
+static int
+fuse_inval_buf_range(struct vnode *vp, off_t filesize, off_t start, off_t end);
static void
fuse_io_clear_suid_on_write(struct vnode *vp, struct ucred *cred,
- struct thread *td);
+ struct thread *td);
static int
fuse_read_directbackend(struct vnode *vp, struct uio *uio,
struct ucred *cred, struct fuse_filehandle *fufh);
@@ -136,6 +138,58 @@ static int
fuse_write_biobackend(struct vnode *vp, struct uio *uio,
struct ucred *cred, struct fuse_filehandle *fufh, int ioflag, pid_t pid);
+/* Invalidate a range of cached data, whether dirty of not */
+static int
+fuse_inval_buf_range(struct vnode *vp, off_t filesize, off_t start, off_t end)
+{
+ struct buf *bp;
+ daddr_t left_lbn, end_lbn, right_lbn;
+ off_t new_filesize;
+ int iosize, left_on, right_on, right_blksize;
+
+ iosize = fuse_iosize(vp);
+ left_lbn = start / iosize;
+ end_lbn = howmany(end, iosize);
+ left_on = start & (iosize - 1);
+ if (left_on != 0) {
+ bp = getblk(vp, left_lbn, iosize, PCATCH, 0, 0);
+ if ((bp->b_flags & B_CACHE) != 0 && bp->b_dirtyend >= left_on) {
+ /*
+ * Flush the dirty buffer, because we don't have a
+ * byte-granular way to record which parts of the
+ * buffer are valid.
+ */
+ bwrite(bp);
+ if (bp->b_error)
+ return (bp->b_error);
+ } else {
+ brelse(bp);
+ }
+ }
+ right_on = end & (iosize - 1);
+ if (right_on != 0) {
+ right_lbn = end / iosize;
+ new_filesize = MAX(filesize, end);
+ right_blksize = MIN(iosize, new_filesize - iosize * right_lbn);
+ bp = getblk(vp, right_lbn, right_blksize, PCATCH, 0, 0);
+ if ((bp->b_flags & B_CACHE) != 0 && bp->b_dirtyoff < right_on) {
+ /*
+ * Flush the dirty buffer, because we don't have a
+ * byte-granular way to record which parts of the
+ * buffer are valid.
+ */
+ bwrite(bp);
+ if (bp->b_error)
+ return (bp->b_error);
+ } else {
+ brelse(bp);
+ }
+ }
+
+ v_inval_buf_range(vp, left_lbn, end_lbn, iosize);
+ return (0);
+}
+
/*
* FreeBSD clears the SUID and SGID bits on any write by a non-root user.
*/
@@ -236,7 +290,6 @@ fuse_io_dispatch(struct vnode *vp, struct uio *uio, in
case UIO_WRITE:
fuse_vnode_update(vp, FN_MTIMECHANGE | FN_CTIMECHANGE);
if (directio) {
- const int iosize = fuse_iosize(vp);
off_t start, end, filesize;
SDT_PROBE2(fusefs, , io, trace, 1,
@@ -252,7 +305,9 @@ fuse_io_dispatch(struct vnode *vp, struct uio *uio, in
(IO_VMIO | IO_DIRECT),
("IO_DIRECT used for a cache flush?"));
/* Invalidate the write cache when writing directly */
- v_inval_buf_range(vp, start, end, iosize);
+ err = fuse_inval_buf_range(vp, filesize, start, end);
+ if (err)
+ return (err);
err = fuse_write_directbackend(vp, uio, cred, fufh,
filesize, ioflag, false);
} else {
Modified: projects/fuse2/tests/sys/fs/fusefs/write.cc
==============================================================================
--- projects/fuse2/tests/sys/fs/fusefs/write.cc Sun Jul 28 04:02:22 2019 (r350387)
+++ projects/fuse2/tests/sys/fs/fusefs/write.cc Sun Jul 28 15:17:32 2019 (r350388)
@@ -951,6 +951,132 @@ TEST_F(WriteBackAsync, delay)
}
/*
+ * A direct write should not evict dirty cached data from outside of its own
+ * byte range.
+ */
+TEST_F(WriteBackAsync, direct_io_ignores_unrelated_cached)
+{
+ const char FULLPATH[] = "mountpoint/some_file.txt";
+ const char RELPATH[] = "some_file.txt";
+ const char CONTENTS0[] = "abcdefgh";
+ const char CONTENTS1[] = "ijklmnop";
+ uint64_t ino = 42;
+ int fd;
+ ssize_t bufsize = strlen(CONTENTS0) + 1;
+ ssize_t fsize = 2 * m_maxbcachebuf;
+ char readbuf[bufsize];
+ void *zeros;
+
+ zeros = calloc(1, m_maxbcachebuf);
+ ASSERT_NE(nullptr, zeros);
+
+ expect_lookup(RELPATH, ino, fsize);
+ expect_open(ino, 0, 1);
+ expect_read(ino, 0, m_maxbcachebuf, m_maxbcachebuf, zeros);
+ FuseTest::expect_write(ino, m_maxbcachebuf, bufsize, bufsize, 0, 0,
+ CONTENTS1);
+
+ fd = open(FULLPATH, O_RDWR);
+ EXPECT_LE(0, fd) << strerror(errno);
+
+ // Cache first block with dirty data. This will entail first reading
+ // the existing data.
+ ASSERT_EQ(bufsize, pwrite(fd, CONTENTS0, bufsize, 0))
+ << strerror(errno);
+
+ // Write directly to second block
+ ASSERT_EQ(0, fcntl(fd, F_SETFL, O_DIRECT)) << strerror(errno);
+ ASSERT_EQ(bufsize, pwrite(fd, CONTENTS1, bufsize, m_maxbcachebuf))
+ << strerror(errno);
+
+ // Read from the first block again. Should be serviced by cache.
+ ASSERT_EQ(0, fcntl(fd, F_SETFL, 0)) << strerror(errno);
+ ASSERT_EQ(bufsize, pread(fd, readbuf, bufsize, 0)) << strerror(errno);
+ ASSERT_STREQ(readbuf, CONTENTS0);
+
+ leak(fd);
+ free(zeros);
+}
+
+/*
+ * If a direct io write partially overlaps one or two blocks of dirty cached
+ * data, No dirty data should be lost. Admittedly this is a weird test,
+ * because it would be unusual to use O_DIRECT and the writeback cache.
+ */
+TEST_F(WriteBackAsync, direct_io_partially_overlaps_cached_block)
+{
+ const char FULLPATH[] = "mountpoint/some_file.txt";
+ const char RELPATH[] = "some_file.txt";
+ uint64_t ino = 42;
+ int fd;
+ off_t bs = m_maxbcachebuf;
+ ssize_t fsize = 3 * bs;
+ void *readbuf, *zeros, *ones, *zeroones, *onezeros;
+
+ readbuf = malloc(bs);
+ ASSERT_NE(nullptr, readbuf) << strerror(errno);
+ zeros = calloc(1, 3 * bs);
+ ASSERT_NE(nullptr, zeros);
+ ones = calloc(1, 2 * bs);
+ ASSERT_NE(nullptr, ones);
+ memset(ones, 1, 2 * bs);
+ zeroones = calloc(1, bs);
+ ASSERT_NE(nullptr, zeroones);
+ memset((uint8_t*)zeroones + bs / 2, 1, bs / 2);
+ onezeros = calloc(1, bs);
+ ASSERT_NE(nullptr, onezeros);
+ memset(onezeros, 1, bs / 2);
+
+ expect_lookup(RELPATH, ino, fsize);
+ expect_open(ino, 0, 1);
+
+ fd = open(FULLPATH, O_RDWR);
+ EXPECT_LE(0, fd) << strerror(errno);
+
+ /* Cache first and third blocks with dirty data. */
+ ASSERT_EQ(3 * bs, pwrite(fd, zeros, 3 * bs, 0)) << strerror(errno);
+
+ /*
+ * Write directly to all three blocks. The partially written blocks
+ * will be flushed because they're dirty.
+ */
+ FuseTest::expect_write(ino, 0, bs, bs, 0, 0, zeros);
+ FuseTest::expect_write(ino, 2 * bs, bs, bs, 0, 0, zeros);
+ /* The direct write is split in two because of the m_maxwrite value */
+ FuseTest::expect_write(ino, bs / 2, bs, bs, 0, 0, ones);
+ FuseTest::expect_write(ino, 3 * bs / 2, bs, bs, 0, 0, ones);
+ ASSERT_EQ(0, fcntl(fd, F_SETFL, O_DIRECT)) << strerror(errno);
+ ASSERT_EQ(2 * bs, pwrite(fd, ones, 2 * bs, bs / 2)) << strerror(errno);
+
+ /*
+ * Read from both the valid and invalid portions of the first and third
+ * blocks again. This will entail FUSE_READ operations because these
+ * blocks were invalidated by the direct write.
+ */
+ expect_read(ino, 0, bs, bs, zeroones);
+ expect_read(ino, 2 * bs, bs, bs, onezeros);
+ ASSERT_EQ(0, fcntl(fd, F_SETFL, 0)) << strerror(errno);
+ ASSERT_EQ(bs / 2, pread(fd, readbuf, bs / 2, 0)) << strerror(errno);
+ EXPECT_EQ(0, memcmp(zeros, readbuf, bs / 2));
+ ASSERT_EQ(bs / 2, pread(fd, readbuf, bs / 2, 5 * bs / 2))
+ << strerror(errno);
+ EXPECT_EQ(0, memcmp(zeros, readbuf, bs / 2));
+ ASSERT_EQ(bs / 2, pread(fd, readbuf, bs / 2, bs / 2))
+ << strerror(errno);
+ EXPECT_EQ(0, memcmp(ones, readbuf, bs / 2));
+ ASSERT_EQ(bs / 2, pread(fd, readbuf, bs / 2, 2 * bs))
+ << strerror(errno);
+ EXPECT_EQ(0, memcmp(ones, readbuf, bs / 2));
+
+ leak(fd);
+ free(zeroones);
+ free(onezeros);
+ free(ones);
+ free(zeros);
+ free(readbuf);
+}
+
+/*
* In WriteBack mode, writes may be cached beyond what the server thinks is the
* EOF. In this case, a short read at EOF should _not_ cause fusefs to update
* the file's size.
More information about the svn-src-projects
mailing list