git: 05bfa3e05896 - stable/13 - lio_listio(2): Allow LIO_READV and LIO_WRITEV.

From: John Baldwin <jhb_at_FreeBSD.org>
Date: Wed, 06 Sep 2023 21:56:33 UTC
The branch stable/13 has been updated by jhb:

URL: https://cgit.FreeBSD.org/src/commit/?id=05bfa3e0589629bc88b8da31942252af8a559441

commit 05bfa3e0589629bc88b8da31942252af8a559441
Author:     Thomas Munro <tmunro@FreeBSD.org>
AuthorDate: 2021-08-22 09:48:59 +0000
Commit:     John Baldwin <jhb@FreeBSD.org>
CommitDate: 2023-09-06 21:56:09 +0000

    lio_listio(2):  Allow LIO_READV and LIO_WRITEV.
    
    Allow multiple vector IOs to be started with one system call.
    aio_readv() and aio_writev() already used these opcodes under the
    covers.  This commit makes them available to user space.
    
    Because LIO_READV and LIO_WRITEV are non-standard extensions, they are
    only visible when __BSD_VISIBLE is set, just like the aio_readv() and
    aio_writev() functions.
    
    Reviewed by:    asomers, kib
    MFC after:      2 weeks
    Differential Revision:  https://reviews.freebsd.org/D31627
    
    (cherry picked from commit f30a1ae8d5290a52e898279bafc38556bf16bed8)
---
 lib/libc/sys/lio_listio.2 |  16 ++++++-
 sys/kern/vfs_aio.c        |   4 ++
 sys/sys/aio.h             |   4 +-
 tests/sys/aio/lio_test.c  | 109 ++++++++++++++++++++++++++++++++++++----------
 usr.bin/truss/syscalls.c  |   2 +-
 5 files changed, 110 insertions(+), 25 deletions(-)

diff --git a/lib/libc/sys/lio_listio.2 b/lib/libc/sys/lio_listio.2
index a477da7c6177..34d2490cca01 100644
--- a/lib/libc/sys/lio_listio.2
+++ b/lib/libc/sys/lio_listio.2
@@ -22,7 +22,7 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
-.Dd December 7, 2019
+.Dd August 22, 2021
 .Dt LIO_LISTIO 2
 .Os
 .Sh NAME
@@ -64,11 +64,17 @@ The following operations are supported:
 .It Dv LIO_READ
 Read data as if by a call to
 .Xr aio_read 2 .
+.It Dv LIO_READV
+Read data as if by a call to
+.Xr aio_readv 2 .
 .It Dv LIO_NOP
 No operation.
 .It Dv LIO_WRITE
 Write data as if by a call to
 .Xr aio_write 2 .
+.It Dv LIO_WRITEV
+Write data as if by a call to
+.Xr aio_writev 2 .
 .El
 .Pp
 If the
@@ -203,7 +209,9 @@ structure individually by calling
 .Sh SEE ALSO
 .Xr aio_error 2 ,
 .Xr aio_read 2 ,
+.Xr aio_readv 2 ,
 .Xr aio_write 2 ,
+.Xr aio_writev 2 ,
 .Xr read 2 ,
 .Xr write 2 ,
 .Xr sigevent 3 ,
@@ -214,6 +222,12 @@ The
 .Fn lio_listio
 function is expected to conform to
 .St -p1003.1-2001 .
+The
+.Dv LIO_READV
+and
+.Dv LIO_WRITEV
+operations are
+.Fx extensions, and should not be used in portable code.
 .Sh HISTORY
 The
 .Fn lio_listio
diff --git a/sys/kern/vfs_aio.c b/sys/kern/vfs_aio.c
index bd739ebf7b54..02014ceefdf5 100644
--- a/sys/kern/vfs_aio.c
+++ b/sys/kern/vfs_aio.c
@@ -1419,6 +1419,8 @@ aiocb_copyin(struct aiocb *ujob, struct kaiocb *kjob, int type)
 	error = copyin(ujob, kcb, sizeof(struct aiocb));
 	if (error)
 		return (error);
+	if (type == LIO_NOP)
+		type = kcb->aio_lio_opcode;
 	if (type & LIO_VECTORED) {
 		/* malloc a uio and copy in the iovec */
 		error = copyinuio(__DEVOLATILE(struct iovec*, kcb->aio_iov),
@@ -1557,8 +1559,10 @@ aio_aqueue(struct thread *td, struct aiocb *ujob, struct aioliojob *lj,
 	if (type == LIO_NOP) {
 		switch (job->uaiocb.aio_lio_opcode) {
 		case LIO_WRITE:
+		case LIO_WRITEV:
 		case LIO_NOP:
 		case LIO_READ:
+		case LIO_READV:
 			opcode = job->uaiocb.aio_lio_opcode;
 			break;
 		default:
diff --git a/sys/sys/aio.h b/sys/sys/aio.h
index d58ee5efd3d5..e9c5bdc63c87 100644
--- a/sys/sys/aio.h
+++ b/sys/sys/aio.h
@@ -41,10 +41,12 @@
 #define	LIO_NOP			0x0
 #define LIO_WRITE		0x1
 #define	LIO_READ		0x2
-#if defined(_KERNEL) || defined(_WANT_ALL_LIO_OPCODES)
+#if __BSD_VISIBLE
 #define	LIO_VECTORED		0x4
 #define	LIO_WRITEV		(LIO_WRITE | LIO_VECTORED)
 #define	LIO_READV		(LIO_READ | LIO_VECTORED)
+#endif
+#if defined(_KERNEL) || defined(_WANT_ALL_LIO_OPCODES)
 #define	LIO_SYNC		0x8
 #define	LIO_DSYNC		(0x10 | LIO_SYNC)
 #define	LIO_MLOCK		0x20
diff --git a/tests/sys/aio/lio_test.c b/tests/sys/aio/lio_test.c
index 94ce07086987..a04e54a39d9b 100644
--- a/tests/sys/aio/lio_test.c
+++ b/tests/sys/aio/lio_test.c
@@ -208,15 +208,96 @@ ATF_TC_BODY(lio_listio_empty_nowait_thread, tc)
 	ATF_REQUIRE_EQ(0, sem_destroy(&completions));
 }
 
+/*
+ * A simple check that the allowed operations work.
+ */
+ATF_TC_WITHOUT_HEAD(lio_listio_opcodes);
+ATF_TC_BODY(lio_listio_opcodes, tc)
+{
+	struct aiocb write_cb, read_cb, writev_cb, readv_cb;
+	struct aiocb *list[] = {&write_cb, &read_cb, &writev_cb, &readv_cb};
+	struct iovec writev_iov[2];
+	struct iovec readv_iov[2];
+	char buffer[6];
+	int fd;
+
+	fd = open("testfile", O_CREAT | O_RDWR);
+	ATF_REQUIRE_MSG(fd >= 0, "open: %s", strerror(errno));
+
+	/* We start with numbers in a file and letters in memory... */
+	ATF_CHECK_EQ(6, write(fd, "123456", 6));
+	memcpy(buffer, "abcdef", 6);
+
+	/* a -> 1 */
+	bzero(&write_cb, sizeof(write_cb));
+	write_cb.aio_sigevent.sigev_notify = SIGEV_NONE;
+	write_cb.aio_fildes = fd;
+	write_cb.aio_lio_opcode = LIO_WRITE;
+	write_cb.aio_buf = &buffer[0];
+	write_cb.aio_nbytes = 1;
+	write_cb.aio_offset = 0;
+
+	/* b <- 2 */
+	bzero(&read_cb, sizeof(read_cb));
+	read_cb.aio_sigevent.sigev_notify = SIGEV_NONE;
+	read_cb.aio_fildes = fd;
+	read_cb.aio_lio_opcode = LIO_READ;
+	read_cb.aio_buf = &buffer[1];
+	read_cb.aio_nbytes = 1;
+	read_cb.aio_offset = 1;
+
+	/* d -> 3, c -> 4 */
+	writev_iov[0].iov_base = &buffer[3];
+	writev_iov[0].iov_len = 1;
+	writev_iov[1].iov_base = &buffer[2];
+	writev_iov[1].iov_len = 1;
+	bzero(&writev_cb, sizeof(writev_cb));
+	writev_cb.aio_sigevent.sigev_notify = SIGEV_NONE;
+	writev_cb.aio_fildes = fd;
+	writev_cb.aio_lio_opcode = LIO_WRITEV;
+	writev_cb.aio_iov = &writev_iov;
+	writev_cb.aio_iovcnt = 2;
+	writev_cb.aio_offset = 2;
+
+	/* f <- 5, e <- 6 */
+	readv_iov[0].iov_base = &buffer[5];
+	readv_iov[0].iov_len = 1;
+	readv_iov[1].iov_base = &buffer[4];
+	readv_iov[1].iov_len = 1;
+	bzero(&readv_cb, sizeof(readv_cb));
+	readv_cb.aio_sigevent.sigev_notify = SIGEV_NONE;
+	readv_cb.aio_fildes = fd;
+	readv_cb.aio_lio_opcode = LIO_READV;
+	readv_cb.aio_iov = &readv_iov;
+	readv_cb.aio_iovcnt = 2;
+	readv_cb.aio_offset = 4;
+
+	ATF_CHECK_EQ(0, lio_listio(LIO_WAIT, list, nitems(list), NULL));
+	ATF_CHECK_EQ(0, aio_error(&write_cb));
+	ATF_CHECK_EQ(1, aio_return(&write_cb));
+	ATF_CHECK_EQ(0, aio_error(&read_cb));
+	ATF_CHECK_EQ(1, aio_return(&read_cb));
+	ATF_CHECK_EQ(0, aio_error(&writev_cb));
+	ATF_CHECK_EQ(2, aio_return(&writev_cb));
+	ATF_CHECK_EQ(0, aio_error(&readv_cb));
+	ATF_CHECK_EQ(2, aio_return(&readv_cb));
+
+	ATF_CHECK_EQ(0, memcmp(buffer, "a2cd65", 6));
+	ATF_CHECK_EQ(6, pread(fd, buffer, 6, 0));
+	ATF_CHECK_EQ(0, memcmp(buffer, "a2dc56", 6));
+
+	close(fd);
+}
+
+
 /*
  * Only select opcodes are allowed with lio_listio
  */
 ATF_TC_WITHOUT_HEAD(lio_listio_invalid_opcode);
 ATF_TC_BODY(lio_listio_invalid_opcode, tc)
 {
-	struct aiocb sync_cb, mlock_cb, writev_cb, readv_cb;
-	struct aiocb *list[] = {&sync_cb, &mlock_cb, &writev_cb, &readv_cb};
-	struct iovec iov;
+	struct aiocb sync_cb, mlock_cb;
+	struct aiocb *list[] = {&sync_cb, &mlock_cb};
 	int fd;
 
 	fd = open("testfile", O_CREAT | O_RDWR);
@@ -229,30 +310,13 @@ ATF_TC_BODY(lio_listio_invalid_opcode, tc)
 	bzero(&mlock_cb, sizeof(mlock_cb));
 	mlock_cb.aio_lio_opcode = LIO_MLOCK;
 
-	iov.iov_base = NULL;
-	iov.iov_len = 0;
-
-	bzero(&readv_cb, sizeof(readv_cb));
-	readv_cb.aio_fildes = fd;
-	readv_cb.aio_lio_opcode = LIO_READV;
-	readv_cb.aio_iov = &iov;
-	readv_cb.aio_iovcnt = 1;
-
-	bzero(&writev_cb, sizeof(writev_cb));
-	writev_cb.aio_fildes = fd;
-	writev_cb.aio_lio_opcode = LIO_WRITEV;
-	writev_cb.aio_iov = &iov;
-	writev_cb.aio_iovcnt = 1;
-
 	ATF_CHECK_ERRNO(EIO, lio_listio(LIO_WAIT, list, nitems(list), NULL));
 	ATF_CHECK_EQ(EINVAL, aio_error(&sync_cb));
 	ATF_CHECK_ERRNO(EINVAL, aio_return(&sync_cb) < 0);
 	ATF_CHECK_EQ(EINVAL, aio_error(&mlock_cb));
 	ATF_CHECK_ERRNO(EINVAL, aio_return(&mlock_cb) < 0);
-	ATF_CHECK_EQ(EINVAL, aio_error(&readv_cb));
-	ATF_CHECK_ERRNO(EINVAL, aio_return(&readv_cb) < 0);
-	ATF_CHECK_EQ(EINVAL, aio_error(&writev_cb));
-	ATF_CHECK_ERRNO(EINVAL, aio_return(&writev_cb) < 0);
+
+	close(fd);
 }
 
 
@@ -265,6 +329,7 @@ ATF_TP_ADD_TCS(tp)
 	ATF_TP_ADD_TC(tp, lio_listio_empty_nowait_signal);
 	ATF_TP_ADD_TC(tp, lio_listio_empty_nowait_thread);
 	ATF_TP_ADD_TC(tp, lio_listio_empty_wait);
+	ATF_TP_ADD_TC(tp, lio_listio_opcodes);
 	ATF_TP_ADD_TC(tp, lio_listio_invalid_opcode);
 
 	return (atf_no_error());
diff --git a/usr.bin/truss/syscalls.c b/usr.bin/truss/syscalls.c
index fff62ce7911c..010308beae3e 100644
--- a/usr.bin/truss/syscalls.c
+++ b/usr.bin/truss/syscalls.c
@@ -763,7 +763,7 @@ static struct xlat lio_modes[] = {
 };
 
 static struct xlat lio_opcodes[] = {
-	X(LIO_WRITE) X(LIO_READ) X(LIO_NOP)
+	X(LIO_WRITE) X(LIO_READ) X(LIO_READV) X(LIO_WRITEV) X(LIO_NOP)
 	XEND
 };