git: 1b1e392aed49 - main - ggate: Add support for O_DIRECT access

From: Mark Johnston <markj_at_FreeBSD.org>
Date: Sat, 21 Sep 2024 13:05:52 UTC
The branch main has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=1b1e392aed4957a38c49599512b4f65b844a0772

commit 1b1e392aed4957a38c49599512b4f65b844a0772
Author:     David E. Cross <dec@FreeBSD.org>
AuthorDate: 2024-09-21 07:22:57 +0000
Commit:     Mark Johnston <markj@FreeBSD.org>
CommitDate: 2024-09-21 13:05:22 +0000

    ggate: Add support for O_DIRECT access
    
    Adds support for controlling O_DIRECT access to ggated, ggatec, and
    ggatel.
    
    Reviewed by:    markj
    Relnotes:       yes
    MFC after:      1 month
    Differential Revision:  https://reviews.freebsd.org/D45056
---
 sbin/ggate/ggatec/ggatec.8 |  25 ++++++++---
 sbin/ggate/ggatec/ggatec.c |  12 ++++--
 sbin/ggate/ggated/ggated.8 |  54 ++++++++++++++++++++++--
 sbin/ggate/ggated/ggated.c | 102 +++++++++++++++++++++++++++++++++++----------
 sbin/ggate/ggatel/ggatel.8 |  24 +++++++----
 sbin/ggate/ggatel/ggatel.c |  20 ++++++---
 sbin/ggate/shared/ggate.h  |   7 +++-
 7 files changed, 192 insertions(+), 52 deletions(-)

diff --git a/sbin/ggate/ggatec/ggatec.8 b/sbin/ggate/ggatec/ggatec.8
index 1b780632f154..99f9bcab5964 100644
--- a/sbin/ggate/ggatec/ggatec.8
+++ b/sbin/ggate/ggatec/ggatec.8
@@ -34,6 +34,7 @@
 .Op Fl n
 .Op Fl v
 .Op Fl o Cm ro | wo | rw
+.Op Fl o Cm direct
 .Op Fl p Ar port
 .Op Fl q Ar queue_size
 .Op Fl R Ar rcvbuf
@@ -48,6 +49,7 @@
 .Op Fl n
 .Op Fl v
 .Op Fl o Cm ro | wo | rw
+.Op Fl o Cm direct
 .Op Fl p Ar port
 .Op Fl R Ar rcvbuf
 .Op Fl S Ar sndbuf
@@ -108,13 +110,21 @@ provider (cancels all pending requests).
 Do not use
 .Dv TCP_NODELAY
 option on TCP sockets.
-.It Fl o Cm ro | wo | rw
-Specify permissions to use when opening the file or device: read-only
-.Pq Cm ro ,
+.It Fl o Ar option
+Specify permissions and options to use when opening the file or device.
+.Bl -tag -width indent
+.It Cm ro
+read-only
+.It Cm wo
 write-only
-.Pq Cm wo ,
-or read-write
-.Pq Cm rw .
+.It Cm rw
+read-write
+.It Cm direct
+open with
+.Dv O_DIRECT
+option on the file
+.El
+.Pp
 Default is
 .Cm rw .
 .It Fl p Ar port
@@ -160,11 +170,14 @@ Use a CD-ROM device on a remote host.
 .Bd -literal -offset indent
 server# cat /etc/gg.exports
 client RO /dev/cd0
+client RW /tmp/image
 server# ggated
 
 client# ggatec create -o ro server /dev/cd0
 ggate0
 client# mount_cd9660 /dev/ggate0 /cdrom
+client# ggatec create -o rw -o direct server /tmp/image
+ggate1
 .Ed
 .Sh SEE ALSO
 .Xr geom 4 ,
diff --git a/sbin/ggate/ggatec/ggatec.c b/sbin/ggate/ggatec/ggatec.c
index 5ce605596fb9..b6f248c05290 100644
--- a/sbin/ggate/ggatec/ggatec.c
+++ b/sbin/ggate/ggatec/ggatec.c
@@ -52,7 +52,6 @@
 #include <netinet/tcp.h>
 #include <arpa/inet.h>
 
-#include <geom/gate/g_gate.h>
 #include "ggate.h"
 
 
@@ -62,6 +61,7 @@ static const char *path = NULL;
 static const char *host = NULL;
 static int unit = G_GATE_UNIT_AUTO;
 static unsigned flags = 0;
+static int direct_flag = 0;
 static int force = 0;
 static unsigned queue_size = G_GATE_QUEUE_SIZE;
 static unsigned port = G_GATE_PORT;
@@ -78,10 +78,12 @@ static void
 usage(void)
 {
 
-	fprintf(stderr, "usage: %s create [-nv] [-o <ro|wo|rw>] [-p port] "
+	fprintf(stderr, "usage: %s create [-nv] [-o <ro|wo|rw>] "
+	    "[-o <direct>] [-p port] "
 	    "[-q queue_size] [-R rcvbuf] [-S sndbuf] [-s sectorsize] "
 	    "[-t timeout] [-u unit] <host> <path>\n", getprogname());
-	fprintf(stderr, "       %s rescue [-nv] [-o <ro|wo|rw>] [-p port] "
+	fprintf(stderr, "       %s rescue [-nv] [-o <ro|wo|rw>] "
+	    "[-o <direct>] [-p port] "
 	    "[-R rcvbuf] [-S sndbuf] <-u unit> <host> <path>\n", getprogname());
 	fprintf(stderr, "       %s destroy [-f] <-u unit>\n", getprogname());
 	fprintf(stderr, "       %s list [-v] [-u unit]\n", getprogname());
@@ -361,7 +363,7 @@ handshake(int dir)
 		close(sfd);
 		return (-1);
 	}
-	cinit.gc_flags = flags | dir;
+	cinit.gc_flags = flags | direct_flag | dir;
 	cinit.gc_token = token;
 	cinit.gc_nconn = 2;
 	g_gate_swap2n_cinit(&cinit);
@@ -585,6 +587,8 @@ main(int argc, char *argv[])
 				flags = G_GATE_FLAG_WRITEONLY;
 			else if (strcasecmp("rw", optarg) == 0)
 				flags = 0;
+			else if (strcasecmp("direct", optarg) == 0)
+				direct_flag = GGATE_FLAG_DIRECT;
 			else {
 				errx(EXIT_FAILURE,
 				    "Invalid argument for '-o' option.");
diff --git a/sbin/ggate/ggated/ggated.8 b/sbin/ggate/ggated/ggated.8
index 7016dd58f87a..8e3ad68d1db0 100644
--- a/sbin/ggate/ggated/ggated.8
+++ b/sbin/ggate/ggated/ggated.8
@@ -85,10 +85,51 @@ An alternate location for the exports file.
 .Pp
 The format of an exports file is as follows:
 .Bd -literal -offset indent
-1.2.3.4		RO	/dev/cd0
-1.2.3.0/24	RW	/tmp/test.img
-hostname	WO	/tmp/image
+1.2.3.4		RO		/dev/cd0
+1.2.3.0/24	RW		/tmp/test.img
+hostname	WO		/tmp/image
+hostname	RW,DIRECT	/tmp/direct-image
+hostname	RW,NODIRECT	/tmp/nodirect-image
 .Ed
+.Pp
+The first column specifies the IP address, network with netmask, or the hostname
+that the export applies to.
+.Pp
+The next column is the access flags that apply to the export
+.Bl -tag -width ".Cm NODIRECT"
+.It Cm RO
+Read-only: the path specified will be exported to the client read-only.
+.It Cm WO
+Write-only: the path specified will be exported to the client write-only.
+.It Cm RW
+Read-write: the path specified will be exported to the client read-write.
+.It Cm DIRECT
+The path specified will always be opened with O_DIRECT for clients.
+.It Cm NODIRECT
+The path specified will never be opened with O_DIRECT for clients.
+.El
+.Pp
+The final column specifies the path to export.
+.Pp
+Files are opened with the least common flags between the client and the
+server.  A client may request read or write only to a read-write export
+and the server will honor the client request and open the file in the
+requested mode.  A client requesting greater access than permissions listed
+in the file will be rejected.
+.Pp
+DIRECT and NODIRECT are used to coerce the use of the O_DIRECT flag to
+.Xr open 2 when the specified path is opened. If DIRECT is specified the
+path is always opened with O_DIRECT. If NODIRECT is specified the path is
+never opened with O_DIRECT. DIRECT access limits the cache effects of
+IO operations on the file.  This has the effect of having clients accessing
+exports to not impact the cache of the local machine, however it
+will cause greater IO utilization to the devices on which the files reside.
+.Pp
+If neither is specified the server will use
+the preference specified by the client, with the default to not use O_DIRECT.
+If the client specifies a preference against the server's configuration the
+client preference will be silently ignored.
+.Pp
 .Sh FILES
 .Bl -tag -width ".Pa /var/run/ggated.pid" -compact
 .It Pa /var/run/ggated.pid
@@ -104,13 +145,18 @@ should be called with the
 .Fl v
 option.
 .Sh EXAMPLES
-Export CD-ROM device and a file:
+Export CD-ROM device, a file, and a file with
+.Dv O_DIRECT
+option:
 .Bd -literal -offset indent
 # echo "1.2.3.0/24 RO /dev/cd0" > /etc/gg.exports
 # echo "client RW /image" >> /etc/gg.exports
+# echo "client RW,DIRECT /image2" >> /etc/gg.exports
+# echo "client RW,NODIRECT /image3" >> /etc/gg.exports
 # ggated
 .Ed
 .Sh SEE ALSO
+.Xr open 2 ,
 .Xr geom 4 ,
 .Xr ggatec 8 ,
 .Xr ggatel 8
diff --git a/sbin/ggate/ggated/ggated.c b/sbin/ggate/ggated/ggated.c
index c579da1c1267..ef7b61cb13c4 100644
--- a/sbin/ggate/ggated/ggated.c
+++ b/sbin/ggate/ggated/ggated.c
@@ -64,7 +64,7 @@
 struct ggd_connection {
 	off_t		 c_mediasize;
 	unsigned	 c_sectorsize;
-	unsigned	 c_flags;	/* flags (RO/RW) */
+	int		 c_flags;	/* flags (RO/RW) */
 	int		 c_diskfd;
 	int		 c_sendfd;
 	int		 c_recvfd;
@@ -85,11 +85,18 @@ struct ggd_request {
 #define	r_length	r_hdr.gh_length
 #define	r_error		r_hdr.gh_error
 
+#define EFLAGS_RDONLY	0x0000
+#define EFLAGS_WRONLY	0x0001
+#define EFLAGS_RDWR	0x0002
+#define EFLAGS_ACCMODE	0x0003
+#define EFLAGS_DIRECT	0x0004
+#define EFLAGS_NODIRECT	0x0008
+
 struct ggd_export {
 	char		*e_path;	/* path to device/file */
 	in_addr_t	 e_ip;		/* remote IP address */
 	in_addr_t	 e_mask;	/* IP mask */
-	unsigned	 e_flags;	/* flags (RO/RW) */
+	int		 e_flags;	/* flags (WO/RO/RW/DIRECT/NODIRECT) */
 	SLIST_ENTRY(ggd_export) e_next;
 };
 
@@ -146,20 +153,61 @@ countmask(unsigned m)
 	return (mask);
 }
 
+static int
+parse_flags(const char *flagsstr, int lineno)
+{
+	char *flagscpy;
+	char *word, *brkf;
+	int access_flags = -1;
+	int direct_flags = 0;
+
+	flagscpy = strdup(flagsstr);
+	if (flagscpy == NULL) {
+		g_gate_xlog("Not enough memory.");
+	}
+
+	for (word = strtok_r(flagscpy, ",", &brkf);
+	     word != NULL;
+	     word = strtok_r(NULL, ",", &brkf)) {
+		if (strcasecmp("ro", word) == 0 ||
+		    strcasecmp("rd", word) == 0) {
+			access_flags = EFLAGS_RDONLY;
+		} else if (strcasecmp("wo", word) == 0) {
+			access_flags = EFLAGS_WRONLY;
+		} else if (strcasecmp("rw", word) == 0) {
+			access_flags = EFLAGS_RDWR;
+		} else if (strcasecmp("direct", word) == 0) {
+			direct_flags = EFLAGS_DIRECT;
+		} else if (strcasecmp("nodirect", word) == 0) {
+			direct_flags = EFLAGS_NODIRECT;
+		} else {
+			g_gate_xlog("Invalid value (%s) in flags field at "
+                            "line %u.", word, lineno);
+		}
+	}
+	free(flagscpy);
+	if (access_flags == -1) {
+		g_gate_xlog("Invalid value (%s) in flags field at "
+		    "line %u.", flagsstr, lineno);
+	}
+	return (direct_flags | access_flags);
+}
+
 static void
 line_parse(char *line, unsigned lineno)
 {
 	struct ggd_export *ex;
-	char *word, *path, *sflags;
-	unsigned flags, i, vmask;
+	char *word, *path, *sflags, *brkl;
+	unsigned i, vmask;
+	int flags;
 	in_addr_t ip, mask;
 
 	ip = mask = flags = vmask = 0;
 	path = NULL;
 	sflags = NULL;
 
-	for (i = 0, word = strtok(line, " \t"); word != NULL;
-	    i++, word = strtok(NULL, " \t")) {
+	for (i = 0, word = strtok_r(line, " \t", &brkl); word != NULL;
+	    i++, word = strtok_r(NULL, " \t", &brkl)) {
 		switch (i) {
 		case 0: /* IP address or host name */
 			ip = g_gate_str2ip(strsep(&word, "/"));
@@ -185,17 +233,7 @@ line_parse(char *line, unsigned lineno)
 			mask = countmask(vmask);
 			break;
 		case 1:	/* flags */
-			if (strcasecmp("rd", word) == 0 ||
-			    strcasecmp("ro", word) == 0) {
-				flags = O_RDONLY;
-			} else if (strcasecmp("wo", word) == 0) {
-				flags = O_WRONLY;
-			} else if (strcasecmp("rw", word) == 0) {
-				flags = O_RDWR;
-			} else {
-				g_gate_xlog("Invalid value in flags field at "
-				    "line %u.", lineno);
-			}
+			flags = parse_flags(word, lineno);
 			sflags = word;
 			break;
 		case 2:	/* path */
@@ -306,13 +344,16 @@ exports_check(struct ggd_export *ex, struct g_gate_cinit *cinit,
     struct ggd_connection *conn)
 {
 	char ipmask[32]; /* 32 == strlen("xxx.xxx.xxx.xxx/xxx.xxx.xxx.xxx")+1 */
-	int error = 0, flags;
+	int error = 0, flags, access_flags, direct_flags = 0;
 
 	strlcpy(ipmask, ip2str(ex->e_ip), sizeof(ipmask));
 	strlcat(ipmask, "/", sizeof(ipmask));
 	strlcat(ipmask, ip2str(ex->e_mask), sizeof(ipmask));
+
+	access_flags = ex->e_flags & EFLAGS_ACCMODE;
+
 	if ((cinit->gc_flags & GGATE_FLAG_RDONLY) != 0) {
-		if (ex->e_flags == O_WRONLY) {
+		if (access_flags == EFLAGS_WRONLY) {
 			g_gate_log(LOG_WARNING, "Read-only access requested, "
 			    "but %s (%s) is exported write-only.", ex->e_path,
 			    ipmask);
@@ -321,7 +362,7 @@ exports_check(struct ggd_export *ex, struct g_gate_cinit *cinit,
 			conn->c_flags |= GGATE_FLAG_RDONLY;
 		}
 	} else if ((cinit->gc_flags & GGATE_FLAG_WRONLY) != 0) {
-		if (ex->e_flags == O_RDONLY) {
+		if (access_flags == EFLAGS_RDONLY) {
 			g_gate_log(LOG_WARNING, "Write-only access requested, "
 			    "but %s (%s) is exported read-only.", ex->e_path,
 			    ipmask);
@@ -330,24 +371,41 @@ exports_check(struct ggd_export *ex, struct g_gate_cinit *cinit,
 			conn->c_flags |= GGATE_FLAG_WRONLY;
 		}
 	} else {
-		if (ex->e_flags == O_RDONLY) {
+		if (access_flags == EFLAGS_RDONLY) {
 			g_gate_log(LOG_WARNING, "Read-write access requested, "
 			    "but %s (%s) is exported read-only.", ex->e_path,
 			    ipmask);
 			return (EPERM);
-		} else if (ex->e_flags == O_WRONLY) {
+		} else if (access_flags == EFLAGS_WRONLY) {
 			g_gate_log(LOG_WARNING, "Read-write access requested, "
 			    "but %s (%s) is exported write-only.", ex->e_path,
 			    ipmask);
 			return (EPERM);
 		}
 	}
+
+	if ((cinit->gc_flags & GGATE_FLAG_DIRECT) != 0) {
+		if (ex->e_flags & EFLAGS_NODIRECT) {
+			g_gate_log(LOG_WARNING, "Direct IO requested, "
+			    "but %s (%s) is exported NODIRECT.", ex->e_path,
+			    ipmask);
+		} else {
+			conn->c_flags |= GGATE_FLAG_DIRECT;
+			direct_flags = O_DIRECT;
+		}
+	}
+
+	if (ex->e_flags & EFLAGS_DIRECT) {
+		direct_flags = O_DIRECT;
+	}
+
 	if ((conn->c_flags & GGATE_FLAG_RDONLY) != 0)
 		flags = O_RDONLY;
 	else if ((conn->c_flags & GGATE_FLAG_WRONLY) != 0)
 		flags = O_WRONLY;
 	else
 		flags = O_RDWR;
+	flags |= direct_flags;
 	if (conn->c_diskfd != -1) {
 		if (strcmp(conn->c_path, ex->e_path) != 0) {
 			g_gate_log(LOG_ERR, "old %s and new %s: "
diff --git a/sbin/ggate/ggatel/ggatel.8 b/sbin/ggate/ggatel/ggatel.8
index 816a5b9a6e0e..0dd4f30041c5 100644
--- a/sbin/ggate/ggatel/ggatel.8
+++ b/sbin/ggate/ggatel/ggatel.8
@@ -32,7 +32,7 @@
 .Nm
 .Cm create
 .Op Fl v
-.Op Fl o Cm ro | wo | rw
+.Oo Fl o option Oc ...
 .Op Fl s Ar sectorsize
 .Op Fl t Ar timeout
 .Op Fl u Ar unit
@@ -48,7 +48,7 @@
 .Nm
 .Cm rescue
 .Op Fl v
-.Op Fl o Cm ro | wo | rw
+.Oo Fl o option Oc ...
 .Fl u Ar unit
 .Ar path
 .Sh DESCRIPTION
@@ -92,13 +92,21 @@ Available options:
 Forcibly destroy
 .Nm ggate
 provider (cancels all pending requests).
-.It Fl o Cm ro | wo | rw
-Specify permissions to use when opening the file or device: read-only
-.Pq Cm ro ,
+.It Fl o Ar option
+Specify permissions and options to use when opening the file or device.
+.Bl -tag -width indent
+.It Cm ro
+read-only
+.It Cm wo
 write-only
-.Pq Cm wo ,
-or read-write
-.Pq Cm rw .
+.It Cm rw
+read-write
+.It Cm direct
+open with
+.Dv O_DIRECT
+option on the file
+.El
+.Pp
 Default is
 .Cm rw .
 .It Fl s Ar sectorsize
diff --git a/sbin/ggate/ggatel/ggatel.c b/sbin/ggate/ggatel/ggatel.c
index 246a419ecaaf..1cbb6af28b7e 100644
--- a/sbin/ggate/ggatel/ggatel.c
+++ b/sbin/ggate/ggatel/ggatel.c
@@ -43,7 +43,6 @@
 #include <sys/stat.h>
 #include <sys/syslog.h>
 
-#include <geom/gate/g_gate.h>
 #include "ggate.h"
 
 
@@ -52,6 +51,7 @@ static enum { UNSET, CREATE, DESTROY, LIST, RESCUE } action = UNSET;
 static const char *path = NULL;
 static int unit = G_GATE_UNIT_AUTO;
 static unsigned flags = 0;
+static int direct_flag = 0;
 static int force = 0;
 static unsigned sectorsize = 0;
 static unsigned timeout = G_GATE_TIMEOUT;
@@ -60,24 +60,30 @@ static void
 usage(void)
 {
 
-	fprintf(stderr, "usage: %s create [-v] [-o <ro|wo|rw>] "
+	fprintf(stderr, "usage: %s create [-v] [-o option] ... "
 	    "[-s sectorsize] [-t timeout] [-u unit] <path>\n", getprogname());
-	fprintf(stderr, "       %s rescue [-v] [-o <ro|wo|rw>] <-u unit> "
+	fprintf(stderr, "       %s rescue [-v] [-o option] ... <-u unit> "
 	    "<path>\n", getprogname());
 	fprintf(stderr, "       %s destroy [-f] <-u unit>\n", getprogname());
 	fprintf(stderr, "       %s list [-v] [-u unit]\n", getprogname());
+	fprintf(stderr, "          option = {ro, wo, rw, direct}\n");
 	exit(EXIT_FAILURE);
 }
 
 static int
 g_gate_openflags(unsigned ggflags)
 {
+	int openflags = O_RDWR;
 
 	if ((ggflags & G_GATE_FLAG_READONLY) != 0)
-		return (O_RDONLY);
+		openflags = O_RDONLY;
 	else if ((ggflags & G_GATE_FLAG_WRITEONLY) != 0)
-		return (O_WRONLY);
-	return (O_RDWR);
+		openflags = O_WRONLY;
+
+	if (direct_flag)
+		openflags |= O_DIRECT;
+
+	return (openflags);
 }
 
 static void
@@ -248,6 +254,8 @@ main(int argc, char *argv[])
 				flags = G_GATE_FLAG_WRITEONLY;
 			else if (strcasecmp("rw", optarg) == 0)
 				flags = 0;
+			else if (strcasecmp("direct", optarg) == 0)
+				direct_flag = 1;
 			else {
 				errx(EXIT_FAILURE,
 				    "Invalid argument for '-o' option.");
diff --git a/sbin/ggate/shared/ggate.h b/sbin/ggate/shared/ggate.h
index 0170df2dd1b6..78488a24c3fa 100644
--- a/sbin/ggate/shared/ggate.h
+++ b/sbin/ggate/shared/ggate.h
@@ -29,6 +29,7 @@
 #ifndef _GGATE_H_
 #define	_GGATE_H_
 
+#include <geom/gate/g_gate.h>
 #include <sys/endian.h>
 #include <stdarg.h>
 
@@ -42,8 +43,8 @@
 #define	GGATE_MAGIC		"GEOM_GATE       "
 #define	GGATE_VERSION		0
 
-#define	GGATE_FLAG_RDONLY	0x0001
-#define	GGATE_FLAG_WRONLY	0x0002
+#define	GGATE_FLAG_RDONLY	G_GATE_FLAG_READONLY
+#define	GGATE_FLAG_WRONLY	G_GATE_FLAG_WRITEONLY
 /*
  * If neither the GGATE_FLAG_SEND nor the GGATE_FLAG_RECV flag is
  * set - this is initial connection.
@@ -53,6 +54,8 @@
 #define	GGATE_FLAG_SEND		0x0004
 #define	GGATE_FLAG_RECV		0x0008
 
+#define	GGATE_FLAG_DIRECT	0x0010
+
 #define	GGATE_CMD_READ		0
 #define	GGATE_CMD_WRITE		1
 #define	GGATE_CMD_FLUSH		3