git: 8624f4347e81 - main - divert: declare PF_DIVERT domain and stop abusing PF_INET
Date: Tue, 30 Aug 2022 22:49:24 UTC
The branch main has been updated by glebius: URL: https://cgit.FreeBSD.org/src/commit/?id=8624f4347e8133911b0554e816f6bedb56dc5fb3 commit 8624f4347e8133911b0554e816f6bedb56dc5fb3 Author: Gleb Smirnoff <glebius@FreeBSD.org> AuthorDate: 2022-08-30 22:09:21 +0000 Commit: Gleb Smirnoff <glebius@FreeBSD.org> CommitDate: 2022-08-30 22:09:21 +0000 divert: declare PF_DIVERT domain and stop abusing PF_INET divert(4) is not a protocol of IPv4. It is a socket used to intercept packets from ipfw(4), pass them to userland, and re-inject them back. It can divert and re-inject IPv4 and IPv6 packets today, but potentially it is not limited to these two protocols. IPPROTO_DIVERT does not belong to the known IP protocols; it doesn't even fit into a u_char. I guess the implementation of divert(4) was done the way it is basically because it was easier to do it this way, back when protocols for sockets were intertwined with IP protocols and domains were statically compiled in. Moving divert(4) out of inetsw accomplishes two important things: 1) IPDIVERT is getting much closer to not being dependent on INET. This will be finalized in the following changes. 2) A divert socket no longer aliases with a raw IPv4 socket. Domain/proto selection code won't need a hack for SOCK_RAW, and multiple entries in inetsw implementing different flavors of raw socket can merge into one without the requirement that raw IPv4 be the last member of dom_protosw. 
Differential revision: https://reviews.freebsd.org/D36379 --- lib/libc/sys/socket.2 | 4 +++- share/examples/netgraph/ngctl | 6 +++--- share/man/man4/divert.4 | 38 +++++++++++++++++++++++--------------- sys/kern/uipc_socket.c | 11 +++++++++++ sys/netgraph/ng_ksocket.c | 2 +- sys/netinet/in_mcast.c | 20 ++++++-------------- sys/netinet/ip_divert.c | 23 ++++++++++++++--------- sys/netinet6/in6_mcast.c | 20 ++++++-------------- sys/sys/socket.h | 4 +++- usr.bin/netstat/inet.c | 7 +++---- usr.bin/netstat/main.c | 2 +- 11 files changed, 74 insertions(+), 63 deletions(-) diff --git a/lib/libc/sys/socket.2 b/lib/libc/sys/socket.2 index 8ced1f0ba930..1eceabbf6fd4 100644 --- a/lib/libc/sys/socket.2 +++ b/lib/libc/sys/socket.2 @@ -28,7 +28,7 @@ .\" From: @(#)socket.2 8.1 (Berkeley) 6/4/93 .\" $FreeBSD$ .\" -.Dd August 26, 2022 +.Dd August 30, 2022 .Dt SOCKET 2 .Os .Sh NAME @@ -60,6 +60,7 @@ PF_LOCAL Host-internal protocols (alias for PF_UNIX), PF_UNIX Host-internal protocols, PF_INET Internet version 4 protocols, PF_INET6 Internet version 6 protocols, +PF_DIVERT Firewall packet diversion/re-injection, PF_ROUTE Internal routing protocol, PF_KEY Internal key-management function, PF_NETGRAPH Netgraph sockets, @@ -283,6 +284,7 @@ The socket type is not supported by the protocol. .Xr accept 2 , .Xr bind 2 , .Xr connect 2 , +.Xr divert 4 , .Xr getpeername 2 , .Xr getsockname 2 , .Xr getsockopt 2 , diff --git a/share/examples/netgraph/ngctl b/share/examples/netgraph/ngctl index e7b7cd86b04f..8dc6b23815b7 100644 --- a/share/examples/netgraph/ngctl +++ b/share/examples/netgraph/ngctl @@ -31,10 +31,10 @@ quit Exit program + -# Now let's create a ng_ksocket(4) node, in the family PF_INET, -# of type SOCK_RAW, and protocol IPPROTO_DIVERT: +# Now let's create a ng_ksocket(4) node, in the family PF_DIVERT, +# of type SOCK_RAW: - + mkpeer ksocket foo inet/raw/divert + + mkpeer ksocket foo divert/raw/0 # Note that ``foo'' is the hook name on the socket node, which can be # anything. 
The ``inet/raw/divert'' is the hook name on the ksocket diff --git a/share/man/man4/divert.4 b/share/man/man4/divert.4 index d8296995ca97..cfe1a31486c9 100644 --- a/share/man/man4/divert.4 +++ b/share/man/man4/divert.4 @@ -1,6 +1,6 @@ .\" $FreeBSD$ .\" -.Dd December 17, 2004 +.Dd August 30, 2022 .Dt DIVERT 4 .Os .Sh NAME @@ -11,7 +11,7 @@ .In sys/socket.h .In netinet/in.h .Ft int -.Fn socket PF_INET SOCK_RAW IPPROTO_DIVERT +.Fn socket PF_DIVERT SOCK_RAW 0 .Pp To enable support for divert sockets, place the following lines in the kernel configuration file: @@ -30,24 +30,30 @@ ipfw_load="YES" ipdivert_load="YES" .Ed .Sh DESCRIPTION -Divert sockets are similar to raw IP sockets, except that they -can be bound to a specific +Divert sockets allow to intercept and re-inject packets flowing through +the +.Xr ipfw 4 +firewall. +A divert socket can be bound to a specific .Nm port via the .Xr bind 2 system call. -The IP address in the bind is ignored; only the port -number is significant. +The sockaddr argument shall be sockaddr_in with sin_port set to the +desired value. +Note that the +.Nm +port has nothing to do with TCP/UDP ports. +It is just a cookie number, that allows to differentiate between different +divert points in the +.Xr ipfw 4 +ruleset. A divert socket bound to a divert port will receive all packets diverted -to that port by some (here unspecified) kernel mechanism(s). -Packets may also be written to a divert port, in which case they -re-enter kernel IP packet processing. +to that port by +.Xr ipfw 4 . +Packets may also be written to a divert port, in which case they re-enter +firewall processing at the next rule. .Pp -Divert sockets are normally used in conjunction with -.Fx Ns 's -packet filtering implementation and the -.Xr ipfw 8 -program. By reading from and writing to a divert socket, matching packets can be passed through an arbitrary ``filter'' as they travel through the host machine, special routing tricks can be done, etc. 
@@ -154,7 +160,9 @@ Packets written as incoming and having incorrect checksums will be dropped. Otherwise, all header fields are unchanged (and therefore in network order). .Pp Binding to port numbers less than 1024 requires super-user access, as does -creating a socket of type SOCK_RAW. +creating a +.Nm +socket. .Sh ERRORS Writing to a divert socket can return these errors, along with the usual errors possible when writing raw packets: diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index bf22c0245f24..1bc172eacd89 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -509,6 +509,17 @@ socreate(int dom, struct socket **aso, int type, int proto, struct socket *so; int error; + /* + * XXX: divert(4) historically abused PF_INET. Keep this compatibility + * shim until all applications have been updated. + */ + if (__predict_false(dom == PF_INET && type == SOCK_RAW && + proto == IPPROTO_DIVERT)) { + dom = PF_DIVERT; + printf("%s uses obsolete way to create divert(4) socket\n", + td->td_proc->p_comm); + } + if (proto) prp = pffindproto(dom, proto, type); else diff --git a/sys/netgraph/ng_ksocket.c b/sys/netgraph/ng_ksocket.c index d4f41fe02205..ff5e7b4812bf 100644 --- a/sys/netgraph/ng_ksocket.c +++ b/sys/netgraph/ng_ksocket.c @@ -121,6 +121,7 @@ static const struct ng_ksocket_alias ng_ksocket_families[] = { { "inet", PF_INET }, { "inet6", PF_INET6 }, { "atm", PF_ATM }, + { "divert", PF_DIVERT }, { NULL, -1 }, }; @@ -147,7 +148,6 @@ static const struct ng_ksocket_alias ng_ksocket_protos[] = { { "ah", IPPROTO_AH, PF_INET }, { "swipe", IPPROTO_SWIPE, PF_INET }, { "encap", IPPROTO_ENCAP, PF_INET }, - { "divert", IPPROTO_DIVERT, PF_INET }, { "pim", IPPROTO_PIM, PF_INET }, { NULL, -1 }, }; diff --git a/sys/netinet/in_mcast.c b/sys/netinet/in_mcast.c index 3f25471f0858..87de83da7a6a 100644 --- a/sys/netinet/in_mcast.c +++ b/sys/netinet/in_mcast.c @@ -1751,13 +1751,9 @@ inp_getmoptions(struct inpcb *inp, struct sockopt *sopt) INP_WLOCK(inp); imo = 
inp->inp_moptions; - /* - * If socket is neither of type SOCK_RAW or SOCK_DGRAM, - * or is a divert socket, reject it. - */ - if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT || - (inp->inp_socket->so_proto->pr_type != SOCK_RAW && - inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) { + /* If socket is neither of type SOCK_RAW or SOCK_DGRAM reject it. */ + if (inp->inp_socket->so_proto->pr_type != SOCK_RAW && + inp->inp_socket->so_proto->pr_type != SOCK_DGRAM) { INP_WUNLOCK(inp); return (EOPNOTSUPP); } @@ -2717,13 +2713,9 @@ inp_setmoptions(struct inpcb *inp, struct sockopt *sopt) error = 0; - /* - * If socket is neither of type SOCK_RAW or SOCK_DGRAM, - * or is a divert socket, reject it. - */ - if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT || - (inp->inp_socket->so_proto->pr_type != SOCK_RAW && - inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) + /* If socket is neither of type SOCK_RAW or SOCK_DGRAM, reject it. */ + if (inp->inp_socket->so_proto->pr_type != SOCK_RAW && + inp->inp_socket->so_proto->pr_type != SOCK_DGRAM) return (EOPNOTSUPP); switch (sopt->sopt_name) { diff --git a/sys/netinet/ip_divert.c b/sys/netinet/ip_divert.c index d14ec5190ad0..b09d7e1dda7a 100644 --- a/sys/netinet/ip_divert.c +++ b/sys/netinet/ip_divert.c @@ -36,7 +36,7 @@ __FBSDID("$FreeBSD$"); #include "opt_inet6.h" #include "opt_sctp.h" #ifndef INET -#error "IPDIVERT requires INET" +#error "IPDIVERT requires INET" /* XXX! 
*/ #endif #include <sys/param.h> @@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$"); #include <sys/kernel.h> #include <sys/priv.h> #include <sys/proc.h> +#include <sys/domain.h> #include <sys/protosw.h> #include <sys/socket.h> #include <sys/socketvar.h> @@ -716,7 +717,6 @@ SYSCTL_PROC(_net_inet_divert, OID_AUTO, pcblist, static struct protosw div_protosw = { .pr_type = SOCK_RAW, - .pr_protocol = IPPROTO_DIVERT, .pr_flags = PR_ATOMIC|PR_ADDR, .pr_attach = div_attach, .pr_bind = div_bind, @@ -729,6 +729,13 @@ static struct protosw div_protosw = { .pr_sosetlabel = in_pcbsosetlabel }; +static struct domain divertdomain = { + .dom_family = PF_DIVERT, + .dom_name = "divert", + .dom_nprotosw = 1, + .dom_protosw = { &div_protosw }, +}; + static int div_modevent(module_t mod, int type, void *unused) { @@ -736,12 +743,7 @@ div_modevent(module_t mod, int type, void *unused) switch (type) { case MOD_LOAD: - /* - * Protocol will be initialized by pf_proto_register(). - */ - err = protosw_register(&inetdomain, &div_protosw); - if (err != 0) - return (err); + domain_add(&divertdomain); ip_divert_ptr = divert_packet; break; case MOD_QUIESCE: @@ -763,6 +765,9 @@ div_modevent(module_t mod, int type, void *unused) * XXXRW: Note that there is a slight race here, as a new * socket open request could be spinning on the lock and then * we destroy the lock. + * + * XXXGL: One more reason this code is incorrect is that it + * checks only the current vnet. 
*/ INP_INFO_WLOCK(&V_divcbinfo); if (V_divcbinfo.ipi_count != 0) { @@ -771,7 +776,7 @@ div_modevent(module_t mod, int type, void *unused) break; } ip_divert_ptr = NULL; - err = protosw_unregister(&div_protosw); + domain_remove(&divertdomain); INP_INFO_WUNLOCK(&V_divcbinfo); #ifndef VIMAGE div_destroy(NULL); diff --git a/sys/netinet6/in6_mcast.c b/sys/netinet6/in6_mcast.c index d0f8186e75c7..a02e18656dc2 100644 --- a/sys/netinet6/in6_mcast.c +++ b/sys/netinet6/in6_mcast.c @@ -1772,13 +1772,9 @@ ip6_getmoptions(struct inpcb *inp, struct sockopt *sopt) INP_WLOCK(inp); im6o = inp->in6p_moptions; - /* - * If socket is neither of type SOCK_RAW or SOCK_DGRAM, - * or is a divert socket, reject it. - */ - if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT || - (inp->inp_socket->so_proto->pr_type != SOCK_RAW && - inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) { + /* If socket is neither of type SOCK_RAW or SOCK_DGRAM, reject it. */ + if (inp->inp_socket->so_proto->pr_type != SOCK_RAW && + inp->inp_socket->so_proto->pr_type != SOCK_DGRAM) { INP_WUNLOCK(inp); return (EOPNOTSUPP); } @@ -2655,13 +2651,9 @@ ip6_setmoptions(struct inpcb *inp, struct sockopt *sopt) error = 0; - /* - * If socket is neither of type SOCK_RAW or SOCK_DGRAM, - * or is a divert socket, reject it. - */ - if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT || - (inp->inp_socket->so_proto->pr_type != SOCK_RAW && - inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) + /* If socket is neither of type SOCK_RAW or SOCK_DGRAM, reject it. 
*/ + if (inp->inp_socket->so_proto->pr_type != SOCK_RAW && + inp->inp_socket->so_proto->pr_type != SOCK_DGRAM) return (EOPNOTSUPP); switch (sopt->sopt_name) { diff --git a/sys/sys/socket.h b/sys/sys/socket.h index 3ec0d3b1d06d..f81aba8f972d 100644 --- a/sys/sys/socket.h +++ b/sys/sys/socket.h @@ -268,7 +268,8 @@ struct accept_filter_arg { #define AF_INET_SDP 40 /* OFED Socket Direct Protocol ipv4 */ #define AF_INET6_SDP 42 /* OFED Socket Direct Protocol ipv6 */ #define AF_HYPERV 43 /* HyperV sockets */ -#define AF_MAX 43 +#define AF_DIVERT 44 /* divert(4) */ +#define AF_MAX 44 /* * When allocating a new AF_ constant, please only allocate * even numbered constants for FreeBSD until 134 as odd numbered AF_ @@ -393,6 +394,7 @@ struct sockproto { #define PF_NETLINK AF_NETLINK #define PF_INET_SDP AF_INET_SDP #define PF_INET6_SDP AF_INET6_SDP +#define PF_DIVERT AF_DIVERT #define PF_MAX AF_MAX diff --git a/usr.bin/netstat/inet.c b/usr.bin/netstat/inet.c index b7dbcb3531b0..e848874d1695 100644 --- a/usr.bin/netstat/inet.c +++ b/usr.bin/netstat/inet.c @@ -109,15 +109,14 @@ pcblist_sysctl(int proto, const char *name, char **bufp) case IPPROTO_UDP: mibvar = "net.inet.udp.pcblist"; break; - case IPPROTO_DIVERT: - mibvar = "net.inet.divert.pcblist"; - break; default: mibvar = "net.inet.raw.pcblist"; break; } if (strncmp(name, "sdp", 3) == 0) mibvar = "net.inet.sdp.pcblist"; + else if (strncmp(name, "divert", 6) == 0) + mibvar = "net.inet.divert.pcblist"; len = 0; if (sysctlbyname(mibvar, 0, &len, 0, 0) < 0) { if (errno != ENOENT) @@ -272,7 +271,7 @@ protopr(u_long off, const char *name, int af1, int proto) so = &inp->xi_socket; /* Ignore sockets for protocols other than the desired one. */ - if (so->xso_protocol != proto) + if (proto != 0 && so->xso_protocol != proto) continue; /* Ignore PCBs which were freed during copyout. 
*/ diff --git a/usr.bin/netstat/main.c b/usr.bin/netstat/main.c index 1a011b9d5488..d1b069f38f0c 100644 --- a/usr.bin/netstat/main.c +++ b/usr.bin/netstat/main.c @@ -101,7 +101,7 @@ static struct protox { NULL, NULL, "sdp", 1, IPPROTO_TCP }, #endif { N_DIVCBINFO, -1, 1, protopr, - NULL, NULL, "divert", 1, IPPROTO_DIVERT }, + NULL, NULL, "divert", 1, 0 }, { N_RIPCBINFO, N_IPSTAT, 1, protopr, ip_stats, NULL, "ip", 1, IPPROTO_RAW }, { N_RIPCBINFO, N_ICMPSTAT, 1, protopr,