git: dfc6db8e18b6 - stable/13 - netlink: add interface notification on link status / flags change.

From: Alexander V. Chernikov <melifaro_at_FreeBSD.org>
Date: Mon, 23 Jan 2023 22:12:04 UTC
The branch stable/13 has been updated by melifaro:

URL: https://cgit.FreeBSD.org/src/commit/?id=dfc6db8e18b65ee3101a007aace55e21d72c4011

commit dfc6db8e18b65ee3101a007aace55e21d72c4011
Author:     Alexander V. Chernikov <melifaro@FreeBSD.org>
AuthorDate: 2022-12-03 17:10:50 +0000
Commit:     Alexander V. Chernikov <melifaro@FreeBSD.org>
CommitDate: 2023-01-23 22:04:03 +0000

    netlink: add interface notification on link status / flags change.
    
    * Add link-state change notifications by subscribing to ifnet_link_event.
     In the Linux netlink model, link state is reported in two places: the
     first is the IFLA_OPERSTATE attribute, which stores the state per
     RFC 2863. The second is the IFF_LOWER_UP interface flag. As many
     applications rely on the latter, reserve one bit from if_flags, named
     IFF_NETLINK_1. This flag is mapped to IFF_LOWER_UP in the netlink
     headers. This is done to avoid making applications think this flag is
     actually supported or present in non-netlink outputs.
    * Add flag change notifications, by hooking into rt_ifmsg().
     In the netlink model, a notification should include a bitmask of the
     changed flags. Update rt_ifmsg() to include such a bitmask.
    
    Differential Revision: https://reviews.freebsd.org/D37597
    
    (cherry picked from commit 1bcd230f9508b3c917f26be4b905e4b5141decea)
---
 sys/dev/usb/usb_pf.c          |  2 +-
 sys/net/if.c                  | 10 ++++----
 sys/net/if.h                  |  1 +
 sys/net/route.c               | 24 +++++++++++++++++-
 sys/net/route.h               |  1 +
 sys/net/route/route_ctl.h     |  2 ++
 sys/net/rtsock.c              | 10 +++++---
 sys/netlink/netlink_route.c   |  5 +++-
 sys/netlink/route/iface.c     | 59 ++++++++++++++++++++++++++++---------------
 sys/netlink/route/interface.h |  3 +++
 sys/netlink/route/route_var.h |  1 +
 11 files changed, 87 insertions(+), 31 deletions(-)

diff --git a/sys/dev/usb/usb_pf.c b/sys/dev/usb/usb_pf.c
index 6ccb5ebbc62b..0b7d1f9a02a0 100644
--- a/sys/dev/usb/usb_pf.c
+++ b/sys/dev/usb/usb_pf.c
@@ -203,7 +203,7 @@ usbpf_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
 	ifp->if_ioctl = usbpf_ioctl;
 	if_attach(ifp);
 	ifp->if_flags |= IFF_UP;
-	rt_ifmsg(ifp);
+	rt_ifmsg_14(ifp, IFF_UP);
 	/*
 	 * XXX According to the specification of DLT_USB, it indicates
 	 * packets beginning with USB setup header. But not sure all
diff --git a/sys/net/if.c b/sys/net/if.c
index d42b642e8d18..7423c4963f57 100644
--- a/sys/net/if.c
+++ b/sys/net/if.c
@@ -2222,7 +2222,7 @@ if_unroute(struct ifnet *ifp, int flag, int fam)
 
 	if (ifp->if_carp)
 		(*carp_linkstate_p)(ifp);
-	rt_ifmsg(ifp);
+	rt_ifmsg_14(ifp, IFF_UP);
 }
 
 /*
@@ -2246,7 +2246,7 @@ if_route(struct ifnet *ifp, int flag, int fam)
 	NET_EPOCH_EXIT(et);
 	if (ifp->if_carp)
 		(*carp_linkstate_p)(ifp);
-	rt_ifmsg(ifp);
+	rt_ifmsg_14(ifp, IFF_UP);
 #ifdef INET6
 	in6_if_up(ifp);
 #endif
@@ -2290,7 +2290,7 @@ do_link_state_change(void *arg, int pending)
 	link_state = ifp->if_link_state;
 
 	CURVNET_SET(ifp->if_vnet);
-	rt_ifmsg(ifp);
+	rt_ifmsg_14(ifp, 0);
 	if (ifp->if_vlantrunk != NULL)
 		(*vlan_link_state_p)(ifp);
 
@@ -2760,7 +2760,7 @@ ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
 		error = (*ifp->if_ioctl)(ifp, cmd, data);
 		if (error == 0) {
 			getmicrotime(&ifp->if_lastchange);
-			rt_ifmsg(ifp);
+			rt_ifmsg_14(ifp, 0);
 #ifdef INET
 			DEBUGNET_NOTIFY_MTU(ifp);
 #endif
@@ -3190,7 +3190,7 @@ if_setflag(struct ifnet *ifp, int flag, int pflag, int *refcount, int onswitch)
 	if (error)
 		goto recover;
 	/* Notify userland that interface flags have changed */
-	rt_ifmsg(ifp);
+	rt_ifmsg_14(ifp, flag);
 	return (0);
 
 recover:
diff --git a/sys/net/if.h b/sys/net/if.h
index 782e792cf87c..409289869c27 100644
--- a/sys/net/if.h
+++ b/sys/net/if.h
@@ -163,6 +163,7 @@ struct if_data {
 #define	IFF_DYING	0x200000	/* (n) interface is winding down */
 #define	IFF_RENAMING	0x400000	/* (n) interface is being renamed */
 #define	IFF_NOGROUP	0x800000	/* (n) interface is not part of any groups */
+#define	IFF_NETLINK_1	0x1000000	/* (n) used by netlink */
 
 /*
  * Old names for driver flags so that user space tools can continue to use
diff --git a/sys/net/route.c b/sys/net/route.c
index b4076123e5d6..80d08d1bc7f0 100644
--- a/sys/net/route.c
+++ b/sys/net/route.c
@@ -813,12 +813,34 @@ rt_routemsg_info(int cmd, struct rt_addrinfo *info, int fibnum)
 	return (rtsock_routemsg_info(cmd, info, fibnum));
 }
 
+void
+rt_ifmsg(struct ifnet *ifp)
+{
+	rt_ifmsg_14(ifp, 0);
+}
+
+void
+rt_ifmsg_14(struct ifnet *ifp, int if_flags_mask)
+{
+	rtsock_callback_p->ifmsg_f(ifp, if_flags_mask);
+	netlink_callback_p->ifmsg_f(ifp, if_flags_mask);
+}
+
 /* Netlink-related callbacks needed to glue rtsock, netlink and linuxolator */
 static void
 ignore_route_event(uint32_t fibnum, const struct rib_cmd_info *rc)
 {
 }
-static struct rtbridge ignore_cb = { .route_f = ignore_route_event };
+
+static void
+ignore_ifmsg_event(struct ifnet *ifp, int if_flags_mask)
+{
+}
+
+static struct rtbridge ignore_cb = {
+	.route_f = ignore_route_event,
+	.ifmsg_f = ignore_ifmsg_event,
+};
 
 void *linux_netlink_p = NULL; /* Callback pointer for Linux translator functions */
 struct rtbridge *rtsock_callback_p = &ignore_cb;
diff --git a/sys/net/route.h b/sys/net/route.h
index a2394d54b92f..3dac437426b0 100644
--- a/sys/net/route.h
+++ b/sys/net/route.h
@@ -416,6 +416,7 @@ struct rib_head;
 
 void	 rt_ieee80211msg(struct ifnet *, int, void *, size_t);
 void	 rt_ifmsg(struct ifnet *);
+void	 rt_ifmsg_14(struct ifnet *, int);
 void	 rt_missmsg(int, struct rt_addrinfo *, int, int);
 void	 rt_missmsg_fib(int, struct rt_addrinfo *, int, int, int);
 int	 rt_addrmsg(int, struct ifaddr *, int);
diff --git a/sys/net/route/route_ctl.h b/sys/net/route/route_ctl.h
index d150da6264d4..e8560e681ddb 100644
--- a/sys/net/route/route_ctl.h
+++ b/sys/net/route/route_ctl.h
@@ -191,8 +191,10 @@ void rib_notify(struct rib_head *rnh, enum rib_subscription_type type,
 
 /* Event bridge */
 typedef void route_event_f(uint32_t fibnum, const struct rib_cmd_info *rc);
+typedef void ifmsg_event_f(struct ifnet *ifp, int if_flags_mask);
 struct rtbridge{
 	route_event_f	*route_f;
+	ifmsg_event_f	*ifmsg_f;
 };
 extern struct rtbridge *rtsock_callback_p;
 extern struct rtbridge *netlink_callback_p;
diff --git a/sys/net/rtsock.c b/sys/net/rtsock.c
index 69a5698e4b5f..fc9439602c13 100644
--- a/sys/net/rtsock.c
+++ b/sys/net/rtsock.c
@@ -211,6 +211,7 @@ static void	send_rtm_reply(struct socket *so, struct rt_msghdr *rtm,
 static bool	can_export_rte(struct ucred *td_ucred, bool rt_is_host,
 			const struct sockaddr *rt_dst);
 static void	rtsock_notify_event(uint32_t fibnum, const struct rib_cmd_info *rc);
+static void	rtsock_ifmsg(struct ifnet *ifp, int if_flags_mask);
 
 static struct netisr_handler rtsock_nh = {
 	.nh_name = "rtsock",
@@ -288,7 +289,10 @@ rts_handle_route_event(uint32_t fibnum, const struct rib_cmd_info *rc)
 #endif
 		report_route_event(rc, (void *)(uintptr_t)fibnum);
 }
-static struct rtbridge rtsbridge = { .route_f = rts_handle_route_event };
+static struct rtbridge rtsbridge = {
+	.route_f = rts_handle_route_event,
+	.ifmsg_f = rtsock_ifmsg,
+};
 static struct rtbridge *rtsbridge_orig_p;
 
 static void
@@ -1984,8 +1988,8 @@ rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error)
  * This routine is called to generate a message from the routing
  * socket indicating that the status of a network interface has changed.
  */
-void
-rt_ifmsg(struct ifnet *ifp)
+static void
+rtsock_ifmsg(struct ifnet *ifp, int if_flags_mask __unused)
 {
 	struct if_msghdr *ifm;
 	struct mbuf *m;
diff --git a/sys/netlink/netlink_route.c b/sys/netlink/netlink_route.c
index cc1a0cc6db8d..0622656715c4 100644
--- a/sys/netlink/netlink_route.c
+++ b/sys/netlink/netlink_route.c
@@ -106,7 +106,10 @@ rtnl_handle_message(struct nlmsghdr *hdr, struct nl_pstate *npt)
 	return (error);
 }
 
-static struct rtbridge nlbridge = { .route_f = rtnl_handle_route_event };
+static struct rtbridge nlbridge = {
+	.route_f = rtnl_handle_route_event,
+	.ifmsg_f = rtnl_handle_ifnet_event,
+};
 static struct rtbridge *nlbridge_orig_p;
 
 static void
diff --git a/sys/netlink/route/iface.c b/sys/netlink/route/iface.c
index 5ffe11cc7e80..579869e9662c 100644
--- a/sys/netlink/route/iface.c
+++ b/sys/netlink/route/iface.c
@@ -68,7 +68,7 @@ struct netlink_walkargs {
 	int dumped;
 };
 
-static eventhandler_tag ifdetach_event, ifattach_event, ifaddr_event;
+static eventhandler_tag ifdetach_event, ifattach_event, iflink_event, ifaddr_event;
 
 static SLIST_HEAD(, nl_cloner) nl_cloners = SLIST_HEAD_INITIALIZER(nl_cloners);
 
@@ -234,11 +234,13 @@ dump_sa(struct nl_writer *nw, int attr, const struct sockaddr *sa)
  * @nw: message writer
  * @ifp: target interface
  * @hdr: template header
+ * @if_flags_mask: changed if_[drv]_flags bitmask
  *
  * This function is called without epoch and MAY sleep.
  */
 static bool
-dump_iface(struct nl_writer *nw, struct ifnet *ifp, const struct nlmsghdr *hdr)
+dump_iface(struct nl_writer *nw, struct ifnet *ifp, const struct nlmsghdr *hdr,
+    int if_flags_mask)
 {
         struct ifinfomsg *ifinfo;
 
@@ -253,13 +255,15 @@ dump_iface(struct nl_writer *nw, struct ifnet *ifp, const struct nlmsghdr *hdr)
         ifinfo->ifi_type = ifp->if_type;
         ifinfo->ifi_index = ifp->if_index;
         ifinfo->ifi_flags = ifp_flags_to_netlink(ifp);
-        ifinfo->ifi_change = 0;
-
-        nlattr_add_string(nw, IFLA_IFNAME, if_name(ifp));
+        ifinfo->ifi_change = if_flags_mask;
 
 	struct if_state ifs = {};
 	get_operstate(ifp, &ifs);
 
+	if (ifs.ifla_operstate == IF_OPER_UP)
+		ifinfo->ifi_flags |= IFF_LOWER_UP;
+
+        nlattr_add_string(nw, IFLA_IFNAME, if_name(ifp));
         nlattr_add_u8(nw, IFLA_OPERSTATE, ifs.ifla_operstate);
         nlattr_add_u8(nw, IFLA_CARRIER, ifs.ifla_carrier);
 
@@ -387,7 +391,7 @@ rtnl_handle_getlink(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *n
 		NLP_LOG(LOG_DEBUG3, nlp, "fast track -> searching index %u", attrs.ifi_index);
 		if (ifp != NULL) {
 			if (match_iface(&attrs, ifp)) {
-				if (!dump_iface(wa.nw, ifp, &wa.hdr))
+				if (!dump_iface(wa.nw, ifp, &wa.hdr, 0))
 					error = ENOMEM;
 			} else
 				error = ESRCH;
@@ -439,7 +443,7 @@ rtnl_handle_getlink(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_pstate *n
 
 	NL_LOG(LOG_DEBUG2, "Matched %d interface(s), dumping", offset);
 	for (int i = 0; error == 0 && i < offset; i++) {
-		if (!dump_iface(wa.nw, match_array[i], &wa.hdr))
+		if (!dump_iface(wa.nw, match_array[i], &wa.hdr, 0))
 			error = ENOMEM;
 	}
 	for (int i = 0; i < offset; i++)
@@ -767,9 +771,9 @@ rtnl_handle_ifaddr(void *arg __unused, struct ifaddr *ifa, int cmd)
 }
 
 static void
-rtnl_handle_ifattach(void *arg, struct ifnet *ifp)
+rtnl_handle_ifevent(struct ifnet *ifp, int nlmsg_type, int if_flags_mask)
 {
-	struct nlmsghdr hdr = { .nlmsg_type = NL_RTM_NEWLINK };
+	struct nlmsghdr hdr = { .nlmsg_type = nlmsg_type };
 	struct nl_writer nw = {};
 
 	if (!nl_has_listeners(NETLINK_ROUTE, RTNLGRP_LINK))
@@ -779,25 +783,36 @@ rtnl_handle_ifattach(void *arg, struct ifnet *ifp)
 		NL_LOG(LOG_DEBUG, "error allocating mbuf");
 		return;
 	}
-	dump_iface(&nw, ifp, &hdr);
+	dump_iface(&nw, ifp, &hdr, if_flags_mask);
         nlmsg_flush(&nw);
 }
 
+static void
+rtnl_handle_ifattach(void *arg, struct ifnet *ifp)
+{
+	NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp));
+	rtnl_handle_ifevent(ifp, NL_RTM_NEWLINK, 0);
+}
+
 static void
 rtnl_handle_ifdetach(void *arg, struct ifnet *ifp)
 {
-	struct nlmsghdr hdr = { .nlmsg_type = NL_RTM_DELLINK };
-	struct nl_writer nw = {};
+	NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp));
+	rtnl_handle_ifevent(ifp, NL_RTM_DELLINK, 0);
+}
 
-	if (!nl_has_listeners(NETLINK_ROUTE, RTNLGRP_LINK))
-		return;
+static void
+rtnl_handle_iflink(void *arg, struct ifnet *ifp)
+{
+	NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp));
+	rtnl_handle_ifevent(ifp, NL_RTM_NEWLINK, 0);
+}
 
-	if (!nlmsg_get_group_writer(&nw, NLMSG_LARGE, NETLINK_ROUTE, RTNLGRP_LINK)) {
-		NL_LOG(LOG_DEBUG, "error allocating mbuf");
-		return;
-	}
-	dump_iface(&nw, ifp, &hdr);
-        nlmsg_flush(&nw);
+void
+rtnl_handle_ifnet_event(struct ifnet *ifp, int if_flags_mask)
+{
+	NL_LOG(LOG_DEBUG2, "ifnet %s", if_name(ifp));
+	rtnl_handle_ifevent(ifp, NL_RTM_NEWLINK, if_flags_mask);
 }
 
 static const struct rtnl_cmd_handler cmd_handlers[] = {
@@ -867,6 +882,9 @@ rtnl_ifaces_init(void)
 	ifaddr_event = EVENTHANDLER_REGISTER(
 	    rt_addrmsg, rtnl_handle_ifaddr, NULL,
 	    EVENTHANDLER_PRI_ANY);
+	iflink_event = EVENTHANDLER_REGISTER(
+	    ifnet_link_event, rtnl_handle_iflink, NULL,
+	    EVENTHANDLER_PRI_ANY);
 	NL_VERIFY_PARSERS(all_parsers);
 	rtnl_iface_drivers_register();
 	rtnl_register_messages(cmd_handlers, NL_ARRAY_LEN(cmd_handlers));
@@ -878,4 +896,5 @@ rtnl_ifaces_destroy(void)
 	EVENTHANDLER_DEREGISTER(ifnet_arrival_event, ifattach_event);
 	EVENTHANDLER_DEREGISTER(ifnet_departure_event, ifdetach_event);
 	EVENTHANDLER_DEREGISTER(rt_addrmsg, ifaddr_event);
+	EVENTHANDLER_DEREGISTER(ifnet_link_event, iflink_event);
 }
diff --git a/sys/netlink/route/interface.h b/sys/netlink/route/interface.h
index cae763cc4a58..1b8f1cf7b53d 100644
--- a/sys/netlink/route/interface.h
+++ b/sys/netlink/route/interface.h
@@ -42,6 +42,9 @@ struct ifinfomsg {
 	unsigned	ifi_change;	/* IFF_* change mask */
 };
 
+/* Linux-specific link-level state flag */
+#define	IFF_LOWER_UP	IFF_NETLINK_1
+
 #ifndef _KERNEL
 /* Compatilbility helpers */
 #define	_IFINFO_HDRLEN		((int)sizeof(struct ifinfomsg))
diff --git a/sys/netlink/route/route_var.h b/sys/netlink/route/route_var.h
index 7a31a8c896a5..0bcfcc962020 100644
--- a/sys/netlink/route/route_var.h
+++ b/sys/netlink/route/route_var.h
@@ -88,6 +88,7 @@ void rtnl_ifaces_init(void);
 void rtnl_ifaces_destroy(void);
 void rtnl_iface_add_cloner(struct nl_cloner *cloner);
 void rtnl_iface_del_cloner(struct nl_cloner *cloner);
+void rtnl_handle_ifnet_event(struct ifnet *ifp, int if_change_mask);
 
 /* iface_drivers.c */
 void rtnl_iface_drivers_register(void);