Re: git: 9ce46cbc95d7 - main - ip_mroute: move ip_mrouter_done outside lock
Date: Fri, 21 Jan 2022 13:33:27 UTC
On 21 Jan 2022, at 14:01, Kristof Provost wrote: > Hi Wojciech, > > On 21 Jan 2022, at 6:19, Wojciech Macek wrote: >> The branch main has been updated by wma: >> >> URL: >> https://cgit.FreeBSD.org/src/commit/?id=9ce46cbc95d7a6fccb55af0d42cbb85c29f10639 >> >> commit 9ce46cbc95d7a6fccb55af0d42cbb85c29f10639 >> Author: Wojciech Macek <wma@FreeBSD.org> >> AuthorDate: 2022-01-21 05:15:08 +0000 >> Commit: Wojciech Macek <wma@FreeBSD.org> >> CommitDate: 2022-01-21 05:17:19 +0000 >> >> ip_mroute: move ip_mrouter_done outside lock >> >> X_ip_mrouter_done might sleep, which triggers INVARIANTS to >> print additional errors on the screen. >> Move it outside the lock, but provide some basic synchronization >> to avoid race condition during module uninit/unload. >> >> Obtained from: Semihalf >> Sponsored by: Stormshield > > I suspect this change causes panics like this one: > https://ci.freebsd.org/job/FreeBSD-main-amd64-test/20437/consoleText > >> sys/netinet/ip_mroute.c | 11 ++++++++--- >> sys/netinet/ip_mroute.h | 4 +++- >> sys/netinet/raw_ip.c | 11 ++++++++--- >> 3 files changed, 19 insertions(+), 7 deletions(-) >> >> diff --git a/sys/netinet/ip_mroute.c b/sys/netinet/ip_mroute.c >> index 8cd0b2ac7449..0566048621ad 100644 >> --- a/sys/netinet/ip_mroute.c >> +++ b/sys/netinet/ip_mroute.c >> @@ -300,7 +300,7 @@ VNET_DEFINE_STATIC(struct ifnet *, >> multicast_register_if); >> static u_long X_ip_mcast_src(int); >> static int X_ip_mforward(struct ip *, struct ifnet *, struct mbuf *, >> struct ip_moptions *); >> -static int X_ip_mrouter_done(void); >> +static int X_ip_mrouter_done(void *); >> static int X_ip_mrouter_get(struct socket *, struct sockopt *); >> static int X_ip_mrouter_set(struct socket *, struct sockopt *); >> static int X_legal_vif_num(int); >> @@ -431,7 +431,7 @@ X_ip_mrouter_set(struct socket *so, struct >> sockopt *sopt) >> break; >> >> case MRT_DONE: >> - error = ip_mrouter_done(); >> + error = ip_mrouter_done(NULL); >> break; >> >> case MRT_ADD_VIF: >> @@ -734,7 +734,7 @@ ip_mrouter_init(struct socket *so, int version) >> * Disable multicast forwarding. >> */ >> static int >> -X_ip_mrouter_done(void) >> +X_ip_mrouter_done(void *locked) >> { >> struct ifnet *ifp; >> u_long i; >> @@ -751,6 +751,11 @@ X_ip_mrouter_done(void) >> atomic_subtract_int(&ip_mrouter_cnt, 1); >> V_mrt_api_config = 0; >> >> + if (locked) { >> + struct epoch_tracker *mrouter_et = locked; >> + MROUTER_RUNLOCK_PARAM(mrouter_et); >> + } >> + >> MROUTER_WAIT(); >> >> /* Stop and drain task queue */ >> diff --git a/sys/netinet/ip_mroute.h b/sys/netinet/ip_mroute.h >> index 65c5bdd3a025..016d026d184c 100644 >> --- a/sys/netinet/ip_mroute.h >> +++ b/sys/netinet/ip_mroute.h >> @@ -363,12 +363,14 @@ struct sockopt; >> >> extern int (*ip_mrouter_set)(struct socket *, struct sockopt *); >> extern int (*ip_mrouter_get)(struct socket *, struct sockopt *); >> -extern int (*ip_mrouter_done)(void); >> +extern int (*ip_mrouter_done)(void *); >> extern int (*mrt_ioctl)(u_long, caddr_t, int); >> >> #define MROUTER_RLOCK_TRACKER struct epoch_tracker mrouter_et >> +#define MROUTER_RLOCK_PARAM_PTR &mrouter_et >> #define MROUTER_RLOCK() epoch_enter_preempt(net_epoch_preempt, >> &mrouter_et) >> #define MROUTER_RUNLOCK() epoch_exit_preempt(net_epoch_preempt, >> &mrouter_et) >> +#define MROUTER_RUNLOCK_PARAM(param) epoch_exit_preempt(net_epoch_preempt, >> param) >> #define MROUTER_WAIT() epoch_wait_preempt(net_epoch_preempt) >> >> #endif /* _KERNEL */ >> diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c >> index 7c495745806e..08ce848a63f7 100644 >> --- a/sys/netinet/raw_ip.c >> +++ b/sys/netinet/raw_ip.c >> @@ -119,7 +119,7 @@ VNET_DEFINE(struct socket *, ip_mrouter); >> */ >> int (*ip_mrouter_set)(struct socket *, struct sockopt *); >> int (*ip_mrouter_get)(struct socket *, struct sockopt *); >> -int (*ip_mrouter_done)(void); >> +int (*ip_mrouter_done)(void *locked); >> int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *, >> struct ip_moptions *); >> int (*mrt_ioctl)(u_long, caddr_t, int); >> @@ -879,18 +879,23 @@ static void >> rip_detach(struct socket *so) >> { >> struct inpcb *inp; >> + MROUTER_RLOCK_TRACKER; >> >> inp = sotoinpcb(so); >> KASSERT(inp != NULL, ("rip_detach: inp == NULL")); >> KASSERT(inp->inp_faddr.s_addr == INADDR_ANY, >> ("rip_detach: not closed")); >> >> + /* Disable mrouter first, lock released inside ip_mrouter_done */ >> + MROUTER_RLOCK(); >> + if (so == V_ip_mrouter && ip_mrouter_done) >> + ip_mrouter_done(MROUTER_RLOCK_PARAM_PTR); >> + > > I believe this is the problem. > > If we do not enter ip_mrouter_done() here we’ll exit the function > without exiting epoch. The epoch tracker on the stack will be > overwritten, and that could produce the panic we see in > ci.freebsd.org. > I’m currently running with this patch: diff --git a/sys/netinet/ip_mroute.c b/sys/netinet/ip_mroute.c index 0566048621ad..ff68b140af7e 100644 --- a/sys/netinet/ip_mroute.c +++ b/sys/netinet/ip_mroute.c @@ -741,8 +741,13 @@ X_ip_mrouter_done(void *locked) vifi_t vifi; struct bw_upcall *bu; - if (V_ip_mrouter == NULL) - return EINVAL; + if (V_ip_mrouter == NULL) { + if (locked) { + struct epoch_tracker *mrouter_et = locked; + MROUTER_RUNLOCK_PARAM(mrouter_et); + } + return (EINVAL); + } /* * Detach/disable hooks to the reset of the system. diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c index 08ce848a63f7..4354bee3cfcc 100644 --- a/sys/netinet/raw_ip.c +++ b/sys/netinet/raw_ip.c @@ -887,9 +887,10 @@ rip_detach(struct socket *so) ("rip_detach: not closed")); /* Disable mrouter first, lock released inside ip_mrouter_done */ - MROUTER_RLOCK(); - if (so == V_ip_mrouter && ip_mrouter_done) + if (so == V_ip_mrouter && ip_mrouter_done) { + MROUTER_RLOCK(); ip_mrouter_done(MROUTER_RLOCK_PARAM_PTR); + } INP_WLOCK(inp); INP_HASH_WLOCK(&V_ripcbinfo); However, it’s not at all clear to me what we’re actually accomplishing by entering the net epoch here. As far as I can tell that’s basically a no-op. Kristof