panic possibly on on bridge member removal
John Baldwin
jhb at freebsd.org
Mon Oct 1 18:38:08 UTC 2012
On Monday, October 01, 2012 11:05:30 am Kim Culhan wrote:
> On Mon, Oct 1, 2012 at 9:49 AM, John Baldwin <jhb at freebsd.org> wrote:
> > On Monday, October 01, 2012 9:08:30 am Kim Culhan wrote:
> >> On Mon, Oct 1, 2012 at 8:04 AM, John Baldwin <jhb at freebsd.org> wrote:
> >> > On Saturday, September 29, 2012 8:40:03 am Kim Culhan wrote:
> >> >> After a few hours of operation involving tap0 added to the bridge
> >> >> running openvpn
> >> >> and shutting down openvpn which removes tap0 from the bridge, the
> >> >> machine is found to have a panic:
> >> >>
> >> >> Fatal trap 12: page fault while in kernel mode
> >> >> cpuid = 1; apic id = 01
> >> >> fault virtual address = 0x188
> >> >> fault code = supervisor read data, page not present
> >> >> instruction pointer = 0x20:0xffffffff82a14f96
> >> >> stack pointer = 0x28:0xffffff8000285670
> >> >> frame pointer = 0x28:0xffffff80002856b0
> >> >> code segment = base 0x0, limit 0xfffff, type 0x1b
> >> >> = DPL 0, pres 1, long 1, def32 0, gran 1
> >> >> processor eflags = interrupt enabled, resume, IOPL = 0
> >> >> current process = 12 (swi5: fast taskq)
> >> >> [ thread pid 12 tid 100022 ]
> >> >> Stopped at bridge_enqueue+0x86: calll *0x188(%r12)
> >> >> db> bt
> >> >> Tracing pid 12 tid 100022 td 0xfffffe0003aff000
> >> >> bridge_enqueue() at bridge_enqueue+0x86
> >> >
> >> > Can you run 'gdb /boot/kernel/kernel' and do 'l *bridge_enqueue+0x86'?
> >> >
> >> > --
> >> > John Baldwin
> >>
> >> (gdb) l *bridge_enqueue+0x86
> >> No symbol "bridge_enqueue" in current context.
> >> (gdb)
> >
> > Oh, are you using if_bridge.ko as a module? If so, you can try running 'gdb
> > /boot/kernel/if_bridge.ko' instead.
>
> (gdb) l *bridge_enqueue+0x86
> 0x2f96 is in bridge_enqueue
> (/usr/src/sys/modules/if_bridge/../../net/if_bridge.c:1810).
> 1805 continue;
> 1806 }
> 1807 m->m_flags &= ~M_VLANTAG;
> 1808 }
> 1809
> 1810 if ((err = dst_ifp->if_transmit(dst_ifp, m))) {
> 1811 m_freem(m0);
> 1812 break;
> 1813 }
> 1814 }
> (gdb)
Hmm, try this perhaps. I think there is a race where the departing bridge
member can be destroyed before the bridge code is finishing using it. The
way the bridge driver is supposed to prevent that is by ensuring that
bridge_ifdetach() doesn't return until all other references to the detaching
interface are known to be released in the bridge code. I think what is
happening for you is that a bridge_enqueue() performed outside the lock is
running into dst_ifp that has been if_free()'d and had its if_transmit set
to zero when it was reallocated for something else.
Index: if_bridge.c
===================================================================
--- if_bridge.c (revision 241096)
+++ if_bridge.c (working copy)
@@ -1833,6 +1833,7 @@ static void
bridge_dummynet(struct mbuf *m, struct ifnet *ifp)
{
struct bridge_softc *sc;
+ int error = 0;
sc = ifp->if_bridge;
@@ -1846,18 +1847,30 @@ bridge_dummynet(struct mbuf *m, struct ifnet *ifp)
return;
}
+ BRIDGE_LOCK(sc);
+ BRIDGE_LOCK2REF(sc, error);
+ if (error) {
+ m_freem(m);
+ return;
+ }
+
if (PFIL_HOOKED(&V_inet_pfil_hook)
#ifdef INET6
|| PFIL_HOOKED(&V_inet6_pfil_hook)
#endif
) {
- if (bridge_pfil(&m, sc->sc_ifp, ifp, PFIL_OUT) != 0)
+ if (bridge_pfil(&m, sc->sc_ifp, ifp, PFIL_OUT) != 0) {
+ BRIDGE_UNREF(sc);
return;
- if (m == NULL)
+ }
+ if (m == NULL) {
+ BRIDGE_UNREF(sc);
return;
+ }
}
bridge_enqueue(sc, ifp, m);
+ BRIDGE_UNREF(sc);
}
/*
@@ -1971,8 +1984,13 @@ sendunicast:
return (0);
}
- BRIDGE_UNLOCK(sc);
+ BRIDGE_LOCK2REF(sc, error);
+ if (error) {
+ m_freem(m);
+ return (0);
+ }
bridge_enqueue(sc, dst_if, m);
+ BRIDGE_UNREF(sc);
return (0);
}
@@ -2000,8 +2018,13 @@ bridge_transmit(struct ifnet *ifp, struct mbuf *m)
eh = mtod(m, struct ether_header *);
dst_if = bridge_rtlookup(sc, eh->ether_dhost, 1);
- BRIDGE_UNLOCK(sc);
- error = bridge_enqueue(sc, dst_if, m);
+ BRIDGE_LOCK2REF(sc, error);
+ if (error)
+ m_freem(m);
+ else {
+ error = bridge_enqueue(sc, dst_if, m);
+ BRIDGE_UNREF(sc);
+ }
} else
bridge_broadcast(sc, ifp, m, 0);
@@ -2145,20 +2168,29 @@ bridge_forward(struct bridge_softc *sc, struct bri
dbif->bif_stp.bp_state == BSTP_IFSTATE_DISCARDING)
goto drop;
- BRIDGE_UNLOCK(sc);
+ BRIDGE_LOCK2REF(sc, error);
+ if (error) {
+ m_freem(m);
+ return;
+ }
if (PFIL_HOOKED(&V_inet_pfil_hook)
#ifdef INET6
|| PFIL_HOOKED(&V_inet6_pfil_hook)
#endif
) {
- if (bridge_pfil(&m, ifp, dst_if, PFIL_OUT) != 0)
+ if (bridge_pfil(&m, ifp, dst_if, PFIL_OUT) != 0) {
+ BRIDGE_UNREF(sc);
return;
- if (m == NULL)
+ }
+ if (m == NULL) {
+ BRIDGE_UNREF(sc);
return;
+ }
}
bridge_enqueue(sc, dst_if, m);
+ BRIDGE_UNREF(sc);
return;
drop:
--
John Baldwin
More information about the freebsd-current
mailing list