5.4-RELEASE lockups on amd64 SMP
Matthew Grooms
mgrooms at seton.org
Thu Jun 9 13:47:18 GMT 2005
Max,
Not a problem. Looks good so far. It's been up for an hour and a
half with all the debug options turned on. I will let it cook in my
production environment over the weekend and update you on Monday. Thanks
for your help.
Matthew Grooms
Max Laier wrote:
> On Thursday 09 June 2005 01:23, Grooms, Matthew wrote:
>
>>Max,
>>
>> With your patch applied, I get a panic very quickly during the boot
>>cycle with output that looks like this ...
>
>
> My bad, missed the mtx_init() ...
> | @@ -216,6 +219,9 @@
> | callout_init(&sc->sc_tmo, 0);
> | callout_init(&sc->sc_bulk_tmo, 0);
> | callout_init(&sc->sc_bulkfail_tmo, 0);
> | + callout_init(&sc->sc_send_tmo, 0);
> | + mtx_init(&sc->sc_ifq.ifq_mtx, ifp->if_xname, "pfsync send queue",
> | + MTX_DEF);
> | if_attach(&sc->sc_if);
> |
> | LIST_INSERT_HEAD(&pfsync_list, sc, sc_next);
>
> Complete updated patch attached and uploaded to:
> http://people.freebsd.org/~mlaier/if_pfsync.senddef5.diff
>
> Sorry.
>
>
>>net.inet.carp.preempt: 0 -> 1
>>Setting hostname: ---.
>>em: Link is up 100 Mbps Full Duplex
>>panic: mtx_lock() of spin mutex (null) @ ../../../net/if.c:1983
>>cpuid = 1
>>KDB: enter: panic
>>[thread pid 282 tid 100157 ]
>>Stopped at kdb_enter+0x2f: nop
>>db> trace
>>Tracing pid 282 tid 100157 td 0xffffff000af78280
>>kdb_enter() at kdb_enter+0x2f
>>panic() at panic+0x249
>>_mtx_lock_flags() at _mtx_lock_flags+0xd6
>>if_handoff() at if_handoff+0x49
>>pfsync_sendout() at pfsync_sendout+0x268
>>pfsyncioctl() at pfsyncioctl+0x497
>>in_control() at in_control+0x8cb
>>ifioctl() at ifioctl+0x178
>>soo_ioctl() at soo_ioctl+0x2d6
>>ioctl() at ioctl+0xfc
>>syscall() at syscall+0x4ab
>>Xfast_syscall() at Xfast_syscall+0xa8
>>--- syscall (54, FreeBSD ELF64, ioctl), rip = 0x800793340, rsp =
>>0x7fffffffeca8, rbp = 0x7fffffffef8b --- db> show locks
>>exclusive sleep mutex pf task mtx r = 0 (0xffffffff80752f60) locked @
>>contrib/pf/net/if_pfsync.c:973
>>
>>Rebooting the machine with the same kernel produces an identical panic. Let
>>me know what else I can do to help. Right now I have just been rebooting
>>back to a UP kernel which has never shown any sign of problems.
>>
>>Matthew Grooms
>>
>>-----Original Message-----
>>From: Grooms, Matthew
>>Sent: Wed 6/8/2005 6:22 PM
>>To: Max Laier
>>Cc: Palle Girgensohn; Kris Kennaway; freebsd-stable at freebsd.org;
>>glebius at freebsd.org; pf at freebsd.org Subject: RE: 5.4-RELEASE lockups on
>>amd64 SMP
>>
>>Matthew,
>>
>>can you try the attached diff. Available for 5 and CURRENT. I recall that
>>this problem was seen before, strange that I didn't see the problem.
>>Sounds familiar to you? Please try the patch and let me know if that
>>helps. Thanks a lot.
>>
>>On Wednesday 08 June 2005 01:35, Matthew Grooms wrote:
>>
>>>Once again, here are the backtraces for the panic and lor ...
>>>
>>>Tracing pid 110 tid 100089 td 0xffffff012f3f0c80
>>>kdb_enter() at kdb_enter+0x2f
>>>panic() at panic+0x249
>>>uma_dbg_free() at uma_dbg_free+0x188
>>>uma_zfree_arg() at uma_zfree_arg+0x1b0
>>>pf_purge_expired_states() at pf_purge_expired_states+0x41
>>>pfsync_input() at pfsync_input+0xb35
>>>ip_input() at ip_input+0x10f
>>>netisr_processqueue() at netisr_processqueue+0x17
>>>swi_net() at swi_net+0xa8
>>>ithread_loop() at ithread_loop+0xd9
>>>fork_exit() at fork_exit+0xc3
>>>fork_trampoline() at fork_trampoline+0xe
>>>--- trap 0, rip = 0, rsp = 0xffffffffb44f9d00, rbp = 0 ---
>>>db> continue
>>>boot() called on cpu#0
>>>Uptime: 13h42m43s
>>>Dumping 4864 MB
>>> 16 32 ...
>>>
>>>lock order reversal
>>
>>...
>>
>>
>>>alltraps_with_regs_pushed() at alltraps_with_regs_pushed+0x5
>>>pf_state_tree_lan_ext_RB_REMOVE() at
>>>pf_state_tree_lan_ext_RB_REMOVE+0x10c
>>
>>This LOR is a consequence of the fault, so it can be disregarded.
>
>
>
> ------------------------------------------------------------------------
>
> Index: if_pfsync.c
> ===================================================================
> RCS file: /usr/store/mlaier/fcvs/src/sys/contrib/pf/net/if_pfsync.c,v
> retrieving revision 1.11.2.2
> diff -u -r1.11.2.2 if_pfsync.c
> --- if_pfsync.c 19 May 2005 10:59:22 -0000 1.11.2.2
> +++ if_pfsync.c 8 Jun 2005 23:42:45 -0000
> @@ -130,6 +130,7 @@
>
> static void pfsync_clone_destroy(struct ifnet *);
> static int pfsync_clone_create(struct if_clone *, int);
> +static void pfsync_senddef(void *);
> #else
> void pfsyncattach(int);
> #endif
> @@ -170,6 +171,8 @@
> callout_stop(&sc->sc_bulk_tmo);
> callout_stop(&sc->sc_bulkfail_tmo);
>
> + callout_stop(&sc->sc_send_tmo);
> +
> #if NBPFILTER > 0
> bpfdetach(ifp);
> #endif
> @@ -216,6 +219,9 @@
> callout_init(&sc->sc_tmo, 0);
> callout_init(&sc->sc_bulk_tmo, 0);
> callout_init(&sc->sc_bulkfail_tmo, 0);
> + callout_init(&sc->sc_send_tmo, 0);
> + mtx_init(&sc->sc_ifq.ifq_mtx, ifp->if_xname, "pfsync send queue",
> + MTX_DEF);
> if_attach(&sc->sc_if);
>
> LIST_INSERT_HEAD(&pfsync_list, sc, sc_next);
> @@ -913,6 +919,7 @@
> if (pfsyncr.pfsyncr_maxupdates > 255)
> return (EINVAL);
> #ifdef __FreeBSD__
> + callout_drain(&sc->sc_send_tmo);
> PF_LOCK();
> #endif
> sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;
> @@ -1634,15 +1641,14 @@
> #endif
>
> pfsyncstats.pfsyncs_opackets++;
> -
> #ifdef __FreeBSD__
> - PF_UNLOCK();
> -#endif
> + if (IF_HANDOFF(&sc->sc_ifq, m, NULL))
> + pfsyncstats.pfsyncs_oerrors++;
> + else
> + callout_reset(&sc->sc_send_tmo, 1, pfsync_senddef, sc);
> +#else
> if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL))
> pfsyncstats.pfsyncs_oerrors++;
> -
> -#ifdef __FreeBSD__
> - PF_LOCK();
> #endif
> } else
> m_freem(m);
> @@ -1652,6 +1658,22 @@
>
>
> #ifdef __FreeBSD__
> +static void
> +pfsync_senddef(void *arg)
> +{
> + struct pfsync_softc *sc = (struct pfsync_softc *)arg;
> + struct mbuf *m;
> +
> + for(;;) {
> + IF_DEQUEUE(&sc->sc_ifq, m);
> + if (m == NULL)
> + break;
> + if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL))
> + pfsyncstats.pfsyncs_oerrors++;
> + }
> +}
> +
> +
> static int
> pfsync_modevent(module_t mod, int type, void *data)
> {
> Index: if_pfsync.h
> ===================================================================
> RCS file: /usr/store/mlaier/fcvs/src/sys/contrib/pf/net/if_pfsync.h,v
> retrieving revision 1.4
> diff -u -r1.4 if_pfsync.h
> --- if_pfsync.h 16 Jun 2004 23:24:00 -0000 1.4
> +++ if_pfsync.h 8 Jun 2005 23:42:59 -0000
> @@ -158,8 +158,12 @@
> struct timeout sc_bulkfail_tmo;
> #endif
> struct in_addr sc_sendaddr;
> - struct mbuf *sc_mbuf; /* current cummulative mbuf */
> - struct mbuf *sc_mbuf_net; /* current cummulative mbuf */
> + struct mbuf *sc_mbuf; /* current cumulative mbuf */
> + struct mbuf *sc_mbuf_net; /* current cumulative mbuf */
> +#ifdef __FreeBSD__
> + struct ifqueue sc_ifq;
> + struct callout sc_send_tmo;
> +#endif
> union sc_statep sc_statep;
> union sc_statep sc_statep_net;
> u_int32_t sc_ureq_received;
More information about the freebsd-pf
mailing list