Netgraph/mpd5 stability issues
Przemyslaw Frasunek
przemyslaw at frasunek.com
Fri Jan 14 09:05:33 UTC 2011
Hello,
I'm using mpd 5.5 on three PPPoE routers, each servicing about 300 concurrent
PPPoE sessions. The routers are based on Intel SR1630GP hardware platforms and
run FreeBSD 7.3-RELEASE.
I'm experiencing stability issues related to Netgraph. None of the above routers
can survive more than 20-30 days of uptime under typical load. There are different
flavors of kernel panics, but all are somehow related to netgraph. Typical
backtraces follow:
(kgdb) bt
#1 0xc0836ac7 in boot (howto=260) at ../../../kern/kern_shutdown.c:418
#2 0xc0836d99 in panic (fmt=Variable "fmt" is not available.
) at ../../../kern/kern_shutdown.c:574
#3 0xc0b5ef1c in trap_fatal (frame=0xe7ce6820, eva=152)
at ../../../i386/i386/trap.c:950
#4 0xc0b5f1a0 in trap_pfault (frame=0xe7ce6820, usermode=0, eva=152)
at ../../../i386/i386/trap.c:863
#5 0xc0b5fb95 in trap (frame=0xe7ce6820) at ../../../i386/i386/trap.c:541
#6 0xc0b42e7b in calltrap () at ../../../i386/i386/exception.s:166
#7 0xc5f486b9 in ng_name2noderef (here=0xc62a0b80, name=0xe7ce6894 "ng366")
at /usr/src/sys/modules/netgraph/netgraph/../../../netgraph/ng_base.c:896
#8 0xc5f488cc in ng_path2noderef (here=0xc62a0b80,
address=0xcc4c2110 "ng366:", destp=0xe7ce6ac8, lasthook=0xe7ce6ac4)
at /usr/src/sys/modules/netgraph/netgraph/../../../netgraph/ng_base.c:1673
#9 0xc5f48cc0 in ng_address_path (here=0xc62a0b80, item=0xc5e42ae0,
address=0xcc4c2110 "ng366:", retaddr=0)
at /usr/src/sys/modules/netgraph/netgraph/../../../netgraph/ng_base.c:3488
#10 0xc5f431d3 in ngc_send (so=0xc5b53340, flags=0, m=0xd4c6cb00,
addr=0xccac9780, control=0x0, td=0xc65a2b40)
at /usr/src/sys/modules/netgraph/socket/../../../netgraph/ng_socket.c:288
#11 0xc0894bfa in sosend_generic (so=0xc5b53340, addr=0xccac9780,
uio=0xe7ce6be8, top=0xd4c6cb00, control=0x0, flags=0, td=0xc65a2b40)
at ../../../kern/uipc_socket.c:1243
#12 0xc0890a3f in sosend (so=0xc5b53340, addr=0xccac9780, uio=0xe7ce6be8,
top=0x0, control=0x0, flags=0, td=0xc65a2b40)
at ../../../kern/uipc_socket.c:1285
#13 0xc0897fa6 in kern_sendit (td=0xc65a2b40, s=5, mp=0xe7ce6c64, flags=0,
control=0x0, segflg=UIO_USERSPACE) at ../../../kern/uipc_syscalls.c:805
#14 0xc089b181 in sendit (td=0xc65a2b40, s=5, mp=0xe7ce6c64, flags=0)
at ../../../kern/uipc_syscalls.c:742
#15 0xc089b298 in sendto (td=0xc65a2b40, uap=0xe7ce6cfc)
at ../../../kern/uipc_syscalls.c:857
#16 0xc0b5f4f5 in syscall (frame=0xe7ce6d38) at ../../../i386/i386/trap.c:1101
#17 0xc0b42ee0 in Xint0x80_syscall () at ../../../i386/i386/exception.s:262
#18 0x00000033 in ?? ()
(kgdb) frame 7
#7 0xc5f486b9 in ng_name2noderef (here=0xc62a0b80, name=0xe7ce6894 "ng366")
at /usr/src/sys/modules/netgraph/netgraph/../../../netgraph/ng_base.c:896
896 LIST_FOREACH(node, &ng_name_hash[hash], nd_nodes) {
(kgdb) list
891 }
892
893 /* Find node by name */
894 NG_NAMEHASH(name, hash);
895 mtx_lock(&ng_namehash_mtx);
896 LIST_FOREACH(node, &ng_name_hash[hash], nd_nodes) {
897 if (NG_NODE_IS_VALID(node) &&
898 (strcmp(NG_NODE_NAME(node), name) == 0)) {
899 break;
900 }
(kgdb) print node
$1 = 0x74
(kgdb) print ng_name_hash
$3 = {{lh_first = 0xcbab6200}, {lh_first = 0x0}, {lh_first = 0xc6538300}, {
lh_first = 0xc67e6400}, {lh_first = 0xc6538700}, {lh_first = 0xca2abc00}, {
lh_first = 0xc66d5000}, {lh_first = 0xca8f9200}, {lh_first = 0xca815580}, {
lh_first = 0xc62a2180}, {lh_first = 0xca2ab180}, {lh_first = 0xc6af7d00}, {
lh_first = 0xcbe09a00}, {lh_first = 0xca81b800}, {lh_first = 0xc5b4e980}, {
lh_first = 0xcbc1f080}, {lh_first = 0xca2a5480}, {lh_first = 0xc672b580}, {
lh_first = 0xcbdb1e80}, {lh_first = 0xcc772c00}, {lh_first = 0xc6a99980}, {
lh_first = 0xc629d600}, {lh_first = 0xc6733000}, {lh_first = 0xca967800}, {
lh_first = 0xc5b3b780}, {lh_first = 0xc629c280}, {lh_first = 0xc6396980}, {
lh_first = 0xc6a5f300}, {lh_first = 0xc5bf2280}, {lh_first = 0xcc5ebe80}, {
lh_first = 0xc5e0a400}, {lh_first = 0xc6608100}, {lh_first = 0xc6520e00}, {
lh_first = 0xc6642680}, {lh_first = 0xca8f7b80}, {lh_first = 0xcbd9ce80}, {
lh_first = 0xca81b380}, {lh_first = 0x0} <repeats 13 times>, {
lh_first = 0xc67b8080}, {lh_first = 0xc6455c80}, {lh_first = 0xc652a380}, {
lh_first = 0xc6a74780}, {lh_first = 0xc62d8400}, {lh_first = 0xcc154400}, {
lh_first = 0xca852b80}, {lh_first = 0xcc351580}, {lh_first = 0xc6396a80}, {
lh_first = 0xc66f9580}, {lh_first = 0xc58c8e00}, {lh_first = 0xcc01a000}, {
lh_first = 0xc6614e80}, {lh_first = 0xc6750800}, {lh_first = 0xcc154e80}, {
lh_first = 0xcc32f080}, {lh_first = 0xcbb10e80}, {lh_first = 0xcc1e3700}, {
lh_first = 0xcc020280}, {lh_first = 0xcc75ad00}, {lh_first = 0xca901b00}, {
lh_first = 0xcc3c8380}, {lh_first = 0xcbd90580}, {lh_first = 0xcbb0c480}, {
lh_first = 0xcbed1300}, {lh_first = 0xc6644480}, {lh_first = 0xcc02ca80}, {
lh_first = 0xcc0d1980}, {lh_first = 0xcc35e200}, {lh_first = 0xcc0dc200}, {
lh_first = 0xca9dc200}, {lh_first = 0xcbecf880}, {lh_first = 0xcc065080}, {
lh_first = 0xcc47b280}, {lh_first = 0xcc722a80}, {lh_first = 0xcc28cd80}, {
lh_first = 0xcbd73400}, {lh_first = 0xcbf76b00}, {lh_first = 0xcbbfc280}, {
lh_first = 0xc629c800}, {lh_first = 0xc6700200}, {lh_first = 0x0}, {
lh_first = 0x0}, {lh_first = 0xc5e0b700}, {lh_first = 0xc672a200}, {
lh_first = 0xc62a2080}, {lh_first = 0x0}, {lh_first = 0xc673fc80}, {
lh_first = 0xc5bf2600}, {lh_first = 0xca969800}, {lh_first = 0xc6aa6700}, {
lh_first = 0xc6750b80}, {lh_first = 0xcc0bc200}, {lh_first = 0xcbeead80}, {
lh_first = 0xcc484e00}, {lh_first = 0xcbae6900}, {lh_first = 0xcbbef800}, {
lh_first = 0xcc797500}, {lh_first = 0xc65f3d80}, {lh_first = 0xcbe95900}, {
lh_first = 0xcba8fb80}, {lh_first = 0xcbdb1580}, {lh_first = 0xcc75b080}, {
lh_first = 0xcbd7fb80}, {lh_first = 0xcc75db80}, {lh_first = 0xc5e59500}, {
lh_first = 0xcbd6fb00}, {lh_first = 0xc6a7ed00}, {lh_first = 0xcbe0bc80}, {
lh_first = 0xcc3c1180}, {lh_first = 0xc7486d00}, {lh_first = 0xcba93880}, {
lh_first = 0xcc0c6000}, {lh_first = 0x0}, {lh_first = 0x0}, {
lh_first = 0x0}, {lh_first = 0x0}, {lh_first = 0x0}}
Another one:
(kgdb) bt
#0 doadump () at pcpu.h:196
#1 0xc0836ac7 in boot (howto=260) at ../../../kern/kern_shutdown.c:418
#2 0xc0836d99 in panic (fmt=Variable "fmt" is not available.
) at ../../../kern/kern_shutdown.c:574
#3 0xc0b5ef1c in trap_fatal (frame=0xc53dbaac, eva=36)
at ../../../i386/i386/trap.c:950
#4 0xc0b5f1a0 in trap_pfault (frame=0xc53dbaac, usermode=0, eva=36)
at ../../../i386/i386/trap.c:863
#5 0xc0b5fb95 in trap (frame=0xc53dbaac) at ../../../i386/i386/trap.c:541
#6 0xc0b42e7b in calltrap () at ../../../i386/i386/exception.s:166
#7 0xc5f39d95 in ng_address_hook (here=0x0, item=0xc66619f0, hook=0xcc87f680,
retaddr=0)
at /usr/src/sys/modules/netgraph/netgraph/../../../netgraph/ng_base.c:3456
#8 0xc5f339ff in ngd_send (so=0xc5b68680, flags=0, m=0xd58aec00,
addr=0xc666d870, control=0x0, td=0xc5910000)
at /usr/src/sys/modules/netgraph/socket/../../../netgraph/ng_socket.c:445
#9 0xc0894bfa in sosend_generic (so=0xc5b68680, addr=0xc666d870,
uio=0xc53dbbe8, top=0xd58aec00, control=0x0, flags=0, td=0xc5910000)
at ../../../kern/uipc_socket.c:1243
#10 0xc0890a3f in sosend (so=0xc5b68680, addr=0xc666d870, uio=0xc53dbbe8,
top=0x0, control=0x0, flags=0, td=0xc5910000)
at ../../../kern/uipc_socket.c:1285
#11 0xc0897fa6 in kern_sendit (td=0xc5910000, s=6, mp=0xc53dbc64, flags=0,
control=0x0, segflg=UIO_USERSPACE) at ../../../kern/uipc_syscalls.c:805
#12 0xc089b181 in sendit (td=0xc5910000, s=6, mp=0xc53dbc64, flags=0)
at ../../../kern/uipc_syscalls.c:742
#13 0xc089b298 in sendto (td=0xc5910000, uap=0xc53dbcfc)
at ../../../kern/uipc_syscalls.c:857
#14 0xc0b5f4f5 in syscall (frame=0xc53dbd38) at ../../../i386/i386/trap.c:1101
#15 0xc0b42ee0 in Xint0x80_syscall () at ../../../i386/i386/exception.s:262
#16 0x00000033 in ?? ()
(kgdb) frame 7
#7 0xc5f39d95 in ng_address_hook (here=0x0, item=0xc66619f0, hook=0xcc87f680,
retaddr=0)
at /usr/src/sys/modules/netgraph/netgraph/../../../netgraph/ng_base.c:3456
3456 if ((hook == NULL) ||
(kgdb) list
3451 * Quick sanity check..
3452 * Since a hook holds a reference on it's node, once we know
3453 * that the peer is still connected (even if invalid,) we know
3454 * that the peer node is present, though maybe invalid.
3455 */
3456 if ((hook == NULL) ||
3457 NG_HOOK_NOT_VALID(hook) ||
3458 NG_HOOK_NOT_VALID(peer = NG_HOOK_PEER(hook)) ||
3459 NG_NODE_NOT_VALID(peernode = NG_PEER_NODE(hook))) {
3460 NG_FREE_ITEM(item);
(kgdb) x/i $eip
0xc5f39d95 <ng_address_hook+69>: testb $0x1,0x24(%edi)
(kgdb) info reg edi
edi 0x0 0
(kgdb) print *hook
$2 = {hk_name = "b99", '\0' <repeats 28 times>, hk_private = 0xc5b27140,
hk_flags = 0, hk_refs = 2, hk_type = 0, hk_peer = 0xc647bc00,
hk_node = 0xc592d500, hk_hooks = {le_next = 0xc69a1b00,
le_prev = 0xc6991238}, hk_rcvmsg = 0, hk_rcvdata = 0}
Besides that, I had an interesting issue when one customer's misconfigured
router tried to establish several PPPoE sessions per second. Such a "stress test"
caused multiple kernel panics, each occurring after a few minutes of uptime. I have
no backtrace, but I remember that it was similar to one of the above.
I'll be grateful for any advice.
More information about the freebsd-net
mailing list