freebsd-5.4-stable panics

Rob Watt rob at hudson-trading.com
Thu Sep 29 10:06:29 PDT 2005


Robert,

We have gotten some more information from our type1 crash:

>sh lockedvnods
Locked vnodes

>sh alllocks
Process 2204 (dataplay) thread 0xffffff00b1726a000 (100214)
exclusive sleep mutex inp (udpinp) f = 0 (0xffffff00cc90fcc8) locked @
/usr/src/sys/netinet/udp_usrreq.c:762
Process 62 (pagedaemon) thread 0xffffff00e358c280 (100049)
exclusive sleep mutex UMA lock r = 0 (0xffffffff8068bf80) locked @
/usr/src/sys/vm/uma_core.c:1491
exclusive sleep mutex Giant r = 0 (0xffffffff8062ed80) locked @
/usr/src/sys/vm/vm_pageout.c:717
Process 48 (swi1:net) thread 0xffffff00e3597780 (100027)
exclusive sleep mutex IPFW static rules r = 0 (0xffffffff8067ae50) locked
@ /usr/src/sys/netinet/ip_fw2.c:149

>sh pcpu
cpuid=0
currthread      = 0xffffff00e358c280: pid 63 "pagedaemon"
currpcb         = 0xffffffffb34e3d10
fpcurrthread    = none
idle thread     = 0xffffff00e35b6000: pid 14 (idle cpu0)
spin locks held =

>sh pcpu 1
cpuid=1
currthread      = 0xffffff00e358b3c80: pid 13 "idle cpu1"
currpcb         = 0xffffffffffb34e7d10
fpcurrthread    = none
idle thread     = 0xffffff00e358b3c80: pid 13 (idle cpu1)
spin locks held =

>sh pcpu 2
cpuid=2
currthread      = 0xffffff00e35e4000: pid 2715 "bonnie"
currpcb         = 0xffffffffffb636dd10
fpcurrthread    = none
idle thread     = 0xffffff00e35b3a00: pid 12 (idle cpu2)
spin locks held =

>sh pcpu 3
cpuid=3
currthread      = 0xffffff00e35aea00: pid 40 "irq27: em1 em2"
currpcb         = 0xffffffffffb34b6d10
fpcurrthread    = none
idle thread     = 0xffffff00e35b3780: pid 11 (idle cpu0)
spin locks held =


I have attached the core output as type1-core.2.txt, but unfortunately it
does not help us determine the area of code that triggered the exception.

If I can get more DDB output from the type2 crash I will post it.

There is some encouraging news: since we stopped running top, our
6.0-BETA5 test machine has not crashed (it's been running tests now for
over 26 hours).

We also started running tests on a dual single-core machine running
5-STABLE. That machine has been running for 50 hours without crashing.

This means that we are now only hitting these bugs with dual dual-core
machines running 5-STABLE.

-
Rob Watt
-------------- next part --------------
DDB:
>sh lockedvnods
Locked vnodes

>sh alllocks
Process 2204 (dataplay) thread 0xffffff00b1726a000 (100214)
exclusive sleep mutex inp (udpinp) f = 0 (0xffffff00cc90fcc8) locked @ /usr/src/sys/netinet/udp_usrreq.c:762
Process 62 (pagedaemon) thread 0xffffff00e358c280 (100049)
exclusive sleep mutex UMA lock r = 0 (0xffffffff8068bf80) locked @ /usr/src/sys/vm/uma_core.c:1491
exclusive sleep mutex Giant r = 0 (0xffffffff8062ed80) locked @ /usr/src/sys/vm/vm_pageout.c:717
Process 48 (swi1:net) thread 0xffffff00e3597780 (100027)
exclusive sleep mutex IPFW static rules r = 0 (0xffffffff8067ae50) locked @ /usr/src/sys/netinet/ip_fw2.c:149

>sh pcpu
cpuid=0
currthread      = 0xffffff00e358c280: pid 63 "pagedaemon"
currpcb         = 0xffffffffb34e3d10
fpcurrthread    = none
idle thread     = 0xffffff00e35b6000: pid 14 (idle cpu0)
spin locks held =

>sh pcpu 1
cpuid=1
currthread      = 0xffffff00e358b3c80: pid 13 "idle cpu1"
currpcb         = 0xffffffffffb34e7d10
fpcurrthread    = none
idle thread     = 0xffffff00e358b3c80: pid 13 "idle cpu1"
spin locks held =

>sh pcpu 2
cpuid=2
currthread      = 0xffffff00e35e4000: pid 2715 "bonnie"
currpcb         = 0xffffffffffb636dd10
fpcurrthread    = none
idle thread     = 0xffffff00e35b3a00: pid 12 "idle cpu2"
spin locks held =

>sh pcpu 3 
cpuid=3
currthread      = 0xffffff00e35aea00: pid 40 "irq27: em1 em2"
currpcb         = 0xffffffffffb34b6d10
fpcurrthread    = none
idle thread     = 0xffffff00e35b3780: pid 11 "idle cpu0"
spin locks held =



KGDB:
Unread portion of the kernel message buffer:
panic: No TID bitmap?
cpuid = 0
KDB: enter: panic

#0  doadump () at pcpu.h:167
167     pcpu.h: No such file or directory.
        in pcpu.h
(kgdb) bt
#0  doadump () at pcpu.h:167
#1  0xffffffff801924f6 in db_fncall (dummy1=0, dummy2=0, dummy3=0, dummy4=0x0) at /usr/src/sys/ddb/db_command.c:531
#2  0xffffffff80192985 in db_command_loop () at /usr/src/sys/ddb/db_command.c:349
#3  0xffffffff80194833 in db_trap (type=-1286719648, code=0) at /usr/src/sys/ddb/db_main.c:221
#4  0xffffffff802cb8f0 in kdb_trap (type=3, code=0, tf=0x0) at /usr/src/sys/kern/subr_kdb.c:470
#5  0xffffffff804169dc in trap (frame=
      {tf_rdi = 0, tf_rsi = -2136928256, tf_rdx = 0, tf_rcx = 523776, tf_r8 = -1286719440, tf_r9 = 10, tf_rax = 18, tf_rbx = -2142686258, tf_rbp = -1286719200, tf_r10 = 20765, tf_r11 = 0, tf_r12 = 0, tf_r13 = 256, tf_r14 = -1095697382784, tf_r15 = 768605, tf_trapno = 3, tf_addr = 0, tf_flags = 256, tf_err = 0, tf_rip = -2144554161, tf_cs = 8, tf_rflags = 642, tf_rsp = -1286719200, tf_ss = 16}) at /usr/src/sys/amd64/amd64/trap.c:431
#6  0xffffffff804046fb in calltrap () at /usr/src/sys/amd64/amd64/exception.S:171
#7  0x0000000000000000 in ?? ()
#8  0xffffffff80a11000 in ?? ()
#9  0x0000000000000000 in ?? ()
#10 0x000000000007fe00 in ?? ()
#11 0xffffffffb34e3830 in ?? ()
#12 0x000000000000000a in ?? ()
#13 0x0000000000000012 in ?? ()
#14 0xffffffff804933ce in __func__.0 ()
#15 0xffffffffb34e3920 in ?? ()
#16 0x000000000000511d in ?? ()
#17 0x0000000000000000 in ?? ()
#18 0x0000000000000000 in ?? ()
#19 0x0000000000000100 in ?? ()
#20 0xffffff00e358c280 in ?? ()
#21 0x00000000000bba5d in ?? ()
#22 0x0000000000000003 in ?? ()
#23 0x0000000000000000 in ?? ()
#24 0x0000000000000100 in ?? ()
#25 0x0000000000000000 in ?? ()
#26 0xffffffff802cb34f in kdb_enter (msg=0x0) at cpufunc.h:59
#27 0xffffffff802b0189 in panic (fmt=0xffffffff804933ce "No TID bitmap?") at /usr/src/sys/kern/kern_shutdown.c:552
#28 0xffffffff802bb6d9 in thread_fini (mem=0x0, size=0) at /usr/src/sys/kern/kern_thread.c:269
#29 0xffffffff803f95e9 in zone_drain (zone=0x1) at /usr/src/sys/vm/uma_core.c:749
#30 0xffffffff803f74d6 in zone_foreach (zfunc=0xffffffff803f9440 <zone_drain>) at /usr/src/sys/vm/uma_core.c:1494
#31 0xffffffff803fa911 in uma_reclaim () at /usr/src/sys/vm/uma_core.c:2623
#32 0xffffffff803f50ea in vm_pageout () at /usr/src/sys/vm/vm_pageout.c:725
#33 0xffffffff80299fa3 in fork_exit (callout=0xffffffff803f4db0 <vm_pageout>, arg=0x0, frame=0xffffffffb34e3c50)
    at /usr/src/sys/kern/kern_fork.c:791
#34 0xffffffff804048fe in fork_trampoline () at /usr/src/sys/amd64/amd64/exception.S:296
#35 0x0000000000000000 in ?? ()
#36 0x0000000000000000 in ?? ()
#37 0x0000000000000001 in ?? ()
#38 0x0000000000000000 in ?? ()
#39 0x0000000000000000 in ?? ()
#40 0x0000000000000000 in ?? ()
#41 0x0000000000000000 in ?? ()
#42 0x0000000000000000 in ?? ()
#43 0x0000000000000000 in ?? ()
#44 0x0000000000000000 in ?? ()
#45 0x0000000000000000 in ?? ()
#46 0x0000000000000000 in ?? ()
#47 0x0000000000000000 in ?? ()
#48 0x0000000000000000 in ?? ()
#49 0x0000000000000000 in ?? ()
#50 0x0000000000000000 in ?? ()
#51 0x0000000000000000 in ?? ()
#52 0x0000000000000000 in ?? ()
#53 0x0000000000000000 in ?? ()
#54 0x0000000000000000 in ?? ()
#55 0x0000000000000000 in ?? ()
#56 0x0000000000000000 in ?? ()
#57 0x0000000000000000 in ?? ()
#58 0x0000000000000000 in ?? ()
#59 0x0000000000000000 in ?? ()
#60 0x0000000000000000 in ?? ()
#61 0x0000000000000000 in ?? ()
#62 0x0000000000000000 in ?? ()
#63 0x0000000000000000 in ?? ()
#64 0x0000000000000000 in ?? ()
#65 0x0000000000000000 in ?? ()
#66 0x0000000000000000 in ?? ()
#67 0x0000000000868000 in ?? ()
#68 0x00000000000bba5d in ?? ()
#69 0x0000000000000001 in ?? ()
#70 0xffffff00e35715d0 in ?? ()
#71 0xffffff003a897c80 in ?? ()
#72 0xffffffffb34e39d0 in ?? ()
#73 0xffffffffb34e39a8 in ?? ()
#74 0xffffff00e358c280 in ?? ()
#75 0xffffffff802c342e in sched_switch (td=0x0, newtd=0xffffffff803f4db0, flags=1) at /usr/src/sys/kern/sched_4bsd.c:881
Previous frame inner to this frame (corrupt stack?)
(kgdb) frame 28
#28 0xffffffff802bb6d9 in thread_fini (mem=0x0, size=0) at /usr/src/sys/kern/kern_thread.c:269
269             KASSERT(bmp != NULL, ("No TID bitmap?"));
(kgdb) l
264             STAILQ_FOREACH(bmp, &tid_bitmap, bmp_next) {
265                     if (td->td_tid >= bmp->bmp_base &&
266                         td->td_tid < bmp->bmp_base + TID_IDS_PER_PART)
267                             break;
268             }
269             KASSERT(bmp != NULL, ("No TID bitmap?"));
270             mtx_lock(&tid_lock);
271             tid = td->td_tid - bmp->bmp_base;
272             idx = tid / TID_IDS_PER_IDX;
273             bit = 1UL << (tid % TID_IDS_PER_IDX);
(kgdb) p bmp
$1 = (struct tid_bitmap_part *) 0x0
(kgdb) i reg
rax            0x0      0
rbx            0x0      0
rcx            0x0      0
rdx            0x0      0
rsi            0x0      0
rdi            0x0      0
rbp            0x0      0x0
rsp            0xffffffffb34e3c50       0xffffffffb34e3c50
r8             0x0      0
r9             0x0      0
r10            0x0      0
r11            0x0      0
r12            0xffffffff803f4db0       -2143334992
r13            0xffffffff80628ea0       -2141024608
r14            0x1      1
r15            0x0      0
rip            0xffffffff804048fe       0xffffffff804048fe <fork_trampoline+14>
eflags         0x82     130
cs             0x0      0
ss             0x0      0
ds             0x0      0
es             0x0      0
fs             0x0      0
gs             0x0      0
(kgdb) l *0xffffffff804048fe
0xffffffff804048fe is at /usr/src/sys/amd64/amd64/exception.S:298.
293             movq    %r12, %rdi              /* function */
294             movq    %rbx, %rsi              /* arg1 */
295             movq    %rsp, %rdx              /* trapframe pointer */
296             call    fork_exit
297             MEXITCOUNT
298             jmp     doreti                  /* Handle any ASTs */
299
300     /*
301      * To efficiently implement classification of trap and interrupt handlers
302      * for profiling, there must be only trap handlers between the labels btrap



More information about the freebsd-amd64 mailing list