Another reboot hang
Svatopluk Kraus
onwahe at gmail.com
Tue Apr 26 20:18:25 UTC 2016
On Tue, Apr 26, 2016 at 8:30 PM, bob prohaska <fbsd at www.zefox.net> wrote:
> Recently I've been observing hangs on reboot for RPI2.
> Here's an example from r298542. It had just finished building 298591.
>
> The previous two or three reboots were successful, prior to those
> hangs were observed several times. All attempts to reboot via ssh
> have been successful.
>
> Here's the backtrace:
>
> root at www:/usr/src # shutdown -r now
> Shutdown NOW!
> shutdown: [pid 28505]
> root at www:/usr/src #
> *** FINAL System shutdown message from root at www.zefox.com ***
>
> System going down IMMEDIATELY
>
>
>
> System shutdown time has arrived
> Apr 26 11:15:56 www shutdown: reboot by root:
> Stopping cron.
> Waiting for PIDS: 620.
> Stopping sshd.
> Waiting for PIDS: 601.
> Stopping powerd.
> Waiting for PIDS: 562.
> Stopping casperd.
> Waiting for PIDS: 507.
> Stopping devd.
> Waiting for PIDS: 335.
> Writing entropy file:.
> .
> Terminated
> Apr 26 11:16:06 www syslogd: exiting on signal 15
> Waiting (max 60 seconds) for system process `vnlru' to stop...done
> Waiting (max 60 seconds) for system process `bufdaemon' to stop...done
> Waiting (max 60 seconds) for system process `syncer' to stop...
> Syncing disks, vnodes remaining...2 3 0 0 0 done
> All buffers synced.
> KDB: enter: Break to debugger
> [ thread pid 1 tid 100001 ]
> Stopped at $d.8: ldrb r15, [r15, r15, ror r15]!
> db> bt
> Tracing pid 1 tid 100001 td 0xc3a5f000
> db_trace_self() at db_trace_self
> pc = 0xc05659f4 lr = 0xc0147a8c (db_stack_trace+0x108)
> sp = 0xd75eb650 fp = 0xd75eb668
> db_stack_trace() at db_stack_trace+0x108
> pc = 0xc0147a8c lr = 0xc01476d0 (db_command+0x270)
> sp = 0xd75eb670 fp = 0xd75eb710
> r4 = 0x00000001 r5 = 0x00000000
> r6 = 0xc05fdd21 r10 = 0xc07bc738
> db_command() at db_command+0x270
> pc = 0xc01476d0 lr = 0xc0147450 (db_command_loop+0x74)
> sp = 0xd75eb718 fp = 0xd75eb728
> r4 = 0xc05d4a0c r5 = 0xc05f64b7
> r6 = 0xc07bc724 r7 = 0xd75eb8f8
> r8 = 0xc07adfc0 r9 = 0xc06c3318
> r10 = 0xc07adfc4
> db_command_loop() at db_command_loop+0x74
> pc = 0xc0147450 lr = 0xc014a14c (db_trap+0x120)
> sp = 0xd75eb730 fp = 0xd75eb848
> r4 = 0x00000001 r5 = 0x00000000
> r6 = 0xc07bc730 r10 = 0xc07adfc4
> db_trap() at db_trap+0x120
> pc = 0xc014a14c lr = 0xc02fe450 (kdb_trap+0x180)
> sp = 0xd75eb850 fp = 0xd75eb878
> r4 = 0x00000000 r5 = 0x00000001
> r6 = 0xc07adfe0 r10 = 0xc07adfc4
> kdb_trap() at kdb_trap+0x180
> pc = 0xc02fe450 lr = 0xc0583570 (undefinedinstruction+0x318)
> sp = 0xd75eb880 fp = 0xd75eb8f0
> r4 = 0x00000000 r5 = 0x00000000
> r6 = 0xc05831a8 r7 = 0xe7ffffff
> r8 = 0xc3a5f000 r9 = 0xc02fdd7c
> r10 = 0xd75eb8f8
> undefinedinstruction() at undefinedinstruction+0x318
> pc = 0xc0583570 lr = 0xc05687fc (exception_exit)
> sp = 0xd75eb8f8 fp = 0xd75eb998
> r4 = 0x60000193 r5 = 0xc3b12088
> r6 = 0x00000093 r7 = 0x00040000
> r8 = 0x00000000 r9 = 0x00000000
> r10 = 0xc06d2be8
> exception_exit() at exception_exit
> pc = 0xc05687fc lr = 0xc02fdd64 (kdb_alt_break_internal+0x1c4)
> sp = 0xd75eb988 fp = 0xd75eb998
> r0 = 0xc07adfd4 r1 = 0x00000000
> r2 = 0x00000001 r3 = 0x60000193
> r4 = 0xc3b12000 r5 = 0xc3b12088
> r6 = 0x00000093 r7 = 0x00040000
> r8 = 0x00000000 r9 = 0x00000000
> r10 = 0xc06d2be8 r12 = 0x00000000
> $a.9() at $a.9
> pc = 0xc02fdd80 lr = 0xc02fdb98 (kdb_alt_break+0x10)
> sp = 0xd75eb9a0 fp = 0xd75eb9a0
> r4 = 0xc3b12000 r5 = 0xc3b12088
> r6 = 0x00000093 r7 = 0x00040000
> kdb_alt_break() at kdb_alt_break+0x10
> pc = 0xc02fdb98 lr = 0xc01a089c (uart_intr_rxready+0x8c)
> sp = 0xd75eb9a8 fp = 0xd75eb9b8
> uart_intr_rxready() at uart_intr_rxready+0x8c
> pc = 0xc01a089c lr = 0xc01a16bc (uart_intr+0x118)
> sp = 0xd75eb9c0 fp = 0xd75eb9f8
> r4 = 0xc3b37900 r5 = 0xc3b12000
> r6 = 0xc3b12194 r10 = 0xc06d2be8
> uart_intr() at uart_intr+0x118
> pc = 0xc01a16bc lr = 0xc0289e6c (intr_event_handle+0x80)
> sp = 0xd75eba00 fp = 0xd75eba20
> r4 = 0xc3b37900 r5 = 0xd75ebaf8
> r6 = 0xc3b2f058 r7 = 0xc3a5f000
> r8 = 0x00000000 r9 = 0xc05effbb
> r10 = 0xc3b26d80
> intr_event_handle() at intr_event_handle+0x80
> pc = 0xc0289e6c lr = 0xc056a350 (intr_isrc_dispatch+0x68)
> sp = 0xd75eba28 fp = 0xd75eba30
> r4 = 0xc3b2f058 r5 = 0xd75ebaf8
> r6 = 0xc3b2f058 r7 = 0x00000041
> r8 = 0xc062d2ab r9 = 0xc3b2e018
> r10 = 0xbc1fffff
> intr_isrc_dispatch() at intr_isrc_dispatch+0x68
> pc = 0xc056a350 lr = 0xc0590c10 (bcm2835_intc_intr+0x8c)
> sp = 0xd75eba38 fp = 0xd75eba58
> r4 = 0xc3b2e000 r5 = 0x00000000
> bcm2835_intc_intr() at bcm2835_intc_intr+0x8c
> pc = 0xc0590c10 lr = 0xc0289e6c (intr_event_handle+0x80)
> sp = 0xd75eba60 fp = 0xd75eba80
> r4 = 0xc3936100 r5 = 0xd75ebaf8
> r6 = 0x00000000 r7 = 0xc3a5f000
> r8 = 0x00000000 r9 = 0xc05effbb
> r10 = 0xc3b24340
> intr_event_handle() at intr_event_handle+0x80
> pc = 0xc0289e6c lr = 0xc056a350 (intr_isrc_dispatch+0x68)
> sp = 0xd75eba88 fp = 0xd75eba90
> r4 = 0xc3a3c604 r5 = 0xd75ebaf8
> r6 = 0x00000000 r7 = 0x00000000
> r8 = 0x00000100 r9 = 0xc3fdb2b0
> r10 = 0xd75ebaf8
> intr_isrc_dispatch() at intr_isrc_dispatch+0x68
> pc = 0xc056a350 lr = 0xc0588cec (bcm_lintc_intr+0x230)
> sp = 0xd75eba98 fp = 0xd75ebad8
> r4 = 0xc3a3c400 r5 = 0x000000c0
> bcm_lintc_intr() at bcm_lintc_intr+0x230
> pc = 0xc0588cec lr = 0xc056a2c4 (intr_irq_handler+0x68)
> sp = 0xd75ebae0 fp = 0xd75ebaf0
> r4 = 0xd75ebaf8 r5 = 0xc07b7778
> r6 = 0xc3a5f000 r7 = 0x00000000
> r8 = 0x00000000 r9 = 0xc3fdb2b0
> r10 = 0xd75ebbf0
> intr_irq_handler() at intr_irq_handler+0x68
> pc = 0xc056a2c4 lr = 0xc05687fc (exception_exit)
> sp = 0xd75ebaf8 fp = 0xd75ebba8
> r4 = 0xc0620546 r5 = 0x00000001
> r6 = 0xc3fdb2c0 r7 = 0x00000608
> exception_exit() at exception_exit
> pc = 0xc05687fc lr = 0xc0515a44 (ffs_sync+0x104)
> sp = 0xd75ebb88 fp = 0xd75ebba8
> r0 = 0x00000004 r1 = 0xc3a5f000
> r2 = 0xc3fdb2c0 r3 = 0xc3a5f000
> r4 = 0xc0620546 r5 = 0x00000001
> r6 = 0xc3fdb2c0 r7 = 0x00000608
> r8 = 0x00000000 r9 = 0xc3fdb2b0
> r10 = 0xd75ebbf0 r12 = 0xc061cf05
The break to debugger happened here. So system was in
__mtx_lock_flags() called from ffs_sync(). It's impossible to say from
the backtrace if system was stuck or you just break it there. However,
interrupts was working, so system was not dead totally. One method how
to learn more is to break to debugger repeatedly to see if system is
stuck to one place or not. You also can try to type the following
commands in ddb to see if there is any deadlock.
show all pcpu
show all procs
show all locks
For the second command, kernel must be built with WITNESS option.
> __mtx_lock_flags() at __mtx_lock_flags+0xb4
> pc = 0xc02a4f60 lr = 0xc0515a44 (ffs_sync+0x104)
> sp = 0xd75ebbb0 fp = 0xd75ebc20
> r4 = 0xc3a5f000 r5 = 0xc3fdb2c0
> r6 = 0x00000000 r7 = 0xc0620546
> r8 = 0xc3fdb2b0 r9 = 0xc3a5f000
> ffs_sync() at ffs_sync+0x104
> pc = 0xc0515a44 lr = 0xc0380aa0 (vfs_write_suspend+0x19c)
> sp = 0xd75ebc28 fp = 0xd75ebc58
> r4 = 0xc3fdb2b0 r5 = 0xc3fdb2b0
> r6 = 0x00000000 r7 = 0x00000002
> r8 = 0x00000000 r9 = 0xc3fdb2c0
> r10 = 0xc4351600
> vfs_write_suspend() at vfs_write_suspend+0x19c
> pc = 0xc0380aa0 lr = 0xc0380cd0 (vfs_write_suspend_umnt+0x40)
> sp = 0xd75ebc60 fp = 0xd75ebc70
> r4 = 0xc3fdb2b0 r5 = 0xc3fdb2b0
> r6 = 0xc3a5f000 r7 = 0x00000002
> r8 = 0x00000000 r9 = 0xc44fa000
> r10 = 0xc4351600
> vfs_write_suspend_umnt() at vfs_write_suspend_umnt+0x40
> pc = 0xc0380cd0 lr = 0xc051533c (ffs_unmount+0x48)
> sp = 0xd75ebc78 fp = 0xd75ebcb8
> r4 = 0x00080000 r5 = 0xc3fdb2b0
> r6 = 0xc3a5f000 r7 = 0x00000002
> ffs_unmount() at ffs_unmount+0x48
> pc = 0xc051533c lr = 0xc0368734 (dounmount+0x594)
> sp = 0xd75ebcc0 fp = 0xd75ebd08
> r4 = 0x00080000 r5 = 0x10201000
> r6 = 0x00000000 r7 = 0x00000000
> r8 = 0x00080000 r9 = 0xc4534360
> r10 = 0xc3a5f000
> dounmount() at dounmount+0x594
> pc = 0xc0368734 lr = 0xc0371dc8 (vfs_unmountall+0x7c)
> sp = 0xd75ebd10 fp = 0xd75ebd30
> r4 = 0xc07bed0c r5 = 0x00000000
> r6 = 0xc43a1ac0 r7 = 0xc3fdb2b0
> r8 = 0xc06078f6 r9 = 0xc061a20b
> r10 = 0xc06078df
> vfs_unmountall() at vfs_unmountall+0x7c
> pc = 0xc0371dc8 lr = 0xc0352a90 (bufshutdown+0x39c)
> sp = 0xd75ebd38 fp = 0xd75ebd78
> r4 = 0x00000000 r5 = 0xd76146b8
> r6 = 0xd76146b8 r7 = 0x00000000
> r8 = 0xc07b1cc8 r9 = 0xc060353c
> r10 = 0xc06034fb
> bufshutdown() at bufshutdown+0x39c
> pc = 0xc0352a90 lr = 0xc02bd8e8 (kern_reboot+0x1e0)
> sp = 0xd75ebd80 fp = 0xd75ebda8
> r4 = 0x00000000 r5 = 0xc3afbb80
> r6 = 0xc3afbb9c r7 = 0xc05f6199
> r8 = 0xc070ab5c r9 = 0x00000000
> r10 = 0x0000001f
> kern_reboot() at kern_reboot+0x1e0
> pc = 0xc02bd8e8 lr = 0xc02bd694 (sys_reboot+0x3a8)
> sp = 0xd75ebdb0 fp = 0xd75ebde0
> r4 = 0x00000000 r5 = 0xd75ebe00
> r6 = 0x00000000 r7 = 0xc07be170
> r8 = 0x00000000 r9 = 0xd75ebdf8
> sys_reboot() at sys_reboot+0x3a8
> pc = 0xc02bd694 lr = 0xc05822b0 (swi_handler+0x318)
> sp = 0xd75ebde8 fp = 0xd75ebe48
> r4 = 0xc3a5f000 r5 = 0xc3a5b000
> r6 = 0x00000000 r7 = 0xc07be170
> r8 = 0x00000000 r9 = 0xd75ebdf8
> r10 = 0x0000001f
> swi_handler() at swi_handler+0x318
> pc = 0xc05822b0 lr = 0xc056878c (swi_exit)
> sp = 0xd75ebe50 fp = 0xbfbfe978
> r4 = 0x00008ab8 r5 = 0x000ed82c
> r6 = 0x0000004e r7 = 0x00000037
> r8 = 0x0000000f r9 = 0x0000001e
> r10 = 0x0000001f
> swi_exit() at swi_exit
> pc = 0xc056878c lr = 0xc056878c (swi_exit)
> sp = 0xd75ebe50 fp = 0xbfbfe978
> db>
>
> Thanks for reading and any guidance.
> _______________________________________________
> freebsd-arm at freebsd.org mailing list
> https://lists.freebsd.org/mailman/listinfo/freebsd-arm
> To unsubscribe, send any mail to "freebsd-arm-unsubscribe at freebsd.org"
More information about the freebsd-arm
mailing list