debugging frequent kernel panics on 8.2-RELEASE
John Baldwin
jhb at freebsd.org
Fri Aug 19 12:14:03 UTC 2011
On Thursday, August 18, 2011 4:09:35 pm Andriy Gapon wrote:
> on 17/08/2011 23:21 Andriy Gapon said the following:
> > It seems like everything starts with some kind of a race between terminating
> > processes in a jail and termination of the jail itself. This is where the
> > details are very thin so far. What we see is that a process (http) is in
> > exit(2) syscall, in exit1() function actually, and past the place where P_WEXIT
> > flag is set and even past the place where p_limit is freed and reset to NULL.
> > At that place the thread calls prison_proc_free(), which calls prison_deref().
> > Then, we see that in prison_deref() the thread gets a page fault because of what
> > seems like a NULL pointer dereference. That's just the start of the problem and
> > its root cause.
> >
> > Then, trap_pfault() gets invoked and, because addresses close to NULL look like
> > userspace addresses, vm_fault/vm_fault_hold gets called, which in its turn goes
> > on to call vm_map_growstack. First thing that vm_map_growstack does is a call
> > to lim_cur(), but because p_limit is already NULL, that call results in a NULL
> > pointer dereference and a page fault. Goto the beginning of this paragraph.
> >
> > So we get this recursion of sorts, which only ends when a stack is exhausted and
> > a CPU generates a double-fault.
>
> BTW, does anyone have an idea why the thread in question would "disappear" from
> the kgdb's point of view?
>
> (kgdb) p cpuid_to_pcpu[2]->pc_curthread->td_tid
> $3 = 102057
> (kgdb) tid 102057
> invalid tid
>
> info threads also doesn't list the thread.
>
> Is it because the panic happened while the thread was somewhere in exit1()?
Yes, it is a bug in kgdb that it only walks allproc and not zombproc. Try this:
Index: kthr.c
===================================================================
--- kthr.c (revision 224879)
+++ kthr.c (working copy)
@@ -73,11 +73,52 @@ kgdb_thr_first(void)
return (first);
}
+static void
+kgdb_thr_add_procs(uintptr_t paddr)
+{
+ struct proc p;
+ struct thread td;
+ struct kthr *kt;
+ CORE_ADDR addr;
+
+ while (paddr != 0) {
+ if (kvm_read(kvm, paddr, &p, sizeof(p)) != sizeof(p)) {
+ warnx("kvm_read: %s", kvm_geterr(kvm));
+ break;
+ }
+ addr = (uintptr_t)TAILQ_FIRST(&p.p_threads);
+ while (addr != 0) {
+ if (kvm_read(kvm, addr, &td, sizeof(td)) !=
+ sizeof(td)) {
+ warnx("kvm_read: %s", kvm_geterr(kvm));
+ break;
+ }
+ kt = malloc(sizeof(*kt));
+ kt->next = first;
+ kt->kaddr = addr;
+ if (td.td_tid == dumptid)
+ kt->pcb = dumppcb;
+ else if (td.td_state == TDS_RUNNING && stoppcbs != 0 &&
+ CPU_ISSET(td.td_oncpu, &stopped_cpus))
+ kt->pcb = (uintptr_t)stoppcbs +
+ sizeof(struct pcb) * td.td_oncpu;
+ else
+ kt->pcb = (uintptr_t)td.td_pcb;
+ kt->kstack = td.td_kstack;
+ kt->tid = td.td_tid;
+ kt->pid = p.p_pid;
+ kt->paddr = paddr;
+ kt->cpu = td.td_oncpu;
+ first = kt;
+ addr = (uintptr_t)TAILQ_NEXT(&td, td_plist);
+ }
+ paddr = (uintptr_t)LIST_NEXT(&p, p_list);
+ }
+}
+
struct kthr *
kgdb_thr_init(void)
{
- struct proc p;
- struct thread td;
long cpusetsize;
struct kthr *kt;
CORE_ADDR addr;
@@ -113,37 +154,11 @@ kgdb_thr_init(void)
stoppcbs = kgdb_lookup("stoppcbs");
- while (paddr != 0) {
- if (kvm_read(kvm, paddr, &p, sizeof(p)) != sizeof(p)) {
- warnx("kvm_read: %s", kvm_geterr(kvm));
- break;
- }
- addr = (uintptr_t)TAILQ_FIRST(&p.p_threads);
- while (addr != 0) {
- if (kvm_read(kvm, addr, &td, sizeof(td)) !=
- sizeof(td)) {
- warnx("kvm_read: %s", kvm_geterr(kvm));
- break;
- }
- kt = malloc(sizeof(*kt));
- kt->next = first;
- kt->kaddr = addr;
- if (td.td_tid == dumptid)
- kt->pcb = dumppcb;
- else if (td.td_state == TDS_RUNNING && stoppcbs != 0 &&
- CPU_ISSET(td.td_oncpu, &stopped_cpus))
- kt->pcb = (uintptr_t) stoppcbs + sizeof(struct pcb) * td.td_oncpu;
- else
- kt->pcb = (uintptr_t)td.td_pcb;
- kt->kstack = td.td_kstack;
- kt->tid = td.td_tid;
- kt->pid = p.p_pid;
- kt->paddr = paddr;
- kt->cpu = td.td_oncpu;
- first = kt;
- addr = (uintptr_t)TAILQ_NEXT(&td, td_plist);
- }
- paddr = (uintptr_t)LIST_NEXT(&p, p_list);
+ kgdb_thr_add_procs(paddr);
+ addr = kgdb_lookup("zombproc");
+ if (addr != 0) {
+ kvm_read(kvm, addr, &paddr, sizeof(paddr));
+ kgdb_thr_add_procs(paddr);
}
curkthr = kgdb_thr_lookup_tid(dumptid);
if (curkthr == NULL)
> is there an easy way to examine its stack in this case?
Hmm, you can use something like this from my kgdb macros.
For amd64:
# Do a backtrace given %rip and %rbp as args
#
# Usage: bt <rip> <rbp>
#   $arg0 - instruction pointer to start from
#   $arg1 - frame pointer to start from
# Follows the chain of saved frame pointers, printing one numbered line per
# frame, and stops once both the current pc and the frame pointer are zero.
# Working values live in the convenience variables $_rip, $_rbp, $i and $fr,
# all of which this macro overwrites.
# NOTE(review): presumably needs debug symbols that define
# struct amd64_frame -- confirm against the kernel being debugged.
# NOTE(review): `bt` is also gdb's usual abbreviation for `backtrace`;
# defining it here presumably overrides that -- confirm this is intended.
define bt
set $_rip = $arg0
set $_rbp = $arg1
set $i = 0
while ($_rbp != 0 || $_rip != 0)
printf "%2d: pc ", $i
# Show the single instruction at the current pc, or just finish the line
# when there is no pc to show.
if ($_rip != 0)
x/1i $_rip
else
printf "\n"
end
if ($_rbp == 0)
# No frame pointer left to follow: clear the pc so the loop terminates.
set $_rip = 0
else
# View the frame pointer as a struct amd64_frame and step to the caller
# through its saved frame pointer (f_frame) and return address (f_retaddr).
set $fr = (struct amd64_frame *)$_rbp
set $_rbp = $fr->f_frame
set $_rip = $fr->f_retaddr
set $i = $i + 1
end
end
end
document bt
Given values for %rip and %rbp, perform a manual backtrace.
end
# Usage: btf <trapframe>
# Convenience wrapper: runs `bt` with the tf_rip and tf_rbp members of the
# given trapframe as the starting pc and frame pointer.
# NOTE(review): the member accesses are appended directly to $arg0, so the
# argument presumably has to be a struct-valued expression written without
# spaces (parenthesize a pointer dereference) -- confirm against gdb's
# $argN substitution rules.
define btf
bt $arg0.tf_rip $arg0.tf_rbp
end
document btf
Do a manual backtrace from a specified trapframe.
end
For i386:
# Do a backtrace given %eip and %ebp as args
#
# Usage: bt <eip> <ebp>
#   $arg0 - instruction pointer to start from
#   $arg1 - frame pointer to start from
# Follows the chain of saved frame pointers, printing one numbered line per
# frame, and stops once both the current pc and the frame pointer are zero.
# Working values live in the convenience variables $_eip, $_ebp, $i and $fr,
# all of which this macro overwrites.
# NOTE(review): presumably needs debug symbols that define
# struct i386_frame -- confirm against the kernel being debugged.
# NOTE(review): `bt` is also gdb's usual abbreviation for `backtrace`;
# defining it here presumably overrides that -- confirm this is intended.
define bt
set $_eip = $arg0
set $_ebp = $arg1
set $i = 0
while ($_ebp != 0 || $_eip != 0)
printf "%2d: pc ", $i
# Show the single instruction at the current pc, or just finish the line
# when there is no pc to show.
if ($_eip != 0)
x/1i $_eip
else
printf "\n"
end
if ($_ebp == 0)
# No frame pointer left to follow: clear the pc so the loop terminates.
set $_eip = 0
else
# View the frame pointer as a struct i386_frame and step to the caller
# through its saved frame pointer (f_frame) and return address (f_retaddr).
set $fr = (struct i386_frame *)$_ebp
set $_ebp = $fr->f_frame
set $_eip = $fr->f_retaddr
set $i = $i + 1
end
end
end
document bt
Given values for %eip and %ebp, perform a manual backtrace.
end
# Usage: btf <trapframe>
# Convenience wrapper: runs `bt` with the tf_eip and tf_ebp members of the
# given trapframe as the starting pc and frame pointer.
# NOTE(review): the member accesses are appended directly to $arg0, so the
# argument presumably has to be a struct-valued expression written without
# spaces (parenthesize a pointer dereference) -- confirm against gdb's
# $argN substitution rules.
define btf
bt $arg0.tf_eip $arg0.tf_ebp
end
document btf
Do a manual backtrace from a specified trapframe.
end
--
John Baldwin
More information about the freebsd-hackers
mailing list