Panics with GEOM
Wojciech Puchar
wojtek at wojtek.tensor.gdynia.pl
Fri Feb 1 05:14:00 PST 2008
> advise where to post if this is not appropriate.
>
> An amd64/SMP server of mine worked quite reliably for some time.
> Now I added two more disks and created a gmirror with them; from that point
> on it experienced lockups, crashes and panics.
I use amd64/SMP+gmirror+gstripe+geli and it has worked stably for a long time.
The problem is probably somewhere else.
> Since it was a 6.2 at the time, I immediately upgraded to 6.3: this did not
> solve the problem but, at least, the box would reboot and get me a crash dump.
>
> So here it is: looks like the I/O subsystem has problems, so the two disks
> might really be related, but still, I'm not sure.
>
>
>
>> # kgdb kernel.debug /var/crash/vmcore.5
>> [GDB will not be able to debug user-mode threads: /usr/lib/libthread_db.so:
>> Undefined symbol "ps_pglobal_lookup"]
>> GNU gdb 6.1.1 [FreeBSD]
>> Copyright 2004 Free Software Foundation, Inc.
>> GDB is free software, covered by the GNU General Public License, and you
>> are
>> welcome to change it and/or distribute copies of it under certain
>> conditions.
>> Type "show copying" to see the conditions.
>> There is absolutely no warranty for GDB. Type "show warranty" for details.
>> This GDB was configured as "amd64-marcel-freebsd".
>>
>> Unread portion of the kernel message buffer:
>>
>>
>> Fatal trap 12: page fault while in kernel mode
>> cpuid = 1; apic id = 01
>> fault virtual address = 0x50006
>> fault code = supervisor read data, page not present
>> instruction pointer = 0x8:0xffffffff8020e076
>> stack pointer = 0x10:0xffffffffa831c7a0
>> frame pointer = 0x10:0xffffffffa831c7e0
>> code segment = base 0x0, limit 0xfffff, type 0x1b
>> = DPL 0, pres 1, long 1, def32 0, gran 1
>> processor eflags = interrupt enabled, resume, IOPL = 0
>> current process = 88587 (clamscan)
>> trap number = 12
>> panic: page fault
>> cpuid = 1
>> Uptime: 3d8h32m39s
>> Dumping 1023 MB (2 chunks)
>> chunk 0: 1MB (151 pages) ... ok
>> chunk 1: 1023MB (261744 pages) 1007 991 975 959 943
>> <110>ipfw: 65534 Deny TCP 192.168.101.1:58319 192.168.101.4:54663 in via
>> fxp0
>> <110>ipfw: 65534 Deny TCP 192.168.101.1:58319 192.168.101.4:54663 in via
>> fxp0
>> 927 911 895 879 863 847 831 815 799 783 767 751 735 719 703 687 671 655
>> 639 623 607 591 575 559 543 527 511 495 479 463 447 431 415 399 383 367 351
>> 335 319 303 287 271 255 239 223 207 191 175panic: ahd_run_qoutfifo
>> recursion
>> cpuid = 1
>> 159 143 127 111 95 79 63 47 31 15
>>
>> #0 doadump () at pcpu.h:172
>> 172 __asm __volatile("movq %%gs:0,%0" : "=r" (td));
>> (kgdb) bt
>> #0 doadump () at pcpu.h:172
>> #1 0xffffffff80257115 in boot (howto=260) at
>> /usr/src/sys/kern/kern_shutdown.c:409
>> #2 0xffffffff80257825 in panic (fmt=0xffffff00110e9980 "X\023\021\021") at
>> /usr/src/sys/kern/kern_shutdown.c:565
>> #3 0xffffffff803b50c6 in trap_fatal (frame=0xc, eva=18446742974484093312)
>> at /usr/src/sys/amd64/amd64/trap.c:669
>> #4 0xffffffff803b546d in trap_pfault (frame=0xffffffffa831c6f0,
>> usermode=0) at /usr/src/sys/amd64/amd64/trap.c:580
>> #5 0xffffffff803b56cd in trap (frame=
>> {tf_rdi = -1098891843040, tf_rsi = -1098516260992, tf_rdx =
>> -1098692440992, tf_rcx = 1, tf_r8 = 0, tf_r9 = 327686, tf_rax = 2048,
>> tf_rbx = -1098891843040, tf_rbp = -1473132576, tf_r10 = -1098978658048,
>> tf_r11 = -1098938410752, tf_r12 = -1098516260992, tf_r13 = 327686, tf_r14 =
>> -1098891842864, tf_r15 = -1705935624, tf_trapno = 12, tf_addr = 327686,
>> tf_flags = 1108101564416, tf_err = 0, tf_rip = -2145329034, tf_cs = 8,
>> tf_rflags = 66178, tf_rsp = -1473132624, tf_ss = 16}) at
>> /usr/src/sys/amd64/amd64/trap.c:353
>> #6 0xffffffff8039c49b in calltrap () at
>> /usr/src/sys/amd64/amd64/exception.S:168
>> #7 0xffffffff8020e076 in g_io_request (bp=0xffffff0024f12a20,
>> cp=0xffffff003b541780) at /usr/src/sys/geom/geom_io.c:275
>> #8 0xffffffff803709ad in ufs_strategy (ap=0xffffff0024f12a20) at
>> /usr/src/sys/ufs/ufs/ufs_vnops.c:1973
>> #9 0xffffffff803e5549 in VOP_STRATEGY_APV (vop=0xffffffff805702c0,
>> a=0xffffffffa831c840) at vnode_if.c:1796
>> #10 0xffffffff802b510c in bufstrategy (bo=0xffffff0024f12a20,
>> bp=0xffffff003b541780) at vnode_if.h:928
>> #11 0xffffffff802b4575 in breadn (vp=0xffffff000a55eba0,
>> blkno=-1098516260992, size=819186784, rablkno=0x0, rabsize=0x0, cnt=0,
>> cred=0x0, bpp=0x800) at buf.h:426
>> #12 0xffffffff802b48fe in bread (vp=0xffffff0024f12a20,
>> blkno=-1098516260992, size=819186784, cred=0x1, bpp=0x0) at
>> /usr/src/sys/kern/vfs_bio.c:723
>> #13 0xffffffff80363886 in ffs_read (ap=0xffffff0024f12a20) at
>> /usr/src/sys/ufs/ffs/ffs_vnops.c:523
>> #14 0xffffffff803e3efa in VOP_READ_APV (vop=0x800, a=0xffffff003b541780) at
>> vnode_if.c:643
>> #15 0xffffffff80370649 in ufs_readdir (ap=0xffffffffa831cad0) at
>> vnode_if.h:343
>> #16 0xffffffff803e419d in VOP_READDIR_APV (vop=0x800, a=0xffffff003b541780)
>> at vnode_if.c:1427
>> #17 0xffffffff802d0657 in getdirentries (td=0xffffff00110e9980,
>> uap=0xffffffffa831cbc0) at vnode_if.h:746
>> #18 0xffffffff803b6052 in syscall (frame=
>> {tf_rdi = 4, tf_rsi = 58564608, tf_rdx = 4096, tf_rcx = 58550056,
>> tf_r8 = 0, tf_r9 = 140737488347784, tf_rax = 196, tf_rbx = 58550016, tf_rbp
>> = 58550016, tf_r10 = 34367908128, tf_r11 = 58626048, tf_r12 = 5320784,
>> tf_r13 = 58550016, tf_r14 = 58540768, tf_r15 = 3, tf_trapno = 12, tf_addr =
>> 34365898752, tf_flags = 31845, tf_err = 2, tf_rip = 34377717596, tf_cs =
>> 43, tf_rflags = 582, tf_rsp = 140737488348456, tf_ss = 35}) at
>> /usr/src/sys/amd64/amd64/trap.c:807
>> #19 0xffffffff8039c698 in Xfast_syscall () at
>> /usr/src/sys/amd64/amd64/exception.S:287
>> #20 0x000000080112575c in ?? ()
>> Previous frame inner to this frame (corrupt stack?)
>
>
> I guess what I should look into is this (see the arrow):
>
>> (kgdb) list
>> 270 KASSERT(bp->bio_length % cp->provider->sectorsize == 0,
>> 271 ("wrong length %jd for sectorsize %u",
>> 272 bp->bio_length, cp->provider->sectorsize));
>> 273 }
>> 274
>> 275 ------->>> g_trace(G_T_BIO, "bio_request(%p) from %p(%s) to %p(%s) cmd %d",
>> 276 bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd);
>> 277
>> 278 bp->bio_from = cp;
>> 279 bp->bio_to = pp;
>
>> (kgdb) p bp
>> $9 = (struct bio *) 0xffffff0024f12a20
>> (kgdb) p cp
>> $10 = (struct g_consumer *) 0xffffff003b541780
>> (kgdb) p pp
>> $11 = (struct g_provider *) 0x50006
>> (kgdb) p *bp
>> $12 = {bio_cmd = 1 '\001', bio_flags = 0 '\0', bio_cflags = 0 '\0',
>> bio_pflags = 0 '\0', bio_dev = 0x0, bio_disk = 0x0, bio_offset = 6160384,
>> bio_bcount = 0,
>> bio_data = 0xffffffff9dfdc000 "MANT.shx\220@?\024\"\200~@", bio_error =
>> 0, bio_resid = 0, bio_done = 0xffffffff802110c0 <g_vfs_done>, bio_driver1 =
>> 0x0, bio_driver2 = 0x0, bio_caller1 = 0x0,
>> bio_caller2 = 0xffffffff9a517cf8, bio_queue = {tqe_next = 0x0, tqe_prev =
>> 0x0}, bio_attribute = 0x0, bio_from = 0x0, bio_to = 0x0, bio_length = 2048,
>> bio_completed = 0, bio_children = 0,
>> bio_inbed = 0, bio_parent = 0x0, bio_t0 = {sec = 0, frac = 0}, bio_task =
>> 0, bio_task_arg = 0x0, bio_pblkno = 0}
>> (kgdb) p *cp
>> $13 = {geom = 0xffffff0030d3cc60, consumer = {le_next = 0x0, le_prev =
>> 0xffffffff9a6718f8}, provider = 0xd0006, consumers = {le_next =
>> 0xffffff003b541380, le_prev = 0xffffff002e4072a0},
>> acr = 680, acw = 0, ace = 6531640, spoiled = 0, stat = 0x0, nstart = 0,
>> nend = 0, private = 0xffffff0002f8b3c0, index = 995365760}
>> (kgdb) p *pp
>> Cannot access memory at address 0x50006
>> (kgdb)
>
>
> Can anyone provide some insight?
>
>
>
> bye & Thanks
> av.
>
> _______________________________________________
> freebsd-questions at freebsd.org mailing list
> http://lists.freebsd.org/mailman/listinfo/freebsd-questions
> To unsubscribe, send any mail to "freebsd-questions-unsubscribe at freebsd.org"
>
>
More information about the freebsd-questions
mailing list