Re: Pinebook pro IOMMU enabled crashes
- In reply to: Jesper Schmitz Mouridsen : "Re: Pinebook pro IOMMU enabled crashes"
- Go to: [ bottom of page ] [ top of archives ] [ this month ]
Date: Sun, 03 Oct 2021 19:36:28 UTC
On 30.09.2021 18.39, Jesper Schmitz Mouridsen wrote: >>>>> main-n249225-f673cc5edac3-dirty >>>>> nvme0: <Generic NVMe Device> at device 0.0 on pci1 >>>>> Fatal data abort: >>>>> x0: 0 >>>>> x1: 1000 >>>>> x2: 10040 >>>>> x3: 2000 >>>>> x4: 1 >>>>> x5: ffff00009a7a0168 >>>>> x6: 1d00000000000000 >>>>> x7: 10000000000000 >>>>> x8: 1168 >>>>> x9: 1 >>>>> x10: 0 >>>>> x11: ffff000000f35140 >>>>> x12: ffff000000f350c0 >>>>> x13: 1 >>>>> x14: 10000 >>>>> x15: 1 >>>>> x16: 10000 >>>>> x17: ffff000000f350dc >>>>> x18: ffff00000110d180 >>>>> x19: ffff00000110d248 >>>>> x20: 0 >>>>> x21: ffff00009a79f000 >>>>> x22: ffffa000010b0a00 >>>>> x23: ffffa000010a2880 >>>>> x24: ffffa0000116da00 >>>>> x25: ffff000000b4fd78 >>>>> x26: ffffa0000116db00 >>>>> x27: ffff000000e83000 >>>>> x28: 7 >>>>> x29: ffff00000110d190 >>>>> sp: ffff00000110d180 >>>>> lr: ffff00000077520c >>>>> elr: ffff0000007a03ac >>>>> spsr: 200000c5 >>>>> far: 0 >>>>> esr: 96000004 >>>>> panic: vm_fault failed: ffff0000007a03ac error 1 >>>>> cpuid = 0 >>>>> time = 1 >>>>> KDB: stack backtrace: >>>>> db_trace_self() at db_trace_self >>>>> db_trace_self_wrapper() at db_trace_self_wrapper+0x30 >>>>> vpanic() at vpanic+0x184 >>>>> panic() at panic+0x44 >>>>> data_abort() at data_abort+0x23c >>>>> handle_el1h_sync() at handle_el1h_sync+0x78 >>>>> --- exception, esr 0x96000004 >>>>> iommu_map_msi() at iommu_map_msi+0x20 >>>>> gicv3_iommu_init() at gicv3_iommu_init+0x4c >>>>> intr_alloc_msix() at intr_alloc_msix+0x13c >>>>> rk_pcie_alloc_msix() at rk_pcie_alloc_msix+0xfc >>>>> pci_alloc_msix_method() at pci_alloc_msix_method+0x1a8 >>>>> nvme_pci_attach() at nvme_pci_attach+0x378 >>>>> device_attach() at device_attach+0x400 >>>>> device_probe_and_attach() at device_probe_and_attach+0x7c >>>>> bus_generic_attach() at bus_generic_attach+0x18 >>>>> pci_attach() at pci_attach+0xe8 >>>>> device_attach() at device_attach+0x400 >>>>> device_probe_and_attach() at device_probe_and_attach+0x7c >>>>> bus_generic_attach() at 
bus_generic_attach+0x18 >>>>> device_attach() at device_attach+0x400 >>>>> device_probe_and_attach() at device_probe_and_attach+0x7c >>>>> bus_generic_attach() at bus_generic_attach+0x18 >>>>> pci_attach() at pci_attach+0xe8 >>>>> device_attach() at device_attach+0x400 >>>>> device_probe_and_attach() at device_probe_and_attach+0x7c >>>>> bus_generic_attach() at bus_generic_attach+0x18 >>>>> rk_pcie_attach() at rk_pcie_attach+0x14cc >>>>> device_attach() at device_attach+0x400 >>>>> device_probe_and_attach() at device_probe_and_attach+0x7c >>>>> bus_generic_new_pass() at bus_generic_new_pass+0xf8 >>>>> bus_generic_new_pass() at bus_generic_new_pass+0xa8 >>>>> bus_generic_new_pass() at bus_generic_new_pass+0xa8 >>>>> bus_set_pass() at bus_set_pass+0x4c >>>>> mi_startup() at mi_startup+0x12c >>>>> virtdone() at virtdone+0x6c >>>>> >>>> That's an old commit. Did you see this panic only recently or ? >>>> >>> >> >> >> Even on stable/13-n247374-9faebc1e664d-dirty >> >> I get the same backtrace when IOMMU is enabled and the nvme is attached. 
>> >> pcib1: <PCI-PCI bridge> at device 0.0 on pci0 >> pcib0: failed to reserve resource for pcib1 >> pcib1: failed to allocate initial memory window: 0-0xfffff >> pci1: <PCI bus> on pcib1 >> nvme0: <Generic NVMe Device> at device 0.0 on pci1 >> Fatal data abort: >> x0: 0 >> x1: 1000 >> x2: 10040 >> x3: 2000 >> x4: 1 >> x5: ffff00009a99e160 >> x6: 1400000000000000 >> x7: 10000000000000 >> x8: 1160 >> x9: ffff000000cd7cc0 >> x10: 0 >> x11: ffff000000d89540 >> x12: ffff000000d894c0 >> x13: 1 >> x14: 10000 >> x15: 1 >> x16: 10000 >> x17: 0 >> x18: ffff000000f5c250 >> x19: ffff000000f5c318 >> x20: 0 >> x21: ffff00009a99d000 >> x22: ffffa00000f06200 >> x23: ffffa00000f49700 >> x24: ffffa00000f8f500 >> x25: ffff0000009b85f8 >> x26: ffffa00000f8f600 >> x27: ffff000000cd7000 >> x28: 7 >> x29: ffff000000f5c260 >> sp: ffff000000f5c250 >> lr: ffff0000006bf3dc >> elr: ffff0000006e15d0 >> spsr: 600001c5 >> far: 0 >> esr: 96000004 >> panic: vm_fault failed: ffff0000006e15d0 >> cpuid = 0 >> time = 1 >> KDB: stack backtrace: >> #0 0xffff00000047c304 at kdb_backtrace+0x60 >> #1 0xffff000000437fd8 at vpanic+0x184 >> #2 0xffff000000437e50 at panic+0x44 >> #3 0xffff0000006d692c at data_abort+0x204 >> #4 0xffff0000006bb874 at handle_el1h_sync+0x74 >> #5 0xffff0000006bf3d8 at gicv3_iommu_init+0x4c >> #6 0xffff0000006bf3d8 at gicv3_iommu_init+0x4c >> #7 0xffff0000006b1940 at intr_alloc_msix+0x110 >> #8 0xffff0000007860c0 at rk_pcie_alloc_msix+0xfc >> #9 0xffff000000219bbc at pci_alloc_msix_method+0x1a8 >> #10 0xffff00000020ba64 at nvme_pci_attach+0x378 >> #11 0xffff00000046bd80 at device_attach+0x400 >> #12 0xffff00000046d14c at bus_generic_attach+0x4c >> #13 0xffff000000221f30 at pci_attach+0xe0 >> #14 0xffff00000046bd80 at device_attach+0x400 >> #15 0xffff00000046d14c at bus_generic_attach+0x4c >> #16 0xffff00000046bd80 at device_attach+0x400 >> #17 0xffff00000046d14c at bus_generic_attach+0x4c >> Uptime: 1s >> >> > git checkout 50cedfede3d21824ec6023324b3ad41a435e1815 > 
sys/arm64/arm64/gicv3_its.c and the problem goes away. The commit is one > before > Add IOMMU support to GICv3 Interrupt Translation Service (ITS) driver. > (ba196aec7dad1b73a9a3b86a06259d5e81f16fad) > It turns out iommu_get_dev_ctx() returns NULL for at least my NVMe device (Kingston A2000 M.2 NVMe SSD), so the patch below "fixes" it: diff --git a/sys/arm64/arm64/gicv3_its.c b/sys/arm64/arm64/gicv3_its.c index 1a0e7a79e76b..28e2bcf70a5d 100644 --- a/sys/arm64/arm64/gicv3_its.c +++ b/sys/arm64/arm64/gicv3_its.c @@ -316,7 +316,7 @@ static const struct { static device_attach_t gicv3_its_attach; static device_detach_t gicv3_its_detach; -static pic_disable_intr_t gicv3_its_disable_intr; +tatic pic_disable_intr_t gicv3_its_disable_intr; static pic_enable_intr_t gicv3_its_enable_intr; static pic_map_intr_t gicv3_its_map_intr; static pic_setup_intr_t gicv3_its_setup_intr; @@ -1473,6 +1473,10 @@ gicv3_iommu_init(device_t dev, device_t child, struct iommu_domain **domain) sc = device_get_softc(dev); ctx = iommu_get_dev_ctx(child); + + if(ctx == NULL) + return (ENXIO); + error = iommu_map_msi(ctx, PAGE_SIZE, GITS_TRANSLATER, IOMMU_MAP_ENTRY_WRITE, IOMMU_MF_CANWAIT, &sc->ma); *domain = iommu_get_ctx_domain(ctx); $