kern/187238: vm.pmap.pcid_enabled="1" causes Java to coredump in FBSD 10
Konstantin Belousov
kostikbel at gmail.com
Mon Mar 24 19:11:45 UTC 2014
On Sun, Mar 23, 2014 at 01:03:00PM +0100, Henrik Gulbrandsen wrote:
> This is the most time-consuming bug I've encountered in my life, and not
> only because I started looking for it in the JVM, but now it seems to
> have
> been hiding in plain sight. I'm pretty sure that pmap->pm_save is
> handled
> incorrectly in the current kernel. Judging from the code, it's supposed
> to
> include all CPUs where the pmap has been active since the latest call to
> pmap_invalidate_all(...). However, that means that it should always be a
> superset of pmap->pm_active, since any CPU where the pmap is active may
> cache pmap information at any time. Currently, this is not the case, and
> since only CPUs in pmap->pm_save are targeted in the TLB shootdown, we
> are left with inconsistencies that crash the process soon afterwards.
>
> The attached patch solves this by only clearing a CPU from pmap->pm_save
> if it is not currently included in pmap->pm_active. As far as I can
> tell,
> that eliminates the bug. The patch is against STABLE, since that's what
> I'm currently running, but CURRENT should be pretty close, except for
> the
> default setting of pmap_pcid_enabled.
Yes, I think that the analysis and the patch (for stable/10) is correct.
Thank you for tracking this down.
>
> By the way, the logic in the invalidation functions is a bit messy now
> and can probably be simplified. Also, is there a good reason for
> ignoring
> the pmap argument in smp_masked_invltlb(...)?
I think this was an ommission.
I adopted the patch to HEAD, and apparantly the rewrite of the page
invalidation IPI handlers in C introduced a regression, so it took
some more time to find the issue. Apparently, the invlrng_handler()
did the checks for special cases in the wrong order, doing the
invpcid(INVPCID_ADDR) before checking for special PCIDs.
Show me the first lines of the verbose dmesg for your machine.
Below is the cumulative patch for HEAD. If somebody can test this
with jdk build on HEAD, it would be useful.
For testing, the tunable vm.pmap.pcid_enabled must be set to 1
from the loader prompt.
diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
index 80b4e12..afc1bef 100644
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -1257,7 +1257,7 @@ smp_masked_invltlb(cpuset_t mask, pmap_t pmap)
{
if (smp_started) {
- smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, NULL, 0, 0);
+ smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, pmap, 0, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_masked_global++;
#endif
@@ -1517,6 +1517,7 @@ void
invltlb_pcid_handler(void)
{
uint64_t cr3;
+ u_int cpuid;
#ifdef COUNT_XINVLTLB_HITS
xhits_gbl[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
@@ -1524,14 +1525,13 @@ invltlb_pcid_handler(void)
(*ipi_invltlb_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
- cr3 = rcr3();
if (smp_tlb_invpcid.pcid != (uint64_t)-1 &&
smp_tlb_invpcid.pcid != 0) {
-
if (invpcid_works) {
invpcid(&smp_tlb_invpcid, INVPCID_CTX);
} else {
/* Otherwise reload %cr3 twice. */
+ cr3 = rcr3();
if (cr3 != pcid_cr3) {
load_cr3(pcid_cr3);
cr3 |= CR3_PCID_SAVE;
@@ -1541,8 +1541,11 @@ invltlb_pcid_handler(void)
} else {
invltlb_globpcid();
}
- if (smp_tlb_pmap != NULL)
- CPU_CLR_ATOMIC(PCPU_GET(cpuid), &smp_tlb_pmap->pm_save);
+ if (smp_tlb_pmap != NULL) {
+ cpuid = PCPU_GET(cpuid);
+ if (!CPU_ISSET(cpuid, &smp_tlb_pmap->pm_active))
+ CPU_CLR_ATOMIC(cpuid, &smp_tlb_pmap->pm_save);
+ }
atomic_add_int(&smp_tlb_wait, 1);
}
@@ -1608,7 +1611,10 @@ invlpg_range(vm_offset_t start, vm_offset_t end)
void
invlrng_handler(void)
{
+ struct invpcid_descr d;
vm_offset_t addr;
+ uint64_t cr3;
+ u_int cpuid;
#ifdef COUNT_XINVLTLB_HITS
xhits_rng[PCPU_GET(cpuid)]++;
#endif /* COUNT_XINVLTLB_HITS */
@@ -1618,15 +1624,7 @@ invlrng_handler(void)
addr = smp_tlb_invpcid.addr;
if (pmap_pcid_enabled) {
- if (invpcid_works) {
- struct invpcid_descr d;
-
- d = smp_tlb_invpcid;
- do {
- invpcid(&d, INVPCID_ADDR);
- d.addr += PAGE_SIZE;
- } while (d.addr < smp_tlb_addr2);
- } else if (smp_tlb_invpcid.pcid == 0) {
+ if (smp_tlb_invpcid.pcid == 0) {
/*
* kernel pmap - use invlpg to invalidate
* global mapping.
@@ -1635,12 +1633,18 @@ invlrng_handler(void)
} else if (smp_tlb_invpcid.pcid == (uint64_t)-1) {
invltlb_globpcid();
if (smp_tlb_pmap != NULL) {
- CPU_CLR_ATOMIC(PCPU_GET(cpuid),
- &smp_tlb_pmap->pm_save);
+ cpuid = PCPU_GET(cpuid);
+ if (!CPU_ISSET(cpuid, &smp_tlb_pmap->pm_active))
+ CPU_CLR_ATOMIC(cpuid,
+ &smp_tlb_pmap->pm_save);
}
+ } else if (invpcid_works) {
+ d = smp_tlb_invpcid;
+ do {
+ invpcid(&d, INVPCID_ADDR);
+ d.addr += PAGE_SIZE;
+ } while (d.addr <= smp_tlb_addr2);
} else {
- uint64_t cr3;
-
cr3 = rcr3();
if (cr3 != pcid_cr3)
load_cr3(pcid_cr3 | CR3_PCID_SAVE);
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 51ac9be..183a456 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -838,7 +838,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
kernel_pmap->pm_pml4 = (pdp_entry_t *)PHYS_TO_DMAP(KPML4phys);
kernel_pmap->pm_cr3 = KPML4phys;
CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */
- CPU_ZERO(&kernel_pmap->pm_save);
+ CPU_FILL(&kernel_pmap->pm_save); /* always superset of pm_active */
TAILQ_INIT(&kernel_pmap->pm_pvchunk);
kernel_pmap->pm_flags = pmap_flags;
@@ -1494,7 +1494,8 @@ pmap_invalidate_all(pmap_t pmap)
} else {
invltlb_globpcid();
}
- CPU_CLR_ATOMIC(cpuid, &pmap->pm_save);
+ if (!CPU_ISSET(cpuid, &pmap->pm_active))
+ CPU_CLR_ATOMIC(cpuid, &pmap->pm_save);
smp_invltlb(pmap);
} else {
other_cpus = all_cpus;
@@ -1528,7 +1529,8 @@ pmap_invalidate_all(pmap_t pmap)
}
} else if (CPU_ISSET(cpuid, &pmap->pm_active))
invltlb();
- CPU_CLR_ATOMIC(cpuid, &pmap->pm_save);
+ if (!CPU_ISSET(cpuid, &pmap->pm_active))
+ CPU_CLR_ATOMIC(cpuid, &pmap->pm_save);
if (pmap_pcid_enabled)
CPU_AND(&other_cpus, &pmap->pm_save);
else
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 834 bytes
Desc: not available
URL: <http://lists.freebsd.org/pipermail/freebsd-java/attachments/20140324/f93dc255/attachment.sig>
More information about the freebsd-java
mailing list