svn commit: r233647 - in stable/9/sys: amd64/amd64 amd64/include
i386/conf i386/i386 i386/include kern sys vm
Alan Cox
alc at FreeBSD.org
Thu Mar 29 04:54:35 UTC 2012
Author: alc
Date: Thu Mar 29 04:54:34 2012
New Revision: 233647
URL: http://svn.freebsd.org/changeset/base/233647
Log:
MFC r233291
Handle spurious page faults that may occur in no-fault sections of the
kernel.
Modified:
stable/9/sys/amd64/amd64/trap.c
stable/9/sys/amd64/include/proc.h
stable/9/sys/i386/i386/trap.c
stable/9/sys/i386/include/proc.h
stable/9/sys/kern/kern_sysctl.c
stable/9/sys/kern/subr_uio.c
stable/9/sys/sys/proc.h
stable/9/sys/vm/vm_fault.c
Directory Properties:
stable/9/sys/ (props changed)
stable/9/sys/amd64/include/xen/ (props changed)
stable/9/sys/boot/ (props changed)
stable/9/sys/boot/i386/efi/ (props changed)
stable/9/sys/boot/ia64/efi/ (props changed)
stable/9/sys/boot/ia64/ski/ (props changed)
stable/9/sys/boot/powerpc/boot1.chrp/ (props changed)
stable/9/sys/boot/powerpc/ofw/ (props changed)
stable/9/sys/cddl/contrib/opensolaris/ (props changed)
stable/9/sys/conf/ (props changed)
stable/9/sys/contrib/dev/acpica/ (props changed)
stable/9/sys/contrib/octeon-sdk/ (props changed)
stable/9/sys/contrib/pf/ (props changed)
stable/9/sys/contrib/x86emu/ (props changed)
stable/9/sys/fs/ (props changed)
stable/9/sys/fs/ntfs/ (props changed)
stable/9/sys/i386/conf/XENHVM (props changed)
Modified: stable/9/sys/amd64/amd64/trap.c
==============================================================================
--- stable/9/sys/amd64/amd64/trap.c Thu Mar 29 03:13:43 2012 (r233646)
+++ stable/9/sys/amd64/amd64/trap.c Thu Mar 29 04:54:34 2012 (r233647)
@@ -301,26 +301,6 @@ trap(struct trapframe *frame)
}
code = frame->tf_err;
- if (type == T_PAGEFLT) {
- /*
- * If we get a page fault while in a critical section, then
- * it is most likely a fatal kernel page fault. The kernel
- * is already going to panic trying to get a sleep lock to
- * do the VM lookup, so just consider it a fatal trap so the
- * kernel can print out a useful trap message and even get
- * to the debugger.
- *
- * If we get a page fault while holding a non-sleepable
- * lock, then it is most likely a fatal kernel page fault.
- * If WITNESS is enabled, then it's going to whine about
- * bogus LORs with various VM locks, so just skip to the
- * fatal trap handling directly.
- */
- if (td->td_critnest != 0 ||
- WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL,
- "Kernel page fault") != 0)
- trap_fatal(frame, frame->tf_addr);
- }
if (ISPL(frame->tf_cs) == SEL_UPL) {
/* user trap */
@@ -653,6 +633,50 @@ trap_pfault(frame, usermode)
struct proc *p = td->td_proc;
vm_offset_t eva = frame->tf_addr;
+ if (__predict_false((td->td_pflags & TDP_NOFAULTING) != 0)) {
+ /*
+ * Due to both processor errata and lazy TLB invalidation when
+ * access restrictions are removed from virtual pages, memory
+ * accesses that are allowed by the physical mapping layer may
+ * nonetheless cause one spurious page fault per virtual page.
+ * When the thread is executing a "no faulting" section that
+ * is bracketed by vm_fault_{disable,enable}_pagefaults(),
+ * every page fault is treated as a spurious page fault,
+ * unless it accesses the same virtual address as the most
+ * recent page fault within the same "no faulting" section.
+ */
+ if (td->td_md.md_spurflt_addr != eva ||
+ (td->td_pflags & TDP_RESETSPUR) != 0) {
+ /*
+ * Do nothing to the TLB. A stale TLB entry is
+ * flushed automatically by a page fault.
+ */
+ td->td_md.md_spurflt_addr = eva;
+ td->td_pflags &= ~TDP_RESETSPUR;
+ return (0);
+ }
+ } else {
+ /*
+ * If we get a page fault while in a critical section, then
+ * it is most likely a fatal kernel page fault. The kernel
+ * is already going to panic trying to get a sleep lock to
+ * do the VM lookup, so just consider it a fatal trap so the
+ * kernel can print out a useful trap message and even get
+ * to the debugger.
+ *
+ * If we get a page fault while holding a non-sleepable
+ * lock, then it is most likely a fatal kernel page fault.
+ * If WITNESS is enabled, then it's going to whine about
+ * bogus LORs with various VM locks, so just skip to the
+ * fatal trap handling directly.
+ */
+ if (td->td_critnest != 0 ||
+ WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL,
+ "Kernel page fault") != 0) {
+ trap_fatal(frame, eva);
+ return (-1);
+ }
+ }
va = trunc_page(eva);
if (va >= VM_MIN_KERNEL_ADDRESS) {
/*
Modified: stable/9/sys/amd64/include/proc.h
==============================================================================
--- stable/9/sys/amd64/include/proc.h Thu Mar 29 03:13:43 2012 (r233646)
+++ stable/9/sys/amd64/include/proc.h Thu Mar 29 04:54:34 2012 (r233647)
@@ -46,6 +46,7 @@ struct proc_ldt {
struct mdthread {
int md_spinlock_count; /* (k) */
register_t md_saved_flags; /* (k) */
+ register_t md_spurflt_addr; /* (k) Spurious page fault address. */
};
struct mdproc {
Modified: stable/9/sys/i386/i386/trap.c
==============================================================================
--- stable/9/sys/i386/i386/trap.c Thu Mar 29 03:13:43 2012 (r233646)
+++ stable/9/sys/i386/i386/trap.c Thu Mar 29 04:54:34 2012 (r233647)
@@ -329,28 +329,13 @@ trap(struct trapframe *frame)
* For some Cyrix CPUs, %cr2 is clobbered by
* interrupts. This problem is worked around by using
* an interrupt gate for the pagefault handler. We
- * are finally ready to read %cr2 and then must
- * reenable interrupts.
- *
- * If we get a page fault while in a critical section, then
- * it is most likely a fatal kernel page fault. The kernel
- * is already going to panic trying to get a sleep lock to
- * do the VM lookup, so just consider it a fatal trap so the
- * kernel can print out a useful trap message and even get
- * to the debugger.
- *
- * If we get a page fault while holding a non-sleepable
- * lock, then it is most likely a fatal kernel page fault.
- * If WITNESS is enabled, then it's going to whine about
- * bogus LORs with various VM locks, so just skip to the
- * fatal trap handling directly.
+ * are finally ready to read %cr2 and conditionally
+ * reenable interrupts. If we hold a spin lock, then
+ * we must not reenable interrupts. This might be a
+ * spurious page fault.
*/
eva = rcr2();
- if (td->td_critnest != 0 ||
- WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL,
- "Kernel page fault") != 0)
- trap_fatal(frame, eva);
- else
+ if (td->td_md.md_spinlock_count == 0)
enable_intr();
}
@@ -803,6 +788,50 @@ trap_pfault(frame, usermode, eva)
struct thread *td = curthread;
struct proc *p = td->td_proc;
+ if (__predict_false((td->td_pflags & TDP_NOFAULTING) != 0)) {
+ /*
+ * Due to both processor errata and lazy TLB invalidation when
+ * access restrictions are removed from virtual pages, memory
+ * accesses that are allowed by the physical mapping layer may
+ * nonetheless cause one spurious page fault per virtual page.
+ * When the thread is executing a "no faulting" section that
+ * is bracketed by vm_fault_{disable,enable}_pagefaults(),
+ * every page fault is treated as a spurious page fault,
+ * unless it accesses the same virtual address as the most
+ * recent page fault within the same "no faulting" section.
+ */
+ if (td->td_md.md_spurflt_addr != eva ||
+ (td->td_pflags & TDP_RESETSPUR) != 0) {
+ /*
+ * Do nothing to the TLB. A stale TLB entry is
+ * flushed automatically by a page fault.
+ */
+ td->td_md.md_spurflt_addr = eva;
+ td->td_pflags &= ~TDP_RESETSPUR;
+ return (0);
+ }
+ } else {
+ /*
+ * If we get a page fault while in a critical section, then
+ * it is most likely a fatal kernel page fault. The kernel
+ * is already going to panic trying to get a sleep lock to
+ * do the VM lookup, so just consider it a fatal trap so the
+ * kernel can print out a useful trap message and even get
+ * to the debugger.
+ *
+ * If we get a page fault while holding a non-sleepable
+ * lock, then it is most likely a fatal kernel page fault.
+ * If WITNESS is enabled, then it's going to whine about
+ * bogus LORs with various VM locks, so just skip to the
+ * fatal trap handling directly.
+ */
+ if (td->td_critnest != 0 ||
+ WITNESS_CHECK(WARN_SLEEPOK | WARN_GIANTOK, NULL,
+ "Kernel page fault") != 0) {
+ trap_fatal(frame, eva);
+ return (-1);
+ }
+ }
va = trunc_page(eva);
if (va >= KERNBASE) {
/*
Modified: stable/9/sys/i386/include/proc.h
==============================================================================
--- stable/9/sys/i386/include/proc.h Thu Mar 29 03:13:43 2012 (r233646)
+++ stable/9/sys/i386/include/proc.h Thu Mar 29 04:54:34 2012 (r233647)
@@ -51,6 +51,7 @@ struct proc_ldt {
struct mdthread {
int md_spinlock_count; /* (k) */
register_t md_saved_flags; /* (k) */
+ register_t md_spurflt_addr; /* (k) Spurious page fault address. */
};
struct mdproc {
Modified: stable/9/sys/kern/kern_sysctl.c
==============================================================================
--- stable/9/sys/kern/kern_sysctl.c Thu Mar 29 03:13:43 2012 (r233646)
+++ stable/9/sys/kern/kern_sysctl.c Thu Mar 29 04:54:34 2012 (r233647)
@@ -1294,8 +1294,8 @@ kernel_sysctlbyname(struct thread *td, c
static int
sysctl_old_user(struct sysctl_req *req, const void *p, size_t l)
{
- int error = 0;
size_t i, len, origidx;
+ int error;
origidx = req->oldidx;
req->oldidx += l;
@@ -1316,10 +1316,14 @@ sysctl_old_user(struct sysctl_req *req,
else {
if (i > len - origidx)
i = len - origidx;
- error = copyout(p, (char *)req->oldptr + origidx, i);
+ if (req->lock == REQ_WIRED) {
+ error = copyout_nofault(p, (char *)req->oldptr +
+ origidx, i);
+ } else
+ error = copyout(p, (char *)req->oldptr + origidx, i);
+ if (error != 0)
+ return (error);
}
- if (error)
- return (error);
if (i < l)
return (ENOMEM);
return (0);
Modified: stable/9/sys/kern/subr_uio.c
==============================================================================
--- stable/9/sys/kern/subr_uio.c Thu Mar 29 03:13:43 2012 (r233646)
+++ stable/9/sys/kern/subr_uio.c Thu Mar 29 04:54:34 2012 (r233647)
@@ -187,8 +187,12 @@ uiomove_faultflag(void *cp, int n, struc
/* XXX does it make a sense to set TDP_DEADLKTREAT for UIO_SYSSPACE ? */
newflags = TDP_DEADLKTREAT;
- if (uio->uio_segflg == UIO_USERSPACE && nofault)
- newflags |= TDP_NOFAULTING;
+ if (uio->uio_segflg == UIO_USERSPACE && nofault) {
+ /*
+ * Fail if a non-spurious page fault occurs.
+ */
+ newflags |= TDP_NOFAULTING | TDP_RESETSPUR;
+ }
save = curthread_pflags_set(newflags);
while (n > 0 && uio->uio_resid) {
Modified: stable/9/sys/sys/proc.h
==============================================================================
--- stable/9/sys/sys/proc.h Thu Mar 29 03:13:43 2012 (r233646)
+++ stable/9/sys/sys/proc.h Thu Mar 29 04:54:34 2012 (r233647)
@@ -416,6 +416,7 @@ do { \
#define TDP_IGNSUSP 0x00800000 /* Permission to ignore the MNTK_SUSPEND* */
#define TDP_AUDITREC 0x01000000 /* Audit record pending on thread */
#define TDP_RFPPWAIT 0x02000000 /* Handle RFPPWAIT on syscall exit */
+#define TDP_RESETSPUR 0x04000000 /* Reset spurious page fault history. */
/*
* Reasons that the current thread can not be run yet.
Modified: stable/9/sys/vm/vm_fault.c
==============================================================================
--- stable/9/sys/vm/vm_fault.c Thu Mar 29 03:13:43 2012 (r233646)
+++ stable/9/sys/vm/vm_fault.c Thu Mar 29 04:54:34 2012 (r233647)
@@ -1468,11 +1468,17 @@ vm_fault_additional_pages(m, rbehind, ra
return i;
}
+/*
+ * Block entry into the machine-independent layer's page fault handler by
+ * the calling thread. Subsequent calls to vm_fault() by that thread will
+ * return KERN_PROTECTION_FAILURE. Enable machine-dependent handling of
+ * spurious page faults.
+ */
int
vm_fault_disable_pagefaults(void)
{
- return (curthread_pflags_set(TDP_NOFAULTING));
+ return (curthread_pflags_set(TDP_NOFAULTING | TDP_RESETSPUR));
}
void
More information about the svn-src-stable-9
mailing list