[PATCH] Add ktrace records for user page faults
John Baldwin
jhb at freebsd.org
Mon May 2 19:37:21 UTC 2011
One thing I have found useful is knowing when processes are in the kernel
instead of in userland. ktrace already provides records for syscall
entry/exit. The other major source of time spent in the kernel that I've seen
is page fault handling. To that end, I have a patch that adds ktrace records
to the beginning and end of VM faults. This gives a pair of records so a user
can see how long a fault took (similar to how one can see how long a syscall
takes now). Sample output from kdump is below:
47565 echo CALL mmap(0x800a87000,0x179000,PROT_READ|
PROT_WRITE,MAP_PRIVATE|MAP_ANON,0xffffffff,0)
47565 echo RET mmap 34370777088/0x800a87000
47565 echo PFLT 0x800723000 VM_PROT_EXECUTE
47565 echo RET KERN_SUCCESS
47565 echo CALL munmap(0x800887000,0x179000)
47565 echo RET munmap 0
47565 echo PFLT 0x800a00000 VM_PROT_WRITE
47565 echo RET KERN_SUCCESS
The patch is available at www.freebsd.org/~jhb/patches/ktrace_fault.patch and
included below.
Index: usr.bin/kdump/kdump.c
===================================================================
--- usr.bin/kdump/kdump.c (.../mirror/FreeBSD/stable/8) (revision 221926)
+++ usr.bin/kdump/kdump.c (.../stable/8) (revision 221926)
@@ -103,6 +103,8 @@
void ktrsockaddr(struct sockaddr *);
void ktrstat(struct stat *);
void ktrstruct(char *, size_t);
+void ktrfault(struct ktr_fault *);
+void ktrfaultend(struct ktr_faultend *);
void usage(void);
void sockfamilyname(int);
const char *ioctlname(u_long);
@@ -306,6 +308,12 @@
case KTR_STRUCT:
ktrstruct(m, ktrlen);
break;
+ case KTR_FAULT:
+ ktrfault((struct ktr_fault *)m);
+ break;
+ case KTR_FAULTEND:
+ ktrfaultend((struct ktr_faultend *)m);
+ break;
default:
printf("\n");
break;
@@ -445,6 +453,12 @@
/* FALLTHROUGH */
case KTR_PROCDTOR:
return;
+ case KTR_FAULT:
+ type = "PFLT";
+ break;
+ case KTR_FAULTEND:
+ type = "RET ";
+ break;
default:
(void)sprintf(unknown, "UNKNOWN(%d)", kth->ktr_type);
type = unknown;
@@ -1505,6 +1519,23 @@
printf("invalid record\n");
}
+void
+ktrfault(struct ktr_fault *ktr)
+{
+
+ printf("0x%jx ", ktr->vaddr);
+ vmprotname(ktr->type);
+ printf("\n");
+}
+
+void
+ktrfaultend(struct ktr_faultend *ktr)
+{
+
+ vmresultname(ktr->result);
+ printf("\n");
+}
+
#if defined(__amd64__) || defined(__i386__)
void
linux_ktrsyscall(struct ktr_syscall *ktr)
Index: usr.bin/kdump/kdump_subr.h
===================================================================
--- usr.bin/kdump/kdump_subr.h (.../mirror/FreeBSD/stable/8) (revision
221926)
+++ usr.bin/kdump/kdump_subr.h (.../stable/8) (revision 221926)
@@ -45,3 +45,5 @@
void minheritname (int);
void quotactlname (int);
void ptraceopname (int);
+void vmprotname (int);
+void vmresultname (int);
Index: usr.bin/kdump/mksubr
===================================================================
--- usr.bin/kdump/mksubr (.../mirror/FreeBSD/stable/8) (revision 221926)
+++ usr.bin/kdump/mksubr (.../stable/8) (revision 221926)
@@ -160,6 +160,8 @@
#include <sys/shm.h>
#include <nfsserver/nfs.h>
#include <ufs/ufs/quota.h>
+#include <vm/vm.h>
+#include <vm/vm_param.h>
#include "kdump_subr.h"
@@ -304,6 +306,26 @@
}
}
+/*
+ * MANUAL
+ *
+ * Used for page fault type. Cannot use auto_or_type since the macro
+ * values contain a cast. Also, VM_PROT_NONE has to be handled specially.
+ */
+void
+vmprotname (int type)
+{
+ int or = 0;
+
+ if (type == VM_PROT_NONE) {
+ (void)printf("VM_PROT_NONE");
+ return;
+ }
+ if_print_or(type, VM_PROT_READ, or);
+ if_print_or(type, VM_PROT_WRITE, or);
+ if_print_or(type, VM_PROT_EXECUTE, or);
+ if_print_or(type, VM_PROT_OVERRIDE_WRITE, or);
+}
_EOF_
auto_or_type "modename" "S_[A-Z]+[[:space:]]+[0-6]{7}" "sys/stat.h"
@@ -344,6 +366,7 @@
auto_switch_type "sockoptname" "SO_[A-Z]+[[:space:]]+0x[0-9]+" "sys/socket.h"
auto_switch_type "socktypename" "SOCK_[A-Z]+[[:space:]]+[1-9]+[0-9]*"
"sys/socket.h"
auto_switch_type "ptraceopname" "PT_[[:alnum:]]+[[:space:]]+[0-9]+"
"sys/ptrace.h"
+auto_switch_type "vmresultname" "KERN_[A-Z]+[[:space:]]+[0-9]+"
"vm/vm_param.h"
cat <<_EOF_
/*
Index: usr.bin/ktrace/ktrace.h
===================================================================
--- usr.bin/ktrace/ktrace.h (.../mirror/FreeBSD/stable/8) (revision 221926)
+++ usr.bin/ktrace/ktrace.h (.../stable/8) (revision 221926)
@@ -36,7 +36,8 @@
#define DEF_POINTS (KTRFAC_SYSCALL | KTRFAC_SYSRET | KTRFAC_NAMEI | \
KTRFAC_GENIO | KTRFAC_PSIG | KTRFAC_USER | \
- KTRFAC_STRUCT | KTRFAC_SYSCTL)
+ KTRFAC_STRUCT | KTRFAC_SYSCTL | KTRFAC_FAULT | \
+ KTRFAC_FAULTEND)
#define PROC_ABI_POINTS (KTRFAC_PROCCTOR | KTRFAC_PROCDTOR)
Index: usr.bin/ktrace/ktrace.1
===================================================================
--- usr.bin/ktrace/ktrace.1 (.../mirror/FreeBSD/stable/8) (revision 221926)
+++ usr.bin/ktrace/ktrace.1 (.../stable/8) (revision 221926)
@@ -112,6 +112,8 @@
.Bl -tag -width flag -compact
.It Cm c
trace system calls
+.It Cm f
+trace page faults
.It Cm i
trace
.Tn I/O
@@ -131,7 +133,7 @@
requests
.It Cm +
trace the default set of trace points -
-.Cm c , i , n , s , t , u , y
+.Cm c , f , i , n , s , t , u , y
.El
.It Ar command
Execute
Index: usr.bin/ktrace/subr.c
===================================================================
--- usr.bin/ktrace/subr.c (.../mirror/FreeBSD/stable/8) (revision 221926)
+++ usr.bin/ktrace/subr.c (.../stable/8) (revision 221926)
@@ -65,6 +65,9 @@
case 'c':
facs |= KTRFAC_SYSCALL | KTRFAC_SYSRET;
break;
+ case 'f':
+ facs |= KTRFAC_FAULT | KTRFAC_FAULTEND;
+ break;
case 'n':
facs |= KTRFAC_NAMEI;
break;
Index: sys/kern/kern_ktrace.c
===================================================================
--- sys/kern/kern_ktrace.c (.../mirror/FreeBSD/stable/8) (revision 221926)
+++ sys/kern/kern_ktrace.c (.../stable/8) (revision 221926)
@@ -98,6 +98,8 @@
struct ktr_genio ktr_genio;
struct ktr_psig ktr_psig;
struct ktr_csw ktr_csw;
+ struct ktr_fault ktr_fault;
+ struct ktr_faultend ktr_faultend;
} ktr_data;
STAILQ_ENTRY(ktr_request) ktr_list;
};
@@ -115,6 +117,8 @@
0, /* KTR_SYSCTL */
sizeof(struct ktr_proc_ctor), /* KTR_PROCCTOR */
0, /* KTR_PROCDTOR */
+ sizeof(struct ktr_fault), /* KTR_FAULT */
+ sizeof(struct ktr_faultend), /* KTR_FAULTEND */
};
static STAILQ_HEAD(, ktr_request) ktr_free;
@@ -767,6 +769,38 @@
req->ktr_header.ktr_len = buflen;
ktr_submitrequest(curthread, req);
}
+
+void
+ktrfault(vaddr, type)
+ vm_offset_t vaddr;
+ int type;
+{
+ struct ktr_request *req;
+ struct ktr_fault *kf;
+
+ req = ktr_getrequest(KTR_FAULT);
+ if (req == NULL)
+ return;
+ kf = &req->ktr_data.ktr_fault;
+ kf->vaddr = vaddr;
+ kf->type = type;
+ ktr_enqueuerequest(curthread, req);
+}
+
+void
+ktrfaultend(result)
+ int result;
+{
+ struct ktr_request *req;
+ struct ktr_faultend *kf;
+
+ req = ktr_getrequest(KTR_FAULTEND);
+ if (req == NULL)
+ return;
+ kf = &req->ktr_data.ktr_faultend;
+ kf->result = result;
+ ktr_enqueuerequest(curthread, req);
+}
#endif /* KTRACE */
/* Interface and common routines */
Index: sys/vm/vm_fault.c
===================================================================
--- sys/vm/vm_fault.c (.../mirror/FreeBSD/stable/8) (revision 221926)
+++ sys/vm/vm_fault.c (.../stable/8) (revision 221926)
@@ -74,6 +74,7 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include "opt_ktrace.h"
#include "opt_vm.h"
#include <sys/param.h>
@@ -86,6 +87,9 @@
#include <sys/sysctl.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>
+#ifdef KTRACE
+#include <sys/ktrace.h>
+#endif
#include <vm/vm.h>
#include <vm/vm_param.h>
@@ -114,6 +118,9 @@
static int vm_fault_additional_pages(vm_page_t, int, int, vm_page_t *, int
*);
static void vm_fault_prefault(pmap_t, vm_offset_t, vm_map_entry_t);
+#ifdef KTRACE
+static int vm_fault_traced(vm_map_t, vm_offset_t, vm_prot_t, int);
+#endif
#define VM_FAULT_READ_AHEAD 8
#define VM_FAULT_READ_BEHIND 7
@@ -209,7 +216,25 @@
int
vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
int fault_flags)
+#ifdef KTRACE
{
+ struct thread *td;
+ int result;
+
+ td = curthread;
+ if (map != kernel_map && KTRPOINT(td, KTR_FAULT))
+ ktrfault(vaddr, fault_type);
+ result = vm_fault_traced(map, vaddr, fault_type, fault_flags);
+ if (map != kernel_map && KTRPOINT(td, KTR_FAULTEND))
+ ktrfaultend(result);
+ return (result);
+}
+
+int
+vm_fault_traced(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
+ int fault_flags)
+#endif
+{
vm_prot_t prot;
int is_first_object_locked, result;
boolean_t are_queues_locked, growstack, wired;
Index: sys/sys/ktrace.h
===================================================================
--- sys/sys/ktrace.h (.../mirror/FreeBSD/stable/8) (revision 221926)
+++ sys/sys/ktrace.h (.../stable/8) (revision 221926)
@@ -178,6 +182,23 @@
#define KTR_PROCDTOR 11
/*
+ * KTR_FAULT - page fault record
+ */
+#define KTR_FAULT 12
+struct ktr_fault {
+ vm_offset_t vaddr;
+ int type;
+};
+
+/*
+ * KTR_FAULTEND - end of page fault record
+ */
+#define KTR_FAULTEND 13
+struct ktr_faultend {
+ int result;
+};
+
+/*
* KTR_DROP - If this bit is set in ktr_type, then at least one event
* between the previous record and this record was dropped.
*/
@@ -198,6 +219,8 @@
#define KTRFAC_SYSCTL (1<<KTR_SYSCTL)
#define KTRFAC_PROCCTOR (1<<KTR_PROCCTOR)
#define KTRFAC_PROCDTOR (1<<KTR_PROCDTOR)
+#define KTRFAC_FAULT (1<<KTR_FAULT)
+#define KTRFAC_FAULTEND (1<<KTR_FAULTEND)
/*
* trace flags (also in p_traceflags)
@@ -210,6 +233,8 @@
void ktrnamei(char *);
void ktrcsw(int, int);
void ktrpsig(int, sig_t, sigset_t *, int);
+void ktrfault(vm_offset_t, int);
+void ktrfaultend(int);
void ktrgenio(int, enum uio_rw, struct uio *, int);
void ktrsyscall(int, int narg, register_t args[]);
void ktrsysctl(int *name, u_int namelen);
--
John Baldwin
More information about the freebsd-arch
mailing list