PERFORCE change 31043 for review
Peter Wemm
peter at FreeBSD.org
Mon May 12 19:09:07 PDT 2003
http://perforce.freebsd.org/chv.cgi?CH=31043
Change 31043 by peter at peter_hammer on 2003/05/12 19:08:19
And now for something completely different... i386 binary emulation!
This was submitted by p4/i386 on the amd64 box itself!
Affected files ...
.. //depot/projects/hammer/sys/amd64/amd64/cpu_switch.S#4 edit
.. //depot/projects/hammer/sys/amd64/amd64/exception.S#11 edit
.. //depot/projects/hammer/sys/amd64/amd64/genassym.c#12 edit
.. //depot/projects/hammer/sys/amd64/amd64/machdep.c#30 edit
.. //depot/projects/hammer/sys/amd64/amd64/support.S#6 edit
.. //depot/projects/hammer/sys/amd64/amd64/trap.c#14 edit
.. //depot/projects/hammer/sys/amd64/amd64/vm_machdep.c#6 edit
.. //depot/projects/hammer/sys/amd64/conf/GENERIC#8 edit
.. //depot/projects/hammer/sys/amd64/ia32/Makefile#1 add
.. //depot/projects/hammer/sys/amd64/ia32/ia32.h#1 add
.. //depot/projects/hammer/sys/amd64/ia32/ia32_misc.c#1 add
.. //depot/projects/hammer/sys/amd64/ia32/ia32_proto.h#1 add
.. //depot/projects/hammer/sys/amd64/ia32/ia32_syscall.h#1 add
.. //depot/projects/hammer/sys/amd64/ia32/ia32_sysent.c#1 add
.. //depot/projects/hammer/sys/amd64/ia32/ia32_sysvec.c#1 add
.. //depot/projects/hammer/sys/amd64/ia32/ia32_util.h#1 add
.. //depot/projects/hammer/sys/amd64/ia32/syscalls.conf#1 add
.. //depot/projects/hammer/sys/amd64/ia32/syscalls.master#1 add
.. //depot/projects/hammer/sys/amd64/include/cpufunc.h#6 edit
.. //depot/projects/hammer/sys/amd64/include/elf.h#7 edit
.. //depot/projects/hammer/sys/amd64/include/pcb.h#8 edit
.. //depot/projects/hammer/sys/conf/files.amd64#9 edit
.. //depot/projects/hammer/sys/conf/options.amd64#8 edit
Differences ...
==== //depot/projects/hammer/sys/amd64/amd64/cpu_switch.S#4 (text+ko) ====
@@ -106,6 +106,12 @@
pushfq /* PSL */
popq PCB_RFLAGS(%r8)
+ /* Save kernel %gs.base */
+ movl $MSR_GSBASE,%ecx
+ rdmsr
+ movl %eax,PCB_KGSBASE(%r8)
+ movl %edx,PCB_KGSBASE+4(%r8)
+
/* Save userland %fs */
movl $MSR_FSBASE,%ecx
rdmsr
@@ -118,6 +124,12 @@
movl %eax,PCB_GSBASE(%r8)
movl %edx,PCB_GSBASE+4(%r8)
+ /* Save segment selector numbers */
+ movl %ds,PCB_DS(%r8)
+ movl %es,PCB_ES(%r8)
+ movl %fs,PCB_FS(%r8)
+ movl %gs,PCB_GS(%r8)
+
/* have we used fp, and need a save? */
cmpq %rdi,PCPU(FPCURTHREAD)
jne 1f
@@ -160,6 +172,18 @@
*/
movq TD_PCB(%rsi),%r8
+ /* Restore segment selector numbers */
+ movl PCB_DS(%r8),%ds
+ movl PCB_ES(%r8),%es
+ movl PCB_FS(%r8),%fs
+ movl PCB_GS(%r8),%gs
+
+ /* Restore kernel %gs.base */
+ movl $MSR_GSBASE,%ecx
+ movl PCB_KGSBASE(%r8),%eax
+ movl PCB_KGSBASE+4(%r8),%edx
+ wrmsr
+
/* Restore userland %fs */
movl $MSR_FSBASE,%ecx
movl PCB_FSBASE(%r8),%eax
==== //depot/projects/hammer/sys/amd64/amd64/exception.S#11 (text+ko) ====
@@ -247,7 +247,7 @@
movq %r14,TF_R14(%rsp)
movq %r15,TF_R15(%rsp)
FAKE_MCOUNT(13*4(%rsp))
- call syscall
+ call ia32_syscall
MEXITCOUNT
jmp doreti
==== //depot/projects/hammer/sys/amd64/amd64/genassym.c#12 (text+ko) ====
@@ -125,6 +125,11 @@
ASSYM(PCB_RFLAGS, offsetof(struct pcb, pcb_rflags));
ASSYM(PCB_FSBASE, offsetof(struct pcb, pcb_fsbase));
ASSYM(PCB_GSBASE, offsetof(struct pcb, pcb_gsbase));
+ASSYM(PCB_KGSBASE, offsetof(struct pcb, pcb_kgsbase));
+ASSYM(PCB_DS, offsetof(struct pcb, pcb_ds));
+ASSYM(PCB_ES, offsetof(struct pcb, pcb_es));
+ASSYM(PCB_FS, offsetof(struct pcb, pcb_fs));
+ASSYM(PCB_GS, offsetof(struct pcb, pcb_gs));
ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags));
ASSYM(PCB_FULLCTX, PCB_FULLCTX);
@@ -181,8 +186,10 @@
ASSYM(KDSEL, GSEL(GDATA_SEL, SEL_KPL));
ASSYM(KUCSEL, GSEL(GUCODE_SEL, SEL_UPL));
ASSYM(KUDSEL, GSEL(GUDATA_SEL, SEL_UPL));
+ASSYM(KUC32SEL, GSEL(GUCODE32_SEL, SEL_UPL));
ASSYM(MSR_FSBASE, MSR_FSBASE);
+ASSYM(MSR_GSBASE, MSR_GSBASE);
ASSYM(MSR_KGSBASE, MSR_KGSBASE);
ASSYM(GPROC0_SEL, GPROC0_SEL);
==== //depot/projects/hammer/sys/amd64/amd64/machdep.c#30 (text+ko) ====
@@ -129,7 +129,7 @@
static int set_fpcontext(struct thread *td, const mcontext_t *mcp);
SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL)
-int _udatasel, _ucodesel;
+int _udatasel, _ucodesel, _ucode32sel;
u_long atdevbase;
u_int64_t modulep; /* phys addr of metadata table */
@@ -466,11 +466,25 @@
{
struct trapframe *regs = td->td_frame;
struct pcb *pcb = td->td_pcb;
+ u_int64_t pc;
+ wrmsr(MSR_FSBASE, 0);
+ wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */
pcb->pcb_fsbase = 0;
pcb->pcb_gsbase = 0;
- wrmsr(MSR_FSBASE, 0);
- wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */
+ pcb->pcb_kgsbase = rdmsr(MSR_GSBASE);
+ load_ds(_udatasel);
+ load_es(_udatasel);
+ load_fs(_udatasel);
+ critical_enter();
+ pc = rdmsr(MSR_GSBASE);
+ load_gs(_udatasel); /* Clobbers kernel %GS.base */
+ wrmsr(MSR_GSBASE, pc);
+ critical_exit();
+ pcb->pcb_ds = _udatasel;
+ pcb->pcb_es = _udatasel;
+ pcb->pcb_fs = _udatasel;
+ pcb->pcb_gs = _udatasel;
bzero((char *)regs, sizeof(struct trapframe));
regs->tf_rip = entry;
@@ -589,7 +603,7 @@
0xfffff, /* length - all address space */
SDT_MEMERA, /* segment type */
SEL_UPL, /* segment descriptor priority level */
- 0, /* segment descriptor present */
+ 1, /* segment descriptor present */
0, /* long */
1, /* default 32 vs 16 bit size */
1 /* limit granularity (byte/page units)*/ },
@@ -1289,10 +1303,12 @@
_ucodesel = GSEL(GUCODE_SEL, SEL_UPL);
_udatasel = GSEL(GUDATA_SEL, SEL_UPL);
+ _ucode32sel = GSEL(GUCODE32_SEL, SEL_UPL);
/* setup proc 0's pcb */
thread0.td_pcb->pcb_flags = 0; /* XXXKSE */
thread0.td_pcb->pcb_cr3 = IdlePML4;
+ thread0.td_pcb->pcb_kgsbase = (u_int64_t)pc;
thread0.td_frame = &proc0_tf;
}
==== //depot/projects/hammer/sys/amd64/amd64/support.S#6 (text+ko) ====
@@ -358,6 +358,9 @@
movq $0,PCB_ONFAULT(%rcx)
ret
+ENTRY(fuword)
+ jmp fuword64
+
ENTRY(fuword32)
movq PCPU(CURPCB),%rcx
movq $fusufault,PCB_ONFAULT(%rcx)
@@ -366,15 +369,10 @@
cmpq %rax,%rdi /* verify address is valid */
ja fusufault
-/* XXX use the 64 extend */
- xorq %rax, %rax
movl (%rdi),%eax
movq $0,PCB_ONFAULT(%rcx)
ret
-ENTRY(fuword)
- jmp fuword32
-
/*
* These two routines are called from the profiling code, potentially
* at interrupt time. If they fail, that's okay, good things will
@@ -397,8 +395,6 @@
cmpq %rax,%rdi
ja fusufault
-/* XXX use the 64 extend */
- xorq %rax, %rax
movzwl (%rdi),%eax
movq $0,PCB_ONFAULT(%rcx)
ret
@@ -414,8 +410,6 @@
cmpq %rax,%rdi
ja fusufault
-/* XXX use the 64 extend */
- xorq %rax, %rax
movzbl (%rdi),%eax
movq $0,PCB_ONFAULT(%rcx)
ret
@@ -448,6 +442,9 @@
movq %rax,PCB_ONFAULT(%rcx)
ret
+ENTRY(suword)
+ jmp suword64
+
ENTRY(suword32)
movq PCPU(CURPCB),%rcx
movq $fusufault,PCB_ONFAULT(%rcx)
@@ -462,9 +459,6 @@
movq %rax,PCB_ONFAULT(%rcx)
ret
-ENTRY(suword)
- jmp suword32
-
/*
* suword16 - MP SAFE
*/
==== //depot/projects/hammer/sys/amd64/amd64/trap.c#14 (text+ko) ====
@@ -91,7 +91,7 @@
extern void trap(struct trapframe frame);
extern void syscall(struct trapframe frame);
-static int trap_pfault(struct trapframe *, int, vm_offset_t);
+static int trap_pfault(struct trapframe *, int);
static void trap_fatal(struct trapframe *, vm_offset_t);
void dblfault_handler(void);
@@ -161,13 +161,13 @@
struct proc *p = td->td_proc;
u_int sticks = 0;
int i = 0, ucode = 0, type, code;
- vm_offset_t eva;
atomic_add_int(&cnt.v_trap, 1);
type = frame.tf_trapno;
#ifdef DDB
if (db_active) {
+ vm_offset_t eva;
eva = (type == T_PAGEFLT ? frame.tf_addr : 0);
trap_fatal(&frame, eva);
goto out;
@@ -202,7 +202,6 @@
}
}
- eva = 0;
code = frame.tf_err;
if (type == T_PAGEFLT) {
/*
@@ -213,9 +212,8 @@
* kernel can print out a useful trap message and even get
* to the debugger.
*/
- eva = frame.tf_addr;
if (PCPU_GET(spinlocks) != NULL)
- trap_fatal(&frame, eva);
+ trap_fatal(&frame, frame.tf_addr);
}
#ifdef DEVICE_POLLING
@@ -261,7 +259,7 @@
break;
case T_PAGEFLT: /* page fault */
- i = trap_pfault(&frame, TRUE, eva);
+ i = trap_pfault(&frame, TRUE);
if (i == -1)
goto userout;
if (i == 0)
@@ -331,7 +329,7 @@
("kernel trap doesn't have ucred"));
switch (type) {
case T_PAGEFLT: /* page fault */
- (void) trap_pfault(&frame, FALSE, eva);
+ (void) trap_pfault(&frame, FALSE);
goto out;
case T_DNA:
@@ -430,7 +428,7 @@
#endif /* DEV_ISA */
}
- trap_fatal(&frame, eva);
+ trap_fatal(&frame, 0);
goto out;
}
@@ -445,7 +443,7 @@
uprintf("fatal process exception: %s",
trap_msg[type]);
if ((type == T_PAGEFLT) || (type == T_PROTFLT))
- uprintf(", fault VA = 0x%lx", eva);
+ uprintf(", fault VA = 0x%lx", frame.tf_addr);
uprintf("\n");
}
#endif
@@ -462,10 +460,9 @@
}
static int
-trap_pfault(frame, usermode, eva)
+trap_pfault(frame, usermode)
struct trapframe *frame;
int usermode;
- vm_offset_t eva;
{
vm_offset_t va;
struct vmspace *vm = NULL;
@@ -474,6 +471,7 @@
vm_prot_t ftype;
struct thread *td = curthread;
struct proc *p = td->td_proc;
+ vm_offset_t eva = frame->tf_addr;
va = trunc_page(eva);
if (va >= KERNBASE) {
@@ -542,7 +540,7 @@
return (-1);
}
-printf("trap_pfault: pid %d %s %s %s eva %p, rip %p, rax %p, rbx %p, rcx %p, rdx %p, rsp %p, rvp %p, rsi %p, rdi %p\n", p->p_pid,
+printf("trap_pfault: pid %d %s %s %s eva %p, rip %p, rax %p, rbx %p, rcx %p, rdx %p, rsp %p, rbp %p, rsi %p, rdi %p\n", p->p_pid,
frame->tf_err & PGEX_U ? "user" : "supervisor",
frame->tf_err & PGEX_W ? "write" : "read",
frame->tf_err & PGEX_P ? "protection violation" : "page not present",
@@ -820,3 +818,173 @@
mtx_assert(&Giant, MA_NOTOWNED);
}
+void ia32_syscall(struct trapframe frame);
+void
+ia32_syscall(struct trapframe frame)
+{
+ caddr_t params;
+ int i;
+ struct sysent *callp;
+ struct thread *td = curthread;
+ struct proc *p = td->td_proc;
+ register_t orig_tf_rflags;
+ u_int sticks;
+ int error;
+ int narg;
+ u_int32_t args[8];
+ u_int64_t args64[8];
+ u_int code;
+
+ /*
+ * note: PCPU_LAZY_INC() can only be used if we can afford
+ * occasional inaccuracy in the count.
+ */
+ cnt.v_syscall++;
+
+ sticks = td->td_sticks;
+ td->td_frame = &frame;
+ if (td->td_ucred != p->p_ucred)
+ cred_update_thread(td);
+ params = (caddr_t)frame.tf_rsp + sizeof(u_int32_t);
+ code = frame.tf_rax;
+ orig_tf_rflags = frame.tf_rflags;
+
+ if (p->p_sysent->sv_prepsyscall) {
+ /*
+ * The prep code is MP aware.
+ */
+ (*p->p_sysent->sv_prepsyscall)(&frame, args, &code, &params);
+ } else {
+ /*
+ * Need to check if this is a 32 bit or 64 bit syscall.
+ * fuword is MP aware.
+ */
+ if (code == SYS_syscall) {
+ /*
+ * Code is first argument, followed by actual args.
+ */
+ code = fuword32(params);
+ params += sizeof(int);
+ } else if (code == SYS___syscall) {
+ /*
+ * Like syscall, but code is a quad, so as to maintain
+ * quad alignment for the rest of the arguments.
+ * We use a 32-bit fetch in case params is not
+ * aligned.
+ */
+ code = fuword32(params);
+ params += sizeof(quad_t);
+ }
+ }
+
+ if (p->p_sysent->sv_mask)
+ code &= p->p_sysent->sv_mask;
+
+ if (code >= p->p_sysent->sv_size)
+ callp = &p->p_sysent->sv_table[0];
+ else
+ callp = &p->p_sysent->sv_table[code];
+
+ narg = callp->sy_narg & SYF_ARGMASK;
+
+ /*
+ * copyin and the ktrsyscall()/ktrsysret() code is MP-aware
+ */
+ if (params != NULL && narg != 0)
+ error = copyin(params, (caddr_t)args,
+ (u_int)(narg * sizeof(int)));
+ else
+ error = 0;
+
+ for (i = 0; i < narg; i++)
+ args64[i] = args[i];
+
+#ifdef KTRACE
+ if (KTRPOINT(td, KTR_SYSCALL))
+ ktrsyscall(code, narg, args64);
+#endif
+ /*
+ * Try to run the syscall without Giant if the syscall
+ * is MP safe.
+ */
+ if ((callp->sy_narg & SYF_MPSAFE) == 0)
+ mtx_lock(&Giant);
+
+ if (error == 0) {
+ td->td_retval[0] = 0;
+ td->td_retval[1] = frame.tf_rdx;
+
+ STOPEVENT(p, S_SCE, narg);
+
+ error = (*callp->sy_call)(td, args64);
+ }
+
+ switch (error) {
+ case 0:
+ frame.tf_rax = td->td_retval[0];
+ frame.tf_rdx = td->td_retval[1];
+ frame.tf_rflags &= ~PSL_C;
+ break;
+
+ case ERESTART:
+ /*
+ * Reconstruct pc, assuming lcall $X,y is 7 bytes,
+ * int 0x80 is 2 bytes. We saved this in tf_err.
+ */
+ frame.tf_rip -= frame.tf_err;
+ break;
+
+ case EJUSTRETURN:
+ break;
+
+ default:
+ if (p->p_sysent->sv_errsize) {
+ if (error >= p->p_sysent->sv_errsize)
+ error = -1; /* XXX */
+ else
+ error = p->p_sysent->sv_errtbl[error];
+ }
+ frame.tf_rax = error;
+ frame.tf_rflags |= PSL_C;
+ break;
+ }
+
+ /*
+ * Release Giant if we previously set it.
+ */
+ if ((callp->sy_narg & SYF_MPSAFE) == 0)
+ mtx_unlock(&Giant);
+
+ /*
+ * Traced syscall.
+ */
+ if (orig_tf_rflags & PSL_T) {
+ frame.tf_rflags &= ~PSL_T;
+ trapsignal(td, SIGTRAP, 0);
+ }
+
+ /*
+ * Handle reschedule and other end-of-syscall issues
+ */
+ userret(td, &frame, sticks);
+
+#ifdef KTRACE
+ if (KTRPOINT(td, KTR_SYSRET))
+ ktrsysret(code, error, td->td_retval[0]);
+#endif
+
+ /*
+ * This works because errno is findable through the
+ * register set. If we ever support an emulation where this
+ * is not the case, this code will need to be revisited.
+ */
+ STOPEVENT(p, S_SCX, code);
+
+#ifdef DIAGNOSTIC
+ cred_free_thread(td);
+#endif
+ WITNESS_WARN(WARN_PANIC, NULL, "System call %s returning",
+ (code >= 0 && code < SYS_MAXSYSCALL) ? syscallnames[code] : "???");
+ mtx_assert(&sched_lock, MA_NOTOWNED);
+ mtx_assert(&Giant, MA_NOTOWNED);
+}
==== //depot/projects/hammer/sys/amd64/amd64/vm_machdep.c#6 (text+ko) ====
@@ -76,7 +76,6 @@
#include <amd64/isa/isa.h>
static void cpu_reset_real(void);
-extern int _ucodesel, _udatasel;
/*
* Finish a fork operation, with process p2 nearly set up.
@@ -143,6 +142,7 @@
* pcb2->pcb_savefpu: cloned above.
* pcb2->pcb_flags: cloned above.
* pcb2->pcb_onfault: cloned above (always NULL here?).
+ * pcb2->pcb_[fg]sbase: cloned above
*/
/*
==== //depot/projects/hammer/sys/amd64/conf/GENERIC#8 (text+ko) ====
@@ -23,6 +23,7 @@
ident GENERIC
maxusers 0
options NDA #Avoid accidental cut/paste of NDA'ed stuff
+options IA32
makeoptions NO_MODULES=not_yet
@@ -42,6 +43,10 @@
options INVARIANTS #Enable calls of extra sanity checking
options INVARIANT_SUPPORT #Extra sanity checks of internal structures, required by INVARIANTS
+options SYSVMSG
+options SYSVSEM
+options SYSVSHM
+
device isa
device pci
==== //depot/projects/hammer/sys/amd64/include/cpufunc.h#6 (text+ko) ====
@@ -447,7 +447,6 @@
__asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory");
}
-/* XXX these are replaced with rdmsr/wrmsr */
static __inline u_int
rfs(void)
{
@@ -465,6 +464,18 @@
}
static __inline void
+load_ds(u_int sel)
+{
+ __asm __volatile("movl %0,%%ds" : : "rm" (sel));
+}
+
+static __inline void
+load_es(u_int sel)
+{
+ __asm __volatile("movl %0,%%es" : : "rm" (sel));
+}
+
+static __inline void
load_fs(u_int sel)
{
__asm __volatile("movl %0,%%fs" : : "rm" (sel));
==== //depot/projects/hammer/sys/amd64/include/elf.h#7 (text+ko) ====
@@ -33,9 +33,12 @@
* ELF definitions for the AMD64 architecture.
*/
-#include <sys/elf64.h> /* Definitions common to all 64 bit architectures. */
+#ifndef __ELF_WORD_SIZE
#define __ELF_WORD_SIZE 64 /* Used by <sys/elf_generic.h> */
+#endif
+#include <sys/elf32.h> /* Definitions common to all 32 bit architectures. */
+#include <sys/elf64.h> /* Definitions common to all 64 bit architectures. */
#include <sys/elf_generic.h>
#define ELF_ARCH EM_X86_64
@@ -48,6 +51,13 @@
* The i386 supplement to the SVR4 ABI specification names this "auxv_t",
* but POSIX lays claim to all symbols ending with "_t".
*/
+typedef struct { /* Auxiliary vector entry on initial stack */
+ int a_type; /* Entry type. */
+ union {
+ int a_val; /* Integer value. */
+ } a_un;
+} Elf32_Auxinfo;
+
typedef struct { /* Auxiliary vector entry on initial stack */
long a_type; /* Entry type. */
@@ -118,7 +128,11 @@
#define R_X86_64_COUNT 16 /* Count of defined relocation types. */
/* Define "machine" characteristics */
-#define ELF_TARG_CLASS ELFCLASS64
+#if __ELF_WORD_SIZE == 32
+#define ELF_TARG_CLASS ELFCLASS32
+#else
+#define ELF_TARG_CLASS ELFCLASS64
+#endif
#define ELF_TARG_DATA ELFDATA2LSB
#define ELF_TARG_MACH EM_X86_64
#define ELF_TARG_VER 1
==== //depot/projects/hammer/sys/amd64/include/pcb.h#8 (text+ko) ====
@@ -59,6 +59,11 @@
register_t pcb_rflags;
register_t pcb_fsbase;
register_t pcb_gsbase;
+ register_t pcb_kgsbase;
+ u_int32_t pcb_ds;
+ u_int32_t pcb_es;
+ u_int32_t pcb_fs;
+ u_int32_t pcb_gs;
struct savefpu pcb_save;
u_long pcb_flags;
==== //depot/projects/hammer/sys/conf/files.amd64#9 (text+ko) ====
@@ -64,7 +64,10 @@
amd64/pci/pci_cfgreg.c optional pci
amd64/pci/pci_bus.c optional pci
-
+amd64/ia32/ia32_misc.c optional ia32
+amd64/ia32/ia32_sysent.c optional ia32
+amd64/ia32/ia32_sysvec.c optional ia32
+kern/imgact_elf32.c optional ia32
# This file tells config what files go into building a kernel,
# files marked standard are always included.
==== //depot/projects/hammer/sys/conf/options.amd64#8 (text+ko) ====
@@ -60,3 +60,4 @@
PSM_HOOKRESUME opt_psm.h
PSM_RESETAFTERSUSPEND opt_psm.h
PSM_DEBUG opt_psm.h
+IA32
More information about the p4-projects
mailing list