svn commit: r273995 - in head/sys: amd64/amd64 i386/i386 i386/include i386/isa i386/linux x86/acpica
John Baldwin
jhb at FreeBSD.org
Sun Nov 2 22:58:35 UTC 2014
Author: jhb
Date: Sun Nov 2 22:58:30 2014
New Revision: 273995
URL: https://svnweb.freebsd.org/changeset/base/273995
Log:
MFamd64: Add support for extended FPU states on i386. This includes
support for AVX on i386.
- Similar to amd64, move the FPU save area out of the PCB and instead
store saved FPU state in a variable-sized buffer after the PCB on the
stack.
- To support the variable PCB location, alter the locore code to only use
the bottom-most page of proc0kstack for init386(). init386() returns
the correct stack pointer to locore which adjusts the stack for thread0
before calling mi_startup().
- Don't bother setting cr3 in thread0's pcb in locore before calling
init386(). It wasn't used (init386() overwrote it at the end) and
it doesn't work with the variable-sized FPU save area.
- Remove the new-bus attachment from npx. This was only ever useful for
external co-processors using IRQ13, but those have not been supported
for several years. npxinit() is now called much earlier during boot
(init386()) similar to amd64.
- Implement PT_{GET,SET}XSTATE and I386_GET_XFPUSTATE.
- npxsave() is now only called from context switch contexts so it can
use XSAVEOPT.
Differential Revision: https://reviews.freebsd.org/D1058
Reviewed by: kib
Tested on: FreeBSD/i386 VM under bhyve on Intel i5-2520
Modified:
head/sys/amd64/amd64/genassym.c
head/sys/amd64/amd64/sys_machdep.c
head/sys/amd64/amd64/vm_machdep.c
head/sys/i386/i386/genassym.c
head/sys/i386/i386/initcpu.c
head/sys/i386/i386/locore.s
head/sys/i386/i386/machdep.c
head/sys/i386/i386/mp_machdep.c
head/sys/i386/i386/ptrace_machdep.c
head/sys/i386/i386/sys_machdep.c
head/sys/i386/i386/trap.c
head/sys/i386/i386/vm_machdep.c
head/sys/i386/include/cpufunc.h
head/sys/i386/include/md_var.h
head/sys/i386/include/npx.h
head/sys/i386/include/pcb.h
head/sys/i386/isa/npx.c
head/sys/i386/linux/linux_ptrace.c
head/sys/x86/acpica/acpi_wakeup.c
Modified: head/sys/amd64/amd64/genassym.c
==============================================================================
--- head/sys/amd64/amd64/genassym.c Sun Nov 2 22:42:19 2014 (r273994)
+++ head/sys/amd64/amd64/genassym.c Sun Nov 2 22:58:30 2014 (r273995)
@@ -156,8 +156,6 @@ ASSYM(PCB_ONFAULT, offsetof(struct pcb,
ASSYM(PCB_GS32SD, offsetof(struct pcb, pcb_gs32sd));
ASSYM(PCB_TSSP, offsetof(struct pcb, pcb_tssp));
ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_save));
-ASSYM(PCB_SAVEFPU_SIZE, sizeof(struct savefpu));
-ASSYM(PCB_USERFPU, sizeof(struct pcb));
ASSYM(PCB_EFER, offsetof(struct pcb, pcb_efer));
ASSYM(PCB_STAR, offsetof(struct pcb, pcb_star));
ASSYM(PCB_LSTAR, offsetof(struct pcb, pcb_lstar));
Modified: head/sys/amd64/amd64/sys_machdep.c
==============================================================================
--- head/sys/amd64/amd64/sys_machdep.c Sun Nov 2 22:42:19 2014 (r273994)
+++ head/sys/amd64/amd64/sys_machdep.c Sun Nov 2 22:58:30 2014 (r273995)
@@ -319,7 +319,7 @@ sysarch(td, uap)
fpugetregs(td);
error = copyout((char *)(get_pcb_user_save_td(td) + 1),
a64xfpu.addr, a64xfpu.len);
- return (error);
+ break;
default:
error = EINVAL;
Modified: head/sys/amd64/amd64/vm_machdep.c
==============================================================================
--- head/sys/amd64/amd64/vm_machdep.c Sun Nov 2 22:42:19 2014 (r273994)
+++ head/sys/amd64/amd64/vm_machdep.c Sun Nov 2 22:58:30 2014 (r273995)
@@ -127,7 +127,7 @@ get_pcb_td(struct thread *td)
void *
alloc_fpusave(int flags)
{
- struct pcb *res;
+ void *res;
struct savefpu_ymm *sf;
res = malloc(cpu_max_ext_state_size, M_DEVBUF, flags);
Modified: head/sys/i386/i386/genassym.c
==============================================================================
--- head/sys/i386/i386/genassym.c Sun Nov 2 22:42:19 2014 (r273994)
+++ head/sys/i386/i386/genassym.c Sun Nov 2 22:58:30 2014 (r273995)
@@ -144,7 +144,6 @@ ASSYM(PCB_DR2, offsetof(struct pcb, pcb_
ASSYM(PCB_DR3, offsetof(struct pcb, pcb_dr3));
ASSYM(PCB_DR6, offsetof(struct pcb, pcb_dr6));
ASSYM(PCB_DR7, offsetof(struct pcb, pcb_dr7));
-ASSYM(PCB_USERFPU, offsetof(struct pcb, pcb_user_save));
ASSYM(PCB_PSL, offsetof(struct pcb, pcb_psl));
ASSYM(PCB_DBREGS, PCB_DBREGS);
ASSYM(PCB_EXT, offsetof(struct pcb, pcb_ext));
@@ -154,7 +153,6 @@ ASSYM(PCB_GSD, offsetof(struct pcb, pcb_
ASSYM(PCB_VM86, offsetof(struct pcb, pcb_vm86));
ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags));
ASSYM(PCB_SAVEFPU, offsetof(struct pcb, pcb_save));
-ASSYM(PCB_SAVEFPU_SIZE, sizeof(union savefpu));
ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault));
ASSYM(PCB_SIZE, sizeof(struct pcb));
Modified: head/sys/i386/i386/initcpu.c
==============================================================================
--- head/sys/i386/i386/initcpu.c Sun Nov 2 22:42:19 2014 (r273994)
+++ head/sys/i386/i386/initcpu.c Sun Nov 2 22:58:30 2014 (r273995)
@@ -102,6 +102,7 @@ u_int cpu_mxcsr_mask; /* Valid bits in
#endif
u_int cpu_clflush_line_size = 32;
u_int cpu_stdext_feature;
+u_int cpu_max_ext_state_size;
u_int cpu_mon_mwait_flags; /* MONITOR/MWAIT flags (CPUID.05H.ECX) */
u_int cpu_mon_min_size; /* MONITOR minimum range size, bytes */
u_int cpu_mon_max_size; /* MONITOR minimum range size, bytes */
Modified: head/sys/i386/i386/locore.s
==============================================================================
--- head/sys/i386/i386/locore.s Sun Nov 2 22:42:19 2014 (r273994)
+++ head/sys/i386/i386/locore.s Sun Nov 2 22:58:30 2014 (r273995)
@@ -302,17 +302,14 @@ NON_GPROF_ENTRY(btext)
begin:
/* set up bootstrap stack */
movl proc0kstack,%eax /* location of in-kernel stack */
- /* bootstrap stack end location */
- leal (KSTACK_PAGES*PAGE_SIZE-PCB_SIZE)(%eax),%esp
- xorl %ebp,%ebp /* mark end of frames */
+ /*
+ * Only use bottom page for init386(). init386() calculates the
+ * PCB + FPU save area size and returns the true top of stack.
+ */
+ leal PAGE_SIZE(%eax),%esp
-#ifdef PAE
- movl IdlePDPT,%esi
-#else
- movl IdlePTD,%esi
-#endif
- movl %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax)
+ xorl %ebp,%ebp /* mark end of frames */
pushl physfree /* value of first for init386(first) */
call init386 /* wire 386 chip for unix operation */
@@ -324,6 +321,9 @@ begin:
*/
addl $4,%esp
+ /* Switch to true top of stack. */
+ movl %eax,%esp
+
call mi_startup /* autoconfiguration, mountroot etc */
/* NOTREACHED */
addl $0,%esp /* for db_numargs() again */
Modified: head/sys/i386/i386/machdep.c
==============================================================================
--- head/sys/i386/i386/machdep.c Sun Nov 2 22:42:19 2014 (r273994)
+++ head/sys/i386/i386/machdep.c Sun Nov 2 22:58:30 2014 (r273995)
@@ -181,7 +181,7 @@ extern unsigned long physfree;
/* Sanity check for __curthread() */
CTASSERT(offsetof(struct pcpu, pc_curthread) == 0);
-extern void init386(int first);
+extern register_t init386(int first);
extern void dblfault_handler(void);
#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
@@ -193,8 +193,10 @@ extern void dblfault_handler(void);
static void cpu_startup(void *);
static void fpstate_drop(struct thread *td);
-static void get_fpcontext(struct thread *td, mcontext_t *mcp);
-static int set_fpcontext(struct thread *td, const mcontext_t *mcp);
+static void get_fpcontext(struct thread *td, mcontext_t *mcp,
+ char *xfpusave, size_t xfpusave_len);
+static int set_fpcontext(struct thread *td, const mcontext_t *mcp,
+ char *xfpustate, size_t xfpustate_len);
#ifdef CPU_ENABLE_SSE
static void set_fpregs_xmm(struct save87 *, struct savexmm *);
static void fill_fpregs_xmm(struct savexmm *, struct save87 *);
@@ -363,7 +365,7 @@ cpu_startup(dummy)
* Send an interrupt to process.
*
* Stack is set up to allow sigcode stored
- * at top to call routine, followed by kcall
+ * at top to call routine, followed by call
* to sigreturn routine below. After sigreturn
* resets the signal mask, the stack, and the
* frame pointer, it returns to the user
@@ -642,6 +644,8 @@ sendsig(sig_t catcher, ksiginfo_t *ksi,
char *sp;
struct trapframe *regs;
struct segment_descriptor *sdp;
+ char *xfpusave;
+ size_t xfpusave_len;
int sig;
int oonstack;
@@ -666,6 +670,14 @@ sendsig(sig_t catcher, ksiginfo_t *ksi,
regs = td->td_frame;
oonstack = sigonstack(regs->tf_esp);
+ if (cpu_max_ext_state_size > sizeof(union savefpu) && use_xsave) {
+ xfpusave_len = cpu_max_ext_state_size - sizeof(union savefpu);
+ xfpusave = __builtin_alloca(xfpusave_len);
+ } else {
+ xfpusave_len = 0;
+ xfpusave = NULL;
+ }
+
/* Save user context. */
bzero(&sf, sizeof(sf));
sf.sf_uc.uc_sigmask = *mask;
@@ -676,7 +688,7 @@ sendsig(sig_t catcher, ksiginfo_t *ksi,
sf.sf_uc.uc_mcontext.mc_gs = rgs();
bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs));
sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */
- get_fpcontext(td, &sf.sf_uc.uc_mcontext);
+ get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len);
fpstate_drop(td);
/*
* Unconditionally fill the fsbase and gsbase into the mcontext.
@@ -687,7 +699,6 @@ sendsig(sig_t catcher, ksiginfo_t *ksi,
sdp = &td->td_pcb->pcb_gsd;
sf.sf_uc.uc_mcontext.mc_gsbase = sdp->sd_hibase << 24 |
sdp->sd_lobase;
- sf.sf_uc.uc_mcontext.mc_flags = 0;
bzero(sf.sf_uc.uc_mcontext.mc_spare2,
sizeof(sf.sf_uc.uc_mcontext.mc_spare2));
bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__));
@@ -695,13 +706,19 @@ sendsig(sig_t catcher, ksiginfo_t *ksi,
/* Allocate space for the signal handler context. */
if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
SIGISMEMBER(psp->ps_sigonstack, sig)) {
- sp = td->td_sigstk.ss_sp +
- td->td_sigstk.ss_size - sizeof(struct sigframe);
+ sp = td->td_sigstk.ss_sp + td->td_sigstk.ss_size;
#if defined(COMPAT_43)
td->td_sigstk.ss_flags |= SS_ONSTACK;
#endif
} else
- sp = (char *)regs->tf_esp - sizeof(struct sigframe);
+ sp = (char *)regs->tf_esp - 128;
+ if (xfpusave != NULL) {
+ sp -= xfpusave_len;
+ sp = (char *)((unsigned int)sp & ~0x3F);
+ sf.sf_uc.uc_mcontext.mc_xfpustate = (register_t)sp;
+ }
+ sp -= sizeof(struct sigframe);
+
/* Align to 16 bytes. */
sfp = (struct sigframe *)((unsigned int)sp & ~0xF);
@@ -762,7 +779,10 @@ sendsig(sig_t catcher, ksiginfo_t *ksi,
/*
* Copy the sigframe out to the user's stack.
*/
- if (copyout(&sf, sfp, sizeof(*sfp)) != 0) {
+ if (copyout(&sf, sfp, sizeof(*sfp)) != 0 ||
+ (xfpusave != NULL && copyout(xfpusave,
+ (void *)sf.sf_uc.uc_mcontext.mc_xfpustate, xfpusave_len)
+ != 0)) {
#ifdef DEBUG
printf("process %ld has trashed its stack\n", (long)p->p_pid);
#endif
@@ -1022,11 +1042,16 @@ sys_sigreturn(td, uap)
} */ *uap;
{
ucontext_t uc;
+ struct proc *p;
struct trapframe *regs;
ucontext_t *ucp;
+ char *xfpustate;
+ size_t xfpustate_len;
int cs, eflags, error, ret;
ksiginfo_t ksi;
+ p = td->td_proc;
+
error = copyin(uap->sigcntxp, &uc, sizeof(uc));
if (error != 0)
return (error);
@@ -1101,7 +1126,30 @@ sys_sigreturn(td, uap)
return (EINVAL);
}
- ret = set_fpcontext(td, &ucp->uc_mcontext);
+ if ((uc.uc_mcontext.mc_flags & _MC_HASFPXSTATE) != 0) {
+ xfpustate_len = uc.uc_mcontext.mc_xfpustate_len;
+ if (xfpustate_len > cpu_max_ext_state_size -
+ sizeof(union savefpu)) {
+ uprintf(
+ "pid %d (%s): sigreturn xfpusave_len = 0x%zx\n",
+ p->p_pid, td->td_name, xfpustate_len);
+ return (EINVAL);
+ }
+ xfpustate = __builtin_alloca(xfpustate_len);
+ error = copyin((const void *)uc.uc_mcontext.mc_xfpustate,
+ xfpustate, xfpustate_len);
+ if (error != 0) {
+ uprintf(
+ "pid %d (%s): sigreturn copying xfpustate failed\n",
+ p->p_pid, td->td_name);
+ return (error);
+ }
+ } else {
+ xfpustate = NULL;
+ xfpustate_len = 0;
+ }
+ ret = set_fpcontext(td, &ucp->uc_mcontext, xfpustate,
+ xfpustate_len);
if (ret != 0)
return (ret);
bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs));
@@ -1599,7 +1647,7 @@ exec_setregs(struct thread *td, struct i
*/
reset_dbregs();
}
- pcb->pcb_flags &= ~PCB_DBREGS;
+ pcb->pcb_flags &= ~PCB_DBREGS;
}
pcb->pcb_initial_npxcw = __INITIAL_NPXCW__;
@@ -2853,14 +2901,14 @@ do_next:
#ifdef XEN
#define MTOPSIZE (1<<(14 + PAGE_SHIFT))
-void
+register_t
init386(first)
int first;
{
unsigned long gdtmachpfn;
int error, gsel_tss, metadata_missing, x, pa;
- size_t kstack0_sz;
struct pcpu *pc;
+ struct xstate_hdr *xhdr;
struct callback_register event = {
.type = CALLBACKTYPE_event,
.address = {GSEL(GCODE_SEL, SEL_KPL), (unsigned long)Xhypervisor_callback },
@@ -2872,8 +2920,6 @@ init386(first)
thread0.td_kstack = proc0kstack;
thread0.td_kstack_pages = KSTACK_PAGES;
- kstack0_sz = thread0.td_kstack_pages * PAGE_SIZE;
- thread0.td_pcb = (struct pcb *)(thread0.td_kstack + kstack0_sz) - 1;
/*
* This may be done better later if it gets more high level
@@ -2953,7 +2999,6 @@ init386(first)
PCPU_SET(prvspace, pc);
PCPU_SET(curthread, &thread0);
- PCPU_SET(curpcb, thread0.td_pcb);
/*
* Initialize mutexes.
@@ -3035,15 +3080,6 @@ init386(first)
initializecpu(); /* Initialize CPU registers */
initializecpucache();
- /* make an initial tss so cpu can get interrupt stack on syscall! */
- /* Note: -16 is so we can grow the trapframe if we came from vm86 */
- PCPU_SET(common_tss.tss_esp0, thread0.td_kstack +
- kstack0_sz - sizeof(struct pcb) - 16);
- PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
- gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
- HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL),
- PCPU_GET(common_tss.tss_esp0));
-
/* pointer to selector slot for %fs/%gs */
PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd);
@@ -3071,6 +3107,30 @@ init386(first)
/* now running on new page tables, configured,and u/iom is accessible */
msgbufinit(msgbufp, msgbufsize);
+#ifdef DEV_NPX
+ npxinit(true);
+#endif
+ /*
+ * Set up thread0 pcb after npxinit calculated pcb + fpu save
+ * area size. Zero out the extended state header in fpu save
+ * area.
+ */
+ thread0.td_pcb = get_pcb_td(&thread0);
+ bzero(get_pcb_user_save_td(&thread0), cpu_max_ext_state_size);
+ if (use_xsave) {
+ xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) +
+ 1);
+ xhdr->xstate_bv = xsave_mask;
+ }
+ PCPU_SET(curpcb, thread0.td_pcb);
+ /* make an initial tss so cpu can get interrupt stack on syscall! */
+ /* Note: -16 is so we can grow the trapframe if we came from vm86 */
+ PCPU_SET(common_tss.tss_esp0, (vm_offset_t)thread0.td_pcb - 16);
+ PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
+ gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
+ HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL),
+ PCPU_GET(common_tss.tss_esp0));
+
/* transfer to user mode */
_ucodesel = GSEL(GUCODE_SEL, SEL_UPL);
@@ -3089,22 +3149,23 @@ init386(first)
thread0.td_pcb->pcb_gsd = PCPU_GET(fsgs_gdt)[1];
cpu_probe_amdc1e();
+
+ /* Location of kernel stack for locore */
+ return ((register_t)thread0.td_pcb);
}
#else
-void
+register_t
init386(first)
int first;
{
struct gate_descriptor *gdp;
int gsel_tss, metadata_missing, x, pa;
- size_t kstack0_sz;
struct pcpu *pc;
+ struct xstate_hdr *xhdr;
thread0.td_kstack = proc0kstack;
thread0.td_kstack_pages = KSTACK_PAGES;
- kstack0_sz = thread0.td_kstack_pages * PAGE_SIZE;
- thread0.td_pcb = (struct pcb *)(thread0.td_kstack + kstack0_sz) - 1;
/*
* This may be done better later if it gets more high level
@@ -3165,7 +3226,6 @@ init386(first)
first += DPCPU_SIZE;
PCPU_SET(prvspace, pc);
PCPU_SET(curthread, &thread0);
- PCPU_SET(curpcb, thread0.td_pcb);
/*
* Initialize mutexes.
@@ -3320,17 +3380,6 @@ init386(first)
initializecpu(); /* Initialize CPU registers */
initializecpucache();
- /* make an initial tss so cpu can get interrupt stack on syscall! */
- /* Note: -16 is so we can grow the trapframe if we came from vm86 */
- PCPU_SET(common_tss.tss_esp0, thread0.td_kstack +
- kstack0_sz - sizeof(struct pcb) - 16);
- PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
- gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
- PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd);
- PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
- PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
- ltr(gsel_tss);
-
/* pointer to selector slot for %fs/%gs */
PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd);
@@ -3358,6 +3407,31 @@ init386(first)
/* now running on new page tables, configured,and u/iom is accessible */
msgbufinit(msgbufp, msgbufsize);
+#ifdef DEV_NPX
+ npxinit(true);
+#endif
+ /*
+ * Set up thread0 pcb after npxinit calculated pcb + fpu save
+ * area size. Zero out the extended state header in fpu save
+ * area.
+ */
+ thread0.td_pcb = get_pcb_td(&thread0);
+ bzero(get_pcb_user_save_td(&thread0), cpu_max_ext_state_size);
+ if (use_xsave) {
+ xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) +
+ 1);
+ xhdr->xstate_bv = xsave_mask;
+ }
+ PCPU_SET(curpcb, thread0.td_pcb);
+ /* make an initial tss so cpu can get interrupt stack on syscall! */
+ /* Note: -16 is so we can grow the trapframe if we came from vm86 */
+ PCPU_SET(common_tss.tss_esp0, (vm_offset_t)thread0.td_pcb - 16);
+ PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
+ gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
+ PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd);
+ PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
+ PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
+ ltr(gsel_tss);
/* make a call gate to reenter kernel with */
gdp = &ldt[LSYS5CALLS_SEL].gd;
@@ -3396,6 +3470,9 @@ init386(first)
#ifdef FDT
x86_init_fdt();
#endif
+
+ /* Location of kernel stack for locore */
+ return ((register_t)thread0.td_pcb);
}
#endif
@@ -3678,11 +3755,11 @@ fill_fpregs(struct thread *td, struct fp
#endif
#ifdef CPU_ENABLE_SSE
if (cpu_fxsr)
- fill_fpregs_xmm(&td->td_pcb->pcb_user_save.sv_xmm,
+ fill_fpregs_xmm(&get_pcb_user_save_td(td)->sv_xmm,
(struct save87 *)fpregs);
else
#endif /* CPU_ENABLE_SSE */
- bcopy(&td->td_pcb->pcb_user_save.sv_87, fpregs,
+ bcopy(&get_pcb_user_save_td(td)->sv_87, fpregs,
sizeof(*fpregs));
return (0);
}
@@ -3694,10 +3771,10 @@ set_fpregs(struct thread *td, struct fpr
#ifdef CPU_ENABLE_SSE
if (cpu_fxsr)
set_fpregs_xmm((struct save87 *)fpregs,
- &td->td_pcb->pcb_user_save.sv_xmm);
+ &get_pcb_user_save_td(td)->sv_xmm);
else
#endif /* CPU_ENABLE_SSE */
- bcopy(fpregs, &td->td_pcb->pcb_user_save.sv_87,
+ bcopy(fpregs, &get_pcb_user_save_td(td)->sv_87,
sizeof(*fpregs));
#ifdef DEV_NPX
npxuserinited(td);
@@ -3743,12 +3820,14 @@ get_mcontext(struct thread *td, mcontext
mcp->mc_esp = tp->tf_esp;
mcp->mc_ss = tp->tf_ss;
mcp->mc_len = sizeof(*mcp);
- get_fpcontext(td, mcp);
+ get_fpcontext(td, mcp, NULL, 0);
sdp = &td->td_pcb->pcb_fsd;
mcp->mc_fsbase = sdp->sd_hibase << 24 | sdp->sd_lobase;
sdp = &td->td_pcb->pcb_gsd;
mcp->mc_gsbase = sdp->sd_hibase << 24 | sdp->sd_lobase;
mcp->mc_flags = 0;
+ mcp->mc_xfpustate = 0;
+ mcp->mc_xfpustate_len = 0;
bzero(mcp->mc_spare2, sizeof(mcp->mc_spare2));
return (0);
}
@@ -3763,6 +3842,7 @@ int
set_mcontext(struct thread *td, const mcontext_t *mcp)
{
struct trapframe *tp;
+ char *xfpustate;
int eflags, ret;
tp = td->td_frame;
@@ -3770,30 +3850,43 @@ set_mcontext(struct thread *td, const mc
return (EINVAL);
eflags = (mcp->mc_eflags & PSL_USERCHANGE) |
(tp->tf_eflags & ~PSL_USERCHANGE);
- if ((ret = set_fpcontext(td, mcp)) == 0) {
- tp->tf_fs = mcp->mc_fs;
- tp->tf_es = mcp->mc_es;
- tp->tf_ds = mcp->mc_ds;
- tp->tf_edi = mcp->mc_edi;
- tp->tf_esi = mcp->mc_esi;
- tp->tf_ebp = mcp->mc_ebp;
- tp->tf_ebx = mcp->mc_ebx;
- tp->tf_edx = mcp->mc_edx;
- tp->tf_ecx = mcp->mc_ecx;
- tp->tf_eax = mcp->mc_eax;
- tp->tf_eip = mcp->mc_eip;
- tp->tf_eflags = eflags;
- tp->tf_esp = mcp->mc_esp;
- tp->tf_ss = mcp->mc_ss;
- td->td_pcb->pcb_gs = mcp->mc_gs;
- ret = 0;
- }
- return (ret);
+ if (mcp->mc_flags & _MC_HASFPXSTATE) {
+ if (mcp->mc_xfpustate_len > cpu_max_ext_state_size -
+ sizeof(union savefpu))
+ return (EINVAL);
+ xfpustate = __builtin_alloca(mcp->mc_xfpustate_len);
+ ret = copyin((void *)mcp->mc_xfpustate, xfpustate,
+ mcp->mc_xfpustate_len);
+ if (ret != 0)
+ return (ret);
+ } else
+ xfpustate = NULL;
+ ret = set_fpcontext(td, mcp, xfpustate, mcp->mc_xfpustate_len);
+ if (ret != 0)
+ return (ret);
+ tp->tf_fs = mcp->mc_fs;
+ tp->tf_es = mcp->mc_es;
+ tp->tf_ds = mcp->mc_ds;
+ tp->tf_edi = mcp->mc_edi;
+ tp->tf_esi = mcp->mc_esi;
+ tp->tf_ebp = mcp->mc_ebp;
+ tp->tf_ebx = mcp->mc_ebx;
+ tp->tf_edx = mcp->mc_edx;
+ tp->tf_ecx = mcp->mc_ecx;
+ tp->tf_eax = mcp->mc_eax;
+ tp->tf_eip = mcp->mc_eip;
+ tp->tf_eflags = eflags;
+ tp->tf_esp = mcp->mc_esp;
+ tp->tf_ss = mcp->mc_ss;
+ td->td_pcb->pcb_gs = mcp->mc_gs;
+ return (0);
}
static void
-get_fpcontext(struct thread *td, mcontext_t *mcp)
+get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave,
+ size_t xfpusave_len)
{
+ size_t max_len, len;
#ifndef DEV_NPX
mcp->mc_fpformat = _MC_FPFMT_NODEV;
@@ -3801,37 +3894,54 @@ get_fpcontext(struct thread *td, mcontex
bzero(mcp->mc_fpstate, sizeof(mcp->mc_fpstate));
#else
mcp->mc_ownedfp = npxgetregs(td);
- bcopy(&td->td_pcb->pcb_user_save, &mcp->mc_fpstate[0],
+ bcopy(get_pcb_user_save_td(td), &mcp->mc_fpstate[0],
sizeof(mcp->mc_fpstate));
mcp->mc_fpformat = npxformat();
+ if (!use_xsave || xfpusave_len == 0)
+ return;
+ max_len = cpu_max_ext_state_size - sizeof(union savefpu);
+ len = xfpusave_len;
+ if (len > max_len) {
+ len = max_len;
+ bzero(xfpusave + max_len, len - max_len);
+ }
+ mcp->mc_flags |= _MC_HASFPXSTATE;
+ mcp->mc_xfpustate_len = len;
+ bcopy(get_pcb_user_save_td(td) + 1, xfpusave, len);
#endif
}
static int
-set_fpcontext(struct thread *td, const mcontext_t *mcp)
+set_fpcontext(struct thread *td, const mcontext_t *mcp, char *xfpustate,
+ size_t xfpustate_len)
{
+ union savefpu *fpstate;
+ int error;
if (mcp->mc_fpformat == _MC_FPFMT_NODEV)
return (0);
else if (mcp->mc_fpformat != _MC_FPFMT_387 &&
mcp->mc_fpformat != _MC_FPFMT_XMM)
return (EINVAL);
- else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE)
+ else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) {
/* We don't care what state is left in the FPU or PCB. */
fpstate_drop(td);
- else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU ||
+ error = 0;
+ } else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU ||
mcp->mc_ownedfp == _MC_FPOWNED_PCB) {
#ifdef DEV_NPX
+ fpstate = (union savefpu *)&mcp->mc_fpstate;
#ifdef CPU_ENABLE_SSE
if (cpu_fxsr)
- ((union savefpu *)&mcp->mc_fpstate)->sv_xmm.sv_env.
- en_mxcsr &= cpu_mxcsr_mask;
+ fpstate->sv_xmm.sv_env.en_mxcsr &= cpu_mxcsr_mask;
#endif
- npxsetregs(td, (union savefpu *)&mcp->mc_fpstate);
+ error = npxsetregs(td, fpstate, xfpustate, xfpustate_len);
+#else
+ error = EINVAL;
#endif
} else
return (EINVAL);
- return (0);
+ return (error);
}
static void
Modified: head/sys/i386/i386/mp_machdep.c
==============================================================================
--- head/sys/i386/i386/mp_machdep.c Sun Nov 2 22:42:19 2014 (r273994)
+++ head/sys/i386/i386/mp_machdep.c Sun Nov 2 22:58:30 2014 (r273995)
@@ -749,7 +749,7 @@ init_secondary(void)
initializecpu();
/* set up FPU state on the AP */
- npxinit();
+ npxinit(false);
if (cpu_ops.cpu_init)
cpu_ops.cpu_init();
@@ -1512,11 +1512,11 @@ cpususpend_handler(void)
cpu = PCPU_GET(cpuid);
if (savectx(&susppcbs[cpu]->sp_pcb)) {
- npxsuspend(&susppcbs[cpu]->sp_fpususpend);
+ npxsuspend(susppcbs[cpu]->sp_fpususpend);
wbinvd();
CPU_SET_ATOMIC(cpu, &suspended_cpus);
} else {
- npxresume(&susppcbs[cpu]->sp_fpususpend);
+ npxresume(susppcbs[cpu]->sp_fpususpend);
pmap_init_pat();
initializecpu();
PCPU_SET(switchtime, 0);
Modified: head/sys/i386/i386/ptrace_machdep.c
==============================================================================
--- head/sys/i386/i386/ptrace_machdep.c Sun Nov 2 22:42:19 2014 (r273994)
+++ head/sys/i386/i386/ptrace_machdep.c Sun Nov 2 22:58:30 2014 (r273995)
@@ -32,6 +32,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/ptrace.h>
#include <machine/md_var.h>
@@ -41,6 +42,47 @@ __FBSDID("$FreeBSD$");
#define CPU_ENABLE_SSE
#endif
+#ifdef CPU_ENABLE_SSE
+static int
+cpu_ptrace_xstate(struct thread *td, int req, void *addr, int data)
+{
+ char *savefpu;
+ int error;
+
+ if (!use_xsave)
+ return (EOPNOTSUPP);
+
+ switch (req) {
+ case PT_GETXSTATE:
+ npxgetregs(td);
+ savefpu = (char *)(get_pcb_user_save_td(td) + 1);
+ error = copyout(savefpu, addr,
+ cpu_max_ext_state_size - sizeof(union savefpu));
+ break;
+
+ case PT_SETXSTATE:
+ if (data > cpu_max_ext_state_size - sizeof(union savefpu)) {
+ error = EINVAL;
+ break;
+ }
+ savefpu = malloc(data, M_TEMP, M_WAITOK);
+ error = copyin(addr, savefpu, data);
+ if (error == 0) {
+ npxgetregs(td);
+ error = npxsetxstate(td, savefpu, data);
+ }
+ free(savefpu, M_TEMP);
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+
+ return (error);
+}
+#endif
+
int
cpu_ptrace(struct thread *td, int req, void *addr, int data)
{
@@ -51,7 +93,7 @@ cpu_ptrace(struct thread *td, int req, v
if (!cpu_fxsr)
return (EINVAL);
- fpstate = &td->td_pcb->pcb_user_save.sv_xmm;
+ fpstate = &get_pcb_user_save_td(td)->sv_xmm;
switch (req) {
case PT_GETXMMREGS:
npxgetregs(td);
@@ -64,6 +106,11 @@ cpu_ptrace(struct thread *td, int req, v
fpstate->sv_env.en_mxcsr &= cpu_mxcsr_mask;
break;
+ case PT_GETXSTATE:
+ case PT_SETXSTATE:
+ error = cpu_ptrace_xstate(td, req, addr, data);
+ break;
+
default:
return (EINVAL);
}
Modified: head/sys/i386/i386/sys_machdep.c
==============================================================================
--- head/sys/i386/i386/sys_machdep.c Sun Nov 2 22:42:19 2014 (r273994)
+++ head/sys/i386/i386/sys_machdep.c Sun Nov 2 22:58:30 2014 (r273995)
@@ -105,6 +105,7 @@ sysarch(td, uap)
union {
struct i386_ldt_args largs;
struct i386_ioperm_args iargs;
+ struct i386_get_xfpustate xfpu;
} kargs;
uint32_t base;
struct segment_descriptor sd, *sdp;
@@ -126,6 +127,7 @@ sysarch(td, uap)
case I386_SET_FSBASE:
case I386_GET_GSBASE:
case I386_SET_GSBASE:
+ case I386_GET_XFPUSTATE:
break;
case I386_SET_IOPERM:
@@ -154,6 +156,11 @@ sysarch(td, uap)
if (kargs.largs.num > MAX_LD || kargs.largs.num <= 0)
return (EINVAL);
break;
+ case I386_GET_XFPUSTATE:
+ if ((error = copyin(uap->parms, &kargs.xfpu,
+ sizeof(struct i386_get_xfpustate))) != 0)
+ return (error);
+ break;
default:
break;
}
@@ -270,6 +277,14 @@ sysarch(td, uap)
load_gs(GSEL(GUGS_SEL, SEL_UPL));
}
break;
+ case I386_GET_XFPUSTATE:
+ if (kargs.xfpu.len > cpu_max_ext_state_size -
+ sizeof(union savefpu))
+ return (EINVAL);
+ npxgetregs(td);
+ error = copyout((char *)(get_pcb_user_save_td(td) + 1),
+ kargs.xfpu.addr, kargs.xfpu.len);
+ break;
default:
error = EINVAL;
break;
Modified: head/sys/i386/i386/trap.c
==============================================================================
--- head/sys/i386/i386/trap.c Sun Nov 2 22:42:19 2014 (r273994)
+++ head/sys/i386/i386/trap.c Sun Nov 2 22:58:30 2014 (r273995)
@@ -1157,7 +1157,7 @@ syscall(struct trapframe *frame)
KASSERT(PCB_USER_FPU(td->td_pcb),
("System call %s returning with kernel FPU ctx leaked",
syscallname(td->td_proc, sa.code)));
- KASSERT(td->td_pcb->pcb_save == &td->td_pcb->pcb_user_save,
+ KASSERT(td->td_pcb->pcb_save == get_pcb_user_save_td(td),
("System call %s returning with mangled pcb_save",
syscallname(td->td_proc, sa.code)));
Modified: head/sys/i386/i386/vm_machdep.c
==============================================================================
--- head/sys/i386/i386/vm_machdep.c Sun Nov 2 22:42:19 2014 (r273994)
+++ head/sys/i386/i386/vm_machdep.c Sun Nov 2 22:58:30 2014 (r273995)
@@ -118,7 +118,50 @@ static u_int cpu_reset_proxyid;
static volatile u_int cpu_reset_proxy_active;
#endif
+union savefpu *
+get_pcb_user_save_td(struct thread *td)
+{
+ vm_offset_t p;
+
+ p = td->td_kstack + td->td_kstack_pages * PAGE_SIZE -
+ cpu_max_ext_state_size;
+ KASSERT((p % 64) == 0, ("Unaligned pcb_user_save area"));
+ return ((union savefpu *)p);
+}
+
+union savefpu *
+get_pcb_user_save_pcb(struct pcb *pcb)
+{
+ vm_offset_t p;
+
+ p = (vm_offset_t)(pcb + 1);
+ return ((union savefpu *)p);
+}
+
+struct pcb *
+get_pcb_td(struct thread *td)
+{
+ vm_offset_t p;
+ p = td->td_kstack + td->td_kstack_pages * PAGE_SIZE -
+ cpu_max_ext_state_size - sizeof(struct pcb);
+ return ((struct pcb *)p);
+}
+
+void *
+alloc_fpusave(int flags)
+{
+ void *res;
+ struct savefpu_ymm *sf;
+
+ res = malloc(cpu_max_ext_state_size, M_DEVBUF, flags);
+ if (use_xsave) {
+ sf = (struct savefpu_ymm *)res;
+ bzero(&sf->sv_xstate.sx_hd, sizeof(sf->sv_xstate.sx_hd));
+ sf->sv_xstate.sx_hd.xstate_bv = xsave_mask;
+ }
+ return (res);
+}
/*
* Finish a fork operation, with process p2 nearly set up.
* Copy and update the pcb, set up the stack so that the child
@@ -168,15 +211,16 @@ cpu_fork(td1, p2, td2, flags)
#endif
/* Point the pcb to the top of the stack */
- pcb2 = (struct pcb *)(td2->td_kstack +
- td2->td_kstack_pages * PAGE_SIZE) - 1;
+ pcb2 = get_pcb_td(td2);
td2->td_pcb = pcb2;
/* Copy td1's pcb */
bcopy(td1->td_pcb, pcb2, sizeof(*pcb2));
/* Properly initialize pcb_save */
- pcb2->pcb_save = &pcb2->pcb_user_save;
+ pcb2->pcb_save = get_pcb_user_save_pcb(pcb2);
+ bcopy(get_pcb_user_save_td(td1), get_pcb_user_save_pcb(pcb2),
+ cpu_max_ext_state_size);
/* Point mdproc and then copy over td1's contents */
mdp2 = &p2->p_md;
@@ -353,12 +397,18 @@ cpu_thread_swapout(struct thread *td)
void
cpu_thread_alloc(struct thread *td)
{
+ struct pcb *pcb;
+ struct xstate_hdr *xhdr;
- td->td_pcb = (struct pcb *)(td->td_kstack +
- td->td_kstack_pages * PAGE_SIZE) - 1;
- td->td_frame = (struct trapframe *)((caddr_t)td->td_pcb - 16) - 1;
- td->td_pcb->pcb_ext = NULL;
- td->td_pcb->pcb_save = &td->td_pcb->pcb_user_save;
+ td->td_pcb = pcb = get_pcb_td(td);
+ td->td_frame = (struct trapframe *)((caddr_t)pcb - 16) - 1;
+ pcb->pcb_ext = NULL;
+ pcb->pcb_save = get_pcb_user_save_pcb(pcb);
+ if (use_xsave) {
+ xhdr = (struct xstate_hdr *)(pcb->pcb_save + 1);
+ bzero(xhdr, sizeof(*xhdr));
+ xhdr->xstate_bv = xsave_mask;
+ }
}
void
@@ -426,7 +476,9 @@ cpu_set_upcall(struct thread *td, struct
bcopy(td0->td_pcb, pcb2, sizeof(*pcb2));
pcb2->pcb_flags &= ~(PCB_NPXINITDONE | PCB_NPXUSERINITDONE |
PCB_KERNNPX);
- pcb2->pcb_save = &pcb2->pcb_user_save;
+ pcb2->pcb_save = get_pcb_user_save_pcb(pcb2);
+ bcopy(get_pcb_user_save_td(td0), pcb2->pcb_save,
+ cpu_max_ext_state_size);
/*
* Create a new fresh stack for the new thread.
Modified: head/sys/i386/include/cpufunc.h
==============================================================================
--- head/sys/i386/include/cpufunc.h Sun Nov 2 22:42:19 2014 (r273994)
+++ head/sys/i386/include/cpufunc.h Sun Nov 2 22:58:30 2014 (r273995)
@@ -457,6 +457,25 @@ rcr4(void)
return (data);
}
+static __inline uint64_t
+rxcr(u_int reg)
+{
+ u_int low, high;
+
+ __asm __volatile("xgetbv" : "=a" (low), "=d" (high) : "c" (reg));
+ return (low | ((uint64_t)high << 32));
+}
+
+static __inline void
+load_xcr(u_int reg, uint64_t val)
+{
+ u_int low, high;
+
+ low = val;
+ high = val >> 32;
+ __asm __volatile("xsetbv" : : "c" (reg), "a" (low), "d" (high));
+}
+
/*
* Global TLB flush (except for thise for pages marked PG_G)
*/
Modified: head/sys/i386/include/md_var.h
==============================================================================
--- head/sys/i386/include/md_var.h Sun Nov 2 22:42:19 2014 (r273994)
+++ head/sys/i386/include/md_var.h Sun Nov 2 22:58:30 2014 (r273995)
@@ -52,6 +52,7 @@ extern u_int cpu_stdext_feature;
extern u_int cpu_fxsr;
extern u_int cpu_high;
extern u_int cpu_id;
+extern u_int cpu_max_ext_state_size;
extern u_int cpu_mxcsr_mask;
extern u_int cpu_procinfo;
extern u_int cpu_procinfo2;
@@ -80,14 +81,19 @@ extern int vm_page_dump_size;
extern int workaround_erratum383;
extern int _udatasel;
extern int _ucodesel;
+extern int use_xsave;
+extern uint64_t xsave_mask;
typedef void alias_for_inthand_t(u_int cs, u_int ef, u_int esp, u_int ss);
+struct pcb;
+union savefpu;
struct thread;
struct reg;
struct fpreg;
struct dbreg;
struct dumperinfo;
+void *alloc_fpusave(int flags);
void bcopyb(const void *from, void *to, size_t len);
void busdma_swi(void);
void cpu_setregs(void);
@@ -118,5 +124,8 @@ void printcpuinfo(void);
void setidt(int idx, alias_for_inthand_t *func, int typ, int dpl, int selec);
int user_dbreg_trap(void);
int minidumpsys(struct dumperinfo *);
+union savefpu *get_pcb_user_save_td(struct thread *td);
+union savefpu *get_pcb_user_save_pcb(struct pcb *pcb);
+struct pcb *get_pcb_td(struct thread *td);
#endif /* !_MACHINE_MD_VAR_H_ */
Modified: head/sys/i386/include/npx.h
==============================================================================
--- head/sys/i386/include/npx.h Sun Nov 2 22:42:19 2014 (r273994)
+++ head/sys/i386/include/npx.h Sun Nov 2 22:58:30 2014 (r273995)
@@ -45,17 +45,24 @@
#ifdef _KERNEL
+struct fpu_kern_ctx;
+
#define PCB_USER_FPU(pcb) (((pcb)->pcb_flags & PCB_KERNNPX) == 0)
+#define XSAVE_AREA_ALIGN 64
+
int npxdna(void);
void npxdrop(void);
void npxexit(struct thread *td);
int npxformat(void);
int npxgetregs(struct thread *td);
-void npxinit(void);
+void npxinit(bool bsp);
void npxresume(union savefpu *addr);
void npxsave(union savefpu *addr);
-void npxsetregs(struct thread *td, union savefpu *addr);
+int npxsetregs(struct thread *td, union savefpu *addr,
+ char *xfpustate, size_t xfpustate_size);
+int npxsetxstate(struct thread *td, char *xfpustate,
+ size_t xfpustate_size);
void npxsuspend(union savefpu *addr);
int npxtrap_x87(void);
int npxtrap_sse(void);
@@ -68,8 +75,12 @@ int fpu_kern_leave(struct thread *td, st
int fpu_kern_thread(u_int flags);
int is_fpu_kern_thread(u_int flags);
+union savefpu *fpu_save_area_alloc(void);
+void fpu_save_area_free(union savefpu *fsa);
+void fpu_save_area_reset(union savefpu *fsa);
+
/*
- * Flags for fpu_kern_enter() and fpu_kern_thread().
+ * Flags for fpu_kern_alloc_ctx(), fpu_kern_enter() and fpu_kern_thread().
*/
#define FPU_KERN_NORMAL 0x0000
#define FPU_KERN_NOWAIT 0x0001
Modified: head/sys/i386/include/pcb.h
==============================================================================
--- head/sys/i386/include/pcb.h Sun Nov 2 22:42:19 2014 (r273994)
+++ head/sys/i386/include/pcb.h Sun Nov 2 22:58:30 2014 (r273995)
@@ -45,17 +45,23 @@
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-head
mailing list