PERFORCE change 28905 for review
Marcel Moolenaar
marcel at FreeBSD.org
Sun Apr 13 19:57:07 PDT 2003
http://perforce.freebsd.org/chv.cgi?CH=28905
Change 28905 by marcel at marcel_nfs on 2003/04/13 19:56:37
Implement lazy context switching for the high FP registers.
This does not use any synchronization and/or locking yet.
Also, we currently disable the high FP registers whenever
we enter and leave the kernel. We may want to leave the high
FP registers enabled if we leave the kernel and the CPU
holds the high FP registers of the process we're returning
to. For now we just let it trap. I expect to have to
revisit this...
Affected files ...
.. //depot/projects/ia64_epc/sys/ia64/ia64/exception.s#9 edit
.. //depot/projects/ia64_epc/sys/ia64/ia64/interrupt.c#2 edit
.. //depot/projects/ia64_epc/sys/ia64/ia64/machdep.c#11 edit
.. //depot/projects/ia64_epc/sys/ia64/ia64/mp_machdep.c#4 edit
.. //depot/projects/ia64_epc/sys/ia64/ia64/trap.c#8 edit
.. //depot/projects/ia64_epc/sys/ia64/ia64/vm_machdep.c#7 edit
.. //depot/projects/ia64_epc/sys/ia64/include/cpu.h#5 edit
.. //depot/projects/ia64_epc/sys/ia64/include/smp.h#3 edit
Differences ...
==== //depot/projects/ia64_epc/sys/ia64/ia64/exception.s#9 (text+ko) ====
@@ -327,7 +327,7 @@
;;
}
{ .mlx
- ssm psr.ic
+ ssm psr.ic|psr.dfh
movl gp=__gp
;;
}
@@ -506,15 +506,20 @@
(p14) br.cond.sptk 1f
;;
}
-
+{ .mii
// Switch register stack
alloc r31=ar.pfs,0,0,0,0 // discard current frame
shl r30=r25,16 // value for ar.rsc
+ dep r24=-1,r24,19,1 // XXX disable high FP.
;;
+}
+{ .mmi
mov ar.rsc=r30 // setup for loadrs
;;
loadrs // load user regs
+ nop 0
;;
+}
mov r31=ar.bspstore
;;
mov ar.bspstore=r20
==== //depot/projects/ia64_epc/sys/ia64/ia64/interrupt.c#2 (text+ko) ====
@@ -152,6 +152,9 @@
} else if (vector == ipi_vector[IPI_AST]) {
asts[PCPU_GET(cpuid)]++;
CTR1(KTR_SMP, "IPI_AST, cpuid=%d", PCPU_GET(cpuid));
+ } else if (vector == ipi_vector[IPI_HIGH_FP]) {
+ if (PCPU_GET(fpcurthread) != NULL)
+ ia64_highfp_save(PCPU_GET(fpcurthread));
} else if (vector == ipi_vector[IPI_RENDEZVOUS]) {
rdvs[PCPU_GET(cpuid)]++;
CTR1(KTR_SMP, "IPI_RENDEZVOUS, cpuid=%d", PCPU_GET(cpuid));
==== //depot/projects/ia64_epc/sys/ia64/ia64/machdep.c#11 (text+ko) ====
@@ -74,6 +74,9 @@
#include <machine/mca.h>
#include <machine/pal.h>
#include <machine/sal.h>
+#ifdef SMP
+#include <machine/smp.h>
+#endif
#include <machine/bootinfo.h>
#include <machine/mutex.h>
#include <machine/vmparam.h>
@@ -723,9 +726,6 @@
ia64_set_cflg((CR0_PE | CR0_PG)
| ((long)(CR4_XMM | CR4_FXSR) << 32));
- /* We pretend to own FP state so that ia64_fpstate_check() works */
- PCPU_SET(fpcurthread, &thread0);
-
/*
* Initialize the rest of proc 0's PCB.
*
@@ -974,6 +974,9 @@
printf("sigreturn: pid %d, scp %p\n", p->p_pid, uap->sigcntxp);
#endif
+ /* Throw away the high FP registers. */
+ ia64_highfp_drop(td);
+
/*
* Fetch the entire context structure at once for speed.
* We don't use a normal argument to simplify RSE handling.
@@ -990,6 +993,7 @@
frame->tf_scratch = mc->mc_scratch;
frame->tf_scratch_fp = mc->mc_scratch_fp;
}
+
/*
* XXX preserved registers.
* XXX High FP.
@@ -1008,9 +1012,6 @@
signotify(td);
PROC_UNLOCK(p);
- /* XXX ksc.sc_ownedfp ? */
- ia64_fpstate_drop(td);
-
#ifdef DEBUG
if (sigdebug & SDB_FOLLOW)
printf("sigreturn(%d): returns\n", p->p_pid);
@@ -1092,9 +1093,6 @@
suword((caddr_t)frame->tf_special.bspstore - 24, stack);
suword((caddr_t)frame->tf_special.bspstore - 16, ps_strings);
suword((caddr_t)frame->tf_special.bspstore - 8, 0);
-
- td->td_md.md_flags &= ~MDP_FPUSED;
- ia64_fpstate_drop(td);
}
int
@@ -1181,10 +1179,11 @@
struct trapframe *frame = td->td_frame;
struct pcb *pcb = td->td_pcb;
+ /* Save the high FP registers. */
+ ia64_highfp_save(td);
+
fpregs->fpr_scratch = frame->tf_scratch_fp;
- /* XXX preserved */
-
- ia64_fpstate_save(td, 0);
+ /* XXX preserved_fp */
fpregs->fpr_high = pcb->pcb_high_fp;
return (0);
}
@@ -1195,14 +1194,87 @@
struct trapframe *frame = td->td_frame;
struct pcb *pcb = td->td_pcb;
+ /* Throw away the high FP registers (should be redundant). */
+ ia64_highfp_drop(td);
+
frame->tf_scratch_fp = fpregs->fpr_scratch;
- /* XXX preserved */
-
- ia64_fpstate_drop(td);
+ /* XXX preserved_fp */
pcb->pcb_high_fp = fpregs->fpr_high;
return (0);
}
+/*
+ * High FP register functions.
+ * XXX no synchronization yet.
+ */
+
+int
+ia64_highfp_drop(struct thread *td)
+{
+ struct pcb *pcb;
+ struct pcpu *cpu;
+ struct thread *thr;
+
+ pcb = td->td_pcb;
+ cpu = pcb->pcb_fpcpu;
+ if (cpu == NULL)
+ return (0);
+ pcb->pcb_fpcpu = NULL;
+ thr = cpu->pc_fpcurthread;
+ cpu->pc_fpcurthread = NULL;
+
+ /* Post-mortem sanity checking. */
+ KASSERT(thr == td, ("Inconsistent high FP state"));
+ return (1);
+}
+
+int
+ia64_highfp_load(struct thread *td)
+{
+ struct pcb *pcb;
+
+ pcb = td->td_pcb;
+ KASSERT(pcb->pcb_fpcpu == NULL, ("FP race on thread"));
+ KASSERT(PCPU_GET(fpcurthread) == NULL, ("FP race on pcpu"));
+ restore_high_fp(&pcb->pcb_high_fp);
+ PCPU_SET(fpcurthread, td);
+ pcb->pcb_fpcpu = pcpup;
+ return (1);
+}
+
+int
+ia64_highfp_save(struct thread *td)
+{
+ struct pcb *pcb;
+ struct pcpu *cpu;
+ struct thread *thr;
+
+ /* Don't save if the high FP registers weren't modified. */
+ if ((td->td_frame->tf_special.psr & IA64_PSR_MFH) == 0)
+ return (ia64_highfp_drop(td));
+
+ pcb = td->td_pcb;
+ cpu = pcb->pcb_fpcpu;
+ if (cpu == NULL)
+ return (0);
+#ifdef SMP
+ if (cpu != pcpup) {
+ ipi_send(cpu->pc_lid, IPI_HIGH_FP);
+ while (pcb->pcb_fpcpu != cpu)
+ DELAY(100);
+ return (1);
+ }
+#endif
+ save_high_fp(&pcb->pcb_high_fp);
+ pcb->pcb_fpcpu = NULL;
+ thr = cpu->pc_fpcurthread;
+ cpu->pc_fpcurthread = NULL;
+
+ /* Post-mortem sanity checking. */
+ KASSERT(thr == td, ("Inconsistent high FP state"));
+ return (1);
+}
+
#ifndef DDB
void
Debugger(const char *msg)
@@ -1231,72 +1303,6 @@
SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock,
CTLFLAG_RW, &wall_cmos_clock, 0, "");
-void
-ia64_fpstate_check(struct thread *td)
-{
- if ((td->td_frame->tf_special.psr & IA64_PSR_DFH) == 0)
- if (td != PCPU_GET(fpcurthread))
- panic("ia64_fpstate_check: bogus");
-}
-
-/*
- * Save the high floating point state in the pcb. Use this to get
- * read-only access to the floating point state. If write is true, the
- * current fp process is cleared so that fp state can safely be
- * modified. The process will automatically reload the changed state
- * by generating a disabled fp trap.
- */
-void
-ia64_fpstate_save(struct thread *td, int write)
-{
- if (td == PCPU_GET(fpcurthread)) {
- _get_high_fp(&td->td_pcb->pcb_high_fp);
-
- if (write) {
- td->td_frame->tf_special.psr |= IA64_PSR_DFH;
- PCPU_SET(fpcurthread, NULL);
- }
- }
-}
-
-/*
- * Relinquish ownership of the FP state. This is called instead of
- * ia64_save_fpstate() if the entire FP state is being changed
- * (e.g. on sigreturn).
- */
-void
-ia64_fpstate_drop(struct thread *td)
-{
- if (td == PCPU_GET(fpcurthread)) {
- td->td_frame->tf_special.psr |= IA64_PSR_DFH;
- PCPU_SET(fpcurthread, NULL);
- }
-}
-
-/*
- * Switch the current owner of the fp state to p, reloading the state
- * from the pcb.
- */
-void
-ia64_fpstate_switch(struct thread *td)
-{
- struct thread *fptd;
-
- fptd = PCPU_GET(fpcurthread);
- if (fptd != NULL) {
- /* Dump the old fp state if its valid. */
- _get_high_fp(&fptd->td_pcb->pcb_high_fp);
- fptd->td_frame->tf_special.psr |= IA64_PSR_DFH;
- }
-
- /* Remember the new FP owner and reload its state. */
- PCPU_SET(fpcurthread, td);
- _set_high_fp(&td->td_pcb->pcb_high_fp);
- td->td_frame->tf_special.psr &= ~IA64_PSR_DFH;
-
- td->td_md.md_flags |= MDP_FPUSED;
-}
-
/*
* Utility functions for manipulating instruction bundles.
*/
==== //depot/projects/ia64_epc/sys/ia64/ia64/mp_machdep.c#4 (text+ko) ====
@@ -75,7 +75,6 @@
volatile int ap_awake;
volatile int ap_spin;
-static void ipi_send(u_int64_t, int);
static void cpu_mp_unleash(void *);
void
@@ -346,7 +345,7 @@
* cr.lid (CR64) contents of the target processor. Only the id and eid
* fields are used here.
*/
-static void
+void
ipi_send(u_int64_t lid, int ipi)
{
volatile u_int64_t *pipi;
==== //depot/projects/ia64_epc/sys/ia64/ia64/trap.c#8 (text+ko) ====
@@ -62,6 +62,9 @@
#include <machine/pal.h>
#include <machine/fpu.h>
#include <machine/efi.h>
+#ifdef SMP
+#include <machine/smp.h>
+#endif
#ifdef KTRACE
#include <sys/uio.h>
@@ -513,19 +516,81 @@
}
}
- case IA64_VEC_DISABLED_FP:
+ case IA64_VEC_DISABLED_FP: { /* High FP registers are disabled. */
+ struct pcpu *pcpu;
+ struct pcb *pcb;
+ struct thread *thr;
+
+ pcb = td->td_pcb;
+ pcpu = pcb->pcb_fpcpu;
+
+ /*
+ * The pcpu variable holds the address of the per-CPU
+ * structure of the CPU currently holding this thread's
+ * high FP registers (or NULL if no CPU holds these
+ * registers). We have to interrupt that CPU and wait
+ * for it to have saved the registers.
+ */
+ if (pcpu != NULL) {
+ thr = pcpu->pc_fpcurthread;
+ KASSERT(thr == td, ("High FP state out of sync"));
+
+ if (pcpu == pcpup) {
+ /*
+ * Short-circuit handling the trap when this
+ * CPU already holds the high FP registers for
+ * this thread. We really shouldn't get the
+ * trap in the first place, but since it's
+ * only a performance issue and not a
+ * correctness issue, we emit a message for
+ * now, enable the high FP registers and
+ * return.
+ */
+ printf("XXX: bogusly disabled high FP regs\n");
+ framep->tf_special.psr &= ~IA64_PSR_DFH;
+ goto out;
+ }
+#ifdef SMP
+ /*
+ * Interrupt the other CPU so that it saves the high
+ * FP registers of this thread. Note that this can
+ * only happen for the SMP case.
+ */
+ ipi_send(pcpu->pc_lid, IPI_HIGH_FP);
+#endif
+#ifdef DIAGNOSTICS
+ } else {
+ KASSERT(PCPU_GET(fpcurthread) != td,
+ ("High FP state out of sync"));
+#endif
+ }
+
+ thr = PCPU_GET(fpcurthread);
+
/*
- * on exit from the kernel, if thread == fpcurthread,
- * FP is enabled.
+ * The thr variable holds the thread that owns the high FP
+ * registers currently on this CPU. Free this CPU so that
+ * we can load the current thread's high FP registers.
*/
- if (PCPU_GET(fpcurthread) == td) {
- printf("trap: fp disabled for fpcurthread == %p", td);
- goto dopanic;
+ if (thr != NULL) {
+ KASSERT(thr != td, ("High FP state out of sync"));
+ pcb = thr->td_pcb;
+ KASSERT(pcb->pcb_fpcpu == pcpup,
+ ("High FP state out of sync"));
+ ia64_highfp_save(thr);
}
-
- ia64_fpstate_switch(td);
+
+ /*
+ * Wait for the other CPU to have saved out high FP
+ * registers (if applicable).
+ */
+ while (pcpu && pcpu->pc_fpcurthread == td);
+
+ ia64_highfp_load(td);
+ framep->tf_special.psr &= ~IA64_PSR_DFH;
goto out;
break;
+ }
case IA64_VEC_PAGE_NOT_PRESENT:
case IA64_VEC_INST_ACCESS_RIGHTS:
==== //depot/projects/ia64_epc/sys/ia64/ia64/vm_machdep.c#7 (text+ko) ====
@@ -148,43 +148,18 @@
(td2->td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1;
td2->td_md.md_flags = td1->td_md.md_flags & (MDP_FPUSED | MDP_UAC_MASK);
- /*
- * Copy floating point state from the FP chip to the PCB
- * if this process has state stored there.
- */
- ia64_fpstate_save(td1, 0);
+ /* Save the high FP registers so that we can copy them. */
+ ia64_highfp_save(td1);
/*
* Copy pcb and stack from proc p1 to p2. We do this as
* cheaply as possible, copying only the active part of the
- * stack. The stack and pcb need to agree. Make sure that the
+ * stack. The stack and pcb need to agree. Make sure that the
* new process has FEN disabled.
*/
bcopy(td1->td_pcb, td2->td_pcb, sizeof(struct pcb));
/*
- * Set the floating point state.
- */
-#if 0
- if ((td2->td_pcb->pcb_fp_control & IEEE_INHERIT) == 0) {
- td2->td_pcb->pcb_fp_control = 0;
- td2->td_pcb->pcb_fp.fpr_cr = (FPCR_DYN_NORMAL
- | FPCR_INVD | FPCR_DZED
- | FPCR_OVFD | FPCR_INED
- | FPCR_UNFD);
- }
-#endif
-
- /*
- * Arrange for a non-local goto when the new process
- * is started, to resume here, returning nonzero from setjmp.
- */
-#ifdef DIAGNOSTIC
- if (td1 == curthread)
- ia64_fpstate_check(td1);
-#endif
-
- /*
* create the child's kernel stack, from scratch.
*
* Pick a stack pointer, leaving room for a trapframe;
@@ -316,11 +291,11 @@
* When the proc is reaped, cpu_wait() will gc the VM state.
*/
void
-cpu_exit(td)
- register struct thread *td;
+cpu_exit(struct thread *td)
{
- ia64_fpstate_drop(td);
+ /* Throw away the high FP registers. */
+ ia64_highfp_drop(td);
}
void
==== //depot/projects/ia64_epc/sys/ia64/include/cpu.h#5 (text+ko) ====
@@ -112,12 +112,11 @@
void exception_restore(void); /* MAGIC */
void frametoreg(struct trapframe *, struct reg *);
long fswintrberr(void); /* MAGIC */
+int ia64_highfp_drop(struct thread *);
+int ia64_highfp_load(struct thread *);
+int ia64_highfp_save(struct thread *);
+void ia64_init(u_int64_t, u_int64_t);
int ia64_pa_access(u_long);
-void ia64_init(u_int64_t, u_int64_t);
-void ia64_fpstate_check(struct thread *p);
-void ia64_fpstate_save(struct thread *p, int write);
-void ia64_fpstate_drop(struct thread *p);
-void ia64_fpstate_switch(struct thread *p);
void init_prom_interface(struct rpb*);
void interrupt(u_int64_t, struct trapframe *);
void machine_check
==== //depot/projects/ia64_epc/sys/ia64/include/smp.h#3 (text+ko) ====
@@ -33,6 +33,7 @@
void ipi_all_but_self(int ipi);
void ipi_selected(u_int64_t cpus, int ipi);
void ipi_self(int ipi);
+void ipi_send(u_int64_t lid, int ipi);
#endif /* !LOCORE */
#endif /* _KERNEL */
More information about the p4-projects
mailing list