PERFORCE change 29149 for review
Marcel Moolenaar
marcel at FreeBSD.org
Thu Apr 17 13:28:22 PDT 2003
http://perforce.freebsd.org/chv.cgi?CH=29149
Change 29149 by marcel at marcel_nfs on 2003/04/17 13:27:57
Now, this I like: we don't have to do anything special when
we enter the child's user space after a fork. We can simply
jump to where we return from a syscall.
It also turns out that we don't have to worry about the RSE
backing store, provided we flushed it. The kernel backing
store is always PAGE_SIZE aligned, which in all reasonable
cases is sufficient to not have to worry about differently
aligned backing stores and thus different NaT collection
points (512 byte alignment is required minimally to be able
to copy backing stores around without having to deal with
the NaT collection). Note also that we can guarantee the
alignment by virtue of always having switched from user
space. We never execute a fork() from an exception (which is
when we can nest and thus lose the guarantee that the RSE
backing store is sufficiently aligned). Thus: we only have
to copy "ndirty" bytes from the parent's backing store to
the child's and that's it. Sweet!
Affected files ...
.. //depot/projects/ia64_epc/sys/ia64/ia64/locore.s#12 edit
.. //depot/projects/ia64_epc/sys/ia64/ia64/vm_machdep.c#8 edit
Differences ...
==== //depot/projects/ia64_epc/sys/ia64/ia64/locore.s#12 (text+ko) ====
@@ -158,69 +158,10 @@
.global enter_userland
.type enter_userland, @function
enter_userland:
-{ .mmi
- alloc r16=ar.pfs,0,0,0,0
- mov ar.rsc=0
- add r14=32,sp
- ;;
-}
-{ .mmi
- loadrs
- ld8 r31=[r14],32 // sp
- add r15=48,sp
- ;;
-}
-{ .mmi
- ld8 r16=[r15],24 // rp
- mov r30=ar.bspstore
- add r29=16,sp
- ;;
-}
-{ .mmi
- ld8 r17=[r14],40 // pfs
- ld8 r18=[r15],40 // bspstore
- mov rp=r16
- ;;
-}
-{ .mlx
- mov ar.bspstore=r18
- movl r16=0x180000
- ;;
-}
-{ .mmi
- ld8 r28=[r29]
- mov ar.rsc=r16
- mov sp=r31
- ;;
-}
-{ .mmi
- loadrs
- ld8 r16=[r14],16 // rsc
- add r29=r28,r29
- ;;
-}
-{ .mmi
- mov ar.rsc=r16
- ld8 r18=[r15] // fpsr
- mov ar.pfs=r17
- ;;
-}
-{ .mmb
- ld8 r16=[r14] // psr
- mov ar.fpsr=r18
+{ .mfb
nop 0
- ;;
-}
-{ .mmi
- mov psr.l=r16
- mov ar.k7=r29
nop 0
- ;;
-}
-{ .mmb
- srlz.d
- mov ar.k6=r30
- br.ret.sptk rp
+ br.sptk syscall_return
;;
}
END(fork_trampoline)
==== //depot/projects/ia64_epc/sys/ia64/ia64/vm_machdep.c#8 (text+ko) ====
@@ -127,144 +127,65 @@
* ready to run and return to user mode.
*/
void
-cpu_fork(td1, p2, td2, flags)
- register struct thread *td1;
- register struct proc *p2;
- register struct thread *td2;
- int flags;
+cpu_fork(struct thread *td1, struct proc *p2 __unused, struct thread *td2,
+ int flags)
{
- struct proc *p1;
- struct trapframe *p2tf;
- u_int64_t bspstore, *p1bs, *p2bs, rnat;
+ char *stackp;
KASSERT(td1 == curthread || td1 == &thread0,
- ("cpu_fork: p1 not curproc and not proc0"));
+ ("cpu_fork: td1 not curthread and not thread0"));
if ((flags & RFPROC) == 0)
return;
- p1 = td1->td_proc;
- td2->td_pcb = (struct pcb *)
- (td2->td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1;
- td2->td_md.md_flags = td1->td_md.md_flags & (MDP_FPUSED | MDP_UAC_MASK);
-
- /* Save the high FP registers so that we can copy them. */
- ia64_highfp_save(td1);
-
/*
- * Copy pcb and stack from proc p1 to p2. We do this as
- * cheaply as possible, copying only the active part of the
- * stack. The stack and pcb need to agree. Make sure that the
- * new process has FEN disabled.
+ * Save the preserved registers and the high FP registers in the
+ * PCB if we're the parent (ie td1 == curthread) so that we have
+ * a valid PCB. This also causes a RSE flush. We don't have to
+ * do that otherwise, because there wouldn't be anything important
+ * to save.
*/
- bcopy(td1->td_pcb, td2->td_pcb, sizeof(struct pcb));
-
- /*
- * create the child's kernel stack, from scratch.
- *
- * Pick a stack pointer, leaving room for a trapframe;
- * copy trapframe from parent so return to user mode
- * will be to right address, with correct registers. Clear the
- * high-fp enable for the new process so that it is forced to
- * load its state from the pcb.
- */
- td2->td_frame = (struct trapframe *)td2->td_pcb - 1;
- bcopy(td1->td_frame, td2->td_frame, sizeof(struct trapframe));
- td2->td_frame->tf_special.psr |= IA64_PSR_DFH;
-
- /*
- * Set up return-value registers as fork() libc stub expects.
- */
- p2tf = td2->td_frame;
- if (p2tf->tf_special.psr & IA64_PSR_IS) {
- p2tf->tf_scratch.gr8 = 0; /* child returns zero (eax) */
- p2tf->tf_scratch.gr10 = 1; /* is child (edx) */
- } else {
- p2tf->tf_scratch.gr8 = 0; /* child's pid (linux) */
- p2tf->tf_scratch.gr9 = 1; /* is child (FreeBSD) */
- p2tf->tf_scratch.gr10 = 0; /* no error */
+ if (td1 == curthread) {
+ if (savectx(td1->td_pcb) != 0)
+ panic("unexpected return from savectx()");
+ ia64_highfp_save(td1);
}
/*
- * Turn off RSE for a moment and work out our current
- * ar.bspstore. This assumes that td1==curthread. Also
- * flush dirty regs to ensure that the user's stacked
- * regs are written out to backing store.
- *
- * We could cope with td1!=curthread by digging values
- * out of its PCB but I don't see the point since
- * current usage only allows &thread0 when creating kernel
- * threads and &thread0 doesn't have any dirty regs.
+ * create the child's kernel stack and backing store. We basicly
+ * create an image of the parent's stack and backing store and
+ * adjust where necessary.
*/
+ stackp = (char *)(td2->td_kstack + KSTACK_PAGES * PAGE_SIZE);
- p1bs = (u_int64_t *)td1->td_kstack;
- p2bs = (u_int64_t *)td2->td_kstack;
+ stackp -= sizeof(struct pcb);
+ td2->td_pcb = (struct pcb *)stackp;
+ bcopy(td1->td_pcb, td2->td_pcb, sizeof(struct pcb));
- if (td1 == curthread) {
- __asm __volatile("mov ar.rsc=0;;");
- __asm __volatile("flushrs;;" ::: "memory");
- __asm __volatile("mov %0=ar.bspstore" : "=r"(bspstore));
- } else {
- bspstore = (u_int64_t) p1bs;
- }
+ stackp -= sizeof(struct trapframe);
+ td2->td_frame = (struct trapframe *)stackp;
+ bcopy(td1->td_frame, td2->td_frame, sizeof(struct trapframe));
+ td2->td_frame->tf_length = sizeof(struct trapframe);
-#if 0
- /*
- * Copy enough of td1's backing store to include all
- * the user's stacked regs.
- */
- bcopy(p1bs, p2bs, td1->td_frame->tf_ndirty);
+ bcopy((void*)td1->td_kstack, (void*)td2->td_kstack,
+ td2->td_frame->tf_special.ndirty);
- /*
- * To calculate the ar.rnat for td2, we need to decide
- * if td1's ar.bspstore has advanced past the place
- * where the last ar.rnat which covers the user's
- * saved registers would be placed. If so, we read
- * that one from memory, otherwise we take td1's
- * current ar.rnat. If we are simply spawning a new kthread
- * from &thread0 we don't care about ar.rnat.
- */
- if (td1 == curthread) {
- uint64_t rnatloc = (u_int64_t)p1bs + td1->td_frame->tf_ndirty;
- rnatloc |= 0x1f8;
- if (bspstore > rnatloc)
- rnat = *(u_int64_t *) rnatloc;
- else
- __asm __volatile("mov %0=ar.rnat;;" : "=r"(rnat));
-
- /*
- * Switch the RSE back on.
- */
- __asm __volatile("mov ar.rsc=3;;");
+ /* Set-up the return values as expected by the fork() libc stub. */
+ if (td2->td_frame->tf_special.psr & IA64_PSR_IS) {
+ td2->td_frame->tf_scratch.gr8 = 0;
+ td2->td_frame->tf_scratch.gr10 = 1;
} else {
- rnat = 0;
+ td2->td_frame->tf_scratch.gr8 = 0;
+ td2->td_frame->tf_scratch.gr9 = 1;
+ td2->td_frame->tf_scratch.gr10 = 0;
}
-#else
- rnat = 0;
-#endif
- /*
- * Setup the child's pcb so that its ar.bspstore
- * starts just above the region which we copied. This
- * should work since the child will normally return
- * straight into exception_restore. Also initialise its
- * pmap to the containing proc's vmspace.
- */
- td2->td_pcb->pcb_special.bspstore = (u_int64_t)p2bs;
- td2->td_pcb->pcb_special.rnat = rnat;
+ td2->td_pcb->pcb_special.bspstore = td2->td_kstack +
+ td2->td_frame->tf_special.ndirty;
td2->td_pcb->pcb_special.pfs = 0;
td2->td_pcb->pcb_current_pmap = vmspace_pmap(td2->td_proc->p_vmspace);
- /*
- * Arrange for continuation at fork_return(), which
- * will return to exception_restore(). Note that the
- * child process doesn't stay in the kernel for long!
- *
- * The extra 16 bytes subtracted from sp is part of the ia64
- * ABI - a function can assume that the 16 bytes above sp are
- * available as scratch space.
- */
- td2->td_pcb->pcb_special.sp = (u_int64_t)p2tf - 16;
+ td2->td_pcb->pcb_special.sp = (uintptr_t)stackp - 16;
td2->td_pcb->pcb_special.rp = FDESC_FUNC(fork_trampoline);
cpu_set_fork_handler(td2, (void (*)(void*))fork_return, td2);
}
More information about the p4-projects
mailing list