PERFORCE change 29320 for review
Marcel Moolenaar
marcel at FreeBSD.org
Sun Apr 20 17:49:38 PDT 2003
http://perforce.freebsd.org/chv.cgi?CH=29320
Change 29320 by marcel at marcel_nfs on 2003/04/20 17:49:18
o execve fix: When we jump to the entry point of the newly
loaded program, we still have dirty registers on the kernel
stack from when the execve was performed. In exec_setregs
we cleared the trapframe, causing us to not completely
"unwind" to the bottom of the kernel stack. This caused
problems because we assume that when we enter the kernel
from user space through a syscall, we start at the bottom
of the kernel stack (see cpu_fork()). The fix is to
discard a multiple of 512 bytes of the register stack by
relocating the current stack base and copying "live"
registers. The remaining dirty bytes (<512) are simply
discarded by masking off the lower 9 bits of the kernel
register stack pointer before we save it in ar.k6. This
two part operation guarantees that we keep in sync with
the NaT collections (every 512 bytes).
o Only define ar.k7 (the kernel memory stack of the thread)
when we enter user space. We used to define ar.k7 when we
would return to kernel space as well. This is completely
harmless, but removes a possible INVARIANTS test. By not
defining ar.k7 when we return to kernel space, we have
ar.k6 (the saved register stack base) and ar.k7 (the
saved memory stack top) at a fixed distance from each
other at all times: namely KSTACK_PAGES * PAGE_SIZE -
SIZEOF_PCB. Lost or corrupted state is then more easily
identified.
Affected files ...
.. //depot/projects/ia64_epc/sys/ia64/ia64/exception.s#14 edit
.. //depot/projects/ia64_epc/sys/ia64/ia64/machdep.c#14 edit
.. //depot/projects/ia64_epc/sys/ia64/ia64/syscall.s#9 edit
Differences ...
==== //depot/projects/ia64_epc/sys/ia64/ia64/exception.s#14 (text+ko) ====
@@ -354,32 +354,25 @@
* been flushed.
*/
{ .mmi
+ rsm psr.ic|psr.i
+ ;;
+ srlz.d
add sp=16,sp
;;
- ld8 r9=[sp] // length
- add r3=SIZEOF_TRAPFRAME-32,sp
- ;;
}
{ .mmi
- rsm psr.ic|psr.i
+ add r3=SIZEOF_TRAPFRAME-32,sp
+ add r2=SIZEOF_TRAPFRAME-16,sp
+ add r8=SIZEOF_SPECIAL+16,sp
;;
- srlz.d
- add r2=16,r3
- ;;
}
-{ .mmi
+
ldf.fill f15=[r2],-32 // f15
ldf.fill f14=[r3],-32 // f14
- add r8=SIZEOF_SPECIAL+16,sp
;;
-}
-{ .mmi
ldf.fill f13=[r2],-32 // f13
ldf.fill f12=[r3],-32 // f12
- add r9=r9,sp
;;
-}
-
ldf.fill f11=[r2],-32 // f11
ldf.fill f10=[r3],-32 // f10
;;
@@ -392,96 +385,93 @@
{ .mmi
ld8 r8=[r8] // unat (after)
- mov ar.k7=r9
+ ;;
+ mov ar.unat=r8
nop 0
;;
}
-{ .mmi
+
ld8 r10=[r2],-16 // ssd
ld8 r11=[r3],-16 // csd
- nop 0
;;
-}
-{ .mmi
- mov ar.unat=r8
mov ar.ssd=r10
- nop 0
-}
-{ .mmi
+ mov ar.csd=r11
+
ld8 r14=[r2],-16 // ccv
ld8 r15=[r3],-16 // b7
- nop 0
;;
-}
+
{ .mmi
- mov ar.csd=r11
mov ar.ccv=r14
+ ld8 r8=[r2],-16 // b6
mov b7=r15
;;
}
{ .mmi
- ld8 r8=[r2],-16 // b6
ld8.fill r31=[r3],-16 // r31
- nop 0
- ;;
-}
-{ .mmi
ld8.fill r30=[r2],-16 // r30
- ld8.fill r29=[r3],-16 // r29
mov b6=r8
;;
}
+ ld8.fill r29=[r3],-16 // r29
ld8.fill r28=[r2],-16 // r28
+ ;;
ld8.fill r27=[r3],-16 // r27
+ ld8.fill r26=[r2],-16 // r26
;;
- ld8.fill r26=[r2],-16 // r26
ld8.fill r25=[r3],-16 // r25
+ ld8.fill r24=[r2],-16 // r24
;;
- ld8.fill r24=[r2],-16 // r24
ld8.fill r23=[r3],-16 // r23
+ ld8.fill r22=[r2],-16 // r22
;;
- ld8.fill r22=[r2],-16 // r22
ld8.fill r21=[r3],-16 // r21
+ ld8.fill r20=[r2],-16 // r20
;;
- ld8.fill r20=[r2],-16 // r20
ld8.fill r19=[r3],-16 // r19
- ;;
ld8.fill r18=[r2],-16 // r18
- ld8.fill r17=[r3],-16 // r17
;;
{ .mmb
+ ld8.fill r17=[r3],-16 // r17
ld8.fill r16=[r2],-16 // r16
- ld8.fill r15=[r3],-16 // r15
bsw.0
;;
}
{ .mmi
+ ld8.fill r15=[r3],-16 // r15
ld8.fill r14=[r2],-16 // r14
+ add r31=16,sp
+ ;;
+}
+{ .mmi
+ ld8 r16=[sp] // tf_length
ld8.fill r11=[r3],-16 // r11
- add r31=16,sp
+ add r30=24,sp
;;
}
{ .mmi
ld8.fill r10=[r2],-16 // r10
ld8.fill r9=[r3],-16 // r9
- add r30=24,sp
+ add r16=r16,sp // ar.k7
;;
}
-
+{ .mmi
ld8.fill r8=[r2],-16 // r8
ld8.fill r3=[r3] // r3
;;
+}
+
ld8.fill r2=[r2] // r2
ld8.fill sp=[r31],16 // sp
;;
- ld8 r16=[r30],16 // unat
- ld8 r17=[r31],16 // rp
+ ld8 r17=[r30],16 // unat
+ ld8 r29=[r31],16 // rp
;;
ld8 r18=[r30],16 // pr
ld8 r19=[r31],16 // pfs
- mov rp=r17
+ mov rp=r29
;;
ld8 r20=[r30],24 // bspstore
ld8 r21=[r31],24 // rnat
@@ -521,18 +511,25 @@
nop 0
;;
}
+{ .mmi
mov r31=ar.bspstore
+ ;;
mov ar.bspstore=r20
+ dep r31=0,r31,0,9
;;
+}
+
mov ar.k6=r31
+ mov ar.k7=r16
+ ;;
mov ar.rnat=r21
mov r13=r29
;;
1:
- mov ar.unat=r16
+ mov ar.unat=r17
+ mov ar.fpsr=r23
mov ar.pfs=r19
- mov ar.fpsr=r23
mov cr.ipsr=r24
mov cr.ifs=r26
mov cr.iip=r27
==== //depot/projects/ia64_epc/sys/ia64/ia64/machdep.c#14 (text+ko) ====
@@ -851,18 +851,19 @@
mc->mc_flags |= IA64_MC_FLAGS_SCRATCH_VALID;
mc->mc_scratch = frame->tf_scratch;
mc->mc_scratch_fp = frame->tf_scratch_fp;
+ /*
+ * XXX High FP. If the process has never used the high FP,
+ * mark the high FP as valid (zero defaults). If the process
+ * did use the high FP, then store them in the PCB if not
+ * already there (ie get them from the CPU that has them)
+ * and write them in the context.
+ */
}
+
/*
* XXX preserved registers. We don't have the preserved registers
* in the trapframe. We don't worry about it now.
*/
- /*
- * XXX High FP. If the process has never used the high FP, mark
- * the high FP as valid (zero defaults). If the process did use
- * the high FP, then store them in the PCB if not already there
- * (ie get them from the CPU that has them) and write them in
- * the context.
- */
/*
* Allocate and validate space for the signal handler
@@ -1071,14 +1072,45 @@
exec_setregs(struct thread *td, u_long entry, u_long stack, u_long ps_strings)
{
struct trapframe *tf;
+ char *kstack;
+ uint64_t bspst, ndirty;
tf = td->td_frame;
+ kstack = (char*)td->td_kstack;
+
+ /*
+ * RSE magic: We have ndirty registers of the process on the kernel
+ * stack which don't belong to the new image. Discard them. Note
+ * that for the "legacy" syscall support we need to keep 3 registers
+ * worth of dirty bytes. These 3 registers are the initial arguments
+ * to the newly executing program.
+ * However, we cannot discard all the ndirty registers by simply
+ * moving the kernel related registers to the bottom of the kernel
+ * stack and lowering the current bspstore, because we get into
+ * trouble with the NaT collections. We need to keep that in sync
+ * with the registers. Hence, we can only copy a multiple of 512
+ * bytes. Consequently, we may end up with some registers of the
+ * previous image on the kernel stack. This we ignore by making
+ * sure we mask-off the lower 9 bits of the bspstore value just
+ * prior to saving it in ar.k6.
+ */
+ if ((tf->tf_flags & FRAME_SYSCALL) == 0)
+ tf->tf_special.ndirty -= 24;
+ ndirty = tf->tf_special.ndirty & ~0x1ff;
+ if (ndirty > 0) {
+ __asm __volatile("mov ar.rsc=0;;");
+ __asm __volatile("mov %0=ar.bspstore" : "=r"(bspst));
+ bcopy(kstack + ndirty, kstack, ndirty);
+ bspst -= ndirty;
+ __asm __volatile("mov ar.bspstore=%0;;" :: "r"(bspst));
+ __asm __volatile("mov ar.rsc=3;;");
+ tf->tf_special.ndirty -= ndirty;
+ }
+ ndirty = tf->tf_special.ndirty;
+
+ bzero(&tf->tf_special, sizeof(tf->tf_special));
+
if ((tf->tf_flags & FRAME_SYSCALL) == 0) { /* break syscalls. */
- uint64_t *args;
- uint64_t ndirty;
- ndirty = tf->tf_special.ndirty;
- bzero(&tf->tf_special, sizeof(tf->tf_special));
- tf->tf_special.ndirty = ndirty;
bzero(&tf->tf_scratch, sizeof(tf->tf_scratch));
bzero(&tf->tf_scratch_fp, sizeof(tf->tf_scratch_fp));
tf->tf_special.iip = entry;
@@ -1086,20 +1118,12 @@
tf->tf_special.bspstore = td->td_md.md_bspstore;
/*
* Copy the arguments onto the kernel register stack so that
- * they get loaded by the loadrs. This involves some NaT
- * collection magic.
+ * they get loaded by the loadrs instruction.
*/
- args = (uint64_t*)(td->td_kstack + ndirty);
- args -= (((uintptr_t)args & 0x1ff) < 24) ? 4 : 3;
- *args++ = stack;
- if (((uintptr_t)args & 0x1ff) == 0x1f8)
- args++;
- *args++ = ps_strings;
- if (((uintptr_t)args & 0x1ff) == 0x1f8)
- args++;
- *args = 0;
+ *(uint64_t*)(kstack + ndirty - 24) = stack;
+ *(uint64_t*)(kstack + ndirty - 16) = ps_strings;
+ *(uint64_t*)(kstack + ndirty - 8) = 0;
} else { /* epc syscalls (default). */
- bzero(&tf->tf_special, sizeof(tf->tf_special));
tf->tf_special.rp = entry;
tf->tf_special.pfs = (3UL<<62) | (3UL<<7) | 3UL;
tf->tf_special.bspstore = td->td_md.md_bspstore + 24;
@@ -1113,6 +1137,7 @@
suword((caddr_t)tf->tf_special.bspstore - 16, ps_strings);
suword((caddr_t)tf->tf_special.bspstore - 8, 0);
}
+
tf->tf_special.sp = (stack & ~15) - 16;
tf->tf_special.rsc = 0xf;
tf->tf_special.fpsr = IA64_FPSR_DEFAULT;
==== //depot/projects/ia64_epc/sys/ia64/ia64/syscall.s#9 (text+ko) ====
@@ -329,20 +329,21 @@
}
{ .mmi
loadrs
- mov ar.k7=r31
+ mov r14=ar.k5
dep r26=-1,r26,19,1 // Set psr.dfh
;;
}
{ .mmi
- mov r31=ar.bspstore
+ mov r30=ar.bspstore
+ ;;
mov ar.bspstore=r21
- mov r13=r23
+ dep r30=0,r30,0,9
;;
}
{ .mmi
- mov r14=ar.k5
- mov ar.k6=r31
- nop 0
+ mov ar.k6=r30
+ mov ar.k7=r31
+ mov r13=r23
;;
}
{ .mmi
More information about the p4-projects
mailing list