svn commit: r260907 - in projects/amd64_xen_pv/sys/amd64: include xen

Cherry G. Mathew cherry at FreeBSD.org
Mon Jan 20 14:48:53 UTC 2014


Author: cherry
Date: Mon Jan 20 14:48:52 2014
New Revision: 260907
URL: http://svnweb.freebsd.org/changeset/base/260907

Log:
  Handle %gs/%fs restore from the appropriate pcb->pcb_{gs,fs}base
  properly, during userland exit. When either is loaded with a new
  selector in the kernel, the base address offsets are reset, and have
  to be reloaded.
  
  In a xen pv container, this is done via hypercalls rather than
  rdmsr/wrmsr. We break out xen_set_proc() into kernel stack switch and
  just the tls switch via xen_load_tls(). This takes care of the ring #3
  component of the %gs/%fs restore before exit to userland.
  
  %gs is slightly special in that the kernel per-cpu base offset is
  stored in the hidden offset register of %gs, in addition to the
  userland context. Additionally, xen takes care of the 'swapgs'
  operation when a user/kernel transition occurs. What all this means is
  that the exit path needs to reload the current *kernel* %gs offset
  when %gs is reloaded for userland, in addition to the userland
  offset. This kernel offset is assumed to be the per-cpu structure for
  the specific cpu on which the transition to userland is
  happening. Since we don't have access to the per-cpu msr where this
  information is saved, we take the slightly circuitous and presumptive
  route of obtaining the cpu number on which we are running before
  resetting %gs, and then using this information to find the appropriate
  offset value.
  
  Approved by:	gibbs@ (implicit)

Modified:
  projects/amd64_xen_pv/sys/amd64/include/cpufunc.h
  projects/amd64_xen_pv/sys/amd64/xen/exception.S
  projects/amd64_xen_pv/sys/amd64/xen/machdep.c

Modified: projects/amd64_xen_pv/sys/amd64/include/cpufunc.h
==============================================================================
--- projects/amd64_xen_pv/sys/amd64/include/cpufunc.h	Mon Jan 20 14:37:02 2014	(r260906)
+++ projects/amd64_xen_pv/sys/amd64/include/cpufunc.h	Mon Jan 20 14:48:52 2014	(r260907)
@@ -52,6 +52,8 @@ extern u_long xen_rcr2(void);
 extern void xen_load_cr3(u_long data);
 extern void xen_tlb_flush(void);
 extern void xen_invlpg(vm_offset_t addr);
+extern void xen_load_kgsbase(uint64_t kgsbase);
+extern void xen_load_tls(struct pcb *pcb);
 extern void xen_set_proc(struct pcb *newpcb);
 extern void write_rflags(u_long rflags);
 extern u_long read_rflags(void);

Modified: projects/amd64_xen_pv/sys/amd64/xen/exception.S
==============================================================================
--- projects/amd64_xen_pv/sys/amd64/xen/exception.S	Mon Jan 20 14:37:02 2014	(r260906)
+++ projects/amd64_xen_pv/sys/amd64/xen/exception.S	Mon Jan 20 14:48:52 2014	(r260907)
@@ -101,9 +101,11 @@
 #define SAVE_SEGMENT_REGS \
 	movw	%es, TF_ES(%rsp) ; \
 	movw	%ds, TF_DS(%rsp) ; \
-	movw	%fs, TF_FS(%rsp)
+	movw	%fs, TF_FS(%rsp) ; \
+	movw	%gs, TF_GS(%rsp)	/* own slot: storing to TF_FS clobbered the saved %fs */
 
 /* Restore generic data segment registers from the stack */
+/* This macro overwrites some general registers */
 #define RESTORE_SEGMENT_REGS \
 	call	restore_segment_regs
 
@@ -234,20 +236,79 @@
 	jmp hypercall_page + (__HYPERVISOR_iret * 32)
 	
 NON_GPROF_ENTRY(restore_segment_regs)
+	.globl	doreti_iret
 	.globl  ld_es
 	.globl	ld_ds
 	.globl	ld_fs
 	.globl	ld_gs
+doreti_iret:
+	/* Note: The trapframe is on the *caller* stackframe */
+	movq	PCPU(CURPCB),%r8
+
+	/*
+	 * Do not reload segment registers for kernel.
+	 * Since we do not reload segments registers with sane
+	 * values on kernel entry, descriptors referenced by
+	 * segments registers might be not valid.  This is fatal
+	 * for user mode, but is not a problem for the kernel.
+	 */
+	testb	$SEL_RPL_MASK,(TF_CS + 8)(%rsp)
+	jz	segs_done
+	testl	$PCB_FULL_IRET,PCB_FLAGS(%r8)
+	jz	segs_done
+	testl	$TF_HASSEGS,(TF_FLAGS + 8)(%rsp)
+	jne	1f
+
+	/* reload with sane values */
+	movw	$KUDSEL,%ax
+	movw	%ax,(TF_DS + 8)(%rsp)
+	movw	%ax,(TF_ES + 8)(%rsp)
+	movw	$KUF32SEL,(TF_FS + 8)(%rsp)
+	movw	$KUG32SEL,(TF_GS + 8)(%rsp)
+	
+1:
+ld_fs:
+	xorq	%rbx, %rbx
+	movw	(TF_FS + 8)(%rsp), %ax
+	movw	%ax, %fs	/* blew away fsbase here */
+	cmpw	$KUF32SEL, %ax
+	jne	2f
+	movq	$0x666, %rbx	/* Kernel doesn't use %fs */
+
+2:	
+ld_gs:
+	movl	PCPU(CPUID), %edi
+	callq	pcpu_find
+	movq	%rax, %rdi	/* kgsbase == pcpu_find(PCPU_GET(cpuid)) */
+	
+	movw	(TF_GS + 8)(%rsp), %ax
+	movw	%ax, %gs	/* blew away (k)gsbase here */
+
+	/* Kernel gsbase reload */
+	callq	xen_load_kgsbase /* reload kgsbase from %rdi */
+
+	movw	%gs, %ax
+	cmpw	$KUG32SEL, %ax
+	jne	3f
+	movq	$0x666, %rbx
+3:	
+	cmpq	$0x666, %rbx	/* reload user %fs/%gs ? */
+	jne	4f
+
+	movq	PCPU(CURPCB), %rdi
+	callq	xen_load_tls 	/* Update user %fs/%gs to pcb_fsbase and pcb_gsbase */
+	
+4:	/* done with %fs/%gs */
+
 ld_es:
-	movw	(TF_ES + 8)(%rsp,1), %es ; /* Save on previous frame */
+	movw	(TF_ES + 8)(%rsp), %es ;
 ld_ds:
-	movw	(TF_DS + 8)(%rsp,1), %ds ; /* Save on previous frame */
-ld_fs:  /* %fs == 0 and the per-proc base is updated via xen_set_proc() */
-ld_gs: 	/* XEN manages %gs (swapgs) */
+	movw	(TF_DS + 8)(%rsp), %ds ;
+
+segs_done:	
 	ret
 	
 	/* The path below should not have been reached. */
-	.globl	doreti_iret
 	.globl	doreti_iret_fault
 	.globl	ld_gsbase
 	.globl	ld_fsbase
@@ -256,7 +317,6 @@ ld_gs: 	/* XEN manages %gs (swapgs) */
 	.globl	mca_intr
 	.globl	fs_load_fault /* XXX: revisit */
 	.globl	gs_load_fault /* XXX: revisit */
-doreti_iret:
 doreti_iret_fault:
 ld_gsbase:
 ld_fsbase:
@@ -279,161 +339,161 @@ es_load_fault:
 	call	trap
 	movw	$KUDSEL,TF_ES(%rsp)
 	DO_AST_MAYBE		;
+	RESTORE_SEGMENT_REGS	; /* overwrites some general registers */
 	RESTORE_GENERAL_REGS	;
-	RESTORE_SEGMENT_REGS	;
 	TRAP_FRAME_EXIT_NOERR   ;
 	INTR_EXIT		;
 	
 IDTVEC(de)			/* Divide-By-Zero-Error */
 	TRAP_FRAME_ENTER_NOERR	;
 	TRAP_PROLOGUE(T_DIVIDE)	;
-	SAVE_SEGMENT_REGS	;
 	SAVE_GENERAL_REGS	;
+	SAVE_SEGMENT_REGS	;
 	DO_STI_MAYBE		;
 	CALLTRAP		;
 	DO_AST_MAYBE		;
+	RESTORE_SEGMENT_REGS	; /* overwrites some general registers */
 	RESTORE_GENERAL_REGS	;
-	RESTORE_SEGMENT_REGS	;
 	TRAP_FRAME_EXIT_NOERR   ;
 	INTR_EXIT		;
 	
 IDTVEC(db)			/* Debug */
 	TRAP_FRAME_ENTER_NOERR	;
 	TRAP_PROLOGUE(T_TRCTRAP);
-	SAVE_SEGMENT_REGS	;
 	SAVE_GENERAL_REGS	;
+	SAVE_SEGMENT_REGS	;
 	CALLTRAP		;
 	DO_AST_MAYBE		;
+	RESTORE_SEGMENT_REGS	; /* overwrites some general registers */
 	RESTORE_GENERAL_REGS	;
-	RESTORE_SEGMENT_REGS	;
 	TRAP_FRAME_EXIT_NOERR   ;
 	INTR_EXIT		;
 
 IDTVEC(nmi)			/* Non-Maskable-Interrupt */
 	TRAP_FRAME_ENTER_NOERR	;
 	TRAP_PROLOGUE(T_NMI)	;
-	SAVE_SEGMENT_REGS	;
 	SAVE_GENERAL_REGS	;
+	SAVE_SEGMENT_REGS	;
 	CALLTRAP		;
 	DO_AST_MAYBE		;
+	RESTORE_SEGMENT_REGS	; /* overwrites some general registers */
 	RESTORE_GENERAL_REGS	;
-	RESTORE_SEGMENT_REGS	;
 	TRAP_FRAME_EXIT_NOERR   ;
 	INTR_EXIT		;
 
 IDTVEC(bp)			/* Breakpoint */
 	TRAP_FRAME_ENTER_NOERR	;
 	TRAP_PROLOGUE(T_BPTFLT)	;
-	SAVE_SEGMENT_REGS	;
 	SAVE_GENERAL_REGS	;
+	SAVE_SEGMENT_REGS	;
 	CALLTRAP		;
 	DO_AST_MAYBE		;
+	RESTORE_SEGMENT_REGS	; /* overwrites some general registers */
 	RESTORE_GENERAL_REGS	;
-	RESTORE_SEGMENT_REGS	;
 	TRAP_FRAME_EXIT_NOERR   ;
 	INTR_EXIT		;
 
 IDTVEC(of)			/* Overflow */
 	TRAP_FRAME_ENTER_NOERR	;
 	TRAP_PROLOGUE(T_OFLOW)	;
-	SAVE_SEGMENT_REGS	;
 	SAVE_GENERAL_REGS	;
+	SAVE_SEGMENT_REGS	;
 	DO_STI_MAYBE		;
 	CALLTRAP		;
 	DO_AST_MAYBE		;
+	RESTORE_SEGMENT_REGS	; /* overwrites some general registers */
 	RESTORE_GENERAL_REGS	;
-	RESTORE_SEGMENT_REGS	;
 	TRAP_FRAME_EXIT_NOERR   ;
 	INTR_EXIT		;
 
 IDTVEC(br)			/* Bound-Range */
 	TRAP_FRAME_ENTER_NOERR	;
 	TRAP_PROLOGUE(T_BOUND)	;
-	SAVE_SEGMENT_REGS	;
 	SAVE_GENERAL_REGS	;
+	SAVE_SEGMENT_REGS	;
 	DO_STI_MAYBE		;
 	CALLTRAP		;
 	DO_AST_MAYBE		;
+	RESTORE_SEGMENT_REGS	; /* overwrites some general registers */
 	RESTORE_GENERAL_REGS	;
-	RESTORE_SEGMENT_REGS	;
 	TRAP_FRAME_EXIT_NOERR   ;
 	INTR_EXIT		;
 
 IDTVEC(ud)			/* Invalid-Opcode */
 	TRAP_FRAME_ENTER_NOERR	;
 	TRAP_PROLOGUE(T_PRIVINFLT) ;
-	SAVE_SEGMENT_REGS	;
 	SAVE_GENERAL_REGS	;
+	SAVE_SEGMENT_REGS	;
 	DO_STI_MAYBE		;
 	CALLTRAP		;
 	DO_AST_MAYBE		;
+	RESTORE_SEGMENT_REGS	; /* overwrites some general registers */
 	RESTORE_GENERAL_REGS	;
-	RESTORE_SEGMENT_REGS	;
 	TRAP_FRAME_EXIT_NOERR   ;
 	INTR_EXIT		;
 
 IDTVEC(nm)			/* Device-Not-Available */
 	TRAP_FRAME_ENTER_NOERR	;
 	TRAP_PROLOGUE(T_DNA)	;
-	SAVE_SEGMENT_REGS	;
 	SAVE_GENERAL_REGS	;
+	SAVE_SEGMENT_REGS	;
 	DO_STI_MAYBE		;
 	CALLTRAP		;
 	DO_AST_MAYBE		;
+	RESTORE_SEGMENT_REGS	; /* overwrites some general registers */
 	RESTORE_GENERAL_REGS	;
-	RESTORE_SEGMENT_REGS	;
 	TRAP_FRAME_EXIT_NOERR   ;
 	INTR_EXIT		;
 
 IDTVEC(df)			/* Double-Fault */
 	TRAP_FRAME_ENTER_ERR	;
 	TRAP_PROLOGUE(T_DOUBLEFLT)	;
-	SAVE_SEGMENT_REGS	;
 	SAVE_GENERAL_REGS	;
+	SAVE_SEGMENT_REGS	;
 	DO_STI_MAYBE		;
 	CALLTRAP		;
 	DO_AST_MAYBE		;
+	RESTORE_SEGMENT_REGS	; /* overwrites some general registers */
 	RESTORE_GENERAL_REGS	;
-	RESTORE_SEGMENT_REGS	;
 	TRAP_FRAME_EXIT_ERR   	;
 	INTR_EXIT		;
 
 IDTVEC(ts)			/* Invalid-TSS */
 	TRAP_FRAME_ENTER_ERR	;
 	TRAP_PROLOGUE(T_TSSFLT)	;
-	SAVE_SEGMENT_REGS	;
 	SAVE_GENERAL_REGS	;
+	SAVE_SEGMENT_REGS	;
 	DO_STI_MAYBE		;
 	CALLTRAP		;
 	DO_AST_MAYBE		;
+	RESTORE_SEGMENT_REGS	; /* overwrites some general registers */
 	RESTORE_GENERAL_REGS	;
-	RESTORE_SEGMENT_REGS	;
 	TRAP_FRAME_EXIT_ERR   	;
 	INTR_EXIT		;
 
 IDTVEC(np)			/* Segment-Not-Present */
 	TRAP_FRAME_ENTER_ERR	;
 	TRAP_PROLOGUE(T_SEGNPFLT)	;
-	SAVE_SEGMENT_REGS	;
 	SAVE_GENERAL_REGS	;
+	SAVE_SEGMENT_REGS	;
 	DO_STI_MAYBE		;
 	CALLTRAP		;
 	DO_AST_MAYBE		;
+	RESTORE_SEGMENT_REGS	; /* overwrites some general registers */
 	RESTORE_GENERAL_REGS	;
-	RESTORE_SEGMENT_REGS	;
 	TRAP_FRAME_EXIT_ERR   	;
 	INTR_EXIT		;
 
 IDTVEC(ss)			/* Stack */
 	TRAP_FRAME_ENTER_ERR	;
 	TRAP_PROLOGUE(T_STKFLT)	;
-	SAVE_SEGMENT_REGS	;
 	SAVE_GENERAL_REGS	;
+	SAVE_SEGMENT_REGS	;
 	DO_STI_MAYBE		;
 	CALLTRAP		;
 	DO_AST_MAYBE		;
+	RESTORE_SEGMENT_REGS	; /* overwrites some general registers */
 	RESTORE_GENERAL_REGS	;
-	RESTORE_SEGMENT_REGS	;
 	TRAP_FRAME_EXIT_ERR   	;
 	INTR_EXIT		;
 
@@ -446,86 +506,86 @@ IDTVEC(gp)			/* General-Protection */
 	CALLTRAP		;
 	DO_AST_MAYBE		;
+	RESTORE_SEGMENT_REGS	; /* overwrites some general registers, so must precede RESTORE_GENERAL_REGS */
 	RESTORE_GENERAL_REGS	;
-	RESTORE_SEGMENT_REGS	;
 	TRAP_FRAME_EXIT_ERR   	;
 	INTR_EXIT		;
 
 IDTVEC(pf)			/* Page-Fault */
 	TRAP_FRAME_ENTER_ERR	;
 	TRAP_PROLOGUE(T_PAGEFLT);
-	SAVE_SEGMENT_REGS	;
 	SAVE_GENERAL_REGS	;
+	SAVE_SEGMENT_REGS	;
 	DO_STI_MAYBE		;
 	SETUP_TF_ADDR		; /* Fault Address - clobbers %rsi %rdi */
 	CALLTRAP		; /* %rsi is ignored */
 	DO_AST_MAYBE		;
+	RESTORE_SEGMENT_REGS	; /* overwrites some general registers */
 	RESTORE_GENERAL_REGS	;
-	RESTORE_SEGMENT_REGS	;
 	TRAP_FRAME_EXIT_ERR   	;
 	INTR_EXIT		;
 
 IDTVEC(mf)			/* x87 Floating-Point Exception Pending */
 	TRAP_FRAME_ENTER_NOERR	;
 	TRAP_PROLOGUE(T_ARITHTRAP)	;
-	SAVE_SEGMENT_REGS	;
 	SAVE_GENERAL_REGS	;
+	SAVE_SEGMENT_REGS	;
 	DO_STI_MAYBE		;
 	CALLTRAP		;
 	DO_AST_MAYBE		;
+	RESTORE_SEGMENT_REGS	; /* overwrites some general registers */
 	RESTORE_GENERAL_REGS	;
-	RESTORE_SEGMENT_REGS	;
 	TRAP_FRAME_EXIT_NOERR   ;
 	INTR_EXIT		;
 
 IDTVEC(ac)			/* Alignment-Check */
 	TRAP_FRAME_ENTER_ERR	;
 	TRAP_PROLOGUE(T_ALIGNFLT)	;
-	SAVE_SEGMENT_REGS	;
 	SAVE_GENERAL_REGS	;
+	SAVE_SEGMENT_REGS	;
 	DO_STI_MAYBE		;
 	CALLTRAP		;
 	DO_AST_MAYBE		;
+	RESTORE_SEGMENT_REGS	; /* overwrites some general registers */
 	RESTORE_GENERAL_REGS	;
-	RESTORE_SEGMENT_REGS	;
 	TRAP_FRAME_EXIT_ERR   	;
 	INTR_EXIT		;
 
 IDTVEC(mc)			/* Machine-Check */
 	TRAP_FRAME_ENTER_NOERR	;
 	TRAP_PROLOGUE(T_MCHK)	;
-	SAVE_SEGMENT_REGS	;
 	SAVE_GENERAL_REGS	;
+	SAVE_SEGMENT_REGS	;
 	DO_STI_MAYBE		;
 	CALLTRAP		;
 	DO_AST_MAYBE		;
+	RESTORE_SEGMENT_REGS	; /* overwrites some general registers */
 	RESTORE_GENERAL_REGS	;
-	RESTORE_SEGMENT_REGS	;
 	TRAP_FRAME_EXIT_NOERR   ;
 	INTR_EXIT		;
 
 IDTVEC(xf)			/* SIMD Floating-Point */
 	TRAP_FRAME_ENTER_NOERR	;
 	TRAP_PROLOGUE(T_XMMFLT)	;
-	SAVE_SEGMENT_REGS	;
 	SAVE_GENERAL_REGS	;
+	SAVE_SEGMENT_REGS	;
 	DO_STI_MAYBE		;
 	CALLTRAP		;
 	DO_AST_MAYBE		;
+	RESTORE_SEGMENT_REGS	; /* overwrites some general registers */
 	RESTORE_GENERAL_REGS	;
-	RESTORE_SEGMENT_REGS	;
 	TRAP_FRAME_EXIT_NOERR   ;
 	INTR_EXIT		;
 
 IDTVEC(rs)			/* Reserved */
 	TRAP_FRAME_ENTER_NOERR	;
 	TRAP_PROLOGUE(T_RESERVED) ;
-	SAVE_SEGMENT_REGS	;
 	SAVE_GENERAL_REGS	;
+	SAVE_SEGMENT_REGS	;
 	DO_STI_MAYBE		;
 	CALLTRAP		;
 	DO_AST_MAYBE		;
+	RESTORE_SEGMENT_REGS	; /* overwrites some general registers */
 	RESTORE_GENERAL_REGS	;
-	RESTORE_SEGMENT_REGS	;
 	TRAP_FRAME_EXIT_NOERR   ;
 	INTR_EXIT		;
 
@@ -535,21 +595,21 @@ ENTRY(fork_trampoline)
 	movq	%rsp,%rdx		/* trapframe pointer */
 	call	fork_exit
 	DO_AST_MAYBE		;
+	RESTORE_SEGMENT_REGS	; /* overwrites some general registers */
 	RESTORE_GENERAL_REGS	;
-	RESTORE_SEGMENT_REGS	;
 	TRAP_FRAME_EXIT_NOERR   ;
 	INTR_EXIT		;
 
 IDTVEC(hypervisor_callback)	/* Xen only */
 	TRAP_FRAME_ENTER_NOERR	;
 	TRAP_PROLOGUE(T_EVENT)	;
-	SAVE_SEGMENT_REGS	;
 	SAVE_GENERAL_REGS	;
+	SAVE_SEGMENT_REGS	;
 	EVENT_UPCALL		;
 	DO_STI_MAYBE		;
 	DO_AST_MAYBE		;
+	RESTORE_SEGMENT_REGS	; /* overwrites some general registers */
 	RESTORE_GENERAL_REGS	;
-	RESTORE_SEGMENT_REGS	;
 	TRAP_FRAME_EXIT_NOERR   ;
 	INTR_EXIT		;
 
@@ -561,15 +621,15 @@ msgfailsafe:	.asciz	"Failsafe upcall tri
 IDTVEC(syscall_callback)
 	TRAP_FRAME_ENTER_NOERR	;
 	TRAP_PROLOGUE(T_USER)	;
-	SAVE_SEGMENT_REGS	;
 	SAVE_GENERAL_REGS	;
+	SAVE_SEGMENT_REGS	;
 	movq	%r11, TF_RFLAGS(%rsp) ; /* Tweak for INTR_EXIT */
 	movq	%r10, TF_RCX(%rsp) ; /* Translate to C abi. see trap.c:cpu_fetch_syscall_args() */
 	DO_STI_MAYBE		; /* Clobbers %rdi */
 	movq	TF_RDI(%rsp), %rdi ;
 	CALLSYSCALL		;
 	DO_AST_MAYBE		;
+	RESTORE_SEGMENT_REGS	; /* overwrites some general registers */
 	RESTORE_GENERAL_REGS	; /* XXX: optimise for SYSRET */
-	RESTORE_SEGMENT_REGS	;
 	TRAP_FRAME_EXIT_NOERR   ;
 	INTR_EXIT		; /* XXX: SYSRET is more optimal */

Modified: projects/amd64_xen_pv/sys/amd64/xen/machdep.c
==============================================================================
--- projects/amd64_xen_pv/sys/amd64/xen/machdep.c	Mon Jan 20 14:37:02 2014	(r260906)
+++ projects/amd64_xen_pv/sys/amd64/xen/machdep.c	Mon Jan 20 14:48:52 2014	(r260907)
@@ -430,12 +430,11 @@ initxen(struct start_info *si)
 	 * Setup kernel PCPU base. pcpu needs them, and other
 	 * parts of the early startup path use pcpu variables before
 	 * we have loaded the new Global Descriptor Table.
+	 * XXX: revisit
 	 */
 
 	pc = &__pcpu[0];
-	HYPERVISOR_set_segment_base (SEGBASE_FS, 0);
 	HYPERVISOR_set_segment_base (SEGBASE_GS_KERNEL, (uint64_t) pc);
-	HYPERVISOR_set_segment_base (SEGBASE_GS_USER, 0);
 
 	/* Setup paging */
 	/* 
@@ -488,6 +487,7 @@ initxen(struct start_info *si)
 	        kern_envp = xen_setbootenv((caddr_t)xen_start_info->cmd_line);
 
 	boothowto |= xen_boothowto(kern_envp);
+	boothowto |= RB_SINGLE;
 
 #ifdef DDB /* XXX: */
 	ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t);
@@ -515,17 +515,19 @@ initxen(struct start_info *si)
 		/* NOTREACHED */
 	}
 
-	lgdt(NULL); /* See: support.S */
+	lgdt(NULL); /* Load all segment registers - See: support.S */
 
 	/* 
 	 * Refresh kernel tls registers since we've blown them away
-	 * via new GDT load. pcpu needs them.
+	 * via new GDT load and segment reloads. pcpu needs them.
 	 */
+
+	HYPERVISOR_set_segment_base (SEGBASE_FS, 0);
 	HYPERVISOR_set_segment_base (SEGBASE_GS_KERNEL, (uint64_t) pc);
+	HYPERVISOR_set_segment_base (SEGBASE_GS_USER, (uint64_t) 0);
 
 	/* per cpu structures for cpu0 */
 	pcpu_init(pc, 0, sizeof(struct pcpu));
-
 	dpcpu_init((void *)(PTOV(physfree)), 0);
 	physfree += DPCPU_SIZE;
 
@@ -543,6 +545,7 @@ initxen(struct start_info *si)
 	PCPU_SET(curthread, &thread0);
 	PCPU_SET(tssp, &common_tss[0]); /* Dummy - see definition */
 	PCPU_SET(commontssp, &common_tss[0]); /* Dummy - see definition */
+	/* XXX: ldt */
 	PCPU_SET(fs32p, (void *)xpmap_ptom(VTOP(&gdt[GUFS32_SEL]))); /* Note: On Xen PV, we set the machine address. */
 	PCPU_SET(gs32p, (void *)xpmap_ptom(VTOP(&gdt[GUGS32_SEL]))); /* Note: On Xen PV, we set the machine address. */
 
@@ -608,21 +611,29 @@ initxen(struct start_info *si)
 	/* setup user mode selector glue */
 	_ucodesel = GSEL(GUCODE_SEL, SEL_UPL);
 	_udatasel = GSEL(GUDATA_SEL, SEL_UPL);
+	/* XXX: _ucode32sel & compat_32 */
 	_ufssel = GSEL(GUFS32_SEL, SEL_UPL);
 	_ugssel = GSEL(GUGS32_SEL, SEL_UPL);
 
-	/* Load thread0 context */
-	load_ds(_udatasel);
-	load_es(_udatasel);
-	load_fs(0); /* reset %fs to 0 before 64bit base load */
-	HYPERVISOR_set_segment_base (SEGBASE_FS, 0);
-	HYPERVISOR_set_segment_base (SEGBASE_GS_USER_SEL, (uint64_t) 0);
-	HYPERVISOR_set_segment_base (SEGBASE_GS_USER, (uint64_t) 0);
+	/* 
+	 * Native does a "transfer to user mode" - which seems rather
+	 * suspect^wunfinished to me (cherry@).
+	 *
+	 * We don't do this on xen, since this thread eventually
+	 * becomes vm/vm_glue.c:swapper() , which assumes that it is
+	 * running in kernel mode.
+	 *
+	 * Note, cherry@: I don't think it's worth the trouble setting
+	 * up a separate "swapper" user context for this thread,
+	 * unless a strong case for performance savings (TLB hits ?)
+	 * can be made.
+	 */
 
 	/* setup proc 0's pcb */
 	thread0.td_pcb->pcb_flags = 0;
 	thread0.td_pcb->pcb_cr3 = xpmap_ptom(VTOP(KPML4phys));
 	thread0.td_frame = &proc0_tf;
+	thread0.td_pcb->pcb_gsbase = (uint64_t) pc;
 
         env = getenv("kernelname");
 	if (env != NULL)
@@ -1437,6 +1448,42 @@ xen_rcr2(void)
 	return (HYPERVISOR_shared_info->vcpu_info[curcpu].arch.cr2);
 }
 
+/* 
+ * Set kernel %gs base
+ * This is required after a %gs reload from kernel context
+ */
+void
+xen_load_kgsbase(uint64_t gsbase)
+{
+	HYPERVISOR_set_segment_base (SEGBASE_GS_KERNEL, gsbase);
+}
+
+/* Set Usermode TLS registers from pcb context */
+void
+xen_load_tls(struct pcb *pcb)
+{
+	/* XXX: compat32 */
+	if (pcb->pcb_flags & PCB_32BIT) {
+		struct user_segment_descriptor gsd;
+		gsd = gdt[GUGS32_SEL];
+		USD_SETBASE(&gsd, pcb->pcb_gsbase);
+		xen_set_descriptor((vm_paddr_t)PCPU_GET(gs32p), (void *)&gsd);
+
+		if (pcb->pcb_flags & PCB_32BIT) {
+			gsd = gdt[GUFS32_SEL];
+			USD_SETBASE(&gsd, pcb->pcb_fsbase);
+			xen_set_descriptor((vm_paddr_t)PCPU_GET(fs32p), (void *)&gsd);
+		}
+	} else {
+		HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, 
+					    _ugssel);
+		HYPERVISOR_set_segment_base(SEGBASE_GS_USER, 
+					    pcb->pcb_gsbase);
+		HYPERVISOR_set_segment_base(SEGBASE_FS,
+					    pcb->pcb_fsbase);
+	}
+}
+
 void
 xen_set_proc(struct pcb *newpcb)
 {
@@ -1444,26 +1491,7 @@ xen_set_proc(struct pcb *newpcb)
 		(unsigned long) newpcb & ~0xFul);
 
 	if (!(curthread->td_pflags & TDP_KTHREAD)) { /* Only for user proc */
-		/* XXX: compat32 */
-		if (newpcb->pcb_flags & PCB_32BIT) {
-			struct user_segment_descriptor gsd;
-			gsd = gdt[GUGS32_SEL];
-			USD_SETBASE(&gsd, newpcb->pcb_gsbase);
-			xen_set_descriptor((vm_paddr_t)PCPU_GET(gs32p), (void *)&gsd);
-
-			if (newpcb->pcb_flags & PCB_32BIT) {
-				gsd = gdt[GUFS32_SEL];
-				USD_SETBASE(&gsd, newpcb->pcb_fsbase);
-				xen_set_descriptor((vm_paddr_t)PCPU_GET(fs32p), (void *)&gsd);
-			}
-		} else {
-			HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, 
-						    0);
-			HYPERVISOR_set_segment_base(SEGBASE_GS_USER, 
-						    newpcb->pcb_gsbase);
-			HYPERVISOR_set_segment_base(SEGBASE_FS,
-						    newpcb->pcb_fsbase);
-		}
+		xen_load_tls(newpcb);
 	}
 }
 


More information about the svn-src-projects mailing list