git: a641444939d1 - stable/13 - TLS: Use <machine/tls.h> for libc and rtld.

From: John Baldwin <jhb_at_FreeBSD.org>
Date: Fri, 29 Apr 2022 20:55:17 UTC
The branch stable/13 has been updated by jhb:

URL: https://cgit.FreeBSD.org/src/commit/?id=a641444939d170dfa803fca75d92655c2bf28244

commit a641444939d170dfa803fca75d92655c2bf28244
Author:     John Baldwin <jhb@FreeBSD.org>
AuthorDate: 2021-12-09 21:17:54 +0000
Commit:     John Baldwin <jhb@FreeBSD.org>
CommitDate: 2022-04-29 20:50:05 +0000

    TLS: Use <machine/tls.h> for libc and rtld.
    
    - Include <machine/tls.h> in MD rtld_machdep.h headers.
    
    - Remove local definitions of TLS_* constants from rtld_machdep.h
      headers and libc using the values from <machine/tls.h> instead.
    
    - Use _tcb_set() instead of inlined versions in MD
      allocate_initial_tls() routines in rtld.  The one exception is amd64
      whose _tcb_set() invokes the amd64_set_fsbase ifunc.  rtld cannot
      use ifuncs, so amd64 inlines the logic to optionally write to fsbase
      directly.
    
    - Use _tcb_set() instead of _set_tp() in libc.
    
    - Use '&_tcb_get()->tcb_dtv' instead of _get_tp() in both rtld and libc.
      This permits removing _get_tp.c from rtld.
    
    - Use TLS_TCB_SIZE and TLS_TCB_ALIGN with allocate_tls() in MD
      allocate_initial_tls() routines in rtld.
    
    Reviewed by:    kib, jrtc27 (earlier version)
    Differential Revision:  https://reviews.freebsd.org/D33353
    
    (cherry picked from commit 8bcdb144ebe391ce243c71caf06cf417d96ce335)
---
 lib/libc/gen/tls.c                        | 25 +++++--------------------
 libexec/rtld-elf/aarch64/reloc.c          |  9 +++------
 libexec/rtld-elf/aarch64/rtld_machdep.h   |  5 +----
 libexec/rtld-elf/amd64/reloc.c            | 11 ++++++++---
 libexec/rtld-elf/amd64/rtld_machdep.h     |  4 +---
 libexec/rtld-elf/arm/reloc.c              | 24 ++++--------------------
 libexec/rtld-elf/arm/rtld_machdep.h       |  5 +----
 libexec/rtld-elf/i386/reloc.c             | 12 ++++++------
 libexec/rtld-elf/i386/rtld_machdep.h      |  4 +---
 libexec/rtld-elf/mips/reloc.c             | 11 ++++-------
 libexec/rtld-elf/mips/rtld_machdep.h      |  2 --
 libexec/rtld-elf/powerpc/reloc.c          | 17 ++++-------------
 libexec/rtld-elf/powerpc/rtld_machdep.h   |  6 +-----
 libexec/rtld-elf/powerpc64/reloc.c        | 13 ++++---------
 libexec/rtld-elf/powerpc64/rtld_machdep.h |  6 +-----
 libexec/rtld-elf/riscv/reloc.c            | 10 +++-------
 libexec/rtld-elf/riscv/rtld_machdep.h     |  5 +----
 libexec/rtld-elf/rtld-libc/Makefile.inc   |  2 --
 libexec/rtld-elf/rtld.c                   |  8 ++++----
 libexec/rtld-elf/rtld.h                   |  3 +--
 20 files changed, 53 insertions(+), 129 deletions(-)

diff --git a/lib/libc/gen/tls.c b/lib/libc/gen/tls.c
index 5995cf605ed3..d75b883b68db 100644
--- a/lib/libc/gen/tls.c
+++ b/lib/libc/gen/tls.c
@@ -72,18 +72,6 @@ void _rtld_free_tls(void *tls, size_t tcbsize, size_t tcbalign);
 void *__libc_allocate_tls(void *oldtls, size_t tcbsize, size_t tcbalign);
 void __libc_free_tls(void *tls, size_t tcbsize, size_t tcbalign);
 
-#if defined(__amd64__) || defined(__aarch64__) || defined(__riscv)
-#define TLS_TCB_ALIGN 16
-#elif defined(__arm__) || defined(__mips__)
-#define TLS_TCB_ALIGN 8
-#elif defined(__powerpc__)
-#define TLS_TCB_ALIGN TLS_TCB_SIZE
-#elif defined(__i386__)
-#define TLS_TCB_ALIGN 4
-#else
-#error TLS_TCB_ALIGN undefined for target architecture
-#endif
-
 #ifndef PIC
 
 static size_t libc_tls_static_space;
@@ -95,11 +83,10 @@ static void *libc_tls_init;
 void *
 __libc_tls_get_addr(void *vti)
 {
-	Elf_Addr **dtvp, *dtv;
+	uintptr_t *dtv;
 	tls_index *ti;
 
-	dtvp = _get_tp();
-	dtv = *dtvp;
+	dtv = _tcb_get()->tcb_dtv;
 	ti = vti;
 	return ((char *)(dtv[ti->ti_module + 1] + ti->ti_offset) +
 	    TLS_DTV_OFFSET);
@@ -165,7 +152,7 @@ libc_free_aligned(void *ptr)
  *   described in [3] where TP points (with bias) to TLS and TCB immediately
  *   precedes TLS without any alignment gap[4]. Only TLS should be aligned.
  *   The TCB[0] points to DTV vector and DTV values are biased by constant
- *   value (0x8000) from real addresses[5].
+ *   value (TLS_DTV_OFFSET) from real addresses[5].
  *
  * [1] Ulrich Drepper: ELF Handling for Thread-Local Storage
  *     www.akkadia.org/drepper/tls.pdf
@@ -178,7 +165,7 @@ libc_free_aligned(void *ptr)
  *     https://members.openpowerfoundation.org/document/dl/576
  *
  * [4] Its unclear if "without any alignment gap" is hard ABI requirement,
- *     but we must follow this rule due to suboptimal _set_tp()
+ *     but we must follow this rule due to suboptimal _tcb_set()
  *     (aka <ARCH>_SET_TP) implementation. This function doesn't expect TP but
  *     TCB as argument.
  *
@@ -310,8 +297,6 @@ __libc_allocate_tls(void *oldtcb, size_t tcbsize, size_t tcbalign)
 
 #ifdef TLS_VARIANT_II
 
-#define	TLS_TCB_SIZE	(3 * sizeof(Elf_Addr))
-
 /*
  * Free Static TLS using the Variant II method.
  */
@@ -465,6 +450,6 @@ _init_tls(void)
 	}
 	tls = _rtld_allocate_tls(NULL, TLS_TCB_SIZE, TLS_TCB_ALIGN);
 
-	_set_tp(tls);
+	_tcb_set(tls);
 #endif
 }
diff --git a/libexec/rtld-elf/aarch64/reloc.c b/libexec/rtld-elf/aarch64/reloc.c
index f697e4a03054..d83da4df0ec5 100644
--- a/libexec/rtld-elf/aarch64/reloc.c
+++ b/libexec/rtld-elf/aarch64/reloc.c
@@ -516,7 +516,6 @@ reloc_non_plt(Obj_Entry *obj, Obj_Entry *obj_rtld, int flags,
 void
 allocate_initial_tls(Obj_Entry *objs)
 {
-	Elf_Addr **tp;
 
 	/*
 	* Fix the size of the static TLS block by using the maximum
@@ -526,16 +525,14 @@ allocate_initial_tls(Obj_Entry *objs)
 	tls_static_space = tls_last_offset + tls_last_size +
 	    RTLD_STATIC_TLS_EXTRA;
 
-	tp = (Elf_Addr **) allocate_tls(objs, NULL, TLS_TCB_SIZE, 16);
-
-	asm volatile("msr	tpidr_el0, %0" : : "r"(tp));
+	_tcb_set(allocate_tls(objs, NULL, TLS_TCB_SIZE, TLS_TCB_ALIGN));
 }
 
 void *
 __tls_get_addr(tls_index* ti)
 {
-	Elf_Addr **dtvp;
+	uintptr_t **dtvp;
 
-	dtvp = _get_tp();
+	dtvp = &_tcb_get()->tcb_dtv;
 	return (tls_get_addr_common(dtvp, ti->ti_module, ti->ti_offset));
 }
diff --git a/libexec/rtld-elf/aarch64/rtld_machdep.h b/libexec/rtld-elf/aarch64/rtld_machdep.h
index 585689afe197..ce6f6fc6c7c0 100644
--- a/libexec/rtld-elf/aarch64/rtld_machdep.h
+++ b/libexec/rtld-elf/aarch64/rtld_machdep.h
@@ -35,6 +35,7 @@
 
 #include <sys/types.h>
 #include <machine/atomic.h>
+#include <machine/tls.h>
 
 struct Struct_Obj_Entry;
 
@@ -79,7 +80,6 @@ Elf_Addr reloc_jmpslot(Elf_Addr *where, Elf_Addr target,
 #define calculate_tls_post_size(align) \
 	round(TLS_TCB_SIZE, align) - TLS_TCB_SIZE
 
-#define	TLS_TCB_SIZE	16
 typedef struct {
     unsigned long ti_module;
     unsigned long ti_offset;
@@ -92,7 +92,4 @@ extern void *__tls_get_addr(tls_index *ti);
 
 #define md_abi_variant_hook(x)
 
-#define	TLS_VARIANT_I	1
-#define	TLS_DTV_OFFSET	0
-
 #endif
diff --git a/libexec/rtld-elf/amd64/reloc.c b/libexec/rtld-elf/amd64/reloc.c
index 62547f1bb4a2..c7cf7bd58845 100644
--- a/libexec/rtld-elf/amd64/reloc.c
+++ b/libexec/rtld-elf/amd64/reloc.c
@@ -530,7 +530,12 @@ allocate_initial_tls(Obj_Entry *objs)
 	 */
 	tls_static_space = tls_last_offset + RTLD_STATIC_TLS_EXTRA;
 
-	addr = allocate_tls(objs, 0, 3 * sizeof(Elf_Addr), 16);
+	addr = allocate_tls(objs, 0, TLS_TCB_SIZE, TLS_TCB_ALIGN);
+
+	/*
+	 * This does not use _tcb_set() as it calls amd64_set_fsbase()
+	 * which is an ifunc and rtld must not use ifuncs.
+	 */
 	if (__getosreldate() >= P_OSREL_WRFSBASE &&
 	    (cpu_stdext_feature & CPUID_STDEXT_FSGSBASE) != 0)
 		wrfsbase((uintptr_t)addr);
@@ -541,9 +546,9 @@ allocate_initial_tls(Obj_Entry *objs)
 void *
 __tls_get_addr(tls_index *ti)
 {
-	Elf_Addr **dtvp;
+	uintptr_t **dtvp;
 
-	dtvp = _get_tp();
+	dtvp = &_tcb_get()->tcb_dtv;
 	return (tls_get_addr_common(dtvp, ti->ti_module, ti->ti_offset));
 }
 
diff --git a/libexec/rtld-elf/amd64/rtld_machdep.h b/libexec/rtld-elf/amd64/rtld_machdep.h
index baa926445006..99f020e60221 100644
--- a/libexec/rtld-elf/amd64/rtld_machdep.h
+++ b/libexec/rtld-elf/amd64/rtld_machdep.h
@@ -33,6 +33,7 @@
 
 #include <sys/types.h>
 #include <machine/atomic.h>
+#include <machine/tls.h>
 
 struct Struct_Obj_Entry;
 
@@ -73,9 +74,6 @@ void *__tls_get_addr(tls_index *ti) __exported;
 
 #define md_abi_variant_hook(x)
 
-#define	TLS_VARIANT_II	1
-#define	TLS_DTV_OFFSET	0
-
 size_t calculate_first_tls_offset(size_t size, size_t align, size_t offset);
 size_t calculate_tls_offset(size_t prev_offset, size_t prev_size, size_t size,
     size_t align, size_t offset);
diff --git a/libexec/rtld-elf/arm/reloc.c b/libexec/rtld-elf/arm/reloc.c
index 4eb3fc7e6929..c3219d52bf7e 100644
--- a/libexec/rtld-elf/arm/reloc.c
+++ b/libexec/rtld-elf/arm/reloc.c
@@ -491,9 +491,6 @@ ifunc_init(Elf_Auxinfo aux_info[__min_size(AT_COUNT)] __unused)
 void
 allocate_initial_tls(Obj_Entry *objs)
 {
-#ifdef ARM_TP_ADDRESS
-	void **_tp = (void **)ARM_TP_ADDRESS;
-#endif
 
 	/*
 	* Fix the size of the static TLS block by using the maximum
@@ -503,27 +500,14 @@ allocate_initial_tls(Obj_Entry *objs)
 
 	tls_static_space = tls_last_offset + tls_last_size + RTLD_STATIC_TLS_EXTRA;
 
-#ifdef ARM_TP_ADDRESS
-	(*_tp) = (void *) allocate_tls(objs, NULL, TLS_TCB_SIZE, 8);
-#else
-	sysarch(ARM_SET_TP, allocate_tls(objs, NULL, TLS_TCB_SIZE, 8));
-#endif
+	_tcb_set(allocate_tls(objs, NULL, TLS_TCB_SIZE, TLS_TCB_ALIGN));
 }
 
 void *
 __tls_get_addr(tls_index* ti)
 {
-	char *p;
-#ifdef ARM_TP_ADDRESS
-	void **_tp = (void **)ARM_TP_ADDRESS;
-
-	p = tls_get_addr_common((Elf_Addr **)(*_tp), ti->ti_module, ti->ti_offset);
-#else
-	void *_tp;
-	__asm __volatile("mrc  p15, 0, %0, c13, c0, 3"		\
-	    : "=r" (_tp));
-	p = tls_get_addr_common((Elf_Addr **)(_tp), ti->ti_module, ti->ti_offset);
-#endif
+	uintptr_t **dtvp;
 
-	return (p);
+	dtvp = &_tcb_get()->tcb_dtv;
+	return (tls_get_addr_common(dtvp, ti->ti_module, ti->ti_offset));
 }
diff --git a/libexec/rtld-elf/arm/rtld_machdep.h b/libexec/rtld-elf/arm/rtld_machdep.h
index 26e677f26d5a..c9ec047ec34e 100644
--- a/libexec/rtld-elf/arm/rtld_machdep.h
+++ b/libexec/rtld-elf/arm/rtld_machdep.h
@@ -34,6 +34,7 @@
 #include <sys/types.h>
 #include <machine/atomic.h>
 #include <machine/acle-compat.h>
+#include <machine/tls.h>
 
 struct Struct_Obj_Entry;
 
@@ -56,7 +57,6 @@ Elf_Addr reloc_jmpslot(Elf_Addr *where, Elf_Addr target,
 #define	call_ifunc_resolver(ptr) \
 	(((Elf_Addr (*)(void))ptr)())
 
-#define	TLS_TCB_SIZE	8
 typedef struct {
 	unsigned long ti_module;
 	unsigned long ti_offset;
@@ -84,7 +84,4 @@ extern void arm_abi_variant_hook(Elf_Auxinfo **);
 #define md_abi_variant_hook(x)
 #endif
 
-#define	TLS_VARIANT_I	1
-#define	TLS_DTV_OFFSET	0
-
 #endif
diff --git a/libexec/rtld-elf/i386/reloc.c b/libexec/rtld-elf/i386/reloc.c
index 603a93a2338c..b9c16e2cd154 100644
--- a/libexec/rtld-elf/i386/reloc.c
+++ b/libexec/rtld-elf/i386/reloc.c
@@ -513,8 +513,8 @@ allocate_initial_tls(Obj_Entry *objs)
      * use.
      */
     tls_static_space = tls_last_offset + RTLD_STATIC_TLS_EXTRA;
-    tls = allocate_tls(objs, NULL, 3*sizeof(Elf_Addr), sizeof(Elf_Addr));
-    i386_set_gsbase(tls);
+    tls = allocate_tls(objs, NULL, TLS_TCB_SIZE, TLS_TCB_ALIGN);
+    _tcb_set(tls);
 }
 
 /* GNU ABI */
@@ -522,9 +522,9 @@ __attribute__((__regparm__(1)))
 void *
 ___tls_get_addr(tls_index *ti)
 {
-	Elf_Addr **dtvp;
+	uintptr_t **dtvp;
 
-	dtvp = _get_tp();
+	dtvp = &_tcb_get()->tcb_dtv;
 	return (tls_get_addr_common(dtvp, ti->ti_module, ti->ti_offset));
 }
 
@@ -532,9 +532,9 @@ ___tls_get_addr(tls_index *ti)
 void *
 __tls_get_addr(tls_index *ti)
 {
-	Elf_Addr **dtvp;
+	uintptr_t **dtvp;
 
-	dtvp = _get_tp();
+	dtvp = &_tcb_get()->tcb_dtv;
 	return (tls_get_addr_common(dtvp, ti->ti_module, ti->ti_offset));
 }
 
diff --git a/libexec/rtld-elf/i386/rtld_machdep.h b/libexec/rtld-elf/i386/rtld_machdep.h
index 33e7d95f966d..ee41e31bd591 100644
--- a/libexec/rtld-elf/i386/rtld_machdep.h
+++ b/libexec/rtld-elf/i386/rtld_machdep.h
@@ -33,6 +33,7 @@
 
 #include <sys/types.h>
 #include <machine/atomic.h>
+#include <machine/tls.h>
 
 struct Struct_Obj_Entry;
 
@@ -74,9 +75,6 @@ void *__tls_get_addr(tls_index *ti) __exported;
 
 #define md_abi_variant_hook(x)
 
-#define	TLS_VARIANT_II	1
-#define	TLS_DTV_OFFSET	0
-
 size_t calculate_first_tls_offset(size_t size, size_t align, size_t offset);
 size_t calculate_tls_offset(size_t prev_offset, size_t prev_size, size_t size,
     size_t align, size_t offset);
diff --git a/libexec/rtld-elf/mips/reloc.c b/libexec/rtld-elf/mips/reloc.c
index a39a96e20b00..12bfb01ba4ad 100644
--- a/libexec/rtld-elf/mips/reloc.c
+++ b/libexec/rtld-elf/mips/reloc.c
@@ -762,7 +762,6 @@ ifunc_init(Elf_Auxinfo aux_info[__min_size(AT_COUNT)] __unused)
 void
 allocate_initial_tls(Obj_Entry *objs)
 {
-	char *tls;
 	
 	/*
 	 * Fix the size of the static TLS block by using the maximum
@@ -771,19 +770,17 @@ allocate_initial_tls(Obj_Entry *objs)
 	 */
 	tls_static_space = tls_last_offset + tls_last_size + RTLD_STATIC_TLS_EXTRA;
 
-	tls = (char *) allocate_tls(objs, NULL, TLS_TCB_SIZE, 8);
-
-	sysarch(MIPS_SET_TLS, tls);
+	_tcb_set(allocate_tls(objs, NULL, TLS_TCB_SIZE, TLS_TCB_ALIGN));
 }
 
 void *
 __tls_get_addr(tls_index* ti)
 {
-	Elf_Addr **tls;
+	uintptr_t **dtvp;
 	char *p;
 
-	tls = _get_tp();
-	p = tls_get_addr_common(tls, ti->ti_module, ti->ti_offset);
+	dtvp = &_tcb_get()->tcb_dtv;
+	p = tls_get_addr_common(dtvp, ti->ti_module, ti->ti_offset);
 
 	return (p + TLS_DTV_OFFSET);
 }
diff --git a/libexec/rtld-elf/mips/rtld_machdep.h b/libexec/rtld-elf/mips/rtld_machdep.h
index eac122beaa46..2826a56f34c1 100644
--- a/libexec/rtld-elf/mips/rtld_machdep.h
+++ b/libexec/rtld-elf/mips/rtld_machdep.h
@@ -77,6 +77,4 @@ extern void *__tls_get_addr(tls_index *ti);
 
 #define md_abi_variant_hook(x)
 
-#define	TLS_VARIANT_I	1
-
 #endif
diff --git a/libexec/rtld-elf/powerpc/reloc.c b/libexec/rtld-elf/powerpc/reloc.c
index b624599d7df4..ab09b1536d80 100644
--- a/libexec/rtld-elf/powerpc/reloc.c
+++ b/libexec/rtld-elf/powerpc/reloc.c
@@ -812,7 +812,6 @@ ifunc_init(Elf_Auxinfo aux_info[__min_size(AT_COUNT)] __unused)
 void
 allocate_initial_tls(Obj_Entry *list)
 {
-	Elf_Addr **tp;
 
 	/*
 	* Fix the size of the static TLS block by using the maximum
@@ -822,25 +821,17 @@ allocate_initial_tls(Obj_Entry *list)
 
 	tls_static_space = tls_last_offset + tls_last_size + RTLD_STATIC_TLS_EXTRA;
 
-	tp = (Elf_Addr **)((char *) allocate_tls(list, NULL, TLS_TCB_SIZE, 8)
-	    + TLS_TP_OFFSET + TLS_TCB_SIZE);
-
-	/*
-	 * XXX gcc seems to ignore 'tp = _tp;' 
-	 */
-	 
-	__asm __volatile("mr 2,%0" :: "r"(tp));
+	_tcb_set(allocate_tls(list, NULL, TLS_TCB_SIZE, TLS_TCB_ALIGN));
 }
 
 void*
 __tls_get_addr(tls_index* ti)
 {
-	register Elf_Addr **tp;
+	uintptr_t **dtvp;
 	char *p;
 
-	__asm __volatile("mr %0,2" : "=r"(tp));
-	p = tls_get_addr_common((Elf_Addr**)((Elf_Addr)tp - TLS_TP_OFFSET 
-	    - TLS_TCB_SIZE), ti->ti_module, ti->ti_offset);
+	dtvp = &_tcb_get()->tcb_dtv;
+	p = tls_get_addr_common(dtvp, ti->ti_module, ti->ti_offset);
 
 	return (p + TLS_DTV_OFFSET);
 }
diff --git a/libexec/rtld-elf/powerpc/rtld_machdep.h b/libexec/rtld-elf/powerpc/rtld_machdep.h
index c98dc7d3acb5..b3e2e3fafa9f 100644
--- a/libexec/rtld-elf/powerpc/rtld_machdep.h
+++ b/libexec/rtld-elf/powerpc/rtld_machdep.h
@@ -33,6 +33,7 @@
 
 #include <sys/types.h>
 #include <machine/atomic.h>
+#include <machine/tls.h>
 
 struct Struct_Obj_Entry;
 
@@ -73,11 +74,6 @@ void _rtld_powerpc_pltcall(void);
  * TLS
  */
 
-#define	TLS_VARIANT_I	1
-#define TLS_TP_OFFSET	0x7000
-#define TLS_DTV_OFFSET	0x8000
-#define TLS_TCB_SIZE	8
-
 #define round(size, align) \
     (((size) + (align) - 1) & ~((align) - 1))
 #define calculate_first_tls_offset(size, align, offset)	\
diff --git a/libexec/rtld-elf/powerpc64/reloc.c b/libexec/rtld-elf/powerpc64/reloc.c
index a7a963a290e8..1dcdd2478403 100644
--- a/libexec/rtld-elf/powerpc64/reloc.c
+++ b/libexec/rtld-elf/powerpc64/reloc.c
@@ -709,7 +709,6 @@ ifunc_init(Elf_Auxinfo aux_info[__min_size(AT_COUNT)] __unused)
 void
 allocate_initial_tls(Obj_Entry *list)
 {
-	Elf_Addr **tp;
 
 	/*
 	* Fix the size of the static TLS block by using the maximum
@@ -719,21 +718,17 @@ allocate_initial_tls(Obj_Entry *list)
 
 	tls_static_space = tls_last_offset + tls_last_size + RTLD_STATIC_TLS_EXTRA;
 
-	tp = (Elf_Addr **)((char *)allocate_tls(list, NULL, TLS_TCB_SIZE, 16)
-	    + TLS_TP_OFFSET + TLS_TCB_SIZE);
-
-	__asm __volatile("mr 13,%0" :: "r"(tp));
+	_tcb_set(allocate_tls(list, NULL, TLS_TCB_SIZE, TLS_TCB_ALIGN));
 }
 
 void*
 __tls_get_addr(tls_index* ti)
 {
-	Elf_Addr **tp;
+	uintptr_t **dtvp;
 	char *p;
 
-	__asm __volatile("mr %0,13" : "=r"(tp));
-	p = tls_get_addr_common((Elf_Addr**)((Elf_Addr)tp - TLS_TP_OFFSET 
-	    - TLS_TCB_SIZE), ti->ti_module, ti->ti_offset);
+	dtvp = &_tcb_get()->tcb_dtv;
+	p = tls_get_addr_common(dtvp, ti->ti_module, ti->ti_offset);
 
 	return (p + TLS_DTV_OFFSET);
 }
diff --git a/libexec/rtld-elf/powerpc64/rtld_machdep.h b/libexec/rtld-elf/powerpc64/rtld_machdep.h
index 98bdbb33affe..c8dcebe45ae2 100644
--- a/libexec/rtld-elf/powerpc64/rtld_machdep.h
+++ b/libexec/rtld-elf/powerpc64/rtld_machdep.h
@@ -33,6 +33,7 @@
 
 #include <sys/types.h>
 #include <machine/atomic.h>
+#include <machine/tls.h>
 
 struct Struct_Obj_Entry;
 
@@ -65,11 +66,6 @@ extern u_long cpu_features2; /* r4 */
  * TLS
  */
 
-#define	TLS_VARIANT_I	1
-#define TLS_TP_OFFSET	0x7000
-#define TLS_DTV_OFFSET	0x8000
-#define TLS_TCB_SIZE	16
-
 #define round(size, align) \
     (((size) + (align) - 1) & ~((align) - 1))
 #define calculate_first_tls_offset(size, align, offset)	\
diff --git a/libexec/rtld-elf/riscv/reloc.c b/libexec/rtld-elf/riscv/reloc.c
index 70db8379ade5..48d513b94ec6 100644
--- a/libexec/rtld-elf/riscv/reloc.c
+++ b/libexec/rtld-elf/riscv/reloc.c
@@ -387,7 +387,6 @@ ifunc_init(Elf_Auxinfo aux_info[__min_size(AT_COUNT)] __unused)
 void
 allocate_initial_tls(Obj_Entry *objs)
 {
-	Elf_Addr **tp;
 
 	/*
 	 * Fix the size of the static TLS block by using the maximum
@@ -397,19 +396,16 @@ allocate_initial_tls(Obj_Entry *objs)
 	tls_static_space = tls_last_offset + tls_last_size +
 	    RTLD_STATIC_TLS_EXTRA;
 
-	tp = (Elf_Addr **)((char *)allocate_tls(objs, NULL, TLS_TCB_SIZE, 16)
-	    + TLS_TP_OFFSET + TLS_TCB_SIZE);
-
-	__asm __volatile("mv  tp, %0" :: "r"(tp));
+	_tcb_set(allocate_tls(objs, NULL, TLS_TCB_SIZE, TLS_TCB_ALIGN));
 }
 
 void *
 __tls_get_addr(tls_index* ti)
 {
-	Elf_Addr **dtvp;
+	uintptr_t **dtvp;
 	void *p;
 
-	dtvp = _get_tp();
+	dtvp = &_tcb_get()->tcb_dtv;
 	p = tls_get_addr_common(dtvp, ti->ti_module, ti->ti_offset);
 
 	return ((char*)p + TLS_DTV_OFFSET);
diff --git a/libexec/rtld-elf/riscv/rtld_machdep.h b/libexec/rtld-elf/riscv/rtld_machdep.h
index 7a087ca2ab7c..e422905f3850 100644
--- a/libexec/rtld-elf/riscv/rtld_machdep.h
+++ b/libexec/rtld-elf/riscv/rtld_machdep.h
@@ -40,6 +40,7 @@
 
 #include <sys/types.h>
 #include <machine/atomic.h>
+#include <machine/tls.h>
 
 struct Struct_Obj_Entry;
 
@@ -82,10 +83,6 @@ Elf_Addr reloc_jmpslot(Elf_Addr *where, Elf_Addr target,
 /*
  * TLS
  */
-#define	TLS_VARIANT_I	1
-#define	TLS_TP_OFFSET	0x0
-#define	TLS_DTV_OFFSET	0x800
-#define	TLS_TCB_SIZE	16
 
 #define round(size, align) \
     (((size) + (align) - 1) & ~((align) - 1))
diff --git a/libexec/rtld-elf/rtld-libc/Makefile.inc b/libexec/rtld-elf/rtld-libc/Makefile.inc
index ade2dc962aa2..99da16d06509 100644
--- a/libexec/rtld-elf/rtld-libc/Makefile.inc
+++ b/libexec/rtld-elf/rtld-libc/Makefile.inc
@@ -67,8 +67,6 @@ _libc_other_objects+=syncicache abs
 _libc_other_objects+=syncicache
 .endif
 
-_libc_other_objects+=_get_tp
-
 # Extract all the .o files from libc_nossp_pic.a. This ensures that
 # we don't accidentally pull in the interposing table or similar by linking
 # directly against libc_nossp_pic.a
diff --git a/libexec/rtld-elf/rtld.c b/libexec/rtld-elf/rtld.c
index 1b874327ce0c..8920522a50c4 100644
--- a/libexec/rtld-elf/rtld.c
+++ b/libexec/rtld-elf/rtld.c
@@ -4188,14 +4188,14 @@ dlinfo(void *handle, int request, void *p)
 static void
 rtld_fill_dl_phdr_info(const Obj_Entry *obj, struct dl_phdr_info *phdr_info)
 {
-	Elf_Addr **dtvp;
+	uintptr_t **dtvp;
 
 	phdr_info->dlpi_addr = (Elf_Addr)obj->relocbase;
 	phdr_info->dlpi_name = obj->path;
 	phdr_info->dlpi_phdr = obj->phdr;
 	phdr_info->dlpi_phnum = obj->phsize / sizeof(obj->phdr[0]);
 	phdr_info->dlpi_tls_modid = obj->tlsindex;
-	dtvp = _get_tp();
+	dtvp = &_tcb_get()->tcb_dtv;
 	phdr_info->dlpi_tls_data = (char *)tls_get_addr_slow(dtvp,
 	    obj->tlsindex, 0, true) + TLS_DTV_OFFSET;
 	phdr_info->dlpi_adds = obj_loads;
@@ -5216,9 +5216,9 @@ tls_get_addr_slow(Elf_Addr **dtvp, int index, size_t offset, bool locked)
 }
 
 void *
-tls_get_addr_common(Elf_Addr **dtvp, int index, size_t offset)
+tls_get_addr_common(uintptr_t **dtvp, int index, size_t offset)
 {
-	Elf_Addr *dtv;
+	uintptr_t *dtv;
 
 	dtv = *dtvp;
 	/* Check dtv generation in case new modules have arrived */
diff --git a/libexec/rtld-elf/rtld.h b/libexec/rtld-elf/rtld.h
index 48b3ad526828..0dab41c9b7d6 100644
--- a/libexec/rtld-elf/rtld.h
+++ b/libexec/rtld-elf/rtld.h
@@ -396,7 +396,7 @@ void _rtld_bind_start(void);
 void *rtld_resolve_ifunc(const Obj_Entry *obj, const Elf_Sym *def);
 void symlook_init(SymLook *, const char *);
 int symlook_obj(SymLook *, const Obj_Entry *);
-void *tls_get_addr_common(Elf_Addr** dtvp, int index, size_t offset);
+void *tls_get_addr_common(uintptr_t **dtvp, int index, size_t offset);
 void *allocate_tls(Obj_Entry *, void *, size_t, size_t);
 void free_tls(void *, size_t, size_t);
 void *allocate_module_tls(int index);
@@ -404,7 +404,6 @@ bool allocate_tls_offset(Obj_Entry *obj);
 void free_tls_offset(Obj_Entry *obj);
 const Ver_Entry *fetch_ventry(const Obj_Entry *obj, unsigned long);
 int convert_prot(int elfflags);
-void *_get_tp(void);	/* libc implementation */
 bool check_elf_headers(const Elf_Ehdr *hdr, const char *path);
 
 /*