svn commit: r342113 - in head/libexec/rtld-elf: . aarch64 amd64 arm i386 mips powerpc powerpc64 riscv sparc64
Michal Meloun
mmel at FreeBSD.org
Sat Dec 15 10:38:11 UTC 2018
Author: mmel
Date: Sat Dec 15 10:38:07 2018
New Revision: 342113
URL: https://svnweb.freebsd.org/changeset/base/342113
Log:
Improve R_AARCH64_TLSDESC relocation.
The original code did not support dynamically loaded libraries and used
suboptimal access to TLS variables.
The new implementation removes lazy resolving of TLS relocations: due to a
flaw in the TLSDESC design, it is impossible to switch the resolver function
at runtime without expensive locking.
Because of this, three specialized resolvers are implemented:
- a load-time resolver for TLS relocations from libraries loaded with the main
executable (thus with a known TLS offset).
- a resolver for undefined weak thread-local symbols.
- a slower lazy resolver for dynamically loaded libraries, with a fast path for
already resolved symbols.
PR: 228892, 232149, 233204, 232311
MFC after: 2 weeks
Differential Revision: https://reviews.freebsd.org/D18417
Modified:
head/libexec/rtld-elf/aarch64/reloc.c
head/libexec/rtld-elf/aarch64/rtld_start.S
head/libexec/rtld-elf/amd64/reloc.c
head/libexec/rtld-elf/arm/reloc.c
head/libexec/rtld-elf/i386/reloc.c
head/libexec/rtld-elf/mips/reloc.c
head/libexec/rtld-elf/powerpc/reloc.c
head/libexec/rtld-elf/powerpc64/reloc.c
head/libexec/rtld-elf/riscv/reloc.c
head/libexec/rtld-elf/rtld.c
head/libexec/rtld-elf/rtld.h
head/libexec/rtld-elf/sparc64/reloc.c
Modified: head/libexec/rtld-elf/aarch64/reloc.c
==============================================================================
--- head/libexec/rtld-elf/aarch64/reloc.c Sat Dec 15 09:12:19 2018 (r342112)
+++ head/libexec/rtld-elf/aarch64/reloc.c Sat Dec 15 10:38:07 2018 (r342113)
@@ -49,7 +49,8 @@ __FBSDID("$FreeBSD$");
* This is not the correct prototype, but we only need it for
* a function pointer to a simple asm function.
*/
-void *_rtld_tlsdesc(void *);
+void *_rtld_tlsdesc_static(void *);
+void *_rtld_tlsdesc_undef(void *);
void *_rtld_tlsdesc_dynamic(void *);
void _exit(int);
@@ -122,78 +123,58 @@ do_copy_relocations(Obj_Entry *dstobj)
}
struct tls_data {
- int64_t index;
- Obj_Entry *obj;
- const Elf_Rela *rela;
+ Elf_Addr dtv_gen;
+ int tls_index;
+ Elf_Addr tls_offs;
};
-int64_t rtld_tlsdesc_handle(struct tls_data *tlsdesc, int flags);
-
-static struct tls_data *
-reloc_tlsdesc_alloc(Obj_Entry *obj, const Elf_Rela *rela)
+static Elf_Addr
+reloc_tlsdesc_alloc(int tlsindex, Elf_Addr tlsoffs)
{
struct tls_data *tlsdesc;
tlsdesc = xmalloc(sizeof(struct tls_data));
- tlsdesc->index = -1;
- tlsdesc->obj = obj;
- tlsdesc->rela = rela;
+ tlsdesc->dtv_gen = tls_dtv_generation;
+ tlsdesc->tls_index = tlsindex;
+ tlsdesc->tls_offs = tlsoffs;
- return (tlsdesc);
+ return ((Elf_Addr)tlsdesc);
}
-/*
- * Look up the symbol to find its tls index
- */
-static int64_t
-rtld_tlsdesc_handle_locked(struct tls_data *tlsdesc, int flags,
- RtldLockState *lockstate)
+static void
+reloc_tlsdesc(const Obj_Entry *obj, const Elf_Rela *rela, Elf_Addr *where,
+ int flags, RtldLockState *lockstate)
{
- const Elf_Rela *rela;
const Elf_Sym *def;
const Obj_Entry *defobj;
- Obj_Entry *obj;
+ Elf_Addr offs;
- rela = tlsdesc->rela;
- obj = tlsdesc->obj;
- def = find_symdef(ELF_R_SYM(rela->r_info), obj, &defobj, flags, NULL,
- lockstate);
- if (def == NULL)
- rtld_die();
+ offs = 0;
+ if (ELF_R_SYM(rela->r_info) != 0) {
+ def = find_symdef(ELF_R_SYM(rela->r_info), obj, &defobj, flags,
+ NULL, lockstate);
+ if (def == NULL)
+ rtld_die();
+ offs = def->st_value;
+ obj = defobj;
+ if (def->st_shndx == SHN_UNDEF) {
+ /* Weak undefined thread variable */
+ where[0] = (Elf_Addr)_rtld_tlsdesc_undef;
+ where[1] = rela->r_addend;
+ return;
+ }
+ }
+ offs += rela->r_addend;
- tlsdesc->index = defobj->tlsoffset + def->st_value + rela->r_addend;
-
- return (tlsdesc->index);
-}
-
-int64_t
-rtld_tlsdesc_handle(struct tls_data *tlsdesc, int flags)
-{
- RtldLockState lockstate;
-
- /* We have already found the index, return it */
- if (tlsdesc->index >= 0)
- return (tlsdesc->index);
-
- wlock_acquire(rtld_bind_lock, &lockstate);
- /* tlsdesc->index may have been set by another thread */
- if (tlsdesc->index == -1)
- rtld_tlsdesc_handle_locked(tlsdesc, flags, &lockstate);
- lock_release(rtld_bind_lock, &lockstate);
-
- return (tlsdesc->index);
-}
-
-static void
-reloc_tlsdesc(Obj_Entry *obj, const Elf_Rela *rela, Elf_Addr *where)
-{
- if (ELF_R_SYM(rela->r_info) == 0) {
- where[0] = (Elf_Addr)_rtld_tlsdesc;
- where[1] = obj->tlsoffset + rela->r_addend;
+ if (obj->tlsoffset != 0) {
+ /* Variable is in the initially allocated TLS segment */
+ where[0] = (Elf_Addr)_rtld_tlsdesc_static;
+ where[1] = obj->tlsoffset + offs;
} else {
+ /* TLS offset is unknown at load time, use dynamic resolving */
where[0] = (Elf_Addr)_rtld_tlsdesc_dynamic;
- where[1] = (Elf_Addr)reloc_tlsdesc_alloc(obj, rela);
+ where[1] = reloc_tlsdesc_alloc(obj->tlsindex, offs);
}
}
@@ -201,7 +182,7 @@ reloc_tlsdesc(Obj_Entry *obj, const Elf_Rela *rela, El
* Process the PLT relocations.
*/
int
-reloc_plt(Obj_Entry *obj)
+reloc_plt(Obj_Entry *obj, int flags, RtldLockState *lockstate)
{
const Elf_Rela *relalim;
const Elf_Rela *rela;
@@ -218,7 +199,8 @@ reloc_plt(Obj_Entry *obj)
*where += (Elf_Addr)obj->relocbase;
break;
case R_AARCH64_TLSDESC:
- reloc_tlsdesc(obj, rela, where);
+ reloc_tlsdesc(obj, rela, where, SYMLOOK_IN_PLT | flags,
+ lockstate);
break;
case R_AARCH64_IRELATIVE:
obj->irelative = true;
@@ -458,7 +440,7 @@ reloc_non_plt(Obj_Entry *obj, Obj_Entry *obj_rtld, int
}
break;
case R_AARCH64_TLSDESC:
- reloc_tlsdesc(obj, rela, where);
+ reloc_tlsdesc(obj, rela, where, flags, lockstate);
break;
case R_AARCH64_TLS_TPREL64:
/*
@@ -478,9 +460,25 @@ reloc_non_plt(Obj_Entry *obj, Obj_Entry *obj_rtld, int
return (-1);
}
}
-
- *where = def->st_value + rela->r_addend +
- defobj->tlsoffset;
+ /* Test weak undefined thread variable */
+ if (def->st_shndx != SHN_UNDEF) {
+ *where = def->st_value + rela->r_addend +
+ defobj->tlsoffset;
+ } else {
+ /*
+ * XXX We should relocate undefined thread
+ * weak variable address to NULL, but how?
+ * Can we return error in this situation?
+ */
+ rtld_printf("%s: Unable to relocate undefined "
+ "weak TLS variable\n", obj->path);
+#if 0
+ return (-1);
+#else
+ *where = def->st_value + rela->r_addend +
+ defobj->tlsoffset;
+#endif
+ }
break;
/*
Modified: head/libexec/rtld-elf/aarch64/rtld_start.S
==============================================================================
--- head/libexec/rtld-elf/aarch64/rtld_start.S Sat Dec 15 09:12:19 2018 (r342112)
+++ head/libexec/rtld-elf/aarch64/rtld_start.S Sat Dec 15 10:38:07 2018 (r342113)
@@ -118,55 +118,145 @@ ENTRY(_rtld_bind_start)
END(_rtld_bind_start)
/*
- * uint64_t _rtld_tlsdesc(struct tlsdesc *);
+ * struct rel_tlsdesc {
+ * uint64_t resolver_fnc;
+ * uint64_t resolver_arg;
*
- * struct tlsdesc {
- * uint64_t ptr;
- * uint64_t data;
- * };
*
- * Returns the data.
+ * uint64_t _rtld_tlsdesc_static(struct rel_tlsdesc *);
+ *
+ * Resolver function for TLS symbols resolved at load time
*/
-ENTRY(_rtld_tlsdesc)
+ENTRY(_rtld_tlsdesc_static)
+ .cfi_startproc
ldr x0, [x0, #8]
ret
-END(_rtld_tlsdesc)
+ .cfi_endproc
+END(_rtld_tlsdesc_static)
/*
- * uint64_t _rtld_tlsdesc_dynamic(struct tlsdesc *);
+ * uint64_t _rtld_tlsdesc_undef(struct rel_tlsdesc *);
*
- * TODO: We could lookup the saved index here to skip saving the entire stack.
+ * Resolver function for weak and undefined TLS symbols
*/
+ENTRY(_rtld_tlsdesc_undef)
+ .cfi_startproc
+ str x1, [sp, #-16]!
+ .cfi_adjust_cfa_offset 16
+
+ mrs x1, tpidr_el0
+ ldr x0, [x0, #8]
+ sub x0, x0, x1
+
+ ldr x1, [sp], #16
+ .cfi_adjust_cfa_offset -16
+ .cfi_endproc
+ ret
+END(_rtld_tlsdesc_undef)
+
+/*
+ * uint64_t _rtld_tlsdesc_dynamic(struct rel_tlsdesc *);
+ *
+ * Resolver function for TLS symbols from dlopen()
+ */
ENTRY(_rtld_tlsdesc_dynamic)
- /* Store any registers we may use in rtld_tlsdesc_handle */
- stp x29, x30, [sp, #-(10 * 16)]!
+ .cfi_startproc
+
+ /* Save registers used in fast path */
+ stp x1, x2, [sp, #(-2 * 16)]!
+ stp x3, x4, [sp, #(1 * 16)]
+ .cfi_adjust_cfa_offset 2 * 16
+ .cfi_rel_offset x1, 0
+ .cfi_rel_offset x2, 8
+ .cfi_rel_offset x3, 16
+ .cfi_rel_offset x4, 24
+
+ /* Test fastpath - inlined version of tls_get_addr_common(). */
+ ldr x1, [x0, #8] /* tlsdesc ptr */
+ mrs x4, tpidr_el0
+ ldr x0, [x4] /* DTV pointer */
+ ldr x2, [x0] /* dtv[0] (generation count) */
+ ldr x3, [x1] /* tlsdesc->dtv_gen */
+ cmp x2, x3
+ b.ne 1f /* dtv[0] != tlsdesc->dtv_gen */
+
+ ldr w2, [x1, #8] /* tlsdesc->tls_index */
+ add w2, w2, #1
+ ldr x3, [x0, w2, sxtw #3] /* dtv[tlsdesc->tls_index + 1] */
+ cbz x3, 1f
+
+ /* Return (dtv[tlsdesc->tls_index + 1] + tlsdesc->tls_offs - tp) */
+ ldr x2, [x1, #16] /* tlsdesc->tls_offs */
+ add x2, x2, x3
+ sub x0, x2, x4
+ /* Restore registers and return */
+ ldp x3, x4, [sp, #(1 * 16)]
+ ldp x1, x2, [sp], #(2 * 16)
+ .cfi_adjust_cfa_offset -2 * 16
+ ret
+
+ /*
+ * Slow path
+ * return(
+ * tls_get_addr_common(tp, tlsdesc->tls_index, tlsdesc->tls_offs));
+ *
+ */
+1:
+ /* Save all integer registers */
+ stp x29, x30, [sp, #-(8 * 16)]!
+ .cfi_adjust_cfa_offset 8 * 16
+ .cfi_rel_offset x29, 0
+ .cfi_rel_offset x30, 8
+
mov x29, sp
- stp x1, x2, [sp, #(1 * 16)]
- stp x3, x4, [sp, #(2 * 16)]
- stp x5, x6, [sp, #(3 * 16)]
- stp x7, x8, [sp, #(4 * 16)]
- stp x9, x10, [sp, #(5 * 16)]
- stp x11, x12, [sp, #(6 * 16)]
- stp x13, x14, [sp, #(7 * 16)]
- stp x15, x16, [sp, #(8 * 16)]
- stp x17, x18, [sp, #(9 * 16)]
+ stp x5, x6, [sp, #(1 * 16)]
+ stp x7, x8, [sp, #(2 * 16)]
+ stp x9, x10, [sp, #(3 * 16)]
+ stp x11, x12, [sp, #(4 * 16)]
+ stp x13, x14, [sp, #(5 * 16)]
+ stp x15, x16, [sp, #(6 * 16)]
+ stp x17, x18, [sp, #(7 * 16)]
+ .cfi_rel_offset x5, 16
+ .cfi_rel_offset x6, 24
+ .cfi_rel_offset x7, 32
+ .cfi_rel_offset x8, 40
+ .cfi_rel_offset x9, 48
+ .cfi_rel_offset x10, 56
+ .cfi_rel_offset x11, 64
+ .cfi_rel_offset x12, 72
+ .cfi_rel_offset x13, 80
+ .cfi_rel_offset x14, 88
+ .cfi_rel_offset x15, 96
+ .cfi_rel_offset x16, 104
+ .cfi_rel_offset x17, 112
+ .cfi_rel_offset x18, 120
/* Find the tls offset */
- ldr x0, [x0, #8]
- mov x1, #1
- bl rtld_tlsdesc_handle
+ mov x0, x4 /* tp */
+ mov x3, x1 /* tlsdesc ptr */
+ ldr w1, [x3, #8] /* tlsdesc->tls_index */
+ ldr x2, [x3, #16] /* tlsdesc->tls_offs */
+ bl tls_get_addr_common
+ mrs x1, tpidr_el0
+ sub x0, x0, x1
- /* Restore the registers */
- ldp x17, x18, [sp, #(9 * 16)]
- ldp x15, x16, [sp, #(8 * 16)]
- ldp x13, x14, [sp, #(7 * 16)]
- ldp x11, x12, [sp, #(6 * 16)]
- ldp x9, x10, [sp, #(5 * 16)]
- ldp x7, x8, [sp, #(4 * 16)]
- ldp x5, x6, [sp, #(3 * 16)]
- ldp x3, x4, [sp, #(2 * 16)]
- ldp x1, x2, [sp, #(1 * 16)]
- ldp x29, x30, [sp], #(10 * 16)
+ /* Restore slow path registers */
+ ldp x17, x18, [sp, #(7 * 16)]
+ ldp x15, x16, [sp, #(6 * 16)]
+ ldp x13, x14, [sp, #(5 * 16)]
+ ldp x11, x12, [sp, #(4 * 16)]
+ ldp x9, x10, [sp, #(3 * 16)]
+ ldp x7, x8, [sp, #(2 * 16)]
+ ldp x5, x6, [sp, #(1 * 16)]
+ ldp x29, x30, [sp], #(8 * 16)
+ .cfi_adjust_cfa_offset -8 * 16
+ .cfi_restore x29
+ .cfi_restore x30
+ /* Restore fast path registers and return */
+ ldp x3, x4, [sp, #16]
+ ldp x1, x2, [sp], #(2 * 16)
+ .cfi_adjust_cfa_offset -2 * 16
+ .cfi_endproc
ret
END(_rtld_tlsdesc_dynamic)
Modified: head/libexec/rtld-elf/amd64/reloc.c
==============================================================================
--- head/libexec/rtld-elf/amd64/reloc.c Sat Dec 15 09:12:19 2018 (r342112)
+++ head/libexec/rtld-elf/amd64/reloc.c Sat Dec 15 10:38:07 2018 (r342113)
@@ -323,7 +323,7 @@ done:
/* Process the PLT relocations. */
int
-reloc_plt(Obj_Entry *obj)
+reloc_plt(Obj_Entry *obj, int flags __unused, RtldLockState *lockstate __unused)
{
const Elf_Rela *relalim;
const Elf_Rela *rela;
Modified: head/libexec/rtld-elf/arm/reloc.c
==============================================================================
--- head/libexec/rtld-elf/arm/reloc.c Sat Dec 15 09:12:19 2018 (r342112)
+++ head/libexec/rtld-elf/arm/reloc.c Sat Dec 15 10:38:07 2018 (r342113)
@@ -389,7 +389,7 @@ done:
* * Process the PLT relocations.
* */
int
-reloc_plt(Obj_Entry *obj)
+reloc_plt(Obj_Entry *obj, int flags __unused, RtldLockState *lockstate __unused)
{
const Elf_Rel *rellim;
const Elf_Rel *rel;
Modified: head/libexec/rtld-elf/i386/reloc.c
==============================================================================
--- head/libexec/rtld-elf/i386/reloc.c Sat Dec 15 09:12:19 2018 (r342112)
+++ head/libexec/rtld-elf/i386/reloc.c Sat Dec 15 10:38:07 2018 (r342113)
@@ -274,7 +274,7 @@ done:
/* Process the PLT relocations. */
int
-reloc_plt(Obj_Entry *obj)
+reloc_plt(Obj_Entry *obj, int flags __unused, RtldLockState *lockstate __unused)
{
const Elf_Rel *rellim;
const Elf_Rel *rel;
Modified: head/libexec/rtld-elf/mips/reloc.c
==============================================================================
--- head/libexec/rtld-elf/mips/reloc.c Sat Dec 15 09:12:19 2018 (r342112)
+++ head/libexec/rtld-elf/mips/reloc.c Sat Dec 15 10:38:07 2018 (r342113)
@@ -652,7 +652,7 @@ reloc_non_plt(Obj_Entry *obj, Obj_Entry *obj_rtld, int
* Process the PLT relocations.
*/
int
-reloc_plt(Obj_Entry *obj)
+reloc_plt(Obj_Entry *obj, int flags __unused, RtldLockState *lockstate __unused)
{
const Elf_Rel *rellim;
const Elf_Rel *rel;
Modified: head/libexec/rtld-elf/powerpc/reloc.c
==============================================================================
--- head/libexec/rtld-elf/powerpc/reloc.c Sat Dec 15 09:12:19 2018 (r342112)
+++ head/libexec/rtld-elf/powerpc/reloc.c Sat Dec 15 10:38:07 2018 (r342113)
@@ -402,7 +402,7 @@ reloc_plt_object(Obj_Entry *obj, const Elf_Rela *rela)
* Process the PLT relocations.
*/
int
-reloc_plt(Obj_Entry *obj)
+reloc_plt(Obj_Entry *obj, int flags __unused, RtldLockState *lockstate __unused)
{
const Elf_Rela *relalim;
const Elf_Rela *rela;
Modified: head/libexec/rtld-elf/powerpc64/reloc.c
==============================================================================
--- head/libexec/rtld-elf/powerpc64/reloc.c Sat Dec 15 09:12:19 2018 (r342112)
+++ head/libexec/rtld-elf/powerpc64/reloc.c Sat Dec 15 10:38:07 2018 (r342113)
@@ -376,7 +376,7 @@ reloc_plt_object(Obj_Entry *obj, const Elf_Rela *rela)
* Process the PLT relocations.
*/
int
-reloc_plt(Obj_Entry *obj)
+reloc_plt(Obj_Entry *obj, int flags __unused, RtldLockState *lockstate __unused)
{
const Elf_Rela *relalim;
const Elf_Rela *rela;
Modified: head/libexec/rtld-elf/riscv/reloc.c
==============================================================================
--- head/libexec/rtld-elf/riscv/reloc.c Sat Dec 15 09:12:19 2018 (r342112)
+++ head/libexec/rtld-elf/riscv/reloc.c Sat Dec 15 10:38:07 2018 (r342113)
@@ -145,7 +145,7 @@ do_copy_relocations(Obj_Entry *dstobj)
* Process the PLT relocations.
*/
int
-reloc_plt(Obj_Entry *obj)
+reloc_plt(Obj_Entry *obj, int flags __unused, RtldLockState *lockstate __unused)
{
const Elf_Rela *relalim;
const Elf_Rela *rela;
Modified: head/libexec/rtld-elf/rtld.c
==============================================================================
--- head/libexec/rtld-elf/rtld.c Sat Dec 15 09:12:19 2018 (r342112)
+++ head/libexec/rtld-elf/rtld.c Sat Dec 15 10:38:07 2018 (r342113)
@@ -2890,7 +2890,7 @@ relocate_object(Obj_Entry *obj, bool bind_now, Obj_Ent
init_pltgot(obj);
/* Process the PLT relocations. */
- if (reloc_plt(obj) == -1)
+ if (reloc_plt(obj, flags, lockstate) == -1)
return (-1);
/* Relocate the jump slots if we are doing immediate binding. */
if ((obj->bind_now || bind_now) && reloc_jmpslots(obj, flags,
Modified: head/libexec/rtld-elf/rtld.h
==============================================================================
--- head/libexec/rtld-elf/rtld.h Sat Dec 15 09:12:19 2018 (r342112)
+++ head/libexec/rtld-elf/rtld.h Sat Dec 15 10:38:07 2018 (r342113)
@@ -400,7 +400,7 @@ int convert_prot(int elfflags);
int do_copy_relocations(Obj_Entry *);
int reloc_non_plt(Obj_Entry *, Obj_Entry *, int flags,
struct Struct_RtldLockState *);
-int reloc_plt(Obj_Entry *);
+int reloc_plt(Obj_Entry *, int flags, struct Struct_RtldLockState *);
int reloc_jmpslots(Obj_Entry *, int flags, struct Struct_RtldLockState *);
int reloc_iresolve(Obj_Entry *, struct Struct_RtldLockState *);
int reloc_gnu_ifunc(Obj_Entry *, int flags, struct Struct_RtldLockState *);
Modified: head/libexec/rtld-elf/sparc64/reloc.c
==============================================================================
--- head/libexec/rtld-elf/sparc64/reloc.c Sat Dec 15 09:12:19 2018 (r342112)
+++ head/libexec/rtld-elf/sparc64/reloc.c Sat Dec 15 10:38:07 2018 (r342113)
@@ -487,7 +487,8 @@ reloc_nonplt_object(Obj_Entry *obj, const Elf_Rela *re
}
int
-reloc_plt(Obj_Entry *obj __unused)
+reloc_plt(Obj_Entry *obj __unused, int flags __unused,
+ RtldLockState *lockstate __unused)
{
#if 0
const Obj_Entry *defobj;
More information about the svn-src-all
mailing list