Re: git: 64ba1f4cf3a6 - main - rtld: Implement LD_SHOW_AUXV
Date: Sat, 13 Nov 2021 18:29:24 UTC
On 13 Nov 2021, at 17:57, Jessica Clarke <jrtc27@FreeBSD.org> wrote: > > On 13 Nov 2021, at 17:54, Jessica Clarke <jrtc27@FreeBSD.org> wrote: >> >> On 13 Nov 2021, at 17:33, Konstantin Belousov <kib@FreeBSD.org> wrote: >>> >>> The branch main has been updated by kib: >>> >>> URL: https://cgit.FreeBSD.org/src/commit/?id=64ba1f4cf3a6847a1dacf4bab0409d94898fa168 >>> >>> commit 64ba1f4cf3a6847a1dacf4bab0409d94898fa168 >>> Author: Konstantin Belousov <kib@FreeBSD.org> >>> AuthorDate: 2021-11-13 01:18:13 +0000 >>> Commit: Konstantin Belousov <kib@FreeBSD.org> >>> CommitDate: 2021-11-13 17:33:13 +0000 >>> >>> rtld: Implement LD_SHOW_AUXV >>> >>> It dumps auxv as seen by interpreter, right before starting any user >>> code. >>> >>> Copied from: glibc >>> Sponsored by: The FreeBSD Foundation >>> MFC after: 1 week >>> --- >>> libexec/rtld-elf/rtld.1 | 7 +++++- >>> libexec/rtld-elf/rtld.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++++ >>> 2 files changed, 73 insertions(+), 1 deletion(-) >>> >>> diff --git a/libexec/rtld-elf/rtld.1 b/libexec/rtld-elf/rtld.1 >>> index 187dc105667a..66aa2bdabd17 100644 >>> --- a/libexec/rtld-elf/rtld.1 >>> +++ b/libexec/rtld-elf/rtld.1 >>> @@ -28,7 +28,7 @@ >>> .\" >>> .\" $FreeBSD$ >>> .\" >>> -.Dd August 15, 2021 >>> +.Dd November 13, 2021 >>> .Dt RTLD 1 >>> .Os >>> .Sh NAME >>> @@ -309,6 +309,11 @@ will process the filtee dependencies of the loaded objects immediately, >>> instead of postponing it until required. >>> Normally, the filtees are opened at the time of the first symbol resolution >>> from the filter object. >>> +.It Ev LD_SHOW_AUXV >>> +If set, causes >>> +.Nm >>> +to dump content of the aux vector to standard output, before passing >>> +control to any user code. 
>>> .El >>> .Sh DIRECT EXECUTION MODE >>> .Nm >>> diff --git a/libexec/rtld-elf/rtld.c b/libexec/rtld-elf/rtld.c >>> index c173c5a6e22e..0475134b0d96 100644 >>> --- a/libexec/rtld-elf/rtld.c >>> +++ b/libexec/rtld-elf/rtld.c >>> @@ -104,6 +104,7 @@ static Obj_Entry *dlopen_object(const char *name, int fd, Obj_Entry *refobj, >>> static Obj_Entry *do_load_object(int, const char *, char *, struct stat *, int); >>> static int do_search_info(const Obj_Entry *obj, int, struct dl_serinfo *); >>> static bool donelist_check(DoneList *, const Obj_Entry *); >>> +static void dump_auxv(Elf_Auxinfo **aux_info); >>> static void errmsg_restore(struct dlerror_save *); >>> static struct dlerror_save *errmsg_save(void); >>> static void *fill_search_info(const char *, size_t, void *); >>> @@ -364,6 +365,7 @@ enum { >>> LD_TRACE_LOADED_OBJECTS_FMT1, >>> LD_TRACE_LOADED_OBJECTS_FMT2, >>> LD_TRACE_LOADED_OBJECTS_ALL, >>> + LD_SHOW_AUXV, >>> }; >>> >>> struct ld_env_var_desc { >>> @@ -396,6 +398,7 @@ static struct ld_env_var_desc ld_env_vars[] = { >>> LD_ENV_DESC(TRACE_LOADED_OBJECTS_FMT1, false), >>> LD_ENV_DESC(TRACE_LOADED_OBJECTS_FMT2, false), >>> LD_ENV_DESC(TRACE_LOADED_OBJECTS_ALL, false), >>> + LD_ENV_DESC(SHOW_AUXV, false), >>> }; >>> >>> static const char * >>> @@ -857,6 +860,9 @@ _rtld(Elf_Addr *sp, func_ptr_type *exit_proc, Obj_Entry **objp) >>> if (rtld_verify_versions(&list_main) == -1 && !ld_tracing) >>> rtld_die(); >>> >>> + if (ld_get_env_var(LD_SHOW_AUXV) != NULL) >>> + dump_auxv(aux_info); >>> + >>> if (ld_tracing) { /* We're done */ >>> trace_loaded_objects(obj_main); >>> exit(0); >>> @@ -6058,6 +6064,67 @@ print_usage(const char *argv0) >>> " <args> Arguments to the executed process\n", argv0); >>> } >>> >>> +#define AUXFMT(at, xfmt) [at] = { .name = #at, .fmt = xfmt } >>> +static const struct auxfmt { >>> + const char *name; >>> + const char *fmt; >>> +} auxfmts[] = { >>> + AUXFMT(AT_NULL, NULL), >>> + AUXFMT(AT_IGNORE, NULL), >>> + AUXFMT(AT_EXECFD, "%d"), >>> + 
AUXFMT(AT_PHDR, "%p"), >>> + AUXFMT(AT_PHENT, "%u"), >>> + AUXFMT(AT_PHNUM, "%u"), >>> + AUXFMT(AT_PAGESZ, "%u"), >>> + AUXFMT(AT_BASE, "%#lx"), >>> + AUXFMT(AT_FLAGS, "%#lx"), >>> + AUXFMT(AT_ENTRY, "%p"), >>> + AUXFMT(AT_NOTELF, NULL), >>> + AUXFMT(AT_UID, "%d"), >>> + AUXFMT(AT_EUID, "%d"), >>> + AUXFMT(AT_GID, "%d"), >>> + AUXFMT(AT_EGID, "%d"), >>> + AUXFMT(AT_EXECPATH, "%s"), >>> + AUXFMT(AT_CANARY, "%p"), >>> + AUXFMT(AT_CANARYLEN, "%u"), >>> + AUXFMT(AT_OSRELDATE, "%u"), >>> + AUXFMT(AT_NCPUS, "%u"), >>> + AUXFMT(AT_PAGESIZES, "%p"), >>> + AUXFMT(AT_PAGESIZESLEN, "%u"), >>> + AUXFMT(AT_TIMEKEEP, "%p"), >>> + AUXFMT(AT_STACKPROT, "%#x"), >>> + AUXFMT(AT_EHDRFLAGS, "%#lx"), >>> + AUXFMT(AT_HWCAP, "%#lx"), >>> + AUXFMT(AT_HWCAP2, "%#lx"), >>> + AUXFMT(AT_BSDFLAGS, "%#lx"), >>> + AUXFMT(AT_ARGC, "%u"), >>> + AUXFMT(AT_ARGV, "%p"), >>> + AUXFMT(AT_ENVC, "%p"), >>> + AUXFMT(AT_ENVV, "%p"), >>> + AUXFMT(AT_PS_STRINGS, "%p"), >>> + AUXFMT(AT_FXRNG, "%p"), >>> +}; >>> + >>> +static void >>> +dump_auxv(Elf_Auxinfo **aux_info) >>> +{ >>> + Elf_Auxinfo *auxp; >>> + const struct auxfmt *fmt; >>> + int i; >>> + >>> + for (i = 0; i < AT_COUNT; i++) { >>> + auxp = aux_info[i]; >>> + if (auxp == NULL) >>> + continue; >>> + fmt = &auxfmts[i]; >>> + if (fmt->fmt == NULL) >>> + continue; >>> + rtld_fdprintf(STDOUT_FILENO, "%s:\t", fmt->name); >>> + rtld_fdprintfx(STDOUT_FILENO, fmt->fmt, auxp->a_un.a_ptr); >>> + rtld_fdprintf(STDOUT_FILENO, "\n"); >> >> This is undefined behaviour, breaks CHERI, and totally unnecessary. You >> have a handful of cases here, just make an enum and have separate >> rtld_fdprintf calls. In particular, ignoring CHERI, unsigned ints are sign-extended to 64 bits on MIPS and RISC-V. Thus by passing a 64-bit value but using a %u, you are violating the calling convention. 
I can’t currently get GCC or Clang to exploit the fact that varargs arguments are sign-extended, but on MIPS, and with GCC on RISC-V (Clang is currently stupid and round-trips via memory even when the va_arg calls have no branching surrounding them, rather than just grabbing from the register), there is a redundant sext.w that could legally be optimised out, and that optimisation would be broken by this calling convention violation. Then CHERI makes it worse because a_ptr and a_val do not have the same representation, although in practice I think passing a_ptr and nothing further does end up working on CHERI-RISC-V and Morello, just not on CHERI-MIPS, due to it being big-endian. Jess >> Also the table itself is brittle, there’s nothing checking that the >> order perfectly matches up with the defines in the header. Why not use >> designated initialisers to ensure that the right values are in the >> right entries (and handle the possibility that name might be NULL)? > > Scratch that second part, I missed the [at] = in the macro. > > Jess