git: 517a7adb1160 - main - Make hwpmc work for userspace binaries again

From: Andrew Gallatin <gallatin_at_FreeBSD.org>
Date: Wed, 15 Dec 2021 13:43:58 UTC
The branch main has been updated by gallatin:

URL: https://cgit.FreeBSD.org/src/commit/?id=517a7adb1160850746227e4cc30d4bcc3ff04d7d

commit 517a7adb1160850746227e4cc30d4bcc3ff04d7d
Author:     Andrew Gallatin <gallatin@FreeBSD.org>
AuthorDate: 2021-12-15 13:38:36 +0000
Commit:     Andrew Gallatin <gallatin@FreeBSD.org>
CommitDate: 2021-12-15 13:38:36 +0000

    Make hwpmc work for userspace binaries again
    
    hwpmc has been utterly broken for userspace binaries, and has been
    labeling all samples from userspace binaries as dubious frames. The
    issues are that:
    
    -The check for ph.p_offset & (-ph.p_align) == 0 was mostly bogus. The
     intent was to ignore all executable segments other than the first,
     which when using BFD appeared in the first page, but with current LLD
     a read-only data segment appears before the executable segment,
     pushing the latter into the second page or later. This meant no
     executable segment was ever found, and thus pi_vaddr remained
     0. Instead of relying on BFD's layout, track whether we've seen an
     executable segment explicitly with a local bool.
    
    -Shared libraries were not parsing the segments to calculate pi_vaddr,
     resulting in it always being 0. Again, when using BFD, the executable
     segment started at the first page, and so pi_vaddr was genuinely
     meant to be 0, but not with LLD's current layout. This meant that
     pmcstat_image_link's offset calculation gave the base address of the
     segment in memory, rather than the base address of the whole library
     in memory, and so when adding that to pi_start/pi_end to get the
     range of the executable sections in memory it double-counted the
     offset of the first executable segment within the library. Thus we
     need to do the exact same parsing for ET_DYN as we do for ET_EXEC,
     which is simpler to write as special-casing ET_REL to not look for
     segments. Note that, whilst PT_INTERP isn't needed for shared
     libraries, it will be for PIEs, which pmcstat still fails to handle
     due to not knowing the base address of the PIE; we get the base
     address for libraries by MAP_IN events, and for rtld by virtue of the
     process's entry address being rtld's, but have no equivalent for the
     executable.
    
    Fixes courtesy of jrtc27@.
    
    Reviewed by: jrtc27, jhb (earlier version)
    Differential Revision: https://reviews.freebsd.org/D33055
    Sponsored by: Netflix
---
 lib/libpmcstat/libpmcstat_image.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/lib/libpmcstat/libpmcstat_image.c b/lib/libpmcstat/libpmcstat_image.c
index 9ee7097e95ec..97109f203806 100644
--- a/lib/libpmcstat/libpmcstat_image.c
+++ b/lib/libpmcstat/libpmcstat_image.c
@@ -43,6 +43,7 @@ __FBSDID("$FreeBSD$");
 #include <fcntl.h>
 #include <pmc.h>
 #include <pmclog.h>
+#include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -295,6 +296,7 @@ pmcstat_image_get_elf_params(struct pmcstat_image *image,
 	size_t i, nph, nsh;
 	const char *path, *elfbase;
 	char *p, *endp;
+	bool first_exec_segment;
 	uintfptr_t minva, maxva;
 	Elf *e;
 	Elf_Scn *scn;
@@ -384,7 +386,7 @@ pmcstat_image_get_elf_params(struct pmcstat_image *image,
 	 * loaded.  Additionally, for dynamically linked executables,
 	 * save the pathname to the runtime linker.
 	 */
-	if (eh.e_type == ET_EXEC) {
+	if (eh.e_type != ET_REL) {
 		if (elf_getphnum(e, &nph) == 0) {
 			warnx(
 "WARNING: Could not determine the number of program headers in \"%s\": %s.",
@@ -392,6 +394,7 @@ pmcstat_image_get_elf_params(struct pmcstat_image *image,
 			    elf_errmsg(-1));
 			goto done;
 		}
+		first_exec_segment = true;
 		for (i = 0; i < eh.e_phnum; i++) {
 			if (gelf_getphdr(e, i, &ph) != &ph) {
 				warnx(
@@ -416,8 +419,10 @@ pmcstat_image_get_elf_params(struct pmcstat_image *image,
 				break;
 			case PT_LOAD:
 				if ((ph.p_flags & PF_X) != 0 &&
-				    (ph.p_offset & (-ph.p_align)) == 0)
+				    first_exec_segment) {
 					image->pi_vaddr = ph.p_vaddr & (-ph.p_align);
+					first_exec_segment = false;
+				}
 				break;
 			}
 		}