Re: FYI: Why META_MODE rebuilds so much for building again after installworld (no source changes) [code level bug evidence]

From: Mark Millard <marklmi_at_yahoo.com>
Date: Thu, 23 Feb 2023 19:53:49 UTC
cached_realpath only reports its "cached_realpath:" notice
(not the purging one) when it does not find the value via
HashTable_FindValue and so does a HashTable_Set :

/*
 * Resolve pathname with realpath(3), remembering successful results in
 * cached_realpaths so that a later call with the same pathname string can
 * be answered from the table.
 *
 * pathname: the path to resolve; NULL or "" yields NULL immediately.
 * resolved: caller-provided buffer that receives the result; the cache-hit
 *           path writes up to MAXPATHLEN bytes into it.
 *
 * Returns resolved on success, or NULL if pathname is NULL/empty or
 * realpath() fails. Failures are not cached.
 */
const char *
cached_realpath(const char *pathname, char *resolved)
{
        const char *cached;
        const char *fresh;

        if (pathname == NULL || pathname[0] == '\0')
                return NULL;

        cached = HashTable_FindValue(&cached_realpaths, pathname);
        if (cached != NULL) {
                /* Cache hit: hand back a copy, always NUL-terminated. */
                strncpy(resolved, cached, MAXPATHLEN);
                resolved[MAXPATHLEN - 1] = '\0';
                return resolved;
        }

        fresh = realpath(pathname, resolved);
        if (fresh == NULL) {
                /* should we negative-cache? */
                return NULL;
        }

        /* Cache miss: store a bmake_strdup copy and report it under DIR. */
        HashTable_Set(&cached_realpaths, pathname, bmake_strdup(fresh));
        DEBUG2(DIR, "cached_realpath: %s -> %s\n", pathname, fresh);
        return resolved;
}

cached_realpaths is global:

/*
 * Table that cached_realpath looks up and fills: the key is the pathname
 * as passed in, the value is a bmake_strdup copy of the realpath() result.
 */
static HashTable cached_realpaths;

So with -ddM why do I see lots of "cached_realpath:"
notices for the same path? For example:

# grep "tmp/legacy/usr/sbin/ln\>" /usr/obj/BUILDs/main-amd64-nodbg-clang/sys-typescripts/typescript-make-amd64-nodbg-clang-amd64-host-2023-02-23:10:20:26 | more
cached_realpath: /usr/obj/BUILDs/main-amd64-nodbg-clang/usr/main-src/amd64.amd64/tmp/legacy/usr/sbin/ln -> /usr/obj/BUILDs/main-amd64-nodbg-clang/usr/main-src/amd64.amd64/tmp/legacy/bin/ln
cached_realpath: /usr/obj/BUILDs/main-amd64-nodbg-clang/usr/main-src/amd64.amd64/tmp/legacy/usr/sbin/ln -> /usr/obj/BUILDs/main-amd64-nodbg-clang/usr/main-src/amd64.amd64/tmp/legacy/bin/ln
   Caching 02:49:37 Feb 23, 2023 for /usr/obj/BUILDs/main-amd64-nodbg-clang/usr/main-src/amd64.amd64/tmp/legacy/usr/sbin/ln
/usr/obj/BUILDs/main-amd64-nodbg-clang/usr/main-src/amd64.amd64/usr.bin/awk/awkgram.tab.h.meta: 22: file '/usr/obj/BUILDs/main-amd64-nodbg-clang/usr/main-src/amd64.amd64/tmp/legacy/usr/sbin/ln' is newer than the target...
cached_realpath: /usr/obj/BUILDs/main-amd64-nodbg-clang/usr/main-src/amd64.amd64/tmp/legacy/usr/sbin/ln -> /usr/obj/BUILDs/main-amd64-nodbg-clang/usr/main-src/amd64.amd64/tmp/legacy/bin/ln
   Caching 02:49:37 Feb 23, 2023 for /usr/obj/BUILDs/main-amd64-nodbg-clang/usr/main-src/amd64.amd64/tmp/legacy/usr/sbin/ln
cached_realpath: /usr/obj/BUILDs/main-amd64-nodbg-clang/usr/main-src/amd64.amd64/tmp/legacy/usr/sbin/ln -> /usr/obj/BUILDs/main-amd64-nodbg-clang/usr/main-src/amd64.amd64/tmp/legacy/bin/ln
   Caching 02:49:37 Feb 23, 2023 for /usr/obj/BUILDs/main-amd64-nodbg-clang/usr/main-src/amd64.amd64/tmp/legacy/usr/sbin/ln
cached_realpath: /usr/obj/BUILDs/main-amd64-nodbg-clang/usr/main-src/amd64.amd64/tmp/legacy/usr/sbin/ln -> /usr/obj/BUILDs/main-amd64-nodbg-clang/usr/main-src/amd64.amd64/tmp/legacy/bin/ln
   Caching 02:49:37 Feb 23, 2023 for /usr/obj/BUILDs/main-amd64-nodbg-clang/usr/main-src/amd64.amd64/tmp/legacy/usr/sbin/ln
cached_realpath: /usr/obj/BUILDs/main-amd64-nodbg-clang/usr/main-src/amd64.amd64/tmp/legacy/usr/sbin/ln -> /usr/obj/BUILDs/main-amd64-nodbg-clang/usr/main-src/amd64.amd64/tmp/legacy/bin/ln
   Caching 02:49:37 Feb 23, 2023 for /usr/obj/BUILDs/main-amd64-nodbg-clang/usr/main-src/amd64.amd64/tmp/legacy/usr/sbin/ln
cached_realpath: /usr/obj/BUILDs/main-amd64-nodbg-clang/usr/main-src/amd64.amd64/tmp/legacy/usr/sbin/ln -> /usr/obj/BUILDs/main-amd64-nodbg-clang/usr/main-src/amd64.amd64/tmp/legacy/bin/ln
   Caching 02:49:37 Feb 23, 2023 for /usr/obj/BUILDs/main-amd64-nodbg-clang/usr/main-src/amd64.amd64/tmp/legacy/usr/sbin/ln
/usr/obj/BUILDs/main-amd64-nodbg-clang/usr/main-src/amd64.amd64/usr.bin/awk/awkgram.tab.h.meta: 22: file '/usr/obj/BUILDs/main-amd64-nodbg-clang/usr/main-src/amd64.amd64/tmp/legacy/usr/sbin/ln' is newer than the target...
. . .

A possible cause is something I ran into while looking around:

/*
 * A read-only range of a character array, NOT null-terminated.
 *
 * Hash_Substring walks from start and stops as soon as it reaches end,
 * without reading the byte at end.
 */
typedef struct Substring Substring;
struct Substring {
        const char *start;      /* first character of the range */
        const char *end;        /* where iteration over the range stops */
};
. . .
/*
 * Build a Substring from the two given pointers.
 *
 * start: stored as the range's start member.
 * end:   stored as the range's end member.
 *
 * Returns the Substring by value; nothing is copied or allocated.
 */
MAKE_STATIC Substring
Substring_Init(const char *start, const char *end)
{
        Substring range = { .start = start, .end = end };

        return range;
}
. . .
/*
 * Find the entry corresponding to the key, or return NULL.
 *
 * The key is hashed with Hash_String, which also reports where the key
 * ends (the position of its terminating '\0'); that position becomes the
 * end of the Substring handed to HashTable_Find together with the hash.
 */
HashEntry *
HashTable_FindEntry(HashTable *t, const char *key)
{
        const char *keyEnd;
        unsigned int h;
        Substring keyRange;

        h = Hash_String(key, &keyEnd);
        keyRange = Substring_Init(key, keyEnd);
        return HashTable_Find(t, keyRange, h);
}
. . .
/*
 * A read-only range of a character array, NOT null-terminated.
 *
 * Hash_Substring walks from start and stops as soon as it reaches end,
 * without reading the byte at end.
 */
typedef struct Substring Substring;
struct Substring {
        const char *start;      /* first character of the range */
        const char *end;        /* where iteration over the range stops */
};
. . .
/*
 * Build a Substring from the two given pointers.
 *
 * start: stored as the range's start member.
 * end:   stored as the range's end member.
 *
 * Returns the Substring by value; nothing is copied or allocated.
 */
MAKE_STATIC Substring
Substring_Init(const char *start, const char *end)
{
        Substring range = { .start = start, .end = end };

        return range;
}
. . .
/*
 * Find the entry corresponding to the key, or return NULL.
 *
 * The key is hashed with Hash_String, which also reports where the key
 * ends (the position of its terminating '\0'); that position becomes the
 * end of the Substring handed to HashTable_Find together with the hash.
 */
HashEntry *
HashTable_FindEntry(HashTable *t, const char *key)
{
        const char *keyEnd;
        unsigned int h;
        Substring keyRange;

        h = Hash_String(key, &keyEnd);
        keyRange = Substring_Init(key, keyEnd);
        return HashTable_Find(t, keyRange, h);
}
. . .
/*
 * This hash function matches Gosling's Emacs and java.lang.String.
 *
 * key:        NUL-terminated string to hash.
 * out_keyEnd: receives a pointer to the terminating '\0' of key.
 *
 * Returns the hash of every byte before the terminator; the '\0' itself
 * does not contribute. Arithmetic is unsigned and wraps on overflow.
 */
static unsigned int
Hash_String(const char *key, const char **out_keyEnd)
{
        unsigned int h = 0;
        const char *p = key;

        while (*p != '\0') {
                h = 31 * h + (unsigned char)*p;
                p++;
        }

        /* The loop stopped on the terminator, so p marks the key's end. */
        *out_keyEnd = p;
        return h;
}

But after the loop: *p=='\0' so **out_keyEnd=='\0'
and the FindEntry Substring_Init(key, keyEnd) ends
up including the '\0' byte.

But note that the h in Hash_String did not include the
'\0' byte. Call this h value h_VALUE0 for later reference.
Then look at:

/*
 * This hash function matches Gosling's Emacs and java.lang.String.
 *
 * key: the range to hash. Bytes are read from key.start onward and the
 *      walk stops when the pointer equals key.end, so the byte at key.end
 *      is never read.
 *
 * Returns the hash of the bytes visited; arithmetic is unsigned and wraps
 * on overflow.
 */
unsigned int
Hash_Substring(Substring key)
{
        unsigned int h = 0;
        const char *p = key.start;

        while (p != key.end) {
                h = 31 * h + (unsigned char)*p;
                p++;
        }
        return h;
}

This h does include the '\0' byte so h==(unsigned int)(31*h_VALUE0).

I expect the mismatched hash values explain the repeated
"cached_realpath:" notices for the same path: inserted
but never found.

===
Mark Millard
marklmi at yahoo.com