git: 54521a2ff93a - main - Merge commit b84d773fd004 from llvm git (by Fangrui Song):

From: Dimitry Andric <dim_at_FreeBSD.org>
Date: Sun, 22 Sep 2024 17:13:19 UTC
The branch main has been updated by dim:

URL: https://cgit.FreeBSD.org/src/commit/?id=54521a2ff93ae06c95c31f79f89dc23c9b51c20b

commit 54521a2ff93ae06c95c31f79f89dc23c9b51c20b
Author:     Dimitry Andric <dim@FreeBSD.org>
AuthorDate: 2024-09-22 17:08:47 +0000
Commit:     Dimitry Andric <dim@FreeBSD.org>
CommitDate: 2024-09-22 17:12:17 +0000

    Merge commit b84d773fd004 from llvm git (by Fangrui Song):
    
      [Parallel] Revert sequential task changes
    
      https://reviews.llvm.org/D148728 introduced `bool Sequential` to unify
      `execute` and the old `spawn` without argument. However, sequential
      tasks might be executed by any worker thread (non-deterministic),
      leading to non-determinism output for ld.lld -z nocombreloc (see
      https://reviews.llvm.org/D133003).
    
      In addition, the extra member variables have overhead.
      This sequential task has only been used for lld parallel relocation
      scanning.
    
      This patch restores the behavior before https://reviews.llvm.org/D148728 .
    
      Fix #105958
    
      Pull Request: https://github.com/llvm/llvm-project/pull/109084
    
    This fixes the non-reproducibility we had noticed when linking our EFI
    loaders, and for which we committed a workaround in f5ce3f4ef562.
    
    MFC after:      3 days
---
 contrib/llvm-project/lld/ELF/Relocations.cpp       | 56 ++++++++++++++--------
 .../llvm/include/llvm/Support/Parallel.h           |  4 +-
 contrib/llvm-project/llvm/lib/Support/Parallel.cpp | 49 +++++--------------
 3 files changed, 48 insertions(+), 61 deletions(-)

diff --git a/contrib/llvm-project/lld/ELF/Relocations.cpp b/contrib/llvm-project/lld/ELF/Relocations.cpp
index 92a1b9baaca3..8cb01b63d2cc 100644
--- a/contrib/llvm-project/lld/ELF/Relocations.cpp
+++ b/contrib/llvm-project/lld/ELF/Relocations.cpp
@@ -1584,30 +1584,44 @@ template <class ELFT> void elf::scanRelocations() {
   bool serial = !config->zCombreloc || config->emachine == EM_MIPS ||
                 config->emachine == EM_PPC64;
   parallel::TaskGroup tg;
-  for (ELFFileBase *f : ctx.objectFiles) {
-    auto fn = [f]() {
+  auto outerFn = [&]() {
+    for (ELFFileBase *f : ctx.objectFiles) {
+      auto fn = [f]() {
+        RelocationScanner scanner;
+        for (InputSectionBase *s : f->getSections()) {
+          if (s && s->kind() == SectionBase::Regular && s->isLive() &&
+              (s->flags & SHF_ALLOC) &&
+              !(s->type == SHT_ARM_EXIDX && config->emachine == EM_ARM))
+            scanner.template scanSection<ELFT>(*s);
+        }
+      };
+      if (serial)
+        fn();
+      else
+        tg.spawn(fn);
+    }
+    auto scanEH = [] {
       RelocationScanner scanner;
-      for (InputSectionBase *s : f->getSections()) {
-        if (s && s->kind() == SectionBase::Regular && s->isLive() &&
-            (s->flags & SHF_ALLOC) &&
-            !(s->type == SHT_ARM_EXIDX && config->emachine == EM_ARM))
-          scanner.template scanSection<ELFT>(*s);
+      for (Partition &part : partitions) {
+        for (EhInputSection *sec : part.ehFrame->sections)
+          scanner.template scanSection<ELFT>(*sec);
+        if (part.armExidx && part.armExidx->isLive())
+          for (InputSection *sec : part.armExidx->exidxSections)
+            if (sec->isLive())
+              scanner.template scanSection<ELFT>(*sec);
       }
     };
-    tg.spawn(fn, serial);
-  }
-
-  tg.spawn([] {
-    RelocationScanner scanner;
-    for (Partition &part : partitions) {
-      for (EhInputSection *sec : part.ehFrame->sections)
-        scanner.template scanSection<ELFT>(*sec);
-      if (part.armExidx && part.armExidx->isLive())
-        for (InputSection *sec : part.armExidx->exidxSections)
-          if (sec->isLive())
-            scanner.template scanSection<ELFT>(*sec);
-    }
-  });
+    if (serial)
+      scanEH();
+    else
+      tg.spawn(scanEH);
+  };
+  // If `serial` is true, call `spawn` to ensure that `scanner` runs in a thread
+  // with valid getThreadIndex().
+  if (serial)
+    tg.spawn(outerFn);
+  else
+    outerFn();
 }
 
 static bool handleNonPreemptibleIfunc(Symbol &sym, uint16_t flags) {
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/Parallel.h b/contrib/llvm-project/llvm/include/llvm/Support/Parallel.h
index 8170da98f15a..c34619ab7d96 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/Parallel.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/Parallel.h
@@ -97,9 +97,7 @@ public:
   // Spawn a task, but does not wait for it to finish.
   // Tasks marked with \p Sequential will be executed
   // exactly in the order which they were spawned.
-  // Note: Sequential tasks may be executed on different
-  // threads, but strictly in sequential order.
-  void spawn(std::function<void()> f, bool Sequential = false);
+  void spawn(std::function<void()> f);
 
   void sync() const { L.sync(); }
 
diff --git a/contrib/llvm-project/llvm/lib/Support/Parallel.cpp b/contrib/llvm-project/llvm/lib/Support/Parallel.cpp
index 9b14b05b5211..26da702969b7 100644
--- a/contrib/llvm-project/llvm/lib/Support/Parallel.cpp
+++ b/contrib/llvm-project/llvm/lib/Support/Parallel.cpp
@@ -12,7 +12,6 @@
 #include "llvm/Support/Threading.h"
 
 #include <atomic>
-#include <deque>
 #include <future>
 #include <thread>
 #include <vector>
@@ -39,7 +38,7 @@ namespace {
 class Executor {
 public:
   virtual ~Executor() = default;
-  virtual void add(std::function<void()> func, bool Sequential = false) = 0;
+  virtual void add(std::function<void()> func) = 0;
   virtual size_t getThreadCount() const = 0;
 
   static Executor *getDefaultExecutor();
@@ -98,13 +97,10 @@ public:
     static void call(void *Ptr) { ((ThreadPoolExecutor *)Ptr)->stop(); }
   };
 
-  void add(std::function<void()> F, bool Sequential = false) override {
+  void add(std::function<void()> F) override {
     {
       std::lock_guard<std::mutex> Lock(Mutex);
-      if (Sequential)
-        WorkQueueSequential.emplace_front(std::move(F));
-      else
-        WorkQueue.emplace_back(std::move(F));
+      WorkStack.push_back(std::move(F));
     }
     Cond.notify_one();
   }
@@ -112,42 +108,23 @@ public:
   size_t getThreadCount() const override { return ThreadCount; }
 
 private:
-  bool hasSequentialTasks() const {
-    return !WorkQueueSequential.empty() && !SequentialQueueIsLocked;
-  }
-
-  bool hasGeneralTasks() const { return !WorkQueue.empty(); }
-
   void work(ThreadPoolStrategy S, unsigned ThreadID) {
     threadIndex = ThreadID;
     S.apply_thread_strategy(ThreadID);
     while (true) {
       std::unique_lock<std::mutex> Lock(Mutex);
-      Cond.wait(Lock, [&] {
-        return Stop || hasGeneralTasks() || hasSequentialTasks();
-      });
+      Cond.wait(Lock, [&] { return Stop || !WorkStack.empty(); });
       if (Stop)
         break;
-      bool Sequential = hasSequentialTasks();
-      if (Sequential)
-        SequentialQueueIsLocked = true;
-      else
-        assert(hasGeneralTasks());
-
-      auto &Queue = Sequential ? WorkQueueSequential : WorkQueue;
-      auto Task = std::move(Queue.back());
-      Queue.pop_back();
+      auto Task = std::move(WorkStack.back());
+      WorkStack.pop_back();
       Lock.unlock();
       Task();
-      if (Sequential)
-        SequentialQueueIsLocked = false;
     }
   }
 
   std::atomic<bool> Stop{false};
-  std::atomic<bool> SequentialQueueIsLocked{false};
-  std::deque<std::function<void()>> WorkQueue;
-  std::deque<std::function<void()>> WorkQueueSequential;
+  std::vector<std::function<void()>> WorkStack;
   std::mutex Mutex;
   std::condition_variable Cond;
   std::promise<void> ThreadsCreated;
@@ -205,16 +182,14 @@ TaskGroup::~TaskGroup() {
   L.sync();
 }
 
-void TaskGroup::spawn(std::function<void()> F, bool Sequential) {
+void TaskGroup::spawn(std::function<void()> F) {
 #if LLVM_ENABLE_THREADS
   if (Parallel) {
     L.inc();
-    detail::Executor::getDefaultExecutor()->add(
-        [&, F = std::move(F)] {
-          F();
-          L.dec();
-        },
-        Sequential);
+    detail::Executor::getDefaultExecutor()->add([&, F = std::move(F)] {
+      F();
+      L.dec();
+    });
     return;
   }
 #endif