svn commit: r304770 - in projects/clang390-import/contrib/llvm: include/llvm/Transforms/Scalar lib/Analysis lib/Target/AArch64 lib/Target/PowerPC lib/Transforms/Scalar lib/Transforms/Utils lib/Tran...
Dimitry Andric
dim at FreeBSD.org
Wed Aug 24 17:43:10 UTC 2016
Author: dim
Date: Wed Aug 24 17:43:08 2016
New Revision: 304770
URL: https://svnweb.freebsd.org/changeset/base/304770
Log:
Update llvm to release_39 branch r279477.
Modified:
projects/clang390-import/contrib/llvm/include/llvm/Transforms/Scalar/Reassociate.h
projects/clang390-import/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
projects/clang390-import/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
projects/clang390-import/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
projects/clang390-import/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
projects/clang390-import/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
projects/clang390-import/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Directory Properties:
projects/clang390-import/contrib/llvm/ (props changed)
Modified: projects/clang390-import/contrib/llvm/include/llvm/Transforms/Scalar/Reassociate.h
==============================================================================
--- projects/clang390-import/contrib/llvm/include/llvm/Transforms/Scalar/Reassociate.h Wed Aug 24 17:39:40 2016 (r304769)
+++ projects/clang390-import/contrib/llvm/include/llvm/Transforms/Scalar/Reassociate.h Wed Aug 24 17:43:08 2016 (r304770)
@@ -65,7 +65,7 @@ public:
PreservedAnalyses run(Function &F, FunctionAnalysisManager &);
private:
- void BuildRankMap(Function &F, ReversePostOrderTraversal<Function *> &RPOT);
+ void BuildRankMap(Function &F);
unsigned getRank(Value *V);
void canonicalizeOperands(Instruction *I);
void ReassociateExpression(BinaryOperator *I);
Modified: projects/clang390-import/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
==============================================================================
--- projects/clang390-import/contrib/llvm/lib/Analysis/ScalarEvolution.cpp Wed Aug 24 17:39:40 2016 (r304769)
+++ projects/clang390-import/contrib/llvm/lib/Analysis/ScalarEvolution.cpp Wed Aug 24 17:43:08 2016 (r304770)
@@ -4822,6 +4822,10 @@ bool ScalarEvolution::isSCEVExprNeverPoi
// from different loops, so that we know which loop to prove that I is
// executed in.
for (unsigned OpIndex = 0; OpIndex < I->getNumOperands(); ++OpIndex) {
+ // I could be an extractvalue from a call to an overflow intrinsic.
+ // TODO: We can do better here in some cases.
+ if (!isSCEVable(I->getOperand(OpIndex)->getType()))
+ return false;
const SCEV *Op = getSCEV(I->getOperand(OpIndex));
if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
bool AllOtherOpsLoopInvariant = true;
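The guard added above exists because I can be an extractvalue of the aggregate returned by an overflow intrinsic (e.g. llvm.sadd.with.overflow), and that aggregate type { iN, i1 } is not SCEVable. As a rough illustration (not part of the commit; the function name is made up), C/C++ source like the following is typically lowered by clang to such an intrinsic call followed by extractvalue instructions:

#include <cstdio>

// clang usually lowers __builtin_add_overflow to @llvm.sadd.with.overflow.i32,
// whose result is the aggregate { i32, i1 }; the extractvalue instructions
// that pull out the sum and the overflow bit have that non-SCEVable operand.
int sum_with_check(int a, int b) {
  int result;
  if (__builtin_add_overflow(a, b, &result))
    return 0; // overflow: fall back to a sentinel value
  return result;
}

int main() {
  std::printf("%d\n", sum_with_check(3, 4)); // prints 7
}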
Modified: projects/clang390-import/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
==============================================================================
--- projects/clang390-import/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp Wed Aug 24 17:39:40 2016 (r304769)
+++ projects/clang390-import/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp Wed Aug 24 17:43:08 2016 (r304770)
@@ -1258,8 +1258,11 @@ AArch64LoadStoreOpt::findMatchingInsn(Ma
if (MIIsUnscaled) {
// If the unscaled offset isn't a multiple of the MemSize, we can't
// pair the operations together: bail and keep looking.
- if (MIOffset % MemSize)
+ if (MIOffset % MemSize) {
+ trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
+ MemInsns.push_back(&MI);
continue;
+ }
MIOffset /= MemSize;
} else {
MIOffset *= MemSize;
@@ -1424,9 +1427,6 @@ bool AArch64LoadStoreOpt::isMatchingUpda
default:
break;
case AArch64::SUBXri:
- // Negate the offset for a SUB instruction.
- Offset *= -1;
- // FALLTHROUGH
case AArch64::ADDXri:
// Make sure it's a vanilla immediate operand, not a relocation or
// anything else we can't handle.
@@ -1444,6 +1444,9 @@ bool AArch64LoadStoreOpt::isMatchingUpda
bool IsPairedInsn = isPairedLdSt(MemMI);
int UpdateOffset = MI.getOperand(2).getImm();
+ if (MI.getOpcode() == AArch64::SUBXri)
+ UpdateOffset = -UpdateOffset;
+
// For non-paired load/store instructions, the immediate must fit in a
// signed 9-bit integer.
if (!IsPairedInsn && (UpdateOffset > 255 || UpdateOffset < -256))
@@ -1458,13 +1461,13 @@ bool AArch64LoadStoreOpt::isMatchingUpda
break;
int ScaledOffset = UpdateOffset / Scale;
- if (ScaledOffset > 64 || ScaledOffset < -64)
+ if (ScaledOffset > 63 || ScaledOffset < -64)
break;
}
// If we have a non-zero Offset, we check that it matches the amount
// we're adding to the register.
- if (!Offset || Offset == MI.getOperand(2).getImm())
+ if (!Offset || Offset == UpdateOffset)
return true;
break;
}
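Two things change in isMatchingUpdateInsn above: the offset of a SUBXri update is now negated into UpdateOffset (and compared against Offset at the end), and the scaled-offset bound drops from 64 to 63. The latter matches the signed 7-bit scaled immediate used by paired pre/post-indexed loads and stores, whose range is -64..63. A minimal sketch of that range check (function name and values are illustrative):

#include <cstdio>

// The imm7 field of paired pre/post-indexed load/store instructions holds a
// signed 7-bit value, i.e. -64..63 (scaled by the access size).
bool fitsSignedImm7(int scaledOffset) {
  return scaledOffset >= -64 && scaledOffset <= 63;
}

int main() {
  std::printf("%d %d %d\n",
              fitsSignedImm7(63),    // 1: largest representable value
              fitsSignedImm7(64),    // 0: the old "> 64" bound let this through
              fitsSignedImm7(-64));  // 1: smallest representable value
}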
Modified: projects/clang390-import/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
==============================================================================
--- projects/clang390-import/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp Wed Aug 24 17:39:40 2016 (r304769)
+++ projects/clang390-import/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp Wed Aug 24 17:43:08 2016 (r304770)
@@ -4033,11 +4033,18 @@ PPCTargetLowering::IsEligibleForTailCall
if (CalleeCC != CallingConv::Fast && CalleeCC != CallingConv::C)
return false;
- // Functions containing by val parameters are not supported.
+ // A caller containing any byval parameter is not supported.
if (std::any_of(Ins.begin(), Ins.end(),
[](const ISD::InputArg& IA) { return IA.Flags.isByVal(); }))
return false;
+ // A callee containing any byval parameter is not supported either.
+ // Note: This is a quick workaround, because in some cases, e.g.
+ // caller's stack size > callee's stack size, we are still able to apply
+ // sibling call optimization. See: https://reviews.llvm.org/D23441#513574
+ if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
+ return false;
+
// No TCO/SCO on indirect calls because the caller has to restore its TOC
if (!isFunctionGlobalAddress(Callee) &&
!isa<ExternalSymbolSDNode>(Callee))
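The new check refuses sibling-call optimization when the callee takes byval arguments as well, since the outgoing byval copy lives in stack space that a sibling call would reuse. A hedged sketch of source that typically produces such a byval argument on 64-bit PowerPC (struct size, ABI details, and names are illustrative):

#include <cstdio>

// A large aggregate like this is usually passed byval (copied into the
// caller's argument area) on the 64-bit PowerPC ABIs.
struct Big { long v[16]; };

long consume(Big b) { return b.v[0] + b.v[15]; }

// A call in tail position that passes a byval aggregate. With the change
// above, the backend conservatively declines to emit this as a sibling call.
long forward(Big b) { return consume(b); }

int main() {
  Big b = {};
  b.v[0] = 1;
  b.v[15] = 2;
  std::printf("%ld\n", forward(b)); // prints 3
}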
Modified: projects/clang390-import/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
==============================================================================
--- projects/clang390-import/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp Wed Aug 24 17:39:40 2016 (r304769)
+++ projects/clang390-import/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp Wed Aug 24 17:43:08 2016 (r304770)
@@ -145,8 +145,7 @@ static BinaryOperator *isReassociableOp(
return nullptr;
}
-void ReassociatePass::BuildRankMap(
- Function &F, ReversePostOrderTraversal<Function *> &RPOT) {
+void ReassociatePass::BuildRankMap(Function &F) {
unsigned i = 2;
// Assign distinct ranks to function arguments.
@@ -155,6 +154,7 @@ void ReassociatePass::BuildRankMap(
DEBUG(dbgs() << "Calculated Rank[" << I->getName() << "] = " << i << "\n");
}
+ ReversePostOrderTraversal<Function *> RPOT(&F);
for (BasicBlock *BB : RPOT) {
unsigned BBRank = RankMap[BB] = ++i << 16;
@@ -2172,28 +2172,13 @@ void ReassociatePass::ReassociateExpress
}
PreservedAnalyses ReassociatePass::run(Function &F, FunctionAnalysisManager &) {
- // Reassociate needs for each instruction to have its operands already
- // processed, so we first perform a RPOT of the basic blocks so that
- // when we process a basic block, all its dominators have been processed
- // before.
- ReversePostOrderTraversal<Function *> RPOT(&F);
- BuildRankMap(F, RPOT);
+ // Calculate the rank map for F.
+ BuildRankMap(F);
MadeChange = false;
- for (BasicBlock *BI : RPOT) {
- // Use a worklist to keep track of which instructions have been processed
- // (and which insts won't be optimized again) so when redoing insts,
- // optimize insts rightaway which won't be processed later.
- SmallSet<Instruction *, 8> Worklist;
-
- // Insert all instructions in the BB
- for (Instruction &I : *BI)
- Worklist.insert(&I);
-
+ for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
// Optimize every instruction in the basic block.
- for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;) {
- // This instruction has been processed.
- Worklist.erase(&*II);
+ for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;)
if (isInstructionTriviallyDead(&*II)) {
EraseInst(&*II++);
} else {
@@ -2202,22 +2187,27 @@ PreservedAnalyses ReassociatePass::run(F
++II;
}
- // If the above optimizations produced new instructions to optimize or
- // made modifications which need to be redone, do them now if they won't
- // be handled later.
- while (!RedoInsts.empty()) {
- Instruction *I = RedoInsts.pop_back_val();
- // Process instructions that won't be processed later, either
- // inside the block itself or in another basic block (based on rank),
- // since these will be processed later.
- if ((I->getParent() != BI || !Worklist.count(I)) &&
- RankMap[I->getParent()] <= RankMap[BI]) {
- if (isInstructionTriviallyDead(I))
- EraseInst(I);
- else
- OptimizeInst(I);
- }
- }
+ // Make a copy of all the instructions to be redone so we can remove dead
+ // instructions.
+ SetVector<AssertingVH<Instruction>> ToRedo(RedoInsts);
+ // Iterate over all instructions to be reevaluated and remove trivially dead
+ // instructions. If any operand of the trivially dead instruction becomes
+ // dead, mark it for deletion as well. Continue this process until all
+ // trivially dead instructions have been removed.
+ while (!ToRedo.empty()) {
+ Instruction *I = ToRedo.pop_back_val();
+ if (isInstructionTriviallyDead(I))
+ RecursivelyEraseDeadInsts(I, ToRedo);
+ }
+
+ // Now that we have removed dead instructions, we can reoptimize the
+ // remaining instructions.
+ while (!RedoInsts.empty()) {
+ Instruction *I = RedoInsts.pop_back_val();
+ if (isInstructionTriviallyDead(I))
+ EraseInst(I);
+ else
+ OptimizeInst(I);
}
}
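The rewritten run() now drains a copy of RedoInsts first, recursively erasing instructions that are trivially dead so that operands which become dead as a result are also removed, and only then re-optimizes whatever is left. A toy sketch of that cascading worklist idea (plain C++, not the LLVM API; Def and its fields are made up):

#include <cstdio>
#include <vector>

// A "definition" is dead once its use count reaches zero. Erasing it drops
// the use counts of its operands, which may in turn become dead, so each
// operand goes back on the worklist until the worklist is empty.
struct Def {
  int uses = 0;
  std::vector<Def *> operands;
  bool erased = false;
};

void eraseIfDead(Def *D, std::vector<Def *> &Worklist) {
  if (D->erased || D->uses != 0)
    return;
  D->erased = true;
  for (Def *Op : D->operands) {
    --Op->uses;              // this erase may have made an operand dead
    Worklist.push_back(Op);  // revisit it on a later iteration
  }
}

int main() {
  Def a, b, c;               // c uses b, b uses a; nothing uses c
  b.operands = {&a}; a.uses = 1;
  c.operands = {&b}; b.uses = 1;

  std::vector<Def *> Worklist = {&c};
  while (!Worklist.empty()) {
    Def *D = Worklist.back();
    Worklist.pop_back();
    eraseIfDead(D, Worklist);
  }
  std::printf("%d %d %d\n", a.erased, b.erased, c.erased); // prints 1 1 1
}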
Modified: projects/clang390-import/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp
==============================================================================
--- projects/clang390-import/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp Wed Aug 24 17:39:40 2016 (r304769)
+++ projects/clang390-import/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp Wed Aug 24 17:43:08 2016 (r304770)
@@ -566,6 +566,12 @@ void llvm::CloneAndPruneIntoFromInst(Fun
if (!I)
continue;
+ // Skip over non-intrinsic callsites; we don't want to remove any nodes from
+ // the CGSCC.
+ CallSite CS = CallSite(I);
+ if (CS && CS.getCalledFunction() && !CS.getCalledFunction()->isIntrinsic())
+ continue;
+
// See if this instruction simplifies.
Value *SimpleV = SimplifyInstruction(I, DL);
if (!SimpleV)
Modified: projects/clang390-import/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
==============================================================================
--- projects/clang390-import/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Wed Aug 24 17:39:40 2016 (r304769)
+++ projects/clang390-import/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Wed Aug 24 17:43:08 2016 (r304770)
@@ -82,8 +82,13 @@ static cl::opt<int> MinVectorRegSizeOpti
"slp-min-reg-size", cl::init(128), cl::Hidden,
cl::desc("Attempt to vectorize for this register size in bits"));
-// FIXME: Set this via cl::opt to allow overriding.
-static const unsigned RecursionMaxDepth = 12;
+static cl::opt<unsigned> RecursionMaxDepth(
+ "slp-recursion-max-depth", cl::init(12), cl::Hidden,
+ cl::desc("Limit the recursion depth when building a vectorizable tree"));
+
+static cl::opt<unsigned> MinTreeSize(
+ "slp-min-tree-size", cl::init(3), cl::Hidden,
+ cl::desc("Only vectorize small trees if they are fully vectorizable"));
// Limit the number of alias checks. The limit is chosen so that
// it has no negative effect on the llvm benchmarks.
@@ -1842,7 +1847,7 @@ int BoUpSLP::getTreeCost() {
VectorizableTree.size() << ".\n");
// We only vectorize tiny trees if they are fully vectorizable.
- if (VectorizableTree.size() < 3 && !isFullyVectorizableTinyTree()) {
+ if (VectorizableTree.size() < MinTreeSize && !isFullyVectorizableTinyTree()) {
if (VectorizableTree.empty()) {
assert(!ExternalUses.size() && "We should not have any external users");
}
@@ -2124,11 +2129,61 @@ void BoUpSLP::reorderInputsAccordingToOp
}
void BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL) {
- Instruction *VL0 = cast<Instruction>(VL[0]);
- BasicBlock::iterator NextInst(VL0);
- ++NextInst;
- Builder.SetInsertPoint(VL0->getParent(), NextInst);
- Builder.SetCurrentDebugLocation(VL0->getDebugLoc());
+
+ // Get the basic block this bundle is in. All instructions in the bundle
+ // should be in this block.
+ auto *Front = cast<Instruction>(VL.front());
+ auto *BB = Front->getParent();
+ assert(all_of(make_range(VL.begin(), VL.end()), [&](Value *V) -> bool {
+ return cast<Instruction>(V)->getParent() == BB;
+ }));
+
+ // The last instruction in the bundle in program order.
+ Instruction *LastInst = nullptr;
+
+ // Find the last instruction. The common case should be that BB has been
+ // scheduled, and the last instruction is VL.back(). So we start with
+ // VL.back() and iterate over schedule data until we reach the end of the
+ // bundle. The end of the bundle is marked by null ScheduleData.
+ if (BlocksSchedules.count(BB)) {
+ auto *Bundle = BlocksSchedules[BB]->getScheduleData(VL.back());
+ if (Bundle && Bundle->isPartOfBundle())
+ for (; Bundle; Bundle = Bundle->NextInBundle)
+ LastInst = Bundle->Inst;
+ }
+
+ // LastInst can still be null at this point if there's either not an entry
+ // for BB in BlocksSchedules or there's no ScheduleData available for
+ // VL.back(). This can be the case if buildTree_rec aborts for various
+ // reasons (e.g., the maximum recursion depth is reached, the maximum region
+ // size is reached, etc.). ScheduleData is initialized in the scheduling
+ // "dry-run".
+ //
+ // If this happens, we can still find the last instruction by brute force. We
+ // iterate forwards from Front (inclusive) until we either see all
+ // instructions in the bundle or reach the end of the block. If Front is the
+ // last instruction in program order, LastInst will be set to Front, and we
+ // will visit all the remaining instructions in the block.
+ //
+ // One of the reasons we exit early from buildTree_rec is to place an upper
+ // bound on compile-time. Thus, taking an additional compile-time hit here is
+ // not ideal. However, this should be exceedingly rare since it requires that
+ // we both exit early from buildTree_rec and that the bundle be out-of-order
+ // (causing us to iterate all the way to the end of the block).
+ if (!LastInst) {
+ SmallPtrSet<Value *, 16> Bundle(VL.begin(), VL.end());
+ for (auto &I : make_range(BasicBlock::iterator(Front), BB->end())) {
+ if (Bundle.erase(&I))
+ LastInst = &I;
+ if (Bundle.empty())
+ break;
+ }
+ }
+
+ // Set the insertion point after the last instruction in the bundle. Set the
+ // debug location to Front.
+ Builder.SetInsertPoint(BB, next(BasicBlock::iterator(LastInst)));
+ Builder.SetCurrentDebugLocation(Front->getDebugLoc());
}
Value *BoUpSLP::Gather(ArrayRef<Value *> VL, VectorType *Ty) {
@@ -2206,7 +2261,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
if (E->NeedToGather) {
setInsertPointAfterBundle(E->Scalars);
- return Gather(E->Scalars, VecTy);
+ auto *V = Gather(E->Scalars, VecTy);
+ E->VectorizedValue = V;
+ return V;
}
unsigned Opcode = getSameOpcode(E->Scalars);
@@ -2253,7 +2310,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
E->VectorizedValue = V;
return V;
}
- return Gather(E->Scalars, VecTy);
+ setInsertPointAfterBundle(E->Scalars);
+ auto *V = Gather(E->Scalars, VecTy);
+ E->VectorizedValue = V;
+ return V;
}
case Instruction::ExtractValue: {
if (canReuseExtract(E->Scalars, Instruction::ExtractValue)) {
@@ -2265,7 +2325,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry
E->VectorizedValue = V;
return propagateMetadata(V, E->Scalars);
}
- return Gather(E->Scalars, VecTy);
+ setInsertPointAfterBundle(E->Scalars);
+ auto *V = Gather(E->Scalars, VecTy);
+ E->VectorizedValue = V;
+ return V;
}
case Instruction::ZExt:
case Instruction::SExt:
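Summarizing the SLPVectorizer changes above: the recursion-depth and tiny-tree limits become hidden cl::opt flags (so they can be overridden on the command line, e.g. -slp-recursion-max-depth=N and -slp-min-tree-size=N when running the pass through opt), gathered trees now cache E->VectorizedValue, and setInsertPointAfterBundle finds the last instruction of a bundle, falling back to a forward scan from the first member when no scheduling data is available. A standalone sketch of that fallback scan (container types and names are illustrative, not the LLVM API):

#include <cstdio>
#include <string>
#include <unordered_set>
#include <vector>

// Given a block of "instructions" and a bundle (an unordered subset of them),
// find the member that comes last in program order: walk forward from the
// first member, erase each member we encounter from the set, and stop as soon
// as the set is empty.
const std::string *findLastInBundle(const std::vector<std::string> &Block,
                                    std::unordered_set<std::string> Bundle,
                                    size_t FrontIndex) {
  const std::string *Last = nullptr;
  for (size_t I = FrontIndex; I < Block.size() && !Bundle.empty(); ++I)
    if (Bundle.erase(Block[I]))
      Last = &Block[I];
  return Last;
}

int main() {
  std::vector<std::string> Block = {"a", "b", "c", "d", "e"};
  std::unordered_set<std::string> Bundle = {"b", "d"};
  if (const std::string *Last = findLastInBundle(Block, Bundle, 1))
    std::printf("%s\n", Last->c_str()); // prints "d"
}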