svn commit: r329931 - in vendor/llvm/dist-release_60: docs include/llvm/Bitcode include/llvm/MC include/llvm/Transforms/Utils lib/Analysis lib/Bitcode/Reader lib/Bitcode/Writer lib/Support lib/Targ...
Dimitry Andric
dim at FreeBSD.org
Sat Feb 24 21:27:33 UTC 2018
Author: dim
Date: Sat Feb 24 21:27:30 2018
New Revision: 329931
URL: https://svnweb.freebsd.org/changeset/base/329931
Log:
Vendor import of llvm release_60 branch r325932:
https://llvm.org/svn/llvm-project/llvm/branches/release_60@325932
Added:
vendor/llvm/dist-release_60/test/CodeGen/AArch64/GlobalISel/fp16-copy-gpr.mir
vendor/llvm/dist-release_60/test/CodeGen/PowerPC/pr36292.ll
vendor/llvm/dist-release_60/test/Transforms/InstCombine/pr36362.ll
Modified:
vendor/llvm/dist-release_60/docs/ReleaseNotes.rst
vendor/llvm/dist-release_60/docs/index.rst
vendor/llvm/dist-release_60/include/llvm/Bitcode/LLVMBitCodes.h
vendor/llvm/dist-release_60/include/llvm/MC/MCAsmMacro.h
vendor/llvm/dist-release_60/include/llvm/Transforms/Utils/LoopUtils.h
vendor/llvm/dist-release_60/lib/Analysis/ScalarEvolution.cpp
vendor/llvm/dist-release_60/lib/Bitcode/Reader/BitcodeReader.cpp
vendor/llvm/dist-release_60/lib/Bitcode/Writer/BitcodeWriter.cpp
vendor/llvm/dist-release_60/lib/Support/CMakeLists.txt
vendor/llvm/dist-release_60/lib/Target/AArch64/AArch64InstructionSelector.cpp
vendor/llvm/dist-release_60/lib/Target/AMDGPU/SIInstrInfo.cpp
vendor/llvm/dist-release_60/lib/Target/PowerPC/PPCCTRLoops.cpp
vendor/llvm/dist-release_60/lib/Target/X86/X86.td
vendor/llvm/dist-release_60/lib/Transforms/InstCombine/InstCombineSelect.cpp
vendor/llvm/dist-release_60/lib/Transforms/Scalar/LICM.cpp
vendor/llvm/dist-release_60/lib/Transforms/Utils/LoopUtils.cpp
vendor/llvm/dist-release_60/lib/Transforms/Vectorize/LoopVectorize.cpp
vendor/llvm/dist-release_60/test/Bitcode/compatibility-3.6.ll
vendor/llvm/dist-release_60/test/Bitcode/compatibility-3.7.ll
vendor/llvm/dist-release_60/test/Bitcode/compatibility-3.8.ll
vendor/llvm/dist-release_60/test/Bitcode/compatibility-3.9.ll
vendor/llvm/dist-release_60/test/Bitcode/compatibility-4.0.ll
vendor/llvm/dist-release_60/test/Bitcode/compatibility-5.0.ll
vendor/llvm/dist-release_60/test/CodeGen/AArch64/GlobalISel/select-insert-extract.mir
vendor/llvm/dist-release_60/test/CodeGen/AMDGPU/smrd.ll
vendor/llvm/dist-release_60/test/CodeGen/X86/clwb.ll
vendor/llvm/dist-release_60/test/Transforms/LICM/sinking.ll
vendor/llvm/dist-release_60/test/Transforms/LoopVectorize/pr30654-phiscev-sext-trunc.ll
vendor/llvm/dist-release_60/test/Transforms/LoopVectorize/pr35773.ll
vendor/llvm/dist-release_60/test/Transforms/LoopVectorize/reduction-small-size.ll
vendor/llvm/dist-release_60/test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll
vendor/llvm/dist-release_60/test/tools/llvm-config/system-libs.windows.test
Modified: vendor/llvm/dist-release_60/docs/ReleaseNotes.rst
==============================================================================
--- vendor/llvm/dist-release_60/docs/ReleaseNotes.rst Sat Feb 24 21:25:56 2018 (r329930)
+++ vendor/llvm/dist-release_60/docs/ReleaseNotes.rst Sat Feb 24 21:27:30 2018 (r329931)
@@ -5,12 +5,6 @@ LLVM 6.0.0 Release Notes
.. contents::
:local:
-.. warning::
- These are in-progress notes for the upcoming LLVM 6 release.
- Release notes for previous releases can be found on
- `the Download Page <http://releases.llvm.org/download.html>`_.
-
-
Introduction
============
@@ -26,11 +20,6 @@ have questions or comments, the `LLVM Developer's Mail
<http://lists.llvm.org/mailman/listinfo/llvm-dev>`_ is a good place to send
them.
-Note that if you are reading this file from a Subversion checkout or the main
-LLVM web page, this document applies to the *next* release, not the current
-one. To see the release notes for a specific release, please see the `releases
-page <http://llvm.org/releases/>`_.
-
Non-comprehensive list of changes in this release
=================================================
.. NOTE
@@ -56,6 +45,9 @@ Non-comprehensive list of changes in this release
* Significantly improved quality of CodeView debug info for Windows.
+* Preliminary support for Sanitizers and sibling features on X86(_64) NetBSD
+ (ASan, UBsan, TSan, MSan, SafeStack, libFuzzer).
+
* Note..
.. NOTE
@@ -71,6 +63,15 @@ Non-comprehensive list of changes in this release
Changes to the LLVM IR
----------------------
+* The fast-math-flags (FMF) have been updated. Previously, the 'fast' flag
+ indicated that floating-point reassociation was allowed and all other flags
+ were set too. The 'fast' flag still exists, but there is a new flag called
+ 'reassoc' to indicate specifically that reassociation is allowed. A new bit
+ called 'afn' was also added to selectively allow approximations for common
+ mathlib functions like square-root. The new flags provide more flexibility
+ to enable/disable specific floating-point optimizations. Making the
+ optimizer respond appropriately to these flags is an ongoing effort.
+
Changes to the AArch64 Target
-----------------------------
@@ -112,9 +113,45 @@ Changes to the Hexagon Target
Changes to the MIPS Target
--------------------------
- During this release ...
+Fixed numerous bugs:
+* fpowi on MIPS64 giving incorrect results when used with a negative integer.
+* Usage of the asm 'c' constraint with the wrong datatype causing an
+ assert/crash.
+* Fixed a conversion bug when using the DSP ASE.
+* Fixed an inconsistency where objects were not marked as using microMIPS
+ when the micromips function attribute or the ".set micromips" directive was
+ used.
+* Reordered the MIPSR6 specific hazard scheduler pass to after the delay slot
+ filler, fixing a class of rare edge case bugs where the delay slot filler
+ would violate ISA restrictions.
+* Fixed a crash when using a type of unknown size with gp relative addressing.
+* Corrected the j macro for microMIPS.
+* Corrected the encoding of movep for microMIPS32r6.
+* Fixed an issue with the usage of insert instructions having an invalid set of
+ operands.
+* Fixed an issue where TLS symbols were not marked as such.
+* Enabled the usage of register scavenging with MSA, due to its shorter offsets
+ for loads and stores.
+* Corrected the ELF headers when using the DSP ASE.
+New features:
+
+* The long branch pass now generates some R6 specific instructions when
+ targeting MIPSR6.
+* The delay slot filler now performs more branch conversions if delay slots
+ cannot be filled.
+* The MIPS MT ASE is now fully supported.
+* Added support for the ``lapc`` pseudo instruction.
+* Improved the selection of multiple instructions (``dext``, ``nmadd``,
+ ``nmsub``).
+* Further improved microMIPS codesize reduction.
+
+Deprecation notices:
+
+* microMIPS64R6 support has been deprecated since 5.0, and has now been
+ completely removed.
+
Changes to the PowerPC Target
-----------------------------
@@ -132,10 +169,42 @@ During this release the SystemZ target has:
Changes to the X86 Target
-------------------------
-During this release ...
+During this release the X86 target has:
-* Got support for enabling SjLj exception handling on platforms where it
+* Added support for enabling SjLj exception handling on platforms where it
isn't the default.
+
+* Added intrinsics for Intel Extensions: VAES, GFNI, VPCLMULQDQ, AVX512VBMI2, AVX512BITALG, AVX512VNNI.
+
+* Added support for Intel Icelake CPU.
+
+* Fixed some X87 codegen bugs.
+
+* Added instruction scheduling information for Intel Sandy Bridge, Ivy Bridge, Haswell, Broadwell, and Skylake CPUs.
+
+* Improved scheduler model for AMD Jaguar CPUs.
+
+* Improved llvm-mc's disassembler for some EVEX encoded instructions.
+
+* Added support for i8 and i16 vector signed/unsigned min/max horizontal reductions.
+
+* Improved codegen for memory comparisons
+
+* Improved codegen for i32 vector multiplies
+
+* Improved codegen for scalar integer absolute values
+
+* Improved codegen for vector integer rotations (XOP and AVX512)
+
+* Improved codegen of data being transferred between GPRs and K-registers.
+
+* Improved codegen for vector truncations.
+
+* Improved folding of address computations into gather/scatter instructions.
+
+* Gained initial support for recognizing variable shuffles from vector element extracts and inserts.
+
+* Improved documentation for SSE/AVX intrinsics in *intrin.h header files.
Changes to the AMDGPU Target
-----------------------------
Modified: vendor/llvm/dist-release_60/docs/index.rst
==============================================================================
--- vendor/llvm/dist-release_60/docs/index.rst Sat Feb 24 21:25:56 2018 (r329930)
+++ vendor/llvm/dist-release_60/docs/index.rst Sat Feb 24 21:27:30 2018 (r329931)
@@ -1,11 +1,6 @@
Overview
========
-.. warning::
-
- If you are using a released version of LLVM, see `the download page
- <http://llvm.org/releases/>`_ to find your documentation.
-
The LLVM compiler infrastructure supports a wide range of projects, from
industrial strength compilers to specialized JIT applications to small
research projects.
Modified: vendor/llvm/dist-release_60/include/llvm/Bitcode/LLVMBitCodes.h
==============================================================================
--- vendor/llvm/dist-release_60/include/llvm/Bitcode/LLVMBitCodes.h Sat Feb 24 21:25:56 2018 (r329930)
+++ vendor/llvm/dist-release_60/include/llvm/Bitcode/LLVMBitCodes.h Sat Feb 24 21:27:30 2018 (r329931)
@@ -395,6 +395,20 @@ enum OverflowingBinaryOperatorOptionalFlags {
OBO_NO_SIGNED_WRAP = 1
};
+/// FastMath Flags
+/// This is a fixed layout derived from the bitcode emitted by LLVM 5.0
+/// intended to decouple the in-memory representation from the serialization.
+enum FastMathMap {
+ UnsafeAlgebra = (1 << 0), // Legacy
+ NoNaNs = (1 << 1),
+ NoInfs = (1 << 2),
+ NoSignedZeros = (1 << 3),
+ AllowReciprocal = (1 << 4),
+ AllowContract = (1 << 5),
+ ApproxFunc = (1 << 6),
+ AllowReassoc = (1 << 7)
+};
+
/// PossiblyExactOperatorOptionalFlags - Flags for serializing
/// PossiblyExactOperator's SubclassOptionalData contents.
enum PossiblyExactOperatorOptionalFlags { PEO_EXACT = 0 };
Modified: vendor/llvm/dist-release_60/include/llvm/MC/MCAsmMacro.h
==============================================================================
--- vendor/llvm/dist-release_60/include/llvm/MC/MCAsmMacro.h Sat Feb 24 21:25:56 2018 (r329930)
+++ vendor/llvm/dist-release_60/include/llvm/MC/MCAsmMacro.h Sat Feb 24 21:27:30 2018 (r329931)
@@ -33,6 +33,6 @@ struct MCAsmMacro { (public)
MCAsmMacro(StringRef N, StringRef B, MCAsmMacroParameters P)
: Name(N), Body(B), Parameters(std::move(P)) {}
};
-}; // namespace llvm
+} // namespace llvm
#endif
Modified: vendor/llvm/dist-release_60/include/llvm/Transforms/Utils/LoopUtils.h
==============================================================================
--- vendor/llvm/dist-release_60/include/llvm/Transforms/Utils/LoopUtils.h Sat Feb 24 21:25:56 2018 (r329930)
+++ vendor/llvm/dist-release_60/include/llvm/Transforms/Utils/LoopUtils.h Sat Feb 24 21:27:30 2018 (r329931)
@@ -21,6 +21,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Dominators.h"
@@ -172,15 +173,25 @@ class RecurrenceDescriptor { (public)
Value *Left, Value *Right);
/// Returns true if Phi is a reduction of type Kind and adds it to the
- /// RecurrenceDescriptor.
+ /// RecurrenceDescriptor. If either \p DB is non-null or \p AC and \p DT are
+ /// non-null, the minimal bit width needed to compute the reduction will be
+ /// computed.
static bool AddReductionVar(PHINode *Phi, RecurrenceKind Kind, Loop *TheLoop,
bool HasFunNoNaNAttr,
- RecurrenceDescriptor &RedDes);
+ RecurrenceDescriptor &RedDes,
+ DemandedBits *DB = nullptr,
+ AssumptionCache *AC = nullptr,
+ DominatorTree *DT = nullptr);
- /// Returns true if Phi is a reduction in TheLoop. The RecurrenceDescriptor is
- /// returned in RedDes.
+ /// Returns true if Phi is a reduction in TheLoop. The RecurrenceDescriptor
+ /// is returned in RedDes. If either \p DB is non-null or \p AC and \p DT are
+ /// non-null, the minimal bit width needed to compute the reduction will be
+ /// computed.
static bool isReductionPHI(PHINode *Phi, Loop *TheLoop,
- RecurrenceDescriptor &RedDes);
+ RecurrenceDescriptor &RedDes,
+ DemandedBits *DB = nullptr,
+ AssumptionCache *AC = nullptr,
+ DominatorTree *DT = nullptr);
/// Returns true if Phi is a first-order recurrence. A first-order recurrence
/// is a non-reduction recurrence relation in which the value of the
@@ -217,24 +228,6 @@ class RecurrenceDescriptor { (public)
/// Returns true if the recurrence kind is an arithmetic kind.
static bool isArithmeticRecurrenceKind(RecurrenceKind Kind);
-
- /// Determines if Phi may have been type-promoted. If Phi has a single user
- /// that ANDs the Phi with a type mask, return the user. RT is updated to
- /// account for the narrower bit width represented by the mask, and the AND
- /// instruction is added to CI.
- static Instruction *lookThroughAnd(PHINode *Phi, Type *&RT,
- SmallPtrSetImpl<Instruction *> &Visited,
- SmallPtrSetImpl<Instruction *> &CI);
-
- /// Returns true if all the source operands of a recurrence are either
- /// SExtInsts or ZExtInsts. This function is intended to be used with
- /// lookThroughAnd to determine if the recurrence has been type-promoted. The
- /// source operands are added to CI, and IsSigned is updated to indicate if
- /// all source operands are SExtInsts.
- static bool getSourceExtensionKind(Instruction *Start, Instruction *Exit,
- Type *RT, bool &IsSigned,
- SmallPtrSetImpl<Instruction *> &Visited,
- SmallPtrSetImpl<Instruction *> &CI);
/// Returns the type of the recurrence. This type can be narrower than the
/// actual type of the Phi if the recurrence has been type-promoted.
Modified: vendor/llvm/dist-release_60/lib/Analysis/ScalarEvolution.cpp
==============================================================================
--- vendor/llvm/dist-release_60/lib/Analysis/ScalarEvolution.cpp Sat Feb 24 21:25:56 2018 (r329930)
+++ vendor/llvm/dist-release_60/lib/Analysis/ScalarEvolution.cpp Sat Feb 24 21:27:30 2018 (r329931)
@@ -205,6 +205,11 @@ static cl::opt<unsigned>
cl::desc("Max coefficients in AddRec during evolving"),
cl::init(16));
+static cl::opt<bool> VersionUnknown(
+ "scev-version-unknown", cl::Hidden,
+ cl::desc("Use predicated scalar evolution to version SCEVUnknowns"),
+ cl::init(false));
+
//===----------------------------------------------------------------------===//
// SCEV class definitions
//===----------------------------------------------------------------------===//
@@ -11467,6 +11472,8 @@ class SCEVPredicateRewriter : public SCEVRewriteVisito
// couldn't create an AddRec for it, or couldn't add the predicate), we just
// return \p Expr.
const SCEV *convertToAddRecWithPreds(const SCEVUnknown *Expr) {
+ if (!VersionUnknown)
+ return Expr;
if (!isa<PHINode>(Expr->getValue()))
return Expr;
Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
Modified: vendor/llvm/dist-release_60/lib/Bitcode/Reader/BitcodeReader.cpp
==============================================================================
--- vendor/llvm/dist-release_60/lib/Bitcode/Reader/BitcodeReader.cpp Sat Feb 24 21:25:56 2018 (r329930)
+++ vendor/llvm/dist-release_60/lib/Bitcode/Reader/BitcodeReader.cpp Sat Feb 24 21:27:30 2018 (r329931)
@@ -1046,19 +1046,21 @@ static Comdat::SelectionKind getDecodedComdatSelection
static FastMathFlags getDecodedFastMathFlags(unsigned Val) {
FastMathFlags FMF;
- if (0 != (Val & FastMathFlags::AllowReassoc))
+ if (0 != (Val & bitc::UnsafeAlgebra))
+ FMF.setFast();
+ if (0 != (Val & bitc::AllowReassoc))
FMF.setAllowReassoc();
- if (0 != (Val & FastMathFlags::NoNaNs))
+ if (0 != (Val & bitc::NoNaNs))
FMF.setNoNaNs();
- if (0 != (Val & FastMathFlags::NoInfs))
+ if (0 != (Val & bitc::NoInfs))
FMF.setNoInfs();
- if (0 != (Val & FastMathFlags::NoSignedZeros))
+ if (0 != (Val & bitc::NoSignedZeros))
FMF.setNoSignedZeros();
- if (0 != (Val & FastMathFlags::AllowReciprocal))
+ if (0 != (Val & bitc::AllowReciprocal))
FMF.setAllowReciprocal();
- if (0 != (Val & FastMathFlags::AllowContract))
+ if (0 != (Val & bitc::AllowContract))
FMF.setAllowContract(true);
- if (0 != (Val & FastMathFlags::ApproxFunc))
+ if (0 != (Val & bitc::ApproxFunc))
FMF.setApproxFunc();
return FMF;
}
Modified: vendor/llvm/dist-release_60/lib/Bitcode/Writer/BitcodeWriter.cpp
==============================================================================
--- vendor/llvm/dist-release_60/lib/Bitcode/Writer/BitcodeWriter.cpp Sat Feb 24 21:25:56 2018 (r329930)
+++ vendor/llvm/dist-release_60/lib/Bitcode/Writer/BitcodeWriter.cpp Sat Feb 24 21:27:30 2018 (r329931)
@@ -1330,19 +1330,19 @@ static uint64_t getOptimizationFlags(const Value *V) {
Flags |= 1 << bitc::PEO_EXACT;
} else if (const auto *FPMO = dyn_cast<FPMathOperator>(V)) {
if (FPMO->hasAllowReassoc())
- Flags |= FastMathFlags::AllowReassoc;
+ Flags |= bitc::AllowReassoc;
if (FPMO->hasNoNaNs())
- Flags |= FastMathFlags::NoNaNs;
+ Flags |= bitc::NoNaNs;
if (FPMO->hasNoInfs())
- Flags |= FastMathFlags::NoInfs;
+ Flags |= bitc::NoInfs;
if (FPMO->hasNoSignedZeros())
- Flags |= FastMathFlags::NoSignedZeros;
+ Flags |= bitc::NoSignedZeros;
if (FPMO->hasAllowReciprocal())
- Flags |= FastMathFlags::AllowReciprocal;
+ Flags |= bitc::AllowReciprocal;
if (FPMO->hasAllowContract())
- Flags |= FastMathFlags::AllowContract;
+ Flags |= bitc::AllowContract;
if (FPMO->hasApproxFunc())
- Flags |= FastMathFlags::ApproxFunc;
+ Flags |= bitc::ApproxFunc;
}
return Flags;
@@ -3183,7 +3183,7 @@ void ModuleBitcodeWriter::writeBlockInfo() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // LHS
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // RHS
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // opc
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); // flags
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); // flags
if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) !=
FUNCTION_INST_BINOP_FLAGS_ABBREV)
llvm_unreachable("Unexpected abbrev ordering!");
Modified: vendor/llvm/dist-release_60/lib/Support/CMakeLists.txt
==============================================================================
--- vendor/llvm/dist-release_60/lib/Support/CMakeLists.txt Sat Feb 24 21:25:56 2018 (r329930)
+++ vendor/llvm/dist-release_60/lib/Support/CMakeLists.txt Sat Feb 24 21:27:30 2018 (r329931)
@@ -4,7 +4,8 @@ if ( LLVM_ENABLE_ZLIB AND HAVE_LIBZ )
endif()
if( MSVC OR MINGW )
# libuuid required for FOLDERID_Profile usage in lib/Support/Windows/Path.inc.
- set(system_libs ${system_libs} psapi shell32 ole32 uuid)
+ # advapi32 required for CryptAcquireContextW in lib/Support/Windows/Path.inc.
+ set(system_libs ${system_libs} psapi shell32 ole32 uuid advapi32)
elseif( CMAKE_HOST_UNIX )
if( HAVE_LIBRT )
set(system_libs ${system_libs} rt)
Modified: vendor/llvm/dist-release_60/lib/Target/AArch64/AArch64InstructionSelector.cpp
==============================================================================
--- vendor/llvm/dist-release_60/lib/Target/AArch64/AArch64InstructionSelector.cpp Sat Feb 24 21:25:56 2018 (r329930)
+++ vendor/llvm/dist-release_60/lib/Target/AArch64/AArch64InstructionSelector.cpp Sat Feb 24 21:27:30 2018 (r329931)
@@ -133,16 +133,21 @@ AArch64InstructionSelector::AArch64InstructionSelector
// for each class in the bank.
static const TargetRegisterClass *
getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
- const RegisterBankInfo &RBI) {
+ const RegisterBankInfo &RBI,
+ bool GetAllRegSet = false) {
if (RB.getID() == AArch64::GPRRegBankID) {
if (Ty.getSizeInBits() <= 32)
- return &AArch64::GPR32RegClass;
+ return GetAllRegSet ? &AArch64::GPR32allRegClass
+ : &AArch64::GPR32RegClass;
if (Ty.getSizeInBits() == 64)
- return &AArch64::GPR64RegClass;
+ return GetAllRegSet ? &AArch64::GPR64allRegClass
+ : &AArch64::GPR64RegClass;
return nullptr;
}
if (RB.getID() == AArch64::FPRRegBankID) {
+ if (Ty.getSizeInBits() <= 16)
+ return &AArch64::FPR16RegClass;
if (Ty.getSizeInBits() == 32)
return &AArch64::FPR32RegClass;
if (Ty.getSizeInBits() == 64)
@@ -310,19 +315,46 @@ static unsigned selectLoadStoreUIOp(unsigned GenericOp
return GenericOpc;
}
+static bool selectFP16CopyFromGPR32(MachineInstr &I, const TargetInstrInfo &TII,
+ MachineRegisterInfo &MRI, unsigned SrcReg) {
+ // Copies from gpr32 to fpr16 need to use a sub-register copy.
+ unsigned CopyReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::COPY))
+ .addDef(CopyReg)
+ .addUse(SrcReg);
+ unsigned SubRegCopy = MRI.createVirtualRegister(&AArch64::FPR16RegClass);
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY))
+ .addDef(SubRegCopy)
+ .addUse(CopyReg, 0, AArch64::hsub);
+
+ MachineOperand &RegOp = I.getOperand(1);
+ RegOp.setReg(SubRegCopy);
+ return true;
+}
+
static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
const RegisterBankInfo &RBI) {
unsigned DstReg = I.getOperand(0).getReg();
+ unsigned SrcReg = I.getOperand(1).getReg();
+
if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
+ if (TRI.getRegClass(AArch64::FPR16RegClassID)->contains(DstReg) &&
+ !TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
+ const RegisterBank &RegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
+ const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(
+ MRI.getType(SrcReg), RegBank, RBI, /* GetAllRegSet */ true);
+ if (SrcRC == &AArch64::GPR32allRegClass)
+ return selectFP16CopyFromGPR32(I, TII, MRI, SrcReg);
+ }
assert(I.isCopy() && "Generic operators do not allow physical registers");
return true;
}
const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI);
const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
- unsigned SrcReg = I.getOperand(1).getReg();
+ (void)DstSize;
const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
(void)SrcSize;
assert((!TargetRegisterInfo::isPhysicalRegister(SrcReg) || I.isCopy()) &&
@@ -340,26 +372,38 @@ static bool selectCopy(MachineInstr &I, const TargetIn
"Copy with different width?!");
assert((DstSize <= 64 || RegBank.getID() == AArch64::FPRRegBankID) &&
"GPRs cannot get more than 64-bit width values");
- const TargetRegisterClass *RC = nullptr;
- if (RegBank.getID() == AArch64::FPRRegBankID) {
- if (DstSize <= 16)
- RC = &AArch64::FPR16RegClass;
- else if (DstSize <= 32)
- RC = &AArch64::FPR32RegClass;
- else if (DstSize <= 64)
- RC = &AArch64::FPR64RegClass;
- else if (DstSize <= 128)
- RC = &AArch64::FPR128RegClass;
- else {
- DEBUG(dbgs() << "Unexpected bitcast size " << DstSize << '\n');
- return false;
+ const TargetRegisterClass *RC = getRegClassForTypeOnBank(
+ MRI.getType(DstReg), RegBank, RBI, /* GetAllRegSet */ true);
+ if (!RC) {
+ DEBUG(dbgs() << "Unexpected bitcast size " << DstSize << '\n');
+ return false;
+ }
+
+ if (!TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
+ const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(SrcReg);
+ const TargetRegisterClass *SrcRC =
+ RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
+ const RegisterBank *RB = nullptr;
+ if (!SrcRC) {
+ RB = RegClassOrBank.get<const RegisterBank *>();
+ SrcRC = getRegClassForTypeOnBank(MRI.getType(SrcReg), *RB, RBI, true);
}
- } else {
- assert(RegBank.getID() == AArch64::GPRRegBankID &&
- "Bitcast for the flags?");
- RC =
- DstSize <= 32 ? &AArch64::GPR32allRegClass : &AArch64::GPR64allRegClass;
+ // Copies from fpr16 to gpr32 need to use SUBREG_TO_REG.
+ if (RC == &AArch64::GPR32allRegClass && SrcRC == &AArch64::FPR16RegClass) {
+ unsigned PromoteReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
+ BuildMI(*I.getParent(), I, I.getDebugLoc(),
+ TII.get(AArch64::SUBREG_TO_REG))
+ .addDef(PromoteReg)
+ .addImm(0)
+ .addUse(SrcReg)
+ .addImm(AArch64::hsub);
+ MachineOperand &RegOp = I.getOperand(1);
+ RegOp.setReg(PromoteReg);
+ } else if (RC == &AArch64::FPR16RegClass &&
+ SrcRC == &AArch64::GPR32allRegClass) {
+ selectFP16CopyFromGPR32(I, TII, MRI, SrcReg);
+ }
}
// No need to constrain SrcReg. It will get constrained when
@@ -795,15 +839,23 @@ bool AArch64InstructionSelector::select(MachineInstr &
}
case TargetOpcode::G_EXTRACT: {
LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
+ LLT DstTy = MRI.getType(I.getOperand(0).getReg());
+ unsigned SrcSize = SrcTy.getSizeInBits();
// Larger extracts are vectors, same-size extracts should be something else
// by now (either split up or simplified to a COPY).
if (SrcTy.getSizeInBits() > 64 || Ty.getSizeInBits() > 32)
return false;
- I.setDesc(TII.get(AArch64::UBFMXri));
+ I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
Ty.getSizeInBits() - 1);
+ if (SrcSize < 64) {
+ assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
+ "unexpected G_EXTRACT types");
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ }
+
unsigned DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
BuildMI(MBB, std::next(I.getIterator()), I.getDebugLoc(),
TII.get(AArch64::COPY))
@@ -818,16 +870,25 @@ bool AArch64InstructionSelector::select(MachineInstr &
case TargetOpcode::G_INSERT: {
LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
+ LLT DstTy = MRI.getType(I.getOperand(0).getReg());
+ unsigned DstSize = DstTy.getSizeInBits();
+ (void)DstSize;
// Larger inserts are vectors, same-size ones should be something else by
// now (split up or turned into COPYs).
if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
return false;
- I.setDesc(TII.get(AArch64::BFMXri));
+ I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
unsigned LSB = I.getOperand(3).getImm();
unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
- I.getOperand(3).setImm((64 - LSB) % 64);
+ I.getOperand(3).setImm((DstSize - LSB) % DstSize);
MachineInstrBuilder(MF, I).addImm(Width - 1);
+
+ if (DstSize < 64) {
+ assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
+ "unexpected G_INSERT types");
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ }
unsigned SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
Modified: vendor/llvm/dist-release_60/lib/Target/AMDGPU/SIInstrInfo.cpp
==============================================================================
--- vendor/llvm/dist-release_60/lib/Target/AMDGPU/SIInstrInfo.cpp Sat Feb 24 21:25:56 2018 (r329930)
+++ vendor/llvm/dist-release_60/lib/Target/AMDGPU/SIInstrInfo.cpp Sat Feb 24 21:27:30 2018 (r329931)
@@ -3797,7 +3797,8 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) co
}
}
- BuildMI(*MBB, Inst, Inst.getDebugLoc(),
+ MachineInstr *NewInstr =
+ BuildMI(*MBB, Inst, Inst.getDebugLoc(),
get(AMDGPU::BUFFER_LOAD_DWORD_OFFEN), VDst)
.add(*VAddr) // vaddr
.add(*getNamedOperand(Inst, AMDGPU::OpName::sbase)) // srsrc
@@ -3806,12 +3807,17 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) co
.addImm(getNamedOperand(Inst, AMDGPU::OpName::glc)->getImm())
.addImm(0) // slc
.addImm(0) // tfe
- .setMemRefs(Inst.memoperands_begin(), Inst.memoperands_end());
+ .setMemRefs(Inst.memoperands_begin(), Inst.memoperands_end())
+ .getInstr();
MRI.replaceRegWith(getNamedOperand(Inst, AMDGPU::OpName::sdst)->getReg(),
VDst);
addUsersToMoveToVALUWorklist(VDst, MRI, Worklist);
Inst.eraseFromParent();
+
+ // Legalize all operands other than the offset. Notably, convert the srsrc
+ // into SGPRs using v_readfirstlane if needed.
+ legalizeOperands(*NewInstr);
continue;
}
}
Modified: vendor/llvm/dist-release_60/lib/Target/PowerPC/PPCCTRLoops.cpp
==============================================================================
--- vendor/llvm/dist-release_60/lib/Target/PowerPC/PPCCTRLoops.cpp Sat Feb 24 21:25:56 2018 (r329930)
+++ vendor/llvm/dist-release_60/lib/Target/PowerPC/PPCCTRLoops.cpp Sat Feb 24 21:27:30 2018 (r329931)
@@ -454,13 +454,16 @@ bool PPCCTRLoops::mightUseCTR(BasicBlock *BB) {
return true;
}
+ // FREM is always a call.
+ if (J->getOpcode() == Instruction::FRem)
+ return true;
+
if (STI->useSoftFloat()) {
switch(J->getOpcode()) {
case Instruction::FAdd:
case Instruction::FSub:
case Instruction::FMul:
case Instruction::FDiv:
- case Instruction::FRem:
case Instruction::FPTrunc:
case Instruction::FPExt:
case Instruction::FPToUI:
Modified: vendor/llvm/dist-release_60/lib/Target/X86/X86.td
==============================================================================
--- vendor/llvm/dist-release_60/lib/Target/X86/X86.td Sat Feb 24 21:25:56 2018 (r329930)
+++ vendor/llvm/dist-release_60/lib/Target/X86/X86.td Sat Feb 24 21:27:30 2018 (r329931)
@@ -740,7 +740,13 @@ class SkylakeServerProc<string Name> : ProcModel<Name,
def : SkylakeServerProc<"skylake-avx512">;
def : SkylakeServerProc<"skx">; // Legacy alias.
-def CNLFeatures : ProcessorFeatures<SKXFeatures.Value, [
+def CNLFeatures : ProcessorFeatures<SKLFeatures.Value, [
+ FeatureAVX512,
+ FeatureCDI,
+ FeatureDQI,
+ FeatureBWI,
+ FeatureVLX,
+ FeaturePKU,
FeatureVBMI,
FeatureIFMA,
FeatureSHA
Modified: vendor/llvm/dist-release_60/lib/Transforms/InstCombine/InstCombineSelect.cpp
==============================================================================
--- vendor/llvm/dist-release_60/lib/Transforms/InstCombine/InstCombineSelect.cpp Sat Feb 24 21:25:56 2018 (r329930)
+++ vendor/llvm/dist-release_60/lib/Transforms/InstCombine/InstCombineSelect.cpp Sat Feb 24 21:27:30 2018 (r329931)
@@ -1643,11 +1643,25 @@ Instruction *InstCombiner::visitSelectInst(SelectInst
}
}
+ auto canMergeSelectThroughBinop = [](BinaryOperator *BO) {
+ // The select might be preventing a division by 0.
+ switch (BO->getOpcode()) {
+ default:
+ return true;
+ case Instruction::SRem:
+ case Instruction::URem:
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ return false;
+ }
+ };
+
// Try to simplify a binop sandwiched between 2 selects with the same
// condition.
// select(C, binop(select(C, X, Y), W), Z) -> select(C, binop(X, W), Z)
BinaryOperator *TrueBO;
- if (match(TrueVal, m_OneUse(m_BinOp(TrueBO)))) {
+ if (match(TrueVal, m_OneUse(m_BinOp(TrueBO))) &&
+ canMergeSelectThroughBinop(TrueBO)) {
if (auto *TrueBOSI = dyn_cast<SelectInst>(TrueBO->getOperand(0))) {
if (TrueBOSI->getCondition() == CondVal) {
TrueBO->setOperand(0, TrueBOSI->getTrueValue());
@@ -1666,7 +1680,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst
// select(C, Z, binop(select(C, X, Y), W)) -> select(C, Z, binop(Y, W))
BinaryOperator *FalseBO;
- if (match(FalseVal, m_OneUse(m_BinOp(FalseBO)))) {
+ if (match(FalseVal, m_OneUse(m_BinOp(FalseBO))) &&
+ canMergeSelectThroughBinop(FalseBO)) {
if (auto *FalseBOSI = dyn_cast<SelectInst>(FalseBO->getOperand(0))) {
if (FalseBOSI->getCondition() == CondVal) {
FalseBO->setOperand(0, FalseBOSI->getFalseValue());
Modified: vendor/llvm/dist-release_60/lib/Transforms/Scalar/LICM.cpp
==============================================================================
--- vendor/llvm/dist-release_60/lib/Transforms/Scalar/LICM.cpp Sat Feb 24 21:25:56 2018 (r329930)
+++ vendor/llvm/dist-release_60/lib/Transforms/Scalar/LICM.cpp Sat Feb 24 21:27:30 2018 (r329931)
@@ -97,7 +97,7 @@ static bool hoist(Instruction &I, const DominatorTree
const LoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE);
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
- const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
+ const Loop *CurLoop, LoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE, bool FreeInLoop);
static bool isSafeToExecuteUnconditionally(Instruction &Inst,
const DominatorTree *DT,
@@ -855,10 +855,16 @@ static Instruction *sinkThroughTriviallyReplacablePHI(
return New;
}
-static bool canSplitPredecessors(PHINode *PN) {
+static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) {
BasicBlock *BB = PN->getParent();
if (!BB->canSplitPredecessors())
return false;
+ // It's not impossible to split EHPad blocks, but if BlockColors already exist
+ // it requires updating BlockColors for all offspring blocks accordingly. By
+ // skipping such a corner case, we can make updating BlockColors after
+ // splitting predecessors fairly simple.
+ if (!SafetyInfo->BlockColors.empty() && BB->getFirstNonPHI()->isEHPad())
+ return false;
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
BasicBlock *BBPred = *PI;
if (isa<IndirectBrInst>(BBPred->getTerminator()))
@@ -868,7 +874,8 @@ static bool canSplitPredecessors(PHINode *PN) {
}
static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT,
- LoopInfo *LI, const Loop *CurLoop) {
+ LoopInfo *LI, const Loop *CurLoop,
+ LoopSafetyInfo *SafetyInfo) {
#ifndef NDEBUG
SmallVector<BasicBlock *, 32> ExitBlocks;
CurLoop->getUniqueExitBlocks(ExitBlocks);
@@ -910,13 +917,21 @@ static void splitPredecessorsOfLoopExit(PHINode *PN, D
// LE:
// %p = phi [%p1, %LE.split], [%p2, %LE.split2]
//
+ auto &BlockColors = SafetyInfo->BlockColors;
SmallSetVector<BasicBlock *, 8> PredBBs(pred_begin(ExitBB), pred_end(ExitBB));
while (!PredBBs.empty()) {
BasicBlock *PredBB = *PredBBs.begin();
assert(CurLoop->contains(PredBB) &&
"Expect all predecessors are in the loop");
- if (PN->getBasicBlockIndex(PredBB) >= 0)
- SplitBlockPredecessors(ExitBB, PredBB, ".split.loop.exit", DT, LI, true);
+ if (PN->getBasicBlockIndex(PredBB) >= 0) {
+ BasicBlock *NewPred = SplitBlockPredecessors(
+ ExitBB, PredBB, ".split.loop.exit", DT, LI, true);
+ // Since we do not allow splitting EH-block with BlockColors in
+ // canSplitPredecessors(), we can simply assign predecessor's color to
+ // the new block.
+ if (!BlockColors.empty())
+ BlockColors[NewPred] = BlockColors[PredBB];
+ }
PredBBs.remove(PredBB);
}
}
@@ -927,7 +942,7 @@ static void splitPredecessorsOfLoopExit(PHINode *PN, D
/// position, and may either delete it or move it to outside of the loop.
///
static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
- const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
+ const Loop *CurLoop, LoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE, bool FreeInLoop) {
DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
ORE->emit([&]() {
@@ -975,12 +990,12 @@ static bool sink(Instruction &I, LoopInfo *LI, Dominat
if (isTriviallyReplacablePHI(*PN, I))
continue;
- if (!canSplitPredecessors(PN))
+ if (!canSplitPredecessors(PN, SafetyInfo))
return Changed;
// Split predecessors of the PHI so that we can make users trivially
// replacable.
- splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop);
+ splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo);
// Should rebuild the iterators, as they may be invalidated by
// splitPredecessorsOfLoopExit().
Modified: vendor/llvm/dist-release_60/lib/Transforms/Utils/LoopUtils.cpp
==============================================================================
--- vendor/llvm/dist-release_60/lib/Transforms/Utils/LoopUtils.cpp Sat Feb 24 21:25:56 2018 (r329930)
+++ vendor/llvm/dist-release_60/lib/Transforms/Utils/LoopUtils.cpp Sat Feb 24 21:27:30 2018 (r329931)
@@ -23,6 +23,7 @@
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
@@ -30,6 +31,7 @@
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
@@ -77,10 +79,13 @@ bool RecurrenceDescriptor::isArithmeticRecurrenceKind(
return false;
}
-Instruction *
-RecurrenceDescriptor::lookThroughAnd(PHINode *Phi, Type *&RT,
- SmallPtrSetImpl<Instruction *> &Visited,
- SmallPtrSetImpl<Instruction *> &CI) {
+/// Determines if Phi may have been type-promoted. If Phi has a single user
+/// that ANDs the Phi with a type mask, return the user. RT is updated to
+/// account for the narrower bit width represented by the mask, and the AND
+/// instruction is added to CI.
+static Instruction *lookThroughAnd(PHINode *Phi, Type *&RT,
+ SmallPtrSetImpl<Instruction *> &Visited,
+ SmallPtrSetImpl<Instruction *> &CI) {
if (!Phi->hasOneUse())
return Phi;
@@ -101,70 +106,92 @@ RecurrenceDescriptor::lookThroughAnd(PHINode *Phi, Typ
return Phi;
}
-bool RecurrenceDescriptor::getSourceExtensionKind(
- Instruction *Start, Instruction *Exit, Type *RT, bool &IsSigned,
- SmallPtrSetImpl<Instruction *> &Visited,
- SmallPtrSetImpl<Instruction *> &CI) {
+/// Compute the minimal bit width needed to represent a reduction whose exit
+/// instruction is given by Exit.
+static std::pair<Type *, bool> computeRecurrenceType(Instruction *Exit,
+ DemandedBits *DB,
+ AssumptionCache *AC,
+ DominatorTree *DT) {
+ bool IsSigned = false;
+ const DataLayout &DL = Exit->getModule()->getDataLayout();
+ uint64_t MaxBitWidth = DL.getTypeSizeInBits(Exit->getType());
+ if (DB) {
+ // Use the demanded bits analysis to determine the bits that are live out
+ // of the exit instruction, rounding up to the nearest power of two. If the
+ // use of demanded bits results in a smaller bit width, we know the value
+ // must be positive (i.e., IsSigned = false), because if this were not the
+ // case, the sign bit would have been demanded.
+ auto Mask = DB->getDemandedBits(Exit);
+ MaxBitWidth = Mask.getBitWidth() - Mask.countLeadingZeros();
+ }
+
+ if (MaxBitWidth == DL.getTypeSizeInBits(Exit->getType()) && AC && DT) {
+ // If demanded bits wasn't able to limit the bit width, we can try to use
+ // value tracking instead. This can be the case, for example, if the value
+ // may be negative.
+ auto NumSignBits = ComputeNumSignBits(Exit, DL, 0, AC, nullptr, DT);
+ auto NumTypeBits = DL.getTypeSizeInBits(Exit->getType());
+ MaxBitWidth = NumTypeBits - NumSignBits;
+ KnownBits Bits = computeKnownBits(Exit, DL);
+ if (!Bits.isNonNegative()) {
+ // If the value is not known to be non-negative, we set IsSigned to true,
+ // meaning that we will use sext instructions instead of zext
+ // instructions to restore the original type.
+ IsSigned = true;
+ if (!Bits.isNegative())
+ // If the value is not known to be negative, we don't know what the
+ // upper bit is, and therefore, we don't know what kind of extend we
+ // will need. In this case, just increase the bit width by one bit and
+ // use sext.
+ ++MaxBitWidth;
+ }
+ }
+ if (!isPowerOf2_64(MaxBitWidth))
+ MaxBitWidth = NextPowerOf2(MaxBitWidth);
+
+ return std::make_pair(Type::getIntNTy(Exit->getContext(), MaxBitWidth),
+ IsSigned);
+}
+
+/// Collect cast instructions that can be ignored in the vectorizer's cost
+/// model, given a reduction exit value and the minimal type in which the
+/// reduction can be represented.
+static void collectCastsToIgnore(Loop *TheLoop, Instruction *Exit,
+ Type *RecurrenceType,
+ SmallPtrSetImpl<Instruction *> &Casts) {
+
SmallVector<Instruction *, 8> Worklist;
- bool FoundOneOperand = false;
- unsigned DstSize = RT->getPrimitiveSizeInBits();
+ SmallPtrSet<Instruction *, 8> Visited;
Worklist.push_back(Exit);
- // Traverse the instructions in the reduction expression, beginning with the
- // exit value.
while (!Worklist.empty()) {
- Instruction *I = Worklist.pop_back_val();
- for (Use &U : I->operands()) {
-
- // Terminate the traversal if the operand is not an instruction, or we
- // reach the starting value.
- Instruction *J = dyn_cast<Instruction>(U.get());
- if (!J || J == Start)
+ Instruction *Val = Worklist.pop_back_val();
+ Visited.insert(Val);
+ if (auto *Cast = dyn_cast<CastInst>(Val))
+ if (Cast->getSrcTy() == RecurrenceType) {
+ // If the source type of a cast instruction is equal to the recurrence
+ // type, it will be eliminated, and should be ignored in the vectorizer
+ // cost model.
+ Casts.insert(Cast);
continue;
-
- // Otherwise, investigate the operation if it is also in the expression.
- if (Visited.count(J)) {
- Worklist.push_back(J);
- continue;
}
- // If the operand is not in Visited, it is not a reduction operation, but
- // it does feed into one. Make sure it is either a single-use sign- or
- // zero-extend instruction.
- CastInst *Cast = dyn_cast<CastInst>(J);
- bool IsSExtInst = isa<SExtInst>(J);
- if (!Cast || !Cast->hasOneUse() || !(isa<ZExtInst>(J) || IsSExtInst))
- return false;
-
- // Ensure the source type of the extend is no larger than the reduction
- // type. It is not necessary for the types to be identical.
- unsigned SrcSize = Cast->getSrcTy()->getPrimitiveSizeInBits();
- if (SrcSize > DstSize)
- return false;
-
- // Furthermore, ensure that all such extends are of the same kind.
- if (FoundOneOperand) {
- if (IsSigned != IsSExtInst)
- return false;
- } else {
- FoundOneOperand = true;
- IsSigned = IsSExtInst;
- }
-
- // Lastly, if the source type of the extend matches the reduction type,
- // add the extend to CI so that we can avoid accounting for it in the
- // cost model.
- if (SrcSize == DstSize)
- CI.insert(Cast);
- }
+ // Add all operands to the work list if they are loop-varying values that
+ // we haven't yet visited.
+ for (Value *O : cast<User>(Val)->operands())
+ if (auto *I = dyn_cast<Instruction>(O))
+ if (TheLoop->contains(I) && !Visited.count(I))
+ Worklist.push_back(I);
}
- return true;
}
bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
Loop *TheLoop, bool HasFunNoNaNAttr,
- RecurrenceDescriptor &RedDes) {
+ RecurrenceDescriptor &RedDes,
+ DemandedBits *DB,
+ AssumptionCache *AC,
+ DominatorTree *DT) {
if (Phi->getNumIncomingValues() != 2)
return false;
@@ -353,14 +380,49 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Ph
if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
return false;
- // If we think Phi may have been type-promoted, we also need to ensure that
- // all source operands of the reduction are either SExtInsts or ZEstInsts. If
- // so, we will be able to evaluate the reduction in the narrower bit width.
- if (Start != Phi)
- if (!getSourceExtensionKind(Start, ExitInstruction, RecurrenceType,
- IsSigned, VisitedInsts, CastInsts))
+ if (Start != Phi) {
+ // If the starting value is not the same as the phi node, we speculatively
+ // looked through an 'and' instruction when evaluating a potential
+ // arithmetic reduction to determine if it may have been type-promoted.
+ //
+ // We now compute the minimal bit width that is required to represent the
+ // reduction. If this is the same width that was indicated by the 'and', we
+ // can represent the reduction in the smaller type. The 'and' instruction
+ // will be eliminated since it will essentially be a cast instruction that
+ // can be ignored in the cost model. If we compute a different type than we
+ // did when evaluating the 'and', the 'and' will not be eliminated, and we
+ // will end up with different kinds of operations in the recurrence
+ // expression (e.g., RK_IntegerAND, RK_IntegerADD). We give up if this is
+ // the case.
+ //
+ // The vectorizer relies on InstCombine to perform the actual
+ // type-shrinking. It does this by inserting instructions to truncate the
+ // exit value of the reduction to the width indicated by RecurrenceType and
+ // then extend this value back to the original width. If IsSigned is false,
+ // a 'zext' instruction will be generated; otherwise, a 'sext' will be
+ // used.
+ //
+ // TODO: We should not rely on InstCombine to rewrite the reduction in the
+ // smaller type. We should just generate a correctly typed expression
+ // to begin with.
+ Type *ComputedType;
+ std::tie(ComputedType, IsSigned) =
+ computeRecurrenceType(ExitInstruction, DB, AC, DT);
+ if (ComputedType != RecurrenceType)
return false;
+ // The recurrence expression will be represented in a narrower type. If
+ // there are any cast instructions that will be unnecessary, collect them
+ // in CastInsts. Note that the 'and' instruction was already included in
+ // this list.
+ //
+ // TODO: A better way to represent this may be to tag in some way all the
+ // instructions that are a part of the reduction. The vectorizer cost
+ // model could then apply the recurrence type to these instructions,
+ // without needing a white list of instructions to ignore.
+ collectCastsToIgnore(TheLoop, ExitInstruction, RecurrenceType, CastInsts);
+ }
+
// We found a reduction var if we have reached the original phi node and we
// only have a single instruction with out-of-loop users.
@@ -480,47 +542,57 @@ bool RecurrenceDescriptor::hasMultipleUsesOf(
return false;
}
bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
- RecurrenceDescriptor &RedDes) {
+ RecurrenceDescriptor &RedDes,
+ DemandedBits *DB, AssumptionCache *AC,
+ DominatorTree *DT) {
BasicBlock *Header = TheLoop->getHeader();
Function &F = *Header->getParent();
bool HasFunNoNaNAttr =
F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
- if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes)) {
+ if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB,
+ AC, DT)) {
DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n");
return true;
}
- if (AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes)) {
+ if (AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes, DB,
+ AC, DT)) {
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-all
mailing list