svn commit: r329394 - in vendor/llvm/dist-release_60: docs include/llvm/IR include/llvm/MC include/llvm/Support lib/CodeGen lib/CodeGen/AsmPrinter lib/CodeGen/SelectionDAG lib/IR lib/MC/MCParser li...
Dimitry Andric
dim at FreeBSD.org
Fri Feb 16 19:10:19 UTC 2018
Author: dim
Date: Fri Feb 16 19:10:15 2018
New Revision: 329394
URL: https://svnweb.freebsd.org/changeset/base/329394
Log:
Vendor import of llvm release_60 branch r325330:
https://llvm.org/svn/llvm-project/llvm/branches/release_60@325330
Added:
vendor/llvm/dist-release_60/include/llvm/MC/MCAsmMacro.h (contents, props changed)
vendor/llvm/dist-release_60/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pk.i16.ll
vendor/llvm/dist-release_60/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pk.u16.ll
vendor/llvm/dist-release_60/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pknorm.i16.ll
vendor/llvm/dist-release_60/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pknorm.u16.ll
vendor/llvm/dist-release_60/test/CodeGen/ARM/splitkit.ll
vendor/llvm/dist-release_60/test/CodeGen/Thumb/stm-scavenging.ll
vendor/llvm/dist-release_60/test/CodeGen/X86/inline-asm-modifier-V.ll
vendor/llvm/dist-release_60/test/CodeGen/X86/pr36199.ll
vendor/llvm/dist-release_60/test/CodeGen/X86/retpoline-regparm.ll
vendor/llvm/dist-release_60/test/DebugInfo/X86/void-typedef.ll
vendor/llvm/dist-release_60/test/MC/AsmParser/inline_macro_duplication.ll
Modified:
vendor/llvm/dist-release_60/docs/ReleaseNotes.rst
vendor/llvm/dist-release_60/include/llvm/IR/IntrinsicsAMDGPU.td
vendor/llvm/dist-release_60/include/llvm/IR/IntrinsicsX86.td
vendor/llvm/dist-release_60/include/llvm/MC/MCContext.h
vendor/llvm/dist-release_60/include/llvm/Support/GenericDomTreeConstruction.h
vendor/llvm/dist-release_60/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
vendor/llvm/dist-release_60/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
vendor/llvm/dist-release_60/lib/CodeGen/LivePhysRegs.cpp
vendor/llvm/dist-release_60/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
vendor/llvm/dist-release_60/lib/CodeGen/SplitKit.cpp
vendor/llvm/dist-release_60/lib/CodeGen/SplitKit.h
vendor/llvm/dist-release_60/lib/IR/AutoUpgrade.cpp
vendor/llvm/dist-release_60/lib/MC/MCParser/AsmParser.cpp
vendor/llvm/dist-release_60/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
vendor/llvm/dist-release_60/lib/Target/AMDGPU/AMDGPUISelLowering.h
vendor/llvm/dist-release_60/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
vendor/llvm/dist-release_60/lib/Target/AMDGPU/AMDGPUInstrInfo.h
vendor/llvm/dist-release_60/lib/Target/AMDGPU/AMDGPUInstrInfo.td
vendor/llvm/dist-release_60/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
vendor/llvm/dist-release_60/lib/Target/AMDGPU/SIISelLowering.cpp
vendor/llvm/dist-release_60/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
vendor/llvm/dist-release_60/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
vendor/llvm/dist-release_60/lib/Target/AMDGPU/VOP2Instructions.td
vendor/llvm/dist-release_60/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
vendor/llvm/dist-release_60/lib/Target/X86/X86AsmPrinter.cpp
vendor/llvm/dist-release_60/lib/Target/X86/X86DomainReassignment.cpp
vendor/llvm/dist-release_60/lib/Target/X86/X86ISelLowering.cpp
vendor/llvm/dist-release_60/lib/Target/X86/X86IntrinsicsInfo.h
vendor/llvm/dist-release_60/lib/Target/X86/X86RetpolineThunks.cpp
vendor/llvm/dist-release_60/lib/Transforms/InstCombine/InstCombineCalls.cpp
vendor/llvm/dist-release_60/test/CodeGen/ARM/pr25838.ll
vendor/llvm/dist-release_60/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
vendor/llvm/dist-release_60/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
vendor/llvm/dist-release_60/test/CodeGen/X86/avx512-intrinsics.ll
vendor/llvm/dist-release_60/test/CodeGen/X86/avx512-mask-op.ll
vendor/llvm/dist-release_60/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll
vendor/llvm/dist-release_60/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
vendor/llvm/dist-release_60/test/CodeGen/X86/avx512bw-intrinsics.ll
vendor/llvm/dist-release_60/test/CodeGen/X86/domain-reassignment.mir
vendor/llvm/dist-release_60/test/CodeGen/X86/retpoline-external.ll
vendor/llvm/dist-release_60/test/CodeGen/X86/retpoline.ll
vendor/llvm/dist-release_60/test/MC/X86/x86-64.s
vendor/llvm/dist-release_60/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
Modified: vendor/llvm/dist-release_60/docs/ReleaseNotes.rst
==============================================================================
--- vendor/llvm/dist-release_60/docs/ReleaseNotes.rst Fri Feb 16 18:50:06 2018 (r329393)
+++ vendor/llvm/dist-release_60/docs/ReleaseNotes.rst Fri Feb 16 19:10:15 2018 (r329394)
@@ -71,6 +71,13 @@ Non-comprehensive list of changes in this release
Changes to the LLVM IR
----------------------
+Changes to the AArch64 Target
+-----------------------------
+
+During this release:
+
+ * Enabled the new GlobalISel instruction selection framework by default at ``-O0``.
+
Changes to the ARM Target
-------------------------
@@ -80,6 +87,28 @@ During this release the ARM target has:
isn't the default.
+Changes to the Hexagon Target
+-----------------------------
+
+* The Hexagon backend now supports V65 ISA.
+
+* The ``-mhvx`` option now takes an optional value that specifies the ISA
+ version of the HVX coprocessor. The available values are v60, v62 and v65.
+ By default, the value is set to be the same as the CPU version.
+
+* The compiler option ``-mhvx-double`` is deprecated and will be removed in
+  the next release of the compiler. Programmers should use the ``-mhvx-length``
+ option to specify the desired vector length: ``-mhvx-length=64b`` for
+ 64-byte vectors and ``-mhvx-length=128b`` for 128-byte vectors. While the
+ current default vector length is 64 bytes, users should always specify the
+ length explicitly, since the default value may change in the future.
+
+* The target feature ``hvx-double`` is deprecated and will be removed in the
+ next release. LLVM IR generators should use target features ``hvx-length64b``
+ and ``hvx-length128b`` to indicate the vector length. The length should
+ always be specified when HVX code generation is enabled.
+
+
Changes to the MIPS Target
--------------------------
@@ -90,6 +119,15 @@ Changes to the PowerPC Target
-----------------------------
During this release ...
+
+Changes to the SystemZ Target
+-----------------------------
+
+During this release the SystemZ target has:
+
+* Added support for 128-bit atomic operations.
+
+* Added support for the "o" constraint for inline asm statements.
Changes to the X86 Target
-------------------------
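
For reference, the Hexagon guidance above (use target features rather than the deprecated ``hvx-double``) translates into LLVM IR function attributes roughly as in the sketch below. Only ``hvx-length64b`` and ``hvx-length128b`` are named in the notes; the base ``+hvx`` feature spelling is an assumption.

  ; Minimal sketch: request 128-byte HVX vectors for one function.
  define void @hvx_kernel() #0 {
    ret void
  }
  attributes #0 = { "target-features"="+hvx,+hvx-length128b" }
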
Modified: vendor/llvm/dist-release_60/include/llvm/IR/IntrinsicsAMDGPU.td
==============================================================================
--- vendor/llvm/dist-release_60/include/llvm/IR/IntrinsicsAMDGPU.td Fri Feb 16 18:50:06 2018 (r329393)
+++ vendor/llvm/dist-release_60/include/llvm/IR/IntrinsicsAMDGPU.td Fri Feb 16 19:10:15 2018 (r329394)
@@ -238,6 +238,26 @@ def int_amdgcn_cvt_pkrtz : Intrinsic<
[IntrNoMem, IntrSpeculatable]
>;
+def int_amdgcn_cvt_pknorm_i16 : Intrinsic<
+ [llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable]
+>;
+
+def int_amdgcn_cvt_pknorm_u16 : Intrinsic<
+ [llvm_v2i16_ty], [llvm_float_ty, llvm_float_ty],
+ [IntrNoMem, IntrSpeculatable]
+>;
+
+def int_amdgcn_cvt_pk_i16 : Intrinsic<
+ [llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable]
+>;
+
+def int_amdgcn_cvt_pk_u16 : Intrinsic<
+ [llvm_v2i16_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable]
+>;
+
def int_amdgcn_class : Intrinsic<
[llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable]
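
For reference, the four new packing intrinsics defined above are called from LLVM IR as in the sketch below (inferred from the definitions and the added llvm.amdgcn.cvt.* tests; the pk_i16/pk_u16 variants take two i32 operands instead of floats).

  ; Minimal sketch: pack two floats into a <2 x i16> with signed normalization.
  declare <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float, float)

  define <2 x i16> @pack_norm(float %a, float %b) {
    %r = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float %a, float %b)
    ret <2 x i16> %r
  }
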
Modified: vendor/llvm/dist-release_60/include/llvm/IR/IntrinsicsX86.td
==============================================================================
--- vendor/llvm/dist-release_60/include/llvm/IR/IntrinsicsX86.td Fri Feb 16 18:50:06 2018 (r329393)
+++ vendor/llvm/dist-release_60/include/llvm/IR/IntrinsicsX86.td Fri Feb 16 19:10:15 2018 (r329394)
@@ -3738,6 +3738,15 @@ let TargetPrefix = "x86" in { // All intrinsics start
def int_x86_avx512_kxnor_w : // TODO: remove this intrinsic
Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty],
[IntrNoMem]>;
+ def int_x86_avx512_kunpck_bw : GCCBuiltin<"__builtin_ia32_kunpckhi">,
+ Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_kunpck_wd : GCCBuiltin<"__builtin_ia32_kunpcksi">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_kunpck_dq : GCCBuiltin<"__builtin_ia32_kunpckdi">,
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
+ [IntrNoMem]>;
def int_x86_avx512_kortestz_w : GCCBuiltin<"__builtin_ia32_kortestzhi">,
Intrinsic<[llvm_i32_ty], [llvm_i16_ty, llvm_i16_ty],
[IntrNoMem]>;
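
For reference, the reinstated kunpck intrinsics (no longer auto-upgraded, see the AutoUpgrade.cpp hunk below) are used directly from LLVM IR as sketched here; the wd and dq variants are analogous with i32 and i64 mask values.

  ; Minimal sketch: combine two 8-bit mask halves into a 16-bit mask.
  declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16)

  define i16 @unpack_masks(i16 %a, i16 %b) {
    %r = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a, i16 %b)
    ret i16 %r
  }
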
Added: vendor/llvm/dist-release_60/include/llvm/MC/MCAsmMacro.h
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ vendor/llvm/dist-release_60/include/llvm/MC/MCAsmMacro.h Fri Feb 16 19:10:15 2018 (r329394)
@@ -0,0 +1,38 @@
+//===- MCAsmMacro.h - Assembly Macros ---------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCASMMACRO_H
+#define LLVM_MC_MCASMMACRO_H
+
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+
+namespace llvm {
+
+struct MCAsmMacroParameter {
+ StringRef Name;
+ std::vector<AsmToken> Value;
+ bool Required = false;
+ bool Vararg = false;
+
+ MCAsmMacroParameter() = default;
+};
+
+typedef std::vector<MCAsmMacroParameter> MCAsmMacroParameters;
+struct MCAsmMacro {
+ StringRef Name;
+ StringRef Body;
+ MCAsmMacroParameters Parameters;
+
+public:
+ MCAsmMacro(StringRef N, StringRef B, MCAsmMacroParameters P)
+ : Name(N), Body(B), Parameters(std::move(P)) {}
+};
+}; // namespace llvm
+
+#endif
Modified: vendor/llvm/dist-release_60/include/llvm/MC/MCContext.h
==============================================================================
--- vendor/llvm/dist-release_60/include/llvm/MC/MCContext.h Fri Feb 16 18:50:06 2018 (r329393)
+++ vendor/llvm/dist-release_60/include/llvm/MC/MCContext.h Fri Feb 16 19:10:15 2018 (r329394)
@@ -18,6 +18,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/MC/MCAsmMacro.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/SectionKind.h"
@@ -268,6 +269,9 @@ namespace llvm {
unsigned UniqueID,
const MCSymbolELF *Associated);
+ /// \brief Map of currently defined macros.
+ StringMap<MCAsmMacro> MacroMap;
+
public:
explicit MCContext(const MCAsmInfo *MAI, const MCRegisterInfo *MRI,
const MCObjectFileInfo *MOFI,
@@ -618,6 +622,17 @@ namespace llvm {
// FIXME: We should really do something about that.
LLVM_ATTRIBUTE_NORETURN void reportFatalError(SMLoc L,
const Twine &Msg);
+
+ const MCAsmMacro *lookupMacro(StringRef Name) {
+ StringMap<MCAsmMacro>::iterator I = MacroMap.find(Name);
+ return (I == MacroMap.end()) ? nullptr : &I->getValue();
+ }
+
+ void defineMacro(StringRef Name, MCAsmMacro Macro) {
+ MacroMap.insert(std::make_pair(Name, std::move(Macro)));
+ }
+
+ void undefineMacro(StringRef Name) { MacroMap.erase(Name); }
};
} // end namespace llvm
Modified: vendor/llvm/dist-release_60/include/llvm/Support/GenericDomTreeConstruction.h
==============================================================================
--- vendor/llvm/dist-release_60/include/llvm/Support/GenericDomTreeConstruction.h Fri Feb 16 18:50:06 2018 (r329393)
+++ vendor/llvm/dist-release_60/include/llvm/Support/GenericDomTreeConstruction.h Fri Feb 16 19:10:15 2018 (r329394)
@@ -698,24 +698,20 @@ struct SemiNCAInfo {
return;
// Recalculate the set of roots.
- DT.Roots = FindRoots(DT, BUI);
- for (const NodePtr R : DT.Roots) {
- const TreeNodePtr TN = DT.getNode(R);
- // A CFG node was selected as a tree root, but the corresponding tree node
- // is not connected to the virtual root. This is because the incremental
- // algorithm does not really know or use the set of roots and can make a
- // different (implicit) decision about which nodes within an infinite loop
- // becomes a root.
- if (TN && !DT.isVirtualRoot(TN->getIDom())) {
- DEBUG(dbgs() << "Root " << BlockNamePrinter(R)
- << " is not virtual root's child\n"
- << "The entire tree needs to be rebuilt\n");
- // It should be possible to rotate the subtree instead of recalculating
- // the whole tree, but this situation happens extremely rarely in
- // practice.
- CalculateFromScratch(DT, BUI);
- return;
- }
+ auto Roots = FindRoots(DT, BUI);
+ if (DT.Roots.size() != Roots.size() ||
+ !std::is_permutation(DT.Roots.begin(), DT.Roots.end(), Roots.begin())) {
+ // The roots chosen in the CFG have changed. This is because the
+ // incremental algorithm does not really know or use the set of roots and
+ // can make a different (implicit) decision about which node within an
+ // infinite loop becomes a root.
+
+ DEBUG(dbgs() << "Roots are different in updated trees\n"
+ << "The entire tree needs to be rebuilt\n");
+ // It may be possible to update the tree without recalculating it, but
+ // we do not know yet how to do it, and it happens rarely in practise.
+ CalculateFromScratch(DT, BUI);
+ return;
}
}
Modified: vendor/llvm/dist-release_60/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
==============================================================================
--- vendor/llvm/dist-release_60/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp Fri Feb 16 18:50:06 2018 (r329393)
+++ vendor/llvm/dist-release_60/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp Fri Feb 16 19:10:15 2018 (r329394)
@@ -163,7 +163,8 @@ uint64_t DebugHandlerBase::getBaseTypeSize(const DITyp
DIType *BaseType = DDTy->getBaseType().resolve();
- assert(BaseType && "Unexpected invalid base type");
+ if (!BaseType)
+ return 0;
// If this is a derived type, go ahead and get the base type, unless it's a
// reference then it's just the size of the field. Pointer types have no need
Modified: vendor/llvm/dist-release_60/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
==============================================================================
--- vendor/llvm/dist-release_60/lib/CodeGen/AsmPrinter/DwarfUnit.cpp Fri Feb 16 18:50:06 2018 (r329393)
+++ vendor/llvm/dist-release_60/lib/CodeGen/AsmPrinter/DwarfUnit.cpp Fri Feb 16 19:10:15 2018 (r329394)
@@ -1391,7 +1391,8 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, const
if (!Name.empty())
addString(MemberDie, dwarf::DW_AT_name, Name);
- addType(MemberDie, resolve(DT->getBaseType()));
+ if (DIType *Resolved = resolve(DT->getBaseType()))
+ addType(MemberDie, Resolved);
addSourceLine(MemberDie, DT);
Modified: vendor/llvm/dist-release_60/lib/CodeGen/LivePhysRegs.cpp
==============================================================================
--- vendor/llvm/dist-release_60/lib/CodeGen/LivePhysRegs.cpp Fri Feb 16 18:50:06 2018 (r329393)
+++ vendor/llvm/dist-release_60/lib/CodeGen/LivePhysRegs.cpp Fri Feb 16 19:10:15 2018 (r329394)
@@ -205,14 +205,18 @@ void LivePhysRegs::addPristines(const MachineFunction
}
void LivePhysRegs::addLiveOutsNoPristines(const MachineBasicBlock &MBB) {
- if (!MBB.succ_empty()) {
- // To get the live-outs we simply merge the live-ins of all successors.
- for (const MachineBasicBlock *Succ : MBB.successors())
- addBlockLiveIns(*Succ);
- } else if (MBB.isReturnBlock()) {
- // For the return block: Add all callee saved registers that are saved and
- // restored (somewhere); This does not include callee saved registers that
- // are unused and hence not saved and restored; they are called pristine.
+ // To get the live-outs we simply merge the live-ins of all successors.
+ for (const MachineBasicBlock *Succ : MBB.successors())
+ addBlockLiveIns(*Succ);
+ if (MBB.isReturnBlock()) {
+ // Return blocks are a special case because we currently don't mark up
+ // return instructions completely: specifically, there is no explicit
+ // use for callee-saved registers. So we add all callee saved registers
+ // that are saved and restored (somewhere). This does not include
+ // callee saved registers that are unused and hence not saved and
+ // restored; they are called pristine.
+ // FIXME: PEI should add explicit markings to return instructions
+ // instead of implicitly handling them here.
const MachineFunction &MF = *MBB.getParent();
const MachineFrameInfo &MFI = MF.getFrameInfo();
if (MFI.isCalleeSavedInfoValid()) {
@@ -225,15 +229,8 @@ void LivePhysRegs::addLiveOutsNoPristines(const Machin
void LivePhysRegs::addLiveOuts(const MachineBasicBlock &MBB) {
const MachineFunction &MF = *MBB.getParent();
- if (!MBB.succ_empty()) {
- addPristines(MF);
- addLiveOutsNoPristines(MBB);
- } else if (MBB.isReturnBlock()) {
- // For the return block: Add all callee saved registers.
- const MachineFrameInfo &MFI = MF.getFrameInfo();
- if (MFI.isCalleeSavedInfoValid())
- addCalleeSavedRegs(*this, MF);
- }
+ addPristines(MF);
+ addLiveOutsNoPristines(MBB);
}
void LivePhysRegs::addLiveIns(const MachineBasicBlock &MBB) {
Modified: vendor/llvm/dist-release_60/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
==============================================================================
--- vendor/llvm/dist-release_60/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Fri Feb 16 18:50:06 2018 (r329393)
+++ vendor/llvm/dist-release_60/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Fri Feb 16 19:10:15 2018 (r329394)
@@ -16409,7 +16409,9 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N)
N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
N1.getOperand(0).getOperand(1) == N2 &&
N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
- VT.getVectorNumElements()) {
+ VT.getVectorNumElements() &&
+ N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
+ VT.getSizeInBits()) {
return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
}
Modified: vendor/llvm/dist-release_60/lib/CodeGen/SplitKit.cpp
==============================================================================
--- vendor/llvm/dist-release_60/lib/CodeGen/SplitKit.cpp Fri Feb 16 18:50:06 2018 (r329393)
+++ vendor/llvm/dist-release_60/lib/CodeGen/SplitKit.cpp Fri Feb 16 19:10:15 2018 (r329394)
@@ -491,9 +491,8 @@ VNInfo *SplitEditor::defValue(unsigned RegIdx,
return VNI;
}
-void SplitEditor::forceRecompute(unsigned RegIdx, const VNInfo *ParentVNI) {
- assert(ParentVNI && "Mapping NULL value");
- ValueForcePair &VFP = Values[std::make_pair(RegIdx, ParentVNI->id)];
+void SplitEditor::forceRecompute(unsigned RegIdx, const VNInfo &ParentVNI) {
+ ValueForcePair &VFP = Values[std::make_pair(RegIdx, ParentVNI.id)];
VNInfo *VNI = VFP.getPointer();
// ParentVNI was either unmapped or already complex mapped. Either way, just
@@ -777,7 +776,7 @@ SlotIndex SplitEditor::leaveIntvAfter(SlotIndex Idx) {
// the source live range. The spiller also won't try to hoist this copy.
if (SpillMode && !SlotIndex::isSameInstr(ParentVNI->def, Idx) &&
MI->readsVirtualRegister(Edit->getReg())) {
- forceRecompute(0, ParentVNI);
+ forceRecompute(0, *ParentVNI);
defFromParent(0, ParentVNI, Idx, *MI->getParent(), MI);
return Idx;
}
@@ -835,7 +834,7 @@ void SplitEditor::overlapIntv(SlotIndex Start, SlotInd
// The complement interval will be extended as needed by LRCalc.extend().
if (ParentVNI)
- forceRecompute(0, ParentVNI);
+ forceRecompute(0, *ParentVNI);
DEBUG(dbgs() << " overlapIntv [" << Start << ';' << End << "):");
RegAssign.insert(Start, End, OpenIdx);
DEBUG(dump());
@@ -878,7 +877,7 @@ void SplitEditor::removeBackCopies(SmallVectorImpl<VNI
unsigned RegIdx = AssignI.value();
if (AtBegin || !MBBI->readsVirtualRegister(Edit->getReg())) {
DEBUG(dbgs() << " cannot find simple kill of RegIdx " << RegIdx << '\n');
- forceRecompute(RegIdx, Edit->getParent().getVNInfoAt(Def));
+ forceRecompute(RegIdx, *Edit->getParent().getVNInfoAt(Def));
} else {
SlotIndex Kill = LIS.getInstructionIndex(*MBBI).getRegSlot();
DEBUG(dbgs() << " move kill to " << Kill << '\t' << *MBBI);
@@ -982,7 +981,7 @@ void SplitEditor::computeRedundantBackCopies(
}
}
if (!DominatedVNIs.empty()) {
- forceRecompute(0, ParentVNI);
+ forceRecompute(0, *ParentVNI);
for (auto VNI : DominatedVNIs) {
BackCopies.push_back(VNI);
}
@@ -1102,7 +1101,7 @@ void SplitEditor::hoistCopies() {
NotToHoistSet.count(ParentVNI->id))
continue;
BackCopies.push_back(VNI);
- forceRecompute(0, ParentVNI);
+ forceRecompute(0, *ParentVNI);
}
// If it is not beneficial to hoist all the BackCopies, simply remove
@@ -1428,6 +1427,41 @@ void SplitEditor::deleteRematVictims() {
Edit->eliminateDeadDefs(Dead, None, &AA);
}
+void SplitEditor::forceRecomputeVNI(const VNInfo &ParentVNI) {
+ // Fast-path for common case.
+ if (!ParentVNI.isPHIDef()) {
+ for (unsigned I = 0, E = Edit->size(); I != E; ++I)
+ forceRecompute(I, ParentVNI);
+ return;
+ }
+
+ // Trace value through phis.
+ SmallPtrSet<const VNInfo *, 8> Visited; ///< whether VNI was/is in worklist.
+ SmallVector<const VNInfo *, 4> WorkList;
+ Visited.insert(&ParentVNI);
+ WorkList.push_back(&ParentVNI);
+
+ const LiveInterval &ParentLI = Edit->getParent();
+ const SlotIndexes &Indexes = *LIS.getSlotIndexes();
+ do {
+ const VNInfo &VNI = *WorkList.back();
+ WorkList.pop_back();
+ for (unsigned I = 0, E = Edit->size(); I != E; ++I)
+ forceRecompute(I, VNI);
+ if (!VNI.isPHIDef())
+ continue;
+
+ MachineBasicBlock &MBB = *Indexes.getMBBFromIndex(VNI.def);
+ for (const MachineBasicBlock *Pred : MBB.predecessors()) {
+ SlotIndex PredEnd = Indexes.getMBBEndIdx(Pred);
+ VNInfo *PredVNI = ParentLI.getVNInfoBefore(PredEnd);
+ assert(PredVNI && "Value available in PhiVNI predecessor");
+ if (Visited.insert(PredVNI).second)
+ WorkList.push_back(PredVNI);
+ }
+ } while(!WorkList.empty());
+}
+
void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
++NumFinished;
@@ -1444,8 +1478,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LR
// Force rematted values to be recomputed everywhere.
// The new live ranges may be truncated.
if (Edit->didRematerialize(ParentVNI))
- for (unsigned i = 0, e = Edit->size(); i != e; ++i)
- forceRecompute(i, ParentVNI);
+ forceRecomputeVNI(*ParentVNI);
}
// Hoist back-copies to the complement interval when in spill mode.
Modified: vendor/llvm/dist-release_60/lib/CodeGen/SplitKit.h
==============================================================================
--- vendor/llvm/dist-release_60/lib/CodeGen/SplitKit.h Fri Feb 16 18:50:06 2018 (r329393)
+++ vendor/llvm/dist-release_60/lib/CodeGen/SplitKit.h Fri Feb 16 19:10:15 2018 (r329394)
@@ -357,7 +357,11 @@ class LLVM_LIBRARY_VISIBILITY SplitEditor { (private)
/// recomputed by LiveRangeCalc::extend regardless of the number of defs.
/// This is used for values whose live range doesn't match RegAssign exactly.
/// They could have rematerialized, or back-copies may have been moved.
- void forceRecompute(unsigned RegIdx, const VNInfo *ParentVNI);
+ void forceRecompute(unsigned RegIdx, const VNInfo &ParentVNI);
+
+ /// Calls forceRecompute() on any affected regidx and on ParentVNI
+ /// predecessors in case of a phi definition.
+ void forceRecomputeVNI(const VNInfo &ParentVNI);
/// defFromParent - Define Reg from ParentVNI at UseIdx using either
/// rematerialization or a COPY from parent. Return the new value.
Modified: vendor/llvm/dist-release_60/lib/IR/AutoUpgrade.cpp
==============================================================================
--- vendor/llvm/dist-release_60/lib/IR/AutoUpgrade.cpp Fri Feb 16 18:50:06 2018 (r329393)
+++ vendor/llvm/dist-release_60/lib/IR/AutoUpgrade.cpp Fri Feb 16 19:10:15 2018 (r329394)
@@ -75,7 +75,6 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, Str
Name=="ssse3.pabs.d.128" || // Added in 6.0
Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
- Name.startswith("avx512.kunpck") || //added in 6.0
Name.startswith("avx2.pabs.") || // Added in 6.0
Name.startswith("avx512.mask.pabs.") || // Added in 6.0
Name.startswith("avx512.broadcastm") || // Added in 6.0
@@ -1063,12 +1062,6 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function
Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
- } else if (IsX86 && (Name.startswith("avx512.kunpck"))) {
- uint64_t Shift = CI->getType()->getScalarSizeInBits() / 2;
- uint64_t And = (1ULL << Shift) - 1;
- Value* LowBits = Builder.CreateAnd(CI->getArgOperand(0), And);
- Value* HighBits = Builder.CreateShl(CI->getArgOperand(1), Shift);
- Rep = Builder.CreateOr(LowBits, HighBits);
} else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd")) {
Type *I32Ty = Type::getInt32Ty(C);
Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
Modified: vendor/llvm/dist-release_60/lib/MC/MCParser/AsmParser.cpp
==============================================================================
--- vendor/llvm/dist-release_60/lib/MC/MCParser/AsmParser.cpp Fri Feb 16 18:50:06 2018 (r329393)
+++ vendor/llvm/dist-release_60/lib/MC/MCParser/AsmParser.cpp Fri Feb 16 19:10:15 2018 (r329394)
@@ -83,27 +83,6 @@ namespace {
typedef std::vector<AsmToken> MCAsmMacroArgument;
typedef std::vector<MCAsmMacroArgument> MCAsmMacroArguments;
-struct MCAsmMacroParameter {
- StringRef Name;
- MCAsmMacroArgument Value;
- bool Required = false;
- bool Vararg = false;
-
- MCAsmMacroParameter() = default;
-};
-
-typedef std::vector<MCAsmMacroParameter> MCAsmMacroParameters;
-
-struct MCAsmMacro {
- StringRef Name;
- StringRef Body;
- MCAsmMacroParameters Parameters;
-
-public:
- MCAsmMacro(StringRef N, StringRef B, MCAsmMacroParameters P)
- : Name(N), Body(B), Parameters(std::move(P)) {}
-};
-
/// \brief Helper class for storing information about an active macro
/// instantiation.
struct MacroInstantiation {
@@ -164,9 +143,6 @@ class AsmParser : public MCAsmParser { (private)
/// addDirectiveHandler.
StringMap<ExtensionDirectiveHandler> ExtensionDirectiveMap;
- /// \brief Map of currently defined macros.
- StringMap<MCAsmMacro> MacroMap;
-
/// \brief Stack of active macro instantiations.
std::vector<MacroInstantiation*> ActiveMacros;
@@ -308,17 +284,6 @@ class AsmParser : public MCAsmParser { (private)
/// \brief Control a flag in the parser that enables or disables macros.
void setMacrosEnabled(bool Flag) {MacrosEnabledFlag = Flag;}
- /// \brief Lookup a previously defined macro.
- /// \param Name Macro name.
- /// \returns Pointer to macro. NULL if no such macro was defined.
- const MCAsmMacro* lookupMacro(StringRef Name);
-
- /// \brief Define a new macro with the given name and information.
- void defineMacro(StringRef Name, MCAsmMacro Macro);
-
- /// \brief Undefine a macro. If no such macro was defined, it's a no-op.
- void undefineMacro(StringRef Name);
-
/// \brief Are we inside a macro instantiation?
bool isInsideMacroInstantiation() {return !ActiveMacros.empty();}
@@ -1841,7 +1806,7 @@ bool AsmParser::parseStatement(ParseStatementInfo &Inf
// If macros are enabled, check to see if this is a macro instantiation.
if (areMacrosEnabled())
- if (const MCAsmMacro *M = lookupMacro(IDVal)) {
+ if (const MCAsmMacro *M = getContext().lookupMacro(IDVal)) {
return handleMacroEntry(M, IDLoc);
}
@@ -2720,17 +2685,6 @@ bool AsmParser::parseMacroArguments(const MCAsmMacro *
return TokError("too many positional arguments");
}
-const MCAsmMacro *AsmParser::lookupMacro(StringRef Name) {
- StringMap<MCAsmMacro>::iterator I = MacroMap.find(Name);
- return (I == MacroMap.end()) ? nullptr : &I->getValue();
-}
-
-void AsmParser::defineMacro(StringRef Name, MCAsmMacro Macro) {
- MacroMap.insert(std::make_pair(Name, std::move(Macro)));
-}
-
-void AsmParser::undefineMacro(StringRef Name) { MacroMap.erase(Name); }
-
bool AsmParser::handleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc) {
// Arbitrarily limit macro nesting depth (default matches 'as'). We can
// eliminate this, although we should protect against infinite loops.
@@ -4249,7 +4203,7 @@ bool AsmParser::parseDirectiveMacro(SMLoc DirectiveLoc
eatToEndOfStatement();
}
- if (lookupMacro(Name)) {
+ if (getContext().lookupMacro(Name)) {
return Error(DirectiveLoc, "macro '" + Name + "' is already defined");
}
@@ -4257,7 +4211,7 @@ bool AsmParser::parseDirectiveMacro(SMLoc DirectiveLoc
const char *BodyEnd = EndToken.getLoc().getPointer();
StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart);
checkForBadMacro(DirectiveLoc, Name, Body, Parameters);
- defineMacro(Name, MCAsmMacro(Name, Body, std::move(Parameters)));
+ getContext().defineMacro(Name, MCAsmMacro(Name, Body, std::move(Parameters)));
return false;
}
@@ -4416,10 +4370,10 @@ bool AsmParser::parseDirectivePurgeMacro(SMLoc Directi
"unexpected token in '.purgem' directive"))
return true;
- if (!lookupMacro(Name))
+ if (!getContext().lookupMacro(Name))
return Error(DirectiveLoc, "macro '" + Name + "' is not defined");
- undefineMacro(Name);
+ getContext().undefineMacro(Name);
return false;
}
Modified: vendor/llvm/dist-release_60/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
==============================================================================
--- vendor/llvm/dist-release_60/lib/Target/AMDGPU/AMDGPUISelLowering.cpp Fri Feb 16 18:50:06 2018 (r329393)
+++ vendor/llvm/dist-release_60/lib/Target/AMDGPU/AMDGPUISelLowering.cpp Fri Feb 16 19:10:15 2018 (r329394)
@@ -3957,6 +3957,10 @@ const char* AMDGPUTargetLowering::getTargetNodeName(un
NODE_NAME_CASE(CVT_F32_UBYTE2)
NODE_NAME_CASE(CVT_F32_UBYTE3)
NODE_NAME_CASE(CVT_PKRTZ_F16_F32)
+ NODE_NAME_CASE(CVT_PKNORM_I16_F32)
+ NODE_NAME_CASE(CVT_PKNORM_U16_F32)
+ NODE_NAME_CASE(CVT_PK_I16_I32)
+ NODE_NAME_CASE(CVT_PK_U16_U32)
NODE_NAME_CASE(FP_TO_FP16)
NODE_NAME_CASE(FP16_ZEXT)
NODE_NAME_CASE(BUILD_VERTICAL_VECTOR)
Modified: vendor/llvm/dist-release_60/lib/Target/AMDGPU/AMDGPUISelLowering.h
==============================================================================
--- vendor/llvm/dist-release_60/lib/Target/AMDGPU/AMDGPUISelLowering.h Fri Feb 16 18:50:06 2018 (r329393)
+++ vendor/llvm/dist-release_60/lib/Target/AMDGPU/AMDGPUISelLowering.h Fri Feb 16 19:10:15 2018 (r329394)
@@ -417,6 +417,10 @@ enum NodeType : unsigned {
// Convert two float 32 numbers into a single register holding two packed f16
// with round to zero.
CVT_PKRTZ_F16_F32,
+ CVT_PKNORM_I16_F32,
+ CVT_PKNORM_U16_F32,
+ CVT_PK_I16_I32,
+ CVT_PK_U16_U32,
// Same as the standard node, except the high bits of the resulting integer
// are known 0.
Modified: vendor/llvm/dist-release_60/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
==============================================================================
--- vendor/llvm/dist-release_60/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp Fri Feb 16 18:50:06 2018 (r329393)
+++ vendor/llvm/dist-release_60/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp Fri Feb 16 19:10:15 2018 (r329394)
@@ -108,3 +108,21 @@ int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) cons
return MCOp;
}
+
+// TODO: Should largely merge with AMDGPUTTIImpl::isSourceOfDivergence.
+bool AMDGPUInstrInfo::isUniformMMO(const MachineMemOperand *MMO) {
+ const Value *Ptr = MMO->getValue();
+ // UndefValue means this is a load of a kernel input. These are uniform.
+ // Sometimes LDS instructions have constant pointers.
+ // If Ptr is null, then that means this mem operand contains a
+ // PseudoSourceValue like GOT.
+ if (!Ptr || isa<UndefValue>(Ptr) ||
+ isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
+ return true;
+
+ if (const Argument *Arg = dyn_cast<Argument>(Ptr))
+ return AMDGPU::isArgPassedInSGPR(Arg);
+
+ const Instruction *I = dyn_cast<Instruction>(Ptr);
+ return I && I->getMetadata("amdgpu.uniform");
+}
Modified: vendor/llvm/dist-release_60/lib/Target/AMDGPU/AMDGPUInstrInfo.h
==============================================================================
--- vendor/llvm/dist-release_60/lib/Target/AMDGPU/AMDGPUInstrInfo.h Fri Feb 16 18:50:06 2018 (r329393)
+++ vendor/llvm/dist-release_60/lib/Target/AMDGPU/AMDGPUInstrInfo.h Fri Feb 16 19:10:15 2018 (r329394)
@@ -50,6 +50,8 @@ class AMDGPUInstrInfo : public AMDGPUGenInstrInfo { (p
/// Return -1 if the target-specific opcode for the pseudo instruction does
/// not exist. If Opcode is not a pseudo instruction, this is identity.
int pseudoToMCOpcode(int Opcode) const;
+
+ static bool isUniformMMO(const MachineMemOperand *MMO);
};
} // End llvm namespace
Modified: vendor/llvm/dist-release_60/lib/Target/AMDGPU/AMDGPUInstrInfo.td
==============================================================================
--- vendor/llvm/dist-release_60/lib/Target/AMDGPU/AMDGPUInstrInfo.td Fri Feb 16 18:50:06 2018 (r329393)
+++ vendor/llvm/dist-release_60/lib/Target/AMDGPU/AMDGPUInstrInfo.td Fri Feb 16 19:10:15 2018 (r329394)
@@ -35,6 +35,10 @@ def AMDGPUFPPackOp : SDTypeProfile<1, 2,
[SDTCisFP<1>, SDTCisSameAs<1, 2>]
>;
+def AMDGPUIntPackOp : SDTypeProfile<1, 2,
+ [SDTCisInt<1>, SDTCisSameAs<1, 2>]
+>;
+
def AMDGPUDivScaleOp : SDTypeProfile<2, 3,
[SDTCisFP<0>, SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisSameAs<0, 4>]
>;
@@ -142,6 +146,10 @@ def AMDGPUrsq_clamp : SDNode<"AMDGPUISD::RSQ_CLAMP", S
def AMDGPUldexp : SDNode<"AMDGPUISD::LDEXP", AMDGPULdExpOp>;
def AMDGPUpkrtz_f16_f32 : SDNode<"AMDGPUISD::CVT_PKRTZ_F16_F32", AMDGPUFPPackOp>;
+def AMDGPUpknorm_i16_f32 : SDNode<"AMDGPUISD::CVT_PKNORM_I16_F32", AMDGPUFPPackOp>;
+def AMDGPUpknorm_u16_f32 : SDNode<"AMDGPUISD::CVT_PKNORM_U16_F32", AMDGPUFPPackOp>;
+def AMDGPUpk_i16_i32 : SDNode<"AMDGPUISD::CVT_PK_I16_I32", AMDGPUIntPackOp>;
+def AMDGPUpk_u16_u32 : SDNode<"AMDGPUISD::CVT_PK_U16_U32", AMDGPUIntPackOp>;
def AMDGPUfp_to_f16 : SDNode<"AMDGPUISD::FP_TO_FP16" , SDTFPToIntOp>;
def AMDGPUfp16_zext : SDNode<"AMDGPUISD::FP16_ZEXT" , SDTFPToIntOp>;
Modified: vendor/llvm/dist-release_60/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
==============================================================================
--- vendor/llvm/dist-release_60/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp Fri Feb 16 18:50:06 2018 (r329393)
+++ vendor/llvm/dist-release_60/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp Fri Feb 16 19:10:15 2018 (r329394)
@@ -120,7 +120,7 @@ static bool isInstrUniform(const MachineInstr &MI) {
return false;
const MachineMemOperand *MMO = *MI.memoperands_begin();
- return AMDGPU::isUniformMMO(MMO);
+ return AMDGPUInstrInfo::isUniformMMO(MMO);
}
const RegisterBankInfo::InstructionMapping &
Modified: vendor/llvm/dist-release_60/lib/Target/AMDGPU/SIISelLowering.cpp
==============================================================================
--- vendor/llvm/dist-release_60/lib/Target/AMDGPU/SIISelLowering.cpp Fri Feb 16 18:50:06 2018 (r329393)
+++ vendor/llvm/dist-release_60/lib/Target/AMDGPU/SIISelLowering.cpp Fri Feb 16 19:10:15 2018 (r329394)
@@ -205,6 +205,7 @@ SITargetLowering::SITargetLowering(const TargetMachine
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f32, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2i16, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f16, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
@@ -1085,7 +1086,7 @@ bool SITargetLowering::isCheapAddrSpaceCast(unsigned S
bool SITargetLowering::isMemOpUniform(const SDNode *N) const {
const MemSDNode *MemNode = cast<MemSDNode>(N);
- return AMDGPU::isUniformMMO(MemNode->getMemOperand());
+ return AMDGPUInstrInfo::isUniformMMO(MemNode->getMemOperand());
}
TargetLoweringBase::LegalizeTypeAction
@@ -3517,7 +3518,8 @@ void SITargetLowering::ReplaceNodeResults(SDNode *N,
}
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
- if (IID == Intrinsic::amdgcn_cvt_pkrtz) {
+ switch (IID) {
+ case Intrinsic::amdgcn_cvt_pkrtz: {
SDValue Src0 = N->getOperand(1);
SDValue Src1 = N->getOperand(2);
SDLoc SL(N);
@@ -3526,6 +3528,29 @@ void SITargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(DAG.getNode(ISD::BITCAST, SL, MVT::v2f16, Cvt));
return;
}
+ case Intrinsic::amdgcn_cvt_pknorm_i16:
+ case Intrinsic::amdgcn_cvt_pknorm_u16:
+ case Intrinsic::amdgcn_cvt_pk_i16:
+ case Intrinsic::amdgcn_cvt_pk_u16: {
+ SDValue Src0 = N->getOperand(1);
+ SDValue Src1 = N->getOperand(2);
+ SDLoc SL(N);
+ unsigned Opcode;
+
+ if (IID == Intrinsic::amdgcn_cvt_pknorm_i16)
+ Opcode = AMDGPUISD::CVT_PKNORM_I16_F32;
+ else if (IID == Intrinsic::amdgcn_cvt_pknorm_u16)
+ Opcode = AMDGPUISD::CVT_PKNORM_U16_F32;
+ else if (IID == Intrinsic::amdgcn_cvt_pk_i16)
+ Opcode = AMDGPUISD::CVT_PK_I16_I32;
+ else
+ Opcode = AMDGPUISD::CVT_PK_U16_U32;
+
+ SDValue Cvt = DAG.getNode(Opcode, SL, MVT::i32, Src0, Src1);
+ Results.push_back(DAG.getNode(ISD::BITCAST, SL, MVT::v2i16, Cvt));
+ return;
+ }
+ }
break;
}
case ISD::SELECT: {
@@ -4424,10 +4449,27 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDVa
case Intrinsic::amdgcn_ubfe:
return DAG.getNode(AMDGPUISD::BFE_U32, DL, VT,
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
- case Intrinsic::amdgcn_cvt_pkrtz: {
- // FIXME: Stop adding cast if v2f16 legal.
+ case Intrinsic::amdgcn_cvt_pkrtz:
+ case Intrinsic::amdgcn_cvt_pknorm_i16:
+ case Intrinsic::amdgcn_cvt_pknorm_u16:
+ case Intrinsic::amdgcn_cvt_pk_i16:
+ case Intrinsic::amdgcn_cvt_pk_u16: {
+ // FIXME: Stop adding cast if v2f16/v2i16 are legal.
EVT VT = Op.getValueType();
- SDValue Node = DAG.getNode(AMDGPUISD::CVT_PKRTZ_F16_F32, DL, MVT::i32,
+ unsigned Opcode;
+
+ if (IntrinsicID == Intrinsic::amdgcn_cvt_pkrtz)
+ Opcode = AMDGPUISD::CVT_PKRTZ_F16_F32;
+ else if (IntrinsicID == Intrinsic::amdgcn_cvt_pknorm_i16)
+ Opcode = AMDGPUISD::CVT_PKNORM_I16_F32;
+ else if (IntrinsicID == Intrinsic::amdgcn_cvt_pknorm_u16)
+ Opcode = AMDGPUISD::CVT_PKNORM_U16_F32;
+ else if (IntrinsicID == Intrinsic::amdgcn_cvt_pk_i16)
+ Opcode = AMDGPUISD::CVT_PK_I16_I32;
+ else
+ Opcode = AMDGPUISD::CVT_PK_U16_U32;
+
+ SDValue Node = DAG.getNode(Opcode, DL, MVT::i32,
Op.getOperand(1), Op.getOperand(2));
return DAG.getNode(ISD::BITCAST, DL, VT, Node);
}
Modified: vendor/llvm/dist-release_60/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
==============================================================================
--- vendor/llvm/dist-release_60/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp Fri Feb 16 18:50:06 2018 (r329393)
+++ vendor/llvm/dist-release_60/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp Fri Feb 16 19:10:15 2018 (r329394)
@@ -871,24 +871,6 @@ bool isArgPassedInSGPR(const Argument *A) {
}
}
-// TODO: Should largely merge with AMDGPUTTIImpl::isSourceOfDivergence.
-bool isUniformMMO(const MachineMemOperand *MMO) {
- const Value *Ptr = MMO->getValue();
- // UndefValue means this is a load of a kernel input. These are uniform.
- // Sometimes LDS instructions have constant pointers.
- // If Ptr is null, then that means this mem operand contains a
- // PseudoSourceValue like GOT.
- if (!Ptr || isa<UndefValue>(Ptr) ||
- isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
- return true;
-
- if (const Argument *Arg = dyn_cast<Argument>(Ptr))
- return isArgPassedInSGPR(Arg);
-
- const Instruction *I = dyn_cast<Instruction>(Ptr);
- return I && I->getMetadata("amdgpu.uniform");
-}
-
int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
if (isGCN3Encoding(ST))
return ByteOffset;
Modified: vendor/llvm/dist-release_60/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
==============================================================================
--- vendor/llvm/dist-release_60/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h Fri Feb 16 18:50:06 2018 (r329393)
+++ vendor/llvm/dist-release_60/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h Fri Feb 16 19:10:15 2018 (r329394)
@@ -363,7 +363,6 @@ LLVM_READNONE
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);
bool isArgPassedInSGPR(const Argument *Arg);
-bool isUniformMMO(const MachineMemOperand *MMO);
/// \returns The encoding that will be used for \p ByteOffset in the SMRD
/// offset field.
Modified: vendor/llvm/dist-release_60/lib/Target/AMDGPU/VOP2Instructions.td
==============================================================================
--- vendor/llvm/dist-release_60/lib/Target/AMDGPU/VOP2Instructions.td Fri Feb 16 18:50:06 2018 (r329393)
+++ vendor/llvm/dist-release_60/lib/Target/AMDGPU/VOP2Instructions.td Fri Feb 16 19:10:15 2018 (r329394)
@@ -407,11 +407,11 @@ defm V_MBCNT_LO_U32_B32 : VOP2Inst <"v_mbcnt_lo_u32_b3
defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_NO_EXT<VOP_I32_I32_I32>, int_amdgcn_mbcnt_hi>;
defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_NO_EXT<VOP_F32_F32_I32>, AMDGPUldexp>;
defm V_CVT_PKACCUM_U8_F32 : VOP2Inst <"v_cvt_pkaccum_u8_f32", VOP_NO_EXT<VOP_I32_F32_I32>>; // TODO: set "Uses = dst"
-defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_NO_EXT<VOP_I32_F32_F32>>;
-defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_NO_EXT<VOP_I32_F32_F32>>;
+defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_NO_EXT<VOP_I32_F32_F32>, AMDGPUpknorm_i16_f32>;
+defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_NO_EXT<VOP_I32_F32_F32>, AMDGPUpknorm_u16_f32>;
defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_NO_EXT<VOP_I32_F32_F32>, AMDGPUpkrtz_f16_f32>;
-defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_NO_EXT<VOP_I32_I32_I32>>;
-defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_NO_EXT<VOP_I32_I32_I32>>;
+defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_NO_EXT<VOP_I32_I32_I32>, AMDGPUpk_u16_u32>;
+defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_NO_EXT<VOP_I32_I32_I32>, AMDGPUpk_i16_i32>;
} // End SubtargetPredicate = isGCN
Modified: vendor/llvm/dist-release_60/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
==============================================================================
--- vendor/llvm/dist-release_60/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp Fri Feb 16 18:50:06 2018 (r329393)
+++ vendor/llvm/dist-release_60/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp Fri Feb 16 19:10:15 2018 (r329394)
@@ -396,10 +396,14 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &
// rip-relative addressing is actually relative to the *next* instruction.
// Since an immediate can follow the mod/rm byte for an instruction, this
- // means that we need to bias the immediate field of the instruction with
- // the size of the immediate field. If we have this case, add it into the
+ // means that we need to bias the displacement field of the instruction with
+ // the size of the immediate field. If we have this case, add it into the
// expression to emit.
- int ImmSize = X86II::hasImm(TSFlags) ? X86II::getSizeOfImm(TSFlags) : 0;
+ // Note: rip-relative addressing using immediate displacement values should
+ // not be adjusted, assuming it was the user's intent.
+ int ImmSize = !Disp.isImm() && X86II::hasImm(TSFlags)
+ ? X86II::getSizeOfImm(TSFlags)
+ : 0;
EmitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(FixupKind),
CurByte, OS, Fixups, -ImmSize);
Modified: vendor/llvm/dist-release_60/lib/Target/X86/X86AsmPrinter.cpp
==============================================================================
--- vendor/llvm/dist-release_60/lib/Target/X86/X86AsmPrinter.cpp Fri Feb 16 18:50:06 2018 (r329393)
+++ vendor/llvm/dist-release_60/lib/Target/X86/X86AsmPrinter.cpp Fri Feb 16 19:10:15 2018 (r329394)
@@ -370,6 +370,8 @@ static void printIntelMemReference(X86AsmPrinter &P, c
static bool printAsmMRegister(X86AsmPrinter &P, const MachineOperand &MO,
char Mode, raw_ostream &O) {
unsigned Reg = MO.getReg();
+ bool EmitPercent = true;
+
switch (Mode) {
default: return true; // Unknown mode.
case 'b': // Print QImode register
@@ -384,6 +386,9 @@ static bool printAsmMRegister(X86AsmPrinter &P, const
case 'k': // Print SImode register
Reg = getX86SubSuperRegister(Reg, 32);
break;
+ case 'V':
+ EmitPercent = false;
+ LLVM_FALLTHROUGH;
case 'q':
// Print 64-bit register names if 64-bit integer registers are available.
// Otherwise, print 32-bit register names.
@@ -391,7 +396,10 @@ static bool printAsmMRegister(X86AsmPrinter &P, const
break;
}
- O << '%' << X86ATTInstPrinter::getRegisterName(Reg);
+ if (EmitPercent)
+ O << '%';
+
+ O << X86ATTInstPrinter::getRegisterName(Reg);
return false;
}
@@ -464,6 +472,7 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr
case 'w': // Print HImode register
case 'k': // Print SImode register
case 'q': // Print DImode register
+ case 'V': // Print native register without '%'
if (MO.isReg())
return printAsmMRegister(*this, MO, ExtraCode[0], O);
printOperand(*this, MI, OpNo, O);
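
For reference, the new 'V' operand modifier can be exercised from IR inline asm roughly as sketched below, printing the register name with no leading '%'; the ``${0:V}`` spelling follows the generic LLVM inline-asm modifier syntax, and the thunk name is only an assumption.

  ; Minimal sketch: emit an indirect call through a named thunk.
  define void @indirect(void ()* %f) {
    call void asm sideeffect "call __x86_indirect_thunk_${0:V}", "r"(void ()* %f)
    ret void
  }
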
Modified: vendor/llvm/dist-release_60/lib/Target/X86/X86DomainReassignment.cpp
==============================================================================
--- vendor/llvm/dist-release_60/lib/Target/X86/X86DomainReassignment.cpp Fri Feb 16 18:50:06 2018 (r329393)
+++ vendor/llvm/dist-release_60/lib/Target/X86/X86DomainReassignment.cpp Fri Feb 16 19:10:15 2018 (r329394)
@@ -663,8 +663,10 @@ void X86DomainReassignment::initConverters() {
createReplacer(X86::XOR32rr, X86::KXORDrr);
createReplacer(X86::XOR64rr, X86::KXORQrr);
- createReplacer(X86::TEST32rr, X86::KTESTDrr);
- createReplacer(X86::TEST64rr, X86::KTESTQrr);
+ // TODO: KTEST is not a replacement for TEST due to flag differences. Need
+ // to prove only Z flag is used.
+ //createReplacer(X86::TEST32rr, X86::KTESTDrr);
+ //createReplacer(X86::TEST64rr, X86::KTESTQrr);
}
if (STI->hasDQI()) {
@@ -684,8 +686,10 @@ void X86DomainReassignment::initConverters() {
createReplacer(X86::SHR8ri, X86::KSHIFTRBri);
createReplacer(X86::SHL8ri, X86::KSHIFTLBri);
- createReplacer(X86::TEST8rr, X86::KTESTBrr);
- createReplacer(X86::TEST16rr, X86::KTESTWrr);
+ // TODO: KTEST is not a replacement for TEST due to flag differences. Need
+ // to prove only Z flag is used.
+ //createReplacer(X86::TEST8rr, X86::KTESTBrr);
+ //createReplacer(X86::TEST16rr, X86::KTESTWrr);
createReplacer(X86::XOR8rr, X86::KXORBrr);
}
Modified: vendor/llvm/dist-release_60/lib/Target/X86/X86ISelLowering.cpp
==============================================================================
--- vendor/llvm/dist-release_60/lib/Target/X86/X86ISelLowering.cpp Fri Feb 16 18:50:06 2018 (r329393)
+++ vendor/llvm/dist-release_60/lib/Target/X86/X86ISelLowering.cpp Fri Feb 16 19:10:15 2018 (r329394)
@@ -17017,24 +17017,6 @@ static bool hasNonFlagsUse(SDValue Op) {
return false;
}
-// Emit KTEST instruction for bit vectors on AVX-512
-static SDValue EmitKTEST(SDValue Op, SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
- if (Op.getOpcode() == ISD::BITCAST) {
- auto hasKTEST = [&](MVT VT) {
- unsigned SizeInBits = VT.getSizeInBits();
- return (Subtarget.hasDQI() && (SizeInBits == 8 || SizeInBits == 16)) ||
- (Subtarget.hasBWI() && (SizeInBits == 32 || SizeInBits == 64));
- };
- SDValue Op0 = Op.getOperand(0);
- MVT Op0VT = Op0.getValueType().getSimpleVT();
- if (Op0VT.isVector() && Op0VT.getVectorElementType() == MVT::i1 &&
- hasKTEST(Op0VT))
- return DAG.getNode(X86ISD::KTEST, SDLoc(Op), Op0VT, Op0, Op0);
- }
- return SDValue();
-}
-
/// Emit nodes that will be selected as "test Op0,Op0", or something
/// equivalent.
SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
@@ -17079,9 +17061,6 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsign
// doing a separate TEST. TEST always sets OF and CF to 0, so unless
// we prove that the arithmetic won't overflow, we can't use OF or CF.
if (Op.getResNo() != 0 || NeedOF || NeedCF) {
- // Emit KTEST for bit vectors
- if (auto Node = EmitKTEST(Op, DAG, Subtarget))
- return Node;
// Emit a CMP with 0, which is the TEST pattern.
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
DAG.getConstant(0, dl, Op.getValueType()));
@@ -17310,10 +17289,6 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsign
}
if (Opcode == 0) {
- // Emit KTEST for bit vectors
- if (auto Node = EmitKTEST(Op, DAG, Subtarget))
- return Node;
-
// Emit a CMP with 0, which is the TEST pattern.
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
DAG.getConstant(0, dl, Op.getValueType()));
@@ -18093,6 +18068,34 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtar
return Result;
}
+// Try to select this as a KTEST+SETCC if possible.
+static SDValue EmitKTEST(SDValue Op0, SDValue Op1, ISD::CondCode CC,
+ const SDLoc &dl, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ // Only support equality comparisons.
+ if (CC != ISD::SETEQ && CC != ISD::SETNE)
+ return SDValue();
+
+ // Must be a bitcast from vXi1.
+ if (Op0.getOpcode() != ISD::BITCAST)
+ return SDValue();
+
+ Op0 = Op0.getOperand(0);
+ MVT VT = Op0.getSimpleValueType();
+ if (!(Subtarget.hasDQI() && (VT == MVT::v8i1 || VT == MVT::v16i1)) &&
+ !(Subtarget.hasBWI() && (VT == MVT::v32i1 || VT == MVT::v64i1)))
+ return SDValue();
+
+ X86::CondCode X86CC;
+ if (isNullConstant(Op1)) {
+ X86CC = CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE;
+ } else
+ return SDValue();
+
+ SDValue KTEST = DAG.getNode(X86ISD::KTEST, dl, MVT::i32, Op0, Op0);
+ return getSETCC(X86CC, KTEST, dl, DAG);
+}
+
SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
MVT VT = Op.getSimpleValueType();
@@ -18115,6 +18118,10 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, Sele
return NewSetCC;
}
+ // Try to lower using KTEST.
+ if (SDValue NewSetCC = EmitKTEST(Op0, Op1, CC, dl, DAG, Subtarget))
+ return NewSetCC;
+
// Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of
// these.
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***