svn commit: r343313 - in projects/clang800-import/contrib/llvm: include/llvm/CodeGen include/llvm/IR lib/CodeGen/AsmPrinter lib/CodeGen/SelectionDAG lib/MC lib/Target/AArch64 lib/Target/AMDGPU lib/...
Dimitry Andric
dim at FreeBSD.org
Tue Jan 22 20:13:51 UTC 2019
Author: dim
Date: Tue Jan 22 20:13:43 2019
New Revision: 343313
URL: https://svnweb.freebsd.org/changeset/base/343313
Log:
Merge llvm release_80 branch r351543, and resolve conflicts.
Modified:
projects/clang800-import/contrib/llvm/include/llvm/CodeGen/MachineFunction.h
projects/clang800-import/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
projects/clang800-import/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
projects/clang800-import/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
projects/clang800-import/contrib/llvm/lib/MC/MCWin64EH.cpp
projects/clang800-import/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
projects/clang800-import/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
projects/clang800-import/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
projects/clang800-import/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td
projects/clang800-import/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h
projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/DSInstructions.td
projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h
projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td
projects/clang800-import/contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp
projects/clang800-import/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
projects/clang800-import/contrib/llvm/lib/Target/X86/X86ISelLowering.h
projects/clang800-import/contrib/llvm/lib/Target/X86/X86InstrAVX512.td
projects/clang800-import/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
projects/clang800-import/contrib/llvm/lib/Target/X86/X86InstrSSE.td
projects/clang800-import/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h
projects/clang800-import/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
projects/clang800-import/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
projects/clang800-import/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Directory Properties:
projects/clang800-import/contrib/llvm/ (props changed)
Modified: projects/clang800-import/contrib/llvm/include/llvm/CodeGen/MachineFunction.h
==============================================================================
--- projects/clang800-import/contrib/llvm/include/llvm/CodeGen/MachineFunction.h Tue Jan 22 20:08:25 2019 (r343312)
+++ projects/clang800-import/contrib/llvm/include/llvm/CodeGen/MachineFunction.h Tue Jan 22 20:13:43 2019 (r343313)
@@ -329,6 +329,7 @@ class MachineFunction {
bool CallsUnwindInit = false;
bool HasEHScopes = false;
bool HasEHFunclets = false;
+ bool HasLocalEscape = false;
/// List of C++ TypeInfo used.
std::vector<const GlobalValue *> TypeInfos;
@@ -810,6 +811,9 @@ class MachineFunction {
bool hasEHFunclets() const { return HasEHFunclets; }
void setHasEHFunclets(bool V) { HasEHFunclets = V; }
+
+ bool hasLocalEscape() const { return HasLocalEscape; }
+ void setHasLocalEscape(bool V) { HasLocalEscape = V; }
/// Find or create an LandingPadInfo for the specified MachineBasicBlock.
LandingPadInfo &getOrCreateLandingPadInfo(MachineBasicBlock *LandingPad);
Modified: projects/clang800-import/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
==============================================================================
--- projects/clang800-import/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td Tue Jan 22 20:08:25 2019 (r343312)
+++ projects/clang800-import/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td Tue Jan 22 20:13:43 2019 (r343313)
@@ -392,6 +392,24 @@ class AMDGPULDSF32Intrin<string clang_builtin> :
[IntrArgMemOnly, NoCapture<0>]
>;
+class AMDGPUDSOrderedIntrinsic : Intrinsic<
+ [llvm_i32_ty],
+ // M0 = {hi16:address, lo16:waveID}. Allow passing M0 as a pointer, so that
+ // the bit packing can be optimized at the IR level.
+ [LLVMQualPointerType<llvm_i32_ty, 2>, // IntToPtr(M0)
+ llvm_i32_ty, // value to add or swap
+ llvm_i32_ty, // ordering
+ llvm_i32_ty, // scope
+ llvm_i1_ty, // isVolatile
+ llvm_i32_ty, // ordered count index (OA index), also added to the address
+ llvm_i1_ty, // wave release, usually set to 1
+ llvm_i1_ty], // wave done, set to 1 for the last ordered instruction
+ [NoCapture<0>]
+>;
+
+def int_amdgcn_ds_ordered_add : AMDGPUDSOrderedIntrinsic;
+def int_amdgcn_ds_ordered_swap : AMDGPUDSOrderedIntrinsic;
+
def int_amdgcn_ds_fadd : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_faddf">;
def int_amdgcn_ds_fmin : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_fminf">;
def int_amdgcn_ds_fmax : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_fmaxf">;
Modified: projects/clang800-import/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
==============================================================================
--- projects/clang800-import/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp Tue Jan 22 20:08:25 2019 (r343312)
+++ projects/clang800-import/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp Tue Jan 22 20:13:43 2019 (r343313)
@@ -545,15 +545,17 @@ void WinException::emitCSpecificHandlerTable(const Mac
OS.AddComment(Comment);
};
- // Emit a label assignment with the SEH frame offset so we can use it for
- // llvm.eh.recoverfp.
- StringRef FLinkageName =
- GlobalValue::dropLLVMManglingEscape(MF->getFunction().getName());
- MCSymbol *ParentFrameOffset =
- Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName);
- const MCExpr *MCOffset =
- MCConstantExpr::create(FuncInfo.SEHSetFrameOffset, Ctx);
- Asm->OutStreamer->EmitAssignment(ParentFrameOffset, MCOffset);
+ if (!isAArch64) {
+ // Emit a label assignment with the SEH frame offset so we can use it for
+ // llvm.eh.recoverfp.
+ StringRef FLinkageName =
+ GlobalValue::dropLLVMManglingEscape(MF->getFunction().getName());
+ MCSymbol *ParentFrameOffset =
+ Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName);
+ const MCExpr *MCOffset =
+ MCConstantExpr::create(FuncInfo.SEHSetFrameOffset, Ctx);
+ Asm->OutStreamer->EmitAssignment(ParentFrameOffset, MCOffset);
+ }
// Use the assembler to compute the number of table entries through label
// difference and division.
@@ -937,6 +939,9 @@ void WinException::emitEHRegistrationOffsetLabel(const
if (FI != INT_MAX) {
const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
unsigned UnusedReg;
+ // FIXME: getFrameIndexReference needs to match the behavior of
+ // AArch64RegisterInfo::hasBasePointer in which one of the scenarios where
+ // SP is used is if frame size >= 256.
Offset = TFI->getFrameIndexReference(*Asm->MF, FI, UnusedReg);
}
Modified: projects/clang800-import/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
==============================================================================
--- projects/clang800-import/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Tue Jan 22 20:08:25 2019 (r343312)
+++ projects/clang800-import/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Tue Jan 22 20:13:43 2019 (r343313)
@@ -6182,6 +6182,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst
.addFrameIndex(FI);
}
+ MF.setHasLocalEscape(true);
+
return nullptr;
}
Modified: projects/clang800-import/contrib/llvm/lib/MC/MCWin64EH.cpp
==============================================================================
--- projects/clang800-import/contrib/llvm/lib/MC/MCWin64EH.cpp Tue Jan 22 20:08:25 2019 (r343312)
+++ projects/clang800-import/contrib/llvm/lib/MC/MCWin64EH.cpp Tue Jan 22 20:13:43 2019 (r343313)
@@ -453,6 +453,38 @@ static void ARM64EmitUnwindCode(MCStreamer &streamer,
}
}
+// Returns the epilog symbol of an epilog with the exact same unwind code
+// sequence, if it exists. Otherwise, returns nullptr.
+// EpilogInstrs - Unwind codes for the current epilog.
+// Epilogs - Epilogs that potentially match the current epilog.
+static MCSymbol*
+FindMatchingEpilog(const std::vector<WinEH::Instruction>& EpilogInstrs,
+ const std::vector<MCSymbol *>& Epilogs,
+ const WinEH::FrameInfo *info) {
+ for (auto *EpilogStart : Epilogs) {
+ auto InstrsIter = info->EpilogMap.find(EpilogStart);
+ assert(InstrsIter != info->EpilogMap.end() &&
+ "Epilog not found in EpilogMap");
+ const auto &Instrs = InstrsIter->second;
+
+ if (Instrs.size() != EpilogInstrs.size())
+ continue;
+
+ bool Match = true;
+ for (unsigned i = 0; i < Instrs.size(); ++i)
+ if (Instrs[i].Operation != EpilogInstrs[i].Operation ||
+ Instrs[i].Offset != EpilogInstrs[i].Offset ||
+ Instrs[i].Register != EpilogInstrs[i].Register) {
+ Match = false;
+ break;
+ }
+
+ if (Match)
+ return EpilogStart;
+ }
+ return nullptr;
+}
+
// Populate the .xdata section. The format of .xdata on ARM64 is documented at
// https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) {
@@ -477,12 +509,28 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer,
// Process epilogs.
MapVector<MCSymbol *, uint32_t> EpilogInfo;
+ // Epilogs processed so far.
+ std::vector<MCSymbol *> AddedEpilogs;
+
for (auto &I : info->EpilogMap) {
MCSymbol *EpilogStart = I.first;
auto &EpilogInstrs = I.second;
uint32_t CodeBytes = ARM64CountOfUnwindCodes(EpilogInstrs);
- EpilogInfo[EpilogStart] = TotalCodeBytes;
- TotalCodeBytes += CodeBytes;
+
+ MCSymbol* MatchingEpilog =
+ FindMatchingEpilog(EpilogInstrs, AddedEpilogs, info);
+ if (MatchingEpilog) {
+ assert(EpilogInfo.find(MatchingEpilog) != EpilogInfo.end() &&
+ "Duplicate epilog not found");
+ EpilogInfo[EpilogStart] = EpilogInfo[MatchingEpilog];
+ // Clear the unwind codes in the EpilogMap, so that they don't get output
+ // in the logic below.
+ EpilogInstrs.clear();
+ } else {
+ EpilogInfo[EpilogStart] = TotalCodeBytes;
+ TotalCodeBytes += CodeBytes;
+ AddedEpilogs.push_back(EpilogStart);
+ }
}
// Code Words, Epilog count, E, X, Vers, Function Length
Modified: projects/clang800-import/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
==============================================================================
--- projects/clang800-import/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp Tue Jan 22 20:08:25 2019 (r343312)
+++ projects/clang800-import/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp Tue Jan 22 20:13:43 2019 (r343313)
@@ -694,6 +694,34 @@ void AArch64AsmPrinter::EmitInstruction(const MachineI
switch (MI->getOpcode()) {
default:
break;
+ case AArch64::MOVMCSym: {
+ unsigned DestReg = MI->getOperand(0).getReg();
+ const MachineOperand &MO_Sym = MI->getOperand(1);
+ MachineOperand Hi_MOSym(MO_Sym), Lo_MOSym(MO_Sym);
+ MCOperand Hi_MCSym, Lo_MCSym;
+
+ Hi_MOSym.setTargetFlags(AArch64II::MO_G1 | AArch64II::MO_S);
+ Lo_MOSym.setTargetFlags(AArch64II::MO_G0 | AArch64II::MO_NC);
+
+ MCInstLowering.lowerOperand(Hi_MOSym, Hi_MCSym);
+ MCInstLowering.lowerOperand(Lo_MOSym, Lo_MCSym);
+
+ MCInst MovZ;
+ MovZ.setOpcode(AArch64::MOVZXi);
+ MovZ.addOperand(MCOperand::createReg(DestReg));
+ MovZ.addOperand(Hi_MCSym);
+ MovZ.addOperand(MCOperand::createImm(16));
+ EmitToStreamer(*OutStreamer, MovZ);
+
+ MCInst MovK;
+ MovK.setOpcode(AArch64::MOVKXi);
+ MovK.addOperand(MCOperand::createReg(DestReg));
+ MovK.addOperand(MCOperand::createReg(DestReg));
+ MovK.addOperand(Lo_MCSym);
+ MovK.addOperand(MCOperand::createImm(0));
+ EmitToStreamer(*OutStreamer, MovK);
+ return;
+ }
case AArch64::MOVIv2d_ns:
// If the target has <rdar://problem/16473581>, lower this
// instruction to movi.16b instead.
Modified: projects/clang800-import/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
==============================================================================
--- projects/clang800-import/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp Tue Jan 22 20:08:25 2019 (r343312)
+++ projects/clang800-import/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp Tue Jan 22 20:13:43 2019 (r343313)
@@ -228,6 +228,10 @@ bool AArch64FrameLowering::hasFP(const MachineFunction
MFI.getMaxCallFrameSize() > DefaultSafeSPDisplacement)
return true;
+ // Win64 SEH requires frame pointer if funclets are present.
+ if (MF.hasLocalEscape())
+ return true;
+
return false;
}
Modified: projects/clang800-import/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
==============================================================================
--- projects/clang800-import/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp Tue Jan 22 20:08:25 2019 (r343312)
+++ projects/clang800-import/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp Tue Jan 22 20:13:43 2019 (r343313)
@@ -2743,6 +2743,34 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN
case Intrinsic::aarch64_neon_umin:
return DAG.getNode(ISD::UMIN, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
+
+ case Intrinsic::localaddress: {
+ // Returns one of the stack, base, or frame pointer registers, depending on
+ // which is used to reference local variables.
+ MachineFunction &MF = DAG.getMachineFunction();
+ const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
+ unsigned Reg;
+ if (RegInfo->hasBasePointer(MF))
+ Reg = RegInfo->getBaseRegister();
+ else // This function handles the SP or FP case.
+ Reg = RegInfo->getFrameRegister(MF);
+ return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg,
+ Op.getSimpleValueType());
+ }
+
+ case Intrinsic::eh_recoverfp: {
+ // FIXME: This needs to be implemented to correctly handle highly aligned
+ // stack objects. For now we simply return the incoming FP. Refer to D53541
+ // for more details.
+ SDValue FnOp = Op.getOperand(1);
+ SDValue IncomingFPOp = Op.getOperand(2);
+ GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp);
+ auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
+ if (!Fn)
+ report_fatal_error(
+ "llvm.eh.recoverfp must take a function as the first argument");
+ return IncomingFPOp;
+ }
}
}
Modified: projects/clang800-import/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td
==============================================================================
--- projects/clang800-import/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td Tue Jan 22 20:08:25 2019 (r343312)
+++ projects/clang800-import/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td Tue Jan 22 20:13:43 2019 (r343313)
@@ -133,7 +133,11 @@ def UseNegativeImmediates
: Predicate<"false">, AssemblerPredicate<"!FeatureNoNegativeImmediates",
"NegativeImmediates">;
+def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER",
+ SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
+ SDTCisInt<1>]>>;
+
//===----------------------------------------------------------------------===//
// AArch64-specific DAG Nodes.
//
@@ -6800,6 +6804,9 @@ def : Pat<(AArch64tcret tglobaladdr:$dst, (i32 timm:$F
(TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;
def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)),
(TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;
+
+def MOVMCSym : Pseudo<(outs GPR64:$dst), (ins i64imm:$sym), []>, Sched<[]>;
+def : Pat<(i64 (AArch64LocalRecover mcsym:$sym)), (MOVMCSym mcsym:$sym)>;
include "AArch64InstrAtomics.td"
include "AArch64SVEInstrInfo.td"
Modified: projects/clang800-import/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
==============================================================================
--- projects/clang800-import/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp Tue Jan 22 20:08:25 2019 (r343312)
+++ projects/clang800-import/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp Tue Jan 22 20:13:43 2019 (r343313)
@@ -466,6 +466,13 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineB
// Modify MI as necessary to handle as much of 'Offset' as possible
Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg);
+
+ if (MI.getOpcode() == TargetOpcode::LOCAL_ESCAPE) {
+ MachineOperand &FI = MI.getOperand(FIOperandNum);
+ FI.ChangeToImmediate(Offset);
+ return;
+ }
+
if (rewriteAArch64FrameIndex(MI, FIOperandNum, FrameReg, Offset, TII))
return;
Modified: projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h
==============================================================================
--- projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h Tue Jan 22 20:08:25 2019 (r343312)
+++ projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h Tue Jan 22 20:13:43 2019 (r343313)
@@ -254,7 +254,7 @@ namespace AMDGPUAS {
FLAT_ADDRESS = 0, ///< Address space for flat memory.
GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
- REGION_ADDRESS = 2, ///< Address space for region memory.
+ REGION_ADDRESS = 2, ///< Address space for region memory. (GDS)
CONSTANT_ADDRESS = 4, ///< Address space for constant memory (VTX2)
LOCAL_ADDRESS = 3, ///< Address space for local memory.
Modified: projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
==============================================================================
--- projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp Tue Jan 22 20:08:25 2019 (r343312)
+++ projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp Tue Jan 22 20:13:43 2019 (r343313)
@@ -4192,6 +4192,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(un
NODE_NAME_CASE(TBUFFER_STORE_FORMAT_D16)
NODE_NAME_CASE(TBUFFER_LOAD_FORMAT)
NODE_NAME_CASE(TBUFFER_LOAD_FORMAT_D16)
+ NODE_NAME_CASE(DS_ORDERED_COUNT)
NODE_NAME_CASE(ATOMIC_CMP_SWAP)
NODE_NAME_CASE(ATOMIC_INC)
NODE_NAME_CASE(ATOMIC_DEC)
Modified: projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
==============================================================================
--- projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h Tue Jan 22 20:08:25 2019 (r343312)
+++ projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h Tue Jan 22 20:13:43 2019 (r343313)
@@ -474,6 +474,7 @@ enum NodeType : unsigned {
TBUFFER_STORE_FORMAT_D16,
TBUFFER_LOAD_FORMAT,
TBUFFER_LOAD_FORMAT_D16,
+ DS_ORDERED_COUNT,
ATOMIC_CMP_SWAP,
ATOMIC_INC,
ATOMIC_DEC,
Modified: projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
==============================================================================
--- projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td Tue Jan 22 20:08:25 2019 (r343312)
+++ projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td Tue Jan 22 20:13:43 2019 (r343313)
@@ -72,6 +72,8 @@ def : SourceOfDivergence<int_amdgcn_buffer_atomic_xor>
def : SourceOfDivergence<int_amdgcn_buffer_atomic_cmpswap>;
def : SourceOfDivergence<int_amdgcn_ps_live>;
def : SourceOfDivergence<int_amdgcn_ds_swizzle>;
+def : SourceOfDivergence<int_amdgcn_ds_ordered_add>;
+def : SourceOfDivergence<int_amdgcn_ds_ordered_swap>;
foreach intr = AMDGPUImageDimAtomicIntrinsics in
def : SourceOfDivergence<intr>;
Modified: projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
==============================================================================
--- projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp Tue Jan 22 20:08:25 2019 (r343312)
+++ projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp Tue Jan 22 20:13:43 2019 (r343313)
@@ -308,6 +308,8 @@ bool GCNTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Ins
switch (Inst->getIntrinsicID()) {
case Intrinsic::amdgcn_atomic_inc:
case Intrinsic::amdgcn_atomic_dec:
+ case Intrinsic::amdgcn_ds_ordered_add:
+ case Intrinsic::amdgcn_ds_ordered_swap:
case Intrinsic::amdgcn_ds_fadd:
case Intrinsic::amdgcn_ds_fmin:
case Intrinsic::amdgcn_ds_fmax: {
Modified: projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/DSInstructions.td
==============================================================================
--- projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/DSInstructions.td Tue Jan 22 20:08:25 2019 (r343312)
+++ projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/DSInstructions.td Tue Jan 22 20:13:43 2019 (r343313)
@@ -817,6 +817,11 @@ defm : DSAtomicRetPat_mc<DS_MAX_RTN_U64, i64, "atomic_
defm : DSAtomicCmpXChg_mc<DS_CMPST_RTN_B64, i64, "atomic_cmp_swap_local">;
+def : Pat <
+ (SIds_ordered_count i32:$value, i16:$offset),
+ (DS_ORDERED_COUNT $value, (as_i16imm $offset))
+>;
+
//===----------------------------------------------------------------------===//
// Real instructions
//===----------------------------------------------------------------------===//
Modified: projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
==============================================================================
--- projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp Tue Jan 22 20:08:25 2019 (r343312)
+++ projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp Tue Jan 22 20:13:43 2019 (r343313)
@@ -88,14 +88,28 @@ static bool isSMovRel(unsigned Opcode) {
}
}
-static bool isSendMsgTraceDataOrGDS(const MachineInstr &MI) {
+static bool isSendMsgTraceDataOrGDS(const SIInstrInfo &TII,
+ const MachineInstr &MI) {
+ if (TII.isAlwaysGDS(MI.getOpcode()))
+ return true;
+
switch (MI.getOpcode()) {
case AMDGPU::S_SENDMSG:
case AMDGPU::S_SENDMSGHALT:
case AMDGPU::S_TTRACEDATA:
return true;
+ // These DS opcodes don't support GDS.
+ case AMDGPU::DS_NOP:
+ case AMDGPU::DS_PERMUTE_B32:
+ case AMDGPU::DS_BPERMUTE_B32:
+ return false;
default:
- // TODO: GDS
+ if (TII.isDS(MI.getOpcode())) {
+ int GDS = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
+ AMDGPU::OpName::gds);
+ if (MI.getOperand(GDS).getImm())
+ return true;
+ }
return false;
}
}
@@ -145,7 +159,7 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stal
checkReadM0Hazards(MI) > 0)
return NoopHazard;
- if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(*MI) &&
+ if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI) &&
checkReadM0Hazards(MI) > 0)
return NoopHazard;
@@ -199,7 +213,7 @@ unsigned GCNHazardRecognizer::PreEmitNoops(MachineInst
isSMovRel(MI->getOpcode())))
return std::max(WaitStates, checkReadM0Hazards(MI));
- if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(*MI))
+ if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI))
return std::max(WaitStates, checkReadM0Hazards(MI));
return WaitStates;
Modified: projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
==============================================================================
--- projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp Tue Jan 22 20:08:25 2019 (r343312)
+++ projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp Tue Jan 22 20:13:43 2019 (r343313)
@@ -910,6 +910,8 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInf
switch (IntrID) {
case Intrinsic::amdgcn_atomic_inc:
case Intrinsic::amdgcn_atomic_dec:
+ case Intrinsic::amdgcn_ds_ordered_add:
+ case Intrinsic::amdgcn_ds_ordered_swap:
case Intrinsic::amdgcn_ds_fadd:
case Intrinsic::amdgcn_ds_fmin:
case Intrinsic::amdgcn_ds_fmax: {
@@ -937,6 +939,8 @@ bool SITargetLowering::getAddrModeArguments(IntrinsicI
switch (II->getIntrinsicID()) {
case Intrinsic::amdgcn_atomic_inc:
case Intrinsic::amdgcn_atomic_dec:
+ case Intrinsic::amdgcn_ds_ordered_add:
+ case Intrinsic::amdgcn_ds_ordered_swap:
case Intrinsic::amdgcn_ds_fadd:
case Intrinsic::amdgcn_ds_fmin:
case Intrinsic::amdgcn_ds_fmax: {
@@ -5438,6 +5442,63 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDVal
SDLoc DL(Op);
switch (IntrID) {
+ case Intrinsic::amdgcn_ds_ordered_add:
+ case Intrinsic::amdgcn_ds_ordered_swap: {
+ MemSDNode *M = cast<MemSDNode>(Op);
+ SDValue Chain = M->getOperand(0);
+ SDValue M0 = M->getOperand(2);
+ SDValue Value = M->getOperand(3);
+ unsigned OrderedCountIndex = M->getConstantOperandVal(7);
+ unsigned WaveRelease = M->getConstantOperandVal(8);
+ unsigned WaveDone = M->getConstantOperandVal(9);
+ unsigned ShaderType;
+ unsigned Instruction;
+
+ switch (IntrID) {
+ case Intrinsic::amdgcn_ds_ordered_add:
+ Instruction = 0;
+ break;
+ case Intrinsic::amdgcn_ds_ordered_swap:
+ Instruction = 1;
+ break;
+ }
+
+ if (WaveDone && !WaveRelease)
+ report_fatal_error("ds_ordered_count: wave_done requires wave_release");
+
+ switch (DAG.getMachineFunction().getFunction().getCallingConv()) {
+ case CallingConv::AMDGPU_CS:
+ case CallingConv::AMDGPU_KERNEL:
+ ShaderType = 0;
+ break;
+ case CallingConv::AMDGPU_PS:
+ ShaderType = 1;
+ break;
+ case CallingConv::AMDGPU_VS:
+ ShaderType = 2;
+ break;
+ case CallingConv::AMDGPU_GS:
+ ShaderType = 3;
+ break;
+ default:
+ report_fatal_error("ds_ordered_count unsupported for this calling conv");
+ }
+
+ unsigned Offset0 = OrderedCountIndex << 2;
+ unsigned Offset1 = WaveRelease | (WaveDone << 1) | (ShaderType << 2) |
+ (Instruction << 4);
+ unsigned Offset = Offset0 | (Offset1 << 8);
+
+ SDValue Ops[] = {
+ Chain,
+ Value,
+ DAG.getTargetConstant(Offset, DL, MVT::i16),
+ copyToM0(DAG, Chain, DL, M0).getValue(1), // Glue
+ };
+ return DAG.getMemIntrinsicNode(AMDGPUISD::DS_ORDERED_COUNT, DL,
+ M->getVTList(), Ops, M->getMemoryVT(),
+ M->getMemOperand());
+ }
case Intrinsic::amdgcn_atomic_inc:
case Intrinsic::amdgcn_atomic_dec:
case Intrinsic::amdgcn_ds_fadd:
Modified: projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
==============================================================================
--- projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp Tue Jan 22 20:08:25 2019 (r343312)
+++ projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp Tue Jan 22 20:13:43 2019 (r343313)
@@ -536,11 +536,14 @@ void WaitcntBrackets::updateByEvent(const SIInstrInfo
CurrScore);
}
if (Inst.mayStore()) {
- setExpScore(
- &Inst, TII, TRI, MRI,
- AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0),
- CurrScore);
if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
+ AMDGPU::OpName::data0) != -1) {
+ setExpScore(
+ &Inst, TII, TRI, MRI,
+ AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0),
+ CurrScore);
+ }
+ if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
AMDGPU::OpName::data1) != -1) {
setExpScore(&Inst, TII, TRI, MRI,
AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
@@ -1093,7 +1096,8 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(Machine
// bracket and the destination operand scores.
// TODO: Use the (TSFlags & SIInstrFlags::LGKM_CNT) property everywhere.
if (TII->isDS(Inst) && TII->usesLGKM_CNT(Inst)) {
- if (TII->hasModifiersSet(Inst, AMDGPU::OpName::gds)) {
+ if (TII->isAlwaysGDS(Inst.getOpcode()) ||
+ TII->hasModifiersSet(Inst, AMDGPU::OpName::gds)) {
ScoreBrackets->updateByEvent(TII, TRI, MRI, GDS_ACCESS, Inst);
ScoreBrackets->updateByEvent(TII, TRI, MRI, GDS_GPR_LOCK, Inst);
} else {
Modified: projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
==============================================================================
--- projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp Tue Jan 22 20:08:25 2019 (r343312)
+++ projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp Tue Jan 22 20:13:43 2019 (r343313)
@@ -2390,6 +2390,16 @@ bool SIInstrInfo::isSchedulingBoundary(const MachineIn
changesVGPRIndexingMode(MI);
}
+bool SIInstrInfo::isAlwaysGDS(uint16_t Opcode) const {
+ return Opcode == AMDGPU::DS_ORDERED_COUNT ||
+ Opcode == AMDGPU::DS_GWS_INIT ||
+ Opcode == AMDGPU::DS_GWS_SEMA_V ||
+ Opcode == AMDGPU::DS_GWS_SEMA_BR ||
+ Opcode == AMDGPU::DS_GWS_SEMA_P ||
+ Opcode == AMDGPU::DS_GWS_SEMA_RELEASE_ALL ||
+ Opcode == AMDGPU::DS_GWS_BARRIER;
+}
+
bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const {
unsigned Opcode = MI.getOpcode();
@@ -2403,7 +2413,8 @@ bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(cons
// EXEC = 0, but checking for that case here seems not worth it
// given the typical code patterns.
if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
- Opcode == AMDGPU::EXP || Opcode == AMDGPU::EXP_DONE)
+ Opcode == AMDGPU::EXP || Opcode == AMDGPU::EXP_DONE ||
+ Opcode == AMDGPU::DS_ORDERED_COUNT)
return true;
if (MI.isInlineAsm())
Modified: projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h
==============================================================================
--- projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h Tue Jan 22 20:08:25 2019 (r343312)
+++ projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h Tue Jan 22 20:13:43 2019 (r343313)
@@ -450,6 +450,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
return get(Opcode).TSFlags & SIInstrFlags::DS;
}
+ bool isAlwaysGDS(uint16_t Opcode) const;
+
static bool isMIMG(const MachineInstr &MI) {
return MI.getDesc().TSFlags & SIInstrFlags::MIMG;
}
Modified: projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td
==============================================================================
--- projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td Tue Jan 22 20:08:25 2019 (r343312)
+++ projects/clang800-import/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td Tue Jan 22 20:13:43 2019 (r343313)
@@ -45,6 +45,11 @@ def SIsbuffer_load : SDNode<"AMDGPUISD::SBUFFER_LOAD",
[SDNPMayLoad, SDNPMemOperand]
>;
+def SIds_ordered_count : SDNode<"AMDGPUISD::DS_ORDERED_COUNT",
+ SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i16>]>,
+ [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain, SDNPInGlue]
+>;
+
def SIatomic_inc : SDNode<"AMDGPUISD::ATOMIC_INC", SDTAtomic2,
[SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
>;
Modified: projects/clang800-import/contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp
==============================================================================
--- projects/clang800-import/contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp Tue Jan 22 20:08:25 2019 (r343312)
+++ projects/clang800-import/contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp Tue Jan 22 20:13:43 2019 (r343313)
@@ -17,6 +17,7 @@
#include "MSP430InstrInfo.h"
#include "MSP430MCInstLower.h"
#include "MSP430TargetMachine.h"
+#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -28,6 +29,7 @@
#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/TargetRegistry.h"
@@ -44,6 +46,8 @@ namespace {
StringRef getPassName() const override { return "MSP430 Assembly Printer"; }
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
void printOperand(const MachineInstr *MI, int OpNum,
raw_ostream &O, const char* Modifier = nullptr);
void printSrcMemOperand(const MachineInstr *MI, int OpNum,
@@ -55,6 +59,8 @@ namespace {
unsigned OpNo, unsigned AsmVariant,
const char *ExtraCode, raw_ostream &O) override;
void EmitInstruction(const MachineInstr *MI) override;
+
+ void EmitInterruptVectorSection(MachineFunction &ISR);
};
} // end of anonymous namespace
@@ -151,6 +157,32 @@ void MSP430AsmPrinter::EmitInstruction(const MachineIn
MCInst TmpInst;
MCInstLowering.Lower(MI, TmpInst);
EmitToStreamer(*OutStreamer, TmpInst);
+}
+
+void MSP430AsmPrinter::EmitInterruptVectorSection(MachineFunction &ISR) {
+ MCSection *Cur = OutStreamer->getCurrentSectionOnly();
+ const auto *F = &ISR.getFunction();
+ assert(F->hasFnAttribute("interrupt") &&
+ "Functions with MSP430_INTR CC should have 'interrupt' attribute");
+ StringRef IVIdx = F->getFnAttribute("interrupt").getValueAsString();
+ MCSection *IV = OutStreamer->getContext().getELFSection(
+ "__interrupt_vector_" + IVIdx,
+ ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_EXECINSTR);
+ OutStreamer->SwitchSection(IV);
+
+ const MCSymbol *FunctionSymbol = getSymbol(F);
+ OutStreamer->EmitSymbolValue(FunctionSymbol, TM.getProgramPointerSize());
+ OutStreamer->SwitchSection(Cur);
+}
+
+bool MSP430AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ // Emit separate section for an interrupt vector if ISR
+ if (MF.getFunction().getCallingConv() == CallingConv::MSP430_INTR)
+ EmitInterruptVectorSection(MF);
+
+ SetupMachineFunction(MF);
+ EmitFunctionBody();
+ return false;
}
// Force static initialization.
Modified: projects/clang800-import/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
==============================================================================
--- projects/clang800-import/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp Tue Jan 22 20:08:25 2019 (r343312)
+++ projects/clang800-import/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp Tue Jan 22 20:13:43 2019 (r343313)
@@ -27202,6 +27202,8 @@ const char *X86TargetLowering::getTargetNodeName(unsig
case X86ISD::VSHLI: return "X86ISD::VSHLI";
case X86ISD::VSRLI: return "X86ISD::VSRLI";
case X86ISD::VSRAI: return "X86ISD::VSRAI";
+ case X86ISD::VSHLV: return "X86ISD::VSHLV";
+ case X86ISD::VSRLV: return "X86ISD::VSRLV";
case X86ISD::VSRAV: return "X86ISD::VSRAV";
case X86ISD::VROTLI: return "X86ISD::VROTLI";
case X86ISD::VROTRI: return "X86ISD::VROTRI";
Modified: projects/clang800-import/contrib/llvm/lib/Target/X86/X86ISelLowering.h
==============================================================================
--- projects/clang800-import/contrib/llvm/lib/Target/X86/X86ISelLowering.h Tue Jan 22 20:08:25 2019 (r343312)
+++ projects/clang800-import/contrib/llvm/lib/Target/X86/X86ISelLowering.h Tue Jan 22 20:13:43 2019 (r343313)
@@ -315,10 +315,8 @@ namespace llvm {
// Vector shift elements
VSHL, VSRL, VSRA,
- // Vector variable shift right arithmetic.
- // Unlike ISD::SRA, in case shift count greater then element size
- // use sign bit to fill destination data element.
- VSRAV,
+ // Vector variable shift
+ VSHLV, VSRLV, VSRAV,
// Vector shift elements by immediate
VSHLI, VSRLI, VSRAI,
Modified: projects/clang800-import/contrib/llvm/lib/Target/X86/X86InstrAVX512.td
==============================================================================
--- projects/clang800-import/contrib/llvm/lib/Target/X86/X86InstrAVX512.td Tue Jan 22 20:08:25 2019 (r343312)
+++ projects/clang800-import/contrib/llvm/lib/Target/X86/X86InstrAVX512.td Tue Jan 22 20:13:43 2019 (r343313)
@@ -6445,52 +6445,53 @@ defm : avx512_var_shift_lowering<avx512vl_i16_info, "V
defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSRLVW", srl, [HasBWI, NoVLX]>;
// Special handling for VPSRAV intrinsics.
-multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _,
- list<Predicate> p> {
+multiclass avx512_var_shift_int_lowering<string InstrStr, SDNode OpNode,
+ X86VectorVTInfo _, list<Predicate> p> {
let Predicates = p in {
- def : Pat<(_.VT (X86vsrav _.RC:$src1, _.RC:$src2)),
+ def : Pat<(_.VT (OpNode _.RC:$src1, _.RC:$src2)),
(!cast<Instruction>(InstrStr#_.ZSuffix#rr) _.RC:$src1,
_.RC:$src2)>;
- def : Pat<(_.VT (X86vsrav _.RC:$src1, (_.LdFrag addr:$src2))),
+ def : Pat<(_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2))),
(!cast<Instruction>(InstrStr#_.ZSuffix##rm)
_.RC:$src1, addr:$src2)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (X86vsrav _.RC:$src1, _.RC:$src2), _.RC:$src0)),
+ (OpNode _.RC:$src1, _.RC:$src2), _.RC:$src0)),
(!cast<Instruction>(InstrStr#_.ZSuffix#rrk) _.RC:$src0,
_.KRC:$mask, _.RC:$src1, _.RC:$src2)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (X86vsrav _.RC:$src1, (_.LdFrag addr:$src2)),
+ (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
_.RC:$src0)),
(!cast<Instruction>(InstrStr#_.ZSuffix##rmk) _.RC:$src0,
_.KRC:$mask, _.RC:$src1, addr:$src2)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (X86vsrav _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)),
+ (OpNode _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)),
(!cast<Instruction>(InstrStr#_.ZSuffix#rrkz) _.KRC:$mask,
_.RC:$src1, _.RC:$src2)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (X86vsrav _.RC:$src1, (_.LdFrag addr:$src2)),
+ (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
_.ImmAllZerosV)),
(!cast<Instruction>(InstrStr#_.ZSuffix##rmkz) _.KRC:$mask,
_.RC:$src1, addr:$src2)>;
}
}
-multiclass avx512_var_shift_int_lowering_mb<string InstrStr, X86VectorVTInfo _,
- list<Predicate> p> :
- avx512_var_shift_int_lowering<InstrStr, _, p> {
+multiclass avx512_var_shift_int_lowering_mb<string InstrStr, SDNode OpNode,
+ X86VectorVTInfo _,
+ list<Predicate> p> :
+ avx512_var_shift_int_lowering<InstrStr, OpNode, _, p> {
let Predicates = p in {
- def : Pat<(_.VT (X86vsrav _.RC:$src1,
+ def : Pat<(_.VT (OpNode _.RC:$src1,
(X86VBroadcast (_.ScalarLdFrag addr:$src2)))),
(!cast<Instruction>(InstrStr#_.ZSuffix##rmb)
_.RC:$src1, addr:$src2)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (X86vsrav _.RC:$src1,
+ (OpNode _.RC:$src1,
(X86VBroadcast (_.ScalarLdFrag addr:$src2))),
_.RC:$src0)),
(!cast<Instruction>(InstrStr#_.ZSuffix##rmbk) _.RC:$src0,
_.KRC:$mask, _.RC:$src1, addr:$src2)>;
def : Pat<(_.VT (vselect _.KRCWM:$mask,
- (X86vsrav _.RC:$src1,
+ (OpNode _.RC:$src1,
(X86VBroadcast (_.ScalarLdFrag addr:$src2))),
_.ImmAllZerosV)),
(!cast<Instruction>(InstrStr#_.ZSuffix##rmbkz) _.KRC:$mask,
@@ -6498,15 +6499,47 @@ multiclass avx512_var_shift_int_lowering_mb<string Ins
}
}
-defm : avx512_var_shift_int_lowering<"VPSRAVW", v8i16x_info, [HasVLX, HasBWI]>;
-defm : avx512_var_shift_int_lowering<"VPSRAVW", v16i16x_info, [HasVLX, HasBWI]>;
-defm : avx512_var_shift_int_lowering<"VPSRAVW", v32i16_info, [HasBWI]>;
-defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v4i32x_info, [HasVLX]>;
-defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v8i32x_info, [HasVLX]>;
-defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v16i32_info, [HasAVX512]>;
-defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v2i64x_info, [HasVLX]>;
-defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v4i64x_info, [HasVLX]>;
-defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v8i64_info, [HasAVX512]>;
+multiclass avx512_var_shift_int_lowering_vl<string InstrStr, SDNode OpNode,
+ AVX512VLVectorVTInfo VTInfo,
+ Predicate p> {
+ defm : avx512_var_shift_int_lowering<InstrStr, OpNode, VTInfo.info512, [p]>;
+ defm : avx512_var_shift_int_lowering<InstrStr, OpNode, VTInfo.info256,
+ [HasVLX, p]>;
+ defm : avx512_var_shift_int_lowering<InstrStr, OpNode, VTInfo.info128,
+ [HasVLX, p]>;
+}
+
+multiclass avx512_var_shift_int_lowering_mb_vl<string InstrStr, SDNode OpNode,
+ AVX512VLVectorVTInfo VTInfo,
+ Predicate p> {
+ defm : avx512_var_shift_int_lowering_mb<InstrStr, OpNode, VTInfo.info512, [p]>;
+ defm : avx512_var_shift_int_lowering_mb<InstrStr, OpNode, VTInfo.info256,
+ [HasVLX, p]>;
+ defm : avx512_var_shift_int_lowering_mb<InstrStr, OpNode, VTInfo.info128,
+ [HasVLX, p]>;
+}
+
+defm : avx512_var_shift_int_lowering_vl<"VPSRAVW", X86vsrav, avx512vl_i16_info,
+ HasBWI>;
+defm : avx512_var_shift_int_lowering_mb_vl<"VPSRAVD", X86vsrav,
+ avx512vl_i32_info, HasAVX512>;
+defm : avx512_var_shift_int_lowering_mb_vl<"VPSRAVQ", X86vsrav,
+ avx512vl_i64_info, HasAVX512>;
+
+defm : avx512_var_shift_int_lowering_vl<"VPSRLVW", X86vsrlv, avx512vl_i16_info,
+ HasBWI>;
+defm : avx512_var_shift_int_lowering_mb_vl<"VPSRLVD", X86vsrlv,
+ avx512vl_i32_info, HasAVX512>;
+defm : avx512_var_shift_int_lowering_mb_vl<"VPSRLVQ", X86vsrlv,
+ avx512vl_i64_info, HasAVX512>;
+
+defm : avx512_var_shift_int_lowering_vl<"VPSLLVW", X86vshlv, avx512vl_i16_info,
+ HasBWI>;
+defm : avx512_var_shift_int_lowering_mb_vl<"VPSLLVD", X86vshlv,
+ avx512vl_i32_info, HasAVX512>;
+defm : avx512_var_shift_int_lowering_mb_vl<"VPSLLVQ", X86vshlv,
+ avx512vl_i64_info, HasAVX512>;
+
// Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
let Predicates = [HasAVX512, NoVLX] in {
Modified: projects/clang800-import/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
==============================================================================
--- projects/clang800-import/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td Tue Jan 22 20:08:25 2019 (r343312)
+++ projects/clang800-import/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td Tue Jan 22 20:13:43 2019 (r343313)
@@ -198,6 +198,8 @@ def X86vsra : SDNode<"X86ISD::VSRA", X86vshiftunifo
def X86vshiftvariable : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>, SDTCisInt<0>]>;
+def X86vshlv : SDNode<"X86ISD::VSHLV", X86vshiftvariable>;
+def X86vsrlv : SDNode<"X86ISD::VSRLV", X86vshiftvariable>;
def X86vsrav : SDNode<"X86ISD::VSRAV", X86vshiftvariable>;
def X86vshli : SDNode<"X86ISD::VSHLI", X86vshiftimm>;
Modified: projects/clang800-import/contrib/llvm/lib/Target/X86/X86InstrSSE.td
==============================================================================
--- projects/clang800-import/contrib/llvm/lib/Target/X86/X86InstrSSE.td Tue Jan 22 20:08:25 2019 (r343312)
+++ projects/clang800-import/contrib/llvm/lib/Target/X86/X86InstrSSE.td Tue Jan 22 20:13:43 2019 (r343313)
@@ -8318,7 +8318,7 @@ def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128:$src))
// Variable Bit Shifts
//
multiclass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
- ValueType vt128, ValueType vt256> {
+ SDNode IntrinNode, ValueType vt128, ValueType vt256> {
def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -8347,23 +8347,23 @@ multiclass avx2_var_shift<bits<8> opc, string OpcodeSt
(vt256 (load addr:$src2)))))]>,
VEX_4V, VEX_L, Sched<[SchedWriteVarVecShift.YMM.Folded,
SchedWriteVarVecShift.YMM.ReadAfterFold]>;
+
+ def : Pat<(vt128 (IntrinNode VR128:$src1, VR128:$src2)),
+ (!cast<Instruction>(NAME#"rr") VR128:$src1, VR128:$src2)>;
+ def : Pat<(vt128 (IntrinNode VR128:$src1, (load addr:$src2))),
+ (!cast<Instruction>(NAME#"rm") VR128:$src1, addr:$src2)>;
+ def : Pat<(vt256 (IntrinNode VR256:$src1, VR256:$src2)),
+ (!cast<Instruction>(NAME#"Yrr") VR256:$src1, VR256:$src2)>;
+ def : Pat<(vt256 (IntrinNode VR256:$src1, (load addr:$src2))),
+ (!cast<Instruction>(NAME#"Yrm") VR256:$src1, addr:$src2)>;
}
let Predicates = [HasAVX2, NoVLX] in {
- defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", shl, v4i32, v8i32>;
- defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", shl, v2i64, v4i64>, VEX_W;
- defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", srl, v4i32, v8i32>;
- defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, v2i64, v4i64>, VEX_W;
- defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, v4i32, v8i32>;
-
- def : Pat<(v4i32 (X86vsrav VR128:$src1, VR128:$src2)),
- (VPSRAVDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4i32 (X86vsrav VR128:$src1, (load addr:$src2))),
- (VPSRAVDrm VR128:$src1, addr:$src2)>;
- def : Pat<(v8i32 (X86vsrav VR256:$src1, VR256:$src2)),
- (VPSRAVDYrr VR256:$src1, VR256:$src2)>;
- def : Pat<(v8i32 (X86vsrav VR256:$src1, (load addr:$src2))),
- (VPSRAVDYrm VR256:$src1, addr:$src2)>;
+ defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", shl, X86vshlv, v4i32, v8i32>;
+ defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", shl, X86vshlv, v2i64, v4i64>, VEX_W;
+ defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", srl, X86vsrlv, v4i32, v8i32>;
+ defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, X86vsrlv, v2i64, v4i64>, VEX_W;
+ defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, X86vsrav, v4i32, v8i32>;
}
//===----------------------------------------------------------------------===//
Modified: projects/clang800-import/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h
==============================================================================
--- projects/clang800-import/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h Tue Jan 22 20:08:25 2019 (r343312)
+++ projects/clang800-import/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h Tue Jan 22 20:13:43 2019 (r343313)
@@ -389,10 +389,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] =
X86_INTRINSIC_DATA(avx2_pslli_d, VSHIFT, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx2_pslli_q, VSHIFT, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx2_pslli_w, VSHIFT, X86ISD::VSHLI, 0),
- X86_INTRINSIC_DATA(avx2_psllv_d, INTR_TYPE_2OP, ISD::SHL, 0),
- X86_INTRINSIC_DATA(avx2_psllv_d_256, INTR_TYPE_2OP, ISD::SHL, 0),
- X86_INTRINSIC_DATA(avx2_psllv_q, INTR_TYPE_2OP, ISD::SHL, 0),
- X86_INTRINSIC_DATA(avx2_psllv_q_256, INTR_TYPE_2OP, ISD::SHL, 0),
+ X86_INTRINSIC_DATA(avx2_psllv_d, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
+ X86_INTRINSIC_DATA(avx2_psllv_d_256, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
+ X86_INTRINSIC_DATA(avx2_psllv_q, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
+ X86_INTRINSIC_DATA(avx2_psllv_q_256, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
X86_INTRINSIC_DATA(avx2_psra_d, INTR_TYPE_2OP, X86ISD::VSRA, 0),
X86_INTRINSIC_DATA(avx2_psra_w, INTR_TYPE_2OP, X86ISD::VSRA, 0),
X86_INTRINSIC_DATA(avx2_psrai_d, VSHIFT, X86ISD::VSRAI, 0),
@@ -405,10 +405,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] =
X86_INTRINSIC_DATA(avx2_psrli_d, VSHIFT, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx2_psrli_q, VSHIFT, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx2_psrli_w, VSHIFT, X86ISD::VSRLI, 0),
- X86_INTRINSIC_DATA(avx2_psrlv_d, INTR_TYPE_2OP, ISD::SRL, 0),
- X86_INTRINSIC_DATA(avx2_psrlv_d_256, INTR_TYPE_2OP, ISD::SRL, 0),
- X86_INTRINSIC_DATA(avx2_psrlv_q, INTR_TYPE_2OP, ISD::SRL, 0),
- X86_INTRINSIC_DATA(avx2_psrlv_q_256, INTR_TYPE_2OP, ISD::SRL, 0),
+ X86_INTRINSIC_DATA(avx2_psrlv_d, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
+ X86_INTRINSIC_DATA(avx2_psrlv_d_256, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
+ X86_INTRINSIC_DATA(avx2_psrlv_q, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
+ X86_INTRINSIC_DATA(avx2_psrlv_q_256, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
X86_INTRINSIC_DATA(avx512_add_pd_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND),
X86_INTRINSIC_DATA(avx512_add_ps_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND),
X86_INTRINSIC_DATA(avx512_cmp_pd_128, CMP_MASK_CC, X86ISD::CMPM, 0),
@@ -943,11 +943,11 @@ static const IntrinsicData IntrinsicsWithoutChain[] =
X86_INTRINSIC_DATA(avx512_pslli_d_512, VSHIFT, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx512_pslli_q_512, VSHIFT, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx512_pslli_w_512, VSHIFT, X86ISD::VSHLI, 0),
- X86_INTRINSIC_DATA(avx512_psllv_d_512, INTR_TYPE_2OP, ISD::SHL, 0),
- X86_INTRINSIC_DATA(avx512_psllv_q_512, INTR_TYPE_2OP, ISD::SHL, 0),
- X86_INTRINSIC_DATA(avx512_psllv_w_128, INTR_TYPE_2OP, ISD::SHL, 0),
- X86_INTRINSIC_DATA(avx512_psllv_w_256, INTR_TYPE_2OP, ISD::SHL, 0),
- X86_INTRINSIC_DATA(avx512_psllv_w_512, INTR_TYPE_2OP, ISD::SHL, 0),
+ X86_INTRINSIC_DATA(avx512_psllv_d_512, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
+ X86_INTRINSIC_DATA(avx512_psllv_q_512, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
+ X86_INTRINSIC_DATA(avx512_psllv_w_128, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
+ X86_INTRINSIC_DATA(avx512_psllv_w_256, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
+ X86_INTRINSIC_DATA(avx512_psllv_w_512, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
X86_INTRINSIC_DATA(avx512_psra_d_512, INTR_TYPE_2OP, X86ISD::VSRA, 0),
X86_INTRINSIC_DATA(avx512_psra_q_128, INTR_TYPE_2OP, X86ISD::VSRA, 0),
X86_INTRINSIC_DATA(avx512_psra_q_256, INTR_TYPE_2OP, X86ISD::VSRA, 0),
@@ -971,11 +971,11 @@ static const IntrinsicData IntrinsicsWithoutChain[] =
X86_INTRINSIC_DATA(avx512_psrli_d_512, VSHIFT, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx512_psrli_q_512, VSHIFT, X86ISD::VSRLI, 0),
X86_INTRINSIC_DATA(avx512_psrli_w_512, VSHIFT, X86ISD::VSRLI, 0),
- X86_INTRINSIC_DATA(avx512_psrlv_d_512, INTR_TYPE_2OP, ISD::SRL, 0),
- X86_INTRINSIC_DATA(avx512_psrlv_q_512, INTR_TYPE_2OP, ISD::SRL, 0),
- X86_INTRINSIC_DATA(avx512_psrlv_w_128, INTR_TYPE_2OP, ISD::SRL, 0),
- X86_INTRINSIC_DATA(avx512_psrlv_w_256, INTR_TYPE_2OP, ISD::SRL, 0),
- X86_INTRINSIC_DATA(avx512_psrlv_w_512, INTR_TYPE_2OP, ISD::SRL, 0),
+ X86_INTRINSIC_DATA(avx512_psrlv_d_512, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
+ X86_INTRINSIC_DATA(avx512_psrlv_q_512, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
+ X86_INTRINSIC_DATA(avx512_psrlv_w_128, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
+ X86_INTRINSIC_DATA(avx512_psrlv_w_256, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
+ X86_INTRINSIC_DATA(avx512_psrlv_w_512, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
X86_INTRINSIC_DATA(avx512_pternlog_d_128, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_pternlog_d_256, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
X86_INTRINSIC_DATA(avx512_pternlog_d_512, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
Modified: projects/clang800-import/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
==============================================================================
--- projects/clang800-import/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp Tue Jan 22 20:08:25 2019 (r343312)
+++ projects/clang800-import/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp Tue Jan 22 20:13:43 2019 (r343313)
@@ -3065,9 +3065,11 @@ static bool TryToSinkInstruction(Instruction *I, Basic
I->isTerminator())
return false;
- // Do not sink alloca instructions out of the entry block.
- if (isa<AllocaInst>(I) && I->getParent() ==
- &DestBlock->getParent()->getEntryBlock())
+ // Do not sink static or dynamic alloca instructions. Static allocas must
+ // remain in the entry block, and dynamic allocas must not be sunk in between
+ // a stacksave / stackrestore pair, which would incorrectly shorten its
+ // lifetime.
+ if (isa<AllocaInst>(I))
return false;
// Do not sink into catchswitch blocks.
Modified: projects/clang800-import/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
==============================================================================
--- projects/clang800-import/contrib/llvm/lib/Transforms/Scalar/SROA.cpp Tue Jan 22 20:08:25 2019 (r343312)
+++ projects/clang800-import/contrib/llvm/lib/Transforms/Scalar/SROA.cpp Tue Jan 22 20:13:43 2019 (r343313)
@@ -3031,7 +3031,10 @@ class llvm::sroa::AllocaSliceRewriter (private)
ConstantInt *Size =
ConstantInt::get(cast<IntegerType>(II.getArgOperand(0)->getType()),
NewEndOffset - NewBeginOffset);
- Value *Ptr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
+ // Lifetime intrinsics always expect an i8* so directly get such a pointer
+ // for the new alloca slice.
+ Type *PointerTy = IRB.getInt8PtrTy(OldPtr->getType()->getPointerAddressSpace());
+ Value *Ptr = getNewAllocaSlicePtr(IRB, PointerTy);
Value *New;
if (II.getIntrinsicID() == Intrinsic::lifetime_start)
New = IRB.CreateLifetimeStart(Ptr, Size);
Modified: projects/clang800-import/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
==============================================================================
--- projects/clang800-import/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Tue Jan 22 20:08:25 2019 (r343312)
+++ projects/clang800-import/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Tue Jan 22 20:13:43 2019 (r343313)
@@ -1468,8 +1468,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsi
// If any of the scalars is marked as a value that needs to stay scalar, then
// we need to gather the scalars.
+ // The reduction nodes (stored in UserIgnoreList) also should stay scalar.
for (unsigned i = 0, e = VL.size(); i != e; ++i) {
- if (MustGather.count(VL[i])) {
+ if (MustGather.count(VL[i]) || is_contained(UserIgnoreList, VL[i])) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n");
newTreeEntry(VL, false, UserTreeIdx);
return;
More information about the svn-src-projects
mailing list