svn commit: r312956 - in vendor/llvm/dist: cmake/modules include/llvm/IR lib/Analysis lib/Bitcode/Reader lib/CodeGen/SelectionDAG lib/Target/AArch64 lib/Target/AMDGPU lib/Target/ARM lib/Target/X86 ...
Dimitry Andric
dim at FreeBSD.org
Sun Jan 29 20:58:40 UTC 2017
Author: dim
Date: Sun Jan 29 20:58:36 2017
New Revision: 312956
URL: https://svnweb.freebsd.org/changeset/base/312956
Log:
Vendor import of llvm release_40 branch r293443:
https://llvm.org/svn/llvm-project/llvm/branches/release_40@293443
Added:
vendor/llvm/dist/test/Analysis/BasicAA/pr31761.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/flat-for-global-subtarget-feature.ll
vendor/llvm/dist/utils/sanitizers/
vendor/llvm/dist/utils/sanitizers/ubsan_blacklist.txt (contents, props changed)
Deleted:
vendor/llvm/dist/test/CodeGen/AMDGPU/ci-use-flat-for-global.ll
Modified:
vendor/llvm/dist/cmake/modules/HandleLLVMOptions.cmake
vendor/llvm/dist/include/llvm/IR/IntrinsicsAMDGPU.td
vendor/llvm/dist/lib/Analysis/BasicAliasAnalysis.cpp
vendor/llvm/dist/lib/Bitcode/Reader/MetadataLoader.cpp
vendor/llvm/dist/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
vendor/llvm/dist/lib/Target/AArch64/AArch64.td
vendor/llvm/dist/lib/Target/AArch64/AArch64InstrInfo.cpp
vendor/llvm/dist/lib/Target/AArch64/AArch64Subtarget.h
vendor/llvm/dist/lib/Target/AMDGPU/AMDGPU.td
vendor/llvm/dist/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
vendor/llvm/dist/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
vendor/llvm/dist/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
vendor/llvm/dist/lib/Target/AMDGPU/AMDGPUISelLowering.h
vendor/llvm/dist/lib/Target/AMDGPU/AMDGPUInstrInfo.td
vendor/llvm/dist/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
vendor/llvm/dist/lib/Target/AMDGPU/AMDGPUSubtarget.h
vendor/llvm/dist/lib/Target/AMDGPU/R600ISelLowering.cpp
vendor/llvm/dist/lib/Target/AMDGPU/R600Instructions.td
vendor/llvm/dist/lib/Target/AMDGPU/SIFrameLowering.cpp
vendor/llvm/dist/lib/Target/AMDGPU/SIISelLowering.cpp
vendor/llvm/dist/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
vendor/llvm/dist/lib/Target/AMDGPU/SIMachineFunctionInfo.h
vendor/llvm/dist/lib/Target/AMDGPU/SIRegisterInfo.cpp
vendor/llvm/dist/lib/Target/AMDGPU/VOP3Instructions.td
vendor/llvm/dist/lib/Target/ARM/ARMAsmPrinter.cpp
vendor/llvm/dist/lib/Target/ARM/ARMISelLowering.cpp
vendor/llvm/dist/lib/Target/X86/X86ISelLowering.cpp
vendor/llvm/dist/lib/Transforms/Utils/SimplifyCFG.cpp
vendor/llvm/dist/test/CodeGen/AArch64/no-quad-ldp-stp.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/32-bit-local-address-space.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/add.i16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/add.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/amdgcn.private-memory.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/amdgpu.work-item-intrinsics.deprecated.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/and.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/anyext.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/atomic_cmp_swap_local.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/atomic_load_add.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/atomic_load_sub.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/basic-branch.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/bfi_int.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/bfm.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/bitcast-vector-extract.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/bitreverse.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/br_cc.f16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/branch-condition-and.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/bswap.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/bug-vopc-commute.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/build_vector.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/cgp-bitfield-extract.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/concat_vectors.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/constant-fold-mi-operands.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/copy-illegal-type.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/copy-to-reg.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/ctlz.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/ctpop.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/ctpop64.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/cttz_zero_undef.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/cube.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/cvt_rpi_i32_f32.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/elf.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/extload.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/extract_vector_elt-f64.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/extract_vector_elt-i16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/extract_vector_elt-i64.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/extract_vector_elt-i8.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/fabs.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/fadd.f16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/fadd.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/fadd64.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/fceil.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/fcmp.f16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/fcmp64.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/fcopysign.f32.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/fcopysign.f64.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/fdiv.f16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/fdiv.f64.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/fdiv.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/ffloor.f64.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/ffloor.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/flat-address-space.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/flat-scratch-reg.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/fma.f64.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/fmax3.f64.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/fmax3.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/fmaxnum.f64.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/fmaxnum.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/fmin3.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/fminnum.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/fmul.f16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/fmul.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/fmuladd.f64.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/fnearbyint.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/fneg-combines.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/fneg-fabs.f64.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/fneg.f64.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/fneg.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/fp16_to_fp32.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/fp16_to_fp64.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/fp32_to_fp16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/fp_to_sint.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/fp_to_uint.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/fpext.f16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/fpext.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/fptosi.f16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/fptoui.f16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/fptrunc.f16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/fptrunc.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/fract.f64.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/fract.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/frem.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/fsqrt.f64.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/fsqrt.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/fsub.f16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/ftrunc.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/gep-address-space.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/global-directive.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/global-extload-i16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/global_atomics.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/global_atomics_i64.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/gv-const-addrspace.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/gv-offset-folding.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/half.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/hsa-note-no-func.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/i1-copy-phi.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/icmp64.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/imm.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/imm16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/indirect-addressing-si.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/indirect-private-64.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/infinite-loop.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/inline-asm.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/insert_vector_elt.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/inserted-wait-states.mir
vendor/llvm/dist/test/CodeGen/AMDGPU/kernel-args.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/large-alloca-graphics.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.AMDGPU.bfe.i32.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.AMDGPU.bfe.u32.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.AMDGPU.clamp.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.amdgcn.class.f16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.amdgcn.cos.f16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.amdgcn.div.fixup.f16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.amdgcn.div.fixup.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.amdgcn.fract.f16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.amdgcn.fract.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.f16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.mant.f16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.amdgcn.ldexp.f16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.f16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.f16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.amdgcn.s.memrealtime.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.amdgcn.s.memtime.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.amdgcn.sffbh.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.amdgcn.sin.f16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.amdgcn.sin.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.amdgcn.trig.preop.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.ceil.f16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.cos.f16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.exp2.f16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.floor.f16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.fma.f16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.log2.f16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.memcpy.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.rint.f16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.round.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.sin.f16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.sqrt.f16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.trunc.f16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/load-constant-f64.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/load-constant-i1.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/load-constant-i16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/load-constant-i32.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/load-constant-i64.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/load-constant-i8.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/load-global-f32.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/load-global-f64.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/load-global-i1.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/load-global-i16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/load-global-i32.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/load-global-i64.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/load-global-i8.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/load-local-i32.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/load-local-i8.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/load-weird-sizes.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/local-64.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/local-atomics.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/local-atomics64.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/local-stack-slot-bug.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/local-stack-slot-offset.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/lshl.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/lshr.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/mad_int24.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/mad_uint24.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/madak.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/madmk.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/max.i16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/merge-store-usedef.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/min.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/mubuf-shader-vgpr.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/mul.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/mul_int24.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/operand-spacing.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/or.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/rcp-pattern.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/readcyclecounter.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/reduce-load-width-alignment.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/reg-coalescer-sched-crash.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/reorder-stores.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/rotl.i64.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/rotr.i64.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/rotr.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/s_addk_i32.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/s_movk_i32.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/s_mulk_i32.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/scalar_to_vector.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/schedule-kernel-arg-loads.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/sdiv.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/sdivrem24.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/sdivrem64.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/select-i1.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/select-vectors.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/select.f16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/selectcc-opt.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/setcc-opt.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/setcc64.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/seto.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/sext-in-reg.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/sgpr-copy-duplicate-operand.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/sgpr-copy.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/shl.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/shl_add_ptr.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/si-annotate-cf.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/si-lod-bias.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/si-sgpr-spill.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/si-spill-cf.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/si-vector-hang.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/sign_extend.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/sint_to_fp.i64.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/sint_to_fp.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/sitofp.f16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/smed3.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/sminmax.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/smrd-vccz-bug.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/smrd.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/sopk-compares.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/sra.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/srl.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/store-global.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/store-v3i64.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/sub.i16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/trunc-bitcast-vector.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/trunc-cmp-constant.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/trunc-store-i1.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/trunc-store.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/uaddo.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/udiv.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/udivrem.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/udivrem24.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/udivrem64.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/uint_to_fp.i64.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/uint_to_fp.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/uitofp.f16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/umed3.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/unaligned-load-store.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/uniform-cfg.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/urecip.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/urem.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/v_cndmask.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/v_mac.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/v_mac_f16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/v_madak_f16.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/vselect.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/wait.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/waitcnt-flat.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/xor.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/zero_extend.ll
vendor/llvm/dist/test/CodeGen/ARM/neon_div.ll
vendor/llvm/dist/test/CodeGen/ARM/vector-load.ll
vendor/llvm/dist/test/CodeGen/ARM/xray-armv6-attribute-instrumentation.ll
vendor/llvm/dist/test/CodeGen/ARM/xray-armv7-attribute-instrumentation.ll
vendor/llvm/dist/test/CodeGen/X86/avx-trunc.ll
vendor/llvm/dist/test/CodeGen/X86/avx512-trunc.ll
vendor/llvm/dist/test/CodeGen/X86/phaddsub.ll
vendor/llvm/dist/test/MC/AMDGPU/vop3.s
vendor/llvm/dist/test/Transforms/SimplifyCFG/sink-common-code.ll
Modified: vendor/llvm/dist/cmake/modules/HandleLLVMOptions.cmake
==============================================================================
--- vendor/llvm/dist/cmake/modules/HandleLLVMOptions.cmake Sun Jan 29 19:54:34 2017 (r312955)
+++ vendor/llvm/dist/cmake/modules/HandleLLVMOptions.cmake Sun Jan 29 20:58:36 2017 (r312956)
@@ -555,6 +555,8 @@ if(LLVM_USE_SANITIZER)
append_common_sanitizer_flags()
append("-fsanitize=undefined -fno-sanitize=vptr,function -fno-sanitize-recover=all"
CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
+ append("-fsanitize-blacklist=${CMAKE_SOURCE_DIR}/utils/sanitizers/ubsan_blacklist.txt"
+ CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
elseif (LLVM_USE_SANITIZER STREQUAL "Thread")
append_common_sanitizer_flags()
append("-fsanitize=thread" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
Modified: vendor/llvm/dist/include/llvm/IR/IntrinsicsAMDGPU.td
==============================================================================
--- vendor/llvm/dist/include/llvm/IR/IntrinsicsAMDGPU.td Sun Jan 29 19:54:34 2017 (r312955)
+++ vendor/llvm/dist/include/llvm/IR/IntrinsicsAMDGPU.td Sun Jan 29 20:58:36 2017 (r312956)
@@ -100,6 +100,10 @@ def int_amdgcn_dispatch_id :
GCCBuiltin<"__builtin_amdgcn_dispatch_id">,
Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>;
+def int_amdgcn_implicit_buffer_ptr :
+ GCCBuiltin<"__builtin_amdgcn_implicit_buffer_ptr">,
+ Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;
+
//===----------------------------------------------------------------------===//
// Instruction Intrinsics
//===----------------------------------------------------------------------===//
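
For context: the new intrinsic is wired to the builtin name
__builtin_amdgcn_implicit_buffer_ptr via the GCCBuiltin mapping above, and its
TableGen signature types the result as an i8 pointer in address space 2
(constant). A hypothetical clang device-code sketch, assuming the builtin is
exposed (the typedef and cast are assumptions, not part of this diff; amdgcn
target only, not host-runnable):

    // Clang C++ device-code sketch for the amdgcn target.
    typedef const unsigned char __attribute__((address_space(2))) *cbuf_ptr;

    unsigned char first_byte() {
      cbuf_ptr p = (cbuf_ptr)__builtin_amdgcn_implicit_buffer_ptr();
      return p[0];  // first byte of the implicit buffer resource
    }
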
Modified: vendor/llvm/dist/lib/Analysis/BasicAliasAnalysis.cpp
==============================================================================
--- vendor/llvm/dist/lib/Analysis/BasicAliasAnalysis.cpp Sun Jan 29 19:54:34 2017 (r312955)
+++ vendor/llvm/dist/lib/Analysis/BasicAliasAnalysis.cpp Sun Jan 29 20:58:36 2017 (r312956)
@@ -1191,14 +1191,14 @@ AliasResult BasicAAResult::aliasGEP(cons
return MayAlias;
AliasResult R = aliasCheck(UnderlyingV1, MemoryLocation::UnknownSize,
- AAMDNodes(), V2, V2Size, V2AAInfo,
- nullptr, UnderlyingV2);
+ AAMDNodes(), V2, MemoryLocation::UnknownSize,
+ V2AAInfo, nullptr, UnderlyingV2);
if (R != MustAlias)
// If V2 may alias GEP base pointer, conservatively returns MayAlias.
// If V2 is known not to alias GEP base pointer, then the two values
- // cannot alias per GEP semantics: "A pointer value formed from a
- // getelementptr instruction is associated with the addresses associated
- // with the first operand of the getelementptr".
+ // cannot alias per GEP semantics: "Any memory access must be done through
+ // a pointer value associated with an address range of the memory access,
+ // otherwise the behavior is undefined.".
return R;
// If the max search depth is reached the result is undefined
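
For context: the recursive query here asks whether V2 can alias the GEP's
underlying object, and the size of V2's own access says nothing about the
extent of that object, so the call now passes MemoryLocation::UnknownSize for
V2 as well to stay conservative (the new test Analysis/BasicAA/pr31761.ll in
the Added list exercises this). A hypothetical C++ illustration of the kind of
overlap being guarded against:

    #include <cstdio>

    int main() {
      char buf[64];                  // the GEP's underlying object
      char *gep = buf + 32;          // pointer formed from buf by a GEP
      char *v2  = buf + 32;          // V2: a 1-byte access into the same buffer
      *gep = 1;
      *v2  = 2;                      // overlaps gep despite the tiny access size
      std::printf("%d\n", buf[32]);  // prints 2
    }
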
Modified: vendor/llvm/dist/lib/Bitcode/Reader/MetadataLoader.cpp
==============================================================================
--- vendor/llvm/dist/lib/Bitcode/Reader/MetadataLoader.cpp Sun Jan 29 19:54:34 2017 (r312955)
+++ vendor/llvm/dist/lib/Bitcode/Reader/MetadataLoader.cpp Sun Jan 29 20:58:36 2017 (r312956)
@@ -919,7 +919,8 @@ Error MetadataLoader::MetadataLoaderImpl
// If this isn't a LocalAsMetadata record, we're dropping it. This used
// to be legal, but there's no upgrade path.
auto dropRecord = [&] {
- MetadataList.assignValue(MDNode::get(Context, None), NextMetadataNo++);
+ MetadataList.assignValue(MDNode::get(Context, None), NextMetadataNo);
+ NextMetadataNo++;
};
if (Record.size() != 2) {
dropRecord();
@@ -934,7 +935,8 @@ Error MetadataLoader::MetadataLoaderImpl
MetadataList.assignValue(
LocalAsMetadata::get(ValueList.getValueFwdRef(Record[1], Ty)),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_OLD_NODE: {
@@ -959,7 +961,8 @@ Error MetadataLoader::MetadataLoaderImpl
} else
Elts.push_back(nullptr);
}
- MetadataList.assignValue(MDNode::get(Context, Elts), NextMetadataNo++);
+ MetadataList.assignValue(MDNode::get(Context, Elts), NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_VALUE: {
@@ -972,7 +975,8 @@ Error MetadataLoader::MetadataLoaderImpl
MetadataList.assignValue(
ValueAsMetadata::get(ValueList.getValueFwdRef(Record[1], Ty)),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_DISTINCT_NODE:
@@ -985,7 +989,8 @@ Error MetadataLoader::MetadataLoaderImpl
Elts.push_back(getMDOrNull(ID));
MetadataList.assignValue(IsDistinct ? MDNode::getDistinct(Context, Elts)
: MDNode::get(Context, Elts),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_LOCATION: {
@@ -999,7 +1004,8 @@ Error MetadataLoader::MetadataLoaderImpl
Metadata *InlinedAt = getMDOrNull(Record[4]);
MetadataList.assignValue(
GET_OR_DISTINCT(DILocation, (Context, Line, Column, Scope, InlinedAt)),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_GENERIC_DEBUG: {
@@ -1019,7 +1025,8 @@ Error MetadataLoader::MetadataLoaderImpl
DwarfOps.push_back(getMDOrNull(Record[I]));
MetadataList.assignValue(
GET_OR_DISTINCT(GenericDINode, (Context, Tag, Header, DwarfOps)),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_SUBRANGE: {
@@ -1030,7 +1037,8 @@ Error MetadataLoader::MetadataLoaderImpl
MetadataList.assignValue(
GET_OR_DISTINCT(DISubrange,
(Context, Record[1], unrotateSign(Record[2]))),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_ENUMERATOR: {
@@ -1041,7 +1049,8 @@ Error MetadataLoader::MetadataLoaderImpl
MetadataList.assignValue(
GET_OR_DISTINCT(DIEnumerator, (Context, unrotateSign(Record[1]),
getMDString(Record[2]))),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_BASIC_TYPE: {
@@ -1053,7 +1062,8 @@ Error MetadataLoader::MetadataLoaderImpl
GET_OR_DISTINCT(DIBasicType,
(Context, Record[1], getMDString(Record[2]), Record[3],
Record[4], Record[5])),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_DERIVED_TYPE: {
@@ -1069,7 +1079,8 @@ Error MetadataLoader::MetadataLoaderImpl
getDITypeRefOrNull(Record[5]),
getDITypeRefOrNull(Record[6]), Record[7], Record[8],
Record[9], Flags, getDITypeRefOrNull(Record[11]))),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_COMPOSITE_TYPE: {
@@ -1134,7 +1145,8 @@ Error MetadataLoader::MetadataLoaderImpl
if (!IsNotUsedInTypeRef && Identifier)
MetadataList.addTypeRef(*Identifier, *cast<DICompositeType>(CT));
- MetadataList.assignValue(CT, NextMetadataNo++);
+ MetadataList.assignValue(CT, NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_SUBROUTINE_TYPE: {
@@ -1151,7 +1163,8 @@ Error MetadataLoader::MetadataLoaderImpl
MetadataList.assignValue(
GET_OR_DISTINCT(DISubroutineType, (Context, Flags, CC, Types)),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
@@ -1165,7 +1178,8 @@ Error MetadataLoader::MetadataLoaderImpl
(Context, getMDOrNull(Record[1]),
getMDString(Record[2]), getMDString(Record[3]),
getMDString(Record[4]), getMDString(Record[5]))),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
@@ -1181,7 +1195,8 @@ Error MetadataLoader::MetadataLoaderImpl
Record.size() == 3 ? DIFile::CSK_None
: static_cast<DIFile::ChecksumKind>(Record[3]),
Record.size() == 3 ? nullptr : getMDString(Record[4]))),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_COMPILE_UNIT: {
@@ -1200,7 +1215,8 @@ Error MetadataLoader::MetadataLoaderImpl
Record.size() <= 14 ? 0 : Record[14],
Record.size() <= 16 ? true : Record[16]);
- MetadataList.assignValue(CU, NextMetadataNo++);
+ MetadataList.assignValue(CU, NextMetadataNo);
+ NextMetadataNo++;
// Upgrade the list of subprograms.
if (Metadata *SPs = getMDOrNullWithoutPlaceholders(Record[11]))
@@ -1247,7 +1263,8 @@ Error MetadataLoader::MetadataLoaderImpl
getMDOrNull(Record[16 + Offset]), // declaration
getMDOrNull(Record[17 + Offset]) // variables
));
- MetadataList.assignValue(SP, NextMetadataNo++);
+ MetadataList.assignValue(SP, NextMetadataNo);
+ NextMetadataNo++;
// Upgrade sp->function mapping to function->sp mapping.
if (HasFn) {
@@ -1272,7 +1289,8 @@ Error MetadataLoader::MetadataLoaderImpl
GET_OR_DISTINCT(DILexicalBlock,
(Context, getMDOrNull(Record[1]),
getMDOrNull(Record[2]), Record[3], Record[4])),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_LEXICAL_BLOCK_FILE: {
@@ -1284,7 +1302,8 @@ Error MetadataLoader::MetadataLoaderImpl
GET_OR_DISTINCT(DILexicalBlockFile,
(Context, getMDOrNull(Record[1]),
getMDOrNull(Record[2]), Record[3])),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_NAMESPACE: {
@@ -1298,7 +1317,8 @@ Error MetadataLoader::MetadataLoaderImpl
(Context, getMDOrNull(Record[1]),
getMDOrNull(Record[2]), getMDString(Record[3]),
Record[4], ExportSymbols)),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_MACRO: {
@@ -1310,7 +1330,8 @@ Error MetadataLoader::MetadataLoaderImpl
GET_OR_DISTINCT(DIMacro,
(Context, Record[1], Record[2], getMDString(Record[3]),
getMDString(Record[4]))),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_MACRO_FILE: {
@@ -1322,7 +1343,8 @@ Error MetadataLoader::MetadataLoaderImpl
GET_OR_DISTINCT(DIMacroFile,
(Context, Record[1], Record[2], getMDOrNull(Record[3]),
getMDOrNull(Record[4]))),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_TEMPLATE_TYPE: {
@@ -1333,7 +1355,8 @@ Error MetadataLoader::MetadataLoaderImpl
MetadataList.assignValue(GET_OR_DISTINCT(DITemplateTypeParameter,
(Context, getMDString(Record[1]),
getDITypeRefOrNull(Record[2]))),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_TEMPLATE_VALUE: {
@@ -1346,7 +1369,8 @@ Error MetadataLoader::MetadataLoaderImpl
(Context, Record[1], getMDString(Record[2]),
getDITypeRefOrNull(Record[3]),
getMDOrNull(Record[4]))),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_GLOBAL_VAR: {
@@ -1364,7 +1388,8 @@ Error MetadataLoader::MetadataLoaderImpl
getMDOrNull(Record[4]), Record[5],
getDITypeRefOrNull(Record[6]), Record[7], Record[8],
getMDOrNull(Record[10]), Record[11])),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
} else if (Version == 0) {
// Upgrade old metadata, which stored a global variable reference or a
// ConstantInt here.
@@ -1396,7 +1421,8 @@ Error MetadataLoader::MetadataLoaderImpl
getMDOrNull(Record[10]), AlignInBits));
auto *DGVE = DIGlobalVariableExpression::getDistinct(Context, DGV, Expr);
- MetadataList.assignValue(DGVE, NextMetadataNo++);
+ MetadataList.assignValue(DGVE, NextMetadataNo);
+ NextMetadataNo++;
if (Attach)
Attach->addDebugInfo(DGVE);
} else
@@ -1429,7 +1455,8 @@ Error MetadataLoader::MetadataLoaderImpl
getMDOrNull(Record[3 + HasTag]), Record[4 + HasTag],
getDITypeRefOrNull(Record[5 + HasTag]),
Record[6 + HasTag], Flags, AlignInBits)),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_EXPRESSION: {
@@ -1446,7 +1473,8 @@ Error MetadataLoader::MetadataLoaderImpl
MetadataList.assignValue(
GET_OR_DISTINCT(DIExpression, (Context, makeArrayRef(Record).slice(1))),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_GLOBAL_VAR_EXPR: {
@@ -1457,7 +1485,8 @@ Error MetadataLoader::MetadataLoaderImpl
MetadataList.assignValue(GET_OR_DISTINCT(DIGlobalVariableExpression,
(Context, getMDOrNull(Record[1]),
getMDOrNull(Record[2]))),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_OBJC_PROPERTY: {
@@ -1471,7 +1500,8 @@ Error MetadataLoader::MetadataLoaderImpl
getMDOrNull(Record[2]), Record[3],
getMDString(Record[4]), getMDString(Record[5]),
Record[6], getDITypeRefOrNull(Record[7]))),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_IMPORTED_ENTITY: {
@@ -1484,7 +1514,8 @@ Error MetadataLoader::MetadataLoaderImpl
(Context, Record[1], getMDOrNull(Record[2]),
getDITypeRefOrNull(Record[3]), Record[4],
getMDString(Record[5]))),
- NextMetadataNo++);
+ NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_STRING_OLD: {
@@ -1494,13 +1525,15 @@ Error MetadataLoader::MetadataLoaderImpl
HasSeenOldLoopTags |= mayBeOldLoopAttachmentTag(String);
++NumMDStringLoaded;
Metadata *MD = MDString::get(Context, String);
- MetadataList.assignValue(MD, NextMetadataNo++);
+ MetadataList.assignValue(MD, NextMetadataNo);
+ NextMetadataNo++;
break;
}
case bitc::METADATA_STRINGS: {
auto CreateNextMDString = [&](StringRef Str) {
++NumMDStringLoaded;
- MetadataList.assignValue(MDString::get(Context, Str), NextMetadataNo++);
+ MetadataList.assignValue(MDString::get(Context, Str), NextMetadataNo);
+ NextMetadataNo++;
};
if (Error Err = parseMetadataStrings(Record, Blob, CreateNextMDString))
return Err;
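
For context: the mechanical change running through this file hoists
NextMetadataNo++ out of each assignValue(...) call. C++ leaves the evaluation
order of function arguments unspecified, and argument expressions such as
GET_OR_DISTINCT(...) or getMDOrNull(...) can trigger lazy metadata loading, so
the point at which the increment fired could differ between compilers; a
separate statement makes the sequencing explicit. A stand-alone sketch of the
hazard, with hypothetical names:

    #include <cstdio>

    static unsigned next = 0;

    // Stand-in for an argument expression that also reads the counter,
    // as lazy metadata loading can while resolving forward references.
    static int peek() { return int(next); }

    static void assign(int value, unsigned slot) {
      std::printf("value %d -> slot %u\n", value, slot);
    }

    int main() {
      // Order-dependent: C++ does not specify whether the side effect of
      // next++ happens before or after the call to peek(), so the value
      // passed could be 0 or 1 depending on the compiler:
      //   assign(peek(), next++);
      // The form the diff converts everything to is deterministic:
      assign(peek(), next);  // always value 0, slot 0
      next++;
      return 0;
    }
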
Modified: vendor/llvm/dist/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
==============================================================================
--- vendor/llvm/dist/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp Sun Jan 29 19:54:34 2017 (r312955)
+++ vendor/llvm/dist/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp Sun Jan 29 20:58:36 2017 (r312956)
@@ -3439,7 +3439,10 @@ SDValue DAGTypeLegalizer::GenWidenVector
LD->getPointerInfo().getWithOffset(Offset),
MinAlign(Align, Increment), MMOFlags, AAInfo);
LdChain.push_back(L.getValue(1));
- if (L->getValueType(0).isVector()) {
+ if (L->getValueType(0).isVector() && NewVTWidth >= LdWidth) {
+ // Later code assumes the vector loads produced will be mergeable, so we
+ // must pad the final entry up to the previous width. Scalars are
+ // combined separately.
SmallVector<SDValue, 16> Loads;
Loads.push_back(L);
unsigned size = L->getValueSizeInBits(0);
Modified: vendor/llvm/dist/lib/Target/AArch64/AArch64.td
==============================================================================
--- vendor/llvm/dist/lib/Target/AArch64/AArch64.td Sun Jan 29 19:54:34 2017 (r312955)
+++ vendor/llvm/dist/lib/Target/AArch64/AArch64.td Sun Jan 29 20:58:36 2017 (r312956)
@@ -85,9 +85,8 @@ def FeaturePostRAScheduler : SubtargetFe
def FeatureSlowMisaligned128Store : SubtargetFeature<"slow-misaligned-128store",
"Misaligned128StoreIsSlow", "true", "Misaligned 128 bit stores are slow">;
-def FeatureAvoidQuadLdStPairs : SubtargetFeature<"no-quad-ldst-pairs",
- "AvoidQuadLdStPairs", "true",
- "Do not form quad load/store pair operations">;
+def FeatureSlowPaired128 : SubtargetFeature<"slow-paired-128",
+ "Paired128IsSlow", "true", "Paired 128 bit loads and stores are slow">;
def FeatureAlternateSExtLoadCVTF32Pattern : SubtargetFeature<
"alternate-sextload-cvt-f32-pattern", "UseAlternateSExtLoadCVTF32Pattern",
@@ -222,7 +221,7 @@ def ProcCyclone : SubtargetFeature<"cycl
def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
"Samsung Exynos-M1 processors",
- [FeatureAvoidQuadLdStPairs,
+ [FeatureSlowPaired128,
FeatureCRC,
FeatureCrypto,
FeatureCustomCheapAsMoveHandling,
@@ -236,7 +235,7 @@ def ProcExynosM1 : SubtargetFeature<"exy
def ProcExynosM2 : SubtargetFeature<"exynosm2", "ARMProcFamily", "ExynosM1",
"Samsung Exynos-M2/M3 processors",
- [FeatureAvoidQuadLdStPairs,
+ [FeatureSlowPaired128,
FeatureCRC,
FeatureCrypto,
FeatureCustomCheapAsMoveHandling,
Modified: vendor/llvm/dist/lib/Target/AArch64/AArch64InstrInfo.cpp
==============================================================================
--- vendor/llvm/dist/lib/Target/AArch64/AArch64InstrInfo.cpp Sun Jan 29 19:54:34 2017 (r312955)
+++ vendor/llvm/dist/lib/Target/AArch64/AArch64InstrInfo.cpp Sun Jan 29 20:58:36 2017 (r312956)
@@ -1652,7 +1652,7 @@ bool AArch64InstrInfo::isCandidateToMerg
return false;
// On some CPUs quad load/store pairs are slower than two single load/stores.
- if (Subtarget.avoidQuadLdStPairs()) {
+ if (Subtarget.isPaired128Slow()) {
switch (MI.getOpcode()) {
default:
break;
Modified: vendor/llvm/dist/lib/Target/AArch64/AArch64Subtarget.h
==============================================================================
--- vendor/llvm/dist/lib/Target/AArch64/AArch64Subtarget.h Sun Jan 29 19:54:34 2017 (r312955)
+++ vendor/llvm/dist/lib/Target/AArch64/AArch64Subtarget.h Sun Jan 29 20:58:36 2017 (r312956)
@@ -79,7 +79,7 @@ protected:
bool CustomAsCheapAsMove = false;
bool UsePostRAScheduler = false;
bool Misaligned128StoreIsSlow = false;
- bool AvoidQuadLdStPairs = false;
+ bool Paired128IsSlow = false;
bool UseAlternateSExtLoadCVTF32Pattern = false;
bool HasArithmeticBccFusion = false;
bool HasArithmeticCbzFusion = false;
@@ -189,7 +189,7 @@ public:
}
bool hasCustomCheapAsMoveHandling() const { return CustomAsCheapAsMove; }
bool isMisaligned128StoreSlow() const { return Misaligned128StoreIsSlow; }
- bool avoidQuadLdStPairs() const { return AvoidQuadLdStPairs; }
+ bool isPaired128Slow() const { return Paired128IsSlow; }
bool useAlternateSExtLoadCVTF32Pattern() const {
return UseAlternateSExtLoadCVTF32Pattern;
}
Modified: vendor/llvm/dist/lib/Target/AMDGPU/AMDGPU.td
==============================================================================
--- vendor/llvm/dist/lib/Target/AMDGPU/AMDGPU.td Sun Jan 29 19:54:34 2017 (r312955)
+++ vendor/llvm/dist/lib/Target/AMDGPU/AMDGPU.td Sun Jan 29 20:58:36 2017 (r312956)
@@ -282,6 +282,12 @@ def FeatureEnableSIScheduler : Subtarget
"Enable SI Machine Scheduler"
>;
+// Unless +-flat-for-global is specified, turn on FlatForGlobal for
+// all OS-es on VI and newer hardware to avoid assertion failures due
+// to missing ADDR64 variants of MUBUF instructions.
+// FIXME: moveToVALU should be able to handle converting addr64 MUBUF
+// instructions.
+
def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global",
"FlatForGlobal",
"true",
Modified: vendor/llvm/dist/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
==============================================================================
--- vendor/llvm/dist/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp Sun Jan 29 19:54:34 2017 (r312955)
+++ vendor/llvm/dist/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp Sun Jan 29 20:58:36 2017 (r312956)
@@ -140,7 +140,7 @@ bool AMDGPUAsmPrinter::isBlockOnlyReacha
void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
SIProgramInfo KernelInfo;
- if (STM.isAmdCodeObjectV2()) {
+ if (STM.isAmdCodeObjectV2(*MF)) {
getSIProgramInfo(KernelInfo, *MF);
EmitAmdKernelCodeT(*MF, KernelInfo);
}
@@ -149,7 +149,7 @@ void AMDGPUAsmPrinter::EmitFunctionBodyS
void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
- if (MFI->isKernel() && STM.isAmdCodeObjectV2()) {
+ if (MFI->isKernel() && STM.isAmdCodeObjectV2(*MF)) {
AMDGPUTargetStreamer *TS =
static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
SmallString<128> SymbolName;
@@ -779,7 +779,7 @@ void AMDGPUAsmPrinter::EmitAmdKernelCode
// FIXME: Should use getKernArgSize
header.kernarg_segment_byte_size =
- STM.getKernArgSegmentSize(MFI->getABIArgOffset());
+ STM.getKernArgSegmentSize(MF, MFI->getABIArgOffset());
header.wavefront_sgpr_count = KernelInfo.NumSGPR;
header.workitem_vgpr_count = KernelInfo.NumVGPR;
header.workitem_private_segment_byte_size = KernelInfo.ScratchSize;
Modified: vendor/llvm/dist/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
==============================================================================
--- vendor/llvm/dist/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp Sun Jan 29 19:54:34 2017 (r312955)
+++ vendor/llvm/dist/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp Sun Jan 29 20:58:36 2017 (r312956)
@@ -727,14 +727,8 @@ void AMDGPUDAGToDAGISel::SelectDIV_SCALE
unsigned Opc
= (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
- // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
- // omod
- SDValue Ops[8];
-
- SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
- SelectVOP3Mods(N->getOperand(1), Ops[3], Ops[2]);
- SelectVOP3Mods(N->getOperand(2), Ops[5], Ops[4]);
- CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
+ SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
+ CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}
bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
Modified: vendor/llvm/dist/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
==============================================================================
--- vendor/llvm/dist/lib/Target/AMDGPU/AMDGPUISelLowering.cpp Sun Jan 29 19:54:34 2017 (r312955)
+++ vendor/llvm/dist/lib/Target/AMDGPU/AMDGPUISelLowering.cpp Sun Jan 29 20:58:36 2017 (r312956)
@@ -2855,6 +2855,9 @@ SDValue AMDGPUTargetLowering::performFNe
SDLoc SL(N);
switch (Opc) {
case ISD::FADD: {
+ if (!mayIgnoreSignedZero(N0))
+ return SDValue();
+
// (fneg (fadd x, y)) -> (fadd (fneg x), (fneg y))
SDValue LHS = N0.getOperand(0);
SDValue RHS = N0.getOperand(1);
@@ -2895,6 +2898,9 @@ SDValue AMDGPUTargetLowering::performFNe
}
case ISD::FMA:
case ISD::FMAD: {
+ if (!mayIgnoreSignedZero(N0))
+ return SDValue();
+
// (fneg (fma x, y, z)) -> (fma x, (fneg y), (fneg z))
SDValue LHS = N0.getOperand(0);
SDValue MHS = N0.getOperand(1);
@@ -3272,6 +3278,7 @@ const char* AMDGPUTargetLowering::getTar
NODE_NAME_CASE(CONST_DATA_PTR)
NODE_NAME_CASE(PC_ADD_REL_OFFSET)
NODE_NAME_CASE(KILL)
+ NODE_NAME_CASE(DUMMY_CHAIN)
case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
NODE_NAME_CASE(SENDMSG)
NODE_NAME_CASE(SENDMSGHALT)
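
For context: the new mayIgnoreSignedZero guard (defined in
AMDGPUISelLowering.h below) is required because distributing fneg over
fadd/fma can flip the sign of a zero result. A worked IEEE 754 example:

    #include <cmath>
    #include <cstdio>

    // (fneg (fadd x, y)) vs. (fadd (fneg x), (fneg y)) with x = +0.0 and
    // y = -0.0: round-to-nearest gives (+0.0) + (-0.0) = +0.0, so the
    // original form yields -0.0 while the rewritten form yields +0.0.
    int main() {
      double x = +0.0, y = -0.0;
      double folded    = -(x + y);     // -0.0
      double rewritten = (-x) + (-y);  // (-0.0) + (+0.0) = +0.0
      std::printf("%d %d\n", std::signbit(folded), std::signbit(rewritten));
      // prints: 1 0
    }
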
Modified: vendor/llvm/dist/lib/Target/AMDGPU/AMDGPUISelLowering.h
==============================================================================
--- vendor/llvm/dist/lib/Target/AMDGPU/AMDGPUISelLowering.h Sun Jan 29 19:54:34 2017 (r312955)
+++ vendor/llvm/dist/lib/Target/AMDGPU/AMDGPUISelLowering.h Sun Jan 29 20:58:36 2017 (r312956)
@@ -119,6 +119,16 @@ protected:
public:
AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI);
+ bool mayIgnoreSignedZero(SDValue Op) const {
+ if (getTargetMachine().Options.UnsafeFPMath) // FIXME: nsz only
+ return true;
+
+ if (const auto *BO = dyn_cast<BinaryWithFlagsSDNode>(Op))
+ return BO->Flags.hasNoSignedZeros();
+
+ return false;
+ }
+
bool isFAbsFree(EVT VT) const override;
bool isFNegFree(EVT VT) const override;
bool isTruncateFree(EVT Src, EVT Dest) const override;
@@ -320,6 +330,7 @@ enum NodeType : unsigned {
INTERP_P2,
PC_ADD_REL_OFFSET,
KILL,
+ DUMMY_CHAIN,
FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
STORE_MSKOR,
LOAD_CONSTANT,
Modified: vendor/llvm/dist/lib/Target/AMDGPU/AMDGPUInstrInfo.td
==============================================================================
--- vendor/llvm/dist/lib/Target/AMDGPU/AMDGPUInstrInfo.td Sun Jan 29 19:54:34 2017 (r312955)
+++ vendor/llvm/dist/lib/Target/AMDGPU/AMDGPUInstrInfo.td Sun Jan 29 20:58:36 2017 (r312956)
@@ -54,6 +54,9 @@ def AMDGPUconstdata_ptr : SDNode<
// This argument to this node is a dword address.
def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;
+// Force dependencies for vector trunc stores
+def R600dummy_chain : SDNode<"AMDGPUISD::DUMMY_CHAIN", SDTNone, [SDNPHasChain]>;
+
def AMDGPUcos : SDNode<"AMDGPUISD::COS_HW", SDTFPUnaryOp>;
def AMDGPUsin : SDNode<"AMDGPUISD::SIN_HW", SDTFPUnaryOp>;
Modified: vendor/llvm/dist/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
==============================================================================
--- vendor/llvm/dist/lib/Target/AMDGPU/AMDGPUSubtarget.cpp Sun Jan 29 19:54:34 2017 (r312955)
+++ vendor/llvm/dist/lib/Target/AMDGPU/AMDGPUSubtarget.cpp Sun Jan 29 20:58:36 2017 (r312956)
@@ -48,6 +48,13 @@ AMDGPUSubtarget::initializeSubtargetDepe
ParseSubtargetFeatures(GPU, FullFS);
+ // Unless +-flat-for-global is specified, turn on FlatForGlobal for all OS-es
+ // on VI and newer hardware to avoid assertion failures due to missing ADDR64
+ // variants of MUBUF instructions.
+ if (!hasAddr64() && !FS.contains("flat-for-global")) {
+ FlatForGlobal = true;
+ }
+
// FIXME: I don't think Evergreen has any useful support for
// denormals, but should be checked. Should we issue a warning somewhere
// if someone tries to enable these?
@@ -297,8 +304,9 @@ bool SISubtarget::isVGPRSpillingEnabled(
return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv());
}
-unsigned SISubtarget::getKernArgSegmentSize(unsigned ExplicitArgBytes) const {
- unsigned ImplicitBytes = getImplicitArgNumBytes();
+unsigned SISubtarget::getKernArgSegmentSize(const MachineFunction &MF,
+ unsigned ExplicitArgBytes) const {
+ unsigned ImplicitBytes = getImplicitArgNumBytes(MF);
if (ImplicitBytes == 0)
return ExplicitArgBytes;
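
For context: FS here is the raw feature string, so the substring test matches
both "+flat-for-global" and "-flat-for-global"; either spelling counts as
"user specified" and suppresses the new default. A stand-alone sketch of that
check:

    #include <iostream>
    #include <string>

    // Mirrors the FS.contains("flat-for-global") test above: any explicit
    // +/- occurrence of the feature leaves the default untouched.
    static bool userSpecified(const std::string &FS) {
      return FS.find("flat-for-global") != std::string::npos;
    }

    int main() {
      std::cout << userSpecified("+flat-for-global") << '\n';       // 1
      std::cout << userSpecified("-flat-for-global") << '\n';       // 1
      std::cout << userSpecified("+xnack,-fp32-denormals") << '\n'; // 0
    }
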
Modified: vendor/llvm/dist/lib/Target/AMDGPU/AMDGPUSubtarget.h
==============================================================================
--- vendor/llvm/dist/lib/Target/AMDGPU/AMDGPUSubtarget.h Sun Jan 29 19:54:34 2017 (r312955)
+++ vendor/llvm/dist/lib/Target/AMDGPU/AMDGPUSubtarget.h Sun Jan 29 20:58:36 2017 (r312956)
@@ -311,22 +311,31 @@ public:
return EnableXNACK;
}
- bool isAmdCodeObjectV2() const {
- return isAmdHsaOS() || isMesa3DOS();
+ bool isMesaKernel(const MachineFunction &MF) const {
+ return isMesa3DOS() && !AMDGPU::isShader(MF.getFunction()->getCallingConv());
+ }
+
+ // Covers VS/PS/CS graphics shaders
+ bool isMesaGfxShader(const MachineFunction &MF) const {
+ return isMesa3DOS() && AMDGPU::isShader(MF.getFunction()->getCallingConv());
+ }
+
+ bool isAmdCodeObjectV2(const MachineFunction &MF) const {
+ return isAmdHsaOS() || isMesaKernel(MF);
}
/// \brief Returns the offset in bytes from the start of the input buffer
/// of the first explicit kernel argument.
- unsigned getExplicitKernelArgOffset() const {
- return isAmdCodeObjectV2() ? 0 : 36;
+ unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const {
+ return isAmdCodeObjectV2(MF) ? 0 : 36;
}
unsigned getAlignmentForImplicitArgPtr() const {
return isAmdHsaOS() ? 8 : 4;
}
- unsigned getImplicitArgNumBytes() const {
- if (isMesa3DOS())
+ unsigned getImplicitArgNumBytes(const MachineFunction &MF) const {
+ if (isMesaKernel(MF))
return 16;
if (isAmdHsaOS() && isOpenCLEnv())
return 32;
@@ -585,7 +594,7 @@ public:
return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
}
- unsigned getKernArgSegmentSize(unsigned ExplictArgBytes) const;
+ unsigned getKernArgSegmentSize(const MachineFunction &MF, unsigned ExplictArgBytes) const;
/// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
Modified: vendor/llvm/dist/lib/Target/AMDGPU/R600ISelLowering.cpp
==============================================================================
--- vendor/llvm/dist/lib/Target/AMDGPU/R600ISelLowering.cpp Sun Jan 29 19:54:34 2017 (r312955)
+++ vendor/llvm/dist/lib/Target/AMDGPU/R600ISelLowering.cpp Sun Jan 29 20:58:36 2017 (r312956)
@@ -1115,7 +1115,10 @@ SDValue R600TargetLowering::lowerPrivate
llvm_unreachable("Unsupported private trunc store");
}
- SDValue Chain = Store->getChain();
+ SDValue OldChain = Store->getChain();
+ bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN);
+ // Skip dummy
+ SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain;
SDValue BasePtr = Store->getBasePtr();
SDValue Offset = Store->getOffset();
EVT MemVT = Store->getMemoryVT();
@@ -1171,7 +1174,15 @@ SDValue R600TargetLowering::lowerPrivate
// Store dword
// TODO: Can we be smarter about MachinePointerInfo?
- return DAG.getStore(Chain, DL, Value, Ptr, MachinePointerInfo());
+ SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, MachinePointerInfo());
+
+ // If we are part of expanded vector, make our neighbors depend on this store
+ if (VectorTrunc) {
+ // Make all other vector elements depend on this store
+ Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore);
+ DAG.ReplaceAllUsesOfValueWith(OldChain, Chain);
+ }
+ return NewStore;
}
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
@@ -1191,6 +1202,17 @@ SDValue R600TargetLowering::LowerSTORE(S
// Neither LOCAL nor PRIVATE can do vectors at the moment
if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
VT.isVector()) {
+ if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && StoreNode->isTruncatingStore()) {
+ // Add an extra level of chain to isolate this vector
+ SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
+ // TODO: can the chain be replaced without creating a new store?
+ SDValue NewStore = DAG.getTruncStore(
+ NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(),
+ MemVT, StoreNode->getAlignment(),
+ StoreNode->getMemOperand()->getFlags(), StoreNode->getAAInfo());
+ StoreNode = cast<StoreSDNode>(NewStore);
+ }
+
return scalarizeVectorStore(StoreNode, DAG);
}
@@ -1225,7 +1247,7 @@ SDValue R600TargetLowering::LowerSTORE(S
// Put the mask in correct place
SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);
- // Put the mask in correct place
+ // Put the value bits in correct place
SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift);
@@ -1560,7 +1582,7 @@ SDValue R600TargetLowering::LowerFormalA
unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
unsigned PartOffset = VA.getLocMemOffset();
- unsigned Offset = Subtarget->getExplicitKernelArgOffset() + VA.getLocMemOffset();
+ unsigned Offset = Subtarget->getExplicitKernelArgOffset(MF) + VA.getLocMemOffset();
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
SDValue Arg = DAG.getLoad(
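
For context: the DUMMY_CHAIN glue above exists because a truncating vector
store to private memory is scalarized into per-element read-modify-write
sequences on whole dwords; without an explicit chain linking them, two
neighbouring element stores could be ordered so that one RMW clobbers the
other's update. A stand-alone model of that dependency:

    #include <cstdint>
    #include <cstdio>

    static uint32_t mem = 0;  // one private-memory dword

    // One scalarized element store: RMW on the containing dword, as in
    // lowerPrivateTruncStore above (mask, shift, or, store).
    static void storeByte(unsigned lane, uint8_t v) {
      uint32_t mask = 0xffu << (lane * 8);
      mem = (mem & ~mask) | (uint32_t(v) << (lane * 8));
    }

    int main() {
      storeByte(0, 0x11);
      storeByte(1, 0x22);            // must observe lane 0's write to mem
      std::printf("0x%08x\n", mem);  // 0x00002211
    }
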
Modified: vendor/llvm/dist/lib/Target/AMDGPU/R600Instructions.td
==============================================================================
--- vendor/llvm/dist/lib/Target/AMDGPU/R600Instructions.td Sun Jan 29 19:54:34 2017 (r312955)
+++ vendor/llvm/dist/lib/Target/AMDGPU/R600Instructions.td Sun Jan 29 20:58:36 2017 (r312956)
@@ -727,6 +727,20 @@ def FLOOR : R600_1OP_Helper <0x14, "FLOO
def MOV : R600_1OP <0x19, "MOV", []>;
+
+// This is a hack to get rid of DUMMY_CHAIN nodes.
+// Most DUMMY_CHAINs should be eliminated during legalization, but undef
+// values can sneak in some to selection.
+let isPseudo = 1, isCodeGenOnly = 1 in {
+def DUMMY_CHAIN : AMDGPUInst <
+ (outs),
+ (ins),
+ "DUMMY_CHAIN",
+ [(R600dummy_chain)]
+>;
+} // end let isPseudo = 1, isCodeGenOnly = 1
+
+
let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in {
class MOV_IMM <ValueType vt, Operand immType> : AMDGPUInst <
Modified: vendor/llvm/dist/lib/Target/AMDGPU/SIFrameLowering.cpp
==============================================================================
--- vendor/llvm/dist/lib/Target/AMDGPU/SIFrameLowering.cpp Sun Jan 29 19:54:34 2017 (r312955)
+++ vendor/llvm/dist/lib/Target/AMDGPU/SIFrameLowering.cpp Sun Jan 29 20:58:36 2017 (r312956)
@@ -237,7 +237,7 @@ void SIFrameLowering::emitPrologue(Machi
unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister;
- if (ST.isAmdCodeObjectV2()) {
+ if (ST.isAmdCodeObjectV2(MF) || ST.isMesaGfxShader(MF)) {
PreloadedPrivateBufferReg = TRI->getPreloadedValue(
MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER);
}
@@ -255,7 +255,7 @@ void SIFrameLowering::emitPrologue(Machi
}
if (ResourceRegUsed && PreloadedPrivateBufferReg != AMDGPU::NoRegister) {
- assert(ST.isAmdCodeObjectV2());
+ assert(ST.isAmdCodeObjectV2(MF) || ST.isMesaGfxShader(MF));
MRI.addLiveIn(PreloadedPrivateBufferReg);
MBB.addLiveIn(PreloadedPrivateBufferReg);
}
@@ -280,6 +280,7 @@ void SIFrameLowering::emitPrologue(Machi
bool CopyBuffer = ResourceRegUsed &&
PreloadedPrivateBufferReg != AMDGPU::NoRegister &&
+ ST.isAmdCodeObjectV2(MF) &&
ScratchRsrcReg != PreloadedPrivateBufferReg;
// This needs to be careful of the copying order to avoid overwriting one of
@@ -303,24 +304,57 @@ void SIFrameLowering::emitPrologue(Machi
.addReg(PreloadedPrivateBufferReg, RegState::Kill);
}
- if (ResourceRegUsed && PreloadedPrivateBufferReg == AMDGPU::NoRegister) {
- assert(!ST.isAmdCodeObjectV2());
+ if (ResourceRegUsed && (ST.isMesaGfxShader(MF) || (PreloadedPrivateBufferReg == AMDGPU::NoRegister))) {
+ assert(!ST.isAmdCodeObjectV2(MF));
const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
- unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
- unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
// Use relocations to get the pointer, and setup the other bits manually.
uint64_t Rsrc23 = TII->getScratchRsrcWords23();
- BuildMI(MBB, I, DL, SMovB32, Rsrc0)
- .addExternalSymbol("SCRATCH_RSRC_DWORD0")
- .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
- BuildMI(MBB, I, DL, SMovB32, Rsrc1)
- .addExternalSymbol("SCRATCH_RSRC_DWORD1")
- .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+ if (MFI->hasPrivateMemoryInputPtr()) {
+ unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
+
+ if (AMDGPU::isCompute(MF.getFunction()->getCallingConv())) {
+ const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);
+
+ BuildMI(MBB, I, DL, Mov64, Rsrc01)
+ .addReg(PreloadedPrivateBufferReg)
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+ } else {
+ const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
+
+ PointerType *PtrTy =
+ PointerType::get(Type::getInt64Ty(MF.getFunction()->getContext()),
+ AMDGPUAS::CONSTANT_ADDRESS);
+ MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
+ auto MMO = MF.getMachineMemOperand(PtrInfo,
+ MachineMemOperand::MOLoad |
+ MachineMemOperand::MOInvariant |
+ MachineMemOperand::MODereferenceable,
+ 0, 0);
+ BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
+ .addReg(PreloadedPrivateBufferReg)
+ .addImm(0) // offset
+ .addImm(0) // glc
+ .addMemOperand(MMO)
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+ }
+ } else {
+ unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
+ unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
+
+ BuildMI(MBB, I, DL, SMovB32, Rsrc0)
+ .addExternalSymbol("SCRATCH_RSRC_DWORD0")
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+ BuildMI(MBB, I, DL, SMovB32, Rsrc1)
+ .addExternalSymbol("SCRATCH_RSRC_DWORD1")
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+ }
BuildMI(MBB, I, DL, SMovB32, Rsrc2)
.addImm(Rsrc23 & 0xffffffff)
Modified: vendor/llvm/dist/lib/Target/AMDGPU/SIISelLowering.cpp
==============================================================================
--- vendor/llvm/dist/lib/Target/AMDGPU/SIISelLowering.cpp Sun Jan 29 19:54:34 2017 (r312955)
+++ vendor/llvm/dist/lib/Target/AMDGPU/SIISelLowering.cpp Sun Jan 29 20:58:36 2017 (r312956)
@@ -842,7 +842,7 @@ SDValue SITargetLowering::LowerFormalArg
if (!AMDGPU::isShader(CallConv)) {
assert(Info->hasWorkGroupIDX() && Info->hasWorkItemIDX());
} else {
- assert(!Info->hasPrivateSegmentBuffer() && !Info->hasDispatchPtr() &&
+ assert(!Info->hasDispatchPtr() &&
!Info->hasKernargSegmentPtr() && !Info->hasFlatScratchInit() &&
!Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() &&
!Info->hasWorkGroupIDZ() && !Info->hasWorkGroupInfo() &&
@@ -850,6 +850,12 @@ SDValue SITargetLowering::LowerFormalArg
!Info->hasWorkItemIDZ());
}
+ if (Info->hasPrivateMemoryInputPtr()) {
+ unsigned PrivateMemoryPtrReg = Info->addPrivateMemoryPtr(*TRI);
+ MF.addLiveIn(PrivateMemoryPtrReg, &AMDGPU::SReg_64RegClass);
+ CCInfo.AllocateReg(PrivateMemoryPtrReg);
+ }
+
// FIXME: How should these inputs interact with inreg / custom SGPR inputs?
if (Info->hasPrivateSegmentBuffer()) {
unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI);
@@ -908,7 +914,7 @@ SDValue SITargetLowering::LowerFormalArg
if (VA.isMemLoc()) {
VT = Ins[i].VT;
EVT MemVT = VA.getLocVT();
- const unsigned Offset = Subtarget->getExplicitKernelArgOffset() +
+ const unsigned Offset = Subtarget->getExplicitKernelArgOffset(MF) +
VA.getLocMemOffset();
// The first 36 bytes of the input buffer contains information about
// thread group and global sizes.
@@ -1033,7 +1039,7 @@ SDValue SITargetLowering::LowerFormalArg
if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
HasStackObjects = true;
- if (ST.isAmdCodeObjectV2()) {
+ if (ST.isAmdCodeObjectV2(MF)) {
if (HasStackObjects) {
// If we have stack objects, we unquestionably need the private buffer
// resource. For the Code Object V2 ABI, this will be the first 4 user
@@ -2362,9 +2368,13 @@ SDValue SITargetLowering::LowerINTRINSIC
// TODO: Should this propagate fast-math-flags?
switch (IntrinsicID) {
+ case Intrinsic::amdgcn_implicit_buffer_ptr: {
+ unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER);
+ return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass, Reg, VT);
+ }
case Intrinsic::amdgcn_dispatch_ptr:
case Intrinsic::amdgcn_queue_ptr: {
- if (!Subtarget->isAmdCodeObjectV2()) {
+ if (!Subtarget->isAmdCodeObjectV2(MF)) {
DiagnosticInfoUnsupported BadIntrin(
*MF.getFunction(), "unsupported hsa intrinsic without hsa target",
DL.getDebugLoc());
Modified: vendor/llvm/dist/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
==============================================================================
--- vendor/llvm/dist/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp Sun Jan 29 19:54:34 2017 (r312955)
+++ vendor/llvm/dist/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp Sun Jan 29 20:58:36 2017 (r312956)
@@ -77,7 +77,8 @@ SIMachineFunctionInfo::SIMachineFunction
PrivateSegmentWaveByteOffset(false),
WorkItemIDX(false),
WorkItemIDY(false),
- WorkItemIDZ(false) {
+ WorkItemIDZ(false),
+ PrivateMemoryInputPtr(false) {
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
const Function *F = MF.getFunction();
@@ -114,7 +115,7 @@ SIMachineFunctionInfo::SIMachineFunction
if (HasStackObjects || MaySpill)
PrivateSegmentWaveByteOffset = true;
- if (ST.isAmdCodeObjectV2()) {
+ if (ST.isAmdCodeObjectV2(MF)) {
if (HasStackObjects || MaySpill)
PrivateSegmentBuffer = true;
@@ -126,6 +127,9 @@ SIMachineFunctionInfo::SIMachineFunction
if (F->hasFnAttribute("amdgpu-dispatch-id"))
DispatchID = true;
+ } else if (ST.isMesaGfxShader(MF)) {
+ if (HasStackObjects || MaySpill)
+ PrivateMemoryInputPtr = true;
}
// We don't need to worry about accessing spills with flat instructions.
@@ -182,6 +186,13 @@ unsigned SIMachineFunctionInfo::addFlatS
return FlatScratchInitUserSGPR;
}
+unsigned SIMachineFunctionInfo::addPrivateMemoryPtr(const SIRegisterInfo &TRI) {
+ PrivateMemoryPtrUserSGPR = TRI.getMatchingSuperReg(
+ getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
+ NumUserSGPRs += 2;
+ return PrivateMemoryPtrUserSGPR;
+}
+
SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg (
MachineFunction *MF,
unsigned FrameIndex,
Modified: vendor/llvm/dist/lib/Target/AMDGPU/SIMachineFunctionInfo.h
==============================================================================
--- vendor/llvm/dist/lib/Target/AMDGPU/SIMachineFunctionInfo.h Sun Jan 29 19:54:34 2017 (r312955)
+++ vendor/llvm/dist/lib/Target/AMDGPU/SIMachineFunctionInfo.h Sun Jan 29 20:58:36 2017 (r312956)
@@ -84,6 +84,9 @@ class SIMachineFunctionInfo final : publ
unsigned ScratchRSrcReg;
unsigned ScratchWaveOffsetReg;
+ // Input registers for non-HSA ABI
+ unsigned PrivateMemoryPtrUserSGPR;
+
// Input registers setup for the HSA ABI.
// User SGPRs in allocation order.
unsigned PrivateSegmentBufferUserSGPR;
@@ -163,6 +166,11 @@ private:
bool WorkItemIDY : 1;
bool WorkItemIDZ : 1;
+ // Private memory buffer
+ // Compute directly in sgpr[0:1]
+ // Other shaders indirect 64-bits at sgpr[0:1]
+ bool PrivateMemoryInputPtr : 1;
+
MCPhysReg getNextUserSGPR() const {
assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
return AMDGPU::SGPR0 + NumUserSGPRs;
@@ -198,6 +206,7 @@ public:
unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
unsigned addDispatchID(const SIRegisterInfo &TRI);
unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
+ unsigned addPrivateMemoryPtr(const SIRegisterInfo &TRI);
// Add system SGPRs.
unsigned addWorkGroupIDX() {
@@ -302,6 +311,10 @@ public:
return WorkItemIDZ;
}
+ bool hasPrivateMemoryInputPtr() const {
+ return PrivateMemoryInputPtr;
+ }
+
unsigned getNumUserSGPRs() const {
return NumUserSGPRs;
}
@@ -338,6 +351,10 @@ public:
return QueuePtrUserSGPR;
}
+ unsigned getPrivateMemoryPtrUserSGPR() const {
+ return PrivateMemoryPtrUserSGPR;
+ }
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***