svn commit: r295590 - in vendor/llvm/dist: cmake/modules docs include/llvm/IR lib/Analysis lib/CodeGen/AsmPrinter lib/IR lib/Target/AArch64 lib/Target/AMDGPU lib/Target/AMDGPU/Utils lib/Target/ARM ...
Dimitry Andric
dim at FreeBSD.org
Sat Feb 13 14:57:13 UTC 2016
Author: dim
Date: Sat Feb 13 14:57:10 2016
New Revision: 295590
URL: https://svnweb.freebsd.org/changeset/base/295590
Log:
Vendor import of llvm release_38 branch r260756:
https://llvm.org/svn/llvm-project/llvm/branches/release_38@260756
Added:
vendor/llvm/dist/lib/Target/AArch64/AArch64SchedM1.td
vendor/llvm/dist/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
vendor/llvm/dist/test/CodeGen/PowerPC/inline-asm-s-modifier.ll
vendor/llvm/dist/test/CodeGen/PowerPC/pr26193.ll
vendor/llvm/dist/test/CodeGen/PowerPC/pr26356.ll
vendor/llvm/dist/test/CodeGen/PowerPC/pr26381.ll
vendor/llvm/dist/test/CodeGen/SystemZ/int-cmp-53.ll
vendor/llvm/dist/test/DebugInfo/X86/PR26148.ll
Modified:
vendor/llvm/dist/cmake/modules/AddLLVM.cmake
vendor/llvm/dist/cmake/modules/LLVM-Config.cmake
vendor/llvm/dist/docs/ReleaseNotes.rst
vendor/llvm/dist/include/llvm/IR/IntrinsicsPowerPC.td
vendor/llvm/dist/include/llvm/IR/Value.h
vendor/llvm/dist/lib/Analysis/DemandedBits.cpp
vendor/llvm/dist/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
vendor/llvm/dist/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
vendor/llvm/dist/lib/IR/Value.cpp
vendor/llvm/dist/lib/Target/AArch64/AArch64.td
vendor/llvm/dist/lib/Target/AArch64/AArch64ISelLowering.cpp
vendor/llvm/dist/lib/Target/AMDGPU/AMDGPU.td
vendor/llvm/dist/lib/Target/AMDGPU/AMDGPUSubtarget.h
vendor/llvm/dist/lib/Target/AMDGPU/Processors.td
vendor/llvm/dist/lib/Target/AMDGPU/SIRegisterInfo.cpp
vendor/llvm/dist/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
vendor/llvm/dist/lib/Target/ARM/ARMISelDAGToDAG.cpp
vendor/llvm/dist/lib/Target/PowerPC/PPCFastISel.cpp
vendor/llvm/dist/lib/Target/PowerPC/PPCInstrAltivec.td
vendor/llvm/dist/lib/Target/SystemZ/SystemZISelLowering.cpp
vendor/llvm/dist/lib/Target/X86/X86ISelLowering.cpp
vendor/llvm/dist/lib/Transforms/InstCombine/InstCombineCompares.cpp
vendor/llvm/dist/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
vendor/llvm/dist/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
vendor/llvm/dist/lib/Transforms/Utils/SimplifyCFG.cpp
vendor/llvm/dist/test/Analysis/DemandedBits/basic.ll
vendor/llvm/dist/test/CodeGen/AArch64/fp16-v4-instructions.ll
vendor/llvm/dist/test/CodeGen/AArch64/fp16-v8-instructions.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/hsa-note-no-func.ll
vendor/llvm/dist/test/CodeGen/AMDGPU/llvm.SI.fs.interp.ll
vendor/llvm/dist/test/CodeGen/ARM/shifter_operand.ll
vendor/llvm/dist/test/CodeGen/PowerPC/fast-isel-ret.ll
vendor/llvm/dist/test/CodeGen/X86/avx512-gather-scatter-intrin.ll
vendor/llvm/dist/test/CodeGen/X86/setcc-lowering.ll
vendor/llvm/dist/test/Transforms/InstCombine/icmp.ll
vendor/llvm/dist/test/Transforms/InstCombine/insert-extract-shuffle.ll
vendor/llvm/dist/test/Transforms/InstCombine/unpack-fca.ll
vendor/llvm/dist/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll
vendor/llvm/dist/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
vendor/llvm/dist/tools/CMakeLists.txt
vendor/llvm/dist/utils/release/test-release.sh
vendor/llvm/dist/utils/unittest/CMakeLists.txt
vendor/llvm/dist/utils/unittest/UnitTestMain/CMakeLists.txt
Modified: vendor/llvm/dist/cmake/modules/AddLLVM.cmake
==============================================================================
--- vendor/llvm/dist/cmake/modules/AddLLVM.cmake Sat Feb 13 14:57:04 2016 (r295589)
+++ vendor/llvm/dist/cmake/modules/AddLLVM.cmake Sat Feb 13 14:57:10 2016 (r295590)
@@ -468,20 +468,23 @@ function(llvm_add_library name)
endif()
endif()
- # Add the explicit dependency information for this library.
- #
- # It would be nice to verify that we have the dependencies for this library
- # name, but using get_property(... SET) doesn't suffice to determine if a
- # property has been set to an empty value.
- get_property(lib_deps GLOBAL PROPERTY LLVMBUILD_LIB_DEPS_${name})
-
- if (LLVM_LINK_LLVM_DYLIB AND NOT ARG_STATIC AND NOT ARG_DISABLE_LLVM_LINK_LLVM_DYLIB)
- set(llvm_libs LLVM)
- else()
- llvm_map_components_to_libnames(llvm_libs
- ${ARG_LINK_COMPONENTS}
- ${LLVM_LINK_COMPONENTS}
- )
+ if (DEFINED LLVM_LINK_COMPONENTS OR DEFINED ARG_LINK_COMPONENTS)
+ if (LLVM_LINK_LLVM_DYLIB AND NOT ARG_DISABLE_LLVM_LINK_LLVM_DYLIB)
+ set(llvm_libs LLVM)
+ else()
+ llvm_map_components_to_libnames(llvm_libs
+ ${ARG_LINK_COMPONENTS}
+ ${LLVM_LINK_COMPONENTS}
+ )
+ endif()
+ else()
+ # Components have not been defined explicitly in CMake, so add the
+ # dependency information for this library as defined by LLVMBuild.
+ #
+ # It would be nice to verify that we have the dependencies for this library
+ # name, but using get_property(... SET) doesn't suffice to determine if a
+ # property has been set to an empty value.
+ get_property(lib_deps GLOBAL PROPERTY LLVMBUILD_LIB_DEPS_${name})
endif()
if(CMAKE_VERSION VERSION_LESS 2.8.12)
@@ -882,14 +885,11 @@ function(add_unittest test_suite test_na
set(LLVM_REQUIRES_RTTI OFF)
+ list(APPEND LLVM_LINK_COMPONENTS Support) # gtest needs it for raw_ostream
add_llvm_executable(${test_name} IGNORE_EXTERNALIZE_DEBUGINFO ${ARGN})
set(outdir ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR})
set_output_directory(${test_name} BINARY_DIR ${outdir} LIBRARY_DIR ${outdir})
- target_link_libraries(${test_name}
- gtest
- gtest_main
- LLVMSupport # gtest needs it for raw_ostream.
- )
+ target_link_libraries(${test_name} gtest_main gtest)
add_dependencies(${test_suite} ${test_name})
get_target_property(test_suite_folder ${test_suite} FOLDER)
Modified: vendor/llvm/dist/cmake/modules/LLVM-Config.cmake
==============================================================================
--- vendor/llvm/dist/cmake/modules/LLVM-Config.cmake Sat Feb 13 14:57:04 2016 (r295589)
+++ vendor/llvm/dist/cmake/modules/LLVM-Config.cmake Sat Feb 13 14:57:10 2016 (r295590)
@@ -40,10 +40,19 @@ macro(llvm_config executable)
# done in case libLLVM does not contain all of the components
# the target requires.
#
- # TODO strip LLVM_DYLIB_COMPONENTS out of link_components.
+ # Strip LLVM_DYLIB_COMPONENTS out of link_components.
# To do this, we need special handling for "all", since that
# may imply linking to libraries that are not included in
# libLLVM.
+
+ if (DEFINED link_components AND DEFINED LLVM_DYLIB_COMPONENTS)
+ if("${LLVM_DYLIB_COMPONENTS}" STREQUAL "all")
+ set(link_components "")
+ else()
+ list(REMOVE_ITEM link_components ${LLVM_DYLIB_COMPONENTS})
+ endif()
+ endif()
+
target_link_libraries(${executable} LLVM)
endif()
Modified: vendor/llvm/dist/docs/ReleaseNotes.rst
==============================================================================
--- vendor/llvm/dist/docs/ReleaseNotes.rst Sat Feb 13 14:57:04 2016 (r295589)
+++ vendor/llvm/dist/docs/ReleaseNotes.rst Sat Feb 13 14:57:10 2016 (r295590)
@@ -5,11 +5,6 @@ LLVM 3.8 Release Notes
.. contents::
:local:
-.. warning::
- These are in-progress notes for the upcoming LLVM 3.8 release. You may
- prefer the `LLVM 3.7 Release Notes <http://llvm.org/releases/3.7.0/docs
- /ReleaseNotes.html>`_.
-
Introduction
============
@@ -26,11 +21,6 @@ have questions or comments, the `LLVM De
<http://lists.llvm.org/mailman/listinfo/llvm-dev>`_ is a good place to send
them.
-Note that if you are reading this file from a Subversion checkout or the main
-LLVM web page, this document applies to the *next* release, not the current
-one. To see the release notes for a specific release, please see the `releases
-page <http://llvm.org/releases/>`_.
-
Non-comprehensive list of changes in this release
=================================================
* With this release, the minimum Windows version required for running LLVM is
@@ -79,6 +69,26 @@ Non-comprehensive list of changes in thi
* Support for dematerializing has been dropped.
+* RegisterScheduler::setDefault was removed. Targets that used to call into the
+ command line parser to set the DAGScheduler, and that don't have enough
+ control with setSchedulingPreference, should look into overriding the
+ SubTargetHook "getDAGScheduler()".
+
+* ``ilist_iterator<T>`` no longer has implicit conversions to and from ``T*``,
+ since ``ilist_iterator<T>`` may be pointing at the sentinel (which is usually
+ not of type ``T`` at all). To convert from an iterator ``I`` to a pointer,
+ use ``&*I``; to convert from a pointer ``P`` to an iterator, use
+ ``P->getIterator()``. Alternatively, explicit conversions via
+ ``static_cast<T>(U)`` are still available.
+
+* ``ilist_node<T>::getNextNode()`` and ``ilist_node<T>::getPrevNode()`` now
+ fail at compile time when the node cannot access its parent list.
+ Previously, when the sentinel was was an ``ilist_half_node<T>``, this API
+ could return the sentinal instead of ``nullptr``. Frustrated callers should
+ be updated to use ``iplist<T>::getNextNode(T*)`` instead. Alternatively, if
+ the node ``N`` is guaranteed not to be the last in the list, it is safe to
+ call ``&*++N->getIterator()`` directly.
+
.. NOTE
For small 1-3 sentence descriptions, just add an entry at the end of
this list. If your description won't fit comfortably in one bullet
@@ -98,17 +108,97 @@ Non-comprehensive list of changes in thi
Makes programs 10x faster by doing Special New Thing.
-Changes to the ARM Backend
---------------------------
- During this release ...
+Changes to the ARM Backends
+---------------------------
+
+During this release the AArch64 target has:
+
+* Added support for more sanitizers (MSAN, TSAN) and made them compatible with
+ all VMA kernel configurations (kurrently tested on 39 and 42 bits).
+* Gained initial LLD support in the new ELF back-end
+* Extended the Load/Store optimiser and cleaned up some of the bad decisions
+ made earlier.
+* Expanded LLDB support, including watchpoints, native building, Renderscript,
+ LLDB-server, debugging 32-bit applications.
+* Added support for the ``Exynos M1`` chip.
+
+During this release the ARM target has:
+
+* Gained massive performance improvements on embedded benchmarks due to finally
+ running the stride vectorizer in full form, incrementing the performance gains
+ that we already had in the previous releases with limited stride vectorization.
+* Expanded LLDB support, including watchpoints, unwind tables
+* Extended the Load/Store optimiser and cleaned up some of the bad decisions
+ made earlier.
+* Simplified code generation for global variable addresses in ELF, resulting in
+ a significant (4% in Chromium) reduction in code size.
+* Gained some additional code size improvements, though there's still a long road
+ ahead, especially for older cores.
+* Added some EABI floating point comparison functions to Compiler-RT
+* Added support for Windows+GNU triple, +features in -mcpu/-march options.
Changes to the MIPS Target
--------------------------
- During this release ...
+During this release the MIPS target has:
+
+* Significantly extended support for the Integrated Assembler. See below for
+ more information
+* Added support for the ``P5600`` processor.
+* Added support for the ``interrupt`` attribute for MIPS32R2 and later. This
+ attribute will generate a function which can be used as a interrupt handler
+ on bare metal MIPS targets using the static relocation model.
+* Added support for the ``ERETNC`` instruction found in MIPS32R5 and later.
+* Added support for OpenCL. See http://portablecl.org/.
+
+ * Address spaces 1 to 255 are now reserved for software use and conversions
+ between them are no-op casts.
+
+* Removed the ``mips16`` value for the -mcpu option since it is an :abbr:`ASE
+ (Application Specific Extension)` and not a processor. If you were using this,
+ please specify another CPU and use ``-mips16`` to enable MIPS16.
+* Removed ``copy_u.w`` from 32-bit MSA and ``copy_u.d`` from 64-bit MSA since
+ they have been removed from the MSA specification due to forward compatibility
+ issues. For example, 32-bit MSA code containing ``copy_u.w`` would behave
+ differently on a 64-bit processor supporting MSA. The corresponding intrinsics
+ are still available and may expand to ``copy_s.[wd]`` where this is
+ appropriate for forward compatibility purposes.
+* Relaxed the ``-mnan`` option to allow ``-mnan=2008`` on MIPS32R2/MIPS64R2 for
+ compatibility with GCC.
+* Made MIPS64R6 the default CPU for 64-bit Android triples.
+
+The MIPS target has also fixed various bugs including the following notable
+fixes:
+
+* Fixed reversed operands on ``mthi``/``mtlo`` in the DSP :abbr:`ASE
+ (Application Specific Extension)`.
+* The code generator no longer uses ``jal`` for calls to absolute immediate
+ addresses.
+* Disabled fast instruction selection on MIPS32R6 and MIPS64R6 since this is not
+ yet supported.
+* Corrected addend for ``R_MIPS_HI16`` and ``R_MIPS_PCHI16`` in MCJIT
+* The code generator no longer crashes when handling subregisters of an 64-bit
+ FPU register with undefined value.
+* The code generator no longer attempts to use ``$zero`` for operands that do
+ not permit ``$zero``.
+* Corrected the opcode used for ``ll``/``sc`` when using MIPS32R6/MIPS64R6 and
+ the Integrated Assembler.
+* Added support for atomic load and atomic store.
+* Corrected debug info when dynamically re-aligning the stack.
+
+Integrated Assembler
+^^^^^^^^^^^^^^^^^^^^
+We have made a large number of improvements to the integrated assembler for
+MIPS. In this release, the integrated assembler isn't quite production-ready
+since there are a few known issues related to bare-metal support, checking
+immediates on instructions, and the N32/N64 ABI's. However, the current support
+should be sufficient for many users of the O32 ABI, particularly those targeting
+MIPS32 on Linux or bare-metal MIPS32.
+If you would like to try the integrated assembler, please use
+``-fintegrated-as``.
Changes to the PowerPC Target
-----------------------------
@@ -123,6 +213,20 @@ Changes to the X86 Target
* TLS is enabled for Cygwin as emutls.
+* Smaller code for materializing 32-bit 1 and -1 constants at ``-Os``.
+
+* More efficient code for wide integer compares. (E.g. 64-bit compares
+ on 32-bit targets.)
+
+* Tail call support for ``thiscall``, ``stdcall`, ``vectorcall``, and
+ ``fastcall`` functions.
+
+Changes to the AVR Target
+-------------------------
+
+Slightly less than half of the AVR backend has been merged in at this point. It is still
+missing a number large parts which cause it to be unusable, but is well on the
+road to being completely merged and workable.
Changes to the OCaml bindings
-----------------------------
@@ -140,7 +244,19 @@ An exciting aspect of LLVM is that it is
a lot of other language and tools projects. This section lists some of the
projects that have already been updated to work with LLVM 3.8.
-* A project
+LDC - the LLVM-based D compiler
+-------------------------------
+
+`D <http://dlang.org>`_ is a language with C-like syntax and static typing. It
+pragmatically combines efficiency, control, and modeling power, with safety and
+programmer productivity. D supports powerful concepts like Compile-Time Function
+Execution (CTFE) and Template Meta-Programming, provides an innovative approach
+to concurrency and offers many classical paradigms.
+
+`LDC <http://wiki.dlang.org/LDC>`_ uses the frontend from the reference compiler
+combined with LLVM as backend to produce efficient native code. LDC targets
+x86/x86_64 systems like Linux, OS X and Windows and also PowerPC (32/64 bit)
+and ARM. Ports to other architectures like AArch64 and MIPS64 are underway.
Additional Information
Modified: vendor/llvm/dist/include/llvm/IR/IntrinsicsPowerPC.td
==============================================================================
--- vendor/llvm/dist/include/llvm/IR/IntrinsicsPowerPC.td Sat Feb 13 14:57:04 2016 (r295589)
+++ vendor/llvm/dist/include/llvm/IR/IntrinsicsPowerPC.td Sat Feb 13 14:57:10 2016 (r295590)
@@ -484,7 +484,7 @@ let TargetPrefix = "ppc" in { // All PP
Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
def int_ppc_altivec_vpkswss : GCCBuiltin<"__builtin_altivec_vpkswss">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
def int_ppc_altivec_vpkswus : GCCBuiltin<"__builtin_altivec_vpkswus">,
Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
Modified: vendor/llvm/dist/include/llvm/IR/Value.h
==============================================================================
--- vendor/llvm/dist/include/llvm/IR/Value.h Sat Feb 13 14:57:04 2016 (r295589)
+++ vendor/llvm/dist/include/llvm/IR/Value.h Sat Feb 13 14:57:10 2016 (r295590)
@@ -280,11 +280,7 @@ public:
// when using them since you might not get all uses.
// The methods that don't start with materialized_ assert that modules is
// fully materialized.
-#ifdef NDEBUG
- void assertModuleIsMaterialized() const {}
-#else
void assertModuleIsMaterialized() const;
-#endif
bool use_empty() const {
assertModuleIsMaterialized();
Modified: vendor/llvm/dist/lib/Analysis/DemandedBits.cpp
==============================================================================
--- vendor/llvm/dist/lib/Analysis/DemandedBits.cpp Sat Feb 13 14:57:04 2016 (r295589)
+++ vendor/llvm/dist/lib/Analysis/DemandedBits.cpp Sat Feb 13 14:57:10 2016 (r295590)
@@ -242,13 +242,6 @@ void DemandedBits::determineLiveOperandB
if (OperandNo != 0)
AB = AOut;
break;
- case Instruction::ICmp:
- // Count the number of leading zeroes in each operand.
- ComputeKnownBits(BitWidth, UserI->getOperand(0), UserI->getOperand(1));
- auto NumLeadingZeroes = std::min(KnownZero.countLeadingOnes(),
- KnownZero2.countLeadingOnes());
- AB = ~APInt::getHighBitsSet(BitWidth, NumLeadingZeroes);
- break;
}
}
Modified: vendor/llvm/dist/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
==============================================================================
--- vendor/llvm/dist/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp Sat Feb 13 14:57:04 2016 (r295589)
+++ vendor/llvm/dist/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp Sat Feb 13 14:57:10 2016 (r295590)
@@ -555,6 +555,11 @@ bool AsmPrinter::PrintAsmOperand(const M
return true;
O << -MO.getImm();
return false;
+ case 's': // The GCC deprecated s modifier
+ if (MO.getType() != MachineOperand::MO_Immediate)
+ return true;
+ O << ((32 - MO.getImm()) & 31);
+ return false;
}
}
return true;
Modified: vendor/llvm/dist/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
==============================================================================
--- vendor/llvm/dist/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Sat Feb 13 14:57:04 2016 (r295589)
+++ vendor/llvm/dist/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Sat Feb 13 14:57:10 2016 (r295590)
@@ -793,16 +793,27 @@ static DebugLocEntry::Value getDebugLocV
llvm_unreachable("Unexpected 4-operand DBG_VALUE instruction!");
}
-/// Determine whether two variable pieces overlap.
-static bool piecesOverlap(const DIExpression *P1, const DIExpression *P2) {
- if (!P1->isBitPiece() || !P2->isBitPiece())
- return true;
+// Determine the relative position of the pieces described by P1 and P2.
+// Returns -1 if P1 is entirely before P2, 0 if P1 and P2 overlap,
+// 1 if P1 is entirely after P2.
+static int pieceCmp(const DIExpression *P1, const DIExpression *P2) {
unsigned l1 = P1->getBitPieceOffset();
unsigned l2 = P2->getBitPieceOffset();
unsigned r1 = l1 + P1->getBitPieceSize();
unsigned r2 = l2 + P2->getBitPieceSize();
- // True where [l1,r1[ and [r1,r2[ overlap.
- return (l1 < r2) && (l2 < r1);
+ if (r1 <= l2)
+ return -1;
+ else if (r2 <= l1)
+ return 1;
+ else
+ return 0;
+}
+
+/// Determine whether two variable pieces overlap.
+static bool piecesOverlap(const DIExpression *P1, const DIExpression *P2) {
+ if (!P1->isBitPiece() || !P2->isBitPiece())
+ return true;
+ return pieceCmp(P1, P2) == 0;
}
/// \brief If this and Next are describing different pieces of the same
@@ -811,14 +822,32 @@ static bool piecesOverlap(const DIExpres
/// Return true if the merge was successful.
bool DebugLocEntry::MergeValues(const DebugLocEntry &Next) {
if (Begin == Next.Begin) {
- auto *Expr = cast_or_null<DIExpression>(Values[0].Expression);
- auto *NextExpr = cast_or_null<DIExpression>(Next.Values[0].Expression);
- if (Expr->isBitPiece() && NextExpr->isBitPiece() &&
- !piecesOverlap(Expr, NextExpr)) {
- addValues(Next.Values);
- End = Next.End;
- return true;
+ auto *FirstExpr = cast<DIExpression>(Values[0].Expression);
+ auto *FirstNextExpr = cast<DIExpression>(Next.Values[0].Expression);
+ if (!FirstExpr->isBitPiece() || !FirstNextExpr->isBitPiece())
+ return false;
+
+ // We can only merge entries if none of the pieces overlap any others.
+ // In doing so, we can take advantage of the fact that both lists are
+ // sorted.
+ for (unsigned i = 0, j = 0; i < Values.size(); ++i) {
+ for (; j < Next.Values.size(); ++j) {
+ int res = pieceCmp(cast<DIExpression>(Values[i].Expression),
+ cast<DIExpression>(Next.Values[j].Expression));
+ if (res == 0) // The two expressions overlap, we can't merge.
+ return false;
+ // Values[i] is entirely before Next.Values[j],
+ // so go back to the next entry of Values.
+ else if (res == -1)
+ break;
+ // Next.Values[j] is entirely before Values[i], so go on to the
+ // next entry of Next.Values.
+ }
}
+
+ addValues(Next.Values);
+ End = Next.End;
+ return true;
}
return false;
}
Modified: vendor/llvm/dist/lib/IR/Value.cpp
==============================================================================
--- vendor/llvm/dist/lib/IR/Value.cpp Sat Feb 13 14:57:04 2016 (r295589)
+++ vendor/llvm/dist/lib/IR/Value.cpp Sat Feb 13 14:57:10 2016 (r295590)
@@ -313,8 +313,8 @@ void Value::takeName(Value *V) {
ST->reinsertValue(this);
}
-#ifndef NDEBUG
void Value::assertModuleIsMaterialized() const {
+#ifndef NDEBUG
const GlobalValue *GV = dyn_cast<GlobalValue>(this);
if (!GV)
return;
@@ -322,8 +322,10 @@ void Value::assertModuleIsMaterialized()
if (!M)
return;
assert(M->isMaterialized());
+#endif
}
+#ifndef NDEBUG
static bool contains(SmallPtrSetImpl<ConstantExpr *> &Cache, ConstantExpr *Expr,
Constant *C) {
if (!Cache.insert(Expr).second)
Modified: vendor/llvm/dist/lib/Target/AArch64/AArch64.td
==============================================================================
--- vendor/llvm/dist/lib/Target/AArch64/AArch64.td Sat Feb 13 14:57:04 2016 (r295589)
+++ vendor/llvm/dist/lib/Target/AArch64/AArch64.td Sat Feb 13 14:57:10 2016 (r295590)
@@ -90,6 +90,7 @@ def AArch64InstrInfo : InstrInfo;
include "AArch64SchedA53.td"
include "AArch64SchedA57.td"
include "AArch64SchedCyclone.td"
+include "AArch64SchedM1.td"
def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35",
"Cortex-A35 ARM processors",
@@ -144,8 +145,7 @@ def : ProcessorModel<"cortex-a57", Corte
// FIXME: Cortex-A72 is currently modelled as an Cortex-A57.
def : ProcessorModel<"cortex-a72", CortexA57Model, [ProcA57]>;
def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>;
-// FIXME: Exynos-M1 is currently modelled without a specific SchedModel.
-def : ProcessorModel<"exynos-m1", NoSchedModel, [ProcExynosM1]>;
+def : ProcessorModel<"exynos-m1", ExynosM1Model, [ProcExynosM1]>;
//===----------------------------------------------------------------------===//
// Assembly parser
Modified: vendor/llvm/dist/lib/Target/AArch64/AArch64ISelLowering.cpp
==============================================================================
--- vendor/llvm/dist/lib/Target/AArch64/AArch64ISelLowering.cpp Sat Feb 13 14:57:04 2016 (r295589)
+++ vendor/llvm/dist/lib/Target/AArch64/AArch64ISelLowering.cpp Sat Feb 13 14:57:10 2016 (r295590)
@@ -6689,6 +6689,9 @@ SDValue AArch64TargetLowering::LowerVSET
return DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
}
+ if (LHS.getValueType().getVectorElementType() == MVT::f16)
+ return SDValue();
+
assert(LHS.getValueType().getVectorElementType() == MVT::f32 ||
LHS.getValueType().getVectorElementType() == MVT::f64);
Added: vendor/llvm/dist/lib/Target/AArch64/AArch64SchedM1.td
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ vendor/llvm/dist/lib/Target/AArch64/AArch64SchedM1.td Sat Feb 13 14:57:10 2016 (r295590)
@@ -0,0 +1,359 @@
+//=- AArch64SchedM1.td - Samsung Exynos-M1 Scheduling Defs ---*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for Samsung Exynos-M1 to support
+// instruction scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// The Exynos-M1 is a traditional superscalar microprocessor with a
+// 4-wide in-order stage for decode and dispatch and a wider issue stage.
+// The execution units and loads and stores are out-of-order.
+
+def ExynosM1Model : SchedMachineModel {
+ let IssueWidth = 4; // Up to 4 uops per cycle.
+ let MinLatency = 0; // OoO.
+ let MicroOpBufferSize = 96; // ROB size.
+ let LoopMicroOpBufferSize = 32; // Instruction queue size.
+ let LoadLatency = 4; // Optimistic load cases.
+ let MispredictPenalty = 14; // Minimum branch misprediction penalty.
+ let CompleteModel = 0; // Use the default model otherwise.
+}
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available on the Exynos-M1,
+// which has 9 pipelines, each with its own queue with out-of-order dispatch.
+
+def M1UnitA : ProcResource<2>; // Simple integer
+def M1UnitC : ProcResource<1>; // Simple and complex integer
+def M1UnitB : ProcResource<2>; // Branch
+def M1UnitL : ProcResource<1>; // Load
+def M1UnitS : ProcResource<1>; // Store
+def M1PipeF0 : ProcResource<1>; // FP #0
+def M1PipeF1 : ProcResource<1>; // FP #1
+
+let Super = M1PipeF0 in {
+ def M1UnitFMAC : ProcResource<1>; // FP multiplication
+ def M1UnitFCVT : ProcResource<1>; // FP conversion
+ def M1UnitNAL0 : ProcResource<1>; // Simple vector.
+ def M1UnitNMISC : ProcResource<1>; // Miscellanea
+ def M1UnitNCRYPT : ProcResource<1>; // Cryptographic
+}
+
+let Super = M1PipeF1 in {
+ def M1UnitFADD : ProcResource<1>; // Simple FP
+ let BufferSize = 1 in
+ def M1UnitFVAR : ProcResource<1>; // FP division & square root (serialized)
+ def M1UnitNAL1 : ProcResource<1>; // Simple vector.
+ def M1UnitFST : ProcResource<1>; // FP store
+}
+
+let SchedModel = ExynosM1Model in {
+ def M1UnitALU : ProcResGroup<[M1UnitA,
+ M1UnitC]>; // All simple integer.
+ def M1UnitNALU : ProcResGroup<[M1UnitNAL0,
+ M1UnitNAL1]>; // All simple vector.
+}
+
+let SchedModel = ExynosM1Model in {
+
+//===----------------------------------------------------------------------===//
+// Coarse scheduling model for the Exynos-M1.
+
+// Branch instructions.
+// TODO: Non-conditional direct branches take zero cycles and units.
+def : WriteRes<WriteBr, [M1UnitB]> { let Latency = 1; }
+def : WriteRes<WriteBrReg, [M1UnitC]> { let Latency = 1; }
+// TODO: Branch and link is much different.
+
+// Arithmetic and logical integer instructions.
+def : WriteRes<WriteI, [M1UnitALU]> { let Latency = 1; }
+// TODO: Shift over 3 and some extensions take 2 cycles.
+def : WriteRes<WriteISReg, [M1UnitALU]> { let Latency = 1; }
+def : WriteRes<WriteIEReg, [M1UnitALU]> { let Latency = 1; }
+def : WriteRes<WriteIS, [M1UnitALU]> { let Latency = 1; }
+
+// Move instructions.
+def : WriteRes<WriteImm, [M1UnitALU]> { let Latency = 1; }
+
+// Divide and multiply instructions.
+// TODO: Division blocks the divider inside C.
+def : WriteRes<WriteID32, [M1UnitC]> { let Latency = 13; }
+def : WriteRes<WriteID64, [M1UnitC]> { let Latency = 21; }
+// TODO: Long multiplication take 5 cycles and also the ALU.
+// TODO: Multiplication with accumulation can be advanced.
+def : WriteRes<WriteIM32, [M1UnitC]> { let Latency = 3; }
+// TODO: 64-bit multiplication has a throughput of 1/2.
+def : WriteRes<WriteIM64, [M1UnitC]> { let Latency = 4; }
+
+// Miscellaneous instructions.
+def : WriteRes<WriteExtr, [M1UnitALU,
+ M1UnitALU]> { let Latency = 2; }
+
+// TODO: The latency for the post or pre register is 1 cycle.
+def : WriteRes<WriteAdr, []> { let Latency = 0; }
+
+// Load instructions.
+def : WriteRes<WriteLD, [M1UnitL]> { let Latency = 4; }
+// TODO: Extended address requires also the ALU.
+def : WriteRes<WriteLDIdx, [M1UnitL]> { let Latency = 5; }
+def : WriteRes<WriteLDHi, [M1UnitALU]> { let Latency = 4; }
+
+// Store instructions.
+def : WriteRes<WriteST, [M1UnitS]> { let Latency = 1; }
+// TODO: Extended address requires also the ALU.
+def : WriteRes<WriteSTIdx, [M1UnitS]> { let Latency = 1; }
+def : WriteRes<WriteSTP, [M1UnitS]> { let Latency = 1; }
+def : WriteRes<WriteSTX, [M1UnitS]> { let Latency = 1; }
+
+// FP data instructions.
+def : WriteRes<WriteF, [M1UnitFADD]> { let Latency = 3; }
+// TODO: FCCMP is much different.
+def : WriteRes<WriteFCmp, [M1UnitNMISC]> { let Latency = 4; }
+// TODO: DP takes longer.
+def : WriteRes<WriteFDiv, [M1UnitFVAR]> { let Latency = 15; }
+// TODO: MACC takes longer.
+def : WriteRes<WriteFMul, [M1UnitFMAC]> { let Latency = 4; }
+
+// FP miscellaneous instructions.
+// TODO: Conversion between register files is much different.
+def : WriteRes<WriteFCvt, [M1UnitFCVT]> { let Latency = 3; }
+def : WriteRes<WriteFImm, [M1UnitNALU]> { let Latency = 1; }
+// TODO: Copy from FPR to GPR is much different.
+def : WriteRes<WriteFCopy, [M1UnitS]> { let Latency = 4; }
+
+// FP load instructions.
+// TODO: ASIMD loads are much different.
+def : WriteRes<WriteVLD, [M1UnitL]> { let Latency = 5; }
+
+// FP store instructions.
+// TODO: ASIMD stores are much different.
+def : WriteRes<WriteVST, [M1UnitS, M1UnitFST]> { let Latency = 1; }
+
+// ASIMD FP instructions.
+// TODO: Other operations are much different.
+def : WriteRes<WriteV, [M1UnitFADD]> { let Latency = 3; }
+
+// Other miscellaneous instructions.
+def : WriteRes<WriteSys, []> { let Latency = 1; }
+def : WriteRes<WriteBarrier, []> { let Latency = 1; }
+def : WriteRes<WriteHint, []> { let Latency = 1; }
+
+//===----------------------------------------------------------------------===//
+// Fast forwarding.
+
+// TODO: Add FP register forwarding rules.
+
+def : ReadAdvance<ReadI, 0>;
+def : ReadAdvance<ReadISReg, 0>;
+def : ReadAdvance<ReadIEReg, 0>;
+def : ReadAdvance<ReadIM, 0>;
+// Integer multiply-accumulate.
+// TODO: The forwarding for WriteIM64 saves actually 3 cycles.
+def : ReadAdvance<ReadIMA, 2, [WriteIM32, WriteIM64]>;
+def : ReadAdvance<ReadID, 0>;
+def : ReadAdvance<ReadExtrHi, 0>;
+def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadVLD, 0>;
+
+//===----------------------------------------------------------------------===//
+// Finer scheduling model for the Exynos-M1.
+
+def M1WriteNEONA : SchedWriteRes<[M1UnitNALU,
+ M1UnitNALU,
+ M1UnitFADD]> { let Latency = 9; }
+def M1WriteNEONB : SchedWriteRes<[M1UnitNALU,
+ M1UnitFST]> { let Latency = 5; }
+def M1WriteNEONC : SchedWriteRes<[M1UnitNALU,
+ M1UnitFST]> { let Latency = 6; }
+def M1WriteNEOND : SchedWriteRes<[M1UnitNALU,
+ M1UnitFST,
+ M1UnitL]> { let Latency = 10; }
+def M1WriteNEONE : SchedWriteRes<[M1UnitFCVT,
+ M1UnitFST]> { let Latency = 8; }
+def M1WriteNEONF : SchedWriteRes<[M1UnitFCVT,
+ M1UnitFST,
+ M1UnitL]> { let Latency = 13; }
+def M1WriteNEONG : SchedWriteRes<[M1UnitNMISC,
+ M1UnitFST]> { let Latency = 6; }
+def M1WriteNEONH : SchedWriteRes<[M1UnitNALU,
+ M1UnitFST]> { let Latency = 3; }
+def M1WriteNEONI : SchedWriteRes<[M1UnitFST,
+ M1UnitL]> { let Latency = 9; }
+def M1WriteALU1 : SchedWriteRes<[M1UnitALU]> { let Latency = 1; }
+def M1WriteB : SchedWriteRes<[M1UnitB]> { let Latency = 1; }
+// FIXME: This is the worst case, conditional branch and link.
+def M1WriteBL : SchedWriteRes<[M1UnitB,
+ M1UnitALU]> { let Latency = 1; }
+// FIXME: This is the worst case, when using LR.
+def M1WriteBLR : SchedWriteRes<[M1UnitB,
+ M1UnitALU,
+ M1UnitALU]> { let Latency = 2; }
+def M1WriteC1 : SchedWriteRes<[M1UnitC]> { let Latency = 1; }
+def M1WriteC2 : SchedWriteRes<[M1UnitC]> { let Latency = 2; }
+def M1WriteFADD3 : SchedWriteRes<[M1UnitFADD]> { let Latency = 3; }
+def M1WriteFCVT3 : SchedWriteRes<[M1UnitFCVT]> { let Latency = 3; }
+def M1WriteFCVT4 : SchedWriteRes<[M1UnitFCVT]> { let Latency = 4; }
+def M1WriteFMAC4 : SchedWriteRes<[M1UnitFMAC]> { let Latency = 4; }
+def M1WriteFMAC5 : SchedWriteRes<[M1UnitFMAC]> { let Latency = 5; }
+def M1WriteFVAR15 : SchedWriteRes<[M1UnitFVAR]> { let Latency = 15; }
+def M1WriteFVAR23 : SchedWriteRes<[M1UnitFVAR]> { let Latency = 23; }
+def M1WriteNALU1 : SchedWriteRes<[M1UnitNALU]> { let Latency = 1; }
+def M1WriteNALU2 : SchedWriteRes<[M1UnitNALU]> { let Latency = 2; }
+def M1WriteNAL11 : SchedWriteRes<[M1UnitNAL1]> { let Latency = 1; }
+def M1WriteNAL12 : SchedWriteRes<[M1UnitNAL1]> { let Latency = 2; }
+def M1WriteNAL13 : SchedWriteRes<[M1UnitNAL1]> { let Latency = 3; }
+def M1WriteNCRYPT1 : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 1; }
+def M1WriteNCRYPT5 : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 5; }
+def M1WriteNMISC1 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 1; }
+def M1WriteNMISC2 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 2; }
+def M1WriteNMISC3 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 3; }
+def M1WriteNMISC4 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 4; }
+def M1WriteS4 : SchedWriteRes<[M1UnitS]> { let Latency = 4; }
+def M1WriteTB : SchedWriteRes<[M1UnitC,
+ M1UnitALU]> { let Latency = 2; }
+
+// Branch instructions
+def : InstRW<[M1WriteB ], (instrs Bcc)>;
+def : InstRW<[M1WriteBL], (instrs BL)>;
+def : InstRW<[M1WriteBLR], (instrs BLR)>;
+def : InstRW<[M1WriteC1], (instregex "^CBN?Z[WX]")>;
+def : InstRW<[M1WriteTB], (instregex "^TBN?Z[WX]")>;
+
+// Arithmetic and logical integer instructions.
+def : InstRW<[M1WriteALU1], (instrs COPY)>;
+
+// Divide and multiply instructions.
+
+// Miscellaneous instructions.
+
+// Load instructions.
+
+// Store instructions.
+
+// FP data instructions.
+def : InstRW<[M1WriteNALU1], (instregex "^F(ABS|NEG)[DS]r")>;
+def : InstRW<[M1WriteFADD3], (instregex "^F(ADD|SUB)[DS]rr")>;
+def : InstRW<[M1WriteNEONG], (instregex "^FCCMPE?[DS]rr")>;
+def : InstRW<[M1WriteNMISC4], (instregex "^FCMPE?[DS]r")>;
+def : InstRW<[M1WriteFVAR15], (instrs FDIVSrr)>;
+def : InstRW<[M1WriteFVAR23], (instrs FDIVDrr)>;
+def : InstRW<[M1WriteNMISC2], (instregex "^F(MAX|MIN).+rr")>;
+def : InstRW<[M1WriteFMAC4], (instregex "^FN?MUL[DS]rr")>;
+def : InstRW<[M1WriteFMAC5], (instregex "^FN?M(ADD|SUB)[DS]rrr")>;
+def : InstRW<[M1WriteFCVT3], (instregex "^FRINT.+r")>;
+def : InstRW<[M1WriteNEONH], (instregex "^FCSEL[DS]rrr")>;
+def : InstRW<[M1WriteFVAR15], (instrs FSQRTSr)>;
+def : InstRW<[M1WriteFVAR23], (instrs FSQRTDr)>;
+
+// FP miscellaneous instructions.
+def : InstRW<[M1WriteFCVT3], (instregex "^FCVT[DS][DS]r")>;
+def : InstRW<[M1WriteNEONF], (instregex "^[FSU]CVT[AMNPZ][SU](_Int)?[SU]?[XW]?[DS]?[rds]i?")>;
+def : InstRW<[M1WriteNEONE], (instregex "^[SU]CVTF[SU]")>;
+def : InstRW<[M1WriteNALU1], (instregex "^FMOV[DS][ir]")>;
+def : InstRW<[M1WriteS4], (instregex "^FMOV[WX][DS](High)?r")>;
+def : InstRW<[M1WriteNEONI], (instregex "^FMOV[DS][WX](High)?r")>;
+
+// FP load instructions.
+
+// FP store instructions.
+
+// ASIMD instructions.
+def : InstRW<[M1WriteNMISC3], (instregex "^[SU]ABAL?v")>;
+def : InstRW<[M1WriteNMISC1], (instregex "^[SU]ABDL?v")>;
+def : InstRW<[M1WriteNMISC1], (instregex "^(SQ)?ABSv")>;
+def : InstRW<[M1WriteNMISC1], (instregex "^SQNEGv")>;
+def : InstRW<[M1WriteNALU1], (instregex "^(ADD|NEG|SUB)v")>;
+def : InstRW<[M1WriteNMISC3], (instregex "^[SU]?H(ADD|SUB)v")>;
+def : InstRW<[M1WriteNMISC3], (instregex "^[SU]?AD[AD](L|LP|P|W)V?2?v")>;
+def : InstRW<[M1WriteNMISC3], (instregex "^[SU]?SUB[LW]2?v")>;
+def : InstRW<[M1WriteNMISC3], (instregex "^R?(ADD|SUB)HN?2?v")>;
+def : InstRW<[M1WriteNMISC3], (instregex "^[SU]+Q(ADD|SUB)v")>;
+def : InstRW<[M1WriteNMISC3], (instregex "^[SU]RHADDv")>;
+def : InstRW<[M1WriteNMISC1], (instregex "^CM(EQ|GE|GT|HI|HS|LE|LT)v")>;
+def : InstRW<[M1WriteNALU1], (instregex "^CMTSTv")>;
+def : InstRW<[M1WriteNALU1], (instregex "^(AND|BIC|EOR|MVNI|NOT|ORN|ORR)v")>;
+def : InstRW<[M1WriteNMISC1], (instregex "^[SU](MIN|MAX)v")>;
+def : InstRW<[M1WriteNMISC2], (instregex "^[SU](MIN|MAX)Pv")>;
+def : InstRW<[M1WriteNMISC3], (instregex "^[SU](MIN|MAX)Vv")>;
+def : InstRW<[M1WriteNMISC4], (instregex "^(MUL|SQR?DMULH)v")>;
+def : InstRW<[M1WriteNMISC4], (instregex "^ML[AS]v")>;
+def : InstRW<[M1WriteNMISC4], (instregex "^(S|U|SQD|SQRD)ML[AS][HL]v")>;
+def : InstRW<[M1WriteNMISC4], (instregex "^(S|U|SQD)MULLv")>;
+def : InstRW<[M1WriteNAL13], (instregex "^(S|SR|U|UR)SRAv")>;
+def : InstRW<[M1WriteNALU1], (instregex "^[SU]?SH(L|LL|R)2?v")>;
+def : InstRW<[M1WriteNALU1], (instregex "^S[LR]Iv")>;
+def : InstRW<[M1WriteNAL13], (instregex "^[SU]?(Q|QR|R)?SHR(N|U|UN)?2?v")>;
+def : InstRW<[M1WriteNAL13], (instregex "^[SU](Q|QR|R)SHLU?v")>;
+
+// ASIMD FP instructions.
+def : InstRW<[M1WriteNALU1], (instregex "^F(ABS|NEG)v")>;
+def : InstRW<[M1WriteNMISC3], (instregex "^F(ABD|ADD|SUB)v")>;
+def : InstRW<[M1WriteNEONA], (instregex "^FADDP")>;
+def : InstRW<[M1WriteNMISC1], (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v[^1]")>;
+def : InstRW<[M1WriteFCVT3], (instregex "^[FVSU]CVTX?[AFLMNPZ][SU]?(_Int)?v")>;
+def : InstRW<[M1WriteFVAR15], (instregex "FDIVv.f32")>;
+def : InstRW<[M1WriteFVAR23], (instregex "FDIVv2f64")>;
+def : InstRW<[M1WriteFVAR15], (instregex "FSQRTv.f32")>;
+def : InstRW<[M1WriteFVAR23], (instregex "FSQRTv2f64")>;
+def : InstRW<[M1WriteNMISC1], (instregex "^F(MAX|MIN)(NM)?V?v")>;
+def : InstRW<[M1WriteNMISC2], (instregex "^F(MAX|MIN)(NM)?Pv")>;
+def : InstRW<[M1WriteFMAC4], (instregex "^FMULX?v")>;
+def : InstRW<[M1WriteFMAC5], (instregex "^FML[AS]v")>;
+def : InstRW<[M1WriteFCVT3], (instregex "^FRINT[AIMNPXZ]v")>;
+
+// ASIMD miscellaneous instructions.
+def : InstRW<[M1WriteNALU1], (instregex "^RBITv")>;
+def : InstRW<[M1WriteNAL11], (instregex "^(BIF|BIT|BSL)v")>;
+def : InstRW<[M1WriteNALU1], (instregex "^CPY")>;
+def : InstRW<[M1WriteNEONB], (instregex "^DUPv.+gpr")>;
+def : InstRW<[M1WriteNALU1], (instregex "^DUPv.+lane")>;
+def : InstRW<[M1WriteNAL13], (instregex "^[SU]?Q?XTU?Nv")>;
+def : InstRW<[M1WriteNEONC], (instregex "^INSv.+gpr")>;
+def : InstRW<[M1WriteFCVT4], (instregex "^[FU](RECP|RSQRT)Ev")>;
+def : InstRW<[M1WriteNMISC1], (instregex "^[FU](RECP|RSQRT)Xv")>;
+def : InstRW<[M1WriteFMAC5], (instregex "^F(RECP|RSQRT)Sv")>;
+def : InstRW<[M1WriteNALU1], (instregex "^REV(16|32|64)v")>;
+def : InstRW<[M1WriteNAL11], (instregex "^TB[LX]v8i8One")>;
+def : InstRW<[WriteSequence<[M1WriteNAL11], 2>],
+ (instregex "^TB[LX]v8i8Two")>;
+def : InstRW<[WriteSequence<[M1WriteNAL11], 3>],
+ (instregex "^TB[LX]v8i8Three")>;
+def : InstRW<[WriteSequence<[M1WriteNAL11], 4>],
+ (instregex "^TB[LX]v8i8Four")>;
+def : InstRW<[M1WriteNAL12], (instregex "^TB[LX]v16i8One")>;
+def : InstRW<[WriteSequence<[M1WriteNAL12], 2>],
+ (instregex "^TB[LX]v16i8Two")>;
+def : InstRW<[WriteSequence<[M1WriteNAL12], 3>],
+ (instregex "^TB[LX]v16i8Three")>;
+def : InstRW<[WriteSequence<[M1WriteNAL12], 4>],
+ (instregex "^TB[LX]v16i8Four")>;
+def : InstRW<[M1WriteNEOND], (instregex "^[SU]MOVv")>;
+def : InstRW<[M1WriteNALU1], (instregex "^INSv.+lane")>;
+def : InstRW<[M1WriteNALU1], (instregex "^(TRN|UZP)(1|2)(v8i8|v4i16|v2i32)")>;
+def : InstRW<[M1WriteNALU2], (instregex "^(TRN|UZP)(1|2)(v16i8|v8i16|v4i32|v2i64)")>;
+def : InstRW<[M1WriteNALU1], (instregex "^ZIP(1|2)v")>;
+
+// ASIMD load instructions.
+
+// ASIMD store instructions.
+
+// Cryptography instructions.
+def : InstRW<[M1WriteNCRYPT1], (instregex "^AES")>;
+def : InstRW<[M1WriteNCRYPT1], (instregex "^PMUL")>;
+def : InstRW<[M1WriteNCRYPT1], (instregex "^SHA1(H|SU)")>;
+def : InstRW<[M1WriteNCRYPT5], (instregex "^SHA1[CMP]")>;
+def : InstRW<[M1WriteNCRYPT1], (instregex "^SHA256SU0")>;
+def : InstRW<[M1WriteNCRYPT5], (instregex "^SHA256(H|SU1)")>;
+
+// CRC instructions.
+def : InstRW<[M1WriteC2], (instregex "^CRC32")>;
+
+} // SchedModel = ExynosM1Model
Modified: vendor/llvm/dist/lib/Target/AMDGPU/AMDGPU.td
==============================================================================
--- vendor/llvm/dist/lib/Target/AMDGPU/AMDGPU.td Sat Feb 13 14:57:04 2016 (r295589)
+++ vendor/llvm/dist/lib/Target/AMDGPU/AMDGPU.td Sat Feb 13 14:57:10 2016 (r295590)
@@ -183,6 +183,7 @@ def FeatureISAVersion7_0_0 : SubtargetFe
def FeatureISAVersion7_0_1 : SubtargetFeatureISAVersion <7,0,1>;
def FeatureISAVersion8_0_0 : SubtargetFeatureISAVersion <8,0,0>;
def FeatureISAVersion8_0_1 : SubtargetFeatureISAVersion <8,0,1>;
+def FeatureISAVersion8_0_3 : SubtargetFeatureISAVersion <8,0,3>;
class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
"localmemorysize"#Value,
@@ -252,7 +253,7 @@ def FeatureSeaIslands : SubtargetFeature
def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
[Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536,
FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
- FeatureGCN3Encoding, FeatureCIInsts, FeatureLDSBankCount32]>;
+ FeatureGCN3Encoding, FeatureCIInsts]>;
//===----------------------------------------------------------------------===//
Modified: vendor/llvm/dist/lib/Target/AMDGPU/AMDGPUSubtarget.h
==============================================================================
--- vendor/llvm/dist/lib/Target/AMDGPU/AMDGPUSubtarget.h Sat Feb 13 14:57:04 2016 (r295589)
+++ vendor/llvm/dist/lib/Target/AMDGPU/AMDGPUSubtarget.h Sat Feb 13 14:57:10 2016 (r295590)
@@ -53,7 +53,8 @@ public:
ISAVersion7_0_0,
ISAVersion7_0_1,
ISAVersion8_0_0,
- ISAVersion8_0_1
+ ISAVersion8_0_1,
+ ISAVersion8_0_3
};
private:
Modified: vendor/llvm/dist/lib/Target/AMDGPU/Processors.td
==============================================================================
--- vendor/llvm/dist/lib/Target/AMDGPU/Processors.td Sat Feb 13 14:57:04 2016 (r295589)
+++ vendor/llvm/dist/lib/Target/AMDGPU/Processors.td Sat Feb 13 14:57:10 2016 (r295590)
@@ -128,21 +128,23 @@ def : ProcessorModel<"mullins", SIQua
//===----------------------------------------------------------------------===//
def : ProcessorModel<"tonga", SIQuarterSpeedModel,
- [FeatureVolcanicIslands, FeatureSGPRInitBug, FeatureISAVersion8_0_0]
+ [FeatureVolcanicIslands, FeatureSGPRInitBug, FeatureISAVersion8_0_0,
+ FeatureLDSBankCount32]
>;
def : ProcessorModel<"iceland", SIQuarterSpeedModel,
- [FeatureVolcanicIslands, FeatureSGPRInitBug, FeatureISAVersion8_0_0]
+ [FeatureVolcanicIslands, FeatureSGPRInitBug, FeatureISAVersion8_0_0,
+ FeatureLDSBankCount32]
>;
def : ProcessorModel<"carrizo", SIQuarterSpeedModel,
- [FeatureVolcanicIslands, FeatureISAVersion8_0_1]
+ [FeatureVolcanicIslands, FeatureISAVersion8_0_1, FeatureLDSBankCount32]
>;
def : ProcessorModel<"fiji", SIQuarterSpeedModel,
- [FeatureVolcanicIslands, FeatureISAVersion8_0_1]
+ [FeatureVolcanicIslands, FeatureISAVersion8_0_3, FeatureLDSBankCount32]
>;
def : ProcessorModel<"stoney", SIQuarterSpeedModel,
- [FeatureVolcanicIslands, FeatureISAVersion8_0_1]
+ [FeatureVolcanicIslands, FeatureISAVersion8_0_1, FeatureLDSBankCount16]
>;
Modified: vendor/llvm/dist/lib/Target/AMDGPU/SIRegisterInfo.cpp
==============================================================================
--- vendor/llvm/dist/lib/Target/AMDGPU/SIRegisterInfo.cpp Sat Feb 13 14:57:04 2016 (r295589)
+++ vendor/llvm/dist/lib/Target/AMDGPU/SIRegisterInfo.cpp Sat Feb 13 14:57:10 2016 (r295590)
@@ -234,6 +234,7 @@ void SIRegisterInfo::buildScratchLoadSto
bool IsLoad = TII->get(LoadStoreOp).mayLoad();
bool RanOutOfSGPRs = false;
+ bool Scavenged = false;
unsigned SOffset = ScratchOffset;
unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
@@ -244,6 +245,8 @@ void SIRegisterInfo::buildScratchLoadSto
if (SOffset == AMDGPU::NoRegister) {
RanOutOfSGPRs = true;
SOffset = AMDGPU::SGPR0;
+ } else {
+ Scavenged = true;
}
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
.addReg(ScratchOffset)
@@ -259,10 +262,14 @@ void SIRegisterInfo::buildScratchLoadSto
getPhysRegSubReg(Value, &AMDGPU::VGPR_32RegClass, i) :
Value;
+ unsigned SOffsetRegState = 0;
+ if (i + 1 == e && Scavenged)
+ SOffsetRegState |= RegState::Kill;
+
BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
.addReg(SubReg, getDefRegState(IsLoad))
.addReg(ScratchRsrcReg)
- .addReg(SOffset)
+ .addReg(SOffset, SOffsetRegState)
.addImm(Offset)
.addImm(0) // glc
.addImm(0) // slc
Modified: vendor/llvm/dist/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
==============================================================================
--- vendor/llvm/dist/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp Sat Feb 13 14:57:04 2016 (r295589)
+++ vendor/llvm/dist/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp Sat Feb 13 14:57:10 2016 (r295590)
@@ -41,6 +41,9 @@ IsaVersion getIsaVersion(const FeatureBi
if (Features.test(FeatureISAVersion8_0_1))
return {8, 0, 1};
+ if (Features.test(FeatureISAVersion8_0_3))
+ return {8, 0, 3};
+
return {0, 0, 0};
}
Modified: vendor/llvm/dist/lib/Target/ARM/ARMISelDAGToDAG.cpp
==============================================================================
--- vendor/llvm/dist/lib/Target/ARM/ARMISelDAGToDAG.cpp Sat Feb 13 14:57:04 2016 (r295589)
+++ vendor/llvm/dist/lib/Target/ARM/ARMISelDAGToDAG.cpp Sat Feb 13 14:57:10 2016 (r295590)
@@ -747,7 +747,7 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SD
// If Offset is a multiply-by-constant and it's profitable to extract a shift
// and use it in a shifted operand do so.
- if (Offset.getOpcode() == ISD::MUL) {
+ if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
unsigned PowerOfTwo = 0;
SDValue NewMulConst;
if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
@@ -1422,7 +1422,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSo
// If OffReg is a multiply-by-constant and it's profitable to extract a shift
// and use it in a shifted operand do so.
- if (OffReg.getOpcode() == ISD::MUL) {
+ if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
unsigned PowerOfTwo = 0;
SDValue NewMulConst;
if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
Modified: vendor/llvm/dist/lib/Target/PowerPC/PPCFastISel.cpp
==============================================================================
--- vendor/llvm/dist/lib/Target/PowerPC/PPCFastISel.cpp Sat Feb 13 14:57:04 2016 (r295589)
+++ vendor/llvm/dist/lib/Target/PowerPC/PPCFastISel.cpp Sat Feb 13 14:57:10 2016 (r295590)
@@ -1615,7 +1615,7 @@ bool PPCFastISel::SelectRet(const Instru
// extension rather than sign extension. Make sure we pass the return
// value extension property to integer materialization.
unsigned SrcReg =
- PPCMaterializeInt(CI, MVT::i64, VA.getLocInfo() == CCValAssign::SExt);
+ PPCMaterializeInt(CI, MVT::i64, VA.getLocInfo() != CCValAssign::ZExt);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg);
@@ -2091,25 +2091,21 @@ unsigned PPCFastISel::PPCMaterializeInt(
const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass :
&PPC::GPRCRegClass);
+ int64_t Imm = UseSExt ? CI->getSExtValue() : CI->getZExtValue();
// If the constant is in range, use a load-immediate.
- if (UseSExt && isInt<16>(CI->getSExtValue())) {
+ // Since LI will sign extend the constant we need to make sure that for
+ // our zeroext constants that the sign extended constant fits into 16-bits -
+ // a range of 0..0x7fff.
+ if (isInt<16>(Imm)) {
unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI;
unsigned ImmReg = createResultReg(RC);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg)
- .addImm(CI->getSExtValue());
- return ImmReg;
- } else if (!UseSExt && isUInt<16>(CI->getZExtValue())) {
- unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI;
- unsigned ImmReg = createResultReg(RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg)
- .addImm(CI->getZExtValue());
+ .addImm(Imm);
return ImmReg;
}
// Construct the constant piecewise.
- int64_t Imm = CI->getZExtValue();
-
if (VT == MVT::i64)
return PPCMaterialize64BitInt(Imm, RC);
else if (VT == MVT::i32)
Modified: vendor/llvm/dist/lib/Target/PowerPC/PPCInstrAltivec.td
==============================================================================
--- vendor/llvm/dist/lib/Target/PowerPC/PPCInstrAltivec.td Sat Feb 13 14:57:04 2016 (r295589)
+++ vendor/llvm/dist/lib/Target/PowerPC/PPCInstrAltivec.td Sat Feb 13 14:57:10 2016 (r295590)
@@ -736,7 +736,7 @@ def VPKSHSS : VX1_Int_Ty2<398, "vpkshss"
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-src-vendor
mailing list