author    tstellar <tstellar@91177308-0d34-0410-b5e6-96231b3b80d8> 2012-10-22 14:55:30 +0000
committer tstellar <tstellar@91177308-0d34-0410-b5e6-96231b3b80d8> 2012-10-22 14:55:30 +0000
commit    81d3998812b5977a220d62076cc446822747af21 (patch)
tree      b073cb1be9c65dbdd6cd312c322efd3c04abb0af
parent    63a079bf76d139b91e170557cf16b17bdbecd58d (diff)
Merge master branch
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/R600/@166411 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--  bindings/ocaml/executionengine/llvm_executionengine.ml | 2
-rw-r--r--  docs/CodingStandards.rst | 143
-rw-r--r--  docs/CommandGuide/lit.rst | 13
-rw-r--r--  docs/LangRef.html | 14
-rw-r--r--  docs/README.txt | 2
-rw-r--r--  docs/ReleaseNotes.html | 21
-rw-r--r--  docs/TestingGuide.html | 17
-rw-r--r--  docs/subsystems.rst | 7
-rw-r--r--  include/llvm-c/Transforms/Vectorize.h | 3
-rw-r--r--  include/llvm/ADT/Optional.h | 9
-rw-r--r--  include/llvm/ADT/OwningPtr.h | 18
-rw-r--r--  include/llvm/ADT/SmallBitVector.h | 8
-rw-r--r--  include/llvm/Analysis/ScalarEvolutionExpander.h | 4
-rw-r--r--  include/llvm/Attributes.h | 15
-rw-r--r--  include/llvm/CodeGen/MachineFrameInfo.h | 9
-rw-r--r--  include/llvm/CodeGen/MachineRegisterInfo.h | 45
-rw-r--r--  include/llvm/CodeGen/SchedulerRegistry.h | 5
-rw-r--r--  include/llvm/ExecutionEngine/RuntimeDyld.h | 2
-rw-r--r--  include/llvm/InitializePasses.h | 2
-rw-r--r--  include/llvm/Intrinsics.td | 6
-rw-r--r--  include/llvm/LinkAllPasses.h | 1
-rw-r--r--  include/llvm/MC/MCParser/MCAsmParser.h | 37
-rw-r--r--  include/llvm/MC/MCParser/MCParsedAsmOperand.h | 5
-rw-r--r--  include/llvm/MC/MCSchedule.h | 8
-rw-r--r--  include/llvm/MC/MCTargetAsmParser.h | 14
-rw-r--r--  include/llvm/Object/ObjectFile.h | 2
-rw-r--r--  include/llvm/Operator.h | 32
-rw-r--r--  include/llvm/Transforms/IPO.h | 5
-rw-r--r--  include/llvm/Transforms/Scalar.h | 7
-rw-r--r--  include/llvm/Transforms/Utils/BasicBlockUtils.h | 25
-rw-r--r--  include/llvm/Transforms/Vectorize.h | 6
-rw-r--r--  lib/Analysis/ScalarEvolutionExpander.cpp | 23
-rw-r--r--  lib/CodeGen/ExecutionDepsFix.cpp | 2
-rw-r--r--  lib/CodeGen/LiveRangeEdit.cpp | 7
-rw-r--r--  lib/CodeGen/MachineFunctionPrinterPass.cpp | 2
-rw-r--r--  lib/CodeGen/MachineRegisterInfo.cpp | 4
-rw-r--r--  lib/CodeGen/Passes.cpp | 4
-rw-r--r--  lib/CodeGen/PrologEpilogInserter.cpp | 2
-rw-r--r--  lib/CodeGen/RegAllocFast.cpp | 53
-rw-r--r--  lib/CodeGen/RegisterCoalescer.cpp | 2
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 65
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 13
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 159
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 3
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 3
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 10
-rw-r--r--  lib/CodeGen/SelectionDAG/TargetLowering.cpp | 5
-rw-r--r--  lib/CodeGen/StackColoring.cpp | 20
-rw-r--r--  lib/CodeGen/TargetSchedule.cpp | 12
-rw-r--r--  lib/DebugInfo/DWARFContext.cpp | 37
-rw-r--r--  lib/MC/ELFObjectWriter.cpp | 3
-rw-r--r--  lib/MC/MCParser/AsmParser.cpp | 265
-rw-r--r--  lib/MC/MCParser/MCTargetAsmParser.cpp | 2
-rw-r--r--  lib/MC/SubtargetFeature.cpp | 4
-rw-r--r--  lib/Support/Unix/Signals.inc | 30
-rw-r--r--  lib/Target/ARM/ARMFrameLowering.cpp | 4
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 29
-rw-r--r--  lib/Target/ARM/ARMISelLowering.h | 3
-rw-r--r--  lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 5
-rw-r--r--  lib/Target/MSP430/MSP430FrameLowering.cpp | 14
-rw-r--r--  lib/Target/MSP430/MSP430FrameLowering.h | 1
-rw-r--r--  lib/Target/MSP430/MSP430RegisterInfo.cpp | 14
-rw-r--r--  lib/Target/MSP430/MSP430RegisterInfo.h | 2
-rw-r--r--  lib/Target/Mips/Mips16InstrInfo.cpp | 31
-rw-r--r--  lib/Target/Mips/Mips16InstrInfo.td | 267
-rw-r--r--  lib/Target/Mips/Mips64InstrInfo.td | 1
-rw-r--r--  lib/Target/Mips/MipsISelLowering.cpp | 55
-rw-r--r--  lib/Target/Mips/MipsISelLowering.h | 8
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.td | 52
-rw-r--r--  lib/Target/Mips/MipsMachineFunction.h | 7
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp | 47
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmParser.cpp | 36
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 193
-rw-r--r--  lib/Target/X86/X86ISelLowering.h | 4
-rw-r--r--  lib/Target/X86/X86MCInstLower.cpp | 18
-rw-r--r--  lib/Target/X86/X86VZeroUpper.cpp | 2
-rw-r--r--  lib/Transforms/IPO/BarrierNoopPass.cpp | 47
-rw-r--r--  lib/Transforms/IPO/CMakeLists.txt | 1
-rw-r--r--  lib/Transforms/IPO/PassManagerBuilder.cpp | 14
-rw-r--r--  lib/Transforms/InstCombine/InstCombineSelect.cpp | 4
-rw-r--r--  lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 7
-rw-r--r--  lib/Transforms/Instrumentation/AddressSanitizer.cpp | 51
-rw-r--r--  lib/Transforms/Instrumentation/BlackList.cpp | 3
-rw-r--r--  lib/Transforms/Instrumentation/BlackList.h | 2
-rw-r--r--  lib/Transforms/Scalar/DCE.cpp | 9
-rw-r--r--  lib/Transforms/Scalar/IndVarSimplify.cpp | 8
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp | 134
-rw-r--r--  lib/Transforms/Scalar/LoopUnswitch.cpp | 10
-rw-r--r--  lib/Transforms/Scalar/SROA.cpp | 178
-rw-r--r--  lib/Transforms/Scalar/SimplifyLibCalls.cpp | 51
-rw-r--r--  lib/Transforms/Scalar/TailRecursionElimination.cpp | 123
-rw-r--r--  lib/Transforms/Utils/BasicBlockUtils.cpp | 39
-rw-r--r--  lib/Transforms/Utils/LowerInvoke.cpp | 32
-rw-r--r--  lib/Transforms/Utils/SimplifyLibCalls.cpp | 55
-rw-r--r--  lib/Transforms/Vectorize/CMakeLists.txt | 1
-rw-r--r--  lib/Transforms/Vectorize/LoopVectorize.cpp | 1358
-rw-r--r--  lib/Transforms/Vectorize/Vectorize.cpp | 8
-rw-r--r--  lib/VMCore/DataLayout.cpp | 6
-rw-r--r--  test/BugPoint/crash-narrowfunctiontest.ll | 1
-rw-r--r--  test/BugPoint/metadata.ll | 1
-rw-r--r--  test/BugPoint/remove_arguments_test.ll | 1
-rw-r--r--  test/CodeGen/ARM/2012-10-18-PR14099-ByvalFrameAddress.ll | 29
-rw-r--r--  test/CodeGen/ARM/trap.ll | 12
-rw-r--r--  test/CodeGen/MSP430/fp.ll | 17
-rw-r--r--  test/CodeGen/Mips/brconeq.ll | 38
-rw-r--r--  test/CodeGen/Mips/brconeqk.ll | 22
-rw-r--r--  test/CodeGen/Mips/brconeqz.ll | 20
-rw-r--r--  test/CodeGen/Mips/brconge.ll | 37
-rw-r--r--  test/CodeGen/Mips/brcongt.ll | 25
-rw-r--r--  test/CodeGen/Mips/brconle.ll | 37
-rw-r--r--  test/CodeGen/Mips/brconlt.ll | 27
-rw-r--r--  test/CodeGen/Mips/brconne.ll | 26
-rw-r--r--  test/CodeGen/Mips/brconnek.ll | 25
-rw-r--r--  test/CodeGen/Mips/brconnez.ll | 24
-rw-r--r--  test/CodeGen/Mips/mips64-sret.ll | 14
-rw-r--r--  test/CodeGen/Mips/tailcall.ll | 100
-rw-r--r--  test/CodeGen/PowerPC/i64_fp_round.ll | 27
-rw-r--r--  test/CodeGen/X86/2012-10-18-crash-dagco.ll | 61
-rw-r--r--  test/CodeGen/X86/buildvec-insertvec.ll | 15
-rw-r--r--  test/CodeGen/X86/extract-concat.ll | 17
-rw-r--r--  test/CodeGen/X86/pr14090.ll | 76
-rw-r--r--  test/CodeGen/X86/pr14098.ll | 23
-rw-r--r--  test/CodeGen/X86/sjlj.ll | 18
-rw-r--r--  test/Instrumentation/AddressSanitizer/basic.ll | 20
-rw-r--r--  test/Makefile | 11
-rw-r--r--  test/Transforms/BBVectorize/simple-ldstr-ptrs.ll | 50
-rw-r--r--  test/Transforms/IndVarSimplify/2012-10-19-congruent-constant.ll | 27
-rw-r--r--  test/Transforms/IndVarSimplify/crash.ll | 18
-rw-r--r--  test/Transforms/IndVarSimplify/no-iv-rewrite.ll | 1
-rw-r--r--  test/Transforms/InstCombine/obfuscated_splat.ll | 11
-rw-r--r--  test/Transforms/InstCombine/select.ll | 34
-rw-r--r--  test/Transforms/InstCombine/strcpy-1.ll | 45
-rw-r--r--  test/Transforms/InstCombine/strcpy-2.ll | 22
-rw-r--r--  test/Transforms/InstCombine/strcpy_chk-1.ll | 54
-rw-r--r--  test/Transforms/LoopVectorize/2012-10-20-infloop.ll | 27
-rw-r--r--  test/Transforms/LoopVectorize/gcc-examples.ll | 648
-rw-r--r--  test/Transforms/LoopVectorize/increment.ll | 66
-rw-r--r--  test/Transforms/LoopVectorize/induction_plus.ll | 30
-rw-r--r--  test/Transforms/LoopVectorize/lit.local.cfg | 1
-rw-r--r--  test/Transforms/LoopVectorize/non-const-n.ll | 38
-rw-r--r--  test/Transforms/LoopVectorize/read-only.ll | 32
-rw-r--r--  test/Transforms/LoopVectorize/reduction.ll | 152
-rw-r--r--  test/Transforms/LoopVectorize/scalar-select.ll | 37
-rw-r--r--  test/Transforms/SROA/basictest.ll | 20
-rw-r--r--  test/Transforms/SimplifyLibCalls/FFS.ll | 15
-rw-r--r--  test/Transforms/SimplifyLibCalls/StrCpy.ll | 37
-rw-r--r--  test/Transforms/TailCallElim/nocapture.ll | 23
-rw-r--r--  test/lit.cfg | 13
-rw-r--r--  test/lit.site.cfg.in | 1
-rw-r--r--  tools/bugpoint-passes/bugpoint.exports | 1
-rw-r--r--  tools/llvm-dwarfdump/llvm-dwarfdump.cpp | 6
-rw-r--r--  unittests/ExecutionEngine/JIT/JITTest.cpp | 2
-rw-r--r--  unittests/ExecutionEngine/JIT/Makefile | 7
-rw-r--r--  utils/TableGen/CodeGenInstruction.cpp | 11
-rw-r--r--  utils/lit/lit/ExampleTests/LLVM.InTree/test/Bar/bar-test.ll | 3
-rw-r--r--  utils/lit/lit/ExampleTests/LLVM.InTree/test/lit.cfg | 2
-rw-r--r--  utils/lit/lit/ExampleTests/LLVM.OutOfTree/src/test/lit.cfg | 2
-rw-r--r--  utils/lit/lit/ExampleTests/lit.cfg | 2
-rw-r--r--  utils/lit/lit/ExampleTests/vg-fail.c | 4
-rw-r--r--  utils/lit/lit/ExampleTests/xfail-feature.c | 4
-rw-r--r--  utils/lit/lit/LitConfig.py | 3
-rw-r--r--  utils/lit/lit/TestRunner.py | 34
-rw-r--r--  utils/lit/lit/TestingConfig.py | 10
-rw-r--r--  utils/vim/llvm.vim | 1
164 files changed, 5586 insertions, 783 deletions
diff --git a/bindings/ocaml/executionengine/llvm_executionengine.ml b/bindings/ocaml/executionengine/llvm_executionengine.ml
index b6178dc055e..ddb53bbb5af 100644
--- a/bindings/ocaml/executionengine/llvm_executionengine.ml
+++ b/bindings/ocaml/executionengine/llvm_executionengine.ml
@@ -84,7 +84,7 @@ module ExecutionEngine = struct
= "llvm_ee_free_machine_code"
external target_data: t -> Llvm_target.DataLayout.t
- = "LLVMGetExecutionEngineDataLayout"
+ = "LLVMGetExecutionEngineTargetData"
(* The following are not bound. Patches are welcome.
diff --git a/docs/CodingStandards.rst b/docs/CodingStandards.rst
index de50e6eeaf5..418e3f05a36 100644
--- a/docs/CodingStandards.rst
+++ b/docs/CodingStandards.rst
@@ -146,6 +146,132 @@ useful to use C style (``/* */``) comments however:
To comment out a large block of code, use ``#if 0`` and ``#endif``. These nest
properly and are better behaved in general than C style comments.
+Doxygen Use in Documentation Comments
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Use the ``\file`` command to turn the standard file header into a file-level
+comment.
+
+Include descriptive ``\brief`` paragraphs for all public interfaces (public
+classes, member and non-member functions). Explain API use and purpose in
+``\brief`` paragraphs; don't just restate the information that can be inferred
+from the API name. Put detailed discussion into separate paragraphs.
+
+To refer to parameter names inside a paragraph, use the ``\p name`` command.
+Don't use the ``\arg name`` command since it starts a new paragraph that
+contains documentation for the parameter.
+
+Wrap non-inline code examples in ``\code ... \endcode``.
+
+To document a function parameter, start a new paragraph with the
+``\param name`` command. If the parameter is used as an out or an in/out
+parameter, use the ``\param [out] name`` or ``\param [in,out] name`` command,
+respectively.
+
+To describe a function's return value, start a new paragraph with the
+``\returns`` command.
+
+A minimal documentation comment:
+
+.. code-block:: c++
+
+ /// \brief Does foo and bar.
+ void fooBar(bool Baz);
+
+A documentation comment that uses all Doxygen features in a preferred way:
+
+.. code-block:: c++
+
+ /// \brief Does foo and bar.
+ ///
+ /// Does not do foo the usual way if \p Baz is true.
+ ///
+ /// Typical usage:
+ /// \code
+ /// fooBar(false, "quux", Res);
+ /// \endcode
+ ///
+ /// \param Quux kind of foo to do.
+ /// \param [out] Result filled with bar sequence on foo success.
+ ///
+ /// \returns true on success.
+ bool fooBar(bool Baz, StringRef Quux, std::vector<int> &Result);
+
+Don't duplicate the documentation comment in the header file and in the
+implementation file. Put the documentation comments for public APIs into the
+header file. Documentation comments for private APIs can go in the
+implementation file. In any case, implementation files can include additional
+comments (not necessarily in Doxygen markup) to explain implementation details
+as needed.
+
+Don't duplicate the function or class name at the beginning of the comment.
+For humans it is obvious which function or class is being documented;
+automatic documentation processing tools are smart enough to bind the comment
+to the correct declaration.
+
+Wrong:
+
+.. code-block:: c++
+
+ // In Something.h:
+
+ /// Something - An abstraction for some complicated thing.
+ class Something {
+ public:
+ /// fooBar - Does foo and bar.
+ void fooBar();
+ };
+
+ // In Something.cpp:
+
+ /// fooBar - Does foo and bar.
+ void Something::fooBar() { ... }
+
+Correct:
+
+.. code-block:: c++
+
+ // In Something.h:
+
+ /// \brief An abstraction for some complicated thing.
+ class Something {
+ public:
+ /// \brief Does foo and bar.
+ void fooBar();
+ };
+
+ // In Something.cpp:
+
+ // Builds a B-tree in order to do foo. See paper by...
+ void Something::fooBar() { ... }
+
+It is not required to use additional Doxygen features, but sometimes it might
+be a good idea to do so.
+
+Consider:
+
+* adding comments to any narrow namespace containing a collection of
+ related functions or types;
+
+* using top-level groups to organize a collection of related functions at
+ namespace scope where the grouping is smaller than the namespace;
+
+* using member groups and additional comments attached to member
+ groups to organize within a class.
+
+For example:
+
+.. code-block:: c++
+
+ class Something {
+ /// \name Functions that do Foo.
+ /// @{
+ void fooBar();
+ void fooBaz();
+ /// @}
+ ...
+ };
+
``#include`` Style
^^^^^^^^^^^^^^^^^^
@@ -604,8 +730,7 @@ code to be structured like this:
.. code-block:: c++
- /// containsFoo - Return true if the specified list has an element that is
- /// a foo.
+ /// \returns true if the specified list has an element that is a foo.
static bool containsFoo(const std::vector<Bar*> &List) {
for (unsigned i = 0, e = List.size(); i != e; ++i)
if (List[i]->isFoo())
@@ -1051,21 +1176,21 @@ If a namespace definition is small and *easily* fits on a screen (say, less than
namespace llvm {
namespace X86 {
- /// RelocationType - An enum for the x86 relocation codes. Note that
+ /// \brief An enum for the x86 relocation codes. Note that
/// the terminology here doesn't follow x86 convention - word means
/// 32-bit and dword means 64-bit.
enum RelocationType {
- /// reloc_pcrel_word - PC relative relocation, add the relocated value to
+ /// \brief PC relative relocation, add the relocated value to
/// the value already in memory, after we adjust it for where the PC is.
reloc_pcrel_word = 0,
- /// reloc_picrel_word - PIC base relative relocation, add the relocated
- /// value to the value already in memory, after we adjust it for where the
+ /// \brief PIC base relative relocation, add the relocated value to
+ /// the value already in memory, after we adjust it for where the
/// PIC base is.
reloc_picrel_word = 1,
- /// reloc_absolute_word, reloc_absolute_dword - Absolute relocation, just
- /// add the relocated value to the value already in memory.
+ /// \brief Absolute relocation, just add the relocated value to the
+ /// value already in memory.
reloc_absolute_word = 2,
reloc_absolute_dword = 3
};
@@ -1084,7 +1209,7 @@ closed. For example:
namespace llvm {
namespace knowledge {
- /// Grokable - This class represents things that Smith can have an intimate
+ /// This class represents things that Smith can have an intimate
/// understanding of and contains the data associated with it.
class Grokable {
...
diff --git a/docs/CommandGuide/lit.rst b/docs/CommandGuide/lit.rst
index 3eb0be91f13..9e96cd2a4bf 100644
--- a/docs/CommandGuide/lit.rst
+++ b/docs/CommandGuide/lit.rst
@@ -125,6 +125,10 @@ EXECUTION OPTIONS
*--error-exitcode* argument for valgrind is used so that valgrind failures will
cause the program to exit with a non-zero status.
+ When this option is enabled, **lit** will also automatically provide a
+ "valgrind" feature that can be used to conditionally disable (or expect failure
+ in) certain tests.
+
**--vg-arg**\ =\ *ARG*
@@ -133,6 +137,15 @@ EXECUTION OPTIONS
+**--vg-leak**
+
+ When *--vg* is used, enable memory leak checks. When this option is enabled,
+ **lit** will also automatically provide a "vg_leak" feature that can be
+ used to conditionally disable (or expect failure in) certain tests.
+
+
+
+
**--time-tests**
Track the wall time individual tests take to execute and includes the results in
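
A test can key off these automatically provided features in its header lines; a minimal illustrative test header (the RUN line is a generic placeholder, and `vg_leak` is the feature name documented above):

    ; RUN: opt < %s -instcombine -S | FileCheck %s
    ; XFAIL: vg_leak
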
diff --git a/docs/LangRef.html b/docs/LangRef.html
index 167397ff53d..874e12fa44a 100644
--- a/docs/LangRef.html
+++ b/docs/LangRef.html
@@ -1364,11 +1364,13 @@ target datalayout = "<i>layout specification</i>"
8-bits. If omitted, the natural stack alignment defaults to "unspecified",
which does not prevent any alignment promotions.</dd>
- <dt><tt>p:<i>size</i>:<i>abi</i>:<i>pref</i></tt></dt>
+ <dt><tt>p[n]:<i>size</i>:<i>abi</i>:<i>pref</i></tt></dt>
<dd>This specifies the <i>size</i> of a pointer and its <i>abi</i> and
- <i>preferred</i> alignments. All sizes are in bits. Specifying
- the <i>pref</i> alignment is optional. If omitted, the
- preceding <tt>:</tt> should be omitted too.</dd>
+ <i>preferred</i> alignments for address space <i>n</i>. All sizes are in
+ bits. Specifying the <i>pref</i> alignment is optional. If omitted, the
+ preceding <tt>:</tt> should be omitted too. The address space <i>n</i> is
+ optional; if not specified, it denotes the default address space 0. The
+ value of <i>n</i> must be in the range [1,2^23).</dd>
<dt><tt>i<i>size</i>:<i>abi</i>:<i>pref</i></tt></dt>
<dd>This specifies the alignment for an integer type of a given bit
@@ -1409,6 +1411,10 @@ target datalayout = "<i>layout specification</i>"
<ul>
<li><tt>E</tt> - big endian</li>
<li><tt>p:64:64:64</tt> - 64-bit pointers with 64-bit alignment</li>
+ <li><tt>p1:32:32:32</tt> - 32-bit pointers with 32-bit alignment for
+ address space 1</li>
+ <li><tt>p2:16:32:32</tt> - 16-bit pointers with 32-bit alignment for
+ address space 2</li>
<li><tt>i1:8:8</tt> - i1 is 8-bit (byte) aligned</li>
<li><tt>i8:8:8</tt> - i8 is 8-bit (byte) aligned</li>
<li><tt>i16:16:16</tt> - i16 is 16-bit aligned</li>
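
A rough C++ sketch of querying the per-address-space pointer sizes this syntax describes; it assumes the DataLayout string constructor and a per-address-space getPointerSizeInBits overload, whose exact signature may differ at this revision:

    #include "llvm/DataLayout.h"

    void pointerSizes() {
      // 64-bit pointers in the default address space, 32-bit pointers in
      // address space 1, matching the example layout strings above.
      llvm::DataLayout DL("e-p:64:64:64-p1:32:32:32");
      unsigned AS0 = DL.getPointerSizeInBits(0); // 64
      unsigned AS1 = DL.getPointerSizeInBits(1); // 32
      (void)AS0; (void)AS1;
    }
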
diff --git a/docs/README.txt b/docs/README.txt
index 2fbbf987405..5ddd599d8a7 100644
--- a/docs/README.txt
+++ b/docs/README.txt
@@ -6,7 +6,7 @@ The LLVM documentation is currently written in two formats:
* Plain HTML documentation.
* reStructured Text documentation using the Sphinx documentation generator. It
- is currently tested with Sphinx 1.1.3.
+ is currently tested with Sphinx 1.1.3.
For more information, see the "Sphinx Introduction for LLVM Developers"
document.
diff --git a/docs/ReleaseNotes.html b/docs/ReleaseNotes.html
index 75a6fd1ca10..0ef8f3d1f37 100644
--- a/docs/ReleaseNotes.html
+++ b/docs/ReleaseNotes.html
@@ -466,6 +466,20 @@ Release Notes</a>.</h1>
<p>In addition to many minor performance tweaks and bug fixes, this release
includes a few major enhancements and additions to the optimizers:</p>
+<p> Loop Vectorizer - We've added a loop vectorizer and we are now able to
+ vectorize small loops. The loop vectorizer is disabled by default and
+ can be enabled using the <b>-mllvm -vectorize</b> flag. <br/>
+ We can now vectorize this code:
+
+ <pre class="doc_code">
+ for (i=0; i&lt;n; i++) {
+ a[i] = b[i+1] + c[i+3] + i;
+ sum += d[i];
+ }
+ </pre>
+
+ </p>
+
<ul>
<li>...</li>
</ul>
@@ -506,6 +520,8 @@ Release Notes</a>.</h1>
We use the lifetime markers to tell the codegen that a certain alloca
is used within a region.</p>
+<p> We now merge consecutive loads and stores. </p>
+
<p>We have put a significant amount of work into the code generator
infrastructure, which allows us to implement more aggressive algorithms and
make it run faster:</p>
@@ -645,6 +661,11 @@ Release Notes</a>.</h1>
<p>In addition, many APIs have changed in this release. Some of the major
LLVM API changes are:</p>
+<p> We've added a new interface for allowing IR-level passes to access
+ target-specific information. A new IR-level pass, called
+ "TargetTransformInfo" provides a number of low-level interfaces.
+ LSR and LowerInvoke already use the new interface. </p>
+
<ul>
<li>...</li>
</ul>
diff --git a/docs/TestingGuide.html b/docs/TestingGuide.html
index 1f9c9157306..ae2643fe4e8 100644
--- a/docs/TestingGuide.html
+++ b/docs/TestingGuide.html
@@ -798,14 +798,15 @@ define two separate CHECK lines that match on the same line.
<p>Sometimes it is necessary to mark a test case as "expected fail" or XFAIL.
You can easily mark a test as XFAIL just by including <tt>XFAIL: </tt> on a
line near the top of the file. This signals that the test case should succeed
- if the test fails. Such test cases are counted separately by the testing tool. To
- specify an expected fail, use the XFAIL keyword in the comments of the test
- program followed by a colon and one or more regular expressions (separated by
- a comma). The regular expressions allow you to XFAIL the test conditionally by
- host platform. The regular expressions following the : are matched against the
- target triplet for the host machine. If there is a match, the test is expected
- to fail. If not, the test is expected to succeed. To XFAIL everywhere just
- specify <tt>XFAIL: *</tt>. Here is an example of an <tt>XFAIL</tt> line:</p>
+ if the test fails. Such test cases are counted separately by the testing
+ tool. To specify an expected fail, use the XFAIL keyword in the comments of
+ the test program followed by a colon and one or more failure patterns. Each
+ failure pattern can be either '*' (to specify fail everywhere), or a part of a
+ target triple (indicating the test should fail on that platform), or the name
+ of a configurable feature (for example, "loadable_module"). If there is a
+ match, the test is expected to fail. If not, the test is expected to
+ succeed. To XFAIL everywhere just specify <tt>XFAIL: *</tt>. Here is an
+ example of an <tt>XFAIL</tt> line:</p>
<div class="doc_code">
<pre>
diff --git a/docs/subsystems.rst b/docs/subsystems.rst
index 8c3cdf2417b..6f77b79fbe2 100644
--- a/docs/subsystems.rst
+++ b/docs/subsystems.rst
@@ -91,3 +91,10 @@ Subsystem Documentation
* :ref:`segmented_stacks`
This document describes segmented stacks and how they are used in LLVM.
+
+* `Howto: Implementing LLVM Integrated Assembler`_
+
+ A simple guide for how to implement an LLVM integrated assembler for an
+ architecture.
+
+.. _`Howto: Implementing LLVM Integrated Assembler`: http://www.embecosm.com/download/ean10.html
diff --git a/include/llvm-c/Transforms/Vectorize.h b/include/llvm-c/Transforms/Vectorize.h
index 9e7c7540d76..68a9bdd3885 100644
--- a/include/llvm-c/Transforms/Vectorize.h
+++ b/include/llvm-c/Transforms/Vectorize.h
@@ -36,6 +36,9 @@ extern "C" {
/** See llvm::createBBVectorizePass function. */
void LLVMAddBBVectorizePass(LLVMPassManagerRef PM);
+/** See llvm::createLoopVectorizePass function. */
+void LLVMAddLoopVectorizePass(LLVMPassManagerRef PM);
+
/**
* @}
*/
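
A minimal sketch of driving the new C API entry point; it assumes only the functions declared above plus the standard pass-manager calls from llvm-c/Core.h:

    #include "llvm-c/Core.h"
    #include "llvm-c/Transforms/Vectorize.h"

    void addVectorizers(void) {
      LLVMPassManagerRef PM = LLVMCreatePassManager();
      LLVMAddBBVectorizePass(PM);   /* existing basic-block vectorizer */
      LLVMAddLoopVectorizePass(PM); /* the pass added above */
      LLVMDisposePassManager(PM);
    }
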
diff --git a/include/llvm/ADT/Optional.h b/include/llvm/ADT/Optional.h
index ee8b69f3d12..f43aeb1bc4d 100644
--- a/include/llvm/ADT/Optional.h
+++ b/include/llvm/ADT/Optional.h
@@ -16,8 +16,13 @@
#ifndef LLVM_ADT_OPTIONAL
#define LLVM_ADT_OPTIONAL
+#include "llvm/Support/Compiler.h"
#include <cassert>
+#if LLVM_USE_RVALUE_REFERENCES
+#include <utility>
+#endif
+
namespace llvm {
template<typename T>
@@ -28,6 +33,10 @@ public:
explicit Optional() : x(), hasVal(false) {}
Optional(const T &y) : x(y), hasVal(true) {}
+#if LLVM_USE_RVALUE_REFERENCES
+ Optional(T &&y) : x(std::forward<T>(y)), hasVal(true) {}
+#endif
+
static inline Optional create(const T* y) {
return y ? Optional(*y) : Optional();
}
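
With LLVM_USE_RVALUE_REFERENCES defined, the new constructor lets a temporary be moved into the Optional instead of copied; a minimal sketch:

    #include "llvm/ADT/Optional.h"
    #include <string>
    #include <utility>

    llvm::Optional<std::string> makeLabel() {
      std::string S("entry");
      // Binds to Optional(T &&y) and forwards S into the stored value.
      return llvm::Optional<std::string>(std::move(S));
    }
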
diff --git a/include/llvm/ADT/OwningPtr.h b/include/llvm/ADT/OwningPtr.h
index ea9495d3869..05bcd40d086 100644
--- a/include/llvm/ADT/OwningPtr.h
+++ b/include/llvm/ADT/OwningPtr.h
@@ -32,6 +32,15 @@ class OwningPtr {
public:
explicit OwningPtr(T *P = 0) : Ptr(P) {}
+#if LLVM_USE_RVALUE_REFERENCES
+ OwningPtr(OwningPtr &&Other) : Ptr(Other.take()) {}
+
+ OwningPtr &operator=(OwningPtr &&Other) {
+ reset(Other.take());
+ return *this;
+ }
+#endif
+
~OwningPtr() {
delete Ptr;
}
@@ -86,6 +95,15 @@ class OwningArrayPtr {
public:
explicit OwningArrayPtr(T *P = 0) : Ptr(P) {}
+#if LLVM_USE_RVALUE_REFERENCES
+ OwningArrayPtr(OwningArrayPtr &&Other) : Ptr(Other.take()) {}
+
+ OwningArrayPtr &operator=(OwningArrayPtr &&Other) {
+ reset(Other.take());
+ return *this;
+ }
+#endif
+
~OwningArrayPtr() {
delete [] Ptr;
}
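
A minimal sketch of the new move operations (again, only compiled when LLVM_USE_RVALUE_REFERENCES is set); take() nulls the source, so exactly one OwningPtr owns the object at a time:

    #include "llvm/ADT/OwningPtr.h"
    #include <utility>

    struct Buffer { int Data[16]; };

    void transferOwnership() {
      llvm::OwningPtr<Buffer> A(new Buffer());
      llvm::OwningPtr<Buffer> B(std::move(A));   // move-construct; A now holds null
      B = llvm::OwningPtr<Buffer>(new Buffer()); // move-assign; frees B's old Buffer
    }
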
diff --git a/include/llvm/ADT/SmallBitVector.h b/include/llvm/ADT/SmallBitVector.h
index fba1d12542a..a9cd54e13b3 100644
--- a/include/llvm/ADT/SmallBitVector.h
+++ b/include/llvm/ADT/SmallBitVector.h
@@ -306,8 +306,8 @@ public:
assert(E <= size() && "Attempted to set out-of-bounds range!");
if (I == E) return *this;
if (isSmall()) {
- uintptr_t EMask = 1 << E;
- uintptr_t IMask = 1 << I;
+ uintptr_t EMask = ((uintptr_t)1) << E;
+ uintptr_t IMask = ((uintptr_t)1) << I;
uintptr_t Mask = EMask - IMask;
setSmallBits(getSmallBits() | Mask);
} else
@@ -337,8 +337,8 @@ public:
assert(E <= size() && "Attempted to reset out-of-bounds range!");
if (I == E) return *this;
if (isSmall()) {
- uintptr_t EMask = 1 << E;
- uintptr_t IMask = 1 << I;
+ uintptr_t EMask = ((uintptr_t)1) << E;
+ uintptr_t IMask = ((uintptr_t)1) << I;
uintptr_t Mask = EMask - IMask;
setSmallBits(getSmallBits() & ~Mask);
} else
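
The point of the cast above: the literal 1 is a 32-bit int, so `1 << E` is undefined once E reaches 32, even though the result is stored in a (possibly 64-bit) uintptr_t. A small sketch of the corrected pattern:

    #include <stdint.h>

    // Builds a mask with bits [I, E) set. Widening the literal before the
    // shift keeps the whole computation in uintptr_t, so shift amounts up to
    // the pointer width (minus the small-vector tag bits) are safe.
    static uintptr_t rangeMask(unsigned I, unsigned E) {
      uintptr_t EMask = ((uintptr_t)1) << E;
      uintptr_t IMask = ((uintptr_t)1) << I;
      return EMask - IMask;
    }
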
diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h
index 3ab9c8256bb..3f8f149cb42 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpander.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -22,7 +22,7 @@
#include <set>
namespace llvm {
- class ScalarTargetTransformInfo;
+ class TargetLowering;
/// Return true if the given expression is safe to expand in the sense that
/// all materialized values are safe to speculate.
@@ -129,7 +129,7 @@ namespace llvm {
/// representative. Return the number of phis eliminated.
unsigned replaceCongruentIVs(Loop *L, const DominatorTree *DT,
SmallVectorImpl<WeakVH> &DeadInsts,
- const ScalarTargetTransformInfo *STTI = NULL);
+ const TargetLowering *TLI = NULL);
/// expandCodeFor - Insert code to directly compute the specified SCEV
/// expression into the program. The inserted code is inserted into the
diff --git a/include/llvm/Attributes.h b/include/llvm/Attributes.h
index a28aa183473..1e995f9a858 100644
--- a/include/llvm/Attributes.h
+++ b/include/llvm/Attributes.h
@@ -290,16 +290,7 @@ struct AttributeWithIndex {
static AttributeWithIndex get(LLVMContext &C, unsigned Idx,
ArrayRef<Attributes::AttrVal> Attrs) {
- AttrBuilder B;
-
- for (ArrayRef<Attributes::AttrVal>::iterator I = Attrs.begin(),
- E = Attrs.end(); I != E; ++I)
- B.addAttribute(*I);
-
- AttributeWithIndex P;
- P.Index = Idx;
- P.Attrs = Attributes::get(C, B);
- return P;
+ return get(Idx, Attributes::get(C, Attrs));
}
static AttributeWithIndex get(unsigned Idx, Attributes Attrs) {
AttributeWithIndex P;
@@ -324,8 +315,8 @@ public:
FunctionIndex = ~0U
};
private:
- /// AttrList - The attributes that we are managing. This can be null
- /// to represent the empty attributes list.
+ /// AttrList - The attributes that we are managing. This can be null to
+ /// represent the empty attributes list.
AttributeListImpl *AttrList;
public:
AttrListPtr() : AttrList(0) {}
diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h
index 7188b1abbdf..0e4e132e40d 100644
--- a/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/include/llvm/CodeGen/MachineFrameInfo.h
@@ -29,6 +29,7 @@ class MachineBasicBlock;
class TargetFrameLowering;
class BitVector;
class Value;
+class AllocaInst;
/// The CalleeSavedInfo class tracks the information need to locate where a
/// callee saved register is in the current frame.
@@ -106,14 +107,14 @@ class MachineFrameInfo {
/// Alloca - If this stack object is originated from an Alloca instruction
/// this value saves the original IR allocation. Can be NULL.
- const Value *Alloca;
+ const AllocaInst *Alloca;
// PreAllocated - If true, the object was mapped into the local frame
// block and doesn't need additional handling for allocation beyond that.
bool PreAllocated;
StackObject(uint64_t Sz, unsigned Al, int64_t SP, bool IM,
- bool isSS, bool NSP, const Value *Val)
+ bool isSS, bool NSP, const AllocaInst *Val)
: SPOffset(SP), Size(Sz), Alignment(Al), isImmutable(IM),
isSpillSlot(isSS), MayNeedSP(NSP), Alloca(Val), PreAllocated(false) {}
};
@@ -369,7 +370,7 @@ public:
/// getObjectAllocation - Return the underlying Alloca of the specified
/// stack object if it exists. Returns 0 if none exists.
- const Value* getObjectAllocation(int ObjectIdx) const {
+ const AllocaInst* getObjectAllocation(int ObjectIdx) const {
assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
"Invalid Object Idx!");
return Objects[ObjectIdx+NumFixedObjects].Alloca;
@@ -495,7 +496,7 @@ public:
/// a nonnegative identifier to represent it.
///
int CreateStackObject(uint64_t Size, unsigned Alignment, bool isSS,
- bool MayNeedSP = false, const Value *Alloca = 0) {
+ bool MayNeedSP = false, const AllocaInst *Alloca = 0) {
assert(Size != 0 && "Cannot allocate zero size stack objects!");
Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, MayNeedSP,
Alloca));
diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h
index a5bc7f7d391..4e86363f071 100644
--- a/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -77,16 +77,20 @@ class MachineRegisterInfo {
return MO->Contents.Reg.Next;
}
- /// UsedPhysRegs - This is a bit vector that is computed and set by the
+ /// UsedRegUnits - This is a bit vector that is computed and set by the
/// register allocator, and must be kept up to date by passes that run after
/// register allocation (though most don't modify this). This is used
/// so that the code generator knows which callee save registers to save and
/// for other target specific uses.
- /// This vector only has bits set for registers explicitly used, not their
- /// aliases.
- BitVector UsedPhysRegs;
-
- /// UsedPhysRegMask - Additional used physregs, but including aliases.
+ /// This vector has bits set for register units that are modified in the
+ /// current function. It doesn't include registers clobbered by function
+ /// calls with register mask operands.
+ BitVector UsedRegUnits;
+
+ /// UsedPhysRegMask - Additional used physregs including aliases.
+ /// This bit vector represents all the registers clobbered by function calls.
+ /// It can model things that UsedRegUnits can't, such as function calls that
+ /// clobber ymm7 but preserve the low half in xmm7.
BitVector UsedPhysRegMask;
/// ReservedRegs - This is a bit vector of reserved registers. The target
@@ -357,29 +361,27 @@ public:
//===--------------------------------------------------------------------===//
/// isPhysRegUsed - Return true if the specified register is used in this
- /// function. This only works after register allocation.
+ /// function. Also check for clobbered aliases and registers clobbered by
+ /// function calls with register mask operands.
+ ///
+ /// This only works after register allocation. It is primarily used by
+ /// PrologEpilogInserter to determine which callee-saved registers need
+ /// spilling.
bool isPhysRegUsed(unsigned Reg) const {
- return UsedPhysRegs.test(Reg) || UsedPhysRegMask.test(Reg);
- }
-
- /// isPhysRegOrOverlapUsed - Return true if Reg or any overlapping register
- /// is used in this function.
- bool isPhysRegOrOverlapUsed(unsigned Reg) const {
if (UsedPhysRegMask.test(Reg))
return true;
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- if (UsedPhysRegs.test(*AI))
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
+ if (UsedRegUnits.test(*Units))
return true;
return false;
}
/// setPhysRegUsed - Mark the specified register used in this function.
/// This should only be called during and after register allocation.
- void setPhysRegUsed(unsigned Reg) { UsedPhysRegs.set(Reg); }
-
- /// addPhysRegsUsed - Mark the specified registers used in this function.
- /// This should only be called during and after register allocation.
- void addPhysRegsUsed(const BitVector &Regs) { UsedPhysRegs |= Regs; }
+ void setPhysRegUsed(unsigned Reg) {
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
+ UsedRegUnits.set(*Units);
+ }
/// addPhysRegsUsedFromRegMask - Mark any registers not in RegMask as used.
/// This corresponds to the bit mask attached to register mask operands.
@@ -390,8 +392,9 @@ public:
/// setPhysRegUnused - Mark the specified register unused in this function.
/// This should only be called during and after register allocation.
void setPhysRegUnused(unsigned Reg) {
- UsedPhysRegs.reset(Reg);
UsedPhysRegMask.reset(Reg);
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
+ UsedRegUnits.reset(*Units);
}
diff --git a/include/llvm/CodeGen/SchedulerRegistry.h b/include/llvm/CodeGen/SchedulerRegistry.h
index a582b0c40c8..836b73a15a2 100644
--- a/include/llvm/CodeGen/SchedulerRegistry.h
+++ b/include/llvm/CodeGen/SchedulerRegistry.h
@@ -102,6 +102,11 @@ ScheduleDAGSDNodes *createVLIWDAGScheduler(SelectionDAGISel *IS,
ScheduleDAGSDNodes *createDefaultScheduler(SelectionDAGISel *IS,
CodeGenOpt::Level OptLevel);
+/// createDAGLinearizer - This creates a "no-scheduling" scheduler which
+/// linearizes the DAG using topological order.
+ScheduleDAGSDNodes *createDAGLinearizer(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel);
+
} // end namespace llvm
#endif
diff --git a/include/llvm/ExecutionEngine/RuntimeDyld.h b/include/llvm/ExecutionEngine/RuntimeDyld.h
index 7da4a4e09a3..a71b1411c8a 100644
--- a/include/llvm/ExecutionEngine/RuntimeDyld.h
+++ b/include/llvm/ExecutionEngine/RuntimeDyld.h
@@ -63,7 +63,7 @@ protected:
// Any relocations already associated with the symbol will be re-resolved.
void reassignSectionAddress(unsigned SectionID, uint64_t Addr);
public:
- RuntimeDyld(RTDyldMemoryManager*);
+ RuntimeDyld(RTDyldMemoryManager *);
~RuntimeDyld();
/// loadObject - prepare the object contained in the input buffer for
diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h
index ee9b1c5852e..a5f7008b6fb 100644
--- a/include/llvm/InitializePasses.h
+++ b/include/llvm/InitializePasses.h
@@ -66,6 +66,7 @@ void initializeAliasDebuggerPass(PassRegistry&);
void initializeAliasSetPrinterPass(PassRegistry&);
void initializeAlwaysInlinerPass(PassRegistry&);
void initializeArgPromotionPass(PassRegistry&);
+void initializeBarrierNoopPass(PassRegistry&);
void initializeBasicAliasAnalysisPass(PassRegistry&);
void initializeBasicCallGraphPass(PassRegistry&);
void initializeBlockExtractorPassPass(PassRegistry&);
@@ -261,6 +262,7 @@ void initializeVirtRegRewriterPass(PassRegistry&);
void initializeInstSimplifierPass(PassRegistry&);
void initializeUnpackMachineBundlesPass(PassRegistry&);
void initializeFinalizeMachineBundlesPass(PassRegistry&);
+void initializeLoopVectorizePass(PassRegistry&);
void initializeBBVectorizePass(PassRegistry&);
void initializeMachineFunctionPrinterPassPass(PassRegistry&);
}
diff --git a/include/llvm/Intrinsics.td b/include/llvm/Intrinsics.td
index 63d8d2c0f29..059bd804aa1 100644
--- a/include/llvm/Intrinsics.td
+++ b/include/llvm/Intrinsics.td
@@ -285,9 +285,9 @@ let Properties = [IntrNoMem] in {
// NOTE: these are internal interfaces.
def int_setjmp : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>;
-def int_longjmp : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty]>;
+def int_longjmp : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrNoReturn]>;
def int_sigsetjmp : Intrinsic<[llvm_i32_ty] , [llvm_ptr_ty, llvm_i32_ty]>;
-def int_siglongjmp : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty]>;
+def int_siglongjmp : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrNoReturn]>;
// Internal interface for object size checking
def int_objectsize : Intrinsic<[llvm_anyint_ty], [llvm_ptr_ty, llvm_i1_ty],
@@ -345,7 +345,7 @@ let Properties = [IntrNoMem] in {
}
def int_eh_sjlj_functioncontext : Intrinsic<[], [llvm_ptr_ty]>;
def int_eh_sjlj_setjmp : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>;
-def int_eh_sjlj_longjmp : Intrinsic<[], [llvm_ptr_ty]>;
+def int_eh_sjlj_longjmp : Intrinsic<[], [llvm_ptr_ty], [IntrNoReturn]>;
//===---------------- Generic Variable Attribute Intrinsics----------------===//
//
diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h
index 4b10d0e5415..8652acd941f 100644
--- a/include/llvm/LinkAllPasses.h
+++ b/include/llvm/LinkAllPasses.h
@@ -156,6 +156,7 @@ namespace {
(void) llvm::createCorrelatedValuePropagationPass();
(void) llvm::createMemDepPrinter();
(void) llvm::createInstructionSimplifierPass();
+ (void) llvm::createLoopVectorizePass();
(void) llvm::createBBVectorizePass();
(void)new llvm::IntervalPartition();
diff --git a/include/llvm/MC/MCParser/MCAsmParser.h b/include/llvm/MC/MCParser/MCAsmParser.h
index 08758cda226..fcf5b6a9b59 100644
--- a/include/llvm/MC/MCParser/MCAsmParser.h
+++ b/include/llvm/MC/MCParser/MCAsmParser.h
@@ -20,6 +20,8 @@ class MCAsmLexer;
class MCAsmParserExtension;
class MCContext;
class MCExpr;
+class MCInstPrinter;
+class MCInstrInfo;
class MCParsedAsmOperand;
class MCStreamer;
class MCTargetAsmParser;
@@ -29,6 +31,14 @@ class SourceMgr;
class StringRef;
class Twine;
+/// MCAsmParserSemaCallback - Generic Sema callback for assembly parser.
+class MCAsmParserSemaCallback {
+public:
+ virtual ~MCAsmParserSemaCallback();
+ virtual void *LookupInlineAsmIdentifier(StringRef Name, void *Loc,
+ unsigned &Size) = 0;
+};
+
/// MCAsmParser - Generic assembler parser interface, for use by target specific
/// assembly parsers.
class MCAsmParser {
@@ -75,26 +85,21 @@ public:
virtual bool Run(bool NoInitialTextSection, bool NoFinalize = false) = 0;
virtual void setParsingInlineAsm(bool V) = 0;
+ virtual bool isParsingInlineAsm() = 0;
+
+ /// ParseMSInlineAsm - Parse ms-style inline assembly.
+ virtual bool ParseMSInlineAsm(void *AsmLoc, std::string &AsmString,
+ unsigned &NumOutputs, unsigned &NumInputs,
+ SmallVectorImpl<void *> &OpDecls,
+ SmallVectorImpl<std::string> &Constraints,
+ SmallVectorImpl<std::string> &Clobbers,
+ const MCInstrInfo *MII,
+ const MCInstPrinter *IP,
+ MCAsmParserSemaCallback &SI) = 0;
/// ParseStatement - Parse the next statement.
virtual bool ParseStatement() = 0;
- /// getNumParsedOperands - Returns the number of MCAsmParsedOperands from the
- /// previously parsed statement.
- virtual unsigned getNumParsedOperands() = 0;
-
- /// getParsedOperand - Get a MCAsmParsedOperand.
- virtual MCParsedAsmOperand &getParsedOperand(unsigned OpNum) = 0;
-
- /// freeParsedOperands - Free the MCAsmParsedOperands.
- virtual void freeParsedOperands() = 0;
-
- /// isInstruction - Was the previously parsed statement an instruction?
- virtual bool isInstruction() = 0;
-
- /// getOpcode - Get the opcode from the previously parsed instruction.
- virtual unsigned getOpcode() = 0;
-
/// Warning - Emit a warning at the location \p L, with the message \p Msg.
///
/// \return The return value is true, if warnings are fatal.
diff --git a/include/llvm/MC/MCParser/MCParsedAsmOperand.h b/include/llvm/MC/MCParser/MCParsedAsmOperand.h
index 280145bfbc8..7d0914ebb49 100644
--- a/include/llvm/MC/MCParser/MCParsedAsmOperand.h
+++ b/include/llvm/MC/MCParser/MCParsedAsmOperand.h
@@ -57,12 +57,17 @@ public:
/// isMem - Is this a memory operand?
virtual bool isMem() const = 0;
+ virtual unsigned getMemSize() const { return 0; }
/// getStartLoc - Get the location of the first token of this operand.
virtual SMLoc getStartLoc() const = 0;
/// getEndLoc - Get the location of the last token of this operand.
virtual SMLoc getEndLoc() const = 0;
+ /// needSizeDirective - Do we need to emit a sizing directive for this
+ /// operand? Only valid when parsing MS-style inline assembly.
+ virtual bool needSizeDirective() const { return false; }
+
/// print - Print a debug representation of the operand to the given stream.
virtual void print(raw_ostream &OS) const = 0;
/// dump - Print to the debug stream.
diff --git a/include/llvm/MC/MCSchedule.h b/include/llvm/MC/MCSchedule.h
index 0504dc13c8f..c9a060c79b1 100644
--- a/include/llvm/MC/MCSchedule.h
+++ b/include/llvm/MC/MCSchedule.h
@@ -54,10 +54,12 @@ struct MCWriteProcResEntry {
};
/// Specify the latency in cpu cycles for a particular scheduling class and def
-/// index. Also identify the WriteResources of this def. When the operand
-/// expands to a sequence of writes, this ID is the last write in the sequence.
+/// index. -1 indicates an invalid latency. Heuristics would typically consider
+/// an instruction with invalid latency to have infinite latency. Also identify
+/// the WriteResources of this def. When the operand expands to a sequence of
+/// writes, this ID is the last write in the sequence.
struct MCWriteLatencyEntry {
- unsigned Cycles;
+ int Cycles;
unsigned WriteResourceID;
bool operator==(const MCWriteLatencyEntry &Other) const {
diff --git a/include/llvm/MC/MCTargetAsmParser.h b/include/llvm/MC/MCTargetAsmParser.h
index c9ea5ae4846..05537f9211f 100644
--- a/include/llvm/MC/MCTargetAsmParser.h
+++ b/include/llvm/MC/MCTargetAsmParser.h
@@ -41,12 +41,26 @@ protected: // Can only create subclasses.
/// AvailableFeatures - The current set of available features.
unsigned AvailableFeatures;
+ /// ParsingInlineAsm - Are we parsing ms-style inline assembly?
+ bool ParsingInlineAsm;
+
+ /// SemaCallback - The Sema callback implementation. Must be set when parsing
+ /// ms-style inline assembly.
+ MCAsmParserSemaCallback *SemaCallback;
+
public:
virtual ~MCTargetAsmParser();
unsigned getAvailableFeatures() const { return AvailableFeatures; }
void setAvailableFeatures(unsigned Value) { AvailableFeatures = Value; }
+ bool isParsingInlineAsm() { return ParsingInlineAsm; }
+ void setParsingInlineAsm(bool Value) { ParsingInlineAsm = Value; }
+
+ void setSemaCallback(MCAsmParserSemaCallback *Callback) {
+ SemaCallback = Callback;
+ }
+
virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
SMLoc &EndLoc) = 0;
diff --git a/include/llvm/Object/ObjectFile.h b/include/llvm/Object/ObjectFile.h
index 41959bd34e1..4185ffea130 100644
--- a/include/llvm/Object/ObjectFile.h
+++ b/include/llvm/Object/ObjectFile.h
@@ -266,7 +266,7 @@ const uint64_t UnknownAddressOrSize = ~0ULL;
/// ObjectFile - This class is the base class for all object file types.
/// Concrete instances of this object are created by createObjectFile, which
-/// figure out which type to create.
+/// figures out which type to create.
class ObjectFile : public Binary {
virtual void anchor();
ObjectFile() LLVM_DELETED_FUNCTION;
diff --git a/include/llvm/Operator.h b/include/llvm/Operator.h
index bc5da8e8aa3..462324a6694 100644
--- a/include/llvm/Operator.h
+++ b/include/llvm/Operator.h
@@ -36,8 +36,8 @@ private:
void *operator new(size_t, unsigned) LLVM_DELETED_FUNCTION;
void *operator new(size_t s) LLVM_DELETED_FUNCTION;
Operator() LLVM_DELETED_FUNCTION;
- // NOTE: cannot use LLVM_DELETED_FUNCTION because gcc errors when deleting
- // an override of a non-deleted function.
+ // NOTE: cannot use LLVM_DELETED_FUNCTION because it's not legal to delete
+ // an overridden method that's not deleted in the base class.
~Operator();
public:
@@ -191,7 +191,7 @@ public:
/// opcodes.
template<typename SuperClass, unsigned Opc>
class ConcreteOperator : public SuperClass {
- ~ConcreteOperator() LLVM_DELETED_FUNCTION;
+ ~ConcreteOperator(); // DO NOT IMPLEMENT
public:
static inline bool classof(const Instruction *I) {
return I->getOpcode() == Opc;
@@ -207,44 +207,44 @@ public:
class AddOperator
: public ConcreteOperator<OverflowingBinaryOperator, Instruction::Add> {
- ~AddOperator() LLVM_DELETED_FUNCTION;
+ ~AddOperator(); // DO NOT IMPLEMENT
};
class SubOperator
: public ConcreteOperator<OverflowingBinaryOperator, Instruction::Sub> {
- ~SubOperator() LLVM_DELETED_FUNCTION;
+ ~SubOperator(); // DO NOT IMPLEMENT
};
class MulOperator
: public ConcreteOperator<OverflowingBinaryOperator, Instruction::Mul> {
- ~MulOperator() LLVM_DELETED_FUNCTION;
+ ~MulOperator(); // DO NOT IMPLEMENT
};
class ShlOperator
: public ConcreteOperator<OverflowingBinaryOperator, Instruction::Shl> {
- ~ShlOperator() LLVM_DELETED_FUNCTION;
+ ~ShlOperator(); // DO NOT IMPLEMENT
};
-
+
class SDivOperator
: public ConcreteOperator<PossiblyExactOperator, Instruction::SDiv> {
- ~SDivOperator() LLVM_DELETED_FUNCTION;
+ ~SDivOperator(); // DO NOT IMPLEMENT
};
class UDivOperator
: public ConcreteOperator<PossiblyExactOperator, Instruction::UDiv> {
- ~UDivOperator() LLVM_DELETED_FUNCTION;
+ ~UDivOperator(); // DO NOT IMPLEMENT
};
class AShrOperator
: public ConcreteOperator<PossiblyExactOperator, Instruction::AShr> {
- ~AShrOperator() LLVM_DELETED_FUNCTION;
+ ~AShrOperator(); // DO NOT IMPLEMENT
};
class LShrOperator
: public ConcreteOperator<PossiblyExactOperator, Instruction::LShr> {
- ~LShrOperator() LLVM_DELETED_FUNCTION;
+ ~LShrOperator(); // DO NOT IMPLEMENT
};
-
-
-
+
+
+
class GEPOperator
: public ConcreteOperator<Operator, Instruction::GetElementPtr> {
- ~GEPOperator() LLVM_DELETED_FUNCTION;
+ ~GEPOperator(); // DO NOT IMPLEMENT
enum {
IsInBounds = (1 << 0)
diff --git a/include/llvm/Transforms/IPO.h b/include/llvm/Transforms/IPO.h
index 962cb63758d..08d3bbd941d 100644
--- a/include/llvm/Transforms/IPO.h
+++ b/include/llvm/Transforms/IPO.h
@@ -198,6 +198,11 @@ ModulePass *createPartialInliningPass();
//
ModulePass *createMetaRenamerPass();
+//===----------------------------------------------------------------------===//
+/// createBarrierNoopPass - This pass is purely a module pass barrier in a pass
+/// manager.
+ModulePass *createBarrierNoopPass();
+
} // End llvm namespace
#endif
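
An illustrative use of the new pass from client code (PassManager::add is the standard 3.x pass-manager API; the function name is hypothetical):

    #include "llvm/PassManager.h"
    #include "llvm/Transforms/IPO.h"

    void addBarrier(llvm::PassManager &PM) {
      // The barrier does no work itself; it forces the pass manager to close
      // the current function-pass sequence before later module passes run.
      PM.add(llvm::createBarrierNoopPass());
    }
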
diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h
index 3b665bf4b68..a5d8eed7462 100644
--- a/include/llvm/Transforms/Scalar.h
+++ b/include/llvm/Transforms/Scalar.h
@@ -119,7 +119,7 @@ Pass *createLICMPass();
// optional parameter used to consult the target machine whether certain
// transformations are profitable.
//
-Pass *createLoopStrengthReducePass();
+Pass *createLoopStrengthReducePass(const TargetLowering *TLI = 0);
Pass *createGlobalMergePass(const TargetLowering *TLI = 0);
@@ -249,8 +249,9 @@ extern char &LowerSwitchID;
// purpose "my LLVM-to-LLVM pass doesn't support the invoke instruction yet"
// lowering pass.
//
-FunctionPass *createLowerInvokePass();
-FunctionPass *createLowerInvokePass(bool useExpensiveEHSupport);
+FunctionPass *createLowerInvokePass(const TargetLowering *TLI = 0);
+FunctionPass *createLowerInvokePass(const TargetLowering *TLI,
+ bool useExpensiveEHSupport);
extern char &LowerInvokePassID;
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Transforms/Utils/BasicBlockUtils.h b/include/llvm/Transforms/Utils/BasicBlockUtils.h
index 2510aecc69c..b810f1a818c 100644
--- a/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -25,9 +25,11 @@ namespace llvm {
class AliasAnalysis;
class Instruction;
+class MDNode;
class Pass;
class ReturnInst;
class TargetLibraryInfo;
+class TerminatorInst;
/// DeleteDeadBlock - Delete the specified block, which must have no
/// predecessors.
@@ -203,6 +205,29 @@ void SplitLandingPadPredecessors(BasicBlock *OrigBB,ArrayRef<BasicBlock*> Preds,
ReturnInst *FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
BasicBlock *Pred);
+/// SplitBlockAndInsertIfThen - Split the containing block at the
+/// specified instruction - everything before and including Cmp stays
+/// in the old basic block, and everything after Cmp is moved to a
+/// new block. The two blocks are connected by a conditional branch
+/// (with value of Cmp being the condition).
+/// Before:
+/// Head
+/// Cmp
+/// Tail
+/// After:
+/// Head
+/// Cmp
+/// if (Cmp)
+/// ThenBlock
+/// Tail
+///
+/// If Unreachable is true, then ThenBlock ends with
+/// UnreachableInst, otherwise it branches to Tail.
+/// Returns the NewBasicBlock's terminator.
+
+TerminatorInst *SplitBlockAndInsertIfThen(Instruction *Cmp,
+ bool Unreachable, MDNode *BranchWeights = 0);
+
} // End llvm namespace
#endif
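
A hedged usage sketch of the new helper; Cmp must already live inside a basic block, and the surrounding function is illustrative:

    #include "llvm/Instructions.h"
    #include "llvm/Transforms/Utils/BasicBlockUtils.h"

    void guardSlowPath(llvm::Instruction *Cmp) {
      // Splits Cmp's block: code after Cmp moves to a new Tail block, and a
      // fresh ThenBlock runs only when Cmp is true, then branches to Tail
      // (because Unreachable is false).
      llvm::TerminatorInst *ThenTerm =
          llvm::SplitBlockAndInsertIfThen(Cmp, /*Unreachable=*/false);
      // Insert the slow-path instructions before ThenTerm here.
      (void)ThenTerm;
    }
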
diff --git a/include/llvm/Transforms/Vectorize.h b/include/llvm/Transforms/Vectorize.h
index 1e49a9c01e6..41e53a83e2f 100644
--- a/include/llvm/Transforms/Vectorize.h
+++ b/include/llvm/Transforms/Vectorize.h
@@ -107,6 +107,12 @@ BasicBlockPass *
createBBVectorizePass(const VectorizeConfig &C = VectorizeConfig());
//===----------------------------------------------------------------------===//
+//
+// LoopVectorize - Create a loop vectorization pass.
+//
+Pass *createLoopVectorizePass();
+
+//===----------------------------------------------------------------------===//
/// @brief Vectorize the BasicBlock.
///
/// @param BB The BasicBlock to be vectorized
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 5c2a49e767f..111bfb4a6a7 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -19,8 +19,8 @@
#include "llvm/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/DataLayout.h"
+#include "llvm/Target/TargetLowering.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/TargetTransformInfo.h"
using namespace llvm;
@@ -1599,15 +1599,15 @@ static bool width_descending(Value *lhs, Value *rhs) {
/// This does not depend on any SCEVExpander state but should be used in
/// the same context that SCEVExpander is used.
unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
- SmallVectorImpl<WeakVH> &DeadInsts,
- const ScalarTargetTransformInfo *STTI) {
+ SmallVectorImpl<WeakVH> &DeadInsts,
+ const TargetLowering *TLI) {
// Find integer phis in order of increasing width.
SmallVector<PHINode*, 8> Phis;
for (BasicBlock::iterator I = L->getHeader()->begin();
PHINode *Phi = dyn_cast<PHINode>(I); ++I) {
Phis.push_back(Phi);
}
- if (STTI)
+ if (TLI)
std::sort(Phis.begin(), Phis.end(), width_descending);
unsigned NumElim = 0;
@@ -1618,14 +1618,25 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
PEnd = Phis.end(); PIter != PEnd; ++PIter) {
PHINode *Phi = *PIter;
+ // Fold constant phis. They may be congruent to other constant phis and
+ // would confuse the logic below that expects proper IVs.
+ if (Value *V = Phi->hasConstantValue()) {
+ Phi->replaceAllUsesWith(V);
+ DeadInsts.push_back(Phi);
+ ++NumElim;
+ DEBUG_WITH_TYPE(DebugType, dbgs()
+ << "INDVARS: Eliminated constant iv: " << *Phi << '\n');
+ continue;
+ }
+
if (!SE.isSCEVable(Phi->getType()))
continue;
PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)];
if (!OrigPhiRef) {
OrigPhiRef = Phi;
- if (Phi->getType()->isIntegerTy() && STTI &&
- STTI->isTruncateFree(Phi->getType(), Phis.back()->getType())) {
+ if (Phi->getType()->isIntegerTy() && TLI
+ && TLI->isTruncateFree(Phi->getType(), Phis.back()->getType())) {
// This phi can be freely truncated to the narrowest phi type. Map the
// truncated expression to it so it will be reused for narrow types.
const SCEV *TruncExpr =
diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp
index 2311842671a..ed78f194215 100644
--- a/lib/CodeGen/ExecutionDepsFix.cpp
+++ b/lib/CodeGen/ExecutionDepsFix.cpp
@@ -657,7 +657,7 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
bool anyregs = false;
for (TargetRegisterClass::const_iterator I = RC->begin(), E = RC->end();
I != E; ++I)
- if (MF->getRegInfo().isPhysRegOrOverlapUsed(*I)) {
+ if (MF->getRegInfo().isPhysRegUsed(*I)) {
anyregs = true;
break;
}
diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp
index 0dfb084f1e1..f8fbc7ddf0c 100644
--- a/lib/CodeGen/LiveRangeEdit.cpp
+++ b/lib/CodeGen/LiveRangeEdit.cpp
@@ -96,6 +96,13 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
const VNInfo *OVNI = li.getVNInfoAt(OrigIdx);
if (!OVNI)
continue;
+
+ // Don't allow rematerialization immediately after the original def.
+ // It would be incorrect if OrigMI redefines the register.
+ // See PR14098.
+ if (SlotIndex::isSameInstr(OrigIdx, UseIdx))
+ return false;
+
if (OVNI != li.getVNInfoAt(UseIdx))
return false;
}
diff --git a/lib/CodeGen/MachineFunctionPrinterPass.cpp b/lib/CodeGen/MachineFunctionPrinterPass.cpp
index 0102ac708d0..ed94efb9355 100644
--- a/lib/CodeGen/MachineFunctionPrinterPass.cpp
+++ b/lib/CodeGen/MachineFunctionPrinterPass.cpp
@@ -51,7 +51,7 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass {
char MachineFunctionPrinterPass::ID = 0;
}
-char &MachineFunctionPrinterPassID = MachineFunctionPrinterPass::ID;
+char &llvm::MachineFunctionPrinterPassID = MachineFunctionPrinterPass::ID;
INITIALIZE_PASS(MachineFunctionPrinterPass, "print-machineinstrs",
"Machine Function Printer", false, false)
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index ae7c15be158..95d7a7dd689 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -21,7 +21,7 @@ MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI)
: TRI(&TRI), IsSSA(true), TracksLiveness(true) {
VRegInfo.reserve(256);
RegAllocHints.reserve(256);
- UsedPhysRegs.resize(TRI.getNumRegs());
+ UsedRegUnits.resize(TRI.getNumRegUnits());
UsedPhysRegMask.resize(TRI.getNumRegs());
// Create the physreg use/def lists.
@@ -32,7 +32,7 @@ MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI)
MachineRegisterInfo::~MachineRegisterInfo() {
#ifndef NDEBUG
clearVirtRegs();
- for (unsigned i = 0, e = UsedPhysRegs.size(); i != e; ++i)
+ for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i)
assert(!PhysRegUseDefLists[i] &&
"PhysRegUseDefLists has entries after all instructions are deleted");
#endif
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index abd62efc026..4ea21d4ff7b 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -359,7 +359,7 @@ void TargetPassConfig::addIRPasses() {
// Run loop strength reduction before anything else.
if (getOptLevel() != CodeGenOpt::None && !DisableLSR) {
- addPass(createLoopStrengthReducePass());
+ addPass(createLoopStrengthReducePass(getTargetLowering()));
if (PrintLSR)
addPass(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs()));
}
@@ -389,7 +389,7 @@ void TargetPassConfig::addPassesToHandleExceptions() {
addPass(createDwarfEHPass(TM));
break;
case ExceptionHandling::None:
- addPass(createLowerInvokePass());
+ addPass(createLowerInvokePass(TM->getTargetLowering()));
// The lower invoke pass may create unreachable code. Remove it.
addPass(createUnreachableBlockEliminationPass());
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 86df0a127bf..77554d691c2 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -227,7 +227,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
std::vector<CalleeSavedInfo> CSI;
for (unsigned i = 0; CSRegs[i]; ++i) {
unsigned Reg = CSRegs[i];
- if (Fn.getRegInfo().isPhysRegOrOverlapUsed(Reg)) {
+ if (Fn.getRegInfo().isPhysRegUsed(Reg)) {
// If the reg is modified, save it!
CSI.push_back(CalleeSavedInfo(Reg));
}
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index e096240e04b..d6ed36ef95b 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -113,9 +113,11 @@ namespace {
// PhysRegState - One of the RegState enums, or a virtreg.
std::vector<unsigned> PhysRegState;
- // UsedInInstr - BitVector of physregs that are used in the current
- // instruction, and so cannot be allocated.
- BitVector UsedInInstr;
+ typedef SparseSet<unsigned> UsedInInstrSet;
+
+ // UsedInInstr - Set of physregs that are used in the current instruction,
+ // and so cannot be allocated.
+ UsedInInstrSet UsedInInstr;
// SkippedInstrs - Descriptors of instructions whose clobber list was
// ignored because all registers were spilled. It is still necessary to
@@ -340,7 +342,7 @@ void RAFast::usePhysReg(MachineOperand &MO) {
PhysRegState[PhysReg] = regFree;
// Fall through
case regFree:
- UsedInInstr.set(PhysReg);
+ UsedInInstr.insert(PhysReg);
MO.setIsKill();
return;
default:
@@ -360,13 +362,13 @@ void RAFast::usePhysReg(MachineOperand &MO) {
"Instruction is not using a subregister of a reserved register");
// Leave the superregister in the working set.
PhysRegState[Alias] = regFree;
- UsedInInstr.set(Alias);
+ UsedInInstr.insert(Alias);
MO.getParent()->addRegisterKilled(Alias, TRI, true);
return;
case regFree:
if (TRI->isSuperRegister(PhysReg, Alias)) {
// Leave the superregister in the working set.
- UsedInInstr.set(Alias);
+ UsedInInstr.insert(Alias);
MO.getParent()->addRegisterKilled(Alias, TRI, true);
return;
}
@@ -380,7 +382,7 @@ void RAFast::usePhysReg(MachineOperand &MO) {
// All aliases are disabled, bring register into working set.
PhysRegState[PhysReg] = regFree;
- UsedInInstr.set(PhysReg);
+ UsedInInstr.insert(PhysReg);
MO.setIsKill();
}
@@ -389,7 +391,7 @@ void RAFast::usePhysReg(MachineOperand &MO) {
/// reserved instead of allocated.
void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg,
RegState NewState) {
- UsedInInstr.set(PhysReg);
+ UsedInInstr.insert(PhysReg);
switch (unsigned VirtReg = PhysRegState[PhysReg]) {
case regDisabled:
break;
@@ -429,7 +431,7 @@ void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg,
// can be allocated directly.
// Returns spillImpossible when PhysReg or an alias can't be spilled.
unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
- if (UsedInInstr.test(PhysReg)) {
+ if (UsedInInstr.count(PhysReg)) {
DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is already used in instr.\n");
return spillImpossible;
}
@@ -454,7 +456,7 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
unsigned Cost = 0;
for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) {
unsigned Alias = *AI;
- if (UsedInInstr.test(Alias))
+ if (UsedInInstr.count(Alias))
return spillImpossible;
switch (unsigned VirtReg = PhysRegState[Alias]) {
case regDisabled:
@@ -530,7 +532,7 @@ RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI,
// First try to find a completely free register.
for (ArrayRef<unsigned>::iterator I = AO.begin(), E = AO.end(); I != E; ++I) {
unsigned PhysReg = *I;
- if (PhysRegState[PhysReg] == regFree && !UsedInInstr.test(PhysReg)) {
+ if (PhysRegState[PhysReg] == regFree && !UsedInInstr.count(PhysReg)) {
assignVirtToPhysReg(*LRI, PhysReg);
return LRI;
}
@@ -596,7 +598,7 @@ RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum,
LRI->LastUse = MI;
LRI->LastOpNum = OpNum;
LRI->Dirty = true;
- UsedInInstr.set(LRI->PhysReg);
+ UsedInInstr.insert(LRI->PhysReg);
return LRI;
}
@@ -646,7 +648,7 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum,
assert(LRI->PhysReg && "Register not assigned");
LRI->LastUse = MI;
LRI->LastOpNum = OpNum;
- UsedInInstr.set(LRI->PhysReg);
+ UsedInInstr.insert(LRI->PhysReg);
return LRI;
}
@@ -708,7 +710,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
unsigned Reg = MO.getReg();
if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
- UsedInInstr.set(*AI);
+ UsedInInstr.insert(*AI);
if (ThroughRegs.count(PhysRegState[*AI]))
definePhysReg(MI, *AI, regFree);
}
@@ -756,7 +758,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
}
// Restore UsedInInstr to a state usable for allocating normal virtual uses.
- UsedInInstr.reset();
+ UsedInInstr.clear();
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue;
@@ -764,12 +766,12 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
DEBUG(dbgs() << "\tSetting " << PrintReg(Reg, TRI)
<< " as used in instr\n");
- UsedInInstr.set(Reg);
+ UsedInInstr.insert(Reg);
}
// Also mark PartialDefs as used to avoid reallocation.
for (unsigned i = 0, e = PartialDefs.size(); i != e; ++i)
- UsedInInstr.set(PartialDefs[i]);
+ UsedInInstr.insert(PartialDefs[i]);
}
/// addRetOperand - ensure that a return instruction has an operand for each
@@ -942,7 +944,7 @@ void RAFast::AllocateBasicBlock() {
}
// Track registers used by instruction.
- UsedInInstr.reset();
+ UsedInInstr.clear();
// First scan.
// Mark physreg uses and early clobbers as used.
@@ -1016,11 +1018,13 @@ void RAFast::AllocateBasicBlock() {
}
}
- MRI->addPhysRegsUsed(UsedInInstr);
+ for (UsedInInstrSet::iterator
+ I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I)
+ MRI->setPhysRegUsed(*I);
// Track registers defined by instruction - early clobbers and tied uses at
// this point.
- UsedInInstr.reset();
+ UsedInInstr.clear();
if (hasEarlyClobbers) {
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
@@ -1030,7 +1034,7 @@ void RAFast::AllocateBasicBlock() {
// Look for physreg defs and tied uses.
if (!MO.isDef() && !MI->isRegTiedToDefOperand(i)) continue;
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- UsedInInstr.set(*AI);
+ UsedInInstr.insert(*AI);
}
}
@@ -1080,7 +1084,9 @@ void RAFast::AllocateBasicBlock() {
killVirtReg(VirtDead[i]);
VirtDead.clear();
- MRI->addPhysRegsUsed(UsedInInstr);
+ for (UsedInInstrSet::iterator
+ I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I)
+ MRI->setPhysRegUsed(*I);
if (CopyDst && CopyDst == CopySrc && CopyDstSub == CopySrcSub) {
DEBUG(dbgs() << "-- coalescing: " << *MI);
@@ -1118,7 +1124,8 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
TII = TM->getInstrInfo();
MRI->freezeReservedRegs(Fn);
RegClassInfo.runOnMachineFunction(Fn);
- UsedInInstr.resize(TRI->getNumRegs());
+ UsedInInstr.clear();
+ UsedInInstr.setUniverse(TRI->getNumRegs());
assert(!MRI->isSSA() && "regalloc requires leaving SSA");
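The move from BitVector to SparseSet above makes the per-instruction reset cost proportional to the few registers actually touched rather than to the whole register universe. A minimal usage sketch, assuming only the SparseSet operations this patch exercises:

    #include "llvm/ADT/SparseSet.h"

    void sketch(unsigned NumRegs) {
      llvm::SparseSet<unsigned> UsedInInstr;
      UsedInInstr.setUniverse(NumRegs); // one allocation, done per function
      UsedInInstr.insert(5);            // O(1); duplicate inserts are no-ops
      if (UsedInInstr.count(5)) {
        // register 5 cannot be allocated within this instruction
      }
      UsedInInstr.clear();              // O(elements inserted), not O(NumRegs)
    }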
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index ba6b4569a8f..2ca67d63257 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -1302,7 +1302,7 @@ public:
SmallVectorImpl<unsigned> &ShrinkRegs);
/// Get the value assignments suitable for passing to LiveInterval::join.
- const int *getAssignments() const { return &Assignments[0]; }
+ const int *getAssignments() const { return Assignments.data(); }
};
} // end anonymous namespace
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 8846247090c..17386b74e36 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5308,6 +5308,48 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
if (Reduced.getNode())
return Reduced;
}
+ // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...),
+ // where ... are all 'undef'.
+ if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
+ SmallVector<EVT, 8> VTs;
+ SDValue V;
+ unsigned Idx = 0;
+ unsigned NumDefs = 0;
+
+ for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
+ SDValue X = N0.getOperand(i);
+ if (X.getOpcode() != ISD::UNDEF) {
+ V = X;
+ Idx = i;
+ NumDefs++;
+ }
+ // Stop if more than one member is non-undef.
+ if (NumDefs > 1)
+ break;
+ VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
+ VT.getVectorElementType(),
+ X.getValueType().getVectorNumElements()));
+ }
+
+ if (NumDefs == 0)
+ return DAG.getUNDEF(VT);
+
+ if (NumDefs == 1) {
+ assert(V.getNode() && "The single defined operand is empty!");
+ SmallVector<SDValue, 8> Opnds;
+ for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
+ if (i != Idx) {
+ Opnds.push_back(DAG.getUNDEF(VTs[i]));
+ continue;
+ }
+ SDValue NV = DAG.getNode(ISD::TRUNCATE, V.getDebugLoc(), VTs[i], V);
+ AddToWorkList(NV.getNode());
+ Opnds.push_back(NV);
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
+ &Opnds[0], Opnds.size());
+ }
+ }
// Simplify the operands using demanded-bits information.
if (!VT.isVector() &&
@@ -8564,8 +8606,8 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
return SDValue();
// Only handle cases where both indexes are constants with the same type.
- ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
- ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
+ ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
if (InsIdx && ExtIdx &&
InsIdx->getValueType(0).getSizeInBits() <= 64 &&
@@ -8582,6 +8624,21 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
}
}
+ if (V->getOpcode() == ISD::CONCAT_VECTORS) {
+ // Combine:
+ // (extract_subvec (concat V1, V2, ...), i)
+ // Into:
+ // Vi if possible
+ // Only operand 0 is checked as 'concat' assumes all inputs are of the same type.
+ if (V->getOperand(0).getValueType() != NVT)
+ return SDValue();
+ unsigned Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ unsigned NumElems = NVT.getVectorNumElements();
+ assert((Idx % NumElems) == 0 &&
+ "IDX in concat is not a multiple of the result vector length.");
+ return V->getOperand(Idx / NumElems);
+ }
+
return SDValue();
}
@@ -9018,6 +9075,10 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
(RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
return false;
+ // The loads must not depend on one another.
+ if (LLD->isPredecessorOf(RLD) ||
+ RLD->isPredecessorOf(LLD))
+ return false;
Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(),
LLD->getBasePtr().getValueType(),
TheSelect->getOperand(0), LLD->getBasePtr(),
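As a standalone illustration of the index arithmetic behind the new extract_subvector-of-concat fold (the helper name here is hypothetical, not from the patch):

    #include <cassert>

    // Extracting a result vector of ResultNumElems elements at constant
    // element index ExtractIdx from concat(V0, V1, ...) of identically
    // typed inputs selects operand ExtractIdx / ResultNumElems. For
    // example, extracting a v4i32 at index 8 from
    // concat(V0:v4i32, V1:v4i32, V2:v4i32) yields V2.
    unsigned pickConcatOperand(unsigned ExtractIdx, unsigned ResultNumElems) {
      assert(ExtractIdx % ResultNumElems == 0 &&
             "extract index must be a multiple of the result vector length");
      return ExtractIdx / ResultNumElems;
    }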
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 2ec129f7308..abf40b77a18 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1240,6 +1240,19 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
if (Action == TargetLowering::Legal)
Action = TargetLowering::Custom;
break;
+ case ISD::DEBUGTRAP:
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ if (Action == TargetLowering::Expand) {
+ // replace ISD::DEBUGTRAP with ISD::TRAP
+ SDValue NewVal;
+ NewVal = DAG.getNode(ISD::TRAP, Node->getDebugLoc(), Node->getVTList(),
+ Node->getOperand(0));
+ ReplaceNode(Node, NewVal.getNode());
+ LegalizeOp(NewVal.getNode());
+ return;
+ }
+ break;
+
default:
if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
Action = TargetLowering::Legal;
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 515eff3b253..2ae08692ae1 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -13,6 +13,7 @@
#define DEBUG_TYPE "pre-RA-sched"
#include "ScheduleDAGSDNodes.h"
+#include "InstrEmitter.h"
#include "llvm/InlineAsm.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
@@ -34,6 +35,10 @@ STATISTIC(NumPRCopies, "Number of physical copies");
static RegisterScheduler
fastDAGScheduler("fast", "Fast suboptimal list scheduling",
createFastDAGScheduler);
+static RegisterScheduler
+ linearizeDAGScheduler("linearize", "Linearize DAG, no scheduling",
+ createDAGLinearizer);
+
namespace {
/// FastPriorityQueue - A degenerate priority queue that considers
@@ -629,6 +634,155 @@ void ScheduleDAGFast::ListScheduleBottomUp() {
#endif
}
+
+namespace {
+//===----------------------------------------------------------------------===//
+// ScheduleDAGLinearize - A "scheduler" that performs no real scheduling;
+// it simply linearizes the DAG in topological order.
+// IMPORTANT: this may not work for targets with physreg dependencies.
+//
+class ScheduleDAGLinearize : public ScheduleDAGSDNodes {
+public:
+ ScheduleDAGLinearize(MachineFunction &mf) : ScheduleDAGSDNodes(mf) {}
+
+ void Schedule();
+
+ MachineBasicBlock *EmitSchedule(MachineBasicBlock::iterator &InsertPos);
+
+private:
+ std::vector<SDNode*> Sequence;
+ DenseMap<SDNode*, SDNode*> GluedMap; // Cache glue to its user
+
+ void ScheduleNode(SDNode *N);
+};
+} // end anonymous namespace
+
+void ScheduleDAGLinearize::ScheduleNode(SDNode *N) {
+ if (N->getNodeId() != 0)
+ llvm_unreachable(0);
+
+ if (!N->isMachineOpcode() &&
+ (N->getOpcode() == ISD::EntryToken || isPassiveNode(N)))
+ // These nodes do not need to be translated into MIs.
+ return;
+
+ DEBUG(dbgs() << "\n*** Scheduling: ");
+ DEBUG(N->dump(DAG));
+ Sequence.push_back(N);
+
+ unsigned NumOps = N->getNumOperands();
+ if (unsigned NumLeft = NumOps) {
+ SDNode *GluedOpN = 0;
+ do {
+ const SDValue &Op = N->getOperand(NumLeft-1);
+ SDNode *OpN = Op.getNode();
+
+ if (NumLeft == NumOps && Op.getValueType() == MVT::Glue) {
+ // Schedule glue operand right above N.
+ GluedOpN = OpN;
+ assert(OpN->getNodeId() != 0 && "Glue operand not ready?");
+ OpN->setNodeId(0);
+ ScheduleNode(OpN);
+ continue;
+ }
+
+ if (OpN == GluedOpN)
+ // Glue operand is already scheduled.
+ continue;
+
+ DenseMap<SDNode*, SDNode*>::iterator DI = GluedMap.find(OpN);
+ if (DI != GluedMap.end() && DI->second != N)
+ // Users of glues are counted against the glued users.
+ OpN = DI->second;
+
+ unsigned Degree = OpN->getNodeId();
+ assert(Degree > 0 && "Predecessor over-released!");
+ OpN->setNodeId(--Degree);
+ if (Degree == 0)
+ ScheduleNode(OpN);
+ } while (--NumLeft);
+ }
+}
+
+/// findGluedUser - Find the representative use of a glue value by walking
+/// the use chain.
+static SDNode *findGluedUser(SDNode *N) {
+ while (SDNode *Glued = N->getGluedUser())
+ N = Glued;
+ return N;
+}
+
+void ScheduleDAGLinearize::Schedule() {
+ DEBUG(dbgs() << "********** DAG Linearization **********\n");
+
+ SmallVector<SDNode*, 8> Glues;
+ unsigned DAGSize = 0;
+ for (SelectionDAG::allnodes_iterator I = DAG->allnodes_begin(),
+ E = DAG->allnodes_end(); I != E; ++I) {
+ SDNode *N = I;
+
+ // Use node id to record degree.
+ unsigned Degree = N->use_size();
+ N->setNodeId(Degree);
+ unsigned NumVals = N->getNumValues();
+ if (NumVals && N->getValueType(NumVals-1) == MVT::Glue &&
+ N->hasAnyUseOfValue(NumVals-1)) {
+ SDNode *User = findGluedUser(N);
+ if (User) {
+ Glues.push_back(N);
+ GluedMap.insert(std::make_pair(N, User));
+ }
+ }
+
+ if (N->isMachineOpcode() ||
+ (N->getOpcode() != ISD::EntryToken && !isPassiveNode(N)))
+ ++DAGSize;
+ }
+
+ for (unsigned i = 0, e = Glues.size(); i != e; ++i) {
+ SDNode *Glue = Glues[i];
+ SDNode *GUser = GluedMap[Glue];
+ unsigned Degree = Glue->getNodeId();
+ unsigned UDegree = GUser->getNodeId();
+
+ // Glue user must be scheduled together with the glue operand. So other
+ // users of the glue operand must be treated as its users.
+ SDNode *ImmGUser = Glue->getGluedUser();
+ for (SDNode::use_iterator ui = Glue->use_begin(), ue = Glue->use_end();
+ ui != ue; ++ui)
+ if (*ui == ImmGUser)
+ --Degree;
+ GUser->setNodeId(UDegree + Degree);
+ Glue->setNodeId(1);
+ }
+
+ Sequence.reserve(DAGSize);
+ ScheduleNode(DAG->getRoot().getNode());
+}
+
+MachineBasicBlock*
+ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
+ InstrEmitter Emitter(BB, InsertPos);
+ DenseMap<SDValue, unsigned> VRBaseMap;
+
+ DEBUG({
+ dbgs() << "\n*** Final schedule ***\n";
+ });
+
+ // FIXME: Handle dbg_values.
+ unsigned NumNodes = Sequence.size();
+ for (unsigned i = 0; i != NumNodes; ++i) {
+ SDNode *N = Sequence[NumNodes-i-1];
+ DEBUG(N->dump(DAG));
+ Emitter.EmitNode(N, false, false, VRBaseMap);
+ }
+
+ DEBUG(dbgs() << '\n');
+
+ InsertPos = Emitter.getInsertPos();
+ return Emitter.getBlock();
+}
+
//===----------------------------------------------------------------------===//
// Public Constructor Functions
//===----------------------------------------------------------------------===//
@@ -637,3 +791,8 @@ llvm::ScheduleDAGSDNodes *
llvm::createFastDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
return new ScheduleDAGFast(*IS->MF);
}
+
+llvm::ScheduleDAGSDNodes *
+llvm::createDAGLinearizer(SelectionDAGISel *IS, CodeGenOpt::Level) {
+ return new ScheduleDAGLinearize(*IS->MF);
+}
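Stripped of the glue handling, the degree-counting scheme in ScheduleDAGLinearize is a depth-first topological walk in which each node's id holds its count of unscheduled users. A rough sketch with toy types (not the SDNode API):

    #include <vector>

    struct Node {
      std::vector<Node*> Operands;
      int NodeId; // number of users not yet scheduled
    };

    // Emit N (all of its users are already in Sequence), then release its
    // operands; an operand becomes ready when its last user is scheduled.
    static void scheduleNode(Node *N, std::vector<Node*> &Sequence) {
      Sequence.push_back(N);
      for (unsigned i = 0, e = N->Operands.size(); i != e; ++i)
        if (--N->Operands[i]->NodeId == 0)
          scheduleNode(N->Operands[i], Sequence);
    }
    // The sequence comes out top-down; EmitSchedule above walks it backwards.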
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 660223a5059..714471f559e 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -831,8 +831,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
}
SmallVector<SDNode *, 4> GluedNodes;
- for (SDNode *N = SU->getNode()->getGluedNode(); N;
- N = N->getGluedNode())
+ for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode())
GluedNodes.push_back(N);
while (!GluedNodes.empty()) {
SDNode *N = GluedNodes.back();
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 8e7bd822014..907356fd212 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -114,7 +114,8 @@ namespace llvm {
/// EmitSchedule - Insert MachineInstrs into the MachineBasicBlock
/// according to the order specified in Sequence.
///
- MachineBasicBlock *EmitSchedule(MachineBasicBlock::iterator &InsertPos);
+ virtual MachineBasicBlock*
+ EmitSchedule(MachineBasicBlock::iterator &InsertPos);
virtual void dumpNode(const SUnit *SU) const;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index cdebff94e59..db159941749 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5175,10 +5175,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return 0;
}
+ case Intrinsic::debugtrap:
case Intrinsic::trap: {
StringRef TrapFuncName = TM.Options.getTrapFunctionName();
if (TrapFuncName.empty()) {
- DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot()));
+ ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ?
+ ISD::TRAP : ISD::DEBUGTRAP;
+ DAG.setRoot(DAG.getNode(Op, dl,MVT::Other, getRoot()));
return 0;
}
TargetLowering::ArgListTy Args;
@@ -5193,10 +5196,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
DAG.setRoot(Result.second);
return 0;
}
- case Intrinsic::debugtrap: {
- DAG.setRoot(DAG.getNode(ISD::DEBUGTRAP, dl,MVT::Other, getRoot()));
- return 0;
- }
+
case Intrinsic::uadd_with_overflow:
case Intrinsic::sadd_with_overflow:
case Intrinsic::usub_with_overflow:
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index cd485ac235c..5abc55ba8e2 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -583,6 +583,11 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
// Default ISD::TRAP to expand (which turns it into abort).
setOperationAction(ISD::TRAP, MVT::Other, Expand);
+ // On most systems, DEBUGTRAP and TRAP behave identically. The "Expand"
+ // action here tells the DAG legalizer to replace DEBUGTRAP with TRAP.
+ //
+ setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand);
+
IsLittleEndian = TD->isLittleEndian();
PointerTy = MVT::getIntegerVT(8*TD->getPointerSize(0));
memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*));
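For context, a hedged sketch of how this path is reached from source code: clang lowers __builtin_debugtrap() to the llvm.debugtrap intrinsic, SelectionDAGBuilder (above) turns that into ISD::DEBUGTRAP, and any target that leaves the default Expand action in place gets the plain-TRAP replacement from the legalizer:

    // Assumes clang's __builtin_debugtrap -> llvm.debugtrap mapping.
    void stop_here() {
      __builtin_debugtrap(); // on targets without a dedicated debug-trap
                             // instruction this ends up as ISD::TRAP
    }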
diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp
index 54d8c8cde7e..1cbee843a12 100644
--- a/lib/CodeGen/StackColoring.cpp
+++ b/lib/CodeGen/StackColoring.cpp
@@ -48,6 +48,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/DebugInfo.h"
+#include "llvm/Instructions.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -260,7 +261,7 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {
MarkersFound++;
- const Value *Allocation = MFI->getObjectAllocation(Slot);
+ const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);
if (Allocation) {
DEBUG(dbgs()<<"Found a lifetime marker for slot #"<<Slot<<
" with allocation: "<< Allocation->getName()<<"\n");
@@ -480,11 +481,11 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
}
// Keep a list of *allocas* which need to be remapped.
- DenseMap<const Value*, const Value*> Allocas;
+ DenseMap<const AllocaInst*, const AllocaInst*> Allocas;
for (DenseMap<int, int>::iterator it = SlotRemap.begin(),
e = SlotRemap.end(); it != e; ++it) {
- const Value *From = MFI->getObjectAllocation(it->first);
- const Value *To = MFI->getObjectAllocation(it->second);
+ const AllocaInst *From = MFI->getObjectAllocation(it->first);
+ const AllocaInst *To = MFI->getObjectAllocation(it->second);
assert(To && From && "Invalid allocation object");
Allocas[From] = To;
}
@@ -514,10 +515,17 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
V = GetUnderlyingObject(V);
// If we did not find one, or if the one that we found is not in our
// map, then move on.
- if (!V || !Allocas.count(V))
+ if (!V || !isa<AllocaInst>(V)) {
+ // Clear mem operand since we don't know for sure that it doesn't
+ // alias a merged alloca.
+ MMO->setValue(0);
+ continue;
+ }
+ const AllocaInst *AI= cast<AllocaInst>(V);
+ if (!Allocas.count(AI))
continue;
- MMO->setValue(Allocas[V]);
+ MMO->setValue(Allocas[AI]);
FixedMemOp++;
}
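The new conservative rule can be restated as a small standalone helper (a sketch, not the pass's actual code): a value that cannot be traced back to a known alloca loses its memory-operand value entirely, while merged allocas are remapped.

    #include "llvm/ADT/DenseMap.h"
    #include "llvm/Instructions.h"
    using namespace llvm;

    typedef DenseMap<const AllocaInst*, const AllocaInst*> AllocaMap;

    static const AllocaInst *remapOrClear(const Value *V,
                                          const AllocaMap &Allocas) {
      const AllocaInst *AI = dyn_cast_or_null<AllocaInst>(V);
      if (!AI)
        return 0; // unknown object: clear it, it may alias a merged slot
      AllocaMap::const_iterator I = Allocas.find(AI);
      return I == Allocas.end() ? AI : I->second;
    }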
diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp
index 7a6e2604d77..6a096a16c4e 100644
--- a/lib/CodeGen/TargetSchedule.cpp
+++ b/lib/CodeGen/TargetSchedule.cpp
@@ -58,6 +58,14 @@ unsigned TargetSchedModel::getNumMicroOps(MachineInstr *MI) const {
return MI->isTransient() ? 0 : 1;
}
+// The machine model may explicitly specify an invalid latency, which
+// effectively means infinite latency. Since users of the TargetSchedule API
+// don't know how to handle this, we convert it to a very large latency that is
+// easy to distinguish when debugging the DAG but won't induce overflow.
+static unsigned convertLatency(int Cycles) {
+ return Cycles >= 0 ? Cycles : 1000;
+}
+
/// If we can determine the operand latency from the def only, without machine
/// model or itinerary lookup, do so. Otherwise return -1.
int TargetSchedModel::getDefLatency(const MachineInstr *DefMI,
@@ -178,7 +186,7 @@ unsigned TargetSchedModel::computeOperandLatency(
const MCWriteLatencyEntry *WLEntry =
STI->getWriteLatencyEntry(SCDesc, DefIdx);
unsigned WriteID = WLEntry->WriteResourceID;
- unsigned Latency = WLEntry->Cycles;
+ unsigned Latency = convertLatency(WLEntry->Cycles);
if (!UseMI)
return Latency;
@@ -219,7 +227,7 @@ unsigned TargetSchedModel::computeInstrLatency(const MachineInstr *MI) const {
// Lookup the definition's write latency in SubtargetInfo.
const MCWriteLatencyEntry *WLEntry =
STI->getWriteLatencyEntry(SCDesc, DefIdx);
- Latency = std::max(Latency, WLEntry->Cycles);
+ Latency = std::max(Latency, convertLatency(WLEntry->Cycles));
}
return Latency;
}
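Restating the helper's contract as a quick self-check (illustrative only):

    #include <cassert>

    static unsigned convertLatencyRef(int Cycles) {
      return Cycles >= 0 ? Cycles : 1000;
    }

    int main() {
      assert(convertLatencyRef(3) == 3);     // valid latencies pass through
      assert(convertLatencyRef(-1) == 1000); // "infinite" is clamped instead
                                             // of wrapping to a huge unsigned
      return 0;
    }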
diff --git a/lib/DebugInfo/DWARFContext.cpp b/lib/DebugInfo/DWARFContext.cpp
index 241f55eaed2..afd614cc356 100644
--- a/lib/DebugInfo/DWARFContext.cpp
+++ b/lib/DebugInfo/DWARFContext.cpp
@@ -17,6 +17,8 @@
using namespace llvm;
using namespace dwarf;
+typedef DWARFDebugLine::LineTable DWARFLineTable;
+
void DWARFContext::dump(raw_ostream &OS) {
OS << ".debug_abbrev contents:\n";
getDebugAbbrev()->dump(OS);
@@ -94,7 +96,7 @@ const DWARFDebugAranges *DWARFContext::getDebugAranges() {
return Aranges.get();
}
-const DWARFDebugLine::LineTable *
+const DWARFLineTable *
DWARFContext::getLineTableForCompileUnit(DWARFCompileUnit *cu) {
if (!Line)
Line.reset(new DWARFDebugLine());
@@ -106,7 +108,7 @@ DWARFContext::getLineTableForCompileUnit(DWARFCompileUnit *cu) {
return 0; // No line table for this compile unit.
// See if the line table is cached.
- if (const DWARFDebugLine::LineTable *lt = Line->getLineTable(stmtOffset))
+ if (const DWARFLineTable *lt = Line->getLineTable(stmtOffset))
return lt;
// We have to parse it first.
@@ -117,11 +119,11 @@ DWARFContext::getLineTableForCompileUnit(DWARFCompileUnit *cu) {
void DWARFContext::parseCompileUnits() {
uint32_t offset = 0;
- const DataExtractor &debug_info_data = DataExtractor(getInfoSection(),
- isLittleEndian(), 0);
- while (debug_info_data.isValidOffset(offset)) {
+ const DataExtractor &DIData = DataExtractor(getInfoSection(),
+ isLittleEndian(), 0);
+ while (DIData.isValidOffset(offset)) {
CUs.push_back(DWARFCompileUnit(*this));
- if (!CUs.back().extract(debug_info_data, &offset)) {
+ if (!CUs.back().extract(DIData, &offset)) {
CUs.pop_back();
break;
}
@@ -163,9 +165,11 @@ DWARFCompileUnit *DWARFContext::getCompileUnitForAddress(uint64_t Address) {
return getCompileUnitForOffset(CUOffset);
}
-static bool getFileNameForCompileUnit(
- DWARFCompileUnit *CU, const DWARFDebugLine::LineTable *LineTable,
- uint64_t FileIndex, bool NeedsAbsoluteFilePath, std::string &FileName) {
+static bool getFileNameForCompileUnit(DWARFCompileUnit *CU,
+ const DWARFLineTable *LineTable,
+ uint64_t FileIndex,
+ bool NeedsAbsoluteFilePath,
+ std::string &FileName) {
if (CU == 0 ||
LineTable == 0 ||
!LineTable->getFileNameByIndex(FileIndex, NeedsAbsoluteFilePath,
@@ -183,10 +187,12 @@ static bool getFileNameForCompileUnit(
return true;
}
-static bool getFileLineInfoForCompileUnit(
- DWARFCompileUnit *CU, const DWARFDebugLine::LineTable *LineTable,
- uint64_t Address, bool NeedsAbsoluteFilePath, std::string &FileName,
- uint32_t &Line, uint32_t &Column) {
+static bool getFileLineInfoForCompileUnit(DWARFCompileUnit *CU,
+ const DWARFLineTable *LineTable,
+ uint64_t Address,
+ bool NeedsAbsoluteFilePath,
+ std::string &FileName,
+ uint32_t &Line, uint32_t &Column) {
if (CU == 0 || LineTable == 0)
return false;
// Get the index of row we're looking for in the line table.
@@ -225,8 +231,7 @@ DILineInfo DWARFContext::getLineInfoForAddress(uint64_t Address,
}
}
if (Specifier.needs(DILineInfoSpecifier::FileLineInfo)) {
- const DWARFDebugLine::LineTable *LineTable =
- getLineTableForCompileUnit(CU);
+ const DWARFLineTable *LineTable = getLineTableForCompileUnit(CU);
const bool NeedsAbsoluteFilePath =
Specifier.needs(DILineInfoSpecifier::AbsoluteFilePath);
getFileLineInfoForCompileUnit(CU, LineTable, Address,
@@ -250,7 +255,7 @@ DIInliningInfo DWARFContext::getInliningInfoForAddress(uint64_t Address,
DIInliningInfo InliningInfo;
uint32_t CallFile = 0, CallLine = 0, CallColumn = 0;
- const DWARFDebugLine::LineTable *LineTable = 0;
+ const DWARFLineTable *LineTable = 0;
for (uint32_t i = 0, n = InlinedChain.size(); i != n; i++) {
const DWARFDebugInfoEntryMinimal &FunctionDIE = InlinedChain[i];
std::string FileName = "<invalid>";
diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp
index 6e37b5c1a03..b8b3188ce48 100644
--- a/lib/MC/ELFObjectWriter.cpp
+++ b/lib/MC/ELFObjectWriter.cpp
@@ -733,8 +733,7 @@ void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm,
Index = -1;
}
Addend = Value;
- // Compensate for the addend on i386.
- if (is64Bit())
+ if (hasRelocationAddend())
Value = 0;
}
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index 0406ff8d446..f22b2754f68 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -19,6 +19,8 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/AsmCond.h"
#include "llvm/MC/MCParser/AsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
@@ -35,6 +37,8 @@
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include <cctype>
+#include <set>
+#include <string>
#include <vector>
using namespace llvm;
@@ -42,6 +46,8 @@ static cl::opt<bool>
FatalAssemblerWarnings("fatal-assembler-warnings",
cl::desc("Consider warnings as error"));
+MCAsmParserSemaCallback::~MCAsmParserSemaCallback() {}
+
namespace {
/// \brief Helper class for tracking macro definitions.
@@ -139,7 +145,8 @@ private:
/// ParsedOperands - The parsed operands from the last parsed statement.
SmallVector<MCParsedAsmOperand*, 8> ParsedOperands;
- /// Opcode - The opcode from the last parsed instruction.
+ /// Opcode - The opcode from the last parsed instruction. This is specific
+ /// to MS-style inline assembly.
unsigned Opcode;
public:
@@ -180,20 +187,17 @@ public:
virtual const AsmToken &Lex();
- bool ParseStatement();
void setParsingInlineAsm(bool V) { ParsingInlineAsm = V; }
- unsigned getNumParsedOperands() { return ParsedOperands.size(); }
- MCParsedAsmOperand &getParsedOperand(unsigned OpNum) {
- assert (ParsedOperands.size() > OpNum);
- return *ParsedOperands[OpNum];
- }
- void freeParsedOperands() {
- for (unsigned i = 0, e = ParsedOperands.size(); i != e; ++i)
- delete ParsedOperands[i];
- ParsedOperands.clear();
- }
- bool isInstruction() { return Opcode != (unsigned)~0x0; }
- unsigned getOpcode() { return Opcode; }
+ bool isParsingInlineAsm() { return ParsingInlineAsm; }
+
+ bool ParseMSInlineAsm(void *AsmLoc, std::string &AsmString,
+ unsigned &NumOutputs, unsigned &NumInputs,
+ SmallVectorImpl<void *> &OpDecls,
+ SmallVectorImpl<std::string> &Constraints,
+ SmallVectorImpl<std::string> &Clobbers,
+ const MCInstrInfo *MII,
+ const MCInstPrinter *IP,
+ MCAsmParserSemaCallback &SI);
bool ParseExpression(const MCExpr *&Res);
virtual bool ParseExpression(const MCExpr *&Res, SMLoc &EndLoc);
@@ -205,6 +209,7 @@ public:
private:
void CheckForValidSection();
+ bool ParseStatement();
void EatToEndOfLine();
bool ParseCppHashLineFilenameComment(const SMLoc &L);
@@ -310,6 +315,11 @@ private:
bool ParseDirectiveIrp(SMLoc DirectiveLoc); // ".irp"
bool ParseDirectiveIrpc(SMLoc DirectiveLoc); // ".irpc"
bool ParseDirectiveEndr(SMLoc DirectiveLoc); // ".endr"
+
+ // MS-style inline assembly parsing.
+ bool isInstruction() { return Opcode != (unsigned)~0x0; }
+ unsigned getOpcode() { return Opcode; }
+ void setOpcode(unsigned Value) { Opcode = Value; }
};
/// \brief Generic implementations of directive handling, etc. which is shared
@@ -1377,10 +1387,13 @@ bool AsmParser::ParseStatement() {
ParsingInlineAsm);
}
- // Free any parsed operands. If parsing ms-style inline assembly it is the
- // responsibility of the caller (i.e., clang) to free the parsed operands.
- if (!ParsingInlineAsm)
- freeParsedOperands();
+ // Free any parsed operands. If parsing ms-style inline assembly the operands
+ // will be freed by the ParseMSInlineAsm() function.
+ if (!ParsingInlineAsm) {
+ for (unsigned i = 0, e = ParsedOperands.size(); i != e; ++i)
+ delete ParsedOperands[i];
+ ParsedOperands.clear();
+ }
// Don't skip the rest of the line, the instruction parser is responsible for
// that.
@@ -3561,6 +3574,222 @@ bool AsmParser::ParseDirectiveEndr(SMLoc DirectiveLoc) {
return false;
}
+namespace {
+enum AsmRewriteKind {
+ AOK_Imm,
+ AOK_Input,
+ AOK_Output,
+ AOK_SizeDirective,
+ AOK_Skip
+};
+
+struct AsmRewrite {
+ AsmRewriteKind Kind;
+ SMLoc Loc;
+ unsigned Len;
+ unsigned Size;
+public:
+ AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len, unsigned size = 0)
+ : Kind(kind), Loc(loc), Len(len), Size(size) { }
+};
+}
+
+bool AsmParser::ParseMSInlineAsm(void *AsmLoc, std::string &AsmString,
+ unsigned &NumOutputs, unsigned &NumInputs,
+ SmallVectorImpl<void *> &OpDecls,
+ SmallVectorImpl<std::string> &Constraints,
+ SmallVectorImpl<std::string> &Clobbers,
+ const MCInstrInfo *MII,
+ const MCInstPrinter *IP,
+ MCAsmParserSemaCallback &SI) {
+ SmallVector<void*, 4> InputDecls;
+ SmallVector<void*, 4> OutputDecls;
+ SmallVector<std::string, 4> InputConstraints;
+ SmallVector<std::string, 4> OutputConstraints;
+ std::set<std::string> ClobberRegs;
+
+ SmallVector<struct AsmRewrite, 4> AsmStrRewrites;
+
+ // Prime the lexer.
+ Lex();
+
+ // While we have input, parse each statement.
+ unsigned InputIdx = 0;
+ unsigned OutputIdx = 0;
+ while (getLexer().isNot(AsmToken::Eof)) {
+ // Clear the opcode.
+ setOpcode(~0x0);
+
+ // Save the parser's conditional-ignore state before parsing the statement.
+ bool PreParseCondStateIgnore = TheCondState.Ignore;
+
+ // Save the starting point of this statement in case we need to skip it.
+ SMLoc Start = getLexer().getLoc();
+
+ if (ParseStatement())
+ return true;
+
+ // If the parser was ignoring input before this statement and is still
+ // ignoring it afterwards, the statement lies inside a false conditional
+ // block; record a Skip rewrite so it is not emitted. A directive that
+ // only just switched the state to ignore is still emitted.
+ if (PreParseCondStateIgnore && TheCondState.Ignore) {
+ unsigned Len = getLexer().getLoc().getPointer() - Start.getPointer();
+ AsmStrRewrites.push_back(AsmRewrite(AOK_Skip, Start, Len));
+ continue;
+ }
+
+ if (isInstruction()) {
+ const MCInstrDesc &Desc = MII->get(getOpcode());
+
+ // Build the list of clobbers, outputs and inputs.
+ for (unsigned i = 1, e = ParsedOperands.size(); i != e; ++i) {
+ MCParsedAsmOperand *Operand = ParsedOperands[i];
+
+ // Immediate.
+ if (Operand->isImm()) {
+ AsmStrRewrites.push_back(AsmRewrite(AOK_Imm,
+ Operand->getStartLoc(),
+ Operand->getNameLen()));
+ continue;
+ }
+
+ // Register operand.
+ if (Operand->isReg()) {
+ unsigned NumDefs = Desc.getNumDefs();
+ // Clobber.
+ if (NumDefs && Operand->getMCOperandNum() < NumDefs) {
+ std::string Reg;
+ raw_string_ostream OS(Reg);
+ IP->printRegName(OS, Operand->getReg());
+ ClobberRegs.insert(StringRef(OS.str()));
+ }
+ continue;
+ }
+
+ // Expr/Input or Output.
+ unsigned Size;
+ void *OpDecl = SI.LookupInlineAsmIdentifier(Operand->getName(), AsmLoc,
+ Size);
+ if (OpDecl) {
+ bool isOutput = (i == 1) && Desc.mayStore();
+ if (Operand->needSizeDirective())
+ AsmStrRewrites.push_back(AsmRewrite(AOK_SizeDirective,
+ Operand->getStartLoc(), 0,
+ Operand->getMemSize()));
+
+ if (isOutput) {
+ std::string Constraint = "=";
+ ++InputIdx;
+ OutputDecls.push_back(OpDecl);
+ Constraint += Operand->getConstraint().str();
+ OutputConstraints.push_back(Constraint);
+ AsmStrRewrites.push_back(AsmRewrite(AOK_Output,
+ Operand->getStartLoc(),
+ Operand->getNameLen()));
+ } else {
+ InputDecls.push_back(OpDecl);
+ InputConstraints.push_back(Operand->getConstraint().str());
+ AsmStrRewrites.push_back(AsmRewrite(AOK_Input,
+ Operand->getStartLoc(),
+ Operand->getNameLen()));
+ }
+ }
+ }
+ // Free any parsed operands.
+ for (unsigned i = 0, e = ParsedOperands.size(); i != e; ++i)
+ delete ParsedOperands[i];
+ ParsedOperands.clear();
+ }
+ }
+
+ // Set the number of Outputs and Inputs.
+ NumOutputs = OutputDecls.size();
+ NumInputs = InputDecls.size();
+
+ // Set the unique clobbers.
+ for (std::set<std::string>::iterator I = ClobberRegs.begin(),
+ E = ClobberRegs.end(); I != E; ++I)
+ Clobbers.push_back(*I);
+
+ // Merge the various outputs and inputs. Outputs are expected first.
+ if (NumOutputs || NumInputs) {
+ unsigned NumExprs = NumOutputs + NumInputs;
+ OpDecls.resize(NumExprs);
+ Constraints.resize(NumExprs);
+ for (unsigned i = 0; i < NumOutputs; ++i) {
+ OpDecls[i] = OutputDecls[i];
+ Constraints[i] = OutputConstraints[i];
+ }
+ for (unsigned i = 0, j = NumOutputs; i < NumInputs; ++i, ++j) {
+ OpDecls[j] = InputDecls[i];
+ Constraints[j] = InputConstraints[i];
+ }
+ }
+
+ // Build the IR assembly string.
+ std::string AsmStringIR;
+ AsmRewriteKind PrevKind = AOK_Imm;
+ raw_string_ostream OS(AsmStringIR);
+ const char *Start = SrcMgr.getMemoryBuffer(0)->getBufferStart();
+ for (SmallVectorImpl<struct AsmRewrite>::iterator
+ I = AsmStrRewrites.begin(), E = AsmStrRewrites.end(); I != E; ++I) {
+ const char *Loc = (*I).Loc.getPointer();
+
+ AsmRewriteKind Kind = (*I).Kind;
+
+ // Emit everything up to the immediate/expression. If the previous rewrite
+ // was a size directive, then this has already been done.
+ if (PrevKind != AOK_SizeDirective)
+ OS << StringRef(Start, Loc - Start);
+ PrevKind = Kind;
+
+ // Skip the original expression.
+ if (Kind == AOK_Skip) {
+ Start = Loc + (*I).Len;
+ continue;
+ }
+
+ // Rewrite expressions in $N notation.
+ switch (Kind) {
+ default: break;
+ case AOK_Imm:
+ OS << Twine("$$") + StringRef(Loc, (*I).Len);
+ break;
+ case AOK_Input:
+ OS << '$';
+ OS << InputIdx++;
+ break;
+ case AOK_Output:
+ OS << '$';
+ OS << OutputIdx++;
+ break;
+ case AOK_SizeDirective:
+ switch((*I).Size) {
+ default: break;
+ case 8: OS << "byte ptr "; break;
+ case 16: OS << "word ptr "; break;
+ case 32: OS << "dword ptr "; break;
+ case 64: OS << "qword ptr "; break;
+ case 80: OS << "xword ptr "; break;
+ case 128: OS << "xmmword ptr "; break;
+ case 256: OS << "ymmword ptr "; break;
+ }
+ }
+
+ // Skip the original expression.
+ if (Kind != AOK_SizeDirective)
+ Start = Loc + (*I).Len;
+ }
+
+ // Emit the remainder of the asm string.
+ const char *AsmEnd = SrcMgr.getMemoryBuffer(0)->getBufferEnd();
+ if (Start != AsmEnd)
+ OS << StringRef(Start, AsmEnd - Start);
+
+ AsmString = OS.str();
+ return false;
+}
+
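A hedged consumer-side sketch of the input this entry point rewrites, assuming clang's MS-style asm-block support on x86 (the exact operand numbering below is illustrative):

    int square(int x) {
      __asm mov eax, x    // 'x' resolves through LookupInlineAsmIdentifier
                          // and becomes a "$N" operand in the IR asm string
      __asm imul eax, eax
      __asm mov x, eax    // 'x' as an output; a size directive such as
                          // "dword ptr" is prepended via AOK_SizeDirective
      return x;
    }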
/// \brief Create an MCAsmParser instance.
MCAsmParser *llvm::createMCAsmParser(SourceMgr &SM,
MCContext &C, MCStreamer &Out,
diff --git a/lib/MC/MCParser/MCTargetAsmParser.cpp b/lib/MC/MCParser/MCTargetAsmParser.cpp
index 6fb1ba4216f..60a3a3b59a3 100644
--- a/lib/MC/MCParser/MCTargetAsmParser.cpp
+++ b/lib/MC/MCParser/MCTargetAsmParser.cpp
@@ -11,7 +11,7 @@
using namespace llvm;
MCTargetAsmParser::MCTargetAsmParser()
- : AvailableFeatures(0)
+ : AvailableFeatures(0), ParsingInlineAsm(false)
{
}
diff --git a/lib/MC/SubtargetFeature.cpp b/lib/MC/SubtargetFeature.cpp
index 378c4184a83..7625abd465f 100644
--- a/lib/MC/SubtargetFeature.cpp
+++ b/lib/MC/SubtargetFeature.cpp
@@ -119,8 +119,8 @@ void SubtargetFeatures::AddFeature(const StringRef String,
}
/// Find KV in array using binary search.
-const SubtargetFeatureKV *Find(const StringRef S, const SubtargetFeatureKV *A,
- size_t L) {
+static const SubtargetFeatureKV *Find(StringRef S, const SubtargetFeatureKV *A,
+ size_t L) {
// Make the lower bound element we're looking for
SubtargetFeatureKV KV;
KV.Key = S.data();
diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc
index cf64fa8f385..9e94068c9c3 100644
--- a/lib/Support/Unix/Signals.inc
+++ b/lib/Support/Unix/Signals.inc
@@ -121,17 +121,29 @@ static void UnregisterHandlers() {
/// NB: This must be an async signal safe function. It cannot allocate or free
/// memory, even in debug builds.
static void RemoveFilesToRemove() {
- // Note: avoid iterators in case of debug iterators that allocate or release
+ // We avoid iterators in case of debug iterators that allocate or release
// memory.
for (unsigned i = 0, e = FilesToRemove.size(); i != e; ++i) {
- // Note that we don't want to use any external code here, and we don't care
- // about errors. We're going to try as hard as we can as often as we need
- // to to make these files go away. If these aren't files, too bad.
- //
- // We do however rely on a std::string implementation for which repeated
- // calls to 'c_str()' don't allocate memory. We pre-call 'c_str()' on all
- // of these strings to try to ensure this is safe.
- unlink(FilesToRemove[i].c_str());
+ // We rely on a std::string implementation for which repeated calls to
+ // 'c_str()' don't allocate memory. We pre-call 'c_str()' on all of these
+ // strings to try to ensure this is safe.
+ const char *path = FilesToRemove[i].c_str();
+
+ // Get the status so we can determine if it's a file or directory. If we
+ // can't stat the file, ignore it.
+ struct stat buf;
+ if (stat(path, &buf) != 0)
+ continue;
+
+ // If this is not a regular file, ignore it. We want to prevent removal of
+ // special files like /dev/null, even if the compiler is being run with the
+ // super-user permissions.
+ if (!S_ISREG(buf.st_mode))
+ continue;
+
+ // Otherwise, remove the file. We ignore any errors here as there is nothing
+ // else we can do.
+ unlink(path);
}
}
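The intent of the new checks as a self-contained sketch (the real code above must additionally remain async-signal-safe, hence the pre-called c_str()):

    #include <sys/stat.h>
    #include <unistd.h>

    static void removeIfRegularFile(const char *Path) {
      struct stat Buf;
      if (stat(Path, &Buf) != 0)
        return;           // can't stat it: leave it alone
      if (!S_ISREG(Buf.st_mode))
        return;           // never unlink special files such as /dev/null
      unlink(Path);       // errors deliberately ignored
    }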
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 3e70d578231..1d4bbb74d33 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -1176,7 +1176,7 @@ static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) {
MachineRegisterInfo &MRI = MF.getRegInfo();
unsigned NumSpills = 0;
for (; NumSpills < 8; ++NumSpills)
- if (!MRI.isPhysRegOrOverlapUsed(ARM::D8 + NumSpills))
+ if (!MRI.isPhysRegUsed(ARM::D8 + NumSpills))
break;
// Don't do this for just one d-register. It's not worth it.
@@ -1249,7 +1249,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
for (unsigned i = 0; CSRegs[i]; ++i) {
unsigned Reg = CSRegs[i];
bool Spilled = false;
- if (MF.getRegInfo().isPhysRegOrOverlapUsed(Reg)) {
+ if (MF.getRegInfo().isPhysRegUsed(Reg)) {
Spilled = true;
CanEliminateFrame = false;
}
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index b2eb5784879..8de23872349 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -2554,7 +2554,8 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
DebugLoc dl, SDValue &Chain,
const Value *OrigArg,
unsigned OffsetFromOrigArg,
- unsigned ArgOffset) const {
+ unsigned ArgOffset,
+ bool ForceMutable) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -2603,7 +2604,8 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
&MemOps[0], MemOps.size());
} else
// This will point to the next argument passed via stack.
- AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true));
+ AFI->setVarArgsFrameIndex(
+ MFI->CreateFixedObject(4, ArgOffset, !ForceMutable));
}
SDValue
@@ -2729,15 +2731,20 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
// Since they could be overwritten by lowering of arguments in case of
// a tail call.
if (Flags.isByVal()) {
- unsigned VARegSize, VARegSaveSize;
- computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize);
- VarArgStyleRegisters(CCInfo, DAG,
- dl, Chain, CurOrigArg, Ins[VA.getValNo()].PartOffset, 0);
- unsigned Bytes = Flags.getByValSize() - VARegSize;
- if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
- int FI = MFI->CreateFixedObject(Bytes,
- VA.getLocMemOffset(), false);
- InVals.push_back(DAG.getFrameIndex(FI, getPointerTy()));
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ if (!AFI->getVarArgsFrameIndex()) {
+ VarArgStyleRegisters(CCInfo, DAG,
+ dl, Chain, CurOrigArg,
+ Ins[VA.getValNo()].PartOffset,
+ VA.getLocMemOffset(),
+ true /*force mutable frames*/);
+ int VAFrameIndex = AFI->getVarArgsFrameIndex();
+ InVals.push_back(DAG.getFrameIndex(VAFrameIndex, getPointerTy()));
+ } else {
+ int FI = MFI->CreateFixedObject(Flags.getByValSize(),
+ VA.getLocMemOffset(), false);
+ InVals.push_back(DAG.getFrameIndex(FI, getPointerTy()));
+ }
} else {
int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
VA.getLocMemOffset(), true);
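A plausible shape of source that exercises this path (a hypothetical example, not a regression test): under AAPCS a large aggregate passed by value may be split between r0-r3 and the stack, and the forced-mutable varargs-style frame index lets both pieces be addressed through a single save area.

    // Hypothetical C++ illustration of a split byval argument on ARM:
    struct Big { int a[6]; };   // too large to fit entirely in r0-r3

    int sum(Big b) {            // 'b' may arrive partly in registers and
      int s = 0;                // partly on the caller's stack
      for (int i = 0; i < 6; ++i)
        s += b.a[i];
      return s;
    }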
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 9acab0b0834..4eb3b2cb515 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -469,7 +469,8 @@ namespace llvm {
DebugLoc dl, SDValue &Chain,
const Value *OrigArg,
unsigned OffsetFromOrigArg,
- unsigned ArgOffset)
+ unsigned ArgOffset,
+ bool ForceMutable = false)
const;
void computeRegArea(CCState &CCInfo, MachineFunction &MF,
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index bf0dabb4a0f..d2b1cc37f21 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -525,8 +525,9 @@ static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value,
else
ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
const char *ReferenceName;
- const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address,
- &ReferenceName);
+ uint64_t SymbolValue = 0x00000000ffffffffULL & Value;
+ const char *Name = SymbolLookUp(DisInfo, SymbolValue, &ReferenceType,
+ Address, &ReferenceName);
if (Name) {
SymbolicOp.AddSymbol.Name = Name;
SymbolicOp.AddSymbol.Present = true;
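Why the mask matters, as a two-line check (illustrative): a negative int32_t sign-extends when widened to uint64_t, which would hand the symbol-lookup callback a bogus address.

    #include <cassert>
    #include <stdint.h>

    int main() {
      int32_t Value = -4;
      uint64_t Widened = Value;                         // 0xfffffffffffffffc
      uint64_t Masked = 0x00000000ffffffffULL & Value;  // 0x00000000fffffffc
      assert(Widened != Masked && Masked <= 0xffffffffULL);
      return 0;
    }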
diff --git a/lib/Target/MSP430/MSP430FrameLowering.cpp b/lib/Target/MSP430/MSP430FrameLowering.cpp
index a312c8d5b25..2e170f17bf9 100644
--- a/lib/Target/MSP430/MSP430FrameLowering.cpp
+++ b/lib/Target/MSP430/MSP430FrameLowering.cpp
@@ -221,3 +221,17 @@ MSP430FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
return true;
}
+
+void
+MSP430FrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF)
+ const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ // Create a frame entry for the FPW register that must be saved.
+ if (TFI->hasFP(MF)) {
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4, true);
+ (void)FrameIdx;
+ assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() &&
+ "Slot for FPW register must be last in order to be found!");
+ }
+}
diff --git a/lib/Target/MSP430/MSP430FrameLowering.h b/lib/Target/MSP430/MSP430FrameLowering.h
index b636827da7b..cb02545852b 100644
--- a/lib/Target/MSP430/MSP430FrameLowering.h
+++ b/lib/Target/MSP430/MSP430FrameLowering.h
@@ -46,6 +46,7 @@ public:
bool hasFP(const MachineFunction &MF) const;
bool hasReservedCallFrame(const MachineFunction &MF) const;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
};
} // End llvm namespace
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp
index aed46a2ec59..9ae238f66f5 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -220,20 +220,6 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(i+1).ChangeToImmediate(Offset);
}
-void
-MSP430RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF)
- const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- // Create a frame entry for the FPW register that must be saved.
- if (TFI->hasFP(MF)) {
- int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4, true);
- (void)FrameIdx;
- assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() &&
- "Slot for FPW register must be last in order to be found!");
- }
-}
-
unsigned MSP430RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h
index 9ee0a03f631..64a43bcafbb 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.h
+++ b/lib/Target/MSP430/MSP430RegisterInfo.h
@@ -49,8 +49,6 @@ public:
void eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, RegScavenger *RS = NULL) const;
- void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
-
// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
};
diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp
index 8991433005d..5e33fed0cc9 100644
--- a/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -25,7 +25,7 @@
using namespace llvm;
Mips16InstrInfo::Mips16InstrInfo(MipsTargetMachine &tm)
- : MipsInstrInfo(tm, /* FIXME: set mips16 unconditional br */ 0),
+ : MipsInstrInfo(tm, Mips::BimmX16),
RI(*tm.getSubtargetImpl()) {}
const MipsRegisterInfo &Mips16InstrInfo::getRegisterInfo() const {
@@ -137,12 +137,39 @@ bool Mips16InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
/// GetOppositeBranchOpc - Return the inverse of the specified
/// opcode, e.g. turning BEQ to BNE.
unsigned Mips16InstrInfo::GetOppositeBranchOpc(unsigned Opc) const {
+ switch (Opc) {
+ default: llvm_unreachable("Illegal opcode!");
+ case Mips::BeqzRxImmX16: return Mips::BnezRxImmX16;
+ case Mips::BnezRxImmX16: return Mips::BeqzRxImmX16;
+ case Mips::BteqzT8CmpX16: return Mips::BtnezT8CmpX16;
+ case Mips::BteqzT8SltX16: return Mips::BtnezT8SltX16;
+ case Mips::BteqzT8SltiX16: return Mips::BtnezT8SltiX16;
+ case Mips::BtnezX16: return Mips::BteqzX16;
+ case Mips::BtnezT8CmpiX16: return Mips::BteqzT8CmpiX16;
+ case Mips::BtnezT8SltuX16: return Mips::BteqzT8SltuX16;
+ case Mips::BtnezT8SltiuX16: return Mips::BteqzT8SltiuX16;
+ case Mips::BteqzX16: return Mips::BtnezX16;
+ case Mips::BteqzT8CmpiX16: return Mips::BtnezT8CmpiX16;
+ case Mips::BteqzT8SltuX16: return Mips::BtnezT8SltuX16;
+ case Mips::BteqzT8SltiuX16: return Mips::BtnezT8SltiuX16;
+ case Mips::BtnezT8CmpX16: return Mips::BteqzT8CmpX16;
+ case Mips::BtnezT8SltX16: return Mips::BteqzT8SltX16;
+ case Mips::BtnezT8SltiX16: return Mips::BteqzT8SltiX16;
+ }
assert(false && "Implement this function.");
return 0;
}
unsigned Mips16InstrInfo::GetAnalyzableBrOpc(unsigned Opc) const {
- return 0;
+ return (Opc == Mips::BeqzRxImmX16 || Opc == Mips::BimmX16 ||
+ Opc == Mips::BnezRxImmX16 || Opc == Mips::BteqzX16 ||
+ Opc == Mips::BteqzT8CmpX16 || Opc == Mips::BteqzT8CmpiX16 ||
+ Opc == Mips::BteqzT8SltX16 || Opc == Mips::BteqzT8SltuX16 ||
+ Opc == Mips::BteqzT8SltiX16 || Opc == Mips::BteqzT8SltiuX16 ||
+ Opc == Mips::BtnezX16 || Opc == Mips::BtnezT8CmpX16 ||
+ Opc == Mips::BtnezT8CmpiX16 || Opc == Mips::BtnezT8SltX16 ||
+ Opc == Mips::BtnezT8SltuX16 || Opc == Mips::BtnezT8SltiX16 ||
+ Opc == Mips::BtnezT8SltiuX16 ) ? Opc : 0;
}
void Mips16InstrInfo::ExpandRetRA16(MachineBasicBlock &MBB,
diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td
index eba201a0ea9..2694b09206f 100644
--- a/lib/Target/Mips/Mips16InstrInfo.td
+++ b/lib/Target/Mips/Mips16InstrInfo.td
@@ -21,6 +21,26 @@ def mem16 : Operand<i32> {
}
//
+// EXT-I instruction format
+//
+class FEXT_I16_ins<bits<5> eop, string asmstr, InstrItinClass itin> :
+ FEXT_I16<eop, (outs), (ins brtarget:$imm16),
+ !strconcat(asmstr, "\t$imm16"),[], itin>;
+
+//
+// EXT-I8 instruction format
+//
+
+class FEXT_I816_ins_base<bits<3> _func, string asmstr,
+ string asmstr2, InstrItinClass itin>:
+ FEXT_I816<_func, (outs), (ins uimm16:$imm), !strconcat(asmstr, asmstr2),
+ [], itin>;
+
+class FEXT_I816_ins<bits<3> _func, string asmstr,
+ InstrItinClass itin>:
+ FEXT_I816_ins_base<_func, asmstr, "\t$imm", itin>;
+
+//
// Assembler formats in alphabetical order.
// Natural and pseudos are mixed together.
//
@@ -40,6 +60,11 @@ class FEXT_RI16_ins<bits<5> _op, string asmstr,
class FEXT_RI16_PC_ins<bits<5> _op, string asmstr, InstrItinClass itin>:
FEXT_RI16_ins_base<_op, asmstr, "\t$rx, $$pc, $imm", itin>;
+class FEXT_RI16_B_ins<bits<5> _op, string asmstr,
+ InstrItinClass itin>:
+ FEXT_RI16<_op, (outs), (ins CPU16Regs:$rx, brtarget:$imm),
+ !strconcat(asmstr, "\t$rx, $imm"), [], itin>;
+
class FEXT_2RI16_ins<bits<5> _op, string asmstr,
InstrItinClass itin>:
FEXT_RI16<_op, (outs CPU16Regs:$rx), (ins CPU16Regs:$rx_, simm16:$imm),
@@ -47,6 +72,7 @@ class FEXT_2RI16_ins<bits<5> _op, string asmstr,
let Constraints = "$rx_ = $rx";
}
+
// this has an explicit sp argument that we ignore to work around a problem
// in the compiler
class FEXT_RI16_SP_explicit_ins<bits<5> _op, string asmstr,
@@ -75,6 +101,31 @@ class FEXT_SHIFT16_ins<bits<2> _f, string asmstr, InstrItinClass itin>:
FEXT_SHIFT16<_f, (outs CPU16Regs:$rx), (ins CPU16Regs:$ry, shamt:$sa),
!strconcat(asmstr, "\t$rx, $ry, $sa"), [], itin>;
+//
+// EXT-T8I8
+//
+class FEXT_T8I816_ins<bits<3> _func, string asmstr, string asmstr2,
+ InstrItinClass itin>:
+ FEXT_I816<_func, (outs),
+ (ins CPU16Regs:$rx, CPU16Regs:$ry, brtarget:$imm),
+ !strconcat(asmstr2, !strconcat("\t$rx, $ry\n\t",
+ !strconcat(asmstr, "\t$imm"))),[], itin> {
+ let isCodeGenOnly=1;
+}
+
+//
+// EXT-T8I8I
+//
+class FEXT_T8I8I16_ins<bits<3> _func, string asmstr, string asmstr2,
+ InstrItinClass itin>:
+ FEXT_I816<_func, (outs),
+ (ins CPU16Regs:$rx, simm16:$imm, brtarget:$targ),
+ !strconcat(asmstr2, !strconcat("\t$rx, $imm\n\t",
+ !strconcat(asmstr, "\t$targ"))), [], itin> {
+ let isCodeGenOnly=1;
+}
+//
+
//
// I8_MOVR32 instruction format (used only by the MOVR32 instruction)
@@ -165,6 +216,17 @@ class ArithLogic16Defs<bit isCom=0> {
bit neverHasSideEffects = 1;
}
+class branch16 {
+ bit isBranch = 1;
+ bit isTerminator = 1;
+ bit isBarrier = 1;
+}
+
+class cbranch16 {
+ bit isBranch = 1;
+ bit isTerminator = 1;
+}
+
class MayLoad {
bit mayLoad = 1;
}
@@ -204,6 +266,69 @@ def AdduRxRyRz16: FRRR16_ins<01, "addu", IIAlu>, ArithLogic16Defs<1>;
// To do a bitwise logical AND.
def AndRxRxRy16: FRxRxRy16_ins<0b01100, "and", IIAlu>, ArithLogic16Defs<1>;
+
+
+//
+// Format: BEQZ rx, offset MIPS16e
+// Purpose: Branch on Equal to Zero (Extended)
+// To test a GPR then do a PC-relative conditional branch.
+//
+def BeqzRxImmX16: FEXT_RI16_B_ins<0b00100, "beqz", IIAlu>, cbranch16;
+
+// Format: B offset MIPS16e
+// Purpose: Unconditional Branch
+// To do an unconditional PC-relative branch.
+//
+def BimmX16: FEXT_I16_ins<0b00010, "b", IIAlu>, branch16;
+
+//
+// Format: BNEZ rx, offset MIPS16e
+// Purpose: Branch on Not Equal to Zero (Extended)
+// To test a GPR then do a PC-relative conditional branch.
+//
+def BnezRxImmX16: FEXT_RI16_B_ins<0b00101, "bnez", IIAlu>, cbranch16;
+
+//
+// Format: BTEQZ offset MIPS16e
+// Purpose: Branch on T Equal to Zero (Extended)
+// To test special register T then do a PC-relative conditional branch.
+//
+def BteqzX16: FEXT_I816_ins<0b000, "bteqz", IIAlu>, cbranch16;
+
+def BteqzT8CmpX16: FEXT_T8I816_ins<0b000, "bteqz", "cmp", IIAlu>, cbranch16;
+
+def BteqzT8CmpiX16: FEXT_T8I8I16_ins<0b000, "bteqz", "cmpi", IIAlu>,
+ cbranch16;
+
+def BteqzT8SltX16: FEXT_T8I816_ins<0b000, "bteqz", "slt", IIAlu>, cbranch16;
+
+def BteqzT8SltuX16: FEXT_T8I816_ins<0b000, "bteqz", "sltu", IIAlu>, cbranch16;
+
+def BteqzT8SltiX16: FEXT_T8I8I16_ins<0b000, "bteqz", "slti", IIAlu>, cbranch16;
+
+def BteqzT8SltiuX16: FEXT_T8I8I16_ins<0b000, "bteqz", "sltiu", IIAlu>,
+ cbranch16;
+
+//
+// Format: BTNEZ offset MIPS16e
+// Purpose: Branch on T Not Equal to Zero (Extended)
+// To test special register T then do a PC-relative conditional branch.
+//
+def BtnezX16: FEXT_I816_ins<0b001, "btnez", IIAlu>, cbranch16;
+
+def BtnezT8CmpX16: FEXT_T8I816_ins<0b000, "btnez", "cmp", IIAlu>, cbranch16;
+
+def BtnezT8CmpiX16: FEXT_T8I8I16_ins<0b000, "btnez", "cmpi", IIAlu>, cbranch16;
+
+def BtnezT8SltX16: FEXT_T8I816_ins<0b000, "btnez", "slt", IIAlu>, cbranch16;
+
+def BtnezT8SltuX16: FEXT_T8I816_ins<0b000, "btnez", "sltu", IIAlu>, cbranch16;
+
+def BtnezT8SltiX16: FEXT_T8I8I16_ins<0b000, "btnez", "slti", IIAlu>, cbranch16;
+
+def BtnezT8SltiuX16: FEXT_T8I8I16_ins<0b000, "btnez", "sltiu", IIAlu>,
+ cbranch16;
+
//
// Format: DIV rx, ry MIPS16e
// Purpose: Divide Word
@@ -562,6 +687,11 @@ def: StoreM16_pat<truncstorei8, SbRxRyOffMemX16>;
def: StoreM16_pat<truncstorei16, ShRxRyOffMemX16>;
def: StoreM16_pat<store, SwRxRyOffMemX16>;
+// Unconditional branch
+class UncondBranch16_pat<SDNode OpNode, Instruction I>:
+ Mips16Pat<(OpNode bb:$imm16), (I bb:$imm16)> {
+ let Predicates = [RelocPIC, InMips16Mode];
+ }
// Jump and Link (Call)
let isCall=1, hasDelaySlot=1 in
@@ -574,7 +704,144 @@ let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, hasCtrlDep=1,
hasExtraSrcRegAllocReq = 1 in
def RetRA16 : MipsPseudo16<(outs), (ins), "", [(MipsRet)]>;
+
+//
+// Some conditional branch patterns are not generated by llvm at this time.
+// Some are unused for seemingly arbitrary reasons: e.g. they are selected
+// for signed comparisons while a different pattern is used for unsigned.
+// I am pushing these upstream from the full mips16 port, where they seemed
+// necessary earlier (the mips32 port has them too), but now I cannot create
+// test cases that exercise them. While I sort this out I will leave the
+// extra patterns commented out; if I can confirm they are really unused,
+// I will delete the code. I don't want to check the code in uncommented
+// without a valid test case. In some cases the compiler generates setcc
+// patterns instead; I implemented setcc first, which may have masked the
+// problem. The setcc variants are suboptimal for mips16, so I may want to
+// figure out how to enable the brcond patterns, or else try new
+// combinations of brcond and setcc.
+//
+//
+// bcond-seteq
+//
+def: Mips16Pat
+ <(brcond (i32 (seteq CPU16Regs:$rx, CPU16Regs:$ry)), bb:$imm16),
+ (BteqzT8CmpX16 CPU16Regs:$rx, CPU16Regs:$ry, bb:$imm16)
+ >;
+
+
+def: Mips16Pat
+ <(brcond (i32 (seteq CPU16Regs:$rx, immZExt16:$imm)), bb:$targ16),
+ (BteqzT8CmpiX16 CPU16Regs:$rx, immSExt16:$imm, bb:$targ16)
+ >;
+
+def: Mips16Pat
+ <(brcond (i32 (seteq CPU16Regs:$rx, 0)), bb:$targ16),
+ (BeqzRxImmX16 CPU16Regs:$rx, bb:$targ16)
+ >;
+
+//
+// bcond-setgt (do we need to have this pair of setlt, setgt??)
+//
+def: Mips16Pat
+ <(brcond (i32 (setgt CPU16Regs:$rx, CPU16Regs:$ry)), bb:$imm16),
+ (BtnezT8SltX16 CPU16Regs:$ry, CPU16Regs:$rx, bb:$imm16)
+ >;
+
+//
+// bcond-setge
+//
+def: Mips16Pat
+ <(brcond (i32 (setge CPU16Regs:$rx, CPU16Regs:$ry)), bb:$imm16),
+ (BteqzT8SltX16 CPU16Regs:$rx, CPU16Regs:$ry, bb:$imm16)
+ >;
+
+//
+// never called because compiler transforms a >= k to a > (k-1)
+//def: Mips16Pat
+// <(brcond (i32 (setge CPU16Regs:$rx, immSExt16:$imm)), bb:$imm16),
+// (BteqzT8SltiX16 CPU16Regs:$rx, immSExt16:$imm, bb:$imm16)
+// >;
+
+//
+// bcond-setlt
+//
+def: Mips16Pat
+ <(brcond (i32 (setlt CPU16Regs:$rx, CPU16Regs:$ry)), bb:$imm16),
+ (BtnezT8SltX16 CPU16Regs:$rx, CPU16Regs:$ry, bb:$imm16)
+ >;
+
+def: Mips16Pat
+ <(brcond (i32 (setlt CPU16Regs:$rx, immSExt16:$imm)), bb:$imm16),
+ (BtnezT8SltiX16 CPU16Regs:$rx, immSExt16:$imm, bb:$imm16)
+ >;
+
+//
+// bcond-setle
+//
+def: Mips16Pat
+ <(brcond (i32 (setle CPU16Regs:$rx, CPU16Regs:$ry)), bb:$imm16),
+ (BteqzT8SltX16 CPU16Regs:$ry, CPU16Regs:$rx, bb:$imm16)
+ >;
+
+//
+// bcond-setne
+//
+def: Mips16Pat
+ <(brcond (i32 (setne CPU16Regs:$rx, CPU16Regs:$ry)), bb:$imm16),
+ (BtnezT8CmpX16 CPU16Regs:$rx, CPU16Regs:$ry, bb:$imm16)
+ >;
+
+def: Mips16Pat
+ <(brcond (i32 (setne CPU16Regs:$rx, immZExt16:$imm)), bb:$targ16),
+ (BtnezT8CmpiX16 CPU16Regs:$rx, immSExt16:$imm, bb:$targ16)
+ >;
+
+def: Mips16Pat
+ <(brcond (i32 (setne CPU16Regs:$rx, 0)), bb:$targ16),
+ (BnezRxImmX16 CPU16Regs:$rx, bb:$targ16)
+ >;
+
+//
+// This pattern needs to be here, but I forget which code generates it.
+//
+def: Mips16Pat
+ <(brcond CPU16Regs:$rx, bb:$targ16),
+ (BnezRxImmX16 CPU16Regs:$rx, bb:$targ16)
+ >;
+
+//
+
+//
+// bcond-setugt
+//
+//def: Mips16Pat
+// <(brcond (i32 (setugt CPU16Regs:$rx, CPU16Regs:$ry)), bb:$imm16),
+// (BtnezT8SltuX16 CPU16Regs:$ry, CPU16Regs:$rx, bb:$imm16)
+// >;
+
+//
+// bcond-setuge
+//
+//def: Mips16Pat
+// <(brcond (i32 (setuge CPU16Regs:$rx, CPU16Regs:$ry)), bb:$imm16),
+// (BteqzT8SltuX16 CPU16Regs:$rx, CPU16Regs:$ry, bb:$imm16)
+// >;
+
+
+//
+// bcond-setult
+//
+//def: Mips16Pat
+// <(brcond (i32 (setult CPU16Regs:$rx, CPU16Regs:$ry)), bb:$imm16),
+// (BtnezT8SltuX16 CPU16Regs:$rx, CPU16Regs:$ry, bb:$imm16)
+// >;
+
+def: UncondBranch16_pat<br, BimmX16>;
+
// Small immediates
+def: Mips16Pat<(i32 immSExt16:$in),
+ (AddiuRxRxImmX16 (Move32R16 ZERO), immSExt16:$in)>;
+
def: Mips16Pat<(i32 immZExt16:$in), (LiRxImmX16 immZExt16:$in)>;
//
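
As a side note on the bcond-seteq patterns above, here is a minimal C-level
sketch (an assumed example, not part of the patch) of the compare-and-branch
shapes they are meant to select when compiling for mips16:

    // Register-register equality: brcond(seteq $rx, $ry) should select to
    // a cmp that sets T8 followed by bteqz (BteqzT8CmpX16), rather than a
    // setcc materialization plus bnez.
    extern void hit(void);

    void branchOnEqual(int x, int y) {
      if (x == y)
        hit();
    }

    // Compare against zero: brcond(seteq $rx, 0) folds to a single
    // beqz (BeqzRxImmX16).
    void branchOnZero(int x) {
      if (x == 0)
        hit();
    }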
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index 99a9f25abd2..ed0ea0e849a 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -177,6 +177,7 @@ def BLTZ64 : CBranchZero<0x01, 0, "bltz", setlt, CPU64Regs>;
}
let DecoderNamespace = "Mips64" in
def JALR64 : JumpLinkReg<0x00, 0x09, "jalr", CPU64Regs>;
+def TAILCALL64_R : JumpFR<CPU64Regs, MipsTailCall>, IsTailCall;
let DecoderNamespace = "Mips64" in {
/// Multiply and Divide Instructions.
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 2236af9a7b6..5cc9662864e 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -25,6 +25,7 @@
#include "llvm/GlobalVariable.h"
#include "llvm/Intrinsics.h"
#include "llvm/CallingConv.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -32,12 +33,19 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+STATISTIC(NumTailCalls, "Number of tail calls");
+
+static cl::opt<bool>
+EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden,
+ cl::desc("MIPS: Enable tail calls."), cl::init(false));
+
// If I is a shifted mask, set the size (Size) and the first bit of the
// mask (Pos), and return true.
// For example, if I is 0x003ff800, (Pos, Size) = (11, 11).
@@ -58,6 +66,7 @@ static SDValue GetGlobalReg(SelectionDAG &DAG, EVT Ty) {
const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
case MipsISD::JmpLink: return "MipsISD::JmpLink";
+ case MipsISD::TailCall: return "MipsISD::TailCall";
case MipsISD::Hi: return "MipsISD::Hi";
case MipsISD::Lo: return "MipsISD::Lo";
case MipsISD::GPRel: return "MipsISD::GPRel";
@@ -2870,9 +2879,26 @@ PassByValArg64(SDValue Chain, DebugLoc dl,
MemOpChains.push_back(Chain);
}
+/// IsEligibleForTailCallOptimization - Check whether the call is eligible
+/// for tail call optimization.
+bool MipsTargetLowering::
+IsEligibleForTailCallOptimization(CallingConv::ID CalleeCC,
+ unsigned NextStackOffset) const {
+ if (!EnableMipsTailCalls)
+ return false;
+
+ // Do not tail-call optimize if there is an argument passed on the stack.
+ if (IsO32 && (CalleeCC != CallingConv::Fast)) {
+ if (NextStackOffset > 16)
+ return false;
+ } else if (NextStackOffset)
+ return false;
+
+ return true;
+}
+
/// LowerCall - function arguments are copied from virtual regs to
/// (physical regs)/(stack frame); CALLSEQ_START and CALLSEQ_END are emitted.
-/// TODO: isTailCall.
SDValue
MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
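
To make the O32 rule in IsEligibleForTailCallOptimization concrete, here is an
assumed example (not from the patch), compiled with -enable-mips-tail-calls:

    int callee4(int a, int b, int c, int d);
    int callee5(int a, int b, int c, int d, int e);

    // Four i32 args fit in registers; NextStackOffset is clamped to the
    // 16-byte register-save area, so NextStackOffset > 16 is false and the
    // call may be emitted as a tail call.
    int caller4(int a, int b, int c, int d) { return callee4(a, b, c, d); }

    // A fifth i32 arg is passed on the stack, so NextStackOffset > 16 and
    // the call falls back to an ordinary jal/jalr sequence.
    int caller5(int a, int b, int c, int d, int e) {
      return callee5(a, b, c, d, e);
    }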
@@ -2887,14 +2913,10 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
CallingConv::ID CallConv = CLI.CallConv;
bool isVarArg = CLI.IsVarArg;
- // MIPs target does not yet support tail call optimization.
- isTailCall = false;
-
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
const TargetFrameLowering *TFL = MF.getTarget().getFrameLowering();
bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_;
- MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
@@ -2921,19 +2943,25 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (IsO32 && (CallConv != CallingConv::Fast))
NextStackOffset = std::max(NextStackOffset, (unsigned)16);
+ // Check if it's really possible to do a tail call.
+ if (isTailCall)
+ isTailCall = IsEligibleForTailCallOptimization(CallConv, NextStackOffset);
+
+ if (isTailCall)
+ ++NumTailCalls;
+
// Chain is the output chain of the last Load/Store or CopyToReg node.
// ByValChain is the output chain of the last Memcpy node created for copying
// byval arguments to the stack.
SDValue NextStackOffsetVal = DAG.getIntPtrConstant(NextStackOffset, true);
- Chain = DAG.getCALLSEQ_START(Chain, NextStackOffsetVal);
+
+ if (!isTailCall)
+ Chain = DAG.getCALLSEQ_START(Chain, NextStackOffsetVal);
SDValue StackPtr = DAG.getCopyFromReg(Chain, dl,
IsN64 ? Mips::SP_64 : Mips::SP,
getPointerTy());
- if (MipsFI->getMaxCallFrameSize() < NextStackOffset)
- MipsFI->setMaxCallFrameSize(NextStackOffset);
-
// With EABI it is possible to have 16 args in registers.
SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
@@ -3138,6 +3166,9 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (InFlag.getNode())
Ops.push_back(InFlag);
+ if (isTailCall)
+ return DAG.getNode(MipsISD::TailCall, dl, MVT::Other, &Ops[0], Ops.size());
+
Chain = DAG.getNode(MipsISD::JmpLink, dl, NodeTys, &Ops[0], Ops.size());
InFlag = Chain.getValue(1);
@@ -3379,7 +3410,8 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
if (DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
unsigned Reg = MipsFI->getSRetReturnReg();
if (!Reg) {
- Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i32));
+ Reg = MF.getRegInfo().
+ createVirtualRegister(getRegClassFor(IsN64 ? MVT::i64 : MVT::i32));
MipsFI->setSRetReturnReg(Reg);
}
SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]);
@@ -3508,7 +3540,8 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
llvm_unreachable("sret virtual register not created in the entry block");
SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
- Chain = DAG.getCopyToReg(Chain, dl, Mips::V0, Val, Flag);
+ Chain = DAG.getCopyToReg(Chain, dl, IsN64 ? Mips::V0_64 : Mips::V0, Val,
+ Flag);
Flag = Chain.getValue(1);
}
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index 28d126b62c6..b75a513fa7e 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -29,6 +29,9 @@ namespace llvm {
// Jump and link (call)
JmpLink,
+ // Tail call
+ TailCall,
+
// Get the Higher 16 bits from a 32-bit immediate
// No relation with Mips Hi register
Hi,
@@ -205,6 +208,11 @@ namespace llvm {
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ /// IsEligibleForTailCallOptimization - Check whether the call is eligible
+ /// for tail call optimization.
+ bool IsEligibleForTailCallOptimization(CallingConv::ID CalleeCC,
+ unsigned NextStackOffset) const;
+
virtual SDValue
LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 5a51fc80eb6..cc216c391dd 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -52,6 +52,10 @@ def MipsJmpLink : SDNode<"MipsISD::JmpLink",SDT_MipsJmpLink,
[SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
SDNPVariadic]>;
+// Tail call
+def MipsTailCall : SDNode<"MipsISD::TailCall", SDT_MipsJmpLink,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
// Hi and Lo nodes are used to handle global addresses. Used on
// MipsISelLowering to lower stuff like GlobalAddress, ExternalSymbol
// static model. (nothing to do with Mips Registers Hi and Lo)
@@ -175,6 +179,27 @@ class MipsPat<dag pattern, dag result> : Pat<pattern, result> {
let Predicates = [HasStandardEncoding];
}
+class IsBranch {
+ bit isBranch = 1;
+}
+
+class IsReturn {
+ bit isReturn = 1;
+}
+
+class IsCall {
+ bit isCall = 1;
+}
+
+class IsTailCall {
+ bit isCall = 1;
+ bit isTerminator = 1;
+ bit isReturn = 1;
+ bit isBarrier = 1;
+ bit hasExtraSrcRegAllocReq = 1;
+ bit isCodeGenOnly = 1;
+}
+
//===----------------------------------------------------------------------===//
// Instruction format superclass
//===----------------------------------------------------------------------===//
@@ -573,14 +598,13 @@ class SetCC_I<bits<6> op, string instr_asm, PatFrag cond_op, Operand Od,
IIAlu>;
// Jump
-class JumpFJ<bits<6> op, string instr_asm>:
- FJ<op, (outs), (ins jmptarget:$target),
- !strconcat(instr_asm, "\t$target"), [(br bb:$target)], IIBranch> {
- let isBranch=1;
+class JumpFJ<bits<6> op, DAGOperand opnd, string instr_asm,
+ SDPatternOperator operator, SDPatternOperator targetoperator>:
+ FJ<op, (outs), (ins opnd:$target), !strconcat(instr_asm, "\t$target"),
+ [(operator targetoperator:$target)], IIBranch> {
let isTerminator=1;
let isBarrier=1;
let hasDelaySlot = 1;
- let Predicates = [RelocStatic, HasStandardEncoding];
let DecoderMethod = "DecodeJumpTarget";
let Defs = [AT];
}
@@ -601,21 +625,21 @@ class UncondBranch<bits<6> op, string instr_asm>:
// Base class for indirect branch and return instruction classes.
let isTerminator=1, isBarrier=1, hasDelaySlot = 1 in
-class JumpFR<RegisterClass RC, list<dag> pattern>:
- FR<0, 0x8, (outs), (ins RC:$rs), "jr\t$rs", pattern, IIBranch> {
+class JumpFR<RegisterClass RC, SDPatternOperator operator = null_frag>:
+ FR<0, 0x8, (outs), (ins RC:$rs), "jr\t$rs", [(operator RC:$rs)], IIBranch> {
let rt = 0;
let rd = 0;
let shamt = 0;
}
// Indirect branch
-class IndirectBranch<RegisterClass RC>: JumpFR<RC, [(brind RC:$rs)]> {
+class IndirectBranch<RegisterClass RC>: JumpFR<RC, brind> {
let isBranch = 1;
let isIndirectBranch = 1;
}
// Return instruction
-class RetBase<RegisterClass RC>: JumpFR<RC, []> {
+class RetBase<RegisterClass RC>: JumpFR<RC> {
let isReturn = 1;
let isCodeGenOnly = 1;
let hasCtrlDep = 1;
@@ -980,7 +1004,8 @@ def SC_P8 : SCBase<0x38, "sc", CPURegs, mem64>,
}
/// Jump and Branch Instructions
-def J : JumpFJ<0x02, "j">;
+def J : JumpFJ<0x02, jmptarget, "j", br, bb>,
+ Requires<[RelocStatic, HasStandardEncoding]>, IsBranch;
def JR : IndirectBranch<CPURegs>;
def B : UncondBranch<0x04, "b">;
def BEQ : CBranch<0x04, "beq", seteq, CPURegs>;
@@ -998,6 +1023,8 @@ def JAL : JumpLink<0x03, "jal">;
def JALR : JumpLinkReg<0x00, 0x09, "jalr", CPURegs>;
def BGEZAL : BranchLink<"bgezal", 0x11, CPURegs>;
def BLTZAL : BranchLink<"bltzal", 0x10, CPURegs>;
+def TAILCALL : JumpFJ<0x02, calltarget, "j", MipsTailCall, imm>, IsTailCall;
+def TAILCALL_R : JumpFR<CPURegs, MipsTailCall>, IsTailCall;
def RET : RetBase<CPURegs>;
@@ -1107,6 +1134,11 @@ def : MipsPat<(MipsJmpLink (i32 texternalsym:$dst)),
//def : MipsPat<(MipsJmpLink CPURegs:$dst),
// (JALR CPURegs:$dst)>;
+// Tail call
+def : MipsPat<(MipsTailCall (iPTR tglobaladdr:$dst)),
+ (TAILCALL tglobaladdr:$dst)>;
+def : MipsPat<(MipsTailCall (iPTR texternalsym:$dst)),
+ (TAILCALL texternalsym:$dst)>;
// hi/lo relocs
def : MipsPat<(MipsHi tglobaladdr:$in), (LUi tglobaladdr:$in)>;
def : MipsPat<(MipsHi tblockaddress:$in), (LUi tblockaddress:$in)>;
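
A sketch of how the tail-call patterns above fire (assumed example): a call in
tail position lowers to MipsISD::TailCall, which the MipsPat defs select to
TAILCALL, i.e. a plain "j" that reuses the caller's frame instead of
jal callee followed by jr $ra.

    int callee(int a, int b);

    int caller(int a, int b) {
      // Tail position: with tail calls enabled and no stack-passed
      // arguments, this becomes TAILCALL (j callee).
      return callee(b, a);
    }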
diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h
index 93ce94803a0..5b766f22a80 100644
--- a/lib/Target/Mips/MipsMachineFunction.h
+++ b/lib/Target/Mips/MipsMachineFunction.h
@@ -46,15 +46,13 @@ class MipsFunctionInfo : public MachineFunctionInfo {
// InArgFIRange: Range of indices of all frame objects created during call to
// LowerFormalArguments.
std::pair<int, int> InArgFIRange;
- unsigned MaxCallFrameSize;
bool EmitNOAT;
public:
MipsFunctionInfo(MachineFunction& MF)
: MF(MF), SRetReturnReg(0), GlobalBaseReg(0),
- VarArgsFrameIndex(0), InArgFIRange(std::make_pair(-1, 0)),
- MaxCallFrameSize(0), EmitNOAT(false)
+ VarArgsFrameIndex(0), InArgFIRange(std::make_pair(-1, 0)), EmitNOAT(false)
{}
bool isInArgFI(int FI) const {
@@ -71,9 +69,6 @@ public:
int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
- unsigned getMaxCallFrameSize() const { return MaxCallFrameSize; }
- void setMaxCallFrameSize(unsigned S) { MaxCallFrameSize = S; }
-
bool getEmitNOAT() const { return EmitNOAT; }
void setEmitNOAT() { EmitNOAT = true; }
};
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index c18250a78f7..36db4b51799 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -4224,7 +4224,52 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op,
return SDValue();
if (Op.getOperand(0).getValueType() == MVT::i64) {
- SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op.getOperand(0));
+ SDValue SINT = Op.getOperand(0);
+ // When converting to single-precision, we actually need to convert
+ // to double-precision first and then round to single-precision.
+ // To avoid double-rounding effects during that operation, we have
+ // to prepare the input operand. Bits that might be truncated when
+ // converting to double-precision are replaced by a bit that won't
+ // be lost at this stage, but is below the single-precision rounding
+ // position.
+ //
+ // However, if -enable-unsafe-fp-math is in effect, accept double
+ // rounding to avoid the extra overhead.
+ if (Op.getValueType() == MVT::f32 &&
+ !DAG.getTarget().Options.UnsafeFPMath) {
+
+ // Twiddle input to make sure the low 11 bits are zero. (If this
+ // is the case, we are guaranteed the value will fit into the 53 bit
+ // mantissa of an IEEE double-precision value without rounding.)
+ // If any of those low 11 bits were not zero originally, make sure
+ // bit 12 (value 2048) is set instead, so that the final rounding
+ // to single-precision gets the correct result.
+ SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
+ SINT, DAG.getConstant(2047, MVT::i64));
+ Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
+ Round, DAG.getConstant(2047, MVT::i64));
+ Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
+ Round = DAG.getNode(ISD::AND, dl, MVT::i64,
+ Round, DAG.getConstant(-2048, MVT::i64));
+
+ // However, we cannot use that value unconditionally: if the magnitude
+ // of the input value is small, the bit-twiddling we did above might
+ // end up visibly changing the output. Fortunately, in that case, we
+ // don't need to twiddle bits since the original input will convert
+ // exactly to double-precision floating-point already. Therefore,
+ // construct a conditional to use the original value if the top 11
+ // bits are all sign-bit copies, and use the rounded value computed
+ // above otherwise.
+ SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
+ SINT, DAG.getConstant(53, MVT::i32));
+ Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
+ Cond, DAG.getConstant(1, MVT::i64));
+ Cond = DAG.getSetCC(dl, MVT::i32,
+ Cond, DAG.getConstant(1, MVT::i64), ISD::SETUGT);
+
+ SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
+ }
+ SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Bits);
if (Op.getValueType() == MVT::f32)
FP = DAG.getNode(ISD::FP_ROUND, dl,
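
The bit-twiddling above can be hard to follow in DAG form; here is a minimal
scalar C++ sketch of the same computation, mirroring the node sequence in the
patch (illustrative only):

    #include <cstdint>

    // Prepare an i64 for i64 -> f64 -> f32 so the result matches a direct,
    // correctly rounded i64 -> f32 conversion.
    int64_t prepareForSingleRounding(int64_t SInt) {
      int64_t Round = SInt & 2047;  // the low 11 bits f64 may truncate
      Round += 2047;                // carries into bit 11 iff any were set
      Round |= SInt;                // restore the high bits
      Round &= ~INT64_C(2047);      // clear low 11 bits; bit 11 is sticky
      // Small-magnitude values convert to f64 exactly, so keep them as-is:
      // the top 11 bits are all sign copies iff (SInt >> 53) + 1 <= 1
      // when compared as unsigned (the SETUGT above).
      uint64_t Cond = static_cast<uint64_t>((SInt >> 53) + 1);
      return Cond > 1 ? Round : SInt;
    }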
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 454664e3ed1..e07fafcb207 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -11,6 +11,7 @@
#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
@@ -180,6 +181,7 @@ struct X86Operand : public MCParsedAsmOperand {
unsigned IndexReg;
unsigned Scale;
unsigned Size;
+ bool NeedSizeDir;
} Mem;
};
@@ -312,6 +314,16 @@ struct X86Operand : public MCParsedAsmOperand {
return isImmSExti64i32Value(CE->getValue());
}
+ unsigned getMemSize() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.Size;
+ }
+
+ bool needSizeDirective() const {
+ assert(Kind == Memory && "Invalid access!");
+ return Mem.NeedSizeDir;
+ }
+
bool isMem() const { return Kind == Memory; }
bool isMem8() const {
return Kind == Memory && (!Mem.Size || Mem.Size == 8);
@@ -451,7 +463,8 @@ struct X86Operand : public MCParsedAsmOperand {
/// Create an absolute memory operand.
static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc,
- SMLoc EndLoc, unsigned Size = 0) {
+ SMLoc EndLoc, unsigned Size = 0,
+ bool NeedSizeDir = false) {
X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
Res->Mem.SegReg = 0;
Res->Mem.Disp = Disp;
@@ -459,6 +472,7 @@ struct X86Operand : public MCParsedAsmOperand {
Res->Mem.IndexReg = 0;
Res->Mem.Scale = 1;
Res->Mem.Size = Size;
+ Res->Mem.NeedSizeDir = NeedSizeDir;
return Res;
}
@@ -466,7 +480,7 @@ struct X86Operand : public MCParsedAsmOperand {
static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp,
unsigned BaseReg, unsigned IndexReg,
unsigned Scale, SMLoc StartLoc, SMLoc EndLoc,
- unsigned Size = 0) {
+ unsigned Size = 0, bool NeedSizeDir = false) {
// We should never just have a displacement, that should be parsed as an
// absolute memory operand.
assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
@@ -481,6 +495,7 @@ struct X86Operand : public MCParsedAsmOperand {
Res->Mem.IndexReg = IndexReg;
Res->Mem.Scale = Scale;
Res->Mem.Size = Size;
+ Res->Mem.NeedSizeDir = NeedSizeDir;
return Res;
}
};
@@ -753,7 +768,19 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(unsigned SegReg, SMLoc Start) {
const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
if (getParser().ParseExpression(Disp, End)) return 0;
End = Parser.getTok().getLoc();
- return X86Operand::CreateMem(Disp, Start, End, Size);
+
+ bool NeedSizeDir = false;
+ if (!Size && isParsingInlineAsm()) {
+ if (const MCSymbolRefExpr *SymRef = dyn_cast<MCSymbolRefExpr>(Disp)) {
+ const MCSymbol &Sym = SymRef->getSymbol();
+ // FIXME: The SemaLookup will fail if the name is anything other than an
+ // identifier.
+ // FIXME: Pass a valid SMLoc.
+ SemaCallback->LookupInlineAsmIdentifier(Sym.getName(), NULL, Size);
+ NeedSizeDir = Size > 0;
+ }
+ }
+ return X86Operand::CreateMem(Disp, Start, End, Size, NeedSizeDir);
}
X86Operand *X86AsmParser::ParseIntelOperand() {
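
For context on NeedSizeDir (an assumed illustration, not from the patch): in
MS-style inline asm a bare symbol operand carries no explicit size, so the
parser asks Sema for it and records whether a size directive must be emitted.

    // __asm mov eax, gVar      // no explicit size on the memory operand
    //
    // LookupInlineAsmIdentifier(Sym.getName(), NULL, Size) fills in gVar's
    // size (e.g. 4); NeedSizeDir = Size > 0 then tells later stages to print
    // a "dword ptr"-style size directive for the operand.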
@@ -1526,9 +1553,6 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
assert(Op->isToken() && "Leading operand should always be a mnemonic!");
ArrayRef<SMRange> EmptyRanges = ArrayRef<SMRange>();
- // Clear the opcode.
- Opcode = ~0x0;
-
// First, handle aliases that expand to multiple instructions.
// FIXME: This should be replaced with a real .td file alias mechanism.
// Also, MatchInstructionImpl should actually *do* the EmitInstruction
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 2f09e9e6ff1..5034cc0330a 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -459,7 +459,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
// NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
// SjLj exception handling but a light-weight setjmp/longjmp replacement to
- // support continuation, user-level threading, and etc.. As a result, not
+ // support continuation, user-level threading, and etc.. As a result, no
// other SjLj exception interfaces are implemented and please don't build
// your own exception handling based on them.
// LLVM/Clang supports zero-cost DWARF exception handling.
@@ -557,6 +557,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
setOperationAction(ISD::TRAP, MVT::Other, Legal);
+ setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VASTART , MVT::Other, Custom);
@@ -1058,6 +1059,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FNEG, MVT::v4f64, Custom);
setOperationAction(ISD::FABS, MVT::v4f64, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
+
+ setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
+
setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
@@ -1255,7 +1260,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setTargetDAGCombine(ISD::UINT_TO_FP);
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::SETCC);
- setTargetDAGCombine(ISD::FP_TO_SINT);
if (Subtarget->is64Bit())
setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine(ISD::XOR);
@@ -5173,6 +5177,80 @@ X86TargetLowering::LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const {
}
SDValue
+X86TargetLowering::buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+
+ // Skip if insert_vec_elt is not supported.
+ if (!isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT))
+ return SDValue();
+
+ DebugLoc DL = Op.getDebugLoc();
+ unsigned NumElems = Op.getNumOperands();
+
+ SDValue VecIn1;
+ SDValue VecIn2;
+ SmallVector<unsigned, 4> InsertIndices;
+ SmallVector<int, 8> Mask(NumElems, -1);
+
+ for (unsigned i = 0; i != NumElems; ++i) {
+ unsigned Opc = Op.getOperand(i).getOpcode();
+
+ if (Opc == ISD::UNDEF)
+ continue;
+
+ if (Opc != ISD::EXTRACT_VECTOR_ELT) {
+ // Quit if more than one element needs inserting.
+ if (InsertIndices.size() > 1)
+ return SDValue();
+
+ InsertIndices.push_back(i);
+ continue;
+ }
+
+ SDValue ExtractedFromVec = Op.getOperand(i).getOperand(0);
+ SDValue ExtIdx = Op.getOperand(i).getOperand(1);
+
+ // Quit if extracted from a vector of a different type.
+ if (ExtractedFromVec.getValueType() != VT)
+ return SDValue();
+
+ // Quit if non-constant index.
+ if (!isa<ConstantSDNode>(ExtIdx))
+ return SDValue();
+
+ if (VecIn1.getNode() == 0)
+ VecIn1 = ExtractedFromVec;
+ else if (VecIn1 != ExtractedFromVec) {
+ if (VecIn2.getNode() == 0)
+ VecIn2 = ExtractedFromVec;
+ else if (VecIn2 != ExtractedFromVec)
+ // Quit if more than 2 vectors to shuffle
+ return SDValue();
+ }
+
+ unsigned Idx = cast<ConstantSDNode>(ExtIdx)->getZExtValue();
+
+ if (ExtractedFromVec == VecIn1)
+ Mask[i] = Idx;
+ else if (ExtractedFromVec == VecIn2)
+ Mask[i] = Idx + NumElems;
+ }
+
+ if (VecIn1.getNode() == 0)
+ return SDValue();
+
+ VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
+ SDValue NV = DAG.getVectorShuffle(VT, DL, VecIn1, VecIn2, &Mask[0]);
+ for (unsigned i = 0, e = InsertIndices.size(); i != e; ++i) {
+ unsigned Idx = InsertIndices[i];
+ NV = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, NV, Op.getOperand(Idx),
+ DAG.getIntPtrConstant(Idx));
+ }
+
+ return NV;
+}
+
+SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
@@ -5448,6 +5526,11 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (LD.getNode())
return LD;
+ // Check for a build vector from mostly shuffle plus few inserting.
+ SDValue Sh = buildFromShuffleMostly(Op, DAG);
+ if (Sh.getNode())
+ return Sh;
+
// For SSE 4.1, use insertps to put the high elements into the low element.
if (getSubtarget()->hasSSE41()) {
SDValue Result;
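
As a hypothetical illustration of what buildFromShuffleMostly recognizes: a
BUILD_VECTOR whose lanes are extracts from at most two vectors, plus at most
one loose scalar, becomes one shuffle plus one insert.

    // Lanes: { A[0], A[1], B[0], s }   (s is not an extract)
    //
    //   %t = shufflevector <4 x i32> %A, <4 x i32> %B,
    //                      <4 x i32> <i32 0, i32 1, i32 4, i32 undef>
    //   %r = insertelement <4 x i32> %t, i32 %s, i32 3
    //
    // Mask[i] holds the lane index for A, lane + NumElems for B, and -1 for
    // the slot filled by INSERT_VECTOR_ELT.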
@@ -8104,10 +8187,42 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned, bool IsReplace) co
}
}
+SDValue X86TargetLowering::lowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc DL = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ EVT SVT = Op.getOperand(0).getValueType();
+
+ if (!VT.is128BitVector() || !SVT.is256BitVector() ||
+ VT.getVectorNumElements() != SVT.getVectorNumElements())
+ return SDValue();
+
+ assert(Subtarget->hasAVX() && "256-bit vector is observed without AVX!");
+
+ unsigned NumElems = VT.getVectorNumElements();
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
+ NumElems * 2);
+
+ SDValue In = Op.getOperand(0);
+ SmallVector<int, 16> MaskVec(NumElems * 2, -1);
+ // Prepare truncation shuffle mask
+ for (unsigned i = 0; i != NumElems; ++i)
+ MaskVec[i] = i * 2;
+ SDValue V = DAG.getVectorShuffle(NVT, DL,
+ DAG.getNode(ISD::BITCAST, DL, NVT, In),
+ DAG.getUNDEF(NVT), &MaskVec[0]);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V,
+ DAG.getIntPtrConstant(0));
+}
+
SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op,
SelectionDAG &DAG) const {
- if (Op.getValueType().isVector())
+ if (Op.getValueType().isVector()) {
+ if (Op.getValueType() == MVT::v8i16)
+ return DAG.getNode(ISD::TRUNCATE, Op.getDebugLoc(), Op.getValueType(),
+ DAG.getNode(ISD::FP_TO_SINT, Op.getDebugLoc(),
+ MVT::v8i32, Op.getOperand(0)));
return SDValue();
+ }
std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG,
/*IsSigned=*/ true, /*IsReplace=*/ false);
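
A scalar model of the truncation shuffle above (a sketch under little-endian
assumptions, not part of the patch): taking every even 16-bit lane of the
bitcast vector is the same as keeping the low half of each 32-bit lane.

    #include <array>
    #include <cstdint>

    // v8i32 -> v8i16: lane i of the result is 16-bit lane 2*i of the bitcast
    // v16i16 (MaskVec[i] = i * 2), i.e. the low 16 bits of 32-bit lane i.
    std::array<uint16_t, 8> truncateV8I32(const std::array<uint32_t, 8> &In) {
      std::array<uint16_t, 8> Out;
      for (unsigned i = 0; i != 8; ++i)
        Out[i] = static_cast<uint16_t>(In[i]);
      return Out;
    }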
@@ -11376,6 +11491,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
+ case ISD::TRUNCATE: return lowerTRUNCATE(Op, DAG);
case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
case ISD::FP_EXTEND: return lowerFP_EXTEND(Op, DAG);
@@ -13276,7 +13392,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
// For v = setjmp(buf), we generate
//
// thisMBB:
- // buf[Label_Offset] = ljMBB
+ // buf[LabelOffset] = restoreMBB
// SjLjSetup restoreMBB
//
// mainMBB:
@@ -13304,18 +13420,48 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
// thisMBB:
- unsigned PtrImmStoreOpc = (PVT == MVT::i64) ? X86::MOV64mi32 : X86::MOV32mi;
- const int64_t Label_Offset = 1 * PVT.getStoreSize();
-
+ unsigned PtrStoreOpc = 0;
+ unsigned LabelReg = 0;
+ const int64_t LabelOffset = 1 * PVT.getStoreSize();
+ Reloc::Model RM = getTargetMachine().getRelocationModel();
+ bool UseImmLabel = (getTargetMachine().getCodeModel() == CodeModel::Small) &&
+ (RM == Reloc::Static || RM == Reloc::DynamicNoPIC);
+
+ // Prepare IP either in reg or imm.
+ if (!UseImmLabel) {
+ PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr;
+ const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
+ LabelReg = MRI.createVirtualRegister(PtrRC);
+ if (Subtarget->is64Bit()) {
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::LEA64r), LabelReg)
+ .addReg(X86::RIP)
+ .addImm(0)
+ .addReg(0)
+ .addMBB(restoreMBB)
+ .addReg(0);
+ } else {
+ const X86InstrInfo *XII = static_cast<const X86InstrInfo*>(TII);
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::LEA32r), LabelReg)
+ .addReg(XII->getGlobalBaseReg(MF))
+ .addImm(0)
+ .addReg(0)
+ .addMBB(restoreMBB, Subtarget->ClassifyBlockAddressReference())
+ .addReg(0);
+ }
+ } else
+ PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mi32 : X86::MOV32mi;
// Store IP
- MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrImmStoreOpc));
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrStoreOpc));
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
if (i == X86::AddrDisp)
- MIB.addDisp(MI->getOperand(MemOpndSlot + i), Label_Offset);
+ MIB.addDisp(MI->getOperand(MemOpndSlot + i), LabelOffset);
else
MIB.addOperand(MI->getOperand(MemOpndSlot + i));
}
- MIB.addMBB(restoreMBB);
+ if (!UseImmLabel)
+ MIB.addReg(LabelReg);
+ else
+ MIB.addMBB(restoreMBB);
MIB.setMemRefs(MMOBegin, MMOEnd);
// Setup
MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::EH_SjLj_Setup))
@@ -13370,8 +13516,8 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
MachineInstrBuilder MIB;
- const int64_t Label_Offset = 1 * PVT.getStoreSize();
- const int64_t SP_Offset = 2 * PVT.getStoreSize();
+ const int64_t LabelOffset = 1 * PVT.getStoreSize();
+ const int64_t SPOffset = 2 * PVT.getStoreSize();
unsigned PtrLoadOpc = (PVT == MVT::i64) ? X86::MOV64rm : X86::MOV32rm;
unsigned IJmpOpc = (PVT == MVT::i64) ? X86::JMP64r : X86::JMP32r;
@@ -13385,7 +13531,7 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), Tmp);
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
if (i == X86::AddrDisp)
- MIB.addDisp(MI->getOperand(i), Label_Offset);
+ MIB.addDisp(MI->getOperand(i), LabelOffset);
else
MIB.addOperand(MI->getOperand(i));
}
@@ -13394,7 +13540,7 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
MIB = BuildMI(*MBB, MI, DL, TII->get(PtrLoadOpc), SP);
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
if (i == X86::AddrDisp)
- MIB.addDisp(MI->getOperand(i), SP_Offset);
+ MIB.addDisp(MI->getOperand(i), SPOffset);
else
MIB.addOperand(MI->getOperand(i));
}
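
Putting the setjmp and longjmp halves together, the pointer-sized buffer slots
used above are (assuming the conventional SjLj layout, with buf[0] filled
elsewhere):

    // buf[0]           : frame pointer                 (not touched here)
    // buf[LabelOffset] : resume address = restoreMBB   (1 * PVT.getStoreSize())
    // buf[SPOffset]    : saved stack pointer           (2 * PVT.getStoreSize())
    //
    // setjmp stores the label either as an immediate (small, non-PIC code
    // models) or via LEA into LabelReg; longjmp reloads both slots and does
    // an indirect jump through the label.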
@@ -15411,11 +15557,11 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
ISD::LoadExtType Ext = Ld->getExtensionType();
// If this is a vector EXT Load then attempt to optimize it using a
- // shuffle. We need SSE4 for the shuffles.
+ // shuffle. We need SSSE3 for the shuffles.
// TODO: It is possible to support ZExt by zeroing the undef values
// during the shuffle phase or after the shuffle.
if (RegVT.isVector() && RegVT.isInteger() &&
- Ext == ISD::EXTLOAD && Subtarget->hasSSE41()) {
+ Ext == ISD::EXTLOAD && Subtarget->hasSSSE3()) {
assert(MemVT != RegVT && "Cannot extend to the same type");
assert(MemVT.isVector() && "Must load a vector from memory");
@@ -16269,20 +16415,6 @@ static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-static SDValue PerformFP_TO_SINTCombine(SDNode *N, SelectionDAG &DAG) {
- EVT VT = N->getValueType(0);
-
- // v4i8 = FP_TO_SINT() -> v4i8 = TRUNCATE (V4i32 = FP_TO_SINT()
- if (VT == MVT::v8i8 || VT == MVT::v4i8) {
- DebugLoc dl = N->getDebugLoc();
- MVT DstVT = VT == MVT::v4i8 ? MVT::v4i32 : MVT::v8i32;
- SDValue I = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, N->getOperand(0));
- return DAG.getNode(ISD::TRUNCATE, dl, VT, I);
- }
-
- return SDValue();
-}
-
// Optimize RES, EFLAGS = X86ISD::ADC LHS, RHS, EFLAGS
static SDValue PerformADCCombine(SDNode *N, SelectionDAG &DAG,
X86TargetLowering::DAGCombinerInfo &DCI) {
@@ -16421,7 +16553,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget);
case ISD::UINT_TO_FP: return PerformUINT_TO_FPCombine(N, DAG);
case ISD::SINT_TO_FP: return PerformSINT_TO_FPCombine(N, DAG, this);
- case ISD::FP_TO_SINT: return PerformFP_TO_SINTCombine(N, DAG);
case ISD::FADD: return PerformFADDCombine(N, DAG, Subtarget);
case ISD::FSUB: return PerformFSUBCombine(N, DAG, Subtarget);
case X86ISD::FXOR:
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 40e966ad676..228fab1689e 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -795,6 +795,7 @@ namespace llvm {
SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
@@ -824,9 +825,10 @@ namespace llvm {
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
- // Utility functions to help LowerVECTOR_SHUFFLE
+ // Utility functions to help LowerVECTOR_SHUFFLE & LowerBUILD_VECTOR
SDValue LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const;
SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const;
+ SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorAllZeroTest(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index c44549c30ac..cfd68f74b7b 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -67,15 +67,11 @@ MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
/// operand to an MCSymbol.
MCSymbol *X86MCInstLower::
GetSymbolFromOperand(const MachineOperand &MO) const {
- assert((MO.isGlobal() || MO.isSymbol()) && "Isn't a symbol reference");
+ assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) &&
+        "Isn't a symbol reference");
SmallString<128> Name;
- if (!MO.isGlobal()) {
- assert(MO.isSymbol());
- Name += MAI.getGlobalPrefix();
- Name += MO.getSymbolName();
- } else {
+ if (MO.isGlobal()) {
const GlobalValue *GV = MO.getGlobal();
bool isImplicitlyPrivate = false;
if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB ||
@@ -85,6 +81,11 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
isImplicitlyPrivate = true;
Mang->getNameWithPrefix(Name, GV, isImplicitlyPrivate);
+ } else if (MO.isSymbol()) {
+ Name += MAI.getGlobalPrefix();
+ Name += MO.getSymbolName();
+ } else if (MO.isMBB()) {
+ Name += MO.getMBB()->getSymbol()->getName();
}
// If the target flags on the operand changes the name of the symbol, do that
@@ -215,7 +216,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
if (Expr == 0)
Expr = MCSymbolRefExpr::Create(Sym, RefKind, Ctx);
- if (!MO.isJTI() && MO.getOffset())
+ if (!MO.isJTI() && !MO.isMBB() && MO.getOffset())
Expr = MCBinaryExpr::CreateAdd(Expr,
MCConstantExpr::Create(MO.getOffset(), Ctx),
Ctx);
@@ -348,9 +349,6 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
MCOp = MCOperand::CreateImm(MO.getImm());
break;
case MachineOperand::MO_MachineBasicBlock:
- MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
- MO.getMBB()->getSymbol(), Ctx));
- break;
case MachineOperand::MO_GlobalAddress:
case MachineOperand::MO_ExternalSymbol:
MCOp = LowerSymbolOperand(MO, GetSymbolFromOperand(MO));
diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp
index 449eed3d8d5..c4a58874a41 100644
--- a/lib/Target/X86/X86VZeroUpper.cpp
+++ b/lib/Target/X86/X86VZeroUpper.cpp
@@ -147,7 +147,7 @@ bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
const TargetRegisterClass *RC = &X86::VR256RegClass;
for (TargetRegisterClass::iterator i = RC->begin(), e = RC->end();
i != e; i++) {
- if (MRI.isPhysRegUsed(*i)) {
+ if (!MRI.reg_nodbg_empty(*i)) {
YMMUsed = true;
break;
}
diff --git a/lib/Transforms/IPO/BarrierNoopPass.cpp b/lib/Transforms/IPO/BarrierNoopPass.cpp
new file mode 100644
index 00000000000..2e32240621f
--- /dev/null
+++ b/lib/Transforms/IPO/BarrierNoopPass.cpp
@@ -0,0 +1,47 @@
+//===- BarrierNoopPass.cpp - A barrier pass for the pass manager ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// NOTE: DO NOT USE THIS IF AVOIDABLE
+//
+// This pass is a nonce pass intended to allow manipulation of the pass
+// manager's implicit nesting. For example, it can be used to cause a CGSCC
+// pass manager to be closed prior to running a new collection of function
+// passes.
+//
+// FIXME: This is a huge HACK. This should be removed when the pass manager's
+// nesting is made explicit instead of implicit.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Pass.h"
+#include "llvm/Transforms/IPO.h"
+using namespace llvm;
+
+namespace {
+/// \brief A nonce module pass used to place a barrier in a pass manager.
+///
+/// There is no mechanism for ending a CGSCC pass manager once one is started.
+/// This prevents extension points from having clear deterministic ordering
+/// when they are phrased as non-module passes.
+class BarrierNoop : public ModulePass {
+public:
+ static char ID; // Pass identification.
+
+ BarrierNoop() : ModulePass(ID) {
+ initializeBarrierNoopPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) { return false; }
+};
+}
+
+ModulePass *llvm::createBarrierNoopPass() { return new BarrierNoop(); }
+
+char BarrierNoop::ID = 0;
+INITIALIZE_PASS(BarrierNoop, "barrier", "A No-Op Barrier Pass",
+ false, false)
diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt
index 3f6b1de614d..90c1c33e6dc 100644
--- a/lib/Transforms/IPO/CMakeLists.txt
+++ b/lib/Transforms/IPO/CMakeLists.txt
@@ -1,5 +1,6 @@
add_llvm_library(LLVMipo
ArgumentPromotion.cpp
+ BarrierNoopPass.cpp
ConstantMerge.cpp
DeadArgumentElimination.cpp
ExtractGV.cpp
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index a2862022a3b..1d8f1e531a0 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -119,6 +119,14 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(Inliner);
Inliner = 0;
}
+
+ // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC
+ // pass manager, but we don't want to add extensions into that pass manager.
+ // To prevent this we must insert a no-op module pass to reset the pass
+ // manager to get the same behavior as EP_OptimizerLast in non-O0 builds.
+ if (!GlobalExtensions->empty() || !Extensions.empty())
+ MPM.add(createBarrierNoopPass());
+
addExtensionsToPM(EP_EnabledOnOptLevel0, MPM);
return;
}
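
A minimal sketch of the nesting problem the barrier fixes (createMyFunctionPass
is a hypothetical extension pass, not part of the patch):

    // Without the barrier, function passes added after the inliner would be
    // pulled into the CGSCC pass manager the inliner implicitly opened,
    // giving them a different (per-SCC) execution order.
    MPM.add(Inliner);                  // implicitly opens a CGSCC pass manager
    MPM.add(createBarrierNoopPass());  // module pass: forces that manager closed
    MPM.add(createMyFunctionPass());   // hypothetical; runs in a fresh FPM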
@@ -176,6 +184,12 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars
MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
MPM.add(createLoopDeletionPass()); // Delete dead loops
+
+ if (Vectorize) {
+ MPM.add(createLoopVectorizePass());
+ MPM.add(createLICMPass());
+ }
+
if (!DisableUnrollLoops)
MPM.add(createLoopUnrollPass()); // Unroll small loops
addExtensionsToPM(EP_LoopOptimizerEnd, MPM);
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index ecce242a81e..a2d4c888f2c 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -333,6 +333,10 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
// All operands were constants, fold it.
if (ConstOps.size() == I->getNumOperands()) {
+ if (CmpInst *C = dyn_cast<CmpInst>(I))
+ return ConstantFoldCompareInstOperands(C->getPredicate(), ConstOps[0],
+ ConstOps[1], TD, TLI);
+
if (LoadInst *LI = dyn_cast<LoadInst>(I))
if (!LI->isVolatile())
return ConstantFoldLoadFromConstPtr(ConstOps[0], TD);
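
An assumed IR-level example of the new compare fold (not from the patch): when
SimplifyWithOpReplaced substitutes a known constant for an operand, an inner
icmp can now fold away entirely.

    //   %c = icmp eq i32 %x, 7
    //   %u = icmp ult i32 %x, 10
    //   %s = select i1 %c, i1 %u, i1 false
    //
    // Replacing %x with 7 in the true arm makes both icmp operands constants;
    // ConstantFoldCompareInstOperands folds %u to true, so %s becomes %c.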
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index cf60f0f426d..dd7ea14e8a8 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -636,8 +636,11 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
// If LHS's width is changed, shift the mask value accordingly.
// If newRHS == NULL, i.e. LHSOp0 == RHSOp0, we want to remap any
- // references to RHSOp0 to LHSOp0, so we don't need to shift the mask.
- if (eltMask >= 0 && newRHS != NULL)
+ // references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
+ // If newRHS == newLHS, we want to remap any references from newRHS to
+ // newLHS so that we can properly identify splats that may occur due to
+ // obfuscation across the two vectors.
+ if (eltMask >= 0 && newRHS != NULL && newLHS != newRHS)
eltMask += newLHSWidth;
}
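
A hypothetical example of the splat case the new newLHS != newRHS guard
preserves:

    //   %t = shufflevector <4 x i32> %v, <4 x i32> %v,
    //                      <4 x i32> <i32 0, i32 0, i32 4, i32 4>
    //
    // Both operands are %v, so newLHS == newRHS; by not shifting the RHS lane
    // indices, the mask is remapped to <0, 0, 0, 0> and the shuffle becomes
    // recognizable as a plain splat of lane 0.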
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index b566994edfc..4abaeca0c5c 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -245,38 +245,6 @@ static GlobalVariable *createPrivateGlobalForString(Module &M, StringRef Str) {
GlobalValue::PrivateLinkage, StrConst, "");
}
-// Split the basic block and insert an if-then code.
-// Before:
-// Head
-// Cmp
-// Tail
-// After:
-// Head
-// if (Cmp)
-// ThenBlock
-// Tail
-//
-// ThenBlock block is created and its terminator is returned.
-// If Unreachable, ThenBlock is terminated with UnreachableInst, otherwise
-// it is terminated with BranchInst to Tail.
-static TerminatorInst *splitBlockAndInsertIfThen(Value *Cmp, bool Unreachable) {
- Instruction *SplitBefore = cast<Instruction>(Cmp)->getNextNode();
- BasicBlock *Head = SplitBefore->getParent();
- BasicBlock *Tail = Head->splitBasicBlock(SplitBefore);
- TerminatorInst *HeadOldTerm = Head->getTerminator();
- LLVMContext &C = Head->getParent()->getParent()->getContext();
- BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
- TerminatorInst *CheckTerm;
- if (Unreachable)
- CheckTerm = new UnreachableInst(C, ThenBlock);
- else
- CheckTerm = BranchInst::Create(Tail, ThenBlock);
- BranchInst *HeadNewTerm =
- BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/Tail, Cmp);
- ReplaceInstWithInst(HeadOldTerm, HeadNewTerm);
- return CheckTerm;
-}
-
Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
// Shadow >> scale
Shadow = IRB.CreateLShr(Shadow, MappingScale);
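
The deleted local helper is superseded by the shared SplitBlockAndInsertIfThen
utility (this patch also touches Transforms/Utils/BasicBlockUtils.h); the call
sites below use it as:

    // Splits the block after Cmp and returns the terminator of the new
    // then-block: an UnreachableInst when the second argument is true,
    // otherwise a branch back to the tail block.
    TerminatorInst *CheckTerm =
        SplitBlockAndInsertIfThen(cast<Instruction>(Cmp), /*Unreachable=*/false);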
@@ -324,7 +292,7 @@ bool AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
Value *Cmp = IRB.CreateICmpNE(Length,
Constant::getNullValue(Length->getType()));
- InsertBefore = splitBlockAndInsertIfThen(Cmp, false);
+ InsertBefore = SplitBlockAndInsertIfThen(cast<Instruction>(Cmp), false);
}
instrumentMemIntrinsicParam(MI, Dst, Length, InsertBefore, true);
@@ -480,7 +448,8 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
TerminatorInst *CrashTerm = 0;
if (ClAlwaysSlowPath || (TypeSize < 8 * Granularity)) {
- TerminatorInst *CheckTerm = splitBlockAndInsertIfThen(Cmp, false);
+ TerminatorInst *CheckTerm =
+ SplitBlockAndInsertIfThen(cast<Instruction>(Cmp), false);
assert(dyn_cast<BranchInst>(CheckTerm)->isUnconditional());
BasicBlock *NextBB = CheckTerm->getSuccessor(0);
IRB.SetInsertPoint(CheckTerm);
@@ -491,7 +460,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
BranchInst *NewTerm = BranchInst::Create(CrashBlock, NextBB, Cmp2);
ReplaceInstWithInst(CheckTerm, NewTerm);
} else {
- CrashTerm = splitBlockAndInsertIfThen(Cmp, true);
+ CrashTerm = SplitBlockAndInsertIfThen(cast<Instruction>(Cmp), true);
}
Instruction *Crash =
@@ -534,7 +503,7 @@ void AddressSanitizer::createInitializerPoisonCalls(Module &M,
bool AddressSanitizer::ShouldInstrumentGlobal(GlobalVariable *G) {
Type *Ty = cast<PointerType>(G->getType())->getElementType();
- DEBUG(dbgs() << "GLOBAL: " << *G);
+ DEBUG(dbgs() << "GLOBAL: " << *G << "\n");
if (BL->isIn(*G)) return false;
if (!Ty->isSized()) return false;
@@ -682,7 +651,7 @@ bool AddressSanitizer::insertGlobalRedzones(Module &M) {
FirstDynamic = LastDynamic;
}
- DEBUG(dbgs() << "NEW GLOBAL:\n" << *NewGlobal);
+ DEBUG(dbgs() << "NEW GLOBAL: " << *NewGlobal << "\n");
}
ArrayType *ArrayOfGlobalStructTy = ArrayType::get(GlobalStructTy, n);
@@ -851,6 +820,7 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
bool AddressSanitizer::runOnFunction(Function &F) {
if (BL->isIn(F)) return false;
if (&F == AsanCtorFunction) return false;
+ DEBUG(dbgs() << "ASAN instrumenting:\n" << F << "\n");
// If needed, insert __asan_init before checking for AddressSafety attr.
maybeInsertAsanInitAtFunctionEntry(F);
@@ -914,8 +884,6 @@ bool AddressSanitizer::runOnFunction(Function &F) {
NumInstrumented++;
}
- DEBUG(dbgs() << F);
-
bool ChangedStack = poisonStackInFunction(F);
// We must unpoison the stack before every NoReturn call (throw, _exit, etc).
@@ -925,6 +893,7 @@ bool AddressSanitizer::runOnFunction(Function &F) {
IRBuilder<> IRB(CI);
IRB.CreateCall(AsanHandleNoReturnFunc);
}
+ DEBUG(dbgs() << "ASAN done instrumenting:\n" << F << "\n");
return NumInstrumented > 0 || ChangedStack || !NoReturnCalls.empty();
}
@@ -1148,6 +1117,10 @@ bool AddressSanitizer::poisonStackInFunction(Function &F) {
}
}
+ // We are done. Remove the old unused alloca instructions.
+ for (size_t i = 0, n = AllocaVec.size(); i < n; i++)
+ AllocaVec[i]->eraseFromParent();
+
if (ClDebugStack) {
DEBUG(dbgs() << F);
}
diff --git a/lib/Transforms/Instrumentation/BlackList.cpp b/lib/Transforms/Instrumentation/BlackList.cpp
index 2cb119964a3..ef34b8a56d8 100644
--- a/lib/Transforms/Instrumentation/BlackList.cpp
+++ b/lib/Transforms/Instrumentation/BlackList.cpp
@@ -45,6 +45,9 @@ BlackList::BlackList(const StringRef Path) {
StringMap<std::string> Regexps;
for (SmallVector<StringRef, 16>::iterator I = Lines.begin(), E = Lines.end();
I != E; ++I) {
+ // Ignore empty lines and lines starting with "#"
+ if (I->empty() || I->startswith("#"))
+ continue;
// Get our prefix and unparsed regexp.
std::pair<StringRef, StringRef> SplitLine = I->split(":");
StringRef Prefix = SplitLine.first;
diff --git a/lib/Transforms/Instrumentation/BlackList.h b/lib/Transforms/Instrumentation/BlackList.h
index 73977fc10a6..f3c05a5058c 100644
--- a/lib/Transforms/Instrumentation/BlackList.h
+++ b/lib/Transforms/Instrumentation/BlackList.h
@@ -12,7 +12,9 @@
//
// The blacklist disables instrumentation of various functions and global
// variables. Each line contains a prefix, followed by a wild card expression.
+// Empty lines and lines starting with "#" are ignored.
// ---
+// # Blacklisted items:
// fun:*_ZN4base6subtle*
// global:*global_with_bad_access_or_initialization*
// global-init:*global_with_initialization_issues*
diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp
index 086f0a1a714..a2e074fae89 100644
--- a/lib/Transforms/Scalar/DCE.cpp
+++ b/lib/Transforms/Scalar/DCE.cpp
@@ -118,13 +118,8 @@ bool DCE::runOnFunction(Function &F) {
I->eraseFromParent();
// Remove the instruction from the worklist if it still exists in it.
- for (std::vector<Instruction*>::iterator WI = WorkList.begin();
- WI != WorkList.end(); ) {
- if (*WI == I)
- WI = WorkList.erase(WI);
- else
- ++WI;
- }
+ WorkList.erase(std::remove(WorkList.begin(), WorkList.end(), I),
+ WorkList.end());
MadeChange = true;
++DCEEliminated;
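
The replacement is the standard erase-remove idiom; in isolation (illustrative
only):

    #include <algorithm>
    #include <vector>

    void removeAll(std::vector<int> &WorkList, int Val) {
      // std::remove shifts the kept elements forward and returns the new
      // logical end; erase then drops the leftover tail in a single call.
      WorkList.erase(std::remove(WorkList.begin(), WorkList.end(), Val),
                     WorkList.end());
    }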
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 78630b2a9f3..82eb7464677 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -551,15 +551,17 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
PN->setIncomingValue(i, ExitVal);
- // If this instruction is dead now, delete it.
- RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);
+ // If this instruction is dead now, queue it for deletion; deleting it
+ // here would invalidate iterators.
+ if (isInstructionTriviallyDead(Inst, TLI))
+ DeadInsts.push_back(Inst);
if (NumPreds == 1) {
// Completely replace a single-pred PHI. This is safe, because the
// NewVal won't be variant in the loop, so we don't need an LCSSA phi
// node anymore.
PN->replaceAllUsesWith(ExitVal);
- RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
+ PN->eraseFromParent();
}
}
if (NumPreds != 1) {
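
The hazard being avoided, as a generic sketch (assumed; DeadInsts is the
pass's existing deferred-deletion list):

    // Erasing an instruction while walking a use list can invalidate the
    // iterator, so queue it and let the caller sweep DeadInsts afterwards.
    if (isInstructionTriviallyDead(Inst, TLI))
      DeadInsts.push_back(Inst);   // deferred deletion: safe
    // Inst->eraseFromParent();    // immediate deletion: unsafe mid-iteration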
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 99a62dbe62f..958348d9faa 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -37,7 +37,7 @@
//
// TODO: Handle multiple loops at a time.
//
-// TODO: Should AddrMode::BaseGV be changed to a ConstantExpr
+// TODO: Should TargetLowering::AddrMode::BaseGV be changed to a ConstantExpr
// instead of a GlobalValue?
//
// TODO: When truncation is free, truncate ICmp users' operands to make it a
@@ -67,7 +67,6 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/TargetTransformInfo.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/DenseSet.h"
@@ -75,6 +74,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
#include <algorithm>
using namespace llvm;
@@ -1118,7 +1118,7 @@ public:
enum KindType {
Basic, ///< A normal use, with no folding.
Special, ///< A special case of basic, allowing -1 scales.
- Address, ///< An address use; folding according to ScalarTargetTransformInfo.
+ Address, ///< An address use; folding according to TargetLowering
ICmpZero ///< An equality icmp with both operands folded into one.
// TODO: Add a generic icmp too?
};
@@ -1272,12 +1272,12 @@ void LSRUse::dump() const {
/// address-mode folding and special icmp tricks.
static bool isLegalUse(const AddrMode &AM,
LSRUse::KindType Kind, Type *AccessTy,
- const ScalarTargetTransformInfo *STTI) {
+ const TargetLowering *TLI) {
switch (Kind) {
case LSRUse::Address:
// If we have low-level target information, ask the target if it can
// completely fold this address.
- if (STTI) return STTI->isLegalAddressingMode(AM, AccessTy);
+ if (TLI) return TLI->isLegalAddressingMode(AM, AccessTy);
// Otherwise, just guess that reg+reg addressing is legal.
return !AM.BaseGV && AM.BaseOffs == 0 && AM.Scale <= 1;
@@ -1300,7 +1300,7 @@ static bool isLegalUse(const AddrMode &AM,
// If we have low-level target information, ask the target if it can fold an
// integer immediate on an icmp.
if (AM.BaseOffs != 0) {
- if (!STTI)
+ if (!TLI)
return false;
// We have one of:
// ICmpZero BaseReg + Offset => ICmp BaseReg, -Offset
@@ -1309,7 +1309,7 @@ static bool isLegalUse(const AddrMode &AM,
int64_t Offs = AM.BaseOffs;
if (AM.Scale == 0)
Offs = -(uint64_t)Offs; // The cast does the right thing with INT64_MIN.
- return STTI->isLegalICmpImmediate(Offs);
+ return TLI->isLegalICmpImmediate(Offs);
}
// ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg
@@ -1330,20 +1330,20 @@ static bool isLegalUse(const AddrMode &AM,
static bool isLegalUse(AddrMode AM,
int64_t MinOffset, int64_t MaxOffset,
LSRUse::KindType Kind, Type *AccessTy,
- const ScalarTargetTransformInfo *LTTI) {
+ const TargetLowering *TLI) {
// Check for overflow.
if (((int64_t)((uint64_t)AM.BaseOffs + MinOffset) > AM.BaseOffs) !=
(MinOffset > 0))
return false;
AM.BaseOffs = (uint64_t)AM.BaseOffs + MinOffset;
- if (isLegalUse(AM, Kind, AccessTy, LTTI)) {
+ if (isLegalUse(AM, Kind, AccessTy, TLI)) {
AM.BaseOffs = (uint64_t)AM.BaseOffs - MinOffset;
// Check for overflow.
if (((int64_t)((uint64_t)AM.BaseOffs + MaxOffset) > AM.BaseOffs) !=
(MaxOffset > 0))
return false;
AM.BaseOffs = (uint64_t)AM.BaseOffs + MaxOffset;
- return isLegalUse(AM, Kind, AccessTy, LTTI);
+ return isLegalUse(AM, Kind, AccessTy, TLI);
}
return false;
}
@@ -1352,7 +1352,7 @@ static bool isAlwaysFoldable(int64_t BaseOffs,
GlobalValue *BaseGV,
bool HasBaseReg,
LSRUse::KindType Kind, Type *AccessTy,
- const ScalarTargetTransformInfo *LTTI) {
+ const TargetLowering *TLI) {
// Fast-path: zero is always foldable.
if (BaseOffs == 0 && !BaseGV) return true;
@@ -1371,14 +1371,14 @@ static bool isAlwaysFoldable(int64_t BaseOffs,
AM.HasBaseReg = true;
}
- return isLegalUse(AM, Kind, AccessTy, LTTI);
+ return isLegalUse(AM, Kind, AccessTy, TLI);
}
static bool isAlwaysFoldable(const SCEV *S,
int64_t MinOffset, int64_t MaxOffset,
bool HasBaseReg,
LSRUse::KindType Kind, Type *AccessTy,
- const ScalarTargetTransformInfo *LTTI,
+ const TargetLowering *TLI,
ScalarEvolution &SE) {
// Fast-path: zero is always foldable.
if (S->isZero()) return true;
@@ -1402,7 +1402,7 @@ static bool isAlwaysFoldable(const SCEV *S,
AM.HasBaseReg = HasBaseReg;
AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
- return isLegalUse(AM, MinOffset, MaxOffset, Kind, AccessTy, LTTI);
+ return isLegalUse(AM, MinOffset, MaxOffset, Kind, AccessTy, TLI);
}
namespace {
@@ -1502,7 +1502,7 @@ class LSRInstance {
ScalarEvolution &SE;
DominatorTree &DT;
LoopInfo &LI;
- const ScalarTargetTransformInfo *const STTI;
+ const TargetLowering *const TLI;
Loop *const L;
bool Changed;
@@ -1638,7 +1638,7 @@ class LSRInstance {
Pass *P);
public:
- LSRInstance(const ScalarTargetTransformInfo *ltti, Loop *l, Pass *P);
+ LSRInstance(const TargetLowering *tli, Loop *l, Pass *P);
bool getChanged() const { return Changed; }
@@ -1688,10 +1688,11 @@ void LSRInstance::OptimizeShadowIV() {
}
if (!DestTy) continue;
- if (STTI) {
+ if (TLI) {
// If target does not support DestTy natively then do not apply
// this transformation.
- if (!STTI->isTypeLegal(DestTy)) continue;
+ EVT DVT = TLI->getValueType(DestTy);
+ if (!TLI->isTypeLegal(DVT)) continue;
}
PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
@@ -2014,18 +2015,18 @@ LSRInstance::OptimizeLoopTermCond() {
if (C->getValue().getMinSignedBits() >= 64 ||
C->getValue().isMinSignedValue())
goto decline_post_inc;
- // Without STTI, assume that any stride might be valid, and so any
+ // Without TLI, assume that any stride might be valid, and so any
// use might be shared.
- if (!STTI)
+ if (!TLI)
goto decline_post_inc;
// Check for possible scaled-address reuse.
Type *AccessTy = getAccessType(UI->getUser());
AddrMode AM;
AM.Scale = C->getSExtValue();
- if (STTI->isLegalAddressingMode(AM, AccessTy))
+ if (TLI->isLegalAddressingMode(AM, AccessTy))
goto decline_post_inc;
AM.Scale = -AM.Scale;
- if (STTI->isLegalAddressingMode(AM, AccessTy))
+ if (TLI->isLegalAddressingMode(AM, AccessTy))
goto decline_post_inc;
}
}
@@ -2096,12 +2097,12 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
// Conservatively assume HasBaseReg is true for now.
if (NewOffset < LU.MinOffset) {
if (!isAlwaysFoldable(LU.MaxOffset - NewOffset, 0, HasBaseReg,
- Kind, AccessTy, STTI))
+ Kind, AccessTy, TLI))
return false;
NewMinOffset = NewOffset;
} else if (NewOffset > LU.MaxOffset) {
if (!isAlwaysFoldable(NewOffset - LU.MinOffset, 0, HasBaseReg,
- Kind, AccessTy, STTI))
+ Kind, AccessTy, TLI))
return false;
NewMaxOffset = NewOffset;
}
@@ -2130,7 +2131,7 @@ LSRInstance::getUse(const SCEV *&Expr,
int64_t Offset = ExtractImmediate(Expr, SE);
// Basic uses can't accept any offset, for example.
- if (!isAlwaysFoldable(Offset, 0, /*HasBaseReg=*/true, Kind, AccessTy, STTI)) {
+ if (!isAlwaysFoldable(Offset, 0, /*HasBaseReg=*/true, Kind, AccessTy, TLI)) {
Expr = Copy;
Offset = 0;
}
@@ -2395,7 +2396,7 @@ bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
/// TODO: Consider IVInc free if it's already used in another chains.
static bool
isProfitableChain(IVChain &Chain, SmallPtrSet<Instruction*, 4> &Users,
- ScalarEvolution &SE, const ScalarTargetTransformInfo *STTI) {
+ ScalarEvolution &SE, const TargetLowering *TLI) {
if (StressIVChain)
return true;
@@ -2653,7 +2654,7 @@ void LSRInstance::CollectChains() {
for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
UsersIdx < NChains; ++UsersIdx) {
if (!isProfitableChain(IVChainVec[UsersIdx],
- ChainUsersVec[UsersIdx].FarUsers, SE, STTI))
+ ChainUsersVec[UsersIdx].FarUsers, SE, TLI))
continue;
// Preserve the chain at UsesIdx.
if (ChainIdx != UsersIdx)
@@ -2680,8 +2681,7 @@ void LSRInstance::FinalizeChain(IVChain &Chain) {
/// Return true if the IVInc can be folded into an addressing mode.
static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
- Value *Operand,
- const ScalarTargetTransformInfo *STTI) {
+ Value *Operand, const TargetLowering *TLI) {
const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
if (!IncConst || !isAddressUse(UserInst, Operand))
return false;
@@ -2691,7 +2691,7 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
int64_t IncOffset = IncConst->getValue()->getSExtValue();
if (!isAlwaysFoldable(IncOffset, /*BaseGV=*/0, /*HasBaseReg=*/false,
- LSRUse::Address, getAccessType(UserInst), STTI))
+ LSRUse::Address, getAccessType(UserInst), TLI))
return false;
return true;
@@ -2762,7 +2762,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
// If an IV increment can't be folded, use it as the next IV value.
if (!canFoldIVIncExpr(LeftOverExpr, IncI->UserInst, IncI->IVOperand,
- STTI)) {
+ TLI)) {
assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
IVSrc = IVOper;
LeftOverExpr = 0;
@@ -3108,7 +3108,7 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
// into an immediate field.
if (isAlwaysFoldable(*J, LU.MinOffset, LU.MaxOffset,
Base.getNumRegs() > 1,
- LU.Kind, LU.AccessTy, STTI, SE))
+ LU.Kind, LU.AccessTy, TLI, SE))
continue;
// Collect all operands except *J.
@@ -3122,7 +3122,7 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
if (InnerAddOps.size() == 1 &&
isAlwaysFoldable(InnerAddOps[0], LU.MinOffset, LU.MaxOffset,
Base.getNumRegs() > 1,
- LU.Kind, LU.AccessTy, STTI, SE))
+ LU.Kind, LU.AccessTy, TLI, SE))
continue;
const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
@@ -3132,9 +3132,9 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
// Add the remaining pieces of the add back into the new formula.
const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
- if (STTI && InnerSumSC &&
+ if (TLI && InnerSumSC &&
SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
- STTI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
+ TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
InnerSumSC->getValue()->getZExtValue())) {
F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
InnerSumSC->getValue()->getZExtValue();
@@ -3144,8 +3144,8 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
// Add J as its own register, or an unfolded immediate.
const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
- if (STTI && SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
- STTI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
+ if (TLI && SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
+ TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
SC->getValue()->getZExtValue()))
F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
SC->getValue()->getZExtValue();
@@ -3205,7 +3205,7 @@ void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
Formula F = Base;
F.AM.BaseGV = GV;
if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, STTI))
+ LU.Kind, LU.AccessTy, TLI))
continue;
F.BaseRegs[i] = G;
(void)InsertFormula(LU, LUIdx, F);
@@ -3230,7 +3230,7 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
Formula F = Base;
F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs - *I;
if (isLegalUse(F.AM, LU.MinOffset - *I, LU.MaxOffset - *I,
- LU.Kind, LU.AccessTy, STTI)) {
+ LU.Kind, LU.AccessTy, TLI)) {
// Add the offset to the base register.
const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G);
// If it cancelled out, drop the base register, otherwise update it.
@@ -3250,7 +3250,7 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
Formula F = Base;
F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Imm;
if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, STTI))
+ LU.Kind, LU.AccessTy, TLI))
continue;
F.BaseRegs[i] = G;
(void)InsertFormula(LU, LUIdx, F);
@@ -3297,7 +3297,7 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
F.AM.BaseOffs = NewBaseOffs;
// Check that this scale is legal.
- if (!isLegalUse(F.AM, Offset, Offset, LU.Kind, LU.AccessTy, STTI))
+ if (!isLegalUse(F.AM, Offset, Offset, LU.Kind, LU.AccessTy, TLI))
continue;
// Compensate for the use having MinOffset built into it.
@@ -3353,12 +3353,12 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
Base.AM.HasBaseReg = Base.BaseRegs.size() > 1;
// Check whether this scale is going to be legal.
if (!isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, STTI)) {
+ LU.Kind, LU.AccessTy, TLI)) {
// As a special case, handle out-of-loop Basic users specially.
// TODO: Reconsider this special case.
if (LU.Kind == LSRUse::Basic &&
isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset,
- LSRUse::Special, LU.AccessTy, STTI) &&
+ LSRUse::Special, LU.AccessTy, TLI) &&
LU.AllFixupsOutsideLoop)
LU.Kind = LSRUse::Special;
else
@@ -3391,8 +3391,8 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
/// GenerateTruncates - Generate reuse formulae from different IV types.
void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
- // This requires ScalarTargetTransformInfo to tell us which truncates are free.
- if (!STTI) return;
+ // This requires TargetLowering to tell us which truncates are free.
+ if (!TLI) return;
// Don't bother truncating symbolic values.
if (Base.AM.BaseGV) return;
@@ -3405,7 +3405,7 @@ void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
for (SmallSetVector<Type *, 4>::const_iterator
I = Types.begin(), E = Types.end(); I != E; ++I) {
Type *SrcTy = *I;
- if (SrcTy != DstTy && STTI->isTruncateFree(SrcTy, DstTy)) {
+ if (SrcTy != DstTy && TLI->isTruncateFree(SrcTy, DstTy)) {
Formula F = Base;
if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, *I);
@@ -3561,7 +3561,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
Formula NewF = F;
NewF.AM.BaseOffs = Offs;
if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, STTI))
+ LU.Kind, LU.AccessTy, TLI))
continue;
NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);
@@ -3586,9 +3586,9 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
Formula NewF = F;
NewF.AM.BaseOffs = (uint64_t)NewF.AM.BaseOffs + Imm;
if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, STTI)) {
- if (!STTI ||
- !STTI->isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
+ LU.Kind, LU.AccessTy, TLI)) {
+ if (!TLI ||
+ !TLI->isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
continue;
NewF = F;
NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm;
@@ -3900,7 +3900,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
Formula &F = LUThatHas->Formulae[i];
if (!isLegalUse(F.AM,
LUThatHas->MinOffset, LUThatHas->MaxOffset,
- LUThatHas->Kind, LUThatHas->AccessTy, STTI)) {
+ LUThatHas->Kind, LUThatHas->AccessTy, TLI)) {
DEBUG(dbgs() << " Deleting "; F.print(dbgs());
dbgs() << '\n');
LUThatHas->DeleteFormula(F);
@@ -4589,12 +4589,12 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
}
-LSRInstance::LSRInstance(const ScalarTargetTransformInfo *stti, Loop *l, Pass *P)
+LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
: IU(P->getAnalysis<IVUsers>()),
SE(P->getAnalysis<ScalarEvolution>()),
DT(P->getAnalysis<DominatorTree>()),
LI(P->getAnalysis<LoopInfo>()),
- STTI(stti), L(l), Changed(false), IVIncInsertPos(0) {
+ TLI(tli), L(l), Changed(false), IVIncInsertPos(0) {
// If LoopSimplify form is not available, stay out of trouble.
if (!L->isLoopSimplifyForm())
@@ -4684,7 +4684,7 @@ LSRInstance::LSRInstance(const ScalarTargetTransformInfo *stti, Loop *l, Pass *P
for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
JE = LU.Formulae.end(); J != JE; ++J)
assert(isLegalUse(J->AM, LU.MinOffset, LU.MaxOffset,
- LU.Kind, LU.AccessTy, STTI) &&
+ LU.Kind, LU.AccessTy, TLI) &&
"Illegal formula generated!");
};
#endif
@@ -4757,13 +4757,13 @@ void LSRInstance::dump() const {
namespace {
class LoopStrengthReduce : public LoopPass {
- /// ScalarTargetTransformInfo provides target information that is needed
- /// for strength reducing loops.
- const ScalarTargetTransformInfo *STTI;
+ /// TLI - Keep a pointer to a TargetLowering object to consult when
+ /// determining transformation profitability.
+ const TargetLowering *const TLI;
public:
static char ID; // Pass ID, replacement for typeid
- LoopStrengthReduce();
+ explicit LoopStrengthReduce(const TargetLowering *tli = 0);
private:
bool runOnLoop(Loop *L, LPPassManager &LPM);
@@ -4783,12 +4783,13 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
"Loop Strength Reduction", false, false)
-Pass *llvm::createLoopStrengthReducePass() {
- return new LoopStrengthReduce();
+
+Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
+ return new LoopStrengthReduce(TLI);
}
-LoopStrengthReduce::LoopStrengthReduce()
- : LoopPass(ID), STTI(0) {
+LoopStrengthReduce::LoopStrengthReduce(const TargetLowering *tli)
+ : LoopPass(ID), TLI(tli) {
initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
}
@@ -4814,13 +4815,8 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
bool Changed = false;
- TargetTransformInfo *TTI = getAnalysisIfAvailable<TargetTransformInfo>();
-
- if (TTI)
- STTI = TTI->getScalarTargetTransformInfo();
-
// Run the main LSR transformation.
- Changed |= LSRInstance(STTI, L, this).getChanged();
+ Changed |= LSRInstance(TLI, L, this).getChanged();
// Remove any extra phis created by processing inner loops.
Changed |= DeleteDeadPHIs(L->getHeader());
@@ -4831,7 +4827,7 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
Rewriter.setDebugType(DEBUG_TYPE);
#endif
unsigned numFolded = Rewriter.
- replaceCongruentIVs(L, &getAnalysis<DominatorTree>(), DeadInsts, STTI);
+ replaceCongruentIVs(L, &getAnalysis<DominatorTree>(), DeadInsts, TLI);
if (numFolded) {
Changed = true;
DeleteTriviallyDeadInstructions(DeadInsts);
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 15b168fe2aa..047b43eb84f 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -907,13 +907,9 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
/// specified.
static void RemoveFromWorklist(Instruction *I,
std::vector<Instruction*> &Worklist) {
- std::vector<Instruction*>::iterator WI = std::find(Worklist.begin(),
- Worklist.end(), I);
- while (WI != Worklist.end()) {
- unsigned Offset = WI-Worklist.begin();
- Worklist.erase(WI);
- WI = std::find(Worklist.begin()+Offset, Worklist.end(), I);
- }
+
+ Worklist.erase(std::remove(Worklist.begin(), Worklist.end(), I),
+ Worklist.end());
}
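
The rewritten RemoveFromWorklist is the standard erase-remove idiom: std::remove compacts the elements to keep toward the front and returns the new logical end, and a single erase drops the tail, turning the old quadratic find-and-erase loop into one linear pass. A self-contained sketch:

    #include <algorithm>
    #include <cassert>
    #include <vector>

    int main() {
      std::vector<int> Worklist;
      int Vals[] = {1, 2, 3, 2, 4, 2};
      Worklist.assign(Vals, Vals + 6);
      // Remove every occurrence of 2 in one pass.
      Worklist.erase(std::remove(Worklist.begin(), Worklist.end(), 2),
                     Worklist.end());
      assert(Worklist.size() == 3 && Worklist[0] == 1 && Worklist[1] == 3 &&
             Worklist[2] == 4);
      return 0;
    }
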
/// ReplaceUsesOfWith - When we find that I really equals V, remove I from the
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 3e84a91c1db..71c62257e7f 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -132,9 +132,6 @@ public:
///
/// We flag partitions as splittable when they are formed entirely due to
/// accesses by trivially splittable operations such as memset and memcpy.
- ///
- /// FIXME: At some point we should consider loads and stores of FCAs to be
- /// splittable and eagerly split them into scalar values.
bool IsSplittable;
/// \brief Test whether a partition has been marked as dead.
@@ -1785,9 +1782,9 @@ static Value *getNaturalGEPWithType(IRBuilder<> &IRB, const DataLayout &TD,
break;
if (SequentialType *SeqTy = dyn_cast<SequentialType>(ElementTy)) {
ElementTy = SeqTy->getElementType();
- Indices.push_back(IRB.getInt(APInt(TD.getPointerSizeInBits(
- ElementTy->isPointerTy() ?
- cast<PointerType>(ElementTy)->getAddressSpace(): 0), 0)));
+ // Note that we use the default address space as this index is over an
+ // array or a vector, not a pointer.
+ Indices.push_back(IRB.getInt(APInt(TD.getPointerSizeInBits(0), 0)));
} else if (StructType *STy = dyn_cast<StructType>(ElementTy)) {
if (STy->element_begin() == STy->element_end())
break; // Nothing left to descend into.
@@ -1828,7 +1825,7 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD,
if (ElementSizeInBits % 8)
return 0; // GEPs over non-multiple of 8 size vector elements are invalid.
APInt ElementSize(Offset.getBitWidth(), ElementSizeInBits / 8);
- APInt NumSkippedElements = Offset.udiv(ElementSize);
+ APInt NumSkippedElements = Offset.sdiv(ElementSize);
if (NumSkippedElements.ugt(VecTy->getNumElements()))
return 0;
Offset -= NumSkippedElements * ElementSize;
@@ -1840,7 +1837,7 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD,
if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
Type *ElementTy = ArrTy->getElementType();
APInt ElementSize(Offset.getBitWidth(), TD.getTypeAllocSize(ElementTy));
- APInt NumSkippedElements = Offset.udiv(ElementSize);
+ APInt NumSkippedElements = Offset.sdiv(ElementSize);
if (NumSkippedElements.ugt(ArrTy->getNumElements()))
return 0;
@@ -1896,7 +1893,7 @@ static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const DataLayout &TD,
APInt ElementSize(Offset.getBitWidth(), TD.getTypeAllocSize(ElementTy));
if (ElementSize == 0)
return 0; // Zero-length arrays can't help us build a natural GEP.
- APInt NumSkippedElements = Offset.udiv(ElementSize);
+ APInt NumSkippedElements = Offset.sdiv(ElementSize);
Offset -= NumSkippedElements * ElementSize;
Indices.push_back(IRB.getInt(NumSkippedElements));
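
The udiv-to-sdiv switches above matter once the running Offset can be negative: unsigned division would turn a small negative byte offset into an enormous element count. A hedged illustration on APInt (not code from the patch):

    #include "llvm/ADT/APInt.h"
    using namespace llvm;

    void offsetDivisionExample() {
      APInt Offset(64, -16, /*isSigned=*/true); // 16 bytes before the base
      APInt ElemSize(64, 8);                    // 8-byte elements
      APInt Good = Offset.sdiv(ElemSize);       // -2: two elements backwards
      APInt Bad = Offset.udiv(ElemSize);        // huge positive count: nonsense
      (void)Good;
      (void)Bad;
    }
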
@@ -2211,6 +2208,48 @@ static bool isIntegerWideningViable(const DataLayout &TD,
return WholeAllocaOp;
}
+static Value *extractInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *V,
+ IntegerType *Ty, uint64_t Offset,
+ const Twine &Name) {
+ IntegerType *IntTy = cast<IntegerType>(V->getType());
+ assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) &&
+ "Element extends past full value");
+ uint64_t ShAmt = 8*Offset;
+ if (DL.isBigEndian())
+ ShAmt = 8*(DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset);
+ if (ShAmt)
+ V = IRB.CreateLShr(V, ShAmt, Name + ".shift");
+ assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
+ "Cannot extract to a larger integer!");
+ if (Ty != IntTy)
+ V = IRB.CreateTrunc(V, Ty, Name + ".trunc");
+ return V;
+}
+
+static Value *insertInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *Old,
+ Value *V, uint64_t Offset, const Twine &Name) {
+ IntegerType *IntTy = cast<IntegerType>(Old->getType());
+ IntegerType *Ty = cast<IntegerType>(V->getType());
+ assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
+ "Cannot insert a larger integer!");
+ if (Ty != IntTy)
+ V = IRB.CreateZExt(V, IntTy, Name + ".ext");
+ assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) &&
+ "Element store outside of alloca store");
+ uint64_t ShAmt = 8*Offset;
+ if (DL.isBigEndian())
+ ShAmt = 8*(DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset);
+ if (ShAmt)
+ V = IRB.CreateShl(V, ShAmt, Name + ".shift");
+
+ if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {
+ APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt);
+ Old = IRB.CreateAnd(Old, Mask, Name + ".mask");
+ V = IRB.CreateOr(Old, V, Name + ".insert");
+ }
+ return V;
+}
+
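
The shift-and-mask arithmetic in the two helpers above is easiest to see on plain integers. A small worked example, assuming a little-endian layout (on big-endian targets the DL.isBigEndian() branch recomputes the shift from the other end of the value):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t Old = 0x1111222233334444ULL;  // the whole-alloca integer
      uint16_t Elt = 0xBEEF;                 // 16-bit element to insert
      unsigned Offset = 2;                   // byte offset of the element
      uint64_t ShAmt = 8 * Offset;           // bit position of the lane
      uint64_t Mask = ~(0xFFFFULL << ShAmt); // clear the destination lane
      uint64_t New = (Old & Mask) | ((uint64_t)Elt << ShAmt);
      assert(New == 0x11112222BEEF4444ULL);
      return 0;
    }
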
namespace {
/// \brief Visitor to rewrite instructions using a partition of an alloca to
/// use a new alloca.
@@ -2371,60 +2410,6 @@ private:
return IRB.getInt32(Index);
}
- Value *extractInteger(IRBuilder<> &IRB, IntegerType *TargetTy,
- uint64_t Offset) {
- assert(IntTy && "We cannot extract an integer from the alloca");
- Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".load"));
- V = convertValue(TD, IRB, V, IntTy);
- assert(Offset >= NewAllocaBeginOffset && "Out of bounds offset");
- uint64_t RelOffset = Offset - NewAllocaBeginOffset;
- assert(TD.getTypeStoreSize(TargetTy) + RelOffset <=
- TD.getTypeStoreSize(IntTy) &&
- "Element load outside of alloca store");
- uint64_t ShAmt = 8*RelOffset;
- if (TD.isBigEndian())
- ShAmt = 8*(TD.getTypeStoreSize(IntTy) -
- TD.getTypeStoreSize(TargetTy) - RelOffset);
- if (ShAmt)
- V = IRB.CreateLShr(V, ShAmt, getName(".shift"));
- assert(TargetTy->getBitWidth() <= IntTy->getBitWidth() &&
- "Cannot extract to a larger integer!");
- if (TargetTy != IntTy)
- V = IRB.CreateTrunc(V, TargetTy, getName(".trunc"));
- return V;
- }
-
- StoreInst *insertInteger(IRBuilder<> &IRB, Value *V, uint64_t Offset) {
- IntegerType *Ty = cast<IntegerType>(V->getType());
- assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
- "Cannot insert a larger integer!");
- if (Ty != IntTy)
- V = IRB.CreateZExt(V, IntTy, getName(".ext"));
- assert(Offset >= NewAllocaBeginOffset && "Out of bounds offset");
- uint64_t RelOffset = Offset - NewAllocaBeginOffset;
- assert(TD.getTypeStoreSize(Ty) + RelOffset <=
- TD.getTypeStoreSize(IntTy) &&
- "Element store outside of alloca store");
- uint64_t ShAmt = 8*RelOffset;
- if (TD.isBigEndian())
- ShAmt = 8*(TD.getTypeStoreSize(IntTy) - TD.getTypeStoreSize(Ty)
- - RelOffset);
- if (ShAmt)
- V = IRB.CreateShl(V, ShAmt, getName(".shift"));
-
- if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {
- APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt);
- Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
- getName(".oldload"));
- Old = convertValue(TD, IRB, Old, IntTy);
- Old = IRB.CreateAnd(Old, Mask, getName(".mask"));
- V = IRB.CreateOr(Old, V, getName(".insert"));
- }
- V = convertValue(TD, IRB, V, NewAllocaTy);
- return IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
- }
-
void deleteIfTriviallyDead(Value *V) {
Instruction *I = cast<Instruction>(V);
if (isInstructionTriviallyDead(I))
@@ -2452,12 +2437,18 @@ private:
}
bool rewriteIntegerLoad(IRBuilder<> &IRB, LoadInst &LI) {
+ assert(IntTy && "We cannot insert an integer to the alloca");
assert(!LI.isVolatile());
- Value *Result = extractInteger(IRB, cast<IntegerType>(LI.getType()),
- BeginOffset);
- LI.replaceAllUsesWith(Result);
+ Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ getName(".load"));
+ V = convertValue(TD, IRB, V, IntTy);
+ assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
+ uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
+ V = extractInteger(TD, IRB, V, cast<IntegerType>(LI.getType()), Offset,
+ getName(".extract"));
+ LI.replaceAllUsesWith(V);
Pass.DeadInsts.push_back(&LI);
- DEBUG(dbgs() << " to: " << *Result << "\n");
+ DEBUG(dbgs() << " to: " << *V << "\n");
return true;
}
@@ -2519,8 +2510,20 @@ private:
}
bool rewriteIntegerStore(IRBuilder<> &IRB, StoreInst &SI) {
+ assert(IntTy && "We cannot extract an integer from the alloca");
assert(!SI.isVolatile());
- StoreInst *Store = insertInteger(IRB, SI.getValueOperand(), BeginOffset);
+ Value *V = SI.getValueOperand();
+ if (TD.getTypeSizeInBits(V->getType()) != IntTy->getBitWidth()) {
+ Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ getName(".oldload"));
+ Old = convertValue(TD, IRB, Old, IntTy);
+ assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
+ uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
+ V = insertInteger(TD, IRB, Old, SI.getValueOperand(), Offset,
+ getName(".insert"));
+ }
+ V = convertValue(TD, IRB, V, NewAllocaTy);
+ StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
Pass.DeadInsts.push_back(&SI);
(void)Store;
DEBUG(dbgs() << " to: " << *Store << "\n");
@@ -2652,10 +2655,12 @@ private:
if (IntTy && (BeginOffset > NewAllocaBeginOffset ||
EndOffset < NewAllocaEndOffset)) {
assert(!II.isVolatile());
- StoreInst *Store = insertInteger(IRB, V, BeginOffset);
- (void)Store;
- DEBUG(dbgs() << " to: " << *Store << "\n");
- return true;
+ Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ getName(".oldload"));
+ Old = convertValue(TD, IRB, Old, IntTy);
+ assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
+ uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
+ V = insertInteger(TD, IRB, Old, V, Offset, getName(".insert"));
}
if (V->getType() != AllocaTy)
@@ -2811,17 +2816,25 @@ private:
getIndex(IRB, BeginOffset),
getName(".copyextract"));
} else if (IntTy && !IsWholeAlloca && !IsDest) {
- Src = extractInteger(IRB, SubIntTy, BeginOffset);
+ Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ getName(".load"));
+ Src = convertValue(TD, IRB, Src, IntTy);
+ assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
+ uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
+ Src = extractInteger(TD, IRB, Src, SubIntTy, Offset, getName(".extract"));
} else {
Src = IRB.CreateAlignedLoad(SrcPtr, Align, II.isVolatile(),
getName(".copyload"));
}
if (IntTy && !IsWholeAlloca && IsDest) {
- StoreInst *Store = insertInteger(IRB, Src, BeginOffset);
- (void)Store;
- DEBUG(dbgs() << " to: " << *Store << "\n");
- return true;
+ Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
+ getName(".oldload"));
+ Old = convertValue(TD, IRB, Old, IntTy);
+ assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
+ uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
+ Src = insertInteger(TD, IRB, Old, Src, Offset, getName(".insert"));
+ Src = convertValue(TD, IRB, Src, NewAllocaTy);
}
if (IsVectorElement && IsDest) {
@@ -2875,10 +2888,7 @@ private:
Value *NewPtr = getAdjustedAllocaPtr(PtrBuilder, OldPtr->getType());
// Replace the operands which were using the old pointer.
- User::op_iterator OI = PN.op_begin(), OE = PN.op_end();
- for (; OI != OE; ++OI)
- if (*OI == OldPtr)
- *OI = NewPtr;
+ std::replace(PN.op_begin(), PN.op_end(), cast<Value>(OldPtr), NewPtr);
DEBUG(dbgs() << " to: " << PN << "\n");
deleteIfTriviallyDead(OldPtr);
@@ -3248,11 +3258,7 @@ static Type *getTypePartition(const DataLayout &TD, Type *Ty,
}
// Try to build up a sub-structure.
- SmallVector<Type *, 4> ElementTys;
- do {
- ElementTys.push_back(*EI++);
- } while (EI != EE);
- StructType *SubTy = StructType::get(STy->getContext(), ElementTys,
+ StructType *SubTy = StructType::get(STy->getContext(), makeArrayRef(EI, EE),
STy->isPacked());
const StructLayout *SubSL = TD.getStructLayout(SubTy);
if (Size != SubSL->getSizeInBytes())
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index d86c4cbc9f6..c82a00fc2c8 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -135,47 +135,6 @@ static bool IsOnlyUsedInEqualityComparison(Value *V, Value *With) {
namespace {
//===---------------------------------------===//
-// 'strcpy' Optimizations
-
-struct StrCpyOpt : public LibCallOptimization {
- bool OptChkCall; // True if it's optimizing a __strcpy_chk libcall.
-
- StrCpyOpt(bool c) : OptChkCall(c) {}
-
- virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
- // Verify the "strcpy" function prototype.
- unsigned NumParams = OptChkCall ? 3 : 2;
- FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != NumParams ||
- FT->getReturnType() != FT->getParamType(0) ||
- FT->getParamType(0) != FT->getParamType(1) ||
- FT->getParamType(0) != B.getInt8PtrTy())
- return 0;
-
- Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
- if (Dst == Src) // strcpy(x,x) -> x
- return Src;
-
- // These optimizations require DataLayout.
- if (!TD) return 0;
-
- // See if we can get the length of the input string.
- uint64_t Len = GetStringLength(Src);
- if (Len == 0) return 0;
-
- // We have enough information to now generate the memcpy call to do the
- // concatenation for us. Make a memcpy to copy the nul byte with align = 1.
- if (!OptChkCall ||
- !EmitMemCpyChk(Dst, Src,
- ConstantInt::get(TD->getIntPtrType(*Context), Len),
- CI->getArgOperand(2), B, TD, TLI))
- B.CreateMemCpy(Dst, Src,
- ConstantInt::get(TD->getIntPtrType(*Context), Len), 1);
- return Dst;
- }
-};
-
-//===---------------------------------------===//
// 'stpcpy' Optimizations
struct StpCpyOpt: public LibCallOptimization {
@@ -813,8 +772,8 @@ struct FFSOpt : public LibCallOptimization {
// Constant fold.
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
- if (CI->getValue() == 0) // ffs(0) -> 0.
- return Constant::getNullValue(CI->getType());
+ if (CI->isZero()) // ffs(0) -> 0.
+ return B.getInt32(0);
// ffs(c) -> cttz(c)+1
return B.getInt32(CI->getValue().countTrailingZeros() + 1);
}
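
The ffs(c) -> cttz(c)+1 identity above, and the new B.getInt32(0) for the zero case (ffs always returns a 32-bit int regardless of the operand width), can be sanity-checked directly, assuming a POSIX ffs and the GCC/Clang __builtin_ctz:

    #include <cassert>
    #include <strings.h> // POSIX ffs

    int main() {
      for (int c = 1; c < 1024; ++c)
        assert(ffs(c) == __builtin_ctz(c) + 1);
      assert(ffs(0) == 0); // no set bit: defined to return 0
      return 0;
    }
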
@@ -1275,7 +1234,6 @@ namespace {
StringMap<LibCallOptimization*> Optimizations;
// String and Memory LibCall Optimizations
- StrCpyOpt StrCpy; StrCpyOpt StrCpyChk;
StpCpyOpt StpCpy; StpCpyOpt StpCpyChk;
StrNCpyOpt StrNCpy;
StrLenOpt StrLen; StrPBrkOpt StrPBrk;
@@ -1295,8 +1253,7 @@ namespace {
bool Modified; // This is only used by doInitialization.
public:
static char ID; // Pass identification
- SimplifyLibCalls() : FunctionPass(ID), StrCpy(false), StrCpyChk(true),
- StpCpy(false), StpCpyChk(true),
+ SimplifyLibCalls() : FunctionPass(ID), StpCpy(false), StpCpyChk(true),
UnaryDoubleFP(false), UnsafeUnaryDoubleFP(true) {
initializeSimplifyLibCallsPass(*PassRegistry::getPassRegistry());
}
@@ -1348,7 +1305,6 @@ void SimplifyLibCalls::AddOpt(LibFunc::Func F1, LibFunc::Func F2,
/// we know.
void SimplifyLibCalls::InitOptimizations() {
// String and Memory LibCall Optimizations
- Optimizations["strcpy"] = &StrCpy;
Optimizations["strncpy"] = &StrNCpy;
Optimizations["stpcpy"] = &StpCpy;
Optimizations["strlen"] = &StrLen;
@@ -1369,7 +1325,6 @@ void SimplifyLibCalls::InitOptimizations() {
AddOpt(LibFunc::memset, &MemSet);
// _chk variants of String and Memory LibCall Optimizations.
- Optimizations["__strcpy_chk"] = &StrCpyChk;
Optimizations["__stpcpy_chk"] = &StpCpyChk;
// Math Library Optimizations
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 6557d630a94..28c278f0362 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -69,6 +69,7 @@
#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ValueHandle.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
using namespace llvm;
@@ -117,34 +118,45 @@ FunctionPass *llvm::createTailCallEliminationPass() {
return new TailCallElim();
}
-/// AllocaMightEscapeToCalls - Return true if this alloca may be accessed by
-/// callees of this function. We only do very simple analysis right now, this
-/// could be expanded in the future to use mod/ref information for particular
-/// call sites if desired.
-static bool AllocaMightEscapeToCalls(AllocaInst *AI) {
- // FIXME: do simple 'address taken' analysis.
- return true;
+/// CanTRE - Return true if tail recursion elimination is able to cope with
+/// the given alloca: it must be in the entry block and have a constant,
+/// non-variable size.
+static bool CanTRE(AllocaInst *AI) {
+ // Because of PR962, we don't TRE allocas outside the entry block.
+
+ // If this alloca is in the body of the function, or if it is a variable
+ // sized allocation, we cannot tail call eliminate calls marked 'tail'
+ // with this mechanism.
+ BasicBlock *BB = AI->getParent();
+ return BB == &BB->getParent()->getEntryBlock() &&
+ isa<ConstantInt>(AI->getArraySize());
}
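
In source terms, CanTRE accepts only the first kind of alloca below; the second would have been freed by genuine tail-call elimination, so turning a 'tail'-marked call into a loop around it would grow the stack. A sketch, with a hypothetical callee g:

    #include <alloca.h>

    int g(int *p) { return p ? 1 : 0; } // hypothetical callee

    int f(unsigned n) {
      int fixed[16];                // entry block, constant size: OK
      int *dyn = (int *)alloca(n);  // variable size: blocks TRE of 'tail' calls
      return g(fixed) + g(dyn);
    }
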
-/// CheckForEscapingAllocas - Scan the specified basic block for alloca
-/// instructions. If it contains any that might be accessed by calls, return
-/// true.
-static bool CheckForEscapingAllocas(BasicBlock *BB,
- bool &CannotTCETailMarkedCall) {
- bool RetVal = false;
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
- if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
- RetVal |= AllocaMightEscapeToCalls(AI);
-
- // If this alloca is in the body of the function, or if it is a variable
- // sized allocation, we cannot tail call eliminate calls marked 'tail'
- // with this mechanism.
- if (BB != &BB->getParent()->getEntryBlock() ||
- !isa<ConstantInt>(AI->getArraySize()))
- CannotTCETailMarkedCall = true;
- }
- return RetVal;
-}
+struct AllocaCaptureTracker : public CaptureTracker {
+ AllocaCaptureTracker() : Captured(false) {}
+
+ void tooManyUses() { Captured = true; }
+
+ bool shouldExplore(Use *U) {
+ Value *V = U->getUser();
+ if (isa<CallInst>(V) || isa<InvokeInst>(V))
+ UsesAlloca.push_back(V);
+
+ return true;
+ }
+
+ bool captured(Use *U) {
+ if (isa<ReturnInst>(U->getUser()))
+ return false;
+
+ Captured = true;
+ return true;
+ }
+
+ SmallVector<WeakVH, 64> UsesAlloca;
+
+ bool Captured;
+};
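
The tracker above plugs into the CaptureTracking walk: PointerMayBeCaptured visits every transitive use of the pointer, calling shouldExplore before descending into a use and captured when a use may let the pointer escape. A minimal sketch of the callback contract, assuming the 3.2-era CaptureTracking interface (CountingTracker is an illustrative name, not from the patch):

    #include "llvm/Analysis/CaptureTracking.h"
    #include "llvm/Instructions.h"
    using namespace llvm;

    struct CountingTracker : public CaptureTracker {
      CountingTracker() : Visited(0), Escaped(false) {}
      void tooManyUses() { Escaped = true; } // walk gave up: assume the worst
      bool shouldExplore(Use *U) { ++Visited; return true; }
      bool captured(Use *U) { Escaped = true; return true; } // true stops the walk
      unsigned Visited;
      bool Escaped;
    };

    void checkAlloca(AllocaInst *AI) {
      CountingTracker CT;
      PointerMayBeCaptured(AI, &CT); // drives the callbacks above
    }
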
bool TailCallElim::runOnFunction(Function &F) {
// If this function is a varargs function, we won't be able to PHI the args
@@ -157,38 +169,34 @@ bool TailCallElim::runOnFunction(Function &F) {
bool MadeChange = false;
bool FunctionContainsEscapingAllocas = false;
- // CannotTCETailMarkedCall - If true, we cannot perform TCE on tail calls
+ // CanTRETailMarkedCall - If false, we cannot perform TRE on tail calls
// marked with the 'tail' attribute, because doing so would cause the stack
- // size to increase (real TCE would deallocate variable sized allocas, TCE
+ // size to increase (real TRE would deallocate variable sized allocas, TRE
// doesn't).
- bool CannotTCETailMarkedCall = false;
-
- // Loop over the function, looking for any returning blocks, and keeping track
- // of whether this function has any non-trivially used allocas.
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- if (FunctionContainsEscapingAllocas && CannotTCETailMarkedCall)
- break;
-
- FunctionContainsEscapingAllocas |=
- CheckForEscapingAllocas(BB, CannotTCETailMarkedCall);
+ bool CanTRETailMarkedCall = true;
+
+ // Find calls that can be marked tail.
+ AllocaCaptureTracker ACT;
+ for (Function::iterator BB = F.begin(), EE = F.end(); BB != EE; ++BB) {
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
+ CanTRETailMarkedCall &= CanTRE(AI);
+ PointerMayBeCaptured(AI, &ACT);
+ if (ACT.Captured)
+ return false;
+ }
+ }
}
- /// FIXME: The code generator produces really bad code when an 'escaping
- /// alloca' is changed from being a static alloca to being a dynamic alloca.
- /// Until this is resolved, disable this transformation if that would ever
- /// happen. This bug is PR962.
- if (FunctionContainsEscapingAllocas)
- return false;
-
- // Second pass, change any tail calls to loops.
+ // Second pass, change any tail recursive calls to loops.
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) {
bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
- ArgumentPHIs,CannotTCETailMarkedCall);
+ ArgumentPHIs, !CanTRETailMarkedCall);
if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
Change = FoldReturnAndProcessPred(BB, Ret, OldEntry,
TailCallsAreMarkedTail, ArgumentPHIs,
- CannotTCETailMarkedCall);
+ !CanTRETailMarkedCall);
MadeChange |= Change;
}
}
@@ -210,21 +218,24 @@ bool TailCallElim::runOnFunction(Function &F) {
}
}
- // Finally, if this function contains no non-escaping allocas, or calls
- // setjmp, mark all calls in the function as eligible for tail calls
- //(there is no stack memory for them to access).
+ // Finally, if this function contains no non-escaping allocas and doesn't
+ // call setjmp, mark all calls in the function as eligible for tail calls
+ // (there is no stack memory for them to access).
+ std::sort(ACT.UsesAlloca.begin(), ACT.UsesAlloca.end());
+
if (!FunctionContainsEscapingAllocas && !F.callsFunctionThatReturnsTwice())
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
- if (CallInst *CI = dyn_cast<CallInst>(I)) {
- CI->setTailCall();
- MadeChange = true;
- }
+ if (CallInst *CI = dyn_cast<CallInst>(I))
+ if (!std::binary_search(ACT.UsesAlloca.begin(), ACT.UsesAlloca.end(),
+ CI)) {
+ CI->setTailCall();
+ MadeChange = true;
+ }
return MadeChange;
}
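
The UsesAlloca list is sorted once so that each candidate call can be tested with std::binary_search in O(log n) instead of a linear scan; storing WeakVH also lets entries null out safely if an instruction is deleted in the meantime. The idiom on plain values:

    #include <algorithm>
    #include <cassert>
    #include <vector>

    int main() {
      std::vector<int> UsesAlloca; // stand-in for the WeakVH list
      UsesAlloca.push_back(42);
      UsesAlloca.push_back(7);
      std::sort(UsesAlloca.begin(), UsesAlloca.end()); // once, up front
      assert(std::binary_search(UsesAlloca.begin(), UsesAlloca.end(), 7));
      assert(!std::binary_search(UsesAlloca.begin(), UsesAlloca.end(), 13));
      return 0;
    }
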
-
/// CanMoveAboveCall - Return true if it is safe to move the specified
/// instruction from after the call to before the call, assuming that all
/// instructions between the call and this instruction are movable.
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index edeebe1b79a..9fea11391a1 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -687,3 +687,42 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
return cast<ReturnInst>(NewRet);
}
+/// SplitBlockAndInsertIfThen - Split the containing block at the
+/// specified instruction - everything before and including Cmp stays
+/// in the old basic block, and everything after Cmp is moved to a
+/// new block. The two blocks are connected by a conditional branch
+/// (with value of Cmp being the condition).
+/// Before:
+/// Head
+/// Cmp
+/// Tail
+/// After:
+/// Head
+/// Cmp
+/// if (Cmp)
+/// ThenBlock
+/// Tail
+///
+/// If Unreachable is true, then ThenBlock ends with
+/// UnreachableInst, otherwise it branches to Tail.
+/// Returns the terminator instruction of the new ThenBlock.
+TerminatorInst *llvm::SplitBlockAndInsertIfThen(Instruction *Cmp,
+ bool Unreachable, MDNode *BranchWeights) {
+ Instruction *SplitBefore = Cmp->getNextNode();
+ BasicBlock *Head = SplitBefore->getParent();
+ BasicBlock *Tail = Head->splitBasicBlock(SplitBefore);
+ TerminatorInst *HeadOldTerm = Head->getTerminator();
+ LLVMContext &C = Head->getContext();
+ BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
+ TerminatorInst *CheckTerm;
+ if (Unreachable)
+ CheckTerm = new UnreachableInst(C, ThenBlock);
+ else
+ CheckTerm = BranchInst::Create(Tail, ThenBlock);
+ BranchInst *HeadNewTerm =
+ BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/Tail, Cmp);
+ HeadNewTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
+ ReplaceInstWithInst(HeadOldTerm, HeadNewTerm);
+ return CheckTerm;
+}
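
A hedged usage sketch for the new helper: given an i1 comparison Cmp, split at it and fill the then-block with a slow path. The names here are illustrative, and the IRBuilder header path is the 3.2-era one:

    #include "llvm/IRBuilder.h"
    #include "llvm/Transforms/Utils/BasicBlockUtils.h"
    using namespace llvm;

    void instrument(Instruction *Cmp, Value *ReportFn) {
      // Unreachable == false: the then-block branches on to the tail.
      TerminatorInst *ThenTerm =
          SplitBlockAndInsertIfThen(Cmp, /*Unreachable=*/false,
                                    /*BranchWeights=*/0);
      IRBuilder<> IRB(ThenTerm); // insert the slow path before the branch
      IRB.CreateCall(ReportFn);
    }
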
diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp
index f35cbbdde5e..930555424de 100644
--- a/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/lib/Transforms/Utils/LowerInvoke.cpp
@@ -45,10 +45,10 @@
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/TargetTransformInfo.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetLowering.h"
#include <csetjmp>
#include <set>
using namespace llvm;
@@ -70,14 +70,15 @@ namespace {
Constant *SetJmpFn, *LongJmpFn, *StackSaveFn, *StackRestoreFn;
bool useExpensiveEHSupport;
- // We peek in STTI to grab the target's jmp_buf size and alignment
- const ScalarTargetTransformInfo *STTI;
+ // We peek in TLI to grab the target's jmp_buf size and alignment
+ const TargetLowering *TLI;
public:
static char ID; // Pass identification, replacement for typeid
- explicit LowerInvoke(bool useExpensiveEHSupport = ExpensiveEHSupport)
+ explicit LowerInvoke(const TargetLowering *tli = NULL,
+ bool useExpensiveEHSupport = ExpensiveEHSupport)
: FunctionPass(ID), useExpensiveEHSupport(useExpensiveEHSupport),
- STTI(0) {
+ TLI(tli) {
initializeLowerInvokePass(*PassRegistry::getPassRegistry());
}
bool doInitialization(Module &M);
@@ -107,24 +108,21 @@ INITIALIZE_PASS(LowerInvoke, "lowerinvoke",
char &llvm::LowerInvokePassID = LowerInvoke::ID;
// Public Interface To the LowerInvoke pass.
-FunctionPass *llvm::createLowerInvokePass() {
- return new LowerInvoke(ExpensiveEHSupport);
+FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI) {
+ return new LowerInvoke(TLI, ExpensiveEHSupport);
}
-FunctionPass *llvm::createLowerInvokePass(bool useExpensiveEHSupport) {
- return new LowerInvoke(useExpensiveEHSupport);
+FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI,
+ bool useExpensiveEHSupport) {
+ return new LowerInvoke(TLI, useExpensiveEHSupport);
}
// doInitialization - Make sure that there is a prototype for abort in the
// current module.
bool LowerInvoke::doInitialization(Module &M) {
- TargetTransformInfo *TTI = getAnalysisIfAvailable<TargetTransformInfo>();
- if (TTI)
- STTI = TTI->getScalarTargetTransformInfo();
-
Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
if (useExpensiveEHSupport) {
// Insert a type for the linked list of jump buffers.
- unsigned JBSize = STTI ? STTI->getJumpBufSize() : 0;
+ unsigned JBSize = TLI ? TLI->getJumpBufSize() : 0;
JBSize = JBSize ? JBSize : 200;
Type *JmpBufTy = ArrayType::get(VoidPtrTy, JBSize);
@@ -432,7 +430,7 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
// Create an alloca for the incoming jump buffer ptr and the new jump buffer
// that needs to be restored on all exits from the function. This is an
// alloca because the value needs to be live across invokes.
- unsigned Align = STTI ? STTI->getJumpBufAlignment() : 0;
+ unsigned Align = TLI ? TLI->getJumpBufAlignment() : 0;
AllocaInst *JmpBuf =
new AllocaInst(JBLinkTy, 0, Align,
"jblink", F.begin()->begin());
@@ -577,10 +575,6 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
}
bool LowerInvoke::runOnFunction(Function &F) {
- TargetTransformInfo *TTI = getAnalysisIfAvailable<TargetTransformInfo>();
- if (TTI)
- STTI = TTI->getScalarTargetTransformInfo();
-
if (useExpensiveEHSupport)
return insertExpensiveEHSupport(F);
else
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index bd28ec35273..b15acdff633 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -183,14 +183,30 @@ struct StrCpyChkOpt : public InstFortifiedLibCallOptimization {
FT->getParamType(2) != TD->getIntPtrType(Context))
return 0;
+ Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
+ if (Dst == Src) // __strcpy_chk(x,x) -> x
+ return Src;
+
// If a) we don't have any length information, or b) we know this will
// fit then just lower to a plain st[rp]cpy. Otherwise we'll keep our
// st[rp]cpy_chk call which may fail at runtime if the size is too long.
// TODO: It might be nice to get a maximum length out of the possible
// string lengths for varying.
if (isFoldable(2, 1, true)) {
- Value *Ret = EmitStrCpy(CI->getArgOperand(0), CI->getArgOperand(1), B, TD,
- TLI, Name.substr(2, 6));
+ Value *Ret = EmitStrCpy(Dst, Src, B, TD, TLI, Name.substr(2, 6));
+ return Ret;
+ } else {
+ // Maybe we can still fold __strcpy_chk to __memcpy_chk.
+ uint64_t Len = GetStringLength(Src);
+ if (Len == 0) return 0;
+
+ // This optimization requires DataLayout.
+ if (!TD) return 0;
+
+ Value *Ret =
+ EmitMemCpyChk(Dst, Src,
+ ConstantInt::get(TD->getIntPtrType(Context), Len),
+ CI->getArgOperand(2), B, TD, TLI);
return Ret;
}
return 0;
@@ -497,6 +513,35 @@ struct StrNCmpOpt : public LibCallOptimization {
}
};
+struct StrCpyOpt : public LibCallOptimization {
+ virtual Value *callOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
+ // Verify the "strcpy" function prototype.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 ||
+ FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ FT->getParamType(0) != B.getInt8PtrTy())
+ return 0;
+
+ Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
+ if (Dst == Src) // strcpy(x,x) -> x
+ return Src;
+
+ // These optimizations require DataLayout.
+ if (!TD) return 0;
+
+ // See if we can get the length of the input string.
+ uint64_t Len = GetStringLength(Src);
+ if (Len == 0) return 0;
+
+ // We have enough information to now generate the memcpy call to do the
+ // copy for us. Make a memcpy to copy the nul byte with align = 1.
+ B.CreateMemCpy(Dst, Src,
+ ConstantInt::get(TD->getIntPtrType(*Context), Len), 1);
+ return Dst;
+ }
+};
+
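
At the source level, the relocated strcpy optimization performs the following rewrite when the source is a string of known length Len (here Len = 5, plus one byte for the terminating NUL):

    #include <string.h>

    void before(char *dst) { strcpy(dst, "hello"); }
    void after(char *dst) { memcpy(dst, "hello", 6); } // Len + 1 bytes
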
} // End anonymous namespace.
namespace llvm {
@@ -520,6 +565,7 @@ class LibCallSimplifierImpl {
StrRChrOpt StrRChr;
StrCmpOpt StrCmp;
StrNCmpOpt StrNCmp;
+ StrCpyOpt StrCpy;
void initOptimizations();
public:
@@ -540,14 +586,15 @@ void LibCallSimplifierImpl::initOptimizations() {
Optimizations["__stpcpy_chk"] = &StrCpyChk;
Optimizations["__strncpy_chk"] = &StrNCpyChk;
Optimizations["__stpncpy_chk"] = &StrNCpyChk;
- Optimizations["strcmp"] = &StrCmp;
- Optimizations["strncmp"] = &StrNCmp;
// String and memory library call optimizations.
Optimizations["strcat"] = &StrCat;
Optimizations["strncat"] = &StrNCat;
Optimizations["strchr"] = &StrChr;
Optimizations["strrchr"] = &StrRChr;
+ Optimizations["strcmp"] = &StrCmp;
+ Optimizations["strncmp"] = &StrNCmp;
+ Optimizations["strcpy"] = &StrCpy;
}
Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) {
diff --git a/lib/Transforms/Vectorize/CMakeLists.txt b/lib/Transforms/Vectorize/CMakeLists.txt
index 06cf1e4e532..e64034ab26b 100644
--- a/lib/Transforms/Vectorize/CMakeLists.txt
+++ b/lib/Transforms/Vectorize/CMakeLists.txt
@@ -1,6 +1,7 @@
add_llvm_library(LLVMVectorize
BBVectorize.cpp
Vectorize.cpp
+ LoopVectorize.cpp
)
add_dependencies(LLVMVectorize intrinsics_gen)
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
new file mode 100644
index 00000000000..6fbf342d06c
--- /dev/null
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -0,0 +1,1358 @@
+//===- LoopVectorize.cpp - A Loop Vectorizer ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the LLVM loop vectorizer. This pass modifies 'vectorizable' loops
+// and generates target-independent LLVM-IR. Legalization of the IR is done
+// in the codegen. However, the vectorizer uses (will use) the codegen
+// interfaces to generate IR that is likely to result in an optimal binary.
+//
+// The loop vectorizer combines consecutive loop iterations into a single
+// 'wide' iteration. After this transformation the index is incremented
+// by the SIMD vector width, and not by one.
+//
+// This pass has three parts:
+// 1. The main loop pass that drives the different parts.
+// 2. LoopVectorizationLegality - A helper class that checks for the legality
+// of the vectorization.
+// 3. SingleBlockLoopVectorizer - A helper class that performs the actual
+// widening of instructions.
+//===----------------------------------------------------------------------===//
+//
+// The reduction-variable vectorization is based on the paper:
+// D. Nuzman and R. Henderson. Multi-platform Auto-vectorization.
+//
+// Variable uniformity checks are inspired by:
+// Karrenberg, R. and Hack, S. Whole Function Vectorization.
+//
+// Other ideas/concepts are from:
+// A. Zaks and D. Nuzman. Autovectorization in GCC-two years later.
+//
+//===----------------------------------------------------------------------===//
+#define LV_NAME "loop-vectorize"
+#define DEBUG_TYPE LV_NAME
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Value.h"
+#include "llvm/Function.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/DataLayout.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <algorithm>
+using namespace llvm;
+
+static cl::opt<unsigned>
+DefaultVectorizationFactor("default-loop-vectorize-width",
+ cl::init(4), cl::Hidden,
+ cl::desc("Set the default loop vectorization width"));
+namespace {
+
+// Forward declaration.
+class LoopVectorizationLegality;
+
+/// SingleBlockLoopVectorizer vectorizes loops which contain only one basic
+/// block to a specified vectorization factor (VF).
+/// This class performs the widening of scalars into vectors, or multiple
+/// scalars. This class also implements the following features:
+/// * It inserts an epilogue loop for handling loops that don't have iteration
+/// counts that are known to be a multiple of the vectorization factor.
+/// * It handles the code generation for reduction variables.
+/// * Scalarization (implementation using scalars) of un-vectorizable
+/// instructions.
+/// SingleBlockLoopVectorizer does not perform any vectorization-legality
+/// checks, and relies on the caller to check for the different legality
+/// aspects. The SingleBlockLoopVectorizer relies on the
+/// LoopVectorizationLegality class to provide information about the induction
+/// and reduction variables that were found in the loop.
+class SingleBlockLoopVectorizer {
+public:
+ /// Ctor.
+ SingleBlockLoopVectorizer(Loop *Orig, ScalarEvolution *Se, LoopInfo *Li,
+ LPPassManager *Lpm, unsigned VecWidth):
+ OrigLoop(Orig), SE(Se), LI(Li), LPM(Lpm), VF(VecWidth),
+ Builder(Se->getContext()), Induction(0), OldInduction(0) { }
+
+ // Perform the actual loop widening (vectorization).
+ void vectorize(LoopVectorizationLegality *Legal) {
+ /// Create a new empty loop. Unlink the old loop and connect the new one.
+ createEmptyLoop(Legal);
+ /// Widen each instruction in the old loop to a new one in the new loop.
+ /// Use the Legality module to find the induction and reduction variables.
+ vectorizeLoop(Legal);
+ // Register the new loop in the loop hierarchy and pass manager.
+ cleanup();
+ }
+
+private:
+ /// Create an empty loop, based on the loop ranges of the old loop.
+ void createEmptyLoop(LoopVectorizationLegality *Legal);
+ /// Copy and widen the instructions from the old loop.
+ void vectorizeLoop(LoopVectorizationLegality *Legal);
+ /// Insert the new loop into the loop hierarchy and pass manager.
+ void cleanup();
+
+ /// This instruction is un-vectorizable. Implement it as a sequence
+ /// of scalars.
+ void scalarizeInstruction(Instruction *Instr);
+
+ /// Create a broadcast instruction. This method generates a broadcast
+ /// instruction (shuffle) for loop invariant values and for the induction
+ /// value. If this is the induction variable then we extend it to N, N+1, ...
+ /// this is needed because each iteration in the loop corresponds to a SIMD
+ /// element.
+ Value *getBroadcastInstrs(Value *V);
+
+ /// This is a helper function used by getBroadcastInstrs. It adds
+ /// 0, 1, 2, ... to each element of the vector, starting from zero.
+ Value *getConsecutiveVector(Value* Val);
+
+ /// When we go over instructions in the basic block we rely on previous
+ /// values within the current basic block or on loop invariant values.
+ /// When we widen (vectorize) values we place them in the map. If the values
+ /// are not within the map, they have to be loop invariant, so we simply
+ /// broadcast them into a vector.
+ Value *getVectorValue(Value *V);
+
+ /// Get a uniform vector of constant integers. We use this to get
+ /// vectors of ones and zeros for the reduction code.
+ Constant* getUniformVector(unsigned Val, Type* ScalarTy);
+
+ typedef DenseMap<Value*, Value*> ValueMap;
+
+ /// The original loop.
+ Loop *OrigLoop;
+ // Scev analysis to use.
+ ScalarEvolution *SE;
+ // Loop Info.
+ LoopInfo *LI;
+ // Loop Pass Manager.
+ LPPassManager *LPM;
+ // The vectorization factor to use.
+ unsigned VF;
+
+ // The builder that we use.
+ IRBuilder<> Builder;
+
+ // --- Vectorization state ---
+
+ /// Middle block between the vector and the scalar loop.
+ BasicBlock *LoopMiddleBlock;
+ /// The ExitBlock of the scalar loop.
+ BasicBlock *LoopExitBlock;
+ /// The vector loop body.
+ BasicBlock *LoopVectorBody;
+ /// The scalar loop body.
+ BasicBlock *LoopScalarBody;
+ /// The first bypass block.
+ BasicBlock *LoopBypassBlock;
+
+ /// The new Induction variable which was added to the new block.
+ PHINode *Induction;
+ /// The induction variable of the old basic block.
+ PHINode *OldInduction;
+ // Maps scalars to widened vectors.
+ ValueMap WidenMap;
+};
+
+/// LoopVectorizationLegality checks if it is legal to vectorize a loop, and
+/// to what vectorization factor.
+/// This class does not look at the profitability of vectorization, only the
+/// legality. This class has two main kinds of checks:
+/// * Memory checks - The code in canVectorizeMemory checks if vectorization
+/// will change the order of memory accesses in a way that will change the
+/// correctness of the program.
+/// * Scalar checks - The code in canVectorizeBlock checks for a number
+/// of different conditions, such as the availability of a single induction
+/// variable, that all types are supported and vectorize-able, etc.
+/// This code reflects the capabilities of SingleBlockLoopVectorizer.
+/// This class is also used by SingleBlockLoopVectorizer for identifying
+/// the induction variable and the different reduction variables.
+class LoopVectorizationLegality {
+public:
+ LoopVectorizationLegality(Loop *Lp, ScalarEvolution *Se, DataLayout *Dl):
+ TheLoop(Lp), SE(Se), DL(Dl), Induction(0) { }
+
+ /// This represents the kinds of reductions that we support.
+ /// We use the enum values to hold the 'identity' value for
+ /// each operand. This value does not change the result if applied.
+ enum ReductionKind {
+ NoReduction = -1, /// Not a reduction.
+ IntegerAdd = 0, /// Sum of numbers.
+ IntegerMult = 1 /// Product of numbers.
+ };
+
+ /// This POD struct holds information about reduction variables.
+ struct ReductionDescriptor {
+ // Default C'tor
+ ReductionDescriptor():
+ StartValue(0), LoopExitInstr(0), Kind(NoReduction) {}
+
+ // C'tor.
+ ReductionDescriptor(Value *Start, Instruction *Exit, ReductionKind K):
+ StartValue(Start), LoopExitInstr(Exit), Kind(K) {}
+
+ // The starting value of the reduction.
+ // It does not have to be zero!
+ Value *StartValue;
+ // The instruction whose value is used outside the loop.
+ Instruction *LoopExitInstr;
+ // The kind of the reduction.
+ ReductionKind Kind;
+ };
+
+ /// ReductionList contains the reduction descriptors for all
+ /// of the reductions that were found in the loop.
+ typedef DenseMap<PHINode*, ReductionDescriptor> ReductionList;
+
+ /// Returns the maximum vectorization factor that we *can* use to vectorize
+ /// this loop. This does not mean that it is profitable to vectorize this
+ /// loop, only that it is legal to do so. This may be a large number. We
+ /// can vectorize to any SIMD width below this number.
+ unsigned getLoopMaxVF();
+
+ /// Returns the Induction variable.
+ PHINode *getInduction() {return Induction;}
+
+ /// Returns the reduction variables found in the loop.
+ ReductionList *getReductionVars() { return &Reductions; }
+
+ /// Check if the pointer returned by this GEP is consecutive
+ /// when the index is vectorized. This happens when the last
+ /// index of the GEP is consecutive, like the induction variable.
+ /// This check allows us to vectorize A[idx] into a wide load/store.
+ bool isConsecutiveGep(Value *Ptr);
+
+private:
+ /// Check if a single basic block loop is vectorizable.
+ /// At this point we know that this is a loop with a constant trip count
+ /// and we only need to check individual instructions.
+ bool canVectorizeBlock(BasicBlock &BB);
+
+ /// When we vectorize loops we may change the order in which
+ /// we read and write from memory. This method checks if it is
+ /// legal to vectorize the code, considering only memory constraints.
+ /// Returns true if BB is vectorizable.
+ bool canVectorizeMemory(BasicBlock &BB);
+
+ // Check if a pointer value is known to be disjoint.
+ // Example: Alloca, Global, NoAlias.
+ bool isIdentifiedSafeObject(Value* Val);
+
+ /// Returns true if 'Phi' is a reduction variable of kind 'Kind'. If it is,
+ /// the reduction is added to the ReductionList.
+ bool AddReductionVar(PHINode *Phi, ReductionKind Kind);
+ /// Returns true if the instruction I can be a reduction variable of type
+ /// 'Kind'.
+ bool isReductionInstr(Instruction *I, ReductionKind Kind);
+ /// Returns true if 'Phi' is an induction variable.
+ bool isInductionVariable(PHINode *Phi);
+
+ /// The loop that we evaluate.
+ Loop *TheLoop;
+ /// Scev analysis.
+ ScalarEvolution *SE;
+ /// DataLayout analysis.
+ DataLayout *DL;
+
+ // --- vectorization state --- //
+
+ /// Holds the induction variable.
+ PHINode *Induction;
+ /// Holds the reduction variables.
+ ReductionList Reductions;
+ /// Allowed outside users. This holds the reduction
+ /// vars which can be accessed from outside the loop.
+ SmallPtrSet<Value*, 4> AllowedExit;
+};
+
+struct LoopVectorize : public LoopPass {
+ static char ID; // Pass identification, replacement for typeid
+
+ LoopVectorize() : LoopPass(ID) {
+ initializeLoopVectorizePass(*PassRegistry::getPassRegistry());
+ }
+
+ ScalarEvolution *SE;
+ DataLayout *DL;
+ LoopInfo *LI;
+
+ virtual bool runOnLoop(Loop *L, LPPassManager &LPM) {
+ // We only vectorize innermost loops.
+ if (!L->empty())
+ return false;
+
+ SE = &getAnalysis<ScalarEvolution>();
+ DL = getAnalysisIfAvailable<DataLayout>();
+ LI = &getAnalysis<LoopInfo>();
+
+ DEBUG(dbgs() << "LV: Checking a loop in \"" <<
+ L->getHeader()->getParent()->getName() << "\"\n");
+
+ // Check if it is legal to vectorize the loop.
+ LoopVectorizationLegality LVL(L, SE, DL);
+ unsigned MaxVF = LVL.getLoopMaxVF();
+
+ // Check that we can vectorize this loop using the chosen vectorization
+ // width.
+ if (MaxVF < DefaultVectorizationFactor) {
+ DEBUG(dbgs() << "LV: non-vectorizable MaxVF ("<< MaxVF << ").\n");
+ return false;
+ }
+
+ DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< MaxVF << ").\n");
+
+ // If we decided that it is *legal* to vectorize the loop, then do it.
+ SingleBlockLoopVectorizer LB(L, SE, LI, &LPM, DefaultVectorizationFactor);
+ LB.vectorize(&LVL);
+
+ DEBUG(verifyFunction(*L->getHeader()->getParent()));
+ return true;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ LoopPass::getAnalysisUsage(AU);
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addRequired<LoopInfo>();
+ AU.addRequired<ScalarEvolution>();
+ }
+
+};
+
+Value *SingleBlockLoopVectorizer::getBroadcastInstrs(Value *V) {
+ // Instructions that access the old induction variable
+ // actually want to get the new one.
+ if (V == OldInduction)
+ V = Induction;
+ // Create the types.
+ LLVMContext &C = V->getContext();
+ Type *VTy = VectorType::get(V->getType(), VF);
+ Type *I32 = IntegerType::getInt32Ty(C);
+ Constant *Zero = ConstantInt::get(I32, 0);
+ Value *Zeros = ConstantAggregateZero::get(VectorType::get(I32, VF));
+ Value *UndefVal = UndefValue::get(VTy);
+ // Insert the value into a new vector.
+ Value *SingleElem = Builder.CreateInsertElement(UndefVal, V, Zero);
+ // Broadcast the scalar into all locations in the vector.
+ Value *Shuf = Builder.CreateShuffleVector(SingleElem, UndefVal, Zeros,
+ "broadcast");
+ // We are accessing the induction variable. Make sure to promote the
+ // index for each consecutive SIMD lane. This adds 0,1,2 ... to all lanes.
+ if (V == Induction)
+ return getConsecutiveVector(Shuf);
+ return Shuf;
+}
+
+Value *SingleBlockLoopVectorizer::getConsecutiveVector(Value* Val) {
+ assert(Val->getType()->isVectorTy() && "Must be a vector");
+ assert(Val->getType()->getScalarType()->isIntegerTy() &&
+ "Elem must be an integer");
+ // Create the types.
+ Type *ITy = Val->getType()->getScalarType();
+ VectorType *Ty = cast<VectorType>(Val->getType());
+ unsigned VLen = Ty->getNumElements();
+ SmallVector<Constant*, 8> Indices;
+
+ // Create a vector of consecutive numbers from zero to VF.
+ for (unsigned i = 0; i < VLen; ++i)
+ Indices.push_back(ConstantInt::get(ITy, i));
+
+ // Add the consecutive indices to the vector value.
+ Constant *Cv = ConstantVector::get(Indices);
+ assert(Cv->getType() == Val->getType() && "Invalid consecutive vec");
+ return Builder.CreateAdd(Val, Cv, "induction");
+}
+
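
A scalar model of what the two helpers above produce for VF == 4: the broadcast splats one value into every lane, and the consecutive step then adds <0, 1, 2, 3> so that each lane carries the induction value of its own iteration. Illustration only:

    #include <cassert>

    int main() {
      const unsigned VF = 4;
      int IV = 10; // current scalar induction value
      int Lanes[VF];
      for (unsigned i = 0; i < VF; ++i)
        Lanes[i] = IV;   // broadcast ...
      for (unsigned i = 0; i < VF; ++i)
        Lanes[i] += i;   // ... then add the consecutive vector 0,1,2,3
      assert(Lanes[0] == 10 && Lanes[3] == 13);
      return 0;
    }
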
+bool LoopVectorizationLegality::isConsecutiveGep(Value *Ptr) {
+ GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
+ if (!Gep)
+ return false;
+
+ unsigned NumOperands = Gep->getNumOperands();
+ Value *LastIndex = Gep->getOperand(NumOperands - 1);
+
+ // Check that all of the gep indices are uniform except for the last.
+ for (unsigned i = 0; i < NumOperands - 1; ++i)
+ if (!SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop))
+ return false;
+
+ // We can emit wide load/stores only if the last index is the induction
+ // variable.
+ const SCEV *Last = SE->getSCEV(LastIndex);
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Last)) {
+ const SCEV *Step = AR->getStepRecurrence(*SE);
+
+ // The memory is consecutive because the last index is consecutive
+ // and all other indices are loop invariant.
+ if (Step->isOne())
+ return true;
+ }
+
+ return false;
+}
+
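
In source terms, isConsecutiveGep accepts the first two accesses below and rejects the third: every index must be loop-invariant except the last, and the last must step by exactly one. A sketch, not test code from the patch:

    void patterns(float *A, float **B, int k, int n) {
      for (int i = 0; i < n; ++i) {
        A[i] = 0.f;     // consecutive: last index steps by one
        B[k][i] = 0.f;  // consecutive: only the last index varies
        A[2 * i] = 0.f; // rejected: the step recurrence is 2, not 1
      }
    }
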
+Value *SingleBlockLoopVectorizer::getVectorValue(Value *V) {
+ assert(!V->getType()->isVectorTy() && "Can't widen a vector");
+ // If we saved a vectorized copy of V, use it.
+ Value *&MapEntry = WidenMap[V];
+ if (MapEntry)
+ return MapEntry;
+
+ // Broadcast V and save the value for future uses.
+ Value *B = getBroadcastInstrs(V);
+ MapEntry = B;
+ return B;
+}
+
+Constant*
+SingleBlockLoopVectorizer::getUniformVector(unsigned Val, Type* ScalarTy) {
+ SmallVector<Constant*, 8> Indices;
+ // Create a vector of consecutive numbers from zero to VF.
+ for (unsigned i = 0; i < VF; ++i)
+ Indices.push_back(ConstantInt::get(ScalarTy, Val));
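+  // For example, getUniformVector(1, i32) with VF = 4 yields the splat
+  // <i32 1, i32 1, i32 1, i32 1>, which is later used as the identity
+  // vector for multiply reductions.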
+
+  // Build the uniform (splat) vector.
+ return ConstantVector::get(Indices);
+}
+
+void SingleBlockLoopVectorizer::scalarizeInstruction(Instruction *Instr) {
+  assert(!Instr->getType()->isAggregateType() && "Can't handle aggregates");
+ // Holds vector parameters or scalars, in case of uniform vals.
+ SmallVector<Value*, 8> Params;
+
+ // Find all of the vectorized parameters.
+ for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) {
+ Value *SrcOp = Instr->getOperand(op);
+
+ // If we are accessing the old induction variable, use the new one.
+ if (SrcOp == OldInduction) {
+ Params.push_back(getBroadcastInstrs(Induction));
+ continue;
+ }
+
+ // Try using previously calculated values.
+ Instruction *SrcInst = dyn_cast<Instruction>(SrcOp);
+
+ // If the src is an instruction that appeared earlier in the basic block
+ // then it should already be vectorized.
+ if (SrcInst && SrcInst->getParent() == Instr->getParent()) {
+ assert(WidenMap.count(SrcInst) && "Source operand is unavailable");
+ // The parameter is a vector value from earlier.
+ Params.push_back(WidenMap[SrcInst]);
+ } else {
+ // The parameter is a scalar from outside the loop. Maybe even a constant.
+ Params.push_back(SrcOp);
+ }
+ }
+
+ assert(Params.size() == Instr->getNumOperands() &&
+ "Invalid number of operands");
+
+  // Does this instruction return a value?
+ bool IsVoidRetTy = Instr->getType()->isVoidTy();
+ Value *VecResults = 0;
+
+ // If we have a return value, create an empty vector. We place the scalarized
+ // instructions in this vector.
+ if (!IsVoidRetTy)
+ VecResults = UndefValue::get(VectorType::get(Instr->getType(), VF));
+
+ // For each scalar that we create:
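+  // For example, with VF = 4 the instruction is replaced by four scalar
+  // clones, one per SIMD lane. Vector operands are split with
+  // extractelement and the scalar results re-packed with insertelement.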
+ for (unsigned i = 0; i < VF; ++i) {
+ Instruction *Cloned = Instr->clone();
+ if (!IsVoidRetTy)
+ Cloned->setName(Instr->getName() + ".cloned");
+    // Replace the operands of the cloned instructions with extracted scalars.
+ for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) {
+ Value *Op = Params[op];
+ // Param is a vector. Need to extract the right lane.
+ if (Op->getType()->isVectorTy())
+ Op = Builder.CreateExtractElement(Op, Builder.getInt32(i));
+ Cloned->setOperand(op, Op);
+ }
+
+ // Place the cloned scalar in the new loop.
+ Builder.Insert(Cloned);
+
+ // If the original scalar returns a value we need to place it in a vector
+ // so that future users will be able to use it.
+ if (!IsVoidRetTy)
+ VecResults = Builder.CreateInsertElement(VecResults, Cloned,
+ Builder.getInt32(i));
+ }
+
+ if (!IsVoidRetTy)
+ WidenMap[Instr] = VecResults;
+}
+
+void
+SingleBlockLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) {
+ /*
+ In this function we generate a new loop. The new loop will contain
+ the vectorized instructions while the old loop will continue to run the
+ scalar remainder.
+
+ [ ] <-- vector loop bypass.
+ / |
+ / v
+| [ ] <-- vector pre header.
+| |
+| v
+| [ ] \
+| [ ]_| <-- vector loop.
+| |
+ \ v
+ >[ ] <--- middle-block.
+ / |
+ / v
+| [ ] <--- new preheader.
+| |
+| v
+| [ ] \
+| [ ]_| <-- old scalar loop to handle remainder.
+ \ |
+ \ v
+ >[ ] <-- exit block.
+ ...
+ */
+
+ // This is the original scalar-loop preheader.
+ BasicBlock *BypassBlock = OrigLoop->getLoopPreheader();
+ BasicBlock *ExitBlock = OrigLoop->getExitBlock();
+ assert(ExitBlock && "Must have an exit block");
+
+ assert(OrigLoop->getNumBlocks() == 1 && "Invalid loop");
+ assert(BypassBlock && "Invalid loop structure");
+
+ BasicBlock *VectorPH =
+ BypassBlock->splitBasicBlock(BypassBlock->getTerminator(), "vector.ph");
+ BasicBlock *VecBody = VectorPH->splitBasicBlock(VectorPH->getTerminator(),
+ "vector.body");
+
+ BasicBlock *MiddleBlock = VecBody->splitBasicBlock(VecBody->getTerminator(),
+ "middle.block");
+ BasicBlock *ScalarPH =
+ MiddleBlock->splitBasicBlock(MiddleBlock->getTerminator(),
+ "scalar.preheader");
+ // Find the induction variable.
+ BasicBlock *OldBasicBlock = OrigLoop->getHeader();
+ OldInduction = Legal->getInduction();
+ assert(OldInduction && "We must have a single phi node.");
+ Type *IdxTy = OldInduction->getType();
+
+ // Use this IR builder to create the loop instructions (Phi, Br, Cmp)
+ // inside the loop.
+ Builder.SetInsertPoint(VecBody->getFirstInsertionPt());
+
+ // Generate the induction variable.
+ Induction = Builder.CreatePHI(IdxTy, 2, "index");
+ Constant *Zero = ConstantInt::get(IdxTy, 0);
+ Constant *Step = ConstantInt::get(IdxTy, VF);
+
+ // Find the loop boundaries.
+ const SCEV *ExitCount = SE->getExitCount(OrigLoop, OrigLoop->getHeader());
+ assert(ExitCount != SE->getCouldNotCompute() && "Invalid loop count");
+
+  // The exit count is the number of backedges taken; add one to get the
+  // total trip count.
+ ExitCount = SE->getAddExpr(ExitCount,
+ SE->getConstant(ExitCount->getType(), 1));
+
+ // Expand the trip count and place the new instructions in the preheader.
+ // Notice that the pre-header does not change, only the loop body.
+ SCEVExpander Exp(*SE, "induction");
+ Instruction *Loc = BypassBlock->getTerminator();
+
+  // We may need to extend the index in case there is a type mismatch.
+  // We know that the count starts at zero and does not overflow, so we
+  // use zext, which is typically cheaper than sext.
+ if (ExitCount->getType() != Induction->getType())
+ ExitCount = SE->getZeroExtendExpr(ExitCount, IdxTy);
+
+ // Count holds the overall loop count (N).
+ Value *Count = Exp.expandCodeFor(ExitCount, Induction->getType(), Loc);
+ // Now we need to generate the expression for N - (N % VF), which is
+ // the part that the vectorized body will execute.
+ Constant *CIVF = ConstantInt::get(IdxTy, VF);
+ Value *R = BinaryOperator::CreateURem(Count, CIVF, "n.mod.vf", Loc);
+ Value *CountRoundDown = BinaryOperator::CreateSub(Count, R, "n.vec", Loc);
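+  // For example, if N = 10 and VF = 4 then n.mod.vf = 2 and n.vec = 8:
+  // the vector loop executes eight of the iterations and the scalar
+  // remainder loop executes the last two.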
+
+ // Now, compare the new count to zero. If it is zero, jump to the scalar part.
+ Value *Cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
+ CountRoundDown, ConstantInt::getNullValue(IdxTy),
+ "cmp.zero", Loc);
+ BranchInst::Create(MiddleBlock, VectorPH, Cmp, Loc);
+ // Remove the old terminator.
+ Loc->eraseFromParent();
+
+ // Add a check in the middle block to see if we have completed
+ // all of the iterations in the first vector loop.
+ // If (N - N%VF) == N, then we *don't* need to run the remainder.
+ Value *CmpN = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, Count,
+ CountRoundDown, "cmp.n",
+ MiddleBlock->getTerminator());
+
+ BranchInst::Create(ExitBlock, ScalarPH, CmpN, MiddleBlock->getTerminator());
+ // Remove the old terminator.
+ MiddleBlock->getTerminator()->eraseFromParent();
+
+ // Create i+1 and fill the PHINode.
+ Value *NextIdx = Builder.CreateAdd(Induction, Step, "index.next");
+ Induction->addIncoming(Zero, VectorPH);
+ Induction->addIncoming(NextIdx, VecBody);
+ // Create the compare.
+ Value *ICmp = Builder.CreateICmpEQ(NextIdx, CountRoundDown);
+ Builder.CreateCondBr(ICmp, MiddleBlock, VecBody);
+
+ // Now we have two terminators. Remove the old one from the block.
+ VecBody->getTerminator()->eraseFromParent();
+
+ // Fix the scalar body iteration count.
+ unsigned BlockIdx = OldInduction->getBasicBlockIndex(ScalarPH);
+ OldInduction->setIncomingValue(BlockIdx, CountRoundDown);
+
+ // Get ready to start creating new instructions into the vectorized body.
+ Builder.SetInsertPoint(VecBody->getFirstInsertionPt());
+
+ // Register the new loop.
+ Loop* Lp = new Loop();
+ LPM->insertLoop(Lp, OrigLoop->getParentLoop());
+
+ Lp->addBasicBlockToLoop(VecBody, LI->getBase());
+
+ Loop *ParentLoop = OrigLoop->getParentLoop();
+ if (ParentLoop) {
+ ParentLoop->addBasicBlockToLoop(ScalarPH, LI->getBase());
+ ParentLoop->addBasicBlockToLoop(VectorPH, LI->getBase());
+ ParentLoop->addBasicBlockToLoop(MiddleBlock, LI->getBase());
+ }
+
+ // Save the state.
+ LoopMiddleBlock = MiddleBlock;
+ LoopExitBlock = ExitBlock;
+ LoopVectorBody = VecBody;
+ LoopScalarBody = OldBasicBlock;
+ LoopBypassBlock = BypassBlock;
+}
+
+void
+SingleBlockLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
+ typedef SmallVector<PHINode*, 4> PhiVector;
+ BasicBlock &BB = *OrigLoop->getHeader();
+ Constant *Zero = ConstantInt::get(
+ IntegerType::getInt32Ty(BB.getContext()), 0);
+
+ // In order to support reduction variables we need to be able to vectorize
+ // Phi nodes. Phi nodes have cycles, so we need to vectorize them in two
+  // stages. First, we create a new vector PHI node with no incoming edges.
+ // We use this value when we vectorize all of the instructions that use the
+ // PHI. Next, after all of the instructions in the block are complete we
+ // add the new incoming edges to the PHI. At this point all of the
+ // instructions in the basic block are vectorized, so we can use them to
+ // construct the PHI.
+ PhiVector PHIsToFix;
+
+ // For each instruction in the old loop.
+ for (BasicBlock::iterator it = BB.begin(), e = BB.end(); it != e; ++it) {
+ Instruction *Inst = it;
+
+ switch (Inst->getOpcode()) {
+ case Instruction::Br:
+      // Nothing to do for the branch; we already took care of the loop
+      // control-flow instructions. PHI nodes are handled in the next case.
+ continue;
+ case Instruction::PHI:{
+ PHINode* P = cast<PHINode>(Inst);
+ // Special handling for the induction var.
+ if (OldInduction == Inst)
+ continue;
+ // This is phase one of vectorizing PHIs.
+ // This has to be a reduction variable.
+ assert(Legal->getReductionVars()->count(P) && "Not a Reduction");
+ Type *VecTy = VectorType::get(Inst->getType(), VF);
+ WidenMap[Inst] = Builder.CreatePHI(VecTy, 2, "vec.phi");
+ PHIsToFix.push_back(P);
+ continue;
+ }
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor: {
+ // Just widen binops.
+ BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
+ Value *A = getVectorValue(Inst->getOperand(0));
+ Value *B = getVectorValue(Inst->getOperand(1));
+ // Use this vector value for all users of the original instruction.
+ WidenMap[Inst] = Builder.CreateBinOp(BinOp->getOpcode(), A, B);
+ break;
+ }
+ case Instruction::Select: {
+ // Widen selects.
+ // If the selector is loop invariant we can create a select
+ // instruction with a scalar condition. Otherwise, use vector-select.
+ Value *Cond = Inst->getOperand(0);
+ bool InvariantCond = SE->isLoopInvariant(SE->getSCEV(Cond), OrigLoop);
+
+ // The condition can be loop invariant but still defined inside the
+ // loop. This means that we can't just use the original 'cond' value.
+ // We have to take the 'vectorized' value and pick the first lane.
+ // Instcombine will make this a no-op.
+ Cond = getVectorValue(Cond);
+ if (InvariantCond)
+ Cond = Builder.CreateExtractElement(Cond, Builder.getInt32(0));
+
+ Value *Op0 = getVectorValue(Inst->getOperand(1));
+ Value *Op1 = getVectorValue(Inst->getOperand(2));
+ WidenMap[Inst] = Builder.CreateSelect(Cond, Op0, Op1);
+ break;
+ }
+
+ case Instruction::ICmp:
+ case Instruction::FCmp: {
+      // Widen compares into vector compares.
+ bool FCmp = (Inst->getOpcode() == Instruction::FCmp);
+ CmpInst *Cmp = dyn_cast<CmpInst>(Inst);
+ Value *A = getVectorValue(Inst->getOperand(0));
+ Value *B = getVectorValue(Inst->getOperand(1));
+ if (FCmp)
+ WidenMap[Inst] = Builder.CreateFCmp(Cmp->getPredicate(), A, B);
+ else
+ WidenMap[Inst] = Builder.CreateICmp(Cmp->getPredicate(), A, B);
+ break;
+ }
+
+ case Instruction::Store: {
+ // Attempt to issue a wide store.
+ StoreInst *SI = dyn_cast<StoreInst>(Inst);
+ Type *StTy = VectorType::get(SI->getValueOperand()->getType(), VF);
+ Value *Ptr = SI->getPointerOperand();
+ unsigned Alignment = SI->getAlignment();
+ GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
+      // There is no GEP, or it is not consecutive; scalarize the store.
+ if (!Legal->isConsecutiveGep(Gep)) {
+ scalarizeInstruction(Inst);
+ break;
+ }
+
+      // The last index does not have to be the induction variable. It can
+      // be consecutive and a function of the induction, for example A[i+1].
+ unsigned NumOperands = Gep->getNumOperands();
+      Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands - 1));
+ LastIndex = Builder.CreateExtractElement(LastIndex, Builder.getInt32(0));
+
+ // Create the new GEP with the new induction variable.
+ GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
+ Gep2->setOperand(NumOperands - 1, LastIndex);
+ Ptr = Builder.Insert(Gep2);
+ Ptr = Builder.CreateBitCast(Ptr, StTy->getPointerTo());
+ Value *Val = getVectorValue(SI->getValueOperand());
+ Builder.CreateStore(Val, Ptr)->setAlignment(Alignment);
+ break;
+ }
+ case Instruction::Load: {
+ // Attempt to issue a wide load.
+ LoadInst *LI = dyn_cast<LoadInst>(Inst);
+ Type *RetTy = VectorType::get(LI->getType(), VF);
+ Value *Ptr = LI->getPointerOperand();
+ unsigned Alignment = LI->getAlignment();
+ GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
+
+      // There is no GEP, or it is not consecutive; scalarize the load.
+ if (!Legal->isConsecutiveGep(Gep)) {
+ scalarizeInstruction(Inst);
+ break;
+ }
+
+      // The last index does not have to be the induction variable. It can
+      // be consecutive and a function of the induction, for example A[i+1].
+ unsigned NumOperands = Gep->getNumOperands();
+      Value *LastIndex = getVectorValue(Gep->getOperand(NumOperands - 1));
+ LastIndex = Builder.CreateExtractElement(LastIndex, Builder.getInt32(0));
+
+ // Create the new GEP with the new induction variable.
+ GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
+ Gep2->setOperand(NumOperands - 1, LastIndex);
+ Ptr = Builder.Insert(Gep2);
+ Ptr = Builder.CreateBitCast(Ptr, RetTy->getPointerTo());
+ LI = Builder.CreateLoad(Ptr);
+ LI->setAlignment(Alignment);
+ // Use this vector value for all users of the load.
+ WidenMap[Inst] = LI;
+ break;
+ }
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::BitCast: {
+      // Vectorize casts.
+ CastInst *CI = dyn_cast<CastInst>(Inst);
+ Value *A = getVectorValue(Inst->getOperand(0));
+ Type *DestTy = VectorType::get(CI->getType()->getScalarType(), VF);
+ WidenMap[Inst] = Builder.CreateCast(CI->getOpcode(), A, DestTy);
+ break;
+ }
+
+ default:
+      // All other instructions are unsupported. Scalarize them.
+ scalarizeInstruction(Inst);
+ break;
+ }// end of switch.
+ }// end of for_each instr.
+
+  // At this point every instruction in the original loop is widened to
+ // a vector form. We are almost done. Now, we need to fix the PHI nodes
+ // that we vectorized. The PHI nodes are currently empty because we did
+ // not want to introduce cycles. Notice that the remaining PHI nodes
+ // that we need to fix are reduction variables.
+
+  // Create the 'reduced' values for each of the reduction vars.
+ // The reduced values are the vector values that we scalarize and combine
+ // after the loop is finished.
+ for (PhiVector::iterator it = PHIsToFix.begin(), e = PHIsToFix.end();
+ it != e; ++it) {
+ PHINode *RdxPhi = *it;
+ PHINode *VecRdxPhi = dyn_cast<PHINode>(WidenMap[RdxPhi]);
+    assert(VecRdxPhi && "Unable to recover vectorized PHI");
+
+ // Find the reduction variable descriptor.
+ assert(Legal->getReductionVars()->count(RdxPhi) &&
+ "Unable to find the reduction variable");
+ LoopVectorizationLegality::ReductionDescriptor RdxDesc =
+ (*Legal->getReductionVars())[RdxPhi];
+
+ // We need to generate a reduction vector from the incoming scalar.
+    // To do so, we need to generate the 'identity' vector and override
+ // one of the elements with the incoming scalar reduction. We need
+ // to do it in the vector-loop preheader.
+ Builder.SetInsertPoint(LoopBypassBlock->getTerminator());
+
+ // This is the vector-clone of the value that leaves the loop.
+ Value *VectorExit = getVectorValue(RdxDesc.LoopExitInstr);
+ Type *VecTy = VectorExit->getType();
+
+    // Find the reduction identity scalar. The value of the enum is the
+    // identity: zero for addition, one for multiplication.
+ unsigned IdentitySclr = RdxDesc.Kind;
+ Constant *Identity = getUniformVector(IdentitySclr,
+ VecTy->getScalarType());
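+    // For example, with VF = 4 an integer add reduction starts from
+    // <0, 0, 0, 0> and a multiply reduction starts from <1, 1, 1, 1>.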
+
+ // This vector is the Identity vector where the first element is the
+ // incoming scalar reduction.
+ Value *VectorStart = Builder.CreateInsertElement(Identity,
+ RdxDesc.StartValue, Zero);
+
+
+ // Fix the vector-loop phi.
+ // We created the induction variable so we know that the
+ // preheader is the first entry.
+ BasicBlock *VecPreheader = Induction->getIncomingBlock(0);
+
+ // Reductions do not have to start at zero. They can start with
+ // any loop invariant values.
+ VecRdxPhi->addIncoming(VectorStart, VecPreheader);
+ unsigned SelfEdgeIdx = (RdxPhi)->getBasicBlockIndex(LoopScalarBody);
+ Value *Val = getVectorValue(RdxPhi->getIncomingValue(SelfEdgeIdx));
+ VecRdxPhi->addIncoming(Val, LoopVectorBody);
+
+ // Before each round, move the insertion point right between
+ // the PHIs and the values we are going to write.
+ // This allows us to write both PHINodes and the extractelement
+ // instructions.
+ Builder.SetInsertPoint(LoopMiddleBlock->getFirstInsertionPt());
+
+ // This PHINode contains the vectorized reduction variable, or
+ // the initial value vector, if we bypass the vector loop.
+ PHINode *NewPhi = Builder.CreatePHI(VecTy, 2, "rdx.vec.exit.phi");
+ NewPhi->addIncoming(VectorStart, LoopBypassBlock);
+ NewPhi->addIncoming(getVectorValue(RdxDesc.LoopExitInstr), LoopVectorBody);
+
+ // Extract the first scalar.
+ Value *Scalar0 =
+ Builder.CreateExtractElement(NewPhi, Builder.getInt32(0));
+ // Extract and sum the remaining vector elements.
+ for (unsigned i=1; i < VF; ++i) {
+ Value *Scalar1 =
+ Builder.CreateExtractElement(NewPhi, Builder.getInt32(i));
+ if (RdxDesc.Kind == LoopVectorizationLegality::IntegerAdd) {
+ Scalar0 = Builder.CreateAdd(Scalar0, Scalar1);
+ } else {
+ Scalar0 = Builder.CreateMul(Scalar0, Scalar1);
+ }
+ }
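+    // For example, with VF = 4 and an add reduction this emits
+    // s = ((v0 + v1) + v2) + v3, where vN is lane N of the exit PHI.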
+
+ // Now, we need to fix the users of the reduction variable
+ // inside and outside of the scalar remainder loop.
+ // We know that the loop is in LCSSA form. We need to update the
+ // PHI nodes in the exit blocks.
+ for (BasicBlock::iterator LEI = LoopExitBlock->begin(),
+ LEE = LoopExitBlock->end(); LEI != LEE; ++LEI) {
+ PHINode *LCSSAPhi = dyn_cast<PHINode>(LEI);
+ if (!LCSSAPhi) continue;
+
+      // All PHI nodes must have a single incoming edge, or two if we have
+      // already fixed them.
+ assert(LCSSAPhi->getNumIncomingValues() < 3 && "Invalid LCSSA PHI");
+
+ // We found our reduction value exit-PHI. Update it with the
+ // incoming bypass edge.
+ if (LCSSAPhi->getIncomingValue(0) == RdxDesc.LoopExitInstr) {
+ // Add an edge coming from the bypass.
+ LCSSAPhi->addIncoming(Scalar0, LoopMiddleBlock);
+ break;
+ }
+ }// end of the LCSSA phi scan.
+
+ // Fix the scalar loop reduction variable with the incoming reduction sum
+ // from the vector body and from the backedge value.
+ int IncomingEdgeBlockIdx = (RdxPhi)->getBasicBlockIndex(LoopScalarBody);
+ int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1); // The other block.
+ (RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, Scalar0);
+ (RdxPhi)->setIncomingValue(IncomingEdgeBlockIdx, RdxDesc.LoopExitInstr);
+ }// end of for each redux variable.
+}
+
+void SingleBlockLoopVectorizer::cleanup() {
+ // The original basic block.
+ SE->forgetLoop(OrigLoop);
+}
+
+unsigned LoopVectorizationLegality::getLoopMaxVF() {
+ if (!TheLoop->getLoopPreheader()) {
+ assert(false && "No preheader!!");
+ DEBUG(dbgs() << "LV: Loop not normalized." << "\n");
+ return 1;
+ }
+
+ // We can only vectorize single basic block loops.
+ unsigned NumBlocks = TheLoop->getNumBlocks();
+ if (NumBlocks != 1) {
+ DEBUG(dbgs() << "LV: Too many blocks:" << NumBlocks << "\n");
+ return 1;
+ }
+
+ // We need to have a loop header.
+ BasicBlock *BB = TheLoop->getHeader();
+ DEBUG(dbgs() << "LV: Found a loop: " << BB->getName() << "\n");
+
+ // Go over each instruction and look at memory deps.
+ if (!canVectorizeBlock(*BB)) {
+ DEBUG(dbgs() << "LV: Can't vectorize this loop header\n");
+ return 1;
+ }
+
+ // ScalarEvolution needs to be able to find the exit count.
+ const SCEV *ExitCount = SE->getExitCount(TheLoop, BB);
+ if (ExitCount == SE->getCouldNotCompute()) {
+ DEBUG(dbgs() << "LV: SCEV could not compute the loop exit count.\n");
+ return 1;
+ }
+
+ DEBUG(dbgs() << "LV: We can vectorize this loop!\n");
+
+ // Okay! We can vectorize. At this point we don't have any other mem analysis
+ // which may limit our maximum vectorization factor, so just return the
+ // maximum SIMD size.
+ return DefaultVectorizationFactor;
+}
+
+bool LoopVectorizationLegality::canVectorizeBlock(BasicBlock &BB) {
+ // Scan the instructions in the block and look for hazards.
+ for (BasicBlock::iterator it = BB.begin(), e = BB.end(); it != e; ++it) {
+ Instruction *I = it;
+
+ PHINode *Phi = dyn_cast<PHINode>(I);
+ if (Phi) {
+ // This should not happen because the loop should be normalized.
+ if (Phi->getNumIncomingValues() != 2) {
+ DEBUG(dbgs() << "LV: Found an invalid PHI.\n");
+ return false;
+ }
+ // We only look at integer phi nodes.
+ if (!Phi->getType()->isIntegerTy()) {
+ DEBUG(dbgs() << "LV: Found an non-int PHI.\n");
+ return false;
+ }
+
+ if (isInductionVariable(Phi)) {
+ if (Induction) {
+ DEBUG(dbgs() << "LV: Found too many inductions."<< *Phi <<"\n");
+ return false;
+ }
+ DEBUG(dbgs() << "LV: Found the induction PHI."<< *Phi <<"\n");
+ Induction = Phi;
+ continue;
+ }
+ if (AddReductionVar(Phi, IntegerAdd)) {
+ DEBUG(dbgs() << "LV: Found an ADD reduction PHI."<< *Phi <<"\n");
+ continue;
+ }
+ if (AddReductionVar(Phi, IntegerMult)) {
+ DEBUG(dbgs() << "LV: Found an Mult reduction PHI."<< *Phi <<"\n");
+ continue;
+ }
+
+ DEBUG(dbgs() << "LV: Found an unidentified PHI."<< *Phi <<"\n");
+ return false;
+ }// end of PHI handling
+
+ // We still don't handle functions.
+ CallInst *CI = dyn_cast<CallInst>(I);
+ if (CI) {
+ DEBUG(dbgs() << "LV: Found a call site:"<<
+ CI->getCalledFunction()->getName() << "\n");
+ return false;
+ }
+
+ // We do not re-vectorize vectors.
+ if (!VectorType::isValidElementType(I->getType()) &&
+ !I->getType()->isVoidTy()) {
+ DEBUG(dbgs() << "LV: Found unvectorizable type." << "\n");
+ return false;
+ }
+
+ // Reduction instructions are allowed to have exit users.
+ // All other instructions must not have external users.
+ if (!AllowedExit.count(I))
+      // Check that all of the users of the instruction are inside the BB.
+ for (Value::use_iterator it = I->use_begin(), e = I->use_end();
+ it != e; ++it) {
+ Instruction *U = cast<Instruction>(*it);
+ // This user may be a reduction exit value.
+ BasicBlock *Parent = U->getParent();
+ if (Parent != &BB) {
+ DEBUG(dbgs() << "LV: Found an outside user for : "<< *U << "\n");
+ return false;
+ }
+ }
+ } // next instr.
+
+ if (!Induction) {
+ DEBUG(dbgs() << "LV: Did not find an induction var.\n");
+ return false;
+ }
+
+ // If the memory dependencies do not prevent us from
+ // vectorizing, then vectorize.
+ return canVectorizeMemory(BB);
+}
+
+bool LoopVectorizationLegality::canVectorizeMemory(BasicBlock &BB) {
+ typedef SmallVector<Value*, 16> ValueVector;
+ typedef SmallPtrSet<Value*, 16> ValueSet;
+ // Holds the Load and Store *instructions*.
+ ValueVector Loads;
+ ValueVector Stores;
+
+ // Scan the BB and collect legal loads and stores.
+ for (BasicBlock::iterator it = BB.begin(), e = BB.end(); it != e; ++it) {
+ Instruction *I = it;
+
+ // If this is a load, save it. If this instruction can read from memory
+ // but is not a load, then we quit. Notice that we don't handle function
+ // calls that read or write.
+ if (I->mayReadFromMemory()) {
+ LoadInst *Ld = dyn_cast<LoadInst>(I);
+ if (!Ld) return false;
+ if (!Ld->isSimple()) {
+ DEBUG(dbgs() << "LV: Found a non-simple load.\n");
+ return false;
+ }
+ Loads.push_back(Ld);
+ continue;
+ }
+
+ // Save store instructions. Abort if other instructions write to memory.
+ if (I->mayWriteToMemory()) {
+ StoreInst *St = dyn_cast<StoreInst>(I);
+ if (!St) return false;
+ if (!St->isSimple()) {
+ DEBUG(dbgs() << "LV: Found a non-simple store.\n");
+ return false;
+ }
+ Stores.push_back(St);
+ }
+ } // next instr.
+
+ // Now we have two lists that hold the loads and the stores.
+ // Next, we find the pointers that they use.
+
+ // Check if we see any stores. If there are no stores, then we don't
+ // care if the pointers are *restrict*.
+ if (!Stores.size()) {
+ DEBUG(dbgs() << "LV: Found a read-only loop!\n");
+ return true;
+ }
+
+ // Holds the read and read-write *pointers* that we find.
+ ValueVector Reads;
+ ValueVector ReadWrites;
+
+ // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
+ // multiple times on the same object. If the ptr is accessed twice, once
+ // for read and once for write, it will only appear once (on the write
+ // list). This is okay, since we are going to check for conflicts between
+ // writes and between reads and writes, but not between reads and reads.
+ ValueSet Seen;
+
+ ValueVector::iterator I, IE;
+ for (I = Stores.begin(), IE = Stores.end(); I != IE; ++I) {
+ StoreInst *ST = dyn_cast<StoreInst>(*I);
+ assert(ST && "Bad StoreInst");
+ Value* Ptr = ST->getPointerOperand();
+    // If we did *not* see this pointer before, insert it into the
+    // read-write list. At this phase it is only a 'write' list.
+ if (Seen.insert(Ptr))
+ ReadWrites.push_back(Ptr);
+ }
+
+ for (I = Loads.begin(), IE = Loads.end(); I != IE; ++I) {
+ LoadInst *LD = dyn_cast<LoadInst>(*I);
+ assert(LD && "Bad LoadInst");
+ Value* Ptr = LD->getPointerOperand();
+    // If we did *not* see this pointer before, insert it into the read
+    // list. If we *did* see it before, then it is already in the
+    // read-write list. This allows us to vectorize expressions such as
+    // A[i] += x, because the address of A[i] is a read-write pointer.
+    // This only works if the index of A[i] is consecutive. If the address
+    // is unknown (for example A[B[i]]) then we may read a few words,
+    // modify, and write a few words, and some of the words may be written
+    // to the same address.
+ if (Seen.insert(Ptr) || !isConsecutiveGep(Ptr))
+ Reads.push_back(Ptr);
+ }
+
+  // Now that the pointers are in two lists (Reads and ReadWrites), we
+  // can check that there are no conflicts between the writes, and
+  // between the writes and the reads.
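+  // For example, 'A[i] = B[i] + C[i]' is accepted: the only write object
+  // is A, and the read objects B and C are distinct from it. In contrast,
+  // 'A[i] = A[i + 1]' is rejected because the underlying object A appears
+  // on both the read list and the write list.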
+ ValueSet WriteObjects;
+ ValueVector TempObjects;
+
+ // Check that the read-writes do not conflict with other read-write
+ // pointers.
+ for (I = ReadWrites.begin(), IE = ReadWrites.end(); I != IE; ++I) {
+ GetUnderlyingObjects(*I, TempObjects, DL);
+ for (ValueVector::iterator it=TempObjects.begin(), e=TempObjects.end();
+ it != e; ++it) {
+ if (!isIdentifiedSafeObject(*it)) {
+ DEBUG(dbgs() << "LV: Found an unidentified write ptr:"<< **it <<"\n");
+ return false;
+ }
+ if (!WriteObjects.insert(*it)) {
+ DEBUG(dbgs() << "LV: Found a possible write-write reorder:"
+ << **it <<"\n");
+ return false;
+ }
+ }
+ TempObjects.clear();
+ }
+
+  // Check that the reads don't conflict with the read-writes.
+ for (I = Reads.begin(), IE = Reads.end(); I != IE; ++I) {
+ GetUnderlyingObjects(*I, TempObjects, DL);
+ for (ValueVector::iterator it=TempObjects.begin(), e=TempObjects.end();
+ it != e; ++it) {
+ if (!isIdentifiedSafeObject(*it)) {
+ DEBUG(dbgs() << "LV: Found an unidentified read ptr:"<< **it <<"\n");
+ return false;
+ }
+ if (WriteObjects.count(*it)) {
+ DEBUG(dbgs() << "LV: Found a possible read/write reorder:"
+ << **it <<"\n");
+ return false;
+ }
+ }
+ TempObjects.clear();
+ }
+
+ // All is okay.
+ return true;
+}
+
+/// Checks if the value is a global variable, an alloca, or an argument
+/// marked with the NoAlias attribute.
+bool LoopVectorizationLegality::isIdentifiedSafeObject(Value* Val) {
+ assert(Val && "Invalid value");
+ if (isa<GlobalValue>(Val))
+ return true;
+ if (isa<AllocaInst>(Val))
+ return true;
+ if (Argument *A = dyn_cast<Argument>(Val))
+ return A->hasNoAliasAttr();
+ return false;
+}
+
+bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
+ ReductionKind Kind) {
+ if (Phi->getNumIncomingValues() != 2)
+ return false;
+
+ // Find the possible incoming reduction variable.
+ BasicBlock *BB = Phi->getParent();
+ int SelfEdgeIdx = Phi->getBasicBlockIndex(BB);
+ int InEdgeBlockIdx = (SelfEdgeIdx ? 0 : 1); // The other entry.
+ Value *RdxStart = Phi->getIncomingValue(InEdgeBlockIdx);
+
+  // ExitInstruction is the single value which is used outside the loop.
+  // We only allow for a single reduction value to be used outside the loop.
+  // This includes users of the reduction variables, which form a cycle
+  // that ends in the phi node.
+ Instruction *ExitInstruction = 0;
+
+  // Iter is our iterator. We start with the PHI node and scan for all of the
+  // users of this instruction. All users must be instructions which can be
+  // used as reduction variables (such as ADD). We may have a single
+  // out-of-block user. The cycle must end with the original PHI.
+  // Also, we can't have multiple block-local users.
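+  // For example, for 'sum += A[i]' the walk goes phi -> add: the add is
+  // the only in-block user of the phi, and it feeds both the phi (closing
+  // the cycle) and the single allowed out-of-loop user.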
+ Instruction *Iter = Phi;
+ while (true) {
+ // Any reduction instr must be of one of the allowed kinds.
+ if (!isReductionInstr(Iter, Kind))
+ return false;
+
+    // Did we find a user inside this block?
+ bool FoundInBlockUser = false;
+    // Did we reach the initial PHI node?
+ bool FoundStartPHI = false;
+
+ // If the instruction has no users then this is a broken
+ // chain and can't be a reduction variable.
+ if (Iter->use_empty())
+ return false;
+
+ // For each of the *users* of iter.
+ for (Value::use_iterator it = Iter->use_begin(), e = Iter->use_end();
+ it != e; ++it) {
+ Instruction *U = cast<Instruction>(*it);
+ // We already know that the PHI is a user.
+ if (U == Phi) {
+ FoundStartPHI = true;
+ continue;
+ }
+ // Check if we found the exit user.
+ BasicBlock *Parent = U->getParent();
+ if (Parent != BB) {
+ // We must have a single exit instruction.
+ if (ExitInstruction != 0)
+ return false;
+ ExitInstruction = Iter;
+ }
+ // We can't have multiple inside users.
+ if (FoundInBlockUser)
+ return false;
+ FoundInBlockUser = true;
+ Iter = U;
+ }
+
+ // We found a reduction var if we have reached the original
+ // phi node and we only have a single instruction with out-of-loop
+ // users.
+ if (FoundStartPHI && ExitInstruction) {
+ // This instruction is allowed to have out-of-loop users.
+ AllowedExit.insert(ExitInstruction);
+
+ // Save the description of this reduction variable.
+ ReductionDescriptor RD(RdxStart, ExitInstruction, Kind);
+ Reductions[Phi] = RD;
+ return true;
+ }
+ }
+}
+
+bool
+LoopVectorizationLegality::isReductionInstr(Instruction *I,
+ ReductionKind Kind) {
+ switch (I->getOpcode()) {
+ default:
+ return false;
+ case Instruction::PHI:
+    // Possibly a reduction; the use-chain walk in AddReductionVar decides.
+ return true;
+ case Instruction::Add:
+ case Instruction::Sub:
+ return Kind == IntegerAdd;
+ case Instruction::Mul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ return Kind == IntegerMult;
+ }
+}
+
+bool LoopVectorizationLegality::isInductionVariable(PHINode *Phi) {
+ // Check that the PHI is consecutive and starts at zero.
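+  // For example, 'for (i = 0; i < n; ++i)' produces the add-recurrence
+  // {0,+,1}<loop>, which is accepted; a counter that starts at a non-zero
+  // value or steps by more than one is rejected.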
+ const SCEV *PhiScev = SE->getSCEV(Phi);
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
+ if (!AR) {
+ DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
+ return false;
+ }
+ const SCEV *Step = AR->getStepRecurrence(*SE);
+ const SCEV *Start = AR->getStart();
+
+ if (!Step->isOne() || !Start->isZero()) {
+ DEBUG(dbgs() << "LV: PHI does not start at zero or steps by one.\n");
+ return false;
+ }
+ return true;
+}
+
+} // namespace
+
+char LoopVectorize::ID = 0;
+static const char lv_name[] = "Loop Vectorization";
+INITIALIZE_PASS_BEGIN(LoopVectorize, LV_NAME, lv_name, false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false)
+
+namespace llvm {
+ Pass *createLoopVectorizePass() {
+ return new LoopVectorize();
+ }
+}
+
diff --git a/lib/Transforms/Vectorize/Vectorize.cpp b/lib/Transforms/Vectorize/Vectorize.cpp
index 1ef60029bcf..d26973a7b38 100644
--- a/lib/Transforms/Vectorize/Vectorize.cpp
+++ b/lib/Transforms/Vectorize/Vectorize.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements common infrastructure for libLLVMVectorizeOpts.a, which
+// This file implements common infrastructure for libLLVMVectorizeOpts.a, which
// implements several vectorization transformations over the LLVM intermediate
// representation, including the C bindings for that library.
//
@@ -23,10 +23,11 @@
using namespace llvm;
-/// initializeVectorizationPasses - Initialize all passes linked into the
+/// initializeVectorizationPasses - Initialize all passes linked into the
/// Vectorization library.
void llvm::initializeVectorization(PassRegistry &Registry) {
initializeBBVectorizePass(Registry);
+ initializeLoopVectorizePass(Registry);
}
void LLVMInitializeVectorization(LLVMPassRegistryRef R) {
@@ -37,3 +38,6 @@ void LLVMAddBBVectorizePass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createBBVectorizePass());
}
+void LLVMAddLoopVectorizePass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLoopVectorizePass());
+}
diff --git a/lib/VMCore/DataLayout.cpp b/lib/VMCore/DataLayout.cpp
index 3f75069a60a..e6994be257c 100644
--- a/lib/VMCore/DataLayout.cpp
+++ b/lib/VMCore/DataLayout.cpp
@@ -559,8 +559,10 @@ uint64_t DataLayout::getTypeSizeInBits(Type *Ty) const {
// only 80 bits contain information.
case Type::X86_FP80TyID:
return 80;
- case Type::VectorTyID:
- return cast<VectorType>(Ty)->getBitWidth();
+ case Type::VectorTyID: {
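+    // Compute the size from the element count and the element type's size
+    // so that element types without a primitive bit width (for example,
+    // pointers) are also handled.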
+ VectorType *VTy = cast<VectorType>(Ty);
+ return VTy->getNumElements()*getTypeSizeInBits(VTy->getElementType());
+ }
default:
llvm_unreachable("DataLayout::getTypeSizeInBits(): Unsupported type");
}
diff --git a/test/BugPoint/crash-narrowfunctiontest.ll b/test/BugPoint/crash-narrowfunctiontest.ll
index d080d9dd4b0..9df823ab973 100644
--- a/test/BugPoint/crash-narrowfunctiontest.ll
+++ b/test/BugPoint/crash-narrowfunctiontest.ll
@@ -2,6 +2,7 @@
;
; RUN: bugpoint -load %llvmshlibdir/BugpointPasses%shlibext %s -output-prefix %t -bugpoint-crashcalls -silence-passes > /dev/null
; REQUIRES: loadable_module
+; XFAIL: lto
define i32 @foo() { ret i32 1 }
diff --git a/test/BugPoint/metadata.ll b/test/BugPoint/metadata.ll
index 0eda5667ba4..98c79ee03a6 100644
--- a/test/BugPoint/metadata.ll
+++ b/test/BugPoint/metadata.ll
@@ -1,6 +1,7 @@
; RUN: bugpoint -load %llvmshlibdir/BugpointPasses%shlibext %s -output-prefix %t -bugpoint-crashcalls -silence-passes > /dev/null
; RUN: llvm-dis %t-reduced-simplified.bc -o - | FileCheck %s
; REQUIRES: loadable_module
+; XFAIL: lto
; Bugpoint should keep the call's metadata attached to the call.
diff --git a/test/BugPoint/remove_arguments_test.ll b/test/BugPoint/remove_arguments_test.ll
index 29a03b83107..13aa9c5a653 100644
--- a/test/BugPoint/remove_arguments_test.ll
+++ b/test/BugPoint/remove_arguments_test.ll
@@ -1,6 +1,7 @@
; RUN: bugpoint -load %llvmshlibdir/BugpointPasses%shlibext %s -output-prefix %t -bugpoint-crashcalls -silence-passes
; RUN: llvm-dis %t-reduced-simplified.bc -o - | FileCheck %s
; REQUIRES: loadable_module
+; XFAIL: lto
; Test to make sure that arguments are removed from the function if they are
; unnecessary. And clean up any types that that frees up too.
diff --git a/test/CodeGen/ARM/2012-10-18-PR14099-ByvalFrameAddress.ll b/test/CodeGen/ARM/2012-10-18-PR14099-ByvalFrameAddress.ll
new file mode 100644
index 00000000000..fcc6a7f7e96
--- /dev/null
+++ b/test/CodeGen/ARM/2012-10-18-PR14099-ByvalFrameAddress.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -mtriple=armv7-linux-gnueabi | FileCheck %s
+
+%struct.s = type { [4 x i32] }
+@v = constant %struct.s zeroinitializer;
+
+declare void @f(%struct.s* %p);
+
+; CHECK: t:
+define void @t(i32 %a, %struct.s* byval %s) nounwind {
+entry:
+
+; Here we need to only check proper start address of restored %s argument.
+; CHECK: sub sp, sp, #16
+; CHECK: push {r11, lr}
+; CHECK: add r0, sp, #12
+; CHECK: stm r0, {r1, r2, r3}
+; CHECK: add r0, sp, #12
+; CHECK-NEXT: bl f
+ call void @f(%struct.s* %s)
+ ret void
+}
+
+; CHECK: caller:
+define void @caller() {
+
+; CHECK: ldm r0, {r1, r2, r3}
+ call void @t(i32 0, %struct.s* @v);
+ ret void
+}
diff --git a/test/CodeGen/ARM/trap.ll b/test/CodeGen/ARM/trap.ll
index 38842a9646f..21865f8e4ae 100644
--- a/test/CodeGen/ARM/trap.ll
+++ b/test/CodeGen/ARM/trap.ll
@@ -14,4 +14,16 @@ entry:
unreachable
}
+define void @t2() nounwind {
+entry:
+; INSTR: t2:
+; INSTR: trap
+
+; FUNC: t2:
+; FUNC: bl __trap
+ call void @llvm.debugtrap()
+ unreachable
+}
+
declare void @llvm.trap() nounwind
+declare void @llvm.debugtrap() nounwind
diff --git a/test/CodeGen/MSP430/fp.ll b/test/CodeGen/MSP430/fp.ll
new file mode 100644
index 00000000000..c3273eff05c
--- /dev/null
+++ b/test/CodeGen/MSP430/fp.ll
@@ -0,0 +1,17 @@
+; RUN: llc -O0 -disable-fp-elim < %s | FileCheck %s
+
+target datalayout = "e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"
+target triple = "msp430---elf"
+
+define void @fp() nounwind {
+entry:
+; CHECK: fp:
+; CHECK: push.w r4
+; CHECK: mov.w r1, r4
+; CHECK: sub.w #2, r1
+ %i = alloca i16, align 2
+; CHECK: mov.w #0, -2(r4)
+ store i16 0, i16* %i, align 2
+; CHECK: pop.w r4
+ ret void
+}
diff --git a/test/CodeGen/Mips/brconeq.ll b/test/CodeGen/Mips/brconeq.ll
new file mode 100644
index 00000000000..613391557ef
--- /dev/null
+++ b/test/CodeGen/Mips/brconeq.ll
@@ -0,0 +1,38 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 5, align 4
+@j = global i32 10, align 4
+@result = global i32 0, align 4
+
+define void @test() nounwind {
+entry:
+ %0 = load i32* @i, align 4
+ %1 = load i32* @j, align 4
+ %cmp = icmp eq i32 %0, %1
+; 16: cmp ${{[0-9]+}}, ${{[0-9]+}}
+; 16: bteqz $[[LABEL:[0-9A-Ba-b_]+]]
+; 16: $[[LABEL]]:
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ store i32 1, i32* @result, align 4
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
diff --git a/test/CodeGen/Mips/brconeqk.ll b/test/CodeGen/Mips/brconeqk.ll
new file mode 100644
index 00000000000..2c0e72dabd2
--- /dev/null
+++ b/test/CodeGen/Mips/brconeqk.ll
@@ -0,0 +1,22 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 5, align 4
+@result = global i32 0, align 4
+
+define void @test() nounwind {
+entry:
+ %0 = load i32* @i, align 4
+ %cmp = icmp eq i32 %0, 10
+ br i1 %cmp, label %if.end, label %if.then
+; 16: cmpi ${{[0-9]+}}, {{[0-9]+}}
+; 16: bteqz $[[LABEL:[0-9A-Ba-b_]+]]
+; 16: $[[LABEL]]:
+if.then: ; preds = %entry
+ store i32 1, i32* @result, align 4
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+
diff --git a/test/CodeGen/Mips/brconeqz.ll b/test/CodeGen/Mips/brconeqz.ll
new file mode 100644
index 00000000000..5586e7b976d
--- /dev/null
+++ b/test/CodeGen/Mips/brconeqz.ll
@@ -0,0 +1,20 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 5, align 4
+@result = global i32 0, align 4
+
+define void @test() nounwind {
+entry:
+ %0 = load i32* @i, align 4
+ %cmp = icmp eq i32 %0, 0
+ br i1 %cmp, label %if.end, label %if.then
+; 16: beqz ${{[0-9]+}}, $[[LABEL:[0-9A-Ba-b_]+]]
+; 16: $[[LABEL]]:
+if.then: ; preds = %entry
+ store i32 1, i32* @result, align 4
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
diff --git a/test/CodeGen/Mips/brconge.ll b/test/CodeGen/Mips/brconge.ll
new file mode 100644
index 00000000000..02f0a633b31
--- /dev/null
+++ b/test/CodeGen/Mips/brconge.ll
@@ -0,0 +1,37 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 5, align 4
+@j = global i32 10, align 4
+@k = global i32 5, align 4
+@result1 = global i32 0, align 4
+@result2 = global i32 1, align 4
+
+define void @test() nounwind {
+entry:
+ %0 = load i32* @i, align 4
+ %1 = load i32* @j, align 4
+ %cmp = icmp slt i32 %0, %1
+ br i1 %cmp, label %if.then, label %if.end
+
+; 16: slt ${{[0-9]+}}, ${{[0-9]+}}
+; 16: bteqz $[[LABEL:[0-9A-Ba-b_]+]]
+; 16: $[[LABEL]]:
+
+if.then: ; preds = %entry
+ store i32 1, i32* @result1, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ %2 = load i32* @k, align 4
+ %cmp1 = icmp slt i32 %0, %2
+ br i1 %cmp1, label %if.then2, label %if.end3
+
+if.then2: ; preds = %if.end
+ store i32 1, i32* @result1, align 4
+ br label %if.end3
+
+if.end3: ; preds = %if.then2, %if.end
+ ret void
+}
+
+
diff --git a/test/CodeGen/Mips/brcongt.ll b/test/CodeGen/Mips/brcongt.ll
new file mode 100644
index 00000000000..767b51b21b9
--- /dev/null
+++ b/test/CodeGen/Mips/brcongt.ll
@@ -0,0 +1,25 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 5, align 4
+@j = global i32 10, align 4
+@k = global i32 5, align 4
+@result = global i32 0, align 4
+
+define void @test() nounwind {
+entry:
+ %0 = load i32* @i, align 4
+ %1 = load i32* @j, align 4
+ %cmp = icmp sgt i32 %0, %1
+ br i1 %cmp, label %if.end, label %if.then
+; 16: slt ${{[0-9]+}}, ${{[0-9]+}}
+; 16: btnez $[[LABEL:[0-9A-Ba-b_]+]]
+; 16: $[[LABEL]]:
+if.then: ; preds = %entry
+ store i32 1, i32* @result, align 4
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+
diff --git a/test/CodeGen/Mips/brconle.ll b/test/CodeGen/Mips/brconle.ll
new file mode 100644
index 00000000000..854b2481c6e
--- /dev/null
+++ b/test/CodeGen/Mips/brconle.ll
@@ -0,0 +1,37 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 -5, align 4
+@j = global i32 10, align 4
+@k = global i32 -5, align 4
+@result1 = global i32 0, align 4
+@result2 = global i32 1, align 4
+
+define void @test() nounwind {
+entry:
+ %0 = load i32* @j, align 4
+ %1 = load i32* @i, align 4
+ %cmp = icmp sgt i32 %0, %1
+ br i1 %cmp, label %if.then, label %if.end
+
+; 16: slt ${{[0-9]+}}, ${{[0-9]+}}
+; 16: bteqz $[[LABEL:[0-9A-Ba-b_]+]]
+; 16: $[[LABEL]]:
+
+if.then: ; preds = %entry
+ store i32 1, i32* @result1, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ %2 = load i32* @k, align 4
+ %cmp1 = icmp sgt i32 %1, %2
+ br i1 %cmp1, label %if.then2, label %if.end3
+
+if.then2: ; preds = %if.end
+ store i32 0, i32* @result1, align 4
+ br label %if.end3
+
+if.end3: ; preds = %if.then2, %if.end
+ ret void
+}
+
+
diff --git a/test/CodeGen/Mips/brconlt.ll b/test/CodeGen/Mips/brconlt.ll
new file mode 100644
index 00000000000..931a3e8c7ba
--- /dev/null
+++ b/test/CodeGen/Mips/brconlt.ll
@@ -0,0 +1,27 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 5, align 4
+@j = global i32 10, align 4
+@k = global i32 5, align 4
+@result = global i32 0, align 4
+
+define void @test() nounwind {
+entry:
+ %0 = load i32* @j, align 4
+ %1 = load i32* @i, align 4
+ %cmp = icmp slt i32 %0, %1
+ br i1 %cmp, label %if.end, label %if.then
+
+; 16: slt ${{[0-9]+}}, ${{[0-9]+}}
+; 16: btnez $[[LABEL:[0-9A-Ba-b_]+]]
+; 16: $[[LABEL]]:
+
+if.then: ; preds = %entry
+ store i32 1, i32* @result, align 4
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+
diff --git a/test/CodeGen/Mips/brconne.ll b/test/CodeGen/Mips/brconne.ll
new file mode 100644
index 00000000000..5d5bde3fcf9
--- /dev/null
+++ b/test/CodeGen/Mips/brconne.ll
@@ -0,0 +1,26 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@i = global i32 5, align 4
+@j = global i32 5, align 4
+@result = global i32 0, align 4
+
+define void @test() nounwind {
+entry:
+ %0 = load i32* @j, align 4
+ %1 = load i32* @i, align 4
+ %cmp = icmp eq i32 %0, %1
+ br i1 %cmp, label %if.then, label %if.end
+; 16: cmp ${{[0-9]+}}, ${{[0-9]+}}
+; 16: btnez $[[LABEL:[0-9A-Ba-b_]+]]
+; 16: lw ${{[0-9]+}}, %got(result)(${{[0-9]+}})
+; 16: $[[LABEL]]:
+
+if.then: ; preds = %entry
+ store i32 1, i32* @result, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret void
+}
+
+
diff --git a/test/CodeGen/Mips/brconnek.ll b/test/CodeGen/Mips/brconnek.ll
new file mode 100644
index 00000000000..6208d7c5a04
--- /dev/null
+++ b/test/CodeGen/Mips/brconnek.ll
@@ -0,0 +1,25 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@j = global i32 5, align 4
+@result = global i32 0, align 4
+
+define void @test() nounwind {
+entry:
+ %0 = load i32* @j, align 4
+ %cmp = icmp eq i32 %0, 5
+ br i1 %cmp, label %if.then, label %if.end
+
+; 16: cmpi ${{[0-9]+}}, {{[0-9]+}}
+; 16: btnez $[[LABEL:[0-9A-Ba-b_]+]]
+; 16: lw ${{[0-9]+}}, %got(result)(${{[0-9]+}})
+; 16: $[[LABEL]]:
+
+if.then: ; preds = %entry
+ store i32 1, i32* @result, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret void
+}
+
+
diff --git a/test/CodeGen/Mips/brconnez.ll b/test/CodeGen/Mips/brconnez.ll
new file mode 100644
index 00000000000..47db7901b51
--- /dev/null
+++ b/test/CodeGen/Mips/brconnez.ll
@@ -0,0 +1,24 @@
+; RUN: llc -march=mipsel -mcpu=mips16 -relocation-model=pic -O3 < %s | FileCheck %s -check-prefix=16
+
+@j = global i32 0, align 4
+@result = global i32 0, align 4
+
+define void @test() nounwind {
+entry:
+ %0 = load i32* @j, align 4
+ %cmp = icmp eq i32 %0, 0
+ br i1 %cmp, label %if.then, label %if.end
+
+; 16: bnez ${{[0-9]+}}, $[[LABEL:[0-9A-Ba-b_]+]]
+; 16: lw ${{[0-9]+}}, %got(result)(${{[0-9]+}})
+; 16: $[[LABEL]]:
+
+if.then: ; preds = %entry
+ store i32 1, i32* @result, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret void
+}
+
+
diff --git a/test/CodeGen/Mips/mips64-sret.ll b/test/CodeGen/Mips/mips64-sret.ll
new file mode 100644
index 00000000000..498c5fe1747
--- /dev/null
+++ b/test/CodeGen/Mips/mips64-sret.ll
@@ -0,0 +1,14 @@
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=n64 -O0 < %s
+
+%struct.S = type { [8 x i32] }
+
+@g = common global %struct.S zeroinitializer, align 4
+
+define void @f(%struct.S* noalias sret %agg.result) nounwind {
+entry:
+ %0 = bitcast %struct.S* %agg.result to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%struct.S* @g to i8*), i64 32, i32 4, i1 false)
+ ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
diff --git a/test/CodeGen/Mips/tailcall.ll b/test/CodeGen/Mips/tailcall.ll
new file mode 100644
index 00000000000..4989636a20d
--- /dev/null
+++ b/test/CodeGen/Mips/tailcall.ll
@@ -0,0 +1,100 @@
+; RUN: llc -march=mipsel -relocation-model=pic -enable-mips-tail-calls < %s | \
+; RUN: FileCheck %s -check-prefix=PIC32
+; RUN: llc -march=mipsel -relocation-model=static \
+; RUN: -enable-mips-tail-calls < %s | FileCheck %s -check-prefix=STATIC32
+; RUN: llc -march=mips64el -mcpu=mips64r2 -mattr=+n64 -enable-mips-tail-calls \
+; RUN: < %s | FileCheck %s -check-prefix=N64
+
+@g0 = common global i32 0, align 4
+@g1 = common global i32 0, align 4
+@g2 = common global i32 0, align 4
+@g3 = common global i32 0, align 4
+@g4 = common global i32 0, align 4
+@g5 = common global i32 0, align 4
+@g6 = common global i32 0, align 4
+@g7 = common global i32 0, align 4
+@g8 = common global i32 0, align 4
+@g9 = common global i32 0, align 4
+
+define i32 @caller1(i32 %a0) nounwind {
+entry:
+; PIC32-NOT: jalr
+; STATIC32-NOT: jal
+; N64-NOT: jalr
+
+ %call = tail call i32 @callee1(i32 1, i32 1, i32 1, i32 %a0) nounwind
+ ret i32 %call
+}
+
+declare i32 @callee1(i32, i32, i32, i32)
+
+define i32 @caller2(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind {
+entry:
+; PIC32: jalr
+; STATIC32: jal
+; N64-NOT: jalr
+
+ %call = tail call i32 @callee2(i32 1, i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind
+ ret i32 %call
+}
+
+declare i32 @callee2(i32, i32, i32, i32, i32)
+
+define i32 @caller3(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4) nounwind {
+entry:
+; PIC32: jalr
+; STATIC32: jal
+; N64-NOT: jalr
+
+ %call = tail call i32 @callee3(i32 1, i32 1, i32 1, i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4) nounwind
+ ret i32 %call
+}
+
+declare i32 @callee3(i32, i32, i32, i32, i32, i32, i32, i32)
+
+define i32 @caller4(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7) nounwind {
+entry:
+; PIC32: jalr
+; STATIC32: jal
+; N64: jalr
+
+ %call = tail call i32 @callee4(i32 1, i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7) nounwind
+ ret i32 %call
+}
+
+declare i32 @callee4(i32, i32, i32, i32, i32, i32, i32, i32, i32)
+
+define i32 @caller5() nounwind readonly {
+entry:
+; PIC32-NOT: jalr
+; STATIC32-NOT: jal
+; N64-NOT: jalr
+
+ %0 = load i32* @g0, align 4
+ %1 = load i32* @g1, align 4
+ %2 = load i32* @g2, align 4
+ %3 = load i32* @g3, align 4
+ %4 = load i32* @g4, align 4
+ %5 = load i32* @g5, align 4
+ %6 = load i32* @g6, align 4
+ %7 = load i32* @g7, align 4
+ %8 = load i32* @g8, align 4
+ %9 = load i32* @g9, align 4
+ %call = tail call fastcc i32 @callee5(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9)
+ ret i32 %call
+}
+
+define internal fastcc i32 @callee5(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8, i32 %a9) nounwind readnone noinline {
+entry:
+ %add = add nsw i32 %a1, %a0
+ %add1 = add nsw i32 %add, %a2
+ %add2 = add nsw i32 %add1, %a3
+ %add3 = add nsw i32 %add2, %a4
+ %add4 = add nsw i32 %add3, %a5
+ %add5 = add nsw i32 %add4, %a6
+ %add6 = add nsw i32 %add5, %a7
+ %add7 = add nsw i32 %add6, %a8
+ %add8 = add nsw i32 %add7, %a9
+ ret i32 %add8
+}
+
diff --git a/test/CodeGen/PowerPC/i64_fp_round.ll b/test/CodeGen/PowerPC/i64_fp_round.ll
new file mode 100644
index 00000000000..5a0c072c9c5
--- /dev/null
+++ b/test/CodeGen/PowerPC/i64_fp_round.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+define float @test(i64 %x) nounwind readnone {
+entry:
+ %conv = sitofp i64 %x to float
+ ret float %conv
+}
+
+; Verify that we get the code sequence needed to avoid double-rounding.
+; Note that only parts of the sequence are checked for here, to allow
+; for minor code generation differences.
+
+; CHECK: sradi [[REGISTER:[0-9]+]], 3, 53
+; CHECK: addi [[REGISTER:[0-9]+]], [[REGISTER]], 1
+; CHECK: cmpldi 0, [[REGISTER]], 1
+; CHECK: isel [[REGISTER:[0-9]+]], {{[0-9]+}}, 3, 1
+; CHECK: std [[REGISTER]], -{{[0-9]+}}(1)
+
+
+; Also check that with -enable-unsafe-fp-math we do not get that extra
+; code sequence. Simply verify that there is no "isel" present.
+
+; RUN: llc -mcpu=pwr7 -enable-unsafe-fp-math < %s | FileCheck %s -check-prefix=UNSAFE
+; UNSAFE-NOT: isel
+
diff --git a/test/CodeGen/X86/2012-10-18-crash-dagco.ll b/test/CodeGen/X86/2012-10-18-crash-dagco.ll
new file mode 100644
index 00000000000..5b98624a37b
--- /dev/null
+++ b/test/CodeGen/X86/2012-10-18-crash-dagco.ll
@@ -0,0 +1,61 @@
+; RUN: llc -march=x86-64 -mcpu=corei7 -disable-cgp-select2branch < %s
+
+; We should not crash on this test.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+target triple = "i386-apple-darwin9.0.0"
+
+@global = external constant [411 x i8], align 1
+
+define void @snork() nounwind {
+bb:
+ br i1 undef, label %bb26, label %bb27
+
+bb26: ; preds = %bb48, %bb26, %bb
+ switch i32 undef, label %bb26 [
+ i32 142771596, label %bb28
+ ]
+
+bb27: ; preds = %bb48, %bb
+ switch i32 undef, label %bb49 [
+ i32 142771596, label %bb28
+ ]
+
+bb28: ; preds = %bb27, %bb26
+ %tmp = load i32* null
+ %tmp29 = trunc i32 %tmp to i8
+ store i8* undef, i8** undef
+ %tmp30 = load i32* null
+ %tmp31 = icmp eq i32 %tmp30, 0
+ %tmp32 = getelementptr inbounds [411 x i8]* @global, i32 0, i32 undef
+ %tmp33 = load i8* %tmp32, align 1
+ %tmp34 = getelementptr inbounds [411 x i8]* @global, i32 0, i32 0
+ %tmp35 = load i8* %tmp34, align 1
+ %tmp36 = select i1 %tmp31, i8 %tmp35, i8 %tmp33
+ %tmp37 = select i1 undef, i8 %tmp29, i8 %tmp36
+ %tmp38 = zext i8 %tmp37 to i32
+ %tmp39 = select i1 undef, i32 0, i32 %tmp38
+ %tmp40 = getelementptr inbounds i32* null, i32 %tmp39
+ %tmp41 = load i32* %tmp40, align 4
+ %tmp42 = load i32* undef, align 4
+ %tmp43 = load i32* undef
+ %tmp44 = xor i32 %tmp42, %tmp43
+ %tmp45 = lshr i32 %tmp44, 8
+ %tmp46 = lshr i32 %tmp44, 7
+ call void @spam()
+ unreachable
+
+bb47: ; No predecessors!
+ ret void
+
+bb48: ; No predecessors!
+ br i1 undef, label %bb27, label %bb26
+
+bb49: ; preds = %bb49, %bb27
+ br label %bb49
+
+bb50: ; preds = %bb50
+ br label %bb50
+}
+
+declare void @spam() noreturn nounwind
diff --git a/test/CodeGen/X86/buildvec-insertvec.ll b/test/CodeGen/X86/buildvec-insertvec.ll
new file mode 100644
index 00000000000..3fb69a48b3c
--- /dev/null
+++ b/test/CodeGen/X86/buildvec-insertvec.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mcpu=corei7 -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+
+define void @foo(<3 x float> %in, <4 x i8>* nocapture %out) nounwind {
+ %t0 = fptoui <3 x float> %in to <3 x i8>
+ %t1 = shufflevector <3 x i8> %t0, <3 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+ %t2 = insertelement <4 x i8> %t1, i8 -1, i32 3
+ store <4 x i8> %t2, <4 x i8>* %out, align 4
+ ret void
+; CHECK: foo
+; CHECK: cvttps2dq
+; CHECK-NOT: pextrd
+; CHECK: pinsrd
+; CHECK-NEXT: pshufb
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/extract-concat.ll b/test/CodeGen/X86/extract-concat.ll
new file mode 100644
index 00000000000..704309eb650
--- /dev/null
+++ b/test/CodeGen/X86/extract-concat.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -mcpu=corei7 -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+
+define void @foo(<4 x float> %in, <4 x i8>* %out) {
+ %t0 = fptosi <4 x float> %in to <4 x i32>
+ %t1 = trunc <4 x i32> %t0 to <4 x i16>
+ %t2 = shufflevector <4 x i16> %t1, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %t3 = trunc <8 x i16> %t2 to <8 x i8>
+ %t4 = shufflevector <8 x i8> %t3, <8 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %t5 = insertelement <4 x i8> %t4, i8 -1, i32 3
+ store <4 x i8> %t5, <4 x i8>* %out
+ ret void
+; CHECK: foo
+; CHECK: cvttps2dq
+; CHECK-NOT: pextrd
+; CHECK: pshufb
+; CHECK: ret
+}
diff --git a/test/CodeGen/X86/pr14090.ll b/test/CodeGen/X86/pr14090.ll
new file mode 100644
index 00000000000..d76b912fd8e
--- /dev/null
+++ b/test/CodeGen/X86/pr14090.ll
@@ -0,0 +1,76 @@
+; RUN: llc < %s -march=x86-64 -print-before=stack-coloring -print-after=stack-coloring >%t 2>&1 && FileCheck <%t %s
+
+define void @foo(i64* %retval.i, i32 %call, i32* %.ph.i80, i32 %fourteen, i32* %out.lo, i32* %out.hi) nounwind align 2 {
+entry:
+ %_Tmp.i39 = alloca i64, align 8
+ %retval.i33 = alloca i64, align 8
+ %_Tmp.i = alloca i64, align 8
+ %retval.i.i = alloca i64, align 8
+ %_First.i = alloca i64, align 8
+
+ %0 = load i64* %retval.i, align 8
+
+ %1 = load i64* %retval.i, align 8
+
+ %_Tmp.i39.0.cast73 = bitcast i64* %_Tmp.i39 to i8*
+ call void @llvm.lifetime.start(i64 8, i8* %_Tmp.i39.0.cast73)
+ store i64 %1, i64* %_Tmp.i39, align 8
+ %cmp.i.i.i40 = icmp slt i32 %call, 0
+ %2 = lshr i64 %1, 32
+ %3 = trunc i64 %2 to i32
+ %sub.i.i.i44 = sub i32 0, %call
+ %cmp2.i.i.i45 = icmp ult i32 %3, %sub.i.i.i44
+ %or.cond.i.i.i46 = and i1 %cmp.i.i.i40, %cmp2.i.i.i45
+ %add.i.i.i47 = add i32 %3, %call
+ %sub5.i.i.i48 = lshr i32 %add.i.i.i47, 5
+ %trunc.i50 = trunc i64 %1 to i32
+ %inttoptr.i51 = inttoptr i32 %trunc.i50 to i32*
+ %add61617.i.i.i52 = or i32 %sub5.i.i.i48, -134217728
+ %add61617.i.sub5.i.i.i53 = select i1 %or.cond.i.i.i46, i32 %add61617.i.i.i52, i32 %sub5.i.i.i48
+ %storemerge2.i.i54 = getelementptr inbounds i32* %inttoptr.i51, i32 %add61617.i.sub5.i.i.i53
+ %_Tmp.i39.0.cast74 = bitcast i64* %_Tmp.i39 to i32**
+ store i32* %storemerge2.i.i54, i32** %_Tmp.i39.0.cast74, align 8
+ %storemerge.i.i55 = and i32 %add.i.i.i47, 31
+ %_Tmp.i39.4.raw_idx = getelementptr inbounds i8* %_Tmp.i39.0.cast73, i32 4
+ %_Tmp.i39.4.cast = bitcast i8* %_Tmp.i39.4.raw_idx to i32*
+ store i32 %storemerge.i.i55, i32* %_Tmp.i39.4.cast, align 4
+ %srcval.i56 = load i64* %_Tmp.i39, align 8
+ call void @llvm.lifetime.end(i64 8, i8* %_Tmp.i39.0.cast73)
+
+; CHECK: Before Merge disjoint stack slots
+; CHECK: [[PREFIX15:MOV64mr.*<fi#]]{{[0-9]}}[[SUFFIX15:.*;]] mem:ST8[%fifteen]
+; CHECK: [[PREFIX87:MOV32mr.*;]] mem:ST4[%sunkaddr87]
+
+; CHECK: After Merge disjoint stack slots
+; CHECK: [[PREFIX15]]{{[0-9]}}[[SUFFIX15]] mem:ST8[%_Tmp.i39]
+; CHECK: [[PREFIX87]] mem:ST4[<unknown>]
+
+ %fifteen = bitcast i64* %retval.i.i to i32**
+ %sixteen = bitcast i64* %retval.i.i to i8*
+ call void @llvm.lifetime.start(i64 8, i8* %sixteen)
+ store i32* %.ph.i80, i32** %fifteen, align 8, !tbaa !0
+ %sunkaddr = ptrtoint i64* %retval.i.i to i32
+ %sunkaddr86 = add i32 %sunkaddr, 4
+ %sunkaddr87 = inttoptr i32 %sunkaddr86 to i32*
+ store i32 %fourteen, i32* %sunkaddr87, align 4, !tbaa !3
+ %seventeen = load i64* %retval.i.i, align 8
+ call void @llvm.lifetime.end(i64 8, i8* %sixteen)
+ %eighteen = lshr i64 %seventeen, 32
+ %nineteen = trunc i64 %eighteen to i32
+ %shl.i.i.i = shl i32 1, %nineteen
+
+ store i32 %shl.i.i.i, i32* %out.lo, align 8
+ store i32 %nineteen, i32* %out.hi, align 8
+
+ ret void
+}
+
+declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
+
+declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
+!3 = metadata !{metadata !"any pointer", metadata !1}
+!4 = metadata !{metadata !"vtable pointer", metadata !2}
diff --git a/test/CodeGen/X86/pr14098.ll b/test/CodeGen/X86/pr14098.ll
new file mode 100644
index 00000000000..6ce2449ab6a
--- /dev/null
+++ b/test/CodeGen/X86/pr14098.ll
@@ -0,0 +1,23 @@
+; RUN: llc -mtriple i386-unknown-linux-gnu -relocation-model=pic -verify-machineinstrs < %s
+; We used to crash on this.
+
+declare void @foo()
+declare void @foo3(i1 %x)
+define void @bar(i1 %a1, i16 %a2) nounwind align 2 {
+bb0:
+ %a3 = trunc i16 %a2 to i8
+ %a4 = lshr i16 %a2, 8
+ %a5 = trunc i16 %a4 to i8
+ br i1 %a1, label %bb1, label %bb2
+bb1:
+ br label %bb2
+bb2:
+ %a6 = phi i8 [ 3, %bb0 ], [ %a5, %bb1 ]
+ %a7 = phi i8 [ 9, %bb0 ], [ %a3, %bb1 ]
+ %a8 = icmp eq i8 %a6, 1
+ call void @foo()
+ %a9 = icmp eq i8 %a7, 0
+ call void @foo3(i1 %a9)
+ call void @foo3(i1 %a8)
+ ret void
+}
diff --git a/test/CodeGen/X86/sjlj.ll b/test/CodeGen/X86/sjlj.ll
index d594e982994..681db009438 100644
--- a/test/CodeGen/X86/sjlj.ll
+++ b/test/CodeGen/X86/sjlj.ll
@@ -1,5 +1,7 @@
; RUN: llc < %s -mtriple=i386-pc-linux -mcpu=corei7 -relocation-model=static | FileCheck --check-prefix=X86 %s
-; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=corei7 | FileCheck --check-prefix=X64 %s
+; RUN: llc < %s -mtriple=i386-pc-linux -mcpu=corei7 -relocation-model=pic | FileCheck --check-prefix=PIC86 %s
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=corei7 -relocation-model=static | FileCheck --check-prefix=X64 %s
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=corei7 -relocation-model=pic | FileCheck --check-prefix=PIC64 %s
@buf = internal global [5 x i8*] zeroinitializer
@@ -20,14 +22,26 @@ define i32 @sj0() nounwind {
ret i32 %r
; X86: sj0
; x86: movl %ebp, buf
-; x86: movl ${{.*LBB.*}}, buf+4
; X86: movl %esp, buf+8
+; x86: movl ${{.*LBB.*}}, buf+4
; X86: ret
+; PIC86: sj0
+; PIC86: movl %ebp, buf@GOTOFF(%[[GOT:.*]])
+; PIC86: movl %esp, buf@GOTOFF+8(%[[GOT]])
+; PIC86: leal {{.*LBB.*}}@GOTOFF(%[[GOT]]), %[[LREG:.*]]
+; PIC86: movl %[[LREG]], buf@GOTOFF+4
+; PIC86: ret
; X64: sj0
; x64: movq %rbp, buf(%rip)
; x64: movq ${{.*LBB.*}}, buf+8(%rip)
; X64: movq %rsp, buf+16(%rip)
; X64: ret
+; PIC64: sj0
+; PIC64: movq %rbp, buf(%rip)
+; PIC64: movq %rsp, buf+16(%rip)
+; PIC64: leaq {{.*LBB.*}}(%rip), %[[LREG:.*]]
+; PIC64: movq %[[LREG]], buf+8(%rip)
+; PIC64: ret
}
define void @lj0() nounwind {
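The new PIC RUN lines pin down where the setjmp buffer fields land: the frame pointer at buf+0, the resume label at buf+4 (32-bit) or buf+8 (64-bit), and the stack pointer after that. Under PIC the label address can no longer be stored as an immediate, so the checks expect it to be materialized with a lea through the GOT (PIC86) or RIP (PIC64) first. A minimal sketch of the kind of IR presumably behind @sj0, using the SjLj intrinsics (the body below is an illustration, not the test's actual code):

  declare i32 @llvm.eh.sjlj.setjmp(i8*) nounwind

  define i32 @sj0_sketch() nounwind {
    %p = bitcast [5 x i8*]* @buf to i8*
    ; fills @buf with frame pointer, resume label and stack pointer,
    ; which is exactly what the CHECK lines above inspect
    %r = call i32 @llvm.eh.sjlj.setjmp(i8* %p)
    ret i32 %r
  }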
diff --git a/test/Instrumentation/AddressSanitizer/basic.ll b/test/Instrumentation/AddressSanitizer/basic.ll
index d1900018706..655f69c16fd 100644
--- a/test/Instrumentation/AddressSanitizer/basic.ll
+++ b/test/Instrumentation/AddressSanitizer/basic.ll
@@ -69,3 +69,23 @@ entry:
store i32 42, i32* %a
ret void
}
+
+; Check that asan leaves just one alloca.
+
+declare void @alloca_test_use([10 x i8]*)
+define void @alloca_test() address_safety {
+entry:
+ %x = alloca [10 x i8], align 1
+ %y = alloca [10 x i8], align 1
+ %z = alloca [10 x i8], align 1
+ call void @alloca_test_use([10 x i8]* %x)
+ call void @alloca_test_use([10 x i8]* %y)
+ call void @alloca_test_use([10 x i8]* %z)
+ ret void
+}
+
+; CHECK: define void @alloca_test()
+; CHECK: = alloca
+; CHECK-NOT: = alloca
+; CHECK: ret void
+
diff --git a/test/Makefile b/test/Makefile
index 3c6b5b68a4f..810fdded465 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -117,6 +117,16 @@ else
ENABLE_ASSERTIONS=1
endif
+# Derive whether LTO is enabled by checking the extra compile options.
+LTO_IS_ENABLED := 0
+ifneq ($(findstring -flto,$(CompileCommonOpts)),)
+LTO_IS_ENABLED := 1
+else
+ifneq ($(findstring -O4,$(CompileCommonOpts)),)
+LTO_IS_ENABLED := 1
+endif
+endif
+
lit.site.cfg: FORCE
@echo "Making LLVM 'lit.site.cfg' file..."
@$(ECHOPATH) s=@TARGET_TRIPLE@=$(TARGET_TRIPLE)=g > lit.tmp
@@ -129,6 +139,7 @@ lit.site.cfg: FORCE
@$(ECHOPATH) s=@OCAMLOPT@=$(OCAMLOPT) -cc $(subst *,'\\\"',*$(subst =,"\\=",$(CXX_FOR_OCAMLOPT))*) -I $(LibDir)/ocaml=g >> lit.tmp
@$(ECHOPATH) s=@ENABLE_SHARED@=$(ENABLE_SHARED)=g >> lit.tmp
@$(ECHOPATH) s=@ENABLE_ASSERTIONS@=$(ENABLE_ASSERTIONS)=g >> lit.tmp
+ @$(ECHOPATH) s=@LTO_IS_ENABLED@=$(LTO_IS_ENABLED)=g >> lit.tmp
@$(ECHOPATH) s=@TARGETS_TO_BUILD@=$(TARGETS_TO_BUILD)=g >> lit.tmp
@$(ECHOPATH) s=@LLVM_BINDINGS@=$(BINDINGS_TO_BUILD)=g >> lit.tmp
@$(ECHOPATH) s=@HOST_OS@=$(HOST_OS)=g >> lit.tmp
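The Makefile change only plumbs the value into lit.site.cfg; it becomes useful once the lit configuration (not part of this hunk) turns it into something tests can key off. A hypothetical sketch of such a test, assuming the substitution is surfaced as an available feature named 'lto':

  ; Hypothetical: assumes lit.cfg maps @LTO_IS_ENABLED@ to a feature named 'lto'.
  ; REQUIRES: lto
  ; RUN: llc < %s
  define void @needs_lto() {
    ret void
  }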
diff --git a/test/Transforms/BBVectorize/simple-ldstr-ptrs.ll b/test/Transforms/BBVectorize/simple-ldstr-ptrs.ll
index f992d415477..6294543cd81 100644
--- a/test/Transforms/BBVectorize/simple-ldstr-ptrs.ll
+++ b/test/Transforms/BBVectorize/simple-ldstr-ptrs.ll
@@ -79,3 +79,53 @@ entry:
; CHECK-AO-NOT: <2 x
}
+; Simple 3-pair chain with loads and stores (using ptrs and gep)
+; using pointer vectors.
+define void @test3(<2 x i64*>* %a, <2 x i64*>* %b, <2 x i64*>* %c) nounwind uwtable readonly {
+entry:
+ %i0 = load <2 x i64*>* %a, align 8
+ %i1 = load <2 x i64*>* %b, align 8
+ %arrayidx3 = getelementptr inbounds <2 x i64*>* %a, i64 1
+ %i3 = load <2 x i64*>* %arrayidx3, align 8
+ %arrayidx4 = getelementptr inbounds <2 x i64*>* %b, i64 1
+ %i4 = load <2 x i64*>* %arrayidx4, align 8
+ %j1 = extractelement <2 x i64*> %i1, i32 0
+ %j4 = extractelement <2 x i64*> %i4, i32 0
+ %o1 = load i64* %j1, align 8
+ %o4 = load i64* %j4, align 8
+ %j0 = extractelement <2 x i64*> %i0, i32 0
+ %j3 = extractelement <2 x i64*> %i3, i32 0
+ %ptr0 = getelementptr inbounds i64* %j0, i64 %o1
+ %ptr3 = getelementptr inbounds i64* %j3, i64 %o4
+ %qtr0 = insertelement <2 x i64*> undef, i64* %ptr0, i32 0
+ %rtr0 = insertelement <2 x i64*> %qtr0, i64* %ptr0, i32 1
+ %qtr3 = insertelement <2 x i64*> undef, i64* %ptr3, i32 0
+ %rtr3 = insertelement <2 x i64*> %qtr3, i64* %ptr3, i32 1
+ store <2 x i64*> %rtr0, <2 x i64*>* %c, align 8
+ %arrayidx5 = getelementptr inbounds <2 x i64*>* %c, i64 1
+ store <2 x i64*> %rtr3, <2 x i64*>* %arrayidx5, align 8
+ ret void
+; CHECK: @test3
+; CHECK: %i0.v.i0 = bitcast <2 x i64*>* %a to <4 x i64*>*
+; CHECK: %i1 = load <2 x i64*>* %b, align 8
+; CHECK: %i0 = load <4 x i64*>* %i0.v.i0, align 8
+; CHECK: %arrayidx4 = getelementptr inbounds <2 x i64*>* %b, i64 1
+; CHECK: %i4 = load <2 x i64*>* %arrayidx4, align 8
+; CHECK: %j1 = extractelement <2 x i64*> %i1, i32 0
+; CHECK: %j4 = extractelement <2 x i64*> %i4, i32 0
+; CHECK: %o1 = load i64* %j1, align 8
+; CHECK: %o4 = load i64* %j4, align 8
+; CHECK: %ptr0.v.i1.1 = insertelement <2 x i64> undef, i64 %o1, i32 0
+; CHECK: %ptr0.v.i1.2 = insertelement <2 x i64> %ptr0.v.i1.1, i64 %o4, i32 1
+; CHECK: %ptr0.v.i0 = shufflevector <4 x i64*> %i0, <4 x i64*> undef, <2 x i32> <i32 0, i32 2>
+; CHECK: %ptr0 = getelementptr inbounds <2 x i64*> %ptr0.v.i0, <2 x i64> %ptr0.v.i1.2
+; CHECK: %rtr0 = shufflevector <2 x i64*> %ptr0, <2 x i64*> undef, <2 x i32> zeroinitializer
+; CHECK: %rtr3 = shufflevector <2 x i64*> %ptr0, <2 x i64*> undef, <2 x i32> <i32 1, i32 1>
+; CHECK: %0 = bitcast <2 x i64*>* %c to <4 x i64*>*
+; CHECK: %1 = shufflevector <2 x i64*> %rtr0, <2 x i64*> %rtr3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK: store <4 x i64*> %1, <4 x i64*>* %0, align 8
+; CHECK: ret void
+; CHECK-AO: @test3
+; CHECK-AO-NOT: <4 x
+}
+
diff --git a/test/Transforms/IndVarSimplify/2012-10-19-congruent-constant.ll b/test/Transforms/IndVarSimplify/2012-10-19-congruent-constant.ll
new file mode 100644
index 00000000000..5c478669d29
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/2012-10-19-congruent-constant.ll
@@ -0,0 +1,27 @@
+; RUN: opt -S -indvars < %s | FileCheck %s
+
+; PR12627
+define void @test1(i32 %x) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %phi1 = phi i1 [ false, %entry ], [ %cmpa, %for.body ]
+ %phi2 = phi i1 [ false, %entry ], [ %cmpb, %for.body ]
+ %i.07 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ tail call void @aux(i1 %phi1, i1 %phi2) nounwind
+ %cmpa = icmp sgt i32 %i.07, 200
+ %cmpb = icmp sgt i32 %i.07, 100
+ %inc = add nsw i32 %i.07, 1
+ %exitcond = icmp eq i32 %inc, 100
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+
+; CHECK: @test1
+; CHECK-NOT: phi i1
+; CHECK: call void @aux(i1 false, i1 false)
+}
+
+declare void @aux(i1, i1)
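The fold relies on simple range reasoning: %inc counts 1..100, so %i.07 only ever takes the values 0..99 and both comparisons (sgt 100, sgt 200) are false on every iteration. Indvars can therefore prove the two i1 phis congruent to the constant false, leaving the body the CHECK lines describe (a sketch of the expected output):

  for.body:
    ; both i1 phis folded away; only the constant-argument call remains
    tail call void @aux(i1 false, i1 false) nounwind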
diff --git a/test/Transforms/IndVarSimplify/crash.ll b/test/Transforms/IndVarSimplify/crash.ll
index 62af42b9d68..1b702a3b1a3 100644
--- a/test/Transforms/IndVarSimplify/crash.ll
+++ b/test/Transforms/IndVarSimplify/crash.ll
@@ -113,3 +113,21 @@ bb9:
ret void
}
+; PR12536
+define void @fn1() noreturn nounwind {
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.end, %entry
+ %b.0 = phi i32 [ undef, %entry ], [ %conv, %for.end ]
+ br label %for.cond1
+
+for.cond1: ; preds = %for.cond1, %for.cond
+ %c.0 = phi i32 [ %b.0, %for.cond1 ], [ 0, %for.cond ]
+ br i1 undef, label %for.cond1, label %for.end
+
+for.end: ; preds = %for.cond1
+ %cmp2 = icmp slt i32 %c.0, 1
+ %conv = zext i1 %cmp2 to i32
+ br label %for.cond
+}
diff --git a/test/Transforms/IndVarSimplify/no-iv-rewrite.ll b/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
index bfdd000e38e..507f695e67c 100644
--- a/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
+++ b/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
@@ -199,7 +199,6 @@ entry:
; back to the loop iv.
;
; CHECK: loop:
-; CHECK: phi i32
; CHECK-NOT: phi
; CHECK: exit:
loop:
diff --git a/test/Transforms/InstCombine/obfuscated_splat.ll b/test/Transforms/InstCombine/obfuscated_splat.ll
new file mode 100644
index 00000000000..c25dade168a
--- /dev/null
+++ b/test/Transforms/InstCombine/obfuscated_splat.ll
@@ -0,0 +1,11 @@
+; RUN: opt -instcombine -S %s | FileCheck %s
+
+define void @test(<4 x float> *%in_ptr, <4 x float> *%out_ptr) {
+ %A = load <4 x float>* %in_ptr, align 16
+ %B = shufflevector <4 x float> %A, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 undef, i32 undef>
+ %C = shufflevector <4 x float> %B, <4 x float> %A, <4 x i32> <i32 0, i32 1, i32 4, i32 undef>
+ %D = shufflevector <4 x float> %C, <4 x float> %A, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK: %D = shufflevector <4 x float> %A, <4 x float> undef, <4 x i32> zeroinitializer
+ store <4 x float> %D, <4 x float> *%out_ptr
+ ret void
+}
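Tracking lanes through the chain shows why this is a splat of %A's element 0: %B is <A0, A0, undef, undef>, %C selects <B0, B1, A0, undef>, i.e. <A0, A0, A0, undef>, and %D selects <C0, C1, C2, A0>, i.e. <A0, A0, A0, A0>. That is exactly the canonical splat the CHECK line demands:

  %D = shufflevector <4 x float> %A, <4 x float> undef, <4 x i32> zeroinitializer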
diff --git a/test/Transforms/InstCombine/select.ll b/test/Transforms/InstCombine/select.ll
index 4baae2618dd..cc3aacdce3c 100644
--- a/test/Transforms/InstCombine/select.ll
+++ b/test/Transforms/InstCombine/select.ll
@@ -829,3 +829,37 @@ define i1 @test63(i1 %A, i1 %B) {
; CHECK: %C = or i1 %B, %not
; CHECK: ret i1 %C
}
+
+; PR14131
+define void @test64(i32 %p, i16 %b) noreturn nounwind {
+entry:
+ %p.addr.0.insert.mask = and i32 %p, -65536
+ %conv2 = and i32 %p, 65535
+ br i1 undef, label %lor.rhs, label %lor.end
+
+lor.rhs:
+ %p.addr.0.extract.trunc = trunc i32 %p.addr.0.insert.mask to i16
+ %phitmp = zext i16 %p.addr.0.extract.trunc to i32
+ br label %lor.end
+
+lor.end:
+ %t.1 = phi i32 [ 0, %entry ], [ %phitmp, %lor.rhs ]
+ %conv6 = zext i16 %b to i32
+ %div = udiv i32 %conv6, %t.1
+ %tobool8 = icmp eq i32 %div, 0
+ %cmp = icmp eq i32 %t.1, 0
+ %cmp12 = icmp ult i32 %conv2, 2
+ %cmp.sink = select i1 %tobool8, i1 %cmp12, i1 %cmp
+ br i1 %cmp.sink, label %cond.end17, label %cond.false16
+
+cond.false16:
+ br label %cond.end17
+
+cond.end17:
+ br label %while.body
+
+while.body:
+ br label %while.body
+; CHECK: @test64
+; CHECK-NOT: select
+}
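A select whose result and operands are all i1 can always be rewritten as pure boolean logic, which is why the test may insist that no select survives. One shape instcombine can produce for %cmp.sink (a sketch; value names invented):

  %not.tobool8 = xor i1 %tobool8, true
  %lhs = and i1 %tobool8, %cmp12
  %rhs = and i1 %not.tobool8, %cmp
  %cmp.sink.folded = or i1 %lhs, %rhs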
diff --git a/test/Transforms/InstCombine/strcpy-1.ll b/test/Transforms/InstCombine/strcpy-1.ll
new file mode 100644
index 00000000000..b6cf048b2a8
--- /dev/null
+++ b/test/Transforms/InstCombine/strcpy-1.ll
@@ -0,0 +1,45 @@
+; Test that the strcpy library call simplifier works correctly.
+; rdar://6839935
+; RUN: opt < %s -instcombine -S | FileCheck %s
+;
+; This transformation requires the pointer size, as it assumes that size_t is
+; the size of a pointer.
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+
+@hello = constant [6 x i8] c"hello\00"
+@a = common global [32 x i8] zeroinitializer, align 1
+@b = common global [32 x i8] zeroinitializer, align 1
+
+declare i8* @strcpy(i8*, i8*)
+
+define void @test_simplify1() {
+; CHECK: @test_simplify1
+
+ %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [6 x i8]* @hello, i32 0, i32 0
+
+ call i8* @strcpy(i8* %dst, i8* %src)
+; CHECK: @llvm.memcpy.p0i8.p0i8.i32
+ ret void
+}
+
+define i8* @test_simplify2() {
+; CHECK: @test_simplify2
+
+ %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+
+ %ret = call i8* @strcpy(i8* %dst, i8* %dst)
+; CHECK: ret i8* getelementptr inbounds ([32 x i8]* @a, i32 0, i32 0)
+ ret i8* %ret
+}
+
+define i8* @test_no_simplify1() {
+; CHECK: @test_no_simplify1
+
+ %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [32 x i8]* @b, i32 0, i32 0
+
+ %ret = call i8* @strcpy(i8* %dst, i8* %src)
+; CHECK: call i8* @strcpy
+ ret i8* %ret
+}
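With a constant source string the simplifier knows the copy length exactly (strlen("hello") plus the NUL is 6 bytes), so the call can become a fixed-size memcpy. Roughly what @test_simplify1 is expected to turn into (a sketch, using this era's five-argument memcpy intrinsic):

  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 6, i32 1, i1 false)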
diff --git a/test/Transforms/InstCombine/strcpy-2.ll b/test/Transforms/InstCombine/strcpy-2.ll
new file mode 100644
index 00000000000..779e9fdd959
--- /dev/null
+++ b/test/Transforms/InstCombine/strcpy-2.ll
@@ -0,0 +1,22 @@
+; Test that the strcpy library call simplifier works correctly.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+;
+; This transformation requires the pointer size, as it assumes that size_t is
+; the size of a pointer.
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+
+@hello = constant [6 x i8] c"hello\00"
+@a = common global [32 x i8] zeroinitializer, align 1
+
+declare i16* @strcpy(i8*, i8*)
+
+define void @test_no_simplify1() {
+; CHECK: @test_no_simplify1
+
+ %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [6 x i8]* @hello, i32 0, i32 0
+
+ call i16* @strcpy(i8* %dst, i8* %src)
+; CHECK: call i16* @strcpy
+ ret void
+}
diff --git a/test/Transforms/InstCombine/strcpy_chk-1.ll b/test/Transforms/InstCombine/strcpy_chk-1.ll
index c03e8a348b8..3e48f4fd305 100644
--- a/test/Transforms/InstCombine/strcpy_chk-1.ll
+++ b/test/Transforms/InstCombine/strcpy_chk-1.ll
@@ -7,16 +7,16 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
@a = common global [60 x i8] zeroinitializer, align 1
@b = common global [60 x i8] zeroinitializer, align 1
-@.str = private constant [8 x i8] c"abcdefg\00"
+@.str = private constant [12 x i8] c"abcdefghijk\00"
; Check cases where slen >= strlen (src).
define void @test_simplify1() {
; CHECK: @test_simplify1
%dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
- %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0
+ %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
-; CHECK-NEXT: call i8* @strcpy
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32
call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 60)
ret void
}
@@ -24,19 +24,19 @@ define void @test_simplify1() {
define void @test_simplify2() {
; CHECK: @test_simplify2
%dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
- %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0
+ %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
-; CHECK-NEXT: call i8* @strcpy
- call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 8)
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32
+ call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 12)
ret void
}
define void @test_simplify3() {
; CHECK: @test_simplify3
%dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
- %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0
+ %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
-; CHECK-NEXT: call i8* @strcpy
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32
call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 -1)
ret void
}
@@ -53,36 +53,42 @@ define void @test_simplify4() {
ret void
}
-define void @test_no_simplify1() {
-; CHECK: @test_no_simplify1
+; Check case where the string length is not constant.
+
+define void @test_simplify5() {
+; CHECK: @test_simplify5
%dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
- %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0
+ %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
-; CHECK-NEXT: call i8* @__strcpy_chk
- call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 8)
+; CHECK: @__memcpy_chk
+ %len = call i32 @llvm.objectsize.i32(i8* %dst, i1 false)
+ call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 %len)
ret void
}
-; Check case were slen < strlen (src).
+; Check case where the source and destination are the same.
-define void @test_no_simplify2() {
-; CHECK: @test_no_simplify2
+define i8* @test_simplify6() {
+; CHECK: @test_simplify6
%dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
- %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0
-; CHECK-NEXT: call i8* @__strcpy_chk
- call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 3)
- ret void
+; CHECK: getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0)
+ %len = call i32 @llvm.objectsize.i32(i8* %dst, i1 false)
+ %ret = call i8* @__strcpy_chk(i8* %dst, i8* %dst, i32 %len)
+ ret i8* %ret
}
-define void @test_no_simplify3() {
-; CHECK: @test_no_simplify3
+; Check case where slen < strlen (src).
+
+define void @test_no_simplify1() {
+; CHECK: @test_no_simplify1
%dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
- %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0
+ %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0
; CHECK-NEXT: call i8* @__strcpy_chk
- call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 0)
+ call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 8)
ret void
}
declare i8* @__strcpy_chk(i8*, i8*, i32) nounwind
+declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readonly
diff --git a/test/Transforms/LoopVectorize/2012-10-20-infloop.ll b/test/Transforms/LoopVectorize/2012-10-20-infloop.ll
new file mode 100644
index 00000000000..5caaffc8dde
--- /dev/null
+++ b/test/Transforms/LoopVectorize/2012-10-20-infloop.ll
@@ -0,0 +1,27 @@
+; RUN: opt < %s -loop-vectorize -dce
+
+; Check that we don't fall into an infinite loop.
+define void @test() nounwind {
+entry:
+ br label %for.body
+
+for.body:
+ %0 = phi i32 [ 1, %entry ], [ 0, %for.body ]
+ br label %for.body
+}
+
+
+
+define void @test2() nounwind {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv47 = phi i64 [ 0, %entry ], [ %indvars.iv.next48, %for.body ]
+ %0 = phi i32 [ 1, %entry ], [ 0, %for.body ]
+ %indvars.iv.next48 = add i64 %indvars.iv47, 1
+ br i1 undef, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ unreachable
+}
diff --git a/test/Transforms/LoopVectorize/gcc-examples.ll b/test/Transforms/LoopVectorize/gcc-examples.ll
new file mode 100644
index 00000000000..6fb1792b2c8
--- /dev/null
+++ b/test/Transforms/LoopVectorize/gcc-examples.ll
@@ -0,0 +1,648 @@
+; RUN: opt < %s -loop-vectorize -dce -instcombine -licm -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@b = common global [2048 x i32] zeroinitializer, align 16
+@c = common global [2048 x i32] zeroinitializer, align 16
+@a = common global [2048 x i32] zeroinitializer, align 16
+@G = common global [32 x [1024 x i32]] zeroinitializer, align 16
+@ub = common global [1024 x i32] zeroinitializer, align 16
+@uc = common global [1024 x i32] zeroinitializer, align 16
+@d = common global [2048 x i32] zeroinitializer, align 16
+@fa = common global [1024 x float] zeroinitializer, align 16
+@fb = common global [1024 x float] zeroinitializer, align 16
+@ic = common global [1024 x i32] zeroinitializer, align 16
+@da = common global [1024 x float] zeroinitializer, align 16
+@db = common global [1024 x float] zeroinitializer, align 16
+@dc = common global [1024 x float] zeroinitializer, align 16
+@dd = common global [1024 x float] zeroinitializer, align 16
+@dj = common global [1024 x i32] zeroinitializer, align 16
+
+;CHECK: @example1
+;CHECK: load <4 x i32>
+;CHECK: add <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret void
+define void @example1() nounwind uwtable ssp {
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+ %2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv
+ %3 = load i32* %2, align 4
+ %4 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv
+ %5 = load i32* %4, align 4
+ %6 = add nsw i32 %5, %3
+ %7 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+ store i32 %6, i32* %7, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 256
+ br i1 %exitcond, label %8, label %1
+
+; <label>:8 ; preds = %1
+ ret void
+}
+
+;CHECK: @example2
+;CHECK: store <4 x i32>
+;CHECK: ret void
+define void @example2(i32 %n, i32 %x) nounwind uwtable ssp {
+ %1 = icmp sgt i32 %n, 0
+ br i1 %1, label %.lr.ph5, label %.preheader
+
+..preheader_crit_edge: ; preds = %.lr.ph5
+ %phitmp = sext i32 %n to i64
+ br label %.preheader
+
+.preheader: ; preds = %..preheader_crit_edge, %0
+ %i.0.lcssa = phi i64 [ %phitmp, %..preheader_crit_edge ], [ 0, %0 ]
+ %2 = icmp eq i32 %n, 0
+ br i1 %2, label %._crit_edge, label %.lr.ph
+
+.lr.ph5: ; preds = %0, %.lr.ph5
+ %indvars.iv6 = phi i64 [ %indvars.iv.next7, %.lr.ph5 ], [ 0, %0 ]
+ %3 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv6
+ store i32 %x, i32* %3, align 4
+ %indvars.iv.next7 = add i64 %indvars.iv6, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next7 to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %..preheader_crit_edge, label %.lr.ph5
+
+.lr.ph: ; preds = %.preheader, %.lr.ph
+ %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ %i.0.lcssa, %.preheader ]
+ %.02 = phi i32 [ %4, %.lr.ph ], [ %n, %.preheader ]
+ %4 = add nsw i32 %.02, -1
+ %5 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv
+ %6 = load i32* %5, align 4
+ %7 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv
+ %8 = load i32* %7, align 4
+ %9 = and i32 %8, %6
+ %10 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+ store i32 %9, i32* %10, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %11 = icmp eq i32 %4, 0
+ br i1 %11, label %._crit_edge, label %.lr.ph
+
+._crit_edge: ; preds = %.lr.ph, %.preheader
+ ret void
+}
+
+; We can't vectorize this loop because it has non-constant loop bounds.
+;CHECK: @example3
+;CHECK-NOT: <4 x i32>
+;CHECK: ret void
+define void @example3(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture %q) nounwind uwtable ssp {
+ %1 = icmp eq i32 %n, 0
+ br i1 %1, label %._crit_edge, label %.lr.ph
+
+.lr.ph: ; preds = %0, %.lr.ph
+ %.05 = phi i32 [ %2, %.lr.ph ], [ %n, %0 ]
+ %.014 = phi i32* [ %5, %.lr.ph ], [ %p, %0 ]
+ %.023 = phi i32* [ %3, %.lr.ph ], [ %q, %0 ]
+ %2 = add nsw i32 %.05, -1
+ %3 = getelementptr inbounds i32* %.023, i64 1
+ %4 = load i32* %.023, align 16
+ %5 = getelementptr inbounds i32* %.014, i64 1
+ store i32 %4, i32* %.014, align 16
+ %6 = icmp eq i32 %2, 0
+ br i1 %6, label %._crit_edge, label %.lr.ph
+
+._crit_edge: ; preds = %.lr.ph, %0
+ ret void
+}
+
+;CHECK: @example4
+;CHECK: load <4 x i32>
+;CHECK: ret void
+define void @example4(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture %q) nounwind uwtable ssp {
+ %1 = add nsw i32 %n, -1
+ %2 = icmp eq i32 %n, 0
+ br i1 %2, label %.preheader4, label %.lr.ph10
+
+.preheader4: ; preds = %0
+ %3 = icmp sgt i32 %1, 0
+ br i1 %3, label %.lr.ph6, label %._crit_edge
+
+.lr.ph10: ; preds = %0, %.lr.ph10
+ %4 = phi i32 [ %9, %.lr.ph10 ], [ %1, %0 ]
+ %.018 = phi i32* [ %8, %.lr.ph10 ], [ %p, %0 ]
+ %.027 = phi i32* [ %5, %.lr.ph10 ], [ %q, %0 ]
+ %5 = getelementptr inbounds i32* %.027, i64 1
+ %6 = load i32* %.027, align 16
+ %7 = add nsw i32 %6, 5
+ %8 = getelementptr inbounds i32* %.018, i64 1
+ store i32 %7, i32* %.018, align 16
+ %9 = add nsw i32 %4, -1
+ %10 = icmp eq i32 %4, 0
+ br i1 %10, label %._crit_edge, label %.lr.ph10
+
+.preheader: ; preds = %.lr.ph6
+ br i1 %3, label %.lr.ph, label %._crit_edge
+
+.lr.ph6: ; preds = %.preheader4, %.lr.ph6
+ %indvars.iv11 = phi i64 [ %indvars.iv.next12, %.lr.ph6 ], [ 0, %.preheader4 ]
+ %indvars.iv.next12 = add i64 %indvars.iv11, 1
+ %11 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv.next12
+ %12 = load i32* %11, align 4
+ %13 = add nsw i64 %indvars.iv11, 3
+ %14 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %13
+ %15 = load i32* %14, align 4
+ %16 = add nsw i32 %15, %12
+ %17 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv11
+ store i32 %16, i32* %17, align 4
+ %lftr.wideiv13 = trunc i64 %indvars.iv.next12 to i32
+ %exitcond14 = icmp eq i32 %lftr.wideiv13, %1
+ br i1 %exitcond14, label %.preheader, label %.lr.ph6
+
+.lr.ph: ; preds = %.preheader, %.lr.ph
+ %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.preheader ]
+ %18 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+ %19 = load i32* %18, align 4
+ %20 = icmp sgt i32 %19, 4
+ %21 = select i1 %20, i32 4, i32 0
+ %22 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv
+ store i32 %21, i32* %22, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %1
+ br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge: ; preds = %.lr.ph10, %.preheader4, %.lr.ph, %.preheader
+ ret void
+}
+
+;CHECK: @example8
+;CHECK: store <4 x i32>
+;CHECK: ret void
+define void @example8(i32 %x) nounwind uwtable ssp {
+ br label %.preheader
+
+.preheader: ; preds = %3, %0
+ %indvars.iv3 = phi i64 [ 0, %0 ], [ %indvars.iv.next4, %3 ]
+ br label %1
+
+; <label>:1 ; preds = %1, %.preheader
+ %indvars.iv = phi i64 [ 0, %.preheader ], [ %indvars.iv.next, %1 ]
+ %2 = getelementptr inbounds [32 x [1024 x i32]]* @G, i64 0, i64 %indvars.iv3, i64 %indvars.iv
+ store i32 %x, i32* %2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 1024
+ br i1 %exitcond, label %3, label %1
+
+; <label>:3 ; preds = %1
+ %indvars.iv.next4 = add i64 %indvars.iv3, 1
+ %lftr.wideiv5 = trunc i64 %indvars.iv.next4 to i32
+ %exitcond6 = icmp eq i32 %lftr.wideiv5, 32
+ br i1 %exitcond6, label %4, label %.preheader
+
+; <label>:4 ; preds = %3
+ ret void
+}
+
+;CHECK: @example9
+;CHECK: phi <4 x i32>
+;CHECK: ret i32
+define i32 @example9() nounwind uwtable readonly ssp {
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+ %diff.01 = phi i32 [ 0, %0 ], [ %7, %1 ]
+ %2 = getelementptr inbounds [1024 x i32]* @ub, i64 0, i64 %indvars.iv
+ %3 = load i32* %2, align 4
+ %4 = getelementptr inbounds [1024 x i32]* @uc, i64 0, i64 %indvars.iv
+ %5 = load i32* %4, align 4
+ %6 = add i32 %3, %diff.01
+ %7 = sub i32 %6, %5
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 1024
+ br i1 %exitcond, label %8, label %1
+
+; <label>:8 ; preds = %1
+ ret i32 %7
+}
+
+;CHECK: @example10a
+;CHECK: load <4 x i16>
+;CHECK: add <4 x i16>
+;CHECK: store <4 x i16>
+;CHECK: ret void
+define void @example10a(i16* noalias nocapture %sa, i16* noalias nocapture %sb, i16* noalias nocapture %sc, i32* noalias nocapture %ia, i32* noalias nocapture %ib, i32* noalias nocapture %ic) nounwind uwtable ssp {
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+ %2 = getelementptr inbounds i32* %ib, i64 %indvars.iv
+ %3 = load i32* %2, align 4
+ %4 = getelementptr inbounds i32* %ic, i64 %indvars.iv
+ %5 = load i32* %4, align 4
+ %6 = add nsw i32 %5, %3
+ %7 = getelementptr inbounds i32* %ia, i64 %indvars.iv
+ store i32 %6, i32* %7, align 4
+ %8 = getelementptr inbounds i16* %sb, i64 %indvars.iv
+ %9 = load i16* %8, align 2
+ %10 = getelementptr inbounds i16* %sc, i64 %indvars.iv
+ %11 = load i16* %10, align 2
+ %12 = add i16 %11, %9
+ %13 = getelementptr inbounds i16* %sa, i64 %indvars.iv
+ store i16 %12, i16* %13, align 2
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 1024
+ br i1 %exitcond, label %14, label %1
+
+; <label>:14 ; preds = %1
+ ret void
+}
+
+;CHECK: @example10b
+;CHECK: load <4 x i16>
+;CHECK: sext <4 x i16>
+;CHECK: store <4 x i32>
+;CHECK: ret void
+define void @example10b(i16* noalias nocapture %sa, i16* noalias nocapture %sb, i16* noalias nocapture %sc, i32* noalias nocapture %ia, i32* noalias nocapture %ib, i32* noalias nocapture %ic) nounwind uwtable ssp {
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+ %2 = getelementptr inbounds i16* %sb, i64 %indvars.iv
+ %3 = load i16* %2, align 2
+ %4 = sext i16 %3 to i32
+ %5 = getelementptr inbounds i32* %ia, i64 %indvars.iv
+ store i32 %4, i32* %5, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 1024
+ br i1 %exitcond, label %6, label %1
+
+; <label>:6 ; preds = %1
+ ret void
+}
+
+;CHECK: @example11
+;CHECK: load i32
+;CHECK: load i32
+;CHECK: load i32
+;CHECK: load i32
+;CHECK: insertelement
+;CHECK: insertelement
+;CHECK: insertelement
+;CHECK: insertelement
+;CHECK: ret void
+define void @example11() nounwind uwtable ssp {
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+ %2 = shl nsw i64 %indvars.iv, 1
+ %3 = or i64 %2, 1
+ %4 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %3
+ %5 = load i32* %4, align 4
+ %6 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %3
+ %7 = load i32* %6, align 4
+ %8 = mul nsw i32 %7, %5
+ %9 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %2
+ %10 = load i32* %9, align 8
+ %11 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %2
+ %12 = load i32* %11, align 8
+ %13 = mul nsw i32 %12, %10
+ %14 = sub nsw i32 %8, %13
+ %15 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+ store i32 %14, i32* %15, align 4
+ %16 = mul nsw i32 %7, %10
+ %17 = mul nsw i32 %12, %5
+ %18 = add nsw i32 %17, %16
+ %19 = getelementptr inbounds [2048 x i32]* @d, i64 0, i64 %indvars.iv
+ store i32 %18, i32* %19, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 512
+ br i1 %exitcond, label %20, label %1
+
+; <label>:20 ; preds = %1
+ ret void
+}
+
+;CHECK: @example12
+;CHECK: trunc <4 x i64>
+;CHECK: store <4 x i32>
+;CHECK: ret void
+define void @example12() nounwind uwtable ssp {
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+ %2 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+ %3 = trunc i64 %indvars.iv to i32
+ store i32 %3, i32* %2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 1024
+ br i1 %exitcond, label %4, label %1
+
+; <label>:4 ; preds = %1
+ ret void
+}
+
+; Can't vectorize because of reductions.
+;CHECK: @example13
+;CHECK-NOT: <4 x i32>
+;CHECK: ret void
+define void @example13(i32** nocapture %A, i32** nocapture %B, i32* nocapture %out) nounwind uwtable ssp {
+ br label %.preheader
+
+.preheader: ; preds = %14, %0
+ %indvars.iv4 = phi i64 [ 0, %0 ], [ %indvars.iv.next5, %14 ]
+ %1 = getelementptr inbounds i32** %A, i64 %indvars.iv4
+ %2 = load i32** %1, align 8
+ %3 = getelementptr inbounds i32** %B, i64 %indvars.iv4
+ %4 = load i32** %3, align 8
+ br label %5
+
+; <label>:5 ; preds = %.preheader, %5
+ %indvars.iv = phi i64 [ 0, %.preheader ], [ %indvars.iv.next, %5 ]
+ %diff.02 = phi i32 [ 0, %.preheader ], [ %11, %5 ]
+ %6 = getelementptr inbounds i32* %2, i64 %indvars.iv
+ %7 = load i32* %6, align 4
+ %8 = getelementptr inbounds i32* %4, i64 %indvars.iv
+ %9 = load i32* %8, align 4
+ %10 = add i32 %7, %diff.02
+ %11 = sub i32 %10, %9
+ %indvars.iv.next = add i64 %indvars.iv, 8
+ %12 = trunc i64 %indvars.iv.next to i32
+ %13 = icmp slt i32 %12, 1024
+ br i1 %13, label %5, label %14
+
+; <label>:14 ; preds = %5
+ %15 = getelementptr inbounds i32* %out, i64 %indvars.iv4
+ store i32 %11, i32* %15, align 4
+ %indvars.iv.next5 = add i64 %indvars.iv4, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next5 to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 32
+ br i1 %exitcond, label %16, label %.preheader
+
+; <label>:16 ; preds = %14
+ ret void
+}
+
+; Can't vectorize because of reductions.
+;CHECK: @example14
+;CHECK-NOT: <4 x i32>
+;CHECK: ret void
+define void @example14(i32** nocapture %in, i32** nocapture %coeff, i32* nocapture %out) nounwind uwtable ssp {
+.preheader3:
+ br label %.preheader
+
+.preheader: ; preds = %11, %.preheader3
+ %indvars.iv7 = phi i64 [ 0, %.preheader3 ], [ %indvars.iv.next8, %11 ]
+ %sum.05 = phi i32 [ 0, %.preheader3 ], [ %10, %11 ]
+ br label %0
+
+; <label>:0 ; preds = %0, %.preheader
+ %indvars.iv = phi i64 [ 0, %.preheader ], [ %indvars.iv.next, %0 ]
+ %sum.12 = phi i32 [ %sum.05, %.preheader ], [ %10, %0 ]
+ %1 = getelementptr inbounds i32** %in, i64 %indvars.iv
+ %2 = load i32** %1, align 8
+ %3 = getelementptr inbounds i32* %2, i64 %indvars.iv7
+ %4 = load i32* %3, align 4
+ %5 = getelementptr inbounds i32** %coeff, i64 %indvars.iv
+ %6 = load i32** %5, align 8
+ %7 = getelementptr inbounds i32* %6, i64 %indvars.iv7
+ %8 = load i32* %7, align 4
+ %9 = mul nsw i32 %8, %4
+ %10 = add nsw i32 %9, %sum.12
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 1024
+ br i1 %exitcond, label %11, label %0
+
+; <label>:11 ; preds = %0
+ %indvars.iv.next8 = add i64 %indvars.iv7, 1
+ %lftr.wideiv9 = trunc i64 %indvars.iv.next8 to i32
+ %exitcond10 = icmp eq i32 %lftr.wideiv9, 32
+ br i1 %exitcond10, label %.preheader3.1, label %.preheader
+
+.preheader3.1: ; preds = %11
+ store i32 %10, i32* %out, align 4
+ br label %.preheader.1
+
+.preheader.1: ; preds = %24, %.preheader3.1
+ %indvars.iv7.1 = phi i64 [ 0, %.preheader3.1 ], [ %indvars.iv.next8.1, %24 ]
+ %sum.05.1 = phi i32 [ 0, %.preheader3.1 ], [ %23, %24 ]
+ br label %12
+
+; <label>:12 ; preds = %12, %.preheader.1
+ %indvars.iv.1 = phi i64 [ 0, %.preheader.1 ], [ %13, %12 ]
+ %sum.12.1 = phi i32 [ %sum.05.1, %.preheader.1 ], [ %23, %12 ]
+ %13 = add nsw i64 %indvars.iv.1, 1
+ %14 = getelementptr inbounds i32** %in, i64 %13
+ %15 = load i32** %14, align 8
+ %16 = getelementptr inbounds i32* %15, i64 %indvars.iv7.1
+ %17 = load i32* %16, align 4
+ %18 = getelementptr inbounds i32** %coeff, i64 %indvars.iv.1
+ %19 = load i32** %18, align 8
+ %20 = getelementptr inbounds i32* %19, i64 %indvars.iv7.1
+ %21 = load i32* %20, align 4
+ %22 = mul nsw i32 %21, %17
+ %23 = add nsw i32 %22, %sum.12.1
+ %lftr.wideiv.1 = trunc i64 %13 to i32
+ %exitcond.1 = icmp eq i32 %lftr.wideiv.1, 1024
+ br i1 %exitcond.1, label %24, label %12
+
+; <label>:24 ; preds = %12
+ %indvars.iv.next8.1 = add i64 %indvars.iv7.1, 1
+ %lftr.wideiv9.1 = trunc i64 %indvars.iv.next8.1 to i32
+ %exitcond10.1 = icmp eq i32 %lftr.wideiv9.1, 32
+ br i1 %exitcond10.1, label %.preheader3.2, label %.preheader.1
+
+.preheader3.2: ; preds = %24
+ %25 = getelementptr inbounds i32* %out, i64 1
+ store i32 %23, i32* %25, align 4
+ br label %.preheader.2
+
+.preheader.2: ; preds = %38, %.preheader3.2
+ %indvars.iv7.2 = phi i64 [ 0, %.preheader3.2 ], [ %indvars.iv.next8.2, %38 ]
+ %sum.05.2 = phi i32 [ 0, %.preheader3.2 ], [ %37, %38 ]
+ br label %26
+
+; <label>:26 ; preds = %26, %.preheader.2
+ %indvars.iv.2 = phi i64 [ 0, %.preheader.2 ], [ %indvars.iv.next.2, %26 ]
+ %sum.12.2 = phi i32 [ %sum.05.2, %.preheader.2 ], [ %37, %26 ]
+ %27 = add nsw i64 %indvars.iv.2, 2
+ %28 = getelementptr inbounds i32** %in, i64 %27
+ %29 = load i32** %28, align 8
+ %30 = getelementptr inbounds i32* %29, i64 %indvars.iv7.2
+ %31 = load i32* %30, align 4
+ %32 = getelementptr inbounds i32** %coeff, i64 %indvars.iv.2
+ %33 = load i32** %32, align 8
+ %34 = getelementptr inbounds i32* %33, i64 %indvars.iv7.2
+ %35 = load i32* %34, align 4
+ %36 = mul nsw i32 %35, %31
+ %37 = add nsw i32 %36, %sum.12.2
+ %indvars.iv.next.2 = add i64 %indvars.iv.2, 1
+ %lftr.wideiv.2 = trunc i64 %indvars.iv.next.2 to i32
+ %exitcond.2 = icmp eq i32 %lftr.wideiv.2, 1024
+ br i1 %exitcond.2, label %38, label %26
+
+; <label>:38 ; preds = %26
+ %indvars.iv.next8.2 = add i64 %indvars.iv7.2, 1
+ %lftr.wideiv9.2 = trunc i64 %indvars.iv.next8.2 to i32
+ %exitcond10.2 = icmp eq i32 %lftr.wideiv9.2, 32
+ br i1 %exitcond10.2, label %.preheader3.3, label %.preheader.2
+
+.preheader3.3: ; preds = %38
+ %39 = getelementptr inbounds i32* %out, i64 2
+ store i32 %37, i32* %39, align 4
+ br label %.preheader.3
+
+.preheader.3: ; preds = %52, %.preheader3.3
+ %indvars.iv7.3 = phi i64 [ 0, %.preheader3.3 ], [ %indvars.iv.next8.3, %52 ]
+ %sum.05.3 = phi i32 [ 0, %.preheader3.3 ], [ %51, %52 ]
+ br label %40
+
+; <label>:40 ; preds = %40, %.preheader.3
+ %indvars.iv.3 = phi i64 [ 0, %.preheader.3 ], [ %indvars.iv.next.3, %40 ]
+ %sum.12.3 = phi i32 [ %sum.05.3, %.preheader.3 ], [ %51, %40 ]
+ %41 = add nsw i64 %indvars.iv.3, 3
+ %42 = getelementptr inbounds i32** %in, i64 %41
+ %43 = load i32** %42, align 8
+ %44 = getelementptr inbounds i32* %43, i64 %indvars.iv7.3
+ %45 = load i32* %44, align 4
+ %46 = getelementptr inbounds i32** %coeff, i64 %indvars.iv.3
+ %47 = load i32** %46, align 8
+ %48 = getelementptr inbounds i32* %47, i64 %indvars.iv7.3
+ %49 = load i32* %48, align 4
+ %50 = mul nsw i32 %49, %45
+ %51 = add nsw i32 %50, %sum.12.3
+ %indvars.iv.next.3 = add i64 %indvars.iv.3, 1
+ %lftr.wideiv.3 = trunc i64 %indvars.iv.next.3 to i32
+ %exitcond.3 = icmp eq i32 %lftr.wideiv.3, 1024
+ br i1 %exitcond.3, label %52, label %40
+
+; <label>:52 ; preds = %40
+ %indvars.iv.next8.3 = add i64 %indvars.iv7.3, 1
+ %lftr.wideiv9.3 = trunc i64 %indvars.iv.next8.3 to i32
+ %exitcond10.3 = icmp eq i32 %lftr.wideiv9.3, 32
+ br i1 %exitcond10.3, label %53, label %.preheader.3
+
+; <label>:53 ; preds = %52
+ %54 = getelementptr inbounds i32* %out, i64 3
+ store i32 %51, i32* %54, align 4
+ ret void
+}
+
+; Can't vectorize because the src and dst pointers are not disjoint.
+;CHECK: @example21
+;CHECK-NOT: <4 x i32>
+;CHECK: ret i32
+define i32 @example21(i32* nocapture %b, i32 %n) nounwind uwtable readonly ssp {
+ %1 = icmp sgt i32 %n, 0
+ br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph: ; preds = %0
+ %2 = sext i32 %n to i64
+ br label %3
+
+; <label>:3 ; preds = %.lr.ph, %3
+ %indvars.iv = phi i64 [ %2, %.lr.ph ], [ %indvars.iv.next, %3 ]
+ %a.02 = phi i32 [ 0, %.lr.ph ], [ %6, %3 ]
+ %indvars.iv.next = add i64 %indvars.iv, -1
+ %4 = getelementptr inbounds i32* %b, i64 %indvars.iv.next
+ %5 = load i32* %4, align 4
+ %6 = add nsw i32 %5, %a.02
+ %7 = trunc i64 %indvars.iv.next to i32
+ %8 = icmp sgt i32 %7, 0
+ br i1 %8, label %3, label %._crit_edge
+
+._crit_edge: ; preds = %3, %0
+ %a.0.lcssa = phi i32 [ 0, %0 ], [ %6, %3 ]
+ ret i32 %a.0.lcssa
+}
+
+; Can't vectorize because there are multiple PHIs.
+;CHECK: @example23
+;CHECK-NOT: <4 x i32>
+;CHECK: ret void
+define void @example23(i16* nocapture %src, i32* nocapture %dst) nounwind uwtable ssp {
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %.04 = phi i16* [ %src, %0 ], [ %2, %1 ]
+ %.013 = phi i32* [ %dst, %0 ], [ %6, %1 ]
+ %i.02 = phi i32 [ 0, %0 ], [ %7, %1 ]
+ %2 = getelementptr inbounds i16* %.04, i64 1
+ %3 = load i16* %.04, align 2
+ %4 = zext i16 %3 to i32
+ %5 = shl nuw nsw i32 %4, 7
+ %6 = getelementptr inbounds i32* %.013, i64 1
+ store i32 %5, i32* %.013, align 4
+ %7 = add nsw i32 %i.02, 1
+ %exitcond = icmp eq i32 %7, 256
+ br i1 %exitcond, label %8, label %1
+
+; <label>:8 ; preds = %1
+ ret void
+}
+
+;CHECK: @example24
+;CHECK: shufflevector <4 x i16>
+;CHECK: ret void
+define void @example24(i16 signext %x, i16 signext %y) nounwind uwtable ssp {
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+ %2 = getelementptr inbounds [1024 x float]* @fa, i64 0, i64 %indvars.iv
+ %3 = load float* %2, align 4
+ %4 = getelementptr inbounds [1024 x float]* @fb, i64 0, i64 %indvars.iv
+ %5 = load float* %4, align 4
+ %6 = fcmp olt float %3, %5
+ %x.y = select i1 %6, i16 %x, i16 %y
+ %7 = sext i16 %x.y to i32
+ %8 = getelementptr inbounds [1024 x i32]* @ic, i64 0, i64 %indvars.iv
+ store i32 %7, i32* %8, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 1024
+ br i1 %exitcond, label %9, label %1
+
+; <label>:9 ; preds = %1
+ ret void
+}
+
+;CHECK: @example25
+;CHECK: and <4 x i1>
+;CHECK: zext <4 x i1>
+;CHECK: ret void
+define void @example25() nounwind uwtable ssp {
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+ %2 = getelementptr inbounds [1024 x float]* @da, i64 0, i64 %indvars.iv
+ %3 = load float* %2, align 4
+ %4 = getelementptr inbounds [1024 x float]* @db, i64 0, i64 %indvars.iv
+ %5 = load float* %4, align 4
+ %6 = fcmp olt float %3, %5
+ %7 = getelementptr inbounds [1024 x float]* @dc, i64 0, i64 %indvars.iv
+ %8 = load float* %7, align 4
+ %9 = getelementptr inbounds [1024 x float]* @dd, i64 0, i64 %indvars.iv
+ %10 = load float* %9, align 4
+ %11 = fcmp olt float %8, %10
+ %12 = and i1 %6, %11
+ %13 = zext i1 %12 to i32
+ %14 = getelementptr inbounds [1024 x i32]* @dj, i64 0, i64 %indvars.iv
+ store i32 %13, i32* %14, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 1024
+ br i1 %exitcond, label %15, label %1
+
+; <label>:15 ; preds = %1
+ ret void
+}
+
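For the examples that do vectorize, the CHECK lines all anticipate the same shape: the scalar body replaced by a 4-wide one operating on <4 x i32> (or <4 x i16>/<4 x float>) values. Roughly what @example1's loop body becomes (a sketch; the %*.gep pointer names are invented):

  %wide.b = load <4 x i32>* %b.gep, align 4
  %wide.c = load <4 x i32>* %c.gep, align 4
  %wide.sum = add nsw <4 x i32> %wide.c, %wide.b
  store <4 x i32> %wide.sum, <4 x i32>* %a.gep, align 4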
diff --git a/test/Transforms/LoopVectorize/increment.ll b/test/Transforms/LoopVectorize/increment.ll
new file mode 100644
index 00000000000..e818d685626
--- /dev/null
+++ b/test/Transforms/LoopVectorize/increment.ll
@@ -0,0 +1,66 @@
+; RUN: opt < %s -loop-vectorize -dce -instcombine -licm -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@a = common global [2048 x i32] zeroinitializer, align 16
+
+; This is the loop:
+; for (i=0; i<n; i++){
+; a[i] += i;
+; }
+;CHECK: @inc
+;CHECK: load <4 x i32>
+;CHECK: add <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret void
+define void @inc(i32 %n) nounwind uwtable noinline ssp {
+ %1 = icmp sgt i32 %n, 0
+ br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph: ; preds = %0, %.lr.ph
+ %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+ %2 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+ %3 = load i32* %2, align 4
+ %4 = trunc i64 %indvars.iv to i32
+ %5 = add nsw i32 %3, %4
+ store i32 %5, i32* %2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge: ; preds = %.lr.ph, %0
+ ret void
+}
+
+; Can't vectorize this loop because the access to A[B[i]] is non-linear.
+;
+; for (i = 0; i < n; ++i) {
+; A[B[i]]++;
+; }
+;CHECK: @histogram
+;CHECK-NOT: <4 x i32>
+;CHECK: ret i32
+define i32 @histogram(i32* nocapture noalias %A, i32* nocapture noalias %B, i32 %n) nounwind uwtable ssp {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %B, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %idxprom1 = sext i32 %0 to i64
+ %arrayidx2 = getelementptr inbounds i32* %A, i64 %idxprom1
+ %1 = load i32* %arrayidx2, align 4
+ %inc = add nsw i32 %1, 1
+ store i32 %inc, i32* %arrayidx2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret i32 0
+}
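The histogram loop stays scalar because two lanes of one vector iteration may index the same element of A. Concretely, if B[0] == B[1] (a hypothetical input), the scalar semantics require the second increment to observe the first one's store, which a naive vector load/add/store would lose:

  ; two consecutive scalar iterations hitting the same slot %A3 (names invented)
  %v0 = load i32* %A3
  %i0 = add nsw i32 %v0, 1
  store i32 %i0, i32* %A3        ; A[3] = k+1
  %v1 = load i32* %A3            ; must see k+1
  %i1 = add nsw i32 %v1, 1
  store i32 %i1, i32* %A3        ; A[3] = k+2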
diff --git a/test/Transforms/LoopVectorize/induction_plus.ll b/test/Transforms/LoopVectorize/induction_plus.ll
new file mode 100644
index 00000000000..bd90113e523
--- /dev/null
+++ b/test/Transforms/LoopVectorize/induction_plus.ll
@@ -0,0 +1,30 @@
+; RUN: opt < %s -loop-vectorize -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@array = common global [1024 x i32] zeroinitializer, align 16
+
+;CHECK: @array_at_plus_one
+;CHECK: add <4 x i64>
+;CHECK: trunc <4 x i64>
+;CHECK: add i64 %index, 12
+;CHECK: ret i32
+define i32 @array_at_plus_one(i32 %n) nounwind uwtable ssp {
+ %1 = icmp sgt i32 %n, 0
+ br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph: ; preds = %0, %.lr.ph
+ %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+ %2 = add nsw i64 %indvars.iv, 12
+ %3 = getelementptr inbounds [1024 x i32]* @array, i64 0, i64 %2
+ %4 = trunc i64 %indvars.iv to i32
+ store i32 %4, i32* %3, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge: ; preds = %.lr.ph, %0
+ ret i32 undef
+}
diff --git a/test/Transforms/LoopVectorize/lit.local.cfg b/test/Transforms/LoopVectorize/lit.local.cfg
new file mode 100644
index 00000000000..19eebc0ac7a
--- /dev/null
+++ b/test/Transforms/LoopVectorize/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LoopVectorize/non-const-n.ll b/test/Transforms/LoopVectorize/non-const-n.ll
new file mode 100644
index 00000000000..04c5c84a4f4
--- /dev/null
+++ b/test/Transforms/LoopVectorize/non-const-n.ll
@@ -0,0 +1,38 @@
+; RUN: opt < %s -loop-vectorize -dce -instcombine -licm -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@b = common global [2048 x i32] zeroinitializer, align 16
+@c = common global [2048 x i32] zeroinitializer, align 16
+@a = common global [2048 x i32] zeroinitializer, align 16
+
+;CHECK: @example1
+;CHECK: shl i32
+;CHECK: zext i32
+;CHECK: load <4 x i32>
+;CHECK: add <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret void
+define void @example1(i32 %n) nounwind uwtable ssp {
+ %n4 = shl i32 %n, 2
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+ %2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv
+ %3 = load i32* %2, align 4
+ %4 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv
+ %5 = load i32* %4, align 4
+ %6 = add nsw i32 %5, %3
+ %7 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+ store i32 %6, i32* %7, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n4
+ br i1 %exitcond, label %8, label %1
+
+; <label>:8 ; preds = %1
+ ret void
+}
+
diff --git a/test/Transforms/LoopVectorize/read-only.ll b/test/Transforms/LoopVectorize/read-only.ll
new file mode 100644
index 00000000000..4095ea68ef7
--- /dev/null
+++ b/test/Transforms/LoopVectorize/read-only.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s -loop-vectorize -dce -instcombine -licm -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+;CHECK: @read_only_func
+;CHECK: load <4 x i32>
+;CHECK: ret i32
+define i32 @read_only_func(i32* nocapture %A, i32* nocapture %B, i32 %n) nounwind uwtable readonly ssp {
+ %1 = icmp sgt i32 %n, 0
+ br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph: ; preds = %0, %.lr.ph
+ %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+ %sum.02 = phi i32 [ %9, %.lr.ph ], [ 0, %0 ]
+ %2 = getelementptr inbounds i32* %A, i64 %indvars.iv
+ %3 = load i32* %2, align 4
+ %4 = add nsw i64 %indvars.iv, 13
+ %5 = getelementptr inbounds i32* %B, i64 %4
+ %6 = load i32* %5, align 4
+ %7 = shl i32 %6, 1
+ %8 = add i32 %3, %sum.02
+ %9 = add i32 %8, %7
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge: ; preds = %.lr.ph, %0
+ %sum.0.lcssa = phi i32 [ 0, %0 ], [ %9, %.lr.ph ]
+ ret i32 %sum.0.lcssa
+}
diff --git a/test/Transforms/LoopVectorize/reduction.ll b/test/Transforms/LoopVectorize/reduction.ll
new file mode 100644
index 00000000000..3e871b229b5
--- /dev/null
+++ b/test/Transforms/LoopVectorize/reduction.ll
@@ -0,0 +1,152 @@
+; RUN: opt < %s -loop-vectorize -dce -instcombine -licm -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+;CHECK: @reduction_sum
+;CHECK: phi <4 x i32>
+;CHECK: load <4 x i32>
+;CHECK: add <4 x i32>
+;CHECK: ret i32
+define i32 @reduction_sum(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp {
+ %1 = icmp sgt i32 %n, 0
+ br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph: ; preds = %0, %.lr.ph
+ %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+ %sum.02 = phi i32 [ %9, %.lr.ph ], [ 0, %0 ]
+ %2 = getelementptr inbounds i32* %A, i64 %indvars.iv
+ %3 = load i32* %2, align 4
+ %4 = getelementptr inbounds i32* %B, i64 %indvars.iv
+ %5 = load i32* %4, align 4
+ %6 = trunc i64 %indvars.iv to i32
+ %7 = add i32 %sum.02, %6
+ %8 = add i32 %7, %3
+ %9 = add i32 %8, %5
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge: ; preds = %.lr.ph, %0
+ %sum.0.lcssa = phi i32 [ 0, %0 ], [ %9, %.lr.ph ]
+ ret i32 %sum.0.lcssa
+}
+
+;CHECK: @reduction_prod
+;CHECK: phi <4 x i32>
+;CHECK: load <4 x i32>
+;CHECK: mul <4 x i32>
+;CHECK: ret i32
+define i32 @reduction_prod(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp {
+ %1 = icmp sgt i32 %n, 0
+ br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph: ; preds = %0, %.lr.ph
+ %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+ %prod.02 = phi i32 [ %9, %.lr.ph ], [ 1, %0 ]
+ %2 = getelementptr inbounds i32* %A, i64 %indvars.iv
+ %3 = load i32* %2, align 4
+ %4 = getelementptr inbounds i32* %B, i64 %indvars.iv
+ %5 = load i32* %4, align 4
+ %6 = trunc i64 %indvars.iv to i32
+ %7 = mul i32 %prod.02, %6
+ %8 = mul i32 %7, %3
+ %9 = mul i32 %8, %5
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge: ; preds = %.lr.ph, %0
+ %prod.0.lcssa = phi i32 [ 1, %0 ], [ %9, %.lr.ph ]
+ ret i32 %prod.0.lcssa
+}
+
+;CHECK: @reduction_mix
+;CHECK: phi <4 x i32>
+;CHECK: load <4 x i32>
+;CHECK: mul <4 x i32>
+;CHECK: ret i32
+define i32 @reduction_mix(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp {
+ %1 = icmp sgt i32 %n, 0
+ br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph: ; preds = %0, %.lr.ph
+ %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+ %sum.02 = phi i32 [ %9, %.lr.ph ], [ 0, %0 ]
+ %2 = getelementptr inbounds i32* %A, i64 %indvars.iv
+ %3 = load i32* %2, align 4
+ %4 = getelementptr inbounds i32* %B, i64 %indvars.iv
+ %5 = load i32* %4, align 4
+ %6 = mul nsw i32 %5, %3
+ %7 = trunc i64 %indvars.iv to i32
+ %8 = add i32 %sum.02, %7
+ %9 = add i32 %8, %6
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge: ; preds = %.lr.ph, %0
+ %sum.0.lcssa = phi i32 [ 0, %0 ], [ %9, %.lr.ph ]
+ ret i32 %sum.0.lcssa
+}
+
+;CHECK: @reduction_mul
+;CHECK: mul <4 x i32>
+;CHECK: ret i32
+define i32 @reduction_mul(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp {
+ %1 = icmp sgt i32 %n, 0
+ br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph: ; preds = %0, %.lr.ph
+ %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+ %sum.02 = phi i32 [ %9, %.lr.ph ], [ 19, %0 ]
+ %2 = getelementptr inbounds i32* %A, i64 %indvars.iv
+ %3 = load i32* %2, align 4
+ %4 = getelementptr inbounds i32* %B, i64 %indvars.iv
+ %5 = load i32* %4, align 4
+ %6 = trunc i64 %indvars.iv to i32
+ %7 = add i32 %3, %6
+ %8 = add i32 %7, %5
+ %9 = mul i32 %8, %sum.02
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge: ; preds = %.lr.ph, %0
+ %sum.0.lcssa = phi i32 [ 0, %0 ], [ %9, %.lr.ph ]
+ ret i32 %sum.0.lcssa
+}
+
+;CHECK: @start_at_non_zero
+;CHECK: phi <4 x i32>
+;CHECK: <i32 120, i32 0, i32 0, i32 0>
+;CHECK: ret i32
+define i32 @start_at_non_zero(i32* nocapture %in, i32* nocapture %coeff, i32* nocapture %out, i32 %n) nounwind uwtable readonly ssp {
+entry:
+ %cmp7 = icmp sgt i32 %n, 0
+ br i1 %cmp7, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %sum.09 = phi i32 [ %add, %for.body ], [ 120, %entry ]
+ %arrayidx = getelementptr inbounds i32* %in, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds i32* %coeff, i64 %indvars.iv
+ %1 = load i32* %arrayidx2, align 4
+ %mul = mul nsw i32 %1, %0
+ %add = add nsw i32 %mul, %sum.09
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ %sum.0.lcssa = phi i32 [ 120, %entry ], [ %add, %for.body ]
+ ret i32 %sum.0.lcssa
+}
+
+
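@start_at_non_zero pins down how a reduction with a non-identity start value is seeded: the scalar 120 goes into lane 0 of the vector phi and the identity for add (0) into the remaining lanes, so the final horizontal sum of the lanes still yields 120 plus the loop's contribution. A sketch of the phi the CHECK lines anticipate (block labels invented):

  %vec.phi = phi <4 x i32> [ <i32 120, i32 0, i32 0, i32 0>, %vector.ph ], [ %vec.add, %vector.body ]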
diff --git a/test/Transforms/LoopVectorize/scalar-select.ll b/test/Transforms/LoopVectorize/scalar-select.ll
new file mode 100644
index 00000000000..8d5b6fd8aff
--- /dev/null
+++ b/test/Transforms/LoopVectorize/scalar-select.ll
@@ -0,0 +1,37 @@
+; RUN: opt < %s -loop-vectorize -dce -instcombine -licm -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@a = common global [2048 x i32] zeroinitializer, align 16
+@b = common global [2048 x i32] zeroinitializer, align 16
+@c = common global [2048 x i32] zeroinitializer, align 16
+
+;CHECK: @example1
+;CHECK: load <4 x i32>
+; Make sure that we have a scalar condition and a vector operand.
+;CHECK: select i1 %cond, <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret void
+define void @example1(i1 %cond) nounwind uwtable ssp {
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+ %2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv
+ %3 = load i32* %2, align 4
+ %4 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv
+ %5 = load i32* %4, align 4
+ %6 = add nsw i32 %5, %3
+ %7 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+ %sel = select i1 %cond, i32 %6, i32 zeroinitializer
+ store i32 %sel, i32* %7, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 256
+ br i1 %exitcond, label %8, label %1
+
+; <label>:8 ; preds = %1
+ ret void
+}
+
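The CHECK lines above pin down that a loop-invariant condition stays scalar while its operands are widened: the vectorizer emits a single select on i1 %cond over <4 x i32> vectors instead of splatting the condition. A rough Python analogue of the transformed loop (names and the helper are illustrative only):

    # Sketch: one scalar condition selecting between whole 4-wide vectors,
    # analogous to: select i1 %cond, <4 x i32> %sum, <4 x i32> zeroinitializer
    def select_loop(a, b, c, cond, vf=4):
        for i in range(0, len(a), vf):       # len(a) assumed a multiple of vf
            vec_sum = [b[i + l] + c[i + l] for l in range(vf)]
            a[i:i + vf] = vec_sum if cond else [0] * vf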
diff --git a/test/Transforms/SROA/basictest.ll b/test/Transforms/SROA/basictest.ll
index 644fda167d4..03120f7a327 100644
--- a/test/Transforms/SROA/basictest.ll
+++ b/test/Transforms/SROA/basictest.ll
@@ -1063,3 +1063,23 @@ entry:
call void @llvm.lifetime.end(i64 -1, i8* %0)
ret void
}
+
+define void @PR14105({ [16 x i8] }* %ptr) {
+; Ensure that when rewriting the GEP index '-1' for this alloca we preserve its
+; sign as negative. We use a volatile memcpy to ensure promotion never actually
+; occurs.
+; CHECK: @PR14105
+
+entry:
+ %a = alloca { [16 x i8] }, align 8
+; CHECK: alloca [16 x i8], align 8
+
+ %gep = getelementptr inbounds { [16 x i8] }* %ptr, i64 -1
+; CHECK-NEXT: getelementptr inbounds { [16 x i8] }* %ptr, i64 -1, i32 0, i64 0
+
+ %cast1 = bitcast { [16 x i8] }* %gep to i8*
+ %cast2 = bitcast { [16 x i8] }* %a to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast1, i8* %cast2, i32 16, i32 8, i1 true)
+ ret void
+; CHECK: ret
+}
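For context on "preserve its sign": the rewritten GEP must keep producing a negative byte offset. If the -1 index were reinterpreted as an unsigned 64-bit quantity during rewriting, the offset would jump from -16 bytes to an enormous positive one. A small Python illustration, with the 16-byte element size taken from the test's { [16 x i8] } type:

    # Sketch: byte offset of GEP index -1 over a 16-byte element,
    # read as signed (correct) versus unsigned (wrong).
    ELEM_SIZE = 16
    idx_signed = -1
    idx_unsigned = idx_signed % (1 << 64)     # same 64-bit pattern, read unsigned

    print(idx_signed * ELEM_SIZE)             # -16, the intended offset
    print(idx_unsigned * ELEM_SIZE)           # 295147905179352825840, wildly wrong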
diff --git a/test/Transforms/SimplifyLibCalls/FFS.ll b/test/Transforms/SimplifyLibCalls/FFS.ll
index e38d78349d4..6aecbeacd7e 100644
--- a/test/Transforms/SimplifyLibCalls/FFS.ll
+++ b/test/Transforms/SimplifyLibCalls/FFS.ll
@@ -1,6 +1,7 @@
-; Test that the ToAsciiOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | \
-; RUN: not grep "call.*@ffs"
+; Test that FFSOpt works correctly
+; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
+
+; CHECK-NOT: call{{.*}}@ffs
@non_const = external global i32 ; <i32*> [#uses=1]
@@ -34,3 +35,11 @@ define i32 @a(i64) nounwind {
%2 = call i32 @ffsll(i64 %0) ; <i32> [#uses=1]
ret i32 %2
}
+
+; PR13028
+define i32 @b() nounwind {
+ %ffs = call i32 @ffsll(i64 0)
+ ret i32 %ffs
+; CHECK: @b
+; CHECK-NEXT: ret i32 0
+}
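The new @b test pins down the PR13028 folding: an ffs-family call with a constant argument is evaluated at compile time, and ffs(0) is defined to be 0 (otherwise ffs returns the 1-based index of the least significant set bit). A compact Python sketch of that semantics:

    def ffs(x):
        # 1-based index of the least significant set bit; 0 when no bit is set.
        return (x & -x).bit_length()

    assert ffs(0) == 0    # the constant the @b CHECK expects
    assert ffs(1) == 1
    assert ffs(8) == 4    # bit 3 set -> 1-based index 4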
diff --git a/test/Transforms/SimplifyLibCalls/StrCpy.ll b/test/Transforms/SimplifyLibCalls/StrCpy.ll
deleted file mode 100644
index 83406ff8f86..00000000000
--- a/test/Transforms/SimplifyLibCalls/StrCpy.ll
+++ /dev/null
@@ -1,37 +0,0 @@
-; Test that the StrCpyOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-
-; This transformation requires the pointer size, as it assumes that size_t is
-; the size of a pointer.
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
-
-@hello = constant [6 x i8] c"hello\00"
-
-declare i8* @strcpy(i8*, i8*)
-
-declare i8* @__strcpy_chk(i8*, i8*, i32) nounwind
-
-declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readonly
-
-; rdar://6839935
-
-define i32 @t1() {
-; CHECK: @t1
- %target = alloca [1024 x i8]
- %arg1 = getelementptr [1024 x i8]* %target, i32 0, i32 0
- %arg2 = getelementptr [6 x i8]* @hello, i32 0, i32 0
- %rslt1 = call i8* @strcpy( i8* %arg1, i8* %arg2 )
-; CHECK: @llvm.memcpy.p0i8.p0i8.i32
- ret i32 0
-}
-
-define i32 @t2() {
-; CHECK: @t2
- %target = alloca [1024 x i8]
- %arg1 = getelementptr [1024 x i8]* %target, i32 0, i32 0
- %arg2 = getelementptr [6 x i8]* @hello, i32 0, i32 0
- %tmp1 = call i32 @llvm.objectsize.i32(i8* %arg1, i1 false)
- %rslt1 = call i8* @__strcpy_chk(i8* %arg1, i8* %arg2, i32 %tmp1)
-; CHECK: @__memcpy_chk
- ret i32 0
-}
diff --git a/test/Transforms/TailCallElim/nocapture.ll b/test/Transforms/TailCallElim/nocapture.ll
index 87cb9dd427b..5a1a9a6e7ce 100644
--- a/test/Transforms/TailCallElim/nocapture.ll
+++ b/test/Transforms/TailCallElim/nocapture.ll
@@ -1,9 +1,9 @@
; RUN: opt %s -tailcallelim -S | FileCheck %s
-; XFAIL: *
declare void @use(i8* nocapture, i8* nocapture)
+declare void @boring()
-define i8* @foo(i8* nocapture %A, i1 %cond) {
+define i8* @test1(i8* nocapture %A, i1 %cond) {
; CHECK: tailrecurse:
; CHECK: %A.tr = phi i8* [ %A, %0 ], [ %B, %cond_true ]
; CHECK: %cond.tr = phi i1 [ %cond, %0 ], [ false, %cond_true ]
@@ -14,12 +14,27 @@ define i8* @foo(i8* nocapture %A, i1 %cond) {
cond_true:
; CHECK: cond_true:
; CHECK: br label %tailrecurse
- call i8* @foo(i8* %B, i1 false)
+ call i8* @test1(i8* %B, i1 false)
ret i8* null
cond_false:
; CHECK: cond_false
call void @use(i8* %A, i8* %B)
-; CHECK: tail call void @use(i8* %A.tr, i8* %B)
+; CHECK: call void @use(i8* %A.tr, i8* %B)
+ call void @boring()
+; CHECK: tail call void @boring()
ret i8* null
; CHECK: ret i8* null
}
+
+; PR14143
+define void @test2(i8* %a, i8* %b) {
+; CHECK: @test2
+; CHECK-NOT: tail call
+; CHECK: ret void
+ %c = alloca [100 x i8], align 16
+ %tmp = bitcast [100 x i8]* %c to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %tmp, i64 100, i32 1, i1 false)
+ ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1)
diff --git a/test/lit.cfg b/test/lit.cfg
index dc37317ba95..7e6760e95ae 100644
--- a/test/lit.cfg
+++ b/test/lit.cfg
@@ -146,13 +146,12 @@ if re.search(r'cygwin|mingw32|win32', mcjit_triple):
mcjit_triple += "-elf"
config.substitutions.append( ('%mcjit_triple', mcjit_triple) )
-# When running under valgrind, we mangle '-vg' or '-vg_leak' onto the end of the
-# triple so we can check it with XFAIL and XTARGET.
-config.target_triple += lit.valgrindTriple
-
# Provide a substitution for those tests that need to run the jit to obtain data
# but simply want to use the jit currently considered most reliable for the platform
-defaultIsMCJIT='true' if 'arm' in config.target_triple else 'false'
+if 'arm' in config.target_triple:
+ defaultIsMCJIT = 'true'
+else:
+ defaultIsMCJIT = 'false'
config.substitutions.append( ('%defaultjit', '-use-mcjit='+defaultIsMCJIT) )
# Process jit implementation option
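For reference, %defaultjit simply expands to a -use-mcjit flag whose value was chosen above per target, so individual RUN lines stay target-agnostic. A simplified illustration of lit applying such a substitution (real lit does this while processing RUN lines; the command shown is made up):

    # Simplified: applying a lit substitution to a RUN line.
    substitutions = [('%defaultjit', '-use-mcjit=true')]   # per-target value

    run_line = 'lli %defaultjit %s'
    for key, value in substitutions:
        run_line = run_line.replace(key, value)
    print(run_line)   # lli -use-mcjit=true %s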
@@ -242,6 +241,10 @@ else:
if loadable_module:
config.available_features.add('loadable_module')
+# LTO
+if config.lto_is_enabled == "1":
+ config.available_features.add('lto')
+
# llc knows whether it is compiled with -DNDEBUG.
import subprocess
try:
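The lto_is_enabled flag flows from the build system (via the lit.site.cfg.in change below) into an ordinary lit feature, which tests can then name in a REQUIRES line. A minimal sketch of that gating, with a hypothetical requirement list standing in for a real test:

    # Sketch: a configure-time flag becomes a feature that gates tests.
    available_features = set()
    lto_is_enabled = "1"                  # substituted from @LTO_IS_ENABLED@
    if lto_is_enabled == "1":
        available_features.add('lto')

    requires = ['lto']                    # e.g. from "; REQUIRES: lto"
    missing = [f for f in requires if f not in available_features]
    if missing:
        print("UNSUPPORTED: test requires %s" % ', '.join(missing))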
diff --git a/test/lit.site.cfg.in b/test/lit.site.cfg.in
index 178b22f10f3..2bbe63e6348 100644
--- a/test/lit.site.cfg.in
+++ b/test/lit.site.cfg.in
@@ -11,6 +11,7 @@ config.python_executable = "@PYTHON_EXECUTABLE@"
config.ocamlopt_executable = "@OCAMLOPT@"
config.enable_shared = @ENABLE_SHARED@
config.enable_assertions = @ENABLE_ASSERTIONS@
+config.lto_is_enabled = "@LTO_IS_ENABLED@"
config.targets_to_build = "@TARGETS_TO_BUILD@"
config.llvm_bindings = "@LLVM_BINDINGS@"
config.host_os = "@HOST_OS@"
diff --git a/tools/bugpoint-passes/bugpoint.exports b/tools/bugpoint-passes/bugpoint.exports
index d8fdd6a5767..e69de29bb2d 100644
--- a/tools/bugpoint-passes/bugpoint.exports
+++ b/tools/bugpoint-passes/bugpoint.exports
@@ -1 +0,0 @@
-_ZN4llvm14BasicBlockPass14doFinalizationERNS_6ModuleE
diff --git a/tools/llvm-dwarfdump/llvm-dwarfdump.cpp b/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
index 38c3a1e76f5..309bc4ecd46 100644
--- a/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
+++ b/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
@@ -1,4 +1,4 @@
-//===-- llvm-dwarfdump.cpp - Debug info dumping utility for llvm -----------===//
+//===-- llvm-dwarfdump.cpp - Debug info dumping utility for llvm ----------===//
//
// The LLVM Compiler Infrastructure
//
@@ -118,8 +118,8 @@ static void DumpInput(const StringRef &Filename) {
if (PrintFunctions)
SpecFlags |= DILineInfoSpecifier::FunctionName;
if (PrintInlining) {
- DIInliningInfo InliningInfo = dictx->getInliningInfoForAddress(
- Address, SpecFlags);
+ DIInliningInfo InliningInfo =
+ dictx->getInliningInfoForAddress(Address, SpecFlags);
uint32_t n = InliningInfo.getNumberOfFrames();
if (n == 0) {
// Print one empty debug line info in any case.
diff --git a/unittests/ExecutionEngine/JIT/JITTest.cpp b/unittests/ExecutionEngine/JIT/JITTest.cpp
index ae6855e68bf..6933091949b 100644
--- a/unittests/ExecutionEngine/JIT/JITTest.cpp
+++ b/unittests/ExecutionEngine/JIT/JITTest.cpp
@@ -606,7 +606,7 @@ TEST_F(JITTest, FunctionIsRecompiledAndRelinked) {
// program from the IR input to the JIT to assert that the JIT doesn't use its
// definition.
extern "C" int32_t JITTest_AvailableExternallyGlobal;
-int32_t JITTest_AvailableExternallyGlobal = 42;
+int32_t JITTest_AvailableExternallyGlobal LLVM_ATTRIBUTE_USED = 42;
namespace {
// Tests on ARM are disabled as we're running the old jit
diff --git a/unittests/ExecutionEngine/JIT/Makefile b/unittests/ExecutionEngine/JIT/Makefile
index b535a6b2960..9e0bb9ea593 100644
--- a/unittests/ExecutionEngine/JIT/Makefile
+++ b/unittests/ExecutionEngine/JIT/Makefile
@@ -35,8 +35,15 @@ ifeq ($(USE_OPROFILE), 1)
LINK_COMPONENTS += oprofilejit
endif
+EXPORTED_SYMBOL_FILE = $(PROJ_OBJ_DIR)/JITTests.exports
include $(LLVM_SRC_ROOT)/unittests/Makefile.unittest
# Permit these tests to use the JIT's symbolic lookup.
LD.Flags += $(RDYNAMIC)
+
+# Symbol exports are necessary (at least for now) when building with LTO.
+$(LLVMUnitTestExe): $(NativeExportsFile)
+$(PROJ_OBJ_DIR)/JITTests.exports: $(PROJ_SRC_DIR)/JITTests.def $(PROJ_OBJ_DIR)/.dir
+ tail -n +2 $< > $@
+
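The tail -n +2 rule drops the first line of JITTests.def to produce a bare symbol list, assuming (as is conventional for .def files) that the first line is a header such as EXPORTS. A Python equivalent of the conversion, purely illustrative:

    # Illustrative equivalent of: tail -n +2 JITTests.def > JITTests.exports
    with open('JITTests.def') as src, open('JITTests.exports', 'w') as dst:
        dst.writelines(src.readlines()[1:])   # skip the header line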
diff --git a/utils/TableGen/CodeGenInstruction.cpp b/utils/TableGen/CodeGenInstruction.cpp
index 99d2f173a87..fd38672bfcf 100644
--- a/utils/TableGen/CodeGenInstruction.cpp
+++ b/utils/TableGen/CodeGenInstruction.cpp
@@ -233,11 +233,12 @@ static void ParseConstraint(const std::string &CStr, CGIOperandList &Ops) {
if (wpos == std::string::npos)
throw "Illegal format for tied-to constraint: '" + CStr + "'";
- std::pair<unsigned,unsigned> SrcOp =
- Ops.ParseOperandName(Name.substr(wpos), false);
- if (SrcOp > DestOp)
- throw "Illegal tied-to operand constraint '" + CStr + "'";
-
+ std::string SrcOpName = Name.substr(wpos);
+ std::pair<unsigned,unsigned> SrcOp = Ops.ParseOperandName(SrcOpName, false);
+ if (SrcOp > DestOp) {
+ std::swap(SrcOp, DestOp);
+ std::swap(SrcOpName, DestOpName);
+ }
unsigned FlatOpNo = Ops.getFlattenedOperandNumber(SrcOp);
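This change makes tied-to constraints order-insensitive: a constraint written "backwards" is now normalized by swapping the two operands instead of being rejected. A tiny Python sketch of the normalization, modeling operand positions as comparable tuples the way the C++ uses std::pair:

    # Sketch: normalize a tied-to pair so the earlier operand is the source.
    def normalize_tie(dest_op, src_op, dest_name, src_name):
        if src_op > dest_op:                  # written in reverse order
            dest_op, src_op = src_op, dest_op
            dest_name, src_name = src_name, dest_name
        return dest_op, src_op, dest_name, src_name

    print(normalize_tie((0, 0), (2, 0), '$dst', '$src'))
    # ((2, 0), (0, 0), '$src', '$dst')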
diff --git a/utils/lit/lit/ExampleTests/LLVM.InTree/test/Bar/bar-test.ll b/utils/lit/lit/ExampleTests/LLVM.InTree/test/Bar/bar-test.ll
deleted file mode 100644
index 3017b13e48c..00000000000
--- a/utils/lit/lit/ExampleTests/LLVM.InTree/test/Bar/bar-test.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: true
-; XFAIL: *
-; XTARGET: darwin
diff --git a/utils/lit/lit/ExampleTests/LLVM.InTree/test/lit.cfg b/utils/lit/lit/ExampleTests/LLVM.InTree/test/lit.cfg
index e9df1e5b53b..3fdd63c2245 100644
--- a/utils/lit/lit/ExampleTests/LLVM.InTree/test/lit.cfg
+++ b/utils/lit/lit/ExampleTests/LLVM.InTree/test/lit.cfg
@@ -77,7 +77,7 @@ for line in open(os.path.join(config.llvm_obj_root, 'test', 'site.exp')):
excludes = []
-# Provide target_triple for use in XFAIL and XTARGET.
+# Provide target_triple for use in XFAIL.
config.target_triple = site_exp['target_triplet']
# Provide llvm_supports_target for use in local configs.
diff --git a/utils/lit/lit/ExampleTests/LLVM.OutOfTree/src/test/lit.cfg b/utils/lit/lit/ExampleTests/LLVM.OutOfTree/src/test/lit.cfg
index e9df1e5b53b..3fdd63c2245 100644
--- a/utils/lit/lit/ExampleTests/LLVM.OutOfTree/src/test/lit.cfg
+++ b/utils/lit/lit/ExampleTests/LLVM.OutOfTree/src/test/lit.cfg
@@ -77,7 +77,7 @@ for line in open(os.path.join(config.llvm_obj_root, 'test', 'site.exp')):
excludes = []
-# Provide target_triple for use in XFAIL and XTARGET.
+# Provide target_triple for use in XFAIL.
config.target_triple = site_exp['target_triplet']
# Provide llvm_supports_target for use in local configs.
diff --git a/utils/lit/lit/ExampleTests/lit.cfg b/utils/lit/lit/ExampleTests/lit.cfg
index 20ee37dcef2..2629918d9f6 100644
--- a/utils/lit/lit/ExampleTests/lit.cfg
+++ b/utils/lit/lit/ExampleTests/lit.cfg
@@ -23,4 +23,4 @@ config.test_exec_root = None
config.target_triple = 'foo'
# available_features: Used by ShTest and TclTest formats for REQUIRES checks.
-config.available_features = ['some-feature-name']
+config.available_features.add('some-feature-name')
diff --git a/utils/lit/lit/ExampleTests/vg-fail.c b/utils/lit/lit/ExampleTests/vg-fail.c
new file mode 100644
index 00000000000..e3339ff91aa
--- /dev/null
+++ b/utils/lit/lit/ExampleTests/vg-fail.c
@@ -0,0 +1,4 @@
+// This test should XPASS when run without valgrind.
+
+// RUN: true
+// XFAIL: valgrind
diff --git a/utils/lit/lit/ExampleTests/xfail-feature.c b/utils/lit/lit/ExampleTests/xfail-feature.c
new file mode 100644
index 00000000000..3444bf87008
--- /dev/null
+++ b/utils/lit/lit/ExampleTests/xfail-feature.c
@@ -0,0 +1,4 @@
+// This test should XPASS.
+
+// RUN: true
+// XFAIL: some-feature-name
diff --git a/utils/lit/lit/LitConfig.py b/utils/lit/lit/LitConfig.py
index c71c0ccdea9..0a359a3db8e 100644
--- a/utils/lit/lit/LitConfig.py
+++ b/utils/lit/lit/LitConfig.py
@@ -42,14 +42,11 @@ class LitConfig:
self.numWarnings = 0
self.valgrindArgs = []
- self.valgrindTriple = ""
if self.useValgrind:
- self.valgrindTriple = "-vg"
self.valgrindArgs = ['valgrind', '-q', '--run-libc-freeres=no',
'--tool=memcheck', '--trace-children=yes',
'--error-exitcode=123']
if self.valgrindLeakCheck:
- self.valgrindTriple += "_leak"
self.valgrindArgs.append('--leak-check=full')
else:
# The default is 'summary'.
diff --git a/utils/lit/lit/TestRunner.py b/utils/lit/lit/TestRunner.py
index 71882b76f8b..0c1911ed356 100644
--- a/utils/lit/lit/TestRunner.py
+++ b/utils/lit/lit/TestRunner.py
@@ -370,27 +370,27 @@ def executeScript(test, litConfig, tmpBase, commands, cwd):
return executeCommand(command, cwd=cwd, env=test.config.environment)
-def isExpectedFail(xfails, xtargets, target_triple):
- # Check if any xfail matches this target.
+def isExpectedFail(test, xfails):
+ # Check if any of the xfails match an available feature or the target.
for item in xfails:
- if item == '*' or item in target_triple:
- break
- else:
- return False
+ # If this is the wildcard, it always fails.
+ if item == '*':
+ return True
- # If so, see if it is expected to pass on this target.
- #
- # FIXME: Rename XTARGET to something that makes sense, like XPASS.
- for item in xtargets:
- if item == '*' or item in target_triple:
- return False
+ # If this is an exact match for one of the features, it fails.
+ if item in test.config.available_features:
+ return True
+
+ # If this is a substring of the target triple, it fails.
+ if item in test.suite.config.target_triple:
+ return True
- return True
+ return False
def parseIntegratedTestScript(test, normalize_slashes=False,
extra_substitutions=[]):
"""parseIntegratedTestScript - Scan an LLVM/Clang style integrated test
- script and extract the lines to 'RUN' as well as 'XFAIL' and 'XTARGET'
+ script and extract the lines to 'RUN' as well as 'XFAIL' and 'REQUIRES'
information. The RUN lines also will have variable substitution performed.
"""
@@ -431,7 +431,6 @@ def parseIntegratedTestScript(test, normalize_slashes=False,
# Collect the test lines from the script.
script = []
xfails = []
- xtargets = []
requires = []
for ln in open(sourcepath):
if 'RUN:' in ln:
@@ -450,9 +449,6 @@ def parseIntegratedTestScript(test, normalize_slashes=False,
elif 'XFAIL:' in ln:
items = ln[ln.index('XFAIL:') + 6:].split(',')
xfails.extend([s.strip() for s in items])
- elif 'XTARGET:' in ln:
- items = ln[ln.index('XTARGET:') + 8:].split(',')
- xtargets.extend([s.strip() for s in items])
elif 'REQUIRES:' in ln:
items = ln[ln.index('REQUIRES:') + 9:].split(',')
requires.extend([s.strip() for s in items])
@@ -491,7 +487,7 @@ def parseIntegratedTestScript(test, normalize_slashes=False,
return (Test.UNSUPPORTED,
"Test requires the following features: %s" % msg)
- isXFail = isExpectedFail(xfails, xtargets, test.suite.config.target_triple)
+ isXFail = isExpectedFail(test, xfails)
return script,isXFail,tmpBase,execdir
def formatTestOutput(status, out, err, exitCode, failDueToStderr, script):
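The rewritten check replaces triple mangling plus XTARGET with a direct match: an XFAIL entry marks the test expected-to-fail if it is '*', names an available feature, or is a substring of the target triple. A standalone sketch of the same logic with mock inputs (real lit reads these off the test object):

    # Standalone sketch of the new XFAIL matching.
    def is_expected_fail(xfails, features, target_triple):
        for item in xfails:
            if item == '*':                   # wildcard always fails
                return True
            if item in features:              # exact feature match
                return True
            if item in target_triple:         # substring of the triple
                return True
        return False

    # "XFAIL: valgrind" only fires when the valgrind feature is set:
    print(is_expected_fail(['valgrind'], {'valgrind'}, 'x86_64-apple-macosx10.8.0'))  # True
    print(is_expected_fail(['valgrind'], set(), 'x86_64-apple-macosx10.8.0'))         # False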
diff --git a/utils/lit/lit/TestingConfig.py b/utils/lit/lit/TestingConfig.py
index 223120c4fe2..a1f79a3bfc4 100644
--- a/utils/lit/lit/TestingConfig.py
+++ b/utils/lit/lit/TestingConfig.py
@@ -16,6 +16,7 @@ class TestingConfig:
'PATH' : os.pathsep.join(litConfig.path +
[os.environ.get('PATH','')]),
'SYSTEMROOT' : os.environ.get('SYSTEMROOT',''),
+ 'TERM' : os.environ.get('TERM',''),
'LLVM_DISABLE_CRASH_REPORT' : '1',
}
@@ -28,6 +29,13 @@ class TestingConfig:
'TMP' : os.environ.get('TMP',''),
})
+ # Set the default available features based on the LitConfig.
+ available_features = []
+ if litConfig.useValgrind:
+ available_features.append('valgrind')
+ if litConfig.valgrindLeakCheck:
+ available_features.append('vg_leak')
+
config = TestingConfig(parent,
name = '<unnamed>',
suffixes = set(),
@@ -39,7 +47,7 @@ class TestingConfig:
test_exec_root = None,
test_source_root = None,
excludes = [],
- available_features = [])
+ available_features = available_features)
if os.path.exists(path):
# FIXME: Improve detection and error reporting of errors in the
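These default features are what replace the old '-vg'/'-vg_leak' triple suffixes: valgrind runs now surface as plain features, which the new vg-fail.c example above exercises with 'XFAIL: valgrind'. A hedged sketch of the mapping (the nesting of vg_leak under valgrind is assumed, since leak checking only applies when valgrind is in use):

    # Sketch: valgrind mode expressed as lit features instead of a triple suffix.
    def default_features(use_valgrind, valgrind_leak_check):
        features = []
        if use_valgrind:
            features.append('valgrind')
            if valgrind_leak_check:
                features.append('vg_leak')
        return features

    print(default_features(True, True))    # ['valgrind', 'vg_leak']
    print(default_features(False, False))  # []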
diff --git a/utils/vim/llvm.vim b/utils/vim/llvm.vim
index 9cd483d1479..f80de83da8c 100644
--- a/utils/vim/llvm.vim
+++ b/utils/vim/llvm.vim
@@ -79,7 +79,6 @@ syn match llvmSpecialComment /;\s*RUN:.*$/
syn match llvmSpecialComment /;\s*PR\d*\s*$/
syn match llvmSpecialComment /;\s*END\.\s*$/
syn match llvmSpecialComment /;\s*XFAIL:.*$/
-syn match llvmSpecialComment /;\s*XTARGET:.*$/
if version >= 508 || !exists("did_c_syn_inits")
if version < 508