summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorElena Demikhovsky <elena.demikhovsky@intel.com>2014-11-23 08:07:43 +0000
committerElena Demikhovsky <elena.demikhovsky@intel.com>2014-11-23 08:07:43 +0000
commitae1ae2c3a179851437ed2f4ac3d83a0f024e5861 (patch)
treec03cb369617ea9722efc3424445b8b20e21187b6
parent4f5aa5994e1ace1da714bd2faacfc8f0dbaef4f7 (diff)
Masked Vector Load and Store Intrinsics.
Introduced new target-independent intrinsics in order to support masked vector loads and stores. The loop vectorizer optimizes loops containing conditional memory accesses by generating these intrinsics for the existing AVX2 and AVX-512 targets. The vectorizer asks the target about the availability of masked vector loads and stores. Added SDNodes for masked operations and lowering patterns for the X86 code generator. Examples: declare <16 x i32> @llvm.masked.load.v16i32(i8* %addr, <16 x i32> %passthru, i32 4 /* align */, <16 x i1> %mask); declare void @llvm.masked.store.v8f64(i8* %addr, <8 x double> %value, i32 4, <8 x i1> %mask). The scalarizer for other targets (not AVX2/AVX-512) will be done in a separate patch. http://reviews.llvm.org/D6191 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@222632 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--include/llvm/Analysis/TargetTransformInfo.h7
-rw-r--r--include/llvm/CodeGen/ISDOpcodes.h3
-rw-r--r--include/llvm/CodeGen/SelectionDAG.h4
-rw-r--r--include/llvm/CodeGen/SelectionDAGNodes.h68
-rw-r--r--include/llvm/IR/IRBuilder.h11
-rw-r--r--include/llvm/IR/Intrinsics.h11
-rw-r--r--include/llvm/IR/Intrinsics.td15
-rw-r--r--include/llvm/Target/TargetSelectionDAG.td13
-rw-r--r--lib/Analysis/TargetTransformInfo.cpp11
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp161
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp23
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeTypes.h4
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp6368
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAG.cpp54
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp70
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h2
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp2
-rw-r--r--lib/IR/Function.cpp18
-rw-r--r--lib/IR/IRBuilder.cpp26
-rw-r--r--lib/IR/Verifier.cpp13
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp22
-rw-r--r--lib/Target/X86/X86InstrAVX512.td75
-rw-r--r--lib/Target/X86/X86InstrSSE.td55
-rw-r--r--lib/Target/X86/X86TargetTransformInfo.cpp18
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp98
-rw-r--r--test/CodeGen/X86/masked_memop.ll73
-rw-r--r--test/Transforms/LoopVectorize/X86/mask1.ll83
-rw-r--r--test/Transforms/LoopVectorize/X86/mask2.ll84
-rw-r--r--test/Transforms/LoopVectorize/X86/mask3.ll84
-rw-r--r--test/Transforms/LoopVectorize/X86/mask4.ll83
-rw-r--r--utils/TableGen/CodeGenTarget.cpp3
-rw-r--r--utils/TableGen/IntrinsicEmitter.cpp10
32 files changed, 4418 insertions, 3154 deletions
diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h
index 9acaaa6f2eb..8af8f77ad67 100644
--- a/include/llvm/Analysis/TargetTransformInfo.h
+++ b/include/llvm/Analysis/TargetTransformInfo.h
@@ -270,6 +270,13 @@ public:
int64_t BaseOffset, bool HasBaseReg,
int64_t Scale) const;
+ /// \brief Return true if the target supports masked load/store instructions.
+ /// AVX2 allows masks for consecutive load and store for i32 and i64 elements.
+ /// AVX-512 architecture will also allow masks for non-consecutive memory
+ /// accesses.
+ virtual bool isLegalPredicatedStore(Type *DataType, int Consecutive) const;
+ virtual bool isLegalPredicatedLoad (Type *DataType, int Consecutive) const;
+
/// \brief Return the cost of the scaling factor used in the addressing
/// mode represented by AM for this target, for a load/store
/// of the specified type.
diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h
index bbf0ad30458..4f55ccac16e 100644
--- a/include/llvm/CodeGen/ISDOpcodes.h
+++ b/include/llvm/CodeGen/ISDOpcodes.h
@@ -675,6 +675,9 @@ namespace ISD {
ATOMIC_LOAD_UMIN,
ATOMIC_LOAD_UMAX,
+ // Masked load and store
+ MLOAD, MSTORE,
+
/// This corresponds to the llvm.lifetime.* intrinsics. The first operand
/// is the chain and the second operand is the alloca pointer.
LIFETIME_START, LIFETIME_END,
diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h
index fbdaf0d7fd2..4950797bb1e 100644
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@@ -866,6 +866,10 @@ public:
SDValue getIndexedStore(SDValue OrigStoe, SDLoc dl, SDValue Base,
SDValue Offset, ISD::MemIndexedMode AM);
+ SDValue getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr,
+ SDValue Mask, SDValue Src0, MachineMemOperand *MMO);
+ SDValue getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val,
+ SDValue Ptr, SDValue Mask, MachineMemOperand *MMO);
/// getSrcValue - Construct a node to track a Value* through the backend.
SDValue getSrcValue(const Value *v);
diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h
index 47158272412..acd178892d0 100644
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -1177,6 +1177,8 @@ public:
N->getOpcode() == ISD::ATOMIC_LOAD_UMAX ||
N->getOpcode() == ISD::ATOMIC_LOAD ||
N->getOpcode() == ISD::ATOMIC_STORE ||
+ N->getOpcode() == ISD::MLOAD ||
+ N->getOpcode() == ISD::MSTORE ||
N->isMemIntrinsic() ||
N->isTargetMemoryOpcode();
}
@@ -1926,6 +1928,72 @@ public:
}
};
+/// MaskedLoadStoreSDNode - This is a base class used to represent MLOAD and
+/// MSTORE nodes.
+///
+class MaskedLoadStoreSDNode : public MemSDNode {
+ // Operands
+ SDUse Ops[4];
+public:
+ friend class SelectionDAG;
+ MaskedLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order, DebugLoc dl,
+ SDValue *Operands, unsigned numOperands,
+ SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
+ : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
+ InitOperands(Ops, Operands, numOperands);
+ }
+
+ // In both nodes the address is Op1 and the mask is Op2:
+ // MaskedLoadSDNode (Chain, ptr, mask, src0), src0 is a passthru value
+ // MaskedStoreSDNode (Chain, ptr, mask, data)
+ // Mask is a vector of i1 elements
+ const SDValue &getBasePtr() const { return getOperand(1); }
+ const SDValue &getMask() const { return getOperand(2); }
+
+ static bool classof(const SDNode *N) {
+ return N->getOpcode() == ISD::MLOAD ||
+ N->getOpcode() == ISD::MSTORE;
+ }
+};
+
+/// MaskedLoadSDNode - This class is used to represent an MLOAD node
+///
+class MaskedLoadSDNode : public MaskedLoadStoreSDNode {
+public:
+ friend class SelectionDAG;
+ MaskedLoadSDNode(unsigned Order, DebugLoc dl,
+ SDValue *Operands, unsigned numOperands,
+ SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
+ : MaskedLoadStoreSDNode(ISD::MLOAD, Order, dl, Operands, numOperands,
+ VTs, MemVT, MMO)
+ {}
+
+ const SDValue &getSrc0() const { return getOperand(3); }
+ static bool classof(const SDNode *N) {
+ return N->getOpcode() == ISD::MLOAD;
+ }
+};
+
+/// MaskedStoreSDNode - This class is used to represent an MSTORE node
+///
+class MaskedStoreSDNode : public MaskedLoadStoreSDNode {
+
+public:
+ friend class SelectionDAG;
+ MaskedStoreSDNode(unsigned Order, DebugLoc dl,
+ SDValue *Operands, unsigned numOperands,
+ SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
+ : MaskedLoadStoreSDNode(ISD::MSTORE, Order, dl, Operands, numOperands,
+ VTs, MemVT, MMO)
+ {}
+
+ const SDValue &getData() const { return getOperand(3); }
+
+ static bool classof(const SDNode *N) {
+ return N->getOpcode() == ISD::MSTORE;
+ }
+};
+
/// MachineSDNode - An SDNode that represents everything that will be needed
/// to construct a MachineInstr. These nodes are created during the
/// instruction selection proper phase.
diff --git a/include/llvm/IR/IRBuilder.h b/include/llvm/IR/IRBuilder.h
index 088c7b4636a..6fa7d945866 100644
--- a/include/llvm/IR/IRBuilder.h
+++ b/include/llvm/IR/IRBuilder.h
@@ -429,11 +429,22 @@ public:
/// If the pointer isn't i8* it will be converted.
CallInst *CreateLifetimeEnd(Value *Ptr, ConstantInt *Size = nullptr);
+ /// \brief Create a call to Masked Load intrinsic
+ CallInst *CreateMaskedLoad(ArrayRef<Value *> Ops);
+
+ /// \brief Create a call to Masked Store intrinsic
+ CallInst *CreateMaskedStore(ArrayRef<Value *> Ops);
+
/// \brief Create an assume intrinsic call that allows the optimizer to
/// assume that the provided condition will be true.
CallInst *CreateAssumption(Value *Cond);
private:
+ /// \brief Create a call to a masked intrinsic with the given Id.
+ /// A masked intrinsic has only one overloaded type - the data type.
+ CallInst *CreateMaskedIntrinsic(unsigned Id, ArrayRef<Value *> Ops,
+ Type *DataTy);
+
Value *getCastedInt8PtrValue(Value *Ptr);
};
diff --git a/include/llvm/IR/Intrinsics.h b/include/llvm/IR/Intrinsics.h
index acc0e9e5d37..5236f195140 100644
--- a/include/llvm/IR/Intrinsics.h
+++ b/include/llvm/IR/Intrinsics.h
@@ -76,7 +76,8 @@ namespace Intrinsic {
enum IITDescriptorKind {
Void, VarArg, MMX, Metadata, Half, Float, Double,
Integer, Vector, Pointer, Struct,
- Argument, ExtendArgument, TruncArgument, HalfVecArgument
+ Argument, ExtendArgument, TruncArgument, HalfVecArgument,
+ SameVecWidthArgument
} Kind;
union {
@@ -96,13 +97,15 @@ namespace Intrinsic {
};
unsigned getArgumentNumber() const {
assert(Kind == Argument || Kind == ExtendArgument ||
- Kind == TruncArgument || Kind == HalfVecArgument);
+ Kind == TruncArgument || Kind == HalfVecArgument ||
+ Kind == SameVecWidthArgument);
return Argument_Info >> 2;
}
ArgKind getArgumentKind() const {
assert(Kind == Argument || Kind == ExtendArgument ||
- Kind == TruncArgument || Kind == HalfVecArgument);
- return (ArgKind)(Argument_Info&3);
+ Kind == TruncArgument || Kind == HalfVecArgument ||
+ Kind == SameVecWidthArgument);
+ return (ArgKind)(Argument_Info & 3);
}
static IITDescriptor get(IITDescriptorKind K, unsigned Field) {
diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td
index 98d48de5103..1344c0c6e19 100644
--- a/include/llvm/IR/Intrinsics.td
+++ b/include/llvm/IR/Intrinsics.td
@@ -112,6 +112,10 @@ class LLVMMatchType<int num>
// the intrinsic is overloaded, so the matched type should be declared as iAny.
class LLVMExtendedType<int num> : LLVMMatchType<num>;
class LLVMTruncatedType<int num> : LLVMMatchType<num>;
+class LLVMVectorSameWidth<int num, LLVMType elty>
+ : LLVMMatchType<num> {
+ ValueType ElTy = elty.VT;
+}
// Match the type of another intrinsic parameter that is expected to be a
// vector type, but change the element count to be half as many
@@ -539,6 +543,17 @@ def int_convertuu : Intrinsic<[llvm_anyint_ty],
def int_clear_cache : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty],
[], "llvm.clear_cache">;
+//===-------------------------- Masked Intrinsics -------------------------===//
+//
+def int_masked_store : Intrinsic<[], [llvm_ptr_ty, llvm_anyvector_ty,
+ llvm_i32_ty,
+ LLVMVectorSameWidth<0, llvm_i1_ty>],
+ [IntrReadWriteArgMem]>;
+
+def int_masked_load : Intrinsic<[llvm_anyvector_ty],
+ [llvm_ptr_ty, LLVMMatchType<0>, llvm_i32_ty,
+ LLVMVectorSameWidth<0, llvm_i1_ty>],
+ [IntrReadArgMem]>;
//===----------------------------------------------------------------------===//
// Target-specific intrinsics
//===----------------------------------------------------------------------===//
diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td
index f63afd70983..907baa1b9b1 100644
--- a/include/llvm/Target/TargetSelectionDAG.td
+++ b/include/llvm/Target/TargetSelectionDAG.td
@@ -188,6 +188,14 @@ def SDTIStore : SDTypeProfile<1, 3, [ // indexed store
SDTCisSameAs<0, 2>, SDTCisPtrTy<0>, SDTCisPtrTy<3>
]>;
+def SDTMaskedStore: SDTypeProfile<0, 3, [ // masked store
+ SDTCisPtrTy<0>, SDTCisVec<1>, SDTCisVec<2>
+]>;
+
+def SDTMaskedLoad: SDTypeProfile<1, 3, [ // masked load
+ SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisVec<2>, SDTCisSameAs<0, 3>
+]>;
+
def SDTVecShuffle : SDTypeProfile<1, 2, [
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
]>;
@@ -454,6 +462,11 @@ def atomic_load : SDNode<"ISD::ATOMIC_LOAD", SDTAtomicLoad,
def atomic_store : SDNode<"ISD::ATOMIC_STORE", SDTAtomicStore,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def masked_store : SDNode<"ISD::MSTORE", SDTMaskedStore,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def masked_load : SDNode<"ISD::MLOAD", SDTMaskedLoad,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+
// Do not use ld, st directly. Use load, extload, sextload, zextload, store,
// and truncst (see below).
def ld : SDNode<"ISD::LOAD" , SDTLoad,
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index c1ffb9daefa..e09e5f8c28b 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -101,6 +101,17 @@ bool TargetTransformInfo::isLegalICmpImmediate(int64_t Imm) const {
return PrevTTI->isLegalICmpImmediate(Imm);
}
+bool TargetTransformInfo::isLegalPredicatedLoad(Type *DataType,
+ int Consecutive) const {
+ return false;
+}
+
+bool TargetTransformInfo::isLegalPredicatedStore(Type *DataType,
+ int Consecutive) const {
+ return false;
+}
+
+
bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
int64_t BaseOffset,
bool HasBaseReg,
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 201429fe754..2cf5e753966 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -303,6 +303,8 @@ namespace {
SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
SDValue visitVECTOR_SHUFFLE(SDNode *N);
SDValue visitINSERT_SUBVECTOR(SDNode *N);
+ SDValue visitMLOAD(SDNode *N);
+ SDValue visitMSTORE(SDNode *N);
SDValue XformToShuffleWithZero(SDNode *N);
SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS);
@@ -412,6 +414,7 @@ namespace {
EVT getSetCCResultType(EVT VT) const {
return TLI.getSetCCResultType(*DAG.getContext(), VT);
}
+ int& MLD();
};
}
@@ -1351,6 +1354,8 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
+ case ISD::MLOAD: return visitMLOAD(N);
+ case ISD::MSTORE: return visitMSTORE(N);
}
return SDValue();
}
@@ -4771,6 +4776,162 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
}
+SDValue DAGCombiner::visitMSTORE(SDNode *N) {
+
+ if (Level >= AfterLegalizeTypes)
+ return SDValue();
+
+ MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N);
+ SDValue Mask = MST->getMask();
+ SDValue Data = MST->getData();
+ SDLoc DL(N);
+
+ // If the MSTORE data type requires splitting and the mask is provided by a
+ // SETCC, then split both nodes and their operands before legalization. This
+ // prevents the type legalizer from unrolling SETCC into scalar comparisons
+ // and enables future optimizations (e.g. min/max pattern matching on X86).
+ if (Mask.getOpcode() == ISD::SETCC) {
+
+ // Check if any splitting is required.
+ if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
+ TargetLowering::TypeSplitVector)
+ return SDValue();
+
+ SDValue MaskLo, MaskHi, Lo, Hi;
+ std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
+
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MST->getValueType(0));
+
+ SDValue Chain = MST->getChain();
+ SDValue Ptr = MST->getBasePtr();
+
+ EVT MemoryVT = MST->getMemoryVT();
+ unsigned Alignment = MST->getOriginalAlignment();
+
+ // If Alignment is equal to the vector size,
+ // take half of it for the second part.
+ unsigned SecondHalfAlignment =
+ (Alignment == Data->getValueType(0).getSizeInBits()/8) ?
+ Alignment/2 : Alignment;
+
+ EVT LoMemVT, HiMemVT;
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+ SDValue DataLo, DataHi;
+ std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
+
+ MachineMemOperand *MMO = DAG.getMachineFunction().
+ getMachineMemOperand(MST->getPointerInfo(),
+ MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
+ Alignment, MST->getAAInfo(), MST->getRanges());
+
+ Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, MMO);
+
+ unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, Ptr.getValueType()));
+
+ MMO = DAG.getMachineFunction().
+ getMachineMemOperand(MST->getPointerInfo(),
+ MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
+ SecondHalfAlignment, MST->getAAInfo(),
+ MST->getRanges());
+
+ Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, MMO);
+
+ AddToWorklist(Lo.getNode());
+ AddToWorklist(Hi.getNode());
+
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitMLOAD(SDNode *N) {
+
+ if (Level >= AfterLegalizeTypes)
+ return SDValue();
+
+ MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N);
+ SDValue Mask = MLD->getMask();
+ SDLoc DL(N);
+
+ // If the MLOAD result requires splitting and the mask is provided by a
+ // SETCC, then split both nodes and their operands before legalization. This
+ // prevents the type legalizer from unrolling SETCC into scalar comparisons
+ // and enables future optimizations (e.g. min/max pattern matching on X86).
+
+ if (Mask.getOpcode() == ISD::SETCC) {
+ EVT VT = N->getValueType(0);
+
+ // Check if any splitting is required.
+ if (TLI.getTypeAction(*DAG.getContext(), VT) !=
+ TargetLowering::TypeSplitVector)
+ return SDValue();
+
+ SDValue MaskLo, MaskHi, Lo, Hi;
+ std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
+
+ SDValue Src0 = MLD->getSrc0();
+ SDValue Src0Lo, Src0Hi;
+ std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
+
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
+
+ SDValue Chain = MLD->getChain();
+ SDValue Ptr = MLD->getBasePtr();
+ EVT MemoryVT = MLD->getMemoryVT();
+ unsigned Alignment = MLD->getOriginalAlignment();
+
+ // If Alignment is equal to the vector size,
+ // take half of it for the second part.
+ unsigned SecondHalfAlignment =
+ (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
+ Alignment/2 : Alignment;
+
+ EVT LoMemVT, HiMemVT;
+ std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+ MachineMemOperand *MMO = DAG.getMachineFunction().
+ getMachineMemOperand(MLD->getPointerInfo(),
+ MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
+ Alignment, MLD->getAAInfo(), MLD->getRanges());
+
+ Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, MMO);
+
+ unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, Ptr.getValueType()));
+
+ MMO = DAG.getMachineFunction().
+ getMachineMemOperand(MLD->getPointerInfo(),
+ MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
+ SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
+
+ Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, MMO);
+
+ AddToWorklist(Lo.getNode());
+ AddToWorklist(Hi.getNode());
+
+ // Build a factor node to remember that this load is independent of the
+ // other one.
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Legalized the chain result - switch anything that used the old chain to
+ // use the new one.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
+
+ SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
+
+ SDValue RetOps[] = { LoadRes, Chain };
+ return DAG.getMergeValues(RetOps, DL);
+ }
+ return SDValue();
+}
+
SDValue DAGCombiner::visitVSELECT(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index b73bb0a897b..52c2d1be430 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -825,6 +825,10 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::SINT_TO_FP: Res = PromoteIntOp_SINT_TO_FP(N); break;
case ISD::STORE: Res = PromoteIntOp_STORE(cast<StoreSDNode>(N),
OpNo); break;
+ case ISD::MSTORE: Res = PromoteIntOp_MSTORE(cast<MaskedStoreSDNode>(N),
+ OpNo); break;
+ case ISD::MLOAD: Res = PromoteIntOp_MLOAD(cast<MaskedLoadSDNode>(N),
+ OpNo); break;
case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break;
case ISD::FP16_TO_FP:
case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break;
@@ -1091,6 +1095,25 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
N->getMemoryVT(), N->getMemOperand());
}
+SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo){
+
+ assert(OpNo == 2 && "Only know how to promote the mask!");
+ EVT DataVT = N->getOperand(3).getValueType();
+ SDValue Mask = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
+ SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
+ NewOps[OpNo] = Mask;
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo){
+ assert(OpNo == 2 && "Only know how to promote the mask!");
+ EVT DataVT = N->getValueType(0);
+ SDValue Mask = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
+ SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
+ NewOps[OpNo] = Mask;
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+}
+
SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) {
SDValue Op = GetPromotedInteger(N->getOperand(0));
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), Op);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 30f412ba317..805b0fc0463 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -285,6 +285,8 @@ private:
SDValue PromoteIntOp_TRUNCATE(SDNode *N);
SDValue PromoteIntOp_UINT_TO_FP(SDNode *N);
SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N);
+ SDValue PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo);
void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code);
@@ -578,6 +580,7 @@ private:
void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_MLOAD(MaskedLoadSDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -594,6 +597,7 @@ private:
SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
SDValue SplitVecOp_TRUNCATE(SDNode *N);
SDValue SplitVecOp_VSETCC(SDNode *N);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 27f63d27823..cb0db5c76a2 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1,3127 +1,3241 @@
-//===------- LegalizeVectorTypes.cpp - Legalization of vector types -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file performs vector type splitting and scalarization for LegalizeTypes.
-// Scalarization is the act of changing a computation in an illegal one-element
-// vector type to be a computation in its scalar element type. For example,
-// implementing <1 x f32> arithmetic in a scalar f32 register. This is needed
-// as a base case when scalarizing vector arithmetic like <4 x f32>, which
-// eventually decomposes to scalars if the target doesn't support v4f32 or v2f32
-// types.
-// Splitting is the act of changing a computation in an invalid vector type to
-// be a computation in two vectors of half the size. For example, implementing
-// <128 x f32> operations in terms of two <64 x f32> operations.
-//
-//===----------------------------------------------------------------------===//
-
-#include "LegalizeTypes.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "legalize-types"
-
-//===----------------------------------------------------------------------===//
-// Result Vector Scalarization: <1 x ty> -> ty.
-//===----------------------------------------------------------------------===//
-
-void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
- DEBUG(dbgs() << "Scalarize node result " << ResNo << ": ";
- N->dump(&DAG);
- dbgs() << "\n");
- SDValue R = SDValue();
-
- switch (N->getOpcode()) {
- default:
-#ifndef NDEBUG
- dbgs() << "ScalarizeVectorResult #" << ResNo << ": ";
- N->dump(&DAG);
- dbgs() << "\n";
-#endif
- report_fatal_error("Do not know how to scalarize the result of this "
- "operator!\n");
-
- case ISD::MERGE_VALUES: R = ScalarizeVecRes_MERGE_VALUES(N, ResNo);break;
- case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break;
- case ISD::BUILD_VECTOR: R = ScalarizeVecRes_BUILD_VECTOR(N); break;
- case ISD::CONVERT_RNDSAT: R = ScalarizeVecRes_CONVERT_RNDSAT(N); break;
- case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
- case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break;
- case ISD::FP_ROUND_INREG: R = ScalarizeVecRes_InregOp(N); break;
- case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break;
- case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
- case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break;
- case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
- case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break;
- case ISD::VSELECT: R = ScalarizeVecRes_VSELECT(N); break;
- case ISD::SELECT: R = ScalarizeVecRes_SELECT(N); break;
- case ISD::SELECT_CC: R = ScalarizeVecRes_SELECT_CC(N); break;
- case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break;
- case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break;
- case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break;
- case ISD::ANY_EXTEND:
- case ISD::BSWAP:
- case ISD::CTLZ:
- case ISD::CTLZ_ZERO_UNDEF:
- case ISD::CTPOP:
- case ISD::CTTZ:
- case ISD::CTTZ_ZERO_UNDEF:
- case ISD::FABS:
- case ISD::FCEIL:
- case ISD::FCOS:
- case ISD::FEXP:
- case ISD::FEXP2:
- case ISD::FFLOOR:
- case ISD::FLOG:
- case ISD::FLOG10:
- case ISD::FLOG2:
- case ISD::FNEARBYINT:
- case ISD::FNEG:
- case ISD::FP_EXTEND:
- case ISD::FP_TO_SINT:
- case ISD::FP_TO_UINT:
- case ISD::FRINT:
- case ISD::FROUND:
- case ISD::FSIN:
- case ISD::FSQRT:
- case ISD::FTRUNC:
- case ISD::SIGN_EXTEND:
- case ISD::SINT_TO_FP:
- case ISD::TRUNCATE:
- case ISD::UINT_TO_FP:
- case ISD::ZERO_EXTEND:
- R = ScalarizeVecRes_UnaryOp(N);
- break;
-
- case ISD::ADD:
- case ISD::AND:
- case ISD::FADD:
- case ISD::FCOPYSIGN:
- case ISD::FDIV:
- case ISD::FMUL:
- case ISD::FMINNUM:
- case ISD::FMAXNUM:
-
- case ISD::FPOW:
- case ISD::FREM:
- case ISD::FSUB:
- case ISD::MUL:
- case ISD::OR:
- case ISD::SDIV:
- case ISD::SREM:
- case ISD::SUB:
- case ISD::UDIV:
- case ISD::UREM:
- case ISD::XOR:
- case ISD::SHL:
- case ISD::SRA:
- case ISD::SRL:
- R = ScalarizeVecRes_BinOp(N);
- break;
- case ISD::FMA:
- R = ScalarizeVecRes_TernaryOp(N);
- break;
- }
-
- // If R is null, the sub-method took care of registering the result.
- if (R.getNode())
- SetScalarizedVector(SDValue(N, ResNo), R);
-}
-
-SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) {
- SDValue LHS = GetScalarizedVector(N->getOperand(0));
- SDValue RHS = GetScalarizedVector(N->getOperand(1));
- return DAG.getNode(N->getOpcode(), SDLoc(N),
- LHS.getValueType(), LHS, RHS);
-}
-
-SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) {
- SDValue Op0 = GetScalarizedVector(N->getOperand(0));
- SDValue Op1 = GetScalarizedVector(N->getOperand(1));
- SDValue Op2 = GetScalarizedVector(N->getOperand(2));
- return DAG.getNode(N->getOpcode(), SDLoc(N),
- Op0.getValueType(), Op0, Op1, Op2);
-}
-
-SDValue DAGTypeLegalizer::ScalarizeVecRes_MERGE_VALUES(SDNode *N,
- unsigned ResNo) {
- SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
- return GetScalarizedVector(Op);
-}
-
-SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) {
- EVT NewVT = N->getValueType(0).getVectorElementType();
- return DAG.getNode(ISD::BITCAST, SDLoc(N),
- NewVT, N->getOperand(0));
-}
-
-SDValue DAGTypeLegalizer::ScalarizeVecRes_BUILD_VECTOR(SDNode *N) {
- EVT EltVT = N->getValueType(0).getVectorElementType();
- SDValue InOp = N->getOperand(0);
- // The BUILD_VECTOR operands may be of wider element types and
- // we may need to truncate them back to the requested return type.
- if (EltVT.isInteger())
- return DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, InOp);
- return InOp;
-}
-
-SDValue DAGTypeLegalizer::ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N) {
- EVT NewVT = N->getValueType(0).getVectorElementType();
- SDValue Op0 = GetScalarizedVector(N->getOperand(0));
- return DAG.getConvertRndSat(NewVT, SDLoc(N),
- Op0, DAG.getValueType(NewVT),
- DAG.getValueType(Op0.getValueType()),
- N->getOperand(3),
- N->getOperand(4),
- cast<CvtRndSatSDNode>(N)->getCvtCode());
-}
-
-SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N),
- N->getValueType(0).getVectorElementType(),
- N->getOperand(0), N->getOperand(1));
-}
-
-SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) {
- EVT NewVT = N->getValueType(0).getVectorElementType();
- SDValue Op = GetScalarizedVector(N->getOperand(0));
- return DAG.getNode(ISD::FP_ROUND, SDLoc(N),
- NewVT, Op, N->getOperand(1));
-}
-
-SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) {
- SDValue Op = GetScalarizedVector(N->getOperand(0));
- return DAG.getNode(ISD::FPOWI, SDLoc(N),
- Op.getValueType(), Op, N->getOperand(1));
-}
-
-SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) {
- // The value to insert may have a wider type than the vector element type,
- // so be sure to truncate it to the element type if necessary.
- SDValue Op = N->getOperand(1);
- EVT EltVT = N->getValueType(0).getVectorElementType();
- if (Op.getValueType() != EltVT)
- // FIXME: Can this happen for floating point types?
- Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Op);
- return Op;
-}
-
-SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
- assert(N->isUnindexed() && "Indexed vector load?");
-
- SDValue Result = DAG.getLoad(ISD::UNINDEXED,
- N->getExtensionType(),
- N->getValueType(0).getVectorElementType(),
- SDLoc(N),
- N->getChain(), N->getBasePtr(),
- DAG.getUNDEF(N->getBasePtr().getValueType()),
- N->getPointerInfo(),
- N->getMemoryVT().getVectorElementType(),
- N->isVolatile(), N->isNonTemporal(),
- N->isInvariant(), N->getOriginalAlignment(),
- N->getAAInfo());
-
- // Legalized the chain result - switch anything that used the old chain to
- // use the new one.
- ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
- return Result;
-}
-
-SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) { // Scalarize the result of a unary vector operation.
- // Get the dest type - it doesn't always match the input type, e.g. int_to_fp.
- EVT DestVT = N->getValueType(0).getVectorElementType();
- SDValue Op = N->getOperand(0);
- EVT OpVT = Op.getValueType();
- SDLoc DL(N);
- // The result needs scalarizing, but it's not a given that the source does.
- // This is a workaround for targets where it's impossible to scalarize the
- // result of a conversion, because the source type is legal.
- // For instance, this happens on AArch64: v1i1 is illegal but v1i{8,16,32}
- // are widened to v8i8, v4i16, and v2i32, which is legal, because v1i64 is
- // legal and was not scalarized.
- // See the similar logic in ScalarizeVecRes_VSETCC
- if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
- Op = GetScalarizedVector(Op);
- } else {
- EVT VT = OpVT.getVectorElementType();
- Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op,
- DAG.getConstant(0, TLI.getVectorIdxTy())); // Source is not scalarized: extract element 0 by hand.
- }
- return DAG.getNode(N->getOpcode(), SDLoc(N), DestVT, Op); // NOTE(review): builds a fresh SDLoc(N) although DL is already in scope.
-}
-
-SDValue DAGTypeLegalizer::ScalarizeVecRes_InregOp(SDNode *N) { // Scalarize the result of an op whose operand 1 is a VTSDNode.
- EVT EltVT = N->getValueType(0).getVectorElementType(); // Scalar result type.
- EVT ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT().getVectorElementType(); // Element type of the VT operand.
- SDValue LHS = GetScalarizedVector(N->getOperand(0));
- return DAG.getNode(N->getOpcode(), SDLoc(N), EltVT,
- LHS, DAG.getValueType(ExtVT)); // Same opcode, with the VT operand reduced to its element type.
-}
-
-SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) { // The scalarized result is operand 0, truncated when its type differs from the element type.
- // If the operand is wider than the vector element type then it is implicitly
- // truncated. Make that explicit here.
- EVT EltVT = N->getValueType(0).getVectorElementType();
- SDValue InOp = N->getOperand(0);
- if (InOp.getValueType() != EltVT)
- return DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, InOp);
- return InOp; // Types already match: use the operand directly.
-}
-
-SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) { // Scalarize a VSELECT into a scalar select, adjusting the condition's boolean encoding when needed.
- SDValue Cond = GetScalarizedVector(N->getOperand(0));
- SDValue LHS = GetScalarizedVector(N->getOperand(1));
- TargetLowering::BooleanContent ScalarBool =
- TLI.getBooleanContents(false, false);
- TargetLowering::BooleanContent VecBool = TLI.getBooleanContents(true, false);
-
- // If integer and float booleans have different contents then we can't
- // reliably optimize in all cases. There is a full explanation for this in
- // DAGCombiner::visitSELECT() where the same issue affects folding
- // (select C, 0, 1) to (xor C, 1).
- if (TLI.getBooleanContents(false, false) !=
- TLI.getBooleanContents(false, true)) {
- // At least try the common case where the boolean is generated by a
- // comparison.
- if (Cond->getOpcode() == ISD::SETCC) {
- EVT OpVT = Cond->getOperand(0)->getValueType(0); // Type of the values being compared.
- ScalarBool = TLI.getBooleanContents(OpVT.getScalarType());
- VecBool = TLI.getBooleanContents(OpVT);
- } else
- ScalarBool = TargetLowering::UndefinedBooleanContent; // Unknown producer: the switch below leaves Cond untouched.
- }
-
- if (ScalarBool != VecBool) {
- EVT CondVT = Cond.getValueType();
- switch (ScalarBool) {
- case TargetLowering::UndefinedBooleanContent:
- break;
- case TargetLowering::ZeroOrOneBooleanContent:
- assert(VecBool == TargetLowering::UndefinedBooleanContent ||
- VecBool == TargetLowering::ZeroOrNegativeOneBooleanContent);
- // The vector form reads all ones as true; the scalar form expects a
- // single 1, so mask with 1.
- Cond = DAG.getNode(ISD::AND, SDLoc(N), CondVT,
- Cond, DAG.getConstant(1, CondVT));
- break;
- case TargetLowering::ZeroOrNegativeOneBooleanContent:
- assert(VecBool == TargetLowering::UndefinedBooleanContent ||
- VecBool == TargetLowering::ZeroOrOneBooleanContent);
- // The vector form reads a one as true; the scalar form expects all
- // ones, so sign extend from i1.
- Cond = DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), CondVT,
- Cond, DAG.getValueType(MVT::i1));
- break;
- }
- }
-
- return DAG.getSelect(SDLoc(N),
- LHS.getValueType(), Cond, LHS,
- GetScalarizedVector(N->getOperand(2))); // Operand 2 is scalarized only here, at the point of use.
-}
-
-SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) { // Scalarize a SELECT's result: only the two value operands are scalarized.
- SDValue LHS = GetScalarizedVector(N->getOperand(1));
- return DAG.getSelect(SDLoc(N),
- LHS.getValueType(), N->getOperand(0), LHS, // Operand 0 (the condition) is used unchanged.
- GetScalarizedVector(N->getOperand(2)));
-}
-
-SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) { // Scalarize a SELECT_CC's result: operands 2 and 3 are scalarized, the rest are forwarded.
- SDValue LHS = GetScalarizedVector(N->getOperand(2));
- return DAG.getNode(ISD::SELECT_CC, SDLoc(N), LHS.getValueType(),
- N->getOperand(0), N->getOperand(1), // Operands 0 and 1 are kept as-is.
- LHS, GetScalarizedVector(N->getOperand(3)),
- N->getOperand(4)); // Operand 4 is kept as-is.
-}
-
-SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) { // Scalarize a SETCC; a vector-typed result is delegated to ScalarizeVecRes_VSETCC.
- assert(N->getValueType(0).isVector() ==
- N->getOperand(0).getValueType().isVector() &&
- "Scalar/Vector type mismatch");
-
- if (N->getValueType(0).isVector()) return ScalarizeVecRes_VSETCC(N);
-
- SDValue LHS = GetScalarizedVector(N->getOperand(0));
- SDValue RHS = GetScalarizedVector(N->getOperand(1));
- SDLoc DL(N);
-
- // Turn it into a scalar SETCC producing i1; operand 2 is reused unchanged.
- return DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, N->getOperand(2));
-}
-
-SDValue DAGTypeLegalizer::ScalarizeVecRes_UNDEF(SDNode *N) { // Scalarize an UNDEF vector into an UNDEF of its element type.
- return DAG.getUNDEF(N->getValueType(0).getVectorElementType());
-}
-
-SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) { // Scalarize a VECTOR_SHUFFLE by picking one of its two inputs (or undef).
- // Figure out if the scalar is the LHS or RHS and return it.
- SDValue Arg = N->getOperand(2).getOperand(0); // First operand of operand 2 decides the source.
- if (Arg.getOpcode() == ISD::UNDEF)
- return DAG.getUNDEF(N->getValueType(0).getVectorElementType());
- unsigned Op = !cast<ConstantSDNode>(Arg)->isNullValue(); // 0 selects operand 0, anything else selects operand 1.
- return GetScalarizedVector(N->getOperand(Op));
-}
-
-SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) { // Scalarize a vector SETCC: compare scalars as i1, then extend to the result element type.
- assert(N->getValueType(0).isVector() &&
- N->getOperand(0).getValueType().isVector() &&
- "Operand types must be vectors");
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
- EVT OpVT = LHS.getValueType();
- EVT NVT = N->getValueType(0).getVectorElementType(); // Scalar result type.
- SDLoc DL(N);
-
- // The result needs scalarizing, but it's not a given that the source does.
- if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
- LHS = GetScalarizedVector(LHS);
- RHS = GetScalarizedVector(RHS);
- } else {
- EVT VT = OpVT.getVectorElementType(); // Operands stay vectors: extract element 0 of each.
- LHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, LHS,
- DAG.getConstant(0, TLI.getVectorIdxTy()));
- RHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, RHS,
- DAG.getConstant(0, TLI.getVectorIdxTy()));
- }
-
- // Turn it into a scalar SETCC.
- SDValue Res = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS,
- N->getOperand(2));
- // Vectors may have a different boolean contents to scalars. Promote the
- // value appropriately.
- ISD::NodeType ExtendCode =
- TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT));
- return DAG.getNode(ExtendCode, DL, NVT, Res);
-}
-
-
-//===----------------------------------------------------------------------===//
-// Operand Vector Scalarization <1 x ty> -> ty.
-//===----------------------------------------------------------------------===//
-
-bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { // Scalarize operand OpNo of N; returns true only when N itself was updated in place.
- DEBUG(dbgs() << "Scalarize node operand " << OpNo << ": ";
- N->dump(&DAG);
- dbgs() << "\n");
- SDValue Res = SDValue();
-
- if (!Res.getNode()) { // NOTE(review): Res was default-constructed just above, so this guard looks redundant - confirm.
- switch (N->getOpcode()) {
- default:
-#ifndef NDEBUG
- dbgs() << "ScalarizeVectorOperand Op #" << OpNo << ": ";
- N->dump(&DAG);
- dbgs() << "\n";
-#endif
- llvm_unreachable("Do not know how to scalarize this operator's operand!");
- case ISD::BITCAST:
- Res = ScalarizeVecOp_BITCAST(N);
- break;
- case ISD::ANY_EXTEND:
- case ISD::ZERO_EXTEND:
- case ISD::SIGN_EXTEND:
- case ISD::TRUNCATE:
- case ISD::FP_TO_SINT:
- case ISD::FP_TO_UINT:
- case ISD::SINT_TO_FP:
- case ISD::UINT_TO_FP:
- Res = ScalarizeVecOp_UnaryOp(N); // All single-operand conversions share one handler.
- break;
- case ISD::CONCAT_VECTORS:
- Res = ScalarizeVecOp_CONCAT_VECTORS(N);
- break;
- case ISD::EXTRACT_VECTOR_ELT:
- Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N);
- break;
- case ISD::VSELECT:
- Res = ScalarizeVecOp_VSELECT(N);
- break;
- case ISD::STORE:
- Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo);
- break;
- case ISD::FP_ROUND:
- Res = ScalarizeVecOp_FP_ROUND(N, OpNo);
- break;
- }
- }
-
- // If the result is null, the sub-method took care of registering results etc.
- if (!Res.getNode()) return false;
-
- // If the result is N, the sub-method updated N in place. Tell the legalizer
- // core about this.
- if (Res.getNode() == N)
- return true;
-
- assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
- "Invalid operand expansion");
-
- ReplaceValueWith(SDValue(N, 0), Res); // Otherwise N's single result is replaced by the new value.
- return false;
-}
-
-/// ScalarizeVecOp_BITCAST - If the value to convert is a vector that needs
-/// to be scalarized, it must be <1 x ty>. Convert the element instead.
-SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) {
- SDValue Elt = GetScalarizedVector(N->getOperand(0));
- return DAG.getNode(ISD::BITCAST, SDLoc(N),
- N->getValueType(0), Elt); // The result type of N is kept unchanged.
-}
-
-/// ScalarizeVecOp_UnaryOp - If the input is a vector that needs to be
-/// scalarized, it must be <1 x ty>. Do the operation on the element instead.
-SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) {
- assert(N->getValueType(0).getVectorNumElements() == 1 &&
- "Unexpected vector type!");
- SDValue Elt = GetScalarizedVector(N->getOperand(0));
- SDValue Op = DAG.getNode(N->getOpcode(), SDLoc(N),
- N->getValueType(0).getScalarType(), Elt); // Same opcode, applied to the scalar.
- // Revectorize the result so the types line up with what the uses of this
- // expression expect.
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0), Op);
-}
-
-/// ScalarizeVecOp_CONCAT_VECTORS - The vectors to concatenate have length one -
-/// use a BUILD_VECTOR instead.
-SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) {
- SmallVector<SDValue, 8> Ops(N->getNumOperands());
- for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
- Ops[i] = GetScalarizedVector(N->getOperand(i)); // Every operand is scalarized, in order.
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0), Ops);
-}
-
-/// ScalarizeVecOp_EXTRACT_VECTOR_ELT - If the input is a vector that needs to
-/// be scalarized, it must be <1 x ty>, so just return the element, ignoring the
-/// index.
-SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
- SDValue Res = GetScalarizedVector(N->getOperand(0));
- if (Res.getValueType() != N->getValueType(0)) // Result type differs from the element: any-extend to it.
- Res = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), N->getValueType(0),
- Res);
- return Res;
-}
-
-
-/// ScalarizeVecOp_VSELECT - If the input condition is a vector that needs to be
-/// scalarized, it must be <1 x i1>, so just convert to a normal ISD::SELECT
-/// (still with vector output type since that was acceptable if we got here).
-SDValue DAGTypeLegalizer::ScalarizeVecOp_VSELECT(SDNode *N) {
- SDValue ScalarCond = GetScalarizedVector(N->getOperand(0));
- EVT VT = N->getValueType(0);
-
- return DAG.getNode(ISD::SELECT, SDLoc(N), VT, ScalarCond, N->getOperand(1),
- N->getOperand(2)); // Operands 1 and 2 are forwarded untouched.
-}
-
-/// ScalarizeVecOp_STORE - If the value to store is a vector that needs to be
-/// scalarized, it must be <1 x ty>. Just store the element.
-SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){
- assert(N->isUnindexed() && "Indexed store of one-element vector?");
- assert(OpNo == 1 && "Do not know how to scalarize this operand!");
- SDLoc dl(N);
-
- if (N->isTruncatingStore())
- return DAG.getTruncStore(N->getChain(), dl,
- GetScalarizedVector(N->getOperand(1)),
- N->getBasePtr(), N->getPointerInfo(),
- N->getMemoryVT().getVectorElementType(), // Scalar memory type.
- N->isVolatile(), N->isNonTemporal(),
- N->getAlignment(), N->getAAInfo()); // NOTE(review): uses getAlignment() while the plain store below uses getOriginalAlignment() - confirm this is intended.
-
- return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)),
- N->getBasePtr(), N->getPointerInfo(),
- N->isVolatile(), N->isNonTemporal(),
- N->getOriginalAlignment(), N->getAAInfo());
-}
-
-/// ScalarizeVecOp_FP_ROUND - If the value to round is a vector that needs
-/// to be scalarized, it must be <1 x ty>. Convert the element instead.
-SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo) { // OpNo is not read in this body.
- SDValue Elt = GetScalarizedVector(N->getOperand(0));
- SDValue Res = DAG.getNode(ISD::FP_ROUND, SDLoc(N),
- N->getValueType(0).getVectorElementType(), Elt,
- N->getOperand(1));
- return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res); // Rewrap the scalar in N's original result type.
-}
-
-//===----------------------------------------------------------------------===//
-// Result Vector Splitting
-//===----------------------------------------------------------------------===//
-
-/// SplitVectorResult - This method is called when the specified result of the
-/// specified node is found to need vector splitting. At this point, the node
-/// may also have invalid operands or may have other results that need
-/// legalization; we just know that (at least) one result needs vector
-/// splitting.
-void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
- DEBUG(dbgs() << "Split node result: ";
- N->dump(&DAG);
- dbgs() << "\n");
- SDValue Lo, Hi;
-
- // See if the target wants to custom expand this node.
- if (CustomLowerNode(N, N->getValueType(ResNo), true))
- return;
-
- switch (N->getOpcode()) { // Dispatch on opcode; each handler fills in Lo and Hi.
- default:
-#ifndef NDEBUG
- dbgs() << "SplitVectorResult #" << ResNo << ": ";
- N->dump(&DAG);
- dbgs() << "\n";
-#endif
- report_fatal_error("Do not know how to split the result of this "
- "operator!\n");
-
- case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
- case ISD::VSELECT:
- case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
- case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
- case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
- case ISD::BITCAST: SplitVecRes_BITCAST(N, Lo, Hi); break;
- case ISD::BUILD_VECTOR: SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break;
- case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break;
- case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break;
- case ISD::INSERT_SUBVECTOR: SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break;
- case ISD::FP_ROUND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
- case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break;
- case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
- case ISD::SCALAR_TO_VECTOR: SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break;
- case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
- case ISD::LOAD:
- SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);
- break;
- case ISD::SETCC:
- SplitVecRes_SETCC(N, Lo, Hi);
- break;
- case ISD::VECTOR_SHUFFLE:
- SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi);
- break;
-
- case ISD::BSWAP:
- case ISD::CONVERT_RNDSAT:
- case ISD::CTLZ:
- case ISD::CTTZ:
- case ISD::CTLZ_ZERO_UNDEF:
- case ISD::CTTZ_ZERO_UNDEF:
- case ISD::CTPOP:
- case ISD::FABS:
- case ISD::FCEIL:
- case ISD::FCOS:
- case ISD::FEXP:
- case ISD::FEXP2:
- case ISD::FFLOOR:
- case ISD::FLOG:
- case ISD::FLOG10:
- case ISD::FLOG2:
- case ISD::FNEARBYINT:
- case ISD::FNEG:
- case ISD::FP_EXTEND:
- case ISD::FP_ROUND:
- case ISD::FP_TO_SINT:
- case ISD::FP_TO_UINT:
- case ISD::FRINT:
- case ISD::FROUND:
- case ISD::FSIN:
- case ISD::FSQRT:
- case ISD::FTRUNC:
- case ISD::SINT_TO_FP:
- case ISD::TRUNCATE:
- case ISD::UINT_TO_FP:
- SplitVecRes_UnaryOp(N, Lo, Hi); // Shared handler for the opcodes listed above.
- break;
-
- case ISD::ANY_EXTEND:
- case ISD::SIGN_EXTEND:
- case ISD::ZERO_EXTEND:
- SplitVecRes_ExtendOp(N, Lo, Hi); // Extends get a dedicated handler that may fall back to SplitVecRes_UnaryOp.
- break;
-
- case ISD::ADD:
- case ISD::SUB:
- case ISD::MUL:
- case ISD::FADD:
- case ISD::FCOPYSIGN:
- case ISD::FSUB:
- case ISD::FMUL:
- case ISD::FMINNUM:
- case ISD::FMAXNUM:
- case ISD::SDIV:
- case ISD::UDIV:
- case ISD::FDIV:
- case ISD::FPOW:
- case ISD::AND:
- case ISD::OR:
- case ISD::XOR:
- case ISD::SHL:
- case ISD::SRA:
- case ISD::SRL:
- case ISD::UREM:
- case ISD::SREM:
- case ISD::FREM:
- SplitVecRes_BinOp(N, Lo, Hi); // Two-operand ops: both operands are split.
- break;
- case ISD::FMA:
- SplitVecRes_TernaryOp(N, Lo, Hi);
- break;
- }
-
- // If Lo/Hi is null, the sub-method took care of registering results etc.
- if (Lo.getNode())
- SetSplitVector(SDValue(N, ResNo), Lo, Hi);
-}
-
-void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, // Split a two-operand node: apply the same opcode to the low halves and to the high halves.
- SDValue &Hi) {
- SDValue LHSLo, LHSHi;
- GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
- SDValue RHSLo, RHSHi;
- GetSplitVector(N->getOperand(1), RHSLo, RHSHi);
- SDLoc dl(N);
-
- Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, RHSLo); // Result types follow the halves of operand 0.
- Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, RHSHi);
-}
-
-void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, // Split a three-operand node: all three operands are split and recombined half by half.
- SDValue &Hi) {
- SDValue Op0Lo, Op0Hi;
- GetSplitVector(N->getOperand(0), Op0Lo, Op0Hi);
- SDValue Op1Lo, Op1Hi;
- GetSplitVector(N->getOperand(1), Op1Lo, Op1Hi);
- SDValue Op2Lo, Op2Hi;
- GetSplitVector(N->getOperand(2), Op2Lo, Op2Hi);
- SDLoc dl(N);
-
- Lo = DAG.getNode(N->getOpcode(), dl, Op0Lo.getValueType(), // Result types follow the halves of operand 0.
- Op0Lo, Op1Lo, Op2Lo);
- Hi = DAG.getNode(N->getOpcode(), dl, Op0Hi.getValueType(),
- Op0Hi, Op1Hi, Op2Hi);
-}
-
-void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, // Split the vector result of a BITCAST into Lo and Hi.
- SDValue &Hi) {
- // We know the result is a vector. The input may be either a vector or a
- // scalar value.
- EVT LoVT, HiVT;
- std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
- SDLoc dl(N);
-
- SDValue InOp = N->getOperand(0);
- EVT InVT = InOp.getValueType();
-
- // Handle some special cases efficiently.
- switch (getTypeAction(InVT)) {
- case TargetLowering::TypeLegal:
- case TargetLowering::TypePromoteInteger:
- case TargetLowering::TypeSoftenFloat:
- case TargetLowering::TypeScalarizeVector:
- case TargetLowering::TypeWidenVector:
- break; // No shortcut for these: use the general path below.
- case TargetLowering::TypeExpandInteger:
- case TargetLowering::TypeExpandFloat:
- // A scalar to vector conversion, where the scalar needs expansion.
- // If the vector is being split in two then we can just convert the
- // expanded pieces.
- if (LoVT == HiVT) {
- GetExpandedOp(InOp, Lo, Hi);
- if (TLI.isBigEndian())
- std::swap(Lo, Hi); // On big-endian targets the expanded pieces are exchanged.
- Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
- Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
- return;
- }
- break;
- case TargetLowering::TypeSplitVector:
- // If the input is a vector that needs to be split, convert each split
- // piece of the input now.
- GetSplitVector(InOp, Lo, Hi);
- Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
- Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
- return;
- }
-
- // In the general case, convert the input to an integer and split it by hand.
- EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits());
- EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits());
- if (TLI.isBigEndian())
- std::swap(LoIntVT, HiIntVT);
-
- SplitInteger(BitConvertToInteger(InOp), LoIntVT, HiIntVT, Lo, Hi);
-
- if (TLI.isBigEndian())
- std::swap(Lo, Hi); // Undo the swap so that Lo/Hi line up with LoVT/HiVT again.
- Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
- Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
-}
-
-void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, // Split a BUILD_VECTOR into two BUILD_VECTORs over its leading and trailing operands.
- SDValue &Hi) {
- EVT LoVT, HiVT;
- SDLoc dl(N);
- std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
- unsigned LoNumElts = LoVT.getVectorNumElements();
- SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+LoNumElts); // First LoNumElts operands.
- Lo = DAG.getNode(ISD::BUILD_VECTOR, dl, LoVT, LoOps);
-
- SmallVector<SDValue, 8> HiOps(N->op_begin()+LoNumElts, N->op_end()); // All remaining operands.
- Hi = DAG.getNode(ISD::BUILD_VECTOR, dl, HiVT, HiOps);
-}
-
-void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, // Split a CONCAT_VECTORS with an even operand count into two halves.
- SDValue &Hi) {
- assert(!(N->getNumOperands() & 1) && "Unsupported CONCAT_VECTORS");
- SDLoc dl(N);
- unsigned NumSubvectors = N->getNumOperands() / 2; // Operands per half.
- if (NumSubvectors == 1) { // Exactly two operands: they are the halves themselves.
- Lo = N->getOperand(0);
- Hi = N->getOperand(1);
- return;
- }
-
- EVT LoVT, HiVT;
- std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
-
- SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+NumSubvectors);
- Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, LoOps);
-
- SmallVector<SDValue, 8> HiOps(N->op_begin()+NumSubvectors, N->op_end());
- Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HiVT, HiOps);
-}
-
-void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, // Split an EXTRACT_SUBVECTOR into two narrower extracts from the same source vector.
- SDValue &Hi) {
- SDValue Vec = N->getOperand(0);
- SDValue Idx = N->getOperand(1);
- SDLoc dl(N);
-
- EVT LoVT, HiVT;
- std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
-
- Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx); // Low half starts at the original index.
- uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); // The cast<> requires Idx to be a ConstantSDNode.
- Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec,
- DAG.getConstant(IdxVal + LoVT.getVectorNumElements(), // High half starts right after the low half's elements.
- TLI.getVectorIdxTy()));
-}
-
-void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, // Split an INSERT_SUBVECTOR by going through a stack temporary.
- SDValue &Hi) {
- SDValue Vec = N->getOperand(0);
- SDValue SubVec = N->getOperand(1);
- SDValue Idx = N->getOperand(2);
- SDLoc dl(N);
- GetSplitVector(Vec, Lo, Hi); // Lo/Hi are used below only for their value types before being overwritten.
-
- // Spill the vector to the stack.
- EVT VecVT = Vec.getValueType();
- EVT SubVecVT = VecVT.getVectorElementType(); // Despite its name, this holds the element type of Vec.
- SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
- MachinePointerInfo(), false, false, 0);
-
- // Store the new subvector into the specified index.
- SDValue SubVecPtr = GetVectorElementPointer(StackPtr, SubVecVT, Idx);
- Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
- unsigned Alignment = TLI.getDataLayout()->getPrefTypeAlignment(VecType); // Only consumed by the Hi load at the end.
- Store = DAG.getStore(Store, dl, SubVec, SubVecPtr, MachinePointerInfo(),
- false, false, 0);
-
- // Load the Lo part from the stack slot.
- Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
- false, false, false, 0);
-
- // Increment the pointer to the other part.
- unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8; // Size of the Lo part in bytes.
- StackPtr =
- DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
- DAG.getConstant(IncrementSize, StackPtr.getValueType()));
-
- // Load the Hi part from the stack slot.
- Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
- false, false, false, MinAlign(Alignment, IncrementSize));
-}
-
-void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, // Split an FPOWI: operand 0 is split, operand 1 is shared by both halves.
- SDValue &Hi) {
- SDLoc dl(N);
- GetSplitVector(N->getOperand(0), Lo, Hi);
- Lo = DAG.getNode(ISD::FPOWI, dl, Lo.getValueType(), Lo, N->getOperand(1));
- Hi = DAG.getNode(ISD::FPOWI, dl, Hi.getValueType(), Hi, N->getOperand(1));
-}
-
-void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo, // Split an op whose operand 1 is a VTSDNode: both the value and the VT operand are split.
- SDValue &Hi) {
- SDValue LHSLo, LHSHi;
- GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
- SDLoc dl(N);
-
- EVT LoVT, HiVT;
- std::tie(LoVT, HiVT) =
- DAG.GetSplitDestVTs(cast<VTSDNode>(N->getOperand(1))->getVT()); // Split the type carried by operand 1.
-
- Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo,
- DAG.getValueType(LoVT));
- Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi,
- DAG.getValueType(HiVT));
-}
-
-void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, // Split an INSERT_VECTOR_ELT: direct insert for a constant index, otherwise custom lowering or a stack spill.
- SDValue &Hi) {
- SDValue Vec = N->getOperand(0);
- SDValue Elt = N->getOperand(1);
- SDValue Idx = N->getOperand(2);
- SDLoc dl(N);
- GetSplitVector(Vec, Lo, Hi);
-
- if (ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
- unsigned IdxVal = CIdx->getZExtValue();
- unsigned LoNumElts = Lo.getValueType().getVectorNumElements();
- if (IdxVal < LoNumElts) // Index falls in the low half: insert there with the original index.
- Lo = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
- Lo.getValueType(), Lo, Elt, Idx);
- else
- Hi = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt,
- DAG.getConstant(IdxVal - LoNumElts, // Rebase the index onto the high half.
- TLI.getVectorIdxTy()));
- return;
- }
-
- // See if the target wants to custom expand this node.
- if (CustomLowerNode(N, N->getValueType(0), true))
- return;
-
- // Spill the vector to the stack.
- EVT VecVT = Vec.getValueType();
- EVT EltVT = VecVT.getVectorElementType();
- SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
- MachinePointerInfo(), false, false, 0);
-
- // Store the new element. This may be larger than the vector element type,
- // so use a truncating store.
- SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
- Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
- unsigned Alignment =
- TLI.getDataLayout()->getPrefTypeAlignment(VecType); // Only consumed by the Hi load at the end.
- Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo(), EltVT,
- false, false, 0);
-
- // Load the Lo part from the stack slot.
- Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
- false, false, false, 0);
-
- // Increment the pointer to the other part.
- unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8; // Size of the Lo part in bytes.
- StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
- DAG.getConstant(IncrementSize, StackPtr.getValueType()));
-
- // Load the Hi part from the stack slot.
- Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
- false, false, false, MinAlign(Alignment, IncrementSize));
-}
-
-void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, // Split a SCALAR_TO_VECTOR: the scalar goes into Lo and Hi is undef.
- SDValue &Hi) {
- EVT LoVT, HiVT;
- SDLoc dl(N);
- std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
- Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoVT, N->getOperand(0));
- Hi = DAG.getUNDEF(HiVT);
-}
-
-void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, // Split a vector load into two loads at consecutive addresses.
- SDValue &Hi) {
- assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!");
- EVT LoVT, HiVT;
- SDLoc dl(LD);
- std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0));
-
- ISD::LoadExtType ExtType = LD->getExtensionType();
- SDValue Ch = LD->getChain();
- SDValue Ptr = LD->getBasePtr();
- SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
- EVT MemoryVT = LD->getMemoryVT();
- unsigned Alignment = LD->getOriginalAlignment();
- bool isVolatile = LD->isVolatile();
- bool isNonTemporal = LD->isNonTemporal();
- bool isInvariant = LD->isInvariant();
- AAMDNodes AAInfo = LD->getAAInfo();
-
- EVT LoMemVT, HiMemVT;
- std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); // The memory type is split separately from the result type.
-
- Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset,
- LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal,
- isInvariant, Alignment, AAInfo);
-
- unsigned IncrementSize = LoMemVT.getSizeInBits()/8; // Byte size of the low half in memory.
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, Ptr.getValueType()));
- Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, // Both loads hang off the original chain Ch.
- LD->getPointerInfo().getWithOffset(IncrementSize),
- HiMemVT, isVolatile, isNonTemporal, isInvariant, Alignment,
- AAInfo);
-
- // Build a TokenFactor node to remember that this load is independent of
- // the other one.
- Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
- Hi.getValue(1));
-
- // The chain result has been legalized - switch anything that used the old
- // chain to use the new one.
- ReplaceValueWith(SDValue(LD, 1), Ch);
-}
-
-void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) { // Split a vector SETCC into two compares over the halves of its operands.
- assert(N->getValueType(0).isVector() &&
- N->getOperand(0).getValueType().isVector() &&
- "Operand types must be vectors");
-
- EVT LoVT, HiVT;
- SDLoc DL(N);
- std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
-
- // Split the input.
- SDValue LL, LH, RL, RH;
- std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
- std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
-
- Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); // Operand 2 is shared by both halves.
- Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
-}
-
-void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, // Split a single-input node, with special handling for FP_ROUND and CONVERT_RNDSAT.
- SDValue &Hi) {
- // Get the dest types - they may not match the input types, e.g. int_to_fp.
- EVT LoVT, HiVT;
- SDLoc dl(N);
- std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
-
- // If the input also splits, handle it directly for a compile time speedup.
- // Otherwise split it by hand.
- EVT InVT = N->getOperand(0).getValueType();
- if (getTypeAction(InVT) == TargetLowering::TypeSplitVector)
- GetSplitVector(N->getOperand(0), Lo, Hi);
- else
- std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
-
- if (N->getOpcode() == ISD::FP_ROUND) { // FP_ROUND carries an extra operand 1 that both halves reuse.
- Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1));
- Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1));
- } else if (N->getOpcode() == ISD::CONVERT_RNDSAT) { // Rebuilt via getConvertRndSat with per-half type operands.
- SDValue DTyOpLo = DAG.getValueType(LoVT);
- SDValue DTyOpHi = DAG.getValueType(HiVT);
- SDValue STyOpLo = DAG.getValueType(Lo.getValueType());
- SDValue STyOpHi = DAG.getValueType(Hi.getValueType());
- SDValue RndOp = N->getOperand(3);
- SDValue SatOp = N->getOperand(4);
- ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
- Lo = DAG.getConvertRndSat(LoVT, dl, Lo, DTyOpLo, STyOpLo, RndOp, SatOp,
- CvtCode);
- Hi = DAG.getConvertRndSat(HiVT, dl, Hi, DTyOpHi, STyOpHi, RndOp, SatOp,
- CvtCode);
- } else { // Every other opcode: same opcode on each half.
- Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
- Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
- }
-}
-
-void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, // Split an extend node, extending in two steps when the type checks below allow it.
- SDValue &Hi) {
- SDLoc dl(N);
- EVT SrcVT = N->getOperand(0).getValueType();
- EVT DestVT = N->getValueType(0);
- EVT LoVT, HiVT;
- std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(DestVT);
-
- // We can do better than a generic split operation if the extend is doing
- // more than just doubling the width of the elements and the following are
- // true:
- // - The number of vector elements is even,
- // - the source type is legal,
- // - the type of a split source is illegal,
- // - the type of an extended (by doubling element size) source is legal, and
- // - the type of that extended source when split is legal.
- //
- // This won't necessarily completely legalize the operation, but it will
- // more effectively move in the right direction and prevent falling down
- // to scalarization in many cases due to the input vector being split too
- // far.
- unsigned NumElements = SrcVT.getVectorNumElements();
- if ((NumElements & 1) == 0 &&
- SrcVT.getSizeInBits() * 2 < DestVT.getSizeInBits()) {
- LLVMContext &Ctx = *DAG.getContext();
- EVT NewSrcVT = EVT::getVectorVT( // Source type with the element width doubled.
- Ctx, EVT::getIntegerVT(
- Ctx, SrcVT.getVectorElementType().getSizeInBits() * 2),
- NumElements);
- EVT SplitSrcVT = // Source type with half as many elements.
- EVT::getVectorVT(Ctx, SrcVT.getVectorElementType(), NumElements / 2);
- EVT SplitLoVT, SplitHiVT;
- std::tie(SplitLoVT, SplitHiVT) = DAG.GetSplitDestVTs(NewSrcVT);
- if (TLI.isTypeLegal(SrcVT) && !TLI.isTypeLegal(SplitSrcVT) &&
- TLI.isTypeLegal(NewSrcVT) && TLI.isTypeLegal(SplitLoVT)) { // Only SplitLoVT is checked; SplitHiVT is not.
- DEBUG(dbgs() << "Split vector extend via incremental extend:";
- N->dump(&DAG); dbgs() << "\n");
- // Extend the source vector by one step.
- SDValue NewSrc =
- DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0));
- // Get the low and high halves of the new, extended one step, vector.
- std::tie(Lo, Hi) = DAG.SplitVector(NewSrc, dl);
- // Extend those vector halves the rest of the way.
- Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
- Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
- return;
- }
- }
- // Fall back to the generic unary operator splitting otherwise.
- SplitVecRes_UnaryOp(N, Lo, Hi);
-}
-
- void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
- SDValue &Lo, SDValue &Hi) {
- // Split the result of shuffle N into two halves, returned through Lo and Hi. The low and high parts of the original inputs give four input vectors.
- SDValue Inputs[4]; // [0],[1]: halves of operand 0; [2],[3]: halves of operand 1.
- SDLoc dl(N);
- GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]);
- GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]);
- EVT NewVT = Inputs[0].getValueType(); // Type used for each of the two outputs.
- unsigned NewElts = NewVT.getVectorNumElements();
-
- // If Lo or Hi uses elements from at most two of the four input vectors, then
- // express it as a vector shuffle of those two inputs. Otherwise extract the
- // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
- SmallVector<int, 16> Ops; // Shuffle mask under construction for the current half.
- for (unsigned High = 0; High < 2; ++High) { // High == 0 builds Lo, High == 1 builds Hi.
- SDValue &Output = High ? Hi : Lo;
-
- // Build a shuffle mask for the output, discovering on the fly which
- // input vectors to use as shuffle operands (recorded in InputUsed).
- // If building a suitable shuffle vector proves too hard, then bail
- // out with useBuildVector set.
- unsigned InputUsed[2] = { -1U, -1U }; // Not yet discovered.
- unsigned FirstMaskIdx = High * NewElts; // First element of N's mask that belongs to this half.
- bool useBuildVector = false;
- for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
- // The mask element. This indexes into the input.
- int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset);
-
- // The input vector this mask element indexes into. A negative Idx becomes a very large unsigned value and fails the range check below.
- unsigned Input = (unsigned)Idx / NewElts;
-
- if (Input >= array_lengthof(Inputs)) {
- // The mask element does not index into any input vector.
- Ops.push_back(-1);
- continue;
- }
-
- // Turn the index into an offset from the start of the input vector.
- Idx -= Input * NewElts;
-
- // Find or create a shuffle vector operand to hold this input.
- unsigned OpNo;
- for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
- if (InputUsed[OpNo] == Input) {
- // This input vector is already an operand.
- break;
- } else if (InputUsed[OpNo] == -1U) {
- // Create a new operand for this input vector.
- InputUsed[OpNo] = Input;
- break;
- }
- }
-
- if (OpNo >= array_lengthof(InputUsed)) {
- // More than two input vectors used! Give up on trying to create a
- // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
- useBuildVector = true;
- break; // Ops may be only partially filled here; it is not read on the BUILD_VECTOR path.
- }
-
- // Add the mask index for the new shuffle vector.
- Ops.push_back(Idx + OpNo * NewElts); // OpNo 0/1 selects the first/second operand of the new shuffle.
- }
-
- if (useBuildVector) {
- EVT EltVT = NewVT.getVectorElementType();
- SmallVector<SDValue, 16> SVOps; // One scalar per element of this output half.
-
- // Extract the input elements by hand.
- for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
- // The mask element. This indexes into the input.
- int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset);
-
- // The input vector this mask element indexes into.
- unsigned Input = (unsigned)Idx / NewElts;
-
- if (Input >= array_lengthof(Inputs)) {
- // The mask element is "undef" or indexes off the end of the input.
- SVOps.push_back(DAG.getUNDEF(EltVT));
- continue;
- }
-
- // Turn the index into an offset from the start of the input vector.
- Idx -= Input * NewElts;
-
- // Extract the vector element by hand.
- SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
- Inputs[Input], DAG.getConstant(Idx,
- TLI.getVectorIdxTy())));
- }
-
- // Construct the Lo/Hi output using a BUILD_VECTOR.
- Output = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, SVOps);
- } else if (InputUsed[0] == -1U) {
- // No input vectors were used! The result is undefined.
- Output = DAG.getUNDEF(NewVT);
- } else {
- SDValue Op0 = Inputs[InputUsed[0]];
- // If only one input was used, use an undefined vector for the other.
- SDValue Op1 = InputUsed[1] == -1U ?
- DAG.getUNDEF(NewVT) : Inputs[InputUsed[1]];
- // At least one input vector was used. Create a new shuffle vector.
- Output = DAG.getVectorShuffle(NewVT, dl, Op0, Op1, &Ops[0]);
- }
-
- Ops.clear(); // Start the other half with an empty mask.
- }
- }
-
-
-//===----------------------------------------------------------------------===//
-// Operand Vector Splitting
-//===----------------------------------------------------------------------===//
-
- /// SplitVectorOperand - Legalize operand OpNo of node N when that operand's
- /// vector type has to be split. Every result type of N is already legal at
- /// this point, although other operands may still need legalization.
- /// Returns true only when N was updated in place. Returns false when N was
- /// replaced by a new value, or when the target's custom lowering or the
- /// per-opcode helper already took care of registering the results.
- bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
-   DEBUG(dbgs() << "Split node operand: ";
-         N->dump(&DAG);
-         dbgs() << "\n");
-
-   // The target gets the first chance to split this node its own way.
-   if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
-     return false;
-
-   // Dispatch on the opcode to the helper that knows how to split it.
-   SDValue NewVal;
-   switch (N->getOpcode()) {
-   case ISD::SETCC:
-     NewVal = SplitVecOp_VSETCC(N);
-     break;
-   case ISD::BITCAST:
-     NewVal = SplitVecOp_BITCAST(N);
-     break;
-   case ISD::EXTRACT_SUBVECTOR:
-     NewVal = SplitVecOp_EXTRACT_SUBVECTOR(N);
-     break;
-   case ISD::EXTRACT_VECTOR_ELT:
-     NewVal = SplitVecOp_EXTRACT_VECTOR_ELT(N);
-     break;
-   case ISD::CONCAT_VECTORS:
-     NewVal = SplitVecOp_CONCAT_VECTORS(N);
-     break;
-   case ISD::TRUNCATE:
-     NewVal = SplitVecOp_TRUNCATE(N);
-     break;
-   case ISD::FP_ROUND:
-     NewVal = SplitVecOp_FP_ROUND(N);
-     break;
-   case ISD::STORE:
-     NewVal = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
-     break;
-   case ISD::VSELECT:
-     NewVal = SplitVecOp_VSELECT(N, OpNo);
-     break;
-
-   // Plain one-input operations all share the generic unary splitter.
-   case ISD::CTTZ:
-   case ISD::CTLZ:
-   case ISD::CTPOP:
-   case ISD::FP_EXTEND:
-   case ISD::FP_TO_SINT:
-   case ISD::FP_TO_UINT:
-   case ISD::SINT_TO_FP:
-   case ISD::UINT_TO_FP:
-   case ISD::FTRUNC:
-   case ISD::SIGN_EXTEND:
-   case ISD::ZERO_EXTEND:
-   case ISD::ANY_EXTEND:
-     NewVal = SplitVecOp_UnaryOp(N);
-     break;
-
-   default:
- #ifndef NDEBUG
-     dbgs() << "SplitVectorOperand Op #" << OpNo << ": ";
-     N->dump(&DAG);
-     dbgs() << "\n";
- #endif
-     report_fatal_error("Do not know how to split this operator's "
-                        "operand!\n");
-   }
-
-   // A null value means the helper registered its results by itself.
-   SDNode *NewNode = NewVal.getNode();
-   if (!NewNode)
-     return false;
-
-   // Getting N back means it was updated in place; the legalizer core has to
-   // be told about that.
-   if (NewNode == N)
-     return true;
-
-   assert(NewVal.getValueType() == N->getValueType(0) &&
-          N->getNumValues() == 1 && "Invalid operand expansion");
-
-   ReplaceValueWith(SDValue(N, 0), NewVal);
-   return false;
- }
-
- SDValue DAGTypeLegalizer::SplitVecOp_VSELECT(SDNode *N, unsigned OpNo) {
-   // Result type legalization would already have dealt with this node if
-   // anything but the mask (operand 0) were illegal.
-   assert(OpNo == 0 && "Illegal operand must be mask");
-
-   SDLoc DL(N);
-   SDValue Mask = N->getOperand(0);
-   SDValue TrueVal = N->getOperand(1);
-   SDValue FalseVal = N->getOperand(2);
-   EVT FullVT = TrueVal.getValueType();
-   assert(Mask.getValueType().isVector() && "VSELECT without a vector mask?");
-
-   // The already-split mask halves are only consulted by the sanity check.
-   SDValue SplitMaskLo, SplitMaskHi;
-   GetSplitVector(Mask, SplitMaskLo, SplitMaskHi);
-   assert(SplitMaskLo.getValueType() == SplitMaskHi.getValueType() &&
-          "Lo and Hi have differing types");
-
-   EVT LoVT, HiVT;
-   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(FullVT);
-   assert(LoVT == HiVT && "Asymmetric vector split?");
-
-   // Cut both value operands and the mask in half.
-   SDValue TrueLo, TrueHi;
-   std::tie(TrueLo, TrueHi) = DAG.SplitVector(TrueVal, DL);
-   SDValue FalseLo, FalseHi;
-   std::tie(FalseLo, FalseHi) = DAG.SplitVector(FalseVal, DL);
-   SDValue MaskLo, MaskHi;
-   std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
-
-   // Select within each half, then glue the halves back together.
-   SDValue SelLo = DAG.getNode(ISD::VSELECT, DL, LoVT, MaskLo, TrueLo, FalseLo);
-   SDValue SelHi = DAG.getNode(ISD::VSELECT, DL, HiVT, MaskHi, TrueHi, FalseHi);
-   return DAG.getNode(ISD::CONCAT_VECTORS, DL, FullVT, SelLo, SelHi);
- }
-
- SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
-   // The result type is legal; only the single input has to be split. Apply
-   // the operation to each input half and concatenate the two results.
-   SDLoc dl(N);
-   const unsigned Opc = N->getOpcode();
-   EVT ResVT = N->getValueType(0);
-
-   SDValue LoIn, HiIn;
-   GetSplitVector(N->getOperand(0), LoIn, HiIn);
-
-   // Per-half result type: the result's element type with as many elements
-   // as the low input half has.
-   EVT HalfResVT =
-       EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
-                        LoIn.getValueType().getVectorNumElements());
-
-   SDValue LoOut = DAG.getNode(Opc, dl, HalfResVT, LoIn);
-   SDValue HiOut = DAG.getNode(Opc, dl, HalfResVT, HiIn);
-   return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, LoOut, HiOut);
- }
-
- SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) {
-   // Example: i64 = BITCAST v4i16 on alpha. The vector usually ends up split
-   // all the way down to its components, so turn both split pieces into
-   // integers, join them, and bitcast the joined integer to the result type.
-   SDValue LoPiece, HiPiece;
-   GetSplitVector(N->getOperand(0), LoPiece, HiPiece);
-   LoPiece = BitConvertToInteger(LoPiece);
-   HiPiece = BitConvertToInteger(HiPiece);
-
-   // On big-endian targets the two pieces trade places before being joined.
-   SDValue Joined = TLI.isBigEndian() ? JoinIntegers(HiPiece, LoPiece)
-                                      : JoinIntegers(LoPiece, HiPiece);
-
-   return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Joined);
- }
-
- SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
-   // The extracted type is legal; redirect the extraction to whichever half
-   // of the split source contains the (constant) start index.
-   SDLoc dl(N);
-   EVT SubVT = N->getValueType(0);
-   SDValue Idx = N->getOperand(1);
-
-   SDValue Lo, Hi;
-   GetSplitVector(N->getOperand(0), Lo, Hi);
-
-   const uint64_t NumLoElts = Lo.getValueType().getVectorNumElements();
-   const uint64_t Start = cast<ConstantSDNode>(Idx)->getZExtValue();
-
-   if (Start >= NumLoElts) {
-     // Starts in the high half: rebase the index onto that half.
-     SDValue HiIdx = DAG.getConstant(Start - NumLoElts, Idx.getValueType());
-     return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Hi, HiIdx);
-   }
-
-   // Starts in the low half, which must then contain all of it.
-   assert(Start + SubVT.getVectorNumElements() <= NumLoElts &&
-          "Extracted subvector crosses vector split!");
-   return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx);
- }
-
- SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
-   // Extract one element from a vector operand that must be split. A constant
-   // index is redirected to the matching half; a variable index goes through
-   // custom lowering or, failing that, a stack temporary.
-   SDValue SrcVec = N->getOperand(0);
-   SDValue EltIdx = N->getOperand(1);
-   EVT SrcVT = SrcVec.getValueType();
-
-   if (!isa<ConstantSDNode>(EltIdx)) {
-     // See if the target wants to custom expand this node.
-     if (CustomLowerNode(N, N->getValueType(0), true))
-       return SDValue();
-
-     // Spill the whole vector to a stack slot ...
-     EVT EltVT = SrcVT.getVectorElementType();
-     SDLoc dl(N);
-     SDValue Slot = DAG.CreateStackTemporary(SrcVT);
-     SDValue Spill = DAG.getStore(DAG.getEntryNode(), dl, SrcVec, Slot,
-                                  MachinePointerInfo(), false, false, 0);
-
-     // ... and load the requested element back from it.
-     SDValue EltPtr = GetVectorElementPointer(Slot, EltVT, EltIdx);
-     return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Spill, EltPtr,
-                           MachinePointerInfo(), EltVT, false, false, false, 0);
-   }
-
-   // Constant index: update N in place to read from the right half.
-   const uint64_t Pos = cast<ConstantSDNode>(EltIdx)->getZExtValue();
-   assert(Pos < SrcVT.getVectorNumElements() && "Invalid vector index!");
-
-   SDValue Lo, Hi;
-   GetSplitVector(SrcVec, Lo, Hi);
-   const uint64_t NumLoElts = Lo.getValueType().getVectorNumElements();
-
-   if (Pos >= NumLoElts) {
-     SDValue HiIdx = DAG.getConstant(Pos - NumLoElts, EltIdx.getValueType());
-     return SDValue(DAG.UpdateNodeOperands(N, Hi, HiIdx), 0);
-   }
-   return SDValue(DAG.UpdateNodeOperands(N, Lo, EltIdx), 0);
- }
-
- SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
-   // Replace a store of a to-be-split vector by two stores, one per half,
-   // and tie their chains together with a TokenFactor.
-   assert(N->isUnindexed() && "Indexed store of vector?");
-   assert(OpNo == 1 && "Can only split the stored value");
-   SDLoc DL(N);
-
-   // Properties of the original store that both new stores inherit.
-   const bool IsTrunc = N->isTruncatingStore();
-   SDValue Chain = N->getChain();
-   SDValue Addr = N->getBasePtr();
-   EVT MemVT = N->getMemoryVT();
-   const unsigned Align = N->getOriginalAlignment();
-   const bool Volatile = N->isVolatile();
-   const bool NonTemporal = N->isNonTemporal();
-   AAMDNodes AAInfo = N->getAAInfo();
-
-   SDValue LoVal, HiVal;
-   GetSplitVector(N->getOperand(1), LoVal, HiVal);
-
-   EVT LoMemVT, HiMemVT;
-   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemVT);
-
-   // Byte distance from the low half to the high half in memory.
-   const unsigned LoBytes = LoMemVT.getSizeInBits()/8;
-
-   // Low half goes to the original address.
-   SDValue LoStore =
-       IsTrunc ? DAG.getTruncStore(Chain, DL, LoVal, Addr, N->getPointerInfo(),
-                                   LoMemVT, Volatile, NonTemporal, Align,
-                                   AAInfo)
-               : DAG.getStore(Chain, DL, LoVal, Addr, N->getPointerInfo(),
-                              Volatile, NonTemporal, Align, AAInfo);
-
-   // High half goes right behind it.
-   Addr = DAG.getNode(ISD::ADD, DL, Addr.getValueType(), Addr,
-                      DAG.getConstant(LoBytes, Addr.getValueType()));
-
-   SDValue HiStore =
-       IsTrunc ? DAG.getTruncStore(Chain, DL, HiVal, Addr,
-                                   N->getPointerInfo().getWithOffset(LoBytes),
-                                   HiMemVT, Volatile, NonTemporal, Align,
-                                   AAInfo)
-               : DAG.getStore(Chain, DL, HiVal, Addr,
-                              N->getPointerInfo().getWithOffset(LoBytes),
-                              Volatile, NonTemporal, Align, AAInfo);
-
-   return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, LoStore, HiStore);
- }
-
- SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
-   // All inputs share one type and the result type is known to be valid, so
-   // rewrite the concatenation as a BUILD_VECTOR over every element of every
-   // input, in order.
-   // TODO: If the input elements are power-two vectors, we could convert this
-   // to a new CONCAT_VECTORS node with elements that are half-wide.
-   SDLoc DL(N);
-   EVT ResVT = N->getValueType(0);
-   EVT EltVT = ResVT.getVectorElementType();
-
-   SmallVector<SDValue, 32> Scalars;
-   const unsigned NumInputs = N->getNumOperands();
-   for (unsigned InIdx = 0; InIdx != NumInputs; ++InIdx) {
-     SDValue SubVec = N->getOperand(InIdx);
-     const unsigned SubElts = SubVec.getValueType().getVectorNumElements();
-     for (unsigned Lane = 0; Lane != SubElts; ++Lane) {
-       SDValue LaneIdx = DAG.getConstant(Lane, TLI.getVectorIdxTy());
-       Scalars.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
-                                     SubVec, LaneIdx));
-     }
-   }
-
-   return DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, Scalars);
- }
-
- SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) {
-   // Legal result type, illegal input type. When the plain split would leave
-   // each half with an illegal result type, truncate in two stages instead:
-   // split the input, truncate each half to elements of half the input width,
-   // concatenate, and truncate again. E.g. on a target where v8i8 is legal
-   // but v8i32 is not (ARM, which doesn't have 256-bit vectors),
-   // "%res = v8i8 trunc v8i32 %in" becomes:
-   //   %inlo = v4i32 extract_subvector %in, 0
-   //   %inhi = v4i32 extract_subvector %in, 4
-   //   %lo16 = v4i16 trunc v4i32 %inlo
-   //   %hi16 = v4i16 trunc v4i32 %inhi
-   //   %in16 = v8i16 concat_vectors v4i16 %lo16, v4i16 %hi16
-   //   %res  = v8i8 trunc v8i16 %in16
-   // Without this the truncate would be scalarized, which is pretty much
-   // always a last resort.
-   SDValue Src = N->getOperand(0);
-   // NOTE(review): this reads result 0 of the operand's node rather than
-   // Src.getValueType(); kept as in the original -- confirm it is intended.
-   EVT SrcVT = Src->getValueType(0);
-   EVT DstVT = N->getValueType(0);
-   const unsigned DstElts = DstVT.getVectorNumElements();
-   // Widening is expected to have produced a power-two vector before any
-   // split is attempted; only evenness is actually checked here.
-   assert(!(DstElts & 1) && "Splitting vector, but not in half!");
-
-   const unsigned SrcEltBits = SrcVT.getVectorElementType().getSizeInBits();
-   const unsigned DstEltBits = DstVT.getVectorElementType().getSizeInBits();
-
-   // The two-stage trick needs room to halve the element width more than
-   // once. When the input elements are at most twice as wide as the result
-   // elements, the ordinary unary split is used.
-   if (SrcEltBits <= DstEltBits * 2)
-     return SplitVecOp_UnaryOp(N);
-   SDLoc DL(N);
-
-   // Stage 1: halve the input and truncate each half to half-width elements.
-   SDValue SrcLo, SrcHi;
-   std::tie(SrcLo, SrcHi) = DAG.SplitVector(Src, DL);
-   EVT MidEltVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltBits/2);
-   EVT MidHalfVT = EVT::getVectorVT(*DAG.getContext(), MidEltVT, DstElts/2);
-   SDValue MidLo = DAG.getNode(ISD::TRUNCATE, DL, MidHalfVT, SrcLo);
-   SDValue MidHi = DAG.getNode(ISD::TRUNCATE, DL, MidHalfVT, SrcHi);
-
-   // Join the halves into the full-length intermediate vector.
-   EVT MidVT = EVT::getVectorVT(*DAG.getContext(), MidEltVT, DstElts);
-   SDValue Mid = DAG.getNode(ISD::CONCAT_VECTORS, DL, MidVT, MidLo, MidHi);
-
-   // Stage 2: truncate down to the requested type. Normally this is directly
-   // legal, but on a target with very wide vectors and an annoyingly
-   // restricted set of legal types this split can chain further.
-   return DAG.getNode(ISD::TRUNCATE, DL, DstVT, Mid);
- }
-
- SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) {
-   assert(N->getValueType(0).isVector() &&
-          N->getOperand(0).getValueType().isVector() &&
-          "Operand types must be vectors");
-   // Legal result type, but both compared inputs need splitting: compare the
-   // halves separately as i1 vectors, concatenate, then convert the i1 vector
-   // to the node's result type.
-   SDLoc DL(N);
-   SDValue LHSLo, LHSHi;
-   GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
-   SDValue RHSLo, RHSHi;
-   GetSplitVector(N->getOperand(1), RHSLo, RHSHi);
-
-   const unsigned HalfElts = LHSLo.getValueType().getVectorNumElements();
-   EVT HalfBoolVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, HalfElts);
-   EVT FullBoolVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, 2*HalfElts);
-
-   // Operand 2 (the condition code) is shared by both half compares.
-   SDValue LoCmp =
-       DAG.getNode(ISD::SETCC, DL, HalfBoolVT, LHSLo, RHSLo, N->getOperand(2));
-   SDValue HiCmp =
-       DAG.getNode(ISD::SETCC, DL, HalfBoolVT, LHSHi, RHSHi, N->getOperand(2));
-   SDValue Joined =
-       DAG.getNode(ISD::CONCAT_VECTORS, DL, FullBoolVT, LoCmp, HiCmp);
-   return PromoteTargetBoolean(Joined, N->getValueType(0));
- }
-
-
- SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) {
-   // Legal result type, split input: round each input half (forwarding the
-   // node's second operand unchanged) and concatenate the rounded halves.
-   SDLoc DL(N);
-   EVT ResVT = N->getValueType(0);
-
-   SDValue LoIn, HiIn;
-   GetSplitVector(N->getOperand(0), LoIn, HiIn);
-
-   // Per-half result type: result element type, low-half element count.
-   EVT HalfResVT =
-       EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
-                        LoIn.getValueType().getVectorNumElements());
-
-   SDValue LoOut =
-       DAG.getNode(ISD::FP_ROUND, DL, HalfResVT, LoIn, N->getOperand(1));
-   SDValue HiOut =
-       DAG.getNode(ISD::FP_ROUND, DL, HalfResVT, HiIn, N->getOperand(1));
-
-   return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, LoOut, HiOut);
- }
-
-
-
-//===----------------------------------------------------------------------===//
-// Result Vector Widening
-//===----------------------------------------------------------------------===//
-
- void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
-   // Widen result number ResNo of N by dispatching on N's opcode to the
-   // matching WidenVecRes_* helper and recording the widened value.
-   DEBUG(dbgs() << "Widen node result " << ResNo << ": ";
-         N->dump(&DAG);
-         dbgs() << "\n");
-
-   // The target gets the first chance to widen this node its own way.
-   if (CustomWidenLowerNode(N, N->getValueType(ResNo)))
-     return;
-
-   SDValue Widened;
-   switch (N->getOpcode()) {
-   // Opcodes with a dedicated helper.
-   case ISD::MERGE_VALUES:
-     Widened = WidenVecRes_MERGE_VALUES(N, ResNo);
-     break;
-   case ISD::BITCAST:
-     Widened = WidenVecRes_BITCAST(N);
-     break;
-   case ISD::BUILD_VECTOR:
-     Widened = WidenVecRes_BUILD_VECTOR(N);
-     break;
-   case ISD::CONCAT_VECTORS:
-     Widened = WidenVecRes_CONCAT_VECTORS(N);
-     break;
-   case ISD::CONVERT_RNDSAT:
-     Widened = WidenVecRes_CONVERT_RNDSAT(N);
-     break;
-   case ISD::EXTRACT_SUBVECTOR:
-     Widened = WidenVecRes_EXTRACT_SUBVECTOR(N);
-     break;
-   case ISD::INSERT_VECTOR_ELT:
-     Widened = WidenVecRes_INSERT_VECTOR_ELT(N);
-     break;
-   case ISD::LOAD:
-     Widened = WidenVecRes_LOAD(N);
-     break;
-   case ISD::SCALAR_TO_VECTOR:
-     Widened = WidenVecRes_SCALAR_TO_VECTOR(N);
-     break;
-   case ISD::SELECT_CC:
-     Widened = WidenVecRes_SELECT_CC(N);
-     break;
-   case ISD::SETCC:
-     Widened = WidenVecRes_SETCC(N);
-     break;
-   case ISD::UNDEF:
-     Widened = WidenVecRes_UNDEF(N);
-     break;
-   case ISD::VECTOR_SHUFFLE:
-     Widened = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N));
-     break;
-   case ISD::FPOWI:
-     Widened = WidenVecRes_POWI(N);
-     break;
-   case ISD::FMA:
-     Widened = WidenVecRes_Ternary(N);
-     break;
-
-   // Both "in register" operations share one helper.
-   case ISD::FP_ROUND_INREG:
-   case ISD::SIGN_EXTEND_INREG:
-     Widened = WidenVecRes_InregOp(N);
-     break;
-
-   // Both select flavours share one helper.
-   case ISD::VSELECT:
-   case ISD::SELECT:
-     Widened = WidenVecRes_SELECT(N);
-     break;
-
-   // Binary operations handled by the plain binary widener.
-   case ISD::ADD:
-   case ISD::AND:
-   case ISD::MUL:
-   case ISD::MULHS:
-   case ISD::MULHU:
-   case ISD::OR:
-   case ISD::SUB:
-   case ISD::XOR:
-   case ISD::FMINNUM:
-   case ISD::FMAXNUM:
-     Widened = WidenVecRes_Binary(N);
-     break;
-
-   // Binary operations routed through the trap-aware widener.
-   case ISD::FADD:
-   case ISD::FCOPYSIGN:
-   case ISD::FMUL:
-   case ISD::FPOW:
-   case ISD::FSUB:
-   case ISD::FDIV:
-   case ISD::FREM:
-   case ISD::SDIV:
-   case ISD::UDIV:
-   case ISD::SREM:
-   case ISD::UREM:
-     Widened = WidenVecRes_BinaryCanTrap(N);
-     break;
-
-   // Shifts.
-   case ISD::SHL:
-   case ISD::SRA:
-   case ISD::SRL:
-     Widened = WidenVecRes_Shift(N);
-     break;
-
-   // Conversions between element types.
-   case ISD::ANY_EXTEND:
-   case ISD::FP_EXTEND:
-   case ISD::FP_ROUND:
-   case ISD::FP_TO_SINT:
-   case ISD::FP_TO_UINT:
-   case ISD::SIGN_EXTEND:
-   case ISD::SINT_TO_FP:
-   case ISD::TRUNCATE:
-   case ISD::UINT_TO_FP:
-   case ISD::ZERO_EXTEND:
-     Widened = WidenVecRes_Convert(N);
-     break;
-
-   // One-input operations.
-   case ISD::BSWAP:
-   case ISD::CTLZ:
-   case ISD::CTPOP:
-   case ISD::CTTZ:
-   case ISD::FABS:
-   case ISD::FCEIL:
-   case ISD::FCOS:
-   case ISD::FEXP:
-   case ISD::FEXP2:
-   case ISD::FFLOOR:
-   case ISD::FLOG:
-   case ISD::FLOG10:
-   case ISD::FLOG2:
-   case ISD::FNEARBYINT:
-   case ISD::FNEG:
-   case ISD::FRINT:
-   case ISD::FROUND:
-   case ISD::FSIN:
-   case ISD::FSQRT:
-   case ISD::FTRUNC:
-     Widened = WidenVecRes_Unary(N);
-     break;
-
-   default:
- #ifndef NDEBUG
-     dbgs() << "WidenVectorResult #" << ResNo << ": ";
-     N->dump(&DAG);
-     dbgs() << "\n";
- #endif
-     llvm_unreachable("Do not know how to widen the result of this operator!");
-   }
-
-   // A null value means the helper already registered the result itself.
-   if (Widened.getNode())
-     SetWidenedVector(SDValue(N, ResNo), Widened);
- }
-
- SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) {
-   // Three-operand widening: widen every operand, then re-emit the same
-   // opcode on the widened type.
-   SDLoc dl(N);
-   EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
-   SDValue A = GetWidenedVector(N->getOperand(0));
-   SDValue B = GetWidenedVector(N->getOperand(1));
-   SDValue C = GetWidenedVector(N->getOperand(2));
-   return DAG.getNode(N->getOpcode(), dl, WideVT, A, B, C);
- }
-
- SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
-   // Two-operand widening: widen both operands, then re-emit the same opcode
-   // on the widened type.
-   SDLoc dl(N);
-   EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
-   SDValue LHS = GetWidenedVector(N->getOperand(0));
-   SDValue RHS = GetWidenedVector(N->getOperand(1));
-   return DAG.getNode(N->getOpcode(), dl, WideVT, LHS, RHS);
- }
-
- SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
- // Binary op widening for operations that can trap. Unless the target reports that the op cannot trap, it is applied only to the original element count, in legal-sized pieces, and the pieces are padded with undef up to the widened type.
- unsigned Opcode = N->getOpcode();
- SDLoc dl(N);
- EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- EVT WidenEltVT = WidenVT.getVectorElementType();
- EVT VT = WidenVT;
- unsigned NumElts = VT.getVectorNumElements();
- while (!TLI.isTypeLegal(VT) && NumElts != 1) { // Halve until VT is legal or one element is left.
- NumElts = NumElts / 2;
- VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
- }
-
- if (NumElts != 1 && !TLI.canOpTrap(N->getOpcode(), VT)) {
- // Operation doesn't trap so just widen as normal.
- SDValue InOp1 = GetWidenedVector(N->getOperand(0));
- SDValue InOp2 = GetWidenedVector(N->getOperand(1));
- return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2);
- }
-
- // No legal vector version so unroll the vector operation and then widen.
- if (NumElts == 1)
- return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
-
- // Since the operation can trap, apply operation on the original vector.
- EVT MaxVT = VT; // Largest legal piece type; smaller pieces are merged back up to it below.
- SDValue InOp1 = GetWidenedVector(N->getOperand(0));
- SDValue InOp2 = GetWidenedVector(N->getOperand(1));
- unsigned CurNumElts = N->getValueType(0).getVectorNumElements(); // Original elements still to be processed.
-
- SmallVector<SDValue, 16> ConcatOps(CurNumElts); // Sized for one piece per original element.
- unsigned ConcatEnd = 0; // Current ConcatOps index.
- int Idx = 0; // Current Idx into input vectors.
-
- // NumElts := greatest legal vector size (at most WidenVT)
- // while (orig. vector has unhandled elements) {
- // take munches of size NumElts from the beginning and add to ConcatOps
- // NumElts := next smaller supported vector size or 1
- // }
- while (CurNumElts != 0) {
- while (CurNumElts >= NumElts) {
- SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1,
- DAG.getConstant(Idx, TLI.getVectorIdxTy()));
- SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2,
- DAG.getConstant(Idx, TLI.getVectorIdxTy()));
- ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2);
- Idx += NumElts;
- CurNumElts -= NumElts;
- }
- do { // Step down to the next smaller legal vector type, or to a single element.
- NumElts = NumElts / 2;
- VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
- } while (!TLI.isTypeLegal(VT) && NumElts != 1);
-
- if (NumElts == 1) { // No smaller vector type is left: handle the remaining elements as scalars.
- for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) {
- SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
- InOp1, DAG.getConstant(Idx,
- TLI.getVectorIdxTy()));
- SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
- InOp2, DAG.getConstant(Idx,
- TLI.getVectorIdxTy()));
- ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT,
- EOp1, EOp2);
- }
- CurNumElts = 0; // Everything has been processed; this ends the outer loop.
- }
- }
-
- // Check to see if we have a single operation with the widen type.
- if (ConcatEnd == 1) {
- VT = ConcatOps[0].getValueType();
- if (VT == WidenVT)
- return ConcatOps[0];
- }
-
- // while (Some element of ConcatOps is not of type MaxVT) {
- // From the end of ConcatOps, collect elements of the same type and put
- // them into an op of the next larger supported type
- // }
- while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) {
- Idx = ConcatEnd - 1;
- VT = ConcatOps[Idx--].getValueType();
- while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT) // Walk back over the trailing run of pieces of type VT.
- Idx--;
-
- int NextSize = VT.isVector() ? VT.getVectorNumElements() : 1;
- EVT NextVT;
- do { // Find the next larger legal vector type by doubling the element count.
- NextSize *= 2;
- NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize);
- } while (!TLI.isTypeLegal(NextVT));
-
- if (!VT.isVector()) {
- // Scalar type, create a chain of INSERT_VECTOR_ELT nodes of type NextVT
- SDValue VecOp = DAG.getUNDEF(NextVT);
- unsigned NumToInsert = ConcatEnd - Idx - 1;
- for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) {
- VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp,
- ConcatOps[OpIdx], DAG.getConstant(i,
- TLI.getVectorIdxTy()));
- }
- ConcatOps[Idx+1] = VecOp; // The merged vector replaces the whole run of scalars.
- ConcatEnd = Idx + 2;
- } else {
- // Vector type, create a CONCAT_VECTORS of type NextVT
- SDValue undefVec = DAG.getUNDEF(VT);
- unsigned OpsToConcat = NextSize/VT.getVectorNumElements();
- SmallVector<SDValue, 16> SubConcatOps(OpsToConcat);
- unsigned RealVals = ConcatEnd - Idx - 1; // Number of real pieces in the run.
- unsigned SubConcatEnd = 0;
- unsigned SubConcatIdx = Idx + 1; // Slot that will receive the merged vector.
- while (SubConcatEnd < RealVals)
- SubConcatOps[SubConcatEnd++] = ConcatOps[++Idx];
- while (SubConcatEnd < OpsToConcat) // Pad the rest of the larger vector with undef.
- SubConcatOps[SubConcatEnd++] = undefVec;
- ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl,
- NextVT, SubConcatOps);
- ConcatEnd = SubConcatIdx + 1;
- }
- }
-
- // Check to see if we have a single operation with the widen type.
- if (ConcatEnd == 1) {
- VT = ConcatOps[0].getValueType();
- if (VT == WidenVT)
- return ConcatOps[0];
- }
-
- // Add undefs of type MaxVT until ConcatOps covers the full length of WidenVT.
- unsigned NumOps = WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements();
- if (NumOps != ConcatEnd ) {
- SDValue UndefVal = DAG.getUNDEF(MaxVT);
- for (unsigned j = ConcatEnd; j < NumOps; ++j)
- ConcatOps[j] = UndefVal;
- }
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
- makeArrayRef(ConcatOps.data(), NumOps));
- }
-
- SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { // Widens the result of a conversion node; a second operand, when present, is forwarded unchanged.
- SDValue InOp = N->getOperand(0);
- SDLoc DL(N);
-
- EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- unsigned WidenNumElts = WidenVT.getVectorNumElements();
-
- EVT InVT = InOp.getValueType();
- EVT InEltVT = InVT.getVectorElementType();
- EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts); // Input element type with the widened result's element count.
-
- unsigned Opcode = N->getOpcode();
- unsigned InVTNumElts = InVT.getVectorNumElements();
-
- if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) { // The input is itself being widened: use its widened form from here on.
- InOp = GetWidenedVector(N->getOperand(0));
- InVT = InOp.getValueType();
- InVTNumElts = InVT.getVectorNumElements();
- if (InVTNumElts == WidenNumElts) { // Element counts already agree, so convert directly.
- if (N->getNumOperands() == 1)
- return DAG.getNode(Opcode, DL, WidenVT, InOp);
- return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1));
- }
- }
-
- if (TLI.isTypeLegal(InWidenVT)) {
- // Because the result and the input are different vector types, widening
- // the result could create a legal type but widening the input might make
- // it an illegal type that might lead to repeatedly splitting the input
- // and then widening it. To avoid this, we widen the input only if
- // it results in a legal type.
- if (WidenNumElts % InVTNumElts == 0) {
- // Widen the input and call convert on the widened input vector.
- unsigned NumConcat = WidenNumElts/InVTNumElts;
- SmallVector<SDValue, 16> Ops(NumConcat);
- Ops[0] = InOp; // The real input comes first; every other slot is undef.
- SDValue UndefVal = DAG.getUNDEF(InVT);
- for (unsigned i = 1; i != NumConcat; ++i)
- Ops[i] = UndefVal;
- SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops);
- if (N->getNumOperands() == 1)
- return DAG.getNode(Opcode, DL, WidenVT, InVec);
- return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1));
- }
-
- if (InVTNumElts % WidenNumElts == 0) {
- SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT,
- InOp, DAG.getConstant(0,
- TLI.getVectorIdxTy()));
- // Extract the leading part of the input and convert the shortened input vector.
- if (N->getNumOperands() == 1)
- return DAG.getNode(Opcode, DL, WidenVT, InVal);
- return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1));
- }
- }
-
- // Otherwise unroll into some nasty scalar code and rebuild the vector.
- SmallVector<SDValue, 16> Ops(WidenNumElts);
- EVT EltVT = WidenVT.getVectorElementType();
- unsigned MinElts = std::min(InVTNumElts, WidenNumElts); // Number of elements that are actually converted.
- unsigned i;
- for (i=0; i < MinElts; ++i) {
- SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp,
- DAG.getConstant(i, TLI.getVectorIdxTy()));
- if (N->getNumOperands() == 1)
- Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val);
- else
- Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1));
- }
-
- SDValue UndefVal = DAG.getUNDEF(EltVT);
- for (; i < WidenNumElts; ++i) // Fill the remaining result elements with undef.
- Ops[i] = UndefVal;
-
- return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, Ops);
- }
-
- SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) {
-   // Only the first operand is widened; the second operand is passed through
-   // unchanged.
-   EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
-   SDValue WideBase = GetWidenedVector(N->getOperand(0));
-   SDValue Power = N->getOperand(1);
-   return DAG.getNode(N->getOpcode(), SDLoc(N), WideVT, WideBase, Power);
- }
-
- SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) {
-   // Widen the shifted value, then bring the shift-amount vector to the same
-   // element count before re-emitting the shift on the widened type.
-   EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
-   SDValue WideVal = GetWidenedVector(N->getOperand(0));
-   SDValue Amt = N->getOperand(1);
-
-   // Pick up the widened shift amount if it is being widened as well.
-   if (getTypeAction(Amt.getValueType()) == TargetLowering::TypeWidenVector)
-     Amt = GetWidenedVector(Amt);
-   EVT AmtVT = Amt.getValueType();
-
-   // The amount keeps its element type but needs the result's element count.
-   EVT WantedAmtVT = EVT::getVectorVT(*DAG.getContext(),
-                                      AmtVT.getVectorElementType(),
-                                      WideVT.getVectorNumElements());
-   if (AmtVT != WantedAmtVT)
-     Amt = ModifyToType(Amt, WantedAmtVT);
-
-   return DAG.getNode(N->getOpcode(), SDLoc(N), WideVT, WideVal, Amt);
- }
-
- SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) {
-   // One-operand widening: widen the operand, then re-emit the same opcode
-   // on the widened type.
-   EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
-   SDValue WideSrc = GetWidenedVector(N->getOperand(0));
-   return DAG.getNode(N->getOpcode(), SDLoc(N), WideVT, WideSrc);
- }
-
- SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) {
-   // Widen an "in register" operation: the value operand is widened and the
-   // type operand is rebuilt with the widened element count.
-   EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
-
-   // Element type named by the node's VT operand, at the widened length.
-   EVT InregEltVT =
-       cast<VTSDNode>(N->getOperand(1))->getVT().getVectorElementType();
-   EVT WideInregVT = EVT::getVectorVT(*DAG.getContext(), InregEltVT,
-                                      WideVT.getVectorNumElements());
-
-   SDValue WideSrc = GetWidenedVector(N->getOperand(0));
-   return DAG.getNode(N->getOpcode(), SDLoc(N), WideVT, WideSrc,
-                      DAG.getValueType(WideInregVT));
- }
-
- SDValue DAGTypeLegalizer::WidenVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo) {
-   // Take the MERGE_VALUES node apart and return the widened form of the
-   // value that stands in for result ResNo.
-   SDValue Merged = DisintegrateMERGE_VALUES(N, ResNo);
-   return GetWidenedVector(Merged);
- }
-
- SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { // Widens the result of a BITCAST, going through a stack slot when no direct form applies.
- SDValue InOp = N->getOperand(0);
- EVT InVT = InOp.getValueType();
- EVT VT = N->getValueType(0);
- EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
- SDLoc dl(N);
-
- switch (getTypeAction(InVT)) { // Cases may replace InOp/InVT with the legalized input before falling out of the switch.
- case TargetLowering::TypeLegal:
- break;
- case TargetLowering::TypePromoteInteger:
- // If the incoming type is a vector that is being promoted, then
- // we know that the elements are arranged differently and that we
- // must perform the conversion using a stack slot.
- if (InVT.isVector())
- break;
-
- // If the InOp is promoted to the same size, convert it. Otherwise,
- // fall out of the switch and widen the promoted input.
- InOp = GetPromotedInteger(InOp);
- InVT = InOp.getValueType();
- if (WidenVT.bitsEq(InVT))
- return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp);
- break;
- case TargetLowering::TypeSoftenFloat:
- case TargetLowering::TypeExpandInteger:
- case TargetLowering::TypeExpandFloat:
- case TargetLowering::TypeScalarizeVector:
- case TargetLowering::TypeSplitVector:
- break; // These actions get no special handling; the generic code below deals with them.
- case TargetLowering::TypeWidenVector:
- // If the InOp is widened to the same size, convert it. Otherwise, fall
- // out of the switch and widen the widened input.
- InOp = GetWidenedVector(InOp);
- InVT = InOp.getValueType();
- if (WidenVT.bitsEq(InVT))
- // The input widens to the same size. Convert to the widened value.
- return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp);
- break;
- }
-
- unsigned WidenSize = WidenVT.getSizeInBits();
- unsigned InSize = InVT.getSizeInBits();
- // x86mmx is not an acceptable vector element type, so don't try.
- if (WidenSize % InSize == 0 && InVT != MVT::x86mmx) {
- // Determine new input vector type. The new input vector type will use
- // the same element type (if it's a vector) or use the input type as a
- // vector. It is the same size as the type to widen to.
- EVT NewInVT;
- unsigned NewNumElts = WidenSize / InSize; // Number of InVT-sized pieces that fill WidenVT.
- if (InVT.isVector()) {
- EVT InEltVT = InVT.getVectorElementType();
- NewInVT = EVT::getVectorVT(*DAG.getContext(), InEltVT,
- WidenSize / InEltVT.getSizeInBits());
- } else {
- NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts);
- }
-
- if (TLI.isTypeLegal(NewInVT)) {
- // Because the result and the input are different vector types, widening
- // the result could create a legal type but widening the input might make
- // it an illegal type that might lead to repeatedly splitting the input
- // and then widening it. To avoid this, we widen the input only if
- // it results in a legal type.
- SmallVector<SDValue, 16> Ops(NewNumElts);
- SDValue UndefVal = DAG.getUNDEF(InVT);
- Ops[0] = InOp; // The real input comes first; every other slot is undef.
- for (unsigned i = 1; i < NewNumElts; ++i)
- Ops[i] = UndefVal;
-
- SDValue NewVec;
- if (InVT.isVector())
- NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewInVT, Ops);
- else
- NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, NewInVT, Ops);
- return DAG.getNode(ISD::BITCAST, dl, WidenVT, NewVec);
- }
- }
-
- return CreateStackStoreLoad(InOp, WidenVT); // Last resort: convert through memory.
- }
-
-// Widen the result of a BUILD_VECTOR: keep the existing operands and append
-// UNDEF operands until there is one operand per element of the widened type.
-SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) {
-  SDLoc Loc(N);
-  EVT OrigVT = N->getValueType(0);
-  EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), OrigVT);
-
-  unsigned NumElts = OrigVT.getVectorNumElements();
-  unsigned WidenNumElts = WideVT.getVectorNumElements();
-  assert(WidenNumElts >= NumElts && "Shrinking vector instead of widening!");
-
-  // The operands of an integer BUILD_VECTOR may be wider than the vector's
-  // element type, so the padding UNDEFs take the type of the first operand
-  // rather than the element type of the result.
-  EVT OperandVT = N->getOperand(0).getValueType();
-
-  SmallVector<SDValue, 16> Operands(N->op_begin(), N->op_end());
-  Operands.append(WidenNumElts - NumElts, DAG.getUNDEF(OperandVT));
-  return DAG.getNode(ISD::BUILD_VECTOR, Loc, WideVT, Operands);
-}
-
-// Widen the result of a CONCAT_VECTORS.
-//
-// Strategies, in order of preference:
-//  * operands are not widened and the widened type is a whole multiple of the
-//    operand type: pad the operand list with UNDEF vectors;
-//  * operands widen to the same type as the result and all but the first are
-//    UNDEF: return the widened first operand;
-//  * operands widen to the same type as the result and there are exactly two:
-//    emit a shuffle of the two widened operands;
-//  * otherwise: extract every element and emit a BUILD_VECTOR.
-SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
-  SDLoc Loc(N);
-  EVT PieceVT = N->getOperand(0).getValueType();
-  EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
-  const unsigned WideCount = WideVT.getVectorNumElements();
-  const unsigned PieceCount = PieceVT.getVectorNumElements();
-  const unsigned OperandCount = N->getNumOperands();
-
-  // True when the operands themselves are legalized by widening.
-  const bool OperandsWidened =
-      getTypeAction(PieceVT) == TargetLowering::TypeWidenVector;
-
-  if (!OperandsWidened) {
-    if (WideCount % PieceCount == 0) {
-      // Start from an all-UNDEF operand list of the right length and drop the
-      // original operands into the leading slots.
-      SDValue UndefPiece = DAG.getUNDEF(PieceVT);
-      SmallVector<SDValue, 16> Pieces(WideCount / PieceCount, UndefPiece);
-      for (unsigned Op = 0; Op < OperandCount; ++Op)
-        Pieces[Op] = N->getOperand(Op);
-      return DAG.getNode(ISD::CONCAT_VECTORS, Loc, WideVT, Pieces);
-    }
-  } else if (WideVT == TLI.getTypeToTransformTo(*DAG.getContext(), PieceVT)) {
-    // The operands and the result widen to the same type.  Find the first
-    // operand after operand 0 that is not an UNDEF.
-    unsigned FirstDefined = 1;
-    while (FirstDefined < OperandCount &&
-           N->getOperand(FirstDefined).getOpcode() == ISD::UNDEF)
-      ++FirstDefined;
-
-    // Only operand 0 carries data, so its widened form is the answer.
-    if (FirstDefined == OperandCount)
-      return GetWidenedVector(N->getOperand(0));
-
-    if (OperandCount == 2) {
-      // A two-operand concat becomes a shuffle: the low lanes come from the
-      // first widened operand, the next lanes from the second, and the
-      // remaining lanes stay undefined (-1).
-      SmallVector<int, 16> Mask(WideCount, -1);
-      for (unsigned Lane = 0; Lane < PieceCount; ++Lane) {
-        Mask[Lane] = Lane;
-        Mask[Lane + PieceCount] = Lane + WideCount;
-      }
-      return DAG.getVectorShuffle(WideVT, Loc,
-                                  GetWidenedVector(N->getOperand(0)),
-                                  GetWidenedVector(N->getOperand(1)),
-                                  &Mask[0]);
-    }
-  }
-
-  // General fallback: pull out each original element individually, then pad
-  // with UNDEF elements and rebuild the vector.
-  EVT EltVT = WideVT.getVectorElementType();
-  SmallVector<SDValue, 16> Elts(WideCount);
-  unsigned Next = 0;
-  for (unsigned Op = 0; Op < OperandCount; ++Op) {
-    SDValue Src = N->getOperand(Op);
-    if (OperandsWidened)
-      Src = GetWidenedVector(Src);
-    for (unsigned Lane = 0; Lane < PieceCount; ++Lane)
-      Elts[Next++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, Loc, EltVT, Src,
-                                 DAG.getConstant(Lane, TLI.getVectorIdxTy()));
-  }
-  SDValue UndefElt = DAG.getUNDEF(EltVT);
-  while (Next < WideCount)
-    Elts[Next++] = UndefElt;
-  return DAG.getNode(ISD::BUILD_VECTOR, Loc, WideVT, Elts);
-}
-
-// Widen the result of a CONVERT_RNDSAT node.
-//
-// Operand 0 is the vector being converted; operands 3 and 4 (rounding and
-// saturation) are forwarded unchanged to every conversion created here.
-//
-// Strategies, in order:
-//  * the input is itself widened and ends up with as many elements as the
-//    widened result: convert the widened input directly;
-//  * the input type with the widened element count is legal: either pad the
-//    input with UNDEF vectors or take its leading subvector, then convert;
-//  * otherwise: convert element by element and rebuild the vector.
-SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
-  SDLoc dl(N);
-  SDValue InOp = N->getOperand(0);
-  SDValue RndOp = N->getOperand(3);
-  SDValue SatOp = N->getOperand(4);
-
-  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
-  unsigned WidenNumElts = WidenVT.getVectorNumElements();
-
-  // Input type with the same element count as the widened result.
-  EVT InVT = InOp.getValueType();
-  EVT InEltVT = InVT.getVectorElementType();
-  EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts);
-
-  SDValue DTyOp = DAG.getValueType(WidenVT);
-  SDValue STyOp = DAG.getValueType(InWidenVT);
-  ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
-
-  unsigned InVTNumElts = InVT.getVectorNumElements();
-  if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
-    InOp = GetWidenedVector(InOp);
-    InVT = InOp.getValueType();
-    InVTNumElts = InVT.getVectorNumElements();
-    if (InVTNumElts == WidenNumElts)
-      return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
-                                  SatOp, CvtCode);
-  }
-
-  if (TLI.isTypeLegal(InWidenVT)) {
-    // Because the result and the input are different vector types, widening
-    // the result could create a legal type but widening the input might make
-    // it an illegal type that might lead to repeatedly splitting the input
-    // and then widening it. To avoid this, we widen the input only if
-    // it results in a legal type.
-    if (WidenNumElts % InVTNumElts == 0) {
-      // Widen the input and call convert on the widened input vector.
-      unsigned NumConcat = WidenNumElts / InVTNumElts;
-      SmallVector<SDValue, 16> Ops(NumConcat);
-      Ops[0] = InOp;
-      SDValue UndefVal = DAG.getUNDEF(InVT);
-      for (unsigned i = 1; i != NumConcat; ++i)
-        Ops[i] = UndefVal;
-
-      InOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT, Ops);
-      return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
-                                  SatOp, CvtCode);
-    }
-
-    if (InVTNumElts % WidenNumElts == 0) {
-      // Extract the leading part of the input and convert the shortened
-      // input vector.
-      InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, InOp,
-                         DAG.getConstant(0, TLI.getVectorIdxTy()));
-      return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
-                                  SatOp, CvtCode);
-    }
-  }
-
-  // Otherwise unroll into scalar conversions and rebuild the vector.
-  SmallVector<SDValue, 16> Ops(WidenNumElts);
-  EVT EltVT = WidenVT.getVectorElementType();
-  DTyOp = DAG.getValueType(EltVT);
-  STyOp = DAG.getValueType(InEltVT);
-
-  unsigned MinElts = std::min(InVTNumElts, WidenNumElts);
-  unsigned i;
-  for (i = 0; i < MinElts; ++i) {
-    SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
-                                 DAG.getConstant(i, TLI.getVectorIdxTy()));
-    // Each conversion here is scalar, so its result type must be the element
-    // type (matching DTyOp), not the widened vector type: the results are
-    // used as BUILD_VECTOR operands below.
-    Ops[i] = DAG.getConvertRndSat(EltVT, dl, ExtVal, DTyOp, STyOp, RndOp,
-                                  SatOp, CvtCode);
-  }
-
-  // Lanes beyond the available input elements are undefined.
-  SDValue UndefVal = DAG.getUNDEF(EltVT);
-  for (; i < WidenNumElts; ++i)
-    Ops[i] = UndefVal;
-
-  return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops);
-}
-
-// Widen the result of an EXTRACT_SUBVECTOR whose index operand is a constant.
-// Returns the (possibly widened) input itself, a wider EXTRACT_SUBVECTOR, or
-// a BUILD_VECTOR of the extracted elements padded with UNDEF.
-SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
-  SDLoc Loc(N);
-  EVT OrigVT = N->getValueType(0);
-  EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), OrigVT);
-  const unsigned WideCount = WideVT.getVectorNumElements();
-
-  SDValue Src = N->getOperand(0);
-  SDValue IdxOp = N->getOperand(1);
-  if (getTypeAction(Src.getValueType()) == TargetLowering::TypeWidenVector)
-    Src = GetWidenedVector(Src);
-  EVT SrcVT = Src.getValueType();
-
-  const uint64_t Start = cast<ConstantSDNode>(IdxOp)->getZExtValue();
-
-  // Extracting from lane 0 of a source that already has the widened type is
-  // just the source.
-  if (Start == 0 && SrcVT == WideVT)
-    return Src;
-
-  // A wide extract is used when the start index is a multiple of the widened
-  // element count and the wide window ends strictly before the end of the
-  // source.
-  const unsigned SrcCount = SrcVT.getVectorNumElements();
-  if (Start % WideCount == 0 && Start + WideCount < SrcCount)
-    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, Loc, WideVT, Src, IdxOp);
-
-  // Otherwise extract just the originally requested elements one at a time
-  // and leave the extra lanes undefined.
-  EVT EltVT = OrigVT.getVectorElementType();
-  const unsigned OrigCount = OrigVT.getVectorNumElements();
-  SmallVector<SDValue, 16> Elts(WideCount);
-  unsigned Lane = 0;
-  for (; Lane < OrigCount; ++Lane)
-    Elts[Lane] =
-        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, Loc, EltVT, Src,
-                    DAG.getConstant(Start + Lane, TLI.getVectorIdxTy()));
-
-  SDValue UndefElt = DAG.getUNDEF(EltVT);
-  for (; Lane < WideCount; ++Lane)
-    Elts[Lane] = UndefElt;
-  return DAG.getNode(ISD::BUILD_VECTOR, Loc, WideVT, Elts);
-}
-
-// Widen the result of an INSERT_VECTOR_ELT: perform the same insertion (same
-// value and index operands) into the widened form of the vector operand.
-SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
-  SDValue WideVec = GetWidenedVector(N->getOperand(0));
-  SDValue NewElt = N->getOperand(1);
-  SDValue Position = N->getOperand(2);
-  return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), WideVec.getValueType(),
-                     WideVec, NewElt, Position);
-}
-
-// Widen the result of a vector load.  The load is re-emitted as one or more
-// loads by the GenWidenVector*Loads helpers, and every user of the original
-// chain result is redirected to the chain of the new load(s).
-SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
-  LoadSDNode *Load = cast<LoadSDNode>(N);
-  const ISD::LoadExtType Ext = Load->getExtensionType();
-
-  // Output chains of the loads created by the helper.
-  SmallVector<SDValue, 16> Chains;
-  SDValue Widened = (Ext != ISD::NON_EXTLOAD)
-                        ? GenWidenVectorExtLoads(Chains, Load, Ext)
-                        : GenWidenVectorLoads(Chains, Load);
-
-  // A single load's chain is used as is; several loads are independent of
-  // each other, so their chains are merged with a TokenFactor.
-  SDValue MergedChain =
-      Chains.size() == 1
-          ? Chains[0]
-          : DAG.getNode(ISD::TokenFactor, SDLoc(Load), MVT::Other, Chains);
-
-  // Result #1 of the original node is its chain; switch its users over.
-  ReplaceValueWith(SDValue(N, 1), MergedChain);
-
-  return Widened;
-}
-
-// Widen the result of a SCALAR_TO_VECTOR by re-emitting it with the widened
-// result type and the same scalar operand.
-SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) {
-  SDValue Scalar = N->getOperand(0);
-  EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
-  return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), WideVT, Scalar);
-}
-
-// Widen the result of a select-like node (the original opcode is reused).
-// Operand 0 is the condition, operands 1 and 2 are the selected values.  A
-// vector condition is brought to the widened element count as well; a scalar
-// condition is used unchanged.
-SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
-  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
-  const unsigned WideCount = WidenVT.getVectorNumElements();
-
-  SDValue Cond = N->getOperand(0);
-  EVT CondVT = Cond.getValueType();
-  if (CondVT.isVector()) {
-    // Condition type with the same element count as the widened result.
-    EVT WideCondVT = EVT::getVectorVT(*DAG.getContext(),
-                                      CondVT.getVectorElementType(), WideCount);
-    const TargetLowering::LegalizeTypeAction CondAction =
-        getTypeAction(CondVT);
-
-    if (CondAction == TargetLowering::TypeWidenVector)
-      Cond = GetWidenedVector(Cond);
-
-    // If the condition has to be split, widening the select would cycle:
-    // widen select -> widen condition -> split condition -> split select ->
-    // widen select.  Split this select instead and widen what comes out.
-    if (CondAction == TargetLowering::TypeSplitVector) {
-      SDValue SplitSelect = SplitVecOp_VSELECT(N, 0);
-      return ModifyToType(SplitSelect, WidenVT);
-    }
-
-    if (Cond.getValueType() != WideCondVT)
-      Cond = ModifyToType(Cond, WideCondVT);
-  }
-
-  SDValue InOp1 = GetWidenedVector(N->getOperand(1));
-  SDValue InOp2 = GetWidenedVector(N->getOperand(2));
-  assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT);
-  return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, Cond, InOp1, InOp2);
-}
-
-// Widen the result of a SELECT_CC.  Only the two selected values (operands 2
-// and 3) are widened; operands 0, 1 and 4 are passed through unchanged.
-SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) {
-  SDValue WideTrue = GetWidenedVector(N->getOperand(2));
-  SDValue WideFalse = GetWidenedVector(N->getOperand(3));
-  EVT WideVT = WideTrue.getValueType();
-  return DAG.getNode(ISD::SELECT_CC, SDLoc(N), WideVT, N->getOperand(0),
-                     N->getOperand(1), WideTrue, WideFalse, N->getOperand(4));
-}
-
-// Widen the result of a SETCC.  A vector-result compare is delegated to
-// WidenVecRes_VSETCC; otherwise both compared operands are widened and the
-// compare is re-emitted with the widened result type.
-SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) {
-  assert(N->getValueType(0).isVector() ==
-         N->getOperand(0).getValueType().isVector() &&
-         "Scalar/Vector type mismatch");
-
-  if (N->getValueType(0).isVector())
-    return WidenVecRes_VSETCC(N);
-
-  EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
-  SDValue WideLHS = GetWidenedVector(N->getOperand(0));
-  SDValue WideRHS = GetWidenedVector(N->getOperand(1));
-  SDValue CondCode = N->getOperand(2);
-  return DAG.getNode(ISD::SETCC, SDLoc(N), WideVT, WideLHS, WideRHS, CondCode);
-}
-
-// Widen an UNDEF: the result is simply an UNDEF of the widened type.
-SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) {
-  return DAG.getUNDEF(
-      TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)));
-}
-
-// Widen the result of a VECTOR_SHUFFLE.  Both inputs are widened and the mask
-// is rewritten for the new input length; the added lanes are undefined.
-SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) {
-  SDLoc Loc(N);
-  EVT OrigVT = N->getValueType(0);
-  EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), OrigVT);
-  const unsigned OrigCount = OrigVT.getVectorNumElements();
-  const unsigned WideCount = WideVT.getVectorNumElements();
-
-  SDValue WideLHS = GetWidenedVector(N->getOperand(0));
-  SDValue WideRHS = GetWidenedVector(N->getOperand(1));
-
-  // Every lane starts out undefined (-1); only the original lanes are filled.
-  SmallVector<int, 16> WideMask(WideCount, -1);
-  for (unsigned Lane = 0; Lane != OrigCount; ++Lane) {
-    int Src = N->getMaskElt(Lane);
-    // Indices below the original element count (including -1) are kept as
-    // they are.  Larger indices refer to the second input, whose lanes now
-    // start at WideCount instead of OrigCount.
-    if (Src >= (int)OrigCount)
-      Src = Src - OrigCount + WideCount;
-    WideMask[Lane] = Src;
-  }
-  return DAG.getVectorShuffle(WideVT, Loc, WideLHS, WideRHS, &WideMask[0]);
-}
-
-// Widen the result of a vector SETCC.  Both compared operands are widened and
-// are expected (asserted) to end up with the same element count as the
-// widened result.
-SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {
-  assert(N->getValueType(0).isVector() &&
-         N->getOperand(0).getValueType().isVector() &&
-         "Operands must be vectors");
-  EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
-
-  EVT InVT = N->getOperand(0).getValueType();
-  assert(InVT.isVector() && "can not widen non-vector type");
-
-  // The operand type we expect after widening: the operand's element type
-  // with the widened result's element count.
-  EVT WidenInVT =
-      EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
-                       WideVT.getVectorNumElements());
-  SDValue InOp1 = GetWidenedVector(N->getOperand(0));
-  SDValue InOp2 = GetWidenedVector(N->getOperand(1));
-
-  // Assume that the input and output are widened consistently.  If not, the
-  // operation would have to be unrolled at some point.
-  assert(InOp1.getValueType() == WidenInVT &&
-         InOp2.getValueType() == WidenInVT &&
-         "Input not widened to expected type!");
-  (void)WidenInVT; // Only referenced by the assert above.
-  return DAG.getNode(ISD::SETCC, SDLoc(N), WideVT, InOp1, InOp2,
-                     N->getOperand(2));
-}
-
-
-//===----------------------------------------------------------------------===//
-// Widen Vector Operand
-//===----------------------------------------------------------------------===//
-// Legalize operand OpNo of N, which must be widened, by dispatching on N's
-// opcode.  Returns true only when N itself was updated in place; returns
-// false when the target custom-lowered the node, when the handler registered
-// its own result, or after N's single result has been replaced.
-bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
-  DEBUG(dbgs() << "Widen node operand " << OpNo << ": ";
-        N->dump(&DAG);
-        dbgs() << "\n");
-
-  // Give the target the first chance to custom widen this node.
-  if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
-    return false;
-
-  SDValue Res;
-  switch (N->getOpcode()) {
-  case ISD::BITCAST:
-    Res = WidenVecOp_BITCAST(N);
-    break;
-  case ISD::CONCAT_VECTORS:
-    Res = WidenVecOp_CONCAT_VECTORS(N);
-    break;
-  case ISD::EXTRACT_SUBVECTOR:
-    Res = WidenVecOp_EXTRACT_SUBVECTOR(N);
-    break;
-  case ISD::EXTRACT_VECTOR_ELT:
-    Res = WidenVecOp_EXTRACT_VECTOR_ELT(N);
-    break;
-  case ISD::STORE:
-    Res = WidenVecOp_STORE(N);
-    break;
-  case ISD::SETCC:
-    Res = WidenVecOp_SETCC(N);
-    break;
-
-  case ISD::ANY_EXTEND:
-  case ISD::SIGN_EXTEND:
-  case ISD::ZERO_EXTEND:
-    Res = WidenVecOp_EXTEND(N);
-    break;
-
-  case ISD::FP_EXTEND:
-  case ISD::FP_TO_SINT:
-  case ISD::FP_TO_UINT:
-  case ISD::SINT_TO_FP:
-  case ISD::UINT_TO_FP:
-  case ISD::TRUNCATE:
-    Res = WidenVecOp_Convert(N);
-    break;
-
-  default:
-#ifndef NDEBUG
-    dbgs() << "WidenVectorOperand op #" << OpNo << ": ";
-    N->dump(&DAG);
-    dbgs() << "\n";
-#endif
-    llvm_unreachable("Do not know how to widen this operator's operand!");
-  }
-
-  // A null result means the handler already registered the result itself.
-  if (!Res.getNode())
-    return false;
-
-  // The handler updated N in place; let the legalizer core know.
-  if (Res.getNode() == N)
-    return true;
-
-  assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
-         "Invalid operand expansion");
-
-  ReplaceValueWith(SDValue(N, 0), Res);
-  return false;
-}
-
-// Legalize the widened operand of an ANY_EXTEND / SIGN_EXTEND / ZERO_EXTEND.
-//
-// When the operand is legalized by widening, the widened operand is resized
-// (if necessary) to a legal vector type with the same total size as the
-// result, and the extension is expressed with the corresponding
-// *_EXTEND_VECTOR_INREG helper, which extends the low lanes.  In every other
-// situation the work is handed to WidenVecOp_Convert.
-SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) {
-  SDLoc DL(N);
-  EVT VT = N->getValueType(0);
-
-  SDValue InOp = N->getOperand(0);
-  // If some legalization strategy other than widening is used on the operand,
-  // we can't safely assume that just extending the low lanes is the correct
-  // transformation.
-  if (getTypeAction(InOp.getValueType()) != TargetLowering::TypeWidenVector)
-    return WidenVecOp_Convert(N);
-  InOp = GetWidenedVector(InOp);
-  assert(VT.getVectorNumElements() <
-             InOp.getValueType().getVectorNumElements() &&
-         "Input wasn't widened!");
-
-  // We may need to further widen the operand until it has the same total
-  // vector size as the result.
-  EVT InVT = InOp.getValueType();
-  if (InVT.getSizeInBits() != VT.getSizeInBits()) {
-    EVT InEltVT = InVT.getVectorElementType();
-    // Scan every simple vector type, both ends of the range included, for a
-    // legal one with the operand's element type and the result's total size.
-    for (int i = MVT::FIRST_VECTOR_VALUETYPE, e = MVT::LAST_VECTOR_VALUETYPE;
-         i <= e; ++i) {
-      EVT FixedVT = (MVT::SimpleValueType)i;
-      EVT FixedEltVT = FixedVT.getVectorElementType();
-      if (TLI.isTypeLegal(FixedVT) &&
-          FixedVT.getSizeInBits() == VT.getSizeInBits() &&
-          FixedEltVT == InEltVT) {
-        assert(FixedVT.getVectorNumElements() >= VT.getVectorNumElements() &&
-               "Not enough elements in the fixed type for the operand!");
-        assert(FixedVT.getVectorNumElements() != InVT.getVectorNumElements() &&
-               "We can't have the same type as we started with!");
-        // Grow by inserting into an UNDEF vector, or shrink by taking the
-        // leading subvector.
-        if (FixedVT.getVectorNumElements() > InVT.getVectorNumElements())
-          InOp = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, FixedVT,
-                             DAG.getUNDEF(FixedVT), InOp,
-                             DAG.getConstant(0, TLI.getVectorIdxTy()));
-        else
-          InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, FixedVT, InOp,
-                             DAG.getConstant(0, TLI.getVectorIdxTy()));
-        break;
-      }
-    }
-    InVT = InOp.getValueType();
-    if (InVT.getSizeInBits() != VT.getSizeInBits())
-      // We couldn't find a legal vector type that was a widening of the input
-      // and could be extended in-register to the result type, so we have to
-      // scalarize.
-      return WidenVecOp_Convert(N);
-  }
-
-  // Use special DAG nodes to represent the operation of extending the
-  // low lanes.
-  switch (N->getOpcode()) {
-  default:
-    llvm_unreachable("Extend legalization on non-extend operation!");
-  case ISD::ANY_EXTEND:
-    return DAG.getAnyExtendVectorInReg(InOp, DL, VT);
-  case ISD::SIGN_EXTEND:
-    return DAG.getSignExtendVectorInReg(InOp, DL, VT);
-  case ISD::ZERO_EXTEND:
-    return DAG.getZeroExtendVectorInReg(InOp, DL, VT);
-  }
-}
-
-// Legalize the widened operand of a conversion by unrolling it: each result
-// element is produced by applying N's opcode to the matching element
-// extracted from the (widened, when applicable) input, and the scalar results
-// are gathered with a BUILD_VECTOR of the original result type.
-SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
-  SDLoc Loc(N);
-  EVT ResVT = N->getValueType(0);
-  EVT ResEltVT = ResVT.getVectorElementType();
-  const unsigned ResCount = ResVT.getVectorNumElements();
-  const unsigned Opcode = N->getOpcode();
-
-  // The result is legal but the input is not, so finding a legal input type
-  // is unlikely; work on scalars instead.
-  SDValue Src = N->getOperand(0);
-  if (getTypeAction(Src.getValueType()) == TargetLowering::TypeWidenVector)
-    Src = GetWidenedVector(Src);
-  EVT SrcEltVT = Src.getValueType().getVectorElementType();
-
-  SmallVector<SDValue, 16> Scalars;
-  Scalars.reserve(ResCount);
-  for (unsigned Lane = 0; Lane < ResCount; ++Lane) {
-    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, Loc, SrcEltVT, Src,
-                              DAG.getConstant(Lane, TLI.getVectorIdxTy()));
-    Scalars.push_back(DAG.getNode(Opcode, Loc, ResEltVT, Elt));
-  }
-
-  return DAG.getNode(ISD::BUILD_VECTOR, Loc, ResVT, Scalars);
-}
-
-// Legalize the widened operand of a BITCAST.  For a non-vector result the
-// widened input is bitcast to a legal vector of the result type when one
-// exists and element 0 is extracted; otherwise the conversion goes through a
-// stack slot.
-SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) {
-  SDLoc Loc(N);
-  EVT ResVT = N->getValueType(0);
-  SDValue WideIn = GetWidenedVector(N->getOperand(0));
-
-  const unsigned WideBits = WideIn.getValueType().getSizeInBits();
-  const unsigned ResBits = ResVT.getSizeInBits();
-
-  // x86mmx is not an acceptable vector element type, so don't try.
-  const bool CanUseVector =
-      WideBits % ResBits == 0 && !ResVT.isVector() && ResVT != MVT::x86mmx;
-  if (CanUseVector) {
-    EVT VecOfResVT =
-        EVT::getVectorVT(*DAG.getContext(), ResVT, WideBits / ResBits);
-    if (TLI.isTypeLegal(VecOfResVT)) {
-      SDValue Cast = DAG.getNode(ISD::BITCAST, Loc, VecOfResVT, WideIn);
-      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, Loc, ResVT, Cast,
-                         DAG.getConstant(0, TLI.getVectorIdxTy()));
-    }
-  }
-
-  return CreateStackStoreLoad(WideIn, ResVT);
-}
-
-// Legalize the widened operands of a CONCAT_VECTORS.  Since the input type is
-// not legal, a legal vector of the same size is unlikely to exist, so the
-// concat is replaced by a BUILD_VECTOR of individually extracted elements.
-SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
-  SDLoc Loc(N);
-  EVT ResVT = N->getValueType(0);
-  EVT ResEltVT = ResVT.getVectorElementType();
-
-  // Number of elements each original (unwidened) operand contributes.
-  const unsigned PerOperand =
-      N->getOperand(0).getValueType().getVectorNumElements();
-  const unsigned OperandCount = N->getNumOperands();
-
-  SmallVector<SDValue, 16> Elts(ResVT.getVectorNumElements());
-  unsigned Next = 0;
-  for (unsigned Op = 0; Op < OperandCount; ++Op) {
-    SDValue Src = N->getOperand(Op);
-    if (getTypeAction(Src.getValueType()) == TargetLowering::TypeWidenVector)
-      Src = GetWidenedVector(Src);
-    // Only the original lanes are read, never the widening padding.
-    for (unsigned Lane = 0; Lane < PerOperand; ++Lane, ++Next)
-      Elts[Next] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, Loc, ResEltVT, Src,
-                               DAG.getConstant(Lane, TLI.getVectorIdxTy()));
-  }
-  return DAG.getNode(ISD::BUILD_VECTOR, Loc, ResVT, Elts);
-}
-
-// Legalize the widened operand of an EXTRACT_SUBVECTOR: extract the same
-// result type at the same index from the widened vector.
-SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
-  SDValue WideVec = GetWidenedVector(N->getOperand(0));
-  SDValue Index = N->getOperand(1);
-  EVT ResVT = N->getValueType(0);
-  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), ResVT, WideVec, Index);
-}
-
-// Legalize the widened operand of an EXTRACT_VECTOR_ELT: extract the same
-// element index from the widened vector.
-SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
-  SDValue WideVec = GetWidenedVector(N->getOperand(0));
-  SDValue Index = N->getOperand(1);
-  EVT ResVT = N->getValueType(0);
-  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), ResVT, WideVec, Index);
-}
-
-// Legalize the widened value operand of a store.  The value is widened, but
-// only the original vector type is meant to be stored; the store helpers
-// emit the store(s) and their chains are returned, merged with a TokenFactor
-// when there is more than one.
-SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
-  StoreSDNode *Store = cast<StoreSDNode>(N);
-
-  SmallVector<SDValue, 16> Chains;
-  if (Store->isTruncatingStore())
-    GenWidenVectorTruncStores(Chains, Store);
-  else
-    GenWidenVectorStores(Chains, Store);
-
-  return Chains.size() == 1
-             ? Chains[0]
-             : DAG.getNode(ISD::TokenFactor, SDLoc(Store), MVT::Other, Chains);
-}
-
-// Legalize the widened operands of a SETCC whose result type is not widened:
-// compare the widened operands, take the leading lanes of the wide result,
-// and convert them to the original result type.
-SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
-  SDLoc Loc(N);
-  SDValue WideLHS = GetWidenedVector(N->getOperand(0));
-  SDValue WideRHS = GetWidenedVector(N->getOperand(1));
-  EVT ResVT = N->getValueType(0);
-
-  // WARNING: the extra lanes of the widened operands hold garbage.  That
-  // garbage may contain denormal floats, which may be slow to compare.  It is
-  // an open question whether the unused lanes should be zeroed for float
-  // compares.
-
-  // Compare the widened operands; only some of the compared lanes are
-  // meaningful.
-  EVT WideCCVT =
-      TLI.getSetCCResultType(*DAG.getContext(), WideLHS.getValueType());
-  SDValue WideCC = DAG.getNode(ISD::SETCC, Loc, WideCCVT, WideLHS, WideRHS,
-                               N->getOperand(2));
-
-  // Keep only as many lanes as the original result has.
-  EVT NarrowCCVT =
-      EVT::getVectorVT(*DAG.getContext(), WideCCVT.getVectorElementType(),
-                       ResVT.getVectorNumElements());
-  SDValue NarrowCC =
-      DAG.getNode(ISD::EXTRACT_SUBVECTOR, Loc, NarrowCCVT, WideCC,
-                  DAG.getConstant(0, TLI.getVectorIdxTy()));
-
-  return PromoteTargetBoolean(NarrowCC, ResVT);
-}
-
-
-//===----------------------------------------------------------------------===//
-// Vector Widening Utilities
-//===----------------------------------------------------------------------===//
-
-// Pick the memory type used for the next piece when a widened vector
-// load/store is chopped up.
-//  TLI:     Target lowering, queried for which types are legal.
-//  Width:   Number of bits that still have to be loaded/stored.
-//  WidenVT: The widened vector type being loaded to / stored from.
-//  Align:   Alignment in bytes; 0 forbids picking a type wider than Width.
-//  WidenEx: When Align is non-zero, how many bits beyond Width may
-//           additionally be accessed.
-// Returns the element type of WidenVT when exactly one element is left or
-// when nothing larger qualifies; otherwise a legal integer or vector type.
-static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
-                       unsigned Width, EVT WidenVT,
-                       unsigned Align = 0, unsigned WidenEx = 0) {
-  const EVT EltVT = WidenVT.getVectorElementType();
-  const unsigned TotalBits = WidenVT.getSizeInBits();
-  const unsigned EltBits = EltVT.getSizeInBits();
-  const unsigned AlignBits = Align * 8;
-
-  // Exactly one element left: use the element type itself.
-  if (Width == EltBits)
-    return EltVT;
-
-  // A candidate width qualifies when it divides the widened vector into a
-  // power-of-two number of pieces and either fits in the remaining width, or
-  // (when a wider access is permitted) stays within both the alignment and
-  // the remaining width plus the allowed excess.  Only called for legal
-  // candidate types.
-  auto WidthQualifies = [&](unsigned CandBits) -> bool {
-    if (TotalBits % CandBits != 0 || !isPowerOf2_32(TotalBits / CandBits))
-      return false;
-    if (CandBits <= Width)
-      return true;
-    return Align != 0 && CandBits <= AlignBits && CandBits <= Width + WidenEx;
-  };
-
-  // Walk the integer types from largest to smallest, stopping once they are
-  // no wider than the element type, and remember the first legal one that
-  // qualifies.
-  EVT Best = EltVT;
-  unsigned Ty;
-  for (Ty = (unsigned)MVT::LAST_INTEGER_VALUETYPE;
-       Ty >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --Ty) {
-    EVT IntVT((MVT::SimpleValueType)Ty);
-    const unsigned IntBits = IntVT.getSizeInBits();
-    if (IntBits <= EltBits)
-      break;
-    if (TLI.isTypeLegal(IntVT) && WidthQualifies(IntBits)) {
-      Best = IntVT;
-      break;
-    }
-  }
-
-  // Walk the vector types from last to first.  A legal vector type with the
-  // same element type that qualifies is returned right away when it is wider
-  // than the best candidate so far or is the widened type itself.
-  for (Ty = (unsigned)MVT::LAST_VECTOR_VALUETYPE;
-       Ty >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --Ty) {
-    EVT VecVT = (MVT::SimpleValueType)Ty;
-    const unsigned VecBits = VecVT.getSizeInBits();
-    if (!TLI.isTypeLegal(VecVT) || EltVT != VecVT.getVectorElementType())
-      continue;
-    if (!WidthQualifies(VecBits))
-      continue;
-    if (Best.getSizeInBits() < VecBits || VecVT == WidenVT)
-      return VecVT;
-  }
-
-  return Best;
-}
-
-// Assemble a vector from a run of scalar loads.
-//  VecTy:       The type of the resulting vector.
-//  LdOps:       The load values to combine.
-//  [Start,End): The range of LdOps entries to use.
-// The first load seeds the vector with SCALAR_TO_VECTOR and the others are
-// inserted after it.  When the scalar type changes between loads, the partial
-// vector is bitcast to a vector of the new scalar type and the insert
-// position is rescaled accordingly.
-static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy,
-                                     SmallVectorImpl<SDValue> &LdOps,
-                                     unsigned Start, unsigned End) {
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  SDLoc Loc(LdOps[Start]);
-  const unsigned TotalBits = VecTy.getSizeInBits();
-
-  // Seed the vector with the first load.
-  EVT CurTy = LdOps[Start].getValueType();
-  EVT CurVecVT = EVT::getVectorVT(*DAG.getContext(), CurTy,
-                                  TotalBits / CurTy.getSizeInBits());
-  SDValue Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, Loc, CurVecVT, LdOps[Start]);
-
-  // Lane the next load goes into, counted in units of CurTy.
-  unsigned Lane = 1;
-  for (unsigned Op = Start + 1; Op != End; ++Op) {
-    EVT OpTy = LdOps[Op].getValueType();
-    if (OpTy != CurTy) {
-      // Reinterpret what has been built so far as a vector of the new scalar
-      // type and convert the lane position to the new unit.
-      const unsigned OpBits = OpTy.getSizeInBits();
-      CurVecVT = EVT::getVectorVT(*DAG.getContext(), OpTy, TotalBits / OpBits);
-      Vec = DAG.getNode(ISD::BITCAST, Loc, CurVecVT, Vec);
-      Lane = Lane * CurTy.getSizeInBits() / OpBits;
-      CurTy = OpTy;
-    }
-    Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, Loc, CurVecVT, Vec, LdOps[Op],
-                      DAG.getConstant(Lane, TLI.getVectorIdxTy()));
-    ++Lane;
-  }
-  return DAG.getNode(ISD::BITCAST, Loc, VecTy, Vec);
-}
-
-// Emit the load(s) for a non-extending vector load whose result is widened,
-// and return the widened value.  The output chain of every load created is
-// appended to LdChain.
-//
-// The strategy assumes power-of-two widths can be loaded efficiently: the
-// memory vector is chopped into the largest usable vector loads with the same
-// element type, or into scalar loads, and the pieces are then recombined into
-// the widened vector type.
-SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
-                                              LoadSDNode *LD) {
-  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
-  unsigned WidenWidth = WidenVT.getSizeInBits();
-  EVT LdVT = LD->getMemoryVT();
-  SDLoc Loc(LD);
-  assert(LdVT.isVector() && WidenVT.isVector());
-  assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType());
-
-  // Properties of the original load, carried over to every piece.
-  SDValue Chain = LD->getChain();
-  SDValue Ptr = LD->getBasePtr();
-  const unsigned Align = LD->getAlignment();
-  const bool Volatile = LD->isVolatile();
-  const bool NonTemporal = LD->isNonTemporal();
-  const bool Invariant = LD->isInvariant();
-  AAMDNodes AAInfo = LD->getAAInfo();
-
-  int BitsLeft = LdVT.getSizeInBits();       // Bits still to be loaded.
-  const int ExtraBits = WidenWidth - BitsLeft; // Padding added by widening.
-  // Wider-than-needed loads are only allowed for non-volatile loads.
-  const unsigned WideLoadAlign = Volatile ? 0 : Align;
-
-  // Pick the type of the first piece and load it.
-  EVT NewVT = FindMemType(DAG, TLI, BitsLeft, WidenVT, WideLoadAlign,
-                          ExtraBits);
-  int NewVTWidth = NewVT.getSizeInBits();
-  SDValue FirstLoad = DAG.getLoad(NewVT, Loc, Chain, Ptr, LD->getPointerInfo(),
-                                  Volatile, NonTemporal, Invariant, Align,
-                                  AAInfo);
-  LdChain.push_back(FirstLoad.getValue(1));
-
-  // The first piece already covers everything that has to be loaded.
-  if (BitsLeft <= NewVTWidth) {
-    if (!NewVT.isVector()) {
-      // A scalar piece: place it in a vector of such scalars and reinterpret
-      // that as the widened type.
-      unsigned NumElts = WidenWidth / NewVTWidth;
-      EVT ScalarVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
-      SDValue ScalarVec =
-          DAG.getNode(ISD::SCALAR_TO_VECTOR, Loc, ScalarVecVT, FirstLoad);
-      return DAG.getNode(ISD::BITCAST, Loc, WidenVT, ScalarVec);
-    }
-    if (NewVT == WidenVT)
-      return FirstLoad;
-
-    // A narrower vector piece: pad it with UNDEF vectors.
-    assert(WidenWidth % NewVTWidth == 0);
-    unsigned NumConcat = WidenWidth / NewVTWidth;
-    SmallVector<SDValue, 16> Padded(NumConcat);
-    SDValue UndefPiece = DAG.getUNDEF(NewVT);
-    Padded[0] = FirstLoad;
-    for (unsigned Slot = 1; Slot != NumConcat; ++Slot)
-      Padded[Slot] = UndefPiece;
-    return DAG.getNode(ISD::CONCAT_VECTORS, Loc, WidenVT, Padded);
-  }
-
-  // More than one load is needed; keep loading from the largest usable type
-  // down to scalars.
-  SmallVector<SDValue, 16> Pieces;
-  Pieces.push_back(FirstLoad);
-
-  BitsLeft -= NewVTWidth;
-  unsigned Offset = 0;
-
-  while (BitsLeft > 0) {
-    // Step past the piece that was just loaded.
-    const unsigned Step = NewVTWidth / 8;
-    Offset += Step;
-    Ptr = DAG.getNode(ISD::ADD, Loc, Ptr.getValueType(), Ptr,
-                      DAG.getConstant(Step, Ptr.getValueType()));
-
-    // If the current piece type is larger than what is left, find a better
-    // fitting one before loading.
-    const bool TypeShrunk = BitsLeft < NewVTWidth;
-    if (TypeShrunk) {
-      NewVT = FindMemType(DAG, TLI, BitsLeft, WidenVT, WideLoadAlign,
-                          ExtraBits);
-      NewVTWidth = NewVT.getSizeInBits();
-    }
-
-    SDValue Piece = DAG.getLoad(NewVT, Loc, Chain, Ptr,
-                                LD->getPointerInfo().getWithOffset(Offset),
-                                Volatile, NonTemporal, Invariant,
-                                MinAlign(Align, Step), AAInfo);
-    LdChain.push_back(Piece.getValue(1));
-
-    if (TypeShrunk && Piece->getValueType(0).isVector()) {
-      // A smaller vector piece is padded with UNDEF vectors up to the type of
-      // the first load.
-      SmallVector<SDValue, 16> PadOps;
-      PadOps.push_back(Piece);
-      unsigned Covered = Piece->getValueSizeInBits(0);
-      while (Covered < FirstLoad->getValueSizeInBits(0)) {
-        PadOps.push_back(DAG.getUNDEF(Piece->getValueType(0)));
-        Covered += Piece->getValueSizeInBits(0);
-      }
-      Piece = DAG.getNode(ISD::CONCAT_VECTORS, Loc, FirstLoad->getValueType(0),
-                          PadOps);
-    }
-
-    Pieces.push_back(Piece);
-    BitsLeft -= NewVTWidth;
-  }
-
-  // Recombine the pieces.
-  unsigned End = Pieces.size();
-  if (!Pieces[0].getValueType().isVector())
-    // Every piece is a scalar load.
-    return BuildVectorFromScalar(DAG, WidenVT, Pieces, 0, End);
-
-  // Some pieces are vectors: combine with CONCAT_VECTORS, working from the
-  // last piece towards the first.  ConcatOps is filled from the back; Slot is
-  // the index of the lowest filled entry.  All vector pieces are powers of
-  // two and the trailing scalar loads can be combined into a power-of-two
-  // vector.
-  SmallVector<SDValue, 16> ConcatOps(End);
-  int Cur = End - 1;
-  int Slot = End;
-  EVT CurTy = Pieces[Cur].getValueType();
-
-  // Trailing scalar loads are first gathered into one vector that has the
-  // type of the last vector piece before them.
-  if (!CurTy.isVector()) {
-    --Cur;
-    while (Cur >= 0) {
-      CurTy = Pieces[Cur].getValueType();
-      if (CurTy.isVector())
-        break;
-      --Cur;
-    }
-    ConcatOps[--Slot] = BuildVectorFromScalar(DAG, CurTy, Pieces, Cur + 1, End);
-  }
-
-  ConcatOps[--Slot] = Pieces[Cur];
-  for (--Cur; Cur >= 0; --Cur) {
-    EVT PieceTy = Pieces[Cur].getValueType();
-    if (PieceTy != CurTy) {
-      // Fold everything gathered so far into a single vector of the new
-      // piece type.
-      ConcatOps[End - 1] =
-          DAG.getNode(ISD::CONCAT_VECTORS, Loc, PieceTy,
-                      makeArrayRef(&ConcatOps[Slot], End - Slot));
-      Slot = End - 1;
-      CurTy = PieceTy;
-    }
-    ConcatOps[--Slot] = Pieces[Cur];
-  }
-
-  // The gathered pieces exactly make up the widened type.
-  if (WidenWidth == CurTy.getSizeInBits() * (End - Slot))
-    return DAG.getNode(ISD::CONCAT_VECTORS, Loc, WidenVT,
-                       makeArrayRef(&ConcatOps[Slot], End - Slot));
-
-  // Otherwise the remainder is filled with UNDEF vectors.
-  unsigned NumOps = WidenWidth / CurTy.getSizeInBits();
-  SmallVector<SDValue, 16> WidenOps(NumOps);
-  SDValue UndefVec = DAG.getUNDEF(CurTy);
-  unsigned Out = 0;
-  for (; Out != End - Slot; ++Out)
-    WidenOps[Out] = ConcatOps[Slot + Out];
-  for (; Out != NumOps; ++Out)
-    WidenOps[Out] = UndefVec;
-  return DAG.getNode(ISD::CONCAT_VECTORS, Loc, WidenVT, WidenOps);
-}
-
-/// Widen an extending vector load by unrolling it: each memory element is
-/// loaded (and extended) on its own, the chain of every element load is
-/// appended to LdChain, and the widened vector is rebuilt with BUILD_VECTOR.
-/// Lanes beyond the original element count are filled with undef.
-SDValue
-DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
-                                         LoadSDNode *LD,
-                                         ISD::LoadExtType ExtType) {
-  // Chopping the vector into legal pieces and extending afterwards is not
-  // necessarily cheaper for extending loads, hence the per-element unrolling.
-  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),
-                                         LD->getValueType(0));
-  EVT LdVT = LD->getMemoryVT();
-  SDLoc dl(LD);
-  assert(LdVT.isVector() && WidenVT.isVector());
-
-  // Attributes of the original load that every element load inherits.
-  SDValue Chain = LD->getChain();
-  SDValue BasePtr = LD->getBasePtr();
-  EVT PtrVT = BasePtr.getValueType();
-  unsigned Align = LD->getAlignment();
-  bool isVolatile = LD->isVolatile();
-  bool isNonTemporal = LD->isNonTemporal();
-  bool isInvariant = LD->isInvariant();
-  AAMDNodes AAInfo = LD->getAAInfo();
-
-  EVT EltVT = WidenVT.getVectorElementType();   // element type in registers
-  EVT LdEltVT = LdVT.getVectorElementType();    // element type in memory
-  unsigned NumElts = LdVT.getVectorNumElements();
-  unsigned WidenNumElts = WidenVT.getVectorNumElements();
-
-  // Byte distance between two consecutive elements in memory.
-  unsigned Increment = LdEltVT.getSizeInBits() / 8;
-
-  SmallVector<SDValue, 16> Ops(WidenNumElts);
-
-  // Element 0 is read straight from the base pointer.
-  Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr,
-                          LD->getPointerInfo(), LdEltVT, isVolatile,
-                          isNonTemporal, isInvariant, Align, AAInfo);
-  LdChain.push_back(Ops[0].getValue(1));
-
-  // The remaining elements are read from BasePtr + Offset.
-  unsigned Elt = 1;
-  for (unsigned Offset = Increment; Elt < NumElts;
-       ++Elt, Offset += Increment) {
-    SDValue OffsetVal = DAG.getConstant(Offset, PtrVT);
-    SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, PtrVT, BasePtr, OffsetVal);
-    SDValue EltLoad =
-        DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr,
-                       LD->getPointerInfo().getWithOffset(Offset), LdEltVT,
-                       isVolatile, isNonTemporal, isInvariant, Align, AAInfo);
-    Ops[Elt] = EltLoad;
-    LdChain.push_back(EltLoad.getValue(1));
-  }
-
-  // Pad the lanes that only exist in the widened type with undef.
-  SDValue UndefVal = DAG.getUNDEF(EltVT);
-  while (Elt != WidenNumElts)
-    Ops[Elt++] = UndefVal;
-
-  return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops);
-}
-
-
-// Emit the stores needed to write the memory-VT portion of a widened vector.
-// One store node is appended to StChain per piece; all pieces use the chain
-// of the original store ST.
-void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
- StoreSDNode *ST) {
- // The strategy assumes that we can efficiently store power-of-two widths.
- // The routine chops the vector into the largest vector stores with the same
- // element type, or into scalar stores.
- SDValue Chain = ST->getChain();
- SDValue BasePtr = ST->getBasePtr();
- unsigned Align = ST->getAlignment();
- bool isVolatile = ST->isVolatile();
- bool isNonTemporal = ST->isNonTemporal();
- AAMDNodes AAInfo = ST->getAAInfo();
- SDValue ValOp = GetWidenedVector(ST->getValue());
- SDLoc dl(ST);
-
- // StWidth counts the bits of the memory type still to be written; ValWidth
- // is the bit width of the widened value the pieces are extracted from.
- EVT StVT = ST->getMemoryVT();
- unsigned StWidth = StVT.getSizeInBits();
- EVT ValVT = ValOp.getValueType();
- unsigned ValWidth = ValVT.getSizeInBits();
- EVT ValEltVT = ValVT.getVectorElementType();
- unsigned ValEltWidth = ValEltVT.getSizeInBits();
- assert(StVT.getVectorElementType() == ValEltVT);
-
- int Idx = 0; // current element index (in ValEltVT units) to store
- unsigned Offset = 0; // byte offset from the original base pointer
- while (StWidth != 0) {
- // Find the largest type we can store the remaining bits with.
- EVT NewVT = FindMemType(DAG, TLI, StWidth, ValVT);
- unsigned NewVTWidth = NewVT.getSizeInBits();
- unsigned Increment = NewVTWidth / 8;
- if (NewVT.isVector()) {
- // Store as many NewVT-sized sub-vectors as fit in the remaining width.
- unsigned NumVTElts = NewVT.getVectorNumElements();
- do {
- SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp,
- DAG.getConstant(Idx, TLI.getVectorIdxTy()));
- StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr,
- ST->getPointerInfo().getWithOffset(Offset),
- isVolatile, isNonTemporal,
- MinAlign(Align, Offset), AAInfo));
- StWidth -= NewVTWidth;
- Offset += Increment;
- Idx += NumVTElts;
- // BasePtr is advanced in place, so it always addresses the next piece.
- BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
- DAG.getConstant(Increment, BasePtr.getValueType()));
- } while (StWidth != 0 && StWidth >= NewVTWidth);
- } else {
- // Cast the vector to a vector of the scalar type we can store.
- unsigned NumElts = ValWidth / NewVTWidth;
- EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
- SDValue VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, ValOp);
- // Rescale the index from ValEltVT units to NewVT units.
- Idx = Idx * ValEltWidth / NewVTWidth;
- do {
- SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp,
- DAG.getConstant(Idx++, TLI.getVectorIdxTy()));
- StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr,
- ST->getPointerInfo().getWithOffset(Offset),
- isVolatile, isNonTemporal,
- MinAlign(Align, Offset), AAInfo));
- StWidth -= NewVTWidth;
- Offset += Increment;
- BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
- DAG.getConstant(Increment, BasePtr.getValueType()));
- } while (StWidth != 0 && StWidth >= NewVTWidth);
- // Restore the index so it is again relative to the widened element type.
- Idx = Idx * NewVTWidth / ValEltWidth;
- }
- }
-}
-
-/// Emit the stores for a truncating store whose value operand was widened.
-/// The store is unrolled: each of the memory type's elements is extracted from
-/// the widened value and written with its own scalar truncating store. One
-/// store node per element is appended to StChain; all of them use the chain of
-/// the original store ST.
-void
-DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
-                                            StoreSDNode *ST) {
-  // For truncating stores, it may not be more efficient to truncate the vector
-  // and then store it. Instead, we extract each element and then store it.
-  SDValue Chain = ST->getChain();
-  SDValue BasePtr = ST->getBasePtr();
-  unsigned Align = ST->getAlignment();
-  bool isVolatile = ST->isVolatile();
-  bool isNonTemporal = ST->isNonTemporal();
-  AAMDNodes AAInfo = ST->getAAInfo();
-  SDValue ValOp = GetWidenedVector(ST->getValue());
-  SDLoc dl(ST);
-
-  EVT StVT = ST->getMemoryVT();
-  EVT ValVT = ValOp.getValueType();
-
-  // It must be true that the widened vector type is bigger than what we need
-  // to store.
-  assert(StVT.isVector() && ValOp.getValueType().isVector());
-  assert(StVT.bitsLT(ValOp.getValueType()));
-
-  // For truncating stores, we can not play the tricks of chopping legal
-  // vector types and bit cast it to the right type. Instead, we unroll
-  // the store.
-  EVT StEltVT = StVT.getVectorElementType();
-  EVT ValEltVT = ValVT.getVectorElementType();
-  // Consecutive elements in memory are one *stored* (truncated) element
-  // apart, mirroring how GenWidenVectorExtLoads strides by the memory element
-  // type. Striding by the wider in-register element type would scatter the
-  // elements past the memory footprint of StVT.
-  unsigned Increment = StEltVT.getSizeInBits() / 8;
-  unsigned NumElts = StVT.getVectorNumElements();
-
-  // Element 0 goes to the base address with the original alignment.
-  SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
-                            DAG.getConstant(0, TLI.getVectorIdxTy()));
-  StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr,
-                                      ST->getPointerInfo(), StEltVT,
-                                      isVolatile, isNonTemporal, Align,
-                                      AAInfo));
-
-  unsigned Offset = Increment;
-  for (unsigned i = 1; i < NumElts; ++i, Offset += Increment) {
-    SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
-                                     BasePtr,
-                                     DAG.getConstant(Offset,
-                                                     BasePtr.getValueType()));
-    // Extract element i; extracting element 0 here would store the first
-    // element into every slot.
-    SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
-                              DAG.getConstant(i, TLI.getVectorIdxTy()));
-    StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr,
-                                  ST->getPointerInfo().getWithOffset(Offset),
-                                  StEltVT, isVolatile, isNonTemporal,
-                                  MinAlign(Align, Offset), AAInfo));
-  }
-}
-
-/// Convert the vector InOp to the vector type NVT by widening or narrowing it.
-/// InOp and NVT must share the same element type. When widening, the extra
-/// lanes are undef; when narrowing, the leading lanes are kept.
-SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) {
-  // InOp may itself be the product of widening, so it can already be of the
-  // requested type, or be wider than requested.
-  EVT InVT = InOp.getValueType();
-  assert(InVT.getVectorElementType() == NVT.getVectorElementType() &&
-         "input and widen element type must match");
-  SDLoc dl(InOp);
-
-  // Already the requested type: nothing to do.
-  if (InVT == NVT)
-    return InOp;
-
-  const unsigned InNumElts = InVT.getVectorNumElements();
-  const unsigned WidenNumElts = NVT.getVectorNumElements();
-
-  // Widening by a whole multiple: concatenate InOp with undef vectors.
-  if (WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0) {
-    const unsigned NumConcat = WidenNumElts / InNumElts;
-    SDValue UndefVal = DAG.getUNDEF(InVT);
-    SmallVector<SDValue, 16> Ops(NumConcat, UndefVal);
-    Ops[0] = InOp;
-    return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, Ops);
-  }
-
-  // Narrowing: take the leading sub-vector.
-  // NOTE(review): this path is taken when InNumElts is NOT a multiple of
-  // WidenNumElts; exact multiples fall through to the element-wise rebuild
-  // below. Confirm that the condition is not meant to be "== 0".
-  if (WidenNumElts < InNumElts && InNumElts % WidenNumElts != 0) {
-    SDValue Zero = DAG.getConstant(0, TLI.getVectorIdxTy());
-    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp, Zero);
-  }
-
-  // General case: extract the common leading elements one by one and rebuild.
-  EVT EltVT = NVT.getVectorElementType();
-  const unsigned MinNumElts = std::min(WidenNumElts, InNumElts);
-  SmallVector<SDValue, 16> Ops(WidenNumElts);
-  unsigned Lane = 0;
-  while (Lane < MinNumElts) {
-    SDValue LaneIdx = DAG.getConstant(Lane, TLI.getVectorIdxTy());
-    Ops[Lane] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, LaneIdx);
-    ++Lane;
-  }
-
-  // Any lanes InOp cannot provide become undef.
-  SDValue UndefVal = DAG.getUNDEF(EltVT);
-  while (Lane < WidenNumElts)
-    Ops[Lane++] = UndefVal;
-
-  return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Ops);
-}
+//===------- LegalizeVectorTypes.cpp - Legalization of vector types -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file performs vector type splitting and scalarization for LegalizeTypes.
+// Scalarization is the act of changing a computation in an illegal one-element
+// vector type to be a computation in its scalar element type. For example,
+// implementing <1 x f32> arithmetic in a scalar f32 register. This is needed
+// as a base case when scalarizing vector arithmetic like <4 x f32>, which
+// eventually decomposes to scalars if the target doesn't support v4f32 or v2f32
+// types.
+// Splitting is the act of changing a computation in an invalid vector type to
+// be a computation in two vectors of half the size. For example, implementing
+// <128 x f32> operations in terms of two <64 x f32> operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "legalize-types"
+
+//===----------------------------------------------------------------------===//
+// Result Vector Scalarization: <1 x ty> -> ty.
+//===----------------------------------------------------------------------===//
+
+/// Scalarize result ResNo of node N, whose type is an illegal one-element
+/// vector. Dispatches on the opcode to the matching ScalarizeVecRes_* helper
+/// and records the scalar replacement, unless the helper returned a null value
+/// because it registered the result itself.
+void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
+  DEBUG(dbgs() << "Scalarize node result " << ResNo << ": ";
+        N->dump(&DAG);
+        dbgs() << "\n");
+  SDValue R;
+
+  switch (N->getOpcode()) {
+  default:
+#ifndef NDEBUG
+    dbgs() << "ScalarizeVectorResult #" << ResNo << ": ";
+    N->dump(&DAG);
+    dbgs() << "\n";
+#endif
+    report_fatal_error("Do not know how to scalarize the result of this "
+                       "operator!\n");
+
+  // Opcodes with a dedicated helper.
+  case ISD::MERGE_VALUES:
+    R = ScalarizeVecRes_MERGE_VALUES(N, ResNo);
+    break;
+  case ISD::BITCAST:
+    R = ScalarizeVecRes_BITCAST(N);
+    break;
+  case ISD::BUILD_VECTOR:
+    R = ScalarizeVecRes_BUILD_VECTOR(N);
+    break;
+  case ISD::CONVERT_RNDSAT:
+    R = ScalarizeVecRes_CONVERT_RNDSAT(N);
+    break;
+  case ISD::EXTRACT_SUBVECTOR:
+    R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N);
+    break;
+  case ISD::FP_ROUND:
+    R = ScalarizeVecRes_FP_ROUND(N);
+    break;
+  case ISD::FPOWI:
+    R = ScalarizeVecRes_FPOWI(N);
+    break;
+  case ISD::INSERT_VECTOR_ELT:
+    R = ScalarizeVecRes_INSERT_VECTOR_ELT(N);
+    break;
+  case ISD::LOAD:
+    R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));
+    break;
+  case ISD::SCALAR_TO_VECTOR:
+    R = ScalarizeVecRes_SCALAR_TO_VECTOR(N);
+    break;
+  case ISD::VSELECT:
+    R = ScalarizeVecRes_VSELECT(N);
+    break;
+  case ISD::SELECT:
+    R = ScalarizeVecRes_SELECT(N);
+    break;
+  case ISD::SELECT_CC:
+    R = ScalarizeVecRes_SELECT_CC(N);
+    break;
+  case ISD::SETCC:
+    R = ScalarizeVecRes_SETCC(N);
+    break;
+  case ISD::UNDEF:
+    R = ScalarizeVecRes_UNDEF(N);
+    break;
+  case ISD::VECTOR_SHUFFLE:
+    R = ScalarizeVecRes_VECTOR_SHUFFLE(N);
+    break;
+
+  // "In register" operations carrying a value-type operand.
+  case ISD::FP_ROUND_INREG:
+  case ISD::SIGN_EXTEND_INREG:
+    R = ScalarizeVecRes_InregOp(N);
+    break;
+
+  // Single-operand operations.
+  case ISD::ANY_EXTEND:
+  case ISD::BSWAP:
+  case ISD::CTLZ:
+  case ISD::CTLZ_ZERO_UNDEF:
+  case ISD::CTPOP:
+  case ISD::CTTZ:
+  case ISD::CTTZ_ZERO_UNDEF:
+  case ISD::FABS:
+  case ISD::FCEIL:
+  case ISD::FCOS:
+  case ISD::FEXP:
+  case ISD::FEXP2:
+  case ISD::FFLOOR:
+  case ISD::FLOG:
+  case ISD::FLOG10:
+  case ISD::FLOG2:
+  case ISD::FNEARBYINT:
+  case ISD::FNEG:
+  case ISD::FP_EXTEND:
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:
+  case ISD::FRINT:
+  case ISD::FROUND:
+  case ISD::FSIN:
+  case ISD::FSQRT:
+  case ISD::FTRUNC:
+  case ISD::SIGN_EXTEND:
+  case ISD::SINT_TO_FP:
+  case ISD::TRUNCATE:
+  case ISD::UINT_TO_FP:
+  case ISD::ZERO_EXTEND:
+    R = ScalarizeVecRes_UnaryOp(N);
+    break;
+
+  // Two-operand operations.
+  case ISD::ADD:
+  case ISD::AND:
+  case ISD::FADD:
+  case ISD::FCOPYSIGN:
+  case ISD::FDIV:
+  case ISD::FMUL:
+  case ISD::FMINNUM:
+  case ISD::FMAXNUM:
+  case ISD::FPOW:
+  case ISD::FREM:
+  case ISD::FSUB:
+  case ISD::MUL:
+  case ISD::OR:
+  case ISD::SDIV:
+  case ISD::SREM:
+  case ISD::SUB:
+  case ISD::UDIV:
+  case ISD::UREM:
+  case ISD::XOR:
+  case ISD::SHL:
+  case ISD::SRA:
+  case ISD::SRL:
+    R = ScalarizeVecRes_BinOp(N);
+    break;
+
+  // Three-operand operations.
+  case ISD::FMA:
+    R = ScalarizeVecRes_TernaryOp(N);
+    break;
+  }
+
+  // A null R means the helper already registered the result.
+  if (!R.getNode())
+    return;
+  SetScalarizedVector(SDValue(N, ResNo), R);
+}
+
+/// Scalarize a two-operand node: apply the same opcode to the scalarized
+/// operands, producing a value of the left operand's scalar type.
+SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) {
+  SDValue ScalarLHS = GetScalarizedVector(N->getOperand(0));
+  SDValue ScalarRHS = GetScalarizedVector(N->getOperand(1));
+  EVT ResVT = ScalarLHS.getValueType();
+  return DAG.getNode(N->getOpcode(), SDLoc(N), ResVT, ScalarLHS, ScalarRHS);
+}
+
+/// Scalarize a three-operand node: apply the same opcode to the three
+/// scalarized operands, producing a value of the first operand's scalar type.
+SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) {
+  SDValue A = GetScalarizedVector(N->getOperand(0));
+  SDValue B = GetScalarizedVector(N->getOperand(1));
+  SDValue C = GetScalarizedVector(N->getOperand(2));
+  EVT ResVT = A.getValueType();
+  return DAG.getNode(N->getOpcode(), SDLoc(N), ResVT, A, B, C);
+}
+
+/// Scalarize result ResNo of a MERGE_VALUES node: take the value that
+/// DisintegrateMERGE_VALUES returns for that result and return its scalarized
+/// form.
+SDValue DAGTypeLegalizer::ScalarizeVecRes_MERGE_VALUES(SDNode *N,
+                                                       unsigned ResNo) {
+  SDValue Merged = DisintegrateMERGE_VALUES(N, ResNo);
+  return GetScalarizedVector(Merged);
+}
+
+/// Scalarize a BITCAST whose result is a one-element vector: bitcast the
+/// (unmodified) operand directly to the result's element type.
+SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) {
+  SDValue Src = N->getOperand(0);
+  EVT EltVT = N->getValueType(0).getVectorElementType();
+  return DAG.getNode(ISD::BITCAST, SDLoc(N), EltVT, Src);
+}
+
+/// Scalarize a one-element BUILD_VECTOR: the result is its single operand,
+/// truncated to the element type when that type is an integer.
+SDValue DAGTypeLegalizer::ScalarizeVecRes_BUILD_VECTOR(SDNode *N) {
+  SDValue Elt = N->getOperand(0);
+  EVT EltVT = N->getValueType(0).getVectorElementType();
+
+  // Non-integer elements are passed through untouched.
+  if (!EltVT.isInteger())
+    return Elt;
+
+  // BUILD_VECTOR operands may be wider than the element type, so bring the
+  // operand back to the requested width.
+  return DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Elt);
+}
+
+/// Scalarize a CONVERT_RNDSAT node: rebuild the conversion on the scalarized
+/// source, with the destination/source type operands rewritten to the scalar
+/// types and operands 3 and 4 and the conversion code carried over unchanged.
+SDValue DAGTypeLegalizer::ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N) {
+  EVT DstEltVT = N->getValueType(0).getVectorElementType();
+  SDValue Src = GetScalarizedVector(N->getOperand(0));
+
+  SDValue DstTy = DAG.getValueType(DstEltVT);
+  SDValue SrcTy = DAG.getValueType(Src.getValueType());
+  return DAG.getConvertRndSat(DstEltVT, SDLoc(N), Src, DstTy, SrcTy,
+                              N->getOperand(3), N->getOperand(4),
+                              cast<CvtRndSatSDNode>(N)->getCvtCode());
+}
+
+/// Scalarize an EXTRACT_SUBVECTOR with a one-element result: it becomes an
+/// EXTRACT_VECTOR_ELT of the same source vector at the same index.
+SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
+  SDValue Vec = N->getOperand(0);
+  SDValue Idx = N->getOperand(1);
+  EVT EltVT = N->getValueType(0).getVectorElementType();
+  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT, Vec, Idx);
+}
+
+/// Scalarize an FP_ROUND: round the scalarized source to the result's element
+/// type, forwarding the node's second operand unchanged.
+SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) {
+  EVT EltVT = N->getValueType(0).getVectorElementType();
+  SDValue Src = GetScalarizedVector(N->getOperand(0));
+  SDValue Flag = N->getOperand(1);
+  return DAG.getNode(ISD::FP_ROUND, SDLoc(N), EltVT, Src, Flag);
+}
+
+/// Scalarize an FPOWI: only the first operand is scalarized; the second
+/// operand is forwarded as it is.
+SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) {
+  SDValue Base = GetScalarizedVector(N->getOperand(0));
+  SDValue Power = N->getOperand(1);
+  return DAG.getNode(ISD::FPOWI, SDLoc(N), Base.getValueType(), Base, Power);
+}
+
+/// Scalarize an INSERT_VECTOR_ELT into a one-element vector: the result is the
+/// inserted value itself, truncated if its type differs from the element type.
+SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) {
+  EVT EltVT = N->getValueType(0).getVectorElementType();
+  SDValue Inserted = N->getOperand(1);
+
+  if (Inserted.getValueType() == EltVT)
+    return Inserted;
+
+  // The inserted value may be wider than the element type; narrow it.
+  // FIXME: Can this happen for floating point types?
+  return DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Inserted);
+}
+
+/// Scalarize an unindexed load of a one-element vector: emit a load of the
+/// element type with the same extension kind, pointer info and memory flags,
+/// and redirect users of the old chain result to the new load's chain.
+SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
+  assert(N->isUnindexed() && "Indexed vector load?");
+
+  EVT ResEltVT = N->getValueType(0).getVectorElementType();
+  EVT MemEltVT = N->getMemoryVT().getVectorElementType();
+  SDValue Ptr = N->getBasePtr();
+  // Unindexed loads carry an undef offset operand.
+  SDValue UndefOffset = DAG.getUNDEF(Ptr.getValueType());
+
+  SDValue Result =
+      DAG.getLoad(ISD::UNINDEXED, N->getExtensionType(), ResEltVT, SDLoc(N),
+                  N->getChain(), Ptr, UndefOffset, N->getPointerInfo(),
+                  MemEltVT, N->isVolatile(), N->isNonTemporal(),
+                  N->isInvariant(), N->getOriginalAlignment(),
+                  N->getAAInfo());
+
+  // The chain result (value #1) is legal already: everything that used the
+  // old chain must now use the new one.
+  ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
+  return Result;
+}
+
+/// Scalarize a single-operand node: apply the same opcode to a scalar source
+/// and produce the result's element type. The source is either the scalarized
+/// operand or, when the operand's type is not itself scalarized, element 0
+/// extracted from it.
+SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) {
+  SDLoc DL(N);
+  // The destination type need not match the source type, e.g. int_to_fp.
+  EVT DestVT = N->getValueType(0).getVectorElementType();
+  SDValue Src = N->getOperand(0);
+  EVT SrcVT = Src.getValueType();
+
+  // The result needs scalarizing, but it's not a given that the source does.
+  // This is a workaround for targets where it's impossible to scalarize the
+  // result of a conversion, because the source type is legal.
+  // For instance, this happens on AArch64: v1i1 is illegal but v1i{8,16,32}
+  // are widened to v8i8, v4i16, and v2i32, which is legal, because v1i64 is
+  // legal and was not scalarized.
+  // See the similar logic in ScalarizeVecRes_VSETCC
+  if (getTypeAction(SrcVT) != TargetLowering::TypeScalarizeVector) {
+    SDValue Zero = DAG.getConstant(0, TLI.getVectorIdxTy());
+    Src = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+                      SrcVT.getVectorElementType(), Src, Zero);
+  } else {
+    Src = GetScalarizedVector(Src);
+  }
+
+  return DAG.getNode(N->getOpcode(), DL, DestVT, Src);
+}
+
+/// Scalarize an "in register" operation (operand 1 is a value-type node):
+/// apply the opcode to the scalarized source, replacing the vector value type
+/// in operand 1 by its element type.
+SDValue DAGTypeLegalizer::ScalarizeVecRes_InregOp(SDNode *N) {
+  EVT ResEltVT = N->getValueType(0).getVectorElementType();
+  EVT InregVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+  EVT InregEltVT = InregVT.getVectorElementType();
+  SDValue Src = GetScalarizedVector(N->getOperand(0));
+  return DAG.getNode(N->getOpcode(), SDLoc(N), ResEltVT, Src,
+                     DAG.getValueType(InregEltVT));
+}
+
+/// Scalarize a SCALAR_TO_VECTOR: the result is the scalar operand, explicitly
+/// truncated when its type differs from the vector element type.
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) {
+  SDValue Scalar = N->getOperand(0);
+  EVT EltVT = N->getValueType(0).getVectorElementType();
+
+  if (Scalar.getValueType() == EltVT)
+    return Scalar;
+
+  // A wider operand is implicitly truncated by SCALAR_TO_VECTOR; make that
+  // truncation explicit.
+  return DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Scalar);
+}
+
+/// Scalarize a VSELECT: select between the scalarized true/false values using
+/// the scalarized condition. When the target represents scalar and vector
+/// booleans differently, the condition is first converted to the scalar
+/// representation.
+SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) {
+  typedef TargetLowering::BooleanContent BoolKind;
+
+  SDLoc DL(N);
+  SDValue Cond = GetScalarizedVector(N->getOperand(0));
+  SDValue TrueVal = GetScalarizedVector(N->getOperand(1));
+
+  BoolKind ScalarBool = TLI.getBooleanContents(false, false);
+  BoolKind VecBool = TLI.getBooleanContents(true, false);
+
+  // If integer and float booleans have different contents then we can't
+  // reliably optimize in all cases. There is a full explanation for this in
+  // DAGCombiner::visitSELECT() where the same issue affects folding
+  // (select C, 0, 1) to (xor C, 1).
+  const bool IntAndFPBoolsDiffer = TLI.getBooleanContents(false, false) !=
+                                   TLI.getBooleanContents(false, true);
+  if (IntAndFPBoolsDiffer) {
+    if (Cond->getOpcode() != ISD::SETCC) {
+      ScalarBool = TargetLowering::UndefinedBooleanContent;
+    } else {
+      // At least handle the common case where the boolean comes from a
+      // comparison: use the contents for the compared type.
+      EVT OpVT = Cond->getOperand(0)->getValueType(0);
+      ScalarBool = TLI.getBooleanContents(OpVT.getScalarType());
+      VecBool = TLI.getBooleanContents(OpVT);
+    }
+  }
+
+  if (ScalarBool != VecBool) {
+    EVT CondVT = Cond.getValueType();
+    if (ScalarBool == TargetLowering::ZeroOrOneBooleanContent) {
+      assert(VecBool == TargetLowering::UndefinedBooleanContent ||
+             VecBool == TargetLowering::ZeroOrNegativeOneBooleanContent);
+      // Vector read from all ones, scalar expects a single 1 so mask.
+      Cond = DAG.getNode(ISD::AND, DL, CondVT, Cond,
+                         DAG.getConstant(1, CondVT));
+    } else if (ScalarBool ==
+               TargetLowering::ZeroOrNegativeOneBooleanContent) {
+      assert(VecBool == TargetLowering::UndefinedBooleanContent ||
+             VecBool == TargetLowering::ZeroOrOneBooleanContent);
+      // Vector reads from a one, scalar from all ones so sign extend.
+      Cond = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, CondVT, Cond,
+                         DAG.getValueType(MVT::i1));
+    }
+    // UndefinedBooleanContent: the condition is used as it is.
+  }
+
+  SDValue FalseVal = GetScalarizedVector(N->getOperand(2));
+  return DAG.getSelect(DL, TrueVal.getValueType(), Cond, TrueVal, FalseVal);
+}
+
+/// Scalarize a SELECT with a one-element vector result: keep the condition
+/// operand unchanged and select between the scalarized true/false values.
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) {
+  SDValue TrueVal = GetScalarizedVector(N->getOperand(1));
+  SDValue FalseVal = GetScalarizedVector(N->getOperand(2));
+  SDValue Cond = N->getOperand(0);
+  return DAG.getSelect(SDLoc(N), TrueVal.getValueType(), Cond, TrueVal,
+                       FalseVal);
+}
+
+/// Scalarize a SELECT_CC with a one-element vector result: operands 0, 1 and 4
+/// are forwarded unchanged, while the two selected values (operands 2 and 3)
+/// are replaced by their scalarized forms.
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) {
+  SDValue TrueVal = GetScalarizedVector(N->getOperand(2));
+  SDValue FalseVal = GetScalarizedVector(N->getOperand(3));
+  EVT ResVT = TrueVal.getValueType();
+  return DAG.getNode(ISD::SELECT_CC, SDLoc(N), ResVT, N->getOperand(0),
+                     N->getOperand(1), TrueVal, FalseVal, N->getOperand(4));
+}
+
+/// Scalarize a SETCC. A vector-typed result is delegated to
+/// ScalarizeVecRes_VSETCC; otherwise a scalar i1 SETCC is emitted on the
+/// scalarized operands with the original condition code.
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) {
+  assert(N->getValueType(0).isVector() ==
+         N->getOperand(0).getValueType().isVector() &&
+         "Scalar/Vector type mismatch");
+
+  if (N->getValueType(0).isVector())
+    return ScalarizeVecRes_VSETCC(N);
+
+  SDValue ScalarLHS = GetScalarizedVector(N->getOperand(0));
+  SDValue ScalarRHS = GetScalarizedVector(N->getOperand(1));
+  SDValue CondCode = N->getOperand(2);
+  SDLoc DL(N);
+
+  // Turn it into a scalar SETCC.
+  return DAG.getNode(ISD::SETCC, DL, MVT::i1, ScalarLHS, ScalarRHS, CondCode);
+}
+
+/// Scalarize an UNDEF vector: the result is an undef of the element type.
+SDValue DAGTypeLegalizer::ScalarizeVecRes_UNDEF(SDNode *N) {
+  EVT EltVT = N->getValueType(0).getVectorElementType();
+  return DAG.getUNDEF(EltVT);
+}
+
+/// Scalarize a one-element VECTOR_SHUFFLE: the first entry of operand 2
+/// decides whether the result is undef, the scalarized operand 0 (entry is
+/// zero) or the scalarized operand 1 (entry is non-zero).
+SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) {
+  SDValue MaskElt = N->getOperand(2).getOperand(0);
+
+  if (MaskElt.getOpcode() == ISD::UNDEF)
+    return DAG.getUNDEF(N->getValueType(0).getVectorElementType());
+
+  // A zero entry picks the LHS, anything else the RHS.
+  const bool PicksRHS = !cast<ConstantSDNode>(MaskElt)->isNullValue();
+  const unsigned SrcOpNo = PicksRHS ? 1 : 0;
+  return GetScalarizedVector(N->getOperand(SrcOpNo));
+}
+
+/// Scalarize a vector SETCC: compare scalar versions of the operands with an
+/// i1 SETCC, then extend the i1 to the result's element type using the
+/// extension that matches the boolean contents of the operand vector type.
+SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) {
+  assert(N->getValueType(0).isVector() &&
+         N->getOperand(0).getValueType().isVector() &&
+         "Operand types must be vectors");
+  SDLoc DL(N);
+  SDValue CmpLHS = N->getOperand(0);
+  SDValue CmpRHS = N->getOperand(1);
+  EVT OpVT = CmpLHS.getValueType();
+  EVT ResEltVT = N->getValueType(0).getVectorElementType();
+
+  // The result needs scalarizing, but it's not a given that the source does.
+  if (getTypeAction(OpVT) != TargetLowering::TypeScalarizeVector) {
+    // The operands stay vectors: compare their element 0.
+    EVT OpEltVT = OpVT.getVectorElementType();
+    CmpLHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpEltVT, CmpLHS,
+                         DAG.getConstant(0, TLI.getVectorIdxTy()));
+    CmpRHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpEltVT, CmpRHS,
+                         DAG.getConstant(0, TLI.getVectorIdxTy()));
+  } else {
+    CmpLHS = GetScalarizedVector(CmpLHS);
+    CmpRHS = GetScalarizedVector(CmpRHS);
+  }
+
+  // Turn it into a scalar SETCC.
+  SDValue Cmp =
+      DAG.getNode(ISD::SETCC, DL, MVT::i1, CmpLHS, CmpRHS, N->getOperand(2));
+
+  // Vectors may have a different boolean contents to scalars. Promote the
+  // value appropriately.
+  TargetLowering::BooleanContent VecBool = TLI.getBooleanContents(OpVT);
+  ISD::NodeType ExtendCode = TargetLowering::getExtendForContent(VecBool);
+  return DAG.getNode(ExtendCode, DL, ResEltVT, Cmp);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Operand Vector Scalarization <1 x ty> -> ty.
+//===----------------------------------------------------------------------===//
+
+/// Scalarize operand OpNo of node N, which is an illegal one-element vector.
+/// Dispatches on the opcode to the matching ScalarizeVecOp_* helper. Returns
+/// true only when the helper updated N in place; otherwise returns false,
+/// after replacing N's single result with the helper's value when the helper
+/// produced one.
+bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
+  DEBUG(dbgs() << "Scalarize node operand " << OpNo << ": ";
+        N->dump(&DAG);
+        dbgs() << "\n");
+  SDValue Res;
+
+  switch (N->getOpcode()) {
+  default:
+#ifndef NDEBUG
+    dbgs() << "ScalarizeVectorOperand Op #" << OpNo << ": ";
+    N->dump(&DAG);
+    dbgs() << "\n";
+#endif
+    llvm_unreachable("Do not know how to scalarize this operator's operand!");
+
+  case ISD::BITCAST:
+    Res = ScalarizeVecOp_BITCAST(N);
+    break;
+
+  case ISD::ANY_EXTEND:
+  case ISD::ZERO_EXTEND:
+  case ISD::SIGN_EXTEND:
+  case ISD::TRUNCATE:
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:
+  case ISD::SINT_TO_FP:
+  case ISD::UINT_TO_FP:
+    Res = ScalarizeVecOp_UnaryOp(N);
+    break;
+
+  case ISD::CONCAT_VECTORS:
+    Res = ScalarizeVecOp_CONCAT_VECTORS(N);
+    break;
+
+  case ISD::EXTRACT_VECTOR_ELT:
+    Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N);
+    break;
+
+  case ISD::VSELECT:
+    Res = ScalarizeVecOp_VSELECT(N);
+    break;
+
+  case ISD::STORE:
+    Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo);
+    break;
+
+  case ISD::FP_ROUND:
+    Res = ScalarizeVecOp_FP_ROUND(N, OpNo);
+    break;
+  }
+
+  SDNode *ResNode = Res.getNode();
+
+  // A null result means the helper took care of registering results etc.
+  if (!ResNode)
+    return false;
+
+  // If the result is N itself, the helper updated N in place. Tell the
+  // legalizer core about this.
+  if (ResNode == N)
+    return true;
+
+  assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+         "Invalid operand expansion");
+
+  ReplaceValueWith(SDValue(N, 0), Res);
+  return false;
+}
+
+/// ScalarizeVecOp_BITCAST - The bitcast source is a <1 x ty> vector that needs
+/// scalarizing: bitcast its single element to the node's result type instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) {
+  SDValue ScalarSrc = GetScalarizedVector(N->getOperand(0));
+  EVT ResVT = N->getValueType(0);
+  return DAG.getNode(ISD::BITCAST, SDLoc(N), ResVT, ScalarSrc);
+}
+
+/// ScalarizeVecOp_UnaryOp - The input is a <1 x ty> vector that needs
+/// scalarizing: perform the operation on the element, then wrap the scalar
+/// result back into the node's one-element vector result type.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) {
+  assert(N->getValueType(0).getVectorNumElements() == 1 &&
+         "Unexpected vector type!");
+  SDLoc DL(N);
+  EVT VecVT = N->getValueType(0);
+
+  SDValue ScalarSrc = GetScalarizedVector(N->getOperand(0));
+  SDValue ScalarRes =
+      DAG.getNode(N->getOpcode(), DL, VecVT.getScalarType(), ScalarSrc);
+
+  // Users of this node still expect the vector type, so revectorize.
+  return DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, ScalarRes);
+}
+
+/// ScalarizeVecOp_CONCAT_VECTORS - The concatenated vectors all have length
+/// one, so build the result with a BUILD_VECTOR of their scalarized elements.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) {
+  const unsigned NumOps = N->getNumOperands();
+  SmallVector<SDValue, 8> Elts(NumOps);
+  for (unsigned OpIdx = 0; OpIdx != NumOps; ++OpIdx)
+    Elts[OpIdx] = GetScalarizedVector(N->getOperand(OpIdx));
+
+  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0), Elts);
+}
+
+/// ScalarizeVecOp_EXTRACT_VECTOR_ELT - The input is a <1 x ty> vector that
+/// needs scalarizing, so the result is simply its element (the index is
+/// ignored), any-extended when the node's result type differs.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+  SDValue Elt = GetScalarizedVector(N->getOperand(0));
+  EVT ResVT = N->getValueType(0);
+
+  if (Elt.getValueType() == ResVT)
+    return Elt;
+
+  return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), ResVT, Elt);
+}
+
+
+/// ScalarizeVecOp_VSELECT - The condition is a <1 x i1> vector that needs
+/// scalarizing: turn the node into a plain ISD::SELECT on the scalar
+/// condition. The result keeps its vector type, which was acceptable if we
+/// got here.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_VSELECT(SDNode *N) {
+  EVT ResVT = N->getValueType(0);
+  SDValue Cond = GetScalarizedVector(N->getOperand(0));
+  SDValue TrueVal = N->getOperand(1);
+  SDValue FalseVal = N->getOperand(2);
+
+  return DAG.getNode(ISD::SELECT, SDLoc(N), ResVT, Cond, TrueVal, FalseVal);
+}
+
+/// ScalarizeVecOp_STORE - The stored value is a <1 x ty> vector that needs
+/// scalarizing: store its element instead, as a truncating store when the
+/// original store was one. Only operand 1 (the stored value) is handled.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){
+  assert(N->isUnindexed() && "Indexed store of one-element vector?");
+  assert(OpNo == 1 && "Do not know how to scalarize this operand!");
+  SDLoc dl(N);
+
+  if (!N->isTruncatingStore()) {
+    SDValue Elt = GetScalarizedVector(N->getOperand(1));
+    return DAG.getStore(N->getChain(), dl, Elt, N->getBasePtr(),
+                        N->getPointerInfo(), N->isVolatile(),
+                        N->isNonTemporal(), N->getOriginalAlignment(),
+                        N->getAAInfo());
+  }
+
+  // Truncating store: the memory type becomes the memory element type.
+  SDValue Elt = GetScalarizedVector(N->getOperand(1));
+  EVT MemEltVT = N->getMemoryVT().getVectorElementType();
+  return DAG.getTruncStore(N->getChain(), dl, Elt, N->getBasePtr(),
+                           N->getPointerInfo(), MemEltVT, N->isVolatile(),
+                           N->isNonTemporal(), N->getAlignment(),
+                           N->getAAInfo());
+}
+
+/// ScalarizeVecOp_FP_ROUND - The value to round is a <1 x ty> vector that
+/// needs scalarizing: round its element, then wrap the scalar back into the
+/// node's vector result type with SCALAR_TO_VECTOR. OpNo is not used.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo) {
+  SDLoc DL(N);
+  EVT VecVT = N->getValueType(0);
+
+  SDValue ScalarSrc = GetScalarizedVector(N->getOperand(0));
+  SDValue Rounded = DAG.getNode(ISD::FP_ROUND, DL,
+                                VecVT.getVectorElementType(), ScalarSrc,
+                                N->getOperand(1));
+  return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Rounded);
+}
+
+//===----------------------------------------------------------------------===//
+// Result Vector Splitting
+//===----------------------------------------------------------------------===//
+
+/// SplitVectorResult - Called when result ResNo of node N needs vector
+/// splitting. The node may also have invalid operands or other results that
+/// need legalization; all that is known here is that (at least) one result
+/// must be split. The target gets the first chance to custom-lower the node;
+/// otherwise the opcode selects the helper that computes the Lo/Hi halves.
+void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
+  DEBUG(dbgs() << "Split node result: ";
+        N->dump(&DAG);
+        dbgs() << "\n");
+  SDValue Lo, Hi;
+
+  // See if the target wants to custom expand this node.
+  if (CustomLowerNode(N, N->getValueType(ResNo), true))
+    return;
+
+  switch (N->getOpcode()) {
+  default:
+#ifndef NDEBUG
+    dbgs() << "SplitVectorResult #" << ResNo << ": ";
+    N->dump(&DAG);
+    dbgs() << "\n";
+#endif
+    report_fatal_error("Do not know how to split the result of this "
+                       "operator!\n");
+
+  // Generic (non vector-specific) splitting helpers.
+  case ISD::MERGE_VALUES:
+    SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi);
+    break;
+  case ISD::VSELECT:
+  case ISD::SELECT:
+    SplitRes_SELECT(N, Lo, Hi);
+    break;
+  case ISD::SELECT_CC:
+    SplitRes_SELECT_CC(N, Lo, Hi);
+    break;
+  case ISD::UNDEF:
+    SplitRes_UNDEF(N, Lo, Hi);
+    break;
+
+  // Opcodes with a dedicated vector helper.
+  case ISD::BITCAST:
+    SplitVecRes_BITCAST(N, Lo, Hi);
+    break;
+  case ISD::BUILD_VECTOR:
+    SplitVecRes_BUILD_VECTOR(N, Lo, Hi);
+    break;
+  case ISD::CONCAT_VECTORS:
+    SplitVecRes_CONCAT_VECTORS(N, Lo, Hi);
+    break;
+  case ISD::EXTRACT_SUBVECTOR:
+    SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi);
+    break;
+  case ISD::INSERT_SUBVECTOR:
+    SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi);
+    break;
+  case ISD::FPOWI:
+    SplitVecRes_FPOWI(N, Lo, Hi);
+    break;
+  case ISD::INSERT_VECTOR_ELT:
+    SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi);
+    break;
+  case ISD::SCALAR_TO_VECTOR:
+    SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi);
+    break;
+  case ISD::LOAD:
+    SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);
+    break;
+  case ISD::MLOAD:
+    SplitVecRes_MLOAD(cast<MaskedLoadSDNode>(N), Lo, Hi);
+    break;
+  case ISD::SETCC:
+    SplitVecRes_SETCC(N, Lo, Hi);
+    break;
+  case ISD::VECTOR_SHUFFLE:
+    SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi);
+    break;
+
+  // "In register" operations carrying a value-type operand.
+  case ISD::FP_ROUND_INREG:
+  case ISD::SIGN_EXTEND_INREG:
+    SplitVecRes_InregOp(N, Lo, Hi);
+    break;
+
+  // Single-operand operations.
+  case ISD::BSWAP:
+  case ISD::CONVERT_RNDSAT:
+  case ISD::CTLZ:
+  case ISD::CTTZ:
+  case ISD::CTLZ_ZERO_UNDEF:
+  case ISD::CTTZ_ZERO_UNDEF:
+  case ISD::CTPOP:
+  case ISD::FABS:
+  case ISD::FCEIL:
+  case ISD::FCOS:
+  case ISD::FEXP:
+  case ISD::FEXP2:
+  case ISD::FFLOOR:
+  case ISD::FLOG:
+  case ISD::FLOG10:
+  case ISD::FLOG2:
+  case ISD::FNEARBYINT:
+  case ISD::FNEG:
+  case ISD::FP_EXTEND:
+  case ISD::FP_ROUND:
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:
+  case ISD::FRINT:
+  case ISD::FROUND:
+  case ISD::FSIN:
+  case ISD::FSQRT:
+  case ISD::FTRUNC:
+  case ISD::SINT_TO_FP:
+  case ISD::TRUNCATE:
+  case ISD::UINT_TO_FP:
+    SplitVecRes_UnaryOp(N, Lo, Hi);
+    break;
+
+  // Integer extensions.
+  case ISD::ANY_EXTEND:
+  case ISD::SIGN_EXTEND:
+  case ISD::ZERO_EXTEND:
+    SplitVecRes_ExtendOp(N, Lo, Hi);
+    break;
+
+  // Two-operand operations.
+  case ISD::ADD:
+  case ISD::SUB:
+  case ISD::MUL:
+  case ISD::FADD:
+  case ISD::FCOPYSIGN:
+  case ISD::FSUB:
+  case ISD::FMUL:
+  case ISD::FMINNUM:
+  case ISD::FMAXNUM:
+  case ISD::SDIV:
+  case ISD::UDIV:
+  case ISD::FDIV:
+  case ISD::FPOW:
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR:
+  case ISD::SHL:
+  case ISD::SRA:
+  case ISD::SRL:
+  case ISD::UREM:
+  case ISD::SREM:
+  case ISD::FREM:
+    SplitVecRes_BinOp(N, Lo, Hi);
+    break;
+
+  // Three-operand operations.
+  case ISD::FMA:
+    SplitVecRes_TernaryOp(N, Lo, Hi);
+    break;
+  }
+
+  // A null Lo means the helper took care of registering results etc.
+  if (!Lo.getNode())
+    return;
+  SetSplitVector(SDValue(N, ResNo), Lo, Hi);
+}
+
+/// Split a two-operand vector operation by applying the same opcode to the
+/// low halves and to the high halves of both operands. Each result half takes
+/// the value type of the corresponding half of operand 0.
+void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo,
+                                         SDValue &Hi) {
+  SDLoc DL(N);
+  const unsigned Opc = N->getOpcode();
+
+  // Fetch the low/high halves of the left and right operands.
+  SDValue LeftLo, LeftHi, RightLo, RightHi;
+  GetSplitVector(N->getOperand(0), LeftLo, LeftHi);
+  GetSplitVector(N->getOperand(1), RightLo, RightHi);
+
+  Lo = DAG.getNode(Opc, DL, LeftLo.getValueType(), LeftLo, RightLo);
+  Hi = DAG.getNode(Opc, DL, LeftHi.getValueType(), LeftHi, RightHi);
+}
+
+/// Split a three-operand vector operation by applying the same opcode to the
+/// low halves and to the high halves of all three operands. Each result half
+/// takes the value type of the corresponding half of operand 0.
+void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo,
+                                             SDValue &Hi) {
+  SDLoc DL(N);
+  const unsigned Opc = N->getOpcode();
+
+  // Fetch the low/high halves of each operand, in operand order.
+  SDValue ALo, AHi, BLo, BHi, CLo, CHi;
+  GetSplitVector(N->getOperand(0), ALo, AHi);
+  GetSplitVector(N->getOperand(1), BLo, BHi);
+  GetSplitVector(N->getOperand(2), CLo, CHi);
+
+  Lo = DAG.getNode(Opc, DL, ALo.getValueType(), ALo, BLo, CLo);
+  Hi = DAG.getNode(Opc, DL, AHi.getValueType(), AHi, BHi, CHi);
+}
+
+/// Split the vector result of a BITCAST into Lo and Hi.
+///
+/// Two fast paths exist, chosen by the legalization action of the input type:
+///  - the input is an expanded integer/float and both result halves have the
+///    same type: the two expanded pieces are bitcast directly;
+///  - the input is itself a split vector: its two halves are bitcast directly.
+/// Otherwise the input is converted to an integer, split with SplitInteger
+/// into pieces as wide as the result halves, and each piece is bitcast.
+/// On big-endian targets the pieces are swapped before the final bitcasts.
+void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ // We know the result is a vector. The input may be either a vector or a
+ // scalar value.
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+ SDLoc dl(N);
+
+ SDValue InOp = N->getOperand(0);
+ EVT InVT = InOp.getValueType();
+
+ // Handle some special cases efficiently.
+ switch (getTypeAction(InVT)) {
+ // These input kinds have no shortcut; fall through to the generic path below.
+ case TargetLowering::TypeLegal:
+ case TargetLowering::TypePromoteInteger:
+ case TargetLowering::TypeSoftenFloat:
+ case TargetLowering::TypeScalarizeVector:
+ case TargetLowering::TypeWidenVector:
+ break;
+ case TargetLowering::TypeExpandInteger:
+ case TargetLowering::TypeExpandFloat:
+ // A scalar to vector conversion, where the scalar needs expansion.
+ // If the vector is being split in two then we can just convert the
+ // expanded pieces.
+ if (LoVT == HiVT) {
+ GetExpandedOp(InOp, Lo, Hi);
+ // On big-endian targets swap the two expanded pieces before casting.
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
+ return;
+ }
+ // Unequal halves: use the generic integer split below.
+ break;
+ case TargetLowering::TypeSplitVector:
+ // If the input is a vector that needs to be split, convert each split
+ // piece of the input now.
+ GetSplitVector(InOp, Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
+ return;
+ }
+
+ // In the general case, convert the input to an integer and split it by hand.
+ EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits());
+ EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits());
+ // The integer widths are swapped here and the resulting pieces are swapped
+ // back after SplitInteger, so Lo/Hi end up with the widths of LoVT/HiVT.
+ if (TLI.isBigEndian())
+ std::swap(LoIntVT, HiIntVT);
+
+ SplitInteger(BitConvertToInteger(InOp), LoIntVT, HiIntVT, Lo, Hi);
+
+ if (TLI.isBigEndian())
+ std::swap(Lo, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
+}
+
+/// Split a BUILD_VECTOR into two smaller BUILD_VECTORs: the leading operands
+/// (as many as the low half has elements) form Lo, the remaining ones form Hi.
+void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo,
+                                                SDValue &Hi) {
+  SDLoc DL(N);
+  EVT LoVT, HiVT;
+  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+
+  // Operand position at which the high half starts.
+  const unsigned SplitAt = LoVT.getVectorNumElements();
+
+  SmallVector<SDValue, 8> LowElts(N->op_begin(), N->op_begin() + SplitAt);
+  Lo = DAG.getNode(ISD::BUILD_VECTOR, DL, LoVT, LowElts);
+
+  SmallVector<SDValue, 8> HighElts(N->op_begin() + SplitAt, N->op_end());
+  Hi = DAG.getNode(ISD::BUILD_VECTOR, DL, HiVT, HighElts);
+}
+
+/// Split a CONCAT_VECTORS with an even number of operands. With exactly two
+/// operands they are returned directly as Lo and Hi; otherwise the first half
+/// of the operands is concatenated into Lo and the second half into Hi.
+void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo,
+                                                  SDValue &Hi) {
+  assert(!(N->getNumOperands() & 1) && "Unsupported CONCAT_VECTORS");
+  SDLoc DL(N);
+
+  // Number of input subvectors that go into each result half.
+  const unsigned PerHalf = N->getNumOperands() / 2;
+
+  if (PerHalf != 1) {
+    EVT LoVT, HiVT;
+    std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+
+    SmallVector<SDValue, 8> LowParts(N->op_begin(), N->op_begin() + PerHalf);
+    Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, LoVT, LowParts);
+
+    SmallVector<SDValue, 8> HighParts(N->op_begin() + PerHalf, N->op_end());
+    Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, HiVT, HighParts);
+    return;
+  }
+
+  // One subvector per half: the operands already are the two halves.
+  Lo = N->getOperand(0);
+  Hi = N->getOperand(1);
+}
+
+/// Split an EXTRACT_SUBVECTOR result into two narrower extractions from the
+/// same source vector: Lo starts at the original index, Hi starts at the
+/// original index plus the number of elements in Lo. The index operand must be
+/// a constant (it is cast to ConstantSDNode).
+void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo,
+                                                     SDValue &Hi) {
+  SDLoc DL(N);
+  SDValue Source = N->getOperand(0);
+  SDValue StartIdx = N->getOperand(1);
+
+  EVT LoVT, HiVT;
+  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+
+  Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, Source, StartIdx);
+
+  // The high half begins right after the elements covered by the low half.
+  uint64_t HiStart = cast<ConstantSDNode>(StartIdx)->getZExtValue() +
+                     LoVT.getVectorNumElements();
+  SDValue HiIdx = DAG.getConstant(HiStart, TLI.getVectorIdxTy());
+  Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, Source, HiIdx);
+}
+
+/// Split the result of INSERT_SUBVECTOR by going through memory: the whole
+/// input vector is stored to a stack temporary, the subvector is stored on top
+/// of it at the address computed from Idx, and the low and high halves are
+/// then loaded back from the stack slot.
+void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Vec = N->getOperand(0);
+ SDValue SubVec = N->getOperand(1);
+ SDValue Idx = N->getOperand(2);
+ SDLoc dl(N);
+ // The halves fetched here are only used for their value types below; Lo and
+ // Hi are overwritten by the reloads at the end.
+ GetSplitVector(Vec, Lo, Hi);
+
+ // Spill the vector to the stack.
+ EVT VecVT = Vec.getValueType();
+ // NOTE: despite its name, SubVecVT is the *element* type of Vec, not the type
+ // of SubVec. It is only passed to GetVectorElementPointer (presumably to
+ // scale Idx by the element size - TODO confirm against that helper).
+ EVT SubVecVT = VecVT.getVectorElementType();
+ SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
+ MachinePointerInfo(), false, false, 0);
+
+ // Store the new subvector into the specified index.
+ SDValue SubVecPtr = GetVectorElementPointer(StackPtr, SubVecVT, Idx);
+ Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
+ // Preferred alignment of the full vector type; used for the Hi reload below.
+ unsigned Alignment = TLI.getDataLayout()->getPrefTypeAlignment(VecType);
+ // Chained on the first store so the subvector overwrites the spilled data.
+ Store = DAG.getStore(Store, dl, SubVec, SubVecPtr, MachinePointerInfo(),
+ false, false, 0);
+
+ // Load the Lo part from the stack slot.
+ Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
+ false, false, false, 0);
+
+ // Increment the pointer to the other part.
+ unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8;
+ StackPtr =
+ DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ DAG.getConstant(IncrementSize, StackPtr.getValueType()));
+
+ // Load the Hi part from the stack slot.
+ Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
+ false, false, false, MinAlign(Alignment, IncrementSize));
+}
+
+/// Split an FPOWI: each half of the vector base (operand 0) is raised to the
+/// same, unsplit power operand (operand 1).
+void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo,
+                                         SDValue &Hi) {
+  SDLoc DL(N);
+
+  SDValue BaseLo, BaseHi;
+  GetSplitVector(N->getOperand(0), BaseLo, BaseHi);
+
+  Lo = DAG.getNode(ISD::FPOWI, DL, BaseLo.getValueType(), BaseLo,
+                   N->getOperand(1));
+  Hi = DAG.getNode(ISD::FPOWI, DL, BaseHi.getValueType(), BaseHi,
+                   N->getOperand(1));
+}
+
+/// Split an "in-register" operation (used for FP_ROUND_INREG and
+/// SIGN_EXTEND_INREG): the vector operand is split, and the value-type operand
+/// (operand 1) is split the same way so each half gets a matching type node.
+void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo,
+                                           SDValue &Hi) {
+  SDLoc DL(N);
+  const unsigned Opc = N->getOpcode();
+
+  SDValue SrcLo, SrcHi;
+  GetSplitVector(N->getOperand(0), SrcLo, SrcHi);
+
+  // Split the type carried by the VTSDNode operand.
+  EVT InnerVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+  EVT InnerLoVT, InnerHiVT;
+  std::tie(InnerLoVT, InnerHiVT) = DAG.GetSplitDestVTs(InnerVT);
+
+  SDValue LoTypeOp = DAG.getValueType(InnerLoVT);
+  Lo = DAG.getNode(Opc, DL, SrcLo.getValueType(), SrcLo, LoTypeOp);
+
+  SDValue HiTypeOp = DAG.getValueType(InnerHiVT);
+  Hi = DAG.getNode(Opc, DL, SrcHi.getValueType(), SrcHi, HiTypeOp);
+}
+
+/// Split the result of INSERT_VECTOR_ELT.
+///
+/// With a constant index the element is inserted directly into whichever half
+/// contains it (the index is rebased for the high half) and the other half is
+/// passed through unchanged. With a variable index the target is first offered
+/// the node for custom lowering; failing that, the vector is spilled to a
+/// stack temporary, the element is stored at the computed address, and both
+/// halves are loaded back.
+void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue Vec = N->getOperand(0);
+ SDValue Elt = N->getOperand(1);
+ SDValue Idx = N->getOperand(2);
+ SDLoc dl(N);
+ GetSplitVector(Vec, Lo, Hi);
+
+ if (ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
+ unsigned IdxVal = CIdx->getZExtValue();
+ unsigned LoNumElts = Lo.getValueType().getVectorNumElements();
+ // Only the half that holds the element is rebuilt; the other half keeps the
+ // value returned by GetSplitVector.
+ if (IdxVal < LoNumElts)
+ Lo = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
+ Lo.getValueType(), Lo, Elt, Idx);
+ else
+ Hi = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt,
+ DAG.getConstant(IdxVal - LoNumElts,
+ TLI.getVectorIdxTy()));
+ return;
+ }
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(0), true))
+ return;
+
+ // Spill the vector to the stack.
+ EVT VecVT = Vec.getValueType();
+ EVT EltVT = VecVT.getVectorElementType();
+ SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
+ MachinePointerInfo(), false, false, 0);
+
+ // Store the new element. This may be larger than the vector element type,
+ // so use a truncating store.
+ SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
+ Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
+ // Preferred alignment of the full vector type; used for the Hi reload below.
+ unsigned Alignment =
+ TLI.getDataLayout()->getPrefTypeAlignment(VecType);
+ // Chained on the spill store so the element overwrites the spilled data.
+ Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo(), EltVT,
+ false, false, 0);
+
+ // Load the Lo part from the stack slot.
+ Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
+ false, false, false, 0);
+
+ // Increment the pointer to the other part.
+ unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8;
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ DAG.getConstant(IncrementSize, StackPtr.getValueType()));
+
+ // Load the Hi part from the stack slot.
+ Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
+ false, false, false, MinAlign(Alignment, IncrementSize));
+}
+
+/// Split a SCALAR_TO_VECTOR: the scalar goes into a SCALAR_TO_VECTOR of the
+/// low-half type, and the high half is an UNDEF of the high-half type.
+void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo,
+                                                    SDValue &Hi) {
+  SDLoc DL(N);
+  SDValue Scalar = N->getOperand(0);
+
+  EVT LoVT, HiVT;
+  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+
+  Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, LoVT, Scalar);
+  Hi = DAG.getUNDEF(HiVT);
+}
+
+/// Split an unindexed vector load into two loads of the low and high halves.
+/// The high half is loaded from the base pointer advanced by the byte size of
+/// the low memory type, with the pointer info offset by the same amount. The
+/// chain result of \p LD is replaced with a TokenFactor of both new chains.
+void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
+                                        SDValue &Hi) {
+  assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!");
+  SDLoc DL(LD);
+
+  EVT LoVT, HiVT;
+  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0));
+
+  // Capture the properties shared by both half loads.
+  ISD::LoadExtType ExtKind = LD->getExtensionType();
+  SDValue Chain = LD->getChain();
+  SDValue Addr = LD->getBasePtr();
+  SDValue UndefOffset = DAG.getUNDEF(Addr.getValueType());
+  unsigned BaseAlign = LD->getOriginalAlignment();
+  bool Volatile = LD->isVolatile();
+  bool NonTemporal = LD->isNonTemporal();
+  bool Invariant = LD->isInvariant();
+  AAMDNodes AATags = LD->getAAInfo();
+
+  EVT LoMemVT, HiMemVT;
+  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(LD->getMemoryVT());
+
+  Lo = DAG.getLoad(ISD::UNINDEXED, ExtKind, LoVT, DL, Chain, Addr, UndefOffset,
+                   LD->getPointerInfo(), LoMemVT, Volatile, NonTemporal,
+                   Invariant, BaseAlign, AATags);
+
+  // Step past the low half to reach the high half in memory.
+  unsigned HiByteOffset = LoMemVT.getSizeInBits()/8;
+  Addr = DAG.getNode(ISD::ADD, DL, Addr.getValueType(), Addr,
+                     DAG.getConstant(HiByteOffset, Addr.getValueType()));
+  Hi = DAG.getLoad(ISD::UNINDEXED, ExtKind, HiVT, DL, Chain, Addr, UndefOffset,
+                   LD->getPointerInfo().getWithOffset(HiByteOffset),
+                   HiMemVT, Volatile, NonTemporal, Invariant, BaseAlign,
+                   AATags);
+
+  // Both loads hang off the same incoming chain, so they are independent of
+  // each other; merge their output chains with a TokenFactor.
+  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
+                      Hi.getValue(1));
+
+  // Redirect every user of the old load's chain result to the merged chain.
+  ReplaceValueWith(SDValue(LD, 1), Chain);
+}
+
+/// Split the result of a masked load into two half-width masked loads.
+///
+/// The mask and the pass-through value (Src0) are split with DAG.SplitVector.
+/// The low half is loaded from the original base pointer and the high half
+/// from the base pointer advanced by the byte size of the low memory type.
+/// The chain result of \p MLD is replaced by a TokenFactor of the two new
+/// load chains.
+///
+/// \param MLD  the masked load whose vector result must be split.
+/// \param Lo   receives the masked load producing the low half.
+/// \param Hi   receives the masked load producing the high half.
+void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
+                                         SDValue &Lo, SDValue &Hi) {
+  EVT LoVT, HiVT;
+  SDLoc dl(MLD);
+  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
+
+  SDValue Ch = MLD->getChain();
+  SDValue Ptr = MLD->getBasePtr();
+  SDValue Mask = MLD->getMask();
+  unsigned Alignment = MLD->getOriginalAlignment();
+
+  SDValue MaskLo, MaskHi;
+  std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
+
+  EVT MemoryVT = MLD->getMemoryVT();
+  EVT LoMemVT, HiMemVT;
+  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+  // Byte distance from the start of the low half to the start of the high
+  // half in memory.
+  unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
+
+  // The high half lives IncrementSize bytes past the base, so it is only
+  // guaranteed the alignment common to the base alignment and that offset.
+  // This also covers a base alignment larger than the vector size, where
+  // reusing the base alignment would overstate the high half's alignment.
+  unsigned SecondHalfAlignment = MinAlign(Alignment, IncrementSize);
+
+  SDValue Src0 = MLD->getSrc0();
+  SDValue Src0Lo, Src0Hi;
+  std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl);
+
+  MachineMemOperand *MMO = DAG.getMachineFunction().
+    getMachineMemOperand(MLD->getPointerInfo(),
+                         MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
+                         Alignment, MLD->getAAInfo(), MLD->getRanges());
+
+  Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, Src0Lo, MMO);
+
+  Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+                    DAG.getConstant(IncrementSize, Ptr.getValueType()));
+
+  // Describe the high half at its real location: the pointer info must carry
+  // the same offset that was added to Ptr (as SplitVecRes_LOAD does), or the
+  // memory operand would claim the high load reads the low half's bytes.
+  MMO = DAG.getMachineFunction().
+    getMachineMemOperand(MLD->getPointerInfo().getWithOffset(IncrementSize),
+                         MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
+                         SecondHalfAlignment, MLD->getAAInfo(),
+                         MLD->getRanges());
+
+  Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, Src0Hi, MMO);
+
+  // Build a factor node to remember that this load is independent of the
+  // other one.
+  Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+                   Hi.getValue(1));
+
+  // Legalized the chain result - switch anything that used the old chain to
+  // use the new one.
+  ReplaceValueWith(SDValue(MLD, 1), Ch);
+}
+
+/// Split a vector SETCC: both compared operands are split and each pair of
+/// halves is compared with the original condition operand (operand 2).
+void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) {
+  assert(N->getValueType(0).isVector() &&
+         N->getOperand(0).getValueType().isVector() &&
+         "Operand types must be vectors");
+
+  SDLoc dl(N);
+  const unsigned Opc = N->getOpcode();
+
+  EVT LoVT, HiVT;
+  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+
+  // Split the left-hand and right-hand inputs of the comparison.
+  SDValue LhsLo, LhsHi;
+  std::tie(LhsLo, LhsHi) = DAG.SplitVectorOperand(N, 0);
+  SDValue RhsLo, RhsHi;
+  std::tie(RhsLo, RhsHi) = DAG.SplitVectorOperand(N, 1);
+
+  Lo = DAG.getNode(Opc, dl, LoVT, LhsLo, RhsLo, N->getOperand(2));
+  Hi = DAG.getNode(Opc, dl, HiVT, LhsHi, RhsHi, N->getOperand(2));
+}
+
+/// Split a one-input vector operation. The input halves come from the
+/// legalizer's split map when the input type is itself being split, otherwise
+/// the input is split explicitly. FP_ROUND forwards its second operand and
+/// CONVERT_RNDSAT rebuilds its type/rounding/saturation operands per half.
+void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
+                                           SDValue &Hi) {
+  SDLoc DL(N);
+
+  // Result halves may differ in type from the input halves (e.g. int_to_fp).
+  EVT LoVT, HiVT;
+  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+
+  // Reuse the recorded split of the input when there is one (compile-time
+  // speedup); otherwise split the operand by hand.
+  EVT SrcVT = N->getOperand(0).getValueType();
+  if (getTypeAction(SrcVT) != TargetLowering::TypeSplitVector)
+    std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
+  else
+    GetSplitVector(N->getOperand(0), Lo, Hi);
+
+  switch (N->getOpcode()) {
+  case ISD::FP_ROUND:
+    Lo = DAG.getNode(N->getOpcode(), DL, LoVT, Lo, N->getOperand(1));
+    Hi = DAG.getNode(N->getOpcode(), DL, HiVT, Hi, N->getOperand(1));
+    break;
+
+  case ISD::CONVERT_RNDSAT: {
+    // Destination and source type operands are recreated for each half.
+    SDValue DstTyLo = DAG.getValueType(LoVT);
+    SDValue DstTyHi = DAG.getValueType(HiVT);
+    SDValue SrcTyLo = DAG.getValueType(Lo.getValueType());
+    SDValue SrcTyHi = DAG.getValueType(Hi.getValueType());
+    SDValue Rounding = N->getOperand(3);
+    SDValue Saturation = N->getOperand(4);
+    ISD::CvtCode Code = cast<CvtRndSatSDNode>(N)->getCvtCode();
+    Lo = DAG.getConvertRndSat(LoVT, DL, Lo, DstTyLo, SrcTyLo, Rounding,
+                              Saturation, Code);
+    Hi = DAG.getConvertRndSat(HiVT, DL, Hi, DstTyHi, SrcTyHi, Rounding,
+                              Saturation, Code);
+    break;
+  }
+
+  default:
+    Lo = DAG.getNode(N->getOpcode(), DL, LoVT, Lo);
+    Hi = DAG.getNode(N->getOpcode(), DL, HiVT, Hi);
+    break;
+  }
+}
+
+/// Split the result of ANY_EXTEND / SIGN_EXTEND / ZERO_EXTEND.
+///
+/// When the extension more than doubles the total vector size and the type
+/// legality conditions listed below hold, the source is first extended by one
+/// step (element size doubled), that intermediate vector is split, and each
+/// half is extended the rest of the way. Otherwise this defers to
+/// SplitVecRes_UnaryOp.
+void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc dl(N);
+ EVT SrcVT = N->getOperand(0).getValueType();
+ EVT DestVT = N->getValueType(0);
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(DestVT);
+
+ // We can do better than a generic split operation if the extend is doing
+ // more than just doubling the width of the elements and the following are
+ // true:
+ // - The number of vector elements is even,
+ // - the source type is legal,
+ // - the type of a split source is illegal,
+ // - the type of an extended (by doubling element size) source is legal, and
+ // - the type of that extended source when split is legal.
+ //
+ // This won't necessarily completely legalize the operation, but it will
+ // more effectively move in the right direction and prevent falling down
+ // to scalarization in many cases due to the input vector being split too
+ // far.
+ unsigned NumElements = SrcVT.getVectorNumElements();
+ if ((NumElements & 1) == 0 &&
+ SrcVT.getSizeInBits() * 2 < DestVT.getSizeInBits()) {
+ LLVMContext &Ctx = *DAG.getContext();
+ // NewSrcVT: same element count as the source, integer elements twice as wide.
+ EVT NewSrcVT = EVT::getVectorVT(
+ Ctx, EVT::getIntegerVT(
+ Ctx, SrcVT.getVectorElementType().getSizeInBits() * 2),
+ NumElements);
+ // SplitSrcVT: the source element type with half the element count.
+ EVT SplitSrcVT =
+ EVT::getVectorVT(Ctx, SrcVT.getVectorElementType(), NumElements / 2);
+ // Only SplitLoVT is consulted in the legality check; SplitHiVT is unused.
+ EVT SplitLoVT, SplitHiVT;
+ std::tie(SplitLoVT, SplitHiVT) = DAG.GetSplitDestVTs(NewSrcVT);
+ if (TLI.isTypeLegal(SrcVT) && !TLI.isTypeLegal(SplitSrcVT) &&
+ TLI.isTypeLegal(NewSrcVT) && TLI.isTypeLegal(SplitLoVT)) {
+ DEBUG(dbgs() << "Split vector extend via incremental extend:";
+ N->dump(&DAG); dbgs() << "\n");
+ // Extend the source vector by one step.
+ SDValue NewSrc =
+ DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0));
+ // Get the low and high halves of the new, extended one step, vector.
+ std::tie(Lo, Hi) = DAG.SplitVector(NewSrc, dl);
+ // Extend those vector halves the rest of the way.
+ Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
+ Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
+ return;
+ }
+ }
+ // Fall back to the generic unary operator splitting otherwise.
+ SplitVecRes_UnaryOp(N, Lo, Hi);
+}
+
+/// Split the result of a VECTOR_SHUFFLE.
+///
+/// Both shuffle operands are split, giving four half-width input vectors.
+/// Each output half (Lo, then Hi) is built independently: if its mask elements
+/// draw from at most two of the four inputs, a new shuffle of those two is
+/// emitted; if no input is referenced the half is UNDEF; otherwise the
+/// elements are extracted one by one and assembled with a BUILD_VECTOR.
+void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // The low and high parts of the original input give four input vectors.
+ SDValue Inputs[4];
+ SDLoc dl(N);
+ GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]);
+ GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]);
+ EVT NewVT = Inputs[0].getValueType();
+ unsigned NewElts = NewVT.getVectorNumElements();
+
+ // If Lo or Hi uses elements from at most two of the four input vectors, then
+ // express it as a vector shuffle of those two inputs. Otherwise extract the
+ // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
+ // Ops holds the mask of the shuffle being built; it is reused for both halves
+ // and cleared at the end of each iteration.
+ SmallVector<int, 16> Ops;
+ for (unsigned High = 0; High < 2; ++High) {
+ SDValue &Output = High ? Hi : Lo;
+
+ // Build a shuffle mask for the output, discovering on the fly which
+ // input vectors to use as shuffle operands (recorded in InputUsed).
+ // If building a suitable shuffle vector proves too hard, then bail
+ // out with useBuildVector set.
+ unsigned InputUsed[2] = { -1U, -1U }; // Not yet discovered.
+ unsigned FirstMaskIdx = High * NewElts;
+ bool useBuildVector = false;
+ for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
+ // The mask element. This indexes into the input.
+ int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset);
+
+ // The input vector this mask element indexes into.
+ // The unsigned cast turns a negative mask element into a very large value,
+ // so it lands in the "does not index into any input" case below.
+ unsigned Input = (unsigned)Idx / NewElts;
+
+ if (Input >= array_lengthof(Inputs)) {
+ // The mask element does not index into any input vector.
+ Ops.push_back(-1);
+ continue;
+ }
+
+ // Turn the index into an offset from the start of the input vector.
+ Idx -= Input * NewElts;
+
+ // Find or create a shuffle vector operand to hold this input.
+ unsigned OpNo;
+ for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
+ if (InputUsed[OpNo] == Input) {
+ // This input vector is already an operand.
+ break;
+ } else if (InputUsed[OpNo] == -1U) {
+ // Create a new operand for this input vector.
+ InputUsed[OpNo] = Input;
+ break;
+ }
+ }
+
+ if (OpNo >= array_lengthof(InputUsed)) {
+ // More than two input vectors used! Give up on trying to create a
+ // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
+ useBuildVector = true;
+ break;
+ }
+
+ // Add the mask index for the new shuffle vector.
+ // Elements of the second shuffle operand are numbered from NewElts upward.
+ Ops.push_back(Idx + OpNo * NewElts);
+ }
+
+ if (useBuildVector) {
+ EVT EltVT = NewVT.getVectorElementType();
+ SmallVector<SDValue, 16> SVOps;
+
+ // Extract the input elements by hand.
+ for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
+ // The mask element. This indexes into the input.
+ int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset);
+
+ // The input vector this mask element indexes into.
+ unsigned Input = (unsigned)Idx / NewElts;
+
+ if (Input >= array_lengthof(Inputs)) {
+ // The mask element is "undef" or indexes off the end of the input.
+ SVOps.push_back(DAG.getUNDEF(EltVT));
+ continue;
+ }
+
+ // Turn the index into an offset from the start of the input vector.
+ Idx -= Input * NewElts;
+
+ // Extract the vector element by hand.
+ SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+ Inputs[Input], DAG.getConstant(Idx,
+ TLI.getVectorIdxTy())));
+ }
+
+ // Construct the Lo/Hi output using a BUILD_VECTOR.
+ Output = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, SVOps);
+ } else if (InputUsed[0] == -1U) {
+ // No input vectors were used! The result is undefined.
+ Output = DAG.getUNDEF(NewVT);
+ } else {
+ SDValue Op0 = Inputs[InputUsed[0]];
+ // If only one input was used, use an undefined vector for the other.
+ SDValue Op1 = InputUsed[1] == -1U ?
+ DAG.getUNDEF(NewVT) : Inputs[InputUsed[1]];
+ // At least one input vector was used. Create a new shuffle vector.
+ Output = DAG.getVectorShuffle(NewVT, dl, Op0, Op1, &Ops[0]);
+ }
+
+ // Reset the mask so the next half starts from an empty list.
+ Ops.clear();
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Operand Vector Splitting
+//===----------------------------------------------------------------------===//
+
+/// SplitVectorOperand - This method is called when the specified operand of the
+/// specified node is found to need vector splitting. At this point, all of the
+/// result types of the node are known to be legal, but other operands of the
+/// node may need legalization as well as the specified one.
+///
+/// Returns true only when the handler updated \p N in place (it returned a
+/// value whose node is N). Returns false when the target custom-lowered the
+/// node, when the handler registered its results itself (null result), or
+/// after the single result of N has been replaced with the handler's value.
+bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
+ DEBUG(dbgs() << "Split node operand: ";
+ N->dump(&DAG);
+ dbgs() << "\n");
+ SDValue Res = SDValue();
+
+ // See if the target wants to custom split this node.
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+ return false;
+
+ // NOTE(review): Res has not been assigned since its default construction
+ // above, so this guard looks like it is always taken - confirm.
+ if (!Res.getNode()) {
+ switch (N->getOpcode()) {
+ default:
+#ifndef NDEBUG
+ dbgs() << "SplitVectorOperand Op #" << OpNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ report_fatal_error("Do not know how to split this operator's "
+ "operand!\n");
+
+ case ISD::SETCC: Res = SplitVecOp_VSETCC(N); break;
+ case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break;
+ case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
+ case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break;
+ case ISD::TRUNCATE: Res = SplitVecOp_TRUNCATE(N); break;
+ case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break;
+ case ISD::STORE:
+ Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
+ break;
+ case ISD::MSTORE:
+ Res = SplitVecOp_MSTORE(cast<MaskedStoreSDNode>(N), OpNo);
+ break;
+ case ISD::VSELECT:
+ Res = SplitVecOp_VSELECT(N, OpNo);
+ break;
+ // Single-input operations that share one generic handler.
+ case ISD::CTTZ:
+ case ISD::CTLZ:
+ case ISD::CTPOP:
+ case ISD::FP_EXTEND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ case ISD::FTRUNC:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ Res = SplitVecOp_UnaryOp(N);
+ break;
+ }
+ }
+
+ // If the result is null, the sub-method took care of registering results etc.
+ if (!Res.getNode()) return false;
+
+ // If the result is N, the sub-method updated N in place. Tell the legalizer
+ // core about this.
+ if (Res.getNode() == N)
+ return true;
+
+ // A replacement value is only supported for single-result nodes, and it must
+ // have exactly the type of the result it replaces.
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return false;
+}
+
+/// Split a VSELECT whose mask operand (operand 0) needs splitting while its
+/// result type is legal. The mask and both value operands are split, a VSELECT
+/// is built for each half, and the halves are concatenated back into a vector
+/// of the original value-operand type.
+SDValue DAGTypeLegalizer::SplitVecOp_VSELECT(SDNode *N, unsigned OpNo) {
+  // The only possibility for an illegal operand is the mask, since result type
+  // legalization would have handled this node already otherwise.
+  assert(OpNo == 0 && "Illegal operand must be mask");
+
+  SDLoc dl(N);
+  SDValue Cond = N->getOperand(0);
+  SDValue TrueVal = N->getOperand(1);
+  SDValue FalseVal = N->getOperand(2);
+  EVT ValVT = TrueVal.getValueType();
+  assert(Cond.getValueType().isVector() && "VSELECT without a vector mask?");
+
+  // The recorded split of the mask is only consulted to check that both halves
+  // have the same type.
+  SDValue Lo, Hi;
+  GetSplitVector(N->getOperand(0), Lo, Hi);
+  assert(Lo.getValueType() == Hi.getValueType() &&
+         "Lo and Hi have differing types");
+
+  EVT LoValVT, HiValVT;
+  std::tie(LoValVT, HiValVT) = DAG.GetSplitDestVTs(ValVT);
+  assert(LoValVT == HiValVT && "Asymmetric vector split?");
+
+  // Split the two value operands first, then the mask.
+  SDValue TrueLo, TrueHi;
+  std::tie(TrueLo, TrueHi) = DAG.SplitVector(TrueVal, dl);
+  SDValue FalseLo, FalseHi;
+  std::tie(FalseLo, FalseHi) = DAG.SplitVector(FalseVal, dl);
+  SDValue CondLo, CondHi;
+  std::tie(CondLo, CondHi) = DAG.SplitVector(Cond, dl);
+
+  SDValue SelLo =
+    DAG.getNode(ISD::VSELECT, dl, LoValVT, CondLo, TrueLo, FalseLo);
+  SDValue SelHi =
+    DAG.getNode(ISD::VSELECT, dl, HiValVT, CondHi, TrueHi, FalseHi);
+
+  return DAG.getNode(ISD::CONCAT_VECTORS, dl, ValVT, SelLo, SelHi);
+}
+
+/// Split the operand of a one-input operation whose result type is legal. The
+/// operation is applied to each input half, producing vectors with the
+/// result's element type and the half's element count, and the two partial
+/// results are concatenated into the full result type.
+SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
+  SDLoc DL(N);
+  EVT FullResVT = N->getValueType(0);
+
+  SDValue SrcLo, SrcHi;
+  GetSplitVector(N->getOperand(0), SrcLo, SrcHi);
+
+  // Result type of each half: result element type, low-half element count.
+  EVT HalfResVT = EVT::getVectorVT(*DAG.getContext(),
+                                   FullResVT.getVectorElementType(),
+                                   SrcLo.getValueType().getVectorNumElements());
+
+  SDValue ResLo = DAG.getNode(N->getOpcode(), DL, HalfResVT, SrcLo);
+  SDValue ResHi = DAG.getNode(N->getOpcode(), DL, HalfResVT, SrcHi);
+
+  return DAG.getNode(ISD::CONCAT_VECTORS, DL, FullResVT, ResLo, ResHi);
+}
+
+/// Split the vector operand of a BITCAST (for example, i64 = BITCAST v4i16 on
+/// alpha). Both halves of the input are converted to integers, swapped on
+/// big-endian targets, joined into one integer, and bitcast to the result
+/// type.
+SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) {
+  SDValue PartLo, PartHi;
+  GetSplitVector(N->getOperand(0), PartLo, PartHi);
+
+  // Typically the vector will end up being split all the way down to
+  // individual components; reassemble from integer pieces.
+  PartLo = BitConvertToInteger(PartLo);
+  PartHi = BitConvertToInteger(PartHi);
+
+  if (TLI.isBigEndian())
+    std::swap(PartLo, PartHi);
+
+  SDValue Joined = JoinIntegers(PartLo, PartHi);
+  return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Joined);
+}
+
+/// Split the source operand of an EXTRACT_SUBVECTOR whose result type is
+/// legal. The extraction is redirected to the half of the source that contains
+/// the constant start index; for the high half the index is rebased.
+SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
+  SDLoc DL(N);
+  EVT ResVT = N->getValueType(0);
+  SDValue StartIdx = N->getOperand(1);
+
+  SDValue SrcLo, SrcHi;
+  GetSplitVector(N->getOperand(0), SrcLo, SrcHi);
+
+  uint64_t LoCount = SrcLo.getValueType().getVectorNumElements();
+  uint64_t Start = cast<ConstantSDNode>(StartIdx)->getZExtValue();
+
+  // Extraction starting in the high half: rebase the index onto that half.
+  if (Start >= LoCount)
+    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, SrcHi,
+                       DAG.getConstant(Start - LoCount,
+                                       StartIdx.getValueType()));
+
+  // Extraction starting in the low half must also end inside it.
+  assert(Start + ResVT.getVectorNumElements() <= LoCount &&
+         "Extracted subvector crosses vector split!");
+  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, SrcLo, StartIdx);
+}
+
+/// Split the vector operand of an EXTRACT_VECTOR_ELT.
+///
+/// With a constant index, N is updated in place to extract from the half that
+/// holds the element (index rebased for the high half). With a variable index
+/// the target may custom-lower the node (a null SDValue is returned in that
+/// case); otherwise the vector is stored to a stack temporary and the element
+/// is read back with an extending load.
+SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+ SDValue Vec = N->getOperand(0);
+ SDValue Idx = N->getOperand(1);
+ EVT VecVT = Vec.getValueType();
+
+ if (isa<ConstantSDNode>(Idx)) {
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ assert(IdxVal < VecVT.getVectorNumElements() && "Invalid vector index!");
+
+ SDValue Lo, Hi;
+ GetSplitVector(Vec, Lo, Hi);
+
+ uint64_t LoElts = Lo.getValueType().getVectorNumElements();
+
+ // Rewrite N's operands rather than building a new node.
+ if (IdxVal < LoElts)
+ return SDValue(DAG.UpdateNodeOperands(N, Lo, Idx), 0);
+ return SDValue(DAG.UpdateNodeOperands(N, Hi,
+ DAG.getConstant(IdxVal - LoElts,
+ Idx.getValueType())), 0);
+ }
+
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(0), true))
+ return SDValue();
+
+ // Store the vector to the stack.
+ EVT EltVT = VecVT.getVectorElementType();
+ SDLoc dl(N);
+ SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
+ MachinePointerInfo(), false, false, 0);
+
+ // Load back the required element.
+ StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
+ // The load is chained on the store and reads an EltVT-sized value, extended
+ // (EXTLOAD) to the node's result type.
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
+ MachinePointerInfo(), EltVT, false, false, false, 0);
+}
+
+/// Split a masked store whose operands need splitting into two half-width
+/// masked stores.
+///
+/// The data and mask halves are fetched with GetSplitVector. The low half is
+/// stored at the original base pointer and the high half at the base pointer
+/// advanced by the byte size of the low memory type.
+///
+/// \param N     the masked store to split.
+/// \param OpNo  index of the operand that triggered the split (not used).
+/// \returns a TokenFactor joining the chains of the two new stores.
+SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
+                                            unsigned OpNo) {
+  SDValue Ch = N->getChain();
+  SDValue Ptr = N->getBasePtr();
+  SDValue Mask = N->getMask();
+  SDValue Data = N->getData();
+  EVT MemoryVT = N->getMemoryVT();
+  unsigned Alignment = N->getOriginalAlignment();
+  SDLoc DL(N);
+
+  EVT LoMemVT, HiMemVT;
+  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
+
+  SDValue DataLo, DataHi;
+  GetSplitVector(Data, DataLo, DataHi);
+  // NOTE(review): this assumes the mask type is also being split by the
+  // legalizer - confirm for targets where only the data type is split.
+  SDValue MaskLo, MaskHi;
+  GetSplitVector(Mask, MaskLo, MaskHi);
+
+  // Byte distance from the start of the low half to the start of the high
+  // half in memory.
+  unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
+
+  // The high half lives IncrementSize bytes past the base, so it is only
+  // guaranteed the alignment common to the base alignment and that offset.
+  // This also covers a base alignment larger than the vector size, where
+  // reusing the base alignment would overstate the high half's alignment.
+  unsigned SecondHalfAlignment = MinAlign(Alignment, IncrementSize);
+
+  SDValue Lo, Hi;
+  MachineMemOperand *MMO = DAG.getMachineFunction().
+    getMachineMemOperand(N->getPointerInfo(),
+                         MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
+                         Alignment, N->getAAInfo(), N->getRanges());
+
+  Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, MaskLo, MMO);
+
+  Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+                    DAG.getConstant(IncrementSize, Ptr.getValueType()));
+
+  // Describe the high half at its real location: the pointer info must carry
+  // the same offset that was added to Ptr (as SplitVecOp_STORE does), or the
+  // memory operand would claim the high store writes the low half's bytes.
+  MMO = DAG.getMachineFunction().
+    getMachineMemOperand(N->getPointerInfo().getWithOffset(IncrementSize),
+                         MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
+                         SecondHalfAlignment, N->getAAInfo(), N->getRanges());
+
+  Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, MMO);
+
+  // Build a factor node to remember that this store is independent of the
+  // other one.
+  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+}
+
+/// Split an unindexed store whose stored value (operand 1) needs splitting
+/// into two stores of the low and high halves. The high half is written at the
+/// base pointer advanced by the byte size of the low memory type, with the
+/// pointer info offset by the same amount. Truncating stores stay truncating,
+/// using the split memory types. Returns a TokenFactor of both store chains.
+SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
+  assert(N->isUnindexed() && "Indexed store of vector?");
+  assert(OpNo == 1 && "Can only split the stored value");
+  SDLoc dl(N);
+
+  // Capture the properties shared by both half stores.
+  const bool Truncating = N->isTruncatingStore();
+  SDValue Chain = N->getChain();
+  SDValue Addr = N->getBasePtr();
+  unsigned BaseAlign = N->getOriginalAlignment();
+  bool Volatile = N->isVolatile();
+  bool NonTemporal = N->isNonTemporal();
+  AAMDNodes AATags = N->getAAInfo();
+
+  SDValue ValLo, ValHi;
+  GetSplitVector(N->getOperand(1), ValLo, ValHi);
+
+  EVT LoMemVT, HiMemVT;
+  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(N->getMemoryVT());
+
+  unsigned HiByteOffset = LoMemVT.getSizeInBits()/8;
+
+  SDValue StLo =
+    Truncating
+      ? DAG.getTruncStore(Chain, dl, ValLo, Addr, N->getPointerInfo(),
+                          LoMemVT, Volatile, NonTemporal, BaseAlign, AATags)
+      : DAG.getStore(Chain, dl, ValLo, Addr, N->getPointerInfo(),
+                     Volatile, NonTemporal, BaseAlign, AATags);
+
+  // Step past the low half to reach the high half in memory.
+  Addr = DAG.getNode(ISD::ADD, dl, Addr.getValueType(), Addr,
+                     DAG.getConstant(HiByteOffset, Addr.getValueType()));
+
+  SDValue StHi =
+    Truncating
+      ? DAG.getTruncStore(Chain, dl, ValHi, Addr,
+                          N->getPointerInfo().getWithOffset(HiByteOffset),
+                          HiMemVT, Volatile, NonTemporal, BaseAlign, AATags)
+      : DAG.getStore(Chain, dl, ValHi, Addr,
+                     N->getPointerInfo().getWithOffset(HiByteOffset),
+                     Volatile, NonTemporal, BaseAlign, AATags);
+
+  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, StLo, StHi);
+}
+
+/// Split the operands of a CONCAT_VECTORS whose result type is legal by
+/// turning it into a BUILD_VECTOR: every element of every input vector is
+/// extracted in order and the scalars are reassembled into the result type.
+SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
+  SDLoc dl(N);
+  EVT ResVT = N->getValueType(0);
+  EVT ScalarVT = ResVT.getVectorElementType();
+
+  // The input operands all must have the same type, and we know the result
+  // type is valid.
+  // TODO: If the input elements are power-two vectors, we could convert this to
+  // a new CONCAT_VECTORS node with elements that are half-wide.
+  SmallVector<SDValue, 32> Scalars;
+  const unsigned NumInputs = N->getNumOperands();
+  for (unsigned InIdx = 0; InIdx != NumInputs; ++InIdx) {
+    SDValue InVec = N->getOperand(InIdx);
+    const unsigned InElts = InVec.getValueType().getVectorNumElements();
+    for (unsigned EltIdx = 0; EltIdx != InElts; ++EltIdx) {
+      SDValue IdxOp = DAG.getConstant(EltIdx, TLI.getVectorIdxTy());
+      Scalars.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT,
+                                    InVec, IdxOp));
+    }
+  }
+
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, ResVT, Scalars);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) {
+  // The result type is legal but the input type is not. When the input
+  // elements are more than twice as wide as the result elements, truncate in
+  // two steps: split the input, truncate each half to elements of half the
+  // input width, concatenate the halves and truncate once more. For example,
+  // on a target where v8i8 is legal and v8i32 is not (ARM, which doesn't have
+  // 256-bit vectors), "%res = v8i8 trunc v8i32 %in" becomes:
+  //   %inlo = v4i32 extract_subvector %in, 0
+  //   %inhi = v4i32 extract_subvector %in, 4
+  //   %lo16 = v4i16 trunc v4i32 %inlo
+  //   %hi16 = v4i16 trunc v4i32 %inhi
+  //   %in16 = v8i16 concat_vectors v4i16 %lo16, v4i16 %hi16
+  //   %res  = v8i8 trunc v8i16 %in16
+  //
+  // Without this transform, the original truncate would end up being
+  // scalarized, which is pretty much always a last resort.
+  SDValue Src = N->getOperand(0);
+  EVT SrcVT = Src->getValueType(0);
+  EVT DstVT = N->getValueType(0);
+  const unsigned NumElts = DstVT.getVectorNumElements();
+  // Widening should already have guaranteed an even element count if we are
+  // splitting at all; check it anyway.
+  assert(!(NumElts & 1) && "Splitting vector, but not in half!");
+
+  const unsigned SrcEltBits = SrcVT.getVectorElementType().getSizeInBits();
+  const unsigned DstEltBits = DstVT.getVectorElementType().getSizeInBits();
+
+  // When the input elements are at most twice as wide as the result elements
+  // there is no room for an intermediate step, so use the normal splitting.
+  if (SrcEltBits <= DstEltBits * 2)
+    return SplitVecOp_UnaryOp(N);
+
+  SDLoc DL(N);
+
+  // Split the input into its two halves.
+  SDValue SrcLo, SrcHi;
+  std::tie(SrcLo, SrcHi) = DAG.SplitVector(Src, DL);
+
+  // Truncate each half to elements of half the input element width.
+  EVT MidEltVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltBits / 2);
+  EVT MidHalfVT = EVT::getVectorVT(*DAG.getContext(), MidEltVT, NumElts / 2);
+  SDValue MidLo = DAG.getNode(ISD::TRUNCATE, DL, MidHalfVT, SrcLo);
+  SDValue MidHi = DAG.getNode(ISD::TRUNCATE, DL, MidHalfVT, SrcHi);
+
+  // Glue the halves back together into the intermediate result.
+  EVT MidVT = EVT::getVectorVT(*DAG.getContext(), MidEltVT, NumElts);
+  SDValue Mid = DAG.getNode(ISD::CONCAT_VECTORS, DL, MidVT, MidLo, MidHi);
+
+  // Finish by truncating down to the requested result type. This should
+  // normally be legal directly, but in theory if a target has very wide
+  // vectors and an annoyingly restricted set of legal types, this split can
+  // chain to build things up.
+  return DAG.getNode(ISD::TRUNCATE, DL, DstVT, Mid);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) {
+  // The result type is a legal vector but the compared operands need
+  // splitting: compare the halves separately into i1 vectors, concatenate
+  // them, and convert the result to the node's result type.
+  assert(N->getValueType(0).isVector() &&
+         N->getOperand(0).getValueType().isVector() &&
+         "Operand types must be vectors");
+  SDLoc DL(N);
+  SDValue CC = N->getOperand(2);
+
+  SDValue LHSLo, LHSHi, RHSLo, RHSHi;
+  GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
+  GetSplitVector(N->getOperand(1), RHSLo, RHSHi);
+
+  const unsigned HalfElts = LHSLo.getValueType().getVectorNumElements();
+  EVT HalfResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, HalfElts);
+  EVT FullResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, 2 * HalfElts);
+
+  SDValue LoCmp = DAG.getNode(ISD::SETCC, DL, HalfResVT, LHSLo, RHSLo, CC);
+  SDValue HiCmp = DAG.getNode(ISD::SETCC, DL, HalfResVT, LHSHi, RHSHi, CC);
+  SDValue Joined =
+      DAG.getNode(ISD::CONCAT_VECTORS, DL, FullResVT, LoCmp, HiCmp);
+  return PromoteTargetBoolean(Joined, N->getValueType(0));
+}
+
+
+SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) {
+  // The result type is a legal vector but the input needs splitting: round
+  // each half separately and concatenate the two rounded halves.
+  SDLoc DL(N);
+  EVT ResVT = N->getValueType(0);
+
+  SDValue InLo, InHi;
+  GetSplitVector(N->getOperand(0), InLo, InHi);
+
+  // Each half result has the result element type and as many elements as
+  // the low half of the input.
+  const unsigned HalfElts = InLo.getValueType().getVectorNumElements();
+  EVT HalfResVT = EVT::getVectorVT(*DAG.getContext(),
+                                   ResVT.getVectorElementType(), HalfElts);
+
+  SDValue RoundedLo =
+      DAG.getNode(ISD::FP_ROUND, DL, HalfResVT, InLo, N->getOperand(1));
+  SDValue RoundedHi =
+      DAG.getNode(ISD::FP_ROUND, DL, HalfResVT, InHi, N->getOperand(1));
+
+  return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, RoundedLo, RoundedHi);
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Result Vector Widening
+//===----------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { // Widen result number ResNo of N, dispatching on N's opcode.
+ DEBUG(dbgs() << "Widen node result " << ResNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n");
+
+ // See if the target wants to custom widen this node; if it did, we are done.
+ if (CustomWidenLowerNode(N, N->getValueType(ResNo)))
+ return;
+
+ SDValue Res = SDValue(); // Stays null unless a handler below returns the widened value.
+ switch (N->getOpcode()) {
+ default: // Unhandled opcode: dump the node (asserts builds only) and abort.
+#ifndef NDEBUG
+ dbgs() << "WidenVectorResult #" << ResNo << ": ";
+ N->dump(&DAG);
+ dbgs() << "\n";
+#endif
+ llvm_unreachable("Do not know how to widen the result of this operator!");
+
+ case ISD::MERGE_VALUES: Res = WidenVecRes_MERGE_VALUES(N, ResNo); break; // Only handler that needs ResNo.
+ case ISD::BITCAST: Res = WidenVecRes_BITCAST(N); break;
+ case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break;
+ case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break;
+ case ISD::CONVERT_RNDSAT: Res = WidenVecRes_CONVERT_RNDSAT(N); break;
+ case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
+ case ISD::FP_ROUND_INREG: Res = WidenVecRes_InregOp(N); break;
+ case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
+ case ISD::LOAD: Res = WidenVecRes_LOAD(N); break;
+ case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break;
+ case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break;
+ case ISD::VSELECT: // Shares the SELECT handler.
+ case ISD::SELECT: Res = WidenVecRes_SELECT(N); break;
+ case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break;
+ case ISD::SETCC: Res = WidenVecRes_SETCC(N); break;
+ case ISD::UNDEF: Res = WidenVecRes_UNDEF(N); break;
+ case ISD::VECTOR_SHUFFLE:
+ Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N));
+ break;
+
+ case ISD::ADD:
+ case ISD::AND:
+ case ISD::MUL:
+ case ISD::MULHS:
+ case ISD::MULHU:
+ case ISD::OR:
+ case ISD::SUB:
+ case ISD::XOR:
+ case ISD::FMINNUM:
+ case ISD::FMAXNUM:
+ Res = WidenVecRes_Binary(N); // Two-operand ops widened directly on both operands.
+ break;
+
+ case ISD::FADD:
+ case ISD::FCOPYSIGN:
+ case ISD::FMUL:
+ case ISD::FPOW:
+ case ISD::FSUB:
+ case ISD::FDIV:
+ case ISD::FREM:
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SREM:
+ case ISD::UREM:
+ Res = WidenVecRes_BinaryCanTrap(N); // Two-operand ops routed through the trap-aware widening.
+ break;
+
+ case ISD::FPOWI:
+ Res = WidenVecRes_POWI(N);
+ break;
+
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ Res = WidenVecRes_Shift(N);
+ break;
+
+ case ISD::ANY_EXTEND:
+ case ISD::FP_EXTEND:
+ case ISD::FP_ROUND:
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ case ISD::SIGN_EXTEND:
+ case ISD::SINT_TO_FP:
+ case ISD::TRUNCATE:
+ case ISD::UINT_TO_FP:
+ case ISD::ZERO_EXTEND:
+ Res = WidenVecRes_Convert(N); // Conversions: input and result vector types differ.
+ break;
+
+ case ISD::BSWAP:
+ case ISD::CTLZ:
+ case ISD::CTPOP:
+ case ISD::CTTZ:
+ case ISD::FABS:
+ case ISD::FCEIL:
+ case ISD::FCOS:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FFLOOR:
+ case ISD::FLOG:
+ case ISD::FLOG10:
+ case ISD::FLOG2:
+ case ISD::FNEARBYINT:
+ case ISD::FNEG:
+ case ISD::FRINT:
+ case ISD::FROUND:
+ case ISD::FSIN:
+ case ISD::FSQRT:
+ case ISD::FTRUNC:
+ Res = WidenVecRes_Unary(N); // One-operand ops widened directly on the operand.
+ break;
+ case ISD::FMA:
+ Res = WidenVecRes_Ternary(N);
+ break;
+ }
+
+ // If Res is null, the sub-method took care of registering the result itself.
+ if (Res.getNode())
+ SetWidenedVector(SDValue(N, ResNo), Res); // Record Res as the widened form of result ResNo.
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) {
+  // Re-emit the three-operand node at the widened result type, using the
+  // widened form of each operand.
+  EVT WideVT =
+      TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDLoc Loc(N);
+  SDValue A = GetWidenedVector(N->getOperand(0));
+  SDValue B = GetWidenedVector(N->getOperand(1));
+  SDValue C = GetWidenedVector(N->getOperand(2));
+  return DAG.getNode(N->getOpcode(), Loc, WideVT, A, B, C);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
+  // Re-emit the two-operand node at the widened result type, using the
+  // widened form of both operands.
+  EVT WideVT =
+      TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDLoc Loc(N);
+  SDValue LHS = GetWidenedVector(N->getOperand(0));
+  SDValue RHS = GetWidenedVector(N->getOperand(1));
+  return DAG.getNode(N->getOpcode(), Loc, WideVT, LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
+ // Binary op widening for operations that can trap: when the op may trap, it is applied only to the original elements.
+ unsigned Opcode = N->getOpcode();
+ SDLoc dl(N);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT WidenEltVT = WidenVT.getVectorElementType();
+ EVT VT = WidenVT;
+ unsigned NumElts = VT.getVectorNumElements();
+ while (!TLI.isTypeLegal(VT) && NumElts != 1) { // Halve the element count until the type is legal or one element is left.
+ NumElts = NumElts / 2;
+ VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
+ }
+
+ if (NumElts != 1 && !TLI.canOpTrap(N->getOpcode(), VT)) {
+ // Operation doesn't trap so just widen as normal.
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2);
+ }
+
+ // No legal vector version so unroll the vector operation and then widen.
+ if (NumElts == 1)
+ return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
+
+ // Since the operation can trap, apply operation on the original vector's elements only.
+ EVT MaxVT = VT; // Largest legal piece type found above; the final concat is built from MaxVT pieces.
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ unsigned CurNumElts = N->getValueType(0).getVectorNumElements(); // Original elements not yet processed.
+
+ SmallVector<SDValue, 16> ConcatOps(CurNumElts); // NOTE(review): the undef padding at the end writes up to index NumOps-1; presumably NumOps never exceeds this size - confirm.
+ unsigned ConcatEnd = 0; // Current ConcatOps index.
+ int Idx = 0; // Current Idx into input vectors.
+
+ // NumElts := greatest legal vector size (at most WidenVT)
+ // while (orig. vector has unhandled elements) {
+ // take munches of size NumElts from the beginning and add to ConcatOps
+ // NumElts := next smaller supported vector size or 1
+ // }
+ while (CurNumElts != 0) {
+ while (CurNumElts >= NumElts) { // Peel off as many VT-sized pieces as still fit.
+ SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1,
+ DAG.getConstant(Idx, TLI.getVectorIdxTy()));
+ SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2,
+ DAG.getConstant(Idx, TLI.getVectorIdxTy()));
+ ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2);
+ Idx += NumElts;
+ CurNumElts -= NumElts;
+ }
+ do { // Step down to the next smaller legal piece type, or to a single element.
+ NumElts = NumElts / 2;
+ VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
+ } while (!TLI.isTypeLegal(VT) && NumElts != 1);
+
+ if (NumElts == 1) { // No smaller legal vector: handle every remaining element as a scalar op.
+ for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) {
+ SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
+ InOp1, DAG.getConstant(Idx,
+ TLI.getVectorIdxTy()));
+ SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
+ InOp2, DAG.getConstant(Idx,
+ TLI.getVectorIdxTy()));
+ ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT,
+ EOp1, EOp2);
+ }
+ CurNumElts = 0; // Everything has been consumed; this ends the outer loop.
+ }
+ }
+
+ // Check to see if we have a single operation with the widen type.
+ if (ConcatEnd == 1) {
+ VT = ConcatOps[0].getValueType();
+ if (VT == WidenVT)
+ return ConcatOps[0];
+ }
+
+ // while (Some element of ConcatOps is not of type MaxVT) {
+ // From the end of ConcatOps, collect elements of the same type and put
+ // them into an op of the next larger supported type
+ // }
+ while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) {
+ Idx = ConcatEnd - 1;
+ VT = ConcatOps[Idx--].getValueType();
+ while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT) // Walk back over the trailing run of pieces of type VT.
+ Idx--;
+
+ int NextSize = VT.isVector() ? VT.getVectorNumElements() : 1;
+ EVT NextVT;
+ do { // Keep doubling the element count until the vector type is legal.
+ NextSize *= 2;
+ NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize);
+ } while (!TLI.isTypeLegal(NextVT));
+
+ if (!VT.isVector()) {
+ // Scalar type, create an INSERT_VECTOR_ELEMENT of type NextVT
+ SDValue VecOp = DAG.getUNDEF(NextVT);
+ unsigned NumToInsert = ConcatEnd - Idx - 1; // Length of the trailing run of scalars.
+ for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) {
+ VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp,
+ ConcatOps[OpIdx], DAG.getConstant(i,
+ TLI.getVectorIdxTy()));
+ }
+ ConcatOps[Idx+1] = VecOp; // The run collapses into this single slot.
+ ConcatEnd = Idx + 2;
+ } else {
+ // Vector type, create a CONCAT_VECTORS of type NextVT
+ SDValue undefVec = DAG.getUNDEF(VT);
+ unsigned OpsToConcat = NextSize/VT.getVectorNumElements();
+ SmallVector<SDValue, 16> SubConcatOps(OpsToConcat);
+ unsigned RealVals = ConcatEnd - Idx - 1; // Length of the trailing run of VT-typed pieces.
+ unsigned SubConcatEnd = 0;
+ unsigned SubConcatIdx = Idx + 1; // Slot that will receive the merged piece.
+ while (SubConcatEnd < RealVals)
+ SubConcatOps[SubConcatEnd++] = ConcatOps[++Idx];
+ while (SubConcatEnd < OpsToConcat) // Pad the rest of NextVT with undef pieces.
+ SubConcatOps[SubConcatEnd++] = undefVec;
+ ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl,
+ NextVT, SubConcatOps);
+ ConcatEnd = SubConcatIdx + 1;
+ }
+ }
+
+ // Check to see if we have a single operation with the widen type.
+ if (ConcatEnd == 1) {
+ VT = ConcatOps[0].getValueType();
+ if (VT == WidenVT)
+ return ConcatOps[0];
+ }
+
+ // Add undefs of size MaxVT until ConcatOps grows to the length of WidenVT.
+ unsigned NumOps = WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements();
+ if (NumOps != ConcatEnd ) {
+ SDValue UndefVal = DAG.getUNDEF(MaxVT);
+ for (unsigned j = ConcatEnd; j < NumOps; ++j)
+ ConcatOps[j] = UndefVal;
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
+ makeArrayRef(ConcatOps.data(), NumOps));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
+  // Widens the result of a conversion node. The input is brought to the same
+  // element count as the widened result when that can be done with a legal
+  // input type; otherwise the conversion is unrolled element by element.
+  SDLoc DL(N);
+  const unsigned Opcode = N->getOpcode();
+  SDValue Src = N->getOperand(0);
+
+  EVT WideVT =
+      TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  const unsigned WideNumElts = WideVT.getVectorNumElements();
+
+  EVT SrcVT = Src.getValueType();
+  EVT SrcEltVT = SrcVT.getVectorElementType();
+  EVT SrcWideVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, WideNumElts);
+  unsigned SrcNumElts = SrcVT.getVectorNumElements();
+
+  // Emits the conversion of From to type To, forwarding the node's second
+  // operand when the node has more than one operand.
+  auto Convert = [&](EVT To, SDValue From) -> SDValue {
+    if (N->getNumOperands() == 1)
+      return DAG.getNode(Opcode, DL, To, From);
+    return DAG.getNode(Opcode, DL, To, From, N->getOperand(1));
+  };
+
+  if (getTypeAction(SrcVT) == TargetLowering::TypeWidenVector) {
+    // The input is being widened too; if it ends up with the same element
+    // count as the result, convert it directly.
+    Src = GetWidenedVector(N->getOperand(0));
+    SrcVT = Src.getValueType();
+    SrcNumElts = SrcVT.getVectorNumElements();
+    if (SrcNumElts == WideNumElts)
+      return Convert(WideVT, Src);
+  }
+
+  if (TLI.isTypeLegal(SrcWideVT)) {
+    // Because the result and the input are different vector types, widening
+    // the result could create a legal type but widening the input might make
+    // it an illegal type that might lead to repeatedly splitting the input
+    // and then widening it. To avoid this, we widen the input only if
+    // it results in a legal type.
+    if (WideNumElts % SrcNumElts == 0) {
+      // Pad the input with undef vectors up to the widened element count.
+      const unsigned NumConcat = WideNumElts / SrcNumElts;
+      SmallVector<SDValue, 16> Parts(NumConcat, DAG.getUNDEF(SrcVT));
+      Parts[0] = Src;
+      SDValue Padded =
+          DAG.getNode(ISD::CONCAT_VECTORS, DL, SrcWideVT, Parts);
+      return Convert(WideVT, Padded);
+    }
+
+    if (SrcNumElts % WideNumElts == 0) {
+      // The input is longer than needed: convert only its leading subvector.
+      SDValue Leading =
+          DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SrcWideVT, Src,
+                      DAG.getConstant(0, TLI.getVectorIdxTy()));
+      return Convert(WideVT, Leading);
+    }
+  }
+
+  // Otherwise unroll into some nasty scalar code and rebuild the vector.
+  EVT EltVT = WideVT.getVectorElementType();
+  const unsigned NumConverted = std::min(SrcNumElts, WideNumElts);
+  SmallVector<SDValue, 16> Elts;
+  Elts.reserve(WideNumElts);
+  for (unsigned I = 0; I < NumConverted; ++I) {
+    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcEltVT, Src,
+                              DAG.getConstant(I, TLI.getVectorIdxTy()));
+    Elts.push_back(Convert(EltVT, Elt));
+  }
+
+  // Any remaining lanes of the widened result are undef.
+  Elts.append(WideNumElts - NumConverted, DAG.getUNDEF(EltVT));
+
+  return DAG.getNode(ISD::BUILD_VECTOR, DL, WideVT, Elts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) {
+  // Only the first operand is widened; the second operand is passed through
+  // unchanged.
+  EVT WideVT =
+      TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue Base = GetWidenedVector(N->getOperand(0));
+  SDValue Power = N->getOperand(1);
+  SDLoc Loc(N);
+  return DAG.getNode(N->getOpcode(), Loc, WideVT, Base, Power);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) {
+  // Widens a vector shift. The shifted value is widened as usual; the shift
+  // amount is brought to the same element count as the widened result.
+  EVT WideVT =
+      TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue Val = GetWidenedVector(N->getOperand(0));
+  SDValue Amt = N->getOperand(1);
+
+  // Use the widened amount when the amount's own type is being widened.
+  if (getTypeAction(Amt.getValueType()) == TargetLowering::TypeWidenVector)
+    Amt = GetWidenedVector(Amt);
+
+  // The amount must have as many elements as the widened result.
+  EVT AmtVT = Amt.getValueType();
+  EVT WideAmtVT = EVT::getVectorVT(*DAG.getContext(),
+                                   AmtVT.getVectorElementType(),
+                                   WideVT.getVectorNumElements());
+  if (AmtVT != WideAmtVT)
+    Amt = ModifyToType(Amt, WideAmtVT);
+
+  SDLoc Loc(N);
+  return DAG.getNode(N->getOpcode(), Loc, WideVT, Val, Amt);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) {
+  // Re-emit the one-operand node at the widened result type on the widened
+  // operand.
+  EVT WideVT =
+      TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue Src = GetWidenedVector(N->getOperand(0));
+  SDLoc Loc(N);
+  return DAG.getNode(N->getOpcode(), Loc, WideVT, Src);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) {
+  // Widens an *_INREG node: the value operand is widened and the type operand
+  // is rebuilt with the same element type and the widened element count.
+  EVT WideVT =
+      TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  EVT InregEltVT =
+      cast<VTSDNode>(N->getOperand(1))->getVT().getVectorElementType();
+  EVT WideInregVT = EVT::getVectorVT(*DAG.getContext(), InregEltVT,
+                                     WideVT.getVectorNumElements());
+  SDValue Src = GetWidenedVector(N->getOperand(0));
+  SDLoc Loc(N);
+  return DAG.getNode(N->getOpcode(), Loc, WideVT, Src,
+                     DAG.getValueType(WideInregVT));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo) {
+  // Take the MERGE_VALUES node apart and return the widened form of the
+  // value that DisintegrateMERGE_VALUES hands back for result ResNo.
+  return GetWidenedVector(DisintegrateMERGE_VALUES(N, ResNo));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { // Widen the result of a BITCAST; falls back to a stack store/load when no direct form applies.
+ SDValue InOp = N->getOperand(0);
+ EVT InVT = InOp.getValueType();
+ EVT VT = N->getValueType(0);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDLoc dl(N);
+
+ switch (getTypeAction(InVT)) { // Some cases replace InOp/InVT with the legalized input before falling out of the switch.
+ case TargetLowering::TypeLegal:
+ break;
+ case TargetLowering::TypePromoteInteger:
+ // If the incoming type is a vector that is being promoted, then
+ // we know that the elements are arranged differently and that we
+ // must perform the conversion using a stack slot.
+ if (InVT.isVector())
+ break;
+
+ // If the InOp is promoted to the same size, convert it. Otherwise,
+ // fall out of the switch and widen the promoted input.
+ InOp = GetPromotedInteger(InOp);
+ InVT = InOp.getValueType();
+ if (WidenVT.bitsEq(InVT))
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp);
+ break;
+ case TargetLowering::TypeSoftenFloat:
+ case TargetLowering::TypeExpandInteger:
+ case TargetLowering::TypeExpandFloat:
+ case TargetLowering::TypeScalarizeVector:
+ case TargetLowering::TypeSplitVector:
+ break; // No special handling: continue below with the original input.
+ case TargetLowering::TypeWidenVector:
+ // If the InOp is widened to the same size, convert it. Otherwise, fall
+ // out of the switch and widen the widened input.
+ InOp = GetWidenedVector(InOp);
+ InVT = InOp.getValueType();
+ if (WidenVT.bitsEq(InVT))
+ // The input widens to the same size. Convert to the widen value.
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp);
+ break;
+ }
+
+ unsigned WidenSize = WidenVT.getSizeInBits();
+ unsigned InSize = InVT.getSizeInBits();
+ // x86mmx is not an acceptable vector element type, so don't try.
+ if (WidenSize % InSize == 0 && InVT != MVT::x86mmx) {
+ // Determine new input vector type. The new input vector type will use
+ // the same element type (if it's a vector) or use the input type as a
+ // vector. It is the same size as the type to widen to.
+ EVT NewInVT;
+ unsigned NewNumElts = WidenSize / InSize; // Number of InVT-sized pieces that fill WidenVT.
+ if (InVT.isVector()) {
+ EVT InEltVT = InVT.getVectorElementType();
+ NewInVT = EVT::getVectorVT(*DAG.getContext(), InEltVT,
+ WidenSize / InEltVT.getSizeInBits());
+ } else {
+ NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts);
+ }
+
+ if (TLI.isTypeLegal(NewInVT)) {
+ // Because the result and the input are different vector types, widening
+ // the result could create a legal type but widening the input might make
+ // it an illegal type that might lead to repeatedly splitting the input
+ // and then widening it. To avoid this, we widen the input only if
+ // it results in a legal type.
+ SmallVector<SDValue, 16> Ops(NewNumElts);
+ SDValue UndefVal = DAG.getUNDEF(InVT);
+ Ops[0] = InOp; // The real input goes first; every other piece is undef.
+ for (unsigned i = 1; i < NewNumElts; ++i)
+ Ops[i] = UndefVal;
+
+ SDValue NewVec;
+ if (InVT.isVector())
+ NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewInVT, Ops);
+ else
+ NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, NewInVT, Ops);
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, NewVec);
+ }
+ }
+
+ return CreateStackStoreLoad(InOp, WidenVT); // Last resort when none of the paths above returned.
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) {
+  // Rebuilds the BUILD_VECTOR at the widened type: the original operands
+  // followed by undefs for the newly added lanes.
+  SDLoc dl(N);
+  EVT OrigVT = N->getValueType(0);
+  EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), OrigVT);
+
+  const unsigned OrigNumElts = OrigVT.getVectorNumElements();
+  const unsigned WideNumElts = WideVT.getVectorNumElements();
+
+  // Integer BUILD_VECTOR operands may be larger than the node's vector element
+  // type. The UNDEFs need to have the same type as the existing operands.
+  EVT OperandVT = N->getOperand(0).getValueType();
+
+  SmallVector<SDValue, 16> Elts(N->op_begin(), N->op_end());
+  assert(WideNumElts >= OrigNumElts && "Shrinking vector instead of widening!");
+  Elts.append(WideNumElts - OrigNumElts, DAG.getUNDEF(OperandVT));
+
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, WideVT, Elts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
+  // Widens the result of a CONCAT_VECTORS. Depending on how the operand type
+  // is legalized this pads the concat with undef vectors, reuses the widened
+  // first operand, emits a shuffle, or falls back to a BUILD_VECTOR of
+  // extracted elements.
+  SDLoc dl(N);
+  EVT InVT = N->getOperand(0).getValueType();
+  EVT WideVT =
+      TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  const unsigned WideNumElts = WideVT.getVectorNumElements();
+  const unsigned InNumElts = InVT.getVectorNumElements();
+  const unsigned NumOps = N->getNumOperands();
+
+  // True when the operands themselves are being widened.
+  const bool InputWidened =
+      getTypeAction(InVT) == TargetLowering::TypeWidenVector;
+
+  if (!InputWidened) {
+    if (WideNumElts % InNumElts == 0) {
+      // Add undef vectors to widen to correct length.
+      const unsigned NumConcat = WideNumElts / InNumElts;
+      SDValue UndefVec = DAG.getUNDEF(InVT);
+      SmallVector<SDValue, 16> Parts(NumConcat, UndefVec);
+      for (unsigned I = 0; I < NumOps; ++I)
+        Parts[I] = N->getOperand(I);
+      return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Parts);
+    }
+  } else if (WideVT == TLI.getTypeToTransformTo(*DAG.getContext(), InVT)) {
+    // The inputs and the result are widened to the same type.
+    bool RestAreUndef = true;
+    for (unsigned I = 1; I < NumOps && RestAreUndef; ++I)
+      RestAreUndef = N->getOperand(I).getOpcode() == ISD::UNDEF;
+
+    // Everything but the first operand is an UNDEF so just return the
+    // widened first operand.
+    if (RestAreUndef)
+      return GetWidenedVector(N->getOperand(0));
+
+    if (NumOps == 2) {
+      // Replace concat of two operands with a shuffle that picks the first
+      // InNumElts lanes of each widened operand; other lanes stay undefined.
+      SmallVector<int, 16> Mask(WideNumElts, -1);
+      for (unsigned I = 0; I < InNumElts; ++I) {
+        Mask[I] = I;
+        Mask[I + InNumElts] = I + WideNumElts;
+      }
+      SDValue First = GetWidenedVector(N->getOperand(0));
+      SDValue Second = GetWidenedVector(N->getOperand(1));
+      return DAG.getVectorShuffle(WideVT, dl, First, Second, Mask.data());
+    }
+  }
+
+  // Fall back to use extracts and build vector.
+  EVT EltVT = WideVT.getVectorElementType();
+  SmallVector<SDValue, 16> Elts(WideNumElts);
+  unsigned Next = 0;
+  for (unsigned I = 0; I < NumOps; ++I) {
+    SDValue Src = N->getOperand(I);
+    if (InputWidened)
+      Src = GetWidenedVector(Src);
+    for (unsigned J = 0; J < InNumElts; ++J)
+      Elts[Next++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Src,
+                                 DAG.getConstant(J, TLI.getVectorIdxTy()));
+  }
+
+  // Lanes beyond the original elements are undef.
+  SDValue UndefElt = DAG.getUNDEF(EltVT);
+  while (Next < WideNumElts)
+    Elts[Next++] = UndefElt;
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, WideVT, Elts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
+  // Widens the result of a CONVERT_RNDSAT node. Operand 0 is the value being
+  // converted; operands 3 and 4 are forwarded unchanged to the new node(s),
+  // and the destination/source type operands are rebuilt for the widened
+  // types. When the input cannot be brought to the widened element count with
+  // a legal type, the conversion is unrolled element by element.
+  SDLoc dl(N);
+  SDValue InOp = N->getOperand(0);
+  SDValue RndOp = N->getOperand(3);
+  SDValue SatOp = N->getOperand(4);
+
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+  EVT InVT = InOp.getValueType();
+  EVT InEltVT = InVT.getVectorElementType();
+  EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts);
+
+  // Type operands describing the widened vector conversion.
+  SDValue DTyOp = DAG.getValueType(WidenVT);
+  SDValue STyOp = DAG.getValueType(InWidenVT);
+  ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+
+  unsigned InVTNumElts = InVT.getVectorNumElements();
+  if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
+    // The input is being widened too; if it ends up with the same element
+    // count as the result, convert it directly.
+    InOp = GetWidenedVector(InOp);
+    InVT = InOp.getValueType();
+    InVTNumElts = InVT.getVectorNumElements();
+    if (InVTNumElts == WidenNumElts)
+      return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
+                                  SatOp, CvtCode);
+  }
+
+  if (TLI.isTypeLegal(InWidenVT)) {
+    // Because the result and the input are different vector types, widening
+    // the result could create a legal type but widening the input might make
+    // it an illegal type that might lead to repeatedly splitting the input
+    // and then widening it. To avoid this, we widen the input only if
+    // it results in a legal type.
+    if (WidenNumElts % InVTNumElts == 0) {
+      // Widen the input and call convert on the widened input vector.
+      unsigned NumConcat = WidenNumElts/InVTNumElts;
+      SmallVector<SDValue, 16> Ops(NumConcat);
+      Ops[0] = InOp;
+      SDValue UndefVal = DAG.getUNDEF(InVT);
+      for (unsigned i = 1; i != NumConcat; ++i)
+        Ops[i] = UndefVal;
+
+      InOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT, Ops);
+      return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
+                                  SatOp, CvtCode);
+    }
+
+    if (InVTNumElts % WidenNumElts == 0) {
+      // Extract the input and convert the shorten input vector.
+      InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, InOp,
+                         DAG.getConstant(0, TLI.getVectorIdxTy()));
+      return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
+                                  SatOp, CvtCode);
+    }
+  }
+
+  // Otherwise unroll into some nasty scalar code and rebuild the vector.
+  SmallVector<SDValue, 16> Ops(WidenNumElts);
+  EVT EltVT = WidenVT.getVectorElementType();
+  // From here on the type operands describe a single-element conversion.
+  DTyOp = DAG.getValueType(EltVT);
+  STyOp = DAG.getValueType(InEltVT);
+
+  unsigned MinElts = std::min(InVTNumElts, WidenNumElts);
+  unsigned i;
+  for (i=0; i < MinElts; ++i) {
+    SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
+                                 DAG.getConstant(i, TLI.getVectorIdxTy()));
+    // Each unrolled conversion yields one element, so its result type is the
+    // element type (matching DTyOp above), not the widened vector type. The
+    // results are then used as BUILD_VECTOR operands below.
+    Ops[i] = DAG.getConvertRndSat(EltVT, dl, ExtVal, DTyOp, STyOp, RndOp,
+                                  SatOp, CvtCode);
+  }
+
+  // Lanes beyond the converted elements are undef.
+  SDValue UndefVal = DAG.getUNDEF(EltVT);
+  for (; i < WidenNumElts; ++i)
+    Ops[i] = UndefVal;
+
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
+  // Widens the result of an EXTRACT_SUBVECTOR whose index operand is a
+  // constant. Reuses the (possibly widened) input or a wider extract where
+  // possible, otherwise rebuilds the result element by element.
+  SDLoc dl(N);
+  EVT VT = N->getValueType(0);
+  EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+  const unsigned WideNumElts = WideVT.getVectorNumElements();
+  SDValue Vec = N->getOperand(0);
+  SDValue Idx = N->getOperand(1);
+
+  if (getTypeAction(Vec.getValueType()) == TargetLowering::TypeWidenVector)
+    Vec = GetWidenedVector(Vec);
+
+  EVT VecVT = Vec.getValueType();
+  const uint64_t Start = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+  // Extracting from element 0 of a vector that already has the widened type
+  // is just that vector.
+  if (Start == 0 && VecVT == WideVT)
+    return Vec;
+
+  // Check if we can extract a whole widened subvector from the input.
+  const unsigned VecNumElts = VecVT.getVectorNumElements();
+  const bool StartAligned = Start % WideNumElts == 0;
+  const bool FitsInside = Start + WideNumElts < VecNumElts;
+  if (StartAligned && FitsInside)
+    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WideVT, Vec, Idx);
+
+  // We could try widening the input to the right length but for now, extract
+  // the original elements, fill the rest with undefs and build a vector.
+  EVT EltVT = VT.getVectorElementType();
+  const unsigned NumElts = VT.getVectorNumElements();
+  SmallVector<SDValue, 16> Elts(WideNumElts);
+  unsigned Lane = 0;
+  for (; Lane < NumElts; ++Lane)
+    Elts[Lane] =
+        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vec,
+                    DAG.getConstant(Start + Lane, TLI.getVectorIdxTy()));
+
+  SDValue UndefElt = DAG.getUNDEF(EltVT);
+  for (; Lane < WideNumElts; ++Lane)
+    Elts[Lane] = UndefElt;
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, WideVT, Elts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
+  // Perform the same insertion on the widened vector; the inserted value and
+  // the index operand are passed through unchanged, and the result takes the
+  // widened vector's type.
+  SDValue WideVec = GetWidenedVector(N->getOperand(0));
+  SDValue Elt = N->getOperand(1);
+  SDValue Index = N->getOperand(2);
+  SDLoc Loc(N);
+  return DAG.getNode(ISD::INSERT_VECTOR_ELT, Loc, WideVec.getValueType(),
+                     WideVec, Elt, Index);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
+  // Widens a vector load and redirects users of the old load's chain result
+  // to the chain of the replacement load(s).
+  LoadSDNode *LD = cast<LoadSDNode>(N);
+  const ISD::LoadExtType ExtKind = LD->getExtensionType();
+
+  // Chains of the loads generated by the helper.
+  SmallVector<SDValue, 16> Chains;
+  SDValue Widened = ExtKind == ISD::NON_EXTLOAD
+                        ? GenWidenVectorLoads(Chains, LD)
+                        : GenWidenVectorExtLoads(Chains, LD, ExtKind);
+
+  // If we generate a single load, we can use that for the chain. Otherwise,
+  // build a factor node to remember the multiple loads are independent and
+  // chain to that.
+  SDValue MergedChain =
+      Chains.size() == 1
+          ? Chains[0]
+          : DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, Chains);
+
+  // Modified the chain - switch anything that used the old chain to use
+  // the new one.
+  ReplaceValueWith(SDValue(N, 1), MergedChain);
+
+  return Widened;
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) {
+  // Re-emit SCALAR_TO_VECTOR on the same scalar operand, producing the
+  // widened vector type.
+  EVT WideVT =
+      TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue Scalar = N->getOperand(0);
+  SDLoc Loc(N);
+  return DAG.getNode(ISD::SCALAR_TO_VECTOR, Loc, WideVT, Scalar);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
+  // Widens a SELECT/VSELECT. A vector condition is brought to the widened
+  // element count; a scalar condition is used as is.
+  EVT WideVT =
+      TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  const unsigned WideNumElts = WideVT.getVectorNumElements();
+
+  SDValue Cond = N->getOperand(0);
+  EVT CondVT = Cond.getValueType();
+  if (CondVT.isVector()) {
+    EVT WideCondVT = EVT::getVectorVT(
+        *DAG.getContext(), CondVT.getVectorElementType(), WideNumElts);
+    const auto CondAction = getTypeAction(CondVT);
+
+    // If we have to split the condition there is no point in widening the
+    // select. This would result in a cycle of widening the select ->
+    // widening the condition operand -> splitting the condition operand ->
+    // splitting the select -> widening the select. Instead split this select
+    // further and widen the resulting type.
+    if (CondAction == TargetLowering::TypeSplitVector)
+      return ModifyToType(SplitVecOp_VSELECT(N, 0), WideVT);
+
+    if (CondAction == TargetLowering::TypeWidenVector)
+      Cond = GetWidenedVector(Cond);
+
+    if (Cond.getValueType() != WideCondVT)
+      Cond = ModifyToType(Cond, WideCondVT);
+  }
+
+  SDValue WideOp1 = GetWidenedVector(N->getOperand(1));
+  SDValue WideOp2 = GetWidenedVector(N->getOperand(2));
+  assert(WideOp1.getValueType() == WideVT && WideOp2.getValueType() == WideVT);
+  SDLoc Loc(N);
+  return DAG.getNode(N->getOpcode(), Loc, WideVT, Cond, WideOp1, WideOp2);
+}
+
+/// Widen the result of a SELECT_CC node.  Only operands 2 and 3 are widened;
+/// operands 0, 1 and 4 are forwarded unchanged, and the result takes the type
+/// of the widened operand 2.
+SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) {
+  SDValue WideOp2 = GetWidenedVector(N->getOperand(2));
+  SDValue WideOp3 = GetWidenedVector(N->getOperand(3));
+  EVT WideResVT = WideOp2.getValueType();
+  return DAG.getNode(ISD::SELECT_CC, SDLoc(N), WideResVT,
+                     N->getOperand(0), N->getOperand(1),
+                     WideOp2, WideOp3, N->getOperand(4));
+}
+
+/// Widen the result of a SETCC node.  Vector-result compares are delegated to
+/// WidenVecRes_VSETCC; otherwise both compared operands are widened and the
+/// compare is re-emitted with the transformed result type.
+SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) {
+  const bool ResultIsVector = N->getValueType(0).isVector();
+  assert(ResultIsVector == N->getOperand(0).getValueType().isVector() &&
+         "Scalar/Vector type mismatch");
+  if (ResultIsVector)
+    return WidenVecRes_VSETCC(N);
+
+  EVT WideResVT =
+      TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue WideLHS = GetWidenedVector(N->getOperand(0));
+  SDValue WideRHS = GetWidenedVector(N->getOperand(1));
+  SDValue CondCode = N->getOperand(2);
+  return DAG.getNode(ISD::SETCC, SDLoc(N), WideResVT, WideLHS, WideRHS,
+                     CondCode);
+}
+
+/// Widen an UNDEF result: simply produce an undef of the transformed type.
+SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) {
+  return DAG.getUNDEF(
+      TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)));
+}
+
+/// Widen the result of a VECTOR_SHUFFLE.  Both inputs are widened, mask
+/// entries that referred to the second input are rebased onto the widened
+/// input length, and the extra result lanes get an undef (-1) mask entry.
+SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) {
+  SDLoc dl(N);
+  EVT OrigVT = N->getValueType(0);
+  EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), OrigVT);
+  unsigned OrigNumElts = OrigVT.getVectorNumElements();
+  unsigned WideNumElts = WideVT.getVectorNumElements();
+
+  SDValue WideLHS = GetWidenedVector(N->getOperand(0));
+  SDValue WideRHS = GetWidenedVector(N->getOperand(1));
+
+  // Rewrite the mask for the new input vector length.
+  SmallVector<int, 16> WideMask;
+  for (unsigned Lane = 0; Lane != OrigNumElts; ++Lane) {
+    int MaskElt = N->getMaskElt(Lane);
+    if (MaskElt >= (int)OrigNumElts) {
+      // Entry selects from the second input, which now starts WideNumElts in.
+      WideMask.push_back(MaskElt - OrigNumElts + WideNumElts);
+      continue;
+    }
+    WideMask.push_back(MaskElt);
+  }
+  // The lanes added by widening are don't-care.
+  for (unsigned Lane = OrigNumElts; Lane != WideNumElts; ++Lane)
+    WideMask.push_back(-1);
+
+  return DAG.getVectorShuffle(WideVT, dl, WideLHS, WideRHS, &WideMask[0]);
+}
+
+/// Widen the result of a vector SETCC.  Both compared operands are widened
+/// and are expected (asserted) to end up with the same element count as the
+/// widened result.
+SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {
+  assert(N->getValueType(0).isVector() &&
+         N->getOperand(0).getValueType().isVector() &&
+         "Operands must be vectors");
+  EVT WideResVT =
+      TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  unsigned WideNumElts = WideResVT.getVectorNumElements();
+
+  SDValue LHS = N->getOperand(0);
+  EVT OrigInVT = LHS.getValueType();
+  assert(OrigInVT.isVector() && "can not widen non-vector type");
+  // Expected operand type: original operand element type, widened count.
+  EVT ExpectedInVT = EVT::getVectorVT(*DAG.getContext(),
+                                      OrigInVT.getVectorElementType(),
+                                      WideNumElts);
+  LHS = GetWidenedVector(LHS);
+  SDValue RHS = GetWidenedVector(N->getOperand(1));
+
+  // Assume that the input and output will be widened appropriately.  If not,
+  // we will have to unroll it at some point.
+  assert(LHS.getValueType() == ExpectedInVT &&
+         RHS.getValueType() == ExpectedInVT &&
+         "Input not widened to expected type!");
+  (void)ExpectedInVT;
+
+  SDValue CondCode = N->getOperand(2);
+  return DAG.getNode(ISD::SETCC, SDLoc(N), WideResVT, LHS, RHS, CondCode);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Widen Vector Operand
+//===----------------------------------------------------------------------===//
+/// Legalize operand OpNo of N when that operand's type needs widening.
+/// Dispatches on N's opcode to the matching WidenVecOp_* helper.  Returns
+/// true only when the helper handed back N itself (N was updated in place);
+/// in every other case (custom lowering, helper registered its own result,
+/// or N's value 0 was replaced here) it returns false.
+bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
+  DEBUG(dbgs() << "Widen node operand " << OpNo << ": ";
+        N->dump(&DAG);
+        dbgs() << "\n");
+  SDValue Res;
+
+  // Give the target the first chance to custom-widen this node.
+  if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+    return false;
+
+  switch (N->getOpcode()) {
+  default:
+#ifndef NDEBUG
+    dbgs() << "WidenVectorOperand op #" << OpNo << ": ";
+    N->dump(&DAG);
+    dbgs() << "\n";
+#endif
+    llvm_unreachable("Do not know how to widen this operator's operand!");
+
+  case ISD::BITCAST:
+    Res = WidenVecOp_BITCAST(N);
+    break;
+  case ISD::CONCAT_VECTORS:
+    Res = WidenVecOp_CONCAT_VECTORS(N);
+    break;
+  case ISD::EXTRACT_SUBVECTOR:
+    Res = WidenVecOp_EXTRACT_SUBVECTOR(N);
+    break;
+  case ISD::EXTRACT_VECTOR_ELT:
+    Res = WidenVecOp_EXTRACT_VECTOR_ELT(N);
+    break;
+  case ISD::STORE:
+    Res = WidenVecOp_STORE(N);
+    break;
+  case ISD::SETCC:
+    Res = WidenVecOp_SETCC(N);
+    break;
+
+  // Integer extensions share one helper.
+  case ISD::ANY_EXTEND:
+  case ISD::SIGN_EXTEND:
+  case ISD::ZERO_EXTEND:
+    Res = WidenVecOp_EXTEND(N);
+    break;
+
+  // The remaining conversions share the generic convert helper.
+  case ISD::FP_EXTEND:
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:
+  case ISD::SINT_TO_FP:
+  case ISD::UINT_TO_FP:
+  case ISD::TRUNCATE:
+    Res = WidenVecOp_Convert(N);
+    break;
+  }
+
+  SDNode *ResNode = Res.getNode();
+
+  // A null result means the sub-method took care of registering the result.
+  if (!ResNode)
+    return false;
+
+  // Getting N back means the sub-method updated N in place; tell the
+  // legalizer core about this.
+  if (ResNode == N)
+    return true;
+
+  assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+         "Invalid operand expansion");
+
+  ReplaceValueWith(SDValue(N, 0), Res);
+  return false;
+}
+
+/// Widen the operand of an ANY_EXTEND / SIGN_EXTEND / ZERO_EXTEND node.
+///
+/// If the operand is legalized by widening, the widened operand is resized
+/// (when necessary) to a legal vector type with the same total bit width as
+/// the result and the extension is expressed as the corresponding
+/// "extend vector in register" node.  In every other situation the work is
+/// handed to WidenVecOp_Convert.
+SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) {
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+
+  SDValue InOp = N->getOperand(0);
+  // If some legalization strategy other than widening is used on the operand,
+  // we can't safely assume that just extending the low lanes is the correct
+  // transformation.
+  if (getTypeAction(InOp.getValueType()) != TargetLowering::TypeWidenVector)
+    return WidenVecOp_Convert(N);
+  InOp = GetWidenedVector(InOp);
+  assert(VT.getVectorNumElements() <
+             InOp.getValueType().getVectorNumElements() &&
+         "Input wasn't widened!");
+
+  // We may need to further widen the operand until it has the same total
+  // vector size as the result.
+  EVT InVT = InOp.getValueType();
+  if (InVT.getSizeInBits() != VT.getSizeInBits()) {
+    EVT InEltVT = InVT.getVectorElementType();
+    // LAST_VECTOR_VALUETYPE is itself a valid vector type, so the bound is
+    // inclusive; a strict '<' would silently skip the last candidate.
+    for (int i = MVT::FIRST_VECTOR_VALUETYPE,
+             e = MVT::LAST_VECTOR_VALUETYPE;
+         i <= e; ++i) {
+      EVT FixedVT = (MVT::SimpleValueType)i;
+      EVT FixedEltVT = FixedVT.getVectorElementType();
+      if (TLI.isTypeLegal(FixedVT) &&
+          FixedVT.getSizeInBits() == VT.getSizeInBits() &&
+          FixedEltVT == InEltVT) {
+        assert(FixedVT.getVectorNumElements() >= VT.getVectorNumElements() &&
+               "Not enough elements in the fixed type for the operand!");
+        assert(FixedVT.getVectorNumElements() != InVT.getVectorNumElements() &&
+               "We can't have the same type as we started with!");
+        // Grow by inserting into an undef vector, or shrink by extracting
+        // the low subvector, so the operand has exactly FixedVT.
+        if (FixedVT.getVectorNumElements() > InVT.getVectorNumElements())
+          InOp = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, FixedVT,
+                             DAG.getUNDEF(FixedVT), InOp,
+                             DAG.getConstant(0, TLI.getVectorIdxTy()));
+        else
+          InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, FixedVT, InOp,
+                             DAG.getConstant(0, TLI.getVectorIdxTy()));
+        break;
+      }
+    }
+    InVT = InOp.getValueType();
+    if (InVT.getSizeInBits() != VT.getSizeInBits())
+      // We couldn't find a legal vector type that was a widening of the input
+      // and could be extended in-register to the result type, so we have to
+      // scalarize.
+      return WidenVecOp_Convert(N);
+  }
+
+  // Use special DAG nodes to represent the operation of extending the
+  // low lanes.
+  switch (N->getOpcode()) {
+  default:
+    llvm_unreachable("Extend legalization on on extend operation!");
+  case ISD::ANY_EXTEND:
+    return DAG.getAnyExtendVectorInReg(InOp, DL, VT);
+  case ISD::SIGN_EXTEND:
+    return DAG.getSignExtendVectorInReg(InOp, DL, VT);
+  case ISD::ZERO_EXTEND:
+    return DAG.getZeroExtendVectorInReg(InOp, DL, VT);
+  }
+}
+
+/// Unroll a conversion whose operand needs widening: each of the result's
+/// elements is extracted from the (possibly widened) input, converted with
+/// N's opcode as a scalar, and the scalars are gathered into a BUILD_VECTOR.
+SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
+  // Since the result is legal and the input is illegal, it is unlikely
+  // that we can fix the input to a legal type, so fall back to scalar code
+  // and a build vector.
+  SDLoc dl(N);
+  EVT ResVT = N->getValueType(0);
+  EVT ResEltVT = ResVT.getVectorElementType();
+  unsigned ResNumElts = ResVT.getVectorNumElements();
+
+  SDValue Src = N->getOperand(0);
+  if (getTypeAction(Src.getValueType()) == TargetLowering::TypeWidenVector)
+    Src = GetWidenedVector(Src);
+  EVT SrcEltVT = Src.getValueType().getVectorElementType();
+
+  unsigned ConvOpc = N->getOpcode();
+  SmallVector<SDValue, 16> Scalars(ResNumElts);
+  for (unsigned Lane = 0; Lane < ResNumElts; ++Lane) {
+    SDValue LaneIdx = DAG.getConstant(Lane, TLI.getVectorIdxTy());
+    SDValue SrcElt =
+        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcEltVT, Src, LaneIdx);
+    Scalars[Lane] = DAG.getNode(ConvOpc, dl, ResEltVT, SrcElt);
+  }
+
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, ResVT, Scalars);
+}
+
+/// Widen the operand of a BITCAST.  For a non-vector, non-x86mmx result whose
+/// size evenly divides the widened input, the input is bitcast to a legal
+/// vector of the result type and element 0 is extracted; otherwise the value
+/// goes through a stack store/load.
+SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) {
+  SDLoc dl(N);
+  EVT ResVT = N->getValueType(0);
+  SDValue WideIn = GetWidenedVector(N->getOperand(0));
+  EVT WideInVT = WideIn.getValueType();
+
+  // Check if we can convert between two legal vector types and extract.
+  unsigned WideInBits = WideInVT.getSizeInBits();
+  unsigned ResBits = ResVT.getSizeInBits();
+  // x86mmx is not an acceptable vector element type, so don't try.
+  const bool CanExtract = WideInBits % ResBits == 0 && !ResVT.isVector() &&
+                          ResVT != MVT::x86mmx;
+  if (CanExtract) {
+    EVT VecOfResVT =
+        EVT::getVectorVT(*DAG.getContext(), ResVT, WideInBits / ResBits);
+    if (TLI.isTypeLegal(VecOfResVT)) {
+      SDValue Recast = DAG.getNode(ISD::BITCAST, dl, VecOfResVT, WideIn);
+      SDValue Zero = DAG.getConstant(0, TLI.getVectorIdxTy());
+      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ResVT, Recast, Zero);
+    }
+  }
+
+  return CreateStackStoreLoad(WideIn, ResVT);
+}
+
+/// Replace a CONCAT_VECTORS whose operands need widening by a BUILD_VECTOR.
+/// From each operand (widened when its type action asks for that) only the
+/// first NumInElts elements -- the element count of operand 0's original
+/// type -- are extracted, in operand order.
+SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
+  // If the input vector is not legal, it is likely that we will not find a
+  // legal vector of the same size, so build the result element by element.
+  SDLoc dl(N);
+  EVT ResVT = N->getValueType(0);
+  EVT ResEltVT = ResVT.getVectorElementType();
+  SmallVector<SDValue, 16> Elts(ResVT.getVectorNumElements());
+
+  unsigned EltsPerInput =
+      N->getOperand(0).getValueType().getVectorNumElements();
+
+  unsigned OutIdx = 0;
+  for (unsigned OpIdx = 0, NumOps = N->getNumOperands(); OpIdx < NumOps;
+       ++OpIdx) {
+    SDValue Piece = N->getOperand(OpIdx);
+    if (getTypeAction(Piece.getValueType()) == TargetLowering::TypeWidenVector)
+      Piece = GetWidenedVector(Piece);
+    for (unsigned Lane = 0; Lane < EltsPerInput; ++Lane) {
+      SDValue LaneIdx = DAG.getConstant(Lane, TLI.getVectorIdxTy());
+      Elts[OutIdx] =
+          DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ResEltVT, Piece, LaneIdx);
+      ++OutIdx;
+    }
+  }
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, ResVT, Elts);
+}
+
+/// Re-emit an EXTRACT_SUBVECTOR on the widened source vector, keeping the
+/// original result type and index operand.
+SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
+  SDValue WideSrc = GetWidenedVector(N->getOperand(0));
+  SDValue Index = N->getOperand(1);
+  EVT ResVT = N->getValueType(0);
+  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), ResVT, WideSrc, Index);
+}
+
+/// Re-emit an EXTRACT_VECTOR_ELT on the widened source vector, keeping the
+/// original result type and index operand.
+SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+  SDValue WideSrc = GetWidenedVector(N->getOperand(0));
+  SDValue Index = N->getOperand(1);
+  EVT ResVT = N->getValueType(0);
+  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), ResVT, WideSrc, Index);
+}
+
+/// Widen the stored value of a STORE while still writing only the original
+/// memory type.  Returns the single store chain, or a TokenFactor joining
+/// the chains when the store was not emitted as exactly one store.
+SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
+  StoreSDNode *Store = cast<StoreSDNode>(N);
+
+  // Chains of the individual stores emitted by the helpers.
+  SmallVector<SDValue, 16> Chains;
+  if (!Store->isTruncatingStore())
+    GenWidenVectorStores(Chains, Store);
+  else
+    GenWidenVectorTruncStores(Chains, Store);
+
+  if (Chains.size() != 1)
+    return DAG.getNode(ISD::TokenFactor, SDLoc(Store), MVT::Other, Chains);
+  return Chains[0];
+}
+
+/// Widen the operands of a SETCC: compare the widened operands, extract the
+/// low lanes that correspond to the original result, and convert them to the
+/// original result type with PromoteTargetBoolean.
+SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
+  SDValue WideLHS = GetWidenedVector(N->getOperand(0));
+  SDValue WideRHS = GetWidenedVector(N->getOperand(1));
+  SDLoc dl(N);
+
+  // WARNING: In this code we widen the compare instruction with garbage.
+  // This garbage may contain denormal floats which may be slow. Is this a real
+  // concern ? Should we zero the unused lanes if this is a float compare ?
+
+  // Compare the widened operands; only some of the compared lanes are
+  // meaningful.
+  EVT WideCmpVT =
+      TLI.getSetCCResultType(*DAG.getContext(), WideLHS.getValueType());
+  SDValue WideCmp = DAG.getNode(ISD::SETCC, SDLoc(N), WideCmpVT, WideLHS,
+                                WideRHS, N->getOperand(2));
+
+  // Keep just the lanes the original result had.
+  EVT OrigResVT = N->getValueType(0);
+  EVT NarrowCmpVT = EVT::getVectorVT(*DAG.getContext(),
+                                     WideCmpVT.getVectorElementType(),
+                                     OrigResVT.getVectorNumElements());
+  SDValue Zero = DAG.getConstant(0, TLI.getVectorIdxTy());
+  SDValue NarrowCmp =
+      DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NarrowCmpVT, WideCmp, Zero);
+
+  return PromoteTargetBoolean(NarrowCmp, N->getValueType(0));
+}
+
+
+//===----------------------------------------------------------------------===//
+// Vector Widening Utilities
+//===----------------------------------------------------------------------===//
+
+// Utility function to find the memory type used to chop up a widened vector
+// for a load/store.
+// TLI: Target lowering used to determine legal types.
+// Width: Width (in bits) still left to load/store.
+// WidenVT: The widened vector type to load to/store from.
+// Align: Alignment in bytes (scaled by 8 before it is compared against bit
+// widths). If 0, don't allow use of a type wider than Width.
+// WidenEx: If Align is not 0, the additional amount (in bits) beyond Width
+// that we are allowed to load/store.
+// Returns the element type of WidenVT when Width is exactly one element or
+// when no larger legal integer/vector type qualifies; otherwise the chosen
+// legal integer or vector type.
+
+static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
+ unsigned Width, EVT WidenVT,
+ unsigned Align = 0, unsigned WidenEx = 0) {
+ EVT WidenEltVT = WidenVT.getVectorElementType();
+ unsigned WidenWidth = WidenVT.getSizeInBits();
+ unsigned WidenEltWidth = WidenEltVT.getSizeInBits();
+ unsigned AlignInBits = Align*8;
+
+ // If we have one element to load/store, return it.
+ EVT RetVT = WidenEltVT;
+ if (Width == WidenEltWidth)
+ return RetVT;
+
+ // See if there is larger legal integer than the element type to load/store
+ // The scan runs from the last integer type down to the first and stops as
+ // soon as a type is no wider than the element type, so only the first
+ // qualifying integer type encountered is recorded in RetVT.
+ unsigned VT;
+ for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE;
+ VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) {
+ EVT MemVT((MVT::SimpleValueType) VT);
+ unsigned MemVTWidth = MemVT.getSizeInBits();
+ if (MemVT.getSizeInBits() <= WidenEltWidth)
+ break;
+ // The type must be legal, divide WidenVT into a power-of-two number of
+ // pieces, and either fit in the remaining Width or (when Align is given)
+ // fit within both the alignment and Width+WidenEx.
+ if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 &&
+ isPowerOf2_32(WidenWidth / MemVTWidth) &&
+ (MemVTWidth <= Width ||
+ (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
+ RetVT = MemVT;
+ break;
+ }
+ }
+
+ // See if there is a larger vector type to load/store that has the same vector
+ // element type and is evenly divisible with the WidenVT.
+ for (VT = (unsigned)MVT::LAST_VECTOR_VALUETYPE;
+ VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) {
+ EVT MemVT = (MVT::SimpleValueType) VT;
+ unsigned MemVTWidth = MemVT.getSizeInBits();
+ if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() &&
+ (WidenWidth % MemVTWidth) == 0 &&
+ isPowerOf2_32(WidenWidth / MemVTWidth) &&
+ (MemVTWidth <= Width ||
+ (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
+ // Take the vector type if it is wider than what was found so far, or if
+ // it is WidenVT itself.
+ if (RetVT.getSizeInBits() < MemVTWidth || MemVT == WidenVT)
+ return MemVT;
+ }
+ }
+
+ // No suitable vector type: use the integer type found above or the element
+ // type.
+ return RetVT;
+}
+
+// Builds a vector of type VecTy from a run of scalar loads.
+// VecTy: Resulting vector type; the working vectors built along the way all
+// have VecTy's bit width.
+// LdOps: Load operators to build the vector from.
+// [Start,End): the half-open range of entries of LdOps to use.
+// The first load is placed with SCALAR_TO_VECTOR and each following one is
+// inserted at the next index; the final value is bitcast to VecTy.
+static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy,
+ SmallVectorImpl<SDValue> &LdOps,
+ unsigned Start, unsigned End) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDLoc dl(LdOps[Start]);
+ EVT LdTy = LdOps[Start].getValueType();
+ unsigned Width = VecTy.getSizeInBits();
+ unsigned NumElts = Width / LdTy.getSizeInBits();
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), LdTy, NumElts);
+
+ // Idx is the next element position to fill, counted in units of the current
+ // load type; position 0 is taken by the SCALAR_TO_VECTOR below.
+ unsigned Idx = 1;
+ SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT,LdOps[Start]);
+
+ for (unsigned i = Start + 1; i != End; ++i) {
+ EVT NewLdTy = LdOps[i].getValueType();
+ if (NewLdTy != LdTy) {
+ // The load type changed: reinterpret the partial vector as a vector of
+ // the new load type (same total width).
+ NumElts = Width / NewLdTy.getSizeInBits();
+ NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewLdTy, NumElts);
+ VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, VecOp);
+ // Readjust position and vector position based on new load type
+ Idx = Idx * LdTy.getSizeInBits() / NewLdTy.getSizeInBits();
+ LdTy = NewLdTy;
+ }
+ VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i],
+ DAG.getConstant(Idx++, TLI.getVectorIdxTy()));
+ }
+ return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp);
+}
+
+// Emits the loads needed to produce the widened value of the vector load LD.
+// LdChain: receives the output chain (value 1) of every load emitted here.
+// LD: the load whose result is being widened.
+// Returns a value of the widened type; lanes not covered by the memory type
+// are filled from undef operands or left to SCALAR_TO_VECTOR.
+SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
+ LoadSDNode *LD) {
+ // The strategy assumes that we can efficiently load powers of two widths.
+ // The routines chops the vector into the largest vector loads with the same
+ // element type or scalar loads and then recombines it to the widen vector
+ // type.
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
+ unsigned WidenWidth = WidenVT.getSizeInBits();
+ EVT LdVT = LD->getMemoryVT();
+ SDLoc dl(LD);
+ assert(LdVT.isVector() && WidenVT.isVector());
+ assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType());
+
+ // Load information
+ SDValue Chain = LD->getChain();
+ SDValue BasePtr = LD->getBasePtr();
+ unsigned Align = LD->getAlignment();
+ bool isVolatile = LD->isVolatile();
+ bool isNonTemporal = LD->isNonTemporal();
+ bool isInvariant = LD->isInvariant();
+ AAMDNodes AAInfo = LD->getAAInfo();
+
+ // LdWidth tracks the number of bits that still have to be loaded.
+ int LdWidth = LdVT.getSizeInBits();
+ int WidthDiff = WidenWidth - LdWidth; // Difference
+ // For volatile loads pass alignment 0 so FindMemType never selects a type
+ // wider than the remaining width.
+ unsigned LdAlign = (isVolatile) ? 0 : Align; // Allow wider loads
+
+ // Find the vector type that can load from.
+ EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
+ int NewVTWidth = NewVT.getSizeInBits();
+ SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(),
+ isVolatile, isNonTemporal, isInvariant, Align,
+ AAInfo);
+ LdChain.push_back(LdOp.getValue(1));
+
+ // Check if we can load the element with one instruction
+ if (LdWidth <= NewVTWidth) {
+ // Scalar load: place it in a vector of that scalar type and bitcast.
+ if (!NewVT.isVector()) {
+ unsigned NumElts = WidenWidth / NewVTWidth;
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
+ SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
+ }
+ // The load already has the widened type.
+ if (NewVT == WidenVT)
+ return LdOp;
+
+ // Narrower vector load: concatenate it with undef vectors of the same type.
+ assert(WidenWidth % NewVTWidth == 0);
+ unsigned NumConcat = WidenWidth / NewVTWidth;
+ SmallVector<SDValue, 16> ConcatOps(NumConcat);
+ SDValue UndefVal = DAG.getUNDEF(NewVT);
+ ConcatOps[0] = LdOp;
+ for (unsigned i = 1; i != NumConcat; ++i)
+ ConcatOps[i] = UndefVal;
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, ConcatOps);
+ }
+
+ // Load vector by using multiple loads from largest vector to scalar
+ SmallVector<SDValue, 16> LdOps;
+ LdOps.push_back(LdOp);
+
+ LdWidth -= NewVTWidth;
+ unsigned Offset = 0;
+
+ while (LdWidth > 0) {
+ // Step past the bytes covered by the previous load.
+ unsigned Increment = NewVTWidth / 8;
+ Offset += Increment;
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+ DAG.getConstant(Increment, BasePtr.getValueType()));
+
+ SDValue L;
+ if (LdWidth < NewVTWidth) {
+ // Our current type we are using is too large, find a better size
+ NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
+ NewVTWidth = NewVT.getSizeInBits();
+ L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
+ LD->getPointerInfo().getWithOffset(Offset), isVolatile,
+ isNonTemporal, isInvariant, MinAlign(Align, Increment),
+ AAInfo);
+ LdChain.push_back(L.getValue(1));
+ if (L->getValueType(0).isVector()) {
+ // Pad the smaller vector load with undef vectors until it reaches the
+ // width of the first load, then concatenate into the first load's type.
+ SmallVector<SDValue, 16> Loads;
+ Loads.push_back(L);
+ unsigned size = L->getValueSizeInBits(0);
+ while (size < LdOp->getValueSizeInBits(0)) {
+ Loads.push_back(DAG.getUNDEF(L->getValueType(0)));
+ size += L->getValueSizeInBits(0);
+ }
+ L = DAG.getNode(ISD::CONCAT_VECTORS, dl, LdOp->getValueType(0), Loads);
+ }
+ } else {
+ // The current type still fits in what is left; load another piece of it.
+ L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
+ LD->getPointerInfo().getWithOffset(Offset), isVolatile,
+ isNonTemporal, isInvariant, MinAlign(Align, Increment),
+ AAInfo);
+ LdChain.push_back(L.getValue(1));
+ }
+
+ LdOps.push_back(L);
+
+
+ LdWidth -= NewVTWidth;
+ }
+
+ // Build the vector from the loads operations
+ unsigned End = LdOps.size();
+ if (!LdOps[0].getValueType().isVector())
+ // All the loads are scalar loads.
+ return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End);
+
+ // If the load contains vectors, build the vector using concat vector.
+ // All of the vectors used to loads are power of 2 and the scalars load
+ // can be combined to make a power of 2 vector.
+ // ConcatOps is filled from the back; Idx is the index of its first used slot.
+ SmallVector<SDValue, 16> ConcatOps(End);
+ int i = End - 1;
+ int Idx = End;
+ EVT LdTy = LdOps[i].getValueType();
+ // First combine the scalar loads to a vector
+ if (!LdTy.isVector()) {
+ // Walk backwards to the last vector-typed load; everything after it is
+ // scalar and is packed into one vector of that load's type.
+ for (--i; i >= 0; --i) {
+ LdTy = LdOps[i].getValueType();
+ if (LdTy.isVector())
+ break;
+ }
+ ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i+1, End);
+ }
+ ConcatOps[--Idx] = LdOps[i];
+ for (--i; i >= 0; --i) {
+ EVT NewLdTy = LdOps[i].getValueType();
+ if (NewLdTy != LdTy) {
+ // Create a larger vector
+ // The pieces gathered so far are merged into one value of the new type,
+ // which then occupies the last slot.
+ ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy,
+ makeArrayRef(&ConcatOps[Idx], End - Idx));
+ Idx = End - 1;
+ LdTy = NewLdTy;
+ }
+ ConcatOps[--Idx] = LdOps[i];
+ }
+
+ // The gathered pieces exactly fill the widened type.
+ if (WidenWidth == LdTy.getSizeInBits()*(End - Idx))
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
+ makeArrayRef(&ConcatOps[Idx], End - Idx));
+
+ // We need to fill the rest with undefs to build the vector
+ unsigned NumOps = WidenWidth / LdTy.getSizeInBits();
+ SmallVector<SDValue, 16> WidenOps(NumOps);
+ SDValue UndefVal = DAG.getUNDEF(LdTy);
+ {
+ unsigned i = 0;
+ for (; i != End-Idx; ++i)
+ WidenOps[i] = ConcatOps[Idx+i];
+ for (; i != NumOps; ++i)
+ WidenOps[i] = UndefVal;
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, WidenOps);
+}
+
+/// Widen an extending vector load by unrolling it: every element of the
+/// memory type is loaded with its own extending load, the remaining lanes of
+/// the widened type are undef, and the lanes are gathered with BUILD_VECTOR.
+/// The output chain of each emitted load is appended to LdChain.
+SDValue
+DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
+                                         LoadSDNode *LD,
+                                         ISD::LoadExtType ExtType) {
+  // For extension loads, chopping up the vector and extending afterwards may
+  // not be more efficient, so the load is unrolled instead.
+  EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(),
+                                        LD->getValueType(0));
+  EVT MemVT = LD->getMemoryVT();
+  SDLoc dl(LD);
+  assert(MemVT.isVector() && WideVT.isVector());
+
+  // Properties copied from the original load.
+  SDValue Chain = LD->getChain();
+  SDValue BasePtr = LD->getBasePtr();
+  unsigned Align = LD->getAlignment();
+  bool isVolatile = LD->isVolatile();
+  bool isNonTemporal = LD->isNonTemporal();
+  bool isInvariant = LD->isInvariant();
+  AAMDNodes AAInfo = LD->getAAInfo();
+
+  EVT ResEltVT = WideVT.getVectorElementType();
+  EVT MemEltVT = MemVT.getVectorElementType();
+  unsigned MemNumElts = MemVT.getVectorNumElements();
+  unsigned WideNumElts = WideVT.getVectorNumElements();
+
+  // Byte distance between consecutive elements in memory.
+  unsigned EltBytes = MemEltVT.getSizeInBits() / 8;
+
+  SmallVector<SDValue, 16> Lanes(WideNumElts);
+
+  // The first element is read straight from the base pointer.
+  Lanes[0] = DAG.getExtLoad(ExtType, dl, ResEltVT, Chain, BasePtr,
+                            LD->getPointerInfo(), MemEltVT, isVolatile,
+                            isNonTemporal, isInvariant, Align, AAInfo);
+  LdChain.push_back(Lanes[0].getValue(1));
+
+  // The remaining elements are read at increasing byte offsets.
+  unsigned Lane = 1;
+  for (unsigned ByteOff = EltBytes; Lane < MemNumElts;
+       ++Lane, ByteOff += EltBytes) {
+    SDValue OffsetC = DAG.getConstant(ByteOff, BasePtr.getValueType());
+    SDValue EltPtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
+                                 BasePtr, OffsetC);
+    Lanes[Lane] = DAG.getExtLoad(ExtType, dl, ResEltVT, Chain, EltPtr,
+                                 LD->getPointerInfo().getWithOffset(ByteOff),
+                                 MemEltVT, isVolatile, isNonTemporal,
+                                 isInvariant, Align, AAInfo);
+    LdChain.push_back(Lanes[Lane].getValue(1));
+  }
+
+  // Every lane beyond the memory type is undef.
+  SDValue Undef = DAG.getUNDEF(ResEltVT);
+  while (Lane != WideNumElts) {
+    Lanes[Lane] = Undef;
+    ++Lane;
+  }
+
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, WideVT, Lanes);
+}
+
+
+// Emits the stores needed to write the memory type of ST from its widened
+// value operand.
+// StChain: receives one store node per emitted store.
+// ST: the (non-truncating) store whose value operand is being widened.
+void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
+ StoreSDNode *ST) {
+ // The strategy assumes that we can efficiently store powers of two widths.
+ // The routines chops the vector into the largest vector stores with the same
+ // element type or scalar stores.
+ SDValue Chain = ST->getChain();
+ SDValue BasePtr = ST->getBasePtr();
+ unsigned Align = ST->getAlignment();
+ bool isVolatile = ST->isVolatile();
+ bool isNonTemporal = ST->isNonTemporal();
+ AAMDNodes AAInfo = ST->getAAInfo();
+ SDValue ValOp = GetWidenedVector(ST->getValue());
+ SDLoc dl(ST);
+
+ // StWidth is the number of bits of the memory type still to be stored.
+ EVT StVT = ST->getMemoryVT();
+ unsigned StWidth = StVT.getSizeInBits();
+ EVT ValVT = ValOp.getValueType();
+ unsigned ValWidth = ValVT.getSizeInBits();
+ EVT ValEltVT = ValVT.getVectorElementType();
+ unsigned ValEltWidth = ValEltVT.getSizeInBits();
+ assert(StVT.getVectorElementType() == ValEltVT);
+
+ // Idx is kept in units of the widened value's element type between
+ // iterations of the outer loop.
+ int Idx = 0; // current index to store
+ unsigned Offset = 0; // offset from base to store
+ while (StWidth != 0) {
+ // Find the largest vector type we can store with
+ EVT NewVT = FindMemType(DAG, TLI, StWidth, ValVT);
+ unsigned NewVTWidth = NewVT.getSizeInBits();
+ unsigned Increment = NewVTWidth / 8;
+ if (NewVT.isVector()) {
+ // Store successive subvectors of type NewVT while they still fit.
+ unsigned NumVTElts = NewVT.getVectorNumElements();
+ do {
+ SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp,
+ DAG.getConstant(Idx, TLI.getVectorIdxTy()));
+ StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr,
+ ST->getPointerInfo().getWithOffset(Offset),
+ isVolatile, isNonTemporal,
+ MinAlign(Align, Offset), AAInfo));
+ StWidth -= NewVTWidth;
+ Offset += Increment;
+ Idx += NumVTElts;
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+ DAG.getConstant(Increment, BasePtr.getValueType()));
+ } while (StWidth != 0 && StWidth >= NewVTWidth);
+ } else {
+ // Cast the vector to the scalar type we can store
+ unsigned NumElts = ValWidth / NewVTWidth;
+ EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
+ SDValue VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, ValOp);
+ // Readjust index position based on new vector type
+ Idx = Idx * ValEltWidth / NewVTWidth;
+ // Store successive scalars of type NewVT while they still fit.
+ do {
+ SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp,
+ DAG.getConstant(Idx++, TLI.getVectorIdxTy()));
+ StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr,
+ ST->getPointerInfo().getWithOffset(Offset),
+ isVolatile, isNonTemporal,
+ MinAlign(Align, Offset), AAInfo));
+ StWidth -= NewVTWidth;
+ Offset += Increment;
+ BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+ DAG.getConstant(Increment, BasePtr.getValueType()));
+ } while (StWidth != 0 && StWidth >= NewVTWidth);
+ // Restore index back to be relative to the original widen element type
+ Idx = Idx * NewVTWidth / ValEltWidth;
+ }
+ }
+}
+
+/// Emit the stores for a truncating vector store whose value operand is
+/// widened.  The store is unrolled: element i of the widened value is
+/// extracted and written with a scalar truncating store at byte offset
+/// i * (size of the memory element type).  One store node per element of the
+/// memory type is appended to StChain.
+void
+DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
+                                            StoreSDNode *ST) {
+  // For truncating stores, it may not be more efficient to truncate the
+  // vector and then store it. Instead, we extract each element and then
+  // store it.
+  SDValue Chain = ST->getChain();
+  SDValue BasePtr = ST->getBasePtr();
+  unsigned Align = ST->getAlignment();
+  bool isVolatile = ST->isVolatile();
+  bool isNonTemporal = ST->isNonTemporal();
+  AAMDNodes AAInfo = ST->getAAInfo();
+  SDValue ValOp = GetWidenedVector(ST->getValue());
+  SDLoc dl(ST);
+
+  EVT StVT = ST->getMemoryVT();
+  EVT ValVT = ValOp.getValueType();
+
+  // It must be true that the widened vector type is bigger than where
+  // we need to store.
+  assert(StVT.isVector() && ValOp.getValueType().isVector());
+  assert(StVT.bitsLT(ValOp.getValueType()));
+
+  // For truncating stores, we can not play the tricks of chopping legal
+  // vector types and bit cast it to the right type. Instead, we unroll
+  // the store.
+  EVT StEltVT = StVT.getVectorElementType();
+  EVT ValEltVT = ValVT.getVectorElementType();
+  // Consecutive elements in memory are one *memory* element apart (the
+  // truncated type), not one value element apart; this matches how
+  // GenWidenVectorExtLoads strides by the memory element type.
+  unsigned Increment = StEltVT.getSizeInBits() / 8;
+  unsigned NumElts = StVT.getVectorNumElements();
+
+  // Element 0 goes to the base pointer itself.
+  SDValue FirstElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+                                 DAG.getConstant(0, TLI.getVectorIdxTy()));
+  StChain.push_back(DAG.getTruncStore(Chain, dl, FirstElt, BasePtr,
+                                      ST->getPointerInfo(), StEltVT,
+                                      isVolatile, isNonTemporal, Align,
+                                      AAInfo));
+
+  unsigned Offset = Increment;
+  for (unsigned i = 1; i < NumElts; ++i, Offset += Increment) {
+    SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
+                                     BasePtr,
+                                     DAG.getConstant(Offset,
+                                                     BasePtr.getValueType()));
+    // Extract element i (not element 0) so every lane stores its own value.
+    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+                              DAG.getConstant(i, TLI.getVectorIdxTy()));
+    StChain.push_back(DAG.getTruncStore(Chain, dl, Elt, NewBasePtr,
+                                   ST->getPointerInfo().getWithOffset(Offset),
+                                        StEltVT, isVolatile, isNonTemporal,
+                                        MinAlign(Align, Offset), AAInfo));
+  }
+}
+
+/// Adjust a vector value to the vector type NVT, which must have the same
+/// element type as the input.  The input is either padded with undef (wider
+/// NVT) or cut down to its low elements (narrower NVT).
+SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) {
+  // InOp might have been widened already, so it may have the right width
+  // or may need to be narrowed.
+  EVT InVT = InOp.getValueType();
+  assert(InVT.getVectorElementType() == NVT.getVectorElementType() &&
+         "input and widen element type must match");
+  SDLoc dl(InOp);
+
+  // Nothing to do when the type already matches.
+  if (InVT == NVT)
+    return InOp;
+
+  unsigned SrcNumElts = InVT.getVectorNumElements();
+  unsigned DstNumElts = NVT.getVectorNumElements();
+
+  // Growing by a whole multiple: concatenate the input with undef vectors.
+  if (DstNumElts > SrcNumElts && DstNumElts % SrcNumElts == 0) {
+    unsigned NumParts = DstNumElts / SrcNumElts;
+    SmallVector<SDValue, 16> Parts(NumParts);
+    SDValue UndefPart = DAG.getUNDEF(InVT);
+    Parts[0] = InOp;
+    for (unsigned Part = 1; Part != NumParts; ++Part)
+      Parts[Part] = UndefPart;
+
+    return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, Parts);
+  }
+
+  // NOTE(review): this branch fires when the source count is NOT a multiple
+  // of the destination count; the evenly-divisible case falls through to the
+  // element-wise path below.  Confirm this is intended.
+  if (DstNumElts < SrcNumElts && (SrcNumElts % DstNumElts) != 0)
+    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp,
+                       DAG.getConstant(0, TLI.getVectorIdxTy()));
+
+  // Fall back to extract and build.
+  EVT EltVT = NVT.getVectorElementType();
+  SmallVector<SDValue, 16> Elts(DstNumElts);
+  unsigned NumCopied = std::min(DstNumElts, SrcNumElts);
+  unsigned Lane = 0;
+  while (Lane < NumCopied) {
+    Elts[Lane] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+                             DAG.getConstant(Lane, TLI.getVectorIdxTy()));
+    ++Lane;
+  }
+
+  // Any remaining destination lanes are undef.
+  SDValue UndefElt = DAG.getUNDEF(EltVT);
+  while (Lane < DstNumElts) {
+    Elts[Lane] = UndefElt;
+    ++Lane;
+  }
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Elts);
+}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 7961e66d8c8..57ec81bba4a 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4917,6 +4917,60 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, SDLoc dl, SDValue Base,
return SDValue(N, 0);
}
+/// Return an ISD::MLOAD node with operands {Chain, Ptr, Mask, Src0} that
+/// produces a value of type VT and a chain.  If an identical node is already
+/// in the CSE map, that node is returned after refining its alignment from
+/// MMO; otherwise a new MaskedLoadSDNode is created and registered.
+SDValue
+SelectionDAG::getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain,
+                            SDValue Ptr, SDValue Mask, SDValue Src0,
+                            MachineMemOperand *MMO) {
+  SDVTList ResultVTs = getVTList(VT, MVT::Other);
+  SDValue Ops[] = { Chain, Ptr, Mask, Src0 };
+
+  // Build the CSE profile: opcode/types/operands, memory type, memory flags
+  // and address space.
+  FoldingSetNodeID Profile;
+  AddNodeIDNode(Profile, ISD::MLOAD, ResultVTs, Ops);
+  Profile.AddInteger(VT.getRawBits());
+  Profile.AddInteger(encodeMemSDNodeFlags(ISD::NON_EXTLOAD, ISD::UNINDEXED,
+                                          MMO->isVolatile(),
+                                          MMO->isNonTemporal(),
+                                          MMO->isInvariant()));
+  Profile.AddInteger(MMO->getPointerInfo().getAddrSpace());
+
+  void *InsertPos = nullptr;
+  if (SDNode *Existing = CSEMap.FindNodeOrInsertPos(Profile, InsertPos)) {
+    cast<MaskedLoadSDNode>(Existing)->refineAlignment(MMO);
+    return SDValue(Existing, 0);
+  }
+
+  SDNode *NewNode = new (NodeAllocator) MaskedLoadSDNode(dl.getIROrder(),
+                                                         dl.getDebugLoc(),
+                                                         Ops, 4, ResultVTs,
+                                                         VT, MMO);
+  CSEMap.InsertNode(NewNode, InsertPos);
+  InsertNode(NewNode);
+  return SDValue(NewNode, 0);
+}
+
+/// Return an ISD::MSTORE node with operands {Chain, Ptr, Mask, Val} whose
+/// only result is a chain; the memory type is the type of Val.  If an
+/// identical node is already in the CSE map, that node is returned after
+/// refining its alignment from MMO; otherwise a new MaskedStoreSDNode is
+/// created and registered.
+SDValue SelectionDAG::getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val,
+                                     SDValue Ptr, SDValue Mask,
+                                     MachineMemOperand *MMO) {
+  assert(Chain.getValueType() == MVT::Other &&
+         "Invalid chain type");
+  EVT MemVT = Val.getValueType();
+  SDVTList ResultVTs = getVTList(MVT::Other);
+  SDValue Ops[] = { Chain, Ptr, Mask, Val };
+
+  // Build the CSE profile: opcode/types/operands, memory type, memory flags
+  // and address space.
+  FoldingSetNodeID Profile;
+  AddNodeIDNode(Profile, ISD::MSTORE, ResultVTs, Ops);
+  Profile.AddInteger(MemVT.getRawBits());
+  Profile.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED,
+                                          MMO->isVolatile(),
+                                          MMO->isNonTemporal(),
+                                          MMO->isInvariant()));
+  Profile.AddInteger(MMO->getPointerInfo().getAddrSpace());
+
+  void *InsertPos = nullptr;
+  if (SDNode *Existing = CSEMap.FindNodeOrInsertPos(Profile, InsertPos)) {
+    cast<MaskedStoreSDNode>(Existing)->refineAlignment(MMO);
+    return SDValue(Existing, 0);
+  }
+
+  SDNode *NewNode = new (NodeAllocator) MaskedStoreSDNode(dl.getIROrder(),
+                                                          dl.getDebugLoc(),
+                                                          Ops, 4, ResultVTs,
+                                                          MemVT, MMO);
+  CSEMap.InsertNode(NewNode, InsertPos);
+  InsertNode(NewNode);
+  return SDValue(NewNode, 0);
+}
+
SDValue SelectionDAG::getVAArg(EVT VT, SDLoc dl,
SDValue Chain, SDValue Ptr,
SDValue SV,
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 8f582f1aeb6..8618ab212ee 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3613,6 +3613,70 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
DAG.setRoot(StoreNode);
}
+/// Lower a call to the masked store intrinsic into a masked store DAG node.
+/// Call operands: 0 = address, 1 = value to store, 2 = alignment (a
+/// ConstantInt), 3 = mask.
+void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) {
+  SDLoc sdl = getCurSDLoc();
+
+  Value *Addr = I.getArgOperand(0);
+  SDValue Ptr = getValue(Addr);
+  SDValue StoredVal = getValue(I.getArgOperand(1));
+  SDValue Mask = getValue(I.getArgOperand(3));
+  EVT VT = StoredVal.getValueType();
+
+  // A zero alignment operand falls back to the alignment the DAG reports
+  // for the stored type.
+  unsigned Alignment = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
+  if (Alignment == 0)
+    Alignment = DAG.getEVTAlignment(VT);
+
+  AAMDNodes AAInfo;
+  I.getAAMetadata(AAInfo);
+
+  MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+      MachinePointerInfo(Addr), MachineMemOperand::MOStore, VT.getStoreSize(),
+      Alignment, AAInfo);
+
+  // The store is chained after the current root and becomes the new root.
+  SDValue Chain = getRoot();
+  SDValue StoreNode = DAG.getMaskedStore(Chain, sdl, StoredVal, Ptr, Mask, MMO);
+  DAG.setRoot(StoreNode);
+  setValue(&I, StoreNode);
+}
+
+/// Lower a call to the masked load intrinsic into a masked load DAG node.
+/// Call operands: 0 = address, 1 = pass-through value, 2 = alignment (a
+/// ConstantInt), 3 = mask. The loaded value becomes the value of \p I.
+void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) {
+  SDLoc sdl = getCurSDLoc();
+
+  Value *PtrOperand = I.getArgOperand(0);
+  SDValue Ptr = getValue(PtrOperand);
+  SDValue Src0 = getValue(I.getArgOperand(1));
+  SDValue Mask = getValue(I.getArgOperand(3));
+
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  EVT VT = TLI.getValueType(I.getType());
+  // A zero alignment operand falls back to the alignment the DAG reports
+  // for the loaded type.
+  unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(2)))->getZExtValue();
+  if (!Alignment)
+    Alignment = DAG.getEVTAlignment(VT);
+
+  AAMDNodes AAInfo;
+  I.getAAMetadata(AAInfo);
+  const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
+
+  // Do not serialize loads of constant memory with anything: such a load is
+  // chained to the entry node instead of the current root.
+  bool AddToChain = !AA->pointsToConstantMemory(
+      AliasAnalysis::Location(PtrOperand,
+                              AA->getTypeStoreSize(I.getType()),
+                              AAInfo));
+  SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
+
+  MachineMemOperand *MMO =
+    DAG.getMachineFunction().
+    getMachineMemOperand(MachinePointerInfo(PtrOperand),
+                         MachineMemOperand::MOLoad, VT.getStoreSize(),
+                         Alignment, AAInfo, Ranges);
+
+  SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, MMO);
+  // Only advance the root when the load was chained to it. If the load hangs
+  // off the entry node, making its output chain the root would drop every
+  // node reachable only through the previous root.
+  if (AddToChain)
+    DAG.setRoot(Load.getValue(1));
+  setValue(&I, Load);
+}
+
void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
SDLoc dl = getCurSDLoc();
AtomicOrdering SuccessOrder = I.getSuccessOrdering();
@@ -4914,6 +4978,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
+ case Intrinsic::masked_load:
+ visitMaskedLoad(I);
+ return nullptr;
+ case Intrinsic::masked_store:
+ visitMaskedStore(I);
+ return nullptr;
case Intrinsic::x86_mmx_pslli_w:
case Intrinsic::x86_mmx_pslli_d:
case Intrinsic::x86_mmx_pslli_q:
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index f74e6525b0c..2637e07a49b 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -756,6 +756,8 @@ private:
void visitAlloca(const AllocaInst &I);
void visitLoad(const LoadInst &I);
void visitStore(const StoreInst &I);
+ void visitMaskedLoad(const CallInst &I);
+ void visitMaskedStore(const CallInst &I);
void visitAtomicCmpXchg(const AtomicCmpXchgInst &I);
void visitAtomicRMW(const AtomicRMWInst &I);
void visitFence(const FenceInst &I);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index c9f6cff9155..e8577d898c2 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -269,6 +269,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
// Other operators
case ISD::LOAD: return "load";
case ISD::STORE: return "store";
+ case ISD::MLOAD: return "masked_load";
+ case ISD::MSTORE: return "masked_store";
case ISD::VAARG: return "vaarg";
case ISD::VACOPY: return "vacopy";
case ISD::VAEND: return "vaend";
diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp
index 32b2ec5845c..7ed90996f69 100644
--- a/lib/IR/Function.cpp
+++ b/lib/IR/Function.cpp
@@ -537,7 +537,8 @@ enum IIT_Info {
IIT_ANYPTR = 26,
IIT_V1 = 27,
IIT_VARARG = 28,
- IIT_HALF_VEC_ARG = 29
+ IIT_HALF_VEC_ARG = 29,
+ IIT_SAME_VEC_WIDTH_ARG = 30
};
@@ -645,6 +646,12 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
ArgInfo));
return;
}
+ case IIT_SAME_VEC_WIDTH_ARG: {
+ unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::SameVecWidthArgument,
+ ArgInfo));
+ return;
+ }
case IIT_EMPTYSTRUCT:
OutputTable.push_back(IITDescriptor::get(IITDescriptor::Struct, 0));
return;
@@ -752,7 +759,14 @@ static Type *DecodeFixedType(ArrayRef<Intrinsic::IITDescriptor> &Infos,
case IITDescriptor::HalfVecArgument:
return VectorType::getHalfElementsVectorType(cast<VectorType>(
Tys[D.getArgumentNumber()]));
- }
+ case IITDescriptor::SameVecWidthArgument:
+ Type *EltTy = DecodeFixedType(Infos, Tys, Context);
+ Type *Ty = Tys[D.getArgumentNumber()];
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
+ return VectorType::get(EltTy, VTy->getNumElements());
+ }
+ llvm_unreachable("unhandled");
+ }
llvm_unreachable("unhandled");
}
diff --git a/lib/IR/IRBuilder.cpp b/lib/IR/IRBuilder.cpp
index a4c5d9766a2..5f63ded48fe 100644
--- a/lib/IR/IRBuilder.cpp
+++ b/lib/IR/IRBuilder.cpp
@@ -183,3 +183,29 @@ CallInst *IRBuilderBase::CreateAssumption(Value *Cond) {
return createCallHelper(FnAssume, Ops, this);
}
+/// Emit a call to the masked load intrinsic.
+/// \p Ops is the operand list of the call. The intrinsic has a single
+/// overloaded type, taken from Ops[1] (the pass-through value).
+CallInst *IRBuilderBase::CreateMaskedLoad(ArrayRef<Value *> Ops) {
+  Type *PassThruTy = Ops[1]->getType();
+  return CreateMaskedIntrinsic(Intrinsic::masked_load, Ops, PassThruTy);
+}
+
+/// Emit a call to the masked store intrinsic.
+/// \p Ops is the operand list of the call. The intrinsic has a single
+/// overloaded type, taken from Ops[1] (the data being stored).
+CallInst *IRBuilderBase::CreateMaskedStore(ArrayRef<Value *> Ops) {
+  Type *StoredTy = Ops[1]->getType();
+  return CreateMaskedIntrinsic(Intrinsic::masked_store, Ops, StoredTy);
+}
+
+/// Emit a call to the masked intrinsic identified by \p Id.
+/// \p Ops are the call operands and \p DataTy is the one overloaded type
+/// used to look up the intrinsic declaration in the current module.
+CallInst *IRBuilderBase::CreateMaskedIntrinsic(unsigned Id,
+                                               ArrayRef<Value *> Ops,
+                                               Type *DataTy) {
+  Type *OverloadedTypes[] = { DataTy };
+  // The declaration lives in the module that owns the insertion block.
+  Module *M = BB->getParent()->getParent();
+  Value *Callee = Intrinsic::getDeclaration(M, static_cast<Intrinsic::ID>(Id),
+                                            OverloadedTypes);
+  return createCallHelper(Callee, Ops, this);
+}
diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp
index 9698dbd77fd..b3ca756b87a 100644
--- a/lib/IR/Verifier.cpp
+++ b/lib/IR/Verifier.cpp
@@ -2405,6 +2405,19 @@ bool Verifier::VerifyIntrinsicType(Type *Ty,
!isa<VectorType>(ArgTys[D.getArgumentNumber()]) ||
VectorType::getHalfElementsVectorType(
cast<VectorType>(ArgTys[D.getArgumentNumber()])) != Ty;
+ case IITDescriptor::SameVecWidthArgument: {
+ if (D.getArgumentNumber() >= ArgTys.size())
+ return true;
+ VectorType * ReferenceType =
+ dyn_cast<VectorType>(ArgTys[D.getArgumentNumber()]);
+ VectorType *ThisArgType = dyn_cast<VectorType>(Ty);
+ if (!ThisArgType || !ReferenceType ||
+ (ReferenceType->getVectorNumElements() !=
+ ThisArgType->getVectorNumElements()))
+ return true;
+ return VerifyIntrinsicType(ThisArgType->getVectorElementType(),
+ Infos, ArgTys);
+ }
}
llvm_unreachable("unhandled");
}
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 0c14dc32c50..90e88171dcc 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1321,13 +1321,21 @@ void X86TargetLowering::resetOperationActions() {
// Extract subvector is special because the value type
// (result) is 128-bit but the source is 256-bit wide.
- if (VT.is128BitVector())
+ if (VT.is128BitVector()) {
+ if (VT.getScalarSizeInBits() >= 32) {
+ setOperationAction(ISD::MLOAD, VT, Custom);
+ setOperationAction(ISD::MSTORE, VT, Custom);
+ }
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
-
+ }
// Do not attempt to custom lower other non-256-bit vectors
if (!VT.is256BitVector())
continue;
+ if (VT.getScalarSizeInBits() >= 32) {
+ setOperationAction(ISD::MLOAD, VT, Legal);
+ setOperationAction(ISD::MSTORE, VT, Legal);
+ }
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
@@ -1494,9 +1502,13 @@ void X86TargetLowering::resetOperationActions() {
unsigned EltSize = VT.getVectorElementType().getSizeInBits();
// Extract subvector is special because the value type
// (result) is 256/128-bit but the source is 512-bit wide.
- if (VT.is128BitVector() || VT.is256BitVector())
+ if (VT.is128BitVector() || VT.is256BitVector()) {
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
-
+ if ( EltSize >= 32) {
+ setOperationAction(ISD::MLOAD, VT, Legal);
+ setOperationAction(ISD::MSTORE, VT, Legal);
+ }
+ }
if (VT.getVectorElementType() == MVT::i1)
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
@@ -1512,6 +1524,8 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
+ setOperationAction(ISD::MLOAD, VT, Legal);
+ setOperationAction(ISD::MSTORE, VT, Legal);
}
}
for (int i = MVT::v32i8; i != MVT::v8i64; ++i) {
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index b188cd504cd..d786d98655b 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -2097,6 +2097,41 @@ def: Pat<(int_x86_avx512_mask_storeu_pd_512 addr:$ptr, (v8f64 VR512:$src),
(VMOVUPDZmrk addr:$ptr, (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
VR512:$src)>;
+def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8f32 VR256:$src)),
+ (VMOVUPSZmrk addr:$ptr,
+ (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)),
+ (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256:$src, sub_ymm))>;
+
+def: Pat<(v8f32 (masked_load addr:$ptr, VK8WM:$mask, undef)),
+ (v8f32 (EXTRACT_SUBREG (v16f32 (VMOVUPSZrmkz
+ (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>;
+
+def: Pat<(masked_store addr:$ptr, VK16WM:$mask, (v16f32 VR512:$src)),
+ (VMOVUPSZmrk addr:$ptr, VK16WM:$mask, VR512:$src)>;
+
+def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8f64 VR512:$src)),
+ (VMOVUPDZmrk addr:$ptr, VK8WM:$mask, VR512:$src)>;
+
+def: Pat<(v16f32 (masked_load addr:$ptr, VK16WM:$mask, undef)),
+ (VMOVUPSZrmkz VK16WM:$mask, addr:$ptr)>;
+
+def: Pat<(v16f32 (masked_load addr:$ptr, VK16WM:$mask,
+ (bc_v16f32 (v16i32 immAllZerosV)))),
+ (VMOVUPSZrmkz VK16WM:$mask, addr:$ptr)>;
+
+def: Pat<(v16f32 (masked_load addr:$ptr, VK16WM:$mask, (v16f32 VR512:$src0))),
+ (VMOVUPSZrmk VR512:$src0, VK16WM:$mask, addr:$ptr)>;
+
+def: Pat<(v8f64 (masked_load addr:$ptr, VK8WM:$mask, undef)),
+ (VMOVUPDZrmkz VK8WM:$mask, addr:$ptr)>;
+
+def: Pat<(v8f64 (masked_load addr:$ptr, VK8WM:$mask,
+ (bc_v8f64 (v16i32 immAllZerosV)))),
+ (VMOVUPDZrmkz VK8WM:$mask, addr:$ptr)>;
+
+def: Pat<(v8f64 (masked_load addr:$ptr, VK8WM:$mask, (v8f64 VR512:$src0))),
+ (VMOVUPDZrmk VR512:$src0, VK8WM:$mask, addr:$ptr)>;
+
defm VMOVDQA32 : avx512_load_vl<0x6F, "vmovdqa32", "alignedload", "i", "32",
"16", "8", "4", SSEPackedInt, HasAVX512>,
avx512_store_vl<0x7F, "vmovdqa32", "alignedstore",
@@ -2171,6 +2206,46 @@ def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV),
(VMOVDQU32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>;
}
+def: Pat<(v16i32 (masked_load addr:$ptr, VK16WM:$mask, (v16i32 immAllZerosV))),
+ (VMOVDQU32Zrmkz VK16WM:$mask, addr:$ptr)>;
+
+def: Pat<(v16i32 (masked_load addr:$ptr, VK16WM:$mask, undef)),
+ (VMOVDQU32Zrmkz VK16WM:$mask, addr:$ptr)>;
+
+def: Pat<(v16i32 (masked_load addr:$ptr, VK16WM:$mask, (v16i32 VR512:$src0))),
+ (VMOVDQU32Zrmk VR512:$src0, VK16WM:$mask, addr:$ptr)>;
+
+def: Pat<(v8i64 (masked_load addr:$ptr, VK8WM:$mask,
+ (bc_v8i64 (v16i32 immAllZerosV)))),
+ (VMOVDQU64Zrmkz VK8WM:$mask, addr:$ptr)>;
+
+def: Pat<(v8i64 (masked_load addr:$ptr, VK8WM:$mask, undef)),
+ (VMOVDQU64Zrmkz VK8WM:$mask, addr:$ptr)>;
+
+def: Pat<(v8i64 (masked_load addr:$ptr, VK8WM:$mask, (v8i64 VR512:$src0))),
+ (VMOVDQU64Zrmk VR512:$src0, VK8WM:$mask, addr:$ptr)>;
+
+def: Pat<(masked_store addr:$ptr, VK16WM:$mask, (v16i32 VR512:$src)),
+ (VMOVDQU32Zmrk addr:$ptr, VK16WM:$mask, VR512:$src)>;
+
+def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8i64 VR512:$src)),
+ (VMOVDQU64Zmrk addr:$ptr, VK8WM:$mask, VR512:$src)>;
+
+// SKX replacement
+// NOTE(review): this pattern and the "KNL replacement" below match the same
+// (masked_store addr, VK8WM mask, v8i32 VR256 src) DAG, and neither is
+// guarded by a subtarget predicate in this hunk -- confirm which one
+// instruction selection is expected to pick on each subtarget.
+def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8i32 VR256:$src)),
+ (VMOVDQU32Z256mrk addr:$ptr, VK8WM:$mask, VR256:$src)>;
+
+// KNL replacement
+// Widens the 256-bit source into a 512-bit register (INSERT_SUBREG into
+// IMPLICIT_DEF) and copies the mask into VK16WM so the 512-bit masked store
+// instruction can be used.
+def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8i32 VR256:$src)),
+ (VMOVDQU32Zmrk addr:$ptr,
+ (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)),
+ (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256:$src, sub_ymm))>;
+
+def: Pat<(v8i32 (masked_load addr:$ptr, VK8WM:$mask, undef)),
+ (v8i32 (EXTRACT_SUBREG (v16i32 (VMOVDQU32Zrmkz
+ (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>;
+
+
// Move Int Doubleword to Packed Double Int
//
def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index cc896f08528..e73ed1e355b 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -9260,6 +9260,61 @@ defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq",
int_x86_avx2_maskstore_q,
int_x86_avx2_maskstore_q_256>, VEX_W;
+def: Pat<(masked_store addr:$ptr, (v8i32 VR256:$mask), (v8f32 VR256:$src)),
+ (VPMASKMOVDYmr addr:$ptr, VR256:$mask, VR256:$src)>;
+
+def: Pat<(masked_store addr:$ptr, (v8i32 VR256:$mask), (v8i32 VR256:$src)),
+ (VPMASKMOVDYmr addr:$ptr, VR256:$mask, VR256:$src)>;
+
+def: Pat<(v8f32 (masked_load addr:$ptr, (v8i32 VR256:$mask), undef)),
+ (VPMASKMOVDYrm VR256:$mask, addr:$ptr)>;
+
+def: Pat<(v8f32 (masked_load addr:$ptr, (v8i32 VR256:$mask),
+ (bc_v8f32 (v8i32 immAllZerosV)))),
+ (VPMASKMOVDYrm VR256:$mask, addr:$ptr)>;
+
+def: Pat<(v8f32 (masked_load addr:$ptr, (v8i32 VR256:$mask), (v8f32 VR256:$src0))),
+ (VBLENDVPSYrr VR256:$src0, (VPMASKMOVDYrm VR256:$mask, addr:$ptr),
+ VR256:$mask)>;
+
+def: Pat<(v8i32 (masked_load addr:$ptr, (v8i32 VR256:$mask), undef)),
+ (VPMASKMOVDYrm VR256:$mask, addr:$ptr)>;
+
+def: Pat<(v8i32 (masked_load addr:$ptr, (v8i32 VR256:$mask), (v8i32 immAllZerosV))),
+ (VPMASKMOVDYrm VR256:$mask, addr:$ptr)>;
+
+def: Pat<(v8i32 (masked_load addr:$ptr, (v8i32 VR256:$mask), (v8i32 VR256:$src0))),
+ (VBLENDVPSYrr VR256:$src0, (VPMASKMOVDYrm VR256:$mask, addr:$ptr),
+ VR256:$mask)>;
+
+def: Pat<(masked_store addr:$ptr, (v4i64 VR256:$mask), (v4f64 VR256:$src)),
+ (VPMASKMOVQYmr addr:$ptr, VR256:$mask, VR256:$src)>;
+
+def: Pat<(masked_store addr:$ptr, (v4i64 VR256:$mask), (v4i64 VR256:$src)),
+ (VPMASKMOVQYmr addr:$ptr, VR256:$mask, VR256:$src)>;
+
+def: Pat<(v4f64 (masked_load addr:$ptr, (v4i64 VR256:$mask), undef)),
+ (VPMASKMOVQYrm VR256:$mask, addr:$ptr)>;
+
+def: Pat<(v4f64 (masked_load addr:$ptr, (v4i64 VR256:$mask),
+ (v4f64 immAllZerosV))),
+ (VPMASKMOVQYrm VR256:$mask, addr:$ptr)>;
+
+def: Pat<(v4f64 (masked_load addr:$ptr, (v4i64 VR256:$mask), (v4f64 VR256:$src0))),
+ (VBLENDVPDYrr VR256:$src0, (VPMASKMOVQYrm VR256:$mask, addr:$ptr),
+ VR256:$mask)>;
+
+def: Pat<(v4i64 (masked_load addr:$ptr, (v4i64 VR256:$mask), undef)),
+ (VPMASKMOVQYrm VR256:$mask, addr:$ptr)>;
+
+def: Pat<(v4i64 (masked_load addr:$ptr, (v4i64 VR256:$mask),
+ (bc_v4i64 (v8i32 immAllZerosV)))),
+ (VPMASKMOVQYrm VR256:$mask, addr:$ptr)>;
+
+def: Pat<(v4i64 (masked_load addr:$ptr, (v4i64 VR256:$mask), (v4i64 VR256:$src0))),
+ (VBLENDVPDYrr VR256:$src0, (VPMASKMOVQYrm VR256:$mask, addr:$ptr),
+ VR256:$mask)>;
+
//===----------------------------------------------------------------------===//
// Variable Bit Shifts
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index 2b70fd0ecf8..1811a205284 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -111,6 +111,8 @@ public:
Type *Ty) const override;
unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Type *Ty) const override;
+ bool isLegalPredicatedLoad (Type *DataType, int Consecutive) const;
+ bool isLegalPredicatedStore(Type *DataType, int Consecutive) const;
/// @}
};
@@ -1156,3 +1158,19 @@ unsigned X86TTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
}
return X86TTI::getIntImmCost(Imm, Ty);
}
+
+/// Report whether a predicated load of \p DataType is legal on this
+/// subtarget. It is legal only when \p Consecutive is non-zero, the scalar
+/// element is at least 32 bits wide, and the subtarget has AVX2 or AVX-512.
+bool X86TTI::isLegalPredicatedLoad(Type *DataType, int Consecutive) const {
+  // Todo: AVX512 allows gather/scatter, works with strided and random as well
+  if (Consecutive == 0)
+    return false;
+
+  const int ElementBits = DataType->getScalarSizeInBits();
+  if (ElementBits < 32)
+    return false;
+
+  return ST->hasAVX512() || ST->hasAVX2();
+}
+
+/// Report whether a predicated store of \p DataType is legal on this
+/// subtarget; the answer is the same as for a predicated load.
+bool X86TTI::isLegalPredicatedStore(Type *DataType, int Consecutive) const {
+  const bool SameAsLoad = isLegalPredicatedLoad(DataType, Consecutive);
+  return SameAsLoad;
+}
+
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 35b2ecf99ce..de4cb262575 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -580,9 +580,10 @@ public:
LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, const DataLayout *DL,
DominatorTree *DT, TargetLibraryInfo *TLI,
- AliasAnalysis *AA, Function *F)
+ AliasAnalysis *AA, Function *F,
+ const TargetTransformInfo *TTI)
: NumLoads(0), NumStores(0), NumPredStores(0), TheLoop(L), SE(SE), DL(DL),
- DT(DT), TLI(TLI), AA(AA), TheFunction(F), Induction(nullptr),
+ DT(DT), TLI(TLI), AA(AA), TheFunction(F), TTI(TTI), Induction(nullptr),
WidestIndTy(nullptr), HasFunNoNaNAttr(false), MaxSafeDepDistBytes(-1U) {
}
@@ -768,6 +769,15 @@ public:
}
SmallPtrSet<Value *, 8>::iterator strides_end() { return StrideSet.end(); }
+ /// Ask the target whether a predicated store of \p DataType is legal,
+ /// passing along what isConsecutivePtr() reports for \p Ptr.
+ bool canPredicateStore(Type *DataType, Value *Ptr) {
+   int Consecutive = isConsecutivePtr(Ptr);
+   return TTI->isLegalPredicatedStore(DataType, Consecutive);
+ }
+ /// Ask the target whether a predicated load of \p DataType is legal,
+ /// passing along what isConsecutivePtr() reports for \p Ptr.
+ bool canPredicateLoad(Type *DataType, Value *Ptr) {
+   int Consecutive = isConsecutivePtr(Ptr);
+   return TTI->isLegalPredicatedLoad(DataType, Consecutive);
+ }
+ /// Return true if \p I was recorded in MaskedOp. Despite the name, this
+ /// only queries the set and never modifies it.
+ bool setMaskedOp(const Instruction* I) {
+   return MaskedOp.count(I) != 0;
+ }
private:
/// Check if a single basic block loop is vectorizable.
/// At this point we know that this is a loop with a constant trip count
@@ -840,6 +850,8 @@ private:
AliasAnalysis *AA;
/// Parent function
Function *TheFunction;
+ /// Target Transform Info
+ const TargetTransformInfo *TTI;
// --- vectorization state --- //
@@ -871,6 +883,10 @@ private:
ValueToValueMap Strides;
SmallPtrSet<Value *, 8> StrideSet;
+
+ /// While vectorizing these instructions we have to generate a
+ /// call to an appropriate masked intrinsic
+ std::set<const Instruction*> MaskedOp;
};
/// LoopVectorizationCostModel - estimates the expected speedups due to
@@ -1375,7 +1391,7 @@ struct LoopVectorize : public FunctionPass {
}
// Check if it is legal to vectorize the loop.
- LoopVectorizationLegality LVL(L, SE, DL, DT, TLI, AA, F);
+ LoopVectorizationLegality LVL(L, SE, DL, DT, TLI, AA, F, TTI);
if (!LVL.canVectorize()) {
DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
emitMissedWarning(F, L, Hints);
@@ -1763,7 +1779,8 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
unsigned ScalarAllocatedSize = DL->getTypeAllocSize(ScalarDataTy);
unsigned VectorElementSize = DL->getTypeStoreSize(DataTy)/VF;
- if (SI && Legal->blockNeedsPredication(SI->getParent()))
+ if (SI && Legal->blockNeedsPredication(SI->getParent()) &&
+ !Legal->setMaskedOp(SI))
return scalarizeInstruction(Instr, true);
if (ScalarAllocatedSize != VectorElementSize)
@@ -1857,8 +1874,25 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
Value *VecPtr = Builder.CreateBitCast(PartPtr,
DataTy->getPointerTo(AddressSpace));
- StoreInst *NewSI =
- Builder.CreateAlignedStore(StoredVal[Part], VecPtr, Alignment);
+
+ Instruction *NewSI;
+ if (Legal->setMaskedOp(SI)) {
+ Type *I8PtrTy =
+ Builder.getInt8PtrTy(PartPtr->getType()->getPointerAddressSpace());
+
+ Value *I8Ptr = Builder.CreateBitCast(PartPtr, I8PtrTy);
+
+ VectorParts Cond = createEdgeMask(SI->getParent()->getSinglePredecessor(),
+ SI->getParent());
+ SmallVector <Value *, 8> Ops;
+ Ops.push_back(I8Ptr);
+ Ops.push_back(StoredVal[Part]);
+ Ops.push_back(Builder.getInt32(Alignment));
+ Ops.push_back(Cond[Part]);
+ NewSI = Builder.CreateMaskedStore(Ops);
+ }
+ else
+ NewSI = Builder.CreateAlignedStore(StoredVal[Part], VecPtr, Alignment);
propagateMetadata(NewSI, SI);
}
return;
@@ -1873,14 +1907,31 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
if (Reverse) {
// If the address is consecutive but reversed, then the
- // wide store needs to start at the last vector element.
+ // wide load needs to start at the last vector element.
PartPtr = Builder.CreateGEP(Ptr, Builder.getInt32(-Part * VF));
PartPtr = Builder.CreateGEP(PartPtr, Builder.getInt32(1 - VF));
}
- Value *VecPtr = Builder.CreateBitCast(PartPtr,
- DataTy->getPointerTo(AddressSpace));
- LoadInst *NewLI = Builder.CreateAlignedLoad(VecPtr, Alignment, "wide.load");
+ Instruction* NewLI;
+ if (Legal->setMaskedOp(LI)) {
+ Type *I8PtrTy =
+ Builder.getInt8PtrTy(PartPtr->getType()->getPointerAddressSpace());
+
+ Value *I8Ptr = Builder.CreateBitCast(PartPtr, I8PtrTy);
+
+ VectorParts SrcMask = createBlockInMask(LI->getParent());
+ SmallVector <Value *, 8> Ops;
+ Ops.push_back(I8Ptr);
+ Ops.push_back(UndefValue::get(DataTy));
+ Ops.push_back(Builder.getInt32(Alignment));
+ Ops.push_back(SrcMask[Part]);
+ NewLI = Builder.CreateMaskedLoad(Ops);
+ }
+ else {
+ Value *VecPtr = Builder.CreateBitCast(PartPtr,
+ DataTy->getPointerTo(AddressSpace));
+ NewLI = Builder.CreateAlignedLoad(VecPtr, Alignment, "wide.load");
+ }
propagateMetadata(NewLI, LI);
Entry[Part] = Reverse ? reverseVector(NewLI) : NewLI;
}
@@ -5304,8 +5355,15 @@ bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB,
// We might be able to hoist the load.
if (it->mayReadFromMemory()) {
LoadInst *LI = dyn_cast<LoadInst>(it);
- if (!LI || !SafePtrs.count(LI->getPointerOperand()))
+ if (!LI)
+ return false;
+ if (!SafePtrs.count(LI->getPointerOperand())) {
+ if (canPredicateLoad(LI->getType(), LI->getPointerOperand())) {
+ MaskedOp.insert(LI);
+ continue;
+ }
return false;
+ }
}
// We don't predicate stores at the moment.
@@ -5313,10 +5371,20 @@ bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB,
StoreInst *SI = dyn_cast<StoreInst>(it);
// We only support predication of stores in basic blocks with one
// predecessor.
- if (!SI || ++NumPredStores > NumberOfStoresToPredicate ||
+ if (!SI)
+ return false;
+
+ if (++NumPredStores > NumberOfStoresToPredicate ||
!SafePtrs.count(SI->getPointerOperand()) ||
- !SI->getParent()->getSinglePredecessor())
+ !SI->getParent()->getSinglePredecessor()) {
+ if (canPredicateStore(SI->getValueOperand()->getType(),
+ SI->getPointerOperand())) {
+ MaskedOp.insert(SI);
+ --NumPredStores;
+ continue;
+ }
return false;
+ }
}
if (it->mayThrow())
return false;
@@ -5380,7 +5448,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
MaxVectorSize = 1;
}
- assert(MaxVectorSize <= 32 && "Did not expect to pack so many elements"
+ assert(MaxVectorSize <= 64 && "Did not expect to pack so many elements"
" into one vector!");
unsigned VF = MaxVectorSize;
@@ -5441,7 +5509,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
// the vector elements.
float VectorCost = expectedCost(i) / (float)i;
DEBUG(dbgs() << "LV: Vector loop of width " << i << " costs: " <<
- (int)VectorCost << ".\n");
+ VectorCost << ".\n");
if (VectorCost < Cost) {
Cost = VectorCost;
Width = i;
diff --git a/test/CodeGen/X86/masked_memop.ll b/test/CodeGen/X86/masked_memop.ll
new file mode 100644
index 00000000000..8cb2d63d5f6
--- /dev/null
+++ b/test/CodeGen/X86/masked_memop.ll
@@ -0,0 +1,73 @@
+; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=knl < %s | FileCheck %s -check-prefix=AVX512
+; RUN: llc -mtriple=x86_64-apple-darwin -mcpu=core-avx2 < %s | FileCheck %s -check-prefix=AVX2
+
+; AVX512-LABEL: test1
+; AVX512: vmovdqu32 (%rdi), %zmm0 {%k1} {z}
+
+; AVX2-LABEL: test1
+; AVX2: vpmaskmovd 32(%rdi)
+; AVX2: vpmaskmovd (%rdi)
+; AVX2-NOT: blend
+
+define <16 x i32> @test1(<16 x i32> %trigger, i8* %addr) {
+ %mask = icmp eq <16 x i32> %trigger, zeroinitializer
+ %res = call <16 x i32> @llvm.masked.load.v16i32(i8* %addr, <16 x i32>undef, i32 4, <16 x i1>%mask)
+ ret <16 x i32> %res
+}
+
+; AVX512-LABEL: test2
+; AVX512: vmovdqu32 (%rdi), %zmm0 {%k1} {z}
+
+; AVX2-LABEL: test2
+; AVX2: vpmaskmovd {{.*}}(%rdi)
+; AVX2: vpmaskmovd {{.*}}(%rdi)
+; AVX2-NOT: blend
+define <16 x i32> @test2(<16 x i32> %trigger, i8* %addr) {
+ %mask = icmp eq <16 x i32> %trigger, zeroinitializer
+ %res = call <16 x i32> @llvm.masked.load.v16i32(i8* %addr, <16 x i32>zeroinitializer, i32 4, <16 x i1>%mask)
+ ret <16 x i32> %res
+}
+
+; AVX512-LABEL: test3
+; AVX512: vmovdqu32 %zmm1, (%rdi) {%k1}
+
+define void @test3(<16 x i32> %trigger, i8* %addr, <16 x i32> %val) {
+ %mask = icmp eq <16 x i32> %trigger, zeroinitializer
+ call void @llvm.masked.store.v16i32(i8* %addr, <16 x i32>%val, i32 4, <16 x i1>%mask)
+ ret void
+}
+
+; AVX512-LABEL: test4
+; AVX512: vmovups (%rdi), %zmm{{.*{%k[1-7]}}}
+
+; AVX2-LABEL: test4
+; AVX2: vpmaskmovd {{.*}}(%rdi)
+; AVX2: vpmaskmovd {{.*}}(%rdi)
+; AVX2: blend
+define <16 x float> @test4(<16 x i32> %trigger, i8* %addr, <16 x float> %dst) {
+ %mask = icmp eq <16 x i32> %trigger, zeroinitializer
+ %res = call <16 x float> @llvm.masked.load.v16f32(i8* %addr, <16 x float>%dst, i32 4, <16 x i1>%mask)
+ ret <16 x float> %res
+}
+
+; AVX512-LABEL: test5
+; AVX512: vmovupd (%rdi), %zmm1 {%k1}
+
+; AVX2-LABEL: test5
+; AVX2: vpmaskmovq
+; AVX2: vblendvpd
+; AVX2: vpmaskmovq
+; AVX2: vblendvpd
+define <8 x double> @test5(<8 x i32> %trigger, i8* %addr, <8 x double> %dst) {
+ %mask = icmp eq <8 x i32> %trigger, zeroinitializer
+ %res = call <8 x double> @llvm.masked.load.v8f64(i8* %addr, <8 x double>%dst, i32 4, <8 x i1>%mask)
+ ret <8 x double> %res
+}
+
+declare <16 x i32> @llvm.masked.load.v16i32(i8*, <16 x i32>, i32, <16 x i1>)
+declare void @llvm.masked.store.v16i32(i8*, <16 x i32>, i32, <16 x i1>)
+declare <16 x float> @llvm.masked.load.v16f32(i8*, <16 x float>, i32, <16 x i1>)
+declare void @llvm.masked.store.v16f32(i8*, <16 x float>, i32, <16 x i1>)
+declare <8 x double> @llvm.masked.load.v8f64(i8*, <8 x double>, i32, <8 x i1>)
+declare void @llvm.masked.store.v8f64(i8*, <8 x double>, i32, <8 x i1>)
+
diff --git a/test/Transforms/LoopVectorize/X86/mask1.ll b/test/Transforms/LoopVectorize/X86/mask1.ll
new file mode 100644
index 00000000000..1721955a869
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/mask1.ll
@@ -0,0 +1,83 @@
+; RUN: opt < %s -O3 -mcpu=corei7-avx -S | FileCheck %s -check-prefix=AVX1
+; RUN: opt < %s -O3 -mcpu=core-avx2 -S | FileCheck %s -check-prefix=AVX2
+; RUN: opt < %s -O3 -mcpu=knl -S | FileCheck %s -check-prefix=AVX512
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc_linux"
+
+; The source code:
+;
+;void foo(int *A, int *B, int *trigger) {
+;
+; for (int i=0; i<10000; i++) {
+; if (trigger[i] < 100) {
+; A[i] = B[i] + trigger[i];
+; }
+; }
+;}
+
+
+;AVX2: llvm.masked.load.v8i32
+;AVX2: llvm.masked.store.v8i32
+;AVX512: llvm.masked.load.v16i32
+;AVX512: llvm.masked.store.v16i32
+;AVX1-NOT: llvm.masked
+
+; Function Attrs: nounwind uwtable
+define void @foo(i32* %A, i32* %B, i32* %trigger) {
+entry:
+ %A.addr = alloca i32*, align 8
+ %B.addr = alloca i32*, align 8
+ %trigger.addr = alloca i32*, align 8
+ %i = alloca i32, align 4
+ store i32* %A, i32** %A.addr, align 8
+ store i32* %B, i32** %B.addr, align 8
+ store i32* %trigger, i32** %trigger.addr, align 8
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32* %i, align 4
+ %cmp = icmp slt i32 %0, 10000
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %1 = load i32* %i, align 4
+ %idxprom = sext i32 %1 to i64
+ %2 = load i32** %trigger.addr, align 8
+ %arrayidx = getelementptr inbounds i32* %2, i64 %idxprom
+ %3 = load i32* %arrayidx, align 4
+ %cmp1 = icmp slt i32 %3, 100
+ br i1 %cmp1, label %if.then, label %if.end
+
+if.then: ; preds = %for.body
+ %4 = load i32* %i, align 4
+ %idxprom2 = sext i32 %4 to i64
+ %5 = load i32** %B.addr, align 8
+ %arrayidx3 = getelementptr inbounds i32* %5, i64 %idxprom2
+ %6 = load i32* %arrayidx3, align 4
+ %7 = load i32* %i, align 4
+ %idxprom4 = sext i32 %7 to i64
+ %8 = load i32** %trigger.addr, align 8
+ %arrayidx5 = getelementptr inbounds i32* %8, i64 %idxprom4
+ %9 = load i32* %arrayidx5, align 4
+ %add = add nsw i32 %6, %9
+ %10 = load i32* %i, align 4
+ %idxprom6 = sext i32 %10 to i64
+ %11 = load i32** %A.addr, align 8
+ %arrayidx7 = getelementptr inbounds i32* %11, i64 %idxprom6
+ store i32 %add, i32* %arrayidx7, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %for.body
+ br label %for.inc
+
+for.inc: ; preds = %if.end
+ %12 = load i32* %i, align 4
+ %inc = add nsw i32 %12, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+}
diff --git a/test/Transforms/LoopVectorize/X86/mask2.ll b/test/Transforms/LoopVectorize/X86/mask2.ll
new file mode 100644
index 00000000000..929c88dbe7b
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/mask2.ll
@@ -0,0 +1,84 @@
+; RUN: opt < %s -O3 -mcpu=corei7-avx -S | FileCheck %s -check-prefix=AVX1
+; RUN: opt < %s -O3 -mcpu=core-avx2 -S | FileCheck %s -check-prefix=AVX2
+; RUN: opt < %s -O3 -mcpu=knl -S | FileCheck %s -check-prefix=AVX512
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc_linux"
+
+; The source code:
+;
+;void foo(float *A, float *B, int *trigger) {
+;
+; for (int i=0; i<10000; i++) {
+; if (trigger[i] < 100) {
+; A[i] = B[i] + trigger[i];
+; }
+; }
+;}
+
+
+;AVX2: llvm.masked.load.v8f32
+;AVX2: llvm.masked.store.v8f32
+;AVX512: llvm.masked.load.v16f32
+;AVX512: llvm.masked.store.v16f32
+;AVX1-NOT: llvm.masked
+
+; Function Attrs: nounwind uwtable
+define void @foo(float* %A, float* %B, i32* %trigger) {
+entry:
+ %A.addr = alloca float*, align 8
+ %B.addr = alloca float*, align 8
+ %trigger.addr = alloca i32*, align 8
+ %i = alloca i32, align 4
+ store float* %A, float** %A.addr, align 8
+ store float* %B, float** %B.addr, align 8
+ store i32* %trigger, i32** %trigger.addr, align 8
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32* %i, align 4
+ %cmp = icmp slt i32 %0, 10000
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %1 = load i32* %i, align 4
+ %idxprom = sext i32 %1 to i64
+ %2 = load i32** %trigger.addr, align 8
+ %arrayidx = getelementptr inbounds i32* %2, i64 %idxprom
+ %3 = load i32* %arrayidx, align 4
+ %cmp1 = icmp slt i32 %3, 100
+ br i1 %cmp1, label %if.then, label %if.end
+
+if.then: ; preds = %for.body
+ %4 = load i32* %i, align 4
+ %idxprom2 = sext i32 %4 to i64
+ %5 = load float** %B.addr, align 8
+ %arrayidx3 = getelementptr inbounds float* %5, i64 %idxprom2
+ %6 = load float* %arrayidx3, align 4
+ %7 = load i32* %i, align 4
+ %idxprom4 = sext i32 %7 to i64
+ %8 = load i32** %trigger.addr, align 8
+ %arrayidx5 = getelementptr inbounds i32* %8, i64 %idxprom4
+ %9 = load i32* %arrayidx5, align 4
+ %conv = sitofp i32 %9 to float
+ %add = fadd float %6, %conv
+ %10 = load i32* %i, align 4
+ %idxprom6 = sext i32 %10 to i64
+ %11 = load float** %A.addr, align 8
+ %arrayidx7 = getelementptr inbounds float* %11, i64 %idxprom6
+ store float %add, float* %arrayidx7, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %for.body
+ br label %for.inc
+
+for.inc: ; preds = %if.end
+ %12 = load i32* %i, align 4
+ %inc = add nsw i32 %12, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+}
diff --git a/test/Transforms/LoopVectorize/X86/mask3.ll b/test/Transforms/LoopVectorize/X86/mask3.ll
new file mode 100644
index 00000000000..9fb554ef127
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/mask3.ll
@@ -0,0 +1,84 @@
+; RUN: opt < %s -O3 -mcpu=corei7-avx -S | FileCheck %s -check-prefix=AVX1
+; RUN: opt < %s -O3 -mcpu=core-avx2 -S | FileCheck %s -check-prefix=AVX2
+; RUN: opt < %s -O3 -mcpu=knl -S | FileCheck %s -check-prefix=AVX512
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc_linux"
+
+; The source code:
+;
+;void foo(double *A, double *B, int *trigger) {
+;
+; for (int i=0; i<10000; i++) {
+; if (trigger[i] < 100) {
+; A[i] = B[i] + trigger[i];
+; }
+; }
+;}
+
+
+;AVX2: llvm.masked.load.v4f64
+;AVX2: llvm.masked.store.v4f64
+;AVX512: llvm.masked.load.v8f64
+;AVX512: llvm.masked.store.v8f64
+;AVX1-NOT: llvm.masked
+
+; Function Attrs: nounwind uwtable
+define void @foo(double* %A, double* %B, i32* %trigger) #0 { ; double variant of the guarded loop; NOTE(review): this file contains no 'attributes #0' group -- confirm the reference is intended
+entry: ; copy each argument into a stack slot and zero the loop counter %i
+ %A.addr = alloca double*, align 8
+ %B.addr = alloca double*, align 8
+ %trigger.addr = alloca i32*, align 8
+ %i = alloca i32, align 4
+ store double* %A, double** %A.addr, align 8
+ store double* %B, double** %B.addr, align 8
+ store i32* %trigger, i32** %trigger.addr, align 8
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry -- loop header: stay in the loop while i < 10000
+ %0 = load i32* %i, align 4
+ %cmp = icmp slt i32 %0, 10000
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond -- load trigger[i] and branch on trigger[i] < 100
+ %1 = load i32* %i, align 4
+ %idxprom = sext i32 %1 to i64
+ %2 = load i32** %trigger.addr, align 8
+ %arrayidx = getelementptr inbounds i32* %2, i64 %idxprom
+ %3 = load i32* %arrayidx, align 4
+ %cmp1 = icmp slt i32 %3, 100
+ br i1 %cmp1, label %if.then, label %if.end
+
+if.then: ; preds = %for.body -- guarded part: B[i] is loaded and A[i] is stored only on this path
+ %4 = load i32* %i, align 4
+ %idxprom2 = sext i32 %4 to i64
+ %5 = load double** %B.addr, align 8
+ %arrayidx3 = getelementptr inbounds double* %5, i64 %idxprom2
+ %6 = load double* %arrayidx3, align 8 ; B[i], unit-stride index
+ %7 = load i32* %i, align 4
+ %idxprom4 = sext i32 %7 to i64
+ %8 = load i32** %trigger.addr, align 8
+ %arrayidx5 = getelementptr inbounds i32* %8, i64 %idxprom4
+ %9 = load i32* %arrayidx5, align 4 ; trigger[i], loaded again
+ %conv = sitofp i32 %9 to double ; signed int -> double
+ %add = fadd double %6, %conv
+ %10 = load i32* %i, align 4
+ %idxprom6 = sext i32 %10 to i64
+ %11 = load double** %A.addr, align 8
+ %arrayidx7 = getelementptr inbounds double* %11, i64 %idxprom6
+ store double %add, double* %arrayidx7, align 8 ; A[i] = B[i] + (double)trigger[i]
+ br label %if.end
+
+if.end: ; preds = %if.then, %for.body
+ br label %for.inc
+
+for.inc: ; preds = %if.end -- i = i + 1, then back to the loop header
+ %12 = load i32* %i, align 4
+ %inc = add nsw i32 %12, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+}
diff --git a/test/Transforms/LoopVectorize/X86/mask4.ll b/test/Transforms/LoopVectorize/X86/mask4.ll
new file mode 100644
index 00000000000..5cc71f8f6aa
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/mask4.ll
@@ -0,0 +1,83 @@
+; RUN: opt < %s -O3 -mcpu=corei7-avx -S | FileCheck %s -check-prefix=AVX1
+; RUN: opt < %s -O3 -mcpu=core-avx2 -S | FileCheck %s -check-prefix=AVX2
+; RUN: opt < %s -O3 -mcpu=knl -S | FileCheck %s -check-prefix=AVX512
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc_linux"
+
+; The source code:
+;
+;void foo(double *A, double *B, int *trigger) {
+;
+; for (int i=0; i<10000; i++) {
+; if (trigger[i] < 100) {
+; A[i] = B[i*2] + trigger[i]; << non-consecutive access
+; }
+; }
+;}
+
+
+;AVX2-NOT: llvm.masked
+;AVX512-NOT: llvm.masked
+;AVX1-NOT: llvm.masked
+
+; Function Attrs: nounwind uwtable
+define void @foo(double* %A, double* %B, i32* %trigger) { ; strided variant: for i in [0, 10000): if (trigger[i] < 100) A[i] = B[i*2] + (double)trigger[i]
+entry: ; copy each argument into a stack slot and zero the loop counter %i
+ %A.addr = alloca double*, align 8
+ %B.addr = alloca double*, align 8
+ %trigger.addr = alloca i32*, align 8
+ %i = alloca i32, align 4
+ store double* %A, double** %A.addr, align 8
+ store double* %B, double** %B.addr, align 8
+ store i32* %trigger, i32** %trigger.addr, align 8
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry -- loop header: stay in the loop while i < 10000
+ %0 = load i32* %i, align 4
+ %cmp = icmp slt i32 %0, 10000
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond -- load trigger[i] and branch on trigger[i] < 100
+ %1 = load i32* %i, align 4
+ %idxprom = sext i32 %1 to i64
+ %2 = load i32** %trigger.addr, align 8
+ %arrayidx = getelementptr inbounds i32* %2, i64 %idxprom
+ %3 = load i32* %arrayidx, align 4
+ %cmp1 = icmp slt i32 %3, 100
+ br i1 %cmp1, label %if.then, label %if.end
+
+if.then: ; preds = %for.body -- guarded part: B[i*2] is loaded and A[i] is stored only on this path
+ %4 = load i32* %i, align 4
+ %mul = mul nsw i32 %4, 2 ; index i*2: B is read with a stride of two elements
+ %idxprom2 = sext i32 %mul to i64
+ %5 = load double** %B.addr, align 8
+ %arrayidx3 = getelementptr inbounds double* %5, i64 %idxprom2
+ %6 = load double* %arrayidx3, align 8 ; B[i*2]
+ %7 = load i32* %i, align 4
+ %idxprom4 = sext i32 %7 to i64
+ %8 = load i32** %trigger.addr, align 8
+ %arrayidx5 = getelementptr inbounds i32* %8, i64 %idxprom4
+ %9 = load i32* %arrayidx5, align 4 ; trigger[i], loaded again
+ %conv = sitofp i32 %9 to double ; signed int -> double
+ %add = fadd double %6, %conv
+ %10 = load i32* %i, align 4
+ %idxprom6 = sext i32 %10 to i64
+ %11 = load double** %A.addr, align 8
+ %arrayidx7 = getelementptr inbounds double* %11, i64 %idxprom6
+ store double %add, double* %arrayidx7, align 8 ; A[i] = B[i*2] + (double)trigger[i]
+ br label %if.end
+
+if.end: ; preds = %if.then, %for.body
+ br label %for.inc
+
+for.inc: ; preds = %if.end -- i = i + 1, then back to the loop header
+ %12 = load i32* %i, align 4
+ %inc = add nsw i32 %12, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+}
diff --git a/utils/TableGen/CodeGenTarget.cpp b/utils/TableGen/CodeGenTarget.cpp
index 597da68dcbc..f4f5e2b2346 100644
--- a/utils/TableGen/CodeGenTarget.cpp
+++ b/utils/TableGen/CodeGenTarget.cpp
@@ -539,7 +539,8 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
// variants with iAny types; otherwise, if the intrinsic is not
// overloaded, all the types can be specified directly.
assert(((!TyEl->isSubClassOf("LLVMExtendedType") &&
- !TyEl->isSubClassOf("LLVMTruncatedType")) ||
+ !TyEl->isSubClassOf("LLVMTruncatedType") &&
+ !TyEl->isSubClassOf("LLVMVectorSameWidth")) ||
VT == MVT::iAny || VT == MVT::vAny) &&
"Expected iAny or vAny type");
} else
diff --git a/utils/TableGen/IntrinsicEmitter.cpp b/utils/TableGen/IntrinsicEmitter.cpp
index 37f6de057da..dcf4b80e4ef 100644
--- a/utils/TableGen/IntrinsicEmitter.cpp
+++ b/utils/TableGen/IntrinsicEmitter.cpp
@@ -257,7 +257,8 @@ enum IIT_Info {
IIT_ANYPTR = 26,
IIT_V1 = 27,
IIT_VARARG = 28,
- IIT_HALF_VEC_ARG = 29
+ IIT_HALF_VEC_ARG = 29,
+ IIT_SAME_VEC_WIDTH_ARG = 30
};
@@ -305,6 +306,13 @@ static void EncodeFixedType(Record *R, std::vector<unsigned char> &ArgCodes,
Sig.push_back(IIT_TRUNC_ARG);
else if (R->isSubClassOf("LLVMHalfElementsVectorType"))
Sig.push_back(IIT_HALF_VEC_ARG);
+ else if (R->isSubClassOf("LLVMVectorSameWidth")) {
+ Sig.push_back(IIT_SAME_VEC_WIDTH_ARG);
+ Sig.push_back((Number << 2) | ArgCodes[Number]);
+ MVT::SimpleValueType VT = getValueType(R->getValueAsDef("ElTy"));
+ EncodeFixedValueType(VT, Sig);
+ return;
+ }
else
Sig.push_back(IIT_ARG);
return Sig.push_back((Number << 2) | ArgCodes[Number]);