diff options
Diffstat (limited to 'lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 51 |
1 files changed, 48 insertions, 3 deletions
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 1bdb762a4b7..97103504198 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -4192,6 +4192,16 @@ static bool isUndefOrInRange(int Val, int Low, int Hi) { return (Val < 0) || (Val >= Low && Val < Hi); } +/// Return true if every element in Mask is undef or if its value +/// falls within the specified range [Low, Hi). +static bool isUndefOrInRange(ArrayRef<int> Mask, + int Low, int Hi) { + for (int M : Mask) + if (!isUndefOrInRange(M, Low, Hi)) + return false; + return true; +} + /// Val is either less than zero (undef) or equal to the specified value. static bool isUndefOrEqual(int Val, int CmpVal) { return (Val < 0 || Val == CmpVal); @@ -24834,12 +24844,47 @@ static bool matchPermuteVectorShuffle(MVT SrcVT, ArrayRef<int> Mask, "Expected unary shuffle"); } - // We only support permutation of 32/64 bit elements. - // TODO - support PSHUFLW/PSHUFHW. unsigned MaskScalarSizeInBits = SrcVT.getSizeInBits() / Mask.size(); + MVT MaskEltVT = MVT::getIntegerVT(MaskScalarSizeInBits); + + // Handle PSHUFLW/PSHUFHW repeated patterns. + if (MaskScalarSizeInBits == 16) { + SmallVector<int, 4> RepeatedMask; + if (is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask)) { + ArrayRef<int> LoMask(Mask.data() + 0, 4); + ArrayRef<int> HiMask(Mask.data() + 4, 4); + + // PSHUFLW: permute lower 4 elements only. + if (isUndefOrInRange(LoMask, 0, 4) && + isSequentialOrUndefInRange(HiMask, 0, 4, 4)) { + Shuffle = X86ISD::PSHUFLW; + ShuffleVT = MVT::getVectorVT(MVT::i16, SrcVT.getSizeInBits() / 16); + PermuteImm = getV4X86ShuffleImm(LoMask); + return true; + } + + // PSHUFHW: permute upper 4 elements only. + if (isUndefOrInRange(HiMask, 4, 8) && + isSequentialOrUndefInRange(LoMask, 0, 4, 0)) { + // Offset the HiMask so that we can create the shuffle immediate. + int OffsetHiMask[4]; + for (int i = 0; i != 4; ++i) + OffsetHiMask[i] = (HiMask[i] < 0 ? 
HiMask[i] : HiMask[i] - 4); + + Shuffle = X86ISD::PSHUFHW; + ShuffleVT = MVT::getVectorVT(MVT::i16, SrcVT.getSizeInBits() / 16); + PermuteImm = getV4X86ShuffleImm(OffsetHiMask); + return true; + } + + return false; + } + return false; + } + + // We only support permutation of 32/64 bit elements after this. if (MaskScalarSizeInBits != 32 && MaskScalarSizeInBits != 64) return false; - MVT MaskEltVT = MVT::getIntegerVT(MaskScalarSizeInBits); // AVX introduced the VPERMILPD/VPERMILPS float permutes, before then we // had to use 2-input SHUFPD/SHUFPS shuffles (not handled here). |