diff options
author | Tom Stellard <thomas.stellard@amd.com> | 2014-01-22 19:24:14 +0000 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2014-01-22 19:24:14 +0000 |
commit | 7dd37ae57a00f1c664b9ae0e9451c1717cf5348d (patch) | |
tree | 5d8dbdcdb3fd9d7086591a870d716f12ed8941ca /lib | |
parent | 79e3fb53d618d12e239275ef055200bbd6f8253e (diff) |
R600/SI: Add support for i8 and i16 private loads/stores
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@199823 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Target/R600/AMDGPUISelLowering.cpp | 78 | ||||
-rw-r--r-- | lib/Target/R600/AMDGPUISelLowering.h | 1 | ||||
-rw-r--r-- | lib/Target/R600/R600ISelLowering.cpp | 13 | ||||
-rw-r--r-- | lib/Target/R600/SIISelLowering.cpp | 60 |
4 files changed, 141 insertions, 11 deletions
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp index c59be7ce243..a65dd65b6e6 100644 --- a/lib/Target/R600/AMDGPUISelLowering.cpp +++ b/lib/Target/R600/AMDGPUISelLowering.cpp @@ -589,18 +589,96 @@ SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op, return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, &Chains[0], NumElts); } +SDValue AMDGPUTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { + SDLoc DL(Op); + LoadSDNode *Load = cast<LoadSDNode>(Op); + ISD::LoadExtType ExtType = Load->getExtensionType(); + + if (Load->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS || + ExtType == ISD::NON_EXTLOAD || Load->getMemoryVT().bitsGE(MVT::i32)) + return SDValue(); + + + EVT VT = Op.getValueType(); + EVT MemVT = Load->getMemoryVT(); + unsigned Mask = 0; + if (Load->getMemoryVT() == MVT::i8) { + Mask = 0xff; + } else if (Load->getMemoryVT() == MVT::i16) { + Mask = 0xffff; + } + SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Load->getBasePtr(), + DAG.getConstant(2, MVT::i32)); + SDValue Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(), + Load->getChain(), Ptr, + DAG.getTargetConstant(0, MVT::i32), + Op.getOperand(2)); + SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, + Load->getBasePtr(), + DAG.getConstant(0x3, MVT::i32)); + SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx, + DAG.getConstant(3, MVT::i32)); + Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Ret, ShiftAmt); + Ret = DAG.getNode(ISD::AND, DL, MVT::i32, Ret, + DAG.getConstant(Mask, MVT::i32)); + if (ExtType == ISD::SEXTLOAD) { + SDValue SExtShift = DAG.getConstant( + VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32); + Ret = DAG.getNode(ISD::SHL, DL, MVT::i32, Ret, SExtShift); + Ret = DAG.getNode(ISD::SRA, DL, MVT::i32, Ret, SExtShift); + } + + return Ret; +} + SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { + SDLoc DL(Op); SDValue Result = AMDGPUTargetLowering::MergeVectorStore(Op, DAG); if (Result.getNode()) { return Result; } StoreSDNode *Store = cast<StoreSDNode>(Op); + SDValue Chain = Store->getChain(); if ((Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS || Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) && Store->getValue().getValueType().isVector()) { return SplitVectorStore(Op, DAG); } + + if (Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS && + Store->getMemoryVT().bitsLT(MVT::i32)) { + unsigned Mask = 0; + if (Store->getMemoryVT() == MVT::i8) { + Mask = 0xff; + } else if (Store->getMemoryVT() == MVT::i16) { + Mask = 0xffff; + } + SDValue TruncPtr = DAG.getZExtOrTrunc(Store->getBasePtr(), DL, MVT::i32); + SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, TruncPtr, + DAG.getConstant(2, MVT::i32)); + SDValue Dst = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32, + Chain, Ptr, DAG.getTargetConstant(0, MVT::i32)); + SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, TruncPtr, + DAG.getConstant(0x3, MVT::i32)); + SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx, + DAG.getConstant(3, MVT::i32)); + SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32, + Store->getValue()); + SDValue MaskedValue = DAG.getNode(ISD::AND, DL, MVT::i32, SExtValue, + DAG.getConstant(Mask, MVT::i32)); + SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32, + MaskedValue, ShiftAmt); + SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32, DAG.getConstant(Mask, MVT::i32), + ShiftAmt); + DstMask = DAG.getNode(ISD::XOR, DL, MVT::i32, DstMask, + DAG.getConstant(0xffffffff, MVT::i32)); + Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask); + + SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue); + return DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, + Chain, Value, Ptr, DAG.getTargetConstant(0, MVT::i32)); + } return SDValue(); } diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index 2dfd3cf492a..fd6e3a59985 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -54,6 +54,7 @@ protected: /// \brief Split a vector load into multiple scalar loads. SDValue SplitVectorLoad(const SDValue &Op, SelectionDAG &DAG) const; SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; bool isHWTrueValue(SDValue Op) const; bool isHWFalseValue(SDValue Op) const; diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index 8d71919704d..03feabe23e6 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -1113,6 +1113,10 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } + SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG); + if (Ret.getNode()) { + return Ret; + } // Lowering for indirect addressing const MachineFunction &MF = DAG.getMachineFunction(); @@ -1204,6 +1208,15 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const SDValue Ptr = Op.getOperand(1); SDValue LoweredLoad; + SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG); + if (Ret.getNode()) { + SDValue Ops[2]; + Ops[0] = Ret; + Ops[1] = Chain; + return DAG.getMergeValues(Ops, 2, DL); + } + + if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) { SDValue MergedValues[2] = { SplitVectorLoad(Op, DAG), diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 4fb844439ab..9430689c61c 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -125,11 +125,17 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Expand); - setLoadExtAction(ISD::EXTLOAD, MVT::i32, Expand); + setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom); + setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom); setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, Expand); setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom); + setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom); + setLoadExtAction(ISD::EXTLOAD, MVT::i32, Expand); setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + setTruncStoreAction(MVT::i32, MVT::i8, Custom); + setTruncStoreAction(MVT::i32, MVT::i16, Custom); setTruncStoreAction(MVT::f64, MVT::f32, Expand); setTruncStoreAction(MVT::i64, MVT::i32, Expand); setTruncStoreAction(MVT::i128, MVT::i64, Expand); @@ -700,21 +706,26 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND, SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); LoadSDNode *Load = cast<LoadSDNode>(Op); + SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG); + SDValue MergedValues[2]; + MergedValues[1] = Load->getChain(); + if (Ret.getNode()) { + MergedValues[0] = Ret; + return DAG.getMergeValues(MergedValues, 2, DL); + } - if (Load->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) + if (Load->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) { return SDValue(); + } SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Load->getBasePtr(), DAG.getConstant(2, MVT::i32)); + Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(), + Load->getChain(), Ptr, + DAG.getTargetConstant(0, MVT::i32), + Op.getOperand(2)); - SDValue Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(), - Load->getChain(), Ptr, - DAG.getTargetConstant(0, MVT::i32), - Op.getOperand(2)); - SDValue MergedValues[2] = { - Ret, - Load->getChain() - }; + MergedValues[0] = Ret; return DAG.getMergeValues(MergedValues, 2, DL); } @@ -796,7 +807,34 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Store->getChain(); SmallVector<SDValue, 8> Values; - if (VT == MVT::i64) { + if (Store->isTruncatingStore()) { + unsigned Mask = 0; + if (Store->getMemoryVT() == MVT::i8) { + Mask = 0xff; + } else if (Store->getMemoryVT() == MVT::i16) { + Mask = 0xffff; + } + SDValue Dst = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32, + Chain, Store->getBasePtr(), + DAG.getConstant(0, MVT::i32)); + SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, Store->getBasePtr(), + DAG.getConstant(0x3, MVT::i32)); + SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx, + DAG.getConstant(3, MVT::i32)); + SDValue MaskedValue = DAG.getNode(ISD::AND, DL, MVT::i32, Store->getValue(), + DAG.getConstant(Mask, MVT::i32)); + SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32, + MaskedValue, ShiftAmt); + SDValue RotrAmt = DAG.getNode(ISD::SUB, DL, MVT::i32, + DAG.getConstant(32, MVT::i32), ShiftAmt); + SDValue DstMask = DAG.getNode(ISD::ROTR, DL, MVT::i32, + DAG.getConstant(Mask, MVT::i32), + RotrAmt); + Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask); + Dst = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue); + + Values.push_back(Dst); + } else if (VT == MVT::i64) { for (unsigned i = 0; i < 2; ++i) { Values.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Store->getValue(), DAG.getConstant(i, MVT::i32))); |