diff options
Diffstat (limited to 'lib/Target/AMDGPU')
-rw-r--r-- | lib/Target/AMDGPU/AMDGPU.td | 1 | ||||
-rw-r--r-- | lib/Target/AMDGPU/AMDGPUInstrInfo.cpp | 7 | ||||
-rw-r--r-- | lib/Target/AMDGPU/AMDGPUInstrInfo.h | 11 | ||||
-rw-r--r-- | lib/Target/AMDGPU/AMDGPUSearchableTables.td | 29 | ||||
-rw-r--r-- | lib/Target/AMDGPU/CMakeLists.txt | 1 | ||||
-rw-r--r-- | lib/Target/AMDGPU/MIMGInstructions.td | 80 | ||||
-rw-r--r-- | lib/Target/AMDGPU/SIISelLowering.cpp | 251 |
7 files changed, 166 insertions, 214 deletions
diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td index 6355c4fa6eb..f01d9ab5bc4 100644 --- a/lib/Target/AMDGPU/AMDGPU.td +++ b/lib/Target/AMDGPU/AMDGPU.td @@ -798,3 +798,4 @@ include "AMDGPURegisterInfo.td" include "AMDGPURegisterBanks.td" include "AMDGPUInstructions.td" include "AMDGPUCallingConv.td" +include "AMDGPUSearchableTables.td" diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp b/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp index 9b9ec063864..248632efeb9 100644 --- a/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp @@ -25,6 +25,13 @@ using namespace llvm; #define GET_INSTRINFO_CTOR_DTOR #include "AMDGPUGenInstrInfo.inc" +namespace llvm { +namespace AMDGPU { +#define GET_RSRCINTRINSIC_IMPL +#include "AMDGPUGenSearchableTables.inc" +} +} + // Pin the vtable to this file. void AMDGPUInstrInfo::anchor() {} diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/lib/Target/AMDGPU/AMDGPUInstrInfo.h index a9fcd483463..7488bbcb76f 100644 --- a/lib/Target/AMDGPU/AMDGPUInstrInfo.h +++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.h @@ -51,6 +51,17 @@ public: /// not exist. If Opcode is not a pseudo instruction, this is identity. int pseudoToMCOpcode(int Opcode) const; }; + +namespace AMDGPU { + +struct RsrcIntrinsic { + unsigned Intr; + uint8_t RsrcArg; + bool IsImage; +}; +const RsrcIntrinsic *lookupRsrcIntrinsicByIntr(unsigned Intr); + +} // end AMDGPU namespace } // End llvm namespace #endif diff --git a/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/lib/Target/AMDGPU/AMDGPUSearchableTables.td new file mode 100644 index 00000000000..d61bdbeb3bd --- /dev/null +++ b/lib/Target/AMDGPU/AMDGPUSearchableTables.td @@ -0,0 +1,29 @@ +//===-- AMDGPUSearchableTables.td - ------------------------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +include "llvm/TableGen/SearchableTable.td" + +//===----------------------------------------------------------------------===// +// Resource intrinsics table. +//===----------------------------------------------------------------------===// + +class RsrcIntrinsic<AMDGPURsrcIntrinsic intr> : SearchableTable { + let SearchableFields = ["Intr"]; + let EnumNameField = ?; + + Intrinsic Intr = !cast<Intrinsic>(intr); + bits<8> RsrcArg = intr.RsrcArg; + bit IsImage = intr.IsImage; +} + +foreach intr = !listconcat(AMDGPUBufferIntrinsics, + AMDGPUImageIntrinsics, + AMDGPUImageDimSampleIntrinsics) in { + def : RsrcIntrinsic<!cast<AMDGPURsrcIntrinsic>(intr)>; +} diff --git a/lib/Target/AMDGPU/CMakeLists.txt b/lib/Target/AMDGPU/CMakeLists.txt index 1a14db4d62c..748c1391650 100644 --- a/lib/Target/AMDGPU/CMakeLists.txt +++ b/lib/Target/AMDGPU/CMakeLists.txt @@ -13,6 +13,7 @@ tablegen(LLVM AMDGPUGenAsmMatcher.inc -gen-asm-matcher) tablegen(LLVM AMDGPUGenDisassemblerTables.inc -gen-disassembler) tablegen(LLVM AMDGPUGenMCPseudoLowering.inc -gen-pseudo-lowering) tablegen(LLVM AMDGPUGenRegisterBank.inc -gen-register-bank) +tablegen(LLVM AMDGPUGenSearchableTables.inc -gen-searchable-tables) add_public_tablegen_target(AMDGPUCommonTableGen) add_llvm_target(AMDGPUCodeGen diff --git a/lib/Target/AMDGPU/MIMGInstructions.td b/lib/Target/AMDGPU/MIMGInstructions.td index 9fd0abd9a3d..e2935318bb6 100644 --- a/lib/Target/AMDGPU/MIMGInstructions.td +++ b/lib/Target/AMDGPU/MIMGInstructions.td @@ -429,6 +429,86 @@ defm IMAGE_SAMPLE_C_CD_CL_O : MIMG_Sampler <0x0000006f, "image_sample_c_cd_cl_o" //def IMAGE_SAMPLER : MIMG_NoPattern_ <"image_sampler", 0x0000007f>; } +/********** ============================== **********/ +/********** Dimension-aware image patterns **********/ +/********** ============================== **********/ + +class getDwordsType<int dwords> { + string suffix = !if(!lt(dwords, 1), ?, + 
!if(!eq(dwords, 1), "_V1", + !if(!eq(dwords, 2), "_V2", + !if(!le(dwords, 4), "_V4", + !if(!le(dwords, 8), "_V8", + !if(!le(dwords, 16), "_V16", ?)))))); + ValueType VT = !if(!lt(dwords, 1), ?, + !if(!eq(dwords, 1), f32, + !if(!eq(dwords, 2), v2f32, + !if(!le(dwords, 4), v4f32, + !if(!le(dwords, 8), v8f32, + !if(!le(dwords, 16), v16f32, ?)))))); + RegisterClass VReg = !if(!lt(dwords, 1), ?, + !if(!eq(dwords, 1), VGPR_32, + !if(!eq(dwords, 2), VReg_64, + !if(!le(dwords, 4), VReg_128, + !if(!le(dwords, 8), VReg_256, + !if(!le(dwords, 16), VReg_512, ?)))))); +} + +class makeRegSequence_Fold<int i, dag d> { + int idx = i; + dag lhs = d; +} + +class makeRegSequence<ValueType vt, RegisterClass RC, list<string> names> { + dag ret = + !if(!eq(!size(names), 1), + !dag(COPY, [?]<dag>, [names[0]]), + !foldl(makeRegSequence_Fold<0, (vt (IMPLICIT_DEF))>, names, f, name, + makeRegSequence_Fold< + !add(f.idx, 1), + !con((INSERT_SUBREG f.lhs), + !dag(INSERT_SUBREG, [?, !cast<SubRegIndex>("sub"#f.idx)], + [name, ?]))>).lhs); +} + +class ImageSampleDimPattern<AMDGPUImageDimSample I, + string dop, ValueType dty, + string suffix = ""> : GCNPat<(undef), (undef)> { + dag AddrDag = !dag(I, !foreach(arg, I.P.AddrFloatArgs, arg.Type.VT), + !foreach(arg, I.P.AddrFloatArgs, arg.Name)); + getDwordsType AddrDwords = getDwordsType<!size(I.P.AddrFloatArgs)>; + string aop = AddrDwords.suffix; + ValueType aty = AddrDwords.VT; + + let PatternToMatch = + (dty !con(AddrDag, (I v8i32:$rsrc, v4i32:$sampler, i32:$dmask, i1:$unorm, + 0, 0))); +// i32:$texfailctrl, i32:$cachepolicy))); + let ResultInstrs = [ + (!cast<Instruction>(!strconcat("IMAGE_SAMPLE", I.P.OpMod, dop, aop, suffix)) + makeRegSequence<AddrDwords.VT, AddrDwords.VReg, + !foreach(arg, I.P.AddrFloatArgs, arg.Name)>.ret, + $rsrc, $sampler, + (as_i32imm $dmask), (as_i1imm $unorm), 0 /*(as_i1imm $glc)*/, 0 /*(as_i1imm $slc)*/, + 0, 0, 0 /*(as_i1imm $lwe)*/, { I.P.Dim.DA }) + ]; +} + +foreach intr = AMDGPUImageDimSampleIntrinsics in { + def 
intr#_pat1 : ImageSampleDimPattern<intr, "_V1", f32>; + def intr#_pat2 : ImageSampleDimPattern<intr, "_V2", v2f32>; + def intr#_pat3 : ImageSampleDimPattern<intr, "_V4", v4f32>; + + let SubtargetPredicate = HasUnpackedD16VMem in { + def intr#_pat4 : ImageSampleDimPattern<intr, "_V1", f16, "_D16_gfx80">; + } // End HasUnpackedD16VMem. + + let SubtargetPredicate = HasPackedD16VMem in { + def intr#_pat5 : ImageSampleDimPattern<intr, "_V1", f16, "_D16">; + def intr#_pat6 : ImageSampleDimPattern<intr, "_V1", v2f16, "_D16">; + } // End HasPackedD16VMem. +} + /********** ======================= **********/ /********** Image sampling patterns **********/ /********** ======================= **********/ diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index 7dc9dcf31fc..562ca2003db 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -563,6 +563,43 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &CI, MachineFunction &MF, unsigned IntrID) const { + if (const AMDGPU::RsrcIntrinsic *RsrcIntr = + AMDGPU::lookupRsrcIntrinsicByIntr(IntrID)) { + SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::getVT(CI.getType()); + + if (RsrcIntr->IsImage) { + Info.ptrVal = MFI->getImagePSV( + *MF.getSubtarget<SISubtarget>().getInstrInfo(), + CI.getArgOperand(RsrcIntr->RsrcArg)); + Info.align = 0; + } else { + Info.ptrVal = MFI->getBufferPSV( + *MF.getSubtarget<SISubtarget>().getInstrInfo(), + CI.getArgOperand(RsrcIntr->RsrcArg)); + } + + AttributeList Attr = Intrinsic::getAttributes(CI.getContext(), + (Intrinsic::ID)IntrID); + + Info.flags = MachineMemOperand::MODereferenceable; + if (Attr.hasFnAttribute(Attribute::ReadOnly)) + Info.flags |= MachineMemOperand::MOLoad; + else if (Attr.hasFnAttribute(Attribute::WriteOnly)) + Info.flags |= MachineMemOperand::MOStore; + else { + // Atomic + Info.flags = 
MachineMemOperand::MOLoad | + MachineMemOperand::MOStore | + MachineMemOperand::MODereferenceable; + + // XXX - Should this be volatile without known ordering? + Info.flags |= MachineMemOperand::MOVolatile; + } + return true; + } + switch (IntrID) { case Intrinsic::amdgcn_atomic_inc: case Intrinsic::amdgcn_atomic_dec: @@ -582,220 +619,6 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, return true; } - // Image load. - case Intrinsic::amdgcn_image_load: - case Intrinsic::amdgcn_image_load_mip: - - // Sample. - case Intrinsic::amdgcn_image_sample: - case Intrinsic::amdgcn_image_sample_cl: - case Intrinsic::amdgcn_image_sample_d: - case Intrinsic::amdgcn_image_sample_d_cl: - case Intrinsic::amdgcn_image_sample_l: - case Intrinsic::amdgcn_image_sample_b: - case Intrinsic::amdgcn_image_sample_b_cl: - case Intrinsic::amdgcn_image_sample_lz: - case Intrinsic::amdgcn_image_sample_cd: - case Intrinsic::amdgcn_image_sample_cd_cl: - - // Sample with comparison. - case Intrinsic::amdgcn_image_sample_c: - case Intrinsic::amdgcn_image_sample_c_cl: - case Intrinsic::amdgcn_image_sample_c_d: - case Intrinsic::amdgcn_image_sample_c_d_cl: - case Intrinsic::amdgcn_image_sample_c_l: - case Intrinsic::amdgcn_image_sample_c_b: - case Intrinsic::amdgcn_image_sample_c_b_cl: - case Intrinsic::amdgcn_image_sample_c_lz: - case Intrinsic::amdgcn_image_sample_c_cd: - case Intrinsic::amdgcn_image_sample_c_cd_cl: - - // Sample with offsets. - case Intrinsic::amdgcn_image_sample_o: - case Intrinsic::amdgcn_image_sample_cl_o: - case Intrinsic::amdgcn_image_sample_d_o: - case Intrinsic::amdgcn_image_sample_d_cl_o: - case Intrinsic::amdgcn_image_sample_l_o: - case Intrinsic::amdgcn_image_sample_b_o: - case Intrinsic::amdgcn_image_sample_b_cl_o: - case Intrinsic::amdgcn_image_sample_lz_o: - case Intrinsic::amdgcn_image_sample_cd_o: - case Intrinsic::amdgcn_image_sample_cd_cl_o: - - // Sample with comparison and offsets. 
- case Intrinsic::amdgcn_image_sample_c_o: - case Intrinsic::amdgcn_image_sample_c_cl_o: - case Intrinsic::amdgcn_image_sample_c_d_o: - case Intrinsic::amdgcn_image_sample_c_d_cl_o: - case Intrinsic::amdgcn_image_sample_c_l_o: - case Intrinsic::amdgcn_image_sample_c_b_o: - case Intrinsic::amdgcn_image_sample_c_b_cl_o: - case Intrinsic::amdgcn_image_sample_c_lz_o: - case Intrinsic::amdgcn_image_sample_c_cd_o: - case Intrinsic::amdgcn_image_sample_c_cd_cl_o: - - // Basic gather4 - case Intrinsic::amdgcn_image_gather4: - case Intrinsic::amdgcn_image_gather4_cl: - case Intrinsic::amdgcn_image_gather4_l: - case Intrinsic::amdgcn_image_gather4_b: - case Intrinsic::amdgcn_image_gather4_b_cl: - case Intrinsic::amdgcn_image_gather4_lz: - - // Gather4 with comparison - case Intrinsic::amdgcn_image_gather4_c: - case Intrinsic::amdgcn_image_gather4_c_cl: - case Intrinsic::amdgcn_image_gather4_c_l: - case Intrinsic::amdgcn_image_gather4_c_b: - case Intrinsic::amdgcn_image_gather4_c_b_cl: - case Intrinsic::amdgcn_image_gather4_c_lz: - - // Gather4 with offsets - case Intrinsic::amdgcn_image_gather4_o: - case Intrinsic::amdgcn_image_gather4_cl_o: - case Intrinsic::amdgcn_image_gather4_l_o: - case Intrinsic::amdgcn_image_gather4_b_o: - case Intrinsic::amdgcn_image_gather4_b_cl_o: - case Intrinsic::amdgcn_image_gather4_lz_o: - - // Gather4 with comparison and offsets - case Intrinsic::amdgcn_image_gather4_c_o: - case Intrinsic::amdgcn_image_gather4_c_cl_o: - case Intrinsic::amdgcn_image_gather4_c_l_o: - case Intrinsic::amdgcn_image_gather4_c_b_o: - case Intrinsic::amdgcn_image_gather4_c_b_cl_o: - case Intrinsic::amdgcn_image_gather4_c_lz_o: { - SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); - Info.opc = ISD::INTRINSIC_W_CHAIN; - Info.memVT = MVT::getVT(CI.getType()); - Info.ptrVal = MFI->getImagePSV( - *MF.getSubtarget<SISubtarget>().getInstrInfo(), - CI.getArgOperand(1)); - Info.align = 0; - Info.flags = MachineMemOperand::MOLoad | - 
MachineMemOperand::MODereferenceable; - return true; - } - case Intrinsic::amdgcn_image_store: - case Intrinsic::amdgcn_image_store_mip: { - SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); - Info.opc = ISD::INTRINSIC_VOID; - Info.memVT = MVT::getVT(CI.getArgOperand(0)->getType()); - Info.ptrVal = MFI->getImagePSV( - *MF.getSubtarget<SISubtarget>().getInstrInfo(), - CI.getArgOperand(2)); - Info.flags = MachineMemOperand::MOStore | - MachineMemOperand::MODereferenceable; - Info.align = 0; - return true; - } - case Intrinsic::amdgcn_image_atomic_swap: - case Intrinsic::amdgcn_image_atomic_add: - case Intrinsic::amdgcn_image_atomic_sub: - case Intrinsic::amdgcn_image_atomic_smin: - case Intrinsic::amdgcn_image_atomic_umin: - case Intrinsic::amdgcn_image_atomic_smax: - case Intrinsic::amdgcn_image_atomic_umax: - case Intrinsic::amdgcn_image_atomic_and: - case Intrinsic::amdgcn_image_atomic_or: - case Intrinsic::amdgcn_image_atomic_xor: - case Intrinsic::amdgcn_image_atomic_inc: - case Intrinsic::amdgcn_image_atomic_dec: { - SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); - Info.opc = ISD::INTRINSIC_W_CHAIN; - Info.memVT = MVT::getVT(CI.getType()); - Info.ptrVal = MFI->getImagePSV( - *MF.getSubtarget<SISubtarget>().getInstrInfo(), - CI.getArgOperand(2)); - - Info.flags = MachineMemOperand::MOLoad | - MachineMemOperand::MOStore | - MachineMemOperand::MODereferenceable; - - // XXX - Should this be volatile without known ordering? 
- Info.flags |= MachineMemOperand::MOVolatile; - return true; - } - case Intrinsic::amdgcn_image_atomic_cmpswap: { - SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); - Info.opc = ISD::INTRINSIC_W_CHAIN; - Info.memVT = MVT::getVT(CI.getType()); - Info.ptrVal = MFI->getImagePSV( - *MF.getSubtarget<SISubtarget>().getInstrInfo(), - CI.getArgOperand(3)); - - Info.flags = MachineMemOperand::MOLoad | - MachineMemOperand::MOStore | - MachineMemOperand::MODereferenceable; - - // XXX - Should this be volatile without known ordering? - Info.flags |= MachineMemOperand::MOVolatile; - return true; - } - case Intrinsic::amdgcn_tbuffer_load: - case Intrinsic::amdgcn_buffer_load: - case Intrinsic::amdgcn_buffer_load_format: { - SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); - Info.opc = ISD::INTRINSIC_W_CHAIN; - Info.ptrVal = MFI->getBufferPSV( - *MF.getSubtarget<SISubtarget>().getInstrInfo(), - CI.getArgOperand(0)); - Info.memVT = MVT::getVT(CI.getType()); - Info.flags = MachineMemOperand::MOLoad | - MachineMemOperand::MODereferenceable; - - // There is a constant offset component, but there are additional register - // offsets which could break AA if we set the offset to anything non-0. 
- return true; - } - case Intrinsic::amdgcn_tbuffer_store: - case Intrinsic::amdgcn_buffer_store: - case Intrinsic::amdgcn_buffer_store_format: { - SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); - Info.opc = ISD::INTRINSIC_VOID; - Info.ptrVal = MFI->getBufferPSV( - *MF.getSubtarget<SISubtarget>().getInstrInfo(), - CI.getArgOperand(1)); - Info.memVT = MVT::getVT(CI.getArgOperand(0)->getType()); - Info.flags = MachineMemOperand::MOStore | - MachineMemOperand::MODereferenceable; - return true; - } - case Intrinsic::amdgcn_buffer_atomic_swap: - case Intrinsic::amdgcn_buffer_atomic_add: - case Intrinsic::amdgcn_buffer_atomic_sub: - case Intrinsic::amdgcn_buffer_atomic_smin: - case Intrinsic::amdgcn_buffer_atomic_umin: - case Intrinsic::amdgcn_buffer_atomic_smax: - case Intrinsic::amdgcn_buffer_atomic_umax: - case Intrinsic::amdgcn_buffer_atomic_and: - case Intrinsic::amdgcn_buffer_atomic_or: - case Intrinsic::amdgcn_buffer_atomic_xor: { - SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); - Info.opc = ISD::INTRINSIC_W_CHAIN; - Info.ptrVal = MFI->getBufferPSV( - *MF.getSubtarget<SISubtarget>().getInstrInfo(), - CI.getArgOperand(1)); - Info.memVT = MVT::getVT(CI.getType()); - Info.flags = MachineMemOperand::MOLoad | - MachineMemOperand::MOStore | - MachineMemOperand::MODereferenceable | - MachineMemOperand::MOVolatile; - return true; - } - case Intrinsic::amdgcn_buffer_atomic_cmpswap: { - SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); - Info.opc = ISD::INTRINSIC_W_CHAIN; - Info.ptrVal = MFI->getBufferPSV( - *MF.getSubtarget<SISubtarget>().getInstrInfo(), - CI.getArgOperand(2)); - Info.memVT = MVT::getVT(CI.getType()); - Info.flags = MachineMemOperand::MOLoad | - MachineMemOperand::MOStore | - MachineMemOperand::MODereferenceable | - MachineMemOperand::MOVolatile; - return true; - } default: return false; } |