diff options
Diffstat (limited to 'lib/Target/AMDGPU/MIMGInstructions.td')
-rw-r--r-- | lib/Target/AMDGPU/MIMGInstructions.td | 80 |
1 files changed, 80 insertions, 0 deletions
diff --git a/lib/Target/AMDGPU/MIMGInstructions.td b/lib/Target/AMDGPU/MIMGInstructions.td
index 9fd0abd9a3d..e2935318bb6 100644
--- a/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/lib/Target/AMDGPU/MIMGInstructions.td
@@ -429,6 +429,86 @@ defm IMAGE_SAMPLE_C_CD_CL_O : MIMG_Sampler <0x0000006f, "image_sample_c_cd_cl_o"
 //def IMAGE_SAMPLER : MIMG_NoPattern_ <"image_sampler", 0x0000007f>;
 }
 
+/********** ============================== **********/
+/********** Dimension-aware image patterns **********/
+/********** ============================== **********/
+// getDwordsType: maps a dword count to an opcode-name suffix, a value type and a register class.
+class getDwordsType<int dwords> {
+  string suffix = !if(!lt(dwords, 1), ?,               // dwords < 1: field left unset
+                  !if(!eq(dwords, 1), "_V1",
+                  !if(!eq(dwords, 2), "_V2",
+                  !if(!le(dwords, 4), "_V4",           // 3 or 4 dwords (1 and 2 were handled above)
+                  !if(!le(dwords, 8), "_V8",           // 5..8 dwords
+                  !if(!le(dwords, 16), "_V16", ?))))));  // 9..16 dwords; more than 16 is left unset
+  ValueType VT = !if(!lt(dwords, 1), ?,                // same thresholds as `suffix`, selecting the value type
+                 !if(!eq(dwords, 1), f32,
+                 !if(!eq(dwords, 2), v2f32,
+                 !if(!le(dwords, 4), v4f32,
+                 !if(!le(dwords, 8), v8f32,
+                 !if(!le(dwords, 16), v16f32, ?))))));
+  RegisterClass VReg = !if(!lt(dwords, 1), ?,          // same thresholds as `suffix`, selecting the register class
+                       !if(!eq(dwords, 1), VGPR_32,
+                       !if(!eq(dwords, 2), VReg_64,
+                       !if(!le(dwords, 4), VReg_128,
+                       !if(!le(dwords, 8), VReg_256,
+                       !if(!le(dwords, 16), VReg_512, ?))))));
+}
+// makeRegSequence_Fold: accumulator record for the !foldl in makeRegSequence (running index + dag built so far).
+class makeRegSequence_Fold<int i, dag d> {
+  int idx = i;   // index used to form the next "sub<idx>" sub-register name
+  dag lhs = d;   // dag accumulated so far
+}
+// makeRegSequence: builds, in `ret`, a dag that combines the operands named in `names` into one value of type vt.
+class makeRegSequence<ValueType vt, RegisterClass RC, list<string> names> {  // NOTE(review): RC is not referenced in this body - confirm whether that is intended
+  dag ret =
+    !if(!eq(!size(names), 1),
+        !dag(COPY, [?]<dag>, [names[0]]),              // exactly one name: a COPY dag with one unset operand named names[0]
+        !foldl(makeRegSequence_Fold<0, (vt (IMPLICIT_DEF))>, names, f, name,  // otherwise fold over names, starting from idx 0 and (vt (IMPLICIT_DEF))
+               makeRegSequence_Fold<
+                 !add(f.idx, 1),                       // advance the index for the next element
+                 !con((INSERT_SUBREG f.lhs),           // the dag built so far becomes the first INSERT_SUBREG operand
+                      !dag(INSERT_SUBREG, [?, !cast<SubRegIndex>("sub"#f.idx)],  // followed by an unset operand and the SubRegIndex record named "sub"#f.idx
+                           [name, ?]))>).lhs);         // the unset operand is named `name`; `ret` is the final accumulated dag
+}
+// ImageSampleDimPattern: a GCNPat for intrinsic I whose PatternToMatch and ResultInstrs are assigned by the `let`s in the body.
+class ImageSampleDimPattern<AMDGPUImageDimSample I,
+                            string dop, ValueType dty,
+                            string suffix = ""> : GCNPat<(undef), (undef)> {  // NOTE(review): the (undef) arguments look like placeholders superseded by the `let`s below - confirm against GCNPat
+  dag AddrDag = !dag(I, !foreach(arg, I.P.AddrFloatArgs, arg.Type.VT),  // dag with operator I: one operand per entry of I.P.AddrFloatArgs, typed arg.Type.VT
+                        !foreach(arg, I.P.AddrFloatArgs, arg.Name));    // each operand named arg.Name
+  getDwordsType AddrDwords = getDwordsType<!size(I.P.AddrFloatArgs)>;   // suffix/type/register class chosen by the number of address arguments
+  string aop = AddrDwords.suffix;                      // address part of the instruction name looked up below
+  ValueType aty = AddrDwords.VT;                       // NOTE(review): aty is not referenced again in this class
+
+  let PatternToMatch =
+    (dty !con(AddrDag, (I v8i32:$rsrc, v4i32:$sampler, i32:$dmask, i1:$unorm,  // result type dty; address operands first, then rsrc/sampler/dmask/unorm
+                           0, 0)));                    // last two operands are matched as literal 0 (commented-out alternative below)
+//                           i32:$texfailctrl, i32:$cachepolicy)));
+  let ResultInstrs = [
+    (!cast<Instruction>(!strconcat("IMAGE_SAMPLE", I.P.OpMod, dop, aop, suffix))  // instruction record found by name: "IMAGE_SAMPLE" + OpMod + dop + aop + suffix
+      makeRegSequence<AddrDwords.VT, AddrDwords.VReg,  // address operand: dag built from the address argument names
+                      !foreach(arg, I.P.AddrFloatArgs, arg.Name)>.ret,
+      $rsrc, $sampler,
+      (as_i32imm $dmask), (as_i1imm $unorm), 0 /*(as_i1imm $glc)*/, 0 /*(as_i1imm $slc)*/,
+      0, 0, 0 /*(as_i1imm $lwe)*/, { I.P.Dim.DA })     // final operand is taken from I.P.Dim.DA
+  ];
+}
+// Instantiate six patterns per entry of AMDGPUImageDimSampleIntrinsics: three unconditional, three gated by SubtargetPredicate.
+foreach intr = AMDGPUImageDimSampleIntrinsics in {
+  def intr#_pat1 : ImageSampleDimPattern<intr, "_V1", f32>;
+  def intr#_pat2 : ImageSampleDimPattern<intr, "_V2", v2f32>;
+  def intr#_pat3 : ImageSampleDimPattern<intr, "_V4", v4f32>;
+
+  let SubtargetPredicate = HasUnpackedD16VMem in {
+    def intr#_pat4 : ImageSampleDimPattern<intr, "_V1", f16, "_D16_gfx80">;
+  } // End HasUnpackedD16VMem.
+
+  let SubtargetPredicate = HasPackedD16VMem in {
+    def intr#_pat5 : ImageSampleDimPattern<intr, "_V1", f16, "_D16">;
+    def intr#_pat6 : ImageSampleDimPattern<intr, "_V1", v2f16, "_D16">;
+  } // End HasPackedD16VMem.
+}
+
 /********** ======================= **********/
 /********** Image sampling patterns **********/
 /********** ======================= **********/
|