Diffstat (limited to 'lib/Target/AMDGPU/MIMGInstructions.td')
 lib/Target/AMDGPU/MIMGInstructions.td | 80 ++++++++++++++++++++++++++++++++++
 1 file changed, 80 insertions(+), 0 deletions(-)
diff --git a/lib/Target/AMDGPU/MIMGInstructions.td b/lib/Target/AMDGPU/MIMGInstructions.td
index 9fd0abd9a3d..e2935318bb6 100644
--- a/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/lib/Target/AMDGPU/MIMGInstructions.td
@@ -429,6 +429,86 @@ defm IMAGE_SAMPLE_C_CD_CL_O : MIMG_Sampler <0x0000006f, "image_sample_c_cd_cl_o"
//def IMAGE_SAMPLER : MIMG_NoPattern_ <"image_sampler", 0x0000007f>;
}
+/********** ============================== **********/
+/********** Dimension-aware image patterns **********/
+/********** ============================== **********/
+
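+// Map a dword count to the matching MIMG opcode suffix, value type and
+// register class. Counts round up to the next supported vector width
+// (e.g. 3 dwords select the _V4/VReg_128 variants); counts outside
+// [1, 16] are left uninitialized (?).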
+class getDwordsType<int dwords> {
+ string suffix = !if(!lt(dwords, 1), ?,
+ !if(!eq(dwords, 1), "_V1",
+ !if(!eq(dwords, 2), "_V2",
+ !if(!le(dwords, 4), "_V4",
+ !if(!le(dwords, 8), "_V8",
+ !if(!le(dwords, 16), "_V16", ?))))));
+ ValueType VT = !if(!lt(dwords, 1), ?,
+ !if(!eq(dwords, 1), f32,
+ !if(!eq(dwords, 2), v2f32,
+ !if(!le(dwords, 4), v4f32,
+ !if(!le(dwords, 8), v8f32,
+ !if(!le(dwords, 16), v16f32, ?))))));
+ RegisterClass VReg = !if(!lt(dwords, 1), ?,
+ !if(!eq(dwords, 1), VGPR_32,
+ !if(!eq(dwords, 2), VReg_64,
+ !if(!le(dwords, 4), VReg_128,
+ !if(!le(dwords, 8), VReg_256,
+ !if(!le(dwords, 16), VReg_512, ?))))));
+}
+
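+// Accumulator for the !foldl in makeRegSequence below: idx is the next
+// sub-register index to use, lhs is the dag built so far.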
+class makeRegSequence_Fold<int i, dag d> {
+ int idx = i;
+ dag lhs = d;
+}
+
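+// Build a result dag that assembles the scalar operands given by names into
+// one vector value of type vt, inserting each into successive sub-registers
+// of an IMPLICIT_DEF. For names = ["s", "t"] this yields
+//   (INSERT_SUBREG (INSERT_SUBREG (vt (IMPLICIT_DEF)), $s, sub0), $t, sub1)
+// while a single name degenerates to a plain COPY.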
+class makeRegSequence<ValueType vt, RegisterClass RC, list<string> names> {
+ dag ret =
+ !if(!eq(!size(names), 1),
+ !dag(COPY, [?]<dag>, [names[0]]),
+ !foldl(makeRegSequence_Fold<0, (vt (IMPLICIT_DEF))>, names, f, name,
+ makeRegSequence_Fold<
+ !add(f.idx, 1),
+ !con((INSERT_SUBREG f.lhs),
+ !dag(INSERT_SUBREG, [?, !cast<SubRegIndex>("sub"#f.idx)],
+ [name, ?]))>).lhs);
+}
+
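+// Match one dimension-aware image sample intrinsic against the IMAGE_SAMPLE*
+// instruction whose name is built from the sample opcode modifier, the
+// data-size suffix dop, the address-size suffix derived from the intrinsic's
+// float address arguments, and an optional D16 suffix.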
+class ImageSampleDimPattern<AMDGPUImageDimSample I,
+ string dop, ValueType dty,
+ string suffix = ""> : GCNPat<(undef), (undef)> {
+ dag AddrDag = !dag(I, !foreach(arg, I.P.AddrFloatArgs, arg.Type.VT),
+ !foreach(arg, I.P.AddrFloatArgs, arg.Name));
+ getDwordsType AddrDwords = getDwordsType<!size(I.P.AddrFloatArgs)>;
+ string aop = AddrDwords.suffix;
+ ValueType aty = AddrDwords.VT;
+
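+  // The source pattern takes the address as individual scalar operands;
+  // texfailctrl and cachepolicy are hard-coded to 0 for now.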
+ let PatternToMatch =
+ (dty !con(AddrDag, (I v8i32:$rsrc, v4i32:$sampler, i32:$dmask, i1:$unorm,
+ 0, 0)));
+// i32:$texfailctrl, i32:$cachepolicy)));
+ let ResultInstrs = [
+ (!cast<Instruction>(!strconcat("IMAGE_SAMPLE", I.P.OpMod, dop, aop, suffix))
+ makeRegSequence<AddrDwords.VT, AddrDwords.VReg,
+ !foreach(arg, I.P.AddrFloatArgs, arg.Name)>.ret,
+ $rsrc, $sampler,
+ (as_i32imm $dmask), (as_i1imm $unorm), 0 /*(as_i1imm $glc)*/, 0 /*(as_i1imm $slc)*/,
+ 0, 0, 0 /*(as_i1imm $lwe)*/, { I.P.Dim.DA })
+ ];
+}
+
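+// Instantiate one pattern per result size: 32-bit results at one, two and
+// four dwords, plus 16-bit (D16) results gated on whether the subtarget
+// stores D16 data packed or unpacked in VGPRs.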
+foreach intr = AMDGPUImageDimSampleIntrinsics in {
+ def intr#_pat1 : ImageSampleDimPattern<intr, "_V1", f32>;
+ def intr#_pat2 : ImageSampleDimPattern<intr, "_V2", v2f32>;
+ def intr#_pat3 : ImageSampleDimPattern<intr, "_V4", v4f32>;
+
+ let SubtargetPredicate = HasUnpackedD16VMem in {
+ def intr#_pat4 : ImageSampleDimPattern<intr, "_V1", f16, "_D16_gfx80">;
+ } // End HasUnpackedD16VMem.
+
+ let SubtargetPredicate = HasPackedD16VMem in {
+ def intr#_pat5 : ImageSampleDimPattern<intr, "_V1", f16, "_D16">;
+ def intr#_pat6 : ImageSampleDimPattern<intr, "_V1", v2f16, "_D16">;
+ } // End HasPackedD16VMem.
+}
+
/********** ======================= **********/
/********** Image sampling patterns **********/
/********** ======================= **********/