Diffstat (limited to 'lib/Target/AMDGPU/MIMGInstructions.td')
 lib/Target/AMDGPU/MIMGInstructions.td | 80 ++++++++++++++++++++++++++++++++++
 1 file changed, 80 insertions(+), 0 deletions(-)
diff --git a/lib/Target/AMDGPU/MIMGInstructions.td b/lib/Target/AMDGPU/MIMGInstructions.td
index 9fd0abd9a3d..e2935318bb6 100644
--- a/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/lib/Target/AMDGPU/MIMGInstructions.td
@@ -429,6 +429,86 @@ defm IMAGE_SAMPLE_C_CD_CL_O : MIMG_Sampler <0x0000006f, "image_sample_c_cd_cl_o"
//def IMAGE_SAMPLER : MIMG_NoPattern_ <"image_sampler", 0x0000007f>;
}
+/********** ============================== **********/
+/********** Dimension-aware image patterns **********/
+/********** ============================== **********/
+
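+// Map a dword count to the matching MIMG opcode suffix, value type and
+// register class. Counts round up to the next supported vector width
+// (e.g. 3 dwords select the _V4/VReg_128 variants); counts outside
+// [1, 16] are left uninitialized (?).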
+class getDwordsType<int dwords> {
+ string suffix = !if(!lt(dwords, 1), ?,
+ !if(!eq(dwords, 1), "_V1",
+ !if(!eq(dwords, 2), "_V2",
+ !if(!le(dwords, 4), "_V4",
+ !if(!le(dwords, 8), "_V8",
+ !if(!le(dwords, 16), "_V16", ?))))));
+ ValueType VT = !if(!lt(dwords, 1), ?,
+ !if(!eq(dwords, 1), f32,
+ !if(!eq(dwords, 2), v2f32,
+ !if(!le(dwords, 4), v4f32,
+ !if(!le(dwords, 8), v8f32,
+ !if(!le(dwords, 16), v16f32, ?))))));
+ RegisterClass VReg = !if(!lt(dwords, 1), ?,
+ !if(!eq(dwords, 1), VGPR_32,
+ !if(!eq(dwords, 2), VReg_64,
+ !if(!le(dwords, 4), VReg_128,
+ !if(!le(dwords, 8), VReg_256,
+ !if(!le(dwords, 16), VReg_512, ?))))));
+}
+
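+// Accumulator for the !foldl in makeRegSequence below: idx is the next
+// sub-register index to use, lhs is the dag built so far.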
+class makeRegSequence_Fold<int i, dag d> {
+ int idx = i;
+ dag lhs = d;
+}
+
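+// Build a result dag that assembles the scalar operands given by names into
+// one vector value of type vt, inserting each into successive sub-registers
+// of an IMPLICIT_DEF. For names = ["s", "t"] this yields
+//   (INSERT_SUBREG (INSERT_SUBREG (vt (IMPLICIT_DEF)), $s, sub0), $t, sub1)
+// while a single name degenerates to a plain COPY.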
+class makeRegSequence<ValueType vt, RegisterClass RC, list<string> names> {
+ dag ret =
+ !if(!eq(!size(names), 1),
+ !dag(COPY, [?]<dag>, [names[0]]),
+ !foldl(makeRegSequence_Fold<0, (vt (IMPLICIT_DEF))>, names, f, name,
+ makeRegSequence_Fold<
+ !add(f.idx, 1),
+ !con((INSERT_SUBREG f.lhs),
+ !dag(INSERT_SUBREG, [?, !cast<SubRegIndex>("sub"#f.idx)],
+ [name, ?]))>).lhs);
+}
+
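+// Match one dimension-aware image sample intrinsic against the IMAGE_SAMPLE*
+// instruction whose name is built from the sample opcode modifier, the
+// data-size suffix dop, the address-size suffix derived from the intrinsic's
+// float address arguments, and an optional D16 suffix.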
+class ImageSampleDimPattern<AMDGPUImageDimSample I,
+ string dop, ValueType dty,
+ string suffix = ""> : GCNPat<(undef), (undef)> {
+ dag AddrDag = !dag(I, !foreach(arg, I.P.AddrFloatArgs, arg.Type.VT),
+ !foreach(arg, I.P.AddrFloatArgs, arg.Name));
+ getDwordsType AddrDwords = getDwordsType<!size(I.P.AddrFloatArgs)>;
+ string aop = AddrDwords.suffix;
+ ValueType aty = AddrDwords.VT;
+
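+  // The source pattern takes the address as individual scalar operands;
+  // texfailctrl and cachepolicy are hard-coded to 0 for now.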
+ let PatternToMatch =
+ (dty !con(AddrDag, (I v8i32:$rsrc, v4i32:$sampler, i32:$dmask, i1:$unorm,
+ 0, 0)));
+// i32:$texfailctrl, i32:$cachepolicy)));
+ let ResultInstrs = [
+ (!cast<Instruction>(!strconcat("IMAGE_SAMPLE", I.P.OpMod, dop, aop, suffix))
+ makeRegSequence<AddrDwords.VT, AddrDwords.VReg,
+ !foreach(arg, I.P.AddrFloatArgs, arg.Name)>.ret,
+ $rsrc, $sampler,
+ (as_i32imm $dmask), (as_i1imm $unorm), 0 /*(as_i1imm $glc)*/, 0 /*(as_i1imm $slc)*/,
+ 0, 0, 0 /*(as_i1imm $lwe)*/, { I.P.Dim.DA })
+ ];
+}
+
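+// Instantiate one pattern per result size: 32-bit results at one, two and
+// four dwords, plus 16-bit (D16) results gated on whether the subtarget
+// stores D16 data packed or unpacked in VGPRs.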
+foreach intr = AMDGPUImageDimSampleIntrinsics in {
+ def intr#_pat1 : ImageSampleDimPattern<intr, "_V1", f32>;
+ def intr#_pat2 : ImageSampleDimPattern<intr, "_V2", v2f32>;
+ def intr#_pat3 : ImageSampleDimPattern<intr, "_V4", v4f32>;
+
+ let SubtargetPredicate = HasUnpackedD16VMem in {
+ def intr#_pat4 : ImageSampleDimPattern<intr, "_V1", f16, "_D16_gfx80">;
+ } // End HasUnpackedD16VMem.
+
+ let SubtargetPredicate = HasPackedD16VMem in {
+ def intr#_pat5 : ImageSampleDimPattern<intr, "_V1", f16, "_D16">;
+ def intr#_pat6 : ImageSampleDimPattern<intr, "_V1", v2f16, "_D16">;
+ } // End HasPackedD16VMem.
+}
+
/********** ======================= **********/
/********** Image sampling patterns **********/
/********** ======================= **********/