1 files changed, 188 insertions, 44 deletions
diff --git a/src/microsoft/clc/clc_compiler_test.cpp b/src/microsoft/clc/clc_compiler_test.cpp
index 4d3182f17c3..72d7b1ccf52 100644
--- a/src/microsoft/clc/clc_compiler_test.cpp
+++ b/src/microsoft/clc/clc_compiler_test.cpp
@@ -21,15 +21,18 @@
  * IN THE SOFTWARE.
  */
 
+#include <cmath>
 #include <stdio.h>
 #include <stdint.h>
 #include <stdexcept>
 #include <vector>
 
+#include <unknwn.h>
 #include <directx/d3d12.h>
 #include <dxgi1_4.h>
 #include <gtest/gtest.h>
 #include <wrl.h>
+#include <dxguids/dxguids.h>
 
 #include "compute_test.h"
 
@@ -81,6 +84,37 @@ TEST_F(ComputeTest, two_global_arrays)
       EXPECT_EQ(g1[i], expected[i]);
 }
 
+TEST_F(ComputeTest, nested_arrays)
+{
+   const char *kernel_source = R"(
+float4 DoMagic(float4 inValue)
+{
+    const float testArr[3][3] = {
+        {0.1f, 0.2f, 0.3f},
+        {0.4f, 0.5f, 0.6f},
+        {0.7f, 0.8f, 0.9f}}; 
+    float4 outValue = inValue;
+    outValue.x = inValue.x * testArr[0][0] + inValue.y * testArr[0][1] + inValue.z * testArr[0][2];
+    outValue.y = inValue.x * testArr[1][0] + inValue.y * testArr[1][1] + inValue.z * testArr[1][2];
+    outValue.z = inValue.x * testArr[2][0] + inValue.y * testArr[2][1] + inValue.z * testArr[2][2];
+    return outValue;
+}
+__kernel void main_test(__global float4 *g1, __global float4 *g2)
+{
+   uint idx = get_global_id(0);
+   g1[idx] = DoMagic(g2[idx]);
+})";
+   auto g1 = ShaderArg<float>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
+   auto g2 = ShaderArg<float>({ 0.2f, 0.4f, 0.6f, 1.0f }, SHADER_ARG_INPUT);
+   const float expected[] = {
+      0.28f, 0.64f, 1.0f, 1.0f
+   };
+
+   run_shader(kernel_source, 1, 1, 1, g1, g2);
+   for (int i = 0; i < g1.size(); ++i)
+      EXPECT_FLOAT_EQ(g1[i], expected[i]);
+}
+
 /* Disabled until saturated conversions from f32->i64 fixed (mesa/mesa#3824) */
 TEST_F(ComputeTest, DISABLED_i64tof32)
 {
@@ -104,7 +138,7 @@ TEST_F(ComputeTest, DISABLED_i64tof32)
                                   -0x4000003fffffffffLL,
                                   -0x4000004000000001LL,
                                   0,
-				  INT64_MIN },
+                                  INT64_MIN },
                                 SHADER_ARG_INPUT);
    auto out = ShaderArg<int64_t>(std::vector<int64_t>(12, 0xdeadbeed), SHADER_ARG_OUTPUT);
    const int64_t expected[] = {
@@ -178,8 +212,7 @@ TEST_F(ComputeTest, null_constant_ptr)
       EXPECT_EQ(g1[i], expected2[i]);
 }
 
-/* This test seems to fail on older versions of WARP. */
-TEST_F(ComputeTest, DISABLED_null_global_ptr)
+TEST_F(ComputeTest, null_global_ptr)
 {
    const char *kernel_source =
    "__kernel void main_test(__global uint *g1, __global uint *g2)\n\
@@ -351,11 +384,8 @@ TEST_F(ComputeTest, globals_16bit)
       EXPECT_EQ(inout[i], expected[i]);
 }
 
-TEST_F(ComputeTest, DISABLED_globals_64bit)
+TEST_F(ComputeTest, globals_64bit)
 {
-   /* Test disabled, because we need a fixed version of WARP that hasn't
-      been officially shipped yet */
-
    const char *kernel_source =
    "__kernel void main_test(__global unsigned long *inout)\n\
    {\n\
@@ -428,6 +458,7 @@ TEST_F(ComputeTest, types_float_basics)
 
 TEST_F(ComputeTest, DISABLED_types_double_basics)
 {
+   /* Disabled because doubles are unsupported */
    const char *kernel_source =
    "__kernel void main_test(__global uint *output)\n\
    {\n\
@@ -541,12 +572,12 @@ TEST_F(ComputeTest, types_for_loop)
       EXPECT_EQ(output[i], expected[i]);
 }
 
-TEST_F(ComputeTest, DISABLED_complex_types_local_array_long)
+TEST_F(ComputeTest, complex_types_local_array_long)
 {
    const char *kernel_source =
    "__kernel void main_test(__global ulong *inout)\n\
    {\n\
-      ushort tmp[] = {\n\
+      ulong tmp[] = {\n\
          get_global_id(1) + 0x00000000,\n\
          get_global_id(1) + 0x10000001,\n\
          get_global_id(1) + 0x20000020,\n\
@@ -556,7 +587,7 @@ TEST_F(ComputeTest, DISABLED_complex_types_local_array_long)
       inout[idx] = tmp[idx];\n\
    }\n";
    auto inout = ShaderArg<uint64_t>({ 0, 0, 0, 0 }, SHADER_ARG_INOUT);
-   const uint16_t expected[] = {
+   const uint64_t expected[] = {
       0x00000000, 0x10000001, 0x20000020, 0x30000300,
    };
    run_shader(kernel_source, inout.size(), 1, 1, inout);
@@ -855,11 +886,8 @@ TEST_F(ComputeTest, complex_types_constant_uint8)
    }
 }
 
-TEST_F(ComputeTest, DISABLED_complex_types_const_array)
+TEST_F(ComputeTest, complex_types_const_array)
 {
-   /* DISABLED because current release versions of WARP either return
-    * rubbish from reads or crash: they are not prepared to handle
-    * non-float global constants */
    const char *kernel_source =
    "__kernel void main_test(__global uint *output)\n\
    {\n\
@@ -901,11 +929,8 @@ TEST_F(ComputeTest, mem_access_load_store_ordering)
       EXPECT_EQ(output[i], expected[i]);
 }
 
-TEST_F(ComputeTest, DISABLED_two_const_arrays)
+TEST_F(ComputeTest, two_const_arrays)
 {
-   /* DISABLED because current release versions of WARP either return
-    * rubbish from reads or crash: they are not prepared to handle
-    * non-float global constants */
    const char *kernel_source =
    "__kernel void main_test(__global uint *output)\n\
    {\n\
@@ -1220,10 +1245,8 @@ TEST_F(ComputeTest, sin)
    }
 }
 
-TEST_F(ComputeTest, DISABLED_cosh)
+TEST_F(ComputeTest, cosh)
 {
-   /* Disabled because of WARP failures, where we fetch incorrect results when
-    * sourcing from non-float ICBs */
    const char *kernel_source =
    "__kernel void main_test(__global float *inout)\n\
    {\n\
@@ -1327,7 +1350,7 @@ TEST_F(ComputeTest, log2)
    }\n";
    auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
    const float expected[] = {
-      log(0.0f) / log(2), log(1.0f) / log(2), log(2.0f) / log(2), log(3.0f) / log(2)
+      log(0.0f) / log(2.0f), log(1.0f) / log(2.0f), log(2.0f) / log(2.0f), log(3.0f) / log(2.0f)
    };
    run_shader(kernel_source, inout.size(), 1, 1, inout);
    for (int i = 0; i < inout.size(); ++i)
@@ -1554,13 +1577,33 @@ TEST_F(ComputeTest, image)
 
 TEST_F(ComputeTest, image_two_reads)
 {
+   // Note: unnecessary control flow is present so that nir_opt_dead_cf kicks in, causing
+   // nir_rematerialize_derefs_in_use_blocks to run. The duplicated uses ensure that the
+   // per-var-deref processing works correctly.
+   const char* kernel_source =
+   R"(__kernel void main_test(image2d_t image, int is_float, __global float* output)
+   {
+      int x = get_global_id(0);
+      if (is_float)
+         x = get_global_id(0);
+      if (is_float)
+         output[x] = read_imagef(image, (int2)(0, 0)).x;
+      else
+         output[x] = (float)read_imagei(image, (int2)(0, 0)).x;
+      if (is_float)
+         output[x] = read_imagef(image, (int2)(0, 0)).x;
+      else
+         output[x] = (float)read_imagei(image, (int2)(0, 0)).x;
+   })";
+   Shader shader = compile(std::vector<const char*>({ kernel_source }));
+   validate(shader);
+}
+
+TEST_F(ComputeTest, image_unused)
+{
    const char* kernel_source =
-   "__kernel void main_test(image2d_t image, int is_float, __global float* output)\n\
+   "__kernel void main_test(read_only image2d_t input, write_only image2d_t output)\n\
    {\n\
-      if (is_float)\n\
-         output[get_global_id(0)] = read_imagef(image, (int2)(0, 0)).x;\n\
-      else \n\
-         output[get_global_id(0)] = (float)read_imagei(image, (int2)(0, 0)).x;\n\
    }\n";
    Shader shader = compile(std::vector<const char*>({ kernel_source }));
    validate(shader);
@@ -1688,8 +1731,8 @@ TEST_F(ComputeTest, vec_hint_float4)
       inout[get_global_id(0)] *= inout[get_global_id(1)];\n\
    }";
    Shader shader = compile({ kernel_source });
-   EXPECT_EQ(shader.obj->kernels[0].vec_hint_size, 4);
-   EXPECT_EQ(shader.obj->kernels[0].vec_hint_type, CLC_VEC_HINT_TYPE_FLOAT);
+   EXPECT_EQ(shader.metadata->kernels[0].vec_hint_size, 4);
+   EXPECT_EQ(shader.metadata->kernels[0].vec_hint_type, CLC_VEC_HINT_TYPE_FLOAT);
 }
 
 TEST_F(ComputeTest, vec_hint_uchar2)
@@ -1700,8 +1743,8 @@ TEST_F(ComputeTest, vec_hint_uchar2)
       inout[get_global_id(0)] *= inout[get_global_id(1)];\n\
    }";
    Shader shader = compile({ kernel_source });
-   EXPECT_EQ(shader.obj->kernels[0].vec_hint_size, 2);
-   EXPECT_EQ(shader.obj->kernels[0].vec_hint_type, CLC_VEC_HINT_TYPE_CHAR);
+   EXPECT_EQ(shader.metadata->kernels[0].vec_hint_size, 2);
+   EXPECT_EQ(shader.metadata->kernels[0].vec_hint_type, CLC_VEC_HINT_TYPE_CHAR);
 }
 
 TEST_F(ComputeTest, vec_hint_none)
@@ -1712,11 +1755,14 @@ TEST_F(ComputeTest, vec_hint_none)
       inout[get_global_id(0)] *= inout[get_global_id(1)];\n\
    }";
    Shader shader = compile({ kernel_source });
-   EXPECT_EQ(shader.obj->kernels[0].vec_hint_size, 0);
+   EXPECT_EQ(shader.metadata->kernels[0].vec_hint_size, 0);
 }
 
 TEST_F(ComputeTest, DISABLED_debug_layer_failure)
 {
+   /* This is a negative test case, it intentionally triggers a failure to validate the mechanism
+    * is in place, so other tests will fail if they produce debug messages
+    */
    const char *kernel_source =
    "__kernel void main_test(__global float *inout, float mul)\n\
    {\n\
@@ -1758,10 +1804,7 @@ TEST_F(ComputeTest, compiler_defines)
    EXPECT_EQ(out[1], 100);
 }
 
-/* There's a bug in WARP turning atomic_add(ptr, x) into
- * atomic_add(ptr, x * 4). Works fine on intel HW.
- */
-TEST_F(ComputeTest, DISABLED_global_atomic_add)
+TEST_F(ComputeTest, global_atomic_add)
 {
    const char *kernel_source =
    "__kernel void main_test(__global int *inout, __global int *old)\n\
@@ -2110,10 +2153,7 @@ TEST_F(ComputeTest, packed_struct_local)
    }
 }
 
-/* DISABLED because current release versions of WARP either return
- * rubbish from reads or crash: they are not prepared to handle
- * non-float global constants */
-TEST_F(ComputeTest, DISABLED_packed_struct_const)
+TEST_F(ComputeTest, packed_struct_const)
 {
 #pragma pack(push, 1)
    struct s { uint8_t uc; uint64_t ul; uint16_t us; };
@@ -2147,14 +2187,12 @@ TEST_F(ComputeTest, DISABLED_packed_struct_const)
    }
 }
 
-TEST_F(ComputeTest, DISABLED_printf)
+TEST_F(ComputeTest, printf)
 {
    const char *kernel_source = R"(
    __kernel void main_test(__global float *src, __global uint *dest)
    {
-      __constant char *format_str = "%s: %f";
-      __constant char *str_val = "Test";
-      *dest = printf(format_str, str_val, src[0]);
+      *dest = printf("%s: %f", "Test", src[0]);
    })";
 
    auto src = ShaderArg<float>({ 1.0f }, SHADER_ARG_INPUT);
@@ -2232,3 +2270,109 @@ TEST_F(ComputeTest, unused_arg)
    for (int i = 0; i < 4; ++i)
       EXPECT_EQ(dest[i], i + 1);
 }
+
+TEST_F(ComputeTest, spec_constant)
+{
+   const char *spirv_asm = R"(
+               OpCapability Addresses
+               OpCapability Kernel
+               OpCapability Int64
+          %1 = OpExtInstImport "OpenCL.std"
+               OpMemoryModel Physical64 OpenCL
+               OpEntryPoint Kernel %2 "main_test" %__spirv_BuiltInGlobalInvocationId
+          %4 = OpString "kernel_arg_type.main_test.uint*,"
+               OpSource OpenCL_C 102000
+               OpName %__spirv_BuiltInGlobalInvocationId "__spirv_BuiltInGlobalInvocationId"
+               OpName %output "output"
+               OpName %entry "entry"
+               OpName %output_addr "output.addr"
+               OpName %id "id"
+               OpName %call "call"
+               OpName %conv "conv"
+               OpName %idxprom "idxprom"
+               OpName %arrayidx "arrayidx"
+               OpName %add "add"
+               OpName %mul "mul"
+               OpName %idxprom1 "idxprom1"
+               OpName %arrayidx2 "arrayidx2"
+               OpDecorate %__spirv_BuiltInGlobalInvocationId BuiltIn GlobalInvocationId
+               OpDecorate %__spirv_BuiltInGlobalInvocationId Constant
+               OpDecorate %id Alignment 4
+               OpDecorate %output_addr Alignment 8
+               OpDecorate %uint_1 SpecId 1
+      %ulong = OpTypeInt 64 0
+       %uint = OpTypeInt 32 0
+     %uint_1 = OpSpecConstant %uint 1
+    %v3ulong = OpTypeVector %ulong 3
+%_ptr_Input_v3ulong = OpTypePointer Input %v3ulong
+       %void = OpTypeVoid
+%_ptr_CrossWorkgroup_uint = OpTypePointer CrossWorkgroup %uint
+         %24 = OpTypeFunction %void %_ptr_CrossWorkgroup_uint
+%_ptr_Function__ptr_CrossWorkgroup_uint = OpTypePointer Function %_ptr_CrossWorkgroup_uint
+%_ptr_Function_uint = OpTypePointer Function %uint
+%__spirv_BuiltInGlobalInvocationId = OpVariable %_ptr_Input_v3ulong Input
+          %2 = OpFunction %void DontInline %24
+     %output = OpFunctionParameter %_ptr_CrossWorkgroup_uint
+      %entry = OpLabel
+%output_addr = OpVariable %_ptr_Function__ptr_CrossWorkgroup_uint Function
+         %id = OpVariable %_ptr_Function_uint Function
+               OpStore %output_addr %output Aligned 8
+         %27 = OpLoad %v3ulong %__spirv_BuiltInGlobalInvocationId Aligned 32
+       %call = OpCompositeExtract %ulong %27 0
+       %conv = OpUConvert %uint %call
+               OpStore %id %conv Aligned 4
+         %28 = OpLoad %_ptr_CrossWorkgroup_uint %output_addr Aligned 8
+         %29 = OpLoad %uint %id Aligned 4
+    %idxprom = OpUConvert %ulong %29
+   %arrayidx = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uint %28 %idxprom
+         %30 = OpLoad %uint %arrayidx Aligned 4
+         %31 = OpLoad %uint %id Aligned 4
+        %add = OpIAdd %uint %31 %uint_1
+        %mul = OpIMul %uint %30 %add
+         %32 = OpLoad %_ptr_CrossWorkgroup_uint %output_addr Aligned 8
+         %33 = OpLoad %uint %id Aligned 4
+   %idxprom1 = OpUConvert %ulong %33
+  %arrayidx2 = OpInBoundsPtrAccessChain %_ptr_CrossWorkgroup_uint %32 %idxprom1
+               OpStore %arrayidx2 %mul Aligned 4
+               OpReturn
+               OpFunctionEnd)";
+   Shader shader = assemble(spirv_asm);
+   Shader spec_shader = specialize(shader, 1, 5);
+
+   auto inout = ShaderArg<uint32_t>({ 0x00000001, 0x10000001, 0x00020002, 0x04010203 },
+      SHADER_ARG_INOUT);
+   const uint32_t expected[] = {
+      0x00000005, 0x60000006, 0x000e000e, 0x20081018
+   };
+   CompileArgs args = { (unsigned)inout.size(), 1, 1 };
+   run_shader(spec_shader, args, inout);
+   for (int i = 0; i < inout.size(); ++i)
+      EXPECT_EQ(inout[i], expected[i]);
+}
+
+TEST_F(ComputeTest, arg_metadata)
+{
+   const char *kernel_source = R"(
+   __kernel void main_test(
+      __global int *undec_ptr,
+      __global volatile int *vol_ptr,
+      __global const int *const_ptr,
+      __global int *restrict restr_ptr,
+      __global const int *restrict const_restr_ptr,
+      __constant int *const_ptr2)
+   {
+   })";
+   Shader shader = compile({ kernel_source });
+   EXPECT_EQ(shader.metadata->kernels[0].args[0].address_qualifier, CLC_KERNEL_ARG_ADDRESS_GLOBAL);
+   EXPECT_EQ(shader.metadata->kernels[0].args[0].type_qualifier, 0);
+   EXPECT_EQ(shader.metadata->kernels[0].args[1].address_qualifier, CLC_KERNEL_ARG_ADDRESS_GLOBAL);
+   EXPECT_EQ(shader.metadata->kernels[0].args[1].type_qualifier, CLC_KERNEL_ARG_TYPE_VOLATILE);
+   EXPECT_EQ(shader.metadata->kernels[0].args[2].address_qualifier, CLC_KERNEL_ARG_ADDRESS_GLOBAL);
+   EXPECT_EQ(shader.metadata->kernels[0].args[2].type_qualifier, CLC_KERNEL_ARG_TYPE_CONST);
+   EXPECT_EQ(shader.metadata->kernels[0].args[3].address_qualifier, CLC_KERNEL_ARG_ADDRESS_GLOBAL);
+   EXPECT_EQ(shader.metadata->kernels[0].args[3].type_qualifier, CLC_KERNEL_ARG_TYPE_RESTRICT);
+   EXPECT_EQ(shader.metadata->kernels[0].args[4].address_qualifier, CLC_KERNEL_ARG_ADDRESS_GLOBAL);
+   EXPECT_EQ(shader.metadata->kernels[0].args[4].type_qualifier, CLC_KERNEL_ARG_TYPE_RESTRICT | CLC_KERNEL_ARG_TYPE_CONST);
+   EXPECT_EQ(shader.metadata->kernels[0].args[5].address_qualifier, CLC_KERNEL_ARG_ADDRESS_CONSTANT);
+   EXPECT_EQ(shader.metadata->kernels[0].args[5].type_qualifier, CLC_KERNEL_ARG_TYPE_CONST);
+}