summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Daniel Schürmann <daniel@schuermann.dev> 2020-08-31 10:55:51 +0100
committer: Dylan Baker <dylan.c.baker@intel.com> 2021-01-13 11:24:11 -0800
commit: 12c0bfaaf9cec59392495141e36c37e74f9e553c (patch)
tree: 42e1644617dbb1b56c251d7e735f6ec08b1a81fd
parent: 6a7552aa105926609990aa02f6b2313fd41be16b (diff)
radv: vectorize 16bit instructions
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6680>
(cherry picked from commit fcd2ef23e5f1d50008166168e772815c0213e37c)
-rw-r--r-- .pick_status.json 2
-rw-r--r-- docs/relnotes/new_features.txt 1
-rw-r--r-- src/amd/vulkan/radv_pipeline.c 35
-rw-r--r-- src/amd/vulkan/radv_shader.c 1
4 files changed, 38 insertions, 1 deletions
diff --git a/.pick_status.json b/.pick_status.json
index 9f0259d383a..0e0301fa208 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -337,7 +337,7 @@
"description": "radv: vectorize 16bit instructions",
"nominated": false,
"nomination_type": null,
- "resolution": 4,
+ "resolution": 1,
"master_sha": null,
"because_sha": null
},
diff --git a/docs/relnotes/new_features.txt b/docs/relnotes/new_features.txt
index f1299047cd1..b6bdd134346 100644
--- a/docs/relnotes/new_features.txt
+++ b/docs/relnotes/new_features.txt
@@ -12,3 +12,4 @@ Classic swrast dri driver removed in favor of gallium swrast (llvmpipe or softpi
Panfrost g31/g52/g72 exposes ES 3.0
Panfrost t760+ exposes GL 3.1 (including on Bifrost)
Sparse memory support on RADV
+Rapid packed math (16bit-vectorization) on RADV
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index fbdd9c9b971..a910cfc16b5 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -3157,6 +3157,39 @@ lower_bit_size_callback(const nir_instr *instr, void *_)
return 0;
}
+static bool
+opt_vectorize_callback(const nir_instr *instr, void *_)
+{
+ assert(instr->type == nir_instr_type_alu);
+ nir_alu_instr *alu = nir_instr_as_alu(instr);
+ unsigned bit_size = alu->dest.dest.ssa.bit_size;
+ if (bit_size != 16)
+ return false;
+
+ switch (alu->op) {
+ case nir_op_fadd:
+ case nir_op_fsub:
+ case nir_op_fmul:
+ case nir_op_fneg:
+ case nir_op_fsat:
+ case nir_op_fmin:
+ case nir_op_fmax:
+ case nir_op_iadd:
+ case nir_op_isub:
+ case nir_op_imul:
+ case nir_op_imin:
+ case nir_op_imax:
+ case nir_op_umin:
+ case nir_op_umax:
+ case nir_op_ishl:
+ case nir_op_ishr:
+ case nir_op_ushr:
+ return true;
+ default:
+ return false;
+ }
+}
+
VkResult radv_create_shaders(struct radv_pipeline *pipeline,
struct radv_device *device,
struct radv_pipeline_cache *cache,
@@ -3373,6 +3406,8 @@ VkResult radv_create_shaders(struct radv_pipeline *pipeline,
if (device->physical_device->rad_info.chip_class >= GFX8)
nir_opt_remove_phis(nir[i]); /* cleanup LCSSA phis */
+ if (device->physical_device->rad_info.chip_class >= GFX9)
+ NIR_PASS_V(nir[i], nir_opt_vectorize, opt_vectorize_callback, NULL);
}
/* cleanup passes */
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 653bb0b5ca4..846913875dd 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -84,6 +84,7 @@ static const struct nir_shader_compiler_options nir_options = {
.use_scoped_barrier = true,
.max_unroll_iterations = 32,
.use_interpolated_input_intrinsics = true,
+ .vectorize_vec2_16bit = true,
/* nir_lower_int64() isn't actually called for the LLVM backend, but
* this helps the loop unrolling heuristics. */
.lower_int64_options = nir_lower_imul64 |