diff options
author | Daniel Schürmann <daniel@schuermann.dev> | 2020-08-31 10:55:51 +0100 |
---|---|---|
committer | Dylan Baker <dylan.c.baker@intel.com> | 2021-01-13 11:24:11 -0800 |
commit | 12c0bfaaf9cec59392495141e36c37e74f9e553c (patch) | |
tree | 42e1644617dbb1b56c251d7e735f6ec08b1a81fd | |
parent | 6a7552aa105926609990aa02f6b2313fd41be16b (diff) |
radv: vectorize 16bit instructions
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6680>
(cherry picked from commit fcd2ef23e5f1d50008166168e772815c0213e37c)
-rw-r--r-- | .pick_status.json | 2 |
-rw-r--r-- | docs/relnotes/new_features.txt | 1 |
-rw-r--r-- | src/amd/vulkan/radv_pipeline.c | 35 |
-rw-r--r-- | src/amd/vulkan/radv_shader.c | 1 |
4 files changed, 38 insertions, 1 deletion
diff --git a/.pick_status.json b/.pick_status.json
index 9f0259d383a..0e0301fa208 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -337,7 +337,7 @@
         "description": "radv: vectorize 16bit instructions",
         "nominated": false,
         "nomination_type": null,
-        "resolution": 4,
+        "resolution": 1,
         "master_sha": null,
         "because_sha": null
     },
diff --git a/docs/relnotes/new_features.txt b/docs/relnotes/new_features.txt
index f1299047cd1..b6bdd134346 100644
--- a/docs/relnotes/new_features.txt
+++ b/docs/relnotes/new_features.txt
@@ -12,3 +12,4 @@ Classic swrast dri driver removed in favor of gallium swrast (llvmpipe or softpi
 Panfrost g31/g52/g72 exposes ES 3.0
 Panfrost t760+ exposes GL 3.1 (including on Bifrost)
 Sparse memory support on RADV
+Rapid packed math (16bit-vectorization) on RADV
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index fbdd9c9b971..a910cfc16b5 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -3157,6 +3157,39 @@ lower_bit_size_callback(const nir_instr *instr, void *_)
 	return 0;
 }
 
+static bool
+opt_vectorize_callback(const nir_instr *instr, void *_)
+{
+	assert(instr->type == nir_instr_type_alu);
+	nir_alu_instr *alu = nir_instr_as_alu(instr);
+	unsigned bit_size = alu->dest.dest.ssa.bit_size;
+	if (bit_size != 16)
+		return false;
+
+	switch (alu->op) {
+	case nir_op_fadd:
+	case nir_op_fsub:
+	case nir_op_fmul:
+	case nir_op_fneg:
+	case nir_op_fsat:
+	case nir_op_fmin:
+	case nir_op_fmax:
+	case nir_op_iadd:
+	case nir_op_isub:
+	case nir_op_imul:
+	case nir_op_imin:
+	case nir_op_imax:
+	case nir_op_umin:
+	case nir_op_umax:
+	case nir_op_ishl:
+	case nir_op_ishr:
+	case nir_op_ushr:
+		return true;
+	default:
+		return false;
+	}
+}
+
 VkResult radv_create_shaders(struct radv_pipeline *pipeline,
 			     struct radv_device *device,
 			     struct radv_pipeline_cache *cache,
@@ -3373,6 +3406,8 @@ VkResult radv_create_shaders(struct radv_pipeline *pipeline,
 
 				if (device->physical_device->rad_info.chip_class >= GFX8)
 					nir_opt_remove_phis(nir[i]); /* cleanup LCSSA phis */
+				if (device->physical_device->rad_info.chip_class >= GFX9)
+					NIR_PASS_V(nir[i], nir_opt_vectorize, opt_vectorize_callback, NULL);
 			}
 
 			/* cleanup passes */
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 653bb0b5ca4..846913875dd 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -84,6 +84,7 @@ static const struct nir_shader_compiler_options nir_options = {
 	.use_scoped_barrier = true,
 	.max_unroll_iterations = 32,
 	.use_interpolated_input_intrinsics = true,
+	.vectorize_vec2_16bit = true,
 	/* nir_lower_int64() isn't actually called for the LLVM backend, but
 	 * this helps the loop unrolling heuristics. */
 	.lower_int64_options = nir_lower_imul64 |