diff options
author | Marek Olšák <marek.olsak@amd.com> | 2023-04-06 05:39:02 -0400 |
---|---|---|
committer | Marge Bot <emma+marge@anholt.net> | 2024-03-15 19:55:46 +0000 |
commit | abf0de57ab5249abc5bff92e7eca88c0f3b3001e (patch) | |
tree | 8c8e2d2ae4d7e1e8f27f44988cf1935afcbd9d9f /src/gallium/drivers/radeonsi | |
parent | db5cf460d7aee0e7b39fe3b990078191b8dc995e (diff) |
radeonsi: enable uniform propagation for varyings except VP/Energy
It regresses performance of VP/Energy.
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26819>
Diffstat (limited to 'src/gallium/drivers/radeonsi')
-rw-r--r-- | src/gallium/drivers/radeonsi/si_get.c | 133 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader.h | 1 | ||||
-rw-r--r-- | src/gallium/drivers/radeonsi/si_shader_info.c | 5 |
3 files changed, 139 insertions, 0 deletions
diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c index c2fc213d9d7..8a3dbda96de 100644 --- a/src/gallium/drivers/radeonsi/si_get.c +++ b/src/gallium/drivers/radeonsi/si_get.c @@ -1339,6 +1339,137 @@ static int si_get_screen_fd(struct pipe_screen *screen) return ws->get_fd(ws); } +static unsigned si_varying_expression_max_cost(nir_shader *producer, nir_shader *consumer) +{ + unsigned num_profiles = si_get_num_shader_profiles(); + + for (unsigned i = 0; i < num_profiles; i++) { + if (_mesa_printed_sha1_equal(consumer->info.source_sha1, si_shader_profiles[i].sha1)) { + if (si_shader_profiles[i].options & SI_PROFILE_NO_OPT_UNIFORM_VARYINGS) + return 0; /* only propagate constants */ + break; + } + } + + switch (consumer->info.stage) { + case MESA_SHADER_TESS_CTRL: /* VS->TCS */ + /* Non-amplifying shaders can always have their varying expressions + * moved into later shaders. + */ + return UINT_MAX; + + case MESA_SHADER_GEOMETRY: /* VS->GS, TES->GS */ + return consumer->info.gs.vertices_in == 1 ? UINT_MAX : + consumer->info.gs.vertices_in == 2 ? 20 : 14; + + case MESA_SHADER_TESS_EVAL: /* VS->TES, TCS->TES */ + case MESA_SHADER_FRAGMENT: + /* Up to 3 uniforms and 5 ALUs. */ + return 14; + + default: + unreachable("unexpected shader stage"); + } +} + +static unsigned si_varying_estimate_instr_cost(nir_instr *instr) +{ + unsigned dst_bit_size, src_bit_size, num_dst_dwords; + nir_op alu_op; + + /* This is a very loose approximation based on gfx10. 
*/ + switch (instr->type) { + case nir_instr_type_alu: + dst_bit_size = nir_instr_as_alu(instr)->def.bit_size; + src_bit_size = nir_instr_as_alu(instr)->src[0].src.ssa->bit_size; + alu_op = nir_instr_as_alu(instr)->op; + num_dst_dwords = DIV_ROUND_UP(dst_bit_size, 32); + + switch (alu_op) { + case nir_op_mov: + case nir_op_vec2: + case nir_op_vec3: + case nir_op_vec4: + case nir_op_vec5: + case nir_op_vec8: + case nir_op_vec16: + case nir_op_fabs: + case nir_op_fneg: + case nir_op_fsat: + return 0; + + case nir_op_imul: + case nir_op_umul_low: + return dst_bit_size <= 16 ? 1 : 4 * num_dst_dwords; + + case nir_op_imul_high: + case nir_op_umul_high: + case nir_op_imul_2x32_64: + case nir_op_umul_2x32_64: + return 4; + + case nir_op_fexp2: + case nir_op_flog2: + case nir_op_frcp: + case nir_op_frsq: + case nir_op_fsqrt: + case nir_op_fsin: + case nir_op_fcos: + case nir_op_fsin_amd: + case nir_op_fcos_amd: + return 4; /* FP16 & FP32. */ + + case nir_op_fpow: + return 4 + 1 + 4; /* log2 + mul + exp2 */ + + case nir_op_fsign: + return dst_bit_size == 64 ? 4 : 3; /* See ac_build_fsign. */ + + case nir_op_idiv: + case nir_op_udiv: + case nir_op_imod: + case nir_op_umod: + case nir_op_irem: + return dst_bit_size == 64 ? 80 : 40; + + case nir_op_fdiv: + return dst_bit_size == 64 ? 80 : 5; /* FP16 & FP32: rcp + mul */ + + case nir_op_fmod: + case nir_op_frem: + return dst_bit_size == 64 ? 80 : 8; + + default: + /* Double opcodes. Comparisons have always full performance. 
*/ + if ((dst_bit_size == 64 && + nir_op_infos[alu_op].output_type & nir_type_float) || + (dst_bit_size >= 8 && src_bit_size == 64 && + nir_op_infos[alu_op].input_types[0] & nir_type_float)) + return 16; + + return DIV_ROUND_UP(MAX2(dst_bit_size, src_bit_size), 32); + } + + case nir_instr_type_intrinsic: + dst_bit_size = nir_instr_as_intrinsic(instr)->def.bit_size; + num_dst_dwords = DIV_ROUND_UP(dst_bit_size, 32); + + switch (nir_instr_as_intrinsic(instr)->intrinsic) { + case nir_intrinsic_load_deref: + /* Uniform or UBO load. + * Set a low cost to balance the number of scalar loads and ALUs. + */ + return 3 * num_dst_dwords; + + default: + unreachable("unexpected intrinsic"); + } + + default: + unreachable("unexpected instr type"); + } +} + void si_init_screen_get_functions(struct si_screen *sscreen) { sscreen->b.get_name = si_get_name; @@ -1439,4 +1570,6 @@ void si_init_screen_get_functions(struct si_screen *sscreen) options->support_indirect_inputs = BITFIELD_BIT(MESA_SHADER_TESS_CTRL) | BITFIELD_BIT(MESA_SHADER_TESS_EVAL); options->support_indirect_outputs = BITFIELD_BIT(MESA_SHADER_TESS_CTRL); + options->varying_expression_max_cost = si_varying_expression_max_cost; + options->varying_estimate_instr_cost = si_varying_estimate_instr_cost; } diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index 6fb351fa30d..44e7dca11ce 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -326,6 +326,7 @@ unsigned si_get_num_shader_profiles(void); #define SI_PROFILE_VS_NO_BINNING (1 << 3) #define SI_PROFILE_GFX9_GFX10_PS_NO_BINNING (1 << 4) #define SI_PROFILE_CLAMP_DIV_BY_ZERO (1 << 5) +#define SI_PROFILE_NO_OPT_UNIFORM_VARYINGS (1 << 6) enum si_shader_dump_type { SI_DUMP_SHADER_KEY, diff --git a/src/gallium/drivers/radeonsi/si_shader_info.c b/src/gallium/drivers/radeonsi/si_shader_info.c index 09b00aaf2a7..e446ac081df 100644 --- a/src/gallium/drivers/radeonsi/si_shader_info.c +++ 
b/src/gallium/drivers/radeonsi/si_shader_info.c @@ -18,6 +18,11 @@ struct si_shader_profile si_shader_profiles[] = SI_PROFILE_VS_NO_BINNING, }, { + /* Viewperf/Energy */ + {0x17118671, 0xd0102e0c, 0x947f3592, 0xb2057e7b, 0x4da5d9b0}, + SI_PROFILE_NO_OPT_UNIFORM_VARYINGS, /* Uniform propagation regresses performance. */ + }, + { /* Viewperf/Medical */ {0x4dce4331, 0x38f778d5, 0x1b75a717, 0x3e454fb9, 0xeb1527f0}, SI_PROFILE_GFX9_GFX10_PS_NO_BINNING, |