From 771aad30276397d6388c147b93c9e298fe30b72b Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Wed, 16 Sep 2020 21:48:18 -0400 Subject: nir: split lower_ffma into lower_ffma16/32/64 AMD wants different behavior for each bit size Acked-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/amd/vulkan/radv_shader.c | 8 ++++++-- src/broadcom/compiler/nir_to_vir.c | 4 +++- src/compiler/nir/nir.h | 4 +++- src/compiler/nir/nir_lower_flrp.c | 12 +++++++++++- src/compiler/nir/nir_opt_algebraic.py | 4 +++- src/gallium/drivers/lima/lima_program.c | 8 ++++++-- src/gallium/drivers/llvmpipe/lp_screen.c | 4 +++- src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 4 +++- src/gallium/drivers/radeonsi/si_get.c | 4 +++- src/gallium/drivers/vc4/vc4_program.c | 4 +++- src/gallium/drivers/zink/zink_compiler.c | 4 +++- src/intel/compiler/brw_compiler.c | 4 +++- src/panfrost/midgard/midgard_compile.h | 4 +++- 13 files changed, 53 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index c44a7b46115..3df8ccd3e62 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -70,7 +70,9 @@ static const struct nir_shader_compiler_options nir_options_llvm = { .lower_unpack_unorm_4x8 = true, .lower_extract_byte = true, .lower_extract_word = true, - .lower_ffma = true, + .lower_ffma16 = true, + .lower_ffma32 = true, + .lower_ffma64 = true, .lower_fpow = true, .lower_mul_2x32_64 = true, .lower_rotate = true, @@ -113,7 +115,9 @@ static const struct nir_shader_compiler_options nir_options_aco = { .lower_unpack_half_2x16 = true, .lower_extract_byte = true, .lower_extract_word = true, - .lower_ffma = true, + .lower_ffma16 = true, + .lower_ffma32 = true, + .lower_ffma64 = true, .lower_fpow = true, .lower_mul_2x32_64 = true, .lower_rotate = true, diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c index 3ec6bc47edb..25b3230a2e7 100644 --- a/src/broadcom/compiler/nir_to_vir.c +++ b/src/broadcom/compiler/nir_to_vir.c @@ -2874,7 +2874,9 @@ const nir_shader_compiler_options v3d_nir_options = { .lower_unpack_half_2x16 = true, .lower_fdiv = true, .lower_find_lsb = true, - .lower_ffma = true, + .lower_ffma16 = true, + .lower_ffma32 = true, + .lower_ffma64 = true, .lower_flrp32 = true, .lower_fpow = true, .lower_fsat = true, diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index d048cb35b5d..1741f4f9bbf 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -3055,7 +3055,9 @@ typedef enum { typedef struct nir_shader_compiler_options { bool lower_fdiv; - bool lower_ffma; + bool lower_ffma16; + bool lower_ffma32; + bool lower_ffma64; bool fuse_ffma16; bool fuse_ffma32; bool fuse_ffma64; diff --git a/src/compiler/nir/nir_lower_flrp.c b/src/compiler/nir/nir_lower_flrp.c index 3b4d23bae11..d9c45877dd0 100644 --- a/src/compiler/nir/nir_lower_flrp.c +++ b/src/compiler/nir/nir_lower_flrp.c @@ -366,7 +366,17 @@ convert_flrp_instruction(nir_builder *bld, nir_alu_instr *alu, bool always_precise) { - bool have_ffma = !bld->shader->options->lower_ffma; + bool have_ffma = false; + unsigned bit_size = nir_dest_bit_size(alu->dest.dest); + + if (bit_size == 16) + have_ffma = !bld->shader->options->lower_ffma16; + else if (bit_size == 32) + have_ffma = !bld->shader->options->lower_ffma32; + else if (bit_size == 64) + have_ffma = !bld->shader->options->lower_ffma64; + else + unreachable("invalid bit_size"); bld->cursor = nir_before_instr(&alu->instr); diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index a03cc549e5f..be836f954ed 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -193,7 +193,9 @@ optimizations.extend([ (('fadd', a, ('fneg', ('ffract', a))), ('ffloor', a), '!options->lower_ffloor'), (('ffract', a), ('fsub', a, ('ffloor', a)), 'options->lower_ffract'), (('fceil', a), ('fneg', ('ffloor', ('fneg', a))), 'options->lower_fceil'), - (('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'), + (('ffma@16', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma16'), + (('ffma@32', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma32'), + (('ffma@64', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma64'), # Always lower inexact ffma, because it will be fused back by late optimizations (nir_opt_algebraic_late). (('~ffma@16', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma16'), (('~ffma@32', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma32'), diff --git a/src/gallium/drivers/lima/lima_program.c b/src/gallium/drivers/lima/lima_program.c index 87029d3140a..30a3f527181 100644 --- a/src/gallium/drivers/lima/lima_program.c +++ b/src/gallium/drivers/lima/lima_program.c @@ -42,7 +42,9 @@ #include "ir/lima_ir.h" static const nir_shader_compiler_options vs_nir_options = { - .lower_ffma = true, + .lower_ffma16 = true, + .lower_ffma32 = true, + .lower_ffma64 = true, .lower_fpow = true, .lower_ffract = true, .lower_fdiv = true, @@ -60,7 +62,9 @@ static const nir_shader_compiler_options vs_nir_options = { }; static const nir_shader_compiler_options fs_nir_options = { - .lower_ffma = true, + .lower_ffma16 = true, + .lower_ffma32 = true, + .lower_ffma64 = true, .lower_fpow = true, .lower_fdiv = true, .lower_fmod = true, diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c index 5f05a468e3f..218d80e5c48 100644 --- a/src/gallium/drivers/llvmpipe/lp_screen.c +++ b/src/gallium/drivers/llvmpipe/lp_screen.c @@ -549,7 +549,9 @@ static const struct nir_shader_compiler_options gallivm_nir_options = { .lower_bitfield_insert_to_shifts = true, .lower_bitfield_extract_to_shifts = true, .lower_sub = true, - .lower_ffma = true, + .lower_ffma16 = true, + .lower_ffma32 = true, + .lower_ffma64 = true, .lower_fmod = true, .lower_hadd = true, .lower_add_sat = true, diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index c5e54779ad7..64453edf19b 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -3206,7 +3206,9 @@ nvir_nir_shader_compiler_options(int chipset) { nir_shader_compiler_options op = {}; op.lower_fdiv = (chipset >= NVISA_GV100_CHIPSET); - op.lower_ffma = false; + op.lower_ffma16 = false; + op.lower_ffma32 = false; + op.lower_ffma64 = false; op.fuse_ffma16 = false; /* nir doesn't track mad vs fma */ op.fuse_ffma32 = false; /* nir doesn't track mad vs fma */ op.fuse_ffma64 = false; /* nir doesn't track mad vs fma */ diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c index 1f19fdd8817..51a839056e7 100644 --- a/src/gallium/drivers/radeonsi/si_get.c +++ b/src/gallium/drivers/radeonsi/si_get.c @@ -945,7 +945,9 @@ void si_init_screen_get_functions(struct si_screen *sscreen) * Keep FMA enabled on gfx10 to test it, which helps us validate correctness * for gfx10.3 on gfx10. */ - .lower_ffma = sscreen->info.chip_class <= GFX9, + .lower_ffma16 = sscreen->info.chip_class <= GFX9, + .lower_ffma32 = sscreen->info.chip_class <= GFX9, + .lower_ffma64 = sscreen->info.chip_class <= GFX9, .fuse_ffma16 = sscreen->info.chip_class >= GFX10, .fuse_ffma32 = sscreen->info.chip_class >= GFX10, .fuse_ffma64 = sscreen->info.chip_class >= GFX10, diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 4f2956bb735..8bffb1d3840 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -2179,7 +2179,9 @@ static const nir_shader_compiler_options nir_options = { .lower_extract_byte = true, .lower_extract_word = true, .lower_fdiv = true, - .lower_ffma = true, + .lower_ffma16 = true, + .lower_ffma32 = true, + .lower_ffma64 = true, .lower_flrp32 = true, .lower_fmod = true, .lower_fpow = true, diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c index b9d1b666b14..8f0c16cb13b 100644 --- a/src/gallium/drivers/zink/zink_compiler.c +++ b/src/gallium/drivers/zink/zink_compiler.c @@ -126,7 +126,9 @@ lower_discard_if(nir_shader *shader) static const struct nir_shader_compiler_options nir_options = { .lower_all_io_to_temps = true, - .lower_ffma = true, + .lower_ffma16 = true, + .lower_ffma32 = true, + .lower_ffma64 = true, .lower_fdph = true, .lower_flrp32 = true, .lower_fpow = true, diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c index f31d4f89fe8..3d0fcbe8d6f 100644 --- a/src/intel/compiler/brw_compiler.c +++ b/src/intel/compiler/brw_compiler.c @@ -183,7 +183,9 @@ brw_compiler_create(void *mem_ctx, const struct gen_device_info *devinfo) /* Prior to Gen6, there are no three source operations, and Gen11 loses * LRP. */ - nir_options->lower_ffma = devinfo->gen < 6; + nir_options->lower_ffma16 = devinfo->gen < 6; + nir_options->lower_ffma32 = devinfo->gen < 6; + nir_options->lower_ffma64 = devinfo->gen < 6; nir_options->lower_flrp32 = devinfo->gen < 6 || devinfo->gen >= 11; nir_options->lower_fpow = devinfo->gen >= 12; diff --git a/src/panfrost/midgard/midgard_compile.h b/src/panfrost/midgard/midgard_compile.h index 850e72641e7..fabed8bb5b2 100644 --- a/src/panfrost/midgard/midgard_compile.h +++ b/src/panfrost/midgard/midgard_compile.h @@ -36,7 +36,9 @@ midgard_compile_shader_nir(nir_shader *nir, panfrost_program *program, bool is_b * solution. */ static const nir_shader_compiler_options midgard_nir_options = { - .lower_ffma = true, + .lower_ffma16 = true, + .lower_ffma32 = true, + .lower_ffma64 = true, .lower_scmp = true, .lower_flrp16 = true, .lower_flrp32 = true, -- cgit v1.2.3