summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Marek Olšák <marek.olsak@amd.com> 2020-09-16 21:48:18 -0400
committer: Marge Bot <eric+marge@anholt.net> 2020-09-24 12:29:11 +0000
commit: 771aad30276397d6388c147b93c9e298fe30b72b (patch)
tree: 6969ec76bf633e7b7d3b346b3281fa098d8a0e86 /src
parent: 21174dedec43df155309653764d76b4acd4d5f86 (diff)
nir: split lower_ffma into lower_ffma16/32/64
AMD wants different behavior for each bit size

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6756>
Diffstat (limited to 'src')
-rw-r--r-- src/amd/vulkan/radv_shader.c | 8
-rw-r--r-- src/broadcom/compiler/nir_to_vir.c | 4
-rw-r--r-- src/compiler/nir/nir.h | 4
-rw-r--r-- src/compiler/nir/nir_lower_flrp.c | 12
-rw-r--r-- src/compiler/nir/nir_opt_algebraic.py | 4
-rw-r--r-- src/gallium/drivers/lima/lima_program.c | 8
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_screen.c | 4
-rw-r--r-- src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 4
-rw-r--r-- src/gallium/drivers/radeonsi/si_get.c | 4
-rw-r--r-- src/gallium/drivers/vc4/vc4_program.c | 4
-rw-r--r-- src/gallium/drivers/zink/zink_compiler.c | 4
-rw-r--r-- src/intel/compiler/brw_compiler.c | 4
-rw-r--r-- src/panfrost/midgard/midgard_compile.h | 4
13 files changed, 53 insertions, 15 deletions
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index c44a7b46115..3df8ccd3e62 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -70,7 +70,9 @@ static const struct nir_shader_compiler_options nir_options_llvm = {
.lower_unpack_unorm_4x8 = true,
.lower_extract_byte = true,
.lower_extract_word = true,
- .lower_ffma = true,
+ .lower_ffma16 = true,
+ .lower_ffma32 = true,
+ .lower_ffma64 = true,
.lower_fpow = true,
.lower_mul_2x32_64 = true,
.lower_rotate = true,
@@ -113,7 +115,9 @@ static const struct nir_shader_compiler_options nir_options_aco = {
.lower_unpack_half_2x16 = true,
.lower_extract_byte = true,
.lower_extract_word = true,
- .lower_ffma = true,
+ .lower_ffma16 = true,
+ .lower_ffma32 = true,
+ .lower_ffma64 = true,
.lower_fpow = true,
.lower_mul_2x32_64 = true,
.lower_rotate = true,
diff --git a/src/broadcom/compiler/nir_to_vir.c b/src/broadcom/compiler/nir_to_vir.c
index 3ec6bc47edb..25b3230a2e7 100644
--- a/src/broadcom/compiler/nir_to_vir.c
+++ b/src/broadcom/compiler/nir_to_vir.c
@@ -2874,7 +2874,9 @@ const nir_shader_compiler_options v3d_nir_options = {
.lower_unpack_half_2x16 = true,
.lower_fdiv = true,
.lower_find_lsb = true,
- .lower_ffma = true,
+ .lower_ffma16 = true,
+ .lower_ffma32 = true,
+ .lower_ffma64 = true,
.lower_flrp32 = true,
.lower_fpow = true,
.lower_fsat = true,
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index d048cb35b5d..1741f4f9bbf 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -3055,7 +3055,9 @@ typedef enum {
typedef struct nir_shader_compiler_options {
bool lower_fdiv;
- bool lower_ffma;
+ bool lower_ffma16;
+ bool lower_ffma32;
+ bool lower_ffma64;
bool fuse_ffma16;
bool fuse_ffma32;
bool fuse_ffma64;
diff --git a/src/compiler/nir/nir_lower_flrp.c b/src/compiler/nir/nir_lower_flrp.c
index 3b4d23bae11..d9c45877dd0 100644
--- a/src/compiler/nir/nir_lower_flrp.c
+++ b/src/compiler/nir/nir_lower_flrp.c
@@ -366,7 +366,17 @@ convert_flrp_instruction(nir_builder *bld,
nir_alu_instr *alu,
bool always_precise)
{
- bool have_ffma = !bld->shader->options->lower_ffma;
+ bool have_ffma = false;
+ unsigned bit_size = nir_dest_bit_size(alu->dest.dest);
+
+ if (bit_size == 16)
+ have_ffma = !bld->shader->options->lower_ffma16;
+ else if (bit_size == 32)
+ have_ffma = !bld->shader->options->lower_ffma32;
+ else if (bit_size == 64)
+ have_ffma = !bld->shader->options->lower_ffma64;
+ else
+ unreachable("invalid bit_size");
bld->cursor = nir_before_instr(&alu->instr);
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index a03cc549e5f..be836f954ed 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -193,7 +193,9 @@ optimizations.extend([
(('fadd', a, ('fneg', ('ffract', a))), ('ffloor', a), '!options->lower_ffloor'),
(('ffract', a), ('fsub', a, ('ffloor', a)), 'options->lower_ffract'),
(('fceil', a), ('fneg', ('ffloor', ('fneg', a))), 'options->lower_fceil'),
- (('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'),
+ (('ffma@16', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma16'),
+ (('ffma@32', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma32'),
+ (('ffma@64', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma64'),
# Always lower inexact ffma, because it will be fused back by late optimizations (nir_opt_algebraic_late).
(('~ffma@16', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma16'),
(('~ffma@32', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma32'),
diff --git a/src/gallium/drivers/lima/lima_program.c b/src/gallium/drivers/lima/lima_program.c
index 87029d3140a..30a3f527181 100644
--- a/src/gallium/drivers/lima/lima_program.c
+++ b/src/gallium/drivers/lima/lima_program.c
@@ -42,7 +42,9 @@
#include "ir/lima_ir.h"
static const nir_shader_compiler_options vs_nir_options = {
- .lower_ffma = true,
+ .lower_ffma16 = true,
+ .lower_ffma32 = true,
+ .lower_ffma64 = true,
.lower_fpow = true,
.lower_ffract = true,
.lower_fdiv = true,
@@ -60,7 +62,9 @@ static const nir_shader_compiler_options vs_nir_options = {
};
static const nir_shader_compiler_options fs_nir_options = {
- .lower_ffma = true,
+ .lower_ffma16 = true,
+ .lower_ffma32 = true,
+ .lower_ffma64 = true,
.lower_fpow = true,
.lower_fdiv = true,
.lower_fmod = true,
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index 5f05a468e3f..218d80e5c48 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -549,7 +549,9 @@ static const struct nir_shader_compiler_options gallivm_nir_options = {
.lower_bitfield_insert_to_shifts = true,
.lower_bitfield_extract_to_shifts = true,
.lower_sub = true,
- .lower_ffma = true,
+ .lower_ffma16 = true,
+ .lower_ffma32 = true,
+ .lower_ffma64 = true,
.lower_fmod = true,
.lower_hadd = true,
.lower_add_sat = true,
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index c5e54779ad7..64453edf19b 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -3206,7 +3206,9 @@ nvir_nir_shader_compiler_options(int chipset)
{
nir_shader_compiler_options op = {};
op.lower_fdiv = (chipset >= NVISA_GV100_CHIPSET);
- op.lower_ffma = false;
+ op.lower_ffma16 = false;
+ op.lower_ffma32 = false;
+ op.lower_ffma64 = false;
op.fuse_ffma16 = false; /* nir doesn't track mad vs fma */
op.fuse_ffma32 = false; /* nir doesn't track mad vs fma */
op.fuse_ffma64 = false; /* nir doesn't track mad vs fma */
diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c
index 1f19fdd8817..51a839056e7 100644
--- a/src/gallium/drivers/radeonsi/si_get.c
+++ b/src/gallium/drivers/radeonsi/si_get.c
@@ -945,7 +945,9 @@ void si_init_screen_get_functions(struct si_screen *sscreen)
* Keep FMA enabled on gfx10 to test it, which helps us validate correctness
* for gfx10.3 on gfx10.
*/
- .lower_ffma = sscreen->info.chip_class <= GFX9,
+ .lower_ffma16 = sscreen->info.chip_class <= GFX9,
+ .lower_ffma32 = sscreen->info.chip_class <= GFX9,
+ .lower_ffma64 = sscreen->info.chip_class <= GFX9,
.fuse_ffma16 = sscreen->info.chip_class >= GFX10,
.fuse_ffma32 = sscreen->info.chip_class >= GFX10,
.fuse_ffma64 = sscreen->info.chip_class >= GFX10,
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 4f2956bb735..8bffb1d3840 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -2179,7 +2179,9 @@ static const nir_shader_compiler_options nir_options = {
.lower_extract_byte = true,
.lower_extract_word = true,
.lower_fdiv = true,
- .lower_ffma = true,
+ .lower_ffma16 = true,
+ .lower_ffma32 = true,
+ .lower_ffma64 = true,
.lower_flrp32 = true,
.lower_fmod = true,
.lower_fpow = true,
diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c
index b9d1b666b14..8f0c16cb13b 100644
--- a/src/gallium/drivers/zink/zink_compiler.c
+++ b/src/gallium/drivers/zink/zink_compiler.c
@@ -126,7 +126,9 @@ lower_discard_if(nir_shader *shader)
static const struct nir_shader_compiler_options nir_options = {
.lower_all_io_to_temps = true,
- .lower_ffma = true,
+ .lower_ffma16 = true,
+ .lower_ffma32 = true,
+ .lower_ffma64 = true,
.lower_fdph = true,
.lower_flrp32 = true,
.lower_fpow = true,
diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c
index f31d4f89fe8..3d0fcbe8d6f 100644
--- a/src/intel/compiler/brw_compiler.c
+++ b/src/intel/compiler/brw_compiler.c
@@ -183,7 +183,9 @@ brw_compiler_create(void *mem_ctx, const struct gen_device_info *devinfo)
/* Prior to Gen6, there are no three source operations, and Gen11 loses
* LRP.
*/
- nir_options->lower_ffma = devinfo->gen < 6;
+ nir_options->lower_ffma16 = devinfo->gen < 6;
+ nir_options->lower_ffma32 = devinfo->gen < 6;
+ nir_options->lower_ffma64 = devinfo->gen < 6;
nir_options->lower_flrp32 = devinfo->gen < 6 || devinfo->gen >= 11;
nir_options->lower_fpow = devinfo->gen >= 12;
diff --git a/src/panfrost/midgard/midgard_compile.h b/src/panfrost/midgard/midgard_compile.h
index 850e72641e7..fabed8bb5b2 100644
--- a/src/panfrost/midgard/midgard_compile.h
+++ b/src/panfrost/midgard/midgard_compile.h
@@ -36,7 +36,9 @@ midgard_compile_shader_nir(nir_shader *nir, panfrost_program *program, bool is_b
* solution. */
static const nir_shader_compiler_options midgard_nir_options = {
- .lower_ffma = true,
+ .lower_ffma16 = true,
+ .lower_ffma32 = true,
+ .lower_ffma64 = true,
.lower_scmp = true,
.lower_flrp16 = true,
.lower_flrp32 = true,