diff options
author | Jason Ekstrand <jason.ekstrand@intel.com> | 2021-01-14 18:00:00 -0600 |
---|---|---|
committer | Marge Bot <eric+marge@anholt.net> | 2021-03-17 17:49:58 +0000 |
commit | 1ce3660a5a5a942c54d2da761dc4d3b5fa6864e7 (patch) | |
tree | 269fed93653f566187fa53a7d1d425c2f1e72695 /src/intel/compiler/brw_fs_nir.cpp | |
parent | 2407952ec99ab1c2a16e01a9e300f799e4d53320 (diff) |
intel/fs,rt: Add a predicate to load_global_const_block
This allows us to do bounds checked A64 block load without the it being
counted as control-flow by NIR. This means that NIR optimizations like
CSE will be able to work on these the same as a regular load.
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8635>
Diffstat (limited to 'src/intel/compiler/brw_fs_nir.cpp')
-rw-r--r-- | src/intel/compiler/brw_fs_nir.cpp | 45 |
1 files changed, 38 insertions, 7 deletions
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 8123c89f410..f3f59006ad0 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -4665,12 +4665,43 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr assert(instr->num_components == 8 || instr->num_components == 16); const fs_builder ubld = bld.exec_all().group(instr->num_components, 0); - fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD); - ubld.emit(SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL, - tmp, - bld.emit_uniformize(get_nir_src(instr->src[0])), /* Address */ - fs_reg(), /* No source data */ - brw_imm_ud(instr->num_components)); + fs_reg load_val; + + bool is_pred_const = nir_src_is_const(instr->src[1]); + if (is_pred_const && nir_src_as_uint(instr->src[1]) == 0) { + /* In this case, we don't want the UBO load at all. We really + * shouldn't get here but it's possible. + */ + load_val = brw_imm_ud(0); + } else { + /* The uniform process may stomp the flag so do this first */ + fs_reg addr = bld.emit_uniformize(get_nir_src(instr->src[0])); + + load_val = ubld.vgrf(BRW_REGISTER_TYPE_UD); + + /* If the predicate is constant and we got here, then it's non-zero + * and we don't need the predicate at all. + */ + if (!is_pred_const) { + /* Load the predicate */ + fs_reg pred = bld.emit_uniformize(get_nir_src(instr->src[1])); + fs_inst *mov = ubld.MOV(bld.null_reg_d(), pred); + mov->conditional_mod = BRW_CONDITIONAL_NZ; + + /* Stomp the destination with 0 if we're OOB */ + mov = ubld.MOV(load_val, brw_imm_ud(0)); + mov->predicate = BRW_PREDICATE_NORMAL; + mov->predicate_inverse = true; + } + + fs_inst *load = ubld.emit(SHADER_OPCODE_A64_OWORD_BLOCK_READ_LOGICAL, + load_val, addr, + fs_reg(), /* No source data */ + brw_imm_ud(instr->num_components)); + + if (!is_pred_const) + load->predicate = BRW_PREDICATE_NORMAL; + } /* From the HW perspective, we just did a single SIMD16 instruction * which loaded a dword in each SIMD channel. From NIR's perspective, @@ -4681,7 +4712,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr */ for (unsigned i = 0; i < instr->num_components; i++) { bld.MOV(retype(offset(dest, bld, i), BRW_REGISTER_TYPE_UD), - component(tmp, i)); + component(load_val, i)); } break; } |