diff options
-rw-r--r-- | .pick_status.json | 2 |
-rw-r--r-- | src/intel/compiler/brw_eu_defines.h | 5 |
-rw-r--r-- | src/intel/compiler/brw_fs.cpp | 8 |
-rw-r--r-- | src/intel/compiler/brw_fs_nir.cpp | 23 |
4 files changed, 35 insertions, 3 deletions
diff --git a/.pick_status.json b/.pick_status.json index 3fcbabe8d55..3631b2cf885 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -364,7 +364,7 @@ "description": "intel/fs: Disable sample mask predication for scratch stores", "nominated": true, "nomination_type": 1, - "resolution": 0, + "resolution": 1, "master_sha": null, "because_sha": "53bfcdeecf4c9632e09ee641d2ca02dd9ec25e34" }, diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index d63360222ec..33c6887f889 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -901,6 +901,11 @@ enum surface_logical_srcs { SURFACE_LOGICAL_SRC_IMM_DIMS, /** Per-opcode immediate argument. For atomics, this is the atomic opcode */ SURFACE_LOGICAL_SRC_IMM_ARG, + /** + * Some instructions with side-effects should not be predicated on + * sample mask, e.g. lowered stores to scratch. + */ + SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK, SURFACE_LOGICAL_NUM_SRCS }; diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 5f5e3b21b6a..d1c5c3a6122 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -5462,7 +5462,10 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst) const fs_reg &surface_handle = inst->src[SURFACE_LOGICAL_SRC_SURFACE_HANDLE]; const UNUSED fs_reg &dims = inst->src[SURFACE_LOGICAL_SRC_IMM_DIMS]; const fs_reg &arg = inst->src[SURFACE_LOGICAL_SRC_IMM_ARG]; + const fs_reg &allow_sample_mask = + inst->src[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK]; assert(arg.file == IMM); + assert(allow_sample_mask.file == IMM); /* We must have exactly one of surface and surface_handle */ assert((surface.file == BAD_FILE) != (surface_handle.file == BAD_FILE)); @@ -5486,8 +5489,9 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst) surface.ud == GEN8_BTI_STATELESS_NON_COHERENT); const bool has_side_effects = inst->has_side_effects(); - fs_reg sample_mask = has_side_effects ? 
sample_mask_reg(bld) : - fs_reg(brw_imm_d(0xffff)); + + fs_reg sample_mask = allow_sample_mask.ud ? sample_mask_reg(bld) : + fs_reg(brw_imm_d(0xffff)); /* From the BDW PRM Volume 7, page 147: * diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index c75ee6e23a8..8e08a1fcfa5 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -3767,6 +3767,7 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld, srcs[SURFACE_LOGICAL_SRC_SURFACE] = brw_imm_ud(surface); srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1); srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(1); /* num components */ + srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(0); /* Read the 3 GLuint components of gl_NumWorkGroups */ for (unsigned i = 0; i < 3; i++) { @@ -3804,6 +3805,7 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld, srcs[SURFACE_LOGICAL_SRC_SURFACE] = brw_imm_ud(GEN7_BTI_SLM); srcs[SURFACE_LOGICAL_SRC_ADDRESS] = get_nir_src(instr->src[0]); srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1); + srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(0); /* Make dest unsigned because that's what the temporary will be */ dest.type = brw_reg_type_from_bit_size(bit_size, BRW_REGISTER_TYPE_UD); @@ -3840,6 +3842,7 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld, srcs[SURFACE_LOGICAL_SRC_SURFACE] = brw_imm_ud(GEN7_BTI_SLM); srcs[SURFACE_LOGICAL_SRC_ADDRESS] = get_nir_src(instr->src[1]); srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1); + srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1); fs_reg data = get_nir_src(instr->src[0]); data.type = brw_reg_type_from_bit_size(bit_size, BRW_REGISTER_TYPE_UD); @@ -4123,6 +4126,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr if (instr->intrinsic == nir_intrinsic_image_load || instr->intrinsic == nir_intrinsic_bindless_image_load) { srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(instr->num_components); + 
srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(0); fs_inst *inst = bld.emit(SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL, dest, srcs, SURFACE_LOGICAL_NUM_SRCS); @@ -4131,6 +4135,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr instr->intrinsic == nir_intrinsic_bindless_image_store) { srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(instr->num_components); srcs[SURFACE_LOGICAL_SRC_DATA] = get_nir_src(instr->src[3]); + srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1); bld.emit(SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL, fs_reg(), srcs, SURFACE_LOGICAL_NUM_SRCS); } else { @@ -4153,6 +4158,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr data = tmp; } srcs[SURFACE_LOGICAL_SRC_DATA] = data; + srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1); bld.emit(SHADER_OPCODE_TYPED_ATOMIC_LOGICAL, dest, srcs, SURFACE_LOGICAL_NUM_SRCS); @@ -4210,6 +4216,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr srcs[SURFACE_LOGICAL_SRC_ADDRESS] = get_nir_src(instr->src[1]); srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1); srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(instr->num_components); + srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(0); fs_inst *inst = bld.emit(SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL, @@ -4229,6 +4236,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr srcs[SURFACE_LOGICAL_SRC_DATA] = get_nir_src(instr->src[2]); srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1); srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(instr->num_components); + srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1); bld.emit(SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL, fs_reg(), srcs, SURFACE_LOGICAL_NUM_SRCS); @@ -4643,6 +4651,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr get_nir_ssbo_intrinsic_index(bld, instr); srcs[SURFACE_LOGICAL_SRC_ADDRESS] = 
get_nir_src(instr->src[1]); srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1); + srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(0); /* Make dest unsigned because that's what the temporary will be */ dest.type = brw_reg_type_from_bit_size(bit_size, BRW_REGISTER_TYPE_UD); @@ -4682,6 +4691,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr get_nir_ssbo_intrinsic_index(bld, instr); srcs[SURFACE_LOGICAL_SRC_ADDRESS] = get_nir_src(instr->src[2]); srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1); + srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1); fs_reg data = get_nir_src(instr->src[0]); data.type = brw_reg_type_from_bit_size(bit_size, BRW_REGISTER_TYPE_UD); @@ -4820,6 +4830,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1); srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(bit_size); + srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(0); const fs_reg nir_addr = get_nir_src(instr->src[0]); /* Make dest unsigned because that's what the temporary will be */ @@ -4865,6 +4876,14 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1); srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(bit_size); + /** + * While this instruction has side-effects, it should not be predicated + * on sample mask, because otherwise fs helper invocations would + * load undefined values from scratch memory. And scratch memory + * load-stores are produced from operations without side-effects, thus + * they should not have different behaviour in the helper invocations. 
+ */ + srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(0); const fs_reg nir_addr = get_nir_src(instr->src[1]); fs_reg data = get_nir_src(instr->src[0]); @@ -5316,6 +5335,7 @@ fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld, srcs[SURFACE_LOGICAL_SRC_ADDRESS] = get_nir_src(instr->src[1]); srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1); srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(op); + srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1); fs_reg data; if (op != BRW_AOP_INC && op != BRW_AOP_DEC && op != BRW_AOP_PREDEC) @@ -5351,6 +5371,7 @@ fs_visitor::nir_emit_ssbo_atomic_float(const fs_builder &bld, srcs[SURFACE_LOGICAL_SRC_ADDRESS] = get_nir_src(instr->src[1]); srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1); srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(op); + srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1); fs_reg data = get_nir_src(instr->src[2]); if (op == BRW_AOP_FCMPWR) { @@ -5379,6 +5400,7 @@ fs_visitor::nir_emit_shared_atomic(const fs_builder &bld, srcs[SURFACE_LOGICAL_SRC_SURFACE] = brw_imm_ud(GEN7_BTI_SLM); srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1); srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(op); + srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1); fs_reg data; if (op != BRW_AOP_INC && op != BRW_AOP_DEC && op != BRW_AOP_PREDEC) @@ -5420,6 +5442,7 @@ fs_visitor::nir_emit_shared_atomic_float(const fs_builder &bld, srcs[SURFACE_LOGICAL_SRC_SURFACE] = brw_imm_ud(GEN7_BTI_SLM); srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1); srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(op); + srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1); fs_reg data = get_nir_src(instr->src[1]); if (op == BRW_AOP_FCMPWR) { |