summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDanylo Piliaiev <danylo.piliaiev@globallogic.com>2020-07-23 15:15:34 +0300
committerEric Engestrom <eric@engestrom.ch>2020-09-28 17:00:59 +0200
commit79bed11bdd7689299d80d7612c9b0515c774fad1 (patch)
treee1b901601772a8de801c6097a90530b0e7d541ce
parent80c6955c23bea29fbd4279220e7a37ab0ae35888 (diff)
intel/fs: Disable sample mask predication for scratch stores
Scratch stores are being lowered to the instructions with side-effects, however they should be enabled in fs helper invocations, since they are produced from operations which don't imply side-effects. To fix this - we move the decision of whether the sample mask predication is enable to the point where logical brw instructions are created. GLSL example of the issue: int tmp[1024]; ... do { // changes to tmp } while (some_condition(tmp)) If `tmp` is lowered to scrach memory, `some_condition` would be undefined if scratch write is predicated on sample mask, making possible for the while loop to become infinite and hang the GPU. Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/3256 Fixes: 53bfcdeecf4c9632e09ee641d2ca02dd9ec25e34 Signed-off-by: Danylo Piliaiev <danylo.piliaiev@globallogic.com> Reviewed-by: Matt Turner <mattst88@gmail.com> Acked-by: Jason Ekstrand <jason@jlekstrand.net> (cherry picked from commit 77486db867bd39aa9b76e549c946b0a165fcb21a)
-rw-r--r--.pick_status.json2
-rw-r--r--src/intel/compiler/brw_eu_defines.h5
-rw-r--r--src/intel/compiler/brw_fs.cpp8
-rw-r--r--src/intel/compiler/brw_fs_nir.cpp23
4 files changed, 35 insertions, 3 deletions
diff --git a/.pick_status.json b/.pick_status.json
index 3fcbabe8d55..3631b2cf885 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -364,7 +364,7 @@
"description": "intel/fs: Disable sample mask predication for scratch stores",
"nominated": true,
"nomination_type": 1,
- "resolution": 0,
+ "resolution": 1,
"master_sha": null,
"because_sha": "53bfcdeecf4c9632e09ee641d2ca02dd9ec25e34"
},
diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h
index d63360222ec..33c6887f889 100644
--- a/src/intel/compiler/brw_eu_defines.h
+++ b/src/intel/compiler/brw_eu_defines.h
@@ -901,6 +901,11 @@ enum surface_logical_srcs {
SURFACE_LOGICAL_SRC_IMM_DIMS,
/** Per-opcode immediate argument. For atomics, this is the atomic opcode */
SURFACE_LOGICAL_SRC_IMM_ARG,
+ /**
+ * Some instructions with side-effects should not be predicated on
+ * sample mask, e.g. lowered stores to scratch.
+ */
+ SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK,
SURFACE_LOGICAL_NUM_SRCS
};
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 5f5e3b21b6a..d1c5c3a6122 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -5462,7 +5462,10 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst)
const fs_reg &surface_handle = inst->src[SURFACE_LOGICAL_SRC_SURFACE_HANDLE];
const UNUSED fs_reg &dims = inst->src[SURFACE_LOGICAL_SRC_IMM_DIMS];
const fs_reg &arg = inst->src[SURFACE_LOGICAL_SRC_IMM_ARG];
+ const fs_reg &allow_sample_mask =
+ inst->src[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK];
assert(arg.file == IMM);
+ assert(allow_sample_mask.file == IMM);
/* We must have exactly one of surface and surface_handle */
assert((surface.file == BAD_FILE) != (surface_handle.file == BAD_FILE));
@@ -5486,8 +5489,9 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst)
surface.ud == GEN8_BTI_STATELESS_NON_COHERENT);
const bool has_side_effects = inst->has_side_effects();
- fs_reg sample_mask = has_side_effects ? sample_mask_reg(bld) :
- fs_reg(brw_imm_d(0xffff));
+
+ fs_reg sample_mask = allow_sample_mask.ud ? sample_mask_reg(bld) :
+ fs_reg(brw_imm_d(0xffff));
/* From the BDW PRM Volume 7, page 147:
*
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index c75ee6e23a8..8e08a1fcfa5 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -3767,6 +3767,7 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld,
srcs[SURFACE_LOGICAL_SRC_SURFACE] = brw_imm_ud(surface);
srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1);
srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(1); /* num components */
+ srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(0);
/* Read the 3 GLuint components of gl_NumWorkGroups */
for (unsigned i = 0; i < 3; i++) {
@@ -3804,6 +3805,7 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld,
srcs[SURFACE_LOGICAL_SRC_SURFACE] = brw_imm_ud(GEN7_BTI_SLM);
srcs[SURFACE_LOGICAL_SRC_ADDRESS] = get_nir_src(instr->src[0]);
srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1);
+ srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(0);
/* Make dest unsigned because that's what the temporary will be */
dest.type = brw_reg_type_from_bit_size(bit_size, BRW_REGISTER_TYPE_UD);
@@ -3840,6 +3842,7 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld,
srcs[SURFACE_LOGICAL_SRC_SURFACE] = brw_imm_ud(GEN7_BTI_SLM);
srcs[SURFACE_LOGICAL_SRC_ADDRESS] = get_nir_src(instr->src[1]);
srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1);
+ srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1);
fs_reg data = get_nir_src(instr->src[0]);
data.type = brw_reg_type_from_bit_size(bit_size, BRW_REGISTER_TYPE_UD);
@@ -4123,6 +4126,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
if (instr->intrinsic == nir_intrinsic_image_load ||
instr->intrinsic == nir_intrinsic_bindless_image_load) {
srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(instr->num_components);
+ srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(0);
fs_inst *inst =
bld.emit(SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL,
dest, srcs, SURFACE_LOGICAL_NUM_SRCS);
@@ -4131,6 +4135,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
instr->intrinsic == nir_intrinsic_bindless_image_store) {
srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(instr->num_components);
srcs[SURFACE_LOGICAL_SRC_DATA] = get_nir_src(instr->src[3]);
+ srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1);
bld.emit(SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL,
fs_reg(), srcs, SURFACE_LOGICAL_NUM_SRCS);
} else {
@@ -4153,6 +4158,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
data = tmp;
}
srcs[SURFACE_LOGICAL_SRC_DATA] = data;
+ srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1);
bld.emit(SHADER_OPCODE_TYPED_ATOMIC_LOGICAL,
dest, srcs, SURFACE_LOGICAL_NUM_SRCS);
@@ -4210,6 +4216,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
srcs[SURFACE_LOGICAL_SRC_ADDRESS] = get_nir_src(instr->src[1]);
srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1);
srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(instr->num_components);
+ srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(0);
fs_inst *inst =
bld.emit(SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL,
@@ -4229,6 +4236,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
srcs[SURFACE_LOGICAL_SRC_DATA] = get_nir_src(instr->src[2]);
srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1);
srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(instr->num_components);
+ srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1);
bld.emit(SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL,
fs_reg(), srcs, SURFACE_LOGICAL_NUM_SRCS);
@@ -4643,6 +4651,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
get_nir_ssbo_intrinsic_index(bld, instr);
srcs[SURFACE_LOGICAL_SRC_ADDRESS] = get_nir_src(instr->src[1]);
srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1);
+ srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(0);
/* Make dest unsigned because that's what the temporary will be */
dest.type = brw_reg_type_from_bit_size(bit_size, BRW_REGISTER_TYPE_UD);
@@ -4682,6 +4691,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
get_nir_ssbo_intrinsic_index(bld, instr);
srcs[SURFACE_LOGICAL_SRC_ADDRESS] = get_nir_src(instr->src[2]);
srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1);
+ srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1);
fs_reg data = get_nir_src(instr->src[0]);
data.type = brw_reg_type_from_bit_size(bit_size, BRW_REGISTER_TYPE_UD);
@@ -4820,6 +4830,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1);
srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(bit_size);
+ srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(0);
const fs_reg nir_addr = get_nir_src(instr->src[0]);
/* Make dest unsigned because that's what the temporary will be */
@@ -4865,6 +4876,14 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1);
srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(bit_size);
+ /**
+ * While this instruction has side-effects, it should not be predicated
+ * on sample mask, because otherwise fs helper invocations would
+ * load undefined values from scratch memory. And scratch memory
+ * load-stores are produced from operations without side-effects, thus
+ * they should not have different behaviour in the helper invocations.
+ */
+ srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(0);
const fs_reg nir_addr = get_nir_src(instr->src[1]);
fs_reg data = get_nir_src(instr->src[0]);
@@ -5316,6 +5335,7 @@ fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld,
srcs[SURFACE_LOGICAL_SRC_ADDRESS] = get_nir_src(instr->src[1]);
srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1);
srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(op);
+ srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1);
fs_reg data;
if (op != BRW_AOP_INC && op != BRW_AOP_DEC && op != BRW_AOP_PREDEC)
@@ -5351,6 +5371,7 @@ fs_visitor::nir_emit_ssbo_atomic_float(const fs_builder &bld,
srcs[SURFACE_LOGICAL_SRC_ADDRESS] = get_nir_src(instr->src[1]);
srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1);
srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(op);
+ srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1);
fs_reg data = get_nir_src(instr->src[2]);
if (op == BRW_AOP_FCMPWR) {
@@ -5379,6 +5400,7 @@ fs_visitor::nir_emit_shared_atomic(const fs_builder &bld,
srcs[SURFACE_LOGICAL_SRC_SURFACE] = brw_imm_ud(GEN7_BTI_SLM);
srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1);
srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(op);
+ srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1);
fs_reg data;
if (op != BRW_AOP_INC && op != BRW_AOP_DEC && op != BRW_AOP_PREDEC)
@@ -5420,6 +5442,7 @@ fs_visitor::nir_emit_shared_atomic_float(const fs_builder &bld,
srcs[SURFACE_LOGICAL_SRC_SURFACE] = brw_imm_ud(GEN7_BTI_SLM);
srcs[SURFACE_LOGICAL_SRC_IMM_DIMS] = brw_imm_ud(1);
srcs[SURFACE_LOGICAL_SRC_IMM_ARG] = brw_imm_ud(op);
+ srcs[SURFACE_LOGICAL_SRC_ALLOW_SAMPLE_MASK] = brw_imm_ud(1);
fs_reg data = get_nir_src(instr->src[1]);
if (op == BRW_AOP_FCMPWR) {