summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Francisco Jerez <currojerez@riseup.net> 2021-05-24 22:53:27 -0700
committer: Marge Bot <eric+marge@anholt.net> 2021-06-23 07:34:22 +0000
commit: c19cfa9dc22b805581ac5ed3ad835fed3c8506c2 (patch)
tree: e91177274626f51160da0f9decbceb6ae288e703 /src
parent: 63abc083ce5e03843d72465a76fdb93064bc3eb9 (diff)
intel/fs: Fix synchronization of accumulator-clearing W/A move on TGL+.
Right now the accumulator-clearing move emitted by the generator for Wa_14010017096 inherits the SWSB field from the previous instruction. This can lead to redundant synchronization, or possibly more serious issues if the previous instruction had a TGL_SBID_SET SWSB synchronization mode. Take the SWSB synchronization information from the IR.

Fixes: a27542c5ddec8 ("intel/compiler: Clear accumulator register before EOT")
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11433>
Diffstat (limited to 'src')
-rw-r--r-- src/intel/compiler/brw_fs_generator.cpp 9
1 files changed, 6 insertions, 3 deletions
diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp
index 16240b158a2..b0cabe48c42 100644
--- a/src/intel/compiler/brw_fs_generator.cpp
+++ b/src/intel/compiler/brw_fs_generator.cpp
@@ -1923,6 +1923,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
struct brw_reg src[4], dst;
unsigned int last_insn_offset = p->next_insn_offset;
bool multiple_instructions_emitted = false;
+ tgl_swsb swsb = inst->sched;
/* From the Broadwell PRM, Volume 7, "3D-Media-GPGPU", in the
* "Register Region Restrictions" section: for BDW, SKL:
@@ -1957,8 +1958,10 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
brw_set_default_exec_size(p, BRW_EXECUTE_16);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_set_default_swsb(p, tgl_swsb_src_dep(swsb));
brw_MOV(p, brw_acc_reg(8), brw_imm_f(0.0f));
last_insn_offset = p->next_insn_offset;
+ swsb = tgl_swsb_dst_dep(swsb, 1);
}
if (!is_accum_used && !inst->eot) {
@@ -2016,7 +2019,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
brw_set_default_saturate(p, inst->saturate);
brw_set_default_mask_control(p, inst->force_writemask_all);
brw_set_default_acc_write_control(p, inst->writes_accumulator);
- brw_set_default_swsb(p, inst->sched);
+ brw_set_default_swsb(p, swsb);
unsigned exec_size = inst->exec_size;
if (devinfo->verx10 == 70 &&
@@ -2447,8 +2450,8 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
}
case FS_OPCODE_SCHEDULING_FENCE:
- if (inst->sources == 0 && inst->sched.regdist == 0 &&
- inst->sched.mode == TGL_SBID_NULL) {
+ if (inst->sources == 0 && swsb.regdist == 0 &&
+ swsb.mode == TGL_SBID_NULL) {
if (unlikely(debug_flag))
disasm_info->use_tail = true;
break;