summary refs log tree commit diff
path: root/src/amd/compiler/aco_insert_NOPs.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/amd/compiler/aco_insert_NOPs.cpp')
-rw-r--r-- src/amd/compiler/aco_insert_NOPs.cpp 30
1 files changed, 20 insertions, 10 deletions
diff --git a/src/amd/compiler/aco_insert_NOPs.cpp b/src/amd/compiler/aco_insert_NOPs.cpp
index 2442f60f48e..05af344c9c6 100644
--- a/src/amd/compiler/aco_insert_NOPs.cpp
+++ b/src/amd/compiler/aco_insert_NOPs.cpp
@@ -40,7 +40,6 @@ struct NOP_ctx {
int VALU_wrsgpr = -10;
/* GFX10 */
- int last_VMEM_since_scalar_write = -1;
bool has_VOPC = false;
bool has_nonVALU_exec_read = false;
bool has_VMEM = false;
@@ -48,6 +47,7 @@ struct NOP_ctx {
bool has_DS = false;
bool has_branch_after_DS = false;
std::bitset<128> sgprs_read_by_SMEM;
+ std::bitset<128> sgprs_read_by_VMEM;
NOP_ctx(Program* program) : chip_class(program->chip_class) {
vcc_physical = program->config->num_sgprs - 2;
@@ -342,21 +342,31 @@ std::pair<int, int> handle_instruction_gfx10(NOP_ctx& ctx, aco_ptr<Instruction>&
if (instr->format == Format::SMEM)
sNOPs = std::max(sNOPs, handle_SMEM_clause(instr, new_idx, new_instructions));
- /* handle EXEC/M0/SGPR write following a VMEM instruction without a VALU or "waitcnt vmcnt(0)" in-between */
- if (instr->isSALU() || instr->format == Format::SMEM) {
- if (!instr->definitions.empty() && ctx.last_VMEM_since_scalar_write != -1) {
- ctx.last_VMEM_since_scalar_write = -1;
- vNOPs = 1;
+ /* VMEMtoScalarWriteHazard
+ * Handle EXEC/M0/SGPR write following a VMEM instruction without a VALU or "waitcnt vmcnt(0)" in-between.
+ */
+ if (instr->isVMEM() || instr->format == Format::FLAT || instr->format == Format::GLOBAL ||
+ instr->format == Format::SCRATCH || instr->format == Format::DS) {
+ /* Remember all SGPRs that are read by the VMEM instruction */
+ mark_read_regs(instr, ctx.sgprs_read_by_VMEM);
+ } else if (instr->isSALU() || instr->format == Format::SMEM) {
+ /* Check if the SALU/SMEM instruction writes an SGPR that was previously read by a VMEM instruction */
+ if (check_written_regs(instr, ctx.sgprs_read_by_VMEM)) {
+ ctx.sgprs_read_by_VMEM.reset();
+
+ /* Insert v_nop to mitigate the problem */
+ aco_ptr<VOP1_instruction> nop{create_instruction<VOP1_instruction>(aco_opcode::v_nop, Format::VOP1, 0, 0)};
+ new_instructions.emplace_back(std::move(nop));
}
- } else if (instr->isVMEM() || instr->isFlatOrGlobal()) {
- ctx.last_VMEM_since_scalar_write = new_idx;
} else if (instr->opcode == aco_opcode::s_waitcnt) {
+ /* Hazard is mitigated by "s_waitcnt vmcnt(0)" */
uint16_t imm = static_cast<SOPP_instruction*>(instr.get())->imm;
unsigned vmcnt = (imm & 0xF) | ((imm & (0x3 << 14)) >> 10);
if (vmcnt == 0)
- ctx.last_VMEM_since_scalar_write = -1;
+ ctx.sgprs_read_by_VMEM.reset();
} else if (instr->isVALU()) {
- ctx.last_VMEM_since_scalar_write = -1;
+ /* Hazard is mitigated by any VALU instruction */
+ ctx.sgprs_read_by_VMEM.reset();
}
/* VcmpxPermlaneHazard