summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRhys Perry <pendingchaos02@gmail.com>2020-07-17 15:01:41 +0100
committerMarge Bot <eric+marge@anholt.net>2020-07-21 18:25:35 +0000
commit3a4847179b95d1f2a03e575ef9f0e5c71631de65 (patch)
treef2424086d0231c64c918a5ae7544f60be1a11425
parentd169f09e378e6380d3137bd974c558535dafa166 (diff)
aco: allow overflow for some SMEM instructions
fossil-db (Navi): Totals from 10184 (7.49% of 135946) affected shaders: CodeSize: 83419748 -> 82430824 (-1.19%); split: -1.19%, +0.01% Instrs: 16054612 -> 15908523 (-0.91%); split: -0.93%, +0.02% VMEM: 1608018 -> 1581829 (-1.63%); split: +0.20%, -1.83% SMEM: 577031 -> 563492 (-2.35%); split: +0.10%, -2.45% VClause: 242643 -> 242512 (-0.05%); split: -0.06%, +0.00% SClause: 640966 -> 569897 (-11.09%) Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/2720>
-rw-r--r--src/amd/compiler/aco_instruction_selection.cpp2
-rw-r--r--src/amd/compiler/aco_ir.h3
-rw-r--r--src/amd/compiler/aco_opt_value_numbering.cpp5
-rw-r--r--src/amd/compiler/aco_optimizer.cpp3
4 files changed, 9 insertions, 4 deletions
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 4d161cf0dbd..0af1f1f5c15 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -5329,7 +5329,7 @@ void visit_load_push_constant(isel_context *ctx, nir_intrinsic_instr *instr)
unreachable("unimplemented or forbidden load_push_constant.");
}
- bld.smem(op, Definition(vec), ptr, index);
+ static_cast<SMEM_instruction*>(bld.smem(op, Definition(vec), ptr, index).instr)->prevent_overflow = true;
if (!aligned) {
Operand byte_offset = index_cv ? Operand((offset + index_cv->u32) % 4) : Operand(index);
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index c1735a0d31a..a23ff7ce017 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -924,7 +924,8 @@ struct SMEM_instruction : public Instruction {
bool nv : 1; /* VEGA only: Non-volatile */
bool can_reorder : 1;
bool disable_wqm : 1;
- uint32_t padding: 19;
+ bool prevent_overflow : 1; /* avoid overflow when combining additions */
+ uint32_t padding: 18;
};
static_assert(sizeof(SMEM_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
diff --git a/src/amd/compiler/aco_opt_value_numbering.cpp b/src/amd/compiler/aco_opt_value_numbering.cpp
index 93668442d32..9a1972ff34e 100644
--- a/src/amd/compiler/aco_opt_value_numbering.cpp
+++ b/src/amd/compiler/aco_opt_value_numbering.cpp
@@ -226,8 +226,11 @@ struct InstrPred {
case Format::SMEM: {
SMEM_instruction* aS = static_cast<SMEM_instruction*>(a);
SMEM_instruction* bS = static_cast<SMEM_instruction*>(b);
+ /* isel shouldn't be creating situations where this assertion fails */
+ assert(aS->prevent_overflow == bS->prevent_overflow);
return aS->can_reorder && bS->can_reorder &&
- aS->glc == bS->glc && aS->nv == bS->nv;
+ aS->glc == bS->glc && aS->nv == bS->nv &&
+ aS->prevent_overflow == bS->prevent_overflow;
}
case Format::VINTRP: {
Interp_instruction* aI = static_cast<Interp_instruction*>(a);
diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index f66521399ca..a254728baa5 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -1001,13 +1001,14 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
SMEM_instruction *smem = static_cast<SMEM_instruction *>(instr.get());
Temp base;
uint32_t offset;
+ bool prevent_overflow = smem->operands[0].size() > 2 || smem->prevent_overflow;
if (i == 1 && info.is_constant_or_literal(32) &&
((ctx.program->chip_class == GFX6 && info.val <= 0x3FF) ||
(ctx.program->chip_class == GFX7 && info.val <= 0xFFFFFFFF) ||
(ctx.program->chip_class >= GFX8 && info.val <= 0xFFFFF))) {
instr->operands[i] = Operand(info.val);
continue;
- } else if (i == 1 && parse_base_offset(ctx, instr.get(), i, &base, &offset, true) && base.regClass() == s1 && offset <= 0xFFFFF && ctx.program->chip_class >= GFX9) {
+ } else if (i == 1 && parse_base_offset(ctx, instr.get(), i, &base, &offset, prevent_overflow) && base.regClass() == s1 && offset <= 0xFFFFF && ctx.program->chip_class >= GFX9) {
bool soe = smem->operands.size() >= (!smem->definitions.empty() ? 3 : 4);
if (soe &&
(!ctx.info[smem->operands.back().tempId()].is_constant_or_literal(32) ||