From 17aac7926a8bf06873fc8b9b5a03a3508131714f Mon Sep 17 00:00:00 2001
From: Daniel Schürmann
Date: Fri, 5 Feb 2021 18:25:18 +0100
Subject: aco: fix shared VGPR allocation on RDNA2

VGPRs are now allocated in blocks of 8 normal or 16 shared VGPRs, respectively.

Fixes: 14a5021aff661a26d76f330fec55d400d35443a8 ('aco/gfx10: Refactor of GFX10 wave64 bpermute.')
Reviewed-by: Rhys Perry
Part-of:
(cherry picked from commit bacc3b36f55ed1f0fbb8a7efdfb072a0f3ee4ee1)
---
 .pick_status.json                              | 2 +-
 src/amd/compiler/aco_instruction_selection.cpp | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index c9e47ea2620..52c59a1ed07 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -5836,7 +5836,7 @@
         "description": "aco: fix shared VGPR allocation on RDNA2",
         "nominated": true,
         "nomination_type": 1,
-        "resolution": 0,
+        "resolution": 1,
         "master_sha": null,
         "because_sha": "14a5021aff661a26d76f330fec55d400d35443a8"
     },
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 9824324aa0a..142c75c2b48 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -196,8 +196,9 @@ static Temp emit_bpermute(isel_context *ctx, Builder &bld, Temp index, Temp data
       /* GFX10 wave64 mode: emulate full-wave bpermute */
       if (!ctx->has_gfx10_wave64_bpermute) {
          ctx->has_gfx10_wave64_bpermute = true;
-         ctx->program->config->num_shared_vgprs = 8; /* Shared VGPRs are allocated in groups of 8 */
-         ctx->program->vgpr_limit -= 4; /* We allocate 8 shared VGPRs, so we'll have 4 fewer normal VGPRs */
+         /* Shared VGPRs are allocated in groups of 8/16 */
+         ctx->program->config->num_shared_vgprs = ctx->program->chip_class >= GFX10_3 ? 16 : 8;
+         ctx->program->vgpr_limit -= ctx->program->chip_class >= GFX10_3 ? 8 : 4;
       }
 
       Temp index_is_lo = bld.vopc(aco_opcode::v_cmp_ge_u32, bld.def(bld.lm), Operand(31u), index);
-- 
cgit v1.2.3