diff options
author | Daniel Schürmann <daniel@schuermann.dev> | 2021-02-05 18:25:18 +0100 |
---|---|---|
committer | Marge Bot <eric+marge@anholt.net> | 2021-02-12 19:00:18 +0000 |
commit | bacc3b36f55ed1f0fbb8a7efdfb072a0f3ee4ee1 (patch) | |
tree | e0f7d7077dbef3eb7df38e1f6e93ae2251a3afed | |
parent | 04df0cb4ae7055b0a4a6dc9875aa5926131fe5f4 (diff) |
aco: fix shared VGPR allocation on RDNA2
VGPRs are now allocated in blocks of 8 normal
or 16 shared VGPRs, respectively.
Fixes: 14a5021aff661a26d76f330fec55d400d35443a8 ('aco/gfx10: Refactor of GFX10 wave64 bpermute.')
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8921>
-rw-r--r-- | src/amd/compiler/aco_instruction_selection.cpp | 5 |
1 files changed, 3 insertions, 2 deletions
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 7eb95bc43f7..89d1b0edea1 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -195,8 +195,9 @@ static Temp emit_bpermute(isel_context *ctx, Builder &bld, Temp index, Temp data
       /* GFX10 wave64 mode: emulate full-wave bpermute */
       if (!ctx->has_gfx10_wave64_bpermute) {
          ctx->has_gfx10_wave64_bpermute = true;
-         ctx->program->config->num_shared_vgprs = 8; /* Shared VGPRs are allocated in groups of 8 */
-         ctx->program->vgpr_limit -= 4; /* We allocate 8 shared VGPRs, so we'll have 4 fewer normal VGPRs */
+         /* Shared VGPRs are allocated in groups of 8/16 */
+         ctx->program->config->num_shared_vgprs = ctx->program->chip_class >= GFX10_3 ? 16 : 8;
+         ctx->program->vgpr_limit -= ctx->program->chip_class >= GFX10_3 ? 8 : 4;
       }
 
       Temp index_is_lo = bld.vopc(aco_opcode::v_cmp_ge_u32, bld.def(bld.lm), Operand(31u), index);