summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Marek Olšák <marek.olsak@amd.com> 2016-06-08 13:21:25 +0200
committer: Marek Olšák <marek.olsak@amd.com> 2016-06-13 18:13:51 +0200
commit: 6e1b12c7881fe663cb500cb2f7374f4862bae179 (patch)
tree: c2ccb937970fb27c70f364b528736d7bebfd1330
parent: 0c0f841e5de27d01312f8857641668ca439b1ab1 (diff)
radeonsi: enable scratch coalescing
This makes one particular compute shader 8x faster. Latest LLVM git is required.

Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
-rw-r--r-- src/gallium/drivers/radeonsi/si_shader.c 12
1 files changed, 10 insertions, 2 deletions
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 754b4aff335..f2bd3370c8a 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5903,8 +5903,16 @@ void si_shader_apply_scratch_relocs(struct si_context *sctx,
unsigned i;
uint32_t scratch_rsrc_dword0 = scratch_va;
uint32_t scratch_rsrc_dword1 =
- S_008F04_BASE_ADDRESS_HI(scratch_va >> 32)
- | S_008F04_STRIDE(config->scratch_bytes_per_wave / 64);
+ S_008F04_BASE_ADDRESS_HI(scratch_va >> 32);
+
+ /* Enable scratch coalescing if LLVM sets ELEMENT_SIZE & INDEX_STRIDE
+ * correctly.
+ */
+ if (HAVE_LLVM >= 0x0309)
+ scratch_rsrc_dword1 |= S_008F04_SWIZZLE_ENABLE(1);
+ else
+ scratch_rsrc_dword1 |=
+ S_008F04_STRIDE(config->scratch_bytes_per_wave / 64);
for (i = 0 ; i < shader->binary.reloc_count; i++) {
const struct radeon_shader_reloc *reloc =