summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarek Olšák <marek.olsak@amd.com>2016-11-29 19:25:03 +0100
committerEmil Velikov <emil.l.velikov@gmail.com>2016-12-14 19:03:11 +0000
commit40e16eac7535181adacf6fb583eb278380ef8dfd (patch)
tree77ae619628ac96d6c11fa663dc89c7a5909d07d9
parent3ece25662945396689b20954d2278447841024cc (diff)
radeonsi: apply a multi-wave workgroup SPI bug workaround to affected CIK chips
All codepaths are handled except for clover. Cc: 13.0 <mesa-stable@lists.freedesktop.org> Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com> (cherry picked from commit 72d48fcd8eb5862c72d27e5462c289c5de65396e)
-rw-r--r--src/gallium/drivers/radeonsi/si_compute.c1
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.c24
-rw-r--r--src/gallium/drivers/radeonsi/si_shader.h2
-rw-r--r--src/gallium/drivers/radeonsi/si_state_draw.c6
4 files changed, 29 insertions, 4 deletions
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index a35187cac79..0845711008d 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -343,6 +343,7 @@ static bool si_switch_compute_shader(struct si_context *sctx,
lds_blocks += align(program->local_size, 512) >> 9;
}
+ /* TODO: use si_multiwave_lds_size_workaround */
assert(lds_blocks <= 0xFF);
config->rsrc2 &= C_00B84C_LDS_SIZE;
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 00e78cfa042..0b022357311 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -7741,11 +7741,31 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
return true;
}
-static void si_fix_num_sgprs(struct si_shader *shader)
+void si_multiwave_lds_size_workaround(struct si_screen *sscreen,
+ unsigned *lds_size)
+{
+ /* SPI barrier management bug:
+ * Make sure we have at least 4k of LDS in use to avoid the bug.
+ * It applies to workgroup sizes of more than one wavefront.
+ */
+ if (sscreen->b.family == CHIP_BONAIRE ||
+ sscreen->b.family == CHIP_KABINI ||
+ sscreen->b.family == CHIP_MULLINS)
+ *lds_size = MAX2(*lds_size, 8);
+}
+
+static void si_fix_resource_usage(struct si_screen *sscreen,
+ struct si_shader *shader)
{
unsigned min_sgprs = shader->info.num_input_sgprs + 2; /* VCC */
shader->config.num_sgprs = MAX2(shader->config.num_sgprs, min_sgprs);
+
+ if (shader->selector->type == PIPE_SHADER_COMPUTE &&
+ si_get_max_workgroup_size(shader) > 64) {
+ si_multiwave_lds_size_workaround(sscreen,
+ &shader->config.lds_size);
+ }
}
int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
@@ -7841,7 +7861,7 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
}
}
- si_fix_num_sgprs(shader);
+ si_fix_resource_usage(sscreen, shader);
si_shader_dump(sscreen, shader, debug, sel->info.processor,
stderr);
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index b07210c90f5..10bafca6514 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -482,6 +482,8 @@ int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader)
void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader,
struct pipe_debug_callback *debug, unsigned processor,
FILE *f);
+void si_multiwave_lds_size_workaround(struct si_screen *sscreen,
+ unsigned *lds_size);
void si_shader_apply_scratch_relocs(struct si_context *sctx,
struct si_shader *shader,
struct si_shader_config *config,
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index d18137b6691..447acc1292c 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -162,11 +162,13 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
if (sctx->b.chip_class >= CIK) {
assert(lds_size <= 65536);
- ls_rsrc2 |= S_00B52C_LDS_SIZE(align(lds_size, 512) / 512);
+ lds_size = align(lds_size, 512) / 512;
} else {
assert(lds_size <= 32768);
- ls_rsrc2 |= S_00B52C_LDS_SIZE(align(lds_size, 256) / 256);
+ lds_size = align(lds_size, 256) / 256;
}
+ si_multiwave_lds_size_workaround(sctx->screen, &lds_size);
+ ls_rsrc2 |= S_00B52C_LDS_SIZE(lds_size);
if (sctx->last_ls == ls->current &&
sctx->last_tcs == tcs &&