summary | refs | log | tree | commit | diff
path: root/src/gallium/drivers/radeonsi
diff options
context:
space:
mode:
author: Marek Olšák <marek.olsak@amd.com> 2019-08-19 13:15:54 -0400
committer: Juan A. Suarez Romero <jasuarez@igalia.com> 2019-08-28 08:19:30 +0000
commit: 915a272b5a6eaf09e0acee96925762e5ad608927 (patch)
tree: 81d48041da68cfa23113f83ce35ec4c66515fc6d /src/gallium/drivers/radeonsi
parent: e4df7ffc23e8b48e852c95a7083f6cb86751134f (diff)
radeonsi: fix scratch buffer WAVESIZE setting leading to corruption
Cc: 19.2 19.1 <mesa-stable@lists.freedesktop.org>
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
(cherry picked from commit 360cf3c4b05679709574ef4d20b5097b0fd0be82)
Diffstat (limited to 'src/gallium/drivers/radeonsi')
-rw-r--r-- src/gallium/drivers/radeonsi/si_compute.c | 6
-rw-r--r-- src/gallium/drivers/radeonsi/si_pipe.h | 2
-rw-r--r-- src/gallium/drivers/radeonsi/si_state_shaders.c | 62
3 files changed, 39 insertions, 31 deletions
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index f1a433b72df..756f5372fa2 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -500,9 +500,13 @@ static bool si_switch_compute_shader(struct si_context *sctx,
COMPUTE_DBG(sctx->screen, "COMPUTE_PGM_RSRC1: 0x%08x "
"COMPUTE_PGM_RSRC2: 0x%08x\n", config->rsrc1, config->rsrc2);
+ sctx->max_seen_compute_scratch_bytes_per_wave =
+ MAX2(sctx->max_seen_compute_scratch_bytes_per_wave,
+ config->scratch_bytes_per_wave);
+
radeon_set_sh_reg(cs, R_00B860_COMPUTE_TMPRING_SIZE,
S_00B860_WAVES(sctx->scratch_waves)
- | S_00B860_WAVESIZE(config->scratch_bytes_per_wave >> 10));
+ | S_00B860_WAVESIZE(sctx->max_seen_compute_scratch_bytes_per_wave >> 10));
sctx->cs_shader_state.emitted_program = program;
sctx->cs_shader_state.offset = offset;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 949fa0755cb..35e548cdec5 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -984,6 +984,8 @@ struct si_context {
struct si_resource *scratch_buffer;
unsigned scratch_waves;
unsigned spi_tmpring_size;
+ unsigned max_seen_scratch_bytes_per_wave;
+ unsigned max_seen_compute_scratch_bytes_per_wave;
struct si_resource *compute_scratch_buffer;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index db4c77da2ff..ef8943d9011 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -3104,11 +3104,6 @@ static int si_update_scratch_buffer(struct si_context *sctx,
return 1;
}
-static unsigned si_get_current_scratch_buffer_size(struct si_context *sctx)
-{
- return sctx->scratch_buffer ? sctx->scratch_buffer->b.b.width0 : 0;
-}
-
static unsigned si_get_scratch_buffer_bytes_per_wave(struct si_shader *shader)
{
return shader ? shader->config.scratch_bytes_per_wave : 0;
@@ -3123,23 +3118,6 @@ static struct si_shader *si_get_tcs_current(struct si_context *sctx)
sctx->fixed_func_tcs_shader.current;
}
-static unsigned si_get_max_scratch_bytes_per_wave(struct si_context *sctx)
-{
- unsigned bytes = 0;
-
- bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->ps_shader.current));
- bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->gs_shader.current));
- bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->vs_shader.current));
- bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->tes_shader.current));
-
- if (sctx->tes_shader.cso) {
- struct si_shader *tcs = si_get_tcs_current(sctx);
-
- bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(tcs));
- }
- return bytes;
-}
-
static bool si_update_scratch_relocs(struct si_context *sctx)
{
struct si_shader *tcs = si_get_tcs_current(sctx);
@@ -3197,16 +3175,40 @@ static bool si_update_scratch_relocs(struct si_context *sctx)
static bool si_update_spi_tmpring_size(struct si_context *sctx)
{
- unsigned current_scratch_buffer_size =
- si_get_current_scratch_buffer_size(sctx);
- unsigned scratch_bytes_per_wave =
- si_get_max_scratch_bytes_per_wave(sctx);
- unsigned scratch_needed_size = scratch_bytes_per_wave *
- sctx->scratch_waves;
+ /* SPI_TMPRING_SIZE.WAVESIZE must be constant for each scratch buffer.
+ * There are 2 cases to handle:
+ *
+ * - If the current needed size is less than the maximum seen size,
+ * use the maximum seen size, so that WAVESIZE remains the same.
+ *
+ * - If the current needed size is greater than the maximum seen size,
+ * the scratch buffer is reallocated, so we can increase WAVESIZE.
+ *
+ * Shaders that set SCRATCH_EN=0 don't allocate scratch space.
+ * Otherwise, the number of waves that can use scratch is
+ * SPI_TMPRING_SIZE.WAVES.
+ */
+ unsigned bytes = 0;
+
+ bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->ps_shader.current));
+ bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->gs_shader.current));
+ bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->vs_shader.current));
+
+ if (sctx->tes_shader.cso) {
+ bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(sctx->tes_shader.current));
+ bytes = MAX2(bytes, si_get_scratch_buffer_bytes_per_wave(si_get_tcs_current(sctx)));
+ }
+
+ sctx->max_seen_scratch_bytes_per_wave =
+ MAX2(sctx->max_seen_scratch_bytes_per_wave, bytes);
+
+ unsigned scratch_needed_size =
+ sctx->max_seen_scratch_bytes_per_wave * sctx->scratch_waves;
unsigned spi_tmpring_size;
if (scratch_needed_size > 0) {
- if (scratch_needed_size > current_scratch_buffer_size) {
+ if (!sctx->scratch_buffer ||
+ scratch_needed_size > sctx->scratch_buffer->b.b.width0) {
/* Create a bigger scratch buffer */
si_resource_reference(&sctx->scratch_buffer, NULL);
@@ -3232,7 +3234,7 @@ static bool si_update_spi_tmpring_size(struct si_context *sctx)
"scratch size should already be aligned correctly.");
spi_tmpring_size = S_0286E8_WAVES(sctx->scratch_waves) |
- S_0286E8_WAVESIZE(scratch_bytes_per_wave >> 10);
+ S_0286E8_WAVESIZE(sctx->max_seen_scratch_bytes_per_wave >> 10);
if (spi_tmpring_size != sctx->spi_tmpring_size) {
sctx->spi_tmpring_size = spi_tmpring_size;
si_mark_atom_dirty(sctx, &sctx->atoms.s.scratch_state);