summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Michel Dänzer <michel.daenzer@amd.com> 2012-08-23 17:10:37 +0200
committer Michel Dänzer <michel@daenzer.net> 2012-08-27 11:51:41 +0200
commit d1e40b3d40b2e90ad4f275565f1ae27fe6f964cc (patch)
tree 41a93fa06f09fad3ba0ae3071a4997c58c0406e6
parent 84fdda280f4361add3802476ce9a30ac1c0b4c69 (diff)
radeonsi: Maintain cache of pixel shader variants according to context state.
Mostly inspired by r600g commit 4acf71f01ea1edb253cd38cc059d4af1a2a40bf4
('r600g: cache shader variants instead of rebuilding v3').

Signed-off-by: Michel Dänzer <michel.daenzer@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r-- src/gallium/drivers/radeonsi/radeonsi_pipe.h 4
-rw-r--r-- src/gallium/drivers/radeonsi/radeonsi_shader.c 14
-rw-r--r-- src/gallium/drivers/radeonsi/radeonsi_shader.h 24
-rw-r--r-- src/gallium/drivers/radeonsi/si_state.c 185
-rw-r--r-- src/gallium/drivers/radeonsi/si_state.h 5
-rw-r--r-- src/gallium/drivers/radeonsi/si_state_draw.c 37
6 files changed, 210 insertions, 59 deletions
diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.h b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
index bec2939d3f7..989bb49cbee 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_pipe.h
+++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.h
@@ -126,8 +126,8 @@ struct r600_context {
unsigned pa_cl_vs_out_cntl;
/* for saving when using blitter */
struct pipe_stencil_ref stencil_ref;
- struct si_pipe_shader *ps_shader;
- struct si_pipe_shader *vs_shader;
+ struct si_pipe_shader_selector *ps_shader;
+ struct si_pipe_shader_selector *vs_shader;
struct pipe_query *current_render_cond;
unsigned current_render_cond_mode;
struct pipe_query *saved_render_cond;
diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c b/src/gallium/drivers/radeonsi/radeonsi_shader.c
index 0f2aaef8175..671eda4a381 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_shader.c
+++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c
@@ -282,7 +282,8 @@ static void declare_input_fs(
switch (decl->Interp.Interpolate) {
case TGSI_INTERPOLATE_COLOR:
/* XXX: Flat shading hangs the GPU */
- if (si_shader_ctx->rctx->queued.named.rasterizer->flatshade) {
+ if (si_shader_ctx->rctx->queued.named.rasterizer &&
+ si_shader_ctx->rctx->queued.named.rasterizer->flatshade) {
#if 0
intr_name = "llvm.SI.fs.interp.constant";
#else
@@ -617,6 +618,7 @@ int si_pipe_shader_create(
struct si_pipe_shader *shader)
{
struct r600_context *rctx = (struct r600_context*)ctx;
+ struct si_pipe_shader_selector *sel = shader->selector;
struct si_shader_context si_shader_ctx;
struct tgsi_shader_info shader_info;
struct lp_build_tgsi_context * bld_base;
@@ -633,7 +635,7 @@ int si_pipe_shader_create(
radeon_llvm_context_init(&si_shader_ctx.radeon_bld);
bld_base = &si_shader_ctx.radeon_bld.soa.bld_base;
- tgsi_scan_shader(shader->tokens, &shader_info);
+ tgsi_scan_shader(sel->tokens, &shader_info);
bld_base->info = &shader_info;
bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;
bld_base->emit_epilogue = si_llvm_emit_epilogue;
@@ -642,7 +644,7 @@ int si_pipe_shader_create(
bld_base->op_actions[TGSI_OPCODE_TXP] = tex_action;
si_shader_ctx.radeon_bld.load_input = declare_input;
- si_shader_ctx.tokens = shader->tokens;
+ si_shader_ctx.tokens = sel->tokens;
tgsi_parse_init(&si_shader_ctx.parse, si_shader_ctx.tokens);
si_shader_ctx.shader = shader;
si_shader_ctx.type = si_shader_ctx.parse.FullHeader.Processor.Processor;
@@ -653,10 +655,10 @@ int si_pipe_shader_create(
/* Dump TGSI code before doing TGSI->LLVM conversion in case the
* conversion fails. */
if (dump) {
- tgsi_dump(shader->tokens, 0);
+ tgsi_dump(sel->tokens, 0);
}
- if (!lp_build_tgsi_llvm(bld_base, shader->tokens)) {
+ if (!lp_build_tgsi_llvm(bld_base, sel->tokens)) {
fprintf(stderr, "Failed to translate shader from TGSI to LLVM\n");
return -EINVAL;
}
@@ -710,6 +712,4 @@ int si_pipe_shader_create(
void si_pipe_shader_destroy(struct pipe_context *ctx, struct si_pipe_shader *shader)
{
si_resource_reference(&shader->bo, NULL);
-
- memset(&shader->shader,0,sizeof(struct si_shader));
}
diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.h b/src/gallium/drivers/radeonsi/radeonsi_shader.h
index d44ee9b128b..aa2888ccf30 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_shader.h
+++ b/src/gallium/drivers/radeonsi/radeonsi_shader.h
@@ -37,6 +37,25 @@ struct si_shader_io {
bool centroid;
};
+struct si_pipe_shader;
+
+struct si_pipe_shader_selector {
+ struct si_pipe_shader *current;
+
+ struct tgsi_token *tokens;
+ struct pipe_stream_output_info so;
+
+ unsigned num_shaders;
+
+ /* PIPE_SHADER_[VERTEX|FRAGMENT|...] */
+ unsigned type;
+
+ /* 1 when the shader contains
+ * TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, otherwise it's 0.
+ * Used to determine whether we need to include nr_cbufs in the key */
+ unsigned fs_write_all;
+};
+
struct si_shader {
unsigned ninput;
struct si_shader_io input[32];
@@ -50,16 +69,17 @@ struct si_shader {
};
struct si_pipe_shader {
+ struct si_pipe_shader_selector *selector;
+ struct si_pipe_shader *next_variant;
struct si_shader shader;
struct si_pm4_state *pm4;
struct si_resource *bo;
- struct tgsi_token *tokens;
unsigned num_sgprs;
unsigned num_vgprs;
unsigned spi_ps_input_ena;
unsigned sprite_coord_enable;
- struct pipe_stream_output_info so;
unsigned so_strides[4];
+ unsigned key;
};
/* radeonsi_shader.c */
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 5df22dd5f3a..5c2e7434ba3 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -1717,77 +1717,200 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
* shaders
*/
+/* Compute the key for the hw shader variant */
+static INLINE unsigned si_shader_selector_key(struct pipe_context *ctx,
+ struct si_pipe_shader_selector *sel)
+{
+ struct r600_context *rctx = (struct r600_context *)ctx;
+ unsigned key = 0;
+
+ if (sel->type == PIPE_SHADER_FRAGMENT) {
+ if (sel->fs_write_all)
+ key |= rctx->framebuffer.nr_cbufs;
+ /*if (rctx->queued.named.rasterizer)
+ key |= rctx->queued.named.rasterizer->flatshade << 4;*/
+ /*key |== rctx->two_side << 5;*/
+ }
+
+ return key;
+}
+
+/* Select the hw shader variant depending on the current state.
+ * (*dirty) is set to 1 if current variant was changed */
+int si_shader_select(struct pipe_context *ctx,
+ struct si_pipe_shader_selector *sel,
+ unsigned *dirty)
+{
+ unsigned key;
+ struct si_pipe_shader * shader = NULL;
+ int r;
+
+ key = si_shader_selector_key(ctx, sel);
+
+ /* Check if we don't need to change anything.
+ * This path is also used for most shaders that don't need multiple
+ * variants, it will cost just a computation of the key and this
+ * test. */
+ if (likely(sel->current && sel->current->key == key)) {
+ return 0;
+ }
+
+ /* lookup if we have other variants in the list */
+ if (sel->num_shaders > 1) {
+ struct si_pipe_shader *p = sel->current, *c = p->next_variant;
+
+ while (c && c->key != key) {
+ p = c;
+ c = c->next_variant;
+ }
+
+ if (c) {
+ p->next_variant = c->next_variant;
+ shader = c;
+ }
+ }
+
+ if (unlikely(!shader)) {
+ shader = CALLOC(1, sizeof(struct si_pipe_shader));
+ shader->selector = sel;
+
+ r = si_pipe_shader_create(ctx, shader);
+ if (unlikely(r)) {
+ R600_ERR("Failed to build shader variant (type=%u, key=%u) %d\n",
+ sel->type, key, r);
+ sel->current = NULL;
+ return r;
+ }
+
+ /* We don't know the value of fs_write_all property until we built
+ * at least one variant, so we may need to recompute the key (include
+ * rctx->framebuffer.nr_cbufs) after building first variant. */
+ if (sel->type == PIPE_SHADER_FRAGMENT &&
+ sel->num_shaders == 0 &&
+ shader->shader.fs_write_all) {
+ sel->fs_write_all = 1;
+ key = si_shader_selector_key(ctx, sel);
+ }
+
+ shader->key = key;
+ sel->num_shaders++;
+ }
+
+ if (dirty)
+ *dirty = 1;
+
+ shader->next_variant = sel->current;
+ sel->current = shader;
+
+ return 0;
+}
+
static void *si_create_shader_state(struct pipe_context *ctx,
- const struct pipe_shader_state *state)
+ const struct pipe_shader_state *state,
+ unsigned pipe_shader_type)
{
- struct si_pipe_shader *shader = CALLOC_STRUCT(si_pipe_shader);
+ struct si_pipe_shader_selector *sel = CALLOC_STRUCT(si_pipe_shader_selector);
+ int r;
- shader->tokens = tgsi_dup_tokens(state->tokens);
- shader->so = state->stream_output;
+ sel->type = pipe_shader_type;
+ sel->tokens = tgsi_dup_tokens(state->tokens);
+ sel->so = state->stream_output;
+
+ r = si_shader_select(ctx, sel, NULL);
+ if (r) {
+ free(sel);
+ return NULL;
+ }
+
+ return sel;
+}
+
+static void *si_create_fs_state(struct pipe_context *ctx,
+ const struct pipe_shader_state *state)
+{
+ return si_create_shader_state(ctx, state, PIPE_SHADER_FRAGMENT);
+}
- return shader;
+static void *si_create_vs_state(struct pipe_context *ctx,
+ const struct pipe_shader_state *state)
+{
+ return si_create_shader_state(ctx, state, PIPE_SHADER_VERTEX);
}
static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
{
struct r600_context *rctx = (struct r600_context *)ctx;
- struct si_pipe_shader *shader = state;
+ struct si_pipe_shader_selector *sel = state;
- if (rctx->vs_shader == state)
+ if (rctx->vs_shader == sel)
return;
rctx->shader_dirty = true;
- rctx->vs_shader = shader;
+ rctx->vs_shader = sel;
- if (shader) {
- si_pm4_bind_state(rctx, vs, shader->pm4);
- }
+ if (sel && sel->current)
+ si_pm4_bind_state(rctx, vs, sel->current->pm4);
+ else
+ si_pm4_bind_state(rctx, vs, rctx->dummy_pixel_shader->pm4);
}
static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
{
struct r600_context *rctx = (struct r600_context *)ctx;
- struct si_pipe_shader *shader = state;
+ struct si_pipe_shader_selector *sel = state;
- if (rctx->ps_shader == state)
+ if (rctx->ps_shader == sel)
return;
rctx->shader_dirty = true;
- rctx->ps_shader = shader;
+ rctx->ps_shader = sel;
- if (shader) {
- si_pm4_bind_state(rctx, ps, shader->pm4);
- }
+ if (sel && sel->current)
+ si_pm4_bind_state(rctx, ps, sel->current->pm4);
+ else
+ si_pm4_bind_state(rctx, ps, rctx->dummy_pixel_shader->pm4);
}
+static void si_delete_shader_selector(struct pipe_context *ctx,
+ struct si_pipe_shader_selector *sel)
+{
+ struct r600_context *rctx = (struct r600_context *)ctx;
+ struct si_pipe_shader *p = sel->current, *c;
+
+ while (p) {
+ c = p->next_variant;
+ si_pm4_delete_state(rctx, vs, p->pm4);
+ si_pipe_shader_destroy(ctx, p);
+ free(p);
+ p = c;
+ }
+
+ free(sel->tokens);
+ free(sel);
+ }
+
static void si_delete_vs_shader(struct pipe_context *ctx, void *state)
{
struct r600_context *rctx = (struct r600_context *)ctx;
- struct si_pipe_shader *shader = (struct si_pipe_shader *)state;
+ struct si_pipe_shader_selector *sel = (struct si_pipe_shader_selector *)state;
- if (rctx->vs_shader == shader) {
+ if (rctx->vs_shader == sel) {
rctx->vs_shader = NULL;
}
- si_pm4_delete_state(rctx, vs, shader->pm4);
- free(shader->tokens);
- si_pipe_shader_destroy(ctx, shader);
- free(shader);
+ si_delete_shader_selector(ctx, sel);
}
static void si_delete_ps_shader(struct pipe_context *ctx, void *state)
{
struct r600_context *rctx = (struct r600_context *)ctx;
- struct si_pipe_shader *shader = (struct si_pipe_shader *)state;
+ struct si_pipe_shader_selector *sel = (struct si_pipe_shader_selector *)state;
- if (rctx->ps_shader == shader) {
+ if (rctx->ps_shader == sel) {
rctx->ps_shader = NULL;
}
- si_pm4_delete_state(rctx, ps, shader->pm4);
- free(shader->tokens);
- si_pipe_shader_destroy(ctx, shader);
- free(shader);
+ si_delete_shader_selector(ctx, sel);
}
/*
@@ -2269,8 +2392,8 @@ void si_init_state_functions(struct r600_context *rctx)
rctx->context.set_framebuffer_state = si_set_framebuffer_state;
- rctx->context.create_vs_state = si_create_shader_state;
- rctx->context.create_fs_state = si_create_shader_state;
+ rctx->context.create_vs_state = si_create_vs_state;
+ rctx->context.create_fs_state = si_create_fs_state;
rctx->context.bind_vs_state = si_bind_vs_shader;
rctx->context.bind_fs_state = si_bind_ps_shader;
rctx->context.delete_vs_state = si_delete_vs_shader;
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index f53ecb720ff..d59624cd8e6 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -126,11 +126,16 @@ union si_state {
} while(0)
/* si_state.c */
+struct si_pipe_shader_selector;
+
bool si_is_format_supported(struct pipe_screen *screen,
enum pipe_format format,
enum pipe_texture_target target,
unsigned sample_count,
unsigned usage);
+int si_shader_select(struct pipe_context *ctx,
+ struct si_pipe_shader_selector *sel,
+ unsigned *dirty);
void si_init_state_functions(struct r600_context *rctx);
void si_init_config(struct r600_context *rctx);
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index fda8b0bd6a5..95821dc5f5c 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -317,8 +317,8 @@ static void si_update_alpha_ref(struct r600_context *rctx)
static void si_update_spi_map(struct r600_context *rctx)
{
- struct si_shader *ps = &rctx->ps_shader->shader;
- struct si_shader *vs = &rctx->vs_shader->shader;
+ struct si_shader *ps = &rctx->ps_shader->current->shader;
+ struct si_shader *vs = &rctx->vs_shader->current->shader;
struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
unsigned i, j, tmp;
@@ -362,36 +362,39 @@ static void si_update_spi_map(struct r600_context *rctx)
static void si_update_derived_state(struct r600_context *rctx)
{
struct pipe_context * ctx = (struct pipe_context*)rctx;
+ unsigned ps_dirty = 0;
if (!rctx->blitter->running) {
if (rctx->have_depth_fb || rctx->have_depth_texture)
si_flush_depth_textures(rctx);
}
- if ((rctx->ps_shader->shader.fs_write_all &&
- (rctx->ps_shader->shader.nr_cbufs != rctx->framebuffer.nr_cbufs)) ||
- (rctx->sprite_coord_enable &&
- (rctx->ps_shader->sprite_coord_enable != rctx->sprite_coord_enable))) {
- si_pipe_shader_destroy(&rctx->context, rctx->ps_shader);
- }
+ si_shader_select(ctx, rctx->ps_shader, &ps_dirty);
if (rctx->alpha_ref_dirty) {
si_update_alpha_ref(rctx);
}
- if (!rctx->vs_shader->bo) {
- si_pipe_shader_vs(ctx, rctx->vs_shader);
+ if (!rctx->vs_shader->current->pm4) {
+ si_pipe_shader_vs(ctx, rctx->vs_shader->current);
}
- if (!rctx->ps_shader->bo) {
- si_pipe_shader_ps(ctx, rctx->ps_shader);
+ if (!rctx->ps_shader->current->pm4) {
+ si_pipe_shader_ps(ctx, rctx->ps_shader->current);
+ ps_dirty = 0;
}
- if (!rctx->ps_shader->bo) {
- if (!rctx->dummy_pixel_shader->bo)
+ if (!rctx->ps_shader->current->bo) {
+ if (!rctx->dummy_pixel_shader->pm4)
si_pipe_shader_ps(ctx, rctx->dummy_pixel_shader);
-
- if (rctx->dummy_pixel_shader->pm4)
+ else
si_pm4_bind_state(rctx, vs, rctx->dummy_pixel_shader->pm4);
+
+ ps_dirty = 0;
+ }
+
+ if (ps_dirty) {
+ si_pm4_bind_state(rctx, ps, rctx->ps_shader->current->pm4);
+ rctx->shader_dirty = true;
}
if (rctx->shader_dirty) {
@@ -545,7 +548,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
r600_context_draw_opaque_count(rctx, (struct r600_so_target*)info->count_from_stream_output);
}
- rctx->vs_shader_so_strides = rctx->vs_shader->so_strides;
+ rctx->vs_shader_so_strides = rctx->vs_shader->current->so_strides;
if (!si_update_draw_info_state(rctx, info))
return;