summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Nicolai Hähnle <nicolai.haehnle@amd.com> 2016-10-31 12:50:09 +0100
committer: Nicolai Hähnle <nicolai.haehnle@amd.com> 2016-11-02 12:36:32 +0100
commit: 36047ed3b4d29636cb48fe486c805b3ea374ce96 (patch)
tree: 742ac3c8b677bd07491a861c81fe536b5566d550
parent: 4b99016d9b2b44b780cec43658759895622af2cb (diff)
radeonsi: generate GS prolog to (partially) fix triangle strip adjacency rotation [wip]
Fixes GL45-CTS.geometry_shader.adjacency.adjacency_indiced_triangle_strip and others. This leaves the case of triangle strips with adjacency and primitive restarts open. It seems that the only thing that cares about that is a piglit test. Fixing this efficiently would be really involved, and I don't want to use the hammer of degrading to software handling of indices because there may well be software that uses this draw mode (without caring about the precise rotation of triangles).
-rw-r--r-- src/gallium/drivers/radeonsi/si_pipe.c 1
-rw-r--r-- src/gallium/drivers/radeonsi/si_pipe.h 2
-rw-r--r-- src/gallium/drivers/radeonsi/si_shader.c 112
-rw-r--r-- src/gallium/drivers/radeonsi/si_shader.h 10
-rw-r--r-- src/gallium/drivers/radeonsi/si_state_draw.c 18
-rw-r--r-- src/gallium/drivers/radeonsi/si_state_shaders.c 7
6 files changed, 146 insertions, 4 deletions
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index bf3b442dbc..bc633bb927 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -672,6 +672,7 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
sscreen->vs_prologs,
sscreen->vs_epilogs,
sscreen->tcs_epilogs,
+ sscreen->gs_prologs,
sscreen->ps_prologs,
sscreen->ps_epilogs
};
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index e7617bc497..8e6a94dead 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -96,6 +96,7 @@ struct si_screen {
struct si_shader_part *vs_prologs;
struct si_shader_part *vs_epilogs;
struct si_shader_part *tcs_epilogs;
+ struct si_shader_part *gs_prologs;
struct si_shader_part *ps_prologs;
struct si_shader_part *ps_epilogs;
@@ -319,6 +320,7 @@ struct si_context {
unsigned last_sc_line_stipple;
int last_vtx_reuse_depth;
int current_rast_prim; /* primitive type after TES, GS */
+ bool gs_tri_strip_adj_fix;
unsigned last_gsvs_itemsize;
/* Scratch buffer */
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index fe1542088f..9141d62fae 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -6747,6 +6747,78 @@ static void si_get_ps_epilog_key(struct si_shader *shader,
}
/**
+ * Emit the geometry shader prolog.
+ *
+ * The prolog forwards all of its input SGPRs and VGPRs to its return value.
+ * When the tri_strip_adj_fix bit of the part key is set, it additionally
+ * remaps the six vertex inputs of every primitive whose primitive ID is odd,
+ * which is the rotation needed for triangle strips with adjacency.
+ *
+ * The generated function is left in ctx->main_fn. Only
+ * key->gs_prolog.states is read from the key.
+ */
+static void si_build_gs_prolog_function(struct si_shader_context *ctx,
+					union si_shader_part_key *key)
+{
+	const unsigned num_sgprs = SI_GS_NUM_USER_SGPR + 2;
+	const unsigned num_vgprs = 8;
+	const unsigned num_regs = num_sgprs + num_vgprs;
+	LLVMBuilderRef builder = ctx->gallivm.builder;
+	LLVMTypeRef params[32];
+	LLVMTypeRef returns[32];
+	LLVMValueRef func, ret;
+
+	/* All inputs are i32. SGPRs are returned as i32, VGPRs as f32. */
+	for (unsigned reg = 0; reg < num_regs; ++reg) {
+		params[reg] = ctx->i32;
+		returns[reg] = reg < num_sgprs ? ctx->i32 : ctx->f32;
+	}
+
+	/* Create the function. */
+	si_create_function(ctx, "gs_prolog", returns, num_regs,
+			   params, num_regs, num_sgprs - 1);
+	func = ctx->main_fn;
+
+	/* Forward every input to the matching output. The registers line up,
+	 * so this is expected to be a no-op, but it keeps the compiler from
+	 * overwriting them unintentionally.
+	 */
+	ret = ctx->return_value;
+	for (unsigned reg = 0; reg < num_regs; ++reg) {
+		LLVMValueRef value = LLVMGetParam(func, reg);
+
+		/* VGPR outputs are declared as f32, so reinterpret the i32. */
+		if (reg >= num_sgprs)
+			value = LLVMBuildBitCast(builder, value, ctx->f32, "");
+		ret = LLVMBuildInsertValue(builder, ret, value, reg, "");
+	}
+
+	if (key->gs_prolog.states.tri_strip_adj_fix) {
+		/* Remap the input vertices for every other primitive.
+		 * VGPR 2 is read as the primitive ID; the six vertex inputs
+		 * are VGPRs 0, 1, 3, 4, 5 and 6.
+		 */
+		const unsigned prim_id_param = num_sgprs + 2;
+		const unsigned vtx_params[6] = {
+			num_sgprs,
+			num_sgprs + 1,
+			num_sgprs + 3,
+			num_sgprs + 4,
+			num_sgprs + 5,
+			num_sgprs + 6
+		};
+		LLVMValueRef odd_prim;
+
+		/* Bit 0 of the primitive ID selects the rotated ordering. */
+		odd_prim = LLVMBuildTrunc(builder,
+					  LLVMGetParam(func, prim_id_param),
+					  ctx->i1, "");
+
+		for (unsigned slot = 0; slot < 6; ++slot) {
+			const unsigned dst = vtx_params[slot];
+			const unsigned src = vtx_params[(slot + 4) % 6];
+			LLVMValueRef chosen;
+
+			/* Odd primitives take slot (slot + 4) % 6, even
+			 * primitives keep their own slot.
+			 */
+			chosen = LLVMBuildSelect(builder, odd_prim,
+						 LLVMGetParam(func, src),
+						 LLVMGetParam(func, dst), "");
+			chosen = LLVMBuildBitCast(builder, chosen, ctx->f32, "");
+			ret = LLVMBuildInsertValue(builder, ret, chosen, dst, "");
+		}
+	}
+
+	LLVMBuildRet(builder, ret);
+}
+
+/**
* Given a list of shader part functions, build a wrapper function that
* runs them in sequence to form a monolithic shader.
*/
@@ -7019,6 +7091,18 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
parts[1] = ctx.main_fn;
si_build_wrapper_function(&ctx, parts, 2, 0);
+ } else if (is_monolithic && ctx.type == PIPE_SHADER_GEOMETRY) {
+ LLVMValueRef parts[2];
+ union si_shader_part_key prolog_key;
+
+ parts[1] = ctx.main_fn;
+
+ memset(&prolog_key, 0, sizeof(prolog_key));
+ prolog_key.gs_prolog.states = shader->key.gs.prolog;
+ si_build_gs_prolog_function(&ctx, &prolog_key);
+ parts[0] = ctx.main_fn;
+
+ si_build_wrapper_function(&ctx, parts, 2, 1);
} else if (is_monolithic && ctx.type == PIPE_SHADER_FRAGMENT) {
LLVMValueRef parts[3];
union si_shader_part_key prolog_key;
@@ -7207,6 +7291,9 @@ si_get_shader_part(struct si_screen *sscreen,
assert(!prolog);
shader.key.tcs.epilog = key->tcs_epilog.states;
break;
+ case PIPE_SHADER_GEOMETRY:
+ assert(prolog);
+ break;
case PIPE_SHADER_FRAGMENT:
if (prolog)
shader.key.ps.prolog = key->ps_prolog.states;
@@ -7531,6 +7618,27 @@ static bool si_shader_select_tcs_parts(struct si_screen *sscreen,
}
/**
+ * Look up (compiling on demand) the GS prolog that matches the shader key
+ * and attach it to the shader.
+ *
+ * Returns true when shader->prolog was set to a non-NULL part, false
+ * otherwise.
+ */
+static bool si_shader_select_gs_parts(struct si_screen *sscreen,
+				      LLVMTargetMachineRef tm,
+				      struct si_shader *shader,
+				      struct pipe_debug_callback *debug)
+{
+	union si_shader_part_key gs_key;
+
+	/* Clear the whole union before filling in the GS member. */
+	memset(&gs_key, 0, sizeof(gs_key));
+	gs_key.gs_prolog.states = shader->key.gs.prolog;
+
+	shader->prolog = si_get_shader_part(sscreen, &sscreen->gs_prologs,
+					    PIPE_SHADER_GEOMETRY, true,
+					    &gs_key, tm, debug,
+					    si_build_gs_prolog_function,
+					    "Geometry Shader Prolog");
+	if (!shader->prolog)
+		return false;
+
+	return true;
+}
+
+/**
* Build the pixel shader prolog function. This handles:
* - two-side color selection and interpolation
* - overriding interpolation parameters for the API PS
@@ -8047,6 +8155,10 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
if (!si_shader_select_tes_parts(sscreen, tm, shader, debug))
return -1;
break;
+ case PIPE_SHADER_GEOMETRY:
+ if (!si_shader_select_gs_parts(sscreen, tm, shader, debug))
+ return - 1;
+ break;
case PIPE_SHADER_FRAGMENT:
if (!si_shader_select_ps_parts(sscreen, tm, shader, debug))
return -1;
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 91f9cbffd8..d8ab2a41c9 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -325,6 +325,10 @@ struct si_tcs_epilog_bits {
uint64_t inputs_to_copy;
};
+struct si_gs_prolog_bits { /* GS state bits stored in both the shader key and the prolog part key. */
+ unsigned tri_strip_adj_fix:1; /* when set, the GS prolog remaps the vertex inputs of odd-numbered primitives */
+};
+
/* Common PS bits between the shader key and the prolog key. */
struct si_ps_prolog_bits {
unsigned color_two_side:1;
@@ -363,6 +367,9 @@ union si_shader_part_key {
struct si_tcs_epilog_bits states;
} tcs_epilog;
struct {
+ struct si_gs_prolog_bits states;
+ } gs_prolog;
+ struct {
struct si_ps_prolog_bits states;
unsigned num_input_sgprs:5;
unsigned num_input_vgprs:5;
@@ -401,6 +408,9 @@ union si_shader_key {
struct si_vs_epilog_bits epilog; /* same as VS */
unsigned as_es:1; /* export shader */
} tes; /* tessellation evaluation shader */
+ struct {
+ struct si_gs_prolog_bits prolog;
+ } gs;
};
struct si_shader_config {
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index c0e2642ba3..b934100425 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -982,6 +982,24 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
sctx->do_update_shaders = true;
}
+ if (sctx->gs_shader.cso) {
+ /* Determine whether the GS triangle strip adjacency fix should
+ * be applied. Rotate every other triangle if
+ * - triangle strips with adjacency are fed to the GS and
+ * - primitive restart is disabled (the rotation doesn't help
+ * when the restart occurs after an odd number of triangles).
+ */
+ bool gs_tri_strip_adj_fix =
+ !sctx->tcs_shader.cso && !sctx->tes_shader.cso &&
+ info->mode == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY &&
+ !info->primitive_restart;
+
+ if (gs_tri_strip_adj_fix != sctx->gs_tri_strip_adj_fix) {
+ sctx->gs_tri_strip_adj_fix = gs_tri_strip_adj_fix;
+ sctx->do_update_shaders = true;
+ }
+ }
+
if (sctx->do_update_shaders && !si_update_shaders(sctx))
return;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 4c647cbbf0..2a41bf1c20 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -896,6 +896,7 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
key->tes.epilog.export_prim_id = 1;
break;
case PIPE_SHADER_GEOMETRY:
+ key->gs.prolog.tri_strip_adj_fix = sctx->gs_tri_strip_adj_fix;
break;
case PIPE_SHADER_FRAGMENT: {
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
@@ -1155,8 +1156,7 @@ void si_init_shader_selector_async(void *job, int thread_index)
* If this fails, the driver will try to compile a monolithic shader
* on demand.
*/
- if (sel->type != PIPE_SHADER_GEOMETRY &&
- !sscreen->use_monolithic_shaders) {
+ if (!sscreen->use_monolithic_shaders) {
struct si_shader *shader = CALLOC_STRUCT(si_shader);
void *tgsi_binary;
@@ -1201,8 +1201,7 @@ void si_init_shader_selector_async(void *job, int thread_index)
}
/* Pre-compilation. */
- if (sel->type == PIPE_SHADER_GEOMETRY ||
- sscreen->b.debug_flags & DBG_PRECOMPILE) {
+ if (sscreen->b.debug_flags & DBG_PRECOMPILE) {
struct si_shader_ctx_state state = {sel};
union si_shader_key key;