summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Marek Olšák <marek.olsak@amd.com> 2021-06-01 01:40:37 -0400
committer Marek Olšák <marek.olsak@amd.com> 2021-06-20 01:22:01 -0400
commit 9c3225cb53bae8dacf6c5b1407b3c59f3de95c1c (patch)
tree 54bec891ca1ed8436089df8ae76d75c83b4f9305
parent 12d2df15f178591d45193b6cc3e093281e2ea0aa (diff)
radeonsi: move the accepting code into the bbox cull branch in NGG cull code
This reduces the number of jumps. No change in behavior.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11486>
-rw-r--r-- src/gallium/drivers/radeonsi/gfx10_shader_ngg.c 36
1 files changed, 24 insertions, 12 deletions
diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
index 0bcd8b4b3d9..02372d82b54 100644
--- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
+++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
@@ -782,6 +782,23 @@ static void update_thread_counts(struct si_shader_context *ctx, LLVMValueRef *ne
"");
}
+static void gfx10_build_primitive_accepted(struct ac_llvm_context *ac, LLVMValueRef accepted,
+ void *userdata)
+{
+ struct si_shader_context *ctx = container_of(ac, struct si_shader_context, ac);
+ LLVMValueRef *params = (LLVMValueRef *)userdata;
+ LLVMValueRef gs_accepted = params[0];
+ LLVMValueRef *gs_vtxptr = (LLVMValueRef *)params[1];
+
+ ac_build_ifcc(&ctx->ac, accepted, 0);
+ LLVMBuildStore(ctx->ac.builder, ctx->ac.i32_1, gs_accepted);
+ for (unsigned vtx = 0; vtx < 3; vtx++) {
+ LLVMBuildStore(ctx->ac.builder, ctx->ac.i8_1,
+ si_build_gep_i8(ctx, gs_vtxptr[vtx], lds_byte0_accept_flag));
+ }
+ ac_build_endif(&ctx->ac, 0);
+}
+
/**
* Cull primitives for NGG VS or TES, then compact vertices, which happens
* before the VS or TES main function. Return values for the main function.
@@ -983,18 +1000,13 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi, unsigned max_out
options.cull_w = true;
/* Tell ES threads whether their vertex survived. */
- ac_build_ifcc(&ctx->ac,
- ac_cull_triangle(&ctx->ac, pos, ctx->ac.i1true, vp_scale, vp_translate,
- small_prim_precision, &options, NULL, NULL),
- 16003);
- {
- LLVMBuildStore(builder, ctx->ac.i32_1, gs_accepted);
- for (unsigned vtx = 0; vtx < 3; vtx++) {
- LLVMBuildStore(builder, ctx->ac.i8_1,
- si_build_gep_i8(ctx, gs_vtxptr[vtx], lds_byte0_accept_flag));
- }
- }
- ac_build_endif(&ctx->ac, 16003);
+ LLVMValueRef params[] = {
+ gs_accepted,
+ (void*)gs_vtxptr,
+ };
+ ac_cull_triangle(&ctx->ac, pos, ctx->ac.i1true, vp_scale, vp_translate,
+ small_prim_precision, &options,
+ gfx10_build_primitive_accepted, params);
}
ac_build_endif(&ctx->ac, 16002);
ac_build_s_barrier(&ctx->ac);