summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Dave Airlie <airlied@redhat.com> 2017-02-21 14:09:11 +1000
committer Dave Airlie <airlied@redhat.com> 2017-02-23 15:31:37 +1000
commit bec584ec0ea29e81f87fbd4d4a07ef398b8961d1 (patch)
tree 848e38c9cf3133a93fd5a40a3f9344ab4f735e88
parent c2cfb54f139c27023913212d92c463be4661de38 (diff)
radv/ac: pass clips properly from vertex->geometry shader stages.
This works out the geometry shader clip/cull inputs separately from the outputs, and uses that information to read from the ES->GS ring buffer. It stores the clip/cull distances packed into one or two slots. It fixes the ES output emission and GS input reading to match. Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> Signed-off-by: Dave Airlie <airlied@redhat.com>
-rw-r--r-- src/amd/common/ac_nir_to_llvm.c 46
1 files changed, 40 insertions, 6 deletions
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 6c6a1d19228..43d52952e11 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -141,6 +141,8 @@ struct nir_to_llvm_context {
int num_locals;
LLVMValueRef *locals;
bool has_ddxy;
+ uint8_t num_input_clips;
+ uint8_t num_input_culls;
uint8_t num_output_clips;
uint8_t num_output_culls;
@@ -172,9 +174,11 @@ static unsigned shader_io_get_unique_index(gl_varying_slot slot)
return 0;
if (slot == VARYING_SLOT_PSIZ)
return 1;
- if (slot == VARYING_SLOT_CLIP_DIST0)
+ if (slot == VARYING_SLOT_CLIP_DIST0 ||
+ slot == VARYING_SLOT_CULL_DIST0)
return 2;
- if (slot == VARYING_SLOT_CLIP_DIST1)
+ if (slot == VARYING_SLOT_CLIP_DIST1 ||
+ slot == VARYING_SLOT_CULL_DIST1)
return 3;
if (slot >= VARYING_SLOT_VAR0 && slot <= VARYING_SLOT_VAR31)
return 4 + (slot - VARYING_SLOT_VAR0);
@@ -2089,6 +2093,7 @@ load_gs_input(struct nir_to_llvm_context *ctx,
unsigned param, vtx_offset_param;
LLVMValueRef value[4], result;
unsigned vertex_index;
+ unsigned cull_offset = 0;
radv_get_deref_offset(ctx, &instr->variables[0]->deref,
false, &vertex_index,
&const_index, &indir_index);
@@ -2097,11 +2102,14 @@ load_gs_input(struct nir_to_llvm_context *ctx,
vtx_offset = LLVMBuildMul(ctx->builder, ctx->gs_vtx_offset[vtx_offset_param],
LLVMConstInt(ctx->i32, 4, false), "");
+ param = shader_io_get_unique_index(instr->variables[0]->var->data.location);
+ if (instr->variables[0]->var->data.location == VARYING_SLOT_CULL_DIST0)
+ cull_offset += ctx->num_input_clips;
for (unsigned i = 0; i < instr->num_components; i++) {
- param = shader_io_get_unique_index(instr->variables[0]->var->data.location);
+
args[0] = ctx->esgs_ring;
args[1] = vtx_offset;
- args[2] = LLVMConstInt(ctx->i32, (param * 4 + i + const_index) * 256, false);
+ args[2] = LLVMConstInt(ctx->i32, (param * 4 + i + const_index + cull_offset) * 256, false);
args[3] = ctx->i32zero;
args[4] = ctx->i32one; /* OFFEN */
args[5] = ctx->i32zero; /* IDXEN */
@@ -3942,6 +3950,21 @@ handle_vs_input_decl(struct nir_to_llvm_context *ctx,
}
}
+static void /* Records the clip/cull distance input array lengths of a geometry shader input in ctx. */
+handle_gs_input_decl(struct nir_to_llvm_context *ctx,
+ struct nir_variable *variable)
+{
+ int idx = variable->data.location; /* varying slot assigned to this input variable */
+
+ if (idx == VARYING_SLOT_CLIP_DIST0 ||
+ idx == VARYING_SLOT_CULL_DIST0) { /* inputs in any other slot are ignored here */
+ int length = glsl_get_length(glsl_get_array_element(variable->type)); /* length of the array's element type; presumably the outer dimension is the per-vertex array -- TODO confirm */
+ if (idx == VARYING_SLOT_CLIP_DIST0)
+ ctx->num_input_clips = length; /* later used by load_gs_input as the offset of cull distances */
+ else
+ ctx->num_input_culls = length;
+ }
+}
static void interp_fs_input(struct nir_to_llvm_context *ctx,
unsigned attr,
@@ -4035,6 +4058,9 @@ handle_shader_input_decl(struct nir_to_llvm_context *ctx,
case MESA_SHADER_FRAGMENT:
handle_fs_input_decl(ctx, variable);
break;
+ case MESA_SHADER_GEOMETRY:
+ handle_gs_input_decl(ctx, variable);
+ break;
default:
break;
}
@@ -4520,15 +4546,23 @@ handle_es_outputs_post(struct nir_to_llvm_context *ctx)
for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
LLVMValueRef *out_ptr = &ctx->outputs[i * 4];
int param_index;
+ int length = 4;
+ int start = 0;
if (!(ctx->output_mask & (1ull << i)))
continue;
+ if (i == VARYING_SLOT_CLIP_DIST0) {
+ length = ctx->num_output_clips;
+ } else if (i == VARYING_SLOT_CULL_DIST0) {
+ start = ctx->num_output_clips;
+ length = ctx->num_output_culls;
+ }
param_index = shader_io_get_unique_index(i);
if (param_index > max_output_written)
max_output_written = param_index;
- for (j = 0; j < 4; j++) {
+ for (j = 0; j < length; j++) {
LLVMValueRef out_val = LLVMBuildLoad(ctx->builder, out_ptr[j], "");
out_val = LLVMBuildBitCast(ctx->builder, out_val, ctx->i32, "");
@@ -4536,7 +4570,7 @@ handle_es_outputs_post(struct nir_to_llvm_context *ctx)
ctx->esgs_ring,
out_val, 1,
LLVMGetUndef(ctx->i32), ctx->es2gs_offset,
- (4 * param_index + j) * 4,
+ (4 * param_index + j + start) * 4,
V_008F0C_BUF_DATA_FORMAT_32,
V_008F0C_BUF_NUM_FORMAT_UINT,
0, 0, 1, 1, 0);