summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Kenneth Graunke <kenneth@whitecape.org> 2015-10-19 11:44:28 -0700
committer: Kenneth Graunke <kenneth@whitecape.org> 2015-11-04 10:18:56 -0800
commit: 666945df2d6853dfd032a5075d2f552618e541d8 (patch)
tree: 90e859244a4a0579416ff0d1d69e3bb2bd9c0b68
parent: d6ea427ad19bdda08de8f45d175a18709a138bbc (diff)
nir: Allow output reads and add the relevant intrinsics.
Normally, we rely on nir_lower_outputs_to_temporaries to create shadow variables for outputs, buffering the results and writing them all out at the end of the program. However, this is infeasible for tessellation control shader outputs.

Tessellation control shaders can generate multiple output vertices, and write per-vertex outputs. These are arrays indexed by the vertex number; each thread only writes one element, but can read any other element - including those being concurrently written by other threads. The barrier() intrinsic synchronizes between threads.

Even if we tried to shadow every output element (which is of dubious value), we'd have to read updated values in at barrier() time, which means we need to allow output reads.

Most stages should continue using nir_lower_outputs_to_temporaries(), but in theory drivers could choose not to if they really wanted.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
-rw-r--r-- src/glsl/nir/nir_intrinsics.h | 2
-rw-r--r-- src/glsl/nir/nir_lower_io.c | 28
-rw-r--r-- src/glsl/nir/nir_print.c | 2
-rw-r--r-- src/glsl/nir/nir_validate.c | 4
4 files changed, 24 insertions, 12 deletions
diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h
index 2b69d3f867..1c0f41dbab 100644
--- a/src/glsl/nir/nir_intrinsics.h
+++ b/src/glsl/nir/nir_intrinsics.h
@@ -244,6 +244,8 @@ LOAD(ubo, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
LOAD(input, 0, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
LOAD(per_vertex_input, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
LOAD(ssbo, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)
+LOAD(output, 0, 1, NIR_INTRINSIC_CAN_ELIMINATE)
+LOAD(per_vertex_output, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)
/*
* Stores work the same way as loads, except now the first register input is
diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c
index e4601601e3..e81524c266 100644
--- a/src/glsl/nir/nir_lower_io.c
+++ b/src/glsl/nir/nir_lower_io.c
@@ -158,6 +158,15 @@ load_op(struct lower_io_state *state,
nir_intrinsic_load_input;
}
break;
+ case nir_var_shader_out:
+ if (per_vertex) {
+ op = has_indirect ? nir_intrinsic_load_per_vertex_output_indirect :
+ nir_intrinsic_load_per_vertex_output;
+ } else {
+ op = has_indirect ? nir_intrinsic_load_output_indirect :
+ nir_intrinsic_load_output;
+ }
+ break;
case nir_var_uniform:
op = has_indirect ? nir_intrinsic_load_uniform_indirect :
nir_intrinsic_load_uniform;
@@ -188,14 +197,18 @@ nir_lower_io_block(nir_block *block, void *void_state)
if (state->mode != -1 && state->mode != mode)
continue;
+ if (mode != nir_var_shader_in &&
+ mode != nir_var_shader_out &&
+ mode != nir_var_uniform)
+ continue;
+
switch (intrin->intrinsic) {
case nir_intrinsic_load_var: {
- if (mode != nir_var_shader_in && mode != nir_var_uniform)
- continue;
-
- bool per_vertex = stage_uses_per_vertex_inputs(state) &&
- mode == nir_var_shader_in &&
- !intrin->variables[0]->var->data.patch;
+ bool per_vertex = !intrin->variables[0]->var->data.patch &&
+ ((mode == nir_var_shader_in &&
+ stage_uses_per_vertex_inputs(state)) ||
+ (mode == nir_var_shader_out &&
+ stage_uses_per_vertex_outputs(state)));
nir_ssa_def *indirect;
nir_ssa_def *vertex_index;
@@ -239,8 +252,7 @@ nir_lower_io_block(nir_block *block, void *void_state)
}
case nir_intrinsic_store_var: {
- if (intrin->variables[0]->var->data.mode != nir_var_shader_out)
- continue;
+ assert(mode == nir_var_shader_out);
nir_ssa_def *indirect;
nir_ssa_def *vertex_index;
diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c
index 23fcafeb7e..f7f5fdf318 100644
--- a/src/glsl/nir/nir_print.c
+++ b/src/glsl/nir/nir_print.c
@@ -448,6 +448,8 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
case nir_intrinsic_load_per_vertex_input_indirect:
var_list = &state->shader->inputs;
break;
+ case nir_intrinsic_load_output:
+ case nir_intrinsic_load_output_indirect:
case nir_intrinsic_store_output:
case nir_intrinsic_store_output_indirect:
case nir_intrinsic_store_per_vertex_output:
diff --git a/src/glsl/nir/nir_validate.c b/src/glsl/nir/nir_validate.c
index c6fedf9b1a..c0b6768314 100644
--- a/src/glsl/nir/nir_validate.c
+++ b/src/glsl/nir/nir_validate.c
@@ -398,9 +398,6 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
}
switch (instr->intrinsic) {
- case nir_intrinsic_load_var:
- assert(instr->variables[0]->var->data.mode != nir_var_shader_out);
- break;
case nir_intrinsic_store_var:
assert(instr->variables[0]->var->data.mode != nir_var_shader_in &&
instr->variables[0]->var->data.mode != nir_var_uniform &&
@@ -410,7 +407,6 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
assert(instr->variables[0]->var->data.mode != nir_var_shader_in &&
instr->variables[0]->var->data.mode != nir_var_uniform &&
instr->variables[0]->var->data.mode != nir_var_shader_storage);
- assert(instr->variables[1]->var->data.mode != nir_var_shader_out);
break;
default:
break;