diff options
authorKenneth Graunke <>2015-10-13 15:30:03 -0700
committerKenneth Graunke <>2015-11-04 10:18:56 -0800
commit19a5167322cb92ac3404e383396e61683b0a8858 (patch)
parent445a48c1d5aa7b4b5770a4524dd48e2c4a7baca7 (diff)
i965: Fix scalar VS float[] and vec2[] output arrays.
The scalar VS backend has never handled float[] and vec2[] outputs correctly (my original code was broken). Outputs need to be padded out to vec4 slots. In fs_visitor::nir_setup_outputs(), we tried to process each vec4 slot by looping from 0 to ALIGN(type_size_scalar(type), 4) / 4. However, this is wrong: type_size_scalar() for a float[2] would return 2, or for vec2[2] it would return 4. This looked like a single slot, even though in reality each array element would be stored in separate vec4 slots. Because of this bug, outputs[] and output_components[] would not get initialized for the second element's VARYING_SLOT, which meant emit_urb_writes() would skip writing them. Nothing used those values, and dead code elimination threw a party. To fix this, we introduce a new type_size_vec4_times_4() function which pads array elements correctly, but still counts in scalar components, generating correct indices in store_output intrinsics. Normally, varying packing avoids this problem by turning varyings into vec4s. So this doesn't actually fix any Piglit or dEQP tests today. However, if varying packing is disabled, things would be broken. Tessellation shaders can't use varying packing, so this fixes various tcs-input Piglit tests on a branch of mine. v2: Shorten the implementation of type_size_4x to a single line (caught by Connor Abbott), and rename it to type_size_vec4_times_4() (renaming suggested by Jason Ekstrand). Use type_size_vec4 rather than using type_size_vec4_times_4 and then dividing by 4. Signed-off-by: Kenneth Graunke <>
4 files changed, 17 insertions, 2 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 4cc962613b..3efa415dec 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -514,6 +514,19 @@ type_size_scalar(const struct glsl_type *type)
+ * Returns the number of scalar components needed to store type, assuming
+ * that vectors are padded out to vec4.
+ *
+ * This has the packing rules of type_size_vec4(), but counts components
+ * similar to type_size_scalar().
+ */
+extern "C" int
+type_size_vec4_times_4(const struct glsl_type *type)
+ return 4 * type_size_vec4(type);
* Create a MOV to read the timestamp register.
* The caller is responsible for emitting the MOV. The return value is
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index e7a39ff741..e012a29847 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -104,7 +104,7 @@ fs_visitor::nir_setup_outputs()
switch (stage) {
- for (unsigned int i = 0; i < ALIGN(type_size_scalar(var->type), 4) / 4; i++) {
+ for (int i = 0; i < type_size_vec4(var->type); i++) {
int output = var->data.location + i;
this->outputs[output] = offset(reg, bld, 4 * i);
this->output_components[output] = vector_elements;
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index a7a5eb511c..dece208233 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -150,7 +150,8 @@ brw_nir_lower_outputs(nir_shader *nir, bool is_scalar)
if (is_scalar) {
nir_assign_var_locations(&nir->outputs, &nir->num_outputs,
- type_size_scalar);
+ type_size_vec4_times_4);
+ nir_lower_io(nir, nir_var_shader_out, type_size_vec4_times_4);
} else {
nir_foreach_variable(var, &nir->outputs)
var->data.driver_location = var->data.location;
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
index 6a2dfc9bbb..29baebf0cc 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -277,6 +277,7 @@ bool brw_cs_precompile(struct gl_context *ctx,
int type_size_scalar(const struct glsl_type *type);
int type_size_vec4(const struct glsl_type *type);
+int type_size_vec4_times_4(const struct glsl_type *type);
bool is_scalar_shader_stage(const struct brw_compiler *compiler, int stage);