summaryrefslogtreecommitdiff
path: root/src/intel
diff options
context:
space:
mode:
authorIago Toral Quiroga <itoral@igalia.com>2017-01-16 11:51:50 +0100
committerIago Toral Quiroga <itoral@igalia.com>2017-01-17 07:57:04 +0100
commit9fe9db8031f4979fde0849855c6aca1a83432f4e (patch)
treea120f5f934b6620090737f3737bd9c5e161b0b1a /src/intel
parentff0dd67d2f9fc7cc240da732153ac4fac4e8d04d (diff)
anv: set UAV coherence required bit when needed
The same we do in the OpenGL driver (comment copied from there). This is required to ensure that we execute the fragment shader stage when side-effects (such as image or ssbo stores) are present but there are no color writes. I found this while writing a test to check rendering to a framebuffer without attachments where the fragment shader does not produce any color outputs but writes to an image via imageStore(). Without this patch the fragment shader does not execute and the image is not written, which is not correct. Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Diffstat (limited to 'src/intel')
-rw-r--r--src/intel/vulkan/genX_pipeline.c51
1 files changed, 51 insertions, 0 deletions
diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c
index a537a40905c..9d28466f796 100644
--- a/src/intel/vulkan/genX_pipeline.c
+++ b/src/intel/vulkan/genX_pipeline.c
@@ -1327,6 +1327,26 @@ emit_3dstate_ps(struct anv_pipeline *pipeline,
}
}
+static inline bool
+has_color_buffer_write_enabled(const struct anv_pipeline *pipeline)
+{
+ const struct anv_shader_bin *shader_bin =
+ pipeline->shaders[MESA_SHADER_FRAGMENT];
+ if (!shader_bin)
+ return false;
+
+ const struct anv_pipeline_bind_map *bind_map = &shader_bin->bind_map;
+ for (int i = 0; i < bind_map->surface_count; i++) {
+ if (bind_map->surface_to_descriptor[i].set !=
+ ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS)
+ continue;
+ if (bind_map->surface_to_descriptor[i].index != UINT8_MAX)
+ return true;
+ }
+
+ return false;
+}
+
#if GEN_GEN >= 8
static void
emit_3dstate_ps_extra(struct anv_pipeline *pipeline,
@@ -1357,6 +1377,37 @@ emit_3dstate_ps_extra(struct anv_pipeline *pipeline,
ps.PixelShaderKillsPixel = subpass->has_ds_self_dep ||
wm_prog_data->uses_kill;
+ /* The stricter cross-primitive coherency guarantees that the hardware
+ * gives us with the "Accesses UAV" bit set for at least one shader stage
+ * and the "UAV coherency required" bit set on the 3DPRIMITIVE command are
+ * redundant within the current image, atomic counter and SSBO GL APIs,
+ * which all have very loose ordering and coherency requirements and
+ * generally rely on the application to insert explicit barriers when a
+ * shader invocation is expected to see the memory writes performed by the
+ * invocations of some previous primitive. Regardless of the value of
+ * "UAV coherency required", the "Accesses UAV" bits will implicitly cause
+ * an in most cases useless DC flush when the lowermost stage with the bit
+ * set finishes execution.
+ *
+ * It would be nice to disable it, but in some cases we can't because on
+ * Gen8+ it also has an influence on rasterization via the PS UAV-only
+ * signal (which could be set independently from the coherency mechanism
+ * in the 3DSTATE_WM command on Gen7), and because in some cases it will
+ * determine whether the hardware skips execution of the fragment shader
+ * or not via the ThreadDispatchEnable signal. However if we know that
+ * GEN8_PS_BLEND_HAS_WRITEABLE_RT is going to be set and
+ * GEN8_PSX_PIXEL_SHADER_NO_RT_WRITE is not set it shouldn't make any
+ * difference so we may just disable it here.
+ *
+ * Gen8 hardware tries to compute ThreadDispatchEnable for us but doesn't
+ * take into account KillPixels when no depth or stencil writes are
+ * enabled. In order for occlusion queries to work correctly with no
+ * attachments, we need to force-enable here.
+ */
+ if ((wm_prog_data->has_side_effects || wm_prog_data->uses_kill) &&
+ !has_color_buffer_write_enabled(pipeline))
+ ps.PixelShaderHasUAV = true;
+
#if GEN_GEN >= 9
ps.PixelShaderPullsBary = wm_prog_data->pulls_bary;
ps.InputCoverageMaskState = wm_prog_data->uses_sample_mask ?