summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDanylo Piliaiev <dpiliaiev@igalia.com>2021-02-15 13:51:54 +0200
committerMarge Bot <eric+marge@anholt.net>2021-05-05 10:05:38 +0000
commitd8ab0ec8e4d23c534fe5a9c54941490a26977800 (patch)
tree9fdb7cbeb81867ac0f79574c15e7963ee968d242
parenta898828a636a1c60d6385b4448639b697a6a875b (diff)
turnip: implement VK_KHR_vulkan_memory_model
No handling of Acquire/Release because at the moment the scheduler works as if any barrier is Acq+Rel. Instead of removing a scoped_barrier whose scope/mode for TCS corresponds to a control_barrier or a memory_barrier_tcs_patch in ir3_nir_lower_tess_ctrl - remove them in emit_intrinsic_barrier. And do the same for memory_barrier_tcs_patch and control_barrier. While in any case a hw fence/barrier shouldn't be emitted for them, they still affect the ordering of stores, and in the future the ir3 backend may want to have that information. Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9054>
-rw-r--r--src/freedreno/ir3/ir3_compiler_nir.c103
-rw-r--r--src/freedreno/ir3/ir3_nir.c2
-rw-r--r--src/freedreno/ir3/ir3_nir_lower_tess.c11
-rw-r--r--src/freedreno/vulkan/tu_device.c14
-rw-r--r--src/freedreno/vulkan/tu_extensions.py1
-rw-r--r--src/freedreno/vulkan/tu_shader.c2
6 files changed, 112 insertions, 21 deletions
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index 56de79f1b50..45f7fc2021b 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -1305,6 +1305,27 @@ emit_intrinsic_image_size_tex(struct ir3_context *ctx, nir_intrinsic_instr *intr
}
static void
+emit_control_barrier(struct ir3_context *ctx)
+{
+ /* Hull shaders dispatch 32 wide so an entire patch will always
+ * fit in a single warp and execute in lock-step. Consequently,
+ * we don't need to do anything for TCS barriers. Emitting
+ * barrier instruction will deadlock.
+ */
+ if (ctx->so->type == MESA_SHADER_TESS_CTRL)
+ return;
+
+ struct ir3_block *b = ctx->block;
+ struct ir3_instruction *barrier = ir3_BAR(b);
+ barrier->cat7.g = true;
+ if (ctx->compiler->gpu_id < 600)
+ barrier->cat7.l = true;
+ barrier->flags = IR3_INSTR_SS | IR3_INSTR_SY;
+ barrier->barrier_class = IR3_BARRIER_EVERYTHING;
+ array_insert(b, b->keeps, barrier);
+}
+
+static void
emit_intrinsic_barrier(struct ir3_context *ctx, nir_intrinsic_instr *intr)
{
struct ir3_block *b = ctx->block;
@@ -1316,13 +1337,79 @@ emit_intrinsic_barrier(struct ir3_context *ctx, nir_intrinsic_instr *intr)
switch (intr->intrinsic) {
case nir_intrinsic_control_barrier:
- barrier = ir3_BAR(b);
- barrier->cat7.g = true;
- if (ctx->compiler->gpu_id < 600)
- barrier->cat7.l = true;
- barrier->flags = IR3_INSTR_SS | IR3_INSTR_SY;
- barrier->barrier_class = IR3_BARRIER_EVERYTHING;
- break;
+ emit_control_barrier(ctx);
+ return;
+ case nir_intrinsic_scoped_barrier: {
+ nir_scope exec_scope = nir_intrinsic_execution_scope(intr);
+ nir_variable_mode modes = nir_intrinsic_memory_modes(intr);
+
+ if (ctx->so->type == MESA_SHADER_TESS_CTRL) {
+ /* Remove mode corresponding to nir_intrinsic_memory_barrier_tcs_patch,
+ * because hull shaders dispatch 32 wide so an entire patch will
+ * always fit in a single warp and execute in lock-step.
+ *
+ * TODO: memory barrier also tells us not to reorder stores, this
+ * information is lost here (backend doesn't reorder stores so we
+ * are safe for now).
+ */
+ modes &= ~nir_var_shader_out;
+ }
+
+ assert(!(modes & nir_var_shader_out));
+
+ if ((modes & (nir_var_mem_shared | nir_var_mem_ssbo |
+ nir_var_mem_global))) {
+ barrier = ir3_FENCE(b);
+ barrier->cat7.r = true;
+ barrier->cat7.w = true;
+
+ if (modes & (nir_var_mem_ssbo | nir_var_mem_global)) {
+ barrier->cat7.g = true;
+ }
+
+ if (ctx->compiler->gpu_id > 600) {
+ if (modes & nir_var_mem_ssbo) {
+ barrier->cat7.l = true;
+ }
+ } else {
+ if (modes & (nir_var_mem_shared | nir_var_mem_ssbo)) {
+ barrier->cat7.l = true;
+ }
+ }
+
+ barrier->barrier_class = 0;
+ barrier->barrier_conflict = 0;
+
+ if (modes & nir_var_mem_shared) {
+ barrier->barrier_class |= IR3_BARRIER_SHARED_W;
+ barrier->barrier_conflict |= IR3_BARRIER_SHARED_R |
+ IR3_BARRIER_SHARED_W;
+ }
+
+ if (modes & (nir_var_mem_ssbo | nir_var_mem_global)) {
+ barrier->barrier_class |= IR3_BARRIER_BUFFER_W;
+ barrier->barrier_conflict |=
+ IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W;
+ }
+
+ /* TODO: check for image mode when it has a separate one */
+ if (modes & nir_var_mem_ssbo) {
+ barrier->barrier_class |= IR3_BARRIER_IMAGE_W;
+ barrier->barrier_conflict |=
+ IR3_BARRIER_IMAGE_W | IR3_BARRIER_IMAGE_R;
+ }
+ array_insert(b, b->keeps, barrier);
+ }
+
+ if (exec_scope >= NIR_SCOPE_WORKGROUP) {
+ emit_control_barrier(ctx);
+ }
+
+ return;
+ }
+ case nir_intrinsic_memory_barrier_tcs_patch:
+ /* Not applicable, see explanation for scoped_barrier + shader_out */
+ return;
case nir_intrinsic_memory_barrier_buffer:
barrier = ir3_FENCE(b);
barrier->cat7.g = true;
@@ -1830,12 +1917,14 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
ctx->so->no_earlyz = true;
dst[0] = ctx->funcs->emit_intrinsic_atomic_image(ctx, intr);
break;
+ case nir_intrinsic_scoped_barrier:
case nir_intrinsic_control_barrier:
case nir_intrinsic_memory_barrier:
case nir_intrinsic_group_memory_barrier:
case nir_intrinsic_memory_barrier_buffer:
case nir_intrinsic_memory_barrier_image:
case nir_intrinsic_memory_barrier_shared:
+ case nir_intrinsic_memory_barrier_tcs_patch:
emit_intrinsic_barrier(ctx, intr);
/* note that blk ptr no longer valid, make that obvious: */
b = NULL;
diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c
index eb449bbf6f3..48134258226 100644
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -82,6 +82,7 @@ static const nir_shader_compiler_options options = {
*/
.lower_int64_options = (nir_lower_int64_options)~0,
.lower_uniforms_to_ubo = true,
+ .use_scoped_barrier = true,
};
/* we don't want to lower vertex_id to _zero_based on newer gpus: */
@@ -138,6 +139,7 @@ static const nir_shader_compiler_options options_a6xx = {
.lower_int64_options = (nir_lower_int64_options)~0,
.lower_uniforms_to_ubo = true,
.lower_device_index_to_zero = true,
+ .use_scoped_barrier = true,
};
const nir_shader_compiler_options *
diff --git a/src/freedreno/ir3/ir3_nir_lower_tess.c b/src/freedreno/ir3/ir3_nir_lower_tess.c
index f93c7ff4964..2ca328c60f7 100644
--- a/src/freedreno/ir3/ir3_nir_lower_tess.c
+++ b/src/freedreno/ir3/ir3_nir_lower_tess.c
@@ -495,17 +495,6 @@ lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
switch (intr->intrinsic) {
- case nir_intrinsic_control_barrier:
- case nir_intrinsic_memory_barrier_tcs_patch:
- /* Hull shaders dispatch 32 wide so an entire patch will always
- * fit in a single warp and execute in lock-step. Consequently,
- * we don't need to do anything for TCS barriers so just remove
- * the intrinsic. Otherwise we'll emit an actual barrier
- * instructions, which will deadlock.
- */
- nir_instr_remove(&intr->instr);
- break;
-
case nir_intrinsic_load_per_vertex_output: {
// src[] = { vertex, offset }.
diff --git a/src/freedreno/vulkan/tu_device.c b/src/freedreno/vulkan/tu_device.c
index 399bf0bf4fe..9d4be97e92d 100644
--- a/src/freedreno/vulkan/tu_device.c
+++ b/src/freedreno/vulkan/tu_device.c
@@ -454,9 +454,9 @@ tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
features->bufferDeviceAddress = false;
features->bufferDeviceAddressCaptureReplay = false;
features->bufferDeviceAddressMultiDevice = false;
- features->vulkanMemoryModel = false;
- features->vulkanMemoryModelDeviceScope = false;
- features->vulkanMemoryModelAvailabilityVisibilityChains = false;
+ features->vulkanMemoryModel = true;
+ features->vulkanMemoryModelDeviceScope = true;
+ features->vulkanMemoryModelAvailabilityVisibilityChains = true;
features->shaderOutputViewportIndex = true;
features->shaderOutputLayer = true;
features->subgroupBroadcastDynamicId = false;
@@ -634,6 +634,14 @@ tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
features->shaderTerminateInvocation = true;
break;
}
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_MEMORY_MODEL_FEATURES_KHR: {
+ VkPhysicalDeviceVulkanMemoryModelFeaturesKHR *feature =
+ (VkPhysicalDeviceVulkanMemoryModelFeaturesKHR *)ext;
+ feature->vulkanMemoryModel = true;
+ feature->vulkanMemoryModelDeviceScope = true;
+ feature->vulkanMemoryModelAvailabilityVisibilityChains = true;
+ break;
+ }
default:
break;
diff --git a/src/freedreno/vulkan/tu_extensions.py b/src/freedreno/vulkan/tu_extensions.py
index d1c3a2f1f23..a5652aeec1e 100644
--- a/src/freedreno/vulkan/tu_extensions.py
+++ b/src/freedreno/vulkan/tu_extensions.py
@@ -120,6 +120,7 @@ EXTENSIONS = [
Extension('VK_EXT_robustness2', 1, True),
Extension('VK_EXT_shader_demote_to_helper_invocation', 1, True),
Extension('VK_KHR_shader_terminate_invocation', 1, True),
+ Extension('VK_KHR_vulkan_memory_model', 3, True),
]
MAX_API_VERSION = VkVersion(MAX_API_VERSION)
diff --git a/src/freedreno/vulkan/tu_shader.c b/src/freedreno/vulkan/tu_shader.c
index 80414c038f5..84389417af4 100644
--- a/src/freedreno/vulkan/tu_shader.c
+++ b/src/freedreno/vulkan/tu_shader.c
@@ -78,6 +78,8 @@ tu_spirv_to_nir(struct tu_device *dev,
.int16 = true,
.storage_16bit = dev->physical_device->gpu_id >= 650,
.demote_to_helper_invocation = true,
+ .vk_memory_model = true,
+ .vk_memory_model_device_scope = true,
},
};