author    Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>  2021-04-10 03:24:05 +0200
committer Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>  2021-04-10 03:31:58 +0200
commit    59c501ca353f8ec9d2717c98af2bfa1a1dbf4d75 (patch)
tree      dd56c73e05cea59c5c8931605bf9d5efc986677e /src
parent    8451b41022757763a4a46eb597b9392e39a26b6a (diff)
radv: Format.
Using

   find ./src/amd/vulkan -regex '.*/.*\.\(c\|h\|cpp\)' | xargs -P8 -n1 clang-format --style=file -i

Acked-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10091>
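(Note: --style=file tells clang-format to read the .clang-format file found in the source tree; that file itself is not part of this diff. Judging from the reformatted hunks below — 3-space indentation, continuation arguments aligned under the opening parenthesis, lines rewrapped near 100 columns — a minimal hypothetical sketch of such a configuration could look like the following. The option values are inferred assumptions, not Mesa's actual settings.)

   # Hypothetical .clang-format sketch -- values inferred from the hunks below, not copied from Mesa
   BasedOnStyle: LLVM            # start from a stock preset
   IndentWidth: 3                # matches the 3-space indent in the reformatted code
   ColumnLimit: 100              # long declarations are rewrapped near column 100
   AlignAfterOpenBracket: Align  # wrapped arguments line up under the opening parenthesis
   UseTab: Never                 # spaces only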
Diffstat (limited to 'src')
-rw-r--r--  src/amd/vulkan/layers/radv_sqtt_layer.c | 1604
-rw-r--r--  src/amd/vulkan/radv_android.c | 1333
-rw-r--r--  src/amd/vulkan/radv_cmd_buffer.c | 11863
-rw-r--r--  src/amd/vulkan/radv_constants.h | 61
-rw-r--r--  src/amd/vulkan/radv_cs.h | 222
-rw-r--r--  src/amd/vulkan/radv_debug.c | 1507
-rw-r--r--  src/amd/vulkan/radv_debug.h | 104
-rw-r--r--  src/amd/vulkan/radv_descriptor_set.c | 2705
-rw-r--r--  src/amd/vulkan/radv_descriptor_set.h | 27
-rw-r--r--  src/amd/vulkan/radv_device.c | 13492
-rw-r--r--  src/amd/vulkan/radv_formats.c | 3600
-rw-r--r--  src/amd/vulkan/radv_image.c | 3908
-rw-r--r--  src/amd/vulkan/radv_llvm_helper.cpp | 214
-rw-r--r--  src/amd/vulkan/radv_meta.c | 1001
-rw-r--r--  src/amd/vulkan/radv_meta.h | 299
-rw-r--r--  src/amd/vulkan/radv_meta_blit.c | 2149
-rw-r--r--  src/amd/vulkan/radv_meta_blit2d.c | 2401
-rw-r--r--  src/amd/vulkan/radv_meta_buffer.c | 839
-rw-r--r--  src/amd/vulkan/radv_meta_bufimage.c | 3457
-rw-r--r--  src/amd/vulkan/radv_meta_clear.c | 3958
-rw-r--r--  src/amd/vulkan/radv_meta_copy.c | 938
-rw-r--r--  src/amd/vulkan/radv_meta_dcc_retile.c | 514
-rw-r--r--  src/amd/vulkan/radv_meta_decompress.c | 938
-rw-r--r--  src/amd/vulkan/radv_meta_fast_clear.c | 1668
-rw-r--r--  src/amd/vulkan/radv_meta_fmask_expand.c | 486
-rw-r--r--  src/amd/vulkan/radv_meta_resolve.c | 1651
-rw-r--r--  src/amd/vulkan/radv_meta_resolve_cs.c | 1791
-rw-r--r--  src/amd/vulkan/radv_meta_resolve_fs.c | 2204
-rw-r--r--  src/amd/vulkan/radv_nir_lower_ycbcr_textures.c | 490
-rw-r--r--  src/amd/vulkan/radv_nir_to_llvm.c | 6188
-rw-r--r--  src/amd/vulkan/radv_pass.c | 935
-rw-r--r--  src/amd/vulkan/radv_pipeline.c | 10727
-rw-r--r--  src/amd/vulkan/radv_pipeline_cache.c | 965
-rw-r--r--  src/amd/vulkan/radv_private.h | 3614
-rw-r--r--  src/amd/vulkan/radv_query.c | 3130
-rw-r--r--  src/amd/vulkan/radv_radeon_winsys.h | 440
-rw-r--r--  src/amd/vulkan/radv_shader.c | 3247
-rw-r--r--  src/amd/vulkan/radv_shader.h | 856
-rw-r--r--  src/amd/vulkan/radv_shader_args.c | 1235
-rw-r--r--  src/amd/vulkan/radv_shader_args.h | 38
-rw-r--r--  src/amd/vulkan/radv_shader_helper.h | 16
-rw-r--r--  src/amd/vulkan/radv_shader_info.c | 1197
-rw-r--r--  src/amd/vulkan/radv_sqtt.c | 1072
-rw-r--r--  src/amd/vulkan/radv_util.c | 111
-rw-r--r--  src/amd/vulkan/radv_wsi.c | 390
-rw-r--r--  src/amd/vulkan/radv_wsi_display.c | 392
-rw-r--r--  src/amd/vulkan/radv_wsi_wayland.c | 18
-rw-r--r--  src/amd/vulkan/radv_wsi_x11.c | 59
-rw-r--r--  src/amd/vulkan/si_cmd_buffer.c | 3537
-rw-r--r--  src/amd/vulkan/vk_format.h | 340
-rw-r--r--  src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c | 1759
-rw-r--r--  src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h | 61
-rw-r--r--  src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 2954
-rw-r--r--  src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.h | 29
-rw-r--r--  src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c | 106
-rw-r--r--  src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.h | 2
-rw-r--r--  src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c | 417
-rw-r--r--  src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h | 68
-rw-r--r--  src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys_public.h | 2
-rw-r--r--  src/amd/vulkan/winsys/null/radv_null_bo.c | 54
-rw-r--r--  src/amd/vulkan/winsys/null/radv_null_bo.h | 12
-rw-r--r--  src/amd/vulkan/winsys/null/radv_null_cs.c | 82
-rw-r--r--  src/amd/vulkan/winsys/null/radv_null_cs.h | 6
-rw-r--r--  src/amd/vulkan/winsys/null/radv_null_winsys.c | 222
-rw-r--r--  src/amd/vulkan/winsys/null/radv_null_winsys.h | 8
65 files changed, 52880 insertions, 56833 deletions
diff --git a/src/amd/vulkan/layers/radv_sqtt_layer.c b/src/amd/vulkan/layers/radv_sqtt_layer.c
index 30cd453b8e5..fc68d188dab 100644
--- a/src/amd/vulkan/layers/radv_sqtt_layer.c
+++ b/src/amd/vulkan/layers/radv_sqtt_layer.c
@@ -29,1215 +29,1071 @@
static void
radv_write_begin_general_api_marker(struct radv_cmd_buffer *cmd_buffer,
- enum rgp_sqtt_marker_general_api_type api_type)
+ enum rgp_sqtt_marker_general_api_type api_type)
{
- struct rgp_sqtt_marker_general_api marker = {0};
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ struct rgp_sqtt_marker_general_api marker = {0};
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
- marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API;
- marker.api_type = api_type;
+ marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API;
+ marker.api_type = api_type;
- radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
+ radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
}
static void
radv_write_end_general_api_marker(struct radv_cmd_buffer *cmd_buffer,
- enum rgp_sqtt_marker_general_api_type api_type)
+ enum rgp_sqtt_marker_general_api_type api_type)
{
- struct rgp_sqtt_marker_general_api marker = {0};
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ struct rgp_sqtt_marker_general_api marker = {0};
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
- marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API;
- marker.api_type = api_type;
- marker.is_end = 1;
+ marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_GENERAL_API;
+ marker.api_type = api_type;
+ marker.is_end = 1;
- radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
+ radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
}
static void
radv_write_event_marker(struct radv_cmd_buffer *cmd_buffer,
- enum rgp_sqtt_marker_event_type api_type,
- uint32_t vertex_offset_user_data,
- uint32_t instance_offset_user_data,
- uint32_t draw_index_user_data)
+ enum rgp_sqtt_marker_event_type api_type, uint32_t vertex_offset_user_data,
+ uint32_t instance_offset_user_data, uint32_t draw_index_user_data)
{
- struct rgp_sqtt_marker_event marker = {0};
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ struct rgp_sqtt_marker_event marker = {0};
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
- marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_EVENT;
- marker.api_type = api_type;
- marker.cmd_id = cmd_buffer->state.num_events++;
- marker.cb_id = 0;
+ marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_EVENT;
+ marker.api_type = api_type;
+ marker.cmd_id = cmd_buffer->state.num_events++;
+ marker.cb_id = 0;
- if (vertex_offset_user_data == UINT_MAX ||
- instance_offset_user_data == UINT_MAX) {
- vertex_offset_user_data = 0;
- instance_offset_user_data = 0;
- }
+ if (vertex_offset_user_data == UINT_MAX || instance_offset_user_data == UINT_MAX) {
+ vertex_offset_user_data = 0;
+ instance_offset_user_data = 0;
+ }
- if (draw_index_user_data == UINT_MAX)
- draw_index_user_data = vertex_offset_user_data;
+ if (draw_index_user_data == UINT_MAX)
+ draw_index_user_data = vertex_offset_user_data;
- marker.vertex_offset_reg_idx = vertex_offset_user_data;
- marker.instance_offset_reg_idx = instance_offset_user_data;
- marker.draw_index_reg_idx = draw_index_user_data;
+ marker.vertex_offset_reg_idx = vertex_offset_user_data;
+ marker.instance_offset_reg_idx = instance_offset_user_data;
+ marker.draw_index_reg_idx = draw_index_user_data;
- radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
+ radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
}
static void
radv_write_event_with_dims_marker(struct radv_cmd_buffer *cmd_buffer,
- enum rgp_sqtt_marker_event_type api_type,
- uint32_t x, uint32_t y, uint32_t z)
+ enum rgp_sqtt_marker_event_type api_type, uint32_t x, uint32_t y,
+ uint32_t z)
{
- struct rgp_sqtt_marker_event_with_dims marker = {0};
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ struct rgp_sqtt_marker_event_with_dims marker = {0};
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
- marker.event.identifier = RGP_SQTT_MARKER_IDENTIFIER_EVENT;
- marker.event.api_type = api_type;
- marker.event.cmd_id = cmd_buffer->state.num_events++;
- marker.event.cb_id = 0;
- marker.event.has_thread_dims = 1;
+ marker.event.identifier = RGP_SQTT_MARKER_IDENTIFIER_EVENT;
+ marker.event.api_type = api_type;
+ marker.event.cmd_id = cmd_buffer->state.num_events++;
+ marker.event.cb_id = 0;
+ marker.event.has_thread_dims = 1;
- marker.thread_x = x;
- marker.thread_y = y;
- marker.thread_z = z;
+ marker.thread_x = x;
+ marker.thread_y = y;
+ marker.thread_z = z;
- radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
+ radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
}
static void
radv_write_user_event_marker(struct radv_cmd_buffer *cmd_buffer,
- enum rgp_sqtt_marker_user_event_type type,
- const char *str)
+ enum rgp_sqtt_marker_user_event_type type, const char *str)
{
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
- if (type == UserEventPop) {
- assert (str == NULL);
- struct rgp_sqtt_marker_user_event marker = { 0 };
- marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT;
- marker.data_type = type;
+ if (type == UserEventPop) {
+ assert(str == NULL);
+ struct rgp_sqtt_marker_user_event marker = {0};
+ marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT;
+ marker.data_type = type;
- radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
- } else {
- assert (str != NULL);
- unsigned len = strlen(str);
- struct rgp_sqtt_marker_user_event_with_length marker = { 0 };
- marker.user_event.identifier = RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT;
- marker.user_event.data_type = type;
- marker.length = align(len, 4);
+ radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
+ } else {
+ assert(str != NULL);
+ unsigned len = strlen(str);
+ struct rgp_sqtt_marker_user_event_with_length marker = {0};
+ marker.user_event.identifier = RGP_SQTT_MARKER_IDENTIFIER_USER_EVENT;
+ marker.user_event.data_type = type;
+ marker.length = align(len, 4);
- uint8_t *buffer = alloca(sizeof(marker) + marker.length);
- memset(buffer, 0, sizeof(marker) + marker.length);
- memcpy(buffer, &marker, sizeof(marker));
- memcpy(buffer + sizeof(marker), str, len);
+ uint8_t *buffer = alloca(sizeof(marker) + marker.length);
+ memset(buffer, 0, sizeof(marker) + marker.length);
+ memcpy(buffer, &marker, sizeof(marker));
+ memcpy(buffer + sizeof(marker), str, len);
- radv_emit_thread_trace_userdata(cmd_buffer->device, cs, buffer, sizeof(marker) / 4 + marker.length / 4);
- }
+ radv_emit_thread_trace_userdata(cmd_buffer->device, cs, buffer,
+ sizeof(marker) / 4 + marker.length / 4);
+ }
}
void
radv_describe_begin_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
{
- uint64_t device_id = (uintptr_t)cmd_buffer->device;
- struct rgp_sqtt_marker_cb_start marker = {0};
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ uint64_t device_id = (uintptr_t)cmd_buffer->device;
+ struct rgp_sqtt_marker_cb_start marker = {0};
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
- if (likely(!cmd_buffer->device->thread_trace.bo))
- return;
+ if (likely(!cmd_buffer->device->thread_trace.bo))
+ return;
- marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_CB_START;
- marker.cb_id = 0;
- marker.device_id_low = device_id;
- marker.device_id_high = device_id >> 32;
- marker.queue = cmd_buffer->queue_family_index;
- marker.queue_flags = VK_QUEUE_COMPUTE_BIT |
- VK_QUEUE_TRANSFER_BIT |
- VK_QUEUE_SPARSE_BINDING_BIT;
+ marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_CB_START;
+ marker.cb_id = 0;
+ marker.device_id_low = device_id;
+ marker.device_id_high = device_id >> 32;
+ marker.queue = cmd_buffer->queue_family_index;
+ marker.queue_flags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT | VK_QUEUE_SPARSE_BINDING_BIT;
- if (cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL)
- marker.queue_flags |= VK_QUEUE_GRAPHICS_BIT;
+ if (cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL)
+ marker.queue_flags |= VK_QUEUE_GRAPHICS_BIT;
- radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
+ radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
}
void
radv_describe_end_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
{
- uint64_t device_id = (uintptr_t)cmd_buffer->device;
- struct rgp_sqtt_marker_cb_end marker = {0};
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ uint64_t device_id = (uintptr_t)cmd_buffer->device;
+ struct rgp_sqtt_marker_cb_end marker = {0};
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
- if (likely(!cmd_buffer->device->thread_trace.bo))
- return;
+ if (likely(!cmd_buffer->device->thread_trace.bo))
+ return;
- marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_CB_END;
- marker.cb_id = 0;
- marker.device_id_low = device_id;
- marker.device_id_high = device_id >> 32;
+ marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_CB_END;
+ marker.cb_id = 0;
+ marker.device_id_low = device_id;
+ marker.device_id_high = device_id >> 32;
- radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
+ radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
}
void
radv_describe_draw(struct radv_cmd_buffer *cmd_buffer)
{
- if (likely(!cmd_buffer->device->thread_trace.bo))
- return;
+ if (likely(!cmd_buffer->device->thread_trace.bo))
+ return;
- radv_write_event_marker(cmd_buffer, cmd_buffer->state.current_event_type,
- UINT_MAX, UINT_MAX, UINT_MAX);
+ radv_write_event_marker(cmd_buffer, cmd_buffer->state.current_event_type, UINT_MAX, UINT_MAX,
+ UINT_MAX);
}
void
radv_describe_dispatch(struct radv_cmd_buffer *cmd_buffer, int x, int y, int z)
{
- if (likely(!cmd_buffer->device->thread_trace.bo))
- return;
+ if (likely(!cmd_buffer->device->thread_trace.bo))
+ return;
- radv_write_event_with_dims_marker(cmd_buffer,
- cmd_buffer->state.current_event_type,
- x, y, z);
+ radv_write_event_with_dims_marker(cmd_buffer, cmd_buffer->state.current_event_type, x, y, z);
}
void
radv_describe_begin_render_pass_clear(struct radv_cmd_buffer *cmd_buffer,
- VkImageAspectFlagBits aspects)
+ VkImageAspectFlagBits aspects)
{
- cmd_buffer->state.current_event_type = (aspects & VK_IMAGE_ASPECT_COLOR_BIT) ?
- EventRenderPassColorClear : EventRenderPassDepthStencilClear;
+ cmd_buffer->state.current_event_type = (aspects & VK_IMAGE_ASPECT_COLOR_BIT)
+ ? EventRenderPassColorClear
+ : EventRenderPassDepthStencilClear;
}
void
radv_describe_end_render_pass_clear(struct radv_cmd_buffer *cmd_buffer)
{
- cmd_buffer->state.current_event_type = EventInternalUnknown;
+ cmd_buffer->state.current_event_type = EventInternalUnknown;
}
void
radv_describe_begin_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer)
{
- cmd_buffer->state.current_event_type = EventRenderPassResolve;
+ cmd_buffer->state.current_event_type = EventRenderPassResolve;
}
void
radv_describe_end_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer)
{
- cmd_buffer->state.current_event_type = EventInternalUnknown;
+ cmd_buffer->state.current_event_type = EventInternalUnknown;
}
void
radv_describe_barrier_end_delayed(struct radv_cmd_buffer *cmd_buffer)
{
- struct rgp_sqtt_marker_barrier_end marker = {0};
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
-
- if (likely(!cmd_buffer->device->thread_trace.bo) ||
- !cmd_buffer->state.pending_sqtt_barrier_end)
- return;
-
- cmd_buffer->state.pending_sqtt_barrier_end = false;
-
- marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_END;
- marker.cb_id = 0;
-
- marker.num_layout_transitions = cmd_buffer->state.num_layout_transitions;
-
- if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_WAIT_ON_EOP_TS)
- marker.wait_on_eop_ts = true;
- if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_VS_PARTIAL_FLUSH)
- marker.vs_partial_flush = true;
- if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_PS_PARTIAL_FLUSH)
- marker.ps_partial_flush = true;
- if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_CS_PARTIAL_FLUSH)
- marker.cs_partial_flush = true;
- if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_PFP_SYNC_ME)
- marker.pfp_sync_me = true;
- if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_SYNC_CP_DMA)
- marker.sync_cp_dma = true;
- if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_VMEM_L0)
- marker.inval_tcp = true;
- if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_ICACHE)
- marker.inval_sqI = true;
- if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_SMEM_L0)
- marker.inval_sqK = true;
- if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_L2)
- marker.flush_tcc = true;
- if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_L2)
- marker.inval_tcc = true;
- if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_CB)
- marker.flush_cb = true;
- if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_CB)
- marker.inval_cb = true;
- if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_DB)
- marker.flush_db = true;
- if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_DB)
- marker.inval_db = true;
- if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_L1)
- marker.inval_gl1 = true;
-
- radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
-
- cmd_buffer->state.num_layout_transitions = 0;
+ struct rgp_sqtt_marker_barrier_end marker = {0};
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+
+ if (likely(!cmd_buffer->device->thread_trace.bo) || !cmd_buffer->state.pending_sqtt_barrier_end)
+ return;
+
+ cmd_buffer->state.pending_sqtt_barrier_end = false;
+
+ marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_END;
+ marker.cb_id = 0;
+
+ marker.num_layout_transitions = cmd_buffer->state.num_layout_transitions;
+
+ if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_WAIT_ON_EOP_TS)
+ marker.wait_on_eop_ts = true;
+ if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_VS_PARTIAL_FLUSH)
+ marker.vs_partial_flush = true;
+ if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_PS_PARTIAL_FLUSH)
+ marker.ps_partial_flush = true;
+ if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_CS_PARTIAL_FLUSH)
+ marker.cs_partial_flush = true;
+ if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_PFP_SYNC_ME)
+ marker.pfp_sync_me = true;
+ if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_SYNC_CP_DMA)
+ marker.sync_cp_dma = true;
+ if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_VMEM_L0)
+ marker.inval_tcp = true;
+ if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_ICACHE)
+ marker.inval_sqI = true;
+ if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_SMEM_L0)
+ marker.inval_sqK = true;
+ if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_L2)
+ marker.flush_tcc = true;
+ if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_L2)
+ marker.inval_tcc = true;
+ if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_CB)
+ marker.flush_cb = true;
+ if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_CB)
+ marker.inval_cb = true;
+ if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_FLUSH_DB)
+ marker.flush_db = true;
+ if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_DB)
+ marker.inval_db = true;
+ if (cmd_buffer->state.sqtt_flush_bits & RGP_FLUSH_INVAL_L1)
+ marker.inval_gl1 = true;
+
+ radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
+
+ cmd_buffer->state.num_layout_transitions = 0;
}
void
-radv_describe_barrier_start(struct radv_cmd_buffer *cmd_buffer,
- enum rgp_barrier_reason reason)
+radv_describe_barrier_start(struct radv_cmd_buffer *cmd_buffer, enum rgp_barrier_reason reason)
{
- struct rgp_sqtt_marker_barrier_start marker = {0};
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ struct rgp_sqtt_marker_barrier_start marker = {0};
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
- if (likely(!cmd_buffer->device->thread_trace.bo))
- return;
+ if (likely(!cmd_buffer->device->thread_trace.bo))
+ return;
- radv_describe_barrier_end_delayed(cmd_buffer);
- cmd_buffer->state.sqtt_flush_bits = 0;
+ radv_describe_barrier_end_delayed(cmd_buffer);
+ cmd_buffer->state.sqtt_flush_bits = 0;
- marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_START;
- marker.cb_id = 0;
- marker.dword02 = reason;
+ marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BARRIER_START;
+ marker.cb_id = 0;
+ marker.dword02 = reason;
- radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
+ radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
}
void
radv_describe_barrier_end(struct radv_cmd_buffer *cmd_buffer)
{
- cmd_buffer->state.pending_sqtt_barrier_end = true;
+ cmd_buffer->state.pending_sqtt_barrier_end = true;
}
void
radv_describe_layout_transition(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_barrier_data *barrier)
+ const struct radv_barrier_data *barrier)
{
- struct rgp_sqtt_marker_layout_transition marker = {0};
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ struct rgp_sqtt_marker_layout_transition marker = {0};
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
- if (likely(!cmd_buffer->device->thread_trace.bo))
- return;
+ if (likely(!cmd_buffer->device->thread_trace.bo))
+ return;
- marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_LAYOUT_TRANSITION;
- marker.depth_stencil_expand = barrier->layout_transitions.depth_stencil_expand;
- marker.htile_hiz_range_expand = barrier->layout_transitions.htile_hiz_range_expand;
- marker.depth_stencil_resummarize = barrier->layout_transitions.depth_stencil_resummarize;
- marker.dcc_decompress = barrier->layout_transitions.dcc_decompress;
- marker.fmask_decompress = barrier->layout_transitions.fmask_decompress;
- marker.fast_clear_eliminate = barrier->layout_transitions.fast_clear_eliminate;
- marker.fmask_color_expand = barrier->layout_transitions.fmask_color_expand;
- marker.init_mask_ram = barrier->layout_transitions.init_mask_ram;
+ marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_LAYOUT_TRANSITION;
+ marker.depth_stencil_expand = barrier->layout_transitions.depth_stencil_expand;
+ marker.htile_hiz_range_expand = barrier->layout_transitions.htile_hiz_range_expand;
+ marker.depth_stencil_resummarize = barrier->layout_transitions.depth_stencil_resummarize;
+ marker.dcc_decompress = barrier->layout_transitions.dcc_decompress;
+ marker.fmask_decompress = barrier->layout_transitions.fmask_decompress;
+ marker.fast_clear_eliminate = barrier->layout_transitions.fast_clear_eliminate;
+ marker.fmask_color_expand = barrier->layout_transitions.fmask_color_expand;
+ marker.init_mask_ram = barrier->layout_transitions.init_mask_ram;
- radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
+ radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
- cmd_buffer->state.num_layout_transitions++;
+ cmd_buffer->state.num_layout_transitions++;
}
static void
radv_describe_pipeline_bind(struct radv_cmd_buffer *cmd_buffer,
- VkPipelineBindPoint pipelineBindPoint,
- struct radv_pipeline *pipeline)
+ VkPipelineBindPoint pipelineBindPoint, struct radv_pipeline *pipeline)
{
- struct rgp_sqtt_marker_pipeline_bind marker = {0};
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ struct rgp_sqtt_marker_pipeline_bind marker = {0};
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
- if (likely(!cmd_buffer->device->thread_trace.bo))
- return;
+ if (likely(!cmd_buffer->device->thread_trace.bo))
+ return;
- marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BIND_PIPELINE;
- marker.cb_id = 0;
- marker.bind_point = pipelineBindPoint;
- marker.api_pso_hash[0] = pipeline->pipeline_hash;
- marker.api_pso_hash[1] = pipeline->pipeline_hash >> 32;
+ marker.identifier = RGP_SQTT_MARKER_IDENTIFIER_BIND_PIPELINE;
+ marker.cb_id = 0;
+ marker.bind_point = pipelineBindPoint;
+ marker.api_pso_hash[0] = pipeline->pipeline_hash;
+ marker.api_pso_hash[1] = pipeline->pipeline_hash >> 32;
- radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
+ radv_emit_thread_trace_userdata(cmd_buffer->device, cs, &marker, sizeof(marker) / 4);
}
/* TODO: Improve the way to trigger capture (overlay, etc). */
static void
radv_handle_thread_trace(VkQueue _queue)
{
- RADV_FROM_HANDLE(radv_queue, queue, _queue);
- static bool thread_trace_enabled = false;
- static uint64_t num_frames = 0;
- bool resize_trigger = false;
-
- if (thread_trace_enabled) {
- struct ac_thread_trace thread_trace = {0};
-
- radv_end_thread_trace(queue);
- thread_trace_enabled = false;
-
- /* TODO: Do something better than this whole sync. */
- radv_QueueWaitIdle(_queue);
-
- if (radv_get_thread_trace(queue, &thread_trace)) {
- ac_dump_thread_trace(&queue->device->physical_device->rad_info,
- &thread_trace,
- &queue->device->thread_trace);
- } else {
- /* Trigger a new capture if the driver failed to get
- * the trace because the buffer was too small.
- */
- resize_trigger = true;
- }
- }
-
- if (!thread_trace_enabled) {
- bool frame_trigger = num_frames == queue->device->thread_trace.start_frame;
- bool file_trigger = false;
+ RADV_FROM_HANDLE(radv_queue, queue, _queue);
+ static bool thread_trace_enabled = false;
+ static uint64_t num_frames = 0;
+ bool resize_trigger = false;
+
+ if (thread_trace_enabled) {
+ struct ac_thread_trace thread_trace = {0};
+
+ radv_end_thread_trace(queue);
+ thread_trace_enabled = false;
+
+ /* TODO: Do something better than this whole sync. */
+ radv_QueueWaitIdle(_queue);
+
+ if (radv_get_thread_trace(queue, &thread_trace)) {
+ ac_dump_thread_trace(&queue->device->physical_device->rad_info, &thread_trace,
+ &queue->device->thread_trace);
+ } else {
+ /* Trigger a new capture if the driver failed to get
+ * the trace because the buffer was too small.
+ */
+ resize_trigger = true;
+ }
+ }
+
+ if (!thread_trace_enabled) {
+ bool frame_trigger = num_frames == queue->device->thread_trace.start_frame;
+ bool file_trigger = false;
#ifndef _WIN32
- if (queue->device->thread_trace.trigger_file &&
- access(queue->device->thread_trace.trigger_file, W_OK) == 0) {
- if (unlink(queue->device->thread_trace.trigger_file) == 0) {
- file_trigger = true;
- } else {
- /* Do not enable tracing if we cannot remove the file,
- * because by then we'll trace every frame ... */
- fprintf(stderr, "RADV: could not remove thread trace trigger file, ignoring\n");
- }
- }
+ if (queue->device->thread_trace.trigger_file &&
+ access(queue->device->thread_trace.trigger_file, W_OK) == 0) {
+ if (unlink(queue->device->thread_trace.trigger_file) == 0) {
+ file_trigger = true;
+ } else {
+ /* Do not enable tracing if we cannot remove the file,
+ * because by then we'll trace every frame ... */
+ fprintf(stderr, "RADV: could not remove thread trace trigger file, ignoring\n");
+ }
+ }
#endif
- if (frame_trigger || file_trigger || resize_trigger) {
- /* FIXME: SQTT on compute hangs. */
- if (queue->queue_family_index == RADV_QUEUE_COMPUTE) {
- fprintf(stderr, "RADV: Capturing a SQTT trace on the compute "
- "queue is currently broken and might hang! "
- "Please, disable presenting on compute if "
- "you can.\n");
- return;
- }
+ if (frame_trigger || file_trigger || resize_trigger) {
+ /* FIXME: SQTT on compute hangs. */
+ if (queue->queue_family_index == RADV_QUEUE_COMPUTE) {
+ fprintf(stderr, "RADV: Capturing a SQTT trace on the compute "
+ "queue is currently broken and might hang! "
+ "Please, disable presenting on compute if "
+ "you can.\n");
+ return;
+ }
- radv_begin_thread_trace(queue);
- assert(!thread_trace_enabled);
- thread_trace_enabled = true;
- }
- }
- num_frames++;
+ radv_begin_thread_trace(queue);
+ assert(!thread_trace_enabled);
+ thread_trace_enabled = true;
+ }
+ }
+ num_frames++;
}
-VkResult sqtt_QueuePresentKHR(
- VkQueue _queue,
- const VkPresentInfoKHR* pPresentInfo)
+VkResult
+sqtt_QueuePresentKHR(VkQueue _queue, const VkPresentInfoKHR *pPresentInfo)
{
- VkResult result;
+ VkResult result;
- result = radv_QueuePresentKHR(_queue, pPresentInfo);
- if (result != VK_SUCCESS)
- return result;
+ result = radv_QueuePresentKHR(_queue, pPresentInfo);
+ if (result != VK_SUCCESS)
+ return result;
- radv_handle_thread_trace(_queue);
+ radv_handle_thread_trace(_queue);
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
-#define EVENT_MARKER_ALIAS(cmd_name, api_name, ...) \
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); \
- radv_write_begin_general_api_marker(cmd_buffer, ApiCmd##api_name); \
- cmd_buffer->state.current_event_type = EventCmd##api_name; \
- radv_Cmd##cmd_name(__VA_ARGS__); \
- cmd_buffer->state.current_event_type = EventInternalUnknown; \
- radv_write_end_general_api_marker(cmd_buffer, ApiCmd##api_name);
+#define EVENT_MARKER_ALIAS(cmd_name, api_name, ...) \
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); \
+ radv_write_begin_general_api_marker(cmd_buffer, ApiCmd##api_name); \
+ cmd_buffer->state.current_event_type = EventCmd##api_name; \
+ radv_Cmd##cmd_name(__VA_ARGS__); \
+ cmd_buffer->state.current_event_type = EventInternalUnknown; \
+ radv_write_end_general_api_marker(cmd_buffer, ApiCmd##api_name);
-#define EVENT_MARKER(cmd_name, ...) \
- EVENT_MARKER_ALIAS(cmd_name, cmd_name, __VA_ARGS__);
+#define EVENT_MARKER(cmd_name, ...) EVENT_MARKER_ALIAS(cmd_name, cmd_name, __VA_ARGS__);
-void sqtt_CmdDraw(
- VkCommandBuffer commandBuffer,
- uint32_t vertexCount,
- uint32_t instanceCount,
- uint32_t firstVertex,
- uint32_t firstInstance)
+void
+sqtt_CmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount,
+ uint32_t firstVertex, uint32_t firstInstance)
{
- EVENT_MARKER(Draw, commandBuffer, vertexCount, instanceCount,
- firstVertex, firstInstance);
+ EVENT_MARKER(Draw, commandBuffer, vertexCount, instanceCount, firstVertex, firstInstance);
}
-void sqtt_CmdDrawIndexed(
- VkCommandBuffer commandBuffer,
- uint32_t indexCount,
- uint32_t instanceCount,
- uint32_t firstIndex,
- int32_t vertexOffset,
- uint32_t firstInstance)
+void
+sqtt_CmdDrawIndexed(VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount,
+ uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance)
{
- EVENT_MARKER(DrawIndexed, commandBuffer, indexCount, instanceCount,
- firstIndex, vertexOffset, firstInstance);
+ EVENT_MARKER(DrawIndexed, commandBuffer, indexCount, instanceCount, firstIndex, vertexOffset,
+ firstInstance);
}
-void sqtt_CmdDrawIndirect(
- VkCommandBuffer commandBuffer,
- VkBuffer buffer,
- VkDeviceSize offset,
- uint32_t drawCount,
- uint32_t stride)
+void
+sqtt_CmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
+ uint32_t drawCount, uint32_t stride)
{
- EVENT_MARKER(DrawIndirect, commandBuffer, buffer, offset, drawCount,
- stride);
+ EVENT_MARKER(DrawIndirect, commandBuffer, buffer, offset, drawCount, stride);
}
-void sqtt_CmdDrawIndexedIndirect(
- VkCommandBuffer commandBuffer,
- VkBuffer buffer,
- VkDeviceSize offset,
- uint32_t drawCount,
- uint32_t stride)
+void
+sqtt_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
+ uint32_t drawCount, uint32_t stride)
{
- EVENT_MARKER(DrawIndexedIndirect, commandBuffer, buffer, offset,
- drawCount, stride);
+ EVENT_MARKER(DrawIndexedIndirect, commandBuffer, buffer, offset, drawCount, stride);
}
-void sqtt_CmdDrawIndirectCount(
- VkCommandBuffer commandBuffer,
- VkBuffer buffer,
- VkDeviceSize offset,
- VkBuffer countBuffer,
- VkDeviceSize countBufferOffset,
- uint32_t maxDrawCount,
- uint32_t stride)
+void
+sqtt_CmdDrawIndirectCount(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
+ VkBuffer countBuffer, VkDeviceSize countBufferOffset,
+ uint32_t maxDrawCount, uint32_t stride)
{
- EVENT_MARKER(DrawIndirectCount,commandBuffer, buffer, offset,
- countBuffer, countBufferOffset, maxDrawCount, stride);
+ EVENT_MARKER(DrawIndirectCount, commandBuffer, buffer, offset, countBuffer, countBufferOffset,
+ maxDrawCount, stride);
}
-void sqtt_CmdDrawIndexedIndirectCount(
- VkCommandBuffer commandBuffer,
- VkBuffer buffer,
- VkDeviceSize offset,
- VkBuffer countBuffer,
- VkDeviceSize countBufferOffset,
- uint32_t maxDrawCount,
- uint32_t stride)
+void
+sqtt_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer, VkBuffer buffer,
+ VkDeviceSize offset, VkBuffer countBuffer,
+ VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
+ uint32_t stride)
{
- EVENT_MARKER(DrawIndexedIndirectCount, commandBuffer, buffer, offset,
- countBuffer, countBufferOffset, maxDrawCount, stride);
+ EVENT_MARKER(DrawIndexedIndirectCount, commandBuffer, buffer, offset, countBuffer,
+ countBufferOffset, maxDrawCount, stride);
}
-void sqtt_CmdDispatch(
- VkCommandBuffer commandBuffer,
- uint32_t x,
- uint32_t y,
- uint32_t z)
+void
+sqtt_CmdDispatch(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z)
{
- EVENT_MARKER(Dispatch, commandBuffer, x, y, z);
+ EVENT_MARKER(Dispatch, commandBuffer, x, y, z);
}
-void sqtt_CmdDispatchIndirect(
- VkCommandBuffer commandBuffer,
- VkBuffer buffer,
- VkDeviceSize offset)
+void
+sqtt_CmdDispatchIndirect(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset)
{
- EVENT_MARKER(DispatchIndirect, commandBuffer, buffer, offset);
+ EVENT_MARKER(DispatchIndirect, commandBuffer, buffer, offset);
}
-void sqtt_CmdCopyBuffer2KHR(
- VkCommandBuffer commandBuffer,
- const VkCopyBufferInfo2KHR* pCopyBufferInfo)
+void
+sqtt_CmdCopyBuffer2KHR(VkCommandBuffer commandBuffer, const VkCopyBufferInfo2KHR *pCopyBufferInfo)
{
- EVENT_MARKER_ALIAS(CopyBuffer2KHR, CopyBuffer, commandBuffer,
- pCopyBufferInfo);
+ EVENT_MARKER_ALIAS(CopyBuffer2KHR, CopyBuffer, commandBuffer, pCopyBufferInfo);
}
-void sqtt_CmdFillBuffer(
- VkCommandBuffer commandBuffer,
- VkBuffer dstBuffer,
- VkDeviceSize dstOffset,
- VkDeviceSize fillSize,
- uint32_t data)
+void
+sqtt_CmdFillBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset,
+ VkDeviceSize fillSize, uint32_t data)
{
- EVENT_MARKER(FillBuffer, commandBuffer, dstBuffer, dstOffset, fillSize,
- data);
+ EVENT_MARKER(FillBuffer, commandBuffer, dstBuffer, dstOffset, fillSize, data);
}
-void sqtt_CmdUpdateBuffer(
- VkCommandBuffer commandBuffer,
- VkBuffer dstBuffer,
- VkDeviceSize dstOffset,
- VkDeviceSize dataSize,
- const void* pData)
+void
+sqtt_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset,
+ VkDeviceSize dataSize, const void *pData)
{
- EVENT_MARKER(UpdateBuffer, commandBuffer, dstBuffer, dstOffset,
- dataSize, pData);
+ EVENT_MARKER(UpdateBuffer, commandBuffer, dstBuffer, dstOffset, dataSize, pData);
}
-void sqtt_CmdCopyImage2KHR(
- VkCommandBuffer commandBuffer,
- const VkCopyImageInfo2KHR* pCopyImageInfo)
+void
+sqtt_CmdCopyImage2KHR(VkCommandBuffer commandBuffer, const VkCopyImageInfo2KHR *pCopyImageInfo)
{
- EVENT_MARKER_ALIAS(CopyImage2KHR, CopyImage, commandBuffer,
- pCopyImageInfo);
+ EVENT_MARKER_ALIAS(CopyImage2KHR, CopyImage, commandBuffer, pCopyImageInfo);
}
-void sqtt_CmdCopyBufferToImage2KHR(
- VkCommandBuffer commandBuffer,
- const VkCopyBufferToImageInfo2KHR* pCopyBufferToImageInfo)
+void
+sqtt_CmdCopyBufferToImage2KHR(VkCommandBuffer commandBuffer,
+ const VkCopyBufferToImageInfo2KHR *pCopyBufferToImageInfo)
{
- EVENT_MARKER_ALIAS(CopyBufferToImage2KHR, CopyBufferToImage,
- commandBuffer, pCopyBufferToImageInfo);
+ EVENT_MARKER_ALIAS(CopyBufferToImage2KHR, CopyBufferToImage, commandBuffer,
+ pCopyBufferToImageInfo);
}
-void sqtt_CmdCopyImageToBuffer2KHR(
- VkCommandBuffer commandBuffer,
- const VkCopyImageToBufferInfo2KHR* pCopyImageToBufferInfo)
+void
+sqtt_CmdCopyImageToBuffer2KHR(VkCommandBuffer commandBuffer,
+ const VkCopyImageToBufferInfo2KHR *pCopyImageToBufferInfo)
{
- EVENT_MARKER_ALIAS(CopyImageToBuffer2KHR, CopyImageToBuffer,
- commandBuffer, pCopyImageToBufferInfo);
+ EVENT_MARKER_ALIAS(CopyImageToBuffer2KHR, CopyImageToBuffer, commandBuffer,
+ pCopyImageToBufferInfo);
}
-void sqtt_CmdBlitImage2KHR(
- VkCommandBuffer commandBuffer,
- const VkBlitImageInfo2KHR* pBlitImageInfo)
+void
+sqtt_CmdBlitImage2KHR(VkCommandBuffer commandBuffer, const VkBlitImageInfo2KHR *pBlitImageInfo)
{
- EVENT_MARKER_ALIAS(BlitImage2KHR, BlitImage, commandBuffer,
- pBlitImageInfo);
+ EVENT_MARKER_ALIAS(BlitImage2KHR, BlitImage, commandBuffer, pBlitImageInfo);
}
-void sqtt_CmdClearColorImage(
- VkCommandBuffer commandBuffer,
- VkImage image_h,
- VkImageLayout imageLayout,
- const VkClearColorValue* pColor,
- uint32_t rangeCount,
- const VkImageSubresourceRange* pRanges)
+void
+sqtt_CmdClearColorImage(VkCommandBuffer commandBuffer, VkImage image_h, VkImageLayout imageLayout,
+ const VkClearColorValue *pColor, uint32_t rangeCount,
+ const VkImageSubresourceRange *pRanges)
{
- EVENT_MARKER(ClearColorImage, commandBuffer, image_h, imageLayout,
- pColor, rangeCount, pRanges);
+ EVENT_MARKER(ClearColorImage, commandBuffer, image_h, imageLayout, pColor, rangeCount, pRanges);
}
-void sqtt_CmdClearDepthStencilImage(
- VkCommandBuffer commandBuffer,
- VkImage image_h,
- VkImageLayout imageLayout,
- const VkClearDepthStencilValue* pDepthStencil,
- uint32_t rangeCount,
- const VkImageSubresourceRange* pRanges)
+void
+sqtt_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, VkImage image_h,
+ VkImageLayout imageLayout,
+ const VkClearDepthStencilValue *pDepthStencil, uint32_t rangeCount,
+ const VkImageSubresourceRange *pRanges)
{
- EVENT_MARKER(ClearDepthStencilImage, commandBuffer, image_h,
- imageLayout, pDepthStencil, rangeCount, pRanges);
+ EVENT_MARKER(ClearDepthStencilImage, commandBuffer, image_h, imageLayout, pDepthStencil,
+ rangeCount, pRanges);
}
-void sqtt_CmdClearAttachments(
- VkCommandBuffer commandBuffer,
- uint32_t attachmentCount,
- const VkClearAttachment* pAttachments,
- uint32_t rectCount,
- const VkClearRect* pRects)
+void
+sqtt_CmdClearAttachments(VkCommandBuffer commandBuffer, uint32_t attachmentCount,
+ const VkClearAttachment *pAttachments, uint32_t rectCount,
+ const VkClearRect *pRects)
{
- EVENT_MARKER(ClearAttachments, commandBuffer, attachmentCount,
- pAttachments, rectCount, pRects);
+ EVENT_MARKER(ClearAttachments, commandBuffer, attachmentCount, pAttachments, rectCount, pRects);
}
-void sqtt_CmdResolveImage2KHR(
- VkCommandBuffer commandBuffer,
- const VkResolveImageInfo2KHR* pResolveImageInfo)
+void
+sqtt_CmdResolveImage2KHR(VkCommandBuffer commandBuffer,
+ const VkResolveImageInfo2KHR *pResolveImageInfo)
{
- EVENT_MARKER_ALIAS(ResolveImage2KHR, ResolveImage, commandBuffer,
- pResolveImageInfo);
+ EVENT_MARKER_ALIAS(ResolveImage2KHR, ResolveImage, commandBuffer, pResolveImageInfo);
}
-void sqtt_CmdWaitEvents(VkCommandBuffer commandBuffer,
- uint32_t eventCount,
- const VkEvent* pEvents,
- VkPipelineStageFlags srcStageMask,
- VkPipelineStageFlags dstStageMask,
- uint32_t memoryBarrierCount,
- const VkMemoryBarrier* pMemoryBarriers,
- uint32_t bufferMemoryBarrierCount,
- const VkBufferMemoryBarrier* pBufferMemoryBarriers,
- uint32_t imageMemoryBarrierCount,
- const VkImageMemoryBarrier* pImageMemoryBarriers)
+void
+sqtt_CmdWaitEvents(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent *pEvents,
+ VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask,
+ uint32_t memoryBarrierCount, const VkMemoryBarrier *pMemoryBarriers,
+ uint32_t bufferMemoryBarrierCount,
+ const VkBufferMemoryBarrier *pBufferMemoryBarriers,
+ uint32_t imageMemoryBarrierCount,
+ const VkImageMemoryBarrier *pImageMemoryBarriers)
{
- EVENT_MARKER(WaitEvents, commandBuffer, eventCount, pEvents,
- srcStageMask, dstStageMask, memoryBarrierCount,
- pMemoryBarriers, bufferMemoryBarrierCount,
- pBufferMemoryBarriers, imageMemoryBarrierCount,
- pImageMemoryBarriers);
+ EVENT_MARKER(WaitEvents, commandBuffer, eventCount, pEvents, srcStageMask, dstStageMask,
+ memoryBarrierCount, pMemoryBarriers, bufferMemoryBarrierCount,
+ pBufferMemoryBarriers, imageMemoryBarrierCount, pImageMemoryBarriers);
}
-void sqtt_CmdPipelineBarrier(
- VkCommandBuffer commandBuffer,
- VkPipelineStageFlags srcStageMask,
- VkPipelineStageFlags destStageMask,
- VkBool32 byRegion,
- uint32_t memoryBarrierCount,
- const VkMemoryBarrier* pMemoryBarriers,
- uint32_t bufferMemoryBarrierCount,
- const VkBufferMemoryBarrier* pBufferMemoryBarriers,
- uint32_t imageMemoryBarrierCount,
- const VkImageMemoryBarrier* pImageMemoryBarriers)
+void
+sqtt_CmdPipelineBarrier(VkCommandBuffer commandBuffer, VkPipelineStageFlags srcStageMask,
+ VkPipelineStageFlags destStageMask, VkBool32 byRegion,
+ uint32_t memoryBarrierCount, const VkMemoryBarrier *pMemoryBarriers,
+ uint32_t bufferMemoryBarrierCount,
+ const VkBufferMemoryBarrier *pBufferMemoryBarriers,
+ uint32_t imageMemoryBarrierCount,
+ const VkImageMemoryBarrier *pImageMemoryBarriers)
{
- EVENT_MARKER(PipelineBarrier, commandBuffer, srcStageMask,
- destStageMask, byRegion, memoryBarrierCount,
- pMemoryBarriers, bufferMemoryBarrierCount,
- pBufferMemoryBarriers, imageMemoryBarrierCount,
- pImageMemoryBarriers);
+ EVENT_MARKER(PipelineBarrier, commandBuffer, srcStageMask, destStageMask, byRegion,
+ memoryBarrierCount, pMemoryBarriers, bufferMemoryBarrierCount,
+ pBufferMemoryBarriers, imageMemoryBarrierCount, pImageMemoryBarriers);
}
-void sqtt_CmdResetQueryPool(
- VkCommandBuffer commandBuffer,
- VkQueryPool queryPool,
- uint32_t firstQuery,
- uint32_t queryCount)
+void
+sqtt_CmdResetQueryPool(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery,
+ uint32_t queryCount)
{
- EVENT_MARKER(ResetQueryPool, commandBuffer, queryPool, firstQuery,
- queryCount);
+ EVENT_MARKER(ResetQueryPool, commandBuffer, queryPool, firstQuery, queryCount);
}
-void sqtt_CmdCopyQueryPoolResults(
- VkCommandBuffer commandBuffer,
- VkQueryPool queryPool,
- uint32_t firstQuery,
- uint32_t queryCount,
- VkBuffer dstBuffer,
- VkDeviceSize dstOffset,
- VkDeviceSize stride,
- VkQueryResultFlags flags)
-{
- EVENT_MARKER(CopyQueryPoolResults, commandBuffer, queryPool, firstQuery,
- queryCount, dstBuffer, dstOffset, stride,
- flags);
+void
+sqtt_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPool,
+ uint32_t firstQuery, uint32_t queryCount, VkBuffer dstBuffer,
+ VkDeviceSize dstOffset, VkDeviceSize stride, VkQueryResultFlags flags)
+{
+ EVENT_MARKER(CopyQueryPoolResults, commandBuffer, queryPool, firstQuery, queryCount, dstBuffer,
+ dstOffset, stride, flags);
}
#undef EVENT_MARKER
-#define API_MARKER_ALIAS(cmd_name, api_name, ...) \
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); \
- radv_write_begin_general_api_marker(cmd_buffer, ApiCmd##api_name); \
- radv_Cmd##cmd_name(__VA_ARGS__); \
- radv_write_end_general_api_marker(cmd_buffer, ApiCmd##api_name);
+#define API_MARKER_ALIAS(cmd_name, api_name, ...) \
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); \
+ radv_write_begin_general_api_marker(cmd_buffer, ApiCmd##api_name); \
+ radv_Cmd##cmd_name(__VA_ARGS__); \
+ radv_write_end_general_api_marker(cmd_buffer, ApiCmd##api_name);
-#define API_MARKER(cmd_name, ...) \
- API_MARKER_ALIAS(cmd_name, cmd_name, __VA_ARGS__);
+#define API_MARKER(cmd_name, ...) API_MARKER_ALIAS(cmd_name, cmd_name, __VA_ARGS__);
static bool
radv_sqtt_dump_pipeline()
{
- return getenv("RADV_THREAD_TRACE_PIPELINE");
+ return getenv("RADV_THREAD_TRACE_PIPELINE");
}
-void sqtt_CmdBindPipeline(
- VkCommandBuffer commandBuffer,
- VkPipelineBindPoint pipelineBindPoint,
- VkPipeline _pipeline)
+void
+sqtt_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint,
+ VkPipeline _pipeline)
{
- RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
+ RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
- API_MARKER(BindPipeline, commandBuffer, pipelineBindPoint, _pipeline);
+ API_MARKER(BindPipeline, commandBuffer, pipelineBindPoint, _pipeline);
- if (radv_sqtt_dump_pipeline())
- radv_describe_pipeline_bind(cmd_buffer, pipelineBindPoint, pipeline);
+ if (radv_sqtt_dump_pipeline())
+ radv_describe_pipeline_bind(cmd_buffer, pipelineBindPoint, pipeline);
}
-void sqtt_CmdBindDescriptorSets(
- VkCommandBuffer commandBuffer,
- VkPipelineBindPoint pipelineBindPoint,
- VkPipelineLayout layout,
- uint32_t firstSet,
- uint32_t descriptorSetCount,
- const VkDescriptorSet* pDescriptorSets,
- uint32_t dynamicOffsetCount,
- const uint32_t* pDynamicOffsets)
+void
+sqtt_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint,
+ VkPipelineLayout layout, uint32_t firstSet, uint32_t descriptorSetCount,
+ const VkDescriptorSet *pDescriptorSets, uint32_t dynamicOffsetCount,
+ const uint32_t *pDynamicOffsets)
{
- API_MARKER(BindDescriptorSets, commandBuffer, pipelineBindPoint,
- layout, firstSet, descriptorSetCount,
- pDescriptorSets, dynamicOffsetCount, pDynamicOffsets);
+ API_MARKER(BindDescriptorSets, commandBuffer, pipelineBindPoint, layout, firstSet,
+ descriptorSetCount, pDescriptorSets, dynamicOffsetCount, pDynamicOffsets);
}
-void sqtt_CmdBindIndexBuffer(
- VkCommandBuffer commandBuffer,
- VkBuffer buffer,
- VkDeviceSize offset,
- VkIndexType indexType)
+void
+sqtt_CmdBindIndexBuffer(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
+ VkIndexType indexType)
{
- API_MARKER(BindIndexBuffer, commandBuffer, buffer, offset, indexType);
+ API_MARKER(BindIndexBuffer, commandBuffer, buffer, offset, indexType);
}
-void sqtt_CmdBindVertexBuffers(
- VkCommandBuffer commandBuffer,
- uint32_t firstBinding,
- uint32_t bindingCount,
- const VkBuffer* pBuffers,
- const VkDeviceSize* pOffsets)
+void
+sqtt_CmdBindVertexBuffers(VkCommandBuffer commandBuffer, uint32_t firstBinding,
+ uint32_t bindingCount, const VkBuffer *pBuffers,
+ const VkDeviceSize *pOffsets)
{
- API_MARKER(BindVertexBuffers, commandBuffer, firstBinding, bindingCount,
- pBuffers, pOffsets);
+ API_MARKER(BindVertexBuffers, commandBuffer, firstBinding, bindingCount, pBuffers, pOffsets);
}
-void sqtt_CmdBeginQuery(
- VkCommandBuffer commandBuffer,
- VkQueryPool queryPool,
- uint32_t query,
- VkQueryControlFlags flags)
+void
+sqtt_CmdBeginQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query,
+ VkQueryControlFlags flags)
{
- API_MARKER(BeginQuery, commandBuffer, queryPool, query, flags);
+ API_MARKER(BeginQuery, commandBuffer, queryPool, query, flags);
}
-void sqtt_CmdEndQuery(
- VkCommandBuffer commandBuffer,
- VkQueryPool queryPool,
- uint32_t query)
+void
+sqtt_CmdEndQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query)
{
- API_MARKER(EndQuery, commandBuffer, queryPool, query);
+ API_MARKER(EndQuery, commandBuffer, queryPool, query);
}
-void sqtt_CmdWriteTimestamp(
- VkCommandBuffer commandBuffer,
- VkPipelineStageFlagBits pipelineStage,
- VkQueryPool queryPool,
- uint32_t flags)
+void
+sqtt_CmdWriteTimestamp(VkCommandBuffer commandBuffer, VkPipelineStageFlagBits pipelineStage,
+ VkQueryPool queryPool, uint32_t flags)
{
- API_MARKER(WriteTimestamp, commandBuffer, pipelineStage, queryPool, flags);
+ API_MARKER(WriteTimestamp, commandBuffer, pipelineStage, queryPool, flags);
}
-void sqtt_CmdPushConstants(
- VkCommandBuffer commandBuffer,
- VkPipelineLayout layout,
- VkShaderStageFlags stageFlags,
- uint32_t offset,
- uint32_t size,
- const void* pValues)
+void
+sqtt_CmdPushConstants(VkCommandBuffer commandBuffer, VkPipelineLayout layout,
+ VkShaderStageFlags stageFlags, uint32_t offset, uint32_t size,
+ const void *pValues)
{
- API_MARKER(PushConstants, commandBuffer, layout, stageFlags, offset,
- size, pValues);
+ API_MARKER(PushConstants, commandBuffer, layout, stageFlags, offset, size, pValues);
}
-void sqtt_CmdBeginRenderPass2(
- VkCommandBuffer commandBuffer,
- const VkRenderPassBeginInfo* pRenderPassBeginInfo,
- const VkSubpassBeginInfo* pSubpassBeginInfo)
+void
+sqtt_CmdBeginRenderPass2(VkCommandBuffer commandBuffer,
+ const VkRenderPassBeginInfo *pRenderPassBeginInfo,
+ const VkSubpassBeginInfo *pSubpassBeginInfo)
{
- API_MARKER_ALIAS(BeginRenderPass2, BeginRenderPass, commandBuffer,
- pRenderPassBeginInfo, pSubpassBeginInfo);
+ API_MARKER_ALIAS(BeginRenderPass2, BeginRenderPass, commandBuffer, pRenderPassBeginInfo,
+ pSubpassBeginInfo);
}
-void sqtt_CmdNextSubpass2(
- VkCommandBuffer commandBuffer,
- const VkSubpassBeginInfo* pSubpassBeginInfo,
- const VkSubpassEndInfo* pSubpassEndInfo)
+void
+sqtt_CmdNextSubpass2(VkCommandBuffer commandBuffer, const VkSubpassBeginInfo *pSubpassBeginInfo,
+ const VkSubpassEndInfo *pSubpassEndInfo)
{
- API_MARKER_ALIAS(NextSubpass2, NextSubpass, commandBuffer,
- pSubpassBeginInfo, pSubpassEndInfo);
+ API_MARKER_ALIAS(NextSubpass2, NextSubpass, commandBuffer, pSubpassBeginInfo, pSubpassEndInfo);
}
-void sqtt_CmdEndRenderPass2(
- VkCommandBuffer commandBuffer,
- const VkSubpassEndInfo* pSubpassEndInfo)
+void
+sqtt_CmdEndRenderPass2(VkCommandBuffer commandBuffer, const VkSubpassEndInfo *pSubpassEndInfo)
{
- API_MARKER_ALIAS(EndRenderPass2, EndRenderPass, commandBuffer,
- pSubpassEndInfo);
+ API_MARKER_ALIAS(EndRenderPass2, EndRenderPass, commandBuffer, pSubpassEndInfo);
}
-void sqtt_CmdExecuteCommands(
- VkCommandBuffer commandBuffer,
- uint32_t commandBufferCount,
- const VkCommandBuffer* pCmdBuffers)
+void
+sqtt_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCount,
+ const VkCommandBuffer *pCmdBuffers)
{
- API_MARKER(ExecuteCommands, commandBuffer, commandBufferCount,
- pCmdBuffers);
+ API_MARKER(ExecuteCommands, commandBuffer, commandBufferCount, pCmdBuffers);
}
-void sqtt_CmdSetViewport(
- VkCommandBuffer commandBuffer,
- uint32_t firstViewport,
- uint32_t viewportCount,
- const VkViewport* pViewports)
+void
+sqtt_CmdSetViewport(VkCommandBuffer commandBuffer, uint32_t firstViewport, uint32_t viewportCount,
+ const VkViewport *pViewports)
{
- API_MARKER(SetViewport, commandBuffer, firstViewport, viewportCount,
- pViewports);
+ API_MARKER(SetViewport, commandBuffer, firstViewport, viewportCount, pViewports);
}
-void sqtt_CmdSetScissor(
- VkCommandBuffer commandBuffer,
- uint32_t firstScissor,
- uint32_t scissorCount,
- const VkRect2D* pScissors)
+void
+sqtt_CmdSetScissor(VkCommandBuffer commandBuffer, uint32_t firstScissor, uint32_t scissorCount,
+ const VkRect2D *pScissors)
{
- API_MARKER(SetScissor, commandBuffer, firstScissor, scissorCount,
- pScissors);
+ API_MARKER(SetScissor, commandBuffer, firstScissor, scissorCount, pScissors);
}
-void sqtt_CmdSetLineWidth(
- VkCommandBuffer commandBuffer,
- float lineWidth)
+void
+sqtt_CmdSetLineWidth(VkCommandBuffer commandBuffer, float lineWidth)
{
- API_MARKER(SetLineWidth, commandBuffer, lineWidth);
+ API_MARKER(SetLineWidth, commandBuffer, lineWidth);
}
-void sqtt_CmdSetDepthBias(
- VkCommandBuffer commandBuffer,
- float depthBiasConstantFactor,
- float depthBiasClamp,
- float depthBiasSlopeFactor)
+void
+sqtt_CmdSetDepthBias(VkCommandBuffer commandBuffer, float depthBiasConstantFactor,
+ float depthBiasClamp, float depthBiasSlopeFactor)
{
- API_MARKER(SetDepthBias, commandBuffer, depthBiasConstantFactor,
- depthBiasClamp, depthBiasSlopeFactor);
+ API_MARKER(SetDepthBias, commandBuffer, depthBiasConstantFactor, depthBiasClamp,
+ depthBiasSlopeFactor);
}
-void sqtt_CmdSetBlendConstants(
- VkCommandBuffer commandBuffer,
- const float blendConstants[4])
+void
+sqtt_CmdSetBlendConstants(VkCommandBuffer commandBuffer, const float blendConstants[4])
{
- API_MARKER(SetBlendConstants, commandBuffer, blendConstants);
+ API_MARKER(SetBlendConstants, commandBuffer, blendConstants);
}
-void sqtt_CmdSetDepthBounds(
- VkCommandBuffer commandBuffer,
- float minDepthBounds,
- float maxDepthBounds)
+void
+sqtt_CmdSetDepthBounds(VkCommandBuffer commandBuffer, float minDepthBounds, float maxDepthBounds)
{
- API_MARKER(SetDepthBounds, commandBuffer, minDepthBounds,
- maxDepthBounds);
+ API_MARKER(SetDepthBounds, commandBuffer, minDepthBounds, maxDepthBounds);
}
-void sqtt_CmdSetStencilCompareMask(
- VkCommandBuffer commandBuffer,
- VkStencilFaceFlags faceMask,
- uint32_t compareMask)
+void
+sqtt_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask,
+ uint32_t compareMask)
{
- API_MARKER(SetStencilCompareMask, commandBuffer, faceMask, compareMask);
+ API_MARKER(SetStencilCompareMask, commandBuffer, faceMask, compareMask);
}
-void sqtt_CmdSetStencilWriteMask(
- VkCommandBuffer commandBuffer,
- VkStencilFaceFlags faceMask,
- uint32_t writeMask)
+void
+sqtt_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask,
+ uint32_t writeMask)
{
- API_MARKER(SetStencilWriteMask, commandBuffer, faceMask, writeMask);
+ API_MARKER(SetStencilWriteMask, commandBuffer, faceMask, writeMask);
}
-void sqtt_CmdSetStencilReference(
- VkCommandBuffer commandBuffer,
- VkStencilFaceFlags faceMask,
- uint32_t reference)
+void
+sqtt_CmdSetStencilReference(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask,
+ uint32_t reference)
{
- API_MARKER(SetStencilReference, commandBuffer, faceMask, reference);
+ API_MARKER(SetStencilReference, commandBuffer, faceMask, reference);
}
/* VK_EXT_debug_marker */
-void sqtt_CmdDebugMarkerBeginEXT(
- VkCommandBuffer commandBuffer,
- const VkDebugMarkerMarkerInfoEXT* pMarkerInfo)
+void
+sqtt_CmdDebugMarkerBeginEXT(VkCommandBuffer commandBuffer,
+ const VkDebugMarkerMarkerInfoEXT *pMarkerInfo)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- radv_write_user_event_marker(cmd_buffer, UserEventPush,
- pMarkerInfo->pMarkerName);
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ radv_write_user_event_marker(cmd_buffer, UserEventPush, pMarkerInfo->pMarkerName);
}
-void sqtt_CmdDebugMarkerEndEXT(
- VkCommandBuffer commandBuffer)
+void
+sqtt_CmdDebugMarkerEndEXT(VkCommandBuffer commandBuffer)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ radv_write_user_event_marker(cmd_buffer, UserEventPop, NULL);
}
-void sqtt_CmdDebugMarkerInsertEXT(
- VkCommandBuffer commandBuffer,
- const VkDebugMarkerMarkerInfoEXT* pMarkerInfo)
+void
+sqtt_CmdDebugMarkerInsertEXT(VkCommandBuffer commandBuffer,
+ const VkDebugMarkerMarkerInfoEXT *pMarkerInfo)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- radv_write_user_event_marker(cmd_buffer, UserEventTrigger,
- pMarkerInfo->pMarkerName);
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ radv_write_user_event_marker(cmd_buffer, UserEventTrigger, pMarkerInfo->pMarkerName);
}
-VkResult sqtt_DebugMarkerSetObjectNameEXT(
- VkDevice device,
- const VkDebugMarkerObjectNameInfoEXT* pNameInfo)
+VkResult
+sqtt_DebugMarkerSetObjectNameEXT(VkDevice device, const VkDebugMarkerObjectNameInfoEXT *pNameInfo)
{
- /* no-op */
- return VK_SUCCESS;
+ /* no-op */
+ return VK_SUCCESS;
}
-VkResult sqtt_DebugMarkerSetObjectTagEXT(
- VkDevice device,
- const VkDebugMarkerObjectTagInfoEXT* pTagInfo)
+VkResult
+sqtt_DebugMarkerSetObjectTagEXT(VkDevice device, const VkDebugMarkerObjectTagInfoEXT *pTagInfo)
{
- /* no-op */
- return VK_SUCCESS;
+ /* no-op */
+ return VK_SUCCESS;
}
/* Pipelines */
static enum rgp_hardware_stages
-radv_mesa_to_rgp_shader_stage(struct radv_pipeline *pipeline,
- gl_shader_stage stage)
-{
- struct radv_shader_variant *shader = pipeline->shaders[stage];
-
- switch (stage) {
- case MESA_SHADER_VERTEX:
- if (shader->info.vs.as_ls)
- return RGP_HW_STAGE_LS;
- else if (shader->info.vs.as_es)
- return RGP_HW_STAGE_ES;
- else if (shader->info.is_ngg)
- return RGP_HW_STAGE_GS;
- else
- return RGP_HW_STAGE_VS;
- case MESA_SHADER_TESS_CTRL:
- return RGP_HW_STAGE_HS;
- case MESA_SHADER_TESS_EVAL:
- if (shader->info.tes.as_es)
- return RGP_HW_STAGE_ES;
- else if (shader->info.is_ngg)
- return RGP_HW_STAGE_GS;
- else
- return RGP_HW_STAGE_VS;
- case MESA_SHADER_GEOMETRY:
- return RGP_HW_STAGE_GS;
- case MESA_SHADER_FRAGMENT:
- return RGP_HW_STAGE_PS;
- case MESA_SHADER_COMPUTE:
- return RGP_HW_STAGE_CS;
- default:
- unreachable("invalid mesa shader stage");
- }
+radv_mesa_to_rgp_shader_stage(struct radv_pipeline *pipeline, gl_shader_stage stage)
+{
+ struct radv_shader_variant *shader = pipeline->shaders[stage];
+
+ switch (stage) {
+ case MESA_SHADER_VERTEX:
+ if (shader->info.vs.as_ls)
+ return RGP_HW_STAGE_LS;
+ else if (shader->info.vs.as_es)
+ return RGP_HW_STAGE_ES;
+ else if (shader->info.is_ngg)
+ return RGP_HW_STAGE_GS;
+ else
+ return RGP_HW_STAGE_VS;
+ case MESA_SHADER_TESS_CTRL:
+ return RGP_HW_STAGE_HS;
+ case MESA_SHADER_TESS_EVAL:
+ if (shader->info.tes.as_es)
+ return RGP_HW_STAGE_ES;
+ else if (shader->info.is_ngg)
+ return RGP_HW_STAGE_GS;
+ else
+ return RGP_HW_STAGE_VS;
+ case MESA_SHADER_GEOMETRY:
+ return RGP_HW_STAGE_GS;
+ case MESA_SHADER_FRAGMENT:
+ return RGP_HW_STAGE_PS;
+ case MESA_SHADER_COMPUTE:
+ return RGP_HW_STAGE_CS;
+ default:
+ unreachable("invalid mesa shader stage");
+ }
}
static VkResult
-radv_add_code_object(struct radv_device *device,
- struct radv_pipeline *pipeline)
-{
- struct ac_thread_trace_data *thread_trace_data = &device->thread_trace;
- struct rgp_code_object *code_object = &thread_trace_data->rgp_code_object;
- struct rgp_code_object_record *record;
-
- record = malloc(sizeof(struct rgp_code_object_record));
- if (!record)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
-
- record->shader_stages_mask = 0;
- record->num_shaders_combined = 0;
- record->pipeline_hash[0] = pipeline->pipeline_hash;
- record->pipeline_hash[1] = pipeline->pipeline_hash;
-
- for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
- struct radv_shader_variant *shader = pipeline->shaders[i];
- uint8_t *code;
- uint64_t va;
-
- if (!shader)
- continue;
-
- code = malloc(shader->code_size);
- if (!code) {
- free(record);
- return VK_ERROR_OUT_OF_HOST_MEMORY;
- }
- memcpy(code, shader->code_ptr, shader->code_size);
-
- va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
-
- record->shader_data[i].hash[0] = (uint64_t)(uintptr_t)shader;
- record->shader_data[i].hash[1] = (uint64_t)(uintptr_t)shader >> 32;
- record->shader_data[i].code_size = shader->code_size;
- record->shader_data[i].code = code;
- record->shader_data[i].vgpr_count = shader->config.num_vgprs;
- record->shader_data[i].sgpr_count = shader->config.num_sgprs;
- record->shader_data[i].base_address = va & 0xffffffffffff;
- record->shader_data[i].elf_symbol_offset = 0;
- record->shader_data[i].hw_stage = radv_mesa_to_rgp_shader_stage(pipeline, i);
- record->shader_data[i].is_combined = false;
-
- record->shader_stages_mask |= (1 << i);
- record->num_shaders_combined++;
- }
-
- simple_mtx_lock(&code_object->lock);
- list_addtail(&record->list, &code_object->record);
- code_object->record_count++;
- simple_mtx_unlock(&code_object->lock);
-
- return VK_SUCCESS;
+radv_add_code_object(struct radv_device *device, struct radv_pipeline *pipeline)
+{
+ struct ac_thread_trace_data *thread_trace_data = &device->thread_trace;
+ struct rgp_code_object *code_object = &thread_trace_data->rgp_code_object;
+ struct rgp_code_object_record *record;
+
+ record = malloc(sizeof(struct rgp_code_object_record));
+ if (!record)
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+ record->shader_stages_mask = 0;
+ record->num_shaders_combined = 0;
+ record->pipeline_hash[0] = pipeline->pipeline_hash;
+ record->pipeline_hash[1] = pipeline->pipeline_hash;
+
+ for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+ struct radv_shader_variant *shader = pipeline->shaders[i];
+ uint8_t *code;
+ uint64_t va;
+
+ if (!shader)
+ continue;
+
+ code = malloc(shader->code_size);
+ if (!code) {
+ free(record);
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+ }
+ memcpy(code, shader->code_ptr, shader->code_size);
+
+ va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
+
+ record->shader_data[i].hash[0] = (uint64_t)(uintptr_t)shader;
+ record->shader_data[i].hash[1] = (uint64_t)(uintptr_t)shader >> 32;
+ record->shader_data[i].code_size = shader->code_size;
+ record->shader_data[i].code = code;
+ record->shader_data[i].vgpr_count = shader->config.num_vgprs;
+ record->shader_data[i].sgpr_count = shader->config.num_sgprs;
+ record->shader_data[i].base_address = va & 0xffffffffffff;
+ record->shader_data[i].elf_symbol_offset = 0;
+ record->shader_data[i].hw_stage = radv_mesa_to_rgp_shader_stage(pipeline, i);
+ record->shader_data[i].is_combined = false;
+
+ record->shader_stages_mask |= (1 << i);
+ record->num_shaders_combined++;
+ }
+
+ simple_mtx_lock(&code_object->lock);
+ list_addtail(&record->list, &code_object->record);
+ code_object->record_count++;
+ simple_mtx_unlock(&code_object->lock);
+
+ return VK_SUCCESS;
}
static VkResult
-radv_register_pipeline(struct radv_device *device,
- struct radv_pipeline *pipeline)
+radv_register_pipeline(struct radv_device *device, struct radv_pipeline *pipeline)
{
- bool result;
- uint64_t base_va = ~0;
+ bool result;
+ uint64_t base_va = ~0;
- result = ac_sqtt_add_pso_correlation(&device->thread_trace, pipeline->pipeline_hash);
- if (!result)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
+ result = ac_sqtt_add_pso_correlation(&device->thread_trace, pipeline->pipeline_hash);
+ if (!result)
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
- /* Find the lowest shader BO VA. */
- for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
- struct radv_shader_variant *shader = pipeline->shaders[i];
- uint64_t va;
+ /* Find the lowest shader BO VA. */
+ for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+ struct radv_shader_variant *shader = pipeline->shaders[i];
+ uint64_t va;
- if (!shader)
- continue;
+ if (!shader)
+ continue;
- va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
- base_va = MIN2(base_va, va);
- }
+ va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
+ base_va = MIN2(base_va, va);
+ }
- result = ac_sqtt_add_code_object_loader_event(&device->thread_trace,
- pipeline->pipeline_hash,
- base_va);
- if (!result)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
+ result =
+ ac_sqtt_add_code_object_loader_event(&device->thread_trace, pipeline->pipeline_hash, base_va);
+ if (!result)
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
- result = radv_add_code_object(device, pipeline);
- if (result != VK_SUCCESS)
- return result;
+ result = radv_add_code_object(device, pipeline);
+ if (result != VK_SUCCESS)
+ return result;
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
static void
-radv_unregister_pipeline(struct radv_device *device,
- struct radv_pipeline *pipeline)
-{
- struct ac_thread_trace_data *thread_trace_data = &device->thread_trace;
- struct rgp_pso_correlation *pso_correlation = &thread_trace_data->rgp_pso_correlation;
- struct rgp_loader_events *loader_events = &thread_trace_data->rgp_loader_events;
- struct rgp_code_object *code_object = &thread_trace_data->rgp_code_object;
-
- /* Destroy the PSO correlation record. */
- simple_mtx_lock(&pso_correlation->lock);
- list_for_each_entry_safe(struct rgp_pso_correlation_record, record,
- &pso_correlation->record, list) {
- if (record->pipeline_hash[0] == pipeline->pipeline_hash) {
- pso_correlation->record_count--;
- list_del(&record->list);
- free(record);
- break;
- }
- }
- simple_mtx_unlock(&pso_correlation->lock);
-
- /* Destroy the code object loader record. */
- simple_mtx_lock(&loader_events->lock);
- list_for_each_entry_safe(struct rgp_loader_events_record, record,
- &loader_events->record, list) {
- if (record->code_object_hash[0] == pipeline->pipeline_hash) {
- loader_events->record_count--;
- list_del(&record->list);
- free(record);
- break;
- }
- }
- simple_mtx_unlock(&loader_events->lock);
-
- /* Destroy the code object record. */
- simple_mtx_lock(&code_object->lock);
- list_for_each_entry_safe(struct rgp_code_object_record, record,
- &code_object->record, list) {
- if (record->pipeline_hash[0] == pipeline->pipeline_hash) {
- uint32_t mask = record->shader_stages_mask;
- int i;
-
- /* Free the disassembly. */
- while (mask) {
- i = u_bit_scan(&mask);
- free(record->shader_data[i].code);
- }
-
- code_object->record_count--;
- list_del(&record->list);
- free(record);
- break;
- }
- }
- simple_mtx_unlock(&code_object->lock);
-}
-
-VkResult sqtt_CreateGraphicsPipelines(
- VkDevice _device,
- VkPipelineCache pipelineCache,
- uint32_t count,
- const VkGraphicsPipelineCreateInfo* pCreateInfos,
- const VkAllocationCallbacks* pAllocator,
- VkPipeline* pPipelines)
-{
- RADV_FROM_HANDLE(radv_device, device, _device);
- VkResult result;
-
- result = radv_CreateGraphicsPipelines(_device, pipelineCache, count,
- pCreateInfos, pAllocator,
- pPipelines);
- if (result != VK_SUCCESS)
- return result;
-
- if (radv_sqtt_dump_pipeline()) {
- for (unsigned i = 0; i < count; i++) {
- RADV_FROM_HANDLE(radv_pipeline, pipeline, pPipelines[i]);
-
- if (!pipeline)
- continue;
-
- result = radv_register_pipeline(device, pipeline);
- if (result != VK_SUCCESS)
- goto fail;
- }
- }
-
- return VK_SUCCESS;
+radv_unregister_pipeline(struct radv_device *device, struct radv_pipeline *pipeline)
+{
+ struct ac_thread_trace_data *thread_trace_data = &device->thread_trace;
+ struct rgp_pso_correlation *pso_correlation = &thread_trace_data->rgp_pso_correlation;
+ struct rgp_loader_events *loader_events = &thread_trace_data->rgp_loader_events;
+ struct rgp_code_object *code_object = &thread_trace_data->rgp_code_object;
+
+ /* Destroy the PSO correlation record. */
+ simple_mtx_lock(&pso_correlation->lock);
+ list_for_each_entry_safe(struct rgp_pso_correlation_record, record, &pso_correlation->record,
+ list)
+ {
+ if (record->pipeline_hash[0] == pipeline->pipeline_hash) {
+ pso_correlation->record_count--;
+ list_del(&record->list);
+ free(record);
+ break;
+ }
+ }
+ simple_mtx_unlock(&pso_correlation->lock);
+
+ /* Destroy the code object loader record. */
+ simple_mtx_lock(&loader_events->lock);
+ list_for_each_entry_safe(struct rgp_loader_events_record, record, &loader_events->record, list)
+ {
+ if (record->code_object_hash[0] == pipeline->pipeline_hash) {
+ loader_events->record_count--;
+ list_del(&record->list);
+ free(record);
+ break;
+ }
+ }
+ simple_mtx_unlock(&loader_events->lock);
+
+ /* Destroy the code object record. */
+ simple_mtx_lock(&code_object->lock);
+ list_for_each_entry_safe(struct rgp_code_object_record, record, &code_object->record, list)
+ {
+ if (record->pipeline_hash[0] == pipeline->pipeline_hash) {
+ uint32_t mask = record->shader_stages_mask;
+ int i;
+
+ /* Free the disassembly. */
+ while (mask) {
+ i = u_bit_scan(&mask);
+ free(record->shader_data[i].code);
+ }
+
+ code_object->record_count--;
+ list_del(&record->list);
+ free(record);
+ break;
+ }
+ }
+ simple_mtx_unlock(&code_object->lock);
+}
+
+VkResult
+sqtt_CreateGraphicsPipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count,
+ const VkGraphicsPipelineCreateInfo *pCreateInfos,
+ const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ VkResult result;
+
+ result = radv_CreateGraphicsPipelines(_device, pipelineCache, count, pCreateInfos, pAllocator,
+ pPipelines);
+ if (result != VK_SUCCESS)
+ return result;
+
+ if (radv_sqtt_dump_pipeline()) {
+ for (unsigned i = 0; i < count; i++) {
+ RADV_FROM_HANDLE(radv_pipeline, pipeline, pPipelines[i]);
+
+ if (!pipeline)
+ continue;
+
+ result = radv_register_pipeline(device, pipeline);
+ if (result != VK_SUCCESS)
+ goto fail;
+ }
+ }
+
+ return VK_SUCCESS;
fail:
- for (unsigned i = 0; i < count; i++) {
- sqtt_DestroyPipeline(_device, pPipelines[i], pAllocator);
- pPipelines[i] = VK_NULL_HANDLE;
- }
- return result;
+ for (unsigned i = 0; i < count; i++) {
+ sqtt_DestroyPipeline(_device, pPipelines[i], pAllocator);
+ pPipelines[i] = VK_NULL_HANDLE;
+ }
+ return result;
}
-VkResult sqtt_CreateComputePipelines(
- VkDevice _device,
- VkPipelineCache pipelineCache,
- uint32_t count,
- const VkComputePipelineCreateInfo* pCreateInfos,
- const VkAllocationCallbacks* pAllocator,
- VkPipeline* pPipelines)
+VkResult
+sqtt_CreateComputePipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count,
+ const VkComputePipelineCreateInfo *pCreateInfos,
+ const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- VkResult result;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ VkResult result;
- result = radv_CreateComputePipelines(_device, pipelineCache, count,
- pCreateInfos, pAllocator,
- pPipelines);
- if (result != VK_SUCCESS)
- return result;
+ result = radv_CreateComputePipelines(_device, pipelineCache, count, pCreateInfos, pAllocator,
+ pPipelines);
+ if (result != VK_SUCCESS)
+ return result;
- if (radv_sqtt_dump_pipeline()) {
- for (unsigned i = 0; i < count; i++) {
- RADV_FROM_HANDLE(radv_pipeline, pipeline, pPipelines[i]);
+ if (radv_sqtt_dump_pipeline()) {
+ for (unsigned i = 0; i < count; i++) {
+ RADV_FROM_HANDLE(radv_pipeline, pipeline, pPipelines[i]);
- if (!pipeline)
- continue;
+ if (!pipeline)
+ continue;
- result = radv_register_pipeline(device, pipeline);
- if (result != VK_SUCCESS)
- goto fail;
- }
- }
+ result = radv_register_pipeline(device, pipeline);
+ if (result != VK_SUCCESS)
+ goto fail;
+ }
+ }
- return VK_SUCCESS;
+ return VK_SUCCESS;
fail:
- for (unsigned i = 0; i < count; i++) {
- sqtt_DestroyPipeline(_device, pPipelines[i], pAllocator);
- pPipelines[i] = VK_NULL_HANDLE;
- }
- return result;
+ for (unsigned i = 0; i < count; i++) {
+ sqtt_DestroyPipeline(_device, pPipelines[i], pAllocator);
+ pPipelines[i] = VK_NULL_HANDLE;
+ }
+ return result;
}
-void sqtt_DestroyPipeline(
- VkDevice _device,
- VkPipeline _pipeline,
- const VkAllocationCallbacks* pAllocator)
+void
+sqtt_DestroyPipeline(VkDevice _device, VkPipeline _pipeline,
+ const VkAllocationCallbacks *pAllocator)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
- if (!_pipeline)
- return;
+ if (!_pipeline)
+ return;
- if (radv_sqtt_dump_pipeline())
- radv_unregister_pipeline(device, pipeline);
+ if (radv_sqtt_dump_pipeline())
+ radv_unregister_pipeline(device, pipeline);
- radv_DestroyPipeline(_device, _pipeline, pAllocator);
+ radv_DestroyPipeline(_device, _pipeline, pAllocator);
}
#undef API_MARKER
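
The reformatted definitions in the + lines above consistently place the return type on its own line and pack the parameter list up to the column limit, with compound literals broken out one designator per line. The fragment below is a hypothetical sketch written in that layout, not code from the RADV tree; the struct and function names are invented purely for illustration.

#include <stdint.h>

struct example_extent {
   uint32_t width;
   uint32_t height;
   uint32_t layers;
};

static struct example_extent
example_make_extent(uint32_t width, uint32_t height, uint32_t layers)
{
   /* Designated initializers are laid out one field per line, as in the reformatted code above. */
   return (struct example_extent){
      .width = width,
      .height = height,
      .layers = layers,
   };
}

int
main(void)
{
   struct example_extent e = example_make_extent(256, 256, 1);
   return e.layers == 1 ? 0 : 1;
}
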
diff --git a/src/amd/vulkan/radv_android.c b/src/amd/vulkan/radv_android.c
index 76ab39ca480..f8064c011d9 100644
--- a/src/amd/vulkan/radv_android.c
+++ b/src/amd/vulkan/radv_android.c
@@ -22,12 +22,12 @@
*/
#ifdef ANDROID
+#include <libsync.h>
#include <hardware/gralloc.h>
#include <hardware/hardware.h>
#include <hardware/hwvulkan.h>
#include <vulkan/vk_android_native_buffer.h>
#include <vulkan/vk_icd.h>
-#include <libsync.h>
#if ANDROID_API_LEVEL >= 26
#include <hardware/gralloc1.h>
@@ -42,433 +42,421 @@
#ifdef ANDROID
-static int radv_hal_open(const struct hw_module_t* mod, const char* id, struct hw_device_t** dev);
+static int radv_hal_open(const struct hw_module_t *mod, const char *id, struct hw_device_t **dev);
static int radv_hal_close(struct hw_device_t *dev);
static void UNUSED
static_asserts(void)
{
- STATIC_ASSERT(HWVULKAN_DISPATCH_MAGIC == ICD_LOADER_MAGIC);
+ STATIC_ASSERT(HWVULKAN_DISPATCH_MAGIC == ICD_LOADER_MAGIC);
}
PUBLIC struct hwvulkan_module_t HAL_MODULE_INFO_SYM = {
- .common = {
- .tag = HARDWARE_MODULE_TAG,
- .module_api_version = HWVULKAN_MODULE_API_VERSION_0_1,
- .hal_api_version = HARDWARE_MAKE_API_VERSION(1, 0),
- .id = HWVULKAN_HARDWARE_MODULE_ID,
- .name = "AMD Vulkan HAL",
- .author = "Google",
- .methods = &(hw_module_methods_t) {
- .open = radv_hal_open,
- },
- },
+ .common =
+ {
+ .tag = HARDWARE_MODULE_TAG,
+ .module_api_version = HWVULKAN_MODULE_API_VERSION_0_1,
+ .hal_api_version = HARDWARE_MAKE_API_VERSION(1, 0),
+ .id = HWVULKAN_HARDWARE_MODULE_ID,
+ .name = "AMD Vulkan HAL",
+ .author = "Google",
+ .methods =
+ &(hw_module_methods_t){
+ .open = radv_hal_open,
+ },
+ },
};
/* If any bits in test_mask are set, then unset them and return true. */
static inline bool
unmask32(uint32_t *inout_mask, uint32_t test_mask)
{
- uint32_t orig_mask = *inout_mask;
- *inout_mask &= ~test_mask;
- return *inout_mask != orig_mask;
+ uint32_t orig_mask = *inout_mask;
+ *inout_mask &= ~test_mask;
+ return *inout_mask != orig_mask;
}
static int
-radv_hal_open(const struct hw_module_t* mod, const char* id,
- struct hw_device_t** dev)
+radv_hal_open(const struct hw_module_t *mod, const char *id, struct hw_device_t **dev)
{
- assert(mod == &HAL_MODULE_INFO_SYM.common);
- assert(strcmp(id, HWVULKAN_DEVICE_0) == 0);
-
- hwvulkan_device_t *hal_dev = malloc(sizeof(*hal_dev));
- if (!hal_dev)
- return -1;
-
- *hal_dev = (hwvulkan_device_t) {
- .common = {
- .tag = HARDWARE_DEVICE_TAG,
- .version = HWVULKAN_DEVICE_API_VERSION_0_1,
- .module = &HAL_MODULE_INFO_SYM.common,
- .close = radv_hal_close,
- },
- .EnumerateInstanceExtensionProperties = radv_EnumerateInstanceExtensionProperties,
- .CreateInstance = radv_CreateInstance,
- .GetInstanceProcAddr = radv_GetInstanceProcAddr,
- };
-
- *dev = &hal_dev->common;
- return 0;
+ assert(mod == &HAL_MODULE_INFO_SYM.common);
+ assert(strcmp(id, HWVULKAN_DEVICE_0) == 0);
+
+ hwvulkan_device_t *hal_dev = malloc(sizeof(*hal_dev));
+ if (!hal_dev)
+ return -1;
+
+ *hal_dev = (hwvulkan_device_t){
+ .common =
+ {
+ .tag = HARDWARE_DEVICE_TAG,
+ .version = HWVULKAN_DEVICE_API_VERSION_0_1,
+ .module = &HAL_MODULE_INFO_SYM.common,
+ .close = radv_hal_close,
+ },
+ .EnumerateInstanceExtensionProperties = radv_EnumerateInstanceExtensionProperties,
+ .CreateInstance = radv_CreateInstance,
+ .GetInstanceProcAddr = radv_GetInstanceProcAddr,
+ };
+
+ *dev = &hal_dev->common;
+ return 0;
}
static int
radv_hal_close(struct hw_device_t *dev)
{
- /* hwvulkan.h claims that hw_device_t::close() is never called. */
- return -1;
+ /* hwvulkan.h claims that hw_device_t::close() is never called. */
+ return -1;
}
VkResult
-radv_image_from_gralloc(VkDevice device_h,
- const VkImageCreateInfo *base_info,
- const VkNativeBufferANDROID *gralloc_info,
- const VkAllocationCallbacks *alloc,
- VkImage *out_image_h)
+radv_image_from_gralloc(VkDevice device_h, const VkImageCreateInfo *base_info,
+ const VkNativeBufferANDROID *gralloc_info,
+ const VkAllocationCallbacks *alloc, VkImage *out_image_h)
{
- RADV_FROM_HANDLE(radv_device, device, device_h);
- VkImage image_h = VK_NULL_HANDLE;
- struct radv_image *image = NULL;
- VkResult result;
-
- if (gralloc_info->handle->numFds != 1) {
- return vk_errorf(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE,
- "VkNativeBufferANDROID::handle::numFds is %d, "
- "expected 1", gralloc_info->handle->numFds);
- }
-
- /* Do not close the gralloc handle's dma_buf. The lifetime of the dma_buf
- * must exceed that of the gralloc handle, and we do not own the gralloc
- * handle.
- */
- int dma_buf = gralloc_info->handle->data[0];
-
- VkDeviceMemory memory_h;
-
- const VkImportMemoryFdInfoKHR import_info = {
- .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
- .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
- .fd = os_dupfd_cloexec(dma_buf),
- };
-
- /* Find the first VRAM memory type, or GART for PRIME images. */
- int memory_type_index = -1;
- for (int i = 0; i < device->physical_device->memory_properties.memoryTypeCount; ++i) {
- bool is_local = !!(device->physical_device->memory_properties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
- if (is_local) {
- memory_type_index = i;
- break;
- }
- }
-
- /* fallback */
- if (memory_type_index == -1)
- memory_type_index = 0;
-
- result = radv_AllocateMemory(device_h,
- &(VkMemoryAllocateInfo) {
- .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
- .pNext = &import_info,
- /* Max buffer size, unused for imports */
- .allocationSize = 0x7FFFFFFF,
- .memoryTypeIndex = memory_type_index,
- },
- alloc,
- &memory_h);
- if (result != VK_SUCCESS)
- return result;
-
- struct radeon_bo_metadata md;
- device->ws->buffer_get_metadata(device->ws, radv_device_memory_from_handle(memory_h)->bo, &md);
-
- VkImageCreateInfo updated_base_info = *base_info;
-
- VkExternalMemoryImageCreateInfo external_memory_info = {
- .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
- .pNext = updated_base_info.pNext,
- .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
- };
-
- updated_base_info.pNext = &external_memory_info;
-
- result = radv_image_create(device_h,
- &(struct radv_image_create_info) {
- .vk_info = &updated_base_info,
- .no_metadata_planes = true,
- .bo_metadata = &md,
- },
- alloc,
- &image_h);
-
- if (result != VK_SUCCESS)
- goto fail_create_image;
-
- image = radv_image_from_handle(image_h);
-
- radv_image_override_offset_stride(device, image, 0, gralloc_info->stride);
-
- radv_BindImageMemory(device_h, image_h, memory_h, 0);
-
- image->owned_memory = memory_h;
- /* Don't clobber the out-parameter until success is certain. */
- *out_image_h = image_h;
-
- return VK_SUCCESS;
+ RADV_FROM_HANDLE(radv_device, device, device_h);
+ VkImage image_h = VK_NULL_HANDLE;
+ struct radv_image *image = NULL;
+ VkResult result;
+
+ if (gralloc_info->handle->numFds != 1) {
+ return vk_errorf(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE,
+ "VkNativeBufferANDROID::handle::numFds is %d, "
+ "expected 1",
+ gralloc_info->handle->numFds);
+ }
+
+ /* Do not close the gralloc handle's dma_buf. The lifetime of the dma_buf
+ * must exceed that of the gralloc handle, and we do not own the gralloc
+ * handle.
+ */
+ int dma_buf = gralloc_info->handle->data[0];
+
+ VkDeviceMemory memory_h;
+
+ const VkImportMemoryFdInfoKHR import_info = {
+ .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
+ .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
+ .fd = os_dupfd_cloexec(dma_buf),
+ };
+
+ /* Find the first VRAM memory type, or GART for PRIME images. */
+ int memory_type_index = -1;
+ for (int i = 0; i < device->physical_device->memory_properties.memoryTypeCount; ++i) {
+ bool is_local = !!(device->physical_device->memory_properties.memoryTypes[i].propertyFlags &
+ VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
+ if (is_local) {
+ memory_type_index = i;
+ break;
+ }
+ }
+
+ /* fallback */
+ if (memory_type_index == -1)
+ memory_type_index = 0;
+
+ result = radv_AllocateMemory(device_h,
+ &(VkMemoryAllocateInfo){
+ .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+ .pNext = &import_info,
+ /* Max buffer size, unused for imports */
+ .allocationSize = 0x7FFFFFFF,
+ .memoryTypeIndex = memory_type_index,
+ },
+ alloc, &memory_h);
+ if (result != VK_SUCCESS)
+ return result;
+
+ struct radeon_bo_metadata md;
+ device->ws->buffer_get_metadata(device->ws, radv_device_memory_from_handle(memory_h)->bo, &md);
+
+ VkImageCreateInfo updated_base_info = *base_info;
+
+ VkExternalMemoryImageCreateInfo external_memory_info = {
+ .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO,
+ .pNext = updated_base_info.pNext,
+ .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
+ };
+
+ updated_base_info.pNext = &external_memory_info;
+
+ result = radv_image_create(device_h,
+ &(struct radv_image_create_info){
+ .vk_info = &updated_base_info,
+ .no_metadata_planes = true,
+ .bo_metadata = &md,
+ },
+ alloc, &image_h);
+
+ if (result != VK_SUCCESS)
+ goto fail_create_image;
+
+ image = radv_image_from_handle(image_h);
+
+ radv_image_override_offset_stride(device, image, 0, gralloc_info->stride);
+
+ radv_BindImageMemory(device_h, image_h, memory_h, 0);
+
+ image->owned_memory = memory_h;
+ /* Don't clobber the out-parameter until success is certain. */
+ *out_image_h = image_h;
+
+ return VK_SUCCESS;
fail_create_image:
- radv_FreeMemory(device_h, memory_h, alloc);
- return result;
+ radv_FreeMemory(device_h, memory_h, alloc);
+ return result;
}
-VkResult radv_GetSwapchainGrallocUsageANDROID(
- VkDevice device_h,
- VkFormat format,
- VkImageUsageFlags imageUsage,
- int* grallocUsage)
+VkResult
+radv_GetSwapchainGrallocUsageANDROID(VkDevice device_h, VkFormat format,
+ VkImageUsageFlags imageUsage, int *grallocUsage)
{
- RADV_FROM_HANDLE(radv_device, device, device_h);
- struct radv_physical_device *phys_dev = device->physical_device;
- VkPhysicalDevice phys_dev_h = radv_physical_device_to_handle(phys_dev);
- VkResult result;
-
- *grallocUsage = 0;
-
- /* WARNING: Android Nougat's libvulkan.so hardcodes the VkImageUsageFlags
- * returned to applications via VkSurfaceCapabilitiesKHR::supportedUsageFlags.
- * The relevant code in libvulkan/swapchain.cpp contains this fun comment:
- *
- * TODO(jessehall): I think these are right, but haven't thought hard
- * about it. Do we need to query the driver for support of any of
- * these?
- *
- * Any disagreement between this function and the hardcoded
- * VkSurfaceCapabilitiesKHR:supportedUsageFlags causes tests
- * dEQP-VK.wsi.android.swapchain.*.image_usage to fail.
- */
-
- const VkPhysicalDeviceImageFormatInfo2 image_format_info = {
- .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
- .format = format,
- .type = VK_IMAGE_TYPE_2D,
- .tiling = VK_IMAGE_TILING_OPTIMAL,
- .usage = imageUsage,
- };
-
- VkImageFormatProperties2 image_format_props = {
- .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
- };
-
- /* Check that requested format and usage are supported. */
- result = radv_GetPhysicalDeviceImageFormatProperties2(phys_dev_h,
- &image_format_info, &image_format_props);
- if (result != VK_SUCCESS) {
- return vk_errorf(device->instance, result,
- "radv_GetPhysicalDeviceImageFormatProperties2 failed "
- "inside %s", __func__);
- }
-
- if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_DST_BIT |
- VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT))
- *grallocUsage |= GRALLOC_USAGE_HW_RENDER;
-
- if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
- VK_IMAGE_USAGE_SAMPLED_BIT |
- VK_IMAGE_USAGE_STORAGE_BIT |
- VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT))
- *grallocUsage |= GRALLOC_USAGE_HW_TEXTURE;
-
- /* All VkImageUsageFlags not explicitly checked here are unsupported for
- * gralloc swapchains.
- */
- if (imageUsage != 0) {
- return vk_errorf(device->instance, VK_ERROR_FORMAT_NOT_SUPPORTED,
- "unsupported VkImageUsageFlags(0x%x) for gralloc "
- "swapchain", imageUsage);
- }
-
- /*
- * FINISHME: Advertise all display-supported formats. Mostly
- * DRM_FORMAT_ARGB2101010 and DRM_FORMAT_ABGR2101010, but need to check
- * what we need for 30-bit colors.
- */
- if (format == VK_FORMAT_B8G8R8A8_UNORM ||
- format == VK_FORMAT_B5G6R5_UNORM_PACK16) {
- *grallocUsage |= GRALLOC_USAGE_HW_FB |
- GRALLOC_USAGE_HW_COMPOSER |
- GRALLOC_USAGE_EXTERNAL_DISP;
- }
-
- if (*grallocUsage == 0)
- return VK_ERROR_FORMAT_NOT_SUPPORTED;
-
- return VK_SUCCESS;
+ RADV_FROM_HANDLE(radv_device, device, device_h);
+ struct radv_physical_device *phys_dev = device->physical_device;
+ VkPhysicalDevice phys_dev_h = radv_physical_device_to_handle(phys_dev);
+ VkResult result;
+
+ *grallocUsage = 0;
+
+ /* WARNING: Android Nougat's libvulkan.so hardcodes the VkImageUsageFlags
+ * returned to applications via VkSurfaceCapabilitiesKHR::supportedUsageFlags.
+ * The relevant code in libvulkan/swapchain.cpp contains this fun comment:
+ *
+ * TODO(jessehall): I think these are right, but haven't thought hard
+ * about it. Do we need to query the driver for support of any of
+ * these?
+ *
+ * Any disagreement between this function and the hardcoded
+ * VkSurfaceCapabilitiesKHR:supportedUsageFlags causes tests
+ * dEQP-VK.wsi.android.swapchain.*.image_usage to fail.
+ */
+
+ const VkPhysicalDeviceImageFormatInfo2 image_format_info = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
+ .format = format,
+ .type = VK_IMAGE_TYPE_2D,
+ .tiling = VK_IMAGE_TILING_OPTIMAL,
+ .usage = imageUsage,
+ };
+
+ VkImageFormatProperties2 image_format_props = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
+ };
+
+ /* Check that requested format and usage are supported. */
+ result = radv_GetPhysicalDeviceImageFormatProperties2(phys_dev_h, &image_format_info,
+ &image_format_props);
+ if (result != VK_SUCCESS) {
+ return vk_errorf(device->instance, result,
+ "radv_GetPhysicalDeviceImageFormatProperties2 failed "
+ "inside %s",
+ __func__);
+ }
+
+ if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT))
+ *grallocUsage |= GRALLOC_USAGE_HW_RENDER;
+
+ if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT |
+ VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT))
+ *grallocUsage |= GRALLOC_USAGE_HW_TEXTURE;
+
+ /* All VkImageUsageFlags not explicitly checked here are unsupported for
+ * gralloc swapchains.
+ */
+ if (imageUsage != 0) {
+ return vk_errorf(device->instance, VK_ERROR_FORMAT_NOT_SUPPORTED,
+ "unsupported VkImageUsageFlags(0x%x) for gralloc "
+ "swapchain",
+ imageUsage);
+ }
+
+ /*
+ * FINISHME: Advertise all display-supported formats. Mostly
+ * DRM_FORMAT_ARGB2101010 and DRM_FORMAT_ABGR2101010, but need to check
+ * what we need for 30-bit colors.
+ */
+ if (format == VK_FORMAT_B8G8R8A8_UNORM || format == VK_FORMAT_B5G6R5_UNORM_PACK16) {
+ *grallocUsage |=
+ GRALLOC_USAGE_HW_FB | GRALLOC_USAGE_HW_COMPOSER | GRALLOC_USAGE_EXTERNAL_DISP;
+ }
+
+ if (*grallocUsage == 0)
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+ return VK_SUCCESS;
}
-VkResult radv_GetSwapchainGrallocUsage2ANDROID(
- VkDevice device_h,
- VkFormat format,
- VkImageUsageFlags imageUsage,
- VkSwapchainImageUsageFlagsANDROID swapchainImageUsage,
- uint64_t* grallocConsumerUsage,
- uint64_t* grallocProducerUsage)
+VkResult
+radv_GetSwapchainGrallocUsage2ANDROID(VkDevice device_h, VkFormat format,
+ VkImageUsageFlags imageUsage,
+ VkSwapchainImageUsageFlagsANDROID swapchainImageUsage,
+ uint64_t *grallocConsumerUsage,
+ uint64_t *grallocProducerUsage)
{
- /* Before level 26 (Android 8.0/Oreo) the loader uses
- * vkGetSwapchainGrallocUsageANDROID. */
+ /* Before level 26 (Android 8.0/Oreo) the loader uses
+ * vkGetSwapchainGrallocUsageANDROID. */
#if ANDROID_API_LEVEL >= 26
- RADV_FROM_HANDLE(radv_device, device, device_h);
- struct radv_physical_device *phys_dev = device->physical_device;
- VkPhysicalDevice phys_dev_h = radv_physical_device_to_handle(phys_dev);
- VkResult result;
-
- *grallocConsumerUsage = 0;
- *grallocProducerUsage = 0;
-
- if (swapchainImageUsage & VK_SWAPCHAIN_IMAGE_USAGE_SHARED_BIT_ANDROID)
- return vk_errorf(device->instance, VK_ERROR_FORMAT_NOT_SUPPORTED,
- "The Vulkan loader tried to query shared presentable image support");
-
- const VkPhysicalDeviceImageFormatInfo2 image_format_info = {
- .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
- .format = format,
- .type = VK_IMAGE_TYPE_2D,
- .tiling = VK_IMAGE_TILING_OPTIMAL,
- .usage = imageUsage,
- };
-
- VkImageFormatProperties2 image_format_props = {
- .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
- };
-
- /* Check that requested format and usage are supported. */
- result = radv_GetPhysicalDeviceImageFormatProperties2(phys_dev_h,
- &image_format_info, &image_format_props);
- if (result != VK_SUCCESS) {
- return vk_errorf(device->instance, result,
- "radv_GetPhysicalDeviceImageFormatProperties2 failed "
- "inside %s", __func__);
- }
-
- if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_DST_BIT |
- VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)) {
- *grallocProducerUsage |= GRALLOC1_PRODUCER_USAGE_GPU_RENDER_TARGET;
- *grallocConsumerUsage |= GRALLOC1_CONSUMER_USAGE_CLIENT_TARGET;
- }
-
- if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
- VK_IMAGE_USAGE_SAMPLED_BIT |
- VK_IMAGE_USAGE_STORAGE_BIT |
- VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) {
- *grallocConsumerUsage |= GRALLOC1_CONSUMER_USAGE_GPU_TEXTURE;
- }
-
- if (imageUsage != 0) {
- return vk_errorf(device->instance, VK_ERROR_FORMAT_NOT_SUPPORTED,
- "unsupported VkImageUsageFlags(0x%x) for gralloc "
- "swapchain", imageUsage);
- }
-
- /*
- * FINISHME: Advertise all display-supported formats. Mostly
- * DRM_FORMAT_ARGB2101010 and DRM_FORMAT_ABGR2101010, but need to check
- * what we need for 30-bit colors.
- */
- if (format == VK_FORMAT_B8G8R8A8_UNORM ||
- format == VK_FORMAT_B5G6R5_UNORM_PACK16) {
- *grallocProducerUsage |= GRALLOC1_PRODUCER_USAGE_GPU_RENDER_TARGET;
- *grallocConsumerUsage |= GRALLOC1_CONSUMER_USAGE_HWCOMPOSER;
- }
-
- if (!*grallocProducerUsage && !*grallocConsumerUsage)
- return VK_ERROR_FORMAT_NOT_SUPPORTED;
-
- return VK_SUCCESS;
+ RADV_FROM_HANDLE(radv_device, device, device_h);
+ struct radv_physical_device *phys_dev = device->physical_device;
+ VkPhysicalDevice phys_dev_h = radv_physical_device_to_handle(phys_dev);
+ VkResult result;
+
+ *grallocConsumerUsage = 0;
+ *grallocProducerUsage = 0;
+
+ if (swapchainImageUsage & VK_SWAPCHAIN_IMAGE_USAGE_SHARED_BIT_ANDROID)
+ return vk_errorf(device->instance, VK_ERROR_FORMAT_NOT_SUPPORTED,
+ "The Vulkan loader tried to query shared presentable image support");
+
+ const VkPhysicalDeviceImageFormatInfo2 image_format_info = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
+ .format = format,
+ .type = VK_IMAGE_TYPE_2D,
+ .tiling = VK_IMAGE_TILING_OPTIMAL,
+ .usage = imageUsage,
+ };
+
+ VkImageFormatProperties2 image_format_props = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
+ };
+
+ /* Check that requested format and usage are supported. */
+ result = radv_GetPhysicalDeviceImageFormatProperties2(phys_dev_h, &image_format_info,
+ &image_format_props);
+ if (result != VK_SUCCESS) {
+ return vk_errorf(device->instance, result,
+ "radv_GetPhysicalDeviceImageFormatProperties2 failed "
+ "inside %s",
+ __func__);
+ }
+
+ if (unmask32(&imageUsage,
+ VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT)) {
+ *grallocProducerUsage |= GRALLOC1_PRODUCER_USAGE_GPU_RENDER_TARGET;
+ *grallocConsumerUsage |= GRALLOC1_CONSUMER_USAGE_CLIENT_TARGET;
+ }
+
+ if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT |
+ VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) {
+ *grallocConsumerUsage |= GRALLOC1_CONSUMER_USAGE_GPU_TEXTURE;
+ }
+
+ if (imageUsage != 0) {
+ return vk_errorf(device->instance, VK_ERROR_FORMAT_NOT_SUPPORTED,
+ "unsupported VkImageUsageFlags(0x%x) for gralloc "
+ "swapchain",
+ imageUsage);
+ }
+
+ /*
+ * FINISHME: Advertise all display-supported formats. Mostly
+ * DRM_FORMAT_ARGB2101010 and DRM_FORMAT_ABGR2101010, but need to check
+ * what we need for 30-bit colors.
+ */
+ if (format == VK_FORMAT_B8G8R8A8_UNORM || format == VK_FORMAT_B5G6R5_UNORM_PACK16) {
+ *grallocProducerUsage |= GRALLOC1_PRODUCER_USAGE_GPU_RENDER_TARGET;
+ *grallocConsumerUsage |= GRALLOC1_CONSUMER_USAGE_HWCOMPOSER;
+ }
+
+ if (!*grallocProducerUsage && !*grallocConsumerUsage)
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+ return VK_SUCCESS;
#else
- *grallocConsumerUsage = 0;
- *grallocProducerUsage = 0;
- return VK_ERROR_FORMAT_NOT_SUPPORTED;
+ *grallocConsumerUsage = 0;
+ *grallocProducerUsage = 0;
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
#endif
}
VkResult
-radv_AcquireImageANDROID(
- VkDevice device,
- VkImage image_h,
- int nativeFenceFd,
- VkSemaphore semaphore,
- VkFence fence)
+radv_AcquireImageANDROID(VkDevice device, VkImage image_h, int nativeFenceFd, VkSemaphore semaphore,
+ VkFence fence)
{
- VkResult semaphore_result = VK_SUCCESS, fence_result = VK_SUCCESS;
-
- if (semaphore != VK_NULL_HANDLE) {
- int semaphore_fd = nativeFenceFd >= 0 ? os_dupfd_cloexec(nativeFenceFd) : nativeFenceFd;
- semaphore_result = radv_ImportSemaphoreFdKHR(device,
- &(VkImportSemaphoreFdInfoKHR) {
- .sType = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR,
- .flags = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT,
- .fd = semaphore_fd,
- .semaphore = semaphore,
- .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT,
- });
- }
-
- if (fence != VK_NULL_HANDLE) {
- int fence_fd = nativeFenceFd >= 0 ? os_dupfd_cloexec(nativeFenceFd) : nativeFenceFd;
- fence_result = radv_ImportFenceFdKHR(device,
- &(VkImportFenceFdInfoKHR) {
- .sType = VK_STRUCTURE_TYPE_IMPORT_FENCE_FD_INFO_KHR,
- .flags = VK_FENCE_IMPORT_TEMPORARY_BIT,
- .fd = fence_fd,
- .fence = fence,
- .handleType = VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT,
- });
- }
-
- close(nativeFenceFd);
-
- if (semaphore_result != VK_SUCCESS)
- return semaphore_result;
- return fence_result;
+ VkResult semaphore_result = VK_SUCCESS, fence_result = VK_SUCCESS;
+
+ if (semaphore != VK_NULL_HANDLE) {
+ int semaphore_fd = nativeFenceFd >= 0 ? os_dupfd_cloexec(nativeFenceFd) : nativeFenceFd;
+ semaphore_result = radv_ImportSemaphoreFdKHR(
+ device, &(VkImportSemaphoreFdInfoKHR){
+ .sType = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR,
+ .flags = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT,
+ .fd = semaphore_fd,
+ .semaphore = semaphore,
+ .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT,
+ });
+ }
+
+ if (fence != VK_NULL_HANDLE) {
+ int fence_fd = nativeFenceFd >= 0 ? os_dupfd_cloexec(nativeFenceFd) : nativeFenceFd;
+ fence_result =
+ radv_ImportFenceFdKHR(device, &(VkImportFenceFdInfoKHR){
+ .sType = VK_STRUCTURE_TYPE_IMPORT_FENCE_FD_INFO_KHR,
+ .flags = VK_FENCE_IMPORT_TEMPORARY_BIT,
+ .fd = fence_fd,
+ .fence = fence,
+ .handleType = VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT,
+ });
+ }
+
+ close(nativeFenceFd);
+
+ if (semaphore_result != VK_SUCCESS)
+ return semaphore_result;
+ return fence_result;
}
VkResult
-radv_QueueSignalReleaseImageANDROID(
- VkQueue _queue,
- uint32_t waitSemaphoreCount,
- const VkSemaphore* pWaitSemaphores,
- VkImage image,
- int* pNativeFenceFd)
+radv_QueueSignalReleaseImageANDROID(VkQueue _queue, uint32_t waitSemaphoreCount,
+ const VkSemaphore *pWaitSemaphores, VkImage image,
+ int *pNativeFenceFd)
{
- RADV_FROM_HANDLE(radv_queue, queue, _queue);
- VkResult result = VK_SUCCESS;
-
- if (waitSemaphoreCount == 0) {
- if (pNativeFenceFd)
- *pNativeFenceFd = -1;
- return VK_SUCCESS;
- }
-
- int fd = -1;
-
- for (uint32_t i = 0; i < waitSemaphoreCount; ++i) {
- int tmp_fd;
- result = radv_GetSemaphoreFdKHR(radv_device_to_handle(queue->device),
- &(VkSemaphoreGetFdInfoKHR) {
- .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
- .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT,
- .semaphore = pWaitSemaphores[i],
- }, &tmp_fd);
- if (result != VK_SUCCESS) {
- if (fd >= 0)
- close (fd);
- return result;
- }
-
- if (fd < 0)
- fd = tmp_fd;
- else if (tmp_fd >= 0) {
- sync_accumulate("radv", &fd, tmp_fd);
- close(tmp_fd);
- }
- }
-
- if (pNativeFenceFd) {
- *pNativeFenceFd = fd;
- } else if (fd >= 0) {
- close(fd);
- /* We still need to do the exports, to reset the semaphores, but
- * otherwise we don't wait on them. */
- }
- return VK_SUCCESS;
+ RADV_FROM_HANDLE(radv_queue, queue, _queue);
+ VkResult result = VK_SUCCESS;
+
+ if (waitSemaphoreCount == 0) {
+ if (pNativeFenceFd)
+ *pNativeFenceFd = -1;
+ return VK_SUCCESS;
+ }
+
+ int fd = -1;
+
+ for (uint32_t i = 0; i < waitSemaphoreCount; ++i) {
+ int tmp_fd;
+ result =
+ radv_GetSemaphoreFdKHR(radv_device_to_handle(queue->device),
+ &(VkSemaphoreGetFdInfoKHR){
+ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
+ .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT,
+ .semaphore = pWaitSemaphores[i],
+ },
+ &tmp_fd);
+ if (result != VK_SUCCESS) {
+ if (fd >= 0)
+ close(fd);
+ return result;
+ }
+
+ if (fd < 0)
+ fd = tmp_fd;
+ else if (tmp_fd >= 0) {
+ sync_accumulate("radv", &fd, tmp_fd);
+ close(tmp_fd);
+ }
+ }
+
+ if (pNativeFenceFd) {
+ *pNativeFenceFd = fd;
+ } else if (fd >= 0) {
+ close(fd);
+ /* We still need to do the exports, to reset the semaphores, but
+ * otherwise we don't wait on them. */
+ }
+ return VK_SUCCESS;
}
#endif
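
radv_GetSwapchainGrallocUsageANDROID and its level-26 variant above lean on the small unmask32() helper: each call strips the usage bits the driver knows how to translate into gralloc flags, so any bit still set afterwards marks an unsupported usage and the function bails out. The standalone program below walks through that pattern with made-up flag values; it is an illustration of the idiom, not driver code.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Same logic as the driver's unmask32() above: clear test_mask from *inout_mask and report
 * whether anything was actually cleared. */
static bool
unmask32(uint32_t *inout_mask, uint32_t test_mask)
{
   uint32_t orig_mask = *inout_mask;
   *inout_mask &= ~test_mask;
   return *inout_mask != orig_mask;
}

int
main(void)
{
   /* Toy flag values standing in for VkImageUsageFlags bits. */
   const uint32_t TRANSFER_DST = 0x1u;
   const uint32_t COLOR_ATTACHMENT = 0x2u;
   const uint32_t UNKNOWN_USAGE = 0x80u;

   uint32_t usage = TRANSFER_DST | COLOR_ATTACHMENT | UNKNOWN_USAGE;

   /* The recognized bits are consumed and would map to a gralloc usage flag... */
   assert(unmask32(&usage, TRANSFER_DST | COLOR_ATTACHMENT));

   /* ...and whatever is left over is treated as unsupported, which is why the driver returns
    * VK_ERROR_FORMAT_NOT_SUPPORTED when imageUsage != 0 after the unmask32() calls. */
   assert(usage == UNKNOWN_USAGE);
   return 0;
}
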
@@ -482,29 +470,29 @@ enum {
static inline VkFormat
vk_format_from_android(unsigned android_format, unsigned android_usage)
{
- switch (android_format) {
- case AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM:
- case AHARDWAREBUFFER_FORMAT_R8G8B8X8_UNORM:
- return VK_FORMAT_R8G8B8A8_UNORM;
- case AHARDWAREBUFFER_FORMAT_R8G8B8_UNORM:
- return VK_FORMAT_R8G8B8_UNORM;
- case AHARDWAREBUFFER_FORMAT_R5G6B5_UNORM:
- return VK_FORMAT_R5G6B5_UNORM_PACK16;
- case AHARDWAREBUFFER_FORMAT_R16G16B16A16_FLOAT:
- return VK_FORMAT_R16G16B16A16_SFLOAT;
- case AHARDWAREBUFFER_FORMAT_R10G10B10A2_UNORM:
- return VK_FORMAT_A2B10G10R10_UNORM_PACK32;
- case AHARDWAREBUFFER_FORMAT_Y8Cb8Cr8_420:
- return VK_FORMAT_G8_B8R8_2PLANE_420_UNORM;
- case AHARDWAREBUFFER_FORMAT_IMPLEMENTATION_DEFINED:
- if (android_usage & AHARDWAREBUFFER_USAGE_CAMERA_MASK)
- return VK_FORMAT_G8_B8R8_2PLANE_420_UNORM;
- else
- return VK_FORMAT_R8G8B8_UNORM;
- case AHARDWAREBUFFER_FORMAT_BLOB:
- default:
- return VK_FORMAT_UNDEFINED;
- }
+ switch (android_format) {
+ case AHARDWAREBUFFER_FORMAT_R8G8B8A8_UNORM:
+ case AHARDWAREBUFFER_FORMAT_R8G8B8X8_UNORM:
+ return VK_FORMAT_R8G8B8A8_UNORM;
+ case AHARDWAREBUFFER_FORMAT_R8G8B8_UNORM:
+ return VK_FORMAT_R8G8B8_UNORM;
+ case AHARDWAREBUFFER_FORMAT_R5G6B5_UNORM:
+ return VK_FORMAT_R5G6B5_UNORM_PACK16;
+ case AHARDWAREBUFFER_FORMAT_R16G16B16A16_FLOAT:
+ return VK_FORMAT_R16G16B16A16_SFLOAT;
+ case AHARDWAREBUFFER_FORMAT_R10G10B10A2_UNORM:
+ return VK_FORMAT_A2B10G10R10_UNORM_PACK32;
+ case AHARDWAREBUFFER_FORMAT_Y8Cb8Cr8_420:
+ return VK_FORMAT_G8_B8R8_2PLANE_420_UNORM;
+ case AHARDWAREBUFFER_FORMAT_IMPLEMENTATION_DEFINED:
+ if (android_usage & AHARDWAREBUFFER_USAGE_CAMERA_MASK)
+ return VK_FORMAT_G8_B8R8_2PLANE_420_UNORM;
+ else
+ return VK_FORMAT_R8G8B8_UNORM;
+ case AHARDWAREBUFFER_FORMAT_BLOB:
+ default:
+ return VK_FORMAT_UNDEFINED;
+ }
}
static inline unsigned
@@ -529,8 +517,7 @@ android_format_from_vk(unsigned vk_format)
}
uint64_t
-radv_ahb_usage_from_vk_usage(const VkImageCreateFlags vk_create,
- const VkImageUsageFlags vk_usage)
+radv_ahb_usage_from_vk_usage(const VkImageCreateFlags vk_create, const VkImageUsageFlags vk_usage)
{
uint64_t ahb_usage = 0;
if (vk_usage & VK_IMAGE_USAGE_SAMPLED_BIT)
@@ -555,145 +542,137 @@ radv_ahb_usage_from_vk_usage(const VkImageCreateFlags vk_create,
}
static VkResult
-get_ahb_buffer_format_properties(
- VkDevice device_h,
- const struct AHardwareBuffer *buffer,
- VkAndroidHardwareBufferFormatPropertiesANDROID *pProperties)
+get_ahb_buffer_format_properties(VkDevice device_h, const struct AHardwareBuffer *buffer,
+ VkAndroidHardwareBufferFormatPropertiesANDROID *pProperties)
{
- RADV_FROM_HANDLE(radv_device, device, device_h);
-
- /* Get a description of buffer contents. */
- AHardwareBuffer_Desc desc;
- AHardwareBuffer_describe(buffer, &desc);
-
- /* Verify description. */
- const uint64_t gpu_usage =
- AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE |
- AHARDWAREBUFFER_USAGE_GPU_COLOR_OUTPUT |
- AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER;
-
- /* "Buffer must be a valid Android hardware buffer object with at least
- * one of the AHARDWAREBUFFER_USAGE_GPU_* usage flags."
- */
- if (!(desc.usage & (gpu_usage)))
- return VK_ERROR_INVALID_EXTERNAL_HANDLE;
-
- /* Fill properties fields based on description. */
- VkAndroidHardwareBufferFormatPropertiesANDROID *p = pProperties;
-
- p->format = vk_format_from_android(desc.format, desc.usage);
- p->externalFormat = (uint64_t) (uintptr_t) p->format;
-
- VkFormatProperties format_properties;
- radv_GetPhysicalDeviceFormatProperties(
- radv_physical_device_to_handle(device->physical_device),
- p->format, &format_properties);
-
- if (desc.usage & AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER)
- p->formatFeatures = format_properties.linearTilingFeatures;
- else
- p->formatFeatures = format_properties.optimalTilingFeatures;
-
- /* "Images can be created with an external format even if the Android hardware
- * buffer has a format which has an equivalent Vulkan format to enable
- * consistent handling of images from sources that might use either category
- * of format. However, all images created with an external format are subject
- * to the valid usage requirements associated with external formats, even if
- * the Android hardware buffer’s format has a Vulkan equivalent."
- *
- * "The formatFeatures member *must* include
- * VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT and at least one of
- * VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT or
- * VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT"
- */
- assert(p->formatFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT);
-
- p->formatFeatures |= VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT;
-
- /* "Implementations may not always be able to determine the color model,
- * numerical range, or chroma offsets of the image contents, so the values
- * in VkAndroidHardwareBufferFormatPropertiesANDROID are only suggestions.
- * Applications should treat these values as sensible defaults to use in
- * the absence of more reliable information obtained through some other
- * means."
- */
- p->samplerYcbcrConversionComponents.r = VK_COMPONENT_SWIZZLE_IDENTITY;
- p->samplerYcbcrConversionComponents.g = VK_COMPONENT_SWIZZLE_IDENTITY;
- p->samplerYcbcrConversionComponents.b = VK_COMPONENT_SWIZZLE_IDENTITY;
- p->samplerYcbcrConversionComponents.a = VK_COMPONENT_SWIZZLE_IDENTITY;
-
- p->suggestedYcbcrModel = VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601;
- p->suggestedYcbcrRange = VK_SAMPLER_YCBCR_RANGE_ITU_FULL;
-
- p->suggestedXChromaOffset = VK_CHROMA_LOCATION_MIDPOINT;
- p->suggestedYChromaOffset = VK_CHROMA_LOCATION_MIDPOINT;
-
- return VK_SUCCESS;
+ RADV_FROM_HANDLE(radv_device, device, device_h);
+
+ /* Get a description of buffer contents. */
+ AHardwareBuffer_Desc desc;
+ AHardwareBuffer_describe(buffer, &desc);
+
+ /* Verify description. */
+ const uint64_t gpu_usage = AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE |
+ AHARDWAREBUFFER_USAGE_GPU_COLOR_OUTPUT |
+ AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER;
+
+ /* "Buffer must be a valid Android hardware buffer object with at least
+ * one of the AHARDWAREBUFFER_USAGE_GPU_* usage flags."
+ */
+ if (!(desc.usage & (gpu_usage)))
+ return VK_ERROR_INVALID_EXTERNAL_HANDLE;
+
+ /* Fill properties fields based on description. */
+ VkAndroidHardwareBufferFormatPropertiesANDROID *p = pProperties;
+
+ p->format = vk_format_from_android(desc.format, desc.usage);
+ p->externalFormat = (uint64_t)(uintptr_t)p->format;
+
+ VkFormatProperties format_properties;
+ radv_GetPhysicalDeviceFormatProperties(radv_physical_device_to_handle(device->physical_device),
+ p->format, &format_properties);
+
+ if (desc.usage & AHARDWAREBUFFER_USAGE_GPU_DATA_BUFFER)
+ p->formatFeatures = format_properties.linearTilingFeatures;
+ else
+ p->formatFeatures = format_properties.optimalTilingFeatures;
+
+ /* "Images can be created with an external format even if the Android hardware
+ * buffer has a format which has an equivalent Vulkan format to enable
+ * consistent handling of images from sources that might use either category
+ * of format. However, all images created with an external format are subject
+ * to the valid usage requirements associated with external formats, even if
+ * the Android hardware buffer’s format has a Vulkan equivalent."
+ *
+ * "The formatFeatures member *must* include
+ * VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT and at least one of
+ * VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT or
+ * VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT"
+ */
+ assert(p->formatFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT);
+
+ p->formatFeatures |= VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT;
+
+ /* "Implementations may not always be able to determine the color model,
+ * numerical range, or chroma offsets of the image contents, so the values
+ * in VkAndroidHardwareBufferFormatPropertiesANDROID are only suggestions.
+ * Applications should treat these values as sensible defaults to use in
+ * the absence of more reliable information obtained through some other
+ * means."
+ */
+ p->samplerYcbcrConversionComponents.r = VK_COMPONENT_SWIZZLE_IDENTITY;
+ p->samplerYcbcrConversionComponents.g = VK_COMPONENT_SWIZZLE_IDENTITY;
+ p->samplerYcbcrConversionComponents.b = VK_COMPONENT_SWIZZLE_IDENTITY;
+ p->samplerYcbcrConversionComponents.a = VK_COMPONENT_SWIZZLE_IDENTITY;
+
+ p->suggestedYcbcrModel = VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601;
+ p->suggestedYcbcrRange = VK_SAMPLER_YCBCR_RANGE_ITU_FULL;
+
+ p->suggestedXChromaOffset = VK_CHROMA_LOCATION_MIDPOINT;
+ p->suggestedYChromaOffset = VK_CHROMA_LOCATION_MIDPOINT;
+
+ return VK_SUCCESS;
}
VkResult
-radv_GetAndroidHardwareBufferPropertiesANDROID(
- VkDevice device_h,
- const struct AHardwareBuffer *buffer,
- VkAndroidHardwareBufferPropertiesANDROID *pProperties)
+radv_GetAndroidHardwareBufferPropertiesANDROID(VkDevice device_h,
+ const struct AHardwareBuffer *buffer,
+ VkAndroidHardwareBufferPropertiesANDROID *pProperties)
{
- RADV_FROM_HANDLE(radv_device, dev, device_h);
- struct radv_physical_device *pdevice = dev->physical_device;
-
- VkAndroidHardwareBufferFormatPropertiesANDROID *format_prop =
- vk_find_struct(pProperties->pNext,
- ANDROID_HARDWARE_BUFFER_FORMAT_PROPERTIES_ANDROID);
-
- /* Fill format properties of an Android hardware buffer. */
- if (format_prop)
- get_ahb_buffer_format_properties(device_h, buffer, format_prop);
-
- /* NOTE - We support buffers with only one handle but do not error on
- * multiple handle case. Reason is that we want to support YUV formats
- * where we have many logical planes but they all point to the same
- * buffer, like is the case with VK_FORMAT_G8_B8R8_2PLANE_420_UNORM.
- */
- const native_handle_t *handle =
- AHardwareBuffer_getNativeHandle(buffer);
- int dma_buf = (handle && handle->numFds) ? handle->data[0] : -1;
- if (dma_buf < 0)
- return VK_ERROR_INVALID_EXTERNAL_HANDLE;
-
- /* All memory types. */
- uint32_t memory_types = (1u << pdevice->memory_properties.memoryTypeCount) - 1;
-
- pProperties->allocationSize = lseek(dma_buf, 0, SEEK_END);
- pProperties->memoryTypeBits = memory_types;
-
- return VK_SUCCESS;
+ RADV_FROM_HANDLE(radv_device, dev, device_h);
+ struct radv_physical_device *pdevice = dev->physical_device;
+
+ VkAndroidHardwareBufferFormatPropertiesANDROID *format_prop =
+ vk_find_struct(pProperties->pNext, ANDROID_HARDWARE_BUFFER_FORMAT_PROPERTIES_ANDROID);
+
+ /* Fill format properties of an Android hardware buffer. */
+ if (format_prop)
+ get_ahb_buffer_format_properties(device_h, buffer, format_prop);
+
+ /* NOTE - We support buffers with only one handle but do not error on
+ * multiple handle case. Reason is that we want to support YUV formats
+ * where we have many logical planes but they all point to the same
+ * buffer, like is the case with VK_FORMAT_G8_B8R8_2PLANE_420_UNORM.
+ */
+ const native_handle_t *handle = AHardwareBuffer_getNativeHandle(buffer);
+ int dma_buf = (handle && handle->numFds) ? handle->data[0] : -1;
+ if (dma_buf < 0)
+ return VK_ERROR_INVALID_EXTERNAL_HANDLE;
+
+ /* All memory types. */
+ uint32_t memory_types = (1u << pdevice->memory_properties.memoryTypeCount) - 1;
+
+ pProperties->allocationSize = lseek(dma_buf, 0, SEEK_END);
+ pProperties->memoryTypeBits = memory_types;
+
+ return VK_SUCCESS;
}
VkResult
-radv_GetMemoryAndroidHardwareBufferANDROID(
- VkDevice device_h,
- const VkMemoryGetAndroidHardwareBufferInfoANDROID *pInfo,
- struct AHardwareBuffer **pBuffer)
+radv_GetMemoryAndroidHardwareBufferANDROID(VkDevice device_h,
+ const VkMemoryGetAndroidHardwareBufferInfoANDROID *pInfo,
+ struct AHardwareBuffer **pBuffer)
{
- RADV_FROM_HANDLE(radv_device_memory, mem, pInfo->memory);
-
- /* This should always be set due to the export handle types being set on
- * allocation. */
- assert(mem->android_hardware_buffer);
-
- /* Some quotes from Vulkan spec:
- *
- * "If the device memory was created by importing an Android hardware
- * buffer, vkGetMemoryAndroidHardwareBufferANDROID must return that same
- * Android hardware buffer object."
- *
- * "VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID must
- * have been included in VkExportMemoryAllocateInfo::handleTypes when
- * memory was created."
- */
- *pBuffer = mem->android_hardware_buffer;
- /* Increase refcount. */
- AHardwareBuffer_acquire(mem->android_hardware_buffer);
- return VK_SUCCESS;
+ RADV_FROM_HANDLE(radv_device_memory, mem, pInfo->memory);
+
+ /* This should always be set due to the export handle types being set on
+ * allocation. */
+ assert(mem->android_hardware_buffer);
+
+ /* Some quotes from Vulkan spec:
+ *
+ * "If the device memory was created by importing an Android hardware
+ * buffer, vkGetMemoryAndroidHardwareBufferANDROID must return that same
+ * Android hardware buffer object."
+ *
+ * "VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID must
+ * have been included in VkExportMemoryAllocateInfo::handleTypes when
+ * memory was created."
+ */
+ *pBuffer = mem->android_hardware_buffer;
+ /* Increase refcount. */
+ AHardwareBuffer_acquire(mem->android_hardware_buffer);
+ return VK_SUCCESS;
}
#endif
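
radv_GetAndroidHardwareBufferPropertiesANDROID above fills allocationSize by seeking to the end of the imported dma-buf fd. The small standalone program below shows the same lseek(fd, 0, SEEK_END) idiom against an ordinary file path passed on the command line; it is a sketch of the idiom only, not driver code, and assumes a POSIX environment.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(int argc, char **argv)
{
   if (argc < 2) {
      fprintf(stderr, "usage: %s <file>\n", argv[0]);
      return 1;
   }

   int fd = open(argv[1], O_RDONLY);
   if (fd < 0)
      return 1;

   /* Seeking to the end yields the offset of the end of the object, i.e. its size in bytes.
    * The driver applies the same idiom to the imported dma-buf fd to report allocationSize. */
   off_t size = lseek(fd, 0, SEEK_END);
   printf("%lld bytes\n", (long long)size);

   close(fd);
   return 0;
}
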
@@ -702,172 +681,162 @@ VkFormat
radv_select_android_external_format(const void *next, VkFormat default_format)
{
#if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER
- const VkExternalFormatANDROID *android_format =
- vk_find_struct_const(next, EXTERNAL_FORMAT_ANDROID);
+ const VkExternalFormatANDROID *android_format =
+ vk_find_struct_const(next, EXTERNAL_FORMAT_ANDROID);
- if (android_format && android_format->externalFormat) {
- return (VkFormat)android_format->externalFormat;
- }
+ if (android_format && android_format->externalFormat) {
+ return (VkFormat)android_format->externalFormat;
+ }
#endif
- return default_format;
+ return default_format;
}
-
VkResult
-radv_import_ahb_memory(struct radv_device *device,
- struct radv_device_memory *mem,
- unsigned priority,
- const VkImportAndroidHardwareBufferInfoANDROID *info)
+radv_import_ahb_memory(struct radv_device *device, struct radv_device_memory *mem,
+ unsigned priority, const VkImportAndroidHardwareBufferInfoANDROID *info)
{
#if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER
- /* Import from AHardwareBuffer to radv_device_memory. */
- const native_handle_t *handle =
- AHardwareBuffer_getNativeHandle(info->buffer);
-
- /* NOTE - We support buffers with only one handle but do not error on
- * multiple handle case. Reason is that we want to support YUV formats
- * where we have many logical planes but they all point to the same
- * buffer, like is the case with VK_FORMAT_G8_B8R8_2PLANE_420_UNORM.
- */
- int dma_buf = (handle && handle->numFds) ? handle->data[0] : -1;
- if (dma_buf < 0)
- return VK_ERROR_INVALID_EXTERNAL_HANDLE;
-
- uint64_t alloc_size = 0;
- mem->bo = device->ws->buffer_from_fd(device->ws, dma_buf,
- priority, &alloc_size);
- if (!mem->bo)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
-
- if (mem->image) {
- struct radeon_bo_metadata metadata;
- device->ws->buffer_get_metadata(device->ws, mem->bo, &metadata);
-
- struct radv_image_create_info create_info = {
- .no_metadata_planes = true,
- .bo_metadata = &metadata
- };
-
- VkResult result = radv_image_create_layout(device, create_info, NULL, mem->image);
- if (result != VK_SUCCESS) {
- device->ws->buffer_destroy(device->ws, mem->bo);
- mem->bo = NULL;
- return result;
- }
-
- if (alloc_size < mem->image->size) {
- device->ws->buffer_destroy(device->ws, mem->bo);
- mem->bo = NULL;
- return VK_ERROR_INVALID_EXTERNAL_HANDLE;
- }
- } else if (mem->buffer) {
- if (alloc_size < mem->buffer->size) {
- device->ws->buffer_destroy(device->ws, mem->bo);
- mem->bo = NULL;
- return VK_ERROR_INVALID_EXTERNAL_HANDLE;
- }
- }
-
- /* "If the vkAllocateMemory command succeeds, the implementation must
- * acquire a reference to the imported hardware buffer, which it must
- * release when the device memory object is freed. If the command fails,
- * the implementation must not retain a reference."
- */
- AHardwareBuffer_acquire(info->buffer);
- mem->android_hardware_buffer = info->buffer;
-
- return VK_SUCCESS;
+ /* Import from AHardwareBuffer to radv_device_memory. */
+ const native_handle_t *handle = AHardwareBuffer_getNativeHandle(info->buffer);
+
+ /* NOTE - We support buffers with only one handle but do not error on
+ * multiple handle case. Reason is that we want to support YUV formats
+ * where we have many logical planes but they all point to the same
+ * buffer, like is the case with VK_FORMAT_G8_B8R8_2PLANE_420_UNORM.
+ */
+ int dma_buf = (handle && handle->numFds) ? handle->data[0] : -1;
+ if (dma_buf < 0)
+ return VK_ERROR_INVALID_EXTERNAL_HANDLE;
+
+ uint64_t alloc_size = 0;
+ mem->bo = device->ws->buffer_from_fd(device->ws, dma_buf, priority, &alloc_size);
+ if (!mem->bo)
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+ if (mem->image) {
+ struct radeon_bo_metadata metadata;
+ device->ws->buffer_get_metadata(device->ws, mem->bo, &metadata);
+
+ struct radv_image_create_info create_info = {.no_metadata_planes = true,
+ .bo_metadata = &metadata};
+
+ VkResult result = radv_image_create_layout(device, create_info, NULL, mem->image);
+ if (result != VK_SUCCESS) {
+ device->ws->buffer_destroy(device->ws, mem->bo);
+ mem->bo = NULL;
+ return result;
+ }
+
+ if (alloc_size < mem->image->size) {
+ device->ws->buffer_destroy(device->ws, mem->bo);
+ mem->bo = NULL;
+ return VK_ERROR_INVALID_EXTERNAL_HANDLE;
+ }
+ } else if (mem->buffer) {
+ if (alloc_size < mem->buffer->size) {
+ device->ws->buffer_destroy(device->ws, mem->bo);
+ mem->bo = NULL;
+ return VK_ERROR_INVALID_EXTERNAL_HANDLE;
+ }
+ }
+
+ /* "If the vkAllocateMemory command succeeds, the implementation must
+ * acquire a reference to the imported hardware buffer, which it must
+ * release when the device memory object is freed. If the command fails,
+ * the implementation must not retain a reference."
+ */
+ AHardwareBuffer_acquire(info->buffer);
+ mem->android_hardware_buffer = info->buffer;
+
+ return VK_SUCCESS;
#else /* RADV_SUPPORT_ANDROID_HARDWARE_BUFFER */
- return VK_ERROR_EXTENSION_NOT_PRESENT;
+ return VK_ERROR_EXTENSION_NOT_PRESENT;
#endif
}
VkResult
-radv_create_ahb_memory(struct radv_device *device,
- struct radv_device_memory *mem,
- unsigned priority,
- const VkMemoryAllocateInfo *pAllocateInfo)
+radv_create_ahb_memory(struct radv_device *device, struct radv_device_memory *mem,
+ unsigned priority, const VkMemoryAllocateInfo *pAllocateInfo)
{
#if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER
- const VkMemoryDedicatedAllocateInfo *dedicated_info =
- vk_find_struct_const(pAllocateInfo->pNext,
- MEMORY_DEDICATED_ALLOCATE_INFO);
-
- uint32_t w = 0;
- uint32_t h = 1;
- uint32_t layers = 1;
- uint32_t format = 0;
- uint64_t usage = 0;
-
- /* If caller passed dedicated information. */
- if (dedicated_info && dedicated_info->image) {
- RADV_FROM_HANDLE(radv_image, image, dedicated_info->image);
- w = image->info.width;
- h = image->info.height;
- layers = image->info.array_size;
- format = android_format_from_vk(image->vk_format);
- usage = radv_ahb_usage_from_vk_usage(image->flags, image->usage);
- } else if (dedicated_info && dedicated_info->buffer) {
- RADV_FROM_HANDLE(radv_buffer, buffer, dedicated_info->buffer);
- w = buffer->size;
- format = AHARDWAREBUFFER_FORMAT_BLOB;
- usage = AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN |
- AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN;
- } else {
- w = pAllocateInfo->allocationSize;
- format = AHARDWAREBUFFER_FORMAT_BLOB;
- usage = AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN |
- AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN;
- }
-
- struct AHardwareBuffer *android_hardware_buffer = NULL;
- struct AHardwareBuffer_Desc desc = {
- .width = w,
- .height = h,
- .layers = layers,
- .format = format,
- .usage = usage,
- };
-
- if (AHardwareBuffer_allocate(&desc, &android_hardware_buffer) != 0)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
-
- mem->android_hardware_buffer = android_hardware_buffer;
-
- const struct VkImportAndroidHardwareBufferInfoANDROID import_info = {
- .buffer = mem->android_hardware_buffer,
- };
-
- VkResult result = radv_import_ahb_memory(device, mem, priority, &import_info);
- if (result != VK_SUCCESS)
- AHardwareBuffer_release(mem->android_hardware_buffer);
- return result;
+ const VkMemoryDedicatedAllocateInfo *dedicated_info =
+ vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO);
+
+ uint32_t w = 0;
+ uint32_t h = 1;
+ uint32_t layers = 1;
+ uint32_t format = 0;
+ uint64_t usage = 0;
+
+ /* If caller passed dedicated information. */
+ if (dedicated_info && dedicated_info->image) {
+ RADV_FROM_HANDLE(radv_image, image, dedicated_info->image);
+ w = image->info.width;
+ h = image->info.height;
+ layers = image->info.array_size;
+ format = android_format_from_vk(image->vk_format);
+ usage = radv_ahb_usage_from_vk_usage(image->flags, image->usage);
+ } else if (dedicated_info && dedicated_info->buffer) {
+ RADV_FROM_HANDLE(radv_buffer, buffer, dedicated_info->buffer);
+ w = buffer->size;
+ format = AHARDWAREBUFFER_FORMAT_BLOB;
+ usage = AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN | AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN;
+ } else {
+ w = pAllocateInfo->allocationSize;
+ format = AHARDWAREBUFFER_FORMAT_BLOB;
+ usage = AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN | AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN;
+ }
+
+ struct AHardwareBuffer *android_hardware_buffer = NULL;
+ struct AHardwareBuffer_Desc desc = {
+ .width = w,
+ .height = h,
+ .layers = layers,
+ .format = format,
+ .usage = usage,
+ };
+
+ if (AHardwareBuffer_allocate(&desc, &android_hardware_buffer) != 0)
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+ mem->android_hardware_buffer = android_hardware_buffer;
+
+ const struct VkImportAndroidHardwareBufferInfoANDROID import_info = {
+ .buffer = mem->android_hardware_buffer,
+ };
+
+ VkResult result = radv_import_ahb_memory(device, mem, priority, &import_info);
+ if (result != VK_SUCCESS)
+ AHardwareBuffer_release(mem->android_hardware_buffer);
+ return result;
#else /* RADV_SUPPORT_ANDROID_HARDWARE_BUFFER */
- return VK_ERROR_EXTENSION_NOT_PRESENT;
+ return VK_ERROR_EXTENSION_NOT_PRESENT;
#endif
}
-bool radv_android_gralloc_supports_format(VkFormat format, VkImageUsageFlagBits usage) {
+bool
+radv_android_gralloc_supports_format(VkFormat format, VkImageUsageFlagBits usage)
+{
#if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER
- /* Ideally we check Gralloc for what it supports and then merge that with the radv
- format support, but there is no easy gralloc query besides just creating an image.
- That seems a bit on the expensive side, so just hardcode for now. */
- /* TODO: Add multi-plane formats after confirming everything works between radeonsi
- and radv. */
- switch(format) {
- case VK_FORMAT_R8G8B8A8_UNORM:
- case VK_FORMAT_R5G6B5_UNORM_PACK16:
- return true;
- case VK_FORMAT_R8_UNORM:
- case VK_FORMAT_R8G8_UNORM:
- return !(usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);
- default:
- return false;
- }
+ /* Ideally we check Gralloc for what it supports and then merge that with the radv
+ format support, but there is no easy gralloc query besides just creating an image.
+ That seems a bit on the expensive side, so just hardcode for now. */
+ /* TODO: Add multi-plane formats after confirming everything works between radeonsi
+ and radv. */
+ switch (format) {
+ case VK_FORMAT_R8G8B8A8_UNORM:
+ case VK_FORMAT_R5G6B5_UNORM_PACK16:
+ return true;
+ case VK_FORMAT_R8_UNORM:
+ case VK_FORMAT_R8G8_UNORM:
+ return !(usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);
+ default:
+ return false;
+ }
#else
- (void)format;
- (void)usage;
- return false;
+ (void)format;
+ (void)usage;
+ return false;
#endif
}
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index f1751cbfc17..4c015b98d57 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -25,777 +25,736 @@
* IN THE SOFTWARE.
*/
+#include "radv_cs.h"
+#include "radv_debug.h"
+#include "radv_meta.h"
#include "radv_private.h"
#include "radv_radeon_winsys.h"
#include "radv_shader.h"
-#include "radv_cs.h"
#include "sid.h"
#include "vk_format.h"
#include "vk_util.h"
-#include "radv_debug.h"
-#include "radv_meta.h"
#include "ac_debug.h"
enum {
- RADV_PREFETCH_VBO_DESCRIPTORS = (1 << 0),
- RADV_PREFETCH_VS = (1 << 1),
- RADV_PREFETCH_TCS = (1 << 2),
- RADV_PREFETCH_TES = (1 << 3),
- RADV_PREFETCH_GS = (1 << 4),
- RADV_PREFETCH_PS = (1 << 5),
- RADV_PREFETCH_SHADERS = (RADV_PREFETCH_VS |
- RADV_PREFETCH_TCS |
- RADV_PREFETCH_TES |
- RADV_PREFETCH_GS |
- RADV_PREFETCH_PS)
+ RADV_PREFETCH_VBO_DESCRIPTORS = (1 << 0),
+ RADV_PREFETCH_VS = (1 << 1),
+ RADV_PREFETCH_TCS = (1 << 2),
+ RADV_PREFETCH_TES = (1 << 3),
+ RADV_PREFETCH_GS = (1 << 4),
+ RADV_PREFETCH_PS = (1 << 5),
+ RADV_PREFETCH_SHADERS = (RADV_PREFETCH_VS | RADV_PREFETCH_TCS | RADV_PREFETCH_TES |
+ RADV_PREFETCH_GS | RADV_PREFETCH_PS)
};
static void radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- VkImageLayout src_layout,
- bool src_render_loop,
- VkImageLayout dst_layout,
- bool dst_render_loop,
- uint32_t src_family,
- uint32_t dst_family,
- const VkImageSubresourceRange *range,
- struct radv_sample_locations_state *sample_locs);
+ struct radv_image *image, VkImageLayout src_layout,
+ bool src_render_loop, VkImageLayout dst_layout,
+ bool dst_render_loop, uint32_t src_family,
+ uint32_t dst_family, const VkImageSubresourceRange *range,
+ struct radv_sample_locations_state *sample_locs);
const struct radv_dynamic_state default_dynamic_state = {
- .viewport = {
- .count = 0,
- },
- .scissor = {
- .count = 0,
- },
- .line_width = 1.0f,
- .depth_bias = {
- .bias = 0.0f,
- .clamp = 0.0f,
- .slope = 0.0f,
- },
- .blend_constants = { 0.0f, 0.0f, 0.0f, 0.0f },
- .depth_bounds = {
- .min = 0.0f,
- .max = 1.0f,
- },
- .stencil_compare_mask = {
- .front = ~0u,
- .back = ~0u,
- },
- .stencil_write_mask = {
- .front = ~0u,
- .back = ~0u,
- },
- .stencil_reference = {
- .front = 0u,
- .back = 0u,
- },
- .line_stipple = {
- .factor = 0u,
- .pattern = 0u,
- },
- .cull_mode = 0u,
- .front_face = 0u,
- .primitive_topology = 0u,
- .fragment_shading_rate = {
- .size = { 1u, 1u },
- .combiner_ops = { VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR,
- VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR
- },
- },
+ .viewport =
+ {
+ .count = 0,
+ },
+ .scissor =
+ {
+ .count = 0,
+ },
+ .line_width = 1.0f,
+ .depth_bias =
+ {
+ .bias = 0.0f,
+ .clamp = 0.0f,
+ .slope = 0.0f,
+ },
+ .blend_constants = {0.0f, 0.0f, 0.0f, 0.0f},
+ .depth_bounds =
+ {
+ .min = 0.0f,
+ .max = 1.0f,
+ },
+ .stencil_compare_mask =
+ {
+ .front = ~0u,
+ .back = ~0u,
+ },
+ .stencil_write_mask =
+ {
+ .front = ~0u,
+ .back = ~0u,
+ },
+ .stencil_reference =
+ {
+ .front = 0u,
+ .back = 0u,
+ },
+ .line_stipple =
+ {
+ .factor = 0u,
+ .pattern = 0u,
+ },
+ .cull_mode = 0u,
+ .front_face = 0u,
+ .primitive_topology = 0u,
+ .fragment_shading_rate =
+ {
+ .size = {1u, 1u},
+ .combiner_ops = {VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR,
+ VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR},
+ },
};
static void
-radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_dynamic_state *src)
-{
- struct radv_dynamic_state *dest = &cmd_buffer->state.dynamic;
- uint64_t copy_mask = src->mask;
- uint64_t dest_mask = 0;
-
- dest->discard_rectangle.count = src->discard_rectangle.count;
- dest->sample_location.count = src->sample_location.count;
-
- if (copy_mask & RADV_DYNAMIC_VIEWPORT) {
- if (dest->viewport.count != src->viewport.count) {
- dest->viewport.count = src->viewport.count;
- dest_mask |= RADV_DYNAMIC_VIEWPORT;
- }
-
- if (memcmp(&dest->viewport.viewports, &src->viewport.viewports,
- src->viewport.count * sizeof(VkViewport))) {
- typed_memcpy(dest->viewport.viewports,
- src->viewport.viewports,
- src->viewport.count);
- dest_mask |= RADV_DYNAMIC_VIEWPORT;
- }
- }
-
- if (copy_mask & RADV_DYNAMIC_SCISSOR) {
- if (dest->scissor.count != src->scissor.count) {
- dest->scissor.count = src->scissor.count;
- dest_mask |= RADV_DYNAMIC_SCISSOR;
- }
-
- if (memcmp(&dest->scissor.scissors, &src->scissor.scissors,
- src->scissor.count * sizeof(VkRect2D))) {
- typed_memcpy(dest->scissor.scissors,
- src->scissor.scissors, src->scissor.count);
- dest_mask |= RADV_DYNAMIC_SCISSOR;
- }
- }
-
- if (copy_mask & RADV_DYNAMIC_LINE_WIDTH) {
- if (dest->line_width != src->line_width) {
- dest->line_width = src->line_width;
- dest_mask |= RADV_DYNAMIC_LINE_WIDTH;
- }
- }
-
- if (copy_mask & RADV_DYNAMIC_DEPTH_BIAS) {
- if (memcmp(&dest->depth_bias, &src->depth_bias,
- sizeof(src->depth_bias))) {
- dest->depth_bias = src->depth_bias;
- dest_mask |= RADV_DYNAMIC_DEPTH_BIAS;
- }
- }
-
- if (copy_mask & RADV_DYNAMIC_BLEND_CONSTANTS) {
- if (memcmp(&dest->blend_constants, &src->blend_constants,
- sizeof(src->blend_constants))) {
- typed_memcpy(dest->blend_constants,
- src->blend_constants, 4);
- dest_mask |= RADV_DYNAMIC_BLEND_CONSTANTS;
- }
- }
-
- if (copy_mask & RADV_DYNAMIC_DEPTH_BOUNDS) {
- if (memcmp(&dest->depth_bounds, &src->depth_bounds,
- sizeof(src->depth_bounds))) {
- dest->depth_bounds = src->depth_bounds;
- dest_mask |= RADV_DYNAMIC_DEPTH_BOUNDS;
- }
- }
-
- if (copy_mask & RADV_DYNAMIC_STENCIL_COMPARE_MASK) {
- if (memcmp(&dest->stencil_compare_mask,
- &src->stencil_compare_mask,
- sizeof(src->stencil_compare_mask))) {
- dest->stencil_compare_mask = src->stencil_compare_mask;
- dest_mask |= RADV_DYNAMIC_STENCIL_COMPARE_MASK;
- }
- }
-
- if (copy_mask & RADV_DYNAMIC_STENCIL_WRITE_MASK) {
- if (memcmp(&dest->stencil_write_mask, &src->stencil_write_mask,
- sizeof(src->stencil_write_mask))) {
- dest->stencil_write_mask = src->stencil_write_mask;
- dest_mask |= RADV_DYNAMIC_STENCIL_WRITE_MASK;
- }
- }
-
- if (copy_mask & RADV_DYNAMIC_STENCIL_REFERENCE) {
- if (memcmp(&dest->stencil_reference, &src->stencil_reference,
- sizeof(src->stencil_reference))) {
- dest->stencil_reference = src->stencil_reference;
- dest_mask |= RADV_DYNAMIC_STENCIL_REFERENCE;
- }
- }
-
- if (copy_mask & RADV_DYNAMIC_DISCARD_RECTANGLE) {
- if (memcmp(&dest->discard_rectangle.rectangles, &src->discard_rectangle.rectangles,
- src->discard_rectangle.count * sizeof(VkRect2D))) {
- typed_memcpy(dest->discard_rectangle.rectangles,
- src->discard_rectangle.rectangles,
- src->discard_rectangle.count);
- dest_mask |= RADV_DYNAMIC_DISCARD_RECTANGLE;
- }
- }
-
- if (copy_mask & RADV_DYNAMIC_SAMPLE_LOCATIONS) {
- if (dest->sample_location.per_pixel != src->sample_location.per_pixel ||
- dest->sample_location.grid_size.width != src->sample_location.grid_size.width ||
- dest->sample_location.grid_size.height != src->sample_location.grid_size.height ||
- memcmp(&dest->sample_location.locations,
- &src->sample_location.locations,
- src->sample_location.count * sizeof(VkSampleLocationEXT))) {
- dest->sample_location.per_pixel = src->sample_location.per_pixel;
- dest->sample_location.grid_size = src->sample_location.grid_size;
- typed_memcpy(dest->sample_location.locations,
- src->sample_location.locations,
- src->sample_location.count);
- dest_mask |= RADV_DYNAMIC_SAMPLE_LOCATIONS;
- }
- }
-
- if (copy_mask & RADV_DYNAMIC_LINE_STIPPLE) {
- if (memcmp(&dest->line_stipple, &src->line_stipple,
- sizeof(src->line_stipple))) {
- dest->line_stipple = src->line_stipple;
- dest_mask |= RADV_DYNAMIC_LINE_STIPPLE;
- }
- }
-
- if (copy_mask & RADV_DYNAMIC_CULL_MODE) {
- if (dest->cull_mode != src->cull_mode) {
- dest->cull_mode = src->cull_mode;
- dest_mask |= RADV_DYNAMIC_CULL_MODE;
- }
- }
-
- if (copy_mask & RADV_DYNAMIC_FRONT_FACE) {
- if (dest->front_face != src->front_face) {
- dest->front_face = src->front_face;
- dest_mask |= RADV_DYNAMIC_FRONT_FACE;
- }
- }
-
- if (copy_mask & RADV_DYNAMIC_PRIMITIVE_TOPOLOGY) {
- if (dest->primitive_topology != src->primitive_topology) {
- dest->primitive_topology = src->primitive_topology;
- dest_mask |= RADV_DYNAMIC_PRIMITIVE_TOPOLOGY;
- }
- }
-
- if (copy_mask & RADV_DYNAMIC_DEPTH_TEST_ENABLE) {
- if (dest->depth_test_enable != src->depth_test_enable) {
- dest->depth_test_enable = src->depth_test_enable;
- dest_mask |= RADV_DYNAMIC_DEPTH_TEST_ENABLE;
- }
- }
-
- if (copy_mask & RADV_DYNAMIC_DEPTH_WRITE_ENABLE) {
- if (dest->depth_write_enable != src->depth_write_enable) {
- dest->depth_write_enable = src->depth_write_enable;
- dest_mask |= RADV_DYNAMIC_DEPTH_WRITE_ENABLE;
- }
- }
-
- if (copy_mask & RADV_DYNAMIC_DEPTH_COMPARE_OP) {
- if (dest->depth_compare_op != src->depth_compare_op) {
- dest->depth_compare_op = src->depth_compare_op;
- dest_mask |= RADV_DYNAMIC_DEPTH_COMPARE_OP;
- }
- }
-
- if (copy_mask & RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE) {
- if (dest->depth_bounds_test_enable != src->depth_bounds_test_enable) {
- dest->depth_bounds_test_enable = src->depth_bounds_test_enable;
- dest_mask |= RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE;
- }
- }
-
- if (copy_mask & RADV_DYNAMIC_STENCIL_TEST_ENABLE) {
- if (dest->stencil_test_enable != src->stencil_test_enable) {
- dest->stencil_test_enable = src->stencil_test_enable;
- dest_mask |= RADV_DYNAMIC_STENCIL_TEST_ENABLE;
- }
- }
-
- if (copy_mask & RADV_DYNAMIC_STENCIL_OP) {
- if (memcmp(&dest->stencil_op, &src->stencil_op,
- sizeof(src->stencil_op))) {
- dest->stencil_op = src->stencil_op;
- dest_mask |= RADV_DYNAMIC_STENCIL_OP;
- }
- }
-
- if (copy_mask & RADV_DYNAMIC_FRAGMENT_SHADING_RATE) {
- if (memcmp(&dest->fragment_shading_rate,
- &src->fragment_shading_rate,
- sizeof(src->fragment_shading_rate))) {
- dest->fragment_shading_rate = src->fragment_shading_rate;
- dest_mask |= RADV_DYNAMIC_FRAGMENT_SHADING_RATE;
- }
- }
-
- cmd_buffer->state.dirty |= dest_mask;
+radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer, const struct radv_dynamic_state *src)
+{
+ struct radv_dynamic_state *dest = &cmd_buffer->state.dynamic;
+ uint64_t copy_mask = src->mask;
+ uint64_t dest_mask = 0;
+
+ dest->discard_rectangle.count = src->discard_rectangle.count;
+ dest->sample_location.count = src->sample_location.count;
+
+ if (copy_mask & RADV_DYNAMIC_VIEWPORT) {
+ if (dest->viewport.count != src->viewport.count) {
+ dest->viewport.count = src->viewport.count;
+ dest_mask |= RADV_DYNAMIC_VIEWPORT;
+ }
+
+ if (memcmp(&dest->viewport.viewports, &src->viewport.viewports,
+ src->viewport.count * sizeof(VkViewport))) {
+ typed_memcpy(dest->viewport.viewports, src->viewport.viewports, src->viewport.count);
+ dest_mask |= RADV_DYNAMIC_VIEWPORT;
+ }
+ }
+
+ if (copy_mask & RADV_DYNAMIC_SCISSOR) {
+ if (dest->scissor.count != src->scissor.count) {
+ dest->scissor.count = src->scissor.count;
+ dest_mask |= RADV_DYNAMIC_SCISSOR;
+ }
+
+ if (memcmp(&dest->scissor.scissors, &src->scissor.scissors,
+ src->scissor.count * sizeof(VkRect2D))) {
+ typed_memcpy(dest->scissor.scissors, src->scissor.scissors, src->scissor.count);
+ dest_mask |= RADV_DYNAMIC_SCISSOR;
+ }
+ }
+
+ if (copy_mask & RADV_DYNAMIC_LINE_WIDTH) {
+ if (dest->line_width != src->line_width) {
+ dest->line_width = src->line_width;
+ dest_mask |= RADV_DYNAMIC_LINE_WIDTH;
+ }
+ }
+
+ if (copy_mask & RADV_DYNAMIC_DEPTH_BIAS) {
+ if (memcmp(&dest->depth_bias, &src->depth_bias, sizeof(src->depth_bias))) {
+ dest->depth_bias = src->depth_bias;
+ dest_mask |= RADV_DYNAMIC_DEPTH_BIAS;
+ }
+ }
+
+ if (copy_mask & RADV_DYNAMIC_BLEND_CONSTANTS) {
+ if (memcmp(&dest->blend_constants, &src->blend_constants, sizeof(src->blend_constants))) {
+ typed_memcpy(dest->blend_constants, src->blend_constants, 4);
+ dest_mask |= RADV_DYNAMIC_BLEND_CONSTANTS;
+ }
+ }
+
+ if (copy_mask & RADV_DYNAMIC_DEPTH_BOUNDS) {
+ if (memcmp(&dest->depth_bounds, &src->depth_bounds, sizeof(src->depth_bounds))) {
+ dest->depth_bounds = src->depth_bounds;
+ dest_mask |= RADV_DYNAMIC_DEPTH_BOUNDS;
+ }
+ }
+
+ if (copy_mask & RADV_DYNAMIC_STENCIL_COMPARE_MASK) {
+ if (memcmp(&dest->stencil_compare_mask, &src->stencil_compare_mask,
+ sizeof(src->stencil_compare_mask))) {
+ dest->stencil_compare_mask = src->stencil_compare_mask;
+ dest_mask |= RADV_DYNAMIC_STENCIL_COMPARE_MASK;
+ }
+ }
+
+ if (copy_mask & RADV_DYNAMIC_STENCIL_WRITE_MASK) {
+ if (memcmp(&dest->stencil_write_mask, &src->stencil_write_mask,
+ sizeof(src->stencil_write_mask))) {
+ dest->stencil_write_mask = src->stencil_write_mask;
+ dest_mask |= RADV_DYNAMIC_STENCIL_WRITE_MASK;
+ }
+ }
+
+ if (copy_mask & RADV_DYNAMIC_STENCIL_REFERENCE) {
+ if (memcmp(&dest->stencil_reference, &src->stencil_reference,
+ sizeof(src->stencil_reference))) {
+ dest->stencil_reference = src->stencil_reference;
+ dest_mask |= RADV_DYNAMIC_STENCIL_REFERENCE;
+ }
+ }
+
+ if (copy_mask & RADV_DYNAMIC_DISCARD_RECTANGLE) {
+ if (memcmp(&dest->discard_rectangle.rectangles, &src->discard_rectangle.rectangles,
+ src->discard_rectangle.count * sizeof(VkRect2D))) {
+ typed_memcpy(dest->discard_rectangle.rectangles, src->discard_rectangle.rectangles,
+ src->discard_rectangle.count);
+ dest_mask |= RADV_DYNAMIC_DISCARD_RECTANGLE;
+ }
+ }
+
+ if (copy_mask & RADV_DYNAMIC_SAMPLE_LOCATIONS) {
+ if (dest->sample_location.per_pixel != src->sample_location.per_pixel ||
+ dest->sample_location.grid_size.width != src->sample_location.grid_size.width ||
+ dest->sample_location.grid_size.height != src->sample_location.grid_size.height ||
+ memcmp(&dest->sample_location.locations, &src->sample_location.locations,
+ src->sample_location.count * sizeof(VkSampleLocationEXT))) {
+ dest->sample_location.per_pixel = src->sample_location.per_pixel;
+ dest->sample_location.grid_size = src->sample_location.grid_size;
+ typed_memcpy(dest->sample_location.locations, src->sample_location.locations,
+ src->sample_location.count);
+ dest_mask |= RADV_DYNAMIC_SAMPLE_LOCATIONS;
+ }
+ }
+
+ if (copy_mask & RADV_DYNAMIC_LINE_STIPPLE) {
+ if (memcmp(&dest->line_stipple, &src->line_stipple, sizeof(src->line_stipple))) {
+ dest->line_stipple = src->line_stipple;
+ dest_mask |= RADV_DYNAMIC_LINE_STIPPLE;
+ }
+ }
+
+ if (copy_mask & RADV_DYNAMIC_CULL_MODE) {
+ if (dest->cull_mode != src->cull_mode) {
+ dest->cull_mode = src->cull_mode;
+ dest_mask |= RADV_DYNAMIC_CULL_MODE;
+ }
+ }
+
+ if (copy_mask & RADV_DYNAMIC_FRONT_FACE) {
+ if (dest->front_face != src->front_face) {
+ dest->front_face = src->front_face;
+ dest_mask |= RADV_DYNAMIC_FRONT_FACE;
+ }
+ }
+
+ if (copy_mask & RADV_DYNAMIC_PRIMITIVE_TOPOLOGY) {
+ if (dest->primitive_topology != src->primitive_topology) {
+ dest->primitive_topology = src->primitive_topology;
+ dest_mask |= RADV_DYNAMIC_PRIMITIVE_TOPOLOGY;
+ }
+ }
+
+ if (copy_mask & RADV_DYNAMIC_DEPTH_TEST_ENABLE) {
+ if (dest->depth_test_enable != src->depth_test_enable) {
+ dest->depth_test_enable = src->depth_test_enable;
+ dest_mask |= RADV_DYNAMIC_DEPTH_TEST_ENABLE;
+ }
+ }
+
+ if (copy_mask & RADV_DYNAMIC_DEPTH_WRITE_ENABLE) {
+ if (dest->depth_write_enable != src->depth_write_enable) {
+ dest->depth_write_enable = src->depth_write_enable;
+ dest_mask |= RADV_DYNAMIC_DEPTH_WRITE_ENABLE;
+ }
+ }
+
+ if (copy_mask & RADV_DYNAMIC_DEPTH_COMPARE_OP) {
+ if (dest->depth_compare_op != src->depth_compare_op) {
+ dest->depth_compare_op = src->depth_compare_op;
+ dest_mask |= RADV_DYNAMIC_DEPTH_COMPARE_OP;
+ }
+ }
+
+ if (copy_mask & RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE) {
+ if (dest->depth_bounds_test_enable != src->depth_bounds_test_enable) {
+ dest->depth_bounds_test_enable = src->depth_bounds_test_enable;
+ dest_mask |= RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE;
+ }
+ }
+
+ if (copy_mask & RADV_DYNAMIC_STENCIL_TEST_ENABLE) {
+ if (dest->stencil_test_enable != src->stencil_test_enable) {
+ dest->stencil_test_enable = src->stencil_test_enable;
+ dest_mask |= RADV_DYNAMIC_STENCIL_TEST_ENABLE;
+ }
+ }
+
+ if (copy_mask & RADV_DYNAMIC_STENCIL_OP) {
+ if (memcmp(&dest->stencil_op, &src->stencil_op, sizeof(src->stencil_op))) {
+ dest->stencil_op = src->stencil_op;
+ dest_mask |= RADV_DYNAMIC_STENCIL_OP;
+ }
+ }
+
+ if (copy_mask & RADV_DYNAMIC_FRAGMENT_SHADING_RATE) {
+ if (memcmp(&dest->fragment_shading_rate, &src->fragment_shading_rate,
+ sizeof(src->fragment_shading_rate))) {
+ dest->fragment_shading_rate = src->fragment_shading_rate;
+ dest_mask |= RADV_DYNAMIC_FRAGMENT_SHADING_RATE;
+ }
+ }
+
+ cmd_buffer->state.dirty |= dest_mask;
}
static void
-radv_bind_streamout_state(struct radv_cmd_buffer *cmd_buffer,
- struct radv_pipeline *pipeline)
+radv_bind_streamout_state(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pipeline)
{
- struct radv_streamout_state *so = &cmd_buffer->state.streamout;
- struct radv_shader_info *info;
+ struct radv_streamout_state *so = &cmd_buffer->state.streamout;
+ struct radv_shader_info *info;
- if (!pipeline->streamout_shader ||
- cmd_buffer->device->physical_device->use_ngg_streamout)
- return;
+ if (!pipeline->streamout_shader || cmd_buffer->device->physical_device->use_ngg_streamout)
+ return;
- info = &pipeline->streamout_shader->info;
- for (int i = 0; i < MAX_SO_BUFFERS; i++)
- so->stride_in_dw[i] = info->so.strides[i];
+ info = &pipeline->streamout_shader->info;
+ for (int i = 0; i < MAX_SO_BUFFERS; i++)
+ so->stride_in_dw[i] = info->so.strides[i];
- so->enabled_stream_buffers_mask = info->so.enabled_stream_buffers_mask;
+ so->enabled_stream_buffers_mask = info->so.enabled_stream_buffers_mask;
}
-bool radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer)
+bool
+radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer)
{
- return cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE &&
- cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7;
+ return cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE &&
+ cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7;
}
-enum ring_type radv_queue_family_to_ring(int f) {
- switch (f) {
- case RADV_QUEUE_GENERAL:
- return RING_GFX;
- case RADV_QUEUE_COMPUTE:
- return RING_COMPUTE;
- case RADV_QUEUE_TRANSFER:
- return RING_DMA;
- default:
- unreachable("Unknown queue family");
- }
+enum ring_type
+radv_queue_family_to_ring(int f)
+{
+ switch (f) {
+ case RADV_QUEUE_GENERAL:
+ return RING_GFX;
+ case RADV_QUEUE_COMPUTE:
+ return RING_COMPUTE;
+ case RADV_QUEUE_TRANSFER:
+ return RING_DMA;
+ default:
+ unreachable("Unknown queue family");
+ }
}
static void
radv_destroy_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
{
- list_del(&cmd_buffer->pool_link);
+ list_del(&cmd_buffer->pool_link);
- list_for_each_entry_safe(struct radv_cmd_buffer_upload, up,
- &cmd_buffer->upload.list, list) {
- cmd_buffer->device->ws->buffer_destroy(cmd_buffer->device->ws, up->upload_bo);
- list_del(&up->list);
- free(up);
- }
+ list_for_each_entry_safe(struct radv_cmd_buffer_upload, up, &cmd_buffer->upload.list, list)
+ {
+ cmd_buffer->device->ws->buffer_destroy(cmd_buffer->device->ws, up->upload_bo);
+ list_del(&up->list);
+ free(up);
+ }
- if (cmd_buffer->upload.upload_bo)
- cmd_buffer->device->ws->buffer_destroy(cmd_buffer->device->ws, cmd_buffer->upload.upload_bo);
+ if (cmd_buffer->upload.upload_bo)
+ cmd_buffer->device->ws->buffer_destroy(cmd_buffer->device->ws, cmd_buffer->upload.upload_bo);
- if (cmd_buffer->cs)
- cmd_buffer->device->ws->cs_destroy(cmd_buffer->cs);
+ if (cmd_buffer->cs)
+ cmd_buffer->device->ws->cs_destroy(cmd_buffer->cs);
- for (unsigned i = 0; i < MAX_BIND_POINTS; i++)
- free(cmd_buffer->descriptors[i].push_set.set.mapped_ptr);
+ for (unsigned i = 0; i < MAX_BIND_POINTS; i++)
+ free(cmd_buffer->descriptors[i].push_set.set.mapped_ptr);
- vk_object_base_finish(&cmd_buffer->base);
- vk_free(&cmd_buffer->pool->alloc, cmd_buffer);
+ vk_object_base_finish(&cmd_buffer->base);
+ vk_free(&cmd_buffer->pool->alloc, cmd_buffer);
}
-static VkResult radv_create_cmd_buffer(
- struct radv_device * device,
- struct radv_cmd_pool * pool,
- VkCommandBufferLevel level,
- VkCommandBuffer* pCommandBuffer)
+static VkResult
+radv_create_cmd_buffer(struct radv_device *device, struct radv_cmd_pool *pool,
+ VkCommandBufferLevel level, VkCommandBuffer *pCommandBuffer)
{
- struct radv_cmd_buffer *cmd_buffer;
- unsigned ring;
- cmd_buffer = vk_zalloc(&pool->alloc, sizeof(*cmd_buffer), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (cmd_buffer == NULL)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ struct radv_cmd_buffer *cmd_buffer;
+ unsigned ring;
+ cmd_buffer = vk_zalloc(&pool->alloc, sizeof(*cmd_buffer), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (cmd_buffer == NULL)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- vk_object_base_init(&device->vk, &cmd_buffer->base,
- VK_OBJECT_TYPE_COMMAND_BUFFER);
+ vk_object_base_init(&device->vk, &cmd_buffer->base, VK_OBJECT_TYPE_COMMAND_BUFFER);
- cmd_buffer->device = device;
- cmd_buffer->pool = pool;
- cmd_buffer->level = level;
+ cmd_buffer->device = device;
+ cmd_buffer->pool = pool;
+ cmd_buffer->level = level;
- list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers);
- cmd_buffer->queue_family_index = pool->queue_family_index;
+ list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers);
+ cmd_buffer->queue_family_index = pool->queue_family_index;
- ring = radv_queue_family_to_ring(cmd_buffer->queue_family_index);
+ ring = radv_queue_family_to_ring(cmd_buffer->queue_family_index);
- cmd_buffer->cs = device->ws->cs_create(device->ws, ring);
- if (!cmd_buffer->cs) {
- radv_destroy_cmd_buffer(cmd_buffer);
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- }
+ cmd_buffer->cs = device->ws->cs_create(device->ws, ring);
+ if (!cmd_buffer->cs) {
+ radv_destroy_cmd_buffer(cmd_buffer);
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
- *pCommandBuffer = radv_cmd_buffer_to_handle(cmd_buffer);
+ *pCommandBuffer = radv_cmd_buffer_to_handle(cmd_buffer);
- list_inithead(&cmd_buffer->upload.list);
+ list_inithead(&cmd_buffer->upload.list);
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
static VkResult
radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
{
- cmd_buffer->device->ws->cs_reset(cmd_buffer->cs);
-
- list_for_each_entry_safe(struct radv_cmd_buffer_upload, up,
- &cmd_buffer->upload.list, list) {
- cmd_buffer->device->ws->buffer_destroy(cmd_buffer->device->ws, up->upload_bo);
- list_del(&up->list);
- free(up);
- }
-
- cmd_buffer->push_constant_stages = 0;
- cmd_buffer->scratch_size_per_wave_needed = 0;
- cmd_buffer->scratch_waves_wanted = 0;
- cmd_buffer->compute_scratch_size_per_wave_needed = 0;
- cmd_buffer->compute_scratch_waves_wanted = 0;
- cmd_buffer->esgs_ring_size_needed = 0;
- cmd_buffer->gsvs_ring_size_needed = 0;
- cmd_buffer->tess_rings_needed = false;
- cmd_buffer->gds_needed = false;
- cmd_buffer->gds_oa_needed = false;
- cmd_buffer->sample_positions_needed = false;
-
- if (cmd_buffer->upload.upload_bo)
- radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs,
- cmd_buffer->upload.upload_bo);
- cmd_buffer->upload.offset = 0;
-
- cmd_buffer->record_result = VK_SUCCESS;
-
- memset(cmd_buffer->vertex_bindings, 0, sizeof(cmd_buffer->vertex_bindings));
-
- for (unsigned i = 0; i < MAX_BIND_POINTS; i++) {
- cmd_buffer->descriptors[i].dirty = 0;
- cmd_buffer->descriptors[i].valid = 0;
- cmd_buffer->descriptors[i].push_dirty = false;
- }
-
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9 &&
- cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) {
- unsigned num_db = cmd_buffer->device->physical_device->rad_info.max_render_backends;
- unsigned fence_offset, eop_bug_offset;
- void *fence_ptr;
-
- radv_cmd_buffer_upload_alloc(cmd_buffer, 8, &fence_offset,
- &fence_ptr);
- memset(fence_ptr, 0, 8);
-
- cmd_buffer->gfx9_fence_va =
- radv_buffer_get_va(cmd_buffer->upload.upload_bo);
- cmd_buffer->gfx9_fence_va += fence_offset;
-
- if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) {
- /* Allocate a buffer for the EOP bug on GFX9. */
- radv_cmd_buffer_upload_alloc(cmd_buffer, 16 * num_db,
- &eop_bug_offset, &fence_ptr);
- memset(fence_ptr, 0, 16 * num_db);
- cmd_buffer->gfx9_eop_bug_va =
- radv_buffer_get_va(cmd_buffer->upload.upload_bo);
- cmd_buffer->gfx9_eop_bug_va += eop_bug_offset;
- }
- }
-
- cmd_buffer->status = RADV_CMD_BUFFER_STATUS_INITIAL;
-
- return cmd_buffer->record_result;
+ cmd_buffer->device->ws->cs_reset(cmd_buffer->cs);
+
+ list_for_each_entry_safe(struct radv_cmd_buffer_upload, up, &cmd_buffer->upload.list, list)
+ {
+ cmd_buffer->device->ws->buffer_destroy(cmd_buffer->device->ws, up->upload_bo);
+ list_del(&up->list);
+ free(up);
+ }
+
+ cmd_buffer->push_constant_stages = 0;
+ cmd_buffer->scratch_size_per_wave_needed = 0;
+ cmd_buffer->scratch_waves_wanted = 0;
+ cmd_buffer->compute_scratch_size_per_wave_needed = 0;
+ cmd_buffer->compute_scratch_waves_wanted = 0;
+ cmd_buffer->esgs_ring_size_needed = 0;
+ cmd_buffer->gsvs_ring_size_needed = 0;
+ cmd_buffer->tess_rings_needed = false;
+ cmd_buffer->gds_needed = false;
+ cmd_buffer->gds_oa_needed = false;
+ cmd_buffer->sample_positions_needed = false;
+
+ if (cmd_buffer->upload.upload_bo)
+ radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, cmd_buffer->upload.upload_bo);
+ cmd_buffer->upload.offset = 0;
+
+ cmd_buffer->record_result = VK_SUCCESS;
+
+ memset(cmd_buffer->vertex_bindings, 0, sizeof(cmd_buffer->vertex_bindings));
+
+ for (unsigned i = 0; i < MAX_BIND_POINTS; i++) {
+ cmd_buffer->descriptors[i].dirty = 0;
+ cmd_buffer->descriptors[i].valid = 0;
+ cmd_buffer->descriptors[i].push_dirty = false;
+ }
+
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9 &&
+ cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) {
+ unsigned num_db = cmd_buffer->device->physical_device->rad_info.max_render_backends;
+ unsigned fence_offset, eop_bug_offset;
+ void *fence_ptr;
+
+ radv_cmd_buffer_upload_alloc(cmd_buffer, 8, &fence_offset, &fence_ptr);
+ memset(fence_ptr, 0, 8);
+
+ cmd_buffer->gfx9_fence_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
+ cmd_buffer->gfx9_fence_va += fence_offset;
+
+ if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) {
+ /* Allocate a buffer for the EOP bug on GFX9. */
+ radv_cmd_buffer_upload_alloc(cmd_buffer, 16 * num_db, &eop_bug_offset, &fence_ptr);
+ memset(fence_ptr, 0, 16 * num_db);
+ cmd_buffer->gfx9_eop_bug_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
+ cmd_buffer->gfx9_eop_bug_va += eop_bug_offset;
+ }
+ }
+
+ cmd_buffer->status = RADV_CMD_BUFFER_STATUS_INITIAL;
+
+ return cmd_buffer->record_result;
}
enum radeon_bo_domain
radv_cmdbuffer_domain(const struct radeon_info *info, uint32_t perftest)
{
- bool use_sam = (info->all_vram_visible && info->has_dedicated_vram &&
- !(perftest & RADV_PERFTEST_NO_SAM)) ||
- (perftest & RADV_PERFTEST_SAM);
- return use_sam ? RADEON_DOMAIN_VRAM : RADEON_DOMAIN_GTT;
+ bool use_sam =
+ (info->all_vram_visible && info->has_dedicated_vram && !(perftest & RADV_PERFTEST_NO_SAM)) ||
+ (perftest & RADV_PERFTEST_SAM);
+ return use_sam ? RADEON_DOMAIN_VRAM : RADEON_DOMAIN_GTT;
}
static bool
-radv_cmd_buffer_resize_upload_buf(struct radv_cmd_buffer *cmd_buffer,
- uint64_t min_needed)
-{
- uint64_t new_size;
- struct radeon_winsys_bo *bo;
- struct radv_cmd_buffer_upload *upload;
- struct radv_device *device = cmd_buffer->device;
-
- new_size = MAX2(min_needed, 16 * 1024);
- new_size = MAX2(new_size, 2 * cmd_buffer->upload.size);
-
- bo = device->ws->buffer_create(device->ws,
- new_size, 4096,
- radv_cmdbuffer_domain(&device->physical_device->rad_info,
- device->instance->perftest_flags),
- RADEON_FLAG_CPU_ACCESS|
- RADEON_FLAG_NO_INTERPROCESS_SHARING |
- RADEON_FLAG_32BIT |
- RADEON_FLAG_GTT_WC,
- RADV_BO_PRIORITY_UPLOAD_BUFFER);
-
- if (!bo) {
- cmd_buffer->record_result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
- return false;
- }
-
- radv_cs_add_buffer(device->ws, cmd_buffer->cs, bo);
- if (cmd_buffer->upload.upload_bo) {
- upload = malloc(sizeof(*upload));
-
- if (!upload) {
- cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
- device->ws->buffer_destroy(device->ws, bo);
- return false;
- }
-
- memcpy(upload, &cmd_buffer->upload, sizeof(*upload));
- list_add(&upload->list, &cmd_buffer->upload.list);
- }
-
- cmd_buffer->upload.upload_bo = bo;
- cmd_buffer->upload.size = new_size;
- cmd_buffer->upload.offset = 0;
- cmd_buffer->upload.map = device->ws->buffer_map(cmd_buffer->upload.upload_bo);
-
- if (!cmd_buffer->upload.map) {
- cmd_buffer->record_result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
- return false;
- }
-
- return true;
+radv_cmd_buffer_resize_upload_buf(struct radv_cmd_buffer *cmd_buffer, uint64_t min_needed)
+{
+ uint64_t new_size;
+ struct radeon_winsys_bo *bo;
+ struct radv_cmd_buffer_upload *upload;
+ struct radv_device *device = cmd_buffer->device;
+
+ new_size = MAX2(min_needed, 16 * 1024);
+ new_size = MAX2(new_size, 2 * cmd_buffer->upload.size);
+
+ bo = device->ws->buffer_create(
+ device->ws, new_size, 4096,
+ radv_cmdbuffer_domain(&device->physical_device->rad_info, device->instance->perftest_flags),
+ RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_32BIT |
+ RADEON_FLAG_GTT_WC,
+ RADV_BO_PRIORITY_UPLOAD_BUFFER);
+
+ if (!bo) {
+ cmd_buffer->record_result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
+ return false;
+ }
+
+ radv_cs_add_buffer(device->ws, cmd_buffer->cs, bo);
+ if (cmd_buffer->upload.upload_bo) {
+ upload = malloc(sizeof(*upload));
+
+ if (!upload) {
+ cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
+ device->ws->buffer_destroy(device->ws, bo);
+ return false;
+ }
+
+ memcpy(upload, &cmd_buffer->upload, sizeof(*upload));
+ list_add(&upload->list, &cmd_buffer->upload.list);
+ }
+
+ cmd_buffer->upload.upload_bo = bo;
+ cmd_buffer->upload.size = new_size;
+ cmd_buffer->upload.offset = 0;
+ cmd_buffer->upload.map = device->ws->buffer_map(cmd_buffer->upload.upload_bo);
+
+ if (!cmd_buffer->upload.map) {
+ cmd_buffer->record_result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
+ return false;
+ }
+
+ return true;
}
bool
-radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer,
- unsigned size, unsigned *out_offset, void **ptr)
+radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, unsigned size,
+ unsigned *out_offset, void **ptr)
{
- assert(size % 4 == 0);
+ assert(size % 4 == 0);
- struct radeon_info *rad_info = &cmd_buffer->device->physical_device->rad_info;
+ struct radeon_info *rad_info = &cmd_buffer->device->physical_device->rad_info;
- /* Align to the scalar cache line size if it results in this allocation
- * being placed in less of them.
- */
- unsigned offset = cmd_buffer->upload.offset;
- unsigned line_size = rad_info->chip_class >= GFX10 ? 64 : 32;
- unsigned gap = align(offset, line_size) - offset;
- if ((size & (line_size - 1)) > gap)
- offset = align(offset, line_size);
+ /* Align to the scalar cache line size if it results in this allocation
+ * being placed in fewer of them.
+ */
+ unsigned offset = cmd_buffer->upload.offset;
+ unsigned line_size = rad_info->chip_class >= GFX10 ? 64 : 32;
+ unsigned gap = align(offset, line_size) - offset;
+ if ((size & (line_size - 1)) > gap)
+ offset = align(offset, line_size);
- if (offset + size > cmd_buffer->upload.size) {
- if (!radv_cmd_buffer_resize_upload_buf(cmd_buffer, size))
- return false;
- offset = 0;
- }
+ if (offset + size > cmd_buffer->upload.size) {
+ if (!radv_cmd_buffer_resize_upload_buf(cmd_buffer, size))
+ return false;
+ offset = 0;
+ }
- *out_offset = offset;
- *ptr = cmd_buffer->upload.map + offset;
+ *out_offset = offset;
+ *ptr = cmd_buffer->upload.map + offset;
- cmd_buffer->upload.offset = offset + size;
- return true;
+ cmd_buffer->upload.offset = offset + size;
+ return true;
}
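A stand-alone copy of the placement heuristic above (a sketch, not from this patch), assuming GFX10's 64-byte scalar cache lines, to make the two interesting cases concrete:

#include <assert.h>

static unsigned align_up(unsigned v, unsigned a)
{
   return (v + a - 1) & ~(a - 1);
}

/* Same logic as radv_cmd_buffer_upload_alloc(); `line_size` is assumed to be a
 * power of two (64 bytes on GFX10, 32 on older chips). */
static unsigned pick_upload_offset(unsigned offset, unsigned size, unsigned line_size)
{
   unsigned gap = align_up(offset, line_size) - offset;
   if ((size & (line_size - 1)) > gap)
      offset = align_up(offset, line_size);
   return offset;
}

int main(void)
{
   /* A 20-byte allocation starting at offset 40 still ends inside the first
    * 64-byte line, so it is left where it is. */
   assert(pick_upload_offset(40, 20, 64) == 40);
   /* A 60-byte allocation would straddle two lines from offset 40, so it is
    * bumped to the next boundary and occupies a single line instead. */
   assert(pick_upload_offset(40, 60, 64) == 64);
   return 0;
}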
bool
-radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer,
- unsigned size, const void *data, unsigned *out_offset)
+radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer, unsigned size, const void *data,
+ unsigned *out_offset)
{
- uint8_t *ptr;
+ uint8_t *ptr;
- if (!radv_cmd_buffer_upload_alloc(cmd_buffer, size, out_offset, (void **)&ptr))
- return false;
+ if (!radv_cmd_buffer_upload_alloc(cmd_buffer, size, out_offset, (void **)&ptr))
+ return false;
- if (ptr)
- memcpy(ptr, data, size);
+ if (ptr)
+ memcpy(ptr, data, size);
- return true;
+ return true;
}
static void
-radv_emit_write_data_packet(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
- unsigned count, const uint32_t *data)
+radv_emit_write_data_packet(struct radv_cmd_buffer *cmd_buffer, uint64_t va, unsigned count,
+ const uint32_t *data)
{
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
- radeon_check_space(cmd_buffer->device->ws, cs, 4 + count);
+ radeon_check_space(cmd_buffer->device->ws, cs, 4 + count);
- radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0));
- radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
- S_370_WR_CONFIRM(1) |
- S_370_ENGINE_SEL(V_370_ME));
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- radeon_emit_array(cs, data, count);
+ radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0));
+ radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ radeon_emit_array(cs, data, count);
}
-void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer)
+void
+radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_device *device = cmd_buffer->device;
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- uint64_t va;
+ struct radv_device *device = cmd_buffer->device;
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ uint64_t va;
- va = radv_buffer_get_va(device->trace_bo);
- if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY)
- va += 4;
+ va = radv_buffer_get_va(device->trace_bo);
+ if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY)
+ va += 4;
- ++cmd_buffer->state.trace_id;
- radv_emit_write_data_packet(cmd_buffer, va, 1,
- &cmd_buffer->state.trace_id);
+ ++cmd_buffer->state.trace_id;
+ radv_emit_write_data_packet(cmd_buffer, va, 1, &cmd_buffer->state.trace_id);
- radeon_check_space(cmd_buffer->device->ws, cs, 2);
+ radeon_check_space(cmd_buffer->device->ws, cs, 2);
- radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
- radeon_emit(cs, AC_ENCODE_TRACE_POINT(cmd_buffer->state.trace_id));
+ radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
+ radeon_emit(cs, AC_ENCODE_TRACE_POINT(cmd_buffer->state.trace_id));
}
static void
-radv_cmd_buffer_after_draw(struct radv_cmd_buffer *cmd_buffer,
- enum radv_cmd_flush_bits flags)
+radv_cmd_buffer_after_draw(struct radv_cmd_buffer *cmd_buffer, enum radv_cmd_flush_bits flags)
{
- if (unlikely(cmd_buffer->device->thread_trace.bo)) {
- radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_THREAD_TRACE_MARKER) | EVENT_INDEX(0));
- }
+ if (unlikely(cmd_buffer->device->thread_trace.bo)) {
+ radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_THREAD_TRACE_MARKER) | EVENT_INDEX(0));
+ }
- if (cmd_buffer->device->instance->debug_flags & RADV_DEBUG_SYNC_SHADERS) {
- enum rgp_flush_bits sqtt_flush_bits = 0;
- assert(flags & (RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_CS_PARTIAL_FLUSH));
+ if (cmd_buffer->device->instance->debug_flags & RADV_DEBUG_SYNC_SHADERS) {
+ enum rgp_flush_bits sqtt_flush_bits = 0;
+ assert(flags & (RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH));
- radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4);
+ radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4);
- /* Force wait for graphics or compute engines to be idle. */
- si_cs_emit_cache_flush(cmd_buffer->cs,
- cmd_buffer->device->physical_device->rad_info.chip_class,
- &cmd_buffer->gfx9_fence_idx,
- cmd_buffer->gfx9_fence_va,
- radv_cmd_buffer_uses_mec(cmd_buffer),
- flags, &sqtt_flush_bits, cmd_buffer->gfx9_eop_bug_va);
- }
+ /* Force wait for graphics or compute engines to be idle. */
+ si_cs_emit_cache_flush(cmd_buffer->cs,
+ cmd_buffer->device->physical_device->rad_info.chip_class,
+ &cmd_buffer->gfx9_fence_idx, cmd_buffer->gfx9_fence_va,
+ radv_cmd_buffer_uses_mec(cmd_buffer), flags, &sqtt_flush_bits,
+ cmd_buffer->gfx9_eop_bug_va);
+ }
- if (unlikely(cmd_buffer->device->trace_bo))
- radv_cmd_buffer_trace_emit(cmd_buffer);
+ if (unlikely(cmd_buffer->device->trace_bo))
+ radv_cmd_buffer_trace_emit(cmd_buffer);
}
static void
-radv_save_pipeline(struct radv_cmd_buffer *cmd_buffer,
- struct radv_pipeline *pipeline)
+radv_save_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pipeline)
{
- struct radv_device *device = cmd_buffer->device;
- enum ring_type ring;
- uint32_t data[2];
- uint64_t va;
+ struct radv_device *device = cmd_buffer->device;
+ enum ring_type ring;
+ uint32_t data[2];
+ uint64_t va;
- va = radv_buffer_get_va(device->trace_bo);
+ va = radv_buffer_get_va(device->trace_bo);
- ring = radv_queue_family_to_ring(cmd_buffer->queue_family_index);
+ ring = radv_queue_family_to_ring(cmd_buffer->queue_family_index);
- switch (ring) {
- case RING_GFX:
- va += 8;
- break;
- case RING_COMPUTE:
- va += 16;
- break;
- default:
- assert(!"invalid ring type");
- }
+ switch (ring) {
+ case RING_GFX:
+ va += 8;
+ break;
+ case RING_COMPUTE:
+ va += 16;
+ break;
+ default:
+ assert(!"invalid ring type");
+ }
- uint64_t pipeline_address = (uintptr_t)pipeline;
- data[0] = pipeline_address;
- data[1] = pipeline_address >> 32;
+ uint64_t pipeline_address = (uintptr_t)pipeline;
+ data[0] = pipeline_address;
+ data[1] = pipeline_address >> 32;
- radv_emit_write_data_packet(cmd_buffer, va, 2, data);
+ radv_emit_write_data_packet(cmd_buffer, va, 2, data);
}
static void
-radv_save_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer,
- uint64_t vb_ptr)
+radv_save_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer, uint64_t vb_ptr)
{
- struct radv_device *device = cmd_buffer->device;
- uint32_t data[2];
- uint64_t va;
+ struct radv_device *device = cmd_buffer->device;
+ uint32_t data[2];
+ uint64_t va;
- va = radv_buffer_get_va(device->trace_bo);
- va += 24;
+ va = radv_buffer_get_va(device->trace_bo);
+ va += 24;
- data[0] = vb_ptr;
- data[1] = vb_ptr >> 32;
+ data[0] = vb_ptr;
+ data[1] = vb_ptr >> 32;
- radv_emit_write_data_packet(cmd_buffer, va, 2, data);
+ radv_emit_write_data_packet(cmd_buffer, va, 2, data);
}
-void radv_set_descriptor_set(struct radv_cmd_buffer *cmd_buffer,
- VkPipelineBindPoint bind_point,
- struct radv_descriptor_set *set,
- unsigned idx)
+void
+radv_set_descriptor_set(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point,
+ struct radv_descriptor_set *set, unsigned idx)
{
- struct radv_descriptor_state *descriptors_state =
- radv_get_descriptors_state(cmd_buffer, bind_point);
+ struct radv_descriptor_state *descriptors_state =
+ radv_get_descriptors_state(cmd_buffer, bind_point);
- descriptors_state->sets[idx] = set;
+ descriptors_state->sets[idx] = set;
- descriptors_state->valid |= (1u << idx); /* active descriptors */
- descriptors_state->dirty |= (1u << idx);
+ descriptors_state->valid |= (1u << idx); /* active descriptors */
+ descriptors_state->dirty |= (1u << idx);
}
static void
-radv_save_descriptors(struct radv_cmd_buffer *cmd_buffer,
- VkPipelineBindPoint bind_point)
+radv_save_descriptors(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point)
{
- struct radv_descriptor_state *descriptors_state =
- radv_get_descriptors_state(cmd_buffer, bind_point);
- struct radv_device *device = cmd_buffer->device;
- uint32_t data[MAX_SETS * 2] = {0};
- uint64_t va;
- va = radv_buffer_get_va(device->trace_bo) + 32;
+ struct radv_descriptor_state *descriptors_state =
+ radv_get_descriptors_state(cmd_buffer, bind_point);
+ struct radv_device *device = cmd_buffer->device;
+ uint32_t data[MAX_SETS * 2] = {0};
+ uint64_t va;
+ va = radv_buffer_get_va(device->trace_bo) + 32;
- u_foreach_bit(i, descriptors_state->valid) {
- struct radv_descriptor_set *set = descriptors_state->sets[i];
- data[i * 2] = (uint64_t)(uintptr_t)set;
- data[i * 2 + 1] = (uint64_t)(uintptr_t)set >> 32;
- }
+ u_foreach_bit(i, descriptors_state->valid)
+ {
+ struct radv_descriptor_set *set = descriptors_state->sets[i];
+ data[i * 2] = (uint64_t)(uintptr_t)set;
+ data[i * 2 + 1] = (uint64_t)(uintptr_t)set >> 32;
+ }
- radv_emit_write_data_packet(cmd_buffer, va, MAX_SETS * 2, data);
+ radv_emit_write_data_packet(cmd_buffer, va, MAX_SETS * 2, data);
}
struct radv_userdata_info *
-radv_lookup_user_sgpr(struct radv_pipeline *pipeline,
- gl_shader_stage stage,
- int idx)
+radv_lookup_user_sgpr(struct radv_pipeline *pipeline, gl_shader_stage stage, int idx)
{
- struct radv_shader_variant *shader = radv_get_shader(pipeline, stage);
- return &shader->info.user_sgprs_locs.shader_data[idx];
+ struct radv_shader_variant *shader = radv_get_shader(pipeline, stage);
+ return &shader->info.user_sgprs_locs.shader_data[idx];
}
static void
-radv_emit_userdata_address(struct radv_cmd_buffer *cmd_buffer,
- struct radv_pipeline *pipeline,
- gl_shader_stage stage,
- int idx, uint64_t va)
+radv_emit_userdata_address(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pipeline,
+ gl_shader_stage stage, int idx, uint64_t va)
{
- struct radv_userdata_info *loc = radv_lookup_user_sgpr(pipeline, stage, idx);
- uint32_t base_reg = pipeline->user_data_0[stage];
- if (loc->sgpr_idx == -1)
- return;
+ struct radv_userdata_info *loc = radv_lookup_user_sgpr(pipeline, stage, idx);
+ uint32_t base_reg = pipeline->user_data_0[stage];
+ if (loc->sgpr_idx == -1)
+ return;
- assert(loc->num_sgprs == 1);
+ assert(loc->num_sgprs == 1);
- radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs,
- base_reg + loc->sgpr_idx * 4, va, false);
+ radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, va,
+ false);
}
static void
-radv_emit_descriptor_pointers(struct radv_cmd_buffer *cmd_buffer,
- struct radv_pipeline *pipeline,
- struct radv_descriptor_state *descriptors_state,
- gl_shader_stage stage)
+radv_emit_descriptor_pointers(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pipeline,
+ struct radv_descriptor_state *descriptors_state,
+ gl_shader_stage stage)
{
- struct radv_device *device = cmd_buffer->device;
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- uint32_t sh_base = pipeline->user_data_0[stage];
- struct radv_userdata_locations *locs =
- &pipeline->shaders[stage]->info.user_sgprs_locs;
- unsigned mask = locs->descriptor_sets_enabled;
+ struct radv_device *device = cmd_buffer->device;
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ uint32_t sh_base = pipeline->user_data_0[stage];
+ struct radv_userdata_locations *locs = &pipeline->shaders[stage]->info.user_sgprs_locs;
+ unsigned mask = locs->descriptor_sets_enabled;
- mask &= descriptors_state->dirty & descriptors_state->valid;
+ mask &= descriptors_state->dirty & descriptors_state->valid;
- while (mask) {
- int start, count;
+ while (mask) {
+ int start, count;
- u_bit_scan_consecutive_range(&mask, &start, &count);
+ u_bit_scan_consecutive_range(&mask, &start, &count);
- struct radv_userdata_info *loc = &locs->descriptor_sets[start];
- unsigned sh_offset = sh_base + loc->sgpr_idx * 4;
+ struct radv_userdata_info *loc = &locs->descriptor_sets[start];
+ unsigned sh_offset = sh_base + loc->sgpr_idx * 4;
- radv_emit_shader_pointer_head(cs, sh_offset, count, true);
- for (int i = 0; i < count; i++) {
- struct radv_descriptor_set *set =
- descriptors_state->sets[start + i];
+ radv_emit_shader_pointer_head(cs, sh_offset, count, true);
+ for (int i = 0; i < count; i++) {
+ struct radv_descriptor_set *set = descriptors_state->sets[start + i];
- radv_emit_shader_pointer_body(device, cs, set->header.va, true);
- }
- }
+ radv_emit_shader_pointer_body(device, cs, set->header.va, true);
+ }
+ }
}
/**
@@ -803,30 +762,30 @@ radv_emit_descriptor_pointers(struct radv_cmd_buffer *cmd_buffer,
* that will be emitted by PA_SC_AA_SAMPLE_LOCS_PIXEL_*).
*/
static void
-radv_convert_user_sample_locs(struct radv_sample_locations_state *state,
- uint32_t x, uint32_t y, VkOffset2D *sample_locs)
+radv_convert_user_sample_locs(struct radv_sample_locations_state *state, uint32_t x, uint32_t y,
+ VkOffset2D *sample_locs)
{
- uint32_t x_offset = x % state->grid_size.width;
- uint32_t y_offset = y % state->grid_size.height;
- uint32_t num_samples = (uint32_t)state->per_pixel;
- VkSampleLocationEXT *user_locs;
- uint32_t pixel_offset;
+ uint32_t x_offset = x % state->grid_size.width;
+ uint32_t y_offset = y % state->grid_size.height;
+ uint32_t num_samples = (uint32_t)state->per_pixel;
+ VkSampleLocationEXT *user_locs;
+ uint32_t pixel_offset;
- pixel_offset = (x_offset + y_offset * state->grid_size.width) * num_samples;
+ pixel_offset = (x_offset + y_offset * state->grid_size.width) * num_samples;
- assert(pixel_offset <= MAX_SAMPLE_LOCATIONS);
- user_locs = &state->locations[pixel_offset];
+ assert(pixel_offset <= MAX_SAMPLE_LOCATIONS);
+ user_locs = &state->locations[pixel_offset];
- for (uint32_t i = 0; i < num_samples; i++) {
- float shifted_pos_x = user_locs[i].x - 0.5;
- float shifted_pos_y = user_locs[i].y - 0.5;
+ for (uint32_t i = 0; i < num_samples; i++) {
+ float shifted_pos_x = user_locs[i].x - 0.5;
+ float shifted_pos_y = user_locs[i].y - 0.5;
- int32_t scaled_pos_x = floorf(shifted_pos_x * 16);
- int32_t scaled_pos_y = floorf(shifted_pos_y * 16);
+ int32_t scaled_pos_x = floorf(shifted_pos_x * 16);
+ int32_t scaled_pos_y = floorf(shifted_pos_y * 16);
- sample_locs[i].x = CLAMP(scaled_pos_x, -8, 7);
- sample_locs[i].y = CLAMP(scaled_pos_y, -8, 7);
- }
+ sample_locs[i].x = CLAMP(scaled_pos_x, -8, 7);
+ sample_locs[i].y = CLAMP(scaled_pos_y, -8, 7);
+ }
}
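A stand-alone sketch (not from this patch) of the per-coordinate conversion above: user sample locations in [0,1) are re-centred on the pixel centre and quantized to signed 1/16th-pixel offsets clamped to the [-8,7] range the hardware fields can hold:

#include <math.h>
#include <stdio.h>

/* Same per-coordinate math as radv_convert_user_sample_locs(). */
static int convert_sample_coord(float user_pos)
{
   int scaled = (int)floorf((user_pos - 0.5f) * 16);
   if (scaled < -8)
      scaled = -8;
   if (scaled > 7)
      scaled = 7;
   return scaled;
}

int main(void)
{
   printf("%d\n", convert_sample_coord(0.75f)); /* (0.75 - 0.5) * 16 = 4     */
   printf("%d\n", convert_sample_coord(0.0f));  /* already at the -8 minimum */
   printf("%d\n", convert_sample_coord(1.0f));  /* 8 clamps down to 7        */
   return 0;
}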
/**
@@ -835,20 +794,20 @@ radv_convert_user_sample_locs(struct radv_sample_locations_state *state,
*/
static void
radv_compute_sample_locs_pixel(uint32_t num_samples, VkOffset2D *sample_locs,
- uint32_t *sample_locs_pixel)
+ uint32_t *sample_locs_pixel)
{
- for (uint32_t i = 0; i < num_samples; i++) {
- uint32_t sample_reg_idx = i / 4;
- uint32_t sample_loc_idx = i % 4;
- int32_t pos_x = sample_locs[i].x;
- int32_t pos_y = sample_locs[i].y;
+ for (uint32_t i = 0; i < num_samples; i++) {
+ uint32_t sample_reg_idx = i / 4;
+ uint32_t sample_loc_idx = i % 4;
+ int32_t pos_x = sample_locs[i].x;
+ int32_t pos_y = sample_locs[i].y;
- uint32_t shift_x = 8 * sample_loc_idx;
- uint32_t shift_y = shift_x + 4;
+ uint32_t shift_x = 8 * sample_loc_idx;
+ uint32_t shift_y = shift_x + 4;
- sample_locs_pixel[sample_reg_idx] |= (pos_x & 0xf) << shift_x;
- sample_locs_pixel[sample_reg_idx] |= (pos_y & 0xf) << shift_y;
- }
+ sample_locs_pixel[sample_reg_idx] |= (pos_x & 0xf) << shift_x;
+ sample_locs_pixel[sample_reg_idx] |= (pos_y & 0xf) << shift_y;
+ }
}
/**
@@ -856,41 +815,38 @@ radv_compute_sample_locs_pixel(uint32_t num_samples, VkOffset2D *sample_locs,
* sample locations.
*/
static uint64_t
-radv_compute_centroid_priority(struct radv_cmd_buffer *cmd_buffer,
- VkOffset2D *sample_locs,
- uint32_t num_samples)
+radv_compute_centroid_priority(struct radv_cmd_buffer *cmd_buffer, VkOffset2D *sample_locs,
+ uint32_t num_samples)
{
- uint32_t *centroid_priorities = alloca(num_samples * sizeof(*centroid_priorities));
- uint32_t sample_mask = num_samples - 1;
- uint32_t *distances = alloca(num_samples * sizeof(*distances));
- uint64_t centroid_priority = 0;
+ uint32_t *centroid_priorities = alloca(num_samples * sizeof(*centroid_priorities));
+ uint32_t sample_mask = num_samples - 1;
+ uint32_t *distances = alloca(num_samples * sizeof(*distances));
+ uint64_t centroid_priority = 0;
- /* Compute the distances from center for each sample. */
- for (int i = 0; i < num_samples; i++) {
- distances[i] = (sample_locs[i].x * sample_locs[i].x) +
- (sample_locs[i].y * sample_locs[i].y);
- }
+ /* Compute the distances from center for each sample. */
+ for (int i = 0; i < num_samples; i++) {
+ distances[i] = (sample_locs[i].x * sample_locs[i].x) + (sample_locs[i].y * sample_locs[i].y);
+ }
- /* Compute the centroid priorities by looking at the distances array. */
- for (int i = 0; i < num_samples; i++) {
- uint32_t min_idx = 0;
+ /* Compute the centroid priorities by looking at the distances array. */
+ for (int i = 0; i < num_samples; i++) {
+ uint32_t min_idx = 0;
- for (int j = 1; j < num_samples; j++) {
- if (distances[j] < distances[min_idx])
- min_idx = j;
- }
+ for (int j = 1; j < num_samples; j++) {
+ if (distances[j] < distances[min_idx])
+ min_idx = j;
+ }
- centroid_priorities[i] = min_idx;
- distances[min_idx] = 0xffffffff;
- }
+ centroid_priorities[i] = min_idx;
+ distances[min_idx] = 0xffffffff;
+ }
- /* Compute the final centroid priority. */
- for (int i = 0; i < 8; i++) {
- centroid_priority |=
- centroid_priorities[i & sample_mask] << (i * 4);
- }
+ /* Compute the final centroid priority. */
+ for (int i = 0; i < 8; i++) {
+ centroid_priority |= centroid_priorities[i & sample_mask] << (i * 4);
+ }
- return centroid_priority << 32 | centroid_priority;
+ return centroid_priority << 32 | centroid_priority;
}
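A stand-alone sketch (not from this patch) of the nibble packing above, for a hypothetical 4-sample case whose closest-to-farthest ordering works out to samples 2, 0, 3, 1:

#include <assert.h>
#include <stdint.h>

int main(void)
{
   /* Hypothetical result of the distance sort: sample 2 is closest to the
    * pixel centre, then 0, then 3, then 1 (centroid_priorities[] contents). */
   const uint32_t priorities[4] = {2, 0, 3, 1};
   const uint32_t sample_mask = 4 - 1;
   uint64_t packed = 0;

   /* Eight 4-bit fields repeat the 4-entry order twice, as in the loop above. */
   for (int i = 0; i < 8; i++)
      packed |= (uint64_t)priorities[i & sample_mask] << (i * 4);
   assert(packed == 0x13021302u);

   /* PA_SC_CENTROID_PRIORITY_0/1 then receive the same pattern in both halves. */
   assert((packed << 32 | packed) == 0x1302130213021302ull);
   return 0;
}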
/**
@@ -899,972 +855,934 @@ radv_compute_centroid_priority(struct radv_cmd_buffer *cmd_buffer,
static void
radv_emit_sample_locations(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_sample_locations_state *sample_location =
- &cmd_buffer->state.dynamic.sample_location;
- uint32_t num_samples = (uint32_t)sample_location->per_pixel;
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- uint32_t sample_locs_pixel[4][2] = {0};
- VkOffset2D sample_locs[4][8]; /* 8 is the max. sample count supported */
- uint32_t max_sample_dist = 0;
- uint64_t centroid_priority;
-
- if (!cmd_buffer->state.dynamic.sample_location.count)
- return;
-
- /* Convert the user sample locations to hardware sample locations. */
- radv_convert_user_sample_locs(sample_location, 0, 0, sample_locs[0]);
- radv_convert_user_sample_locs(sample_location, 1, 0, sample_locs[1]);
- radv_convert_user_sample_locs(sample_location, 0, 1, sample_locs[2]);
- radv_convert_user_sample_locs(sample_location, 1, 1, sample_locs[3]);
-
- /* Compute the PA_SC_AA_SAMPLE_LOCS_PIXEL_* mask. */
- for (uint32_t i = 0; i < 4; i++) {
- radv_compute_sample_locs_pixel(num_samples, sample_locs[i],
- sample_locs_pixel[i]);
- }
-
- /* Compute the PA_SC_CENTROID_PRIORITY_* mask. */
- centroid_priority =
- radv_compute_centroid_priority(cmd_buffer, sample_locs[0],
- num_samples);
-
- /* Compute the maximum sample distance from the specified locations. */
- for (unsigned i = 0; i < 4; ++i) {
- for (uint32_t j = 0; j < num_samples; j++) {
- VkOffset2D offset = sample_locs[i][j];
- max_sample_dist = MAX2(max_sample_dist,
- MAX2(abs(offset.x), abs(offset.y)));
- }
- }
-
- /* Emit the specified user sample locations. */
- switch (num_samples) {
- case 2:
- case 4:
- radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_pixel[0][0]);
- radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_pixel[1][0]);
- radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_pixel[2][0]);
- radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_pixel[3][0]);
- break;
- case 8:
- radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_pixel[0][0]);
- radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_pixel[1][0]);
- radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_pixel[2][0]);
- radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_pixel[3][0]);
- radeon_set_context_reg(cs, R_028BFC_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1, sample_locs_pixel[0][1]);
- radeon_set_context_reg(cs, R_028C0C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1, sample_locs_pixel[1][1]);
- radeon_set_context_reg(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1, sample_locs_pixel[2][1]);
- radeon_set_context_reg(cs, R_028C2C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1, sample_locs_pixel[3][1]);
- break;
- default:
- unreachable("invalid number of samples");
- }
-
- /* Emit the maximum sample distance and the centroid priority. */
- radeon_set_context_reg_rmw(cs, R_028BE0_PA_SC_AA_CONFIG,
- S_028BE0_MAX_SAMPLE_DIST(max_sample_dist),
- ~C_028BE0_MAX_SAMPLE_DIST);
-
- radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2);
- radeon_emit(cs, centroid_priority);
- radeon_emit(cs, centroid_priority >> 32);
-
- /* GFX9: Flush DFSM when the AA mode changes. */
- if (cmd_buffer->device->dfsm_allowed) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
- }
-
- cmd_buffer->state.context_roll_without_scissor_emitted = true;
+ struct radv_sample_locations_state *sample_location = &cmd_buffer->state.dynamic.sample_location;
+ uint32_t num_samples = (uint32_t)sample_location->per_pixel;
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ uint32_t sample_locs_pixel[4][2] = {0};
+ VkOffset2D sample_locs[4][8]; /* 8 is the max. sample count supported */
+ uint32_t max_sample_dist = 0;
+ uint64_t centroid_priority;
+
+ if (!cmd_buffer->state.dynamic.sample_location.count)
+ return;
+
+ /* Convert the user sample locations to hardware sample locations. */
+ radv_convert_user_sample_locs(sample_location, 0, 0, sample_locs[0]);
+ radv_convert_user_sample_locs(sample_location, 1, 0, sample_locs[1]);
+ radv_convert_user_sample_locs(sample_location, 0, 1, sample_locs[2]);
+ radv_convert_user_sample_locs(sample_location, 1, 1, sample_locs[3]);
+
+ /* Compute the PA_SC_AA_SAMPLE_LOCS_PIXEL_* mask. */
+ for (uint32_t i = 0; i < 4; i++) {
+ radv_compute_sample_locs_pixel(num_samples, sample_locs[i], sample_locs_pixel[i]);
+ }
+
+ /* Compute the PA_SC_CENTROID_PRIORITY_* mask. */
+ centroid_priority = radv_compute_centroid_priority(cmd_buffer, sample_locs[0], num_samples);
+
+ /* Compute the maximum sample distance from the specified locations. */
+ for (unsigned i = 0; i < 4; ++i) {
+ for (uint32_t j = 0; j < num_samples; j++) {
+ VkOffset2D offset = sample_locs[i][j];
+ max_sample_dist = MAX2(max_sample_dist, MAX2(abs(offset.x), abs(offset.y)));
+ }
+ }
+
+ /* Emit the specified user sample locations. */
+ switch (num_samples) {
+ case 2:
+ case 4:
+ radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0,
+ sample_locs_pixel[0][0]);
+ radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0,
+ sample_locs_pixel[1][0]);
+ radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0,
+ sample_locs_pixel[2][0]);
+ radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0,
+ sample_locs_pixel[3][0]);
+ break;
+ case 8:
+ radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0,
+ sample_locs_pixel[0][0]);
+ radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0,
+ sample_locs_pixel[1][0]);
+ radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0,
+ sample_locs_pixel[2][0]);
+ radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0,
+ sample_locs_pixel[3][0]);
+ radeon_set_context_reg(cs, R_028BFC_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1,
+ sample_locs_pixel[0][1]);
+ radeon_set_context_reg(cs, R_028C0C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1,
+ sample_locs_pixel[1][1]);
+ radeon_set_context_reg(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1,
+ sample_locs_pixel[2][1]);
+ radeon_set_context_reg(cs, R_028C2C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1,
+ sample_locs_pixel[3][1]);
+ break;
+ default:
+ unreachable("invalid number of samples");
+ }
+
+ /* Emit the maximum sample distance and the centroid priority. */
+ radeon_set_context_reg_rmw(cs, R_028BE0_PA_SC_AA_CONFIG,
+ S_028BE0_MAX_SAMPLE_DIST(max_sample_dist), ~C_028BE0_MAX_SAMPLE_DIST);
+
+ radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2);
+ radeon_emit(cs, centroid_priority);
+ radeon_emit(cs, centroid_priority >> 32);
+
+ /* GFX9: Flush DFSM when the AA mode changes. */
+ if (cmd_buffer->device->dfsm_allowed) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
+ }
+
+ cmd_buffer->state.context_roll_without_scissor_emitted = true;
}
static void
-radv_emit_inline_push_consts(struct radv_cmd_buffer *cmd_buffer,
- struct radv_pipeline *pipeline,
- gl_shader_stage stage,
- int idx, int count, uint32_t *values)
+radv_emit_inline_push_consts(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pipeline,
+ gl_shader_stage stage, int idx, int count, uint32_t *values)
{
- struct radv_userdata_info *loc = radv_lookup_user_sgpr(pipeline, stage, idx);
- uint32_t base_reg = pipeline->user_data_0[stage];
- if (loc->sgpr_idx == -1)
- return;
+ struct radv_userdata_info *loc = radv_lookup_user_sgpr(pipeline, stage, idx);
+ uint32_t base_reg = pipeline->user_data_0[stage];
+ if (loc->sgpr_idx == -1)
+ return;
- assert(loc->num_sgprs == count);
+ assert(loc->num_sgprs == count);
- radeon_set_sh_reg_seq(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, count);
- radeon_emit_array(cmd_buffer->cs, values, count);
+ radeon_set_sh_reg_seq(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, count);
+ radeon_emit_array(cmd_buffer->cs, values, count);
}
static void
-radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer,
- struct radv_pipeline *pipeline)
+radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pipeline)
{
- int num_samples = pipeline->graphics.ms.num_samples;
- struct radv_pipeline *old_pipeline = cmd_buffer->state.emitted_pipeline;
+ int num_samples = pipeline->graphics.ms.num_samples;
+ struct radv_pipeline *old_pipeline = cmd_buffer->state.emitted_pipeline;
- if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.ps.needs_sample_positions)
- cmd_buffer->sample_positions_needed = true;
+ if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.ps.needs_sample_positions)
+ cmd_buffer->sample_positions_needed = true;
- if (old_pipeline && num_samples == old_pipeline->graphics.ms.num_samples)
- return;
+ if (old_pipeline && num_samples == old_pipeline->graphics.ms.num_samples)
+ return;
- radv_emit_default_sample_locations(cmd_buffer->cs, num_samples);
+ radv_emit_default_sample_locations(cmd_buffer->cs, num_samples);
- cmd_buffer->state.context_roll_without_scissor_emitted = true;
+ cmd_buffer->state.context_roll_without_scissor_emitted = true;
}
static void
-radv_update_binning_state(struct radv_cmd_buffer *cmd_buffer,
- struct radv_pipeline *pipeline)
+radv_update_binning_state(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pipeline)
{
- const struct radv_pipeline *old_pipeline = cmd_buffer->state.emitted_pipeline;
-
+ const struct radv_pipeline *old_pipeline = cmd_buffer->state.emitted_pipeline;
- if (pipeline->device->physical_device->rad_info.chip_class < GFX9)
- return;
+ if (pipeline->device->physical_device->rad_info.chip_class < GFX9)
+ return;
- if (old_pipeline &&
- old_pipeline->graphics.binning.pa_sc_binner_cntl_0 == pipeline->graphics.binning.pa_sc_binner_cntl_0 &&
- old_pipeline->graphics.binning.db_dfsm_control == pipeline->graphics.binning.db_dfsm_control)
- return;
+ if (old_pipeline &&
+ old_pipeline->graphics.binning.pa_sc_binner_cntl_0 ==
+ pipeline->graphics.binning.pa_sc_binner_cntl_0 &&
+ old_pipeline->graphics.binning.db_dfsm_control == pipeline->graphics.binning.db_dfsm_control)
+ return;
- bool binning_flush = false;
- if (cmd_buffer->device->physical_device->rad_info.family == CHIP_VEGA12 ||
- cmd_buffer->device->physical_device->rad_info.family == CHIP_VEGA20 ||
- cmd_buffer->device->physical_device->rad_info.family == CHIP_RAVEN2 ||
- cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
- binning_flush = !old_pipeline ||
- G_028C44_BINNING_MODE(old_pipeline->graphics.binning.pa_sc_binner_cntl_0) !=
- G_028C44_BINNING_MODE(pipeline->graphics.binning.pa_sc_binner_cntl_0);
- }
+ bool binning_flush = false;
+ if (cmd_buffer->device->physical_device->rad_info.family == CHIP_VEGA12 ||
+ cmd_buffer->device->physical_device->rad_info.family == CHIP_VEGA20 ||
+ cmd_buffer->device->physical_device->rad_info.family == CHIP_RAVEN2 ||
+ cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
+ binning_flush = !old_pipeline ||
+ G_028C44_BINNING_MODE(old_pipeline->graphics.binning.pa_sc_binner_cntl_0) !=
+ G_028C44_BINNING_MODE(pipeline->graphics.binning.pa_sc_binner_cntl_0);
+ }
- radeon_set_context_reg(cmd_buffer->cs, R_028C44_PA_SC_BINNER_CNTL_0,
- pipeline->graphics.binning.pa_sc_binner_cntl_0 |
- S_028C44_FLUSH_ON_BINNING_TRANSITION(!!binning_flush));
+ radeon_set_context_reg(cmd_buffer->cs, R_028C44_PA_SC_BINNER_CNTL_0,
+ pipeline->graphics.binning.pa_sc_binner_cntl_0 |
+ S_028C44_FLUSH_ON_BINNING_TRANSITION(!!binning_flush));
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
- radeon_set_context_reg(cmd_buffer->cs, R_028038_DB_DFSM_CONTROL,
- pipeline->graphics.binning.db_dfsm_control);
- } else {
- radeon_set_context_reg(cmd_buffer->cs, R_028060_DB_DFSM_CONTROL,
- pipeline->graphics.binning.db_dfsm_control);
- }
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
+ radeon_set_context_reg(cmd_buffer->cs, R_028038_DB_DFSM_CONTROL,
+ pipeline->graphics.binning.db_dfsm_control);
+ } else {
+ radeon_set_context_reg(cmd_buffer->cs, R_028060_DB_DFSM_CONTROL,
+ pipeline->graphics.binning.db_dfsm_control);
+ }
- cmd_buffer->state.context_roll_without_scissor_emitted = true;
+ cmd_buffer->state.context_roll_without_scissor_emitted = true;
}
-
static void
-radv_emit_shader_prefetch(struct radv_cmd_buffer *cmd_buffer,
- struct radv_shader_variant *shader)
+radv_emit_shader_prefetch(struct radv_cmd_buffer *cmd_buffer, struct radv_shader_variant *shader)
{
- uint64_t va;
+ uint64_t va;
- if (!shader)
- return;
+ if (!shader)
+ return;
- va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
+ va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
- si_cp_dma_prefetch(cmd_buffer, va, shader->code_size);
+ si_cp_dma_prefetch(cmd_buffer, va, shader->code_size);
}
static void
-radv_emit_prefetch_L2(struct radv_cmd_buffer *cmd_buffer,
- struct radv_pipeline *pipeline,
- bool vertex_stage_only)
+radv_emit_prefetch_L2(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pipeline,
+ bool vertex_stage_only)
{
- struct radv_cmd_state *state = &cmd_buffer->state;
- uint32_t mask = state->prefetch_L2_mask;
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ uint32_t mask = state->prefetch_L2_mask;
- if (vertex_stage_only) {
- /* Fast prefetch path for starting draws as soon as possible.
- */
- mask = state->prefetch_L2_mask & (RADV_PREFETCH_VS |
- RADV_PREFETCH_VBO_DESCRIPTORS);
- }
+ if (vertex_stage_only) {
+ /* Fast prefetch path for starting draws as soon as possible.
+ */
+ mask = state->prefetch_L2_mask & (RADV_PREFETCH_VS | RADV_PREFETCH_VBO_DESCRIPTORS);
+ }
- if (mask & RADV_PREFETCH_VS)
- radv_emit_shader_prefetch(cmd_buffer,
- pipeline->shaders[MESA_SHADER_VERTEX]);
+ if (mask & RADV_PREFETCH_VS)
+ radv_emit_shader_prefetch(cmd_buffer, pipeline->shaders[MESA_SHADER_VERTEX]);
- if (mask & RADV_PREFETCH_VBO_DESCRIPTORS)
- si_cp_dma_prefetch(cmd_buffer, state->vb_va, state->vb_size);
+ if (mask & RADV_PREFETCH_VBO_DESCRIPTORS)
+ si_cp_dma_prefetch(cmd_buffer, state->vb_va, state->vb_size);
- if (mask & RADV_PREFETCH_TCS)
- radv_emit_shader_prefetch(cmd_buffer,
- pipeline->shaders[MESA_SHADER_TESS_CTRL]);
+ if (mask & RADV_PREFETCH_TCS)
+ radv_emit_shader_prefetch(cmd_buffer, pipeline->shaders[MESA_SHADER_TESS_CTRL]);
- if (mask & RADV_PREFETCH_TES)
- radv_emit_shader_prefetch(cmd_buffer,
- pipeline->shaders[MESA_SHADER_TESS_EVAL]);
+ if (mask & RADV_PREFETCH_TES)
+ radv_emit_shader_prefetch(cmd_buffer, pipeline->shaders[MESA_SHADER_TESS_EVAL]);
- if (mask & RADV_PREFETCH_GS) {
- radv_emit_shader_prefetch(cmd_buffer,
- pipeline->shaders[MESA_SHADER_GEOMETRY]);
- if (radv_pipeline_has_gs_copy_shader(pipeline))
- radv_emit_shader_prefetch(cmd_buffer, pipeline->gs_copy_shader);
- }
+ if (mask & RADV_PREFETCH_GS) {
+ radv_emit_shader_prefetch(cmd_buffer, pipeline->shaders[MESA_SHADER_GEOMETRY]);
+ if (radv_pipeline_has_gs_copy_shader(pipeline))
+ radv_emit_shader_prefetch(cmd_buffer, pipeline->gs_copy_shader);
+ }
- if (mask & RADV_PREFETCH_PS)
- radv_emit_shader_prefetch(cmd_buffer,
- pipeline->shaders[MESA_SHADER_FRAGMENT]);
+ if (mask & RADV_PREFETCH_PS)
+ radv_emit_shader_prefetch(cmd_buffer, pipeline->shaders[MESA_SHADER_FRAGMENT]);
- state->prefetch_L2_mask &= ~mask;
+ state->prefetch_L2_mask &= ~mask;
}
static void
radv_emit_rbplus_state(struct radv_cmd_buffer *cmd_buffer)
{
- if (!cmd_buffer->device->physical_device->rad_info.rbplus_allowed)
- return;
-
- struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
- const struct radv_subpass *subpass = cmd_buffer->state.subpass;
-
- unsigned sx_ps_downconvert = 0;
- unsigned sx_blend_opt_epsilon = 0;
- unsigned sx_blend_opt_control = 0;
-
- if (!cmd_buffer->state.attachments || !subpass)
- return;
-
- for (unsigned i = 0; i < subpass->color_count; ++i) {
- if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED) {
- /* We don't set the DISABLE bits, because the HW can't have holes,
- * so the SPI color format is set to 32-bit 1-component. */
- sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4);
- continue;
- }
-
- int idx = subpass->color_attachments[i].attachment;
- struct radv_color_buffer_info *cb = &cmd_buffer->state.attachments[idx].cb;
-
- unsigned format = G_028C70_FORMAT(cb->cb_color_info);
- unsigned swap = G_028C70_COMP_SWAP(cb->cb_color_info);
- uint32_t spi_format = (pipeline->graphics.col_format >> (i * 4)) & 0xf;
- uint32_t colormask = (pipeline->graphics.cb_target_mask >> (i * 4)) & 0xf;
-
- bool has_alpha, has_rgb;
-
- /* Set if RGB and A are present. */
- has_alpha = !G_028C74_FORCE_DST_ALPHA_1(cb->cb_color_attrib);
-
- if (format == V_028C70_COLOR_8 ||
- format == V_028C70_COLOR_16 ||
- format == V_028C70_COLOR_32)
- has_rgb = !has_alpha;
- else
- has_rgb = true;
-
- /* Check the colormask and export format. */
- if (!(colormask & 0x7))
- has_rgb = false;
- if (!(colormask & 0x8))
- has_alpha = false;
-
- if (spi_format == V_028714_SPI_SHADER_ZERO) {
- has_rgb = false;
- has_alpha = false;
- }
-
- /* The HW doesn't quite blend correctly with rgb9e5 if we disable the alpha
- * optimization, even though it has no alpha. */
- if (has_rgb && format == V_028C70_COLOR_5_9_9_9)
- has_alpha = true;
-
- /* Disable value checking for disabled channels. */
- if (!has_rgb)
- sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4);
- if (!has_alpha)
- sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4);
-
- /* Enable down-conversion for 32bpp and smaller formats. */
- switch (format) {
- case V_028C70_COLOR_8:
- case V_028C70_COLOR_8_8:
- case V_028C70_COLOR_8_8_8_8:
- /* For 1 and 2-channel formats, use the superset thereof. */
- if (spi_format == V_028714_SPI_SHADER_FP16_ABGR ||
- spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
- spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
- sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4);
- sx_blend_opt_epsilon |= V_028758_8BIT_FORMAT << (i * 4);
- }
- break;
-
- case V_028C70_COLOR_5_6_5:
- if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
- sx_ps_downconvert |= V_028754_SX_RT_EXPORT_5_6_5 << (i * 4);
- sx_blend_opt_epsilon |= V_028758_6BIT_FORMAT << (i * 4);
- }
- break;
-
- case V_028C70_COLOR_1_5_5_5:
- if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
- sx_ps_downconvert |= V_028754_SX_RT_EXPORT_1_5_5_5 << (i * 4);
- sx_blend_opt_epsilon |= V_028758_5BIT_FORMAT << (i * 4);
- }
- break;
-
- case V_028C70_COLOR_4_4_4_4:
- if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
- sx_ps_downconvert |= V_028754_SX_RT_EXPORT_4_4_4_4 << (i * 4);
- sx_blend_opt_epsilon |= V_028758_4BIT_FORMAT << (i * 4);
- }
- break;
-
- case V_028C70_COLOR_32:
- if (swap == V_028C70_SWAP_STD &&
- spi_format == V_028714_SPI_SHADER_32_R)
- sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4);
- else if (swap == V_028C70_SWAP_ALT_REV &&
- spi_format == V_028714_SPI_SHADER_32_AR)
- sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A << (i * 4);
- break;
-
- case V_028C70_COLOR_16:
- case V_028C70_COLOR_16_16:
- /* For 1-channel formats, use the superset thereof. */
- if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR ||
- spi_format == V_028714_SPI_SHADER_SNORM16_ABGR ||
- spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
- spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
- if (swap == V_028C70_SWAP_STD ||
- swap == V_028C70_SWAP_STD_REV)
- sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4);
- else
- sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_AR << (i * 4);
- }
- break;
-
- case V_028C70_COLOR_10_11_11:
- if (spi_format == V_028714_SPI_SHADER_FP16_ABGR)
- sx_ps_downconvert |= V_028754_SX_RT_EXPORT_10_11_11 << (i * 4);
- break;
-
- case V_028C70_COLOR_2_10_10_10:
- if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
- sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4);
- sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4);
- }
- break;
- case V_028C70_COLOR_5_9_9_9:
- if (spi_format == V_028714_SPI_SHADER_FP16_ABGR)
- sx_ps_downconvert |= V_028754_SX_RT_EXPORT_9_9_9_E5 << (i * 4);
- break;
- }
- }
-
- /* Do not set the DISABLE bits for the unused attachments, as that
- * breaks dual source blending in SkQP and does not seem to improve
- * performance. */
-
- if (sx_ps_downconvert == cmd_buffer->state.last_sx_ps_downconvert &&
- sx_blend_opt_epsilon == cmd_buffer->state.last_sx_blend_opt_epsilon &&
- sx_blend_opt_control == cmd_buffer->state.last_sx_blend_opt_control)
- return;
-
- radeon_set_context_reg_seq(cmd_buffer->cs, R_028754_SX_PS_DOWNCONVERT, 3);
- radeon_emit(cmd_buffer->cs, sx_ps_downconvert);
- radeon_emit(cmd_buffer->cs, sx_blend_opt_epsilon);
- radeon_emit(cmd_buffer->cs, sx_blend_opt_control);
-
- cmd_buffer->state.context_roll_without_scissor_emitted = true;
-
- cmd_buffer->state.last_sx_ps_downconvert = sx_ps_downconvert;
- cmd_buffer->state.last_sx_blend_opt_epsilon = sx_blend_opt_epsilon;
- cmd_buffer->state.last_sx_blend_opt_control = sx_blend_opt_control;
+ if (!cmd_buffer->device->physical_device->rad_info.rbplus_allowed)
+ return;
+
+ struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
+ const struct radv_subpass *subpass = cmd_buffer->state.subpass;
+
+ unsigned sx_ps_downconvert = 0;
+ unsigned sx_blend_opt_epsilon = 0;
+ unsigned sx_blend_opt_control = 0;
+
+ if (!cmd_buffer->state.attachments || !subpass)
+ return;
+
+ for (unsigned i = 0; i < subpass->color_count; ++i) {
+ if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED) {
+ /* We don't set the DISABLE bits, because the HW can't have holes,
+ * so the SPI color format is set to 32-bit 1-component. */
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4);
+ continue;
+ }
+
+ int idx = subpass->color_attachments[i].attachment;
+ struct radv_color_buffer_info *cb = &cmd_buffer->state.attachments[idx].cb;
+
+ unsigned format = G_028C70_FORMAT(cb->cb_color_info);
+ unsigned swap = G_028C70_COMP_SWAP(cb->cb_color_info);
+ uint32_t spi_format = (pipeline->graphics.col_format >> (i * 4)) & 0xf;
+ uint32_t colormask = (pipeline->graphics.cb_target_mask >> (i * 4)) & 0xf;
+
+ bool has_alpha, has_rgb;
+
+ /* Set if RGB and A are present. */
+ has_alpha = !G_028C74_FORCE_DST_ALPHA_1(cb->cb_color_attrib);
+
+ if (format == V_028C70_COLOR_8 || format == V_028C70_COLOR_16 || format == V_028C70_COLOR_32)
+ has_rgb = !has_alpha;
+ else
+ has_rgb = true;
+
+ /* Check the colormask and export format. */
+ if (!(colormask & 0x7))
+ has_rgb = false;
+ if (!(colormask & 0x8))
+ has_alpha = false;
+
+ if (spi_format == V_028714_SPI_SHADER_ZERO) {
+ has_rgb = false;
+ has_alpha = false;
+ }
+
+ /* The HW doesn't quite blend correctly with rgb9e5 if we disable the alpha
+ * optimization, even though it has no alpha. */
+ if (has_rgb && format == V_028C70_COLOR_5_9_9_9)
+ has_alpha = true;
+
+ /* Disable value checking for disabled channels. */
+ if (!has_rgb)
+ sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4);
+ if (!has_alpha)
+ sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4);
+
+ /* Enable down-conversion for 32bpp and smaller formats. */
+ switch (format) {
+ case V_028C70_COLOR_8:
+ case V_028C70_COLOR_8_8:
+ case V_028C70_COLOR_8_8_8_8:
+ /* For 1 and 2-channel formats, use the superset thereof. */
+ if (spi_format == V_028714_SPI_SHADER_FP16_ABGR ||
+ spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
+ spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4);
+ sx_blend_opt_epsilon |= V_028758_8BIT_FORMAT << (i * 4);
+ }
+ break;
+
+ case V_028C70_COLOR_5_6_5:
+ if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_5_6_5 << (i * 4);
+ sx_blend_opt_epsilon |= V_028758_6BIT_FORMAT << (i * 4);
+ }
+ break;
+
+ case V_028C70_COLOR_1_5_5_5:
+ if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_1_5_5_5 << (i * 4);
+ sx_blend_opt_epsilon |= V_028758_5BIT_FORMAT << (i * 4);
+ }
+ break;
+
+ case V_028C70_COLOR_4_4_4_4:
+ if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_4_4_4_4 << (i * 4);
+ sx_blend_opt_epsilon |= V_028758_4BIT_FORMAT << (i * 4);
+ }
+ break;
+
+ case V_028C70_COLOR_32:
+ if (swap == V_028C70_SWAP_STD && spi_format == V_028714_SPI_SHADER_32_R)
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4);
+ else if (swap == V_028C70_SWAP_ALT_REV && spi_format == V_028714_SPI_SHADER_32_AR)
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A << (i * 4);
+ break;
+
+ case V_028C70_COLOR_16:
+ case V_028C70_COLOR_16_16:
+ /* For 1-channel formats, use the superset thereof. */
+ if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR ||
+ spi_format == V_028714_SPI_SHADER_SNORM16_ABGR ||
+ spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
+ spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
+ if (swap == V_028C70_SWAP_STD || swap == V_028C70_SWAP_STD_REV)
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4);
+ else
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_AR << (i * 4);
+ }
+ break;
+
+ case V_028C70_COLOR_10_11_11:
+ if (spi_format == V_028714_SPI_SHADER_FP16_ABGR)
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_10_11_11 << (i * 4);
+ break;
+
+ case V_028C70_COLOR_2_10_10_10:
+ if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4);
+ sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4);
+ }
+ break;
+ case V_028C70_COLOR_5_9_9_9:
+ if (spi_format == V_028714_SPI_SHADER_FP16_ABGR)
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_9_9_9_E5 << (i * 4);
+ break;
+ }
+ }
+
+ /* Do not set the DISABLE bits for the unused attachments, as that
+ * breaks dual source blending in SkQP and does not seem to improve
+ * performance. */
+
+ if (sx_ps_downconvert == cmd_buffer->state.last_sx_ps_downconvert &&
+ sx_blend_opt_epsilon == cmd_buffer->state.last_sx_blend_opt_epsilon &&
+ sx_blend_opt_control == cmd_buffer->state.last_sx_blend_opt_control)
+ return;
+
+ radeon_set_context_reg_seq(cmd_buffer->cs, R_028754_SX_PS_DOWNCONVERT, 3);
+ radeon_emit(cmd_buffer->cs, sx_ps_downconvert);
+ radeon_emit(cmd_buffer->cs, sx_blend_opt_epsilon);
+ radeon_emit(cmd_buffer->cs, sx_blend_opt_control);
+
+ cmd_buffer->state.context_roll_without_scissor_emitted = true;
+
+ cmd_buffer->state.last_sx_ps_downconvert = sx_ps_downconvert;
+ cmd_buffer->state.last_sx_blend_opt_epsilon = sx_blend_opt_epsilon;
+ cmd_buffer->state.last_sx_blend_opt_control = sx_blend_opt_control;
}
static void
radv_emit_batch_break_on_new_ps(struct radv_cmd_buffer *cmd_buffer)
{
- if (!cmd_buffer->device->pbb_allowed)
- return;
+ if (!cmd_buffer->device->pbb_allowed)
+ return;
- struct radv_binning_settings settings =
- radv_get_binning_settings(cmd_buffer->device->physical_device);
- bool break_for_new_ps =
- (!cmd_buffer->state.emitted_pipeline ||
- cmd_buffer->state.emitted_pipeline->shaders[MESA_SHADER_FRAGMENT] !=
- cmd_buffer->state.pipeline->shaders[MESA_SHADER_FRAGMENT]) &&
- (settings.context_states_per_bin > 1 ||
- settings.persistent_states_per_bin > 1);
- bool break_for_new_cb_target_mask =
- (!cmd_buffer->state.emitted_pipeline ||
- cmd_buffer->state.emitted_pipeline->graphics.cb_target_mask !=
- cmd_buffer->state.pipeline->graphics.cb_target_mask) &&
- settings.context_states_per_bin > 1;
+ struct radv_binning_settings settings =
+ radv_get_binning_settings(cmd_buffer->device->physical_device);
+ bool break_for_new_ps =
+ (!cmd_buffer->state.emitted_pipeline ||
+ cmd_buffer->state.emitted_pipeline->shaders[MESA_SHADER_FRAGMENT] !=
+ cmd_buffer->state.pipeline->shaders[MESA_SHADER_FRAGMENT]) &&
+ (settings.context_states_per_bin > 1 || settings.persistent_states_per_bin > 1);
+ bool break_for_new_cb_target_mask =
+ (!cmd_buffer->state.emitted_pipeline ||
+ cmd_buffer->state.emitted_pipeline->graphics.cb_target_mask !=
+ cmd_buffer->state.pipeline->graphics.cb_target_mask) &&
+ settings.context_states_per_bin > 1;
- if (!break_for_new_ps && !break_for_new_cb_target_mask)
- return;
+ if (!break_for_new_ps && !break_for_new_cb_target_mask)
+ return;
- radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
+ radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
}
static void
radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
+ struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
- if (!pipeline || cmd_buffer->state.emitted_pipeline == pipeline)
- return;
+ if (!pipeline || cmd_buffer->state.emitted_pipeline == pipeline)
+ return;
- radv_update_multisample_state(cmd_buffer, pipeline);
- radv_update_binning_state(cmd_buffer, pipeline);
+ radv_update_multisample_state(cmd_buffer, pipeline);
+ radv_update_binning_state(cmd_buffer, pipeline);
- cmd_buffer->scratch_size_per_wave_needed = MAX2(cmd_buffer->scratch_size_per_wave_needed,
- pipeline->scratch_bytes_per_wave);
- cmd_buffer->scratch_waves_wanted = MAX2(cmd_buffer->scratch_waves_wanted,
- pipeline->max_waves);
+ cmd_buffer->scratch_size_per_wave_needed =
+ MAX2(cmd_buffer->scratch_size_per_wave_needed, pipeline->scratch_bytes_per_wave);
+ cmd_buffer->scratch_waves_wanted = MAX2(cmd_buffer->scratch_waves_wanted, pipeline->max_waves);
- if (!cmd_buffer->state.emitted_pipeline ||
- cmd_buffer->state.emitted_pipeline->graphics.can_use_guardband !=
- pipeline->graphics.can_use_guardband)
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_SCISSOR;
+ if (!cmd_buffer->state.emitted_pipeline ||
+ cmd_buffer->state.emitted_pipeline->graphics.can_use_guardband !=
+ pipeline->graphics.can_use_guardband)
+ cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_SCISSOR;
- if (!cmd_buffer->state.emitted_pipeline ||
- cmd_buffer->state.emitted_pipeline->graphics.pa_su_sc_mode_cntl !=
- pipeline->graphics.pa_su_sc_mode_cntl)
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_CULL_MODE |
- RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE;
+ if (!cmd_buffer->state.emitted_pipeline ||
+ cmd_buffer->state.emitted_pipeline->graphics.pa_su_sc_mode_cntl !=
+ pipeline->graphics.pa_su_sc_mode_cntl)
+ cmd_buffer->state.dirty |=
+ RADV_CMD_DIRTY_DYNAMIC_CULL_MODE | RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE;
- if (!cmd_buffer->state.emitted_pipeline)
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY;
+ if (!cmd_buffer->state.emitted_pipeline)
+ cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY;
- if (!cmd_buffer->state.emitted_pipeline ||
- cmd_buffer->state.emitted_pipeline->graphics.db_depth_control !=
- pipeline->graphics.db_depth_control)
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE |
- RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE |
- RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP |
- RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE |
- RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE |
- RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP;
+ if (!cmd_buffer->state.emitted_pipeline ||
+ cmd_buffer->state.emitted_pipeline->graphics.db_depth_control !=
+ pipeline->graphics.db_depth_control)
+ cmd_buffer->state.dirty |=
+ RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE | RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE |
+ RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP | RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE |
+ RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE | RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP;
- if (!cmd_buffer->state.emitted_pipeline)
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP;
+ if (!cmd_buffer->state.emitted_pipeline)
+ cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP;
- radeon_emit_array(cmd_buffer->cs, pipeline->cs.buf, pipeline->cs.cdw);
+ radeon_emit_array(cmd_buffer->cs, pipeline->cs.buf, pipeline->cs.cdw);
- if (!cmd_buffer->state.emitted_pipeline ||
- cmd_buffer->state.emitted_pipeline->ctx_cs.cdw != pipeline->ctx_cs.cdw ||
- cmd_buffer->state.emitted_pipeline->ctx_cs_hash != pipeline->ctx_cs_hash ||
- memcmp(cmd_buffer->state.emitted_pipeline->ctx_cs.buf,
- pipeline->ctx_cs.buf, pipeline->ctx_cs.cdw * 4)) {
- radeon_emit_array(cmd_buffer->cs, pipeline->ctx_cs.buf, pipeline->ctx_cs.cdw);
- cmd_buffer->state.context_roll_without_scissor_emitted = true;
- }
+ if (!cmd_buffer->state.emitted_pipeline ||
+ cmd_buffer->state.emitted_pipeline->ctx_cs.cdw != pipeline->ctx_cs.cdw ||
+ cmd_buffer->state.emitted_pipeline->ctx_cs_hash != pipeline->ctx_cs_hash ||
+ memcmp(cmd_buffer->state.emitted_pipeline->ctx_cs.buf, pipeline->ctx_cs.buf,
+ pipeline->ctx_cs.cdw * 4)) {
+ radeon_emit_array(cmd_buffer->cs, pipeline->ctx_cs.buf, pipeline->ctx_cs.cdw);
+ cmd_buffer->state.context_roll_without_scissor_emitted = true;
+ }
- radv_emit_batch_break_on_new_ps(cmd_buffer);
+ radv_emit_batch_break_on_new_ps(cmd_buffer);
- for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
- if (!pipeline->shaders[i])
- continue;
+ for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
+ if (!pipeline->shaders[i])
+ continue;
- radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs,
- pipeline->shaders[i]->bo);
- }
+ radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, pipeline->shaders[i]->bo);
+ }
- if (radv_pipeline_has_gs_copy_shader(pipeline))
- radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs,
- pipeline->gs_copy_shader->bo);
+ if (radv_pipeline_has_gs_copy_shader(pipeline))
+ radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, pipeline->gs_copy_shader->bo);
- if (unlikely(cmd_buffer->device->trace_bo))
- radv_save_pipeline(cmd_buffer, pipeline);
+ if (unlikely(cmd_buffer->device->trace_bo))
+ radv_save_pipeline(cmd_buffer, pipeline);
- cmd_buffer->state.emitted_pipeline = pipeline;
+ cmd_buffer->state.emitted_pipeline = pipeline;
- cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_PIPELINE;
+ cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_PIPELINE;
}
static void
radv_emit_viewport(struct radv_cmd_buffer *cmd_buffer)
{
- si_write_viewport(cmd_buffer->cs, 0, cmd_buffer->state.dynamic.viewport.count,
- cmd_buffer->state.dynamic.viewport.viewports);
+ si_write_viewport(cmd_buffer->cs, 0, cmd_buffer->state.dynamic.viewport.count,
+ cmd_buffer->state.dynamic.viewport.viewports);
}
static void
radv_emit_scissor(struct radv_cmd_buffer *cmd_buffer)
{
- uint32_t count = cmd_buffer->state.dynamic.scissor.count;
+ uint32_t count = cmd_buffer->state.dynamic.scissor.count;
- si_write_scissors(cmd_buffer->cs, 0, count,
- cmd_buffer->state.dynamic.scissor.scissors,
- cmd_buffer->state.dynamic.viewport.viewports,
- cmd_buffer->state.emitted_pipeline->graphics.can_use_guardband);
+ si_write_scissors(cmd_buffer->cs, 0, count, cmd_buffer->state.dynamic.scissor.scissors,
+ cmd_buffer->state.dynamic.viewport.viewports,
+ cmd_buffer->state.emitted_pipeline->graphics.can_use_guardband);
- cmd_buffer->state.context_roll_without_scissor_emitted = false;
+ cmd_buffer->state.context_roll_without_scissor_emitted = false;
}
static void
radv_emit_discard_rectangle(struct radv_cmd_buffer *cmd_buffer)
{
- if (!cmd_buffer->state.dynamic.discard_rectangle.count)
- return;
+ if (!cmd_buffer->state.dynamic.discard_rectangle.count)
+ return;
- radeon_set_context_reg_seq(cmd_buffer->cs, R_028210_PA_SC_CLIPRECT_0_TL,
- cmd_buffer->state.dynamic.discard_rectangle.count * 2);
- for (unsigned i = 0; i < cmd_buffer->state.dynamic.discard_rectangle.count; ++i) {
- VkRect2D rect = cmd_buffer->state.dynamic.discard_rectangle.rectangles[i];
- radeon_emit(cmd_buffer->cs, S_028210_TL_X(rect.offset.x) | S_028210_TL_Y(rect.offset.y));
- radeon_emit(cmd_buffer->cs, S_028214_BR_X(rect.offset.x + rect.extent.width) |
- S_028214_BR_Y(rect.offset.y + rect.extent.height));
- }
+ radeon_set_context_reg_seq(cmd_buffer->cs, R_028210_PA_SC_CLIPRECT_0_TL,
+ cmd_buffer->state.dynamic.discard_rectangle.count * 2);
+ for (unsigned i = 0; i < cmd_buffer->state.dynamic.discard_rectangle.count; ++i) {
+ VkRect2D rect = cmd_buffer->state.dynamic.discard_rectangle.rectangles[i];
+ radeon_emit(cmd_buffer->cs, S_028210_TL_X(rect.offset.x) | S_028210_TL_Y(rect.offset.y));
+ radeon_emit(cmd_buffer->cs, S_028214_BR_X(rect.offset.x + rect.extent.width) |
+ S_028214_BR_Y(rect.offset.y + rect.extent.height));
+ }
}
static void
radv_emit_line_width(struct radv_cmd_buffer *cmd_buffer)
{
- unsigned width = cmd_buffer->state.dynamic.line_width * 8;
+ unsigned width = cmd_buffer->state.dynamic.line_width * 8;
- radeon_set_context_reg(cmd_buffer->cs, R_028A08_PA_SU_LINE_CNTL,
- S_028A08_WIDTH(CLAMP(width, 0, 0xFFFF)));
+ radeon_set_context_reg(cmd_buffer->cs, R_028A08_PA_SU_LINE_CNTL,
+ S_028A08_WIDTH(CLAMP(width, 0, 0xFFFF)));
}
static void
radv_emit_blend_constants(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
+ struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
- radeon_set_context_reg_seq(cmd_buffer->cs, R_028414_CB_BLEND_RED, 4);
- radeon_emit_array(cmd_buffer->cs, (uint32_t *)d->blend_constants, 4);
+ radeon_set_context_reg_seq(cmd_buffer->cs, R_028414_CB_BLEND_RED, 4);
+ radeon_emit_array(cmd_buffer->cs, (uint32_t *)d->blend_constants, 4);
}
static void
radv_emit_stencil(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
+ struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
- radeon_set_context_reg_seq(cmd_buffer->cs,
- R_028430_DB_STENCILREFMASK, 2);
- radeon_emit(cmd_buffer->cs,
- S_028430_STENCILTESTVAL(d->stencil_reference.front) |
- S_028430_STENCILMASK(d->stencil_compare_mask.front) |
- S_028430_STENCILWRITEMASK(d->stencil_write_mask.front) |
- S_028430_STENCILOPVAL(1));
- radeon_emit(cmd_buffer->cs,
- S_028434_STENCILTESTVAL_BF(d->stencil_reference.back) |
- S_028434_STENCILMASK_BF(d->stencil_compare_mask.back) |
- S_028434_STENCILWRITEMASK_BF(d->stencil_write_mask.back) |
- S_028434_STENCILOPVAL_BF(1));
+ radeon_set_context_reg_seq(cmd_buffer->cs, R_028430_DB_STENCILREFMASK, 2);
+ radeon_emit(cmd_buffer->cs, S_028430_STENCILTESTVAL(d->stencil_reference.front) |
+ S_028430_STENCILMASK(d->stencil_compare_mask.front) |
+ S_028430_STENCILWRITEMASK(d->stencil_write_mask.front) |
+ S_028430_STENCILOPVAL(1));
+ radeon_emit(cmd_buffer->cs, S_028434_STENCILTESTVAL_BF(d->stencil_reference.back) |
+ S_028434_STENCILMASK_BF(d->stencil_compare_mask.back) |
+ S_028434_STENCILWRITEMASK_BF(d->stencil_write_mask.back) |
+ S_028434_STENCILOPVAL_BF(1));
}
static void
radv_emit_depth_bounds(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
+ struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
- radeon_set_context_reg(cmd_buffer->cs, R_028020_DB_DEPTH_BOUNDS_MIN,
- fui(d->depth_bounds.min));
- radeon_set_context_reg(cmd_buffer->cs, R_028024_DB_DEPTH_BOUNDS_MAX,
- fui(d->depth_bounds.max));
+ radeon_set_context_reg(cmd_buffer->cs, R_028020_DB_DEPTH_BOUNDS_MIN, fui(d->depth_bounds.min));
+ radeon_set_context_reg(cmd_buffer->cs, R_028024_DB_DEPTH_BOUNDS_MAX, fui(d->depth_bounds.max));
}
static void
radv_emit_depth_bias(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
- unsigned slope = fui(d->depth_bias.slope * 16.0f);
+ struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
+ unsigned slope = fui(d->depth_bias.slope * 16.0f);
- radeon_set_context_reg_seq(cmd_buffer->cs,
- R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 5);
- radeon_emit(cmd_buffer->cs, fui(d->depth_bias.clamp)); /* CLAMP */
- radeon_emit(cmd_buffer->cs, slope); /* FRONT SCALE */
- radeon_emit(cmd_buffer->cs, fui(d->depth_bias.bias)); /* FRONT OFFSET */
- radeon_emit(cmd_buffer->cs, slope); /* BACK SCALE */
- radeon_emit(cmd_buffer->cs, fui(d->depth_bias.bias)); /* BACK OFFSET */
+ radeon_set_context_reg_seq(cmd_buffer->cs, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 5);
+ radeon_emit(cmd_buffer->cs, fui(d->depth_bias.clamp)); /* CLAMP */
+ radeon_emit(cmd_buffer->cs, slope); /* FRONT SCALE */
+ radeon_emit(cmd_buffer->cs, fui(d->depth_bias.bias)); /* FRONT OFFSET */
+ radeon_emit(cmd_buffer->cs, slope); /* BACK SCALE */
+ radeon_emit(cmd_buffer->cs, fui(d->depth_bias.bias)); /* BACK OFFSET */
}
static void
radv_emit_line_stipple(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
- uint32_t auto_reset_cntl = 1;
+ struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
+ uint32_t auto_reset_cntl = 1;
- if (d->primitive_topology == V_008958_DI_PT_LINESTRIP)
- auto_reset_cntl = 2;
+ if (d->primitive_topology == V_008958_DI_PT_LINESTRIP)
+ auto_reset_cntl = 2;
- radeon_set_context_reg(cmd_buffer->cs, R_028A0C_PA_SC_LINE_STIPPLE,
- S_028A0C_LINE_PATTERN(d->line_stipple.pattern) |
- S_028A0C_REPEAT_COUNT(d->line_stipple.factor - 1) |
- S_028A0C_AUTO_RESET_CNTL(auto_reset_cntl));
+ radeon_set_context_reg(cmd_buffer->cs, R_028A0C_PA_SC_LINE_STIPPLE,
+ S_028A0C_LINE_PATTERN(d->line_stipple.pattern) |
+ S_028A0C_REPEAT_COUNT(d->line_stipple.factor - 1) |
+ S_028A0C_AUTO_RESET_CNTL(auto_reset_cntl));
}
static void
radv_emit_culling(struct radv_cmd_buffer *cmd_buffer, uint32_t states)
{
- unsigned pa_su_sc_mode_cntl = cmd_buffer->state.pipeline->graphics.pa_su_sc_mode_cntl;
- struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
+ unsigned pa_su_sc_mode_cntl = cmd_buffer->state.pipeline->graphics.pa_su_sc_mode_cntl;
+ struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
- if (states & RADV_CMD_DIRTY_DYNAMIC_CULL_MODE) {
- pa_su_sc_mode_cntl &= C_028814_CULL_FRONT;
- pa_su_sc_mode_cntl |= S_028814_CULL_FRONT(!!(d->cull_mode & VK_CULL_MODE_FRONT_BIT));
+ if (states & RADV_CMD_DIRTY_DYNAMIC_CULL_MODE) {
+ pa_su_sc_mode_cntl &= C_028814_CULL_FRONT;
+ pa_su_sc_mode_cntl |= S_028814_CULL_FRONT(!!(d->cull_mode & VK_CULL_MODE_FRONT_BIT));
- pa_su_sc_mode_cntl &= C_028814_CULL_BACK;
- pa_su_sc_mode_cntl |= S_028814_CULL_BACK(!!(d->cull_mode & VK_CULL_MODE_BACK_BIT));
- }
+ pa_su_sc_mode_cntl &= C_028814_CULL_BACK;
+ pa_su_sc_mode_cntl |= S_028814_CULL_BACK(!!(d->cull_mode & VK_CULL_MODE_BACK_BIT));
+ }
- if (states & RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE) {
- pa_su_sc_mode_cntl &= C_028814_FACE;
- pa_su_sc_mode_cntl |= S_028814_FACE(d->front_face);
- }
+ if (states & RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE) {
+ pa_su_sc_mode_cntl &= C_028814_FACE;
+ pa_su_sc_mode_cntl |= S_028814_FACE(d->front_face);
+ }
- radeon_set_context_reg(cmd_buffer->cs, R_028814_PA_SU_SC_MODE_CNTL,
- pa_su_sc_mode_cntl);
+ radeon_set_context_reg(cmd_buffer->cs, R_028814_PA_SU_SC_MODE_CNTL, pa_su_sc_mode_cntl);
}
static void
radv_emit_primitive_topology(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
+ struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) {
- radeon_set_uconfig_reg_idx(cmd_buffer->device->physical_device,
- cmd_buffer->cs,
- R_030908_VGT_PRIMITIVE_TYPE, 1,
- d->primitive_topology);
- } else {
- radeon_set_config_reg(cmd_buffer->cs,
- R_008958_VGT_PRIMITIVE_TYPE,
- d->primitive_topology);
- }
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) {
+ radeon_set_uconfig_reg_idx(cmd_buffer->device->physical_device, cmd_buffer->cs,
+ R_030908_VGT_PRIMITIVE_TYPE, 1, d->primitive_topology);
+ } else {
+ radeon_set_config_reg(cmd_buffer->cs, R_008958_VGT_PRIMITIVE_TYPE, d->primitive_topology);
+ }
}
static void
radv_emit_depth_control(struct radv_cmd_buffer *cmd_buffer, uint32_t states)
{
- unsigned db_depth_control = cmd_buffer->state.pipeline->graphics.db_depth_control;
- struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
+ unsigned db_depth_control = cmd_buffer->state.pipeline->graphics.db_depth_control;
+ struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
- if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE) {
- db_depth_control &= C_028800_Z_ENABLE;
- db_depth_control |= S_028800_Z_ENABLE(d->depth_test_enable ? 1 : 0);
- }
+ if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE) {
+ db_depth_control &= C_028800_Z_ENABLE;
+ db_depth_control |= S_028800_Z_ENABLE(d->depth_test_enable ? 1 : 0);
+ }
- if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE) {
- db_depth_control &= C_028800_Z_WRITE_ENABLE;
- db_depth_control |= S_028800_Z_WRITE_ENABLE(d->depth_write_enable ? 1 : 0);
- }
+ if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE) {
+ db_depth_control &= C_028800_Z_WRITE_ENABLE;
+ db_depth_control |= S_028800_Z_WRITE_ENABLE(d->depth_write_enable ? 1 : 0);
+ }
- if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP) {
- db_depth_control &= C_028800_ZFUNC;
- db_depth_control |= S_028800_ZFUNC(d->depth_compare_op);
- }
+ if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP) {
+ db_depth_control &= C_028800_ZFUNC;
+ db_depth_control |= S_028800_ZFUNC(d->depth_compare_op);
+ }
- if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE) {
- db_depth_control &= C_028800_DEPTH_BOUNDS_ENABLE;
- db_depth_control |= S_028800_DEPTH_BOUNDS_ENABLE(d->depth_bounds_test_enable ? 1 : 0);
- }
+ if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE) {
+ db_depth_control &= C_028800_DEPTH_BOUNDS_ENABLE;
+ db_depth_control |= S_028800_DEPTH_BOUNDS_ENABLE(d->depth_bounds_test_enable ? 1 : 0);
+ }
- if (states & RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE) {
- db_depth_control &= C_028800_STENCIL_ENABLE;
- db_depth_control |= S_028800_STENCIL_ENABLE(d->stencil_test_enable ? 1 : 0);
+ if (states & RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE) {
+ db_depth_control &= C_028800_STENCIL_ENABLE;
+ db_depth_control |= S_028800_STENCIL_ENABLE(d->stencil_test_enable ? 1 : 0);
- db_depth_control &= C_028800_BACKFACE_ENABLE;
- db_depth_control |= S_028800_BACKFACE_ENABLE(d->stencil_test_enable ? 1 : 0);
- }
+ db_depth_control &= C_028800_BACKFACE_ENABLE;
+ db_depth_control |= S_028800_BACKFACE_ENABLE(d->stencil_test_enable ? 1 : 0);
+ }
- if (states & RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP) {
- db_depth_control &= C_028800_STENCILFUNC;
- db_depth_control |= S_028800_STENCILFUNC(d->stencil_op.front.compare_op);
+ if (states & RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP) {
+ db_depth_control &= C_028800_STENCILFUNC;
+ db_depth_control |= S_028800_STENCILFUNC(d->stencil_op.front.compare_op);
- db_depth_control &= C_028800_STENCILFUNC_BF;
- db_depth_control |= S_028800_STENCILFUNC_BF(d->stencil_op.back.compare_op);
- }
+ db_depth_control &= C_028800_STENCILFUNC_BF;
+ db_depth_control |= S_028800_STENCILFUNC_BF(d->stencil_op.back.compare_op);
+ }
- radeon_set_context_reg(cmd_buffer->cs, R_028800_DB_DEPTH_CONTROL,
- db_depth_control);
+ radeon_set_context_reg(cmd_buffer->cs, R_028800_DB_DEPTH_CONTROL, db_depth_control);
}
static void
radv_emit_stencil_control(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
+ struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
- radeon_set_context_reg(cmd_buffer->cs, R_02842C_DB_STENCIL_CONTROL,
- S_02842C_STENCILFAIL(si_translate_stencil_op(d->stencil_op.front.fail_op)) |
- S_02842C_STENCILZPASS(si_translate_stencil_op(d->stencil_op.front.pass_op)) |
- S_02842C_STENCILZFAIL(si_translate_stencil_op(d->stencil_op.front.depth_fail_op)) |
- S_02842C_STENCILFAIL_BF(si_translate_stencil_op(d->stencil_op.back.fail_op)) |
- S_02842C_STENCILZPASS_BF(si_translate_stencil_op(d->stencil_op.back.pass_op)) |
- S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(d->stencil_op.back.depth_fail_op)));
+ radeon_set_context_reg(
+ cmd_buffer->cs, R_02842C_DB_STENCIL_CONTROL,
+ S_02842C_STENCILFAIL(si_translate_stencil_op(d->stencil_op.front.fail_op)) |
+ S_02842C_STENCILZPASS(si_translate_stencil_op(d->stencil_op.front.pass_op)) |
+ S_02842C_STENCILZFAIL(si_translate_stencil_op(d->stencil_op.front.depth_fail_op)) |
+ S_02842C_STENCILFAIL_BF(si_translate_stencil_op(d->stencil_op.back.fail_op)) |
+ S_02842C_STENCILZPASS_BF(si_translate_stencil_op(d->stencil_op.back.pass_op)) |
+ S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(d->stencil_op.back.depth_fail_op)));
}
static void
radv_emit_fragment_shading_rate(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
- struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
- uint32_t rate_x = MIN2(2, d->fragment_shading_rate.size.width) - 1;
- uint32_t rate_y = MIN2(2, d->fragment_shading_rate.size.height) - 1;
- uint32_t pa_cl_vrs_cntl = pipeline->graphics.vrs.pa_cl_vrs_cntl;
+ struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
+ struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
+ uint32_t rate_x = MIN2(2, d->fragment_shading_rate.size.width) - 1;
+ uint32_t rate_y = MIN2(2, d->fragment_shading_rate.size.height) - 1;
+ uint32_t pa_cl_vrs_cntl = pipeline->graphics.vrs.pa_cl_vrs_cntl;
- /* Emit per-draw VRS rate which is the first combiner. */
- radeon_set_uconfig_reg(cmd_buffer->cs, R_03098C_GE_VRS_RATE,
- S_03098C_RATE_X(rate_x) |
- S_03098C_RATE_Y(rate_y));
+ /* Emit per-draw VRS rate which is the first combiner. */
+ radeon_set_uconfig_reg(cmd_buffer->cs, R_03098C_GE_VRS_RATE,
+ S_03098C_RATE_X(rate_x) | S_03098C_RATE_Y(rate_y));
- /* VERTEX_RATE_COMBINER_MODE controls the combiner mode between the
- * draw rate and the vertex rate.
- */
- pa_cl_vrs_cntl |= S_028848_VERTEX_RATE_COMBINER_MODE(d->fragment_shading_rate.combiner_ops[0]);
+ /* VERTEX_RATE_COMBINER_MODE controls the combiner mode between the
+ * draw rate and the vertex rate.
+ */
+ pa_cl_vrs_cntl |= S_028848_VERTEX_RATE_COMBINER_MODE(d->fragment_shading_rate.combiner_ops[0]);
- radeon_set_context_reg(cmd_buffer->cs, R_028848_PA_CL_VRS_CNTL, pa_cl_vrs_cntl);
+ radeon_set_context_reg(cmd_buffer->cs, R_028848_PA_CL_VRS_CNTL, pa_cl_vrs_cntl);
}
static void
-radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer,
- int index,
- struct radv_color_buffer_info *cb,
- struct radv_image_view *iview,
- VkImageLayout layout,
- bool in_render_loop,
- bool disable_dcc)
-{
- bool is_vi = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX8;
- uint32_t cb_color_info = cb->cb_color_info;
- struct radv_image *image = iview->image;
-
- if (!radv_layout_dcc_compressed(cmd_buffer->device, image, layout, in_render_loop,
- radv_image_queue_family_mask(image,
- cmd_buffer->queue_family_index,
- cmd_buffer->queue_family_index)) ||
- disable_dcc) {
- cb_color_info &= C_028C70_DCC_ENABLE;
- }
-
- if (!radv_layout_fmask_compressed(cmd_buffer->device, image, layout,
- radv_image_queue_family_mask(image,
- cmd_buffer->queue_family_index,
- cmd_buffer->queue_family_index))) {
- cb_color_info &= C_028C70_COMPRESSION;
- }
-
- if (radv_image_is_tc_compat_cmask(image) &&
- (radv_is_fmask_decompress_pipeline(cmd_buffer) ||
- radv_is_dcc_decompress_pipeline(cmd_buffer))) {
- /* If this bit is set, the FMASK decompression operation
- * doesn't occur (DCC_COMPRESS also implies FMASK_DECOMPRESS).
- */
- cb_color_info &= C_028C70_FMASK_COMPRESS_1FRAG_ONLY;
- }
-
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
- radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11);
- radeon_emit(cmd_buffer->cs, cb->cb_color_base);
- radeon_emit(cmd_buffer->cs, 0);
- radeon_emit(cmd_buffer->cs, 0);
- radeon_emit(cmd_buffer->cs, cb->cb_color_view);
- radeon_emit(cmd_buffer->cs, cb_color_info);
- radeon_emit(cmd_buffer->cs, cb->cb_color_attrib);
- radeon_emit(cmd_buffer->cs, cb->cb_dcc_control);
- radeon_emit(cmd_buffer->cs, cb->cb_color_cmask);
- radeon_emit(cmd_buffer->cs, 0);
- radeon_emit(cmd_buffer->cs, cb->cb_color_fmask);
- radeon_emit(cmd_buffer->cs, 0);
-
- radeon_set_context_reg_seq(cmd_buffer->cs, R_028C94_CB_COLOR0_DCC_BASE + index * 0x3c, 1);
- radeon_emit(cmd_buffer->cs, cb->cb_dcc_base);
-
- radeon_set_context_reg(cmd_buffer->cs, R_028E40_CB_COLOR0_BASE_EXT + index * 4,
- cb->cb_color_base >> 32);
- radeon_set_context_reg(cmd_buffer->cs, R_028E60_CB_COLOR0_CMASK_BASE_EXT + index * 4,
- cb->cb_color_cmask >> 32);
- radeon_set_context_reg(cmd_buffer->cs, R_028E80_CB_COLOR0_FMASK_BASE_EXT + index * 4,
- cb->cb_color_fmask >> 32);
- radeon_set_context_reg(cmd_buffer->cs, R_028EA0_CB_COLOR0_DCC_BASE_EXT + index * 4,
- cb->cb_dcc_base >> 32);
- radeon_set_context_reg(cmd_buffer->cs, R_028EC0_CB_COLOR0_ATTRIB2 + index * 4,
- cb->cb_color_attrib2);
- radeon_set_context_reg(cmd_buffer->cs, R_028EE0_CB_COLOR0_ATTRIB3 + index * 4,
- cb->cb_color_attrib3);
- } else if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) {
- radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11);
- radeon_emit(cmd_buffer->cs, cb->cb_color_base);
- radeon_emit(cmd_buffer->cs, S_028C64_BASE_256B(cb->cb_color_base >> 32));
- radeon_emit(cmd_buffer->cs, cb->cb_color_attrib2);
- radeon_emit(cmd_buffer->cs, cb->cb_color_view);
- radeon_emit(cmd_buffer->cs, cb_color_info);
- radeon_emit(cmd_buffer->cs, cb->cb_color_attrib);
- radeon_emit(cmd_buffer->cs, cb->cb_dcc_control);
- radeon_emit(cmd_buffer->cs, cb->cb_color_cmask);
- radeon_emit(cmd_buffer->cs, S_028C80_BASE_256B(cb->cb_color_cmask >> 32));
- radeon_emit(cmd_buffer->cs, cb->cb_color_fmask);
- radeon_emit(cmd_buffer->cs, S_028C88_BASE_256B(cb->cb_color_fmask >> 32));
-
- radeon_set_context_reg_seq(cmd_buffer->cs, R_028C94_CB_COLOR0_DCC_BASE + index * 0x3c, 2);
- radeon_emit(cmd_buffer->cs, cb->cb_dcc_base);
- radeon_emit(cmd_buffer->cs, S_028C98_BASE_256B(cb->cb_dcc_base >> 32));
-
- radeon_set_context_reg(cmd_buffer->cs, R_0287A0_CB_MRT0_EPITCH + index * 4,
- cb->cb_mrt_epitch);
- } else {
- radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11);
- radeon_emit(cmd_buffer->cs, cb->cb_color_base);
- radeon_emit(cmd_buffer->cs, cb->cb_color_pitch);
- radeon_emit(cmd_buffer->cs, cb->cb_color_slice);
- radeon_emit(cmd_buffer->cs, cb->cb_color_view);
- radeon_emit(cmd_buffer->cs, cb_color_info);
- radeon_emit(cmd_buffer->cs, cb->cb_color_attrib);
- radeon_emit(cmd_buffer->cs, cb->cb_dcc_control);
- radeon_emit(cmd_buffer->cs, cb->cb_color_cmask);
- radeon_emit(cmd_buffer->cs, cb->cb_color_cmask_slice);
- radeon_emit(cmd_buffer->cs, cb->cb_color_fmask);
- radeon_emit(cmd_buffer->cs, cb->cb_color_fmask_slice);
-
- if (is_vi) { /* DCC BASE */
- radeon_set_context_reg(cmd_buffer->cs, R_028C94_CB_COLOR0_DCC_BASE + index * 0x3c, cb->cb_dcc_base);
- }
- }
-
- if (radv_dcc_enabled(image, iview->base_mip)) {
- /* Drawing with DCC enabled also compresses colorbuffers. */
- VkImageSubresourceRange range = {
- .aspectMask = iview->aspect_mask,
- .baseMipLevel = iview->base_mip,
- .levelCount = iview->level_count,
- .baseArrayLayer = iview->base_layer,
- .layerCount = iview->layer_count,
- };
-
- radv_update_dcc_metadata(cmd_buffer, image, &range, true);
- }
+radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer, int index,
+ struct radv_color_buffer_info *cb, struct radv_image_view *iview,
+ VkImageLayout layout, bool in_render_loop, bool disable_dcc)
+{
+ bool is_vi = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX8;
+ uint32_t cb_color_info = cb->cb_color_info;
+ struct radv_image *image = iview->image;
+
+ if (!radv_layout_dcc_compressed(
+ cmd_buffer->device, image, layout, in_render_loop,
+ radv_image_queue_family_mask(image, cmd_buffer->queue_family_index,
+ cmd_buffer->queue_family_index)) ||
+ disable_dcc) {
+ cb_color_info &= C_028C70_DCC_ENABLE;
+ }
+
+ if (!radv_layout_fmask_compressed(
+ cmd_buffer->device, image, layout,
+ radv_image_queue_family_mask(image, cmd_buffer->queue_family_index,
+ cmd_buffer->queue_family_index))) {
+ cb_color_info &= C_028C70_COMPRESSION;
+ }
+
+ if (radv_image_is_tc_compat_cmask(image) && (radv_is_fmask_decompress_pipeline(cmd_buffer) ||
+ radv_is_dcc_decompress_pipeline(cmd_buffer))) {
+ /* If this bit is set, the FMASK decompression operation
+ * doesn't occur (DCC_COMPRESS also implies FMASK_DECOMPRESS).
+ */
+ cb_color_info &= C_028C70_FMASK_COMPRESS_1FRAG_ONLY;
+ }
+
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
+ radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11);
+ radeon_emit(cmd_buffer->cs, cb->cb_color_base);
+ radeon_emit(cmd_buffer->cs, 0);
+ radeon_emit(cmd_buffer->cs, 0);
+ radeon_emit(cmd_buffer->cs, cb->cb_color_view);
+ radeon_emit(cmd_buffer->cs, cb_color_info);
+ radeon_emit(cmd_buffer->cs, cb->cb_color_attrib);
+ radeon_emit(cmd_buffer->cs, cb->cb_dcc_control);
+ radeon_emit(cmd_buffer->cs, cb->cb_color_cmask);
+ radeon_emit(cmd_buffer->cs, 0);
+ radeon_emit(cmd_buffer->cs, cb->cb_color_fmask);
+ radeon_emit(cmd_buffer->cs, 0);
+
+ radeon_set_context_reg_seq(cmd_buffer->cs, R_028C94_CB_COLOR0_DCC_BASE + index * 0x3c, 1);
+ radeon_emit(cmd_buffer->cs, cb->cb_dcc_base);
+
+ radeon_set_context_reg(cmd_buffer->cs, R_028E40_CB_COLOR0_BASE_EXT + index * 4,
+ cb->cb_color_base >> 32);
+ radeon_set_context_reg(cmd_buffer->cs, R_028E60_CB_COLOR0_CMASK_BASE_EXT + index * 4,
+ cb->cb_color_cmask >> 32);
+ radeon_set_context_reg(cmd_buffer->cs, R_028E80_CB_COLOR0_FMASK_BASE_EXT + index * 4,
+ cb->cb_color_fmask >> 32);
+ radeon_set_context_reg(cmd_buffer->cs, R_028EA0_CB_COLOR0_DCC_BASE_EXT + index * 4,
+ cb->cb_dcc_base >> 32);
+ radeon_set_context_reg(cmd_buffer->cs, R_028EC0_CB_COLOR0_ATTRIB2 + index * 4,
+ cb->cb_color_attrib2);
+ radeon_set_context_reg(cmd_buffer->cs, R_028EE0_CB_COLOR0_ATTRIB3 + index * 4,
+ cb->cb_color_attrib3);
+ } else if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) {
+ radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11);
+ radeon_emit(cmd_buffer->cs, cb->cb_color_base);
+ radeon_emit(cmd_buffer->cs, S_028C64_BASE_256B(cb->cb_color_base >> 32));
+ radeon_emit(cmd_buffer->cs, cb->cb_color_attrib2);
+ radeon_emit(cmd_buffer->cs, cb->cb_color_view);
+ radeon_emit(cmd_buffer->cs, cb_color_info);
+ radeon_emit(cmd_buffer->cs, cb->cb_color_attrib);
+ radeon_emit(cmd_buffer->cs, cb->cb_dcc_control);
+ radeon_emit(cmd_buffer->cs, cb->cb_color_cmask);
+ radeon_emit(cmd_buffer->cs, S_028C80_BASE_256B(cb->cb_color_cmask >> 32));
+ radeon_emit(cmd_buffer->cs, cb->cb_color_fmask);
+ radeon_emit(cmd_buffer->cs, S_028C88_BASE_256B(cb->cb_color_fmask >> 32));
+
+ radeon_set_context_reg_seq(cmd_buffer->cs, R_028C94_CB_COLOR0_DCC_BASE + index * 0x3c, 2);
+ radeon_emit(cmd_buffer->cs, cb->cb_dcc_base);
+ radeon_emit(cmd_buffer->cs, S_028C98_BASE_256B(cb->cb_dcc_base >> 32));
+
+ radeon_set_context_reg(cmd_buffer->cs, R_0287A0_CB_MRT0_EPITCH + index * 4,
+ cb->cb_mrt_epitch);
+ } else {
+ radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11);
+ radeon_emit(cmd_buffer->cs, cb->cb_color_base);
+ radeon_emit(cmd_buffer->cs, cb->cb_color_pitch);
+ radeon_emit(cmd_buffer->cs, cb->cb_color_slice);
+ radeon_emit(cmd_buffer->cs, cb->cb_color_view);
+ radeon_emit(cmd_buffer->cs, cb_color_info);
+ radeon_emit(cmd_buffer->cs, cb->cb_color_attrib);
+ radeon_emit(cmd_buffer->cs, cb->cb_dcc_control);
+ radeon_emit(cmd_buffer->cs, cb->cb_color_cmask);
+ radeon_emit(cmd_buffer->cs, cb->cb_color_cmask_slice);
+ radeon_emit(cmd_buffer->cs, cb->cb_color_fmask);
+ radeon_emit(cmd_buffer->cs, cb->cb_color_fmask_slice);
+
+ if (is_vi) { /* DCC BASE */
+ radeon_set_context_reg(cmd_buffer->cs, R_028C94_CB_COLOR0_DCC_BASE + index * 0x3c,
+ cb->cb_dcc_base);
+ }
+ }
+
+ if (radv_dcc_enabled(image, iview->base_mip)) {
+ /* Drawing with DCC enabled also compresses colorbuffers. */
+ VkImageSubresourceRange range = {
+ .aspectMask = iview->aspect_mask,
+ .baseMipLevel = iview->base_mip,
+ .levelCount = iview->level_count,
+ .baseArrayLayer = iview->base_layer,
+ .layerCount = iview->layer_count,
+ };
+
+ radv_update_dcc_metadata(cmd_buffer, image, &range, true);
+ }
}
static void
-radv_update_zrange_precision(struct radv_cmd_buffer *cmd_buffer,
- struct radv_ds_buffer_info *ds,
- const struct radv_image_view *iview,
- VkImageLayout layout,
- bool in_render_loop, bool requires_cond_exec)
-{
- const struct radv_image *image = iview->image;
- uint32_t db_z_info = ds->db_z_info;
- uint32_t db_z_info_reg;
-
- if (!cmd_buffer->device->physical_device->rad_info.has_tc_compat_zrange_bug ||
- !radv_image_is_tc_compat_htile(image))
- return;
-
- if (!radv_layout_is_htile_compressed(cmd_buffer->device, image, layout, in_render_loop,
- radv_image_queue_family_mask(image,
- cmd_buffer->queue_family_index,
- cmd_buffer->queue_family_index))) {
- db_z_info &= C_028040_TILE_SURFACE_ENABLE;
- }
-
- db_z_info &= C_028040_ZRANGE_PRECISION;
-
- if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) {
- db_z_info_reg = R_028038_DB_Z_INFO;
- } else {
- db_z_info_reg = R_028040_DB_Z_INFO;
- }
-
- /* When we don't know the last fast clear value we need to emit a
- * conditional packet that will eventually skip the following
- * SET_CONTEXT_REG packet.
- */
- if (requires_cond_exec) {
- uint64_t va = radv_get_tc_compat_zrange_va(image, iview->base_mip);
-
- radeon_emit(cmd_buffer->cs, PKT3(PKT3_COND_EXEC, 3, 0));
- radeon_emit(cmd_buffer->cs, va);
- radeon_emit(cmd_buffer->cs, va >> 32);
- radeon_emit(cmd_buffer->cs, 0);
- radeon_emit(cmd_buffer->cs, 3); /* SET_CONTEXT_REG size */
- }
-
- radeon_set_context_reg(cmd_buffer->cs, db_z_info_reg, db_z_info);
+radv_update_zrange_precision(struct radv_cmd_buffer *cmd_buffer, struct radv_ds_buffer_info *ds,
+ const struct radv_image_view *iview, VkImageLayout layout,
+ bool in_render_loop, bool requires_cond_exec)
+{
+ const struct radv_image *image = iview->image;
+ uint32_t db_z_info = ds->db_z_info;
+ uint32_t db_z_info_reg;
+
+ if (!cmd_buffer->device->physical_device->rad_info.has_tc_compat_zrange_bug ||
+ !radv_image_is_tc_compat_htile(image))
+ return;
+
+ if (!radv_layout_is_htile_compressed(
+ cmd_buffer->device, image, layout, in_render_loop,
+ radv_image_queue_family_mask(image, cmd_buffer->queue_family_index,
+ cmd_buffer->queue_family_index))) {
+ db_z_info &= C_028040_TILE_SURFACE_ENABLE;
+ }
+
+ db_z_info &= C_028040_ZRANGE_PRECISION;
+
+ if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) {
+ db_z_info_reg = R_028038_DB_Z_INFO;
+ } else {
+ db_z_info_reg = R_028040_DB_Z_INFO;
+ }
+
+ /* When we don't know the last fast clear value we need to emit a
+ * conditional packet that will eventually skip the following
+ * SET_CONTEXT_REG packet.
+ */
+ if (requires_cond_exec) {
+ uint64_t va = radv_get_tc_compat_zrange_va(image, iview->base_mip);
+
+ radeon_emit(cmd_buffer->cs, PKT3(PKT3_COND_EXEC, 3, 0));
+ radeon_emit(cmd_buffer->cs, va);
+ radeon_emit(cmd_buffer->cs, va >> 32);
+ radeon_emit(cmd_buffer->cs, 0);
+ radeon_emit(cmd_buffer->cs, 3); /* SET_CONTEXT_REG size */
+ }
+
+ radeon_set_context_reg(cmd_buffer->cs, db_z_info_reg, db_z_info);
}
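Note: the COND_EXEC above predicates the ZRANGE_PRECISION write on the metadata word written by radv_set_tc_compat_zrange_metadata() further down. A rough CPU-side model of that predication, under the assumption that COND_EXEC skips the guarded dwords when the memory word is zero; the 0xC0DE0001 header value and the process() helper are made up for the sketch:

#include <stdint.h>
#include <stdio.h>

static void
process(const uint32_t *cs, unsigned num_dw, const uint32_t *predicate)
{
   for (unsigned i = 0; i < num_dw;) {
      if (cs[i] == 0xC0DE0001u) {          /* stand-in for a COND_EXEC header */
         unsigned exec_count = cs[i + 1];  /* dwords covered by the predicate */
         i += 2;
         if (*predicate == 0)
            i += exec_count;               /* predicate is zero: skip them */
      } else {
         printf("executing dword 0x%08x\n", (unsigned)cs[i]);
         i++;
      }
   }
}

int
main(void)
{
   uint32_t tc_compat_zrange = 0; /* e.g. the last depth clear was not 0.0 */
   const uint32_t cs[] = {
      0xC0DE0001u, 3,                        /* guard the next 3 dwords */
      0x11111111u, 0x22222222u, 0x33333333u, /* the SET_CONTEXT_REG write */
      0x44444444u,                           /* always executed */
   };
   process(cs, 6, &tc_compat_zrange);
   return 0;
}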
static void
-radv_emit_fb_ds_state(struct radv_cmd_buffer *cmd_buffer,
- struct radv_ds_buffer_info *ds,
- struct radv_image_view *iview,
- VkImageLayout layout,
- bool in_render_loop)
-{
- const struct radv_image *image = iview->image;
- uint32_t db_z_info = ds->db_z_info;
- uint32_t db_stencil_info = ds->db_stencil_info;
-
- if (!radv_layout_is_htile_compressed(cmd_buffer->device, image, layout, in_render_loop,
- radv_image_queue_family_mask(image,
- cmd_buffer->queue_family_index,
- cmd_buffer->queue_family_index))) {
- db_z_info &= C_028040_TILE_SURFACE_ENABLE;
- db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
- }
-
- radeon_set_context_reg(cmd_buffer->cs, R_028008_DB_DEPTH_VIEW, ds->db_depth_view);
- radeon_set_context_reg(cmd_buffer->cs, R_028ABC_DB_HTILE_SURFACE, ds->db_htile_surface);
-
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
- radeon_set_context_reg(cmd_buffer->cs, R_028014_DB_HTILE_DATA_BASE, ds->db_htile_data_base);
- radeon_set_context_reg(cmd_buffer->cs, R_02801C_DB_DEPTH_SIZE_XY, ds->db_depth_size);
-
- radeon_set_context_reg_seq(cmd_buffer->cs, R_02803C_DB_DEPTH_INFO, 7);
- radeon_emit(cmd_buffer->cs, S_02803C_RESOURCE_LEVEL(1));
- radeon_emit(cmd_buffer->cs, db_z_info);
- radeon_emit(cmd_buffer->cs, db_stencil_info);
- radeon_emit(cmd_buffer->cs, ds->db_z_read_base);
- radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base);
- radeon_emit(cmd_buffer->cs, ds->db_z_read_base);
- radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base);
-
- radeon_set_context_reg_seq(cmd_buffer->cs, R_028068_DB_Z_READ_BASE_HI, 5);
- radeon_emit(cmd_buffer->cs, ds->db_z_read_base >> 32);
- radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base >> 32);
- radeon_emit(cmd_buffer->cs, ds->db_z_read_base >> 32);
- radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base >> 32);
- radeon_emit(cmd_buffer->cs, ds->db_htile_data_base >> 32);
- } else if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) {
- radeon_set_context_reg_seq(cmd_buffer->cs, R_028014_DB_HTILE_DATA_BASE, 3);
- radeon_emit(cmd_buffer->cs, ds->db_htile_data_base);
- radeon_emit(cmd_buffer->cs, S_028018_BASE_HI(ds->db_htile_data_base >> 32));
- radeon_emit(cmd_buffer->cs, ds->db_depth_size);
-
- radeon_set_context_reg_seq(cmd_buffer->cs, R_028038_DB_Z_INFO, 10);
- radeon_emit(cmd_buffer->cs, db_z_info); /* DB_Z_INFO */
- radeon_emit(cmd_buffer->cs, db_stencil_info); /* DB_STENCIL_INFO */
- radeon_emit(cmd_buffer->cs, ds->db_z_read_base); /* DB_Z_READ_BASE */
- radeon_emit(cmd_buffer->cs, S_028044_BASE_HI(ds->db_z_read_base >> 32)); /* DB_Z_READ_BASE_HI */
- radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base); /* DB_STENCIL_READ_BASE */
- radeon_emit(cmd_buffer->cs, S_02804C_BASE_HI(ds->db_stencil_read_base >> 32)); /* DB_STENCIL_READ_BASE_HI */
- radeon_emit(cmd_buffer->cs, ds->db_z_write_base); /* DB_Z_WRITE_BASE */
- radeon_emit(cmd_buffer->cs, S_028054_BASE_HI(ds->db_z_write_base >> 32)); /* DB_Z_WRITE_BASE_HI */
- radeon_emit(cmd_buffer->cs, ds->db_stencil_write_base); /* DB_STENCIL_WRITE_BASE */
- radeon_emit(cmd_buffer->cs, S_02805C_BASE_HI(ds->db_stencil_write_base >> 32)); /* DB_STENCIL_WRITE_BASE_HI */
-
- radeon_set_context_reg_seq(cmd_buffer->cs, R_028068_DB_Z_INFO2, 2);
- radeon_emit(cmd_buffer->cs, ds->db_z_info2);
- radeon_emit(cmd_buffer->cs, ds->db_stencil_info2);
- } else {
- radeon_set_context_reg(cmd_buffer->cs, R_028014_DB_HTILE_DATA_BASE, ds->db_htile_data_base);
-
- radeon_set_context_reg_seq(cmd_buffer->cs, R_02803C_DB_DEPTH_INFO, 9);
- radeon_emit(cmd_buffer->cs, ds->db_depth_info); /* R_02803C_DB_DEPTH_INFO */
- radeon_emit(cmd_buffer->cs, db_z_info); /* R_028040_DB_Z_INFO */
- radeon_emit(cmd_buffer->cs, db_stencil_info); /* R_028044_DB_STENCIL_INFO */
- radeon_emit(cmd_buffer->cs, ds->db_z_read_base); /* R_028048_DB_Z_READ_BASE */
- radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base); /* R_02804C_DB_STENCIL_READ_BASE */
- radeon_emit(cmd_buffer->cs, ds->db_z_write_base); /* R_028050_DB_Z_WRITE_BASE */
- radeon_emit(cmd_buffer->cs, ds->db_stencil_write_base); /* R_028054_DB_STENCIL_WRITE_BASE */
- radeon_emit(cmd_buffer->cs, ds->db_depth_size); /* R_028058_DB_DEPTH_SIZE */
- radeon_emit(cmd_buffer->cs, ds->db_depth_slice); /* R_02805C_DB_DEPTH_SLICE */
-
- }
-
- /* Update the ZRANGE_PRECISION value for the TC-compat bug. */
- radv_update_zrange_precision(cmd_buffer, ds, iview, layout,
- in_render_loop, true);
-
- radeon_set_context_reg(cmd_buffer->cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
- ds->pa_su_poly_offset_db_fmt_cntl);
+radv_emit_fb_ds_state(struct radv_cmd_buffer *cmd_buffer, struct radv_ds_buffer_info *ds,
+ struct radv_image_view *iview, VkImageLayout layout, bool in_render_loop)
+{
+ const struct radv_image *image = iview->image;
+ uint32_t db_z_info = ds->db_z_info;
+ uint32_t db_stencil_info = ds->db_stencil_info;
+
+ if (!radv_layout_is_htile_compressed(
+ cmd_buffer->device, image, layout, in_render_loop,
+ radv_image_queue_family_mask(image, cmd_buffer->queue_family_index,
+ cmd_buffer->queue_family_index))) {
+ db_z_info &= C_028040_TILE_SURFACE_ENABLE;
+ db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
+ }
+
+ radeon_set_context_reg(cmd_buffer->cs, R_028008_DB_DEPTH_VIEW, ds->db_depth_view);
+ radeon_set_context_reg(cmd_buffer->cs, R_028ABC_DB_HTILE_SURFACE, ds->db_htile_surface);
+
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
+ radeon_set_context_reg(cmd_buffer->cs, R_028014_DB_HTILE_DATA_BASE, ds->db_htile_data_base);
+ radeon_set_context_reg(cmd_buffer->cs, R_02801C_DB_DEPTH_SIZE_XY, ds->db_depth_size);
+
+ radeon_set_context_reg_seq(cmd_buffer->cs, R_02803C_DB_DEPTH_INFO, 7);
+ radeon_emit(cmd_buffer->cs, S_02803C_RESOURCE_LEVEL(1));
+ radeon_emit(cmd_buffer->cs, db_z_info);
+ radeon_emit(cmd_buffer->cs, db_stencil_info);
+ radeon_emit(cmd_buffer->cs, ds->db_z_read_base);
+ radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base);
+ radeon_emit(cmd_buffer->cs, ds->db_z_read_base);
+ radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base);
+
+ radeon_set_context_reg_seq(cmd_buffer->cs, R_028068_DB_Z_READ_BASE_HI, 5);
+ radeon_emit(cmd_buffer->cs, ds->db_z_read_base >> 32);
+ radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base >> 32);
+ radeon_emit(cmd_buffer->cs, ds->db_z_read_base >> 32);
+ radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base >> 32);
+ radeon_emit(cmd_buffer->cs, ds->db_htile_data_base >> 32);
+ } else if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) {
+ radeon_set_context_reg_seq(cmd_buffer->cs, R_028014_DB_HTILE_DATA_BASE, 3);
+ radeon_emit(cmd_buffer->cs, ds->db_htile_data_base);
+ radeon_emit(cmd_buffer->cs, S_028018_BASE_HI(ds->db_htile_data_base >> 32));
+ radeon_emit(cmd_buffer->cs, ds->db_depth_size);
+
+ radeon_set_context_reg_seq(cmd_buffer->cs, R_028038_DB_Z_INFO, 10);
+ radeon_emit(cmd_buffer->cs, db_z_info); /* DB_Z_INFO */
+ radeon_emit(cmd_buffer->cs, db_stencil_info); /* DB_STENCIL_INFO */
+ radeon_emit(cmd_buffer->cs, ds->db_z_read_base); /* DB_Z_READ_BASE */
+ radeon_emit(cmd_buffer->cs,
+ S_028044_BASE_HI(ds->db_z_read_base >> 32)); /* DB_Z_READ_BASE_HI */
+ radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base); /* DB_STENCIL_READ_BASE */
+ radeon_emit(cmd_buffer->cs,
+ S_02804C_BASE_HI(ds->db_stencil_read_base >> 32)); /* DB_STENCIL_READ_BASE_HI */
+ radeon_emit(cmd_buffer->cs, ds->db_z_write_base); /* DB_Z_WRITE_BASE */
+ radeon_emit(cmd_buffer->cs,
+ S_028054_BASE_HI(ds->db_z_write_base >> 32)); /* DB_Z_WRITE_BASE_HI */
+ radeon_emit(cmd_buffer->cs, ds->db_stencil_write_base); /* DB_STENCIL_WRITE_BASE */
+ radeon_emit(cmd_buffer->cs,
+ S_02805C_BASE_HI(ds->db_stencil_write_base >> 32)); /* DB_STENCIL_WRITE_BASE_HI */
+
+ radeon_set_context_reg_seq(cmd_buffer->cs, R_028068_DB_Z_INFO2, 2);
+ radeon_emit(cmd_buffer->cs, ds->db_z_info2);
+ radeon_emit(cmd_buffer->cs, ds->db_stencil_info2);
+ } else {
+ radeon_set_context_reg(cmd_buffer->cs, R_028014_DB_HTILE_DATA_BASE, ds->db_htile_data_base);
+
+ radeon_set_context_reg_seq(cmd_buffer->cs, R_02803C_DB_DEPTH_INFO, 9);
+ radeon_emit(cmd_buffer->cs, ds->db_depth_info); /* R_02803C_DB_DEPTH_INFO */
+ radeon_emit(cmd_buffer->cs, db_z_info); /* R_028040_DB_Z_INFO */
+ radeon_emit(cmd_buffer->cs, db_stencil_info); /* R_028044_DB_STENCIL_INFO */
+ radeon_emit(cmd_buffer->cs, ds->db_z_read_base); /* R_028048_DB_Z_READ_BASE */
+ radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base); /* R_02804C_DB_STENCIL_READ_BASE */
+ radeon_emit(cmd_buffer->cs, ds->db_z_write_base); /* R_028050_DB_Z_WRITE_BASE */
+ radeon_emit(cmd_buffer->cs, ds->db_stencil_write_base); /* R_028054_DB_STENCIL_WRITE_BASE */
+ radeon_emit(cmd_buffer->cs, ds->db_depth_size); /* R_028058_DB_DEPTH_SIZE */
+ radeon_emit(cmd_buffer->cs, ds->db_depth_slice); /* R_02805C_DB_DEPTH_SLICE */
+ }
+
+ /* Update the ZRANGE_PRECISION value for the TC-compat bug. */
+ radv_update_zrange_precision(cmd_buffer, ds, iview, layout, in_render_loop, true);
+
+ radeon_set_context_reg(cmd_buffer->cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
+ ds->pa_su_poly_offset_db_fmt_cntl);
}
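Note: radeon_set_context_reg_seq() as used above announces a write to `count` consecutive context registers starting at the given offset, and every following radeon_emit() supplies one value in order. A small standalone sketch of what such a sequence expands to (a model, not the real packet builder):

#include <stdint.h>

struct reg_write {
   uint32_t reg;   /* byte offset of the register */
   uint32_t value; /* dword written to it */
};

/* Expand one set_context_reg_seq(reg, count) followed by `count` emitted
 * values into the individual register writes it stands for. */
static void
expand_reg_seq(uint32_t reg, const uint32_t *values, unsigned count,
               struct reg_write *out)
{
   for (unsigned i = 0; i < count; i++) {
      out[i].reg = reg + 4 * i; /* consecutive dword-sized registers */
      out[i].value = values[i];
   }
}

int
main(void)
{
   const uint32_t vals[2] = {0x1, 0x2};
   struct reg_write out[2];
   expand_reg_seq(0x028038, vals, 2, out); /* the GFX9 sequence above starts at R_028038_DB_Z_INFO */
   return out[1].reg == 0x02803C ? 0 : 1;
}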
/**
@@ -1873,157 +1791,143 @@ radv_emit_fb_ds_state(struct radv_cmd_buffer *cmd_buffer,
*/
static void
radv_update_bound_fast_clear_ds(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_image_view *iview,
- VkClearDepthStencilValue ds_clear_value,
- VkImageAspectFlags aspects)
-{
- const struct radv_subpass *subpass = cmd_buffer->state.subpass;
- const struct radv_image *image = iview->image;
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- uint32_t att_idx;
-
- if (!cmd_buffer->state.attachments || !subpass)
- return;
-
- if (!subpass->depth_stencil_attachment)
- return;
-
- att_idx = subpass->depth_stencil_attachment->attachment;
- if (cmd_buffer->state.attachments[att_idx].iview->image != image)
- return;
-
- if (aspects == (VK_IMAGE_ASPECT_DEPTH_BIT |
- VK_IMAGE_ASPECT_STENCIL_BIT)) {
- radeon_set_context_reg_seq(cs, R_028028_DB_STENCIL_CLEAR, 2);
- radeon_emit(cs, ds_clear_value.stencil);
- radeon_emit(cs, fui(ds_clear_value.depth));
- } else if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
- radeon_set_context_reg_seq(cs, R_02802C_DB_DEPTH_CLEAR, 1);
- radeon_emit(cs, fui(ds_clear_value.depth));
- } else {
- assert(aspects == VK_IMAGE_ASPECT_STENCIL_BIT);
- radeon_set_context_reg_seq(cs, R_028028_DB_STENCIL_CLEAR, 1);
- radeon_emit(cs, ds_clear_value.stencil);
- }
-
- /* Update the ZRANGE_PRECISION value for the TC-compat bug. This is
- * only needed when clearing Z to 0.0.
- */
- if ((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
- ds_clear_value.depth == 0.0) {
- VkImageLayout layout = subpass->depth_stencil_attachment->layout;
- bool in_render_loop = subpass->depth_stencil_attachment->in_render_loop;
-
- radv_update_zrange_precision(cmd_buffer, &cmd_buffer->state.attachments[att_idx].ds,
- iview, layout, in_render_loop, false);
- }
-
- cmd_buffer->state.context_roll_without_scissor_emitted = true;
+ const struct radv_image_view *iview,
+ VkClearDepthStencilValue ds_clear_value, VkImageAspectFlags aspects)
+{
+ const struct radv_subpass *subpass = cmd_buffer->state.subpass;
+ const struct radv_image *image = iview->image;
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ uint32_t att_idx;
+
+ if (!cmd_buffer->state.attachments || !subpass)
+ return;
+
+ if (!subpass->depth_stencil_attachment)
+ return;
+
+ att_idx = subpass->depth_stencil_attachment->attachment;
+ if (cmd_buffer->state.attachments[att_idx].iview->image != image)
+ return;
+
+ if (aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
+ radeon_set_context_reg_seq(cs, R_028028_DB_STENCIL_CLEAR, 2);
+ radeon_emit(cs, ds_clear_value.stencil);
+ radeon_emit(cs, fui(ds_clear_value.depth));
+ } else if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
+ radeon_set_context_reg_seq(cs, R_02802C_DB_DEPTH_CLEAR, 1);
+ radeon_emit(cs, fui(ds_clear_value.depth));
+ } else {
+ assert(aspects == VK_IMAGE_ASPECT_STENCIL_BIT);
+ radeon_set_context_reg_seq(cs, R_028028_DB_STENCIL_CLEAR, 1);
+ radeon_emit(cs, ds_clear_value.stencil);
+ }
+
+ /* Update the ZRANGE_PRECISION value for the TC-compat bug. This is
+ * only needed when clearing Z to 0.0.
+ */
+ if ((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && ds_clear_value.depth == 0.0) {
+ VkImageLayout layout = subpass->depth_stencil_attachment->layout;
+ bool in_render_loop = subpass->depth_stencil_attachment->in_render_loop;
+
+ radv_update_zrange_precision(cmd_buffer, &cmd_buffer->state.attachments[att_idx].ds, iview,
+ layout, in_render_loop, false);
+ }
+
+ cmd_buffer->state.context_roll_without_scissor_emitted = true;
}
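Note: fui() above simply reinterprets the float depth clear value as its raw IEEE-754 bit pattern, since DB_DEPTH_CLEAR stores the bits directly. A minimal standalone equivalent (the helper name is mine, not Mesa's):

#include <stdint.h>
#include <string.h>

static uint32_t
float_to_bits(float f)
{
   uint32_t u;
   memcpy(&u, &f, sizeof(u)); /* well-defined type pun */
   return u;
}

int
main(void)
{
   return float_to_bits(1.0f) == 0x3f800000u ? 0 : 1; /* 0.0f maps to 0 */
}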
/**
* Set the clear depth/stencil values to the image's metadata.
*/
static void
-radv_set_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *range,
- VkClearDepthStencilValue ds_clear_value,
- VkImageAspectFlags aspects)
-{
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- uint32_t level_count = radv_get_levelCount(image, range);
-
- if (aspects == (VK_IMAGE_ASPECT_DEPTH_BIT |
- VK_IMAGE_ASPECT_STENCIL_BIT)) {
- uint64_t va = radv_get_ds_clear_value_va(image, range->baseMipLevel);
-
- /* Use the fastest way when both aspects are used. */
- radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + 2 * level_count, cmd_buffer->state.predicating));
- radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
- S_370_WR_CONFIRM(1) |
- S_370_ENGINE_SEL(V_370_PFP));
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
-
- for (uint32_t l = 0; l < level_count; l++) {
- radeon_emit(cs, ds_clear_value.stencil);
- radeon_emit(cs, fui(ds_clear_value.depth));
- }
- } else {
- /* Otherwise we need one WRITE_DATA packet per level. */
- for (uint32_t l = 0; l < level_count; l++) {
- uint64_t va = radv_get_ds_clear_value_va(image, range->baseMipLevel + l);
- unsigned value;
-
- if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
- value = fui(ds_clear_value.depth);
- va += 4;
- } else {
- assert(aspects == VK_IMAGE_ASPECT_STENCIL_BIT);
- value = ds_clear_value.stencil;
- }
-
- radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, cmd_buffer->state.predicating));
- radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
- S_370_WR_CONFIRM(1) |
- S_370_ENGINE_SEL(V_370_PFP));
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- radeon_emit(cs, value);
- }
- }
+radv_set_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *range,
+ VkClearDepthStencilValue ds_clear_value, VkImageAspectFlags aspects)
+{
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ uint32_t level_count = radv_get_levelCount(image, range);
+
+ if (aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
+ uint64_t va = radv_get_ds_clear_value_va(image, range->baseMipLevel);
+
+ /* Use the fastest way when both aspects are used. */
+ radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + 2 * level_count, cmd_buffer->state.predicating));
+ radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+
+ for (uint32_t l = 0; l < level_count; l++) {
+ radeon_emit(cs, ds_clear_value.stencil);
+ radeon_emit(cs, fui(ds_clear_value.depth));
+ }
+ } else {
+ /* Otherwise we need one WRITE_DATA packet per level. */
+ for (uint32_t l = 0; l < level_count; l++) {
+ uint64_t va = radv_get_ds_clear_value_va(image, range->baseMipLevel + l);
+ unsigned value;
+
+ if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
+ value = fui(ds_clear_value.depth);
+ va += 4;
+ } else {
+ assert(aspects == VK_IMAGE_ASPECT_STENCIL_BIT);
+ value = ds_clear_value.stencil;
+ }
+
+ radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, cmd_buffer->state.predicating));
+ radeon_emit(cs,
+ S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ radeon_emit(cs, value);
+ }
+ }
}
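Note: the `2 + 2 * level_count` passed to PKT3() above sizes the WRITE_DATA payload: one control dword, two address dwords, and two data dwords (stencil + depth bits) per mip level, under the usual PM4 type-3 convention that the header's count field is the payload length minus one. A quick standalone check of that arithmetic:

#include <assert.h>

int
main(void)
{
   for (unsigned level_count = 1; level_count <= 13; level_count++) {
      unsigned payload = 1 /* control */ + 2 /* 64-bit address */ + 2 * level_count;
      assert(payload - 1 == 2 + 2 * level_count); /* the PKT3() count argument */
   }
   return 0;
}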
/**
* Update the TC-compat metadata value for this image.
*/
static void
-radv_set_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *range,
- uint32_t value)
+radv_set_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *range, uint32_t value)
{
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
- if (!cmd_buffer->device->physical_device->rad_info.has_tc_compat_zrange_bug)
- return;
+ if (!cmd_buffer->device->physical_device->rad_info.has_tc_compat_zrange_bug)
+ return;
- uint64_t va = radv_get_tc_compat_zrange_va(image, range->baseMipLevel);
- uint32_t level_count = radv_get_levelCount(image, range);
+ uint64_t va = radv_get_tc_compat_zrange_va(image, range->baseMipLevel);
+ uint32_t level_count = radv_get_levelCount(image, range);
- radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + level_count, cmd_buffer->state.predicating));
- radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
- S_370_WR_CONFIRM(1) |
- S_370_ENGINE_SEL(V_370_PFP));
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
+ radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + level_count, cmd_buffer->state.predicating));
+ radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
- for (uint32_t l = 0; l < level_count; l++)
- radeon_emit(cs, value);
+ for (uint32_t l = 0; l < level_count; l++)
+ radeon_emit(cs, value);
}
static void
radv_update_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_image_view *iview,
- VkClearDepthStencilValue ds_clear_value)
+ const struct radv_image_view *iview,
+ VkClearDepthStencilValue ds_clear_value)
{
- VkImageSubresourceRange range = {
- .aspectMask = iview->aspect_mask,
- .baseMipLevel = iview->base_mip,
- .levelCount = iview->level_count,
- .baseArrayLayer = iview->base_layer,
- .layerCount = iview->layer_count,
- };
- uint32_t cond_val;
+ VkImageSubresourceRange range = {
+ .aspectMask = iview->aspect_mask,
+ .baseMipLevel = iview->base_mip,
+ .levelCount = iview->level_count,
+ .baseArrayLayer = iview->base_layer,
+ .layerCount = iview->layer_count,
+ };
+ uint32_t cond_val;
- /* Conditionally set DB_Z_INFO.ZRANGE_PRECISION to 0 when the last
- * depth clear value is 0.0f.
- */
- cond_val = ds_clear_value.depth == 0.0f ? UINT_MAX : 0;
+ /* Conditionally set DB_Z_INFO.ZRANGE_PRECISION to 0 when the last
+ * depth clear value is 0.0f.
+ */
+ cond_val = ds_clear_value.depth == 0.0f ? UINT_MAX : 0;
- radv_set_tc_compat_zrange_metadata(cmd_buffer, iview->image, &range,
- cond_val);
+ radv_set_tc_compat_zrange_metadata(cmd_buffer, iview->image, &range, cond_val);
}
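Note: the predicate word written above is the other half of the COND_EXEC in radv_update_zrange_precision(): it is non-zero exactly when the last depth clear was 0.0f, the only case where the ZRANGE_PRECISION override may be applied. Restated as a standalone helper (the name is mine):

#include <limits.h>
#include <stdint.h>

static uint32_t
tc_compat_zrange_cond_val(float last_clear_depth)
{
   /* Non-zero -> the guarded SET_CONTEXT_REG is executed. */
   return last_clear_depth == 0.0f ? UINT_MAX : 0;
}

int
main(void)
{
   return tc_compat_zrange_cond_val(0.0f) != 0 &&
          tc_compat_zrange_cond_val(1.0f) == 0 ? 0 : 1;
}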
/**
@@ -2031,79 +1935,72 @@ radv_update_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer,
*/
void
radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_image_view *iview,
- VkClearDepthStencilValue ds_clear_value,
- VkImageAspectFlags aspects)
+ const struct radv_image_view *iview,
+ VkClearDepthStencilValue ds_clear_value, VkImageAspectFlags aspects)
{
- VkImageSubresourceRange range = {
- .aspectMask = iview->aspect_mask,
- .baseMipLevel = iview->base_mip,
- .levelCount = iview->level_count,
- .baseArrayLayer = iview->base_layer,
- .layerCount = iview->layer_count,
- };
- struct radv_image *image = iview->image;
+ VkImageSubresourceRange range = {
+ .aspectMask = iview->aspect_mask,
+ .baseMipLevel = iview->base_mip,
+ .levelCount = iview->level_count,
+ .baseArrayLayer = iview->base_layer,
+ .layerCount = iview->layer_count,
+ };
+ struct radv_image *image = iview->image;
- assert(radv_htile_enabled(image, range.baseMipLevel));
+ assert(radv_htile_enabled(image, range.baseMipLevel));
- radv_set_ds_clear_metadata(cmd_buffer, iview->image, &range,
- ds_clear_value, aspects);
+ radv_set_ds_clear_metadata(cmd_buffer, iview->image, &range, ds_clear_value, aspects);
- if (radv_image_is_tc_compat_htile(image) &&
- (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) {
- radv_update_tc_compat_zrange_metadata(cmd_buffer, iview,
- ds_clear_value);
- }
+ if (radv_image_is_tc_compat_htile(image) && (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) {
+ radv_update_tc_compat_zrange_metadata(cmd_buffer, iview, ds_clear_value);
+ }
- radv_update_bound_fast_clear_ds(cmd_buffer, iview, ds_clear_value,
- aspects);
+ radv_update_bound_fast_clear_ds(cmd_buffer, iview, ds_clear_value, aspects);
}
/**
* Load the clear depth/stencil values from the image's metadata.
*/
static void
-radv_load_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_image_view *iview)
-{
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- const struct radv_image *image = iview->image;
- VkImageAspectFlags aspects = vk_format_aspects(image->vk_format);
- uint64_t va = radv_get_ds_clear_value_va(image, iview->base_mip);
- unsigned reg_offset = 0, reg_count = 0;
-
- assert(radv_image_has_htile(image));
-
- if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
- ++reg_count;
- } else {
- ++reg_offset;
- va += 4;
- }
- if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
- ++reg_count;
-
- uint32_t reg = R_028028_DB_STENCIL_CLEAR + 4 * reg_offset;
-
- if (cmd_buffer->device->physical_device->rad_info.has_load_ctx_reg_pkt) {
- radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG_INDEX, 3, 0));
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2);
- radeon_emit(cs, reg_count);
- } else {
- radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
- COPY_DATA_DST_SEL(COPY_DATA_REG) |
- (reg_count == 2 ? COPY_DATA_COUNT_SEL : 0));
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- radeon_emit(cs, reg >> 2);
- radeon_emit(cs, 0);
-
- radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
- radeon_emit(cs, 0);
- }
+radv_load_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview)
+{
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ const struct radv_image *image = iview->image;
+ VkImageAspectFlags aspects = vk_format_aspects(image->vk_format);
+ uint64_t va = radv_get_ds_clear_value_va(image, iview->base_mip);
+ unsigned reg_offset = 0, reg_count = 0;
+
+ assert(radv_image_has_htile(image));
+
+ if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
+ ++reg_count;
+ } else {
+ ++reg_offset;
+ va += 4;
+ }
+ if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
+ ++reg_count;
+
+ uint32_t reg = R_028028_DB_STENCIL_CLEAR + 4 * reg_offset;
+
+ if (cmd_buffer->device->physical_device->rad_info.has_load_ctx_reg_pkt) {
+ radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG_INDEX, 3, 0));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2);
+ radeon_emit(cs, reg_count);
+ } else {
+ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG) |
+ (reg_count == 2 ? COPY_DATA_COUNT_SEL : 0));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ radeon_emit(cs, reg >> 2);
+ radeon_emit(cs, 0);
+
+ radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
+ radeon_emit(cs, 0);
+ }
}
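Note: the shifts above convert register byte offsets into the dword indices the load/copy packets expect: LOAD_CONTEXT_REG_INDEX takes the index relative to the start of the context-register range (SI_CONTEXT_REG_OFFSET, 0x28000 in Mesa), while COPY_DATA takes the absolute register dword index. A standalone check, assuming that 0x28000 base:

#include <assert.h>
#include <stdint.h>

#define CONTEXT_REG_BASE 0x28000u /* assumed value of SI_CONTEXT_REG_OFFSET */

int
main(void)
{
   uint32_t reg = 0x028028u; /* R_028028_DB_STENCIL_CLEAR */
   assert(((reg - CONTEXT_REG_BASE) >> 2) == 0x0A);  /* relative dword index */
   assert((reg >> 2) == 0xA00Au);                    /* absolute dword index */
   return 0;
}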
/*
@@ -2112,126 +2009,115 @@ radv_load_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
* cmask eliminate is required.
*/
void
-radv_update_fce_metadata(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *range, bool value)
+radv_update_fce_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *range, bool value)
{
- if (!image->fce_pred_offset)
- return;
+ if (!image->fce_pred_offset)
+ return;
- uint64_t pred_val = value;
- uint64_t va = radv_image_get_fce_pred_va(image, range->baseMipLevel);
- uint32_t level_count = radv_get_levelCount(image, range);
- uint32_t count = 2 * level_count;
+ uint64_t pred_val = value;
+ uint64_t va = radv_image_get_fce_pred_va(image, range->baseMipLevel);
+ uint32_t level_count = radv_get_levelCount(image, range);
+ uint32_t count = 2 * level_count;
- radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0));
- radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEM) |
- S_370_WR_CONFIRM(1) |
- S_370_ENGINE_SEL(V_370_PFP));
- radeon_emit(cmd_buffer->cs, va);
- radeon_emit(cmd_buffer->cs, va >> 32);
+ radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0));
+ radeon_emit(cmd_buffer->cs,
+ S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP));
+ radeon_emit(cmd_buffer->cs, va);
+ radeon_emit(cmd_buffer->cs, va >> 32);
- for (uint32_t l = 0; l < level_count; l++) {
- radeon_emit(cmd_buffer->cs, pred_val);
- radeon_emit(cmd_buffer->cs, pred_val >> 32);
- }
+ for (uint32_t l = 0; l < level_count; l++) {
+ radeon_emit(cmd_buffer->cs, pred_val);
+ radeon_emit(cmd_buffer->cs, pred_val >> 32);
+ }
}
/**
* Update the DCC predicate to reflect the compression state.
*/
void
-radv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *range, bool value)
+radv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *range, bool value)
{
- if (image->dcc_pred_offset == 0)
- return;
+ if (image->dcc_pred_offset == 0)
+ return;
- uint64_t pred_val = value;
- uint64_t va = radv_image_get_dcc_pred_va(image, range->baseMipLevel);
- uint32_t level_count = radv_get_levelCount(image, range);
- uint32_t count = 2 * level_count;
+ uint64_t pred_val = value;
+ uint64_t va = radv_image_get_dcc_pred_va(image, range->baseMipLevel);
+ uint32_t level_count = radv_get_levelCount(image, range);
+ uint32_t count = 2 * level_count;
- assert(radv_dcc_enabled(image, range->baseMipLevel));
+ assert(radv_dcc_enabled(image, range->baseMipLevel));
- radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0));
- radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEM) |
- S_370_WR_CONFIRM(1) |
- S_370_ENGINE_SEL(V_370_PFP));
- radeon_emit(cmd_buffer->cs, va);
- radeon_emit(cmd_buffer->cs, va >> 32);
+ radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0));
+ radeon_emit(cmd_buffer->cs,
+ S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP));
+ radeon_emit(cmd_buffer->cs, va);
+ radeon_emit(cmd_buffer->cs, va >> 32);
- for (uint32_t l = 0; l < level_count; l++) {
- radeon_emit(cmd_buffer->cs, pred_val);
- radeon_emit(cmd_buffer->cs, pred_val >> 32);
- }
+ for (uint32_t l = 0; l < level_count; l++) {
+ radeon_emit(cmd_buffer->cs, pred_val);
+ radeon_emit(cmd_buffer->cs, pred_val >> 32);
+ }
}
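Note: the 64-bit predicate value (like every 64-bit GPU address in this file) is emitted as two dwords, low half first. A trivial standalone round-trip of that split:

#include <assert.h>
#include <stdint.h>

int
main(void)
{
   uint64_t value = 0x0000800112345678ull;     /* hypothetical example */
   uint32_t lo = (uint32_t)value;              /* radeon_emit(cs, value) */
   uint32_t hi = (uint32_t)(value >> 32);      /* radeon_emit(cs, value >> 32) */
   assert((((uint64_t)hi << 32) | lo) == value);
   return 0;
}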
/**
* Update the fast clear color values if the image is bound as a color buffer.
*/
static void
-radv_update_bound_fast_clear_color(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- int cb_idx,
- uint32_t color_values[2])
+radv_update_bound_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ int cb_idx, uint32_t color_values[2])
{
- const struct radv_subpass *subpass = cmd_buffer->state.subpass;
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- uint32_t att_idx;
+ const struct radv_subpass *subpass = cmd_buffer->state.subpass;
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ uint32_t att_idx;
- if (!cmd_buffer->state.attachments || !subpass)
- return;
+ if (!cmd_buffer->state.attachments || !subpass)
+ return;
- att_idx = subpass->color_attachments[cb_idx].attachment;
- if (att_idx == VK_ATTACHMENT_UNUSED)
- return;
+ att_idx = subpass->color_attachments[cb_idx].attachment;
+ if (att_idx == VK_ATTACHMENT_UNUSED)
+ return;
- if (cmd_buffer->state.attachments[att_idx].iview->image != image)
- return;
+ if (cmd_buffer->state.attachments[att_idx].iview->image != image)
+ return;
- radeon_set_context_reg_seq(cs, R_028C8C_CB_COLOR0_CLEAR_WORD0 + cb_idx * 0x3c, 2);
- radeon_emit(cs, color_values[0]);
- radeon_emit(cs, color_values[1]);
+ radeon_set_context_reg_seq(cs, R_028C8C_CB_COLOR0_CLEAR_WORD0 + cb_idx * 0x3c, 2);
+ radeon_emit(cs, color_values[0]);
+ radeon_emit(cs, color_values[1]);
- cmd_buffer->state.context_roll_without_scissor_emitted = true;
+ cmd_buffer->state.context_roll_without_scissor_emitted = true;
}
/**
* Set the clear color values to the image's metadata.
*/
static void
-radv_set_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *range,
- uint32_t color_values[2])
-{
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- uint32_t level_count = radv_get_levelCount(image, range);
- uint32_t count = 2 * level_count;
-
- assert(radv_image_has_cmask(image) ||
- radv_dcc_enabled(image, range->baseMipLevel));
-
- if (radv_image_has_clear_value(image)) {
- uint64_t va = radv_image_get_fast_clear_va(image, range->baseMipLevel);
-
- radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + count, cmd_buffer->state.predicating));
- radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
- S_370_WR_CONFIRM(1) |
- S_370_ENGINE_SEL(V_370_PFP));
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
-
- for (uint32_t l = 0; l < level_count; l++) {
- radeon_emit(cs, color_values[0]);
- radeon_emit(cs, color_values[1]);
- }
- } else {
- /* Some default value we can set in the update. */
- assert(color_values[0] == 0 && color_values[1] == 0);
- }
+radv_set_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *range, uint32_t color_values[2])
+{
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ uint32_t level_count = radv_get_levelCount(image, range);
+ uint32_t count = 2 * level_count;
+
+ assert(radv_image_has_cmask(image) || radv_dcc_enabled(image, range->baseMipLevel));
+
+ if (radv_image_has_clear_value(image)) {
+ uint64_t va = radv_image_get_fast_clear_va(image, range->baseMipLevel);
+
+ radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + count, cmd_buffer->state.predicating));
+ radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+
+ for (uint32_t l = 0; l < level_count; l++) {
+ radeon_emit(cs, color_values[0]);
+ radeon_emit(cs, color_values[1]);
+ }
+ } else {
+ /* Some default value we can set in the update. */
+ assert(color_values[0] == 0 && color_values[1] == 0);
+ }
}
/**
@@ -2239,72 +2125,65 @@ radv_set_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
*/
void
radv_update_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_image_view *iview,
- int cb_idx,
- uint32_t color_values[2])
+ const struct radv_image_view *iview, int cb_idx,
+ uint32_t color_values[2])
{
- struct radv_image *image = iview->image;
- VkImageSubresourceRange range = {
- .aspectMask = iview->aspect_mask,
- .baseMipLevel = iview->base_mip,
- .levelCount = iview->level_count,
- .baseArrayLayer = iview->base_layer,
- .layerCount = iview->layer_count,
- };
+ struct radv_image *image = iview->image;
+ VkImageSubresourceRange range = {
+ .aspectMask = iview->aspect_mask,
+ .baseMipLevel = iview->base_mip,
+ .levelCount = iview->level_count,
+ .baseArrayLayer = iview->base_layer,
+ .layerCount = iview->layer_count,
+ };
- assert(radv_image_has_cmask(image) ||
- radv_dcc_enabled(image, iview->base_mip));
+ assert(radv_image_has_cmask(image) || radv_dcc_enabled(image, iview->base_mip));
- radv_set_color_clear_metadata(cmd_buffer, image, &range, color_values);
+ radv_set_color_clear_metadata(cmd_buffer, image, &range, color_values);
- radv_update_bound_fast_clear_color(cmd_buffer, image, cb_idx,
- color_values);
+ radv_update_bound_fast_clear_color(cmd_buffer, image, cb_idx, color_values);
}
/**
* Load the clear color values from the image's metadata.
*/
static void
-radv_load_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image_view *iview,
- int cb_idx)
-{
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- struct radv_image *image = iview->image;
-
- if (!radv_image_has_cmask(image) &&
- !radv_dcc_enabled(image, iview->base_mip))
- return;
-
- if (!radv_image_has_clear_value(image)) {
- uint32_t color_values[2] = {0, 0};
- radv_update_bound_fast_clear_color(cmd_buffer, image, cb_idx,
- color_values);
- return;
- }
-
- uint64_t va = radv_image_get_fast_clear_va(image, iview->base_mip);
- uint32_t reg = R_028C8C_CB_COLOR0_CLEAR_WORD0 + cb_idx * 0x3c;
-
- if (cmd_buffer->device->physical_device->rad_info.has_load_ctx_reg_pkt) {
- radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG_INDEX, 3, cmd_buffer->state.predicating));
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2);
- radeon_emit(cs, 2);
- } else {
- radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, cmd_buffer->state.predicating));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
- COPY_DATA_DST_SEL(COPY_DATA_REG) |
- COPY_DATA_COUNT_SEL);
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- radeon_emit(cs, reg >> 2);
- radeon_emit(cs, 0);
-
- radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, cmd_buffer->state.predicating));
- radeon_emit(cs, 0);
- }
+radv_load_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *iview,
+ int cb_idx)
+{
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ struct radv_image *image = iview->image;
+
+ if (!radv_image_has_cmask(image) && !radv_dcc_enabled(image, iview->base_mip))
+ return;
+
+ if (!radv_image_has_clear_value(image)) {
+ uint32_t color_values[2] = {0, 0};
+ radv_update_bound_fast_clear_color(cmd_buffer, image, cb_idx, color_values);
+ return;
+ }
+
+ uint64_t va = radv_image_get_fast_clear_va(image, iview->base_mip);
+ uint32_t reg = R_028C8C_CB_COLOR0_CLEAR_WORD0 + cb_idx * 0x3c;
+
+ if (cmd_buffer->device->physical_device->rad_info.has_load_ctx_reg_pkt) {
+ radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG_INDEX, 3, cmd_buffer->state.predicating));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2);
+ radeon_emit(cs, 2);
+ } else {
+ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, cmd_buffer->state.predicating));
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG) |
+ COPY_DATA_COUNT_SEL);
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ radeon_emit(cs, reg >> 2);
+ radeon_emit(cs, 0);
+
+ radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, cmd_buffer->state.predicating));
+ radeon_emit(cs, 0);
+ }
}
/* GFX9+ metadata cache flushing workaround. metadata cache coherency is
@@ -2316,36 +2195,35 @@ radv_load_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
static void
radv_emit_fb_mip_change_flush(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_framebuffer *framebuffer = cmd_buffer->state.framebuffer;
- const struct radv_subpass *subpass = cmd_buffer->state.subpass;
- bool color_mip_changed = false;
+ struct radv_framebuffer *framebuffer = cmd_buffer->state.framebuffer;
+ const struct radv_subpass *subpass = cmd_buffer->state.subpass;
+ bool color_mip_changed = false;
- /* Entire workaround is not applicable before GFX9 */
- if (cmd_buffer->device->physical_device->rad_info.chip_class < GFX9)
- return;
+ /* Entire workaround is not applicable before GFX9 */
+ if (cmd_buffer->device->physical_device->rad_info.chip_class < GFX9)
+ return;
- if (!framebuffer)
- return;
+ if (!framebuffer)
+ return;
- for (int i = 0; i < subpass->color_count; ++i) {
- int idx = subpass->color_attachments[i].attachment;
- if (idx == VK_ATTACHMENT_UNUSED)
- continue;
+ for (int i = 0; i < subpass->color_count; ++i) {
+ int idx = subpass->color_attachments[i].attachment;
+ if (idx == VK_ATTACHMENT_UNUSED)
+ continue;
- struct radv_image_view *iview = cmd_buffer->state.attachments[idx].iview;
+ struct radv_image_view *iview = cmd_buffer->state.attachments[idx].iview;
- if ((radv_image_has_CB_metadata(iview->image) ||
- radv_image_has_dcc(iview->image)) &&
- cmd_buffer->state.cb_mip[i] != iview->base_mip)
- color_mip_changed = true;
+ if ((radv_image_has_CB_metadata(iview->image) || radv_image_has_dcc(iview->image)) &&
+ cmd_buffer->state.cb_mip[i] != iview->base_mip)
+ color_mip_changed = true;
- cmd_buffer->state.cb_mip[i] = iview->base_mip;
- }
+ cmd_buffer->state.cb_mip[i] = iview->base_mip;
+ }
- if (color_mip_changed) {
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
- RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
- }
+ if (color_mip_changed) {
+ cmd_buffer->state.flush_bits |=
+ RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
+ }
}
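Note: the workaround above never flushes on the spot; it only ORs the required flags into cmd_buffer->state.flush_bits, and the actual cache flush is emitted later. A minimal model of that deferred-flush pattern (flag values are stand-ins, not the real RADV_CMD_FLAG_* bits):

#include <stdint.h>

enum {
   FLAG_FLUSH_AND_INV_CB      = 1u << 0, /* stand-in bits */
   FLAG_FLUSH_AND_INV_CB_META = 1u << 1,
};

struct cmd_state {
   uint32_t flush_bits; /* accumulated, consumed by a later flush */
};

static void
request_cb_meta_flush(struct cmd_state *state)
{
   state->flush_bits |= FLAG_FLUSH_AND_INV_CB | FLAG_FLUSH_AND_INV_CB_META;
}

int
main(void)
{
   struct cmd_state state = {0};
   request_cb_meta_flush(&state);
   return state.flush_bits == 0x3 ? 0 : 1;
}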
/* This function does the flushes for mip changes if the levels are not zero for
@@ -2355,2924 +2233,2739 @@ radv_emit_fb_mip_change_flush(struct radv_cmd_buffer *cmd_buffer)
static void
radv_emit_mip_change_flush_default(struct radv_cmd_buffer *cmd_buffer)
{
- /* Entire workaround is not applicable before GFX9 */
- if (cmd_buffer->device->physical_device->rad_info.chip_class < GFX9)
- return;
+ /* Entire workaround is not applicable before GFX9 */
+ if (cmd_buffer->device->physical_device->rad_info.chip_class < GFX9)
+ return;
- bool need_color_mip_flush = false;
- for (unsigned i = 0; i < 8; ++i) {
- if (cmd_buffer->state.cb_mip[i]) {
- need_color_mip_flush = true;
- break;
- }
- }
+ bool need_color_mip_flush = false;
+ for (unsigned i = 0; i < 8; ++i) {
+ if (cmd_buffer->state.cb_mip[i]) {
+ need_color_mip_flush = true;
+ break;
+ }
+ }
- if (need_color_mip_flush) {
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
- RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
- }
+ if (need_color_mip_flush) {
+ cmd_buffer->state.flush_bits |=
+ RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
+ }
- memset(cmd_buffer->state.cb_mip, 0, sizeof(cmd_buffer->state.cb_mip));
+ memset(cmd_buffer->state.cb_mip, 0, sizeof(cmd_buffer->state.cb_mip));
}
static void
radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer)
{
- int i;
- struct radv_framebuffer *framebuffer = cmd_buffer->state.framebuffer;
- const struct radv_subpass *subpass = cmd_buffer->state.subpass;
-
- /* this may happen for inherited secondary recording */
- if (!framebuffer)
- return;
-
- for (i = 0; i < 8; ++i) {
- if (i >= subpass->color_count || subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED) {
- radeon_set_context_reg(cmd_buffer->cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
- S_028C70_FORMAT(V_028C70_COLOR_INVALID));
- continue;
- }
-
- int idx = subpass->color_attachments[i].attachment;
- struct radv_image_view *iview = cmd_buffer->state.attachments[idx].iview;
- VkImageLayout layout = subpass->color_attachments[i].layout;
- bool in_render_loop = subpass->color_attachments[i].in_render_loop;
-
- radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, iview->bo);
-
- assert(iview->aspect_mask & (VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_PLANE_0_BIT |
- VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT));
- radv_emit_fb_color_state(cmd_buffer, i, &cmd_buffer->state.attachments[idx].cb, iview, layout,
- in_render_loop, cmd_buffer->state.attachments[idx].disable_dcc);
-
- radv_load_color_clear_metadata(cmd_buffer, iview, i);
- }
-
- if (subpass->depth_stencil_attachment) {
- int idx = subpass->depth_stencil_attachment->attachment;
- VkImageLayout layout = subpass->depth_stencil_attachment->layout;
- bool in_render_loop = subpass->depth_stencil_attachment->in_render_loop;
- struct radv_image_view *iview = cmd_buffer->state.attachments[idx].iview;
- radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, cmd_buffer->state.attachments[idx].iview->bo);
-
- radv_emit_fb_ds_state(cmd_buffer, &cmd_buffer->state.attachments[idx].ds, iview, layout, in_render_loop);
-
- if (radv_layout_is_htile_compressed(cmd_buffer->device, iview->image, layout, in_render_loop,
- radv_image_queue_family_mask(iview->image,
- cmd_buffer->queue_family_index,
- cmd_buffer->queue_family_index))) {
- /* Only load the depth/stencil fast clear values when
- * compressed rendering is enabled.
- */
- radv_load_ds_clear_metadata(cmd_buffer, iview);
- }
- } else {
- if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9)
- radeon_set_context_reg_seq(cmd_buffer->cs, R_028038_DB_Z_INFO, 2);
- else
- radeon_set_context_reg_seq(cmd_buffer->cs, R_028040_DB_Z_INFO, 2);
-
- radeon_emit(cmd_buffer->cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* DB_Z_INFO */
- radeon_emit(cmd_buffer->cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* DB_STENCIL_INFO */
- }
- radeon_set_context_reg(cmd_buffer->cs, R_028208_PA_SC_WINDOW_SCISSOR_BR,
- S_028208_BR_X(framebuffer->width) |
- S_028208_BR_Y(framebuffer->height));
-
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX8) {
- bool disable_constant_encode =
- cmd_buffer->device->physical_device->rad_info.has_dcc_constant_encode;
- enum chip_class chip_class =
- cmd_buffer->device->physical_device->rad_info.chip_class;
- uint8_t watermark = chip_class >= GFX10 ? 6 : 4;
-
- radeon_set_context_reg(cmd_buffer->cs, R_028424_CB_DCC_CONTROL,
- S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(chip_class <= GFX9) |
- S_028424_OVERWRITE_COMBINER_WATERMARK(watermark) |
- S_028424_DISABLE_CONSTANT_ENCODE_REG(disable_constant_encode));
- }
-
- if (cmd_buffer->device->dfsm_allowed) {
- radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
- }
-
- cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_FRAMEBUFFER;
+ int i;
+ struct radv_framebuffer *framebuffer = cmd_buffer->state.framebuffer;
+ const struct radv_subpass *subpass = cmd_buffer->state.subpass;
+
+ /* this may happen for inherited secondary recording */
+ if (!framebuffer)
+ return;
+
+ for (i = 0; i < 8; ++i) {
+ if (i >= subpass->color_count ||
+ subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED) {
+ radeon_set_context_reg(cmd_buffer->cs, R_028C70_CB_COLOR0_INFO + i * 0x3C,
+ S_028C70_FORMAT(V_028C70_COLOR_INVALID));
+ continue;
+ }
+
+ int idx = subpass->color_attachments[i].attachment;
+ struct radv_image_view *iview = cmd_buffer->state.attachments[idx].iview;
+ VkImageLayout layout = subpass->color_attachments[i].layout;
+ bool in_render_loop = subpass->color_attachments[i].in_render_loop;
+
+ radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, iview->bo);
+
+ assert(iview->aspect_mask & (VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_PLANE_0_BIT |
+ VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT));
+ radv_emit_fb_color_state(cmd_buffer, i, &cmd_buffer->state.attachments[idx].cb, iview, layout,
+ in_render_loop, cmd_buffer->state.attachments[idx].disable_dcc);
+
+ radv_load_color_clear_metadata(cmd_buffer, iview, i);
+ }
+
+ if (subpass->depth_stencil_attachment) {
+ int idx = subpass->depth_stencil_attachment->attachment;
+ VkImageLayout layout = subpass->depth_stencil_attachment->layout;
+ bool in_render_loop = subpass->depth_stencil_attachment->in_render_loop;
+ struct radv_image_view *iview = cmd_buffer->state.attachments[idx].iview;
+ radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs,
+ cmd_buffer->state.attachments[idx].iview->bo);
+
+ radv_emit_fb_ds_state(cmd_buffer, &cmd_buffer->state.attachments[idx].ds, iview, layout,
+ in_render_loop);
+
+ if (radv_layout_is_htile_compressed(
+ cmd_buffer->device, iview->image, layout, in_render_loop,
+ radv_image_queue_family_mask(iview->image, cmd_buffer->queue_family_index,
+ cmd_buffer->queue_family_index))) {
+ /* Only load the depth/stencil fast clear values when
+ * compressed rendering is enabled.
+ */
+ radv_load_ds_clear_metadata(cmd_buffer, iview);
+ }
+ } else {
+ if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9)
+ radeon_set_context_reg_seq(cmd_buffer->cs, R_028038_DB_Z_INFO, 2);
+ else
+ radeon_set_context_reg_seq(cmd_buffer->cs, R_028040_DB_Z_INFO, 2);
+
+ radeon_emit(cmd_buffer->cs, S_028040_FORMAT(V_028040_Z_INVALID)); /* DB_Z_INFO */
+ radeon_emit(cmd_buffer->cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* DB_STENCIL_INFO */
+ }
+ radeon_set_context_reg(cmd_buffer->cs, R_028208_PA_SC_WINDOW_SCISSOR_BR,
+ S_028208_BR_X(framebuffer->width) | S_028208_BR_Y(framebuffer->height));
+
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX8) {
+ bool disable_constant_encode =
+ cmd_buffer->device->physical_device->rad_info.has_dcc_constant_encode;
+ enum chip_class chip_class = cmd_buffer->device->physical_device->rad_info.chip_class;
+ uint8_t watermark = chip_class >= GFX10 ? 6 : 4;
+
+ radeon_set_context_reg(cmd_buffer->cs, R_028424_CB_DCC_CONTROL,
+ S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(chip_class <= GFX9) |
+ S_028424_OVERWRITE_COMBINER_WATERMARK(watermark) |
+ S_028424_DISABLE_CONSTANT_ENCODE_REG(disable_constant_encode));
+ }
+
+ if (cmd_buffer->device->dfsm_allowed) {
+ radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
+ }
+
+ cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_FRAMEBUFFER;
}
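Note: the PA_SC_WINDOW_SCISSOR_BR write above packs the framebuffer width and height into one register through the generated S_028208_BR_X/BR_Y field macros. A rough sketch only, assuming the X field sits in the low half and the Y field starts at bit 16; the real masks and shifts come from the generated headers:

#include <stdint.h>

static uint32_t
pack_scissor_br(uint32_t br_x, uint32_t br_y)
{
   return (br_x & 0xffffu) | ((br_y & 0xffffu) << 16); /* assumed field layout */
}

int
main(void)
{
   return pack_scissor_br(1920, 1080) == ((1080u << 16) | 1920u) ? 0 : 1;
}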
static void
radv_emit_index_buffer(struct radv_cmd_buffer *cmd_buffer, bool indirect)
{
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- struct radv_cmd_state *state = &cmd_buffer->state;
-
- if (state->index_type != state->last_index_type) {
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
- radeon_set_uconfig_reg_idx(cmd_buffer->device->physical_device,
- cs, R_03090C_VGT_INDEX_TYPE,
- 2, state->index_type);
- } else {
- radeon_emit(cs, PKT3(PKT3_INDEX_TYPE, 0, 0));
- radeon_emit(cs, state->index_type);
- }
-
- state->last_index_type = state->index_type;
- }
-
- /* For the direct indexed draws we use DRAW_INDEX_2, which includes
- * the index_va and max_index_count already. */
- if (!indirect)
- return;
-
- radeon_emit(cs, PKT3(PKT3_INDEX_BASE, 1, 0));
- radeon_emit(cs, state->index_va);
- radeon_emit(cs, state->index_va >> 32);
-
- radeon_emit(cs, PKT3(PKT3_INDEX_BUFFER_SIZE, 0, 0));
- radeon_emit(cs, state->max_index_count);
-
- cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_INDEX_BUFFER;
-}
-
-void radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer)
-{
- bool has_perfect_queries = cmd_buffer->state.perfect_occlusion_queries_enabled;
- struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
- uint32_t pa_sc_mode_cntl_1 =
- pipeline ? pipeline->graphics.ms.pa_sc_mode_cntl_1 : 0;
- uint32_t db_count_control;
-
- if(!cmd_buffer->state.active_occlusion_queries) {
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) {
- if (G_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(pa_sc_mode_cntl_1) &&
- pipeline->graphics.disable_out_of_order_rast_for_occlusion &&
- has_perfect_queries) {
- /* Re-enable out-of-order rasterization if the
- * bound pipeline supports it and if it's has
- * been disabled before starting any perfect
- * occlusion queries.
- */
- radeon_set_context_reg(cmd_buffer->cs,
- R_028A4C_PA_SC_MODE_CNTL_1,
- pa_sc_mode_cntl_1);
- }
- }
- db_count_control = S_028004_ZPASS_INCREMENT_DISABLE(1);
- } else {
- const struct radv_subpass *subpass = cmd_buffer->state.subpass;
- uint32_t sample_rate = subpass ? util_logbase2(subpass->max_sample_count) : 0;
- bool gfx10_perfect = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10 && has_perfect_queries;
-
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) {
- /* Always enable PERFECT_ZPASS_COUNTS due to issues with partially
- * covered tiles, discards, and early depth testing. For more details,
- * see https://gitlab.freedesktop.org/mesa/mesa/-/issues/3218 */
- db_count_control =
- S_028004_PERFECT_ZPASS_COUNTS(1) |
- S_028004_DISABLE_CONSERVATIVE_ZPASS_COUNTS(gfx10_perfect) |
- S_028004_SAMPLE_RATE(sample_rate) |
- S_028004_ZPASS_ENABLE(1) |
- S_028004_SLICE_EVEN_ENABLE(1) |
- S_028004_SLICE_ODD_ENABLE(1);
-
- if (G_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(pa_sc_mode_cntl_1) &&
- pipeline->graphics.disable_out_of_order_rast_for_occlusion &&
- has_perfect_queries) {
- /* If the bound pipeline has enabled
- * out-of-order rasterization, we should
- * disable it before starting any perfect
- * occlusion queries.
- */
- pa_sc_mode_cntl_1 &= C_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE;
-
- radeon_set_context_reg(cmd_buffer->cs,
- R_028A4C_PA_SC_MODE_CNTL_1,
- pa_sc_mode_cntl_1);
- }
- } else {
- db_count_control = S_028004_PERFECT_ZPASS_COUNTS(1) |
- S_028004_SAMPLE_RATE(sample_rate);
- }
- }
-
- radeon_set_context_reg(cmd_buffer->cs, R_028004_DB_COUNT_CONTROL, db_count_control);
-
- cmd_buffer->state.context_roll_without_scissor_emitted = true;
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ struct radv_cmd_state *state = &cmd_buffer->state;
+
+ if (state->index_type != state->last_index_type) {
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
+ radeon_set_uconfig_reg_idx(cmd_buffer->device->physical_device, cs,
+ R_03090C_VGT_INDEX_TYPE, 2, state->index_type);
+ } else {
+ radeon_emit(cs, PKT3(PKT3_INDEX_TYPE, 0, 0));
+ radeon_emit(cs, state->index_type);
+ }
+
+ state->last_index_type = state->index_type;
+ }
+
+ /* For the direct indexed draws we use DRAW_INDEX_2, which includes
+ * the index_va and max_index_count already. */
+ if (!indirect)
+ return;
+
+ radeon_emit(cs, PKT3(PKT3_INDEX_BASE, 1, 0));
+ radeon_emit(cs, state->index_va);
+ radeon_emit(cs, state->index_va >> 32);
+
+ radeon_emit(cs, PKT3(PKT3_INDEX_BUFFER_SIZE, 0, 0));
+ radeon_emit(cs, state->max_index_count);
+
+ cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_INDEX_BUFFER;
+}
+
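Note: radv_emit_index_buffer() above re-emits VGT_INDEX_TYPE only when the type actually changed since the last emit, a common redundant-state filter. A standalone model of that check (types and names are mine):

#include <stdbool.h>
#include <stdint.h>

struct index_state {
   uint32_t index_type;
   uint32_t last_index_type; /* what the GPU currently has programmed */
};

static bool
index_type_needs_emit(struct index_state *state)
{
   if (state->index_type == state->last_index_type)
      return false;                         /* skip the redundant packet */
   state->last_index_type = state->index_type;
   return true;                             /* caller emits VGT_INDEX_TYPE */
}

int
main(void)
{
   struct index_state state = {.index_type = 1, .last_index_type = ~0u};
   return index_type_needs_emit(&state) && !index_type_needs_emit(&state) ? 0 : 1;
}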
+void
+radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer)
+{
+ bool has_perfect_queries = cmd_buffer->state.perfect_occlusion_queries_enabled;
+ struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
+ uint32_t pa_sc_mode_cntl_1 = pipeline ? pipeline->graphics.ms.pa_sc_mode_cntl_1 : 0;
+ uint32_t db_count_control;
+
+ if (!cmd_buffer->state.active_occlusion_queries) {
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) {
+ if (G_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(pa_sc_mode_cntl_1) &&
+ pipeline->graphics.disable_out_of_order_rast_for_occlusion && has_perfect_queries) {
+ /* Re-enable out-of-order rasterization if the
+             * bound pipeline supports it and if it has
+ * been disabled before starting any perfect
+ * occlusion queries.
+ */
+ radeon_set_context_reg(cmd_buffer->cs, R_028A4C_PA_SC_MODE_CNTL_1, pa_sc_mode_cntl_1);
+ }
+ }
+ db_count_control = S_028004_ZPASS_INCREMENT_DISABLE(1);
+ } else {
+ const struct radv_subpass *subpass = cmd_buffer->state.subpass;
+ uint32_t sample_rate = subpass ? util_logbase2(subpass->max_sample_count) : 0;
+ bool gfx10_perfect =
+ cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10 && has_perfect_queries;
+
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) {
+ /* Always enable PERFECT_ZPASS_COUNTS due to issues with partially
+ * covered tiles, discards, and early depth testing. For more details,
+ * see https://gitlab.freedesktop.org/mesa/mesa/-/issues/3218 */
+ db_count_control = S_028004_PERFECT_ZPASS_COUNTS(1) |
+ S_028004_DISABLE_CONSERVATIVE_ZPASS_COUNTS(gfx10_perfect) |
+ S_028004_SAMPLE_RATE(sample_rate) | S_028004_ZPASS_ENABLE(1) |
+ S_028004_SLICE_EVEN_ENABLE(1) | S_028004_SLICE_ODD_ENABLE(1);
+
+ if (G_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(pa_sc_mode_cntl_1) &&
+ pipeline->graphics.disable_out_of_order_rast_for_occlusion && has_perfect_queries) {
+ /* If the bound pipeline has enabled
+ * out-of-order rasterization, we should
+ * disable it before starting any perfect
+ * occlusion queries.
+ */
+ pa_sc_mode_cntl_1 &= C_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE;
+
+ radeon_set_context_reg(cmd_buffer->cs, R_028A4C_PA_SC_MODE_CNTL_1, pa_sc_mode_cntl_1);
+ }
+ } else {
+ db_count_control = S_028004_PERFECT_ZPASS_COUNTS(1) | S_028004_SAMPLE_RATE(sample_rate);
+ }
+ }
+
+ radeon_set_context_reg(cmd_buffer->cs, R_028004_DB_COUNT_CONTROL, db_count_control);
+
+ cmd_buffer->state.context_roll_without_scissor_emitted = true;
}
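Note: SAMPLE_RATE above is the base-2 log of the subpass sample count (util_logbase2). For the power-of-two counts Vulkan allows, a standalone equivalent:

#include <assert.h>

static unsigned
log2_samples(unsigned samples)
{
   unsigned result = 0;
   while (samples > 1) {
      samples >>= 1;
      result++;
   }
   return result;
}

int
main(void)
{
   assert(log2_samples(1) == 0);
   assert(log2_samples(4) == 2);
   assert(log2_samples(8) == 3);
   return 0;
}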
static void
radv_cmd_buffer_flush_dynamic_state(struct radv_cmd_buffer *cmd_buffer)
{
- uint32_t states = cmd_buffer->state.dirty & cmd_buffer->state.emitted_pipeline->graphics.needed_dynamic_state;
+ uint32_t states =
+ cmd_buffer->state.dirty & cmd_buffer->state.emitted_pipeline->graphics.needed_dynamic_state;
- if (states & (RADV_CMD_DIRTY_DYNAMIC_VIEWPORT))
- radv_emit_viewport(cmd_buffer);
+ if (states & (RADV_CMD_DIRTY_DYNAMIC_VIEWPORT))
+ radv_emit_viewport(cmd_buffer);
- if (states & (RADV_CMD_DIRTY_DYNAMIC_SCISSOR | RADV_CMD_DIRTY_DYNAMIC_VIEWPORT) &&
- !cmd_buffer->device->physical_device->rad_info.has_gfx9_scissor_bug)
- radv_emit_scissor(cmd_buffer);
+ if (states & (RADV_CMD_DIRTY_DYNAMIC_SCISSOR | RADV_CMD_DIRTY_DYNAMIC_VIEWPORT) &&
+ !cmd_buffer->device->physical_device->rad_info.has_gfx9_scissor_bug)
+ radv_emit_scissor(cmd_buffer);
- if (states & RADV_CMD_DIRTY_DYNAMIC_LINE_WIDTH)
- radv_emit_line_width(cmd_buffer);
+ if (states & RADV_CMD_DIRTY_DYNAMIC_LINE_WIDTH)
+ radv_emit_line_width(cmd_buffer);
- if (states & RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS)
- radv_emit_blend_constants(cmd_buffer);
+ if (states & RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS)
+ radv_emit_blend_constants(cmd_buffer);
- if (states & (RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE |
- RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK |
- RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK))
- radv_emit_stencil(cmd_buffer);
+ if (states &
+ (RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE | RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK |
+ RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK))
+ radv_emit_stencil(cmd_buffer);
- if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS)
- radv_emit_depth_bounds(cmd_buffer);
+ if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS)
+ radv_emit_depth_bounds(cmd_buffer);
- if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)
- radv_emit_depth_bias(cmd_buffer);
+ if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)
+ radv_emit_depth_bias(cmd_buffer);
- if (states & RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE)
- radv_emit_discard_rectangle(cmd_buffer);
+ if (states & RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE)
+ radv_emit_discard_rectangle(cmd_buffer);
- if (states & RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS)
- radv_emit_sample_locations(cmd_buffer);
+ if (states & RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS)
+ radv_emit_sample_locations(cmd_buffer);
- if (states & RADV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE)
- radv_emit_line_stipple(cmd_buffer);
+ if (states & RADV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE)
+ radv_emit_line_stipple(cmd_buffer);
- if (states & (RADV_CMD_DIRTY_DYNAMIC_CULL_MODE |
- RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE))
- radv_emit_culling(cmd_buffer, states);
+ if (states & (RADV_CMD_DIRTY_DYNAMIC_CULL_MODE | RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE))
+ radv_emit_culling(cmd_buffer, states);
- if (states & RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY)
- radv_emit_primitive_topology(cmd_buffer);
+ if (states & RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY)
+ radv_emit_primitive_topology(cmd_buffer);
- if (states & (RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE |
- RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE |
- RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP |
- RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE |
- RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE |
- RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP))
- radv_emit_depth_control(cmd_buffer, states);
+ if (states &
+ (RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE | RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE |
+ RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP | RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE |
+ RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE | RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP))
+ radv_emit_depth_control(cmd_buffer, states);
- if (states & RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP)
- radv_emit_stencil_control(cmd_buffer);
+ if (states & RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP)
+ radv_emit_stencil_control(cmd_buffer);
- if (states & RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE)
- radv_emit_fragment_shading_rate(cmd_buffer);
+ if (states & RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE)
+ radv_emit_fragment_shading_rate(cmd_buffer);
- cmd_buffer->state.dirty &= ~states;
+ cmd_buffer->state.dirty &= ~states;
}
static void
-radv_flush_push_descriptors(struct radv_cmd_buffer *cmd_buffer,
- VkPipelineBindPoint bind_point)
+radv_flush_push_descriptors(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point)
{
- struct radv_descriptor_state *descriptors_state =
- radv_get_descriptors_state(cmd_buffer, bind_point);
- struct radv_descriptor_set *set =
- (struct radv_descriptor_set *)&descriptors_state->push_set.set;
- unsigned bo_offset;
+ struct radv_descriptor_state *descriptors_state =
+ radv_get_descriptors_state(cmd_buffer, bind_point);
+ struct radv_descriptor_set *set = (struct radv_descriptor_set *)&descriptors_state->push_set.set;
+ unsigned bo_offset;
- if (!radv_cmd_buffer_upload_data(cmd_buffer, set->header.size,
- set->header.mapped_ptr,
- &bo_offset))
- return;
+ if (!radv_cmd_buffer_upload_data(cmd_buffer, set->header.size, set->header.mapped_ptr,
+ &bo_offset))
+ return;
- set->header.va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
- set->header.va += bo_offset;
+ set->header.va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
+ set->header.va += bo_offset;
}
static void
radv_flush_indirect_descriptor_sets(struct radv_cmd_buffer *cmd_buffer,
- VkPipelineBindPoint bind_point)
+ VkPipelineBindPoint bind_point)
{
- struct radv_descriptor_state *descriptors_state =
- radv_get_descriptors_state(cmd_buffer, bind_point);
- uint32_t size = MAX_SETS * 4;
- uint32_t offset;
- void *ptr;
+ struct radv_descriptor_state *descriptors_state =
+ radv_get_descriptors_state(cmd_buffer, bind_point);
+ uint32_t size = MAX_SETS * 4;
+ uint32_t offset;
+ void *ptr;
- if (!radv_cmd_buffer_upload_alloc(cmd_buffer, size, &offset, &ptr))
- return;
+ if (!radv_cmd_buffer_upload_alloc(cmd_buffer, size, &offset, &ptr))
+ return;
- for (unsigned i = 0; i < MAX_SETS; i++) {
- uint32_t *uptr = ((uint32_t *)ptr) + i;
- uint64_t set_va = 0;
- struct radv_descriptor_set *set = descriptors_state->sets[i];
- if (descriptors_state->valid & (1u << i))
- set_va = set->header.va;
- uptr[0] = set_va & 0xffffffff;
- }
+ for (unsigned i = 0; i < MAX_SETS; i++) {
+ uint32_t *uptr = ((uint32_t *)ptr) + i;
+ uint64_t set_va = 0;
+ struct radv_descriptor_set *set = descriptors_state->sets[i];
+ if (descriptors_state->valid & (1u << i))
+ set_va = set->header.va;
+ uptr[0] = set_va & 0xffffffff;
+ }
- uint64_t va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
- va += offset;
+ uint64_t va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
+ va += offset;
- if (cmd_buffer->state.pipeline) {
- if (cmd_buffer->state.pipeline->shaders[MESA_SHADER_VERTEX])
- radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_VERTEX,
- AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
+ if (cmd_buffer->state.pipeline) {
+ if (cmd_buffer->state.pipeline->shaders[MESA_SHADER_VERTEX])
+ radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_VERTEX,
+ AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
- if (cmd_buffer->state.pipeline->shaders[MESA_SHADER_FRAGMENT])
- radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_FRAGMENT,
- AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
+ if (cmd_buffer->state.pipeline->shaders[MESA_SHADER_FRAGMENT])
+ radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_FRAGMENT,
+ AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
- if (radv_pipeline_has_gs(cmd_buffer->state.pipeline))
- radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_GEOMETRY,
- AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
+ if (radv_pipeline_has_gs(cmd_buffer->state.pipeline))
+ radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_GEOMETRY,
+ AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
- if (radv_pipeline_has_tess(cmd_buffer->state.pipeline))
- radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_TESS_CTRL,
- AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
+ if (radv_pipeline_has_tess(cmd_buffer->state.pipeline))
+ radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_TESS_CTRL,
+ AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
- if (radv_pipeline_has_tess(cmd_buffer->state.pipeline))
- radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_TESS_EVAL,
- AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
- }
+ if (radv_pipeline_has_tess(cmd_buffer->state.pipeline))
+ radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_TESS_EVAL,
+ AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
+ }
- if (cmd_buffer->state.compute_pipeline)
- radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.compute_pipeline, MESA_SHADER_COMPUTE,
- AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
+ if (cmd_buffer->state.compute_pipeline)
+ radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.compute_pipeline,
+ MESA_SHADER_COMPUTE, AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
}
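The indirect descriptor table uploaded above is a flat array with one 32-bit entry per descriptor set, holding the low 32 bits of each bound set's VA and 0 for unbound sets. A minimal standalone sketch of that packing, not part of this change; NUM_SETS, the mask and the addresses are illustrative stand-ins for MAX_SETS and real buffer VAs:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define NUM_SETS 4 /* illustrative stand-in for MAX_SETS */

int main(void)
{
   uint64_t set_va[NUM_SETS] = {0x100000000ull, 0x100010000ull, 0, 0x100020000ull};
   uint32_t valid_mask = 0xbu; /* sets 0, 1 and 3 are bound */
   uint32_t table[NUM_SETS];

   for (unsigned i = 0; i < NUM_SETS; i++)
      table[i] = (valid_mask & (1u << i)) ? (uint32_t)(set_va[i] & 0xffffffff) : 0;

   for (unsigned i = 0; i < NUM_SETS; i++)
      printf("set %u -> 0x%08" PRIx32 "\n", i, table[i]);
   return 0;
}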
static void
-radv_flush_descriptors(struct radv_cmd_buffer *cmd_buffer,
- VkShaderStageFlags stages)
+radv_flush_descriptors(struct radv_cmd_buffer *cmd_buffer, VkShaderStageFlags stages)
{
- VkPipelineBindPoint bind_point = stages & VK_SHADER_STAGE_COMPUTE_BIT ?
- VK_PIPELINE_BIND_POINT_COMPUTE :
- VK_PIPELINE_BIND_POINT_GRAPHICS;
- struct radv_descriptor_state *descriptors_state =
- radv_get_descriptors_state(cmd_buffer, bind_point);
- struct radv_cmd_state *state = &cmd_buffer->state;
- bool flush_indirect_descriptors;
+ VkPipelineBindPoint bind_point = stages & VK_SHADER_STAGE_COMPUTE_BIT
+ ? VK_PIPELINE_BIND_POINT_COMPUTE
+ : VK_PIPELINE_BIND_POINT_GRAPHICS;
+ struct radv_descriptor_state *descriptors_state =
+ radv_get_descriptors_state(cmd_buffer, bind_point);
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ bool flush_indirect_descriptors;
- if (!descriptors_state->dirty)
- return;
+ if (!descriptors_state->dirty)
+ return;
- if (descriptors_state->push_dirty)
- radv_flush_push_descriptors(cmd_buffer, bind_point);
+ if (descriptors_state->push_dirty)
+ radv_flush_push_descriptors(cmd_buffer, bind_point);
- flush_indirect_descriptors =
- (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS &&
- state->pipeline && state->pipeline->need_indirect_descriptor_sets) ||
- (bind_point == VK_PIPELINE_BIND_POINT_COMPUTE &&
- state->compute_pipeline && state->compute_pipeline->need_indirect_descriptor_sets);
+ flush_indirect_descriptors =
+ (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS && state->pipeline &&
+ state->pipeline->need_indirect_descriptor_sets) ||
+ (bind_point == VK_PIPELINE_BIND_POINT_COMPUTE && state->compute_pipeline &&
+ state->compute_pipeline->need_indirect_descriptor_sets);
- if (flush_indirect_descriptors)
- radv_flush_indirect_descriptor_sets(cmd_buffer, bind_point);
+ if (flush_indirect_descriptors)
+ radv_flush_indirect_descriptor_sets(cmd_buffer, bind_point);
- ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
- cmd_buffer->cs,
- MAX_SETS * MESA_SHADER_STAGES * 4);
+ ASSERTED unsigned cdw_max =
+ radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, MAX_SETS * MESA_SHADER_STAGES * 4);
- if (cmd_buffer->state.pipeline) {
- radv_foreach_stage(stage, stages) {
- if (!cmd_buffer->state.pipeline->shaders[stage])
- continue;
+ if (cmd_buffer->state.pipeline) {
+ radv_foreach_stage(stage, stages)
+ {
+ if (!cmd_buffer->state.pipeline->shaders[stage])
+ continue;
- radv_emit_descriptor_pointers(cmd_buffer,
- cmd_buffer->state.pipeline,
- descriptors_state, stage);
- }
- }
+ radv_emit_descriptor_pointers(cmd_buffer, cmd_buffer->state.pipeline, descriptors_state,
+ stage);
+ }
+ }
- if (cmd_buffer->state.compute_pipeline &&
- (stages & VK_SHADER_STAGE_COMPUTE_BIT)) {
- radv_emit_descriptor_pointers(cmd_buffer,
- cmd_buffer->state.compute_pipeline,
- descriptors_state,
- MESA_SHADER_COMPUTE);
- }
+ if (cmd_buffer->state.compute_pipeline && (stages & VK_SHADER_STAGE_COMPUTE_BIT)) {
+ radv_emit_descriptor_pointers(cmd_buffer, cmd_buffer->state.compute_pipeline,
+ descriptors_state, MESA_SHADER_COMPUTE);
+ }
- descriptors_state->dirty = 0;
- descriptors_state->push_dirty = false;
+ descriptors_state->dirty = 0;
+ descriptors_state->push_dirty = false;
- assert(cmd_buffer->cs->cdw <= cdw_max);
+ assert(cmd_buffer->cs->cdw <= cdw_max);
- if (unlikely(cmd_buffer->device->trace_bo))
- radv_save_descriptors(cmd_buffer, bind_point);
+ if (unlikely(cmd_buffer->device->trace_bo))
+ radv_save_descriptors(cmd_buffer, bind_point);
}
static void
-radv_flush_constants(struct radv_cmd_buffer *cmd_buffer,
- VkShaderStageFlags stages)
-{
- struct radv_pipeline *pipeline = stages & VK_SHADER_STAGE_COMPUTE_BIT
- ? cmd_buffer->state.compute_pipeline
- : cmd_buffer->state.pipeline;
- VkPipelineBindPoint bind_point = stages & VK_SHADER_STAGE_COMPUTE_BIT ?
- VK_PIPELINE_BIND_POINT_COMPUTE :
- VK_PIPELINE_BIND_POINT_GRAPHICS;
- struct radv_descriptor_state *descriptors_state =
- radv_get_descriptors_state(cmd_buffer, bind_point);
- struct radv_pipeline_layout *layout = pipeline->layout;
- struct radv_shader_variant *shader, *prev_shader;
- bool need_push_constants = false;
- unsigned offset;
- void *ptr;
- uint64_t va;
-
- stages &= cmd_buffer->push_constant_stages;
- if (!stages ||
- (!layout->push_constant_size && !layout->dynamic_offset_count))
- return;
-
- radv_foreach_stage(stage, stages) {
- shader = radv_get_shader(pipeline, stage);
- if (!shader)
- continue;
-
- need_push_constants |= shader->info.loads_push_constants;
- need_push_constants |= shader->info.loads_dynamic_offsets;
-
- uint8_t base = shader->info.base_inline_push_consts;
- uint8_t count = shader->info.num_inline_push_consts;
-
- radv_emit_inline_push_consts(cmd_buffer, pipeline, stage,
- AC_UD_INLINE_PUSH_CONSTANTS,
- count,
- (uint32_t *)&cmd_buffer->push_constants[base * 4]);
- }
-
- if (need_push_constants) {
- if (!radv_cmd_buffer_upload_alloc(cmd_buffer, layout->push_constant_size +
- 16 * layout->dynamic_offset_count, &offset, &ptr))
- return;
-
- memcpy(ptr, cmd_buffer->push_constants, layout->push_constant_size);
- memcpy((char*)ptr + layout->push_constant_size,
- descriptors_state->dynamic_buffers,
- 16 * layout->dynamic_offset_count);
-
- va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
- va += offset;
-
- ASSERTED unsigned cdw_max =
- radeon_check_space(cmd_buffer->device->ws,
- cmd_buffer->cs, MESA_SHADER_STAGES * 4);
-
- prev_shader = NULL;
- radv_foreach_stage(stage, stages) {
- shader = radv_get_shader(pipeline, stage);
-
- /* Avoid redundantly emitting the address for merged stages. */
- if (shader && shader != prev_shader) {
- radv_emit_userdata_address(cmd_buffer, pipeline, stage,
- AC_UD_PUSH_CONSTANTS, va);
-
- prev_shader = shader;
- }
- }
- assert(cmd_buffer->cs->cdw <= cdw_max);
- }
-
- cmd_buffer->push_constant_stages &= ~stages;
+radv_flush_constants(struct radv_cmd_buffer *cmd_buffer, VkShaderStageFlags stages)
+{
+ struct radv_pipeline *pipeline = stages & VK_SHADER_STAGE_COMPUTE_BIT
+ ? cmd_buffer->state.compute_pipeline
+ : cmd_buffer->state.pipeline;
+ VkPipelineBindPoint bind_point = stages & VK_SHADER_STAGE_COMPUTE_BIT
+ ? VK_PIPELINE_BIND_POINT_COMPUTE
+ : VK_PIPELINE_BIND_POINT_GRAPHICS;
+ struct radv_descriptor_state *descriptors_state =
+ radv_get_descriptors_state(cmd_buffer, bind_point);
+ struct radv_pipeline_layout *layout = pipeline->layout;
+ struct radv_shader_variant *shader, *prev_shader;
+ bool need_push_constants = false;
+ unsigned offset;
+ void *ptr;
+ uint64_t va;
+
+ stages &= cmd_buffer->push_constant_stages;
+ if (!stages || (!layout->push_constant_size && !layout->dynamic_offset_count))
+ return;
+
+ radv_foreach_stage(stage, stages)
+ {
+ shader = radv_get_shader(pipeline, stage);
+ if (!shader)
+ continue;
+
+ need_push_constants |= shader->info.loads_push_constants;
+ need_push_constants |= shader->info.loads_dynamic_offsets;
+
+ uint8_t base = shader->info.base_inline_push_consts;
+ uint8_t count = shader->info.num_inline_push_consts;
+
+ radv_emit_inline_push_consts(cmd_buffer, pipeline, stage, AC_UD_INLINE_PUSH_CONSTANTS, count,
+ (uint32_t *)&cmd_buffer->push_constants[base * 4]);
+ }
+
+ if (need_push_constants) {
+ if (!radv_cmd_buffer_upload_alloc(
+ cmd_buffer, layout->push_constant_size + 16 * layout->dynamic_offset_count, &offset,
+ &ptr))
+ return;
+
+ memcpy(ptr, cmd_buffer->push_constants, layout->push_constant_size);
+ memcpy((char *)ptr + layout->push_constant_size, descriptors_state->dynamic_buffers,
+ 16 * layout->dynamic_offset_count);
+
+ va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
+ va += offset;
+
+ ASSERTED unsigned cdw_max =
+ radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, MESA_SHADER_STAGES * 4);
+
+ prev_shader = NULL;
+ radv_foreach_stage(stage, stages)
+ {
+ shader = radv_get_shader(pipeline, stage);
+
+ /* Avoid redundantly emitting the address for merged stages. */
+ if (shader && shader != prev_shader) {
+ radv_emit_userdata_address(cmd_buffer, pipeline, stage, AC_UD_PUSH_CONSTANTS, va);
+
+ prev_shader = shader;
+ }
+ }
+ assert(cmd_buffer->cs->cdw <= cdw_max);
+ }
+
+ cmd_buffer->push_constant_stages &= ~stages;
}
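The upload performed above packs the raw push constants first, followed by 16 bytes per dynamic buffer descriptor. A standalone sketch of that layout (not radv code; the sizes are illustrative assumptions, not values from any pipeline layout):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
   const unsigned push_constant_size = 64;  /* illustrative layout->push_constant_size */
   const unsigned dynamic_offset_count = 2; /* illustrative layout->dynamic_offset_count */
   uint8_t push_constants[64] = {0};
   uint8_t dynamic_buffers[2 * 16] = {0};
   uint8_t upload[64 + 2 * 16];

   /* Push constants first, then one 16-byte descriptor per dynamic offset. */
   memcpy(upload, push_constants, push_constant_size);
   memcpy(upload + push_constant_size, dynamic_buffers, 16 * dynamic_offset_count);

   printf("upload size = %zu bytes\n", sizeof(upload));
   return 0;
}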
static void
-radv_flush_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer,
- bool pipeline_is_dirty)
-{
- if ((pipeline_is_dirty ||
- (cmd_buffer->state.dirty & RADV_CMD_DIRTY_VERTEX_BUFFER)) &&
- cmd_buffer->state.pipeline->num_vertex_bindings &&
- radv_get_shader(cmd_buffer->state.pipeline, MESA_SHADER_VERTEX)->info.vs.has_vertex_buffers) {
- unsigned vb_offset;
- void *vb_ptr;
- uint32_t i = 0;
- uint32_t count = cmd_buffer->state.pipeline->num_vertex_bindings;
- uint64_t va;
-
- /* allocate some descriptor state for vertex buffers */
- if (!radv_cmd_buffer_upload_alloc(cmd_buffer, count * 16,
- &vb_offset, &vb_ptr))
- return;
-
- for (i = 0; i < count; i++) {
- uint32_t *desc = &((uint32_t *)vb_ptr)[i * 4];
- uint32_t offset;
- struct radv_buffer *buffer = cmd_buffer->vertex_bindings[i].buffer;
- unsigned num_records;
- unsigned stride;
-
- if (!buffer) {
- memset(desc, 0, 4 * 4);
- continue;
- }
-
- va = radv_buffer_get_va(buffer->bo);
-
- offset = cmd_buffer->vertex_bindings[i].offset;
- va += offset + buffer->offset;
-
- if (cmd_buffer->vertex_bindings[i].size) {
- num_records = cmd_buffer->vertex_bindings[i].size;
- } else {
- num_records = buffer->size - offset;
- }
-
- if (cmd_buffer->state.pipeline->graphics.uses_dynamic_stride) {
- stride = cmd_buffer->vertex_bindings[i].stride;
- } else {
- stride = cmd_buffer->state.pipeline->binding_stride[i];
- }
-
- if (cmd_buffer->device->physical_device->rad_info.chip_class != GFX8 && stride)
- num_records = DIV_ROUND_UP(num_records, stride);
-
- uint32_t rsrc_word3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
- S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
- S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
- S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
-
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
- /* OOB_SELECT chooses the out-of-bounds check:
- * - 1: index >= NUM_RECORDS (Structured)
- * - 3: offset >= NUM_RECORDS (Raw)
- */
- int oob_select = stride ? V_008F0C_OOB_SELECT_STRUCTURED : V_008F0C_OOB_SELECT_RAW;
-
- rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_UINT) |
- S_008F0C_OOB_SELECT(oob_select) |
- S_008F0C_RESOURCE_LEVEL(1);
- } else {
- rsrc_word3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_UINT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
- }
-
- desc[0] = va;
- desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride);
- desc[2] = num_records;
- desc[3] = rsrc_word3;
- }
-
- va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
- va += vb_offset;
-
- radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_VERTEX,
- AC_UD_VS_VERTEX_BUFFERS, va);
-
- cmd_buffer->state.vb_va = va;
- cmd_buffer->state.vb_size = count * 16;
- cmd_buffer->state.prefetch_L2_mask |= RADV_PREFETCH_VBO_DESCRIPTORS;
-
- if (unlikely(cmd_buffer->device->trace_bo))
- radv_save_vertex_descriptors(cmd_buffer, (uintptr_t)vb_ptr);
- }
- cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_VERTEX_BUFFER;
+radv_flush_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer, bool pipeline_is_dirty)
+{
+ if ((pipeline_is_dirty || (cmd_buffer->state.dirty & RADV_CMD_DIRTY_VERTEX_BUFFER)) &&
+ cmd_buffer->state.pipeline->num_vertex_bindings &&
+ radv_get_shader(cmd_buffer->state.pipeline, MESA_SHADER_VERTEX)->info.vs.has_vertex_buffers) {
+ unsigned vb_offset;
+ void *vb_ptr;
+ uint32_t i = 0;
+ uint32_t count = cmd_buffer->state.pipeline->num_vertex_bindings;
+ uint64_t va;
+
+ /* allocate some descriptor state for vertex buffers */
+ if (!radv_cmd_buffer_upload_alloc(cmd_buffer, count * 16, &vb_offset, &vb_ptr))
+ return;
+
+ for (i = 0; i < count; i++) {
+ uint32_t *desc = &((uint32_t *)vb_ptr)[i * 4];
+ uint32_t offset;
+ struct radv_buffer *buffer = cmd_buffer->vertex_bindings[i].buffer;
+ unsigned num_records;
+ unsigned stride;
+
+ if (!buffer) {
+ memset(desc, 0, 4 * 4);
+ continue;
+ }
+
+ va = radv_buffer_get_va(buffer->bo);
+
+ offset = cmd_buffer->vertex_bindings[i].offset;
+ va += offset + buffer->offset;
+
+ if (cmd_buffer->vertex_bindings[i].size) {
+ num_records = cmd_buffer->vertex_bindings[i].size;
+ } else {
+ num_records = buffer->size - offset;
+ }
+
+ if (cmd_buffer->state.pipeline->graphics.uses_dynamic_stride) {
+ stride = cmd_buffer->vertex_bindings[i].stride;
+ } else {
+ stride = cmd_buffer->state.pipeline->binding_stride[i];
+ }
+
+ if (cmd_buffer->device->physical_device->rad_info.chip_class != GFX8 && stride)
+ num_records = DIV_ROUND_UP(num_records, stride);
+
+ uint32_t rsrc_word3 =
+ S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
+ /* OOB_SELECT chooses the out-of-bounds check:
+ * - 1: index >= NUM_RECORDS (Structured)
+ * - 3: offset >= NUM_RECORDS (Raw)
+ */
+ int oob_select = stride ? V_008F0C_OOB_SELECT_STRUCTURED : V_008F0C_OOB_SELECT_RAW;
+
+ rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_UINT) |
+ S_008F0C_OOB_SELECT(oob_select) | S_008F0C_RESOURCE_LEVEL(1);
+ } else {
+ rsrc_word3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_UINT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+ }
+
+ desc[0] = va;
+ desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride);
+ desc[2] = num_records;
+ desc[3] = rsrc_word3;
+ }
+
+ va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
+ va += vb_offset;
+
+ radv_emit_userdata_address(cmd_buffer, cmd_buffer->state.pipeline, MESA_SHADER_VERTEX,
+ AC_UD_VS_VERTEX_BUFFERS, va);
+
+ cmd_buffer->state.vb_va = va;
+ cmd_buffer->state.vb_size = count * 16;
+ cmd_buffer->state.prefetch_L2_mask |= RADV_PREFETCH_VBO_DESCRIPTORS;
+
+ if (unlikely(cmd_buffer->device->trace_bo))
+ radv_save_vertex_descriptors(cmd_buffer, (uintptr_t)vb_ptr);
+ }
+ cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_VERTEX_BUFFER;
}
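As the OOB_SELECT comment above notes, when a non-zero stride is used (and not on GFX8) NUM_RECORDS is expressed in records rather than bytes, and on GFX10 the structured out-of-bounds check then compares the index against that record count. A tiny standalone sketch of the byte-to-record conversion, with made-up sizes:

#include <stdio.h>

#define DIV_ROUND_UP(a, b) (((a) + (b) - 1) / (b))

int main(void)
{
   unsigned bytes_left = 1000; /* illustrative: buffer->size - offset */
   unsigned stride = 24;       /* illustrative: bytes per vertex */
   unsigned num_records = DIV_ROUND_UP(bytes_left, stride);

   /* 42 records; the last one is only partially backed by the buffer. */
   printf("num_records = %u\n", num_records);
   return 0;
}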
static void
radv_emit_streamout_buffers(struct radv_cmd_buffer *cmd_buffer, uint64_t va)
{
- struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
- struct radv_userdata_info *loc;
- uint32_t base_reg;
+ struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
+ struct radv_userdata_info *loc;
+ uint32_t base_reg;
- for (unsigned stage = 0; stage < MESA_SHADER_STAGES; ++stage) {
- if (!radv_get_shader(pipeline, stage))
- continue;
+ for (unsigned stage = 0; stage < MESA_SHADER_STAGES; ++stage) {
+ if (!radv_get_shader(pipeline, stage))
+ continue;
- loc = radv_lookup_user_sgpr(pipeline, stage,
- AC_UD_STREAMOUT_BUFFERS);
- if (loc->sgpr_idx == -1)
- continue;
+ loc = radv_lookup_user_sgpr(pipeline, stage, AC_UD_STREAMOUT_BUFFERS);
+ if (loc->sgpr_idx == -1)
+ continue;
- base_reg = pipeline->user_data_0[stage];
+ base_reg = pipeline->user_data_0[stage];
- radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs,
- base_reg + loc->sgpr_idx * 4, va, false);
- }
+ radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, va,
+ false);
+ }
- if (radv_pipeline_has_gs_copy_shader(pipeline)) {
- loc = &pipeline->gs_copy_shader->info.user_sgprs_locs.shader_data[AC_UD_STREAMOUT_BUFFERS];
- if (loc->sgpr_idx != -1) {
- base_reg = R_00B130_SPI_SHADER_USER_DATA_VS_0;
+ if (radv_pipeline_has_gs_copy_shader(pipeline)) {
+ loc = &pipeline->gs_copy_shader->info.user_sgprs_locs.shader_data[AC_UD_STREAMOUT_BUFFERS];
+ if (loc->sgpr_idx != -1) {
+ base_reg = R_00B130_SPI_SHADER_USER_DATA_VS_0;
- radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs,
- base_reg + loc->sgpr_idx * 4, va, false);
- }
- }
+ radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, base_reg + loc->sgpr_idx * 4,
+ va, false);
+ }
+ }
}
static void
radv_flush_streamout_descriptors(struct radv_cmd_buffer *cmd_buffer)
{
- if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_STREAMOUT_BUFFER) {
- struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings;
- struct radv_streamout_state *so = &cmd_buffer->state.streamout;
- unsigned so_offset;
- void *so_ptr;
- uint64_t va;
-
- /* Allocate some descriptor state for streamout buffers. */
- if (!radv_cmd_buffer_upload_alloc(cmd_buffer,
- MAX_SO_BUFFERS * 16,
- &so_offset, &so_ptr))
- return;
-
- for (uint32_t i = 0; i < MAX_SO_BUFFERS; i++) {
- struct radv_buffer *buffer = sb[i].buffer;
- uint32_t *desc = &((uint32_t *)so_ptr)[i * 4];
-
- if (!(so->enabled_mask & (1 << i)))
- continue;
-
- va = radv_buffer_get_va(buffer->bo) + buffer->offset;
-
- va += sb[i].offset;
-
- /* Set the descriptor.
- *
- * On GFX8, the format must be non-INVALID, otherwise
- * the buffer will be considered not bound and store
- * instructions will be no-ops.
- */
- uint32_t size = 0xffffffff;
-
- /* Compute the correct buffer size for NGG streamout
- * because it's used to determine the max emit per
- * buffer.
- */
- if (cmd_buffer->device->physical_device->use_ngg_streamout)
- size = buffer->size - sb[i].offset;
-
- uint32_t rsrc_word3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
- S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
- S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
- S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
-
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
- rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
- S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
- S_008F0C_RESOURCE_LEVEL(1);
- } else {
- rsrc_word3 |= S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
- }
-
- desc[0] = va;
- desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32);
- desc[2] = size;
- desc[3] = rsrc_word3;
- }
-
- va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
- va += so_offset;
-
- radv_emit_streamout_buffers(cmd_buffer, va);
- }
-
- cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_STREAMOUT_BUFFER;
+ if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_STREAMOUT_BUFFER) {
+ struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings;
+ struct radv_streamout_state *so = &cmd_buffer->state.streamout;
+ unsigned so_offset;
+ void *so_ptr;
+ uint64_t va;
+
+ /* Allocate some descriptor state for streamout buffers. */
+ if (!radv_cmd_buffer_upload_alloc(cmd_buffer, MAX_SO_BUFFERS * 16, &so_offset, &so_ptr))
+ return;
+
+ for (uint32_t i = 0; i < MAX_SO_BUFFERS; i++) {
+ struct radv_buffer *buffer = sb[i].buffer;
+ uint32_t *desc = &((uint32_t *)so_ptr)[i * 4];
+
+ if (!(so->enabled_mask & (1 << i)))
+ continue;
+
+ va = radv_buffer_get_va(buffer->bo) + buffer->offset;
+
+ va += sb[i].offset;
+
+ /* Set the descriptor.
+ *
+ * On GFX8, the format must be non-INVALID, otherwise
+ * the buffer will be considered not bound and store
+ * instructions will be no-ops.
+ */
+ uint32_t size = 0xffffffff;
+
+ /* Compute the correct buffer size for NGG streamout
+ * because it's used to determine the max emit per
+ * buffer.
+ */
+ if (cmd_buffer->device->physical_device->use_ngg_streamout)
+ size = buffer->size - sb[i].offset;
+
+ uint32_t rsrc_word3 =
+ S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
+ rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+ S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
+ } else {
+ rsrc_word3 |= S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+ }
+
+ desc[0] = va;
+ desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32);
+ desc[2] = size;
+ desc[3] = rsrc_word3;
+ }
+
+ va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
+ va += so_offset;
+
+ radv_emit_streamout_buffers(cmd_buffer, va);
+ }
+
+ cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_STREAMOUT_BUFFER;
}
static void
radv_flush_ngg_gs_state(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
- struct radv_userdata_info *loc;
- uint32_t ngg_gs_state = 0;
- uint32_t base_reg;
+ struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
+ struct radv_userdata_info *loc;
+ uint32_t ngg_gs_state = 0;
+ uint32_t base_reg;
- if (!radv_pipeline_has_gs(pipeline) ||
- !radv_pipeline_has_ngg(pipeline))
- return;
+ if (!radv_pipeline_has_gs(pipeline) || !radv_pipeline_has_ngg(pipeline))
+ return;
- /* By default NGG GS queries are disabled but they are enabled if the
- * command buffer has active GDS queries or if it's a secondary command
- * buffer that inherits the number of generated primitives.
- */
- if (cmd_buffer->state.active_pipeline_gds_queries ||
- (cmd_buffer->state.inherited_pipeline_statistics & VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT))
- ngg_gs_state = 1;
+ /* By default NGG GS queries are disabled but they are enabled if the
+ * command buffer has active GDS queries or if it's a secondary command
+ * buffer that inherits the number of generated primitives.
+ */
+ if (cmd_buffer->state.active_pipeline_gds_queries ||
+ (cmd_buffer->state.inherited_pipeline_statistics &
+ VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT))
+ ngg_gs_state = 1;
- loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_GEOMETRY,
- AC_UD_NGG_GS_STATE);
- base_reg = pipeline->user_data_0[MESA_SHADER_GEOMETRY];
- assert(loc->sgpr_idx != -1);
+ loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_GEOMETRY, AC_UD_NGG_GS_STATE);
+ base_reg = pipeline->user_data_0[MESA_SHADER_GEOMETRY];
+ assert(loc->sgpr_idx != -1);
- radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4,
- ngg_gs_state);
+ radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, ngg_gs_state);
}
static void
radv_upload_graphics_shader_descriptors(struct radv_cmd_buffer *cmd_buffer, bool pipeline_is_dirty)
{
- radv_flush_vertex_descriptors(cmd_buffer, pipeline_is_dirty);
- radv_flush_streamout_descriptors(cmd_buffer);
- radv_flush_descriptors(cmd_buffer, VK_SHADER_STAGE_ALL_GRAPHICS);
- radv_flush_constants(cmd_buffer, VK_SHADER_STAGE_ALL_GRAPHICS);
- radv_flush_ngg_gs_state(cmd_buffer);
+ radv_flush_vertex_descriptors(cmd_buffer, pipeline_is_dirty);
+ radv_flush_streamout_descriptors(cmd_buffer);
+ radv_flush_descriptors(cmd_buffer, VK_SHADER_STAGE_ALL_GRAPHICS);
+ radv_flush_constants(cmd_buffer, VK_SHADER_STAGE_ALL_GRAPHICS);
+ radv_flush_ngg_gs_state(cmd_buffer);
}
struct radv_draw_info {
- /**
- * Number of vertices.
- */
- uint32_t count;
-
- /**
- * First instance id.
- */
- uint32_t first_instance;
-
- /**
- * Number of instances.
- */
- uint32_t instance_count;
-
- /**
- * First index (indexed draws only).
- */
- uint32_t first_index;
-
- /**
- * Whether it's an indexed draw.
- */
- bool indexed;
-
- /**
- * Indirect draw parameters resource.
- */
- struct radv_buffer *indirect;
- uint64_t indirect_offset;
- uint32_t stride;
-
- /**
- * Draw count parameters resource.
- */
- struct radv_buffer *count_buffer;
- uint64_t count_buffer_offset;
-
- /**
- * Stream output parameters resource.
- */
- struct radv_buffer *strmout_buffer;
- uint64_t strmout_buffer_offset;
+ /**
+ * Number of vertices.
+ */
+ uint32_t count;
+
+ /**
+ * First instance id.
+ */
+ uint32_t first_instance;
+
+ /**
+ * Number of instances.
+ */
+ uint32_t instance_count;
+
+ /**
+ * First index (indexed draws only).
+ */
+ uint32_t first_index;
+
+ /**
+ * Whether it's an indexed draw.
+ */
+ bool indexed;
+
+ /**
+ * Indirect draw parameters resource.
+ */
+ struct radv_buffer *indirect;
+ uint64_t indirect_offset;
+ uint32_t stride;
+
+ /**
+ * Draw count parameters resource.
+ */
+ struct radv_buffer *count_buffer;
+ uint64_t count_buffer_offset;
+
+ /**
+ * Stream output parameters resource.
+ */
+ struct radv_buffer *strmout_buffer;
+ uint64_t strmout_buffer_offset;
};
static uint32_t
radv_get_primitive_reset_index(struct radv_cmd_buffer *cmd_buffer)
{
- switch (cmd_buffer->state.index_type) {
- case V_028A7C_VGT_INDEX_8:
- return 0xffu;
- case V_028A7C_VGT_INDEX_16:
- return 0xffffu;
- case V_028A7C_VGT_INDEX_32:
- return 0xffffffffu;
- default:
- unreachable("invalid index type");
- }
+ switch (cmd_buffer->state.index_type) {
+ case V_028A7C_VGT_INDEX_8:
+ return 0xffu;
+ case V_028A7C_VGT_INDEX_16:
+ return 0xffffu;
+ case V_028A7C_VGT_INDEX_32:
+ return 0xffffffffu;
+ default:
+ unreachable("invalid index type");
+ }
}
static void
-si_emit_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer,
- bool instanced_draw, bool indirect_draw,
- bool count_from_stream_output,
- uint32_t draw_vertex_count)
-{
- struct radeon_info *info = &cmd_buffer->device->physical_device->rad_info;
- struct radv_cmd_state *state = &cmd_buffer->state;
- unsigned topology = state->dynamic.primitive_topology;
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- unsigned ia_multi_vgt_param;
-
- ia_multi_vgt_param =
- si_get_ia_multi_vgt_param(cmd_buffer, instanced_draw,
- indirect_draw,
- count_from_stream_output,
- draw_vertex_count,
- topology);
-
- if (state->last_ia_multi_vgt_param != ia_multi_vgt_param) {
- if (info->chip_class == GFX9) {
- radeon_set_uconfig_reg_idx(cmd_buffer->device->physical_device,
- cs,
- R_030960_IA_MULTI_VGT_PARAM,
- 4, ia_multi_vgt_param);
- } else if (info->chip_class >= GFX7) {
- radeon_set_context_reg_idx(cs,
- R_028AA8_IA_MULTI_VGT_PARAM,
- 1, ia_multi_vgt_param);
- } else {
- radeon_set_context_reg(cs, R_028AA8_IA_MULTI_VGT_PARAM,
- ia_multi_vgt_param);
- }
- state->last_ia_multi_vgt_param = ia_multi_vgt_param;
- }
+si_emit_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_draw,
+ bool indirect_draw, bool count_from_stream_output,
+ uint32_t draw_vertex_count)
+{
+ struct radeon_info *info = &cmd_buffer->device->physical_device->rad_info;
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ unsigned topology = state->dynamic.primitive_topology;
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ unsigned ia_multi_vgt_param;
+
+ ia_multi_vgt_param =
+ si_get_ia_multi_vgt_param(cmd_buffer, instanced_draw, indirect_draw, count_from_stream_output,
+ draw_vertex_count, topology);
+
+ if (state->last_ia_multi_vgt_param != ia_multi_vgt_param) {
+ if (info->chip_class == GFX9) {
+ radeon_set_uconfig_reg_idx(cmd_buffer->device->physical_device, cs,
+ R_030960_IA_MULTI_VGT_PARAM, 4, ia_multi_vgt_param);
+ } else if (info->chip_class >= GFX7) {
+ radeon_set_context_reg_idx(cs, R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param);
+ } else {
+ radeon_set_context_reg(cs, R_028AA8_IA_MULTI_VGT_PARAM, ia_multi_vgt_param);
+ }
+ state->last_ia_multi_vgt_param = ia_multi_vgt_param;
+ }
}
static void
-radv_emit_draw_registers(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_draw_info *draw_info)
-{
- struct radeon_info *info = &cmd_buffer->device->physical_device->rad_info;
- struct radv_cmd_state *state = &cmd_buffer->state;
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- int32_t primitive_reset_en;
-
- /* Draw state. */
- if (info->chip_class < GFX10) {
- si_emit_ia_multi_vgt_param(cmd_buffer, draw_info->instance_count > 1,
- draw_info->indirect,
- !!draw_info->strmout_buffer,
- draw_info->indirect ? 0 : draw_info->count);
- }
-
- /* Primitive restart. */
- primitive_reset_en =
- draw_info->indexed && state->pipeline->graphics.prim_restart_enable;
-
- if (primitive_reset_en != state->last_primitive_reset_en) {
- state->last_primitive_reset_en = primitive_reset_en;
- if (info->chip_class >= GFX9) {
- radeon_set_uconfig_reg(cs,
- R_03092C_VGT_MULTI_PRIM_IB_RESET_EN,
- primitive_reset_en);
- } else {
- radeon_set_context_reg(cs,
- R_028A94_VGT_MULTI_PRIM_IB_RESET_EN,
- primitive_reset_en);
- }
- }
-
- if (primitive_reset_en) {
- uint32_t primitive_reset_index =
- radv_get_primitive_reset_index(cmd_buffer);
-
- if (primitive_reset_index != state->last_primitive_reset_index) {
- radeon_set_context_reg(cs,
- R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX,
- primitive_reset_index);
- state->last_primitive_reset_index = primitive_reset_index;
- }
- }
-
- if (draw_info->strmout_buffer) {
- uint64_t va = radv_buffer_get_va(draw_info->strmout_buffer->bo);
-
- va += draw_info->strmout_buffer->offset +
- draw_info->strmout_buffer_offset;
-
- radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE,
- draw_info->stride);
-
- radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
- COPY_DATA_DST_SEL(COPY_DATA_REG) |
- COPY_DATA_WR_CONFIRM);
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- radeon_emit(cs, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2);
- radeon_emit(cs, 0); /* unused */
-
- radv_cs_add_buffer(cmd_buffer->device->ws, cs, draw_info->strmout_buffer->bo);
- }
-}
-
-static void radv_stage_flush(struct radv_cmd_buffer *cmd_buffer,
- VkPipelineStageFlags src_stage_mask)
-{
- if (src_stage_mask & (VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
- VK_PIPELINE_STAGE_TRANSFER_BIT |
- VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT |
- VK_PIPELINE_STAGE_ALL_COMMANDS_BIT)) {
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH;
- }
-
- if (src_stage_mask & (VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
- VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
- VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
- VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
- VK_PIPELINE_STAGE_TRANSFER_BIT |
- VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT |
- VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT |
- VK_PIPELINE_STAGE_ALL_COMMANDS_BIT)) {
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH;
- } else if (src_stage_mask & (VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
- VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
- VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
- VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT |
- VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT |
- VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT |
- VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT)) {
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VS_PARTIAL_FLUSH;
- }
+radv_emit_draw_registers(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *draw_info)
+{
+ struct radeon_info *info = &cmd_buffer->device->physical_device->rad_info;
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ int32_t primitive_reset_en;
+
+ /* Draw state. */
+ if (info->chip_class < GFX10) {
+ si_emit_ia_multi_vgt_param(cmd_buffer, draw_info->instance_count > 1, draw_info->indirect,
+ !!draw_info->strmout_buffer,
+ draw_info->indirect ? 0 : draw_info->count);
+ }
+
+ /* Primitive restart. */
+ primitive_reset_en = draw_info->indexed && state->pipeline->graphics.prim_restart_enable;
+
+ if (primitive_reset_en != state->last_primitive_reset_en) {
+ state->last_primitive_reset_en = primitive_reset_en;
+ if (info->chip_class >= GFX9) {
+ radeon_set_uconfig_reg(cs, R_03092C_VGT_MULTI_PRIM_IB_RESET_EN, primitive_reset_en);
+ } else {
+ radeon_set_context_reg(cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, primitive_reset_en);
+ }
+ }
+
+ if (primitive_reset_en) {
+ uint32_t primitive_reset_index = radv_get_primitive_reset_index(cmd_buffer);
+
+ if (primitive_reset_index != state->last_primitive_reset_index) {
+ radeon_set_context_reg(cs, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, primitive_reset_index);
+ state->last_primitive_reset_index = primitive_reset_index;
+ }
+ }
+
+ if (draw_info->strmout_buffer) {
+ uint64_t va = radv_buffer_get_va(draw_info->strmout_buffer->bo);
+
+ va += draw_info->strmout_buffer->offset + draw_info->strmout_buffer_offset;
+
+ radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, draw_info->stride);
+
+ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG) |
+ COPY_DATA_WR_CONFIRM);
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ radeon_emit(cs, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2);
+ radeon_emit(cs, 0); /* unused */
+
+ radv_cs_add_buffer(cmd_buffer->device->ws, cs, draw_info->strmout_buffer->bo);
+ }
+}
+
+static void
+radv_stage_flush(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags src_stage_mask)
+{
+ if (src_stage_mask &
+ (VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT |
+ VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT)) {
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH;
+ }
+
+ if (src_stage_mask &
+ (VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
+ VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
+ VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT |
+ VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT | VK_PIPELINE_STAGE_ALL_COMMANDS_BIT)) {
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH;
+ } else if (src_stage_mask &
+ (VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
+ VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
+ VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT |
+ VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT |
+ VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT |
+ VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT)) {
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VS_PARTIAL_FLUSH;
+ }
}
/* Determine if the image is affected by the pipe misaligned metadata issue
 * which requires invalidating L2.
 * which requires invalidating L2.
*/
static bool
-radv_image_is_pipe_misaligned(const struct radv_device *device,
- const struct radv_image *image)
-{
- struct radeon_info *rad_info = &device->physical_device->rad_info;
- unsigned log2_samples = util_logbase2(image->info.samples);
-
- assert(rad_info->chip_class >= GFX10);
-
- for (unsigned i = 0; i < image->plane_count; ++i) {
- VkFormat fmt = vk_format_get_plane_format(image->vk_format, i);
- unsigned log2_bpp = util_logbase2(vk_format_get_blocksize(fmt));
- unsigned log2_bpp_and_samples;
-
- if (rad_info->chip_class >= GFX10_3) {
- log2_bpp_and_samples = log2_bpp + log2_samples;
- } else {
- if (vk_format_has_depth(image->vk_format) &&
- image->info.array_size >= 8) {
- log2_bpp = 2;
- }
-
- log2_bpp_and_samples = MIN2(6, log2_bpp + log2_samples);
- }
-
- unsigned num_pipes = G_0098F8_NUM_PIPES(rad_info->gb_addr_config);
- int overlap = MAX2(0, log2_bpp_and_samples + num_pipes - 8);
-
- if (vk_format_has_depth(image->vk_format)) {
- if (radv_image_is_tc_compat_htile(image) && overlap) {
- return true;
- }
- } else {
- unsigned max_compressed_frags = G_0098F8_MAX_COMPRESSED_FRAGS(rad_info->gb_addr_config);
- int log2_samples_frag_diff = MAX2(0, log2_samples - max_compressed_frags);
- int samples_overlap = MIN2(log2_samples, overlap);
-
-			/* TODO: This shouldn't be necessary if the image has DCC but
-			 * it's not readable by shaders.
-			 */
- if ((radv_image_has_dcc(image) ||
- radv_image_is_tc_compat_cmask(image)) &&
- (samples_overlap > log2_samples_frag_diff)) {
- return true;
- }
- }
- }
-
- return false;
+radv_image_is_pipe_misaligned(const struct radv_device *device, const struct radv_image *image)
+{
+ struct radeon_info *rad_info = &device->physical_device->rad_info;
+ unsigned log2_samples = util_logbase2(image->info.samples);
+
+ assert(rad_info->chip_class >= GFX10);
+
+ for (unsigned i = 0; i < image->plane_count; ++i) {
+ VkFormat fmt = vk_format_get_plane_format(image->vk_format, i);
+ unsigned log2_bpp = util_logbase2(vk_format_get_blocksize(fmt));
+ unsigned log2_bpp_and_samples;
+
+ if (rad_info->chip_class >= GFX10_3) {
+ log2_bpp_and_samples = log2_bpp + log2_samples;
+ } else {
+ if (vk_format_has_depth(image->vk_format) && image->info.array_size >= 8) {
+ log2_bpp = 2;
+ }
+
+ log2_bpp_and_samples = MIN2(6, log2_bpp + log2_samples);
+ }
+
+ unsigned num_pipes = G_0098F8_NUM_PIPES(rad_info->gb_addr_config);
+ int overlap = MAX2(0, log2_bpp_and_samples + num_pipes - 8);
+
+ if (vk_format_has_depth(image->vk_format)) {
+ if (radv_image_is_tc_compat_htile(image) && overlap) {
+ return true;
+ }
+ } else {
+ unsigned max_compressed_frags = G_0098F8_MAX_COMPRESSED_FRAGS(rad_info->gb_addr_config);
+ int log2_samples_frag_diff = MAX2(0, log2_samples - max_compressed_frags);
+ int samples_overlap = MIN2(log2_samples, overlap);
+
+         /* TODO: This shouldn't be necessary if the image has DCC but
+          * it's not readable by shaders.
+          */
+ if ((radv_image_has_dcc(image) || radv_image_is_tc_compat_cmask(image)) &&
+ (samples_overlap > log2_samples_frag_diff)) {
+ return true;
+ }
+ }
+ }
+
+ return false;
}
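The heart of the check above is the "overlap" term: roughly log2(bpp) + log2(samples) + the NUM_PIPES field compared against 8. A standalone worked example with illustrative values (not taken from any real GB_ADDR_CONFIG):

#include <stdio.h>

#define MIN2(a, b) ((a) < (b) ? (a) : (b))
#define MAX2(a, b) ((a) > (b) ? (a) : (b))

int main(void)
{
   unsigned log2_bpp = 2;     /* 4 bytes per pixel */
   unsigned log2_samples = 2; /* 4x MSAA */
   unsigned num_pipes = 4;    /* NUM_PIPES field, i.e. log2 of the pipe count */
   unsigned log2_bpp_and_samples = MIN2(6, log2_bpp + log2_samples);
   int overlap = MAX2(0, (int)(log2_bpp_and_samples + num_pipes) - 8);

   /* overlap == 0 here, so this plane would not be flagged as pipe misaligned. */
   printf("overlap = %d\n", overlap);
   return 0;
}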
static bool
radv_image_is_l2_coherent(const struct radv_device *device, const struct radv_image *image)
{
- if (device->physical_device->rad_info.chip_class >= GFX10) {
- return !device->physical_device->rad_info.tcc_rb_non_coherent &&
- (image && !radv_image_is_pipe_misaligned(device, image));
- } else if (device->physical_device->rad_info.chip_class == GFX9 && image) {
- if (image->info.samples == 1 &&
- (image->usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
- VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
- !vk_format_has_stencil(image->vk_format)) {
- /* Single-sample color and single-sample depth
- * (not stencil) are coherent with shaders on
- * GFX9.
- */
- return true;
- }
- }
-
- return false;
+ if (device->physical_device->rad_info.chip_class >= GFX10) {
+ return !device->physical_device->rad_info.tcc_rb_non_coherent &&
+ (image && !radv_image_is_pipe_misaligned(device, image));
+ } else if (device->physical_device->rad_info.chip_class == GFX9 && image) {
+ if (image->info.samples == 1 &&
+ (image->usage &
+ (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
+ !vk_format_has_stencil(image->vk_format)) {
+ /* Single-sample color and single-sample depth
+ * (not stencil) are coherent with shaders on
+ * GFX9.
+ */
+ return true;
+ }
+ }
+
+ return false;
}
enum radv_cmd_flush_bits
-radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer,
- VkAccessFlags src_flags,
- const struct radv_image *image)
-{
- bool has_CB_meta = true, has_DB_meta = true;
- bool image_is_coherent = radv_image_is_l2_coherent(cmd_buffer->device, image);
- enum radv_cmd_flush_bits flush_bits = 0;
-
- if (image) {
- if (!radv_image_has_CB_metadata(image))
- has_CB_meta = false;
- if (!radv_image_has_htile(image))
- has_DB_meta = false;
- }
-
- u_foreach_bit(b, src_flags) {
- switch ((VkAccessFlagBits)(1 << b)) {
- case VK_ACCESS_SHADER_WRITE_BIT:
- /* since the STORAGE bit isn't set we know that this is a meta operation.
- * on the dst flush side we skip CB/DB flushes without the STORAGE bit, so
- * set it here. */
- if (image && !(image->usage & VK_IMAGE_USAGE_STORAGE_BIT)) {
- if (vk_format_is_depth_or_stencil(image->vk_format)) {
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB;
- } else {
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
- }
- }
- if (!image_is_coherent)
- flush_bits |= RADV_CMD_FLAG_WB_L2;
- break;
- case VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT:
- case VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
- if (!image_is_coherent)
- flush_bits |= RADV_CMD_FLAG_WB_L2;
- break;
- case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT:
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
- if (has_CB_meta)
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
- break;
- case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT:
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB;
- if (has_DB_meta)
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
- break;
- case VK_ACCESS_TRANSFER_WRITE_BIT:
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
- RADV_CMD_FLAG_FLUSH_AND_INV_DB;
-
- if (!image_is_coherent)
- flush_bits |= RADV_CMD_FLAG_INV_L2;
- if (has_CB_meta)
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
- if (has_DB_meta)
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
- break;
- case VK_ACCESS_MEMORY_WRITE_BIT:
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
- RADV_CMD_FLAG_FLUSH_AND_INV_DB;
-
- if (!image_is_coherent)
- flush_bits |= RADV_CMD_FLAG_INV_L2;
- if (has_CB_meta)
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
- if (has_DB_meta)
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
- break;
- default:
- break;
- }
- }
- return flush_bits;
+radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags src_flags,
+ const struct radv_image *image)
+{
+ bool has_CB_meta = true, has_DB_meta = true;
+ bool image_is_coherent = radv_image_is_l2_coherent(cmd_buffer->device, image);
+ enum radv_cmd_flush_bits flush_bits = 0;
+
+ if (image) {
+ if (!radv_image_has_CB_metadata(image))
+ has_CB_meta = false;
+ if (!radv_image_has_htile(image))
+ has_DB_meta = false;
+ }
+
+ u_foreach_bit(b, src_flags)
+ {
+ switch ((VkAccessFlagBits)(1 << b)) {
+ case VK_ACCESS_SHADER_WRITE_BIT:
+ /* since the STORAGE bit isn't set we know that this is a meta operation.
+ * on the dst flush side we skip CB/DB flushes without the STORAGE bit, so
+ * set it here. */
+ if (image && !(image->usage & VK_IMAGE_USAGE_STORAGE_BIT)) {
+ if (vk_format_is_depth_or_stencil(image->vk_format)) {
+ flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB;
+ } else {
+ flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
+ }
+ }
+ if (!image_is_coherent)
+ flush_bits |= RADV_CMD_FLAG_WB_L2;
+ break;
+ case VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT:
+ case VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
+ if (!image_is_coherent)
+ flush_bits |= RADV_CMD_FLAG_WB_L2;
+ break;
+ case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT:
+ flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
+ if (has_CB_meta)
+ flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
+ break;
+ case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT:
+ flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB;
+ if (has_DB_meta)
+ flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
+ break;
+ case VK_ACCESS_TRANSFER_WRITE_BIT:
+ flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB;
+
+ if (!image_is_coherent)
+ flush_bits |= RADV_CMD_FLAG_INV_L2;
+ if (has_CB_meta)
+ flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
+ if (has_DB_meta)
+ flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
+ break;
+ case VK_ACCESS_MEMORY_WRITE_BIT:
+ flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB;
+
+ if (!image_is_coherent)
+ flush_bits |= RADV_CMD_FLAG_INV_L2;
+ if (has_CB_meta)
+ flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
+ if (has_DB_meta)
+ flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
+ break;
+ default:
+ break;
+ }
+ }
+ return flush_bits;
}
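The u_foreach_bit loop above visits every set bit of the access mask exactly once. A standalone sketch of the same lowest-set-bit idiom (not Mesa's actual macro definition):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
   uint32_t flags = (1u << 6) | (1u << 12) | (1u << 16); /* illustrative access mask */

   for (uint32_t mask = flags; mask; mask &= mask - 1) {
      int b = __builtin_ctz(mask); /* index of the lowest set bit (GCC/Clang builtin) */
      printf("handle access bit %d\n", b);
   }
   return 0;
}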
enum radv_cmd_flush_bits
-radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer,
- VkAccessFlags dst_flags,
+radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags dst_flags,
const struct radv_image *image)
{
- bool has_CB_meta = true, has_DB_meta = true;
- enum radv_cmd_flush_bits flush_bits = 0;
- bool flush_CB = true, flush_DB = true;
- bool image_is_coherent = radv_image_is_l2_coherent(cmd_buffer->device, image);
-
- if (image) {
- if (!(image->usage & VK_IMAGE_USAGE_STORAGE_BIT)) {
- flush_CB = false;
- flush_DB = false;
- }
-
- if (!radv_image_has_CB_metadata(image))
- has_CB_meta = false;
- if (!radv_image_has_htile(image))
- has_DB_meta = false;
- }
-
- u_foreach_bit(b, dst_flags) {
- switch ((VkAccessFlagBits)(1 << b)) {
- case VK_ACCESS_INDIRECT_COMMAND_READ_BIT:
- case VK_ACCESS_INDEX_READ_BIT:
- case VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
- break;
- case VK_ACCESS_UNIFORM_READ_BIT:
- flush_bits |= RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_SCACHE;
- break;
- case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
- case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT:
- case VK_ACCESS_TRANSFER_READ_BIT:
- case VK_ACCESS_TRANSFER_WRITE_BIT:
- flush_bits |= RADV_CMD_FLAG_INV_VCACHE;
-
- if (has_CB_meta || has_DB_meta)
- flush_bits |= RADV_CMD_FLAG_INV_L2_METADATA;
- if (!image_is_coherent)
- flush_bits |= RADV_CMD_FLAG_INV_L2;
- break;
- case VK_ACCESS_SHADER_READ_BIT:
- flush_bits |= RADV_CMD_FLAG_INV_VCACHE;
- /* Unlike LLVM, ACO uses SMEM for SSBOs and we have to
- * invalidate the scalar cache. */
- if (!cmd_buffer->device->physical_device->use_llvm && !image)
- flush_bits |= RADV_CMD_FLAG_INV_SCACHE;
-
- if (has_CB_meta || has_DB_meta)
- flush_bits |= RADV_CMD_FLAG_INV_L2_METADATA;
- if (!image_is_coherent)
- flush_bits |= RADV_CMD_FLAG_INV_L2;
- break;
- case VK_ACCESS_SHADER_WRITE_BIT:
- break;
- case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT:
- case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT:
- if (flush_CB)
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
- if (has_CB_meta)
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
- break;
- case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT:
- case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT:
- if (flush_DB)
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB;
- if (has_DB_meta)
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
- break;
- case VK_ACCESS_MEMORY_READ_BIT:
- case VK_ACCESS_MEMORY_WRITE_BIT:
- flush_bits |= RADV_CMD_FLAG_INV_VCACHE |
- RADV_CMD_FLAG_INV_SCACHE;
- if (!image_is_coherent)
- flush_bits |= RADV_CMD_FLAG_INV_L2;
- if (flush_CB)
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
- if (has_CB_meta)
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
- if (flush_DB)
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB;
- if (has_DB_meta)
- flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
- break;
- default:
- break;
- }
- }
- return flush_bits;
-}
-
-void radv_subpass_barrier(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_subpass_barrier *barrier)
-{
- struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
- if (fb && !fb->imageless) {
- for (int i = 0; i < fb->attachment_count; ++i) {
- cmd_buffer->state.flush_bits |= radv_src_access_flush(cmd_buffer, barrier->src_access_mask,
- fb->attachments[i]->image);
- }
- } else {
- cmd_buffer->state.flush_bits |= radv_src_access_flush(cmd_buffer, barrier->src_access_mask,
- NULL);
- }
-
- radv_stage_flush(cmd_buffer, barrier->src_stage_mask);
-
- if (fb && !fb->imageless) {
- for (int i = 0; i < fb->attachment_count; ++i) {
- cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, barrier->dst_access_mask,
- fb->attachments[i]->image);
- }
- } else {
- cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, barrier->dst_access_mask,
- NULL);
- }
+ bool has_CB_meta = true, has_DB_meta = true;
+ enum radv_cmd_flush_bits flush_bits = 0;
+ bool flush_CB = true, flush_DB = true;
+ bool image_is_coherent = radv_image_is_l2_coherent(cmd_buffer->device, image);
+
+ if (image) {
+ if (!(image->usage & VK_IMAGE_USAGE_STORAGE_BIT)) {
+ flush_CB = false;
+ flush_DB = false;
+ }
+
+ if (!radv_image_has_CB_metadata(image))
+ has_CB_meta = false;
+ if (!radv_image_has_htile(image))
+ has_DB_meta = false;
+ }
+
+ u_foreach_bit(b, dst_flags)
+ {
+ switch ((VkAccessFlagBits)(1 << b)) {
+ case VK_ACCESS_INDIRECT_COMMAND_READ_BIT:
+ case VK_ACCESS_INDEX_READ_BIT:
+ case VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
+ break;
+ case VK_ACCESS_UNIFORM_READ_BIT:
+ flush_bits |= RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_SCACHE;
+ break;
+ case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
+ case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT:
+ case VK_ACCESS_TRANSFER_READ_BIT:
+ case VK_ACCESS_TRANSFER_WRITE_BIT:
+ flush_bits |= RADV_CMD_FLAG_INV_VCACHE;
+
+ if (has_CB_meta || has_DB_meta)
+ flush_bits |= RADV_CMD_FLAG_INV_L2_METADATA;
+ if (!image_is_coherent)
+ flush_bits |= RADV_CMD_FLAG_INV_L2;
+ break;
+ case VK_ACCESS_SHADER_READ_BIT:
+ flush_bits |= RADV_CMD_FLAG_INV_VCACHE;
+ /* Unlike LLVM, ACO uses SMEM for SSBOs and we have to
+ * invalidate the scalar cache. */
+ if (!cmd_buffer->device->physical_device->use_llvm && !image)
+ flush_bits |= RADV_CMD_FLAG_INV_SCACHE;
+
+ if (has_CB_meta || has_DB_meta)
+ flush_bits |= RADV_CMD_FLAG_INV_L2_METADATA;
+ if (!image_is_coherent)
+ flush_bits |= RADV_CMD_FLAG_INV_L2;
+ break;
+ case VK_ACCESS_SHADER_WRITE_BIT:
+ break;
+ case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT:
+ case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT:
+ if (flush_CB)
+ flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
+ if (has_CB_meta)
+ flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
+ break;
+ case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT:
+ case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT:
+ if (flush_DB)
+ flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB;
+ if (has_DB_meta)
+ flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
+ break;
+ case VK_ACCESS_MEMORY_READ_BIT:
+ case VK_ACCESS_MEMORY_WRITE_BIT:
+ flush_bits |= RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_SCACHE;
+ if (!image_is_coherent)
+ flush_bits |= RADV_CMD_FLAG_INV_L2;
+ if (flush_CB)
+ flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
+ if (has_CB_meta)
+ flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
+ if (flush_DB)
+ flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB;
+ if (has_DB_meta)
+ flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
+ break;
+ default:
+ break;
+ }
+ }
+ return flush_bits;
+}
+
+void
+radv_subpass_barrier(struct radv_cmd_buffer *cmd_buffer, const struct radv_subpass_barrier *barrier)
+{
+ struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
+ if (fb && !fb->imageless) {
+ for (int i = 0; i < fb->attachment_count; ++i) {
+ cmd_buffer->state.flush_bits |=
+ radv_src_access_flush(cmd_buffer, barrier->src_access_mask, fb->attachments[i]->image);
+ }
+ } else {
+ cmd_buffer->state.flush_bits |=
+ radv_src_access_flush(cmd_buffer, barrier->src_access_mask, NULL);
+ }
+
+ radv_stage_flush(cmd_buffer, barrier->src_stage_mask);
+
+ if (fb && !fb->imageless) {
+ for (int i = 0; i < fb->attachment_count; ++i) {
+ cmd_buffer->state.flush_bits |=
+ radv_dst_access_flush(cmd_buffer, barrier->dst_access_mask, fb->attachments[i]->image);
+ }
+ } else {
+ cmd_buffer->state.flush_bits |=
+ radv_dst_access_flush(cmd_buffer, barrier->dst_access_mask, NULL);
+ }
}
uint32_t
radv_get_subpass_id(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_cmd_state *state = &cmd_buffer->state;
- uint32_t subpass_id = state->subpass - state->pass->subpasses;
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ uint32_t subpass_id = state->subpass - state->pass->subpasses;
- /* The id of this subpass shouldn't exceed the number of subpasses in
- * this render pass minus 1.
- */
- assert(subpass_id < state->pass->subpass_count);
- return subpass_id;
+ /* The id of this subpass shouldn't exceed the number of subpasses in
+ * this render pass minus 1.
+ */
+ assert(subpass_id < state->pass->subpass_count);
+ return subpass_id;
}
static struct radv_sample_locations_state *
-radv_get_attachment_sample_locations(struct radv_cmd_buffer *cmd_buffer,
- uint32_t att_idx,
- bool begin_subpass)
-{
- struct radv_cmd_state *state = &cmd_buffer->state;
- uint32_t subpass_id = radv_get_subpass_id(cmd_buffer);
- struct radv_image_view *view = state->attachments[att_idx].iview;
-
- if (view->image->info.samples == 1)
- return NULL;
-
- if (state->pass->attachments[att_idx].first_subpass_idx == subpass_id) {
- /* Return the initial sample locations if this is the initial
- * layout transition of the given subpass attachemnt.
- */
- if (state->attachments[att_idx].sample_location.count > 0)
- return &state->attachments[att_idx].sample_location;
- } else {
- /* Otherwise return the subpass sample locations if defined. */
- if (state->subpass_sample_locs) {
- /* Because the driver sets the current subpass before
- * initial layout transitions, we should use the sample
- * locations from the previous subpass to avoid an
- * off-by-one problem. Otherwise, use the sample
- * locations for the current subpass for final layout
- * transitions.
- */
- if (begin_subpass)
- subpass_id--;
-
- for (uint32_t i = 0; i < state->num_subpass_sample_locs; i++) {
- if (state->subpass_sample_locs[i].subpass_idx == subpass_id)
- return &state->subpass_sample_locs[i].sample_location;
- }
- }
- }
-
- return NULL;
-}
-
-static void radv_handle_subpass_image_transition(struct radv_cmd_buffer *cmd_buffer,
- struct radv_subpass_attachment att,
- bool begin_subpass)
-{
- unsigned idx = att.attachment;
- struct radv_image_view *view = cmd_buffer->state.attachments[idx].iview;
- struct radv_sample_locations_state *sample_locs;
- VkImageSubresourceRange range;
- range.aspectMask = view->aspect_mask;
- range.baseMipLevel = view->base_mip;
- range.levelCount = 1;
- range.baseArrayLayer = view->base_layer;
- range.layerCount = cmd_buffer->state.framebuffer->layers;
-
- if (cmd_buffer->state.subpass->view_mask) {
- /* If the current subpass uses multiview, the driver might have
- * performed a fast color/depth clear to the whole image
- * (including all layers). To make sure the driver will
- * decompress the image correctly (if needed), we have to
- * account for the "real" number of layers. If the view mask is
- * sparse, this will decompress more layers than needed.
- */
- range.layerCount = util_last_bit(cmd_buffer->state.subpass->view_mask);
- }
-
- /* Get the subpass sample locations for the given attachment, if NULL
- * is returned the driver will use the default HW locations.
- */
- sample_locs = radv_get_attachment_sample_locations(cmd_buffer, idx,
- begin_subpass);
-
- /* Determine if the subpass uses separate depth/stencil layouts. */
- bool uses_separate_depth_stencil_layouts = false;
- if ((cmd_buffer->state.attachments[idx].current_layout !=
- cmd_buffer->state.attachments[idx].current_stencil_layout) ||
- (att.layout != att.stencil_layout)) {
- uses_separate_depth_stencil_layouts = true;
- }
-
- /* For separate layouts, perform depth and stencil transitions
- * separately.
- */
- if (uses_separate_depth_stencil_layouts &&
- (range.aspectMask == (VK_IMAGE_ASPECT_DEPTH_BIT |
- VK_IMAGE_ASPECT_STENCIL_BIT))) {
- /* Depth-only transitions. */
- range.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
- radv_handle_image_transition(cmd_buffer,
- view->image,
- cmd_buffer->state.attachments[idx].current_layout,
- cmd_buffer->state.attachments[idx].current_in_render_loop,
- att.layout, att.in_render_loop,
- 0, 0, &range, sample_locs);
-
- /* Stencil-only transitions. */
- range.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT;
- radv_handle_image_transition(cmd_buffer,
- view->image,
- cmd_buffer->state.attachments[idx].current_stencil_layout,
- cmd_buffer->state.attachments[idx].current_in_render_loop,
- att.stencil_layout, att.in_render_loop,
- 0, 0, &range, sample_locs);
- } else {
- radv_handle_image_transition(cmd_buffer,
- view->image,
- cmd_buffer->state.attachments[idx].current_layout,
- cmd_buffer->state.attachments[idx].current_in_render_loop,
- att.layout, att.in_render_loop,
- 0, 0, &range, sample_locs);
- }
-
- cmd_buffer->state.attachments[idx].current_layout = att.layout;
- cmd_buffer->state.attachments[idx].current_stencil_layout = att.stencil_layout;
- cmd_buffer->state.attachments[idx].current_in_render_loop = att.in_render_loop;
-
+radv_get_attachment_sample_locations(struct radv_cmd_buffer *cmd_buffer, uint32_t att_idx,
+ bool begin_subpass)
+{
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ uint32_t subpass_id = radv_get_subpass_id(cmd_buffer);
+ struct radv_image_view *view = state->attachments[att_idx].iview;
+
+ if (view->image->info.samples == 1)
+ return NULL;
+
+ if (state->pass->attachments[att_idx].first_subpass_idx == subpass_id) {
+ /* Return the initial sample locations if this is the initial
+       * layout transition of the given subpass attachment.
+ */
+ if (state->attachments[att_idx].sample_location.count > 0)
+ return &state->attachments[att_idx].sample_location;
+ } else {
+ /* Otherwise return the subpass sample locations if defined. */
+ if (state->subpass_sample_locs) {
+ /* Because the driver sets the current subpass before
+ * initial layout transitions, we should use the sample
+ * locations from the previous subpass to avoid an
+ * off-by-one problem. Otherwise, use the sample
+ * locations for the current subpass for final layout
+ * transitions.
+ */
+ if (begin_subpass)
+ subpass_id--;
+
+ for (uint32_t i = 0; i < state->num_subpass_sample_locs; i++) {
+ if (state->subpass_sample_locs[i].subpass_idx == subpass_id)
+ return &state->subpass_sample_locs[i].sample_location;
+ }
+ }
+ }
+
+ return NULL;
+}
+static void
+radv_handle_subpass_image_transition(struct radv_cmd_buffer *cmd_buffer,
+ struct radv_subpass_attachment att, bool begin_subpass)
+{
+ unsigned idx = att.attachment;
+ struct radv_image_view *view = cmd_buffer->state.attachments[idx].iview;
+ struct radv_sample_locations_state *sample_locs;
+ VkImageSubresourceRange range;
+ range.aspectMask = view->aspect_mask;
+ range.baseMipLevel = view->base_mip;
+ range.levelCount = 1;
+ range.baseArrayLayer = view->base_layer;
+ range.layerCount = cmd_buffer->state.framebuffer->layers;
+
+ if (cmd_buffer->state.subpass->view_mask) {
+ /* If the current subpass uses multiview, the driver might have
+ * performed a fast color/depth clear to the whole image
+ * (including all layers). To make sure the driver will
+ * decompress the image correctly (if needed), we have to
+ * account for the "real" number of layers. If the view mask is
+ * sparse, this will decompress more layers than needed.
+ */
+ range.layerCount = util_last_bit(cmd_buffer->state.subpass->view_mask);
+ }
+
+   /* Get the subpass sample locations for the given attachment. If NULL
+    * is returned, the driver will use the default HW locations.
+    */
+ sample_locs = radv_get_attachment_sample_locations(cmd_buffer, idx, begin_subpass);
+
+ /* Determine if the subpass uses separate depth/stencil layouts. */
+ bool uses_separate_depth_stencil_layouts = false;
+ if ((cmd_buffer->state.attachments[idx].current_layout !=
+ cmd_buffer->state.attachments[idx].current_stencil_layout) ||
+ (att.layout != att.stencil_layout)) {
+ uses_separate_depth_stencil_layouts = true;
+ }
+
+ /* For separate layouts, perform depth and stencil transitions
+ * separately.
+ */
+ if (uses_separate_depth_stencil_layouts &&
+ (range.aspectMask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) {
+ /* Depth-only transitions. */
+ range.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
+ radv_handle_image_transition(cmd_buffer, view->image,
+ cmd_buffer->state.attachments[idx].current_layout,
+ cmd_buffer->state.attachments[idx].current_in_render_loop,
+ att.layout, att.in_render_loop, 0, 0, &range, sample_locs);
+
+ /* Stencil-only transitions. */
+ range.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT;
+ radv_handle_image_transition(
+ cmd_buffer, view->image, cmd_buffer->state.attachments[idx].current_stencil_layout,
+ cmd_buffer->state.attachments[idx].current_in_render_loop, att.stencil_layout,
+ att.in_render_loop, 0, 0, &range, sample_locs);
+ } else {
+ radv_handle_image_transition(cmd_buffer, view->image,
+ cmd_buffer->state.attachments[idx].current_layout,
+ cmd_buffer->state.attachments[idx].current_in_render_loop,
+ att.layout, att.in_render_loop, 0, 0, &range, sample_locs);
+ }
+
+ cmd_buffer->state.attachments[idx].current_layout = att.layout;
+ cmd_buffer->state.attachments[idx].current_stencil_layout = att.stencil_layout;
+ cmd_buffer->state.attachments[idx].current_in_render_loop = att.in_render_loop;
}
void
-radv_cmd_buffer_set_subpass(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_subpass *subpass)
+radv_cmd_buffer_set_subpass(struct radv_cmd_buffer *cmd_buffer, const struct radv_subpass *subpass)
{
- cmd_buffer->state.subpass = subpass;
+ cmd_buffer->state.subpass = subpass;
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FRAMEBUFFER;
+ cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FRAMEBUFFER;
}
static VkResult
radv_cmd_state_setup_sample_locations(struct radv_cmd_buffer *cmd_buffer,
- struct radv_render_pass *pass,
- const VkRenderPassBeginInfo *info)
-{
- const struct VkRenderPassSampleLocationsBeginInfoEXT *sample_locs =
- vk_find_struct_const(info->pNext,
- RENDER_PASS_SAMPLE_LOCATIONS_BEGIN_INFO_EXT);
- struct radv_cmd_state *state = &cmd_buffer->state;
-
- if (!sample_locs) {
- state->subpass_sample_locs = NULL;
- return VK_SUCCESS;
- }
-
- for (uint32_t i = 0; i < sample_locs->attachmentInitialSampleLocationsCount; i++) {
- const VkAttachmentSampleLocationsEXT *att_sample_locs =
- &sample_locs->pAttachmentInitialSampleLocations[i];
- uint32_t att_idx = att_sample_locs->attachmentIndex;
- struct radv_image *image = cmd_buffer->state.attachments[att_idx].iview->image;
-
- assert(vk_format_is_depth_or_stencil(image->vk_format));
-
- /* From the Vulkan spec 1.1.108:
- *
- * "If the image referenced by the framebuffer attachment at
- * index attachmentIndex was not created with
- * VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT
- * then the values specified in sampleLocationsInfo are
- * ignored."
- */
- if (!(image->flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT))
- continue;
-
- const VkSampleLocationsInfoEXT *sample_locs_info =
- &att_sample_locs->sampleLocationsInfo;
-
- state->attachments[att_idx].sample_location.per_pixel =
- sample_locs_info->sampleLocationsPerPixel;
- state->attachments[att_idx].sample_location.grid_size =
- sample_locs_info->sampleLocationGridSize;
- state->attachments[att_idx].sample_location.count =
- sample_locs_info->sampleLocationsCount;
- typed_memcpy(&state->attachments[att_idx].sample_location.locations[0],
- sample_locs_info->pSampleLocations,
- sample_locs_info->sampleLocationsCount);
- }
-
- state->subpass_sample_locs = vk_alloc(&cmd_buffer->pool->alloc,
- sample_locs->postSubpassSampleLocationsCount *
- sizeof(state->subpass_sample_locs[0]),
- 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (state->subpass_sample_locs == NULL) {
- cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
- return cmd_buffer->record_result;
- }
-
- state->num_subpass_sample_locs = sample_locs->postSubpassSampleLocationsCount;
-
- for (uint32_t i = 0; i < sample_locs->postSubpassSampleLocationsCount; i++) {
- const VkSubpassSampleLocationsEXT *subpass_sample_locs_info =
- &sample_locs->pPostSubpassSampleLocations[i];
- const VkSampleLocationsInfoEXT *sample_locs_info =
- &subpass_sample_locs_info->sampleLocationsInfo;
-
- state->subpass_sample_locs[i].subpass_idx =
- subpass_sample_locs_info->subpassIndex;
- state->subpass_sample_locs[i].sample_location.per_pixel =
- sample_locs_info->sampleLocationsPerPixel;
- state->subpass_sample_locs[i].sample_location.grid_size =
- sample_locs_info->sampleLocationGridSize;
- state->subpass_sample_locs[i].sample_location.count =
- sample_locs_info->sampleLocationsCount;
- typed_memcpy(&state->subpass_sample_locs[i].sample_location.locations[0],
- sample_locs_info->pSampleLocations,
- sample_locs_info->sampleLocationsCount);
- }
-
- return VK_SUCCESS;
+ struct radv_render_pass *pass,
+ const VkRenderPassBeginInfo *info)
+{
+ const struct VkRenderPassSampleLocationsBeginInfoEXT *sample_locs =
+ vk_find_struct_const(info->pNext, RENDER_PASS_SAMPLE_LOCATIONS_BEGIN_INFO_EXT);
+ struct radv_cmd_state *state = &cmd_buffer->state;
+
+ if (!sample_locs) {
+ state->subpass_sample_locs = NULL;
+ return VK_SUCCESS;
+ }
+
+ for (uint32_t i = 0; i < sample_locs->attachmentInitialSampleLocationsCount; i++) {
+ const VkAttachmentSampleLocationsEXT *att_sample_locs =
+ &sample_locs->pAttachmentInitialSampleLocations[i];
+ uint32_t att_idx = att_sample_locs->attachmentIndex;
+ struct radv_image *image = cmd_buffer->state.attachments[att_idx].iview->image;
+
+ assert(vk_format_is_depth_or_stencil(image->vk_format));
+
+ /* From the Vulkan spec 1.1.108:
+ *
+ * "If the image referenced by the framebuffer attachment at
+ * index attachmentIndex was not created with
+ * VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT
+ * then the values specified in sampleLocationsInfo are
+ * ignored."
+ */
+ if (!(image->flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT))
+ continue;
+
+ const VkSampleLocationsInfoEXT *sample_locs_info = &att_sample_locs->sampleLocationsInfo;
+
+ state->attachments[att_idx].sample_location.per_pixel =
+ sample_locs_info->sampleLocationsPerPixel;
+ state->attachments[att_idx].sample_location.grid_size =
+ sample_locs_info->sampleLocationGridSize;
+ state->attachments[att_idx].sample_location.count = sample_locs_info->sampleLocationsCount;
+ typed_memcpy(&state->attachments[att_idx].sample_location.locations[0],
+ sample_locs_info->pSampleLocations, sample_locs_info->sampleLocationsCount);
+ }
+
+ state->subpass_sample_locs =
+ vk_alloc(&cmd_buffer->pool->alloc,
+ sample_locs->postSubpassSampleLocationsCount * sizeof(state->subpass_sample_locs[0]),
+ 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (state->subpass_sample_locs == NULL) {
+ cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
+ return cmd_buffer->record_result;
+ }
+
+ state->num_subpass_sample_locs = sample_locs->postSubpassSampleLocationsCount;
+
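+   /* Record the sample locations that apply after each listed subpass. */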
+ for (uint32_t i = 0; i < sample_locs->postSubpassSampleLocationsCount; i++) {
+ const VkSubpassSampleLocationsEXT *subpass_sample_locs_info =
+ &sample_locs->pPostSubpassSampleLocations[i];
+ const VkSampleLocationsInfoEXT *sample_locs_info =
+ &subpass_sample_locs_info->sampleLocationsInfo;
+
+ state->subpass_sample_locs[i].subpass_idx = subpass_sample_locs_info->subpassIndex;
+ state->subpass_sample_locs[i].sample_location.per_pixel =
+ sample_locs_info->sampleLocationsPerPixel;
+ state->subpass_sample_locs[i].sample_location.grid_size =
+ sample_locs_info->sampleLocationGridSize;
+ state->subpass_sample_locs[i].sample_location.count = sample_locs_info->sampleLocationsCount;
+ typed_memcpy(&state->subpass_sample_locs[i].sample_location.locations[0],
+ sample_locs_info->pSampleLocations, sample_locs_info->sampleLocationsCount);
+ }
+
+ return VK_SUCCESS;
}
static VkResult
-radv_cmd_state_setup_attachments(struct radv_cmd_buffer *cmd_buffer,
- struct radv_render_pass *pass,
- const VkRenderPassBeginInfo *info,
- const struct radv_extra_render_pass_begin_info *extra)
-{
- struct radv_cmd_state *state = &cmd_buffer->state;
- const struct VkRenderPassAttachmentBeginInfo *attachment_info = NULL;
-
- if (info) {
- attachment_info = vk_find_struct_const(info->pNext,
- RENDER_PASS_ATTACHMENT_BEGIN_INFO);
- }
-
-
- if (pass->attachment_count == 0) {
- state->attachments = NULL;
- return VK_SUCCESS;
- }
-
- state->attachments = vk_alloc(&cmd_buffer->pool->alloc,
- pass->attachment_count *
- sizeof(state->attachments[0]),
- 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (state->attachments == NULL) {
- cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
- return cmd_buffer->record_result;
- }
-
- for (uint32_t i = 0; i < pass->attachment_count; ++i) {
- struct radv_render_pass_attachment *att = &pass->attachments[i];
- VkImageAspectFlags att_aspects = vk_format_aspects(att->format);
- VkImageAspectFlags clear_aspects = 0;
-
- if (att_aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
- /* color attachment */
- if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
- clear_aspects |= VK_IMAGE_ASPECT_COLOR_BIT;
- }
- } else {
- /* depthstencil attachment */
- if ((att_aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
- att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
- clear_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
- if ((att_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
- att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_DONT_CARE)
- clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
- }
- if ((att_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
- att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
- clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
- }
- }
-
- state->attachments[i].pending_clear_aspects = clear_aspects;
- state->attachments[i].cleared_views = 0;
- if (clear_aspects && info) {
- assert(info->clearValueCount > i);
- state->attachments[i].clear_value = info->pClearValues[i];
- }
-
- state->attachments[i].current_layout = att->initial_layout;
- state->attachments[i].current_in_render_loop = false;
- state->attachments[i].current_stencil_layout = att->stencil_initial_layout;
- state->attachments[i].disable_dcc = extra && extra->disable_dcc;
- state->attachments[i].sample_location.count = 0;
-
- struct radv_image_view *iview;
- if (attachment_info && attachment_info->attachmentCount > i) {
- iview = radv_image_view_from_handle(attachment_info->pAttachments[i]);
- } else {
- iview = state->framebuffer->attachments[i];
- }
-
- state->attachments[i].iview = iview;
- if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
- radv_initialise_ds_surface(cmd_buffer->device, &state->attachments[i].ds, iview);
- } else {
- radv_initialise_color_surface(cmd_buffer->device, &state->attachments[i].cb, iview);
- }
- }
-
- return VK_SUCCESS;
-}
-
-VkResult radv_AllocateCommandBuffers(
- VkDevice _device,
- const VkCommandBufferAllocateInfo *pAllocateInfo,
- VkCommandBuffer *pCommandBuffers)
-{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_cmd_pool, pool, pAllocateInfo->commandPool);
-
- VkResult result = VK_SUCCESS;
- uint32_t i;
-
- for (i = 0; i < pAllocateInfo->commandBufferCount; i++) {
-
- if (!list_is_empty(&pool->free_cmd_buffers)) {
- struct radv_cmd_buffer *cmd_buffer = list_first_entry(&pool->free_cmd_buffers, struct radv_cmd_buffer, pool_link);
-
- list_del(&cmd_buffer->pool_link);
- list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers);
-
- result = radv_reset_cmd_buffer(cmd_buffer);
- cmd_buffer->level = pAllocateInfo->level;
- vk_object_base_reset(&cmd_buffer->base);
-
- pCommandBuffers[i] = radv_cmd_buffer_to_handle(cmd_buffer);
- } else {
- result = radv_create_cmd_buffer(device, pool, pAllocateInfo->level,
- &pCommandBuffers[i]);
- }
- if (result != VK_SUCCESS)
- break;
- }
-
- if (result != VK_SUCCESS) {
- radv_FreeCommandBuffers(_device, pAllocateInfo->commandPool,
- i, pCommandBuffers);
-
- /* From the Vulkan 1.0.66 spec:
- *
- * "vkAllocateCommandBuffers can be used to create multiple
- * command buffers. If the creation of any of those command
- * buffers fails, the implementation must destroy all
- * successfully created command buffer objects from this
- * command, set all entries of the pCommandBuffers array to
- * NULL and return the error."
- */
- memset(pCommandBuffers, 0,
- sizeof(*pCommandBuffers) * pAllocateInfo->commandBufferCount);
- }
-
- return result;
-}
-
-void radv_FreeCommandBuffers(
- VkDevice device,
- VkCommandPool commandPool,
- uint32_t commandBufferCount,
- const VkCommandBuffer *pCommandBuffers)
-{
- for (uint32_t i = 0; i < commandBufferCount; i++) {
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, pCommandBuffers[i]);
-
- if (cmd_buffer) {
- if (cmd_buffer->pool) {
- list_del(&cmd_buffer->pool_link);
- list_addtail(&cmd_buffer->pool_link, &cmd_buffer->pool->free_cmd_buffers);
- } else
- radv_destroy_cmd_buffer(cmd_buffer);
-
- }
- }
-}
-
-VkResult radv_ResetCommandBuffer(
- VkCommandBuffer commandBuffer,
- VkCommandBufferResetFlags flags)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- return radv_reset_cmd_buffer(cmd_buffer);
-}
-
-VkResult radv_BeginCommandBuffer(
- VkCommandBuffer commandBuffer,
- const VkCommandBufferBeginInfo *pBeginInfo)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- VkResult result = VK_SUCCESS;
-
- if (cmd_buffer->status != RADV_CMD_BUFFER_STATUS_INITIAL) {
- /* If the command buffer has already been resetted with
- * vkResetCommandBuffer, no need to do it again.
- */
- result = radv_reset_cmd_buffer(cmd_buffer);
- if (result != VK_SUCCESS)
- return result;
- }
-
- memset(&cmd_buffer->state, 0, sizeof(cmd_buffer->state));
- cmd_buffer->state.last_primitive_reset_en = -1;
- cmd_buffer->state.last_index_type = -1;
- cmd_buffer->state.last_num_instances = -1;
- cmd_buffer->state.last_vertex_offset = -1;
- cmd_buffer->state.last_first_instance = -1;
- cmd_buffer->state.last_drawid = -1;
- cmd_buffer->state.predication_type = -1;
- cmd_buffer->state.last_sx_ps_downconvert = -1;
- cmd_buffer->state.last_sx_blend_opt_epsilon = -1;
- cmd_buffer->state.last_sx_blend_opt_control = -1;
- cmd_buffer->usage_flags = pBeginInfo->flags;
-
- if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY &&
- (pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT)) {
- assert(pBeginInfo->pInheritanceInfo);
- cmd_buffer->state.framebuffer = radv_framebuffer_from_handle(pBeginInfo->pInheritanceInfo->framebuffer);
- cmd_buffer->state.pass = radv_render_pass_from_handle(pBeginInfo->pInheritanceInfo->renderPass);
-
- struct radv_subpass *subpass =
- &cmd_buffer->state.pass->subpasses[pBeginInfo->pInheritanceInfo->subpass];
-
- if (cmd_buffer->state.framebuffer) {
- result = radv_cmd_state_setup_attachments(cmd_buffer, cmd_buffer->state.pass, NULL, NULL);
- if (result != VK_SUCCESS)
- return result;
- }
-
- cmd_buffer->state.inherited_pipeline_statistics =
- pBeginInfo->pInheritanceInfo->pipelineStatistics;
-
- radv_cmd_buffer_set_subpass(cmd_buffer, subpass);
- }
-
- if (unlikely(cmd_buffer->device->trace_bo))
- radv_cmd_buffer_trace_emit(cmd_buffer);
-
- radv_describe_begin_cmd_buffer(cmd_buffer);
-
- cmd_buffer->status = RADV_CMD_BUFFER_STATUS_RECORDING;
-
- return result;
-}
-
-void radv_CmdBindVertexBuffers(
- VkCommandBuffer commandBuffer,
- uint32_t firstBinding,
- uint32_t bindingCount,
- const VkBuffer* pBuffers,
- const VkDeviceSize* pOffsets)
-{
- radv_CmdBindVertexBuffers2EXT(commandBuffer, firstBinding,
- bindingCount, pBuffers, pOffsets,
- NULL, NULL);
-}
-
-void radv_CmdBindVertexBuffers2EXT(
- VkCommandBuffer commandBuffer,
- uint32_t firstBinding,
- uint32_t bindingCount,
- const VkBuffer* pBuffers,
- const VkDeviceSize* pOffsets,
- const VkDeviceSize* pSizes,
- const VkDeviceSize* pStrides)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_vertex_binding *vb = cmd_buffer->vertex_bindings;
- bool changed = false;
-
- /* We have to defer setting up vertex buffer since we need the buffer
- * stride from the pipeline. */
-
- assert(firstBinding + bindingCount <= MAX_VBS);
- for (uint32_t i = 0; i < bindingCount; i++) {
- RADV_FROM_HANDLE(radv_buffer, buffer, pBuffers[i]);
- uint32_t idx = firstBinding + i;
- VkDeviceSize size = pSizes ? pSizes[i] : 0;
- VkDeviceSize stride = pStrides ? pStrides[i] : 0;
-
- /* pSizes and pStrides are optional. */
- if (!changed &&
- (vb[idx].buffer != buffer ||
- vb[idx].offset != pOffsets[i] ||
- vb[idx].size != size ||
- vb[idx].stride != stride)) {
- changed = true;
- }
-
- vb[idx].buffer = buffer;
- vb[idx].offset = pOffsets[i];
- vb[idx].size = size;
- vb[idx].stride = stride;
-
- if (buffer) {
- radv_cs_add_buffer(cmd_buffer->device->ws,
- cmd_buffer->cs, vb[idx].buffer->bo);
- }
- }
-
- if (!changed) {
- /* No state changes. */
- return;
- }
-
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_VERTEX_BUFFER;
+radv_cmd_state_setup_attachments(struct radv_cmd_buffer *cmd_buffer, struct radv_render_pass *pass,
+ const VkRenderPassBeginInfo *info,
+ const struct radv_extra_render_pass_begin_info *extra)
+{
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ const struct VkRenderPassAttachmentBeginInfo *attachment_info = NULL;
+
+ if (info) {
+ attachment_info = vk_find_struct_const(info->pNext, RENDER_PASS_ATTACHMENT_BEGIN_INFO);
+ }
+
+ if (pass->attachment_count == 0) {
+ state->attachments = NULL;
+ return VK_SUCCESS;
+ }
+
+ state->attachments =
+ vk_alloc(&cmd_buffer->pool->alloc, pass->attachment_count * sizeof(state->attachments[0]), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (state->attachments == NULL) {
+ cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
+ return cmd_buffer->record_result;
+ }
+
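+   /* For each attachment, determine which aspects need to be cleared on load
+    * and record the initial layouts, clear values and surface descriptors. */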
+ for (uint32_t i = 0; i < pass->attachment_count; ++i) {
+ struct radv_render_pass_attachment *att = &pass->attachments[i];
+ VkImageAspectFlags att_aspects = vk_format_aspects(att->format);
+ VkImageAspectFlags clear_aspects = 0;
+
+ if (att_aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
+ /* color attachment */
+ if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
+ clear_aspects |= VK_IMAGE_ASPECT_COLOR_BIT;
+ }
+ } else {
+         /* depth/stencil attachment */
+ if ((att_aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
+ att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
+ clear_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
+ if ((att_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
+ att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_DONT_CARE)
+ clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
+ }
+ if ((att_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
+ att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
+ clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
+ }
+ }
+
+ state->attachments[i].pending_clear_aspects = clear_aspects;
+ state->attachments[i].cleared_views = 0;
+ if (clear_aspects && info) {
+ assert(info->clearValueCount > i);
+ state->attachments[i].clear_value = info->pClearValues[i];
+ }
+
+ state->attachments[i].current_layout = att->initial_layout;
+ state->attachments[i].current_in_render_loop = false;
+ state->attachments[i].current_stencil_layout = att->stencil_initial_layout;
+ state->attachments[i].disable_dcc = extra && extra->disable_dcc;
+ state->attachments[i].sample_location.count = 0;
+
+ struct radv_image_view *iview;
+ if (attachment_info && attachment_info->attachmentCount > i) {
+ iview = radv_image_view_from_handle(attachment_info->pAttachments[i]);
+ } else {
+ iview = state->framebuffer->attachments[i];
+ }
+
+ state->attachments[i].iview = iview;
+ if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
+ radv_initialise_ds_surface(cmd_buffer->device, &state->attachments[i].ds, iview);
+ } else {
+ radv_initialise_color_surface(cmd_buffer->device, &state->attachments[i].cb, iview);
+ }
+ }
+
+ return VK_SUCCESS;
+}
+
+VkResult
+radv_AllocateCommandBuffers(VkDevice _device, const VkCommandBufferAllocateInfo *pAllocateInfo,
+ VkCommandBuffer *pCommandBuffers)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_cmd_pool, pool, pAllocateInfo->commandPool);
+
+ VkResult result = VK_SUCCESS;
+ uint32_t i;
+
+ for (i = 0; i < pAllocateInfo->commandBufferCount; i++) {
+
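+      /* Reuse a command buffer from the pool's free list when possible;
+       * otherwise allocate a new one. */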
+ if (!list_is_empty(&pool->free_cmd_buffers)) {
+ struct radv_cmd_buffer *cmd_buffer =
+ list_first_entry(&pool->free_cmd_buffers, struct radv_cmd_buffer, pool_link);
+
+ list_del(&cmd_buffer->pool_link);
+ list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers);
+
+ result = radv_reset_cmd_buffer(cmd_buffer);
+ cmd_buffer->level = pAllocateInfo->level;
+ vk_object_base_reset(&cmd_buffer->base);
+
+ pCommandBuffers[i] = radv_cmd_buffer_to_handle(cmd_buffer);
+ } else {
+ result = radv_create_cmd_buffer(device, pool, pAllocateInfo->level, &pCommandBuffers[i]);
+ }
+ if (result != VK_SUCCESS)
+ break;
+ }
+
+ if (result != VK_SUCCESS) {
+ radv_FreeCommandBuffers(_device, pAllocateInfo->commandPool, i, pCommandBuffers);
+
+ /* From the Vulkan 1.0.66 spec:
+ *
+ * "vkAllocateCommandBuffers can be used to create multiple
+ * command buffers. If the creation of any of those command
+ * buffers fails, the implementation must destroy all
+ * successfully created command buffer objects from this
+ * command, set all entries of the pCommandBuffers array to
+ * NULL and return the error."
+ */
+ memset(pCommandBuffers, 0, sizeof(*pCommandBuffers) * pAllocateInfo->commandBufferCount);
+ }
+
+ return result;
+}
+
+void
+radv_FreeCommandBuffers(VkDevice device, VkCommandPool commandPool, uint32_t commandBufferCount,
+ const VkCommandBuffer *pCommandBuffers)
+{
+ for (uint32_t i = 0; i < commandBufferCount; i++) {
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, pCommandBuffers[i]);
+
+ if (cmd_buffer) {
+ if (cmd_buffer->pool) {
+ list_del(&cmd_buffer->pool_link);
+ list_addtail(&cmd_buffer->pool_link, &cmd_buffer->pool->free_cmd_buffers);
+ } else
+ radv_destroy_cmd_buffer(cmd_buffer);
+ }
+ }
+}
+
+VkResult
+radv_ResetCommandBuffer(VkCommandBuffer commandBuffer, VkCommandBufferResetFlags flags)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ return radv_reset_cmd_buffer(cmd_buffer);
+}
+
+VkResult
+radv_BeginCommandBuffer(VkCommandBuffer commandBuffer, const VkCommandBufferBeginInfo *pBeginInfo)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ VkResult result = VK_SUCCESS;
+
+ if (cmd_buffer->status != RADV_CMD_BUFFER_STATUS_INITIAL) {
+      /* If the command buffer has already been reset with
+       * vkResetCommandBuffer, there is no need to do it again.
+ */
+ result = radv_reset_cmd_buffer(cmd_buffer);
+ if (result != VK_SUCCESS)
+ return result;
+ }
+
+ memset(&cmd_buffer->state, 0, sizeof(cmd_buffer->state));
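+   /* Mark the tracked draw state as unknown (-1) so that it is re-emitted the
+    * first time it is needed. */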
+ cmd_buffer->state.last_primitive_reset_en = -1;
+ cmd_buffer->state.last_index_type = -1;
+ cmd_buffer->state.last_num_instances = -1;
+ cmd_buffer->state.last_vertex_offset = -1;
+ cmd_buffer->state.last_first_instance = -1;
+ cmd_buffer->state.last_drawid = -1;
+ cmd_buffer->state.predication_type = -1;
+ cmd_buffer->state.last_sx_ps_downconvert = -1;
+ cmd_buffer->state.last_sx_blend_opt_epsilon = -1;
+ cmd_buffer->state.last_sx_blend_opt_control = -1;
+ cmd_buffer->usage_flags = pBeginInfo->flags;
+
+ if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY &&
+ (pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT)) {
+ assert(pBeginInfo->pInheritanceInfo);
+ cmd_buffer->state.framebuffer =
+ radv_framebuffer_from_handle(pBeginInfo->pInheritanceInfo->framebuffer);
+ cmd_buffer->state.pass =
+ radv_render_pass_from_handle(pBeginInfo->pInheritanceInfo->renderPass);
+
+ struct radv_subpass *subpass =
+ &cmd_buffer->state.pass->subpasses[pBeginInfo->pInheritanceInfo->subpass];
+
+ if (cmd_buffer->state.framebuffer) {
+ result = radv_cmd_state_setup_attachments(cmd_buffer, cmd_buffer->state.pass, NULL, NULL);
+ if (result != VK_SUCCESS)
+ return result;
+ }
+
+ cmd_buffer->state.inherited_pipeline_statistics =
+ pBeginInfo->pInheritanceInfo->pipelineStatistics;
+
+ radv_cmd_buffer_set_subpass(cmd_buffer, subpass);
+ }
+
+ if (unlikely(cmd_buffer->device->trace_bo))
+ radv_cmd_buffer_trace_emit(cmd_buffer);
+
+ radv_describe_begin_cmd_buffer(cmd_buffer);
+
+ cmd_buffer->status = RADV_CMD_BUFFER_STATUS_RECORDING;
+
+ return result;
+}
+
+void
+radv_CmdBindVertexBuffers(VkCommandBuffer commandBuffer, uint32_t firstBinding,
+ uint32_t bindingCount, const VkBuffer *pBuffers,
+ const VkDeviceSize *pOffsets)
+{
+ radv_CmdBindVertexBuffers2EXT(commandBuffer, firstBinding, bindingCount, pBuffers, pOffsets,
+ NULL, NULL);
+}
+
+void
+radv_CmdBindVertexBuffers2EXT(VkCommandBuffer commandBuffer, uint32_t firstBinding,
+ uint32_t bindingCount, const VkBuffer *pBuffers,
+ const VkDeviceSize *pOffsets, const VkDeviceSize *pSizes,
+ const VkDeviceSize *pStrides)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_vertex_binding *vb = cmd_buffer->vertex_bindings;
+ bool changed = false;
+
+   /* We have to defer setting up the vertex buffers since we need the buffer
+    * strides from the pipeline. */
+
+ assert(firstBinding + bindingCount <= MAX_VBS);
+ for (uint32_t i = 0; i < bindingCount; i++) {
+ RADV_FROM_HANDLE(radv_buffer, buffer, pBuffers[i]);
+ uint32_t idx = firstBinding + i;
+ VkDeviceSize size = pSizes ? pSizes[i] : 0;
+ VkDeviceSize stride = pStrides ? pStrides[i] : 0;
+
+ /* pSizes and pStrides are optional. */
+ if (!changed && (vb[idx].buffer != buffer || vb[idx].offset != pOffsets[i] ||
+ vb[idx].size != size || vb[idx].stride != stride)) {
+ changed = true;
+ }
+
+ vb[idx].buffer = buffer;
+ vb[idx].offset = pOffsets[i];
+ vb[idx].size = size;
+ vb[idx].stride = stride;
+
+ if (buffer) {
+ radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, vb[idx].buffer->bo);
+ }
+ }
+
+ if (!changed) {
+ /* No state changes. */
+ return;
+ }
+
+ cmd_buffer->state.dirty |= RADV_CMD_DIRTY_VERTEX_BUFFER;
}
static uint32_t
vk_to_index_type(VkIndexType type)
{
- switch (type) {
- case VK_INDEX_TYPE_UINT8_EXT:
- return V_028A7C_VGT_INDEX_8;
- case VK_INDEX_TYPE_UINT16:
- return V_028A7C_VGT_INDEX_16;
- case VK_INDEX_TYPE_UINT32:
- return V_028A7C_VGT_INDEX_32;
- default:
- unreachable("invalid index type");
- }
+ switch (type) {
+ case VK_INDEX_TYPE_UINT8_EXT:
+ return V_028A7C_VGT_INDEX_8;
+ case VK_INDEX_TYPE_UINT16:
+ return V_028A7C_VGT_INDEX_16;
+ case VK_INDEX_TYPE_UINT32:
+ return V_028A7C_VGT_INDEX_32;
+ default:
+ unreachable("invalid index type");
+ }
}
static uint32_t
radv_get_vgt_index_size(uint32_t type)
{
- switch (type) {
- case V_028A7C_VGT_INDEX_8:
- return 1;
- case V_028A7C_VGT_INDEX_16:
- return 2;
- case V_028A7C_VGT_INDEX_32:
- return 4;
- default:
- unreachable("invalid index type");
- }
+ switch (type) {
+ case V_028A7C_VGT_INDEX_8:
+ return 1;
+ case V_028A7C_VGT_INDEX_16:
+ return 2;
+ case V_028A7C_VGT_INDEX_32:
+ return 4;
+ default:
+ unreachable("invalid index type");
+ }
}
-void radv_CmdBindIndexBuffer(
- VkCommandBuffer commandBuffer,
- VkBuffer buffer,
- VkDeviceSize offset,
- VkIndexType indexType)
+void
+radv_CmdBindIndexBuffer(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
+ VkIndexType indexType)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_buffer, index_buffer, buffer);
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_buffer, index_buffer, buffer);
- if (cmd_buffer->state.index_buffer == index_buffer &&
- cmd_buffer->state.index_offset == offset &&
- cmd_buffer->state.index_type == indexType) {
- /* No state changes. */
- return;
- }
+ if (cmd_buffer->state.index_buffer == index_buffer && cmd_buffer->state.index_offset == offset &&
+ cmd_buffer->state.index_type == indexType) {
+ /* No state changes. */
+ return;
+ }
- cmd_buffer->state.index_buffer = index_buffer;
- cmd_buffer->state.index_offset = offset;
- cmd_buffer->state.index_type = vk_to_index_type(indexType);
- cmd_buffer->state.index_va = radv_buffer_get_va(index_buffer->bo);
- cmd_buffer->state.index_va += index_buffer->offset + offset;
+ cmd_buffer->state.index_buffer = index_buffer;
+ cmd_buffer->state.index_offset = offset;
+ cmd_buffer->state.index_type = vk_to_index_type(indexType);
+ cmd_buffer->state.index_va = radv_buffer_get_va(index_buffer->bo);
+ cmd_buffer->state.index_va += index_buffer->offset + offset;
- int index_size = radv_get_vgt_index_size(vk_to_index_type(indexType));
- cmd_buffer->state.max_index_count = (index_buffer->size - offset) / index_size;
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_INDEX_BUFFER;
- radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, index_buffer->bo);
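+   /* max_index_count is the number of indices that fit between the bound
+    * offset and the end of the buffer. */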
+ int index_size = radv_get_vgt_index_size(vk_to_index_type(indexType));
+ cmd_buffer->state.max_index_count = (index_buffer->size - offset) / index_size;
+ cmd_buffer->state.dirty |= RADV_CMD_DIRTY_INDEX_BUFFER;
+ radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, index_buffer->bo);
}
-
static void
-radv_bind_descriptor_set(struct radv_cmd_buffer *cmd_buffer,
- VkPipelineBindPoint bind_point,
- struct radv_descriptor_set *set, unsigned idx)
-{
- struct radeon_winsys *ws = cmd_buffer->device->ws;
-
- radv_set_descriptor_set(cmd_buffer, bind_point, set, idx);
-
- assert(set);
-
- if (!cmd_buffer->device->use_global_bo_list) {
- for (unsigned j = 0; j < set->header.buffer_count; ++j)
- if (set->descriptors[j])
- radv_cs_add_buffer(ws, cmd_buffer->cs, set->descriptors[j]);
- }
-
- if(set->header.bo)
- radv_cs_add_buffer(ws, cmd_buffer->cs, set->header.bo);
-}
-
-void radv_CmdBindDescriptorSets(
- VkCommandBuffer commandBuffer,
- VkPipelineBindPoint pipelineBindPoint,
- VkPipelineLayout _layout,
- uint32_t firstSet,
- uint32_t descriptorSetCount,
- const VkDescriptorSet* pDescriptorSets,
- uint32_t dynamicOffsetCount,
- const uint32_t* pDynamicOffsets)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout);
- unsigned dyn_idx = 0;
-
- const bool no_dynamic_bounds = cmd_buffer->device->instance->debug_flags & RADV_DEBUG_NO_DYNAMIC_BOUNDS;
- struct radv_descriptor_state *descriptors_state =
- radv_get_descriptors_state(cmd_buffer, pipelineBindPoint);
-
- for (unsigned i = 0; i < descriptorSetCount; ++i) {
- unsigned set_idx = i + firstSet;
- RADV_FROM_HANDLE(radv_descriptor_set, set, pDescriptorSets[i]);
-
- /* If the set is already bound we only need to update the
- * (potentially changed) dynamic offsets. */
- if (descriptors_state->sets[set_idx] != set ||
- !(descriptors_state->valid & (1u << set_idx))) {
- radv_bind_descriptor_set(cmd_buffer, pipelineBindPoint, set, set_idx);
- }
-
- for(unsigned j = 0; j < layout->set[set_idx].dynamic_offset_count; ++j, ++dyn_idx) {
- unsigned idx = j + layout->set[i + firstSet].dynamic_offset_start;
- uint32_t *dst = descriptors_state->dynamic_buffers + idx * 4;
- assert(dyn_idx < dynamicOffsetCount);
-
- struct radv_descriptor_range *range = set->header.dynamic_descriptors + j;
-
- if (!range->va) {
- memset(dst, 0, 4 * 4);
- } else {
- uint64_t va = range->va + pDynamicOffsets[dyn_idx];
- dst[0] = va;
- dst[1] = S_008F04_BASE_ADDRESS_HI(va >> 32);
- dst[2] = no_dynamic_bounds ? 0xffffffffu : range->size;
- dst[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
- S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
- S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
- S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
-
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
- dst[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
- S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
- S_008F0C_RESOURCE_LEVEL(1);
- } else {
- dst[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
- }
- }
-
- cmd_buffer->push_constant_stages |= layout->set[set_idx].dynamic_offset_stages;
- }
- }
-}
-
-static bool radv_init_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer,
- struct radv_descriptor_set *set,
- struct radv_descriptor_set_layout *layout,
- VkPipelineBindPoint bind_point)
-{
- struct radv_descriptor_state *descriptors_state =
- radv_get_descriptors_state(cmd_buffer, bind_point);
- set->header.size = layout->size;
- set->header.layout = layout;
-
- if (descriptors_state->push_set.capacity < set->header.size) {
- size_t new_size = MAX2(set->header.size, 1024);
- new_size = MAX2(new_size, 2 * descriptors_state->push_set.capacity);
- new_size = MIN2(new_size, 96 * MAX_PUSH_DESCRIPTORS);
-
- free(set->header.mapped_ptr);
- set->header.mapped_ptr = malloc(new_size);
-
- if (!set->header.mapped_ptr) {
- descriptors_state->push_set.capacity = 0;
- cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
- return false;
- }
-
- descriptors_state->push_set.capacity = new_size;
- }
-
- return true;
-}
-
-void radv_meta_push_descriptor_set(
- struct radv_cmd_buffer* cmd_buffer,
- VkPipelineBindPoint pipelineBindPoint,
- VkPipelineLayout _layout,
- uint32_t set,
- uint32_t descriptorWriteCount,
- const VkWriteDescriptorSet* pDescriptorWrites)
-{
- RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout);
- struct radv_descriptor_set *push_set =
- (struct radv_descriptor_set *)&cmd_buffer->meta_push_descriptors;
- unsigned bo_offset;
-
- assert(set == 0);
- assert(layout->set[set].layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR);
-
- push_set->header.size = layout->set[set].layout->size;
- push_set->header.layout = layout->set[set].layout;
-
- if (!radv_cmd_buffer_upload_alloc(cmd_buffer, push_set->header.size,
- &bo_offset, (void**) &push_set->header.mapped_ptr))
- return;
-
- push_set->header.va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
- push_set->header.va += bo_offset;
-
- radv_update_descriptor_sets(cmd_buffer->device, cmd_buffer,
- radv_descriptor_set_to_handle(push_set),
- descriptorWriteCount, pDescriptorWrites, 0, NULL);
-
- radv_set_descriptor_set(cmd_buffer, pipelineBindPoint, push_set, set);
-}
-
-void radv_CmdPushDescriptorSetKHR(
- VkCommandBuffer commandBuffer,
- VkPipelineBindPoint pipelineBindPoint,
- VkPipelineLayout _layout,
- uint32_t set,
- uint32_t descriptorWriteCount,
- const VkWriteDescriptorSet* pDescriptorWrites)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout);
- struct radv_descriptor_state *descriptors_state =
- radv_get_descriptors_state(cmd_buffer, pipelineBindPoint);
- struct radv_descriptor_set *push_set =
- (struct radv_descriptor_set *)&descriptors_state->push_set.set;
-
- assert(layout->set[set].layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR);
-
- if (!radv_init_push_descriptor_set(cmd_buffer, push_set,
- layout->set[set].layout,
- pipelineBindPoint))
- return;
+radv_bind_descriptor_set(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point,
+ struct radv_descriptor_set *set, unsigned idx)
+{
+ struct radeon_winsys *ws = cmd_buffer->device->ws;
+
+ radv_set_descriptor_set(cmd_buffer, bind_point, set, idx);
- /* Check that there are no inline uniform block updates when calling vkCmdPushDescriptorSetKHR()
- * because it is invalid, according to Vulkan spec.
- */
- for (int i = 0; i < descriptorWriteCount; i++) {
- ASSERTED const VkWriteDescriptorSet *writeset = &pDescriptorWrites[i];
- assert(writeset->descriptorType != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT);
- }
+ assert(set);
- radv_update_descriptor_sets(cmd_buffer->device, cmd_buffer,
- radv_descriptor_set_to_handle(push_set),
- descriptorWriteCount, pDescriptorWrites, 0, NULL);
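+   /* Without the global BO list, each buffer referenced by the set has to be
+    * added to the command stream's buffer list explicitly. */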
+ if (!cmd_buffer->device->use_global_bo_list) {
+ for (unsigned j = 0; j < set->header.buffer_count; ++j)
+ if (set->descriptors[j])
+ radv_cs_add_buffer(ws, cmd_buffer->cs, set->descriptors[j]);
+ }
- radv_set_descriptor_set(cmd_buffer, pipelineBindPoint, push_set, set);
- descriptors_state->push_dirty = true;
+ if (set->header.bo)
+ radv_cs_add_buffer(ws, cmd_buffer->cs, set->header.bo);
+}
+
+void
+radv_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint,
+ VkPipelineLayout _layout, uint32_t firstSet, uint32_t descriptorSetCount,
+ const VkDescriptorSet *pDescriptorSets, uint32_t dynamicOffsetCount,
+ const uint32_t *pDynamicOffsets)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout);
+ unsigned dyn_idx = 0;
+
+ const bool no_dynamic_bounds =
+ cmd_buffer->device->instance->debug_flags & RADV_DEBUG_NO_DYNAMIC_BOUNDS;
+ struct radv_descriptor_state *descriptors_state =
+ radv_get_descriptors_state(cmd_buffer, pipelineBindPoint);
+
+ for (unsigned i = 0; i < descriptorSetCount; ++i) {
+ unsigned set_idx = i + firstSet;
+ RADV_FROM_HANDLE(radv_descriptor_set, set, pDescriptorSets[i]);
+
+      /* If the set is already bound, we only need to update the
+       * (potentially changed) dynamic offsets. */
+ if (descriptors_state->sets[set_idx] != set ||
+ !(descriptors_state->valid & (1u << set_idx))) {
+ radv_bind_descriptor_set(cmd_buffer, pipelineBindPoint, set, set_idx);
+ }
+
+ for (unsigned j = 0; j < layout->set[set_idx].dynamic_offset_count; ++j, ++dyn_idx) {
+ unsigned idx = j + layout->set[i + firstSet].dynamic_offset_start;
+ uint32_t *dst = descriptors_state->dynamic_buffers + idx * 4;
+ assert(dyn_idx < dynamicOffsetCount);
+
+ struct radv_descriptor_range *range = set->header.dynamic_descriptors + j;
+
+ if (!range->va) {
+ memset(dst, 0, 4 * 4);
+ } else {
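+            /* Build a 4-dword buffer descriptor: dst[0..1] hold the address,
+             * dst[2] the range (unbounded with RADV_DEBUG_NO_DYNAMIC_BOUNDS) and
+             * dst[3] the dst swizzle and format. */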
+ uint64_t va = range->va + pDynamicOffsets[dyn_idx];
+ dst[0] = va;
+ dst[1] = S_008F04_BASE_ADDRESS_HI(va >> 32);
+ dst[2] = no_dynamic_bounds ? 0xffffffffu : range->size;
+ dst[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
+ dst[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+ S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
+ } else {
+ dst[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+ }
+ }
+
+ cmd_buffer->push_constant_stages |= layout->set[set_idx].dynamic_offset_stages;
+ }
+ }
}
-void radv_CmdPushDescriptorSetWithTemplateKHR(
- VkCommandBuffer commandBuffer,
- VkDescriptorUpdateTemplate descriptorUpdateTemplate,
- VkPipelineLayout _layout,
- uint32_t set,
- const void* pData)
+static bool
+radv_init_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer, struct radv_descriptor_set *set,
+ struct radv_descriptor_set_layout *layout,
+ VkPipelineBindPoint bind_point)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout);
- RADV_FROM_HANDLE(radv_descriptor_update_template, templ, descriptorUpdateTemplate);
- struct radv_descriptor_state *descriptors_state =
- radv_get_descriptors_state(cmd_buffer, templ->bind_point);
- struct radv_descriptor_set *push_set =
- (struct radv_descriptor_set *)&descriptors_state->push_set.set;
+ struct radv_descriptor_state *descriptors_state =
+ radv_get_descriptors_state(cmd_buffer, bind_point);
+ set->header.size = layout->size;
+ set->header.layout = layout;
+
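+   /* Grow the host-side storage for the push set geometrically, with an upper
+    * bound of 96 * MAX_PUSH_DESCRIPTORS bytes. */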
+ if (descriptors_state->push_set.capacity < set->header.size) {
+ size_t new_size = MAX2(set->header.size, 1024);
+ new_size = MAX2(new_size, 2 * descriptors_state->push_set.capacity);
+ new_size = MIN2(new_size, 96 * MAX_PUSH_DESCRIPTORS);
- assert(layout->set[set].layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR);
+ free(set->header.mapped_ptr);
+ set->header.mapped_ptr = malloc(new_size);
- if (!radv_init_push_descriptor_set(cmd_buffer, push_set,
- layout->set[set].layout,
- templ->bind_point))
- return;
+ if (!set->header.mapped_ptr) {
+ descriptors_state->push_set.capacity = 0;
+ cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
+ return false;
+ }
- radv_update_descriptor_set_with_template(cmd_buffer->device, cmd_buffer, push_set,
- descriptorUpdateTemplate, pData);
+ descriptors_state->push_set.capacity = new_size;
+ }
- radv_set_descriptor_set(cmd_buffer, templ->bind_point, push_set, set);
- descriptors_state->push_dirty = true;
+ return true;
}
-void radv_CmdPushConstants(VkCommandBuffer commandBuffer,
- VkPipelineLayout layout,
- VkShaderStageFlags stageFlags,
- uint32_t offset,
- uint32_t size,
- const void* pValues)
+void
+radv_meta_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer,
+ VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout _layout,
+ uint32_t set, uint32_t descriptorWriteCount,
+ const VkWriteDescriptorSet *pDescriptorWrites)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- memcpy(cmd_buffer->push_constants + offset, pValues, size);
- cmd_buffer->push_constant_stages |= stageFlags;
+ RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout);
+ struct radv_descriptor_set *push_set =
+ (struct radv_descriptor_set *)&cmd_buffer->meta_push_descriptors;
+ unsigned bo_offset;
+
+ assert(set == 0);
+ assert(layout->set[set].layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR);
+
+ push_set->header.size = layout->set[set].layout->size;
+ push_set->header.layout = layout->set[set].layout;
+
+ if (!radv_cmd_buffer_upload_alloc(cmd_buffer, push_set->header.size, &bo_offset,
+ (void **)&push_set->header.mapped_ptr))
+ return;
+
+ push_set->header.va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
+ push_set->header.va += bo_offset;
+
+ radv_update_descriptor_sets(cmd_buffer->device, cmd_buffer,
+ radv_descriptor_set_to_handle(push_set), descriptorWriteCount,
+ pDescriptorWrites, 0, NULL);
+
+ radv_set_descriptor_set(cmd_buffer, pipelineBindPoint, push_set, set);
}
-VkResult radv_EndCommandBuffer(
- VkCommandBuffer commandBuffer)
+void
+radv_CmdPushDescriptorSetKHR(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint,
+ VkPipelineLayout _layout, uint32_t set, uint32_t descriptorWriteCount,
+ const VkWriteDescriptorSet *pDescriptorWrites)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout);
+ struct radv_descriptor_state *descriptors_state =
+ radv_get_descriptors_state(cmd_buffer, pipelineBindPoint);
+ struct radv_descriptor_set *push_set =
+ (struct radv_descriptor_set *)&descriptors_state->push_set.set;
- radv_emit_mip_change_flush_default(cmd_buffer);
+ assert(layout->set[set].layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR);
- if (cmd_buffer->queue_family_index != RADV_QUEUE_TRANSFER) {
- if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX6)
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_WB_L2;
+ if (!radv_init_push_descriptor_set(cmd_buffer, push_set, layout->set[set].layout,
+ pipelineBindPoint))
+ return;
- /* Make sure to sync all pending active queries at the end of
- * command buffer.
- */
- cmd_buffer->state.flush_bits |= cmd_buffer->active_query_flush_bits;
+ /* Check that there are no inline uniform block updates when calling vkCmdPushDescriptorSetKHR()
+    * because it is invalid according to the Vulkan spec.
+ */
+ for (int i = 0; i < descriptorWriteCount; i++) {
+ ASSERTED const VkWriteDescriptorSet *writeset = &pDescriptorWrites[i];
+ assert(writeset->descriptorType != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT);
+ }
- /* Since NGG streamout uses GDS, we need to make GDS idle when
- * we leave the IB, otherwise another process might overwrite
- * it while our shaders are busy.
- */
- if (cmd_buffer->gds_needed)
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH;
+ radv_update_descriptor_sets(cmd_buffer->device, cmd_buffer,
+ radv_descriptor_set_to_handle(push_set), descriptorWriteCount,
+ pDescriptorWrites, 0, NULL);
- si_emit_cache_flush(cmd_buffer);
- }
+ radv_set_descriptor_set(cmd_buffer, pipelineBindPoint, push_set, set);
+ descriptors_state->push_dirty = true;
+}
- /* Make sure CP DMA is idle at the end of IBs because the kernel
- * doesn't wait for it.
- */
- si_cp_dma_wait_for_idle(cmd_buffer);
+void
+radv_CmdPushDescriptorSetWithTemplateKHR(VkCommandBuffer commandBuffer,
+ VkDescriptorUpdateTemplate descriptorUpdateTemplate,
+ VkPipelineLayout _layout, uint32_t set, const void *pData)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout);
+ RADV_FROM_HANDLE(radv_descriptor_update_template, templ, descriptorUpdateTemplate);
+ struct radv_descriptor_state *descriptors_state =
+ radv_get_descriptors_state(cmd_buffer, templ->bind_point);
+ struct radv_descriptor_set *push_set =
+ (struct radv_descriptor_set *)&descriptors_state->push_set.set;
- radv_describe_end_cmd_buffer(cmd_buffer);
+ assert(layout->set[set].layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR);
- vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments);
- vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.subpass_sample_locs);
+ if (!radv_init_push_descriptor_set(cmd_buffer, push_set, layout->set[set].layout,
+ templ->bind_point))
+ return;
- VkResult result = cmd_buffer->device->ws->cs_finalize(cmd_buffer->cs);
- if (result != VK_SUCCESS)
- return vk_error(cmd_buffer->device->instance, result);
+ radv_update_descriptor_set_with_template(cmd_buffer->device, cmd_buffer, push_set,
+ descriptorUpdateTemplate, pData);
- cmd_buffer->status = RADV_CMD_BUFFER_STATUS_EXECUTABLE;
+ radv_set_descriptor_set(cmd_buffer, templ->bind_point, push_set, set);
+ descriptors_state->push_dirty = true;
+}
- return cmd_buffer->record_result;
+void
+radv_CmdPushConstants(VkCommandBuffer commandBuffer, VkPipelineLayout layout,
+ VkShaderStageFlags stageFlags, uint32_t offset, uint32_t size,
+ const void *pValues)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ memcpy(cmd_buffer->push_constants + offset, pValues, size);
+ cmd_buffer->push_constant_stages |= stageFlags;
}
-static void
-radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer)
+VkResult
+radv_EndCommandBuffer(VkCommandBuffer commandBuffer)
{
- struct radv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+
+ radv_emit_mip_change_flush_default(cmd_buffer);
+
+ if (cmd_buffer->queue_family_index != RADV_QUEUE_TRANSFER) {
+ if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX6)
+ cmd_buffer->state.flush_bits |=
+ RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_WB_L2;
+
+ /* Make sure to sync all pending active queries at the end of
+       * the command buffer.
+ */
+ cmd_buffer->state.flush_bits |= cmd_buffer->active_query_flush_bits;
+
+ /* Since NGG streamout uses GDS, we need to make GDS idle when
+ * we leave the IB, otherwise another process might overwrite
+ * it while our shaders are busy.
+ */
+ if (cmd_buffer->gds_needed)
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH;
+
+ si_emit_cache_flush(cmd_buffer);
+ }
+
+ /* Make sure CP DMA is idle at the end of IBs because the kernel
+ * doesn't wait for it.
+ */
+ si_cp_dma_wait_for_idle(cmd_buffer);
+
+ radv_describe_end_cmd_buffer(cmd_buffer);
- if (!pipeline || pipeline == cmd_buffer->state.emitted_compute_pipeline)
- return;
-
- assert(!pipeline->ctx_cs.cdw);
-
- cmd_buffer->state.emitted_compute_pipeline = pipeline;
+ vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments);
+ vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.subpass_sample_locs);
- radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, pipeline->cs.cdw);
- radeon_emit_array(cmd_buffer->cs, pipeline->cs.buf, pipeline->cs.cdw);
-
- cmd_buffer->compute_scratch_size_per_wave_needed = MAX2(cmd_buffer->compute_scratch_size_per_wave_needed,
- pipeline->scratch_bytes_per_wave);
- cmd_buffer->compute_scratch_waves_wanted = MAX2(cmd_buffer->compute_scratch_waves_wanted,
- pipeline->max_waves);
-
- radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs,
- pipeline->shaders[MESA_SHADER_COMPUTE]->bo);
-
- if (unlikely(cmd_buffer->device->trace_bo))
- radv_save_pipeline(cmd_buffer, pipeline);
-}
-
-static void radv_mark_descriptor_sets_dirty(struct radv_cmd_buffer *cmd_buffer,
- VkPipelineBindPoint bind_point)
-{
- struct radv_descriptor_state *descriptors_state =
- radv_get_descriptors_state(cmd_buffer, bind_point);
+ VkResult result = cmd_buffer->device->ws->cs_finalize(cmd_buffer->cs);
+ if (result != VK_SUCCESS)
+ return vk_error(cmd_buffer->device->instance, result);
- descriptors_state->dirty |= descriptors_state->valid;
-}
+ cmd_buffer->status = RADV_CMD_BUFFER_STATUS_EXECUTABLE;
-void radv_CmdBindPipeline(
- VkCommandBuffer commandBuffer,
- VkPipelineBindPoint pipelineBindPoint,
- VkPipeline _pipeline)
+ return cmd_buffer->record_result;
+}
+
+static void
+radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
+ struct radv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
- switch (pipelineBindPoint) {
- case VK_PIPELINE_BIND_POINT_COMPUTE:
- if (cmd_buffer->state.compute_pipeline == pipeline)
- return;
- radv_mark_descriptor_sets_dirty(cmd_buffer, pipelineBindPoint);
+ if (!pipeline || pipeline == cmd_buffer->state.emitted_compute_pipeline)
+ return;
- cmd_buffer->state.compute_pipeline = pipeline;
- cmd_buffer->push_constant_stages |= VK_SHADER_STAGE_COMPUTE_BIT;
- break;
- case VK_PIPELINE_BIND_POINT_GRAPHICS:
- if (cmd_buffer->state.pipeline == pipeline)
- return;
- radv_mark_descriptor_sets_dirty(cmd_buffer, pipelineBindPoint);
+ assert(!pipeline->ctx_cs.cdw);
- bool vtx_emit_count_changed = !pipeline ||
- !cmd_buffer->state.pipeline ||
- cmd_buffer->state.pipeline->graphics.vtx_emit_num !=
- pipeline->graphics.vtx_emit_num ||
- cmd_buffer->state.pipeline->graphics.vtx_base_sgpr !=
- pipeline->graphics.vtx_base_sgpr;
- cmd_buffer->state.pipeline = pipeline;
- if (!pipeline)
- break;
+ cmd_buffer->state.emitted_compute_pipeline = pipeline;
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_PIPELINE;
- cmd_buffer->push_constant_stages |= pipeline->active_stages;
+ radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, pipeline->cs.cdw);
+ radeon_emit_array(cmd_buffer->cs, pipeline->cs.buf, pipeline->cs.cdw);
- /* the new vertex shader might not have the same user regs */
- if (vtx_emit_count_changed) {
- cmd_buffer->state.last_first_instance = -1;
- cmd_buffer->state.last_vertex_offset = -1;
- cmd_buffer->state.last_drawid = -1;
- }
+ cmd_buffer->compute_scratch_size_per_wave_needed =
+ MAX2(cmd_buffer->compute_scratch_size_per_wave_needed, pipeline->scratch_bytes_per_wave);
+ cmd_buffer->compute_scratch_waves_wanted =
+ MAX2(cmd_buffer->compute_scratch_waves_wanted, pipeline->max_waves);
- /* Prefetch all pipeline shaders at first draw time. */
- cmd_buffer->state.prefetch_L2_mask |= RADV_PREFETCH_SHADERS;
+ radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs,
+ pipeline->shaders[MESA_SHADER_COMPUTE]->bo);
- if ((cmd_buffer->device->physical_device->rad_info.chip_class == GFX10 ||
- cmd_buffer->device->physical_device->rad_info.family == CHIP_SIENNA_CICHLID) &&
- cmd_buffer->state.emitted_pipeline &&
- radv_pipeline_has_ngg(cmd_buffer->state.emitted_pipeline) &&
- !radv_pipeline_has_ngg(cmd_buffer->state.pipeline)) {
- /* Transitioning from NGG to legacy GS requires
- * VGT_FLUSH on GFX10 and Sienna Cichlid. VGT_FLUSH
- * is also emitted at the beginning of IBs when legacy
- * GS ring pointers are set.
- */
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VGT_FLUSH;
- }
+ if (unlikely(cmd_buffer->device->trace_bo))
+ radv_save_pipeline(cmd_buffer, pipeline);
+}
- radv_bind_dynamic_state(cmd_buffer, &pipeline->dynamic_state);
- radv_bind_streamout_state(cmd_buffer, pipeline);
+static void
+radv_mark_descriptor_sets_dirty(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point)
+{
+ struct radv_descriptor_state *descriptors_state =
+ radv_get_descriptors_state(cmd_buffer, bind_point);
- if (pipeline->graphics.esgs_ring_size > cmd_buffer->esgs_ring_size_needed)
- cmd_buffer->esgs_ring_size_needed = pipeline->graphics.esgs_ring_size;
- if (pipeline->graphics.gsvs_ring_size > cmd_buffer->gsvs_ring_size_needed)
- cmd_buffer->gsvs_ring_size_needed = pipeline->graphics.gsvs_ring_size;
+ descriptors_state->dirty |= descriptors_state->valid;
+}
- if (radv_pipeline_has_tess(pipeline))
- cmd_buffer->tess_rings_needed = true;
- break;
- default:
- assert(!"invalid bind point");
- break;
- }
+void
+radv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint,
+ VkPipeline _pipeline)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
+
+ switch (pipelineBindPoint) {
+ case VK_PIPELINE_BIND_POINT_COMPUTE:
+ if (cmd_buffer->state.compute_pipeline == pipeline)
+ return;
+ radv_mark_descriptor_sets_dirty(cmd_buffer, pipelineBindPoint);
+
+ cmd_buffer->state.compute_pipeline = pipeline;
+ cmd_buffer->push_constant_stages |= VK_SHADER_STAGE_COMPUTE_BIT;
+ break;
+ case VK_PIPELINE_BIND_POINT_GRAPHICS:
+ if (cmd_buffer->state.pipeline == pipeline)
+ return;
+ radv_mark_descriptor_sets_dirty(cmd_buffer, pipelineBindPoint);
+
+ bool vtx_emit_count_changed =
+ !pipeline || !cmd_buffer->state.pipeline ||
+ cmd_buffer->state.pipeline->graphics.vtx_emit_num != pipeline->graphics.vtx_emit_num ||
+ cmd_buffer->state.pipeline->graphics.vtx_base_sgpr != pipeline->graphics.vtx_base_sgpr;
+ cmd_buffer->state.pipeline = pipeline;
+ if (!pipeline)
+ break;
+
+ cmd_buffer->state.dirty |= RADV_CMD_DIRTY_PIPELINE;
+ cmd_buffer->push_constant_stages |= pipeline->active_stages;
+
+ /* the new vertex shader might not have the same user regs */
+ if (vtx_emit_count_changed) {
+ cmd_buffer->state.last_first_instance = -1;
+ cmd_buffer->state.last_vertex_offset = -1;
+ cmd_buffer->state.last_drawid = -1;
+ }
+
+ /* Prefetch all pipeline shaders at first draw time. */
+ cmd_buffer->state.prefetch_L2_mask |= RADV_PREFETCH_SHADERS;
+
+ if ((cmd_buffer->device->physical_device->rad_info.chip_class == GFX10 ||
+ cmd_buffer->device->physical_device->rad_info.family == CHIP_SIENNA_CICHLID) &&
+ cmd_buffer->state.emitted_pipeline &&
+ radv_pipeline_has_ngg(cmd_buffer->state.emitted_pipeline) &&
+ !radv_pipeline_has_ngg(cmd_buffer->state.pipeline)) {
+ /* Transitioning from NGG to legacy GS requires
+ * VGT_FLUSH on GFX10 and Sienna Cichlid. VGT_FLUSH
+ * is also emitted at the beginning of IBs when legacy
+ * GS ring pointers are set.
+ */
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VGT_FLUSH;
+ }
+
+ radv_bind_dynamic_state(cmd_buffer, &pipeline->dynamic_state);
+ radv_bind_streamout_state(cmd_buffer, pipeline);
+
+ if (pipeline->graphics.esgs_ring_size > cmd_buffer->esgs_ring_size_needed)
+ cmd_buffer->esgs_ring_size_needed = pipeline->graphics.esgs_ring_size;
+ if (pipeline->graphics.gsvs_ring_size > cmd_buffer->gsvs_ring_size_needed)
+ cmd_buffer->gsvs_ring_size_needed = pipeline->graphics.gsvs_ring_size;
+
+ if (radv_pipeline_has_tess(pipeline))
+ cmd_buffer->tess_rings_needed = true;
+ break;
+ default:
+ assert(!"invalid bind point");
+ break;
+ }
}
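
Note: radv_CmdBindPipeline tracks the compute and graphics bind points independently and returns early when the same pipeline is re-bound. A small API-level sketch of the two paths this entry point services (illustrative only; handle names are placeholders):

#include <vulkan/vulkan.h>

/* Bind a graphics and a compute pipeline to the same command buffer;
 * each bind point keeps its own state in the driver. */
static void bind_both(VkCommandBuffer cmd, VkPipeline gfx, VkPipeline comp)
{
   vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, gfx);
   vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, comp);
   /* Re-binding the same handle is a no-op (early return above). */
   vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, gfx);
}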
-void radv_CmdSetViewport(
- VkCommandBuffer commandBuffer,
- uint32_t firstViewport,
- uint32_t viewportCount,
- const VkViewport* pViewports)
+void
+radv_CmdSetViewport(VkCommandBuffer commandBuffer, uint32_t firstViewport, uint32_t viewportCount,
+ const VkViewport *pViewports)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_cmd_state *state = &cmd_buffer->state;
- ASSERTED const uint32_t total_count = firstViewport + viewportCount;
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ ASSERTED const uint32_t total_count = firstViewport + viewportCount;
- assert(firstViewport < MAX_VIEWPORTS);
- assert(total_count >= 1 && total_count <= MAX_VIEWPORTS);
+ assert(firstViewport < MAX_VIEWPORTS);
+ assert(total_count >= 1 && total_count <= MAX_VIEWPORTS);
- if (total_count <= state->dynamic.viewport.count &&
- !memcmp(state->dynamic.viewport.viewports + firstViewport,
- pViewports, viewportCount * sizeof(*pViewports))) {
- return;
- }
+ if (total_count <= state->dynamic.viewport.count &&
+ !memcmp(state->dynamic.viewport.viewports + firstViewport, pViewports,
+ viewportCount * sizeof(*pViewports))) {
+ return;
+ }
- if (state->dynamic.viewport.count < total_count)
- state->dynamic.viewport.count = total_count;
+ if (state->dynamic.viewport.count < total_count)
+ state->dynamic.viewport.count = total_count;
- memcpy(state->dynamic.viewport.viewports + firstViewport, pViewports,
- viewportCount * sizeof(*pViewports));
+ memcpy(state->dynamic.viewport.viewports + firstViewport, pViewports,
+ viewportCount * sizeof(*pViewports));
- state->dirty |= RADV_CMD_DIRTY_DYNAMIC_VIEWPORT;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_VIEWPORT;
}
-void radv_CmdSetScissor(
- VkCommandBuffer commandBuffer,
- uint32_t firstScissor,
- uint32_t scissorCount,
- const VkRect2D* pScissors)
+void
+radv_CmdSetScissor(VkCommandBuffer commandBuffer, uint32_t firstScissor, uint32_t scissorCount,
+ const VkRect2D *pScissors)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_cmd_state *state = &cmd_buffer->state;
- ASSERTED const uint32_t total_count = firstScissor + scissorCount;
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ ASSERTED const uint32_t total_count = firstScissor + scissorCount;
- assert(firstScissor < MAX_SCISSORS);
- assert(total_count >= 1 && total_count <= MAX_SCISSORS);
+ assert(firstScissor < MAX_SCISSORS);
+ assert(total_count >= 1 && total_count <= MAX_SCISSORS);
- if (total_count <= state->dynamic.scissor.count &&
- !memcmp(state->dynamic.scissor.scissors + firstScissor, pScissors,
- scissorCount * sizeof(*pScissors))) {
- return;
- }
+ if (total_count <= state->dynamic.scissor.count &&
+ !memcmp(state->dynamic.scissor.scissors + firstScissor, pScissors,
+ scissorCount * sizeof(*pScissors))) {
+ return;
+ }
- if (state->dynamic.scissor.count < total_count)
- state->dynamic.scissor.count = total_count;
+ if (state->dynamic.scissor.count < total_count)
+ state->dynamic.scissor.count = total_count;
- memcpy(state->dynamic.scissor.scissors + firstScissor, pScissors,
- scissorCount * sizeof(*pScissors));
+ memcpy(state->dynamic.scissor.scissors + firstScissor, pScissors,
+ scissorCount * sizeof(*pScissors));
- state->dirty |= RADV_CMD_DIRTY_DYNAMIC_SCISSOR;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_SCISSOR;
}
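
Note: every dynamic-state setter in this file follows the same pattern: compare the incoming values against the cached state, return early if nothing changed, otherwise copy the values and set the matching RADV_CMD_DIRTY_DYNAMIC_* bit so the registers get re-emitted at draw time. A hedged application-side sketch of the viewport/scissor pair (values are arbitrary examples, not from this change):

#include <vulkan/vulkan.h>

/* Set one dynamic viewport and scissor covering an 800x600 render area. */
static void set_viewport_scissor(VkCommandBuffer cmd)
{
   VkViewport vp = {
      .x = 0.0f, .y = 0.0f,
      .width = 800.0f, .height = 600.0f,
      .minDepth = 0.0f, .maxDepth = 1.0f,
   };
   VkRect2D sc = {
      .offset = {0, 0},
      .extent = {800, 600},
   };
   vkCmdSetViewport(cmd, 0, 1, &vp);
   vkCmdSetScissor(cmd, 0, 1, &sc);
   /* Repeating the same values hits the memcmp early-out above and
    * dirties nothing. */
   vkCmdSetViewport(cmd, 0, 1, &vp);
}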
-void radv_CmdSetLineWidth(
- VkCommandBuffer commandBuffer,
- float lineWidth)
+void
+radv_CmdSetLineWidth(VkCommandBuffer commandBuffer, float lineWidth)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- if (cmd_buffer->state.dynamic.line_width == lineWidth)
- return;
+ if (cmd_buffer->state.dynamic.line_width == lineWidth)
+ return;
- cmd_buffer->state.dynamic.line_width = lineWidth;
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_LINE_WIDTH;
+ cmd_buffer->state.dynamic.line_width = lineWidth;
+ cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_LINE_WIDTH;
}
-void radv_CmdSetDepthBias(
- VkCommandBuffer commandBuffer,
- float depthBiasConstantFactor,
- float depthBiasClamp,
- float depthBiasSlopeFactor)
+void
+radv_CmdSetDepthBias(VkCommandBuffer commandBuffer, float depthBiasConstantFactor,
+ float depthBiasClamp, float depthBiasSlopeFactor)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_cmd_state *state = &cmd_buffer->state;
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
- if (state->dynamic.depth_bias.bias == depthBiasConstantFactor &&
- state->dynamic.depth_bias.clamp == depthBiasClamp &&
- state->dynamic.depth_bias.slope == depthBiasSlopeFactor) {
- return;
- }
+ if (state->dynamic.depth_bias.bias == depthBiasConstantFactor &&
+ state->dynamic.depth_bias.clamp == depthBiasClamp &&
+ state->dynamic.depth_bias.slope == depthBiasSlopeFactor) {
+ return;
+ }
- state->dynamic.depth_bias.bias = depthBiasConstantFactor;
- state->dynamic.depth_bias.clamp = depthBiasClamp;
- state->dynamic.depth_bias.slope = depthBiasSlopeFactor;
+ state->dynamic.depth_bias.bias = depthBiasConstantFactor;
+ state->dynamic.depth_bias.clamp = depthBiasClamp;
+ state->dynamic.depth_bias.slope = depthBiasSlopeFactor;
- state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS;
}
-void radv_CmdSetBlendConstants(
- VkCommandBuffer commandBuffer,
- const float blendConstants[4])
+void
+radv_CmdSetBlendConstants(VkCommandBuffer commandBuffer, const float blendConstants[4])
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_cmd_state *state = &cmd_buffer->state;
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
- if (!memcmp(state->dynamic.blend_constants, blendConstants, sizeof(float) * 4))
- return;
+ if (!memcmp(state->dynamic.blend_constants, blendConstants, sizeof(float) * 4))
+ return;
- memcpy(state->dynamic.blend_constants, blendConstants, sizeof(float) * 4);
+ memcpy(state->dynamic.blend_constants, blendConstants, sizeof(float) * 4);
- state->dirty |= RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS;
}
-void radv_CmdSetDepthBounds(
- VkCommandBuffer commandBuffer,
- float minDepthBounds,
- float maxDepthBounds)
+void
+radv_CmdSetDepthBounds(VkCommandBuffer commandBuffer, float minDepthBounds, float maxDepthBounds)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_cmd_state *state = &cmd_buffer->state;
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
- if (state->dynamic.depth_bounds.min == minDepthBounds &&
- state->dynamic.depth_bounds.max == maxDepthBounds) {
- return;
- }
+ if (state->dynamic.depth_bounds.min == minDepthBounds &&
+ state->dynamic.depth_bounds.max == maxDepthBounds) {
+ return;
+ }
- state->dynamic.depth_bounds.min = minDepthBounds;
- state->dynamic.depth_bounds.max = maxDepthBounds;
+ state->dynamic.depth_bounds.min = minDepthBounds;
+ state->dynamic.depth_bounds.max = maxDepthBounds;
- state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS;
}
-void radv_CmdSetStencilCompareMask(
- VkCommandBuffer commandBuffer,
- VkStencilFaceFlags faceMask,
- uint32_t compareMask)
+void
+radv_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask,
+ uint32_t compareMask)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_cmd_state *state = &cmd_buffer->state;
- bool front_same = state->dynamic.stencil_compare_mask.front == compareMask;
- bool back_same = state->dynamic.stencil_compare_mask.back == compareMask;
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ bool front_same = state->dynamic.stencil_compare_mask.front == compareMask;
+ bool back_same = state->dynamic.stencil_compare_mask.back == compareMask;
- if ((!(faceMask & VK_STENCIL_FACE_FRONT_BIT) || front_same) &&
- (!(faceMask & VK_STENCIL_FACE_BACK_BIT) || back_same)) {
- return;
- }
+ if ((!(faceMask & VK_STENCIL_FACE_FRONT_BIT) || front_same) &&
+ (!(faceMask & VK_STENCIL_FACE_BACK_BIT) || back_same)) {
+ return;
+ }
- if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
- state->dynamic.stencil_compare_mask.front = compareMask;
- if (faceMask & VK_STENCIL_FACE_BACK_BIT)
- state->dynamic.stencil_compare_mask.back = compareMask;
+ if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
+ state->dynamic.stencil_compare_mask.front = compareMask;
+ if (faceMask & VK_STENCIL_FACE_BACK_BIT)
+ state->dynamic.stencil_compare_mask.back = compareMask;
- state->dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK;
}
-void radv_CmdSetStencilWriteMask(
- VkCommandBuffer commandBuffer,
- VkStencilFaceFlags faceMask,
- uint32_t writeMask)
+void
+radv_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask,
+ uint32_t writeMask)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_cmd_state *state = &cmd_buffer->state;
- bool front_same = state->dynamic.stencil_write_mask.front == writeMask;
- bool back_same = state->dynamic.stencil_write_mask.back == writeMask;
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ bool front_same = state->dynamic.stencil_write_mask.front == writeMask;
+ bool back_same = state->dynamic.stencil_write_mask.back == writeMask;
- if ((!(faceMask & VK_STENCIL_FACE_FRONT_BIT) || front_same) &&
- (!(faceMask & VK_STENCIL_FACE_BACK_BIT) || back_same)) {
- return;
- }
+ if ((!(faceMask & VK_STENCIL_FACE_FRONT_BIT) || front_same) &&
+ (!(faceMask & VK_STENCIL_FACE_BACK_BIT) || back_same)) {
+ return;
+ }
- if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
- state->dynamic.stencil_write_mask.front = writeMask;
- if (faceMask & VK_STENCIL_FACE_BACK_BIT)
- state->dynamic.stencil_write_mask.back = writeMask;
+ if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
+ state->dynamic.stencil_write_mask.front = writeMask;
+ if (faceMask & VK_STENCIL_FACE_BACK_BIT)
+ state->dynamic.stencil_write_mask.back = writeMask;
- state->dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK;
}
-void radv_CmdSetStencilReference(
- VkCommandBuffer commandBuffer,
- VkStencilFaceFlags faceMask,
- uint32_t reference)
+void
+radv_CmdSetStencilReference(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask,
+ uint32_t reference)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_cmd_state *state = &cmd_buffer->state;
- bool front_same = state->dynamic.stencil_reference.front == reference;
- bool back_same = state->dynamic.stencil_reference.back == reference;
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ bool front_same = state->dynamic.stencil_reference.front == reference;
+ bool back_same = state->dynamic.stencil_reference.back == reference;
- if ((!(faceMask & VK_STENCIL_FACE_FRONT_BIT) || front_same) &&
- (!(faceMask & VK_STENCIL_FACE_BACK_BIT) || back_same)) {
- return;
- }
+ if ((!(faceMask & VK_STENCIL_FACE_FRONT_BIT) || front_same) &&
+ (!(faceMask & VK_STENCIL_FACE_BACK_BIT) || back_same)) {
+ return;
+ }
- if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
- cmd_buffer->state.dynamic.stencil_reference.front = reference;
- if (faceMask & VK_STENCIL_FACE_BACK_BIT)
- cmd_buffer->state.dynamic.stencil_reference.back = reference;
+ if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
+ cmd_buffer->state.dynamic.stencil_reference.front = reference;
+ if (faceMask & VK_STENCIL_FACE_BACK_BIT)
+ cmd_buffer->state.dynamic.stencil_reference.back = reference;
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE;
+ cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE;
}
-void radv_CmdSetDiscardRectangleEXT(
- VkCommandBuffer commandBuffer,
- uint32_t firstDiscardRectangle,
- uint32_t discardRectangleCount,
- const VkRect2D* pDiscardRectangles)
+void
+radv_CmdSetDiscardRectangleEXT(VkCommandBuffer commandBuffer, uint32_t firstDiscardRectangle,
+ uint32_t discardRectangleCount, const VkRect2D *pDiscardRectangles)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_cmd_state *state = &cmd_buffer->state;
- ASSERTED const uint32_t total_count = firstDiscardRectangle + discardRectangleCount;
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ ASSERTED const uint32_t total_count = firstDiscardRectangle + discardRectangleCount;
- assert(firstDiscardRectangle < MAX_DISCARD_RECTANGLES);
- assert(total_count >= 1 && total_count <= MAX_DISCARD_RECTANGLES);
+ assert(firstDiscardRectangle < MAX_DISCARD_RECTANGLES);
+ assert(total_count >= 1 && total_count <= MAX_DISCARD_RECTANGLES);
- if (!memcmp(state->dynamic.discard_rectangle.rectangles + firstDiscardRectangle,
- pDiscardRectangles, discardRectangleCount * sizeof(*pDiscardRectangles))) {
- return;
- }
+ if (!memcmp(state->dynamic.discard_rectangle.rectangles + firstDiscardRectangle,
+ pDiscardRectangles, discardRectangleCount * sizeof(*pDiscardRectangles))) {
+ return;
+ }
- typed_memcpy(&state->dynamic.discard_rectangle.rectangles[firstDiscardRectangle],
- pDiscardRectangles, discardRectangleCount);
+ typed_memcpy(&state->dynamic.discard_rectangle.rectangles[firstDiscardRectangle],
+ pDiscardRectangles, discardRectangleCount);
- state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE;
}
-void radv_CmdSetSampleLocationsEXT(
- VkCommandBuffer commandBuffer,
- const VkSampleLocationsInfoEXT* pSampleLocationsInfo)
+void
+radv_CmdSetSampleLocationsEXT(VkCommandBuffer commandBuffer,
+ const VkSampleLocationsInfoEXT *pSampleLocationsInfo)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_cmd_state *state = &cmd_buffer->state;
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
- assert(pSampleLocationsInfo->sampleLocationsCount <= MAX_SAMPLE_LOCATIONS);
+ assert(pSampleLocationsInfo->sampleLocationsCount <= MAX_SAMPLE_LOCATIONS);
- state->dynamic.sample_location.per_pixel = pSampleLocationsInfo->sampleLocationsPerPixel;
- state->dynamic.sample_location.grid_size = pSampleLocationsInfo->sampleLocationGridSize;
- state->dynamic.sample_location.count = pSampleLocationsInfo->sampleLocationsCount;
- typed_memcpy(&state->dynamic.sample_location.locations[0],
- pSampleLocationsInfo->pSampleLocations,
- pSampleLocationsInfo->sampleLocationsCount);
+ state->dynamic.sample_location.per_pixel = pSampleLocationsInfo->sampleLocationsPerPixel;
+ state->dynamic.sample_location.grid_size = pSampleLocationsInfo->sampleLocationGridSize;
+ state->dynamic.sample_location.count = pSampleLocationsInfo->sampleLocationsCount;
+ typed_memcpy(&state->dynamic.sample_location.locations[0],
+ pSampleLocationsInfo->pSampleLocations, pSampleLocationsInfo->sampleLocationsCount);
- state->dirty |= RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS;
}
-void radv_CmdSetLineStippleEXT(
- VkCommandBuffer commandBuffer,
- uint32_t lineStippleFactor,
- uint16_t lineStipplePattern)
+void
+radv_CmdSetLineStippleEXT(VkCommandBuffer commandBuffer, uint32_t lineStippleFactor,
+ uint16_t lineStipplePattern)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_cmd_state *state = &cmd_buffer->state;
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
- if (state->dynamic.line_stipple.factor == lineStippleFactor &&
- state->dynamic.line_stipple.pattern == lineStipplePattern)
- return;
+ if (state->dynamic.line_stipple.factor == lineStippleFactor &&
+ state->dynamic.line_stipple.pattern == lineStipplePattern)
+ return;
- state->dynamic.line_stipple.factor = lineStippleFactor;
- state->dynamic.line_stipple.pattern = lineStipplePattern;
+ state->dynamic.line_stipple.factor = lineStippleFactor;
+ state->dynamic.line_stipple.pattern = lineStipplePattern;
- state->dirty |= RADV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE;
}
-void radv_CmdSetCullModeEXT(
- VkCommandBuffer commandBuffer,
- VkCullModeFlags cullMode)
+void
+radv_CmdSetCullModeEXT(VkCommandBuffer commandBuffer, VkCullModeFlags cullMode)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_cmd_state *state = &cmd_buffer->state;
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
- if (state->dynamic.cull_mode == cullMode)
- return;
+ if (state->dynamic.cull_mode == cullMode)
+ return;
- state->dynamic.cull_mode = cullMode;
+ state->dynamic.cull_mode = cullMode;
- state->dirty |= RADV_CMD_DIRTY_DYNAMIC_CULL_MODE;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_CULL_MODE;
}
-void radv_CmdSetFrontFaceEXT(
- VkCommandBuffer commandBuffer,
- VkFrontFace frontFace)
+void
+radv_CmdSetFrontFaceEXT(VkCommandBuffer commandBuffer, VkFrontFace frontFace)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_cmd_state *state = &cmd_buffer->state;
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
- if (state->dynamic.front_face == frontFace)
- return;
+ if (state->dynamic.front_face == frontFace)
+ return;
- state->dynamic.front_face = frontFace;
+ state->dynamic.front_face = frontFace;
- state->dirty |= RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE;
}
-void radv_CmdSetPrimitiveTopologyEXT(
- VkCommandBuffer commandBuffer,
- VkPrimitiveTopology primitiveTopology)
+void
+radv_CmdSetPrimitiveTopologyEXT(VkCommandBuffer commandBuffer,
+ VkPrimitiveTopology primitiveTopology)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_cmd_state *state = &cmd_buffer->state;
- unsigned primitive_topology = si_translate_prim(primitiveTopology);
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ unsigned primitive_topology = si_translate_prim(primitiveTopology);
- if (state->dynamic.primitive_topology == primitive_topology)
- return;
+ if (state->dynamic.primitive_topology == primitive_topology)
+ return;
- state->dynamic.primitive_topology = primitive_topology;
+ state->dynamic.primitive_topology = primitive_topology;
- state->dirty |= RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY;
}
-void radv_CmdSetViewportWithCountEXT(
- VkCommandBuffer commandBuffer,
- uint32_t viewportCount,
- const VkViewport* pViewports)
+void
+radv_CmdSetViewportWithCountEXT(VkCommandBuffer commandBuffer, uint32_t viewportCount,
+ const VkViewport *pViewports)
{
- radv_CmdSetViewport(commandBuffer, 0, viewportCount, pViewports);
+ radv_CmdSetViewport(commandBuffer, 0, viewportCount, pViewports);
}
-void radv_CmdSetScissorWithCountEXT(
- VkCommandBuffer commandBuffer,
- uint32_t scissorCount,
- const VkRect2D* pScissors)
+void
+radv_CmdSetScissorWithCountEXT(VkCommandBuffer commandBuffer, uint32_t scissorCount,
+ const VkRect2D *pScissors)
{
- radv_CmdSetScissor(commandBuffer, 0, scissorCount, pScissors);
+ radv_CmdSetScissor(commandBuffer, 0, scissorCount, pScissors);
}
-void radv_CmdSetDepthTestEnableEXT(
- VkCommandBuffer commandBuffer,
- VkBool32 depthTestEnable)
+void
+radv_CmdSetDepthTestEnableEXT(VkCommandBuffer commandBuffer, VkBool32 depthTestEnable)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_cmd_state *state = &cmd_buffer->state;
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
- if (state->dynamic.depth_test_enable == depthTestEnable)
- return;
+ if (state->dynamic.depth_test_enable == depthTestEnable)
+ return;
- state->dynamic.depth_test_enable = depthTestEnable;
+ state->dynamic.depth_test_enable = depthTestEnable;
- state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE;
}
-void radv_CmdSetDepthWriteEnableEXT(
- VkCommandBuffer commandBuffer,
- VkBool32 depthWriteEnable)
+void
+radv_CmdSetDepthWriteEnableEXT(VkCommandBuffer commandBuffer, VkBool32 depthWriteEnable)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_cmd_state *state = &cmd_buffer->state;
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
- if (state->dynamic.depth_write_enable == depthWriteEnable)
- return;
+ if (state->dynamic.depth_write_enable == depthWriteEnable)
+ return;
- state->dynamic.depth_write_enable = depthWriteEnable;
+ state->dynamic.depth_write_enable = depthWriteEnable;
- state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE;
}
-void radv_CmdSetDepthCompareOpEXT(
- VkCommandBuffer commandBuffer,
- VkCompareOp depthCompareOp)
+void
+radv_CmdSetDepthCompareOpEXT(VkCommandBuffer commandBuffer, VkCompareOp depthCompareOp)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_cmd_state *state = &cmd_buffer->state;
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
- if (state->dynamic.depth_compare_op == depthCompareOp)
- return;
+ if (state->dynamic.depth_compare_op == depthCompareOp)
+ return;
- state->dynamic.depth_compare_op = depthCompareOp;
+ state->dynamic.depth_compare_op = depthCompareOp;
- state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP;
}
-void radv_CmdSetDepthBoundsTestEnableEXT(
- VkCommandBuffer commandBuffer,
- VkBool32 depthBoundsTestEnable)
+void
+radv_CmdSetDepthBoundsTestEnableEXT(VkCommandBuffer commandBuffer, VkBool32 depthBoundsTestEnable)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_cmd_state *state = &cmd_buffer->state;
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
- if (state->dynamic.depth_bounds_test_enable == depthBoundsTestEnable)
- return;
+ if (state->dynamic.depth_bounds_test_enable == depthBoundsTestEnable)
+ return;
- state->dynamic.depth_bounds_test_enable = depthBoundsTestEnable;
+ state->dynamic.depth_bounds_test_enable = depthBoundsTestEnable;
- state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE;
}
-void radv_CmdSetStencilTestEnableEXT(
- VkCommandBuffer commandBuffer,
- VkBool32 stencilTestEnable)
+void
+radv_CmdSetStencilTestEnableEXT(VkCommandBuffer commandBuffer, VkBool32 stencilTestEnable)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_cmd_state *state = &cmd_buffer->state;
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
- if (state->dynamic.stencil_test_enable == stencilTestEnable)
- return;
+ if (state->dynamic.stencil_test_enable == stencilTestEnable)
+ return;
- state->dynamic.stencil_test_enable = stencilTestEnable;
+ state->dynamic.stencil_test_enable = stencilTestEnable;
- state->dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE;
}
-void radv_CmdSetStencilOpEXT(
- VkCommandBuffer commandBuffer,
- VkStencilFaceFlags faceMask,
- VkStencilOp failOp,
- VkStencilOp passOp,
- VkStencilOp depthFailOp,
- VkCompareOp compareOp)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_cmd_state *state = &cmd_buffer->state;
- bool front_same =
- state->dynamic.stencil_op.front.fail_op == failOp &&
- state->dynamic.stencil_op.front.pass_op == passOp &&
- state->dynamic.stencil_op.front.depth_fail_op == depthFailOp &&
- state->dynamic.stencil_op.front.compare_op == compareOp;
- bool back_same =
- state->dynamic.stencil_op.back.fail_op == failOp &&
- state->dynamic.stencil_op.back.pass_op == passOp &&
- state->dynamic.stencil_op.back.depth_fail_op == depthFailOp &&
- state->dynamic.stencil_op.back.compare_op == compareOp;
-
- if ((!(faceMask & VK_STENCIL_FACE_FRONT_BIT) || front_same) &&
- (!(faceMask & VK_STENCIL_FACE_BACK_BIT) || back_same))
- return;
-
- if (faceMask & VK_STENCIL_FACE_FRONT_BIT) {
- state->dynamic.stencil_op.front.fail_op = failOp;
- state->dynamic.stencil_op.front.pass_op = passOp;
- state->dynamic.stencil_op.front.depth_fail_op = depthFailOp;
- state->dynamic.stencil_op.front.compare_op = compareOp;
- }
-
- if (faceMask & VK_STENCIL_FACE_BACK_BIT) {
- state->dynamic.stencil_op.back.fail_op = failOp;
- state->dynamic.stencil_op.back.pass_op = passOp;
- state->dynamic.stencil_op.back.depth_fail_op = depthFailOp;
- state->dynamic.stencil_op.back.compare_op = compareOp;
- }
-
- state->dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP;
+void
+radv_CmdSetStencilOpEXT(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask,
+ VkStencilOp failOp, VkStencilOp passOp, VkStencilOp depthFailOp,
+ VkCompareOp compareOp)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ bool front_same = state->dynamic.stencil_op.front.fail_op == failOp &&
+ state->dynamic.stencil_op.front.pass_op == passOp &&
+ state->dynamic.stencil_op.front.depth_fail_op == depthFailOp &&
+ state->dynamic.stencil_op.front.compare_op == compareOp;
+ bool back_same = state->dynamic.stencil_op.back.fail_op == failOp &&
+ state->dynamic.stencil_op.back.pass_op == passOp &&
+ state->dynamic.stencil_op.back.depth_fail_op == depthFailOp &&
+ state->dynamic.stencil_op.back.compare_op == compareOp;
+
+ if ((!(faceMask & VK_STENCIL_FACE_FRONT_BIT) || front_same) &&
+ (!(faceMask & VK_STENCIL_FACE_BACK_BIT) || back_same))
+ return;
+
+ if (faceMask & VK_STENCIL_FACE_FRONT_BIT) {
+ state->dynamic.stencil_op.front.fail_op = failOp;
+ state->dynamic.stencil_op.front.pass_op = passOp;
+ state->dynamic.stencil_op.front.depth_fail_op = depthFailOp;
+ state->dynamic.stencil_op.front.compare_op = compareOp;
+ }
+
+ if (faceMask & VK_STENCIL_FACE_BACK_BIT) {
+ state->dynamic.stencil_op.back.fail_op = failOp;
+ state->dynamic.stencil_op.back.pass_op = passOp;
+ state->dynamic.stencil_op.back.depth_fail_op = depthFailOp;
+ state->dynamic.stencil_op.back.compare_op = compareOp;
+ }
+
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP;
}
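
Note: the *EXT setters above come from VK_EXT_extended_dynamic_state; they only cache values and set dirty bits, the actual registers are emitted later at draw time. A sketch of how an application might drive a few of them, assuming the bound pipeline declared the corresponding VK_DYNAMIC_STATE_*_EXT states and the extension entry points have been resolved (e.g. via vkGetDeviceProcAddr); illustrative only:

#include <vulkan/vulkan.h>

/* Flip a handful of extended dynamic states between draws. */
static void set_extended_dynamic_state(VkCommandBuffer cmd)
{
   vkCmdSetCullModeEXT(cmd, VK_CULL_MODE_BACK_BIT);
   vkCmdSetFrontFaceEXT(cmd, VK_FRONT_FACE_COUNTER_CLOCKWISE);
   vkCmdSetDepthTestEnableEXT(cmd, VK_TRUE);
   vkCmdSetDepthCompareOpEXT(cmd, VK_COMPARE_OP_LESS_OR_EQUAL);
   vkCmdSetStencilTestEnableEXT(cmd, VK_TRUE);
   vkCmdSetStencilOpEXT(cmd, VK_STENCIL_FACE_FRONT_AND_BACK,
                        VK_STENCIL_OP_KEEP,    /* failOp */
                        VK_STENCIL_OP_REPLACE, /* passOp */
                        VK_STENCIL_OP_KEEP,    /* depthFailOp */
                        VK_COMPARE_OP_ALWAYS); /* compareOp */
}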
-void radv_CmdSetFragmentShadingRateKHR(
- VkCommandBuffer commandBuffer,
- const VkExtent2D* pFragmentSize,
- const VkFragmentShadingRateCombinerOpKHR combinerOps[2])
+void
+radv_CmdSetFragmentShadingRateKHR(VkCommandBuffer commandBuffer, const VkExtent2D *pFragmentSize,
+ const VkFragmentShadingRateCombinerOpKHR combinerOps[2])
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_cmd_state *state = &cmd_buffer->state;
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
- if (state->dynamic.fragment_shading_rate.size.width == pFragmentSize->width &&
- state->dynamic.fragment_shading_rate.size.height == pFragmentSize->height &&
- state->dynamic.fragment_shading_rate.combiner_ops[0] == combinerOps[0] &&
- state->dynamic.fragment_shading_rate.combiner_ops[1] == combinerOps[1])
- return;
-
- state->dynamic.fragment_shading_rate.size = *pFragmentSize;
- for (unsigned i = 0; i < 2; i++)
- state->dynamic.fragment_shading_rate.combiner_ops[i] = combinerOps[i];
-
- state->dirty |= RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE;
-}
-
-void radv_CmdExecuteCommands(
- VkCommandBuffer commandBuffer,
- uint32_t commandBufferCount,
- const VkCommandBuffer* pCmdBuffers)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, primary, commandBuffer);
-
- assert(commandBufferCount > 0);
-
- radv_emit_mip_change_flush_default(primary);
-
- /* Emit pending flushes on primary prior to executing secondary */
- si_emit_cache_flush(primary);
-
- for (uint32_t i = 0; i < commandBufferCount; i++) {
- RADV_FROM_HANDLE(radv_cmd_buffer, secondary, pCmdBuffers[i]);
-
- primary->scratch_size_per_wave_needed = MAX2(primary->scratch_size_per_wave_needed,
- secondary->scratch_size_per_wave_needed);
- primary->scratch_waves_wanted = MAX2(primary->scratch_waves_wanted,
- secondary->scratch_waves_wanted);
- primary->compute_scratch_size_per_wave_needed = MAX2(primary->compute_scratch_size_per_wave_needed,
- secondary->compute_scratch_size_per_wave_needed);
- primary->compute_scratch_waves_wanted = MAX2(primary->compute_scratch_waves_wanted,
- secondary->compute_scratch_waves_wanted);
-
- if (secondary->esgs_ring_size_needed > primary->esgs_ring_size_needed)
- primary->esgs_ring_size_needed = secondary->esgs_ring_size_needed;
- if (secondary->gsvs_ring_size_needed > primary->gsvs_ring_size_needed)
- primary->gsvs_ring_size_needed = secondary->gsvs_ring_size_needed;
- if (secondary->tess_rings_needed)
- primary->tess_rings_needed = true;
- if (secondary->sample_positions_needed)
- primary->sample_positions_needed = true;
- if (secondary->gds_needed)
- primary->gds_needed = true;
-
- if (!secondary->state.framebuffer &&
- (primary->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER)) {
- /* Emit the framebuffer state from primary if secondary
- * has been recorded without a framebuffer, otherwise
- * fast color/depth clears can't work.
- */
- radv_emit_fb_mip_change_flush(primary);
- radv_emit_framebuffer_state(primary);
- }
-
- primary->device->ws->cs_execute_secondary(primary->cs, secondary->cs);
-
-
- /* When the secondary command buffer is compute only we don't
- * need to re-emit the current graphics pipeline.
- */
- if (secondary->state.emitted_pipeline) {
- primary->state.emitted_pipeline =
- secondary->state.emitted_pipeline;
- }
-
- /* When the secondary command buffer is graphics only we don't
- * need to re-emit the current compute pipeline.
- */
- if (secondary->state.emitted_compute_pipeline) {
- primary->state.emitted_compute_pipeline =
- secondary->state.emitted_compute_pipeline;
- }
-
- /* Only re-emit the draw packets when needed. */
- if (secondary->state.last_primitive_reset_en != -1) {
- primary->state.last_primitive_reset_en =
- secondary->state.last_primitive_reset_en;
- }
-
- if (secondary->state.last_primitive_reset_index) {
- primary->state.last_primitive_reset_index =
- secondary->state.last_primitive_reset_index;
- }
-
- if (secondary->state.last_ia_multi_vgt_param) {
- primary->state.last_ia_multi_vgt_param =
- secondary->state.last_ia_multi_vgt_param;
- }
-
- primary->state.last_first_instance = secondary->state.last_first_instance;
- primary->state.last_num_instances = secondary->state.last_num_instances;
- primary->state.last_drawid = secondary->state.last_drawid;
- primary->state.last_vertex_offset = secondary->state.last_vertex_offset;
- primary->state.last_sx_ps_downconvert = secondary->state.last_sx_ps_downconvert;
- primary->state.last_sx_blend_opt_epsilon = secondary->state.last_sx_blend_opt_epsilon;
- primary->state.last_sx_blend_opt_control = secondary->state.last_sx_blend_opt_control;
-
- if (secondary->state.last_index_type != -1) {
- primary->state.last_index_type =
- secondary->state.last_index_type;
- }
- }
-
- /* After executing commands from secondary buffers we have to dirty
- * some states.
- */
- primary->state.dirty |= RADV_CMD_DIRTY_PIPELINE |
- RADV_CMD_DIRTY_INDEX_BUFFER |
- RADV_CMD_DIRTY_DYNAMIC_ALL;
- radv_mark_descriptor_sets_dirty(primary, VK_PIPELINE_BIND_POINT_GRAPHICS);
- radv_mark_descriptor_sets_dirty(primary, VK_PIPELINE_BIND_POINT_COMPUTE);
-}
-
-VkResult radv_CreateCommandPool(
- VkDevice _device,
- const VkCommandPoolCreateInfo* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkCommandPool* pCmdPool)
-{
- RADV_FROM_HANDLE(radv_device, device, _device);
- struct radv_cmd_pool *pool;
-
- pool = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*pool), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (pool == NULL)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
-
- vk_object_base_init(&device->vk, &pool->base,
- VK_OBJECT_TYPE_COMMAND_POOL);
-
- if (pAllocator)
- pool->alloc = *pAllocator;
- else
- pool->alloc = device->vk.alloc;
-
- list_inithead(&pool->cmd_buffers);
- list_inithead(&pool->free_cmd_buffers);
-
- pool->queue_family_index = pCreateInfo->queueFamilyIndex;
+ if (state->dynamic.fragment_shading_rate.size.width == pFragmentSize->width &&
+ state->dynamic.fragment_shading_rate.size.height == pFragmentSize->height &&
+ state->dynamic.fragment_shading_rate.combiner_ops[0] == combinerOps[0] &&
+ state->dynamic.fragment_shading_rate.combiner_ops[1] == combinerOps[1])
+ return;
- *pCmdPool = radv_cmd_pool_to_handle(pool);
+ state->dynamic.fragment_shading_rate.size = *pFragmentSize;
+ for (unsigned i = 0; i < 2; i++)
+ state->dynamic.fragment_shading_rate.combiner_ops[i] = combinerOps[i];
- return VK_SUCCESS;
+ state->dirty |= RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE;
+}
+void
+radv_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCount,
+ const VkCommandBuffer *pCmdBuffers)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, primary, commandBuffer);
+
+ assert(commandBufferCount > 0);
+
+ radv_emit_mip_change_flush_default(primary);
+
+ /* Emit pending flushes on primary prior to executing secondary */
+ si_emit_cache_flush(primary);
+
+ for (uint32_t i = 0; i < commandBufferCount; i++) {
+ RADV_FROM_HANDLE(radv_cmd_buffer, secondary, pCmdBuffers[i]);
+
+ primary->scratch_size_per_wave_needed =
+ MAX2(primary->scratch_size_per_wave_needed, secondary->scratch_size_per_wave_needed);
+ primary->scratch_waves_wanted =
+ MAX2(primary->scratch_waves_wanted, secondary->scratch_waves_wanted);
+ primary->compute_scratch_size_per_wave_needed =
+ MAX2(primary->compute_scratch_size_per_wave_needed,
+ secondary->compute_scratch_size_per_wave_needed);
+ primary->compute_scratch_waves_wanted =
+ MAX2(primary->compute_scratch_waves_wanted, secondary->compute_scratch_waves_wanted);
+
+ if (secondary->esgs_ring_size_needed > primary->esgs_ring_size_needed)
+ primary->esgs_ring_size_needed = secondary->esgs_ring_size_needed;
+ if (secondary->gsvs_ring_size_needed > primary->gsvs_ring_size_needed)
+ primary->gsvs_ring_size_needed = secondary->gsvs_ring_size_needed;
+ if (secondary->tess_rings_needed)
+ primary->tess_rings_needed = true;
+ if (secondary->sample_positions_needed)
+ primary->sample_positions_needed = true;
+ if (secondary->gds_needed)
+ primary->gds_needed = true;
+
+ if (!secondary->state.framebuffer && (primary->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER)) {
+ /* Emit the framebuffer state from primary if secondary
+ * has been recorded without a framebuffer, otherwise
+ * fast color/depth clears can't work.
+ */
+ radv_emit_fb_mip_change_flush(primary);
+ radv_emit_framebuffer_state(primary);
+ }
+
+ primary->device->ws->cs_execute_secondary(primary->cs, secondary->cs);
+
+ /* When the secondary command buffer is compute only we don't
+ * need to re-emit the current graphics pipeline.
+ */
+ if (secondary->state.emitted_pipeline) {
+ primary->state.emitted_pipeline = secondary->state.emitted_pipeline;
+ }
+
+ /* When the secondary command buffer is graphics only we don't
+ * need to re-emit the current compute pipeline.
+ */
+ if (secondary->state.emitted_compute_pipeline) {
+ primary->state.emitted_compute_pipeline = secondary->state.emitted_compute_pipeline;
+ }
+
+ /* Only re-emit the draw packets when needed. */
+ if (secondary->state.last_primitive_reset_en != -1) {
+ primary->state.last_primitive_reset_en = secondary->state.last_primitive_reset_en;
+ }
+
+ if (secondary->state.last_primitive_reset_index) {
+ primary->state.last_primitive_reset_index = secondary->state.last_primitive_reset_index;
+ }
+
+ if (secondary->state.last_ia_multi_vgt_param) {
+ primary->state.last_ia_multi_vgt_param = secondary->state.last_ia_multi_vgt_param;
+ }
+
+ primary->state.last_first_instance = secondary->state.last_first_instance;
+ primary->state.last_num_instances = secondary->state.last_num_instances;
+ primary->state.last_drawid = secondary->state.last_drawid;
+ primary->state.last_vertex_offset = secondary->state.last_vertex_offset;
+ primary->state.last_sx_ps_downconvert = secondary->state.last_sx_ps_downconvert;
+ primary->state.last_sx_blend_opt_epsilon = secondary->state.last_sx_blend_opt_epsilon;
+ primary->state.last_sx_blend_opt_control = secondary->state.last_sx_blend_opt_control;
+
+ if (secondary->state.last_index_type != -1) {
+ primary->state.last_index_type = secondary->state.last_index_type;
+ }
+ }
+
+ /* After executing commands from secondary buffers we have to dirty
+ * some states.
+ */
+ primary->state.dirty |=
+ RADV_CMD_DIRTY_PIPELINE | RADV_CMD_DIRTY_INDEX_BUFFER | RADV_CMD_DIRTY_DYNAMIC_ALL;
+ radv_mark_descriptor_sets_dirty(primary, VK_PIPELINE_BIND_POINT_GRAPHICS);
+ radv_mark_descriptor_sets_dirty(primary, VK_PIPELINE_BIND_POINT_COMPUTE);
+}
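
Note: after replaying secondaries, the primary conservatively re-dirties the pipeline, index buffer and all dynamic state, while several last_* values are inherited from the last secondary so redundant draw-state packets can still be elided. A sketch of the calling pattern, assuming a pool and primary created elsewhere and a secondary used outside a render pass (illustrative only; not part of this change):

#include <vulkan/vulkan.h>

/* Record a secondary command buffer and replay it from a primary one. */
static VkResult record_and_execute(VkDevice dev, VkCommandPool pool,
                                   VkCommandBuffer primary)
{
   VkCommandBufferAllocateInfo alloc = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
      .commandPool = pool,
      .level = VK_COMMAND_BUFFER_LEVEL_SECONDARY,
      .commandBufferCount = 1,
   };
   VkCommandBuffer secondary;
   VkResult res = vkAllocateCommandBuffers(dev, &alloc, &secondary);
   if (res != VK_SUCCESS)
      return res;

   VkCommandBufferInheritanceInfo inherit = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO,
   };
   VkCommandBufferBeginInfo begin = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
      .pInheritanceInfo = &inherit,
   };
   vkBeginCommandBuffer(secondary, &begin);
   /* ... record compute or transfer work here ... */
   vkEndCommandBuffer(secondary);

   vkCmdExecuteCommands(primary, 1, &secondary);
   return VK_SUCCESS;
}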
+
+VkResult
+radv_CreateCommandPool(VkDevice _device, const VkCommandPoolCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkCommandPool *pCmdPool)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ struct radv_cmd_pool *pool;
+
+ pool =
+ vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*pool), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (pool == NULL)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ vk_object_base_init(&device->vk, &pool->base, VK_OBJECT_TYPE_COMMAND_POOL);
+
+ if (pAllocator)
+ pool->alloc = *pAllocator;
+ else
+ pool->alloc = device->vk.alloc;
+
+ list_inithead(&pool->cmd_buffers);
+ list_inithead(&pool->free_cmd_buffers);
+
+ pool->queue_family_index = pCreateInfo->queueFamilyIndex;
+
+ *pCmdPool = radv_cmd_pool_to_handle(pool);
+
+ return VK_SUCCESS;
}
-void radv_DestroyCommandPool(
- VkDevice _device,
- VkCommandPool commandPool,
- const VkAllocationCallbacks* pAllocator)
+void
+radv_DestroyCommandPool(VkDevice _device, VkCommandPool commandPool,
+ const VkAllocationCallbacks *pAllocator)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_cmd_pool, pool, commandPool);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_cmd_pool, pool, commandPool);
- if (!pool)
- return;
+ if (!pool)
+ return;
- list_for_each_entry_safe(struct radv_cmd_buffer, cmd_buffer,
- &pool->cmd_buffers, pool_link) {
- radv_destroy_cmd_buffer(cmd_buffer);
- }
+ list_for_each_entry_safe(struct radv_cmd_buffer, cmd_buffer, &pool->cmd_buffers, pool_link)
+ {
+ radv_destroy_cmd_buffer(cmd_buffer);
+ }
- list_for_each_entry_safe(struct radv_cmd_buffer, cmd_buffer,
- &pool->free_cmd_buffers, pool_link) {
- radv_destroy_cmd_buffer(cmd_buffer);
- }
+ list_for_each_entry_safe(struct radv_cmd_buffer, cmd_buffer, &pool->free_cmd_buffers, pool_link)
+ {
+ radv_destroy_cmd_buffer(cmd_buffer);
+ }
- vk_object_base_finish(&pool->base);
- vk_free2(&device->vk.alloc, pAllocator, pool);
+ vk_object_base_finish(&pool->base);
+ vk_free2(&device->vk.alloc, pAllocator, pool);
}
-VkResult radv_ResetCommandPool(
- VkDevice device,
- VkCommandPool commandPool,
- VkCommandPoolResetFlags flags)
+VkResult
+radv_ResetCommandPool(VkDevice device, VkCommandPool commandPool, VkCommandPoolResetFlags flags)
{
- RADV_FROM_HANDLE(radv_cmd_pool, pool, commandPool);
- VkResult result;
+ RADV_FROM_HANDLE(radv_cmd_pool, pool, commandPool);
+ VkResult result;
- list_for_each_entry(struct radv_cmd_buffer, cmd_buffer,
- &pool->cmd_buffers, pool_link) {
- result = radv_reset_cmd_buffer(cmd_buffer);
- if (result != VK_SUCCESS)
- return result;
- }
+ list_for_each_entry(struct radv_cmd_buffer, cmd_buffer, &pool->cmd_buffers, pool_link)
+ {
+ result = radv_reset_cmd_buffer(cmd_buffer);
+ if (result != VK_SUCCESS)
+ return result;
+ }
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
-void radv_TrimCommandPool(
- VkDevice device,
- VkCommandPool commandPool,
- VkCommandPoolTrimFlags flags)
+void
+radv_TrimCommandPool(VkDevice device, VkCommandPool commandPool, VkCommandPoolTrimFlags flags)
{
- RADV_FROM_HANDLE(radv_cmd_pool, pool, commandPool);
+ RADV_FROM_HANDLE(radv_cmd_pool, pool, commandPool);
- if (!pool)
- return;
+ if (!pool)
+ return;
- list_for_each_entry_safe(struct radv_cmd_buffer, cmd_buffer,
- &pool->free_cmd_buffers, pool_link) {
- radv_destroy_cmd_buffer(cmd_buffer);
- }
+ list_for_each_entry_safe(struct radv_cmd_buffer, cmd_buffer, &pool->free_cmd_buffers, pool_link)
+ {
+ radv_destroy_cmd_buffer(cmd_buffer);
+ }
}
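
Note: the pool entry points above operate on two lists, cmd_buffers (live) and free_cmd_buffers (recyclable): reset walks the live list, trim destroys only the free list, destroy tears down both. A lifecycle sketch with placeholder names (illustrative only; vkTrimCommandPool is the Vulkan 1.1 core form):

#include <vulkan/vulkan.h>

/* Typical command-pool lifecycle: one pool per queue family per thread. */
static VkResult pool_lifecycle(VkDevice dev, uint32_t queue_family)
{
   VkCommandPoolCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
      .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
      .queueFamilyIndex = queue_family,
   };
   VkCommandPool pool;
   VkResult res = vkCreateCommandPool(dev, &info, NULL, &pool);
   if (res != VK_SUCCESS)
      return res;

   /* ... allocate and record command buffers from `pool` ... */

   vkResetCommandPool(dev, pool, 0);      /* resets every live buffer  */
   vkTrimCommandPool(dev, pool, 0);       /* destroys freed buffers    */
   vkDestroyCommandPool(dev, pool, NULL); /* tears down both lists     */
   return VK_SUCCESS;
}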
static void
-radv_cmd_buffer_begin_subpass(struct radv_cmd_buffer *cmd_buffer,
- uint32_t subpass_id)
+radv_cmd_buffer_begin_subpass(struct radv_cmd_buffer *cmd_buffer, uint32_t subpass_id)
{
- struct radv_cmd_state *state = &cmd_buffer->state;
- struct radv_subpass *subpass = &state->pass->subpasses[subpass_id];
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ struct radv_subpass *subpass = &state->pass->subpasses[subpass_id];
- ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
- cmd_buffer->cs, 4096);
+ ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4096);
- radv_subpass_barrier(cmd_buffer, &subpass->start_barrier);
+ radv_subpass_barrier(cmd_buffer, &subpass->start_barrier);
- radv_cmd_buffer_set_subpass(cmd_buffer, subpass);
+ radv_cmd_buffer_set_subpass(cmd_buffer, subpass);
- radv_describe_barrier_start(cmd_buffer, RGP_BARRIER_EXTERNAL_RENDER_PASS_SYNC);
+ radv_describe_barrier_start(cmd_buffer, RGP_BARRIER_EXTERNAL_RENDER_PASS_SYNC);
- for (uint32_t i = 0; i < subpass->attachment_count; ++i) {
- const uint32_t a = subpass->attachments[i].attachment;
- if (a == VK_ATTACHMENT_UNUSED)
- continue;
+ for (uint32_t i = 0; i < subpass->attachment_count; ++i) {
+ const uint32_t a = subpass->attachments[i].attachment;
+ if (a == VK_ATTACHMENT_UNUSED)
+ continue;
- radv_handle_subpass_image_transition(cmd_buffer,
- subpass->attachments[i],
- true);
- }
+ radv_handle_subpass_image_transition(cmd_buffer, subpass->attachments[i], true);
+ }
- radv_describe_barrier_end(cmd_buffer);
+ radv_describe_barrier_end(cmd_buffer);
- radv_cmd_buffer_clear_subpass(cmd_buffer);
+ radv_cmd_buffer_clear_subpass(cmd_buffer);
- assert(cmd_buffer->cs->cdw <= cdw_max);
+ assert(cmd_buffer->cs->cdw <= cdw_max);
}
static void
radv_cmd_buffer_end_subpass(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_cmd_state *state = &cmd_buffer->state;
- const struct radv_subpass *subpass = state->subpass;
- uint32_t subpass_id = radv_get_subpass_id(cmd_buffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ const struct radv_subpass *subpass = state->subpass;
+ uint32_t subpass_id = radv_get_subpass_id(cmd_buffer);
- radv_cmd_buffer_resolve_subpass(cmd_buffer);
+ radv_cmd_buffer_resolve_subpass(cmd_buffer);
- radv_describe_barrier_start(cmd_buffer, RGP_BARRIER_EXTERNAL_RENDER_PASS_SYNC);
+ radv_describe_barrier_start(cmd_buffer, RGP_BARRIER_EXTERNAL_RENDER_PASS_SYNC);
- for (uint32_t i = 0; i < subpass->attachment_count; ++i) {
- const uint32_t a = subpass->attachments[i].attachment;
- if (a == VK_ATTACHMENT_UNUSED)
- continue;
+ for (uint32_t i = 0; i < subpass->attachment_count; ++i) {
+ const uint32_t a = subpass->attachments[i].attachment;
+ if (a == VK_ATTACHMENT_UNUSED)
+ continue;
- if (state->pass->attachments[a].last_subpass_idx != subpass_id)
- continue;
+ if (state->pass->attachments[a].last_subpass_idx != subpass_id)
+ continue;
- VkImageLayout layout = state->pass->attachments[a].final_layout;
- VkImageLayout stencil_layout = state->pass->attachments[a].stencil_final_layout;
- struct radv_subpass_attachment att = { a, layout, stencil_layout };
- radv_handle_subpass_image_transition(cmd_buffer, att, false);
- }
+ VkImageLayout layout = state->pass->attachments[a].final_layout;
+ VkImageLayout stencil_layout = state->pass->attachments[a].stencil_final_layout;
+ struct radv_subpass_attachment att = {a, layout, stencil_layout};
+ radv_handle_subpass_image_transition(cmd_buffer, att, false);
+ }
- radv_describe_barrier_end(cmd_buffer);
+ radv_describe_barrier_end(cmd_buffer);
}
void
radv_cmd_buffer_begin_render_pass(struct radv_cmd_buffer *cmd_buffer,
- const VkRenderPassBeginInfo *pRenderPassBegin,
- const struct radv_extra_render_pass_begin_info *extra_info)
+ const VkRenderPassBeginInfo *pRenderPassBegin,
+ const struct radv_extra_render_pass_begin_info *extra_info)
{
- RADV_FROM_HANDLE(radv_render_pass, pass, pRenderPassBegin->renderPass);
- RADV_FROM_HANDLE(radv_framebuffer, framebuffer, pRenderPassBegin->framebuffer);
- VkResult result;
+ RADV_FROM_HANDLE(radv_render_pass, pass, pRenderPassBegin->renderPass);
+ RADV_FROM_HANDLE(radv_framebuffer, framebuffer, pRenderPassBegin->framebuffer);
+ VkResult result;
- cmd_buffer->state.framebuffer = framebuffer;
- cmd_buffer->state.pass = pass;
- cmd_buffer->state.render_area = pRenderPassBegin->renderArea;
+ cmd_buffer->state.framebuffer = framebuffer;
+ cmd_buffer->state.pass = pass;
+ cmd_buffer->state.render_area = pRenderPassBegin->renderArea;
- result = radv_cmd_state_setup_attachments(cmd_buffer, pass, pRenderPassBegin, extra_info);
- if (result != VK_SUCCESS)
- return;
+ result = radv_cmd_state_setup_attachments(cmd_buffer, pass, pRenderPassBegin, extra_info);
+ if (result != VK_SUCCESS)
+ return;
- result = radv_cmd_state_setup_sample_locations(cmd_buffer, pass, pRenderPassBegin);
- if (result != VK_SUCCESS)
- return;
+ result = radv_cmd_state_setup_sample_locations(cmd_buffer, pass, pRenderPassBegin);
+ if (result != VK_SUCCESS)
+ return;
}
-void radv_CmdBeginRenderPass2(
- VkCommandBuffer commandBuffer,
- const VkRenderPassBeginInfo* pRenderPassBeginInfo,
- const VkSubpassBeginInfo* pSubpassBeginInfo)
+void
+radv_CmdBeginRenderPass2(VkCommandBuffer commandBuffer,
+ const VkRenderPassBeginInfo *pRenderPassBeginInfo,
+ const VkSubpassBeginInfo *pSubpassBeginInfo)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- radv_cmd_buffer_begin_render_pass(cmd_buffer, pRenderPassBeginInfo, NULL);
+ radv_cmd_buffer_begin_render_pass(cmd_buffer, pRenderPassBeginInfo, NULL);
- radv_cmd_buffer_begin_subpass(cmd_buffer, 0);
+ radv_cmd_buffer_begin_subpass(cmd_buffer, 0);
}
-void radv_CmdNextSubpass2(
- VkCommandBuffer commandBuffer,
- const VkSubpassBeginInfo* pSubpassBeginInfo,
- const VkSubpassEndInfo* pSubpassEndInfo)
+void
+radv_CmdNextSubpass2(VkCommandBuffer commandBuffer, const VkSubpassBeginInfo *pSubpassBeginInfo,
+ const VkSubpassEndInfo *pSubpassEndInfo)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- uint32_t prev_subpass = radv_get_subpass_id(cmd_buffer);
- radv_cmd_buffer_end_subpass(cmd_buffer);
- radv_cmd_buffer_begin_subpass(cmd_buffer, prev_subpass + 1);
+ uint32_t prev_subpass = radv_get_subpass_id(cmd_buffer);
+ radv_cmd_buffer_end_subpass(cmd_buffer);
+ radv_cmd_buffer_begin_subpass(cmd_buffer, prev_subpass + 1);
}
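
Note: radv_cmd_buffer_begin_render_pass records the framebuffer, pass and render area and sets up attachment/sample-location state; radv_cmd_buffer_begin_subpass then emits the start barrier, image transitions and clears, and radv_cmd_buffer_end_subpass handles resolves plus final-layout transitions before the next subpass begins. A sketch of the corresponding *2 API calls (Vulkan 1.2 core), assuming a render pass with two subpasses and one cleared color attachment created elsewhere; illustrative only:

#include <vulkan/vulkan.h>

/* Drive a two-subpass render pass with the *2 entry points. */
static void run_render_pass(VkCommandBuffer cmd, VkRenderPass pass,
                            VkFramebuffer fb, VkExtent2D extent)
{
   VkClearValue clear = {.color = {.float32 = {0.0f, 0.0f, 0.0f, 1.0f}}};
   VkRenderPassBeginInfo rp_begin = {
      .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
      .renderPass = pass,
      .framebuffer = fb,
      .renderArea = {.offset = {0, 0}, .extent = extent},
      .clearValueCount = 1,
      .pClearValues = &clear,
   };
   VkSubpassBeginInfo sp_begin = {
      .sType = VK_STRUCTURE_TYPE_SUBPASS_BEGIN_INFO,
      .contents = VK_SUBPASS_CONTENTS_INLINE,
   };
   VkSubpassEndInfo sp_end = {.sType = VK_STRUCTURE_TYPE_SUBPASS_END_INFO};

   vkCmdBeginRenderPass2(cmd, &rp_begin, &sp_begin);
   /* ... draws for subpass 0 ... */
   vkCmdNextSubpass2(cmd, &sp_begin, &sp_end);
   /* ... draws for subpass 1 ... */
   vkCmdEndRenderPass2(cmd, &sp_end);
}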
-static void radv_emit_view_index(struct radv_cmd_buffer *cmd_buffer, unsigned index)
-{
- struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
- for (unsigned stage = 0; stage < MESA_SHADER_STAGES; ++stage) {
- if (!radv_get_shader(pipeline, stage))
- continue;
-
- struct radv_userdata_info *loc = radv_lookup_user_sgpr(pipeline, stage, AC_UD_VIEW_INDEX);
- if (loc->sgpr_idx == -1)
- continue;
- uint32_t base_reg = pipeline->user_data_0[stage];
- radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, index);
-
- }
- if (radv_pipeline_has_gs_copy_shader(pipeline)) {
- struct radv_userdata_info *loc = &pipeline->gs_copy_shader->info.user_sgprs_locs.shader_data[AC_UD_VIEW_INDEX];
- if (loc->sgpr_idx != -1) {
- uint32_t base_reg = R_00B130_SPI_SHADER_USER_DATA_VS_0;
- radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, index);
- }
- }
+static void
+radv_emit_view_index(struct radv_cmd_buffer *cmd_buffer, unsigned index)
+{
+ struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
+ for (unsigned stage = 0; stage < MESA_SHADER_STAGES; ++stage) {
+ if (!radv_get_shader(pipeline, stage))
+ continue;
+
+ struct radv_userdata_info *loc = radv_lookup_user_sgpr(pipeline, stage, AC_UD_VIEW_INDEX);
+ if (loc->sgpr_idx == -1)
+ continue;
+ uint32_t base_reg = pipeline->user_data_0[stage];
+ radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, index);
+ }
+ if (radv_pipeline_has_gs_copy_shader(pipeline)) {
+ struct radv_userdata_info *loc =
+ &pipeline->gs_copy_shader->info.user_sgprs_locs.shader_data[AC_UD_VIEW_INDEX];
+ if (loc->sgpr_idx != -1) {
+ uint32_t base_reg = R_00B130_SPI_SHADER_USER_DATA_VS_0;
+ radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, index);
+ }
+ }
}
static void
-radv_cs_emit_draw_packet(struct radv_cmd_buffer *cmd_buffer,
- uint32_t vertex_count,
- uint32_t use_opaque)
+radv_cs_emit_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint32_t vertex_count,
+ uint32_t use_opaque)
{
- radeon_emit(cmd_buffer->cs, PKT3(PKT3_DRAW_INDEX_AUTO, 1, cmd_buffer->state.predicating));
- radeon_emit(cmd_buffer->cs, vertex_count);
- radeon_emit(cmd_buffer->cs, V_0287F0_DI_SRC_SEL_AUTO_INDEX | use_opaque);
+ radeon_emit(cmd_buffer->cs, PKT3(PKT3_DRAW_INDEX_AUTO, 1, cmd_buffer->state.predicating));
+ radeon_emit(cmd_buffer->cs, vertex_count);
+ radeon_emit(cmd_buffer->cs, V_0287F0_DI_SRC_SEL_AUTO_INDEX | use_opaque);
}
/**
@@ -5283,216 +4976,194 @@ radv_cs_emit_draw_packet(struct radv_cmd_buffer *cmd_buffer,
* Hardware uses this information to return 0 for out-of-bounds reads.
*/
static void
-radv_cs_emit_draw_indexed_packet(struct radv_cmd_buffer *cmd_buffer,
- uint64_t index_va,
- uint32_t max_index_count,
- uint32_t index_count)
+radv_cs_emit_draw_indexed_packet(struct radv_cmd_buffer *cmd_buffer, uint64_t index_va,
+ uint32_t max_index_count, uint32_t index_count)
{
- radeon_emit(cmd_buffer->cs, PKT3(PKT3_DRAW_INDEX_2, 4, cmd_buffer->state.predicating));
- radeon_emit(cmd_buffer->cs, max_index_count);
- radeon_emit(cmd_buffer->cs, index_va);
- radeon_emit(cmd_buffer->cs, index_va >> 32);
- radeon_emit(cmd_buffer->cs, index_count);
- radeon_emit(cmd_buffer->cs, V_0287F0_DI_SRC_SEL_DMA);
+ radeon_emit(cmd_buffer->cs, PKT3(PKT3_DRAW_INDEX_2, 4, cmd_buffer->state.predicating));
+ radeon_emit(cmd_buffer->cs, max_index_count);
+ radeon_emit(cmd_buffer->cs, index_va);
+ radeon_emit(cmd_buffer->cs, index_va >> 32);
+ radeon_emit(cmd_buffer->cs, index_count);
+ radeon_emit(cmd_buffer->cs, V_0287F0_DI_SRC_SEL_DMA);
}
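
Note: DRAW_INDEX_2 takes the index-buffer GPU address, the number of in-bounds indices (max_index_count, which the hardware uses to return 0 for out-of-bounds reads) and the count actually drawn; on the API side those values come from the current index-buffer binding plus the vkCmdDrawIndexed parameters. A minimal sketch (illustrative only; buffer contents assumed to be filled elsewhere):

#include <vulkan/vulkan.h>

/* Bind a 16-bit index buffer and issue one indexed draw; the driver turns
 * this into a PKT3_DRAW_INDEX_2 whose index_va/max_index_count are derived
 * from the binding and firstIndex. */
static void draw_indexed(VkCommandBuffer cmd, VkBuffer index_buf,
                         uint32_t index_count)
{
   vkCmdBindIndexBuffer(cmd, index_buf, 0 /* offset */, VK_INDEX_TYPE_UINT16);
   vkCmdDrawIndexed(cmd, index_count,
                    1,  /* instanceCount */
                    0,  /* firstIndex */
                    0,  /* vertexOffset */
                    0); /* firstInstance */
}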
/* MUST inline this function to avoid massive perf loss in drawoverhead */
ALWAYS_INLINE static void
-radv_cs_emit_indirect_draw_packet(struct radv_cmd_buffer *cmd_buffer,
- bool indexed,
- uint32_t draw_count,
- uint64_t count_va,
- uint32_t stride)
-{
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- const unsigned di_src_sel = indexed ? V_0287F0_DI_SRC_SEL_DMA : V_0287F0_DI_SRC_SEL_AUTO_INDEX;
- bool draw_id_enable = cmd_buffer->state.pipeline->graphics.uses_drawid;
- uint32_t base_reg = cmd_buffer->state.pipeline->graphics.vtx_base_sgpr;
- uint32_t vertex_offset_reg, start_instance_reg = 0, draw_id_reg = 0;
- bool predicating = cmd_buffer->state.predicating;
- assert(base_reg);
-
- /* just reset draw state for vertex data */
- cmd_buffer->state.last_first_instance = -1;
- cmd_buffer->state.last_num_instances = -1;
- cmd_buffer->state.last_drawid = -1;
- cmd_buffer->state.last_vertex_offset = -1;
-
- vertex_offset_reg = (base_reg - SI_SH_REG_OFFSET) >> 2;
- if (cmd_buffer->state.pipeline->graphics.uses_baseinstance)
- start_instance_reg = ((base_reg + (draw_id_enable ? 8 : 4)) - SI_SH_REG_OFFSET) >> 2;
- if (draw_id_enable)
- draw_id_reg = ((base_reg + 4) - SI_SH_REG_OFFSET) >> 2;
-
- if (draw_count == 1 && !count_va && !draw_id_enable) {
- radeon_emit(cs, PKT3(indexed ? PKT3_DRAW_INDEX_INDIRECT :
- PKT3_DRAW_INDIRECT, 3, predicating));
- radeon_emit(cs, 0);
- radeon_emit(cs, vertex_offset_reg);
- radeon_emit(cs, start_instance_reg);
- radeon_emit(cs, di_src_sel);
- } else {
- radeon_emit(cs, PKT3(indexed ? PKT3_DRAW_INDEX_INDIRECT_MULTI :
- PKT3_DRAW_INDIRECT_MULTI,
- 8, predicating));
- radeon_emit(cs, 0);
- radeon_emit(cs, vertex_offset_reg);
- radeon_emit(cs, start_instance_reg);
- radeon_emit(cs, draw_id_reg |
- S_2C3_DRAW_INDEX_ENABLE(draw_id_enable) |
- S_2C3_COUNT_INDIRECT_ENABLE(!!count_va));
- radeon_emit(cs, draw_count); /* count */
- radeon_emit(cs, count_va); /* count_addr */
- radeon_emit(cs, count_va >> 32);
- radeon_emit(cs, stride); /* stride */
- radeon_emit(cs, di_src_sel);
- }
+radv_cs_emit_indirect_draw_packet(struct radv_cmd_buffer *cmd_buffer, bool indexed,
+ uint32_t draw_count, uint64_t count_va, uint32_t stride)
+{
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ const unsigned di_src_sel = indexed ? V_0287F0_DI_SRC_SEL_DMA : V_0287F0_DI_SRC_SEL_AUTO_INDEX;
+ bool draw_id_enable = cmd_buffer->state.pipeline->graphics.uses_drawid;
+ uint32_t base_reg = cmd_buffer->state.pipeline->graphics.vtx_base_sgpr;
+ uint32_t vertex_offset_reg, start_instance_reg = 0, draw_id_reg = 0;
+ bool predicating = cmd_buffer->state.predicating;
+ assert(base_reg);
+
+ /* just reset draw state for vertex data */
+ cmd_buffer->state.last_first_instance = -1;
+ cmd_buffer->state.last_num_instances = -1;
+ cmd_buffer->state.last_drawid = -1;
+ cmd_buffer->state.last_vertex_offset = -1;
+
+ vertex_offset_reg = (base_reg - SI_SH_REG_OFFSET) >> 2;
+ if (cmd_buffer->state.pipeline->graphics.uses_baseinstance)
+ start_instance_reg = ((base_reg + (draw_id_enable ? 8 : 4)) - SI_SH_REG_OFFSET) >> 2;
+ if (draw_id_enable)
+ draw_id_reg = ((base_reg + 4) - SI_SH_REG_OFFSET) >> 2;
+
+ if (draw_count == 1 && !count_va && !draw_id_enable) {
+ radeon_emit(cs,
+ PKT3(indexed ? PKT3_DRAW_INDEX_INDIRECT : PKT3_DRAW_INDIRECT, 3, predicating));
+ radeon_emit(cs, 0);
+ radeon_emit(cs, vertex_offset_reg);
+ radeon_emit(cs, start_instance_reg);
+ radeon_emit(cs, di_src_sel);
+ } else {
+ radeon_emit(cs, PKT3(indexed ? PKT3_DRAW_INDEX_INDIRECT_MULTI : PKT3_DRAW_INDIRECT_MULTI, 8,
+ predicating));
+ radeon_emit(cs, 0);
+ radeon_emit(cs, vertex_offset_reg);
+ radeon_emit(cs, start_instance_reg);
+ radeon_emit(cs, draw_id_reg | S_2C3_DRAW_INDEX_ENABLE(draw_id_enable) |
+ S_2C3_COUNT_INDIRECT_ENABLE(!!count_va));
+ radeon_emit(cs, draw_count); /* count */
+ radeon_emit(cs, count_va); /* count_addr */
+ radeon_emit(cs, count_va >> 32);
+ radeon_emit(cs, stride); /* stride */
+ radeon_emit(cs, di_src_sel);
+ }
}
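/* A standalone sketch of how the user-SGPR register indices above are derived:
 * SET_SH_REG-style packets address SH registers as dword indices relative to
 * SI_SH_REG_OFFSET. The constant value and base register below are assumed,
 * illustrative stand-ins for the generated register headers.
 */
#include <stdint.h>
#include <stdio.h>

#define SI_SH_REG_OFFSET 0x0000B000u /* assumed value, see the sid.h headers */

int
main(void)
{
   uint32_t base_reg = 0x0000B130u; /* e.g. SPI_SHADER_USER_DATA_VS_0, illustrative */

   uint32_t vertex_offset_reg = (base_reg - SI_SH_REG_OFFSET) >> 2;        /* first user SGPR */
   uint32_t draw_id_reg = ((base_reg + 4) - SI_SH_REG_OFFSET) >> 2;        /* next dword */
   uint32_t start_instance_reg = ((base_reg + 8) - SI_SH_REG_OFFSET) >> 2; /* one further */

   printf("vertex_offset=%u draw_id=%u start_instance=%u\n", vertex_offset_reg, draw_id_reg,
          start_instance_reg);
   return 0;
}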
static inline void
radv_emit_userdata_vertex_internal(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_draw_info *info,
- const uint32_t vertex_offset)
-{
- struct radv_cmd_state *state = &cmd_buffer->state;
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- const bool uses_baseinstance = state->pipeline->graphics.uses_baseinstance;
- const bool uses_drawid = state->pipeline->graphics.uses_drawid;
- radeon_set_sh_reg_seq(cs, state->pipeline->graphics.vtx_base_sgpr,
- state->pipeline->graphics.vtx_emit_num);
-
- radeon_emit(cs, vertex_offset);
- state->last_vertex_offset = vertex_offset;
- if (uses_drawid) {
- radeon_emit(cs, 0);
- state->last_drawid = 0;
- }
- if (uses_baseinstance) {
- radeon_emit(cs, info->first_instance);
- state->last_first_instance = info->first_instance;
- }
+ const struct radv_draw_info *info, const uint32_t vertex_offset)
+{
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ const bool uses_baseinstance = state->pipeline->graphics.uses_baseinstance;
+ const bool uses_drawid = state->pipeline->graphics.uses_drawid;
+ radeon_set_sh_reg_seq(cs, state->pipeline->graphics.vtx_base_sgpr,
+ state->pipeline->graphics.vtx_emit_num);
+
+ radeon_emit(cs, vertex_offset);
+ state->last_vertex_offset = vertex_offset;
+ if (uses_drawid) {
+ radeon_emit(cs, 0);
+ state->last_drawid = 0;
+ }
+ if (uses_baseinstance) {
+ radeon_emit(cs, info->first_instance);
+ state->last_first_instance = info->first_instance;
+ }
}
static inline void
-radv_emit_userdata_vertex(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_draw_info *info,
- const uint32_t vertex_offset)
-{
- const struct radv_cmd_state *state = &cmd_buffer->state;
- const bool uses_baseinstance = state->pipeline->graphics.uses_baseinstance;
- const bool uses_drawid = state->pipeline->graphics.uses_drawid;
-
- /* this looks very dumb, but it allows the compiler to optimize better and yields
- * ~3-4% perf increase in drawoverhead
- */
- if (vertex_offset != state->last_vertex_offset) {
- radv_emit_userdata_vertex_internal(cmd_buffer, info, vertex_offset);
- } else if (uses_drawid && 0 != state->last_drawid) {
- radv_emit_userdata_vertex_internal(cmd_buffer, info, vertex_offset);
- } else if (uses_baseinstance && info->first_instance != state->last_first_instance) {
- radv_emit_userdata_vertex_internal(cmd_buffer, info, vertex_offset);
- }
+radv_emit_userdata_vertex(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info,
+ const uint32_t vertex_offset)
+{
+ const struct radv_cmd_state *state = &cmd_buffer->state;
+ const bool uses_baseinstance = state->pipeline->graphics.uses_baseinstance;
+ const bool uses_drawid = state->pipeline->graphics.uses_drawid;
+
+ /* this looks very dumb, but it allows the compiler to optimize better and yields
+ * ~3-4% perf increase in drawoverhead
+ */
+ if (vertex_offset != state->last_vertex_offset) {
+ radv_emit_userdata_vertex_internal(cmd_buffer, info, vertex_offset);
+ } else if (uses_drawid && 0 != state->last_drawid) {
+ radv_emit_userdata_vertex_internal(cmd_buffer, info, vertex_offset);
+ } else if (uses_baseinstance && info->first_instance != state->last_first_instance) {
+ radv_emit_userdata_vertex_internal(cmd_buffer, info, vertex_offset);
+ }
}
ALWAYS_INLINE static void
radv_emit_draw_packets_indexed(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_draw_info *info,
- uint32_t count,
- uint32_t first_index)
-{
- const struct radv_cmd_state *state = &cmd_buffer->state;
- const int index_size = radv_get_vgt_index_size(state->index_type);
- uint64_t index_va;
-
- uint32_t remaining_indexes = cmd_buffer->state.max_index_count;
- remaining_indexes = MAX2(remaining_indexes, info->first_index) - info->first_index;
-
- /* Skip draw calls with 0-sized index buffers if the GPU can't handle them */
- if (!remaining_indexes &&
- cmd_buffer->device->physical_device->rad_info.has_zero_index_buffer_bug)
- return;
-
- index_va = state->index_va;
- index_va += first_index * index_size;
-
- if (!state->subpass->view_mask) {
- radv_cs_emit_draw_indexed_packet(cmd_buffer,
- index_va,
- remaining_indexes,
- count);
- } else {
- u_foreach_bit(i, state->subpass->view_mask) {
- radv_emit_view_index(cmd_buffer, i);
-
- radv_cs_emit_draw_indexed_packet(cmd_buffer,
- index_va,
- remaining_indexes,
- count);
- }
- }
+ const struct radv_draw_info *info, uint32_t count,
+ uint32_t first_index)
+{
+ const struct radv_cmd_state *state = &cmd_buffer->state;
+ const int index_size = radv_get_vgt_index_size(state->index_type);
+ uint64_t index_va;
+
+ uint32_t remaining_indexes = cmd_buffer->state.max_index_count;
+ remaining_indexes = MAX2(remaining_indexes, info->first_index) - info->first_index;
+
+ /* Skip draw calls with 0-sized index buffers if the GPU can't handle them */
+ if (!remaining_indexes &&
+ cmd_buffer->device->physical_device->rad_info.has_zero_index_buffer_bug)
+ return;
+
+ index_va = state->index_va;
+ index_va += first_index * index_size;
+
+ if (!state->subpass->view_mask) {
+ radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, count);
+ } else {
+ u_foreach_bit(i, state->subpass->view_mask)
+ {
+ radv_emit_view_index(cmd_buffer, i);
+
+ radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, count);
+ }
+ }
}
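/* A small worked example of the clamping done above for the indexed path:
 * remaining_indexes saturates to 0 when first_index points past the end of
 * the bound index buffer, and the index VA is advanced by first_index
 * elements. All numbers below are illustrative.
 */
#include <stdint.h>
#include <stdio.h>

#define MAX2(a, b) ((a) > (b) ? (a) : (b))

int
main(void)
{
   uint32_t max_index_count = 100;      /* indexes available in the bound buffer */
   uint32_t first_index = 120;          /* deliberately past the end */
   int index_size = 2;                  /* e.g. 16-bit indexes */
   uint64_t index_buffer_va = 0x100000; /* hypothetical GPU address */

   uint32_t remaining = MAX2(max_index_count, first_index) - first_index; /* 0, no underflow */
   uint64_t index_va = index_buffer_va + (uint64_t)first_index * index_size;

   printf("remaining=%u index_va=0x%llx\n", remaining, (unsigned long long)index_va);
   return 0;
}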
ALWAYS_INLINE static void
-radv_emit_direct_draw_packets(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_draw_info *info,
- uint32_t count,
- uint32_t use_opaque)
-{
- const struct radv_cmd_state *state = &cmd_buffer->state;
- if (!state->subpass->view_mask) {
- radv_cs_emit_draw_packet(cmd_buffer,
- count,
- use_opaque);
- } else {
- u_foreach_bit(i, state->subpass->view_mask) {
- radv_emit_view_index(cmd_buffer, i);
-
- radv_cs_emit_draw_packet(cmd_buffer,
- count,
- use_opaque);
- }
- }
+radv_emit_direct_draw_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info,
+ uint32_t count, uint32_t use_opaque)
+{
+ const struct radv_cmd_state *state = &cmd_buffer->state;
+ if (!state->subpass->view_mask) {
+ radv_cs_emit_draw_packet(cmd_buffer, count, use_opaque);
+ } else {
+ u_foreach_bit(i, state->subpass->view_mask)
+ {
+ radv_emit_view_index(cmd_buffer, i);
+
+ radv_cs_emit_draw_packet(cmd_buffer, count, use_opaque);
+ }
+ }
}
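/* A minimal sketch of the multiview replay pattern used above: when the
 * subpass view mask is non-zero, the same draw is emitted once per set bit,
 * with the view index loaded into a user SGPR first. u_foreach_bit expands
 * to roughly this loop; emit_draw_for_view() is a hypothetical stand-in for
 * the real packet emission.
 */
#include <stdint.h>
#include <stdio.h>

static void
emit_draw_for_view(unsigned view)
{
   printf("emit draw for view %u\n", view);
}

int
main(void)
{
   uint32_t view_mask = 0x5; /* views 0 and 2, illustrative */

   if (!view_mask) {
      emit_draw_for_view(0);
   } else {
      for (uint32_t tmp = view_mask; tmp; tmp &= tmp - 1) {
         unsigned view = __builtin_ctz(tmp); /* index of the lowest set bit */
         emit_draw_for_view(view);
      }
   }
   return 0;
}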
static void
radv_emit_indirect_draw_packets(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_draw_info *info)
-{
- const struct radv_cmd_state *state = &cmd_buffer->state;
- struct radeon_winsys *ws = cmd_buffer->device->ws;
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- const uint64_t va = radv_buffer_get_va(info->indirect->bo) + info->indirect->offset + info->indirect_offset;
- const uint64_t count_va = info->count_buffer ? radv_buffer_get_va(info->count_buffer->bo) + info->count_buffer->offset +
- info->count_buffer_offset : 0;
-
- radv_cs_add_buffer(ws, cs, info->indirect->bo);
-
- radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0));
- radeon_emit(cs, 1);
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
-
- if (info->count_buffer) {
- radv_cs_add_buffer(ws, cs, info->count_buffer->bo);
- }
-
- if (!state->subpass->view_mask) {
- radv_cs_emit_indirect_draw_packet(cmd_buffer,
- info->indexed,
- info->count,
- count_va,
- info->stride);
- } else {
- u_foreach_bit(i, state->subpass->view_mask) {
- radv_emit_view_index(cmd_buffer, i);
-
- radv_cs_emit_indirect_draw_packet(cmd_buffer,
- info->indexed,
- info->count,
- count_va,
- info->stride);
- }
- }
+ const struct radv_draw_info *info)
+{
+ const struct radv_cmd_state *state = &cmd_buffer->state;
+ struct radeon_winsys *ws = cmd_buffer->device->ws;
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ const uint64_t va =
+ radv_buffer_get_va(info->indirect->bo) + info->indirect->offset + info->indirect_offset;
+ const uint64_t count_va = info->count_buffer
+ ? radv_buffer_get_va(info->count_buffer->bo) +
+ info->count_buffer->offset + info->count_buffer_offset
+ : 0;
+
+ radv_cs_add_buffer(ws, cs, info->indirect->bo);
+
+ radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0));
+ radeon_emit(cs, 1);
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+
+ if (info->count_buffer) {
+ radv_cs_add_buffer(ws, cs, info->count_buffer->bo);
+ }
+
+ if (!state->subpass->view_mask) {
+ radv_cs_emit_indirect_draw_packet(cmd_buffer, info->indexed, info->count, count_va,
+ info->stride);
+ } else {
+ u_foreach_bit(i, state->subpass->view_mask)
+ {
+ radv_emit_view_index(cmd_buffer, i);
+
+ radv_cs_emit_indirect_draw_packet(cmd_buffer, info->indexed, info->count, count_va,
+ info->stride);
+ }
+ }
}
/*
@@ -5511,671 +5182,602 @@ radv_emit_indirect_draw_packets(struct radv_cmd_buffer *cmd_buffer,
* return true on affected HW if radv_emit_all_graphics_states sets
* any context registers.
*/
-static bool radv_need_late_scissor_emission(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_draw_info *info)
+static bool
+radv_need_late_scissor_emission(struct radv_cmd_buffer *cmd_buffer,
+ const struct radv_draw_info *info)
{
- struct radv_cmd_state *state = &cmd_buffer->state;
+ struct radv_cmd_state *state = &cmd_buffer->state;
- if (!cmd_buffer->device->physical_device->rad_info.has_gfx9_scissor_bug)
- return false;
+ if (!cmd_buffer->device->physical_device->rad_info.has_gfx9_scissor_bug)
+ return false;
- if (cmd_buffer->state.context_roll_without_scissor_emitted || info->strmout_buffer)
- return true;
+ if (cmd_buffer->state.context_roll_without_scissor_emitted || info->strmout_buffer)
+ return true;
- uint64_t used_states = cmd_buffer->state.pipeline->graphics.needed_dynamic_state | ~RADV_CMD_DIRTY_DYNAMIC_ALL;
+ uint64_t used_states =
+ cmd_buffer->state.pipeline->graphics.needed_dynamic_state | ~RADV_CMD_DIRTY_DYNAMIC_ALL;
- /* Index, vertex and streamout buffers don't change context regs, and
- * pipeline is already handled.
- */
- used_states &= ~(RADV_CMD_DIRTY_INDEX_BUFFER |
- RADV_CMD_DIRTY_VERTEX_BUFFER |
- RADV_CMD_DIRTY_STREAMOUT_BUFFER |
- RADV_CMD_DIRTY_PIPELINE);
+ /* Index, vertex and streamout buffers don't change context regs, and
+ * pipeline is already handled.
+ */
+ used_states &= ~(RADV_CMD_DIRTY_INDEX_BUFFER | RADV_CMD_DIRTY_VERTEX_BUFFER |
+ RADV_CMD_DIRTY_STREAMOUT_BUFFER | RADV_CMD_DIRTY_PIPELINE);
- if (cmd_buffer->state.dirty & used_states)
- return true;
+ if (cmd_buffer->state.dirty & used_states)
+ return true;
- uint32_t primitive_reset_index =
- radv_get_primitive_reset_index(cmd_buffer);
+ uint32_t primitive_reset_index = radv_get_primitive_reset_index(cmd_buffer);
- if (info->indexed && state->pipeline->graphics.prim_restart_enable &&
- primitive_reset_index != state->last_primitive_reset_index)
- return true;
+ if (info->indexed && state->pipeline->graphics.prim_restart_enable &&
+ primitive_reset_index != state->last_primitive_reset_index)
+ return true;
- return false;
+ return false;
}
static void
-radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_draw_info *info)
+radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info)
{
- bool late_scissor_emission;
+ bool late_scissor_emission;
- if ((cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER) ||
- cmd_buffer->state.emitted_pipeline != cmd_buffer->state.pipeline)
- radv_emit_rbplus_state(cmd_buffer);
+ if ((cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER) ||
+ cmd_buffer->state.emitted_pipeline != cmd_buffer->state.pipeline)
+ radv_emit_rbplus_state(cmd_buffer);
- if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE)
- radv_emit_graphics_pipeline(cmd_buffer);
+ if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE)
+ radv_emit_graphics_pipeline(cmd_buffer);
- /* This should be before the cmd_buffer->state.dirty is cleared
- * (excluding RADV_CMD_DIRTY_PIPELINE) and after
- * cmd_buffer->state.context_roll_without_scissor_emitted is set. */
- late_scissor_emission =
- radv_need_late_scissor_emission(cmd_buffer, info);
+ /* This should be before the cmd_buffer->state.dirty is cleared
+ * (excluding RADV_CMD_DIRTY_PIPELINE) and after
+ * cmd_buffer->state.context_roll_without_scissor_emitted is set. */
+ late_scissor_emission = radv_need_late_scissor_emission(cmd_buffer, info);
- if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER)
- radv_emit_framebuffer_state(cmd_buffer);
+ if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER)
+ radv_emit_framebuffer_state(cmd_buffer);
- if (info->indexed) {
- if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_INDEX_BUFFER)
- radv_emit_index_buffer(cmd_buffer, info->indirect);
- } else {
- /* On GFX7 and later, non-indexed draws overwrite VGT_INDEX_TYPE,
- * so the state must be re-emitted before the next indexed
- * draw.
- */
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) {
- cmd_buffer->state.last_index_type = -1;
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_INDEX_BUFFER;
- }
- }
+ if (info->indexed) {
+ if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_INDEX_BUFFER)
+ radv_emit_index_buffer(cmd_buffer, info->indirect);
+ } else {
+ /* On GFX7 and later, non-indexed draws overwrite VGT_INDEX_TYPE,
+ * so the state must be re-emitted before the next indexed
+ * draw.
+ */
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) {
+ cmd_buffer->state.last_index_type = -1;
+ cmd_buffer->state.dirty |= RADV_CMD_DIRTY_INDEX_BUFFER;
+ }
+ }
- radv_cmd_buffer_flush_dynamic_state(cmd_buffer);
+ radv_cmd_buffer_flush_dynamic_state(cmd_buffer);
- radv_emit_draw_registers(cmd_buffer, info);
+ radv_emit_draw_registers(cmd_buffer, info);
- if (late_scissor_emission)
- radv_emit_scissor(cmd_buffer);
+ if (late_scissor_emission)
+ radv_emit_scissor(cmd_buffer);
}
/* MUST inline this function to avoid massive perf loss in drawoverhead */
ALWAYS_INLINE static bool
-radv_before_draw(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_draw_info *info,
- uint32_t vertex_offset)
-{
- const bool has_prefetch =
- cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7;
- const bool pipeline_is_dirty =
- (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE) &&
- cmd_buffer->state.pipeline != cmd_buffer->state.emitted_pipeline;
-
- ASSERTED const unsigned cdw_max =
- radeon_check_space(cmd_buffer->device->ws,
- cmd_buffer->cs, 4096);
-
- if (likely(!info->indirect)) {
- /* GFX6-GFX7 treat instance_count==0 as instance_count==1. There is
- * no workaround for indirect draws, but we can at least skip
- * direct draws.
- */
- if (unlikely(!info->instance_count))
- return false;
-
- /* Handle count == 0. */
- if (unlikely(!info->count && !info->strmout_buffer))
- return false;
- }
-
- /* Need to apply this workaround early as it can set flush flags. */
- if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER)
- radv_emit_fb_mip_change_flush(cmd_buffer);
-
- /* Use optimal packet order based on whether we need to sync the
- * pipeline.
- */
- if (cmd_buffer->state.flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
- RADV_CMD_FLAG_FLUSH_AND_INV_DB |
- RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_CS_PARTIAL_FLUSH)) {
- /* If we have to wait for idle, set all states first, so that
- * all SET packets are processed in parallel with previous draw
- * calls. Then upload descriptors, set shader pointers, and
- * draw, and prefetch at the end. This ensures that the time
- * the CUs are idle is very short. (there are only SET_SH
- * packets between the wait and the draw)
- */
- radv_emit_all_graphics_states(cmd_buffer, info);
- si_emit_cache_flush(cmd_buffer);
- /* <-- CUs are idle here --> */
-
- radv_upload_graphics_shader_descriptors(cmd_buffer, pipeline_is_dirty);
- } else {
- /* If we don't wait for idle, start prefetches first, then set
- * states, and draw at the end.
- */
- si_emit_cache_flush(cmd_buffer);
-
- if (has_prefetch && cmd_buffer->state.prefetch_L2_mask) {
- /* Only prefetch the vertex shader and VBO descriptors
- * in order to start the draw as soon as possible.
- */
- radv_emit_prefetch_L2(cmd_buffer,
- cmd_buffer->state.pipeline, true);
- }
-
- radv_upload_graphics_shader_descriptors(cmd_buffer, pipeline_is_dirty);
-
- radv_emit_all_graphics_states(cmd_buffer, info);
- }
-
- radv_describe_draw(cmd_buffer);
- if (likely(!info->indirect)) {
- struct radv_cmd_state *state = &cmd_buffer->state;
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- assert(state->pipeline->graphics.vtx_base_sgpr);
- if (state->last_num_instances != info->instance_count) {
- radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, false));
- radeon_emit(cs, info->instance_count);
- state->last_num_instances = info->instance_count;
- }
- radv_emit_userdata_vertex(cmd_buffer, info, vertex_offset);
- }
- assert(cmd_buffer->cs->cdw <= cdw_max);
-
- return true;
+radv_before_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info,
+ uint32_t vertex_offset)
+{
+ const bool has_prefetch = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7;
+ const bool pipeline_is_dirty = (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE) &&
+ cmd_buffer->state.pipeline != cmd_buffer->state.emitted_pipeline;
+
+ ASSERTED const unsigned cdw_max =
+ radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4096);
+
+ if (likely(!info->indirect)) {
+ /* GFX6-GFX7 treat instance_count==0 as instance_count==1. There is
+ * no workaround for indirect draws, but we can at least skip
+ * direct draws.
+ */
+ if (unlikely(!info->instance_count))
+ return false;
+
+ /* Handle count == 0. */
+ if (unlikely(!info->count && !info->strmout_buffer))
+ return false;
+ }
+
+ /* Need to apply this workaround early as it can set flush flags. */
+ if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER)
+ radv_emit_fb_mip_change_flush(cmd_buffer);
+
+ /* Use optimal packet order based on whether we need to sync the
+ * pipeline.
+ */
+ if (cmd_buffer->state.flush_bits &
+ (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB |
+ RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH)) {
+ /* If we have to wait for idle, set all states first, so that
+ * all SET packets are processed in parallel with previous draw
+ * calls. Then upload descriptors, set shader pointers, and
+ * draw, and prefetch at the end. This ensures that the time
+ * the CUs are idle is very short. (there are only SET_SH
+ * packets between the wait and the draw)
+ */
+ radv_emit_all_graphics_states(cmd_buffer, info);
+ si_emit_cache_flush(cmd_buffer);
+ /* <-- CUs are idle here --> */
+
+ radv_upload_graphics_shader_descriptors(cmd_buffer, pipeline_is_dirty);
+ } else {
+ /* If we don't wait for idle, start prefetches first, then set
+ * states, and draw at the end.
+ */
+ si_emit_cache_flush(cmd_buffer);
+
+ if (has_prefetch && cmd_buffer->state.prefetch_L2_mask) {
+ /* Only prefetch the vertex shader and VBO descriptors
+ * in order to start the draw as soon as possible.
+ */
+ radv_emit_prefetch_L2(cmd_buffer, cmd_buffer->state.pipeline, true);
+ }
+
+ radv_upload_graphics_shader_descriptors(cmd_buffer, pipeline_is_dirty);
+
+ radv_emit_all_graphics_states(cmd_buffer, info);
+ }
+
+ radv_describe_draw(cmd_buffer);
+ if (likely(!info->indirect)) {
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ assert(state->pipeline->graphics.vtx_base_sgpr);
+ if (state->last_num_instances != info->instance_count) {
+ radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, false));
+ radeon_emit(cs, info->instance_count);
+ state->last_num_instances = info->instance_count;
+ }
+ radv_emit_userdata_vertex(cmd_buffer, info, vertex_offset);
+ }
+ assert(cmd_buffer->cs->cdw <= cdw_max);
+
+ return true;
}
static void
radv_after_draw(struct radv_cmd_buffer *cmd_buffer)
{
- const struct radeon_info *rad_info =
- &cmd_buffer->device->physical_device->rad_info;
- bool has_prefetch =
- cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7;
- /* Start prefetches after the draw has been started. Both will
- * run in parallel, but starting the draw first is more
- * important.
- */
- if (has_prefetch && cmd_buffer->state.prefetch_L2_mask) {
- radv_emit_prefetch_L2(cmd_buffer,
- cmd_buffer->state.pipeline, false);
- }
-
- /* Workaround for a VGT hang when streamout is enabled.
- * It must be done after drawing.
- */
- if (cmd_buffer->state.streamout.streamout_enabled &&
- (rad_info->family == CHIP_HAWAII ||
- rad_info->family == CHIP_TONGA ||
- rad_info->family == CHIP_FIJI)) {
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VGT_STREAMOUT_SYNC;
- }
-
- radv_cmd_buffer_after_draw(cmd_buffer, RADV_CMD_FLAG_PS_PARTIAL_FLUSH);
-}
-
-void radv_CmdDraw(
- VkCommandBuffer commandBuffer,
- uint32_t vertexCount,
- uint32_t instanceCount,
- uint32_t firstVertex,
- uint32_t firstInstance)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_draw_info info;
-
- info.count = vertexCount;
- info.instance_count = instanceCount;
- info.first_instance = firstInstance;
- info.strmout_buffer = NULL;
- info.indirect = NULL;
- info.indexed = false;
-
- if (!radv_before_draw(cmd_buffer, &info, firstVertex))
- return;
- radv_emit_direct_draw_packets(cmd_buffer, &info,
- vertexCount, 0);
- radv_after_draw(cmd_buffer);
-}
-
-void radv_CmdDrawIndexed(
- VkCommandBuffer commandBuffer,
- uint32_t indexCount,
- uint32_t instanceCount,
- uint32_t firstIndex,
- int32_t vertexOffset,
- uint32_t firstInstance)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_draw_info info;
-
- info.indexed = true;
- info.count = indexCount;
- info.instance_count = instanceCount;
- info.first_index = firstIndex;
- info.first_instance = firstInstance;
- info.strmout_buffer = NULL;
- info.indirect = NULL;
-
- if (!radv_before_draw(cmd_buffer, &info, vertexOffset))
- return;
- radv_emit_draw_packets_indexed(cmd_buffer, &info,
- indexCount, firstIndex);
- radv_after_draw(cmd_buffer);
-}
-
-void radv_CmdDrawIndirect(
- VkCommandBuffer commandBuffer,
- VkBuffer _buffer,
- VkDeviceSize offset,
- uint32_t drawCount,
- uint32_t stride)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
- struct radv_draw_info info;
-
- info.count = drawCount;
- info.indirect = buffer;
- info.indirect_offset = offset;
- info.stride = stride;
- info.strmout_buffer = NULL;
- info.count_buffer = NULL;
- info.indexed = false;
-
- if (!radv_before_draw(cmd_buffer, &info, 0))
- return;
- radv_emit_indirect_draw_packets(cmd_buffer, &info);
- radv_after_draw(cmd_buffer);
-}
-
-void radv_CmdDrawIndexedIndirect(
- VkCommandBuffer commandBuffer,
- VkBuffer _buffer,
- VkDeviceSize offset,
- uint32_t drawCount,
- uint32_t stride)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
- struct radv_draw_info info;
-
- info.indexed = true;
- info.count = drawCount;
- info.indirect = buffer;
- info.indirect_offset = offset;
- info.stride = stride;
- info.count_buffer = NULL;
- info.strmout_buffer = NULL;
-
- if (!radv_before_draw(cmd_buffer, &info, 0))
- return;
- radv_emit_indirect_draw_packets(cmd_buffer, &info);
- radv_after_draw(cmd_buffer);
-}
-
-void radv_CmdDrawIndirectCount(
- VkCommandBuffer commandBuffer,
- VkBuffer _buffer,
- VkDeviceSize offset,
- VkBuffer _countBuffer,
- VkDeviceSize countBufferOffset,
- uint32_t maxDrawCount,
- uint32_t stride)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
- RADV_FROM_HANDLE(radv_buffer, count_buffer, _countBuffer);
- struct radv_draw_info info;
-
- info.count = maxDrawCount;
- info.indirect = buffer;
- info.indirect_offset = offset;
- info.count_buffer = count_buffer;
- info.count_buffer_offset = countBufferOffset;
- info.stride = stride;
- info.strmout_buffer = NULL;
- info.indexed = false;
-
- if (!radv_before_draw(cmd_buffer, &info, 0))
- return;
- radv_emit_indirect_draw_packets(cmd_buffer, &info);
- radv_after_draw(cmd_buffer);
-}
-
-void radv_CmdDrawIndexedIndirectCount(
- VkCommandBuffer commandBuffer,
- VkBuffer _buffer,
- VkDeviceSize offset,
- VkBuffer _countBuffer,
- VkDeviceSize countBufferOffset,
- uint32_t maxDrawCount,
- uint32_t stride)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
- RADV_FROM_HANDLE(radv_buffer, count_buffer, _countBuffer);
- struct radv_draw_info info;
-
- info.indexed = true;
- info.count = maxDrawCount;
- info.indirect = buffer;
- info.indirect_offset = offset;
- info.count_buffer = count_buffer;
- info.count_buffer_offset = countBufferOffset;
- info.stride = stride;
- info.strmout_buffer = NULL;
-
- if (!radv_before_draw(cmd_buffer, &info, 0))
- return;
- radv_emit_indirect_draw_packets(cmd_buffer, &info);
- radv_after_draw(cmd_buffer);
+ const struct radeon_info *rad_info = &cmd_buffer->device->physical_device->rad_info;
+ bool has_prefetch = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7;
+ /* Start prefetches after the draw has been started. Both will
+ * run in parallel, but starting the draw first is more
+ * important.
+ */
+ if (has_prefetch && cmd_buffer->state.prefetch_L2_mask) {
+ radv_emit_prefetch_L2(cmd_buffer, cmd_buffer->state.pipeline, false);
+ }
+
+ /* Workaround for a VGT hang when streamout is enabled.
+ * It must be done after drawing.
+ */
+ if (cmd_buffer->state.streamout.streamout_enabled &&
+ (rad_info->family == CHIP_HAWAII || rad_info->family == CHIP_TONGA ||
+ rad_info->family == CHIP_FIJI)) {
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VGT_STREAMOUT_SYNC;
+ }
+
+ radv_cmd_buffer_after_draw(cmd_buffer, RADV_CMD_FLAG_PS_PARTIAL_FLUSH);
}
-struct radv_dispatch_info {
- /**
- * Determine the layout of the grid (in block units) to be used.
- */
- uint32_t blocks[3];
-
- /**
-   * A starting offset for the grid. Even if unaligned is set, the offset
-   * must still be aligned to the block size.
- */
- uint32_t offsets[3];
- /**
- * Whether it's an unaligned compute dispatch.
- */
- bool unaligned;
-
- /**
- * Indirect compute parameters resource.
- */
- struct radv_buffer *indirect;
- uint64_t indirect_offset;
-};
+void
+radv_CmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount,
+ uint32_t firstVertex, uint32_t firstInstance)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_draw_info info;
-static void
-radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_dispatch_info *info)
-{
- struct radv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
- struct radv_shader_variant *compute_shader = pipeline->shaders[MESA_SHADER_COMPUTE];
- unsigned dispatch_initiator = cmd_buffer->device->dispatch_initiator;
- struct radeon_winsys *ws = cmd_buffer->device->ws;
- bool predicating = cmd_buffer->state.predicating;
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- struct radv_userdata_info *loc;
-
- radv_describe_dispatch(cmd_buffer, info->blocks[0], info->blocks[1],
- info->blocks[2]);
-
- loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_COMPUTE,
- AC_UD_CS_GRID_SIZE);
-
- ASSERTED unsigned cdw_max = radeon_check_space(ws, cs, 25);
-
- if (compute_shader->info.wave_size == 32) {
- assert(cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10);
- dispatch_initiator |= S_00B800_CS_W32_EN(1);
- }
-
- if (info->indirect) {
- uint64_t va = radv_buffer_get_va(info->indirect->bo);
-
- va += info->indirect->offset + info->indirect_offset;
-
- radv_cs_add_buffer(ws, cs, info->indirect->bo);
-
- if (loc->sgpr_idx != -1) {
- for (unsigned i = 0; i < 3; ++i) {
- radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
- COPY_DATA_DST_SEL(COPY_DATA_REG));
- radeon_emit(cs, (va + 4 * i));
- radeon_emit(cs, (va + 4 * i) >> 32);
- radeon_emit(cs, ((R_00B900_COMPUTE_USER_DATA_0
- + loc->sgpr_idx * 4) >> 2) + i);
- radeon_emit(cs, 0);
- }
- }
-
- if (radv_cmd_buffer_uses_mec(cmd_buffer)) {
- radeon_emit(cs, PKT3(PKT3_DISPATCH_INDIRECT, 2, predicating) |
- PKT3_SHADER_TYPE_S(1));
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- radeon_emit(cs, dispatch_initiator);
- } else {
- radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0) |
- PKT3_SHADER_TYPE_S(1));
- radeon_emit(cs, 1);
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
-
- radeon_emit(cs, PKT3(PKT3_DISPATCH_INDIRECT, 1, predicating) |
- PKT3_SHADER_TYPE_S(1));
- radeon_emit(cs, 0);
- radeon_emit(cs, dispatch_initiator);
- }
- } else {
- unsigned blocks[3] = { info->blocks[0], info->blocks[1], info->blocks[2] };
- unsigned offsets[3] = { info->offsets[0], info->offsets[1], info->offsets[2] };
-
- if (info->unaligned) {
- unsigned *cs_block_size = compute_shader->info.cs.block_size;
- unsigned remainder[3];
-
- /* If aligned, these should be an entire block size,
- * not 0.
- */
- remainder[0] = blocks[0] + cs_block_size[0] -
- align_u32_npot(blocks[0], cs_block_size[0]);
- remainder[1] = blocks[1] + cs_block_size[1] -
- align_u32_npot(blocks[1], cs_block_size[1]);
- remainder[2] = blocks[2] + cs_block_size[2] -
- align_u32_npot(blocks[2], cs_block_size[2]);
-
- blocks[0] = round_up_u32(blocks[0], cs_block_size[0]);
- blocks[1] = round_up_u32(blocks[1], cs_block_size[1]);
- blocks[2] = round_up_u32(blocks[2], cs_block_size[2]);
-
- for(unsigned i = 0; i < 3; ++i) {
- assert(offsets[i] % cs_block_size[i] == 0);
- offsets[i] /= cs_block_size[i];
- }
-
- radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
- radeon_emit(cs,
- S_00B81C_NUM_THREAD_FULL(cs_block_size[0]) |
- S_00B81C_NUM_THREAD_PARTIAL(remainder[0]));
- radeon_emit(cs,
- S_00B81C_NUM_THREAD_FULL(cs_block_size[1]) |
- S_00B81C_NUM_THREAD_PARTIAL(remainder[1]));
- radeon_emit(cs,
- S_00B81C_NUM_THREAD_FULL(cs_block_size[2]) |
- S_00B81C_NUM_THREAD_PARTIAL(remainder[2]));
-
- dispatch_initiator |= S_00B800_PARTIAL_TG_EN(1);
- }
-
- if (loc->sgpr_idx != -1) {
- assert(loc->num_sgprs == 3);
-
- radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0 +
- loc->sgpr_idx * 4, 3);
- radeon_emit(cs, blocks[0]);
- radeon_emit(cs, blocks[1]);
- radeon_emit(cs, blocks[2]);
- }
-
- if (offsets[0] || offsets[1] || offsets[2]) {
- radeon_set_sh_reg_seq(cs, R_00B810_COMPUTE_START_X, 3);
- radeon_emit(cs, offsets[0]);
- radeon_emit(cs, offsets[1]);
- radeon_emit(cs, offsets[2]);
-
- /* The blocks in the packet are not counts but end values. */
- for (unsigned i = 0; i < 3; ++i)
- blocks[i] += offsets[i];
- } else {
- dispatch_initiator |= S_00B800_FORCE_START_AT_000(1);
- }
-
- radeon_emit(cs, PKT3(PKT3_DISPATCH_DIRECT, 3, predicating) |
- PKT3_SHADER_TYPE_S(1));
- radeon_emit(cs, blocks[0]);
- radeon_emit(cs, blocks[1]);
- radeon_emit(cs, blocks[2]);
- radeon_emit(cs, dispatch_initiator);
- }
-
- assert(cmd_buffer->cs->cdw <= cdw_max);
+ info.count = vertexCount;
+ info.instance_count = instanceCount;
+ info.first_instance = firstInstance;
+ info.strmout_buffer = NULL;
+ info.indirect = NULL;
+ info.indexed = false;
+
+ if (!radv_before_draw(cmd_buffer, &info, firstVertex))
+ return;
+ radv_emit_direct_draw_packets(cmd_buffer, &info, vertexCount, 0);
+ radv_after_draw(cmd_buffer);
}
-static void
-radv_upload_compute_shader_descriptors(struct radv_cmd_buffer *cmd_buffer)
+void
+radv_CmdDrawIndexed(VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount,
+ uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance)
{
- radv_flush_descriptors(cmd_buffer, VK_SHADER_STAGE_COMPUTE_BIT);
- radv_flush_constants(cmd_buffer, VK_SHADER_STAGE_COMPUTE_BIT);
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_draw_info info;
+
+ info.indexed = true;
+ info.count = indexCount;
+ info.instance_count = instanceCount;
+ info.first_index = firstIndex;
+ info.first_instance = firstInstance;
+ info.strmout_buffer = NULL;
+ info.indirect = NULL;
+
+ if (!radv_before_draw(cmd_buffer, &info, vertexOffset))
+ return;
+ radv_emit_draw_packets_indexed(cmd_buffer, &info, indexCount, firstIndex);
+ radv_after_draw(cmd_buffer);
}
-static void
-radv_dispatch(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_dispatch_info *info)
-{
- struct radv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
- bool has_prefetch =
- cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7;
- bool pipeline_is_dirty = pipeline &&
- pipeline != cmd_buffer->state.emitted_compute_pipeline;
-
- if (cmd_buffer->state.flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
- RADV_CMD_FLAG_FLUSH_AND_INV_DB |
- RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_CS_PARTIAL_FLUSH)) {
- /* If we have to wait for idle, set all states first, so that
- * all SET packets are processed in parallel with previous draw
- * calls. Then upload descriptors, set shader pointers, and
- * dispatch, and prefetch at the end. This ensures that the
- * time the CUs are idle is very short. (there are only SET_SH
- * packets between the wait and the draw)
- */
- radv_emit_compute_pipeline(cmd_buffer);
- si_emit_cache_flush(cmd_buffer);
- /* <-- CUs are idle here --> */
+void
+radv_CmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset,
+ uint32_t drawCount, uint32_t stride)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
+ struct radv_draw_info info;
- radv_upload_compute_shader_descriptors(cmd_buffer);
+ info.count = drawCount;
+ info.indirect = buffer;
+ info.indirect_offset = offset;
+ info.stride = stride;
+ info.strmout_buffer = NULL;
+ info.count_buffer = NULL;
+ info.indexed = false;
- radv_emit_dispatch_packets(cmd_buffer, info);
- /* <-- CUs are busy here --> */
+ if (!radv_before_draw(cmd_buffer, &info, 0))
+ return;
+ radv_emit_indirect_draw_packets(cmd_buffer, &info);
+ radv_after_draw(cmd_buffer);
+}
- /* Start prefetches after the dispatch has been started. Both
- * will run in parallel, but starting the dispatch first is
- * more important.
- */
- if (has_prefetch && pipeline_is_dirty) {
- radv_emit_shader_prefetch(cmd_buffer,
- pipeline->shaders[MESA_SHADER_COMPUTE]);
- }
- } else {
- /* If we don't wait for idle, start prefetches first, then set
- * states, and dispatch at the end.
- */
- si_emit_cache_flush(cmd_buffer);
+void
+radv_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset,
+ uint32_t drawCount, uint32_t stride)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
+ struct radv_draw_info info;
- if (has_prefetch && pipeline_is_dirty) {
- radv_emit_shader_prefetch(cmd_buffer,
- pipeline->shaders[MESA_SHADER_COMPUTE]);
- }
+ info.indexed = true;
+ info.count = drawCount;
+ info.indirect = buffer;
+ info.indirect_offset = offset;
+ info.stride = stride;
+ info.count_buffer = NULL;
+ info.strmout_buffer = NULL;
- radv_upload_compute_shader_descriptors(cmd_buffer);
+ if (!radv_before_draw(cmd_buffer, &info, 0))
+ return;
+ radv_emit_indirect_draw_packets(cmd_buffer, &info);
+ radv_after_draw(cmd_buffer);
+}
- radv_emit_compute_pipeline(cmd_buffer);
- radv_emit_dispatch_packets(cmd_buffer, info);
- }
+void
+radv_CmdDrawIndirectCount(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset,
+ VkBuffer _countBuffer, VkDeviceSize countBufferOffset,
+ uint32_t maxDrawCount, uint32_t stride)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
+ RADV_FROM_HANDLE(radv_buffer, count_buffer, _countBuffer);
+ struct radv_draw_info info;
+
+ info.count = maxDrawCount;
+ info.indirect = buffer;
+ info.indirect_offset = offset;
+ info.count_buffer = count_buffer;
+ info.count_buffer_offset = countBufferOffset;
+ info.stride = stride;
+ info.strmout_buffer = NULL;
+ info.indexed = false;
+
+ if (!radv_before_draw(cmd_buffer, &info, 0))
+ return;
+ radv_emit_indirect_draw_packets(cmd_buffer, &info);
+ radv_after_draw(cmd_buffer);
+}
- radv_cmd_buffer_after_draw(cmd_buffer, RADV_CMD_FLAG_CS_PARTIAL_FLUSH);
+void
+radv_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer, VkBuffer _buffer,
+ VkDeviceSize offset, VkBuffer _countBuffer,
+ VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
+ uint32_t stride)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
+ RADV_FROM_HANDLE(radv_buffer, count_buffer, _countBuffer);
+ struct radv_draw_info info;
+
+ info.indexed = true;
+ info.count = maxDrawCount;
+ info.indirect = buffer;
+ info.indirect_offset = offset;
+ info.count_buffer = count_buffer;
+ info.count_buffer_offset = countBufferOffset;
+ info.stride = stride;
+ info.strmout_buffer = NULL;
+
+ if (!radv_before_draw(cmd_buffer, &info, 0))
+ return;
+ radv_emit_indirect_draw_packets(cmd_buffer, &info);
+ radv_after_draw(cmd_buffer);
}
-void radv_CmdDispatchBase(
- VkCommandBuffer commandBuffer,
- uint32_t base_x,
- uint32_t base_y,
- uint32_t base_z,
- uint32_t x,
- uint32_t y,
- uint32_t z)
+struct radv_dispatch_info {
+ /**
+ * Determine the layout of the grid (in block units) to be used.
+ */
+ uint32_t blocks[3];
+
+ /**
+    * A starting offset for the grid. Even if unaligned is set, the offset
+    * must still be aligned to the block size.
+ */
+ uint32_t offsets[3];
+ /**
+ * Whether it's an unaligned compute dispatch.
+ */
+ bool unaligned;
+
+ /**
+ * Indirect compute parameters resource.
+ */
+ struct radv_buffer *indirect;
+ uint64_t indirect_offset;
+};
+
+static void
+radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer,
+ const struct radv_dispatch_info *info)
+{
+ struct radv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
+ struct radv_shader_variant *compute_shader = pipeline->shaders[MESA_SHADER_COMPUTE];
+ unsigned dispatch_initiator = cmd_buffer->device->dispatch_initiator;
+ struct radeon_winsys *ws = cmd_buffer->device->ws;
+ bool predicating = cmd_buffer->state.predicating;
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ struct radv_userdata_info *loc;
+
+ radv_describe_dispatch(cmd_buffer, info->blocks[0], info->blocks[1], info->blocks[2]);
+
+ loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_COMPUTE, AC_UD_CS_GRID_SIZE);
+
+ ASSERTED unsigned cdw_max = radeon_check_space(ws, cs, 25);
+
+ if (compute_shader->info.wave_size == 32) {
+ assert(cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10);
+ dispatch_initiator |= S_00B800_CS_W32_EN(1);
+ }
+
+ if (info->indirect) {
+ uint64_t va = radv_buffer_get_va(info->indirect->bo);
+
+ va += info->indirect->offset + info->indirect_offset;
+
+ radv_cs_add_buffer(ws, cs, info->indirect->bo);
+
+ if (loc->sgpr_idx != -1) {
+ for (unsigned i = 0; i < 3; ++i) {
+ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+ radeon_emit(cs,
+ COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG));
+ radeon_emit(cs, (va + 4 * i));
+ radeon_emit(cs, (va + 4 * i) >> 32);
+ radeon_emit(cs, ((R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4) >> 2) + i);
+ radeon_emit(cs, 0);
+ }
+ }
+
+ if (radv_cmd_buffer_uses_mec(cmd_buffer)) {
+ radeon_emit(cs, PKT3(PKT3_DISPATCH_INDIRECT, 2, predicating) | PKT3_SHADER_TYPE_S(1));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ radeon_emit(cs, dispatch_initiator);
+ } else {
+ radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0) | PKT3_SHADER_TYPE_S(1));
+ radeon_emit(cs, 1);
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+
+ radeon_emit(cs, PKT3(PKT3_DISPATCH_INDIRECT, 1, predicating) | PKT3_SHADER_TYPE_S(1));
+ radeon_emit(cs, 0);
+ radeon_emit(cs, dispatch_initiator);
+ }
+ } else {
+ unsigned blocks[3] = {info->blocks[0], info->blocks[1], info->blocks[2]};
+ unsigned offsets[3] = {info->offsets[0], info->offsets[1], info->offsets[2]};
+
+ if (info->unaligned) {
+ unsigned *cs_block_size = compute_shader->info.cs.block_size;
+ unsigned remainder[3];
+
+ /* If aligned, these should be an entire block size,
+ * not 0.
+ */
+ remainder[0] = blocks[0] + cs_block_size[0] - align_u32_npot(blocks[0], cs_block_size[0]);
+ remainder[1] = blocks[1] + cs_block_size[1] - align_u32_npot(blocks[1], cs_block_size[1]);
+ remainder[2] = blocks[2] + cs_block_size[2] - align_u32_npot(blocks[2], cs_block_size[2]);
+
+ blocks[0] = round_up_u32(blocks[0], cs_block_size[0]);
+ blocks[1] = round_up_u32(blocks[1], cs_block_size[1]);
+ blocks[2] = round_up_u32(blocks[2], cs_block_size[2]);
+
+ for (unsigned i = 0; i < 3; ++i) {
+ assert(offsets[i] % cs_block_size[i] == 0);
+ offsets[i] /= cs_block_size[i];
+ }
+
+ radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
+ radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(cs_block_size[0]) |
+ S_00B81C_NUM_THREAD_PARTIAL(remainder[0]));
+ radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(cs_block_size[1]) |
+ S_00B81C_NUM_THREAD_PARTIAL(remainder[1]));
+ radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(cs_block_size[2]) |
+ S_00B81C_NUM_THREAD_PARTIAL(remainder[2]));
+
+ dispatch_initiator |= S_00B800_PARTIAL_TG_EN(1);
+ }
+
+ if (loc->sgpr_idx != -1) {
+ assert(loc->num_sgprs == 3);
+
+ radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4, 3);
+ radeon_emit(cs, blocks[0]);
+ radeon_emit(cs, blocks[1]);
+ radeon_emit(cs, blocks[2]);
+ }
+
+ if (offsets[0] || offsets[1] || offsets[2]) {
+ radeon_set_sh_reg_seq(cs, R_00B810_COMPUTE_START_X, 3);
+ radeon_emit(cs, offsets[0]);
+ radeon_emit(cs, offsets[1]);
+ radeon_emit(cs, offsets[2]);
+
+ /* The blocks in the packet are not counts but end values. */
+ for (unsigned i = 0; i < 3; ++i)
+ blocks[i] += offsets[i];
+ } else {
+ dispatch_initiator |= S_00B800_FORCE_START_AT_000(1);
+ }
+
+ radeon_emit(cs, PKT3(PKT3_DISPATCH_DIRECT, 3, predicating) | PKT3_SHADER_TYPE_S(1));
+ radeon_emit(cs, blocks[0]);
+ radeon_emit(cs, blocks[1]);
+ radeon_emit(cs, blocks[2]);
+ radeon_emit(cs, dispatch_initiator);
+ }
+
+ assert(cmd_buffer->cs->cdw <= cdw_max);
+}
+
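/* A worked example of the unaligned-dispatch arithmetic above, with the
 * helpers written out inline. remainder is the number of valid threads in
 * the last, partial thread group (a full block size when already aligned),
 * and the block count is rounded up to a whole number of groups. Values
 * are illustrative.
 */
#include <stdio.h>

int
main(void)
{
   unsigned blocks = 70;     /* threads requested in one dimension */
   unsigned block_size = 64; /* workgroup size in that dimension */

   unsigned aligned = ((blocks + block_size - 1) / block_size) * block_size; /* align_u32_npot */
   unsigned remainder = blocks + block_size - aligned; /* 70 + 64 - 128 = 6 valid threads */
   unsigned groups = (blocks + block_size - 1) / block_size; /* round_up_u32 -> 2 groups */

   printf("groups=%u, last group has %u valid threads\n", groups, remainder);
   return 0;
}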
+static void
+radv_upload_compute_shader_descriptors(struct radv_cmd_buffer *cmd_buffer)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_dispatch_info info = {0};
+ radv_flush_descriptors(cmd_buffer, VK_SHADER_STAGE_COMPUTE_BIT);
+ radv_flush_constants(cmd_buffer, VK_SHADER_STAGE_COMPUTE_BIT);
+}
+
+static void
+radv_dispatch(struct radv_cmd_buffer *cmd_buffer, const struct radv_dispatch_info *info)
+{
+ struct radv_pipeline *pipeline = cmd_buffer->state.compute_pipeline;
+ bool has_prefetch = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7;
+ bool pipeline_is_dirty = pipeline && pipeline != cmd_buffer->state.emitted_compute_pipeline;
+
+ if (cmd_buffer->state.flush_bits &
+ (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB |
+ RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH)) {
+ /* If we have to wait for idle, set all states first, so that
+ * all SET packets are processed in parallel with previous draw
+ * calls. Then upload descriptors, set shader pointers, and
+ * dispatch, and prefetch at the end. This ensures that the
+ * time the CUs are idle is very short. (there are only SET_SH
+ * packets between the wait and the draw)
+ */
+ radv_emit_compute_pipeline(cmd_buffer);
+ si_emit_cache_flush(cmd_buffer);
+ /* <-- CUs are idle here --> */
+
+ radv_upload_compute_shader_descriptors(cmd_buffer);
+
+ radv_emit_dispatch_packets(cmd_buffer, info);
+ /* <-- CUs are busy here --> */
+
+ /* Start prefetches after the dispatch has been started. Both
+ * will run in parallel, but starting the dispatch first is
+ * more important.
+ */
+ if (has_prefetch && pipeline_is_dirty) {
+ radv_emit_shader_prefetch(cmd_buffer, pipeline->shaders[MESA_SHADER_COMPUTE]);
+ }
+ } else {
+ /* If we don't wait for idle, start prefetches first, then set
+ * states, and dispatch at the end.
+ */
+ si_emit_cache_flush(cmd_buffer);
+
+ if (has_prefetch && pipeline_is_dirty) {
+ radv_emit_shader_prefetch(cmd_buffer, pipeline->shaders[MESA_SHADER_COMPUTE]);
+ }
+
+ radv_upload_compute_shader_descriptors(cmd_buffer);
+
+ radv_emit_compute_pipeline(cmd_buffer);
+ radv_emit_dispatch_packets(cmd_buffer, info);
+ }
+
+ radv_cmd_buffer_after_draw(cmd_buffer, RADV_CMD_FLAG_CS_PARTIAL_FLUSH);
+}
- info.blocks[0] = x;
- info.blocks[1] = y;
- info.blocks[2] = z;
+void
+radv_CmdDispatchBase(VkCommandBuffer commandBuffer, uint32_t base_x, uint32_t base_y,
+ uint32_t base_z, uint32_t x, uint32_t y, uint32_t z)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_dispatch_info info = {0};
- info.offsets[0] = base_x;
- info.offsets[1] = base_y;
- info.offsets[2] = base_z;
- radv_dispatch(cmd_buffer, &info);
+ info.blocks[0] = x;
+ info.blocks[1] = y;
+ info.blocks[2] = z;
+
+ info.offsets[0] = base_x;
+ info.offsets[1] = base_y;
+ info.offsets[2] = base_z;
+ radv_dispatch(cmd_buffer, &info);
}
-void radv_CmdDispatch(
- VkCommandBuffer commandBuffer,
- uint32_t x,
- uint32_t y,
- uint32_t z)
+void
+radv_CmdDispatch(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z)
{
- radv_CmdDispatchBase(commandBuffer, 0, 0, 0, x, y, z);
+ radv_CmdDispatchBase(commandBuffer, 0, 0, 0, x, y, z);
}
-void radv_CmdDispatchIndirect(
- VkCommandBuffer commandBuffer,
- VkBuffer _buffer,
- VkDeviceSize offset)
+void
+radv_CmdDispatchIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
- struct radv_dispatch_info info = {0};
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
+ struct radv_dispatch_info info = {0};
- info.indirect = buffer;
- info.indirect_offset = offset;
+ info.indirect = buffer;
+ info.indirect_offset = offset;
- radv_dispatch(cmd_buffer, &info);
+ radv_dispatch(cmd_buffer, &info);
}
-void radv_unaligned_dispatch(
- struct radv_cmd_buffer *cmd_buffer,
- uint32_t x,
- uint32_t y,
- uint32_t z)
+void
+radv_unaligned_dispatch(struct radv_cmd_buffer *cmd_buffer, uint32_t x, uint32_t y, uint32_t z)
{
- struct radv_dispatch_info info = {0};
+ struct radv_dispatch_info info = {0};
- info.blocks[0] = x;
- info.blocks[1] = y;
- info.blocks[2] = z;
- info.unaligned = 1;
+ info.blocks[0] = x;
+ info.blocks[1] = y;
+ info.blocks[2] = z;
+ info.unaligned = 1;
- radv_dispatch(cmd_buffer, &info);
+ radv_dispatch(cmd_buffer, &info);
}
void
radv_cmd_buffer_end_render_pass(struct radv_cmd_buffer *cmd_buffer)
{
- vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments);
- vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.subpass_sample_locs);
+ vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments);
+ vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.subpass_sample_locs);
- cmd_buffer->state.pass = NULL;
- cmd_buffer->state.subpass = NULL;
- cmd_buffer->state.attachments = NULL;
- cmd_buffer->state.framebuffer = NULL;
- cmd_buffer->state.subpass_sample_locs = NULL;
+ cmd_buffer->state.pass = NULL;
+ cmd_buffer->state.subpass = NULL;
+ cmd_buffer->state.attachments = NULL;
+ cmd_buffer->state.framebuffer = NULL;
+ cmd_buffer->state.subpass_sample_locs = NULL;
}
-void radv_CmdEndRenderPass2(
- VkCommandBuffer commandBuffer,
- const VkSubpassEndInfo* pSubpassEndInfo)
+void
+radv_CmdEndRenderPass2(VkCommandBuffer commandBuffer, const VkSubpassEndInfo *pSubpassEndInfo)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- radv_subpass_barrier(cmd_buffer, &cmd_buffer->state.pass->end_barrier);
+ radv_subpass_barrier(cmd_buffer, &cmd_buffer->state.pass->end_barrier);
- radv_cmd_buffer_end_subpass(cmd_buffer);
+ radv_cmd_buffer_end_subpass(cmd_buffer);
- radv_cmd_buffer_end_render_pass(cmd_buffer);
+ radv_cmd_buffer_end_render_pass(cmd_buffer);
}
/*
@@ -6185,1194 +5787,1073 @@ void radv_CmdEndRenderPass2(
* 0xfffffff0: Clear depth to 1.0
* 0x00000000: Clear depth to 0.0
*/
-static void radv_initialize_htile(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *range)
-{
- VkImageAspectFlags aspects = VK_IMAGE_ASPECT_DEPTH_BIT;
- struct radv_cmd_state *state = &cmd_buffer->state;
- uint32_t htile_value = radv_get_htile_initial_value(cmd_buffer->device, image);
- VkClearDepthStencilValue value = {0};
- struct radv_barrier_data barrier = {0};
-
- barrier.layout_transitions.init_mask_ram = 1;
- radv_describe_layout_transition(cmd_buffer, &barrier);
-
-  /* When transitioning from the LAYOUT_UNDEFINED layout, not everyone is
-   * consistent in considering previous rendering work for WAW hazards. */
- state->flush_bits |= radv_src_access_flush(cmd_buffer, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, image);
-
- state->flush_bits |= radv_clear_htile(cmd_buffer, image, range, htile_value);
-
- if (vk_format_has_stencil(image->vk_format))
- aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
+static void
+radv_initialize_htile(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *range)
+{
+ VkImageAspectFlags aspects = VK_IMAGE_ASPECT_DEPTH_BIT;
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ uint32_t htile_value = radv_get_htile_initial_value(cmd_buffer->device, image);
+ VkClearDepthStencilValue value = {0};
+ struct radv_barrier_data barrier = {0};
+
+ barrier.layout_transitions.init_mask_ram = 1;
+ radv_describe_layout_transition(cmd_buffer, &barrier);
+
+   /* When transitioning from the LAYOUT_UNDEFINED layout, not everyone is
+    * consistent in considering previous rendering work for WAW hazards. */
+ state->flush_bits |=
+ radv_src_access_flush(cmd_buffer, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, image);
+
+ state->flush_bits |= radv_clear_htile(cmd_buffer, image, range, htile_value);
+
+ if (vk_format_has_stencil(image->vk_format))
+ aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
+
+ radv_set_ds_clear_metadata(cmd_buffer, image, range, value, aspects);
+
+ if (radv_image_is_tc_compat_htile(image)) {
+      /* Initialize the TC-compat metadata value to 0 because by
+       * default DB_Z_INFO.RANGE_PRECISION is set to 1, and we only
+       * have to conditionally update its value when performing
+ * a fast depth clear.
+ */
+ radv_set_tc_compat_zrange_metadata(cmd_buffer, image, range, 0);
+ }
+}
+
+static void
+radv_handle_depth_image_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ VkImageLayout src_layout, bool src_render_loop,
+ VkImageLayout dst_layout, bool dst_render_loop,
+ unsigned src_queue_mask, unsigned dst_queue_mask,
+ const VkImageSubresourceRange *range,
+ struct radv_sample_locations_state *sample_locs)
+{
+ struct radv_device *device = cmd_buffer->device;
+
+ if (!radv_htile_enabled(image, range->baseMipLevel))
+ return;
+
+ if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
+ radv_initialize_htile(cmd_buffer, image, range);
+ } else if (!radv_layout_is_htile_compressed(device, image, src_layout, src_render_loop,
+ src_queue_mask) &&
+ radv_layout_is_htile_compressed(device, image, dst_layout, dst_render_loop,
+ dst_queue_mask)) {
+ radv_initialize_htile(cmd_buffer, image, range);
+ } else if (radv_layout_is_htile_compressed(device, image, src_layout, src_render_loop,
+ src_queue_mask) &&
+ !radv_layout_is_htile_compressed(device, image, dst_layout, dst_render_loop,
+ dst_queue_mask)) {
+ cmd_buffer->state.flush_bits |=
+ RADV_CMD_FLAG_FLUSH_AND_INV_DB | RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
+
+ radv_decompress_depth_stencil(cmd_buffer, image, range, sample_locs);
+
+ cmd_buffer->state.flush_bits |=
+ RADV_CMD_FLAG_FLUSH_AND_INV_DB | RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
+ }
+}
+
+static uint32_t
+radv_init_cmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *range)
+{
+ static const uint32_t cmask_clear_values[4] = {0xffffffff, 0xdddddddd, 0xeeeeeeee, 0xffffffff};
+ uint32_t log2_samples = util_logbase2(image->info.samples);
+ uint32_t value = cmask_clear_values[log2_samples];
+ struct radv_barrier_data barrier = {0};
+
+ barrier.layout_transitions.init_mask_ram = 1;
+ radv_describe_layout_transition(cmd_buffer, &barrier);
+
+ return radv_clear_cmask(cmd_buffer, image, range, value);
+}
+
+uint32_t
+radv_init_fmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *range)
+{
+ static const uint32_t fmask_clear_values[4] = {0x00000000, 0x02020202, 0xE4E4E4E4, 0x76543210};
+ uint32_t log2_samples = util_logbase2(image->info.samples);
+ uint32_t value = fmask_clear_values[log2_samples];
+ struct radv_barrier_data barrier = {0};
+
+ barrier.layout_transitions.init_mask_ram = 1;
+ radv_describe_layout_transition(cmd_buffer, &barrier);
+
+ return radv_clear_fmask(cmd_buffer, image, range, value);
+}
+
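/* A minimal sketch of the sample-count lookup used for the CMASK/FMASK init
 * values above: the table is indexed by log2 of the sample count. The log2
 * helper here stands in for util_logbase2(); the table is copied from the
 * code above.
 */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
   static const uint32_t fmask_clear_values[4] = {0x00000000, 0x02020202, 0xE4E4E4E4, 0x76543210};
   unsigned samples = 8;                                /* illustrative */
   unsigned log2_samples = 31 - __builtin_clz(samples); /* util_logbase2 equivalent */

   printf("FMASK clear value for %u samples: 0x%08x\n", samples, fmask_clear_values[log2_samples]);
   return 0;
}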
+uint32_t
+radv_init_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *range, uint32_t value)
+{
+ struct radv_barrier_data barrier = {0};
+ uint32_t flush_bits = 0;
+ unsigned size = 0;
+
+ barrier.layout_transitions.init_mask_ram = 1;
+ radv_describe_layout_transition(cmd_buffer, &barrier);
+
+ flush_bits |= radv_clear_dcc(cmd_buffer, image, range, value);
+
+ if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX8) {
+ /* When DCC is enabled with mipmaps, some levels might not
+ * support fast clears and we have to initialize them as "fully
+ * expanded".
+ */
+ /* Compute the size of all fast clearable DCC levels. */
+ for (unsigned i = 0; i < image->planes[0].surface.num_dcc_levels; i++) {
+ struct legacy_surf_level *surf_level = &image->planes[0].surface.u.legacy.level[i];
+ unsigned dcc_fast_clear_size =
+ surf_level->dcc_slice_fast_clear_size * image->info.array_size;
+
+ if (!dcc_fast_clear_size)
+ break;
- radv_set_ds_clear_metadata(cmd_buffer, image, range, value, aspects);
-
- if (radv_image_is_tc_compat_htile(image)) {
- /* Initialize the TC-compat metada value to 0 because by
- * default DB_Z_INFO.RANGE_PRECISION is set to 1, and we only
- * need have to conditionally update its value when performing
- * a fast depth clear.
- */
- radv_set_tc_compat_zrange_metadata(cmd_buffer, image, range, 0);
- }
-}
-
-static void radv_handle_depth_image_transition(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- VkImageLayout src_layout,
- bool src_render_loop,
- VkImageLayout dst_layout,
- bool dst_render_loop,
- unsigned src_queue_mask,
- unsigned dst_queue_mask,
- const VkImageSubresourceRange *range,
- struct radv_sample_locations_state *sample_locs)
-{
- struct radv_device *device = cmd_buffer->device;
-
- if (!radv_htile_enabled(image, range->baseMipLevel))
- return;
-
- if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
- radv_initialize_htile(cmd_buffer, image, range);
- } else if (!radv_layout_is_htile_compressed(device, image, src_layout, src_render_loop, src_queue_mask) &&
- radv_layout_is_htile_compressed(device, image, dst_layout, dst_render_loop, dst_queue_mask)) {
- radv_initialize_htile(cmd_buffer, image, range);
- } else if (radv_layout_is_htile_compressed(device, image, src_layout, src_render_loop, src_queue_mask) &&
- !radv_layout_is_htile_compressed(device, image, dst_layout, dst_render_loop, dst_queue_mask)) {
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB |
- RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
-
- radv_decompress_depth_stencil(cmd_buffer, image, range,
- sample_locs);
-
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB |
- RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
- }
-}
-
-static uint32_t radv_init_cmask(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *range)
-{
- static const uint32_t cmask_clear_values[4] = {
- 0xffffffff,
- 0xdddddddd,
- 0xeeeeeeee,
- 0xffffffff
- };
- uint32_t log2_samples = util_logbase2(image->info.samples);
- uint32_t value = cmask_clear_values[log2_samples];
- struct radv_barrier_data barrier = {0};
-
- barrier.layout_transitions.init_mask_ram = 1;
- radv_describe_layout_transition(cmd_buffer, &barrier);
-
- return radv_clear_cmask(cmd_buffer, image, range, value);
-}
-
-uint32_t radv_init_fmask(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *range)
-{
- static const uint32_t fmask_clear_values[4] = {
- 0x00000000,
- 0x02020202,
- 0xE4E4E4E4,
- 0x76543210
- };
- uint32_t log2_samples = util_logbase2(image->info.samples);
- uint32_t value = fmask_clear_values[log2_samples];
- struct radv_barrier_data barrier = {0};
-
- barrier.layout_transitions.init_mask_ram = 1;
- radv_describe_layout_transition(cmd_buffer, &barrier);
-
- return radv_clear_fmask(cmd_buffer, image, range, value);
-}
-
-uint32_t radv_init_dcc(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *range,
- uint32_t value)
-{
- struct radv_barrier_data barrier = {0};
- uint32_t flush_bits = 0;
- unsigned size = 0;
-
- barrier.layout_transitions.init_mask_ram = 1;
- radv_describe_layout_transition(cmd_buffer, &barrier);
-
- flush_bits |= radv_clear_dcc(cmd_buffer, image, range, value);
-
- if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX8) {
- /* When DCC is enabled with mipmaps, some levels might not
- * support fast clears and we have to initialize them as "fully
- * expanded".
- */
- /* Compute the size of all fast clearable DCC levels. */
- for (unsigned i = 0; i < image->planes[0].surface.num_dcc_levels; i++) {
- struct legacy_surf_level *surf_level =
- &image->planes[0].surface.u.legacy.level[i];
- unsigned dcc_fast_clear_size =
- surf_level->dcc_slice_fast_clear_size * image->info.array_size;
-
- if (!dcc_fast_clear_size)
- break;
-
- size = surf_level->dcc_offset + dcc_fast_clear_size;
- }
-
- /* Initialize the mipmap levels without DCC. */
- if (size != image->planes[0].surface.dcc_size) {
- flush_bits |= radv_fill_buffer(cmd_buffer, image, image->bo,
- image->offset + image->planes[0].surface.dcc_offset + size,
- image->planes[0].surface.dcc_size - size,
- 0xffffffff);
- }
- }
-
- return flush_bits;
+ size = surf_level->dcc_offset + dcc_fast_clear_size;
+ }
+
+ /* Initialize the mipmap levels without DCC. */
+ if (size != image->planes[0].surface.dcc_size) {
+ flush_bits |= radv_fill_buffer(cmd_buffer, image, image->bo,
+ image->offset + image->planes[0].surface.dcc_offset + size,
+ image->planes[0].surface.dcc_size - size, 0xffffffff);
+ }
+ }
+
+ return flush_bits;
}
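
As a side note on the GFX8 path above: the loop walks the mip levels until one reports no fast-clearable DCC, and everything past that offset is filled with 0xffffffff ("fully expanded"). Below is a minimal standalone sketch of that sizing logic; the struct and the sample values are hypothetical stand-ins for the real surface layout, not part of this change.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for one mip level of a legacy (GFX8) DCC layout;
 * the real driver reads these fields from the surface description. */
struct dcc_level {
   uint64_t dcc_offset;                /* byte offset of this level's DCC */
   uint64_t dcc_slice_fast_clear_size; /* 0 if the level cannot be fast cleared */
};

/* Bytes covered by fast-clearable levels; everything from this offset up to
 * the total DCC size would be initialized to 0xffffffff ("fully expanded"). */
static uint64_t
fast_clearable_dcc_bytes(const struct dcc_level *levels, unsigned num_levels,
                         unsigned array_size)
{
   uint64_t size = 0;

   for (unsigned i = 0; i < num_levels; i++) {
      uint64_t level_size = levels[i].dcc_slice_fast_clear_size * array_size;
      if (!level_size)
         break; /* first level without fast-clear support ends the walk */
      size = levels[i].dcc_offset + level_size;
   }
   return size;
}

int
main(void)
{
   const struct dcc_level levels[3] = {{0, 4096}, {4096, 1024}, {5120, 0}};
   const uint64_t dcc_size = 6144; /* hypothetical total DCC size */
   uint64_t size = fast_clearable_dcc_bytes(levels, 3, 1);

   printf("fast-clearable: %llu bytes, fill the remaining %llu with 0xffffffff\n",
          (unsigned long long)size, (unsigned long long)(dcc_size - size));
   return 0;
}
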
/**
* Initialize DCC/FMASK/CMASK metadata for a color image.
*/
-static void radv_init_color_image_metadata(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- VkImageLayout src_layout,
- bool src_render_loop,
- VkImageLayout dst_layout,
- bool dst_render_loop,
- unsigned src_queue_mask,
- unsigned dst_queue_mask,
- const VkImageSubresourceRange *range)
-{
- uint32_t flush_bits = 0;
-
- /* Transitioning from LAYOUT_UNDEFINED layout not everyone is
- * consistent in considering previous rendering work for WAW hazards.
- */
- cmd_buffer->state.flush_bits |=
- radv_src_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, image);
-
- if (radv_image_has_cmask(image)) {
- flush_bits |= radv_init_cmask(cmd_buffer, image, range);
- }
-
- if (radv_image_has_fmask(image)) {
- flush_bits |= radv_init_fmask(cmd_buffer, image, range);
- }
-
- if (radv_dcc_enabled(image, range->baseMipLevel)) {
- uint32_t value = 0xffffffffu; /* Fully expanded mode. */
-
- if (radv_layout_dcc_compressed(cmd_buffer->device, image, dst_layout,
- dst_render_loop,
- dst_queue_mask)) {
- value = 0u;
- }
-
- flush_bits |= radv_init_dcc(cmd_buffer, image, range, value);
- }
-
- if (radv_image_has_cmask(image) ||
- radv_dcc_enabled(image, range->baseMipLevel)) {
- radv_update_fce_metadata(cmd_buffer, image, range, false);
-
- uint32_t color_values[2] = {0};
- radv_set_color_clear_metadata(cmd_buffer, image, range,
- color_values);
- }
-
- cmd_buffer->state.flush_bits |= flush_bits;
-}
-
-static void radv_retile_transition(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- VkImageLayout src_layout,
- VkImageLayout dst_layout,
- unsigned dst_queue_mask)
-{
- if (src_layout != VK_IMAGE_LAYOUT_PRESENT_SRC_KHR &&
- (dst_layout == VK_IMAGE_LAYOUT_PRESENT_SRC_KHR ||
- (dst_queue_mask & (1u << RADV_QUEUE_FOREIGN))))
- radv_retile_dcc(cmd_buffer, image);
+static void
+radv_init_color_image_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ VkImageLayout src_layout, bool src_render_loop,
+ VkImageLayout dst_layout, bool dst_render_loop,
+ unsigned src_queue_mask, unsigned dst_queue_mask,
+ const VkImageSubresourceRange *range)
+{
+ uint32_t flush_bits = 0;
+
+   /* When transitioning from the LAYOUT_UNDEFINED layout, not everyone is
+    * consistent in considering previous rendering work for WAW hazards.
+ */
+ cmd_buffer->state.flush_bits |=
+ radv_src_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, image);
+
+ if (radv_image_has_cmask(image)) {
+ flush_bits |= radv_init_cmask(cmd_buffer, image, range);
+ }
+
+ if (radv_image_has_fmask(image)) {
+ flush_bits |= radv_init_fmask(cmd_buffer, image, range);
+ }
+
+ if (radv_dcc_enabled(image, range->baseMipLevel)) {
+ uint32_t value = 0xffffffffu; /* Fully expanded mode. */
+
+ if (radv_layout_dcc_compressed(cmd_buffer->device, image, dst_layout, dst_render_loop,
+ dst_queue_mask)) {
+ value = 0u;
+ }
+
+ flush_bits |= radv_init_dcc(cmd_buffer, image, range, value);
+ }
+
+ if (radv_image_has_cmask(image) || radv_dcc_enabled(image, range->baseMipLevel)) {
+ radv_update_fce_metadata(cmd_buffer, image, range, false);
+
+ uint32_t color_values[2] = {0};
+ radv_set_color_clear_metadata(cmd_buffer, image, range, color_values);
+ }
+
+ cmd_buffer->state.flush_bits |= flush_bits;
+}
+
+static void
+radv_retile_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ VkImageLayout src_layout, VkImageLayout dst_layout, unsigned dst_queue_mask)
+{
+ if (src_layout != VK_IMAGE_LAYOUT_PRESENT_SRC_KHR &&
+ (dst_layout == VK_IMAGE_LAYOUT_PRESENT_SRC_KHR ||
+ (dst_queue_mask & (1u << RADV_QUEUE_FOREIGN))))
+ radv_retile_dcc(cmd_buffer, image);
}
/**
* Handle color image transitions for DCC/FMASK/CMASK.
*/
-static void radv_handle_color_image_transition(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- VkImageLayout src_layout,
- bool src_render_loop,
- VkImageLayout dst_layout,
- bool dst_render_loop,
- unsigned src_queue_mask,
- unsigned dst_queue_mask,
- const VkImageSubresourceRange *range)
-{
- bool dcc_decompressed = false, fast_clear_flushed = false;
-
- if (!radv_image_has_cmask(image) &&
- !radv_image_has_fmask(image) &&
- !radv_dcc_enabled(image, range->baseMipLevel))
- return;
-
- if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
- radv_init_color_image_metadata(cmd_buffer, image,
- src_layout, src_render_loop,
- dst_layout, dst_render_loop,
- src_queue_mask, dst_queue_mask,
- range);
-
- if (image->retile_map)
- radv_retile_transition(cmd_buffer, image, src_layout, dst_layout, dst_queue_mask);
- return;
- }
-
- if (radv_dcc_enabled(image, range->baseMipLevel)) {
- if (src_layout == VK_IMAGE_LAYOUT_PREINITIALIZED) {
- cmd_buffer->state.flush_bits |=
- radv_init_dcc(cmd_buffer, image, range, 0xffffffffu);
- } else if (radv_layout_dcc_compressed(cmd_buffer->device, image, src_layout, src_render_loop, src_queue_mask) &&
- !radv_layout_dcc_compressed(cmd_buffer->device, image, dst_layout, dst_render_loop, dst_queue_mask)) {
- radv_decompress_dcc(cmd_buffer, image, range);
- dcc_decompressed = true;
- } else if (radv_layout_can_fast_clear(cmd_buffer->device, image, src_layout,
- src_render_loop, src_queue_mask) &&
- !radv_layout_can_fast_clear(cmd_buffer->device, image, dst_layout,
- dst_render_loop, dst_queue_mask)) {
- radv_fast_clear_flush_image_inplace(cmd_buffer, image, range);
- fast_clear_flushed = true;
- }
-
- if (image->retile_map)
- radv_retile_transition(cmd_buffer, image, src_layout, dst_layout, dst_queue_mask);
- } else if (radv_image_has_cmask(image) || radv_image_has_fmask(image)) {
- if (radv_layout_can_fast_clear(cmd_buffer->device, image, src_layout,
- src_render_loop, src_queue_mask) &&
- !radv_layout_can_fast_clear(cmd_buffer->device, image, dst_layout,
- dst_render_loop, dst_queue_mask)) {
- radv_fast_clear_flush_image_inplace(cmd_buffer, image, range);
- fast_clear_flushed = true;
- }
- }
-
- /* MSAA color decompress. */
- if (radv_image_has_fmask(image) &&
- (image->usage & (VK_IMAGE_USAGE_STORAGE_BIT |
- VK_IMAGE_USAGE_TRANSFER_DST_BIT)) &&
- radv_layout_fmask_compressed(cmd_buffer->device, image,
- src_layout, src_queue_mask) &&
- !radv_layout_fmask_compressed(cmd_buffer->device, image,
- dst_layout, dst_queue_mask)) {
- if (radv_dcc_enabled(image, range->baseMipLevel) &&
- !radv_image_use_dcc_image_stores(cmd_buffer->device, image) &&
- !dcc_decompressed) {
- /* A DCC decompress is required before expanding FMASK
- * when DCC stores aren't supported to avoid being in
- * a state where DCC is compressed and the main
- * surface is uncompressed.
- */
- radv_decompress_dcc(cmd_buffer, image, range);
- } else if (!fast_clear_flushed) {
- /* A FMASK decompress is required before expanding
- * FMASK.
- */
- radv_fast_clear_flush_image_inplace(cmd_buffer, image, range);
- }
-
- struct radv_barrier_data barrier = {0};
- barrier.layout_transitions.fmask_color_expand = 1;
- radv_describe_layout_transition(cmd_buffer, &barrier);
-
- radv_expand_fmask_image_inplace(cmd_buffer, image, range);
- }
+static void
+radv_handle_color_image_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ VkImageLayout src_layout, bool src_render_loop,
+ VkImageLayout dst_layout, bool dst_render_loop,
+ unsigned src_queue_mask, unsigned dst_queue_mask,
+ const VkImageSubresourceRange *range)
+{
+ bool dcc_decompressed = false, fast_clear_flushed = false;
+
+ if (!radv_image_has_cmask(image) && !radv_image_has_fmask(image) &&
+ !radv_dcc_enabled(image, range->baseMipLevel))
+ return;
+
+ if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
+ radv_init_color_image_metadata(cmd_buffer, image, src_layout, src_render_loop, dst_layout,
+ dst_render_loop, src_queue_mask, dst_queue_mask, range);
+
+ if (image->retile_map)
+ radv_retile_transition(cmd_buffer, image, src_layout, dst_layout, dst_queue_mask);
+ return;
+ }
+
+ if (radv_dcc_enabled(image, range->baseMipLevel)) {
+ if (src_layout == VK_IMAGE_LAYOUT_PREINITIALIZED) {
+ cmd_buffer->state.flush_bits |= radv_init_dcc(cmd_buffer, image, range, 0xffffffffu);
+ } else if (radv_layout_dcc_compressed(cmd_buffer->device, image, src_layout, src_render_loop,
+ src_queue_mask) &&
+ !radv_layout_dcc_compressed(cmd_buffer->device, image, dst_layout, dst_render_loop,
+ dst_queue_mask)) {
+ radv_decompress_dcc(cmd_buffer, image, range);
+ dcc_decompressed = true;
+ } else if (radv_layout_can_fast_clear(cmd_buffer->device, image, src_layout, src_render_loop,
+ src_queue_mask) &&
+ !radv_layout_can_fast_clear(cmd_buffer->device, image, dst_layout, dst_render_loop,
+ dst_queue_mask)) {
+ radv_fast_clear_flush_image_inplace(cmd_buffer, image, range);
+ fast_clear_flushed = true;
+ }
+
+ if (image->retile_map)
+ radv_retile_transition(cmd_buffer, image, src_layout, dst_layout, dst_queue_mask);
+ } else if (radv_image_has_cmask(image) || radv_image_has_fmask(image)) {
+ if (radv_layout_can_fast_clear(cmd_buffer->device, image, src_layout, src_render_loop,
+ src_queue_mask) &&
+ !radv_layout_can_fast_clear(cmd_buffer->device, image, dst_layout, dst_render_loop,
+ dst_queue_mask)) {
+ radv_fast_clear_flush_image_inplace(cmd_buffer, image, range);
+ fast_clear_flushed = true;
+ }
+ }
+
+ /* MSAA color decompress. */
+ if (radv_image_has_fmask(image) &&
+ (image->usage & (VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT)) &&
+ radv_layout_fmask_compressed(cmd_buffer->device, image, src_layout, src_queue_mask) &&
+ !radv_layout_fmask_compressed(cmd_buffer->device, image, dst_layout, dst_queue_mask)) {
+ if (radv_dcc_enabled(image, range->baseMipLevel) &&
+ !radv_image_use_dcc_image_stores(cmd_buffer->device, image) && !dcc_decompressed) {
+ /* A DCC decompress is required before expanding FMASK
+ * when DCC stores aren't supported to avoid being in
+ * a state where DCC is compressed and the main
+ * surface is uncompressed.
+ */
+ radv_decompress_dcc(cmd_buffer, image, range);
+ } else if (!fast_clear_flushed) {
+ /* A FMASK decompress is required before expanding
+ * FMASK.
+ */
+ radv_fast_clear_flush_image_inplace(cmd_buffer, image, range);
+ }
+
+ struct radv_barrier_data barrier = {0};
+ barrier.layout_transitions.fmask_color_expand = 1;
+ radv_describe_layout_transition(cmd_buffer, &barrier);
+
+ radv_expand_fmask_image_inplace(cmd_buffer, image, range);
+ }
}
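
For reference, the UNDEFINED branch above is typically reached by an application-side barrier like the one sketched below. This is a hedged, minimal example using core Vulkan 1.0 only; `cmd` and `image` are assumed to be valid handles created elsewhere.

#include <stddef.h>
#include <vulkan/vulkan.h>

/* Move a color image whose contents are irrelevant out of UNDEFINED; this is
 * the transition that the driver services by (re)initializing the color
 * metadata (CMASK/FMASK/DCC) for the selected subresource range. */
static void
record_undefined_to_color_barrier(VkCommandBuffer cmd, VkImage image)
{
   VkImageMemoryBarrier barrier = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
      .srcAccessMask = 0,
      .dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
      .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,
      .newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
      .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
      .image = image,
      .subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1},
   };

   vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                        VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, 0 /* dependencyFlags */,
                        0, NULL, 0, NULL, 1, &barrier);
}
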
-static void radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- VkImageLayout src_layout,
- bool src_render_loop,
- VkImageLayout dst_layout,
- bool dst_render_loop,
- uint32_t src_family,
- uint32_t dst_family,
- const VkImageSubresourceRange *range,
- struct radv_sample_locations_state *sample_locs)
-{
- if (image->exclusive && src_family != dst_family) {
- /* This is an acquire or a release operation and there will be
- * a corresponding release/acquire. Do the transition in the
- * most flexible queue. */
-
- assert(src_family == cmd_buffer->queue_family_index ||
- dst_family == cmd_buffer->queue_family_index);
-
- if (src_family == VK_QUEUE_FAMILY_EXTERNAL ||
- src_family == VK_QUEUE_FAMILY_FOREIGN_EXT)
- return;
-
- if (cmd_buffer->queue_family_index == RADV_QUEUE_TRANSFER)
- return;
-
- if (cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE &&
- (src_family == RADV_QUEUE_GENERAL ||
- dst_family == RADV_QUEUE_GENERAL))
- return;
- }
-
- if (src_layout == dst_layout && src_render_loop == dst_render_loop)
- return;
-
- unsigned src_queue_mask =
- radv_image_queue_family_mask(image, src_family,
- cmd_buffer->queue_family_index);
- unsigned dst_queue_mask =
- radv_image_queue_family_mask(image, dst_family,
- cmd_buffer->queue_family_index);
-
- if (vk_format_has_depth(image->vk_format)) {
- radv_handle_depth_image_transition(cmd_buffer, image,
- src_layout, src_render_loop,
- dst_layout, dst_render_loop,
- src_queue_mask, dst_queue_mask,
- range, sample_locs);
- } else {
- radv_handle_color_image_transition(cmd_buffer, image,
- src_layout, src_render_loop,
- dst_layout, dst_render_loop,
- src_queue_mask, dst_queue_mask,
- range);
- }
+static void
+radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ VkImageLayout src_layout, bool src_render_loop,
+ VkImageLayout dst_layout, bool dst_render_loop, uint32_t src_family,
+ uint32_t dst_family, const VkImageSubresourceRange *range,
+ struct radv_sample_locations_state *sample_locs)
+{
+ if (image->exclusive && src_family != dst_family) {
+ /* This is an acquire or a release operation and there will be
+ * a corresponding release/acquire. Do the transition in the
+ * most flexible queue. */
+
+ assert(src_family == cmd_buffer->queue_family_index ||
+ dst_family == cmd_buffer->queue_family_index);
+
+ if (src_family == VK_QUEUE_FAMILY_EXTERNAL || src_family == VK_QUEUE_FAMILY_FOREIGN_EXT)
+ return;
+
+ if (cmd_buffer->queue_family_index == RADV_QUEUE_TRANSFER)
+ return;
+
+ if (cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE &&
+ (src_family == RADV_QUEUE_GENERAL || dst_family == RADV_QUEUE_GENERAL))
+ return;
+ }
+
+ if (src_layout == dst_layout && src_render_loop == dst_render_loop)
+ return;
+
+ unsigned src_queue_mask =
+ radv_image_queue_family_mask(image, src_family, cmd_buffer->queue_family_index);
+ unsigned dst_queue_mask =
+ radv_image_queue_family_mask(image, dst_family, cmd_buffer->queue_family_index);
+
+ if (vk_format_has_depth(image->vk_format)) {
+ radv_handle_depth_image_transition(cmd_buffer, image, src_layout, src_render_loop, dst_layout,
+ dst_render_loop, src_queue_mask, dst_queue_mask, range,
+ sample_locs);
+ } else {
+ radv_handle_color_image_transition(cmd_buffer, image, src_layout, src_render_loop, dst_layout,
+ dst_render_loop, src_queue_mask, dst_queue_mask, range);
+ }
}
struct radv_barrier_info {
- enum rgp_barrier_reason reason;
- uint32_t eventCount;
- const VkEvent *pEvents;
- VkPipelineStageFlags srcStageMask;
- VkPipelineStageFlags dstStageMask;
+ enum rgp_barrier_reason reason;
+ uint32_t eventCount;
+ const VkEvent *pEvents;
+ VkPipelineStageFlags srcStageMask;
+ VkPipelineStageFlags dstStageMask;
};
static void
-radv_barrier(struct radv_cmd_buffer *cmd_buffer,
- uint32_t memoryBarrierCount,
- const VkMemoryBarrier *pMemoryBarriers,
- uint32_t bufferMemoryBarrierCount,
- const VkBufferMemoryBarrier *pBufferMemoryBarriers,
- uint32_t imageMemoryBarrierCount,
- const VkImageMemoryBarrier *pImageMemoryBarriers,
- const struct radv_barrier_info *info)
-{
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- enum radv_cmd_flush_bits src_flush_bits = 0;
- enum radv_cmd_flush_bits dst_flush_bits = 0;
-
- radv_describe_barrier_start(cmd_buffer, info->reason);
-
- for (unsigned i = 0; i < info->eventCount; ++i) {
- RADV_FROM_HANDLE(radv_event, event, info->pEvents[i]);
- uint64_t va = radv_buffer_get_va(event->bo);
-
- radv_cs_add_buffer(cmd_buffer->device->ws, cs, event->bo);
-
- ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 7);
-
- radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, va, 1, 0xffffffff);
- assert(cmd_buffer->cs->cdw <= cdw_max);
- }
-
- for (uint32_t i = 0; i < memoryBarrierCount; i++) {
- src_flush_bits |= radv_src_access_flush(cmd_buffer, pMemoryBarriers[i].srcAccessMask,
- NULL);
- dst_flush_bits |= radv_dst_access_flush(cmd_buffer, pMemoryBarriers[i].dstAccessMask,
- NULL);
- }
-
- for (uint32_t i = 0; i < bufferMemoryBarrierCount; i++) {
- src_flush_bits |= radv_src_access_flush(cmd_buffer, pBufferMemoryBarriers[i].srcAccessMask,
- NULL);
- dst_flush_bits |= radv_dst_access_flush(cmd_buffer, pBufferMemoryBarriers[i].dstAccessMask,
- NULL);
- }
-
- for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) {
- RADV_FROM_HANDLE(radv_image, image, pImageMemoryBarriers[i].image);
-
- src_flush_bits |= radv_src_access_flush(cmd_buffer, pImageMemoryBarriers[i].srcAccessMask,
- image);
- dst_flush_bits |= radv_dst_access_flush(cmd_buffer, pImageMemoryBarriers[i].dstAccessMask,
- image);
- }
-
- /* The Vulkan spec 1.1.98 says:
- *
- * "An execution dependency with only
- * VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT in the destination stage mask
- * will only prevent that stage from executing in subsequently
- * submitted commands. As this stage does not perform any actual
- * execution, this is not observable - in effect, it does not delay
- * processing of subsequent commands. Similarly an execution dependency
- * with only VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT in the source stage mask
- * will effectively not wait for any prior commands to complete."
- */
- if (info->dstStageMask != VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)
- radv_stage_flush(cmd_buffer, info->srcStageMask);
- cmd_buffer->state.flush_bits |= src_flush_bits;
-
- for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) {
- RADV_FROM_HANDLE(radv_image, image, pImageMemoryBarriers[i].image);
-
- const struct VkSampleLocationsInfoEXT *sample_locs_info =
- vk_find_struct_const(pImageMemoryBarriers[i].pNext,
- SAMPLE_LOCATIONS_INFO_EXT);
- struct radv_sample_locations_state sample_locations = {0};
-
- if (sample_locs_info) {
- assert(image->flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT);
- sample_locations.per_pixel = sample_locs_info->sampleLocationsPerPixel;
- sample_locations.grid_size = sample_locs_info->sampleLocationGridSize;
- sample_locations.count = sample_locs_info->sampleLocationsCount;
- typed_memcpy(&sample_locations.locations[0],
- sample_locs_info->pSampleLocations,
- sample_locs_info->sampleLocationsCount);
- }
-
- radv_handle_image_transition(cmd_buffer, image,
- pImageMemoryBarriers[i].oldLayout,
- false, /* Outside of a renderpass we are never in a renderloop */
- pImageMemoryBarriers[i].newLayout,
- false, /* Outside of a renderpass we are never in a renderloop */
- pImageMemoryBarriers[i].srcQueueFamilyIndex,
- pImageMemoryBarriers[i].dstQueueFamilyIndex,
- &pImageMemoryBarriers[i].subresourceRange,
- sample_locs_info ? &sample_locations : NULL);
- }
-
- /* Make sure CP DMA is idle because the driver might have performed a
- * DMA operation for copying or filling buffers/images.
- */
- if (info->srcStageMask & (VK_PIPELINE_STAGE_TRANSFER_BIT |
- VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT))
- si_cp_dma_wait_for_idle(cmd_buffer);
-
- cmd_buffer->state.flush_bits |= dst_flush_bits;
-
- radv_describe_barrier_end(cmd_buffer);
-}
-
-void radv_CmdPipelineBarrier(
- VkCommandBuffer commandBuffer,
- VkPipelineStageFlags srcStageMask,
- VkPipelineStageFlags destStageMask,
- VkBool32 byRegion,
- uint32_t memoryBarrierCount,
- const VkMemoryBarrier* pMemoryBarriers,
- uint32_t bufferMemoryBarrierCount,
- const VkBufferMemoryBarrier* pBufferMemoryBarriers,
- uint32_t imageMemoryBarrierCount,
- const VkImageMemoryBarrier* pImageMemoryBarriers)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_barrier_info info;
-
- info.reason = RGP_BARRIER_EXTERNAL_CMD_PIPELINE_BARRIER;
- info.eventCount = 0;
- info.pEvents = NULL;
- info.srcStageMask = srcStageMask;
- info.dstStageMask = destStageMask;
-
- radv_barrier(cmd_buffer, memoryBarrierCount, pMemoryBarriers,
- bufferMemoryBarrierCount, pBufferMemoryBarriers,
- imageMemoryBarrierCount, pImageMemoryBarriers, &info);
-}
-
-
-static void write_event(struct radv_cmd_buffer *cmd_buffer,
- struct radv_event *event,
- VkPipelineStageFlags stageMask,
- unsigned value)
-{
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- uint64_t va = radv_buffer_get_va(event->bo);
-
- si_emit_cache_flush(cmd_buffer);
-
- radv_cs_add_buffer(cmd_buffer->device->ws, cs, event->bo);
-
- ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 28);
-
- /* Flags that only require a top-of-pipe event. */
- VkPipelineStageFlags top_of_pipe_flags =
- VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
-
- /* Flags that only require a post-index-fetch event. */
- VkPipelineStageFlags post_index_fetch_flags =
- top_of_pipe_flags |
- VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
- VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
-
- /* Flags that only require signaling post PS. */
- VkPipelineStageFlags post_ps_flags =
- post_index_fetch_flags |
- VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
- VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT |
- VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT |
- VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT |
- VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT |
- VK_PIPELINE_STAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR |
- VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
- VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
-
- /* Flags that only require signaling post CS. */
- VkPipelineStageFlags post_cs_flags =
- VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
-
- /* Make sure CP DMA is idle because the driver might have performed a
- * DMA operation for copying or filling buffers/images.
- */
- if (stageMask & (VK_PIPELINE_STAGE_TRANSFER_BIT |
- VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT))
- si_cp_dma_wait_for_idle(cmd_buffer);
-
- if (!(stageMask & ~top_of_pipe_flags)) {
- /* Just need to sync the PFP engine. */
- radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
- radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
- S_370_WR_CONFIRM(1) |
- S_370_ENGINE_SEL(V_370_PFP));
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- radeon_emit(cs, value);
- } else if (!(stageMask & ~post_index_fetch_flags)) {
- /* Sync ME because PFP reads index and indirect buffers. */
- radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
- radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
- S_370_WR_CONFIRM(1) |
- S_370_ENGINE_SEL(V_370_ME));
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- radeon_emit(cs, value);
- } else {
- unsigned event_type;
-
- if (!(stageMask & ~post_ps_flags)) {
- /* Sync previous fragment shaders. */
- event_type = V_028A90_PS_DONE;
- } else if (!(stageMask & ~post_cs_flags)) {
- /* Sync previous compute shaders. */
- event_type = V_028A90_CS_DONE;
- } else {
- /* Otherwise, sync all prior GPU work. */
- event_type = V_028A90_BOTTOM_OF_PIPE_TS;
- }
-
- si_cs_emit_write_event_eop(cs,
- cmd_buffer->device->physical_device->rad_info.chip_class,
- radv_cmd_buffer_uses_mec(cmd_buffer),
- event_type, 0, EOP_DST_SEL_MEM,
- EOP_DATA_SEL_VALUE_32BIT, va, value,
- cmd_buffer->gfx9_eop_bug_va);
- }
-
- assert(cmd_buffer->cs->cdw <= cdw_max);
-}
-
-void radv_CmdSetEvent(VkCommandBuffer commandBuffer,
- VkEvent _event,
- VkPipelineStageFlags stageMask)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_event, event, _event);
-
- write_event(cmd_buffer, event, stageMask, 1);
-}
-
-void radv_CmdResetEvent(VkCommandBuffer commandBuffer,
- VkEvent _event,
- VkPipelineStageFlags stageMask)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_event, event, _event);
-
- write_event(cmd_buffer, event, stageMask, 0);
-}
-
-void radv_CmdWaitEvents(VkCommandBuffer commandBuffer,
- uint32_t eventCount,
- const VkEvent* pEvents,
- VkPipelineStageFlags srcStageMask,
- VkPipelineStageFlags dstStageMask,
- uint32_t memoryBarrierCount,
- const VkMemoryBarrier* pMemoryBarriers,
- uint32_t bufferMemoryBarrierCount,
- const VkBufferMemoryBarrier* pBufferMemoryBarriers,
- uint32_t imageMemoryBarrierCount,
- const VkImageMemoryBarrier* pImageMemoryBarriers)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_barrier_info info;
-
- info.reason = RGP_BARRIER_EXTERNAL_CMD_WAIT_EVENTS;
- info.eventCount = eventCount;
- info.pEvents = pEvents;
- info.srcStageMask = 0;
-
- radv_barrier(cmd_buffer, memoryBarrierCount, pMemoryBarriers,
- bufferMemoryBarrierCount, pBufferMemoryBarriers,
- imageMemoryBarrierCount, pImageMemoryBarriers, &info);
-}
-
-
-void radv_CmdSetDeviceMask(VkCommandBuffer commandBuffer,
- uint32_t deviceMask)
+radv_barrier(struct radv_cmd_buffer *cmd_buffer, uint32_t memoryBarrierCount,
+ const VkMemoryBarrier *pMemoryBarriers, uint32_t bufferMemoryBarrierCount,
+ const VkBufferMemoryBarrier *pBufferMemoryBarriers, uint32_t imageMemoryBarrierCount,
+ const VkImageMemoryBarrier *pImageMemoryBarriers, const struct radv_barrier_info *info)
+{
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ enum radv_cmd_flush_bits src_flush_bits = 0;
+ enum radv_cmd_flush_bits dst_flush_bits = 0;
+
+ radv_describe_barrier_start(cmd_buffer, info->reason);
+
+ for (unsigned i = 0; i < info->eventCount; ++i) {
+ RADV_FROM_HANDLE(radv_event, event, info->pEvents[i]);
+ uint64_t va = radv_buffer_get_va(event->bo);
+
+ radv_cs_add_buffer(cmd_buffer->device->ws, cs, event->bo);
+
+ ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 7);
+
+ radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, va, 1, 0xffffffff);
+ assert(cmd_buffer->cs->cdw <= cdw_max);
+ }
+
+ for (uint32_t i = 0; i < memoryBarrierCount; i++) {
+ src_flush_bits |= radv_src_access_flush(cmd_buffer, pMemoryBarriers[i].srcAccessMask, NULL);
+ dst_flush_bits |= radv_dst_access_flush(cmd_buffer, pMemoryBarriers[i].dstAccessMask, NULL);
+ }
+
+ for (uint32_t i = 0; i < bufferMemoryBarrierCount; i++) {
+ src_flush_bits |=
+ radv_src_access_flush(cmd_buffer, pBufferMemoryBarriers[i].srcAccessMask, NULL);
+ dst_flush_bits |=
+ radv_dst_access_flush(cmd_buffer, pBufferMemoryBarriers[i].dstAccessMask, NULL);
+ }
+
+ for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) {
+ RADV_FROM_HANDLE(radv_image, image, pImageMemoryBarriers[i].image);
+
+ src_flush_bits |=
+ radv_src_access_flush(cmd_buffer, pImageMemoryBarriers[i].srcAccessMask, image);
+ dst_flush_bits |=
+ radv_dst_access_flush(cmd_buffer, pImageMemoryBarriers[i].dstAccessMask, image);
+ }
+
+ /* The Vulkan spec 1.1.98 says:
+ *
+ * "An execution dependency with only
+ * VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT in the destination stage mask
+ * will only prevent that stage from executing in subsequently
+ * submitted commands. As this stage does not perform any actual
+ * execution, this is not observable - in effect, it does not delay
+ * processing of subsequent commands. Similarly an execution dependency
+ * with only VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT in the source stage mask
+ * will effectively not wait for any prior commands to complete."
+ */
+ if (info->dstStageMask != VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)
+ radv_stage_flush(cmd_buffer, info->srcStageMask);
+ cmd_buffer->state.flush_bits |= src_flush_bits;
+
+ for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) {
+ RADV_FROM_HANDLE(radv_image, image, pImageMemoryBarriers[i].image);
+
+ const struct VkSampleLocationsInfoEXT *sample_locs_info =
+ vk_find_struct_const(pImageMemoryBarriers[i].pNext, SAMPLE_LOCATIONS_INFO_EXT);
+ struct radv_sample_locations_state sample_locations = {0};
+
+ if (sample_locs_info) {
+ assert(image->flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT);
+ sample_locations.per_pixel = sample_locs_info->sampleLocationsPerPixel;
+ sample_locations.grid_size = sample_locs_info->sampleLocationGridSize;
+ sample_locations.count = sample_locs_info->sampleLocationsCount;
+ typed_memcpy(&sample_locations.locations[0], sample_locs_info->pSampleLocations,
+ sample_locs_info->sampleLocationsCount);
+ }
+
+ radv_handle_image_transition(
+ cmd_buffer, image, pImageMemoryBarriers[i].oldLayout,
+ false, /* Outside of a renderpass we are never in a renderloop */
+ pImageMemoryBarriers[i].newLayout,
+ false, /* Outside of a renderpass we are never in a renderloop */
+ pImageMemoryBarriers[i].srcQueueFamilyIndex, pImageMemoryBarriers[i].dstQueueFamilyIndex,
+ &pImageMemoryBarriers[i].subresourceRange, sample_locs_info ? &sample_locations : NULL);
+ }
+
+ /* Make sure CP DMA is idle because the driver might have performed a
+ * DMA operation for copying or filling buffers/images.
+ */
+ if (info->srcStageMask & (VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT))
+ si_cp_dma_wait_for_idle(cmd_buffer);
+
+ cmd_buffer->state.flush_bits |= dst_flush_bits;
+
+ radv_describe_barrier_end(cmd_buffer);
+}
+
+void
+radv_CmdPipelineBarrier(VkCommandBuffer commandBuffer, VkPipelineStageFlags srcStageMask,
+ VkPipelineStageFlags destStageMask, VkBool32 byRegion,
+ uint32_t memoryBarrierCount, const VkMemoryBarrier *pMemoryBarriers,
+ uint32_t bufferMemoryBarrierCount,
+ const VkBufferMemoryBarrier *pBufferMemoryBarriers,
+ uint32_t imageMemoryBarrierCount,
+ const VkImageMemoryBarrier *pImageMemoryBarriers)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_barrier_info info;
+
+ info.reason = RGP_BARRIER_EXTERNAL_CMD_PIPELINE_BARRIER;
+ info.eventCount = 0;
+ info.pEvents = NULL;
+ info.srcStageMask = srcStageMask;
+ info.dstStageMask = destStageMask;
+
+ radv_barrier(cmd_buffer, memoryBarrierCount, pMemoryBarriers, bufferMemoryBarrierCount,
+ pBufferMemoryBarriers, imageMemoryBarrierCount, pImageMemoryBarriers, &info);
+}
+
+static void
+write_event(struct radv_cmd_buffer *cmd_buffer, struct radv_event *event,
+ VkPipelineStageFlags stageMask, unsigned value)
+{
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ uint64_t va = radv_buffer_get_va(event->bo);
+
+ si_emit_cache_flush(cmd_buffer);
+
+ radv_cs_add_buffer(cmd_buffer->device->ws, cs, event->bo);
+
+ ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 28);
+
+ /* Flags that only require a top-of-pipe event. */
+ VkPipelineStageFlags top_of_pipe_flags = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
+
+ /* Flags that only require a post-index-fetch event. */
+ VkPipelineStageFlags post_index_fetch_flags =
+ top_of_pipe_flags | VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
+
+ /* Flags that only require signaling post PS. */
+ VkPipelineStageFlags post_ps_flags =
+ post_index_fetch_flags | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
+ VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT |
+ VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT |
+ VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT |
+ VK_PIPELINE_STAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR |
+ VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
+
+ /* Flags that only require signaling post CS. */
+ VkPipelineStageFlags post_cs_flags = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+
+ /* Make sure CP DMA is idle because the driver might have performed a
+ * DMA operation for copying or filling buffers/images.
+ */
+ if (stageMask & (VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT))
+ si_cp_dma_wait_for_idle(cmd_buffer);
+
+ if (!(stageMask & ~top_of_pipe_flags)) {
+ /* Just need to sync the PFP engine. */
+ radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
+ radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ radeon_emit(cs, value);
+ } else if (!(stageMask & ~post_index_fetch_flags)) {
+ /* Sync ME because PFP reads index and indirect buffers. */
+ radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
+ radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ radeon_emit(cs, value);
+ } else {
+ unsigned event_type;
+
+ if (!(stageMask & ~post_ps_flags)) {
+ /* Sync previous fragment shaders. */
+ event_type = V_028A90_PS_DONE;
+ } else if (!(stageMask & ~post_cs_flags)) {
+ /* Sync previous compute shaders. */
+ event_type = V_028A90_CS_DONE;
+ } else {
+ /* Otherwise, sync all prior GPU work. */
+ event_type = V_028A90_BOTTOM_OF_PIPE_TS;
+ }
+
+ si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.chip_class,
+ radv_cmd_buffer_uses_mec(cmd_buffer), event_type, 0,
+ EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, va, value,
+ cmd_buffer->gfx9_eop_bug_va);
+ }
+
+ assert(cmd_buffer->cs->cdw <= cdw_max);
+}
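
The event selection above relies on the usual "no bits outside the group" test, !(stageMask & ~group_flags). A tiny standalone illustration of that test follows; the flag values are made up for the example and merely stand in for VkPipelineStageFlagBits.

#include <assert.h>
#include <stdint.h>

/* True when mask has no bits outside group, i.e. !(mask & ~group). */
static int
only_contains(uint32_t mask, uint32_t group)
{
   return (mask & ~group) == 0;
}

int
main(void)
{
   /* Made-up flag values standing in for real pipeline stage bits. */
   const uint32_t TOP = 1u << 0, DRAW_INDIRECT = 1u << 1, VERTEX_INPUT = 1u << 2,
                  FRAGMENT_SHADER = 1u << 7;
   const uint32_t post_index_fetch = TOP | DRAW_INDIRECT | VERTEX_INPUT;

   assert(only_contains(TOP | VERTEX_INPUT, post_index_fetch)); /* an ME write suffices */
   assert(!only_contains(FRAGMENT_SHADER, post_index_fetch));   /* needs a later event */
   return 0;
}
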
+
+void
+radv_CmdSetEvent(VkCommandBuffer commandBuffer, VkEvent _event, VkPipelineStageFlags stageMask)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_event, event, _event);
+
+ write_event(cmd_buffer, event, stageMask, 1);
+}
+
+void
+radv_CmdResetEvent(VkCommandBuffer commandBuffer, VkEvent _event, VkPipelineStageFlags stageMask)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_event, event, _event);
+
+ write_event(cmd_buffer, event, stageMask, 0);
+}
+
+void
+radv_CmdWaitEvents(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent *pEvents,
+ VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask,
+ uint32_t memoryBarrierCount, const VkMemoryBarrier *pMemoryBarriers,
+ uint32_t bufferMemoryBarrierCount,
+ const VkBufferMemoryBarrier *pBufferMemoryBarriers,
+ uint32_t imageMemoryBarrierCount,
+ const VkImageMemoryBarrier *pImageMemoryBarriers)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_barrier_info info;
+
+ info.reason = RGP_BARRIER_EXTERNAL_CMD_WAIT_EVENTS;
+ info.eventCount = eventCount;
+ info.pEvents = pEvents;
+ info.srcStageMask = 0;
+
+ radv_barrier(cmd_buffer, memoryBarrierCount, pMemoryBarriers, bufferMemoryBarrierCount,
+ pBufferMemoryBarriers, imageMemoryBarrierCount, pImageMemoryBarriers, &info);
+}
+
+void
+radv_CmdSetDeviceMask(VkCommandBuffer commandBuffer, uint32_t deviceMask)
{
/* No-op */
}
/* VK_EXT_conditional_rendering */
-void radv_CmdBeginConditionalRenderingEXT(
- VkCommandBuffer commandBuffer,
- const VkConditionalRenderingBeginInfoEXT* pConditionalRenderingBegin)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_buffer, buffer, pConditionalRenderingBegin->buffer);
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- unsigned pred_op = PREDICATION_OP_BOOL32;
- bool draw_visible = true;
- uint64_t va;
-
- va = radv_buffer_get_va(buffer->bo) + pConditionalRenderingBegin->offset;
-
- /* By default, if the 32-bit value at offset in buffer memory is zero,
- * then the rendering commands are discarded, otherwise they are
- * executed as normal. If the inverted flag is set, all commands are
- * discarded if the value is non zero.
- */
- if (pConditionalRenderingBegin->flags &
- VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT) {
- draw_visible = false;
- }
-
- si_emit_cache_flush(cmd_buffer);
-
- if (cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL &&
- !cmd_buffer->device->physical_device->rad_info.has_32bit_predication) {
- uint64_t pred_value = 0, pred_va;
- unsigned pred_offset;
-
- /* From the Vulkan spec 1.1.107:
- *
- * "If the 32-bit value at offset in buffer memory is zero,
- * then the rendering commands are discarded, otherwise they
- * are executed as normal. If the value of the predicate in
- * buffer memory changes while conditional rendering is
- * active, the rendering commands may be discarded in an
- * implementation-dependent way. Some implementations may
- * latch the value of the predicate upon beginning conditional
- * rendering while others may read it before every rendering
- * command."
- *
- * But, the AMD hardware treats the predicate as a 64-bit
- * value which means we need a workaround in the driver.
- * Luckily, it's not required to support if the value changes
- * when predication is active.
- *
- * The workaround is as follows:
- * 1) allocate a 64-value in the upload BO and initialize it
- * to 0
- * 2) copy the 32-bit predicate value to the upload BO
- * 3) use the new allocated VA address for predication
- *
- * Based on the conditionalrender demo, it's faster to do the
- * COPY_DATA in ME (+ sync PFP) instead of PFP.
- */
- radv_cmd_buffer_upload_data(cmd_buffer, 8, &pred_value, &pred_offset);
-
- pred_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + pred_offset;
-
- radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
- COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
- COPY_DATA_WR_CONFIRM);
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- radeon_emit(cs, pred_va);
- radeon_emit(cs, pred_va >> 32);
-
- radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
- radeon_emit(cs, 0);
-
- va = pred_va;
- pred_op = PREDICATION_OP_BOOL64;
- }
-
- /* Enable predication for this command buffer. */
- si_emit_set_predication_state(cmd_buffer, draw_visible, pred_op, va);
- cmd_buffer->state.predicating = true;
-
- /* Store conditional rendering user info. */
- cmd_buffer->state.predication_type = draw_visible;
- cmd_buffer->state.predication_op = pred_op;
- cmd_buffer->state.predication_va = va;
-}
-
-void radv_CmdEndConditionalRenderingEXT(
- VkCommandBuffer commandBuffer)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
-
- /* Disable predication for this command buffer. */
- si_emit_set_predication_state(cmd_buffer, false, 0, 0);
- cmd_buffer->state.predicating = false;
-
- /* Reset conditional rendering user info. */
- cmd_buffer->state.predication_type = -1;
- cmd_buffer->state.predication_op = 0;
- cmd_buffer->state.predication_va = 0;
+void
+radv_CmdBeginConditionalRenderingEXT(
+ VkCommandBuffer commandBuffer,
+ const VkConditionalRenderingBeginInfoEXT *pConditionalRenderingBegin)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_buffer, buffer, pConditionalRenderingBegin->buffer);
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ unsigned pred_op = PREDICATION_OP_BOOL32;
+ bool draw_visible = true;
+ uint64_t va;
+
+ va = radv_buffer_get_va(buffer->bo) + pConditionalRenderingBegin->offset;
+
+ /* By default, if the 32-bit value at offset in buffer memory is zero,
+ * then the rendering commands are discarded, otherwise they are
+ * executed as normal. If the inverted flag is set, all commands are
+    * discarded if the value is non-zero.
+ */
+ if (pConditionalRenderingBegin->flags & VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT) {
+ draw_visible = false;
+ }
+
+ si_emit_cache_flush(cmd_buffer);
+
+ if (cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL &&
+ !cmd_buffer->device->physical_device->rad_info.has_32bit_predication) {
+ uint64_t pred_value = 0, pred_va;
+ unsigned pred_offset;
+
+ /* From the Vulkan spec 1.1.107:
+ *
+ * "If the 32-bit value at offset in buffer memory is zero,
+ * then the rendering commands are discarded, otherwise they
+ * are executed as normal. If the value of the predicate in
+ * buffer memory changes while conditional rendering is
+ * active, the rendering commands may be discarded in an
+ * implementation-dependent way. Some implementations may
+ * latch the value of the predicate upon beginning conditional
+ * rendering while others may read it before every rendering
+ * command."
+ *
+ * But, the AMD hardware treats the predicate as a 64-bit
+ * value which means we need a workaround in the driver.
+       * Luckily, it's not required to support the case where the value
+       * changes while predication is active.
+ *
+ * The workaround is as follows:
+       * 1) allocate a 64-bit value in the upload BO and initialize it
+       * to 0
+       * 2) copy the 32-bit predicate value to the upload BO
+       * 3) use the newly allocated VA address for predication
+ *
+ * Based on the conditionalrender demo, it's faster to do the
+ * COPY_DATA in ME (+ sync PFP) instead of PFP.
+ */
+ radv_cmd_buffer_upload_data(cmd_buffer, 8, &pred_value, &pred_offset);
+
+ pred_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + pred_offset;
+
+ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
+ COPY_DATA_WR_CONFIRM);
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ radeon_emit(cs, pred_va);
+ radeon_emit(cs, pred_va >> 32);
+
+ radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
+ radeon_emit(cs, 0);
+
+ va = pred_va;
+ pred_op = PREDICATION_OP_BOOL64;
+ }
+
+ /* Enable predication for this command buffer. */
+ si_emit_set_predication_state(cmd_buffer, draw_visible, pred_op, va);
+ cmd_buffer->state.predicating = true;
+
+ /* Store conditional rendering user info. */
+ cmd_buffer->state.predication_type = draw_visible;
+ cmd_buffer->state.predication_op = pred_op;
+ cmd_buffer->state.predication_va = va;
+}
+
+void
+radv_CmdEndConditionalRenderingEXT(VkCommandBuffer commandBuffer)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+
+ /* Disable predication for this command buffer. */
+ si_emit_set_predication_state(cmd_buffer, false, 0, 0);
+ cmd_buffer->state.predicating = false;
+
+ /* Reset conditional rendering user info. */
+ cmd_buffer->state.predication_type = -1;
+ cmd_buffer->state.predication_op = 0;
+ cmd_buffer->state.predication_va = 0;
}
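
For context, the predication code above services application calls like the sketch below. This is a hedged example: the handles are assumed valid, and since VK_EXT_conditional_rendering is an extension, its entry points are assumed to have been fetched with vkGetDeviceProcAddr.

#include <vulkan/vulkan.h>

/* Draws recorded inside the conditional rendering scope are discarded when
 * the 32-bit value at `offset` in `buffer` is zero (or the opposite, with the
 * inverted flag). */
static void
draw_if_visible(VkCommandBuffer cmd, VkBuffer buffer, VkDeviceSize offset,
                PFN_vkCmdBeginConditionalRenderingEXT begin_cr,
                PFN_vkCmdEndConditionalRenderingEXT end_cr)
{
   VkConditionalRenderingBeginInfoEXT info = {
      .sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT,
      .buffer = buffer,
      .offset = offset,
      .flags = 0, /* VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT would flip the test */
   };

   begin_cr(cmd, &info);
   vkCmdDraw(cmd, 3, 1, 0, 0);
   end_cr(cmd);
}
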
/* VK_EXT_transform_feedback */
-void radv_CmdBindTransformFeedbackBuffersEXT(
- VkCommandBuffer commandBuffer,
- uint32_t firstBinding,
- uint32_t bindingCount,
- const VkBuffer* pBuffers,
- const VkDeviceSize* pOffsets,
- const VkDeviceSize* pSizes)
+void
+radv_CmdBindTransformFeedbackBuffersEXT(VkCommandBuffer commandBuffer, uint32_t firstBinding,
+ uint32_t bindingCount, const VkBuffer *pBuffers,
+ const VkDeviceSize *pOffsets, const VkDeviceSize *pSizes)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings;
- uint8_t enabled_mask = 0;
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings;
+ uint8_t enabled_mask = 0;
- assert(firstBinding + bindingCount <= MAX_SO_BUFFERS);
- for (uint32_t i = 0; i < bindingCount; i++) {
- uint32_t idx = firstBinding + i;
+ assert(firstBinding + bindingCount <= MAX_SO_BUFFERS);
+ for (uint32_t i = 0; i < bindingCount; i++) {
+ uint32_t idx = firstBinding + i;
- sb[idx].buffer = radv_buffer_from_handle(pBuffers[i]);
- sb[idx].offset = pOffsets[i];
+ sb[idx].buffer = radv_buffer_from_handle(pBuffers[i]);
+ sb[idx].offset = pOffsets[i];
- if (!pSizes || pSizes[i] == VK_WHOLE_SIZE) {
- sb[idx].size = sb[idx].buffer->size - sb[idx].offset;
- } else {
- sb[idx].size = pSizes[i];
- }
+ if (!pSizes || pSizes[i] == VK_WHOLE_SIZE) {
+ sb[idx].size = sb[idx].buffer->size - sb[idx].offset;
+ } else {
+ sb[idx].size = pSizes[i];
+ }
- radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs,
- sb[idx].buffer->bo);
+ radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, sb[idx].buffer->bo);
- enabled_mask |= 1 << idx;
- }
+ enabled_mask |= 1 << idx;
+ }
- cmd_buffer->state.streamout.enabled_mask |= enabled_mask;
+ cmd_buffer->state.streamout.enabled_mask |= enabled_mask;
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_BUFFER;
+ cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_BUFFER;
}
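
As a usage reference for the binding logic above (including the pSizes == NULL / VK_WHOLE_SIZE case), here is a minimal application-side sketch; the handles and the fetched VK_EXT_transform_feedback entry point are assumptions.

#include <stddef.h>
#include <vulkan/vulkan.h>

/* Bind one transform feedback buffer at binding 0; passing pSizes == NULL
 * lets the driver bind the whole buffer, matching the VK_WHOLE_SIZE handling
 * above. */
static void
bind_xfb_buffer(VkCommandBuffer cmd, VkBuffer buffer,
                PFN_vkCmdBindTransformFeedbackBuffersEXT bind_buffers)
{
   VkDeviceSize offset = 0;

   bind_buffers(cmd, 0 /* firstBinding */, 1 /* bindingCount */, &buffer, &offset,
                NULL /* pSizes: use the whole buffer */);
}
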
static void
radv_emit_streamout_enable(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_streamout_state *so = &cmd_buffer->state.streamout;
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ struct radv_streamout_state *so = &cmd_buffer->state.streamout;
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
- radeon_set_context_reg_seq(cs, R_028B94_VGT_STRMOUT_CONFIG, 2);
- radeon_emit(cs,
- S_028B94_STREAMOUT_0_EN(so->streamout_enabled) |
- S_028B94_RAST_STREAM(0) |
- S_028B94_STREAMOUT_1_EN(so->streamout_enabled) |
- S_028B94_STREAMOUT_2_EN(so->streamout_enabled) |
- S_028B94_STREAMOUT_3_EN(so->streamout_enabled));
- radeon_emit(cs, so->hw_enabled_mask &
- so->enabled_stream_buffers_mask);
+ radeon_set_context_reg_seq(cs, R_028B94_VGT_STRMOUT_CONFIG, 2);
+ radeon_emit(cs, S_028B94_STREAMOUT_0_EN(so->streamout_enabled) | S_028B94_RAST_STREAM(0) |
+ S_028B94_STREAMOUT_1_EN(so->streamout_enabled) |
+ S_028B94_STREAMOUT_2_EN(so->streamout_enabled) |
+ S_028B94_STREAMOUT_3_EN(so->streamout_enabled));
+ radeon_emit(cs, so->hw_enabled_mask & so->enabled_stream_buffers_mask);
- cmd_buffer->state.context_roll_without_scissor_emitted = true;
+ cmd_buffer->state.context_roll_without_scissor_emitted = true;
}
static void
radv_set_streamout_enable(struct radv_cmd_buffer *cmd_buffer, bool enable)
{
- struct radv_streamout_state *so = &cmd_buffer->state.streamout;
- bool old_streamout_enabled = so->streamout_enabled;
- uint32_t old_hw_enabled_mask = so->hw_enabled_mask;
+ struct radv_streamout_state *so = &cmd_buffer->state.streamout;
+ bool old_streamout_enabled = so->streamout_enabled;
+ uint32_t old_hw_enabled_mask = so->hw_enabled_mask;
- so->streamout_enabled = enable;
+ so->streamout_enabled = enable;
- so->hw_enabled_mask = so->enabled_mask |
- (so->enabled_mask << 4) |
- (so->enabled_mask << 8) |
- (so->enabled_mask << 12);
+ so->hw_enabled_mask = so->enabled_mask | (so->enabled_mask << 4) | (so->enabled_mask << 8) |
+ (so->enabled_mask << 12);
- if (!cmd_buffer->device->physical_device->use_ngg_streamout &&
- ((old_streamout_enabled != so->streamout_enabled) ||
- (old_hw_enabled_mask != so->hw_enabled_mask)))
- radv_emit_streamout_enable(cmd_buffer);
+ if (!cmd_buffer->device->physical_device->use_ngg_streamout &&
+ ((old_streamout_enabled != so->streamout_enabled) ||
+ (old_hw_enabled_mask != so->hw_enabled_mask)))
+ radv_emit_streamout_enable(cmd_buffer);
- if (cmd_buffer->device->physical_device->use_ngg_streamout) {
- cmd_buffer->gds_needed = true;
- cmd_buffer->gds_oa_needed = true;
- }
+ if (cmd_buffer->device->physical_device->use_ngg_streamout) {
+ cmd_buffer->gds_needed = true;
+ cmd_buffer->gds_oa_needed = true;
+ }
}
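
The hw_enabled_mask computation above replicates the 4-bit buffer-enable mask into one nibble per stream (four streams, four buffers each). A tiny standalone sketch of that bit math:

#include <assert.h>
#include <stdint.h>

/* Copy a 4-bit buffer-enable mask into all four per-stream nibbles. */
static uint32_t
replicate_streamout_mask(uint32_t enabled_mask)
{
   return enabled_mask | (enabled_mask << 4) | (enabled_mask << 8) | (enabled_mask << 12);
}

int
main(void)
{
   assert(replicate_streamout_mask(0x5) == 0x5555); /* buffers 0 and 2 -> every stream */
   assert(replicate_streamout_mask(0xf) == 0xffff);
   return 0;
}
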
-static void radv_flush_vgt_streamout(struct radv_cmd_buffer *cmd_buffer)
+static void
+radv_flush_vgt_streamout(struct radv_cmd_buffer *cmd_buffer)
{
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- unsigned reg_strmout_cntl;
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ unsigned reg_strmout_cntl;
- /* The register is at different places on different ASICs. */
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) {
- reg_strmout_cntl = R_0300FC_CP_STRMOUT_CNTL;
- radeon_set_uconfig_reg(cs, reg_strmout_cntl, 0);
- } else {
- reg_strmout_cntl = R_0084FC_CP_STRMOUT_CNTL;
- radeon_set_config_reg(cs, reg_strmout_cntl, 0);
- }
+ /* The register is at different places on different ASICs. */
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) {
+ reg_strmout_cntl = R_0300FC_CP_STRMOUT_CNTL;
+ radeon_set_uconfig_reg(cs, reg_strmout_cntl, 0);
+ } else {
+ reg_strmout_cntl = R_0084FC_CP_STRMOUT_CNTL;
+ radeon_set_config_reg(cs, reg_strmout_cntl, 0);
+ }
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH) | EVENT_INDEX(0));
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH) | EVENT_INDEX(0));
- radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
- radeon_emit(cs, WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */
- radeon_emit(cs, reg_strmout_cntl >> 2); /* register */
- radeon_emit(cs, 0);
- radeon_emit(cs, S_0084FC_OFFSET_UPDATE_DONE(1)); /* reference value */
- radeon_emit(cs, S_0084FC_OFFSET_UPDATE_DONE(1)); /* mask */
- radeon_emit(cs, 4); /* poll interval */
+ radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
+ radeon_emit(cs,
+ WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */
+ radeon_emit(cs, reg_strmout_cntl >> 2); /* register */
+ radeon_emit(cs, 0);
+ radeon_emit(cs, S_0084FC_OFFSET_UPDATE_DONE(1)); /* reference value */
+ radeon_emit(cs, S_0084FC_OFFSET_UPDATE_DONE(1)); /* mask */
+ radeon_emit(cs, 4); /* poll interval */
}
static void
-radv_emit_streamout_begin(struct radv_cmd_buffer *cmd_buffer,
- uint32_t firstCounterBuffer,
- uint32_t counterBufferCount,
- const VkBuffer *pCounterBuffers,
- const VkDeviceSize *pCounterBufferOffsets)
-
-{
- struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings;
- struct radv_streamout_state *so = &cmd_buffer->state.streamout;
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
-
- radv_flush_vgt_streamout(cmd_buffer);
-
- assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS);
- u_foreach_bit(i, so->enabled_mask) {
- int32_t counter_buffer_idx = i - firstCounterBuffer;
- if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount)
- counter_buffer_idx = -1;
-
- /* AMD GCN binds streamout buffers as shader resources.
- * VGT only counts primitives and tells the shader through
- * SGPRs what to do.
- */
- radeon_set_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 2);
- radeon_emit(cs, sb[i].size >> 2); /* BUFFER_SIZE (in DW) */
- radeon_emit(cs, so->stride_in_dw[i]); /* VTX_STRIDE (in DW) */
-
- cmd_buffer->state.context_roll_without_scissor_emitted = true;
-
- if (counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]) {
- /* The array of counter buffers is optional. */
- RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]);
- uint64_t va = radv_buffer_get_va(buffer->bo);
- uint64_t counter_buffer_offset = 0;
-
- if (pCounterBufferOffsets)
- counter_buffer_offset = pCounterBufferOffsets[counter_buffer_idx];
-
- va += buffer->offset + counter_buffer_offset;
-
- /* Append */
- radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
- radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) |
- STRMOUT_DATA_TYPE(1) | /* offset in bytes */
- STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM)); /* control */
- radeon_emit(cs, 0); /* unused */
- radeon_emit(cs, 0); /* unused */
- radeon_emit(cs, va); /* src address lo */
- radeon_emit(cs, va >> 32); /* src address hi */
-
- radv_cs_add_buffer(cmd_buffer->device->ws, cs, buffer->bo);
- } else {
- /* Start from the beginning. */
- radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
- radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) |
- STRMOUT_DATA_TYPE(1) | /* offset in bytes */
- STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET)); /* control */
- radeon_emit(cs, 0); /* unused */
- radeon_emit(cs, 0); /* unused */
- radeon_emit(cs, 0); /* unused */
- radeon_emit(cs, 0); /* unused */
- }
- }
-
- radv_set_streamout_enable(cmd_buffer, true);
+radv_emit_streamout_begin(struct radv_cmd_buffer *cmd_buffer, uint32_t firstCounterBuffer,
+ uint32_t counterBufferCount, const VkBuffer *pCounterBuffers,
+ const VkDeviceSize *pCounterBufferOffsets)
+
+{
+ struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings;
+ struct radv_streamout_state *so = &cmd_buffer->state.streamout;
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+
+ radv_flush_vgt_streamout(cmd_buffer);
+
+ assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS);
+ u_foreach_bit(i, so->enabled_mask)
+ {
+ int32_t counter_buffer_idx = i - firstCounterBuffer;
+ if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount)
+ counter_buffer_idx = -1;
+
+ /* AMD GCN binds streamout buffers as shader resources.
+ * VGT only counts primitives and tells the shader through
+ * SGPRs what to do.
+ */
+ radeon_set_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16 * i, 2);
+ radeon_emit(cs, sb[i].size >> 2); /* BUFFER_SIZE (in DW) */
+ radeon_emit(cs, so->stride_in_dw[i]); /* VTX_STRIDE (in DW) */
+
+ cmd_buffer->state.context_roll_without_scissor_emitted = true;
+
+ if (counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]) {
+ /* The array of counter buffers is optional. */
+ RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]);
+ uint64_t va = radv_buffer_get_va(buffer->bo);
+ uint64_t counter_buffer_offset = 0;
+
+ if (pCounterBufferOffsets)
+ counter_buffer_offset = pCounterBufferOffsets[counter_buffer_idx];
+
+ va += buffer->offset + counter_buffer_offset;
+
+ /* Append */
+ radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
+ radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) | STRMOUT_DATA_TYPE(1) | /* offset in bytes */
+ STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM)); /* control */
+ radeon_emit(cs, 0); /* unused */
+ radeon_emit(cs, 0); /* unused */
+ radeon_emit(cs, va); /* src address lo */
+ radeon_emit(cs, va >> 32); /* src address hi */
+
+ radv_cs_add_buffer(cmd_buffer->device->ws, cs, buffer->bo);
+ } else {
+ /* Start from the beginning. */
+ radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
+ radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) | STRMOUT_DATA_TYPE(1) | /* offset in bytes */
+ STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET)); /* control */
+ radeon_emit(cs, 0); /* unused */
+ radeon_emit(cs, 0); /* unused */
+ radeon_emit(cs, 0); /* unused */
+ radeon_emit(cs, 0); /* unused */
+ }
+ }
+
+ radv_set_streamout_enable(cmd_buffer, true);
}
static void
-gfx10_emit_streamout_begin(struct radv_cmd_buffer *cmd_buffer,
- uint32_t firstCounterBuffer,
- uint32_t counterBufferCount,
- const VkBuffer *pCounterBuffers,
- const VkDeviceSize *pCounterBufferOffsets)
-{
- struct radv_streamout_state *so = &cmd_buffer->state.streamout;
- unsigned last_target = util_last_bit(so->enabled_mask) - 1;
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
-
- assert(cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10);
- assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS);
-
- /* Sync because the next streamout operation will overwrite GDS and we
- * have to make sure it's idle.
- * TODO: Improve by tracking if there is a streamout operation in
- * flight.
- */
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VS_PARTIAL_FLUSH;
- si_emit_cache_flush(cmd_buffer);
-
- u_foreach_bit(i, so->enabled_mask) {
- int32_t counter_buffer_idx = i - firstCounterBuffer;
- if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount)
- counter_buffer_idx = -1;
-
- bool append = counter_buffer_idx >= 0 &&
- pCounterBuffers && pCounterBuffers[counter_buffer_idx];
- uint64_t va = 0;
-
- if (append) {
- RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]);
- uint64_t counter_buffer_offset = 0;
-
- if (pCounterBufferOffsets)
- counter_buffer_offset = pCounterBufferOffsets[counter_buffer_idx];
-
- va += radv_buffer_get_va(buffer->bo);
- va += buffer->offset + counter_buffer_offset;
-
- radv_cs_add_buffer(cmd_buffer->device->ws, cs, buffer->bo);
- }
-
- radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
- radeon_emit(cs, S_411_SRC_SEL(append ? V_411_SRC_ADDR_TC_L2 : V_411_DATA) |
- S_411_DST_SEL(V_411_GDS) |
- S_411_CP_SYNC(i == last_target));
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- radeon_emit(cs, 4 * i); /* destination in GDS */
- radeon_emit(cs, 0);
- radeon_emit(cs, S_415_BYTE_COUNT_GFX9(4) |
- S_415_DISABLE_WR_CONFIRM_GFX9(i != last_target));
- }
-
- radv_set_streamout_enable(cmd_buffer, true);
-}
-
-void radv_CmdBeginTransformFeedbackEXT(
- VkCommandBuffer commandBuffer,
- uint32_t firstCounterBuffer,
- uint32_t counterBufferCount,
- const VkBuffer* pCounterBuffers,
- const VkDeviceSize* pCounterBufferOffsets)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
-
- if (cmd_buffer->device->physical_device->use_ngg_streamout) {
- gfx10_emit_streamout_begin(cmd_buffer,
- firstCounterBuffer, counterBufferCount,
- pCounterBuffers, pCounterBufferOffsets);
- } else {
- radv_emit_streamout_begin(cmd_buffer,
- firstCounterBuffer, counterBufferCount,
- pCounterBuffers, pCounterBufferOffsets);
- }
+gfx10_emit_streamout_begin(struct radv_cmd_buffer *cmd_buffer, uint32_t firstCounterBuffer,
+ uint32_t counterBufferCount, const VkBuffer *pCounterBuffers,
+ const VkDeviceSize *pCounterBufferOffsets)
+{
+ struct radv_streamout_state *so = &cmd_buffer->state.streamout;
+ unsigned last_target = util_last_bit(so->enabled_mask) - 1;
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+
+ assert(cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10);
+ assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS);
+
+ /* Sync because the next streamout operation will overwrite GDS and we
+ * have to make sure it's idle.
+ * TODO: Improve by tracking if there is a streamout operation in
+ * flight.
+ */
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VS_PARTIAL_FLUSH;
+ si_emit_cache_flush(cmd_buffer);
+
+ u_foreach_bit(i, so->enabled_mask)
+ {
+ int32_t counter_buffer_idx = i - firstCounterBuffer;
+ if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount)
+ counter_buffer_idx = -1;
+
+ bool append =
+ counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx];
+ uint64_t va = 0;
+
+ if (append) {
+ RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]);
+ uint64_t counter_buffer_offset = 0;
+
+ if (pCounterBufferOffsets)
+ counter_buffer_offset = pCounterBufferOffsets[counter_buffer_idx];
+
+ va += radv_buffer_get_va(buffer->bo);
+ va += buffer->offset + counter_buffer_offset;
+
+ radv_cs_add_buffer(cmd_buffer->device->ws, cs, buffer->bo);
+ }
+
+ radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
+ radeon_emit(cs, S_411_SRC_SEL(append ? V_411_SRC_ADDR_TC_L2 : V_411_DATA) |
+ S_411_DST_SEL(V_411_GDS) | S_411_CP_SYNC(i == last_target));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ radeon_emit(cs, 4 * i); /* destination in GDS */
+ radeon_emit(cs, 0);
+ radeon_emit(cs, S_415_BYTE_COUNT_GFX9(4) | S_415_DISABLE_WR_CONFIRM_GFX9(i != last_target));
+ }
+
+ radv_set_streamout_enable(cmd_buffer, true);
+}
+
+void
+radv_CmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstCounterBuffer,
+ uint32_t counterBufferCount, const VkBuffer *pCounterBuffers,
+ const VkDeviceSize *pCounterBufferOffsets)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+
+ if (cmd_buffer->device->physical_device->use_ngg_streamout) {
+ gfx10_emit_streamout_begin(cmd_buffer, firstCounterBuffer, counterBufferCount,
+ pCounterBuffers, pCounterBufferOffsets);
+ } else {
+ radv_emit_streamout_begin(cmd_buffer, firstCounterBuffer, counterBufferCount, pCounterBuffers,
+ pCounterBufferOffsets);
+ }
}
static void
-radv_emit_streamout_end(struct radv_cmd_buffer *cmd_buffer,
- uint32_t firstCounterBuffer,
- uint32_t counterBufferCount,
- const VkBuffer *pCounterBuffers,
- const VkDeviceSize *pCounterBufferOffsets)
+radv_emit_streamout_end(struct radv_cmd_buffer *cmd_buffer, uint32_t firstCounterBuffer,
+ uint32_t counterBufferCount, const VkBuffer *pCounterBuffers,
+ const VkDeviceSize *pCounterBufferOffsets)
{
- struct radv_streamout_state *so = &cmd_buffer->state.streamout;
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ struct radv_streamout_state *so = &cmd_buffer->state.streamout;
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
- radv_flush_vgt_streamout(cmd_buffer);
+ radv_flush_vgt_streamout(cmd_buffer);
- assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS);
- u_foreach_bit(i, so->enabled_mask) {
- int32_t counter_buffer_idx = i - firstCounterBuffer;
- if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount)
- counter_buffer_idx = -1;
+ assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS);
+ u_foreach_bit(i, so->enabled_mask)
+ {
+ int32_t counter_buffer_idx = i - firstCounterBuffer;
+ if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount)
+ counter_buffer_idx = -1;
- if (counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]) {
-			/* The array of counter buffers is optional. */
- RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]);
- uint64_t va = radv_buffer_get_va(buffer->bo);
- uint64_t counter_buffer_offset = 0;
+ if (counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]) {
+      /* The array of counter buffers is optional. */
+ RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]);
+ uint64_t va = radv_buffer_get_va(buffer->bo);
+ uint64_t counter_buffer_offset = 0;
- if (pCounterBufferOffsets)
- counter_buffer_offset = pCounterBufferOffsets[counter_buffer_idx];
+ if (pCounterBufferOffsets)
+ counter_buffer_offset = pCounterBufferOffsets[counter_buffer_idx];
- va += buffer->offset + counter_buffer_offset;
+ va += buffer->offset + counter_buffer_offset;
- radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
- radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) |
- STRMOUT_DATA_TYPE(1) | /* offset in bytes */
- STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) |
- STRMOUT_STORE_BUFFER_FILLED_SIZE); /* control */
- radeon_emit(cs, va); /* dst address lo */
- radeon_emit(cs, va >> 32); /* dst address hi */
- radeon_emit(cs, 0); /* unused */
- radeon_emit(cs, 0); /* unused */
+ radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
+ radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) | STRMOUT_DATA_TYPE(1) | /* offset in bytes */
+ STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) |
+ STRMOUT_STORE_BUFFER_FILLED_SIZE); /* control */
+ radeon_emit(cs, va); /* dst address lo */
+ radeon_emit(cs, va >> 32); /* dst address hi */
+ radeon_emit(cs, 0); /* unused */
+ radeon_emit(cs, 0); /* unused */
- radv_cs_add_buffer(cmd_buffer->device->ws, cs, buffer->bo);
- }
+ radv_cs_add_buffer(cmd_buffer->device->ws, cs, buffer->bo);
+ }
- /* Deactivate transform feedback by zeroing the buffer size.
- * The counters (primitives generated, primitives emitted) may
-		 * be enabled even if there is no buffer bound. This ensures
- * that the primitives-emitted query won't increment.
- */
- radeon_set_context_reg(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 0);
+ /* Deactivate transform feedback by zeroing the buffer size.
+ * The counters (primitives generated, primitives emitted) may
+       * be enabled even if there is no buffer bound. This ensures
+ * that the primitives-emitted query won't increment.
+ */
+ radeon_set_context_reg(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16 * i, 0);
- cmd_buffer->state.context_roll_without_scissor_emitted = true;
- }
+ cmd_buffer->state.context_roll_without_scissor_emitted = true;
+ }
- radv_set_streamout_enable(cmd_buffer, false);
+ radv_set_streamout_enable(cmd_buffer, false);
}
static void
-gfx10_emit_streamout_end(struct radv_cmd_buffer *cmd_buffer,
- uint32_t firstCounterBuffer,
- uint32_t counterBufferCount,
- const VkBuffer *pCounterBuffers,
- const VkDeviceSize *pCounterBufferOffsets)
-{
- struct radv_streamout_state *so = &cmd_buffer->state.streamout;
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
-
- assert(cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10);
- assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS);
-
- u_foreach_bit(i, so->enabled_mask) {
- int32_t counter_buffer_idx = i - firstCounterBuffer;
- if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount)
- counter_buffer_idx = -1;
-
- if (counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]) {
-			/* The array of counter buffers is optional. */
- RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]);
- uint64_t va = radv_buffer_get_va(buffer->bo);
- uint64_t counter_buffer_offset = 0;
-
- if (pCounterBufferOffsets)
- counter_buffer_offset = pCounterBufferOffsets[counter_buffer_idx];
-
- va += buffer->offset + counter_buffer_offset;
-
- si_cs_emit_write_event_eop(cs,
- cmd_buffer->device->physical_device->rad_info.chip_class,
- radv_cmd_buffer_uses_mec(cmd_buffer),
- V_028A90_PS_DONE, 0,
- EOP_DST_SEL_TC_L2,
- EOP_DATA_SEL_GDS,
- va, EOP_DATA_GDS(i, 1), 0);
-
- radv_cs_add_buffer(cmd_buffer->device->ws, cs, buffer->bo);
- }
- }
-
- radv_set_streamout_enable(cmd_buffer, false);
-}
-
-void radv_CmdEndTransformFeedbackEXT(
- VkCommandBuffer commandBuffer,
- uint32_t firstCounterBuffer,
- uint32_t counterBufferCount,
- const VkBuffer* pCounterBuffers,
- const VkDeviceSize* pCounterBufferOffsets)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
-
- if (cmd_buffer->device->physical_device->use_ngg_streamout) {
- gfx10_emit_streamout_end(cmd_buffer,
- firstCounterBuffer, counterBufferCount,
- pCounterBuffers, pCounterBufferOffsets);
- } else {
- radv_emit_streamout_end(cmd_buffer,
- firstCounterBuffer, counterBufferCount,
- pCounterBuffers, pCounterBufferOffsets);
- }
-}
-
-void radv_CmdDrawIndirectByteCountEXT(
- VkCommandBuffer commandBuffer,
- uint32_t instanceCount,
- uint32_t firstInstance,
- VkBuffer _counterBuffer,
- VkDeviceSize counterBufferOffset,
- uint32_t counterOffset,
- uint32_t vertexStride)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_buffer, counterBuffer, _counterBuffer);
- struct radv_draw_info info;
-
- info.count = 0;
- info.instance_count = instanceCount;
- info.first_instance = firstInstance;
- info.strmout_buffer = counterBuffer;
- info.strmout_buffer_offset = counterBufferOffset;
- info.stride = vertexStride;
- info.indexed = false;
- info.indirect = NULL;
-
- if (!radv_before_draw(cmd_buffer, &info, 0))
- return;
- radv_emit_direct_draw_packets(cmd_buffer, &info, 0, S_0287F0_USE_OPAQUE(1));
- radv_after_draw(cmd_buffer);
+gfx10_emit_streamout_end(struct radv_cmd_buffer *cmd_buffer, uint32_t firstCounterBuffer,
+ uint32_t counterBufferCount, const VkBuffer *pCounterBuffers,
+ const VkDeviceSize *pCounterBufferOffsets)
+{
+ struct radv_streamout_state *so = &cmd_buffer->state.streamout;
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+
+ assert(cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10);
+ assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS);
+
+ u_foreach_bit(i, so->enabled_mask)
+ {
+ int32_t counter_buffer_idx = i - firstCounterBuffer;
+ if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount)
+ counter_buffer_idx = -1;
+
+ if (counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]) {
+      /* The array of counter buffers is optional. */
+ RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]);
+ uint64_t va = radv_buffer_get_va(buffer->bo);
+ uint64_t counter_buffer_offset = 0;
+
+ if (pCounterBufferOffsets)
+ counter_buffer_offset = pCounterBufferOffsets[counter_buffer_idx];
+
+ va += buffer->offset + counter_buffer_offset;
+
+ si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.chip_class,
+ radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_PS_DONE, 0,
+ EOP_DST_SEL_TC_L2, EOP_DATA_SEL_GDS, va, EOP_DATA_GDS(i, 1), 0);
+
+ radv_cs_add_buffer(cmd_buffer->device->ws, cs, buffer->bo);
+ }
+ }
+
+ radv_set_streamout_enable(cmd_buffer, false);
+}
+
+void
+radv_CmdEndTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstCounterBuffer,
+ uint32_t counterBufferCount, const VkBuffer *pCounterBuffers,
+ const VkDeviceSize *pCounterBufferOffsets)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+
+ if (cmd_buffer->device->physical_device->use_ngg_streamout) {
+ gfx10_emit_streamout_end(cmd_buffer, firstCounterBuffer, counterBufferCount, pCounterBuffers,
+ pCounterBufferOffsets);
+ } else {
+ radv_emit_streamout_end(cmd_buffer, firstCounterBuffer, counterBufferCount, pCounterBuffers,
+ pCounterBufferOffsets);
+ }
+}
+
+void
+radv_CmdDrawIndirectByteCountEXT(VkCommandBuffer commandBuffer, uint32_t instanceCount,
+ uint32_t firstInstance, VkBuffer _counterBuffer,
+ VkDeviceSize counterBufferOffset, uint32_t counterOffset,
+ uint32_t vertexStride)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_buffer, counterBuffer, _counterBuffer);
+ struct radv_draw_info info;
+
+ info.count = 0;
+ info.instance_count = instanceCount;
+ info.first_instance = firstInstance;
+ info.strmout_buffer = counterBuffer;
+ info.strmout_buffer_offset = counterBufferOffset;
+ info.stride = vertexStride;
+ info.indexed = false;
+ info.indirect = NULL;
+
+ if (!radv_before_draw(cmd_buffer, &info, 0))
+ return;
+ radv_emit_direct_draw_packets(cmd_buffer, &info, 0, S_0287F0_USE_OPAQUE(1));
+ radv_after_draw(cmd_buffer);
}
/* VK_AMD_buffer_marker */
-void radv_CmdWriteBufferMarkerAMD(
- VkCommandBuffer commandBuffer,
- VkPipelineStageFlagBits pipelineStage,
- VkBuffer dstBuffer,
- VkDeviceSize dstOffset,
- uint32_t marker)
-{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_buffer, buffer, dstBuffer);
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- uint64_t va = radv_buffer_get_va(buffer->bo) + dstOffset;
-
- si_emit_cache_flush(cmd_buffer);
-
- ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 12);
-
- if (!(pipelineStage & ~VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)) {
- radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) |
- COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
- COPY_DATA_WR_CONFIRM);
- radeon_emit(cs, marker);
- radeon_emit(cs, 0);
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- } else {
- si_cs_emit_write_event_eop(cs,
- cmd_buffer->device->physical_device->rad_info.chip_class,
- radv_cmd_buffer_uses_mec(cmd_buffer),
- V_028A90_BOTTOM_OF_PIPE_TS, 0,
- EOP_DST_SEL_MEM,
- EOP_DATA_SEL_VALUE_32BIT,
- va, marker,
- cmd_buffer->gfx9_eop_bug_va);
- }
-
- assert(cmd_buffer->cs->cdw <= cdw_max);
+void
+radv_CmdWriteBufferMarkerAMD(VkCommandBuffer commandBuffer, VkPipelineStageFlagBits pipelineStage,
+ VkBuffer dstBuffer, VkDeviceSize dstOffset, uint32_t marker)
+{
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_buffer, buffer, dstBuffer);
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ uint64_t va = radv_buffer_get_va(buffer->bo) + dstOffset;
+
+ si_emit_cache_flush(cmd_buffer);
+
+ ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 12);
+
+ if (!(pipelineStage & ~VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT)) {
+ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
+ COPY_DATA_WR_CONFIRM);
+ radeon_emit(cs, marker);
+ radeon_emit(cs, 0);
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ } else {
+ si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.chip_class,
+ radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_BOTTOM_OF_PIPE_TS,
+ 0, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, va, marker,
+ cmd_buffer->gfx9_eop_bug_va);
+ }
+
+ assert(cmd_buffer->cs->cdw <= cdw_max);
}
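
radv_CmdWriteBufferMarkerAMD above chooses its path with one mask test: when the requested stage contains no bit beyond top-of-pipe, the marker is written immediately via COPY_DATA; any later stage forces the bottom-of-pipe EOP event. A minimal sketch of that predicate, using a local stand-in constant so the snippet needs no Vulkan headers:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; illustrative only. */
#define TOP_OF_PIPE_BIT 0x00000001u

/* Mirrors the test in radv_CmdWriteBufferMarkerAMD: the cheap immediate write
 * is only taken when no stage later than top-of-pipe was requested. */
static bool
marker_can_use_copy_data(uint32_t pipelineStage)
{
   return !(pipelineStage & ~TOP_OF_PIPE_BIT);
}

int
main(void)
{
   printf("top-of-pipe only   -> %d\n", marker_can_use_copy_data(TOP_OF_PIPE_BIT));           /* 1 */
   printf("plus a later stage -> %d\n", marker_can_use_copy_data(TOP_OF_PIPE_BIT | 0x2000u)); /* 0 */
   return 0;
}
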
diff --git a/src/amd/vulkan/radv_constants.h b/src/amd/vulkan/radv_constants.h
index 0b0d6714d25..bceedac3da6 100644
--- a/src/amd/vulkan/radv_constants.h
+++ b/src/amd/vulkan/radv_constants.h
@@ -30,30 +30,30 @@
#define ATI_VENDOR_ID 0x1002
-#define MAX_VBS 32
-#define MAX_VERTEX_ATTRIBS 32
-#define MAX_RTS 8
-#define MAX_VIEWPORTS 16
-#define MAX_SCISSORS 16
-#define MAX_DISCARD_RECTANGLES 4
-#define MAX_SAMPLE_LOCATIONS 32
-#define MAX_PUSH_CONSTANTS_SIZE 128
-#define MAX_PUSH_DESCRIPTORS 32
-#define MAX_DYNAMIC_UNIFORM_BUFFERS 16
-#define MAX_DYNAMIC_STORAGE_BUFFERS 8
-#define MAX_DYNAMIC_BUFFERS (MAX_DYNAMIC_UNIFORM_BUFFERS + MAX_DYNAMIC_STORAGE_BUFFERS)
-#define MAX_SAMPLES_LOG2 4
-#define NUM_META_FS_KEYS 12
-#define RADV_MAX_DRM_DEVICES 8
-#define MAX_VIEWS 8
-#define MAX_SO_STREAMS 4
-#define MAX_SO_BUFFERS 4
-#define MAX_SO_OUTPUTS 64
-#define MAX_INLINE_UNIFORM_BLOCK_SIZE (4ull * 1024 * 1024)
+#define MAX_VBS 32
+#define MAX_VERTEX_ATTRIBS 32
+#define MAX_RTS 8
+#define MAX_VIEWPORTS 16
+#define MAX_SCISSORS 16
+#define MAX_DISCARD_RECTANGLES 4
+#define MAX_SAMPLE_LOCATIONS 32
+#define MAX_PUSH_CONSTANTS_SIZE 128
+#define MAX_PUSH_DESCRIPTORS 32
+#define MAX_DYNAMIC_UNIFORM_BUFFERS 16
+#define MAX_DYNAMIC_STORAGE_BUFFERS 8
+#define MAX_DYNAMIC_BUFFERS (MAX_DYNAMIC_UNIFORM_BUFFERS + MAX_DYNAMIC_STORAGE_BUFFERS)
+#define MAX_SAMPLES_LOG2 4
+#define NUM_META_FS_KEYS 12
+#define RADV_MAX_DRM_DEVICES 8
+#define MAX_VIEWS 8
+#define MAX_SO_STREAMS 4
+#define MAX_SO_BUFFERS 4
+#define MAX_SO_OUTPUTS 64
+#define MAX_INLINE_UNIFORM_BLOCK_SIZE (4ull * 1024 * 1024)
#define MAX_INLINE_UNIFORM_BLOCK_COUNT 64
-#define MAX_BIND_POINTS 2 /* compute + graphics */
+#define MAX_BIND_POINTS 2 /* compute + graphics */
-#define NUM_DEPTH_CLEAR_PIPELINES 3
+#define NUM_DEPTH_CLEAR_PIPELINES 3
#define NUM_DEPTH_DECOMPRESS_PIPELINES 3
/*
@@ -65,13 +65,13 @@
#define RADV_BUFFER_UPDATE_THRESHOLD 1024
/* descriptor index into scratch ring offsets */
-#define RING_SCRATCH 0
-#define RING_ESGS_VS 1
-#define RING_ESGS_GS 2
-#define RING_GSVS_VS 3
-#define RING_GSVS_GS 4
-#define RING_HS_TESS_FACTOR 5
-#define RING_HS_TESS_OFFCHIP 6
+#define RING_SCRATCH 0
+#define RING_ESGS_VS 1
+#define RING_ESGS_GS 2
+#define RING_GSVS_VS 3
+#define RING_GSVS_GS 4
+#define RING_HS_TESS_FACTOR 5
+#define RING_HS_TESS_OFFCHIP 6
#define RING_PS_SAMPLE_POSITIONS 7
/* max number of descriptor sets */
@@ -80,7 +80,7 @@
/* Make sure everything is addressable by a signed 32-bit int, and
* our largest descriptors are 96 bytes.
*/
-#define RADV_MAX_PER_SET_DESCRIPTORS ((1ull << 31 ) / 96)
+#define RADV_MAX_PER_SET_DESCRIPTORS ((1ull << 31) / 96)
/* Our buffer size fields allow only 2**32 - 1. We round that down to a multiple
* of 4 bytes so we can align buffer sizes up.
@@ -91,4 +91,3 @@
#define RADV_SUBGROUP_SIZE 64
#endif /* RADV_CONSTANTS_H */
-
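
A quick check of the comment above RADV_MAX_PER_SET_DESCRIPTORS: with 96-byte descriptors, (1ull << 31) / 96 truncates to 22369621, and 22369621 * 96 = 2147483616, which still sits below 2^31. A throwaway compile-time check of that arithmetic (assumes C11 static_assert; not part of the header):

#include <assert.h>

/* 2^31 / 96 = 22369621 descriptors per set, whose byte offsets stay within a signed 32-bit int. */
static_assert((1ull << 31) / 96 == 22369621ull, "derivation of RADV_MAX_PER_SET_DESCRIPTORS");
static_assert(22369621ull * 96 <= (1ull << 31), "largest per-set offset is addressable by int32");

int
main(void)
{
   return 0;
}
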
diff --git a/src/amd/vulkan/radv_cs.h b/src/amd/vulkan/radv_cs.h
index c6f9e4e4342..277c77b5967 100644
--- a/src/amd/vulkan/radv_cs.h
+++ b/src/amd/vulkan/radv_cs.h
@@ -25,166 +25,166 @@
#ifndef RADV_CS_H
#define RADV_CS_H
-#include <string.h>
-#include <stdint.h>
#include <assert.h>
+#include <stdint.h>
+#include <string.h>
#include "radv_private.h"
#include "sid.h"
-static inline unsigned radeon_check_space(struct radeon_winsys *ws,
- struct radeon_cmdbuf *cs,
- unsigned needed)
+static inline unsigned
+radeon_check_space(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, unsigned needed)
{
- if (cs->max_dw - cs->cdw < needed)
- ws->cs_grow(cs, needed);
- return cs->cdw + needed;
+ if (cs->max_dw - cs->cdw < needed)
+ ws->cs_grow(cs, needed);
+ return cs->cdw + needed;
}
-static inline void radeon_set_config_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
+static inline void
+radeon_set_config_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
{
- assert(reg >= SI_CONFIG_REG_OFFSET && reg < SI_CONFIG_REG_END);
- assert(cs->cdw + 2 + num <= cs->max_dw);
- assert(num);
- radeon_emit(cs, PKT3(PKT3_SET_CONFIG_REG, num, 0));
- radeon_emit(cs, (reg - SI_CONFIG_REG_OFFSET) >> 2);
+ assert(reg >= SI_CONFIG_REG_OFFSET && reg < SI_CONFIG_REG_END);
+ assert(cs->cdw + 2 + num <= cs->max_dw);
+ assert(num);
+ radeon_emit(cs, PKT3(PKT3_SET_CONFIG_REG, num, 0));
+ radeon_emit(cs, (reg - SI_CONFIG_REG_OFFSET) >> 2);
}
-static inline void radeon_set_config_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
+static inline void
+radeon_set_config_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
{
- radeon_set_config_reg_seq(cs, reg, 1);
- radeon_emit(cs, value);
+ radeon_set_config_reg_seq(cs, reg, 1);
+ radeon_emit(cs, value);
}
-static inline void radeon_set_context_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
+static inline void
+radeon_set_context_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
{
- assert(reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END);
- assert(cs->cdw + 2 + num <= cs->max_dw);
- assert(num);
- radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, num, 0));
- radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2);
+ assert(reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END);
+ assert(cs->cdw + 2 + num <= cs->max_dw);
+ assert(num);
+ radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, num, 0));
+ radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2);
}
-static inline void radeon_set_context_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
+static inline void
+radeon_set_context_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
{
- radeon_set_context_reg_seq(cs, reg, 1);
- radeon_emit(cs, value);
+ radeon_set_context_reg_seq(cs, reg, 1);
+ radeon_emit(cs, value);
}
-
-static inline void radeon_set_context_reg_idx(struct radeon_cmdbuf *cs,
- unsigned reg, unsigned idx,
- unsigned value)
+static inline void
+radeon_set_context_reg_idx(struct radeon_cmdbuf *cs, unsigned reg, unsigned idx, unsigned value)
{
- assert(reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END);
- assert(cs->cdw + 3 <= cs->max_dw);
- radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, 1, 0));
- radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2 | (idx << 28));
- radeon_emit(cs, value);
+ assert(reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END);
+ assert(cs->cdw + 3 <= cs->max_dw);
+ radeon_emit(cs, PKT3(PKT3_SET_CONTEXT_REG, 1, 0));
+ radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2 | (idx << 28));
+ radeon_emit(cs, value);
}
-static inline void radeon_set_context_reg_rmw(struct radeon_cmdbuf *cs,
- unsigned reg, unsigned value,
- unsigned mask)
+static inline void
+radeon_set_context_reg_rmw(struct radeon_cmdbuf *cs, unsigned reg, unsigned value, unsigned mask)
{
- assert(reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END);
- assert(cs->cdw + 4 <= cs->max_dw);
- radeon_emit(cs, PKT3(PKT3_CONTEXT_REG_RMW, 2, 0));
- radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2);
- radeon_emit(cs, mask);
- radeon_emit(cs, value);
+ assert(reg >= SI_CONTEXT_REG_OFFSET && reg < SI_CONTEXT_REG_END);
+ assert(cs->cdw + 4 <= cs->max_dw);
+ radeon_emit(cs, PKT3(PKT3_CONTEXT_REG_RMW, 2, 0));
+ radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2);
+ radeon_emit(cs, mask);
+ radeon_emit(cs, value);
}
-static inline void radeon_set_sh_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
+static inline void
+radeon_set_sh_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
{
- assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END);
- assert(cs->cdw + 2 + num <= cs->max_dw);
- assert(num);
- radeon_emit(cs, PKT3(PKT3_SET_SH_REG, num, 0));
- radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2);
+ assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END);
+ assert(cs->cdw + 2 + num <= cs->max_dw);
+ assert(num);
+ radeon_emit(cs, PKT3(PKT3_SET_SH_REG, num, 0));
+ radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2);
}
-static inline void radeon_set_sh_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
+static inline void
+radeon_set_sh_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
{
- radeon_set_sh_reg_seq(cs, reg, 1);
- radeon_emit(cs, value);
+ radeon_set_sh_reg_seq(cs, reg, 1);
+ radeon_emit(cs, value);
}
-static inline void radeon_set_sh_reg_idx(const struct radv_physical_device *pdevice,
- struct radeon_cmdbuf *cs,
- unsigned reg, unsigned idx,
- unsigned value)
+static inline void
+radeon_set_sh_reg_idx(const struct radv_physical_device *pdevice, struct radeon_cmdbuf *cs,
+ unsigned reg, unsigned idx, unsigned value)
{
- assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END);
- assert(cs->cdw + 3 <= cs->max_dw);
- assert(idx);
+ assert(reg >= SI_SH_REG_OFFSET && reg < SI_SH_REG_END);
+ assert(cs->cdw + 3 <= cs->max_dw);
+ assert(idx);
- unsigned opcode = PKT3_SET_SH_REG_INDEX;
- if (pdevice->rad_info.chip_class < GFX10)
- opcode = PKT3_SET_SH_REG;
+ unsigned opcode = PKT3_SET_SH_REG_INDEX;
+ if (pdevice->rad_info.chip_class < GFX10)
+ opcode = PKT3_SET_SH_REG;
- radeon_emit(cs, PKT3(opcode, 1, 0));
- radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2 | (idx << 28));
- radeon_emit(cs, value);
+ radeon_emit(cs, PKT3(opcode, 1, 0));
+ radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2 | (idx << 28));
+ radeon_emit(cs, value);
}
-static inline void radeon_set_uconfig_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
+static inline void
+radeon_set_uconfig_reg_seq(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
{
- assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
- assert(cs->cdw + 2 + num <= cs->max_dw);
- assert(num);
- radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 0));
- radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2);
+ assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
+ assert(cs->cdw + 2 + num <= cs->max_dw);
+ assert(num);
+ radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 0));
+ radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2);
}
-static inline void radeon_set_uconfig_reg_seq_perfctr(struct radeon_cmdbuf *cs,
- unsigned reg, unsigned num)
+static inline void
+radeon_set_uconfig_reg_seq_perfctr(struct radeon_cmdbuf *cs, unsigned reg, unsigned num)
{
- assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
- assert(cs->cdw + 2 + num <= cs->max_dw);
- assert(num);
- radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 1));
- radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2);
+ assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
+ assert(cs->cdw + 2 + num <= cs->max_dw);
+ assert(num);
+ radeon_emit(cs, PKT3(PKT3_SET_UCONFIG_REG, num, 1));
+ radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2);
}
-static inline void radeon_set_uconfig_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
+static inline void
+radeon_set_uconfig_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
{
- radeon_set_uconfig_reg_seq(cs, reg, 1);
- radeon_emit(cs, value);
+ radeon_set_uconfig_reg_seq(cs, reg, 1);
+ radeon_emit(cs, value);
}
-static inline void radeon_set_uconfig_reg_idx(const struct radv_physical_device *pdevice,
- struct radeon_cmdbuf *cs,
- unsigned reg, unsigned idx,
- unsigned value)
+static inline void
+radeon_set_uconfig_reg_idx(const struct radv_physical_device *pdevice, struct radeon_cmdbuf *cs,
+ unsigned reg, unsigned idx, unsigned value)
{
- assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
- assert(cs->cdw + 3 <= cs->max_dw);
- assert(idx);
-
- unsigned opcode = PKT3_SET_UCONFIG_REG_INDEX;
- if (pdevice->rad_info.chip_class < GFX9 ||
- (pdevice->rad_info.chip_class == GFX9 && pdevice->rad_info.me_fw_version < 26))
- opcode = PKT3_SET_UCONFIG_REG;
-
- radeon_emit(cs, PKT3(opcode, 1, 0));
- radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2 | (idx << 28));
- radeon_emit(cs, value);
+ assert(reg >= CIK_UCONFIG_REG_OFFSET && reg < CIK_UCONFIG_REG_END);
+ assert(cs->cdw + 3 <= cs->max_dw);
+ assert(idx);
+
+ unsigned opcode = PKT3_SET_UCONFIG_REG_INDEX;
+ if (pdevice->rad_info.chip_class < GFX9 ||
+ (pdevice->rad_info.chip_class == GFX9 && pdevice->rad_info.me_fw_version < 26))
+ opcode = PKT3_SET_UCONFIG_REG;
+
+ radeon_emit(cs, PKT3(opcode, 1, 0));
+ radeon_emit(cs, (reg - CIK_UCONFIG_REG_OFFSET) >> 2 | (idx << 28));
+ radeon_emit(cs, value);
}
-static inline void radeon_set_privileged_config_reg(struct radeon_cmdbuf *cs,
- unsigned reg,
- unsigned value)
+static inline void
+radeon_set_privileged_config_reg(struct radeon_cmdbuf *cs, unsigned reg, unsigned value)
{
- assert(reg < CIK_UCONFIG_REG_OFFSET);
- assert(cs->cdw + 6 <= cs->max_dw);
-
- radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) |
- COPY_DATA_DST_SEL(COPY_DATA_PERF));
- radeon_emit(cs, value);
- radeon_emit(cs, 0); /* unused */
- radeon_emit(cs, reg >> 2);
- radeon_emit(cs, 0); /* unused */
+ assert(reg < CIK_UCONFIG_REG_OFFSET);
+ assert(cs->cdw + 6 <= cs->max_dw);
+
+ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_PERF));
+ radeon_emit(cs, value);
+ radeon_emit(cs, 0); /* unused */
+ radeon_emit(cs, reg >> 2);
+ radeon_emit(cs, 0); /* unused */
}
#endif /* RADV_CS_H */
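
All of the register helpers above share one shape: a PKT3 header, the register's dword offset relative to its block base, then `num` payload dwords, which is why the asserts reserve `cs->cdw + 2 + num` space. The toy model below only demonstrates that dword accounting; the packet header and register offsets are fake placeholders, not the real sid.h encodings:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Toy stand-ins; the real values live in sid.h. */
#define TOY_PKT3(op, count) (((uint32_t)(op) << 8) | (uint32_t)(count))
#define TOY_SET_CONTEXT_REG 0x69
#define TOY_CONTEXT_REG_OFFSET 0x28000

struct toy_cmdbuf {
   uint32_t buf[64];
   unsigned cdw, max_dw;
};

static void
toy_emit(struct toy_cmdbuf *cs, uint32_t value)
{
   assert(cs->cdw < cs->max_dw);
   cs->buf[cs->cdw++] = value;
}

/* Same structure as radeon_set_context_reg_seq()/radeon_set_context_reg():
 * header + relative dword offset + num values. */
static void
toy_set_context_reg(struct toy_cmdbuf *cs, unsigned reg, uint32_t value)
{
   assert(cs->cdw + 2 + 1 <= cs->max_dw);
   toy_emit(cs, TOY_PKT3(TOY_SET_CONTEXT_REG, 1));
   toy_emit(cs, (reg - TOY_CONTEXT_REG_OFFSET) >> 2);
   toy_emit(cs, value);
}

int
main(void)
{
   struct toy_cmdbuf cs = {.cdw = 0, .max_dw = 64};
   toy_set_context_reg(&cs, TOY_CONTEXT_REG_OFFSET + 16, 0xdeadbeef);
   printf("emitted %u dwords\n", cs.cdw); /* 3 = header + offset + value */
   return 0;
}

The same accounting explains the `+ 3`, `+ 4` and `+ 6` asserts in the idx, rmw and privileged-config variants above.
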
diff --git a/src/amd/vulkan/radv_debug.c b/src/amd/vulkan/radv_debug.c
index 9ba853ee9f4..ca0c3389538 100644
--- a/src/amd/vulkan/radv_debug.c
+++ b/src/amd/vulkan/radv_debug.c
@@ -25,27 +25,27 @@
* IN THE SOFTWARE.
*/
-#include <stdlib.h>
#include <stdio.h>
+#include <stdlib.h>
#ifndef _WIN32
#include <sys/utsname.h>
#endif
#include <sys/stat.h>
#include "util/mesa-sha1.h"
-#include "sid.h"
#include "ac_debug.h"
#include "radv_debug.h"
#include "radv_shader.h"
+#include "sid.h"
#define TRACE_BO_SIZE 4096
-#define TMA_BO_SIZE 4096
+#define TMA_BO_SIZE 4096
-#define COLOR_RESET "\033[0m"
-#define COLOR_RED "\033[31m"
-#define COLOR_GREEN "\033[1;32m"
-#define COLOR_YELLOW "\033[1;33m"
-#define COLOR_CYAN "\033[1;36m"
+#define COLOR_RESET "\033[0m"
+#define COLOR_RED "\033[31m"
+#define COLOR_GREEN "\033[1;32m"
+#define COLOR_YELLOW "\033[1;33m"
+#define COLOR_CYAN "\033[1;36m"
#define RADV_DUMP_DIR "radv_dumps"
@@ -64,967 +64,934 @@
bool
radv_init_trace(struct radv_device *device)
{
- struct radeon_winsys *ws = device->ws;
- VkResult result;
-
- device->trace_bo = ws->buffer_create(ws, TRACE_BO_SIZE, 8,
- RADEON_DOMAIN_VRAM,
- RADEON_FLAG_CPU_ACCESS|
- RADEON_FLAG_NO_INTERPROCESS_SHARING |
- RADEON_FLAG_ZERO_VRAM,
- RADV_BO_PRIORITY_UPLOAD_BUFFER);
- if (!device->trace_bo)
- return false;
-
- result = ws->buffer_make_resident(ws, device->trace_bo, true);
- if (result != VK_SUCCESS)
- return false;
-
- device->trace_id_ptr = ws->buffer_map(device->trace_bo);
- if (!device->trace_id_ptr)
- return false;
-
- ac_vm_fault_occured(device->physical_device->rad_info.chip_class,
- &device->dmesg_timestamp, NULL);
-
- return true;
+ struct radeon_winsys *ws = device->ws;
+ VkResult result;
+
+ device->trace_bo = ws->buffer_create(
+ ws, TRACE_BO_SIZE, 8, RADEON_DOMAIN_VRAM,
+ RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM,
+ RADV_BO_PRIORITY_UPLOAD_BUFFER);
+ if (!device->trace_bo)
+ return false;
+
+ result = ws->buffer_make_resident(ws, device->trace_bo, true);
+ if (result != VK_SUCCESS)
+ return false;
+
+ device->trace_id_ptr = ws->buffer_map(device->trace_bo);
+ if (!device->trace_id_ptr)
+ return false;
+
+ ac_vm_fault_occured(device->physical_device->rad_info.chip_class, &device->dmesg_timestamp,
+ NULL);
+
+ return true;
}
void
radv_finish_trace(struct radv_device *device)
{
- struct radeon_winsys *ws = device->ws;
+ struct radeon_winsys *ws = device->ws;
- if (unlikely(device->trace_bo)) {
- ws->buffer_make_resident(ws, device->trace_bo, false);
- ws->buffer_destroy(ws, device->trace_bo);
- }
+ if (unlikely(device->trace_bo)) {
+ ws->buffer_make_resident(ws, device->trace_bo, false);
+ ws->buffer_destroy(ws, device->trace_bo);
+ }
}
static void
radv_dump_trace(struct radv_device *device, struct radeon_cmdbuf *cs, FILE *f)
{
- fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
- device->ws->cs_dump(cs, f, (const int*)device->trace_id_ptr, 2);
+ fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
+ device->ws->cs_dump(cs, f, (const int *)device->trace_id_ptr, 2);
}
static void
radv_dump_mmapped_reg(struct radv_device *device, FILE *f, unsigned offset)
{
- struct radeon_winsys *ws = device->ws;
- uint32_t value;
+ struct radeon_winsys *ws = device->ws;
+ uint32_t value;
- if (ws->read_registers(ws, offset, 1, &value))
- ac_dump_reg(f, device->physical_device->rad_info.chip_class,
- offset, value, ~0);
+ if (ws->read_registers(ws, offset, 1, &value))
+ ac_dump_reg(f, device->physical_device->rad_info.chip_class, offset, value, ~0);
}
static void
radv_dump_debug_registers(struct radv_device *device, FILE *f)
{
- struct radeon_info *info = &device->physical_device->rad_info;
-
- fprintf(f, "Memory-mapped registers:\n");
- radv_dump_mmapped_reg(device, f, R_008010_GRBM_STATUS);
-
- radv_dump_mmapped_reg(device, f, R_008008_GRBM_STATUS2);
- radv_dump_mmapped_reg(device, f, R_008014_GRBM_STATUS_SE0);
- radv_dump_mmapped_reg(device, f, R_008018_GRBM_STATUS_SE1);
- radv_dump_mmapped_reg(device, f, R_008038_GRBM_STATUS_SE2);
- radv_dump_mmapped_reg(device, f, R_00803C_GRBM_STATUS_SE3);
- radv_dump_mmapped_reg(device, f, R_00D034_SDMA0_STATUS_REG);
- radv_dump_mmapped_reg(device, f, R_00D834_SDMA1_STATUS_REG);
- if (info->chip_class <= GFX8) {
- radv_dump_mmapped_reg(device, f, R_000E50_SRBM_STATUS);
- radv_dump_mmapped_reg(device, f, R_000E4C_SRBM_STATUS2);
- radv_dump_mmapped_reg(device, f, R_000E54_SRBM_STATUS3);
- }
- radv_dump_mmapped_reg(device, f, R_008680_CP_STAT);
- radv_dump_mmapped_reg(device, f, R_008674_CP_STALLED_STAT1);
- radv_dump_mmapped_reg(device, f, R_008678_CP_STALLED_STAT2);
- radv_dump_mmapped_reg(device, f, R_008670_CP_STALLED_STAT3);
- radv_dump_mmapped_reg(device, f, R_008210_CP_CPC_STATUS);
- radv_dump_mmapped_reg(device, f, R_008214_CP_CPC_BUSY_STAT);
- radv_dump_mmapped_reg(device, f, R_008218_CP_CPC_STALLED_STAT1);
- radv_dump_mmapped_reg(device, f, R_00821C_CP_CPF_STATUS);
- radv_dump_mmapped_reg(device, f, R_008220_CP_CPF_BUSY_STAT);
- radv_dump_mmapped_reg(device, f, R_008224_CP_CPF_STALLED_STAT1);
- fprintf(f, "\n");
+ struct radeon_info *info = &device->physical_device->rad_info;
+
+ fprintf(f, "Memory-mapped registers:\n");
+ radv_dump_mmapped_reg(device, f, R_008010_GRBM_STATUS);
+
+ radv_dump_mmapped_reg(device, f, R_008008_GRBM_STATUS2);
+ radv_dump_mmapped_reg(device, f, R_008014_GRBM_STATUS_SE0);
+ radv_dump_mmapped_reg(device, f, R_008018_GRBM_STATUS_SE1);
+ radv_dump_mmapped_reg(device, f, R_008038_GRBM_STATUS_SE2);
+ radv_dump_mmapped_reg(device, f, R_00803C_GRBM_STATUS_SE3);
+ radv_dump_mmapped_reg(device, f, R_00D034_SDMA0_STATUS_REG);
+ radv_dump_mmapped_reg(device, f, R_00D834_SDMA1_STATUS_REG);
+ if (info->chip_class <= GFX8) {
+ radv_dump_mmapped_reg(device, f, R_000E50_SRBM_STATUS);
+ radv_dump_mmapped_reg(device, f, R_000E4C_SRBM_STATUS2);
+ radv_dump_mmapped_reg(device, f, R_000E54_SRBM_STATUS3);
+ }
+ radv_dump_mmapped_reg(device, f, R_008680_CP_STAT);
+ radv_dump_mmapped_reg(device, f, R_008674_CP_STALLED_STAT1);
+ radv_dump_mmapped_reg(device, f, R_008678_CP_STALLED_STAT2);
+ radv_dump_mmapped_reg(device, f, R_008670_CP_STALLED_STAT3);
+ radv_dump_mmapped_reg(device, f, R_008210_CP_CPC_STATUS);
+ radv_dump_mmapped_reg(device, f, R_008214_CP_CPC_BUSY_STAT);
+ radv_dump_mmapped_reg(device, f, R_008218_CP_CPC_STALLED_STAT1);
+ radv_dump_mmapped_reg(device, f, R_00821C_CP_CPF_STATUS);
+ radv_dump_mmapped_reg(device, f, R_008220_CP_CPF_BUSY_STAT);
+ radv_dump_mmapped_reg(device, f, R_008224_CP_CPF_STALLED_STAT1);
+ fprintf(f, "\n");
}
static void
-radv_dump_buffer_descriptor(enum chip_class chip_class, const uint32_t *desc,
- FILE *f)
+radv_dump_buffer_descriptor(enum chip_class chip_class, const uint32_t *desc, FILE *f)
{
- fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n");
- for (unsigned j = 0; j < 4; j++)
- ac_dump_reg(f, chip_class, R_008F00_SQ_BUF_RSRC_WORD0 + j * 4,
- desc[j], 0xffffffff);
+ fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n");
+ for (unsigned j = 0; j < 4; j++)
+ ac_dump_reg(f, chip_class, R_008F00_SQ_BUF_RSRC_WORD0 + j * 4, desc[j], 0xffffffff);
}
static void
-radv_dump_image_descriptor(enum chip_class chip_class, const uint32_t *desc,
- FILE *f)
+radv_dump_image_descriptor(enum chip_class chip_class, const uint32_t *desc, FILE *f)
{
- unsigned sq_img_rsrc_word0 = chip_class >= GFX10 ? R_00A000_SQ_IMG_RSRC_WORD0
- : R_008F10_SQ_IMG_RSRC_WORD0;
-
- fprintf(f, COLOR_CYAN " Image:" COLOR_RESET "\n");
- for (unsigned j = 0; j < 8; j++)
- ac_dump_reg(f, chip_class, sq_img_rsrc_word0 + j * 4,
- desc[j], 0xffffffff);
-
- fprintf(f, COLOR_CYAN " FMASK:" COLOR_RESET "\n");
- for (unsigned j = 0; j < 8; j++)
- ac_dump_reg(f, chip_class, sq_img_rsrc_word0 + j * 4,
- desc[8 + j], 0xffffffff);
+ unsigned sq_img_rsrc_word0 =
+ chip_class >= GFX10 ? R_00A000_SQ_IMG_RSRC_WORD0 : R_008F10_SQ_IMG_RSRC_WORD0;
+
+ fprintf(f, COLOR_CYAN " Image:" COLOR_RESET "\n");
+ for (unsigned j = 0; j < 8; j++)
+ ac_dump_reg(f, chip_class, sq_img_rsrc_word0 + j * 4, desc[j], 0xffffffff);
+
+ fprintf(f, COLOR_CYAN " FMASK:" COLOR_RESET "\n");
+ for (unsigned j = 0; j < 8; j++)
+ ac_dump_reg(f, chip_class, sq_img_rsrc_word0 + j * 4, desc[8 + j], 0xffffffff);
}
static void
-radv_dump_sampler_descriptor(enum chip_class chip_class, const uint32_t *desc,
- FILE *f)
+radv_dump_sampler_descriptor(enum chip_class chip_class, const uint32_t *desc, FILE *f)
{
- fprintf(f, COLOR_CYAN " Sampler state:" COLOR_RESET "\n");
- for (unsigned j = 0; j < 4; j++) {
- ac_dump_reg(f, chip_class, R_008F30_SQ_IMG_SAMP_WORD0 + j * 4,
- desc[j], 0xffffffff);
- }
+ fprintf(f, COLOR_CYAN " Sampler state:" COLOR_RESET "\n");
+ for (unsigned j = 0; j < 4; j++) {
+ ac_dump_reg(f, chip_class, R_008F30_SQ_IMG_SAMP_WORD0 + j * 4, desc[j], 0xffffffff);
+ }
}
static void
-radv_dump_combined_image_sampler_descriptor(enum chip_class chip_class,
- const uint32_t *desc, FILE *f)
+radv_dump_combined_image_sampler_descriptor(enum chip_class chip_class, const uint32_t *desc,
+ FILE *f)
{
- radv_dump_image_descriptor(chip_class, desc, f);
- radv_dump_sampler_descriptor(chip_class, desc + 16, f);
+ radv_dump_image_descriptor(chip_class, desc, f);
+ radv_dump_sampler_descriptor(chip_class, desc + 16, f);
}
static void
-radv_dump_descriptor_set(struct radv_device *device,
- struct radv_descriptor_set *set, unsigned id, FILE *f)
+radv_dump_descriptor_set(struct radv_device *device, struct radv_descriptor_set *set, unsigned id,
+ FILE *f)
{
- enum chip_class chip_class = device->physical_device->rad_info.chip_class;
- const struct radv_descriptor_set_layout *layout;
- int i;
-
- if (!set)
- return;
- layout = set->header.layout;
-
- for (i = 0; i < set->header.layout->binding_count; i++) {
- uint32_t *desc =
- set->header.mapped_ptr + layout->binding[i].offset / 4;
-
- switch (layout->binding[i].type) {
- case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
- case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
- case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
- case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
- radv_dump_buffer_descriptor(chip_class, desc, f);
- break;
- case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
- case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
- case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
- radv_dump_image_descriptor(chip_class, desc, f);
- break;
- case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
- radv_dump_combined_image_sampler_descriptor(chip_class, desc, f);
- break;
- case VK_DESCRIPTOR_TYPE_SAMPLER:
- radv_dump_sampler_descriptor(chip_class, desc, f);
- break;
- case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
- case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
- /* todo */
- break;
- default:
- assert(!"unknown descriptor type");
- break;
- }
- fprintf(f, "\n");
- }
- fprintf(f, "\n\n");
+ enum chip_class chip_class = device->physical_device->rad_info.chip_class;
+ const struct radv_descriptor_set_layout *layout;
+ int i;
+
+ if (!set)
+ return;
+ layout = set->header.layout;
+
+ for (i = 0; i < set->header.layout->binding_count; i++) {
+ uint32_t *desc = set->header.mapped_ptr + layout->binding[i].offset / 4;
+
+ switch (layout->binding[i].type) {
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ radv_dump_buffer_descriptor(chip_class, desc, f);
+ break;
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+ radv_dump_image_descriptor(chip_class, desc, f);
+ break;
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ radv_dump_combined_image_sampler_descriptor(chip_class, desc, f);
+ break;
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ radv_dump_sampler_descriptor(chip_class, desc, f);
+ break;
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+ /* todo */
+ break;
+ default:
+ assert(!"unknown descriptor type");
+ break;
+ }
+ fprintf(f, "\n");
+ }
+ fprintf(f, "\n\n");
}
static void
radv_dump_descriptors(struct radv_device *device, FILE *f)
{
- uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
- int i;
+ uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
+ int i;
- fprintf(f, "Descriptors:\n");
- for (i = 0; i < MAX_SETS; i++) {
- struct radv_descriptor_set *set =
- *(struct radv_descriptor_set **)(ptr + i + 4);
+ fprintf(f, "Descriptors:\n");
+ for (i = 0; i < MAX_SETS; i++) {
+ struct radv_descriptor_set *set = *(struct radv_descriptor_set **)(ptr + i + 4);
- radv_dump_descriptor_set(device, set, i, f);
- }
+ radv_dump_descriptor_set(device, set, i, f);
+ }
}
struct radv_shader_inst {
- char text[160]; /* one disasm line */
- unsigned offset; /* instruction offset */
- unsigned size; /* instruction size = 4 or 8 */
+ char text[160]; /* one disasm line */
+ unsigned offset; /* instruction offset */
+ unsigned size; /* instruction size = 4 or 8 */
};
/* Split a disassembly string into lines and add them to the array pointed
* to by "instructions". */
-static void si_add_split_disasm(const char *disasm,
- uint64_t start_addr,
- unsigned *num,
- struct radv_shader_inst *instructions)
+static void
+si_add_split_disasm(const char *disasm, uint64_t start_addr, unsigned *num,
+ struct radv_shader_inst *instructions)
{
- struct radv_shader_inst *last_inst = *num ? &instructions[*num - 1] : NULL;
- char *next;
-
- while ((next = strchr(disasm, '\n'))) {
- struct radv_shader_inst *inst = &instructions[*num];
- unsigned len = next - disasm;
-
- if (!memchr(disasm, ';', len)) {
- /* Ignore everything that is not an instruction. */
- disasm = next + 1;
- continue;
- }
-
- assert(len < ARRAY_SIZE(inst->text));
- memcpy(inst->text, disasm, len);
- inst->text[len] = 0;
- inst->offset = last_inst ? last_inst->offset + last_inst->size : 0;
-
- const char *semicolon = strchr(disasm, ';');
- assert(semicolon);
- /* More than 16 chars after ";" means the instruction is 8 bytes long. */
- inst->size = next - semicolon > 16 ? 8 : 4;
-
- snprintf(inst->text + len, ARRAY_SIZE(inst->text) - len,
- " [PC=0x%"PRIx64", off=%u, size=%u]",
- start_addr + inst->offset, inst->offset, inst->size);
-
- last_inst = inst;
- (*num)++;
- disasm = next + 1;
- }
+ struct radv_shader_inst *last_inst = *num ? &instructions[*num - 1] : NULL;
+ char *next;
+
+ while ((next = strchr(disasm, '\n'))) {
+ struct radv_shader_inst *inst = &instructions[*num];
+ unsigned len = next - disasm;
+
+ if (!memchr(disasm, ';', len)) {
+ /* Ignore everything that is not an instruction. */
+ disasm = next + 1;
+ continue;
+ }
+
+ assert(len < ARRAY_SIZE(inst->text));
+ memcpy(inst->text, disasm, len);
+ inst->text[len] = 0;
+ inst->offset = last_inst ? last_inst->offset + last_inst->size : 0;
+
+ const char *semicolon = strchr(disasm, ';');
+ assert(semicolon);
+ /* More than 16 chars after ";" means the instruction is 8 bytes long. */
+ inst->size = next - semicolon > 16 ? 8 : 4;
+
+ snprintf(inst->text + len, ARRAY_SIZE(inst->text) - len,
+ " [PC=0x%" PRIx64 ", off=%u, size=%u]", start_addr + inst->offset, inst->offset,
+ inst->size);
+
+ last_inst = inst;
+ (*num)++;
+ disasm = next + 1;
+ }
}
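
si_add_split_disasm() sizes each instruction purely from the printed encoding: more than 16 characters between the ';' and the end of the line means two encoding dwords, i.e. an 8-byte instruction. A tiny stand-alone replay of that heuristic on made-up disassembly lines:

#include <assert.h>
#include <string.h>

/* Illustrative only: same comparison as in si_add_split_disasm(), but on a
 * NUL-terminated line instead of a '\n'-terminated slice. */
static unsigned
guess_inst_size(const char *line)
{
   const char *semicolon = strchr(line, ';');
   assert(semicolon);
   return (line + strlen(line)) - semicolon > 16 ? 8 : 4;
}

int
main(void)
{
   assert(guess_inst_size("s_mov_b32 s0, s1           ; BE800301") == 4);
   assert(guess_inst_size("s_mov_b32 s0, 0x12345678   ; BE8003FF 12345678") == 8);
   return 0;
}
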
static void
-radv_dump_annotated_shader(struct radv_shader_variant *shader,
- gl_shader_stage stage, struct ac_wave_info *waves,
- unsigned num_waves, FILE *f)
+radv_dump_annotated_shader(struct radv_shader_variant *shader, gl_shader_stage stage,
+ struct ac_wave_info *waves, unsigned num_waves, FILE *f)
{
- uint64_t start_addr, end_addr;
- unsigned i;
-
- if (!shader)
- return;
-
- start_addr = radv_buffer_get_va(shader->bo) + shader->bo_offset;
- end_addr = start_addr + shader->code_size;
-
- /* See if any wave executes the shader. */
- for (i = 0; i < num_waves; i++) {
- if (start_addr <= waves[i].pc && waves[i].pc <= end_addr)
- break;
- }
-
- if (i == num_waves)
- return; /* the shader is not being executed */
-
- /* Remember the first found wave. The waves are sorted according to PC. */
- waves = &waves[i];
- num_waves -= i;
-
- /* Get the list of instructions.
- * Buffer size / 4 is the upper bound of the instruction count.
- */
- unsigned num_inst = 0;
- struct radv_shader_inst *instructions =
- calloc(shader->code_size / 4, sizeof(struct radv_shader_inst));
-
- si_add_split_disasm(shader->disasm_string,
- start_addr, &num_inst, instructions);
-
- fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n",
- radv_get_shader_name(&shader->info, stage));
-
- /* Print instructions with annotations. */
- for (i = 0; i < num_inst; i++) {
- struct radv_shader_inst *inst = &instructions[i];
-
- fprintf(f, "%s\n", inst->text);
-
- /* Print which waves execute the instruction right now. */
- while (num_waves && start_addr + inst->offset == waves->pc) {
- fprintf(f,
- " " COLOR_GREEN "^ SE%u SH%u CU%u "
- "SIMD%u WAVE%u EXEC=%016"PRIx64 " ",
- waves->se, waves->sh, waves->cu, waves->simd,
- waves->wave, waves->exec);
-
- if (inst->size == 4) {
- fprintf(f, "INST32=%08X" COLOR_RESET "\n",
- waves->inst_dw0);
- } else {
- fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n",
- waves->inst_dw0, waves->inst_dw1);
- }
-
- waves->matched = true;
- waves = &waves[1];
- num_waves--;
- }
- }
-
- fprintf(f, "\n\n");
- free(instructions);
+ uint64_t start_addr, end_addr;
+ unsigned i;
+
+ if (!shader)
+ return;
+
+ start_addr = radv_buffer_get_va(shader->bo) + shader->bo_offset;
+ end_addr = start_addr + shader->code_size;
+
+ /* See if any wave executes the shader. */
+ for (i = 0; i < num_waves; i++) {
+ if (start_addr <= waves[i].pc && waves[i].pc <= end_addr)
+ break;
+ }
+
+ if (i == num_waves)
+ return; /* the shader is not being executed */
+
+ /* Remember the first found wave. The waves are sorted according to PC. */
+ waves = &waves[i];
+ num_waves -= i;
+
+ /* Get the list of instructions.
+ * Buffer size / 4 is the upper bound of the instruction count.
+ */
+ unsigned num_inst = 0;
+ struct radv_shader_inst *instructions =
+ calloc(shader->code_size / 4, sizeof(struct radv_shader_inst));
+
+ si_add_split_disasm(shader->disasm_string, start_addr, &num_inst, instructions);
+
+ fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n",
+ radv_get_shader_name(&shader->info, stage));
+
+ /* Print instructions with annotations. */
+ for (i = 0; i < num_inst; i++) {
+ struct radv_shader_inst *inst = &instructions[i];
+
+ fprintf(f, "%s\n", inst->text);
+
+ /* Print which waves execute the instruction right now. */
+ while (num_waves && start_addr + inst->offset == waves->pc) {
+ fprintf(f,
+ " " COLOR_GREEN "^ SE%u SH%u CU%u "
+ "SIMD%u WAVE%u EXEC=%016" PRIx64 " ",
+ waves->se, waves->sh, waves->cu, waves->simd, waves->wave, waves->exec);
+
+ if (inst->size == 4) {
+ fprintf(f, "INST32=%08X" COLOR_RESET "\n", waves->inst_dw0);
+ } else {
+ fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n", waves->inst_dw0, waves->inst_dw1);
+ }
+
+ waves->matched = true;
+ waves = &waves[1];
+ num_waves--;
+ }
+ }
+
+ fprintf(f, "\n\n");
+ free(instructions);
}
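
radv_dump_annotated_shader() above leans on the waves being sorted by PC: it first skips to the earliest wave whose PC lands inside the shader's [start, end] code range, then walks waves and instructions in lockstep. A compact stand-alone illustration of that first step, with made-up PC values:

#include <stdint.h>
#include <stdio.h>

/* Waves are assumed sorted by pc; find the first one executing code inside
 * [start_addr, end_addr]. Returns num_waves if none matches. */
static unsigned
first_wave_in_shader(const uint64_t *pc, unsigned num_waves, uint64_t start_addr,
                     uint64_t end_addr)
{
   unsigned i;
   for (i = 0; i < num_waves; i++)
      if (start_addr <= pc[i] && pc[i] <= end_addr)
         break;
   return i;
}

int
main(void)
{
   const uint64_t pc[] = {0x1000, 0x20040, 0x20080, 0x90000};
   unsigned i = first_wave_in_shader(pc, 4, 0x20000, 0x20100);
   printf("first matching wave: %u\n", i); /* 1 */
   return 0;
}
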
static void
-radv_dump_annotated_shaders(struct radv_pipeline *pipeline,
- VkShaderStageFlagBits active_stages, FILE *f)
+radv_dump_annotated_shaders(struct radv_pipeline *pipeline, VkShaderStageFlagBits active_stages,
+ FILE *f)
{
- struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP];
- enum chip_class chip_class = pipeline->device->physical_device->rad_info.chip_class;
- unsigned num_waves = ac_get_wave_info(chip_class, waves);
-
- fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET
- "\n\n", num_waves);
-
- /* Dump annotated active graphics shaders. */
- unsigned stages = active_stages;
- while (stages) {
- int stage = u_bit_scan(&stages);
-
- radv_dump_annotated_shader(pipeline->shaders[stage],
- stage, waves, num_waves, f);
- }
-
- /* Print waves executing shaders that are not currently bound. */
- unsigned i;
- bool found = false;
- for (i = 0; i < num_waves; i++) {
- if (waves[i].matched)
- continue;
-
- if (!found) {
- fprintf(f, COLOR_CYAN
- "Waves not executing currently-bound shaders:"
- COLOR_RESET "\n");
- found = true;
- }
- fprintf(f, " SE%u SH%u CU%u SIMD%u WAVE%u EXEC=%016"PRIx64
- " INST=%08X %08X PC=%"PRIx64"\n",
- waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd,
- waves[i].wave, waves[i].exec, waves[i].inst_dw0,
- waves[i].inst_dw1, waves[i].pc);
- }
- if (found)
- fprintf(f, "\n\n");
+ struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP];
+ enum chip_class chip_class = pipeline->device->physical_device->rad_info.chip_class;
+ unsigned num_waves = ac_get_wave_info(chip_class, waves);
+
+ fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET "\n\n", num_waves);
+
+ /* Dump annotated active graphics shaders. */
+ unsigned stages = active_stages;
+ while (stages) {
+ int stage = u_bit_scan(&stages);
+
+ radv_dump_annotated_shader(pipeline->shaders[stage], stage, waves, num_waves, f);
+ }
+
+ /* Print waves executing shaders that are not currently bound. */
+ unsigned i;
+ bool found = false;
+ for (i = 0; i < num_waves; i++) {
+ if (waves[i].matched)
+ continue;
+
+ if (!found) {
+ fprintf(f, COLOR_CYAN "Waves not executing currently-bound shaders:" COLOR_RESET "\n");
+ found = true;
+ }
+ fprintf(f,
+ " SE%u SH%u CU%u SIMD%u WAVE%u EXEC=%016" PRIx64 " INST=%08X %08X PC=%" PRIx64
+ "\n",
+ waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd, waves[i].wave, waves[i].exec,
+ waves[i].inst_dw0, waves[i].inst_dw1, waves[i].pc);
+ }
+ if (found)
+ fprintf(f, "\n\n");
}
static void
-radv_dump_shader(struct radv_pipeline *pipeline,
- struct radv_shader_variant *shader, gl_shader_stage stage,
- FILE *f)
+radv_dump_shader(struct radv_pipeline *pipeline, struct radv_shader_variant *shader,
+ gl_shader_stage stage, FILE *f)
{
- if (!shader)
- return;
+ if (!shader)
+ return;
- fprintf(f, "%s:\n\n", radv_get_shader_name(&shader->info, stage));
+ fprintf(f, "%s:\n\n", radv_get_shader_name(&shader->info, stage));
- if (shader->spirv) {
- unsigned char sha1[21];
- char sha1buf[41];
+ if (shader->spirv) {
+ unsigned char sha1[21];
+ char sha1buf[41];
- _mesa_sha1_compute(shader->spirv, shader->spirv_size, sha1);
- _mesa_sha1_format(sha1buf, sha1);
+ _mesa_sha1_compute(shader->spirv, shader->spirv_size, sha1);
+ _mesa_sha1_format(sha1buf, sha1);
- fprintf(f, "SPIRV (sha1: %s):\n", sha1buf);
- radv_print_spirv(shader->spirv, shader->spirv_size, f);
- }
+ fprintf(f, "SPIRV (sha1: %s):\n", sha1buf);
+ radv_print_spirv(shader->spirv, shader->spirv_size, f);
+ }
- if (shader->nir_string) {
- fprintf(f, "NIR:\n%s\n", shader->nir_string);
- }
+ if (shader->nir_string) {
+ fprintf(f, "NIR:\n%s\n", shader->nir_string);
+ }
- fprintf(f, "%s IR:\n%s\n",
- pipeline->device->physical_device->use_llvm ? "LLVM" : "ACO",
- shader->ir_string);
- fprintf(f, "DISASM:\n%s\n", shader->disasm_string);
+ fprintf(f, "%s IR:\n%s\n", pipeline->device->physical_device->use_llvm ? "LLVM" : "ACO",
+ shader->ir_string);
+ fprintf(f, "DISASM:\n%s\n", shader->disasm_string);
- radv_dump_shader_stats(pipeline->device, pipeline, stage, f);
+ radv_dump_shader_stats(pipeline->device, pipeline, stage, f);
}
static void
-radv_dump_shaders(struct radv_pipeline *pipeline,
- VkShaderStageFlagBits active_stages, FILE *f)
+radv_dump_shaders(struct radv_pipeline *pipeline, VkShaderStageFlagBits active_stages, FILE *f)
{
- /* Dump active graphics shaders. */
- unsigned stages = active_stages;
- while (stages) {
- int stage = u_bit_scan(&stages);
+ /* Dump active graphics shaders. */
+ unsigned stages = active_stages;
+ while (stages) {
+ int stage = u_bit_scan(&stages);
- radv_dump_shader(pipeline, pipeline->shaders[stage], stage, f);
- }
+ radv_dump_shader(pipeline, pipeline->shaders[stage], stage, f);
+ }
}
static void
radv_dump_vertex_descriptors(struct radv_pipeline *pipeline, FILE *f)
{
- void *ptr = (uint64_t *)pipeline->device->trace_id_ptr;
- uint32_t count = pipeline->num_vertex_bindings;
- uint32_t *vb_ptr = &((uint32_t *)ptr)[3];
-
- if (!count)
- return;
-
- fprintf(f, "Num vertex bindings: %d\n", count);
- for (uint32_t i = 0; i < count; i++) {
- uint32_t *desc = &((uint32_t *)vb_ptr)[i * 4];
- uint64_t va = 0;
-
- va |= desc[0];
- va |= (uint64_t)G_008F04_BASE_ADDRESS_HI(desc[1]) << 32;
-
- fprintf(f, "VBO#%d:\n", i);
- fprintf(f, "\tVA: 0x%"PRIx64"\n", va);
- fprintf(f, "\tStride: %d\n", G_008F04_STRIDE(desc[1]));
- fprintf(f, "\tNum records: %d (0x%x)\n", desc[2], desc[2]);
- }
+ void *ptr = (uint64_t *)pipeline->device->trace_id_ptr;
+ uint32_t count = pipeline->num_vertex_bindings;
+ uint32_t *vb_ptr = &((uint32_t *)ptr)[3];
+
+ if (!count)
+ return;
+
+ fprintf(f, "Num vertex bindings: %d\n", count);
+ for (uint32_t i = 0; i < count; i++) {
+ uint32_t *desc = &((uint32_t *)vb_ptr)[i * 4];
+ uint64_t va = 0;
+
+ va |= desc[0];
+ va |= (uint64_t)G_008F04_BASE_ADDRESS_HI(desc[1]) << 32;
+
+ fprintf(f, "VBO#%d:\n", i);
+ fprintf(f, "\tVA: 0x%" PRIx64 "\n", va);
+ fprintf(f, "\tStride: %d\n", G_008F04_STRIDE(desc[1]));
+ fprintf(f, "\tNum records: %d (0x%x)\n", desc[2], desc[2]);
+ }
}
static struct radv_pipeline *
radv_get_saved_pipeline(struct radv_device *device, enum ring_type ring)
{
- uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
- int offset = ring == RING_GFX ? 1 : 2;
+ uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
+ int offset = ring == RING_GFX ? 1 : 2;
- return *(struct radv_pipeline **)(ptr + offset);
+ return *(struct radv_pipeline **)(ptr + offset);
}
static void
radv_dump_queue_state(struct radv_queue *queue, FILE *f)
{
- enum ring_type ring = radv_queue_family_to_ring(queue->queue_family_index);
- struct radv_pipeline *pipeline;
-
- fprintf(f, "RING_%s:\n", ring == RING_GFX ? "GFX" : "COMPUTE");
-
- pipeline = radv_get_saved_pipeline(queue->device, ring);
- if (pipeline) {
- radv_dump_shaders(pipeline, pipeline->active_stages, f);
- if (!(queue->device->instance->debug_flags & RADV_DEBUG_NO_UMR))
- radv_dump_annotated_shaders(pipeline, pipeline->active_stages, f);
- radv_dump_vertex_descriptors(pipeline, f);
- radv_dump_descriptors(queue->device, f);
- }
+ enum ring_type ring = radv_queue_family_to_ring(queue->queue_family_index);
+ struct radv_pipeline *pipeline;
+
+ fprintf(f, "RING_%s:\n", ring == RING_GFX ? "GFX" : "COMPUTE");
+
+ pipeline = radv_get_saved_pipeline(queue->device, ring);
+ if (pipeline) {
+ radv_dump_shaders(pipeline, pipeline->active_stages, f);
+ if (!(queue->device->instance->debug_flags & RADV_DEBUG_NO_UMR))
+ radv_dump_annotated_shaders(pipeline, pipeline->active_stages, f);
+ radv_dump_vertex_descriptors(pipeline, f);
+ radv_dump_descriptors(queue->device, f);
+ }
}
static void
radv_dump_cmd(const char *cmd, FILE *f)
{
#ifndef _WIN32
- char line[2048];
- FILE *p;
-
- p = popen(cmd, "r");
- if (p) {
- while (fgets(line, sizeof(line), p))
- fputs(line, f);
- fprintf(f, "\n");
- pclose(p);
- }
+ char line[2048];
+ FILE *p;
+
+ p = popen(cmd, "r");
+ if (p) {
+ while (fgets(line, sizeof(line), p))
+ fputs(line, f);
+ fprintf(f, "\n");
+ pclose(p);
+ }
#endif
}
static void
radv_dump_dmesg(FILE *f)
{
- fprintf(f, "\nLast 60 lines of dmesg:\n\n");
- radv_dump_cmd("dmesg | tail -n60", f);
+ fprintf(f, "\nLast 60 lines of dmesg:\n\n");
+ radv_dump_cmd("dmesg | tail -n60", f);
}
void
radv_dump_enabled_options(struct radv_device *device, FILE *f)
{
- uint64_t mask;
-
- if (device->instance->debug_flags) {
- fprintf(f, "Enabled debug options: ");
-
- mask = device->instance->debug_flags;
- while (mask) {
- int i = u_bit_scan64(&mask);
- fprintf(f, "%s, ", radv_get_debug_option_name(i));
- }
- fprintf(f, "\n");
- }
-
- if (device->instance->perftest_flags) {
- fprintf(f, "Enabled perftest options: ");
-
- mask = device->instance->perftest_flags;
- while (mask) {
- int i = u_bit_scan64(&mask);
- fprintf(f, "%s, ", radv_get_perftest_option_name(i));
- }
- fprintf(f, "\n");
- }
+ uint64_t mask;
+
+ if (device->instance->debug_flags) {
+ fprintf(f, "Enabled debug options: ");
+
+ mask = device->instance->debug_flags;
+ while (mask) {
+ int i = u_bit_scan64(&mask);
+ fprintf(f, "%s, ", radv_get_debug_option_name(i));
+ }
+ fprintf(f, "\n");
+ }
+
+ if (device->instance->perftest_flags) {
+ fprintf(f, "Enabled perftest options: ");
+
+ mask = device->instance->perftest_flags;
+ while (mask) {
+ int i = u_bit_scan64(&mask);
+ fprintf(f, "%s, ", radv_get_perftest_option_name(i));
+ }
+ fprintf(f, "\n");
+ }
}
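The two loops above walk a 64-bit flag mask one set bit at a time through u_bit_scan64(). A standalone sketch of the same pattern is below; find_lowest_set() is a local stand-in for the Mesa utility, and the printed "bit %d" is a placeholder where the driver looks up the option name.

#include <stdint.h>
#include <stdio.h>

/* Hedged sketch of the "peel one set bit per iteration" pattern used by
 * radv_dump_enabled_options(). find_lowest_set() stands in for u_bit_scan64()
 * and must only be called with a non-zero mask. */
static int
find_lowest_set(uint64_t *mask)
{
   int bit = __builtin_ctzll(*mask); /* index of the lowest set bit */
   *mask &= *mask - 1;               /* clear that bit */
   return bit;
}

static void
dump_flag_bits(uint64_t flags, FILE *f)
{
   uint64_t mask = flags;
   while (mask) {
      int i = find_lowest_set(&mask);
      fprintf(f, "bit %d, ", i); /* the driver prints the option name here */
   }
   fprintf(f, "\n");
}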
static void
radv_dump_app_info(struct radv_device *device, FILE *f)
{
- struct radv_instance *instance = device->instance;
-
- fprintf(f, "Application name: %s\n", instance->vk.app_info.app_name);
- fprintf(f, "Application version: %d\n", instance->vk.app_info.app_version);
- fprintf(f, "Engine name: %s\n", instance->vk.app_info.engine_name);
- fprintf(f, "Engine version: %d\n", instance->vk.app_info.engine_version);
- fprintf(f, "API version: %d.%d.%d\n",
- VK_VERSION_MAJOR(instance->vk.app_info.api_version),
- VK_VERSION_MINOR(instance->vk.app_info.api_version),
- VK_VERSION_PATCH(instance->vk.app_info.api_version));
-
- radv_dump_enabled_options(device, f);
+ struct radv_instance *instance = device->instance;
+
+ fprintf(f, "Application name: %s\n", instance->vk.app_info.app_name);
+ fprintf(f, "Application version: %d\n", instance->vk.app_info.app_version);
+ fprintf(f, "Engine name: %s\n", instance->vk.app_info.engine_name);
+ fprintf(f, "Engine version: %d\n", instance->vk.app_info.engine_version);
+ fprintf(f, "API version: %d.%d.%d\n", VK_VERSION_MAJOR(instance->vk.app_info.api_version),
+ VK_VERSION_MINOR(instance->vk.app_info.api_version),
+ VK_VERSION_PATCH(instance->vk.app_info.api_version));
+
+ radv_dump_enabled_options(device, f);
}
static void
radv_dump_device_name(struct radv_device *device, FILE *f)
{
- struct radeon_info *info = &device->physical_device->rad_info;
+ struct radeon_info *info = &device->physical_device->rad_info;
#ifndef _WIN32
- char kernel_version[128] = {0};
- struct utsname uname_data;
+ char kernel_version[128] = {0};
+ struct utsname uname_data;
#endif
- const char *chip_name;
+ const char *chip_name;
- chip_name = device->ws->get_chip_name(device->ws);
+ chip_name = device->ws->get_chip_name(device->ws);
#ifdef _WIN32
- fprintf(f, "Device name: %s (%s / DRM %i.%i.%i)\n\n",
- chip_name, device->physical_device->name,
- info->drm_major, info->drm_minor, info->drm_patchlevel);
+ fprintf(f, "Device name: %s (%s / DRM %i.%i.%i)\n\n", chip_name, device->physical_device->name,
+ info->drm_major, info->drm_minor, info->drm_patchlevel);
#else
- if (uname(&uname_data) == 0)
- snprintf(kernel_version, sizeof(kernel_version),
- " / %s", uname_data.release);
-
- fprintf(f, "Device name: %s (%s / DRM %i.%i.%i%s)\n\n",
- chip_name, device->physical_device->name,
- info->drm_major, info->drm_minor, info->drm_patchlevel,
- kernel_version);
+ if (uname(&uname_data) == 0)
+ snprintf(kernel_version, sizeof(kernel_version), " / %s", uname_data.release);
+
+ fprintf(f, "Device name: %s (%s / DRM %i.%i.%i%s)\n\n", chip_name, device->physical_device->name,
+ info->drm_major, info->drm_minor, info->drm_patchlevel, kernel_version);
#endif
}
static void
radv_dump_umr_ring(struct radv_queue *queue, FILE *f)
{
- enum ring_type ring = radv_queue_family_to_ring(queue->queue_family_index);
- struct radv_device *device = queue->device;
- char cmd[128];
+ enum ring_type ring = radv_queue_family_to_ring(queue->queue_family_index);
+ struct radv_device *device = queue->device;
+ char cmd[128];
- /* TODO: Dump compute ring. */
- if (ring != RING_GFX)
- return;
+ /* TODO: Dump compute ring. */
+ if (ring != RING_GFX)
+ return;
- sprintf(cmd, "umr -R %s 2>&1",
- device->physical_device->rad_info.chip_class >= GFX10 ? "gfx_0.0.0" : "gfx");
+ sprintf(cmd, "umr -R %s 2>&1",
+ device->physical_device->rad_info.chip_class >= GFX10 ? "gfx_0.0.0" : "gfx");
- fprintf(f, "\nUMR GFX ring:\n\n");
- radv_dump_cmd(cmd, f);
+ fprintf(f, "\nUMR GFX ring:\n\n");
+ radv_dump_cmd(cmd, f);
}
static void
radv_dump_umr_waves(struct radv_queue *queue, FILE *f)
{
- enum ring_type ring = radv_queue_family_to_ring(queue->queue_family_index);
- struct radv_device *device = queue->device;
- char cmd[128];
+ enum ring_type ring = radv_queue_family_to_ring(queue->queue_family_index);
+ struct radv_device *device = queue->device;
+ char cmd[128];
- /* TODO: Dump compute ring. */
- if (ring != RING_GFX)
- return;
+ /* TODO: Dump compute ring. */
+ if (ring != RING_GFX)
+ return;
- sprintf(cmd, "umr -O bits,halt_waves -wa %s 2>&1",
- device->physical_device->rad_info.chip_class >= GFX10 ? "gfx_0.0.0" : "gfx");
+ sprintf(cmd, "umr -O bits,halt_waves -wa %s 2>&1",
+ device->physical_device->rad_info.chip_class >= GFX10 ? "gfx_0.0.0" : "gfx");
- fprintf(f, "\nUMR GFX waves:\n\n");
- radv_dump_cmd(cmd, f);
+ fprintf(f, "\nUMR GFX waves:\n\n");
+ radv_dump_cmd(cmd, f);
}
static bool
radv_gpu_hang_occured(struct radv_queue *queue, enum ring_type ring)
{
- struct radeon_winsys *ws = queue->device->ws;
+ struct radeon_winsys *ws = queue->device->ws;
- if (!ws->ctx_wait_idle(queue->hw_ctx, ring, queue->queue_idx))
- return true;
+ if (!ws->ctx_wait_idle(queue->hw_ctx, ring, queue->queue_idx))
+ return true;
- return false;
+ return false;
}
void
radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs)
{
- struct radv_device *device = queue->device;
- char dump_dir[256], dump_path[512];
- enum ring_type ring;
- uint64_t addr;
- FILE *f;
-
- ring = radv_queue_family_to_ring(queue->queue_family_index);
-
- bool hang_occurred = radv_gpu_hang_occured(queue, ring);
- bool vm_fault_occurred = false;
- if (queue->device->instance->debug_flags & RADV_DEBUG_VM_FAULTS)
- vm_fault_occurred = ac_vm_fault_occured(device->physical_device->rad_info.chip_class,
- &device->dmesg_timestamp, &addr);
- if (!hang_occurred && !vm_fault_occurred)
- return;
-
- fprintf(stderr, "radv: GPU hang detected...\n");
-
- /* Create a directory into $HOME/radv_dumps_<pid>_<time> to save
- * various debugging info about that GPU hang.
- */
- struct tm *timep, result;
- time_t raw_time;
- char buf_time[128];
-
- time(&raw_time);
- timep = os_localtime(&raw_time, &result);
- strftime(buf_time, sizeof(buf_time), "%Y.%m.%d_%H.%M.%S", timep);
-
- snprintf(dump_dir, sizeof(dump_dir), "%s/"RADV_DUMP_DIR"_%d_%s",
- debug_get_option("HOME", "."), getpid(), buf_time);
- if (mkdir(dump_dir, 0774) && errno != EEXIST) {
- fprintf(stderr, "radv: can't create directory '%s' (%i).\n",
- dump_dir, errno);
- abort();
- }
-
- fprintf(stderr, "radv: GPU hang report will be saved to '%s'!\n", dump_dir);
-
- /* Dump trace file. */
- snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "trace.log");
- f = fopen(dump_path, "w+");
- if (f) {
- radv_dump_trace(queue->device, cs, f);
- fclose(f);
- }
-
- /* Dump pipeline state. */
- snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "pipeline.log");
- f = fopen(dump_path, "w+");
- if (f) {
- radv_dump_queue_state(queue, f);
- fclose(f);
- }
-
- if (!(device->instance->debug_flags & RADV_DEBUG_NO_UMR)) {
- /* Dump UMR ring. */
- snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "umr_ring.log");
- f = fopen(dump_path, "w+");
- if (f) {
- radv_dump_umr_ring(queue, f);
- fclose(f);
- }
-
- /* Dump UMR waves. */
- snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "umr_waves.log");
- f = fopen(dump_path, "w+");
- if (f) {
- radv_dump_umr_waves(queue, f);
- fclose(f);
- }
- }
-
- /* Dump debug registers. */
- snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "registers.log");
- f = fopen(dump_path, "w+");
- if (f) {
- radv_dump_debug_registers(device, f);
- fclose(f);
- }
-
- /* Dump BO ranges. */
- snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "bo_ranges.log");
- f = fopen(dump_path, "w+");
- if (f) {
- device->ws->dump_bo_ranges(device->ws, f);
- fclose(f);
- }
-
- /* Dump BO log. */
- snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "bo_history.log");
- f = fopen(dump_path, "w+");
- if (f) {
- device->ws->dump_bo_log(device->ws, f);
- fclose(f);
- }
-
- /* Dump VM fault info. */
- if (vm_fault_occurred) {
- snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "vm_fault.log");
- f = fopen(dump_path, "w+");
- if (f) {
- fprintf(f, "VM fault report.\n\n");
- fprintf(f, "Failing VM page: 0x%08"PRIx64"\n\n", addr);
- fclose(f);
- }
- }
-
- /* Dump app info. */
- snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "app_info.log");
- f = fopen(dump_path, "w+");
- if (f) {
- radv_dump_app_info(device, f);
- fclose(f);
- }
-
- /* Dump GPU info. */
- snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "gpu_info.log");
- f = fopen(dump_path, "w+");
- if (f) {
- radv_dump_device_name(device, f);
- ac_print_gpu_info(&device->physical_device->rad_info, f);
- fclose(f);
- }
-
- /* Dump dmesg. */
- snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "dmesg.log");
- f = fopen(dump_path, "w+");
- if (f) {
- radv_dump_dmesg(f);
- fclose(f);
- }
-
- fprintf(stderr, "radv: GPU hang report saved successfully!\n");
- abort();
+ struct radv_device *device = queue->device;
+ char dump_dir[256], dump_path[512];
+ enum ring_type ring;
+ uint64_t addr;
+ FILE *f;
+
+ ring = radv_queue_family_to_ring(queue->queue_family_index);
+
+ bool hang_occurred = radv_gpu_hang_occured(queue, ring);
+ bool vm_fault_occurred = false;
+ if (queue->device->instance->debug_flags & RADV_DEBUG_VM_FAULTS)
+ vm_fault_occurred = ac_vm_fault_occured(device->physical_device->rad_info.chip_class,
+ &device->dmesg_timestamp, &addr);
+ if (!hang_occurred && !vm_fault_occurred)
+ return;
+
+ fprintf(stderr, "radv: GPU hang detected...\n");
+
+ /* Create a directory into $HOME/radv_dumps_<pid>_<time> to save
+ * various debugging info about that GPU hang.
+ */
+ struct tm *timep, result;
+ time_t raw_time;
+ char buf_time[128];
+
+ time(&raw_time);
+ timep = os_localtime(&raw_time, &result);
+ strftime(buf_time, sizeof(buf_time), "%Y.%m.%d_%H.%M.%S", timep);
+
+ snprintf(dump_dir, sizeof(dump_dir), "%s/" RADV_DUMP_DIR "_%d_%s", debug_get_option("HOME", "."),
+ getpid(), buf_time);
+ if (mkdir(dump_dir, 0774) && errno != EEXIST) {
+ fprintf(stderr, "radv: can't create directory '%s' (%i).\n", dump_dir, errno);
+ abort();
+ }
+
+ fprintf(stderr, "radv: GPU hang report will be saved to '%s'!\n", dump_dir);
+
+ /* Dump trace file. */
+ snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "trace.log");
+ f = fopen(dump_path, "w+");
+ if (f) {
+ radv_dump_trace(queue->device, cs, f);
+ fclose(f);
+ }
+
+ /* Dump pipeline state. */
+ snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "pipeline.log");
+ f = fopen(dump_path, "w+");
+ if (f) {
+ radv_dump_queue_state(queue, f);
+ fclose(f);
+ }
+
+ if (!(device->instance->debug_flags & RADV_DEBUG_NO_UMR)) {
+ /* Dump UMR ring. */
+ snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "umr_ring.log");
+ f = fopen(dump_path, "w+");
+ if (f) {
+ radv_dump_umr_ring(queue, f);
+ fclose(f);
+ }
+
+ /* Dump UMR waves. */
+ snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "umr_waves.log");
+ f = fopen(dump_path, "w+");
+ if (f) {
+ radv_dump_umr_waves(queue, f);
+ fclose(f);
+ }
+ }
+
+ /* Dump debug registers. */
+ snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "registers.log");
+ f = fopen(dump_path, "w+");
+ if (f) {
+ radv_dump_debug_registers(device, f);
+ fclose(f);
+ }
+
+ /* Dump BO ranges. */
+ snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "bo_ranges.log");
+ f = fopen(dump_path, "w+");
+ if (f) {
+ device->ws->dump_bo_ranges(device->ws, f);
+ fclose(f);
+ }
+
+ /* Dump BO log. */
+ snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "bo_history.log");
+ f = fopen(dump_path, "w+");
+ if (f) {
+ device->ws->dump_bo_log(device->ws, f);
+ fclose(f);
+ }
+
+ /* Dump VM fault info. */
+ if (vm_fault_occurred) {
+ snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "vm_fault.log");
+ f = fopen(dump_path, "w+");
+ if (f) {
+ fprintf(f, "VM fault report.\n\n");
+ fprintf(f, "Failing VM page: 0x%08" PRIx64 "\n\n", addr);
+ fclose(f);
+ }
+ }
+
+ /* Dump app info. */
+ snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "app_info.log");
+ f = fopen(dump_path, "w+");
+ if (f) {
+ radv_dump_app_info(device, f);
+ fclose(f);
+ }
+
+ /* Dump GPU info. */
+ snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "gpu_info.log");
+ f = fopen(dump_path, "w+");
+ if (f) {
+ radv_dump_device_name(device, f);
+ ac_print_gpu_info(&device->physical_device->rad_info, f);
+ fclose(f);
+ }
+
+ /* Dump dmesg. */
+ snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "dmesg.log");
+ f = fopen(dump_path, "w+");
+ if (f) {
+ radv_dump_dmesg(f);
+ fclose(f);
+ }
+
+ fprintf(stderr, "radv: GPU hang report saved successfully!\n");
+ abort();
}
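radv_check_gpu_hangs() repeats the same snprintf/fopen/dump/fclose sequence for every report file it writes. As an illustration only (not something this patch introduces), the repeated step could be expressed as a helper taking a callback; dump_to_file() and dump_fn below are invented names for the sketch.

#include <stdio.h>

/* Hedged sketch: factor the repeated "open <dump_dir>/<name>, run one dump
 * callback, close" sequence from radv_check_gpu_hangs() into a helper.
 * dump_fn is a hypothetical callback type, not a RADV API. */
typedef void (*dump_fn)(void *ctx, FILE *f);

static void
dump_to_file(const char *dump_dir, const char *name, dump_fn fn, void *ctx)
{
   char path[512];
   FILE *f;

   snprintf(path, sizeof(path), "%s/%s", dump_dir, name);
   f = fopen(path, "w+");
   if (!f)
      return;

   fn(ctx, f);
   fclose(f);
}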
void
radv_print_spirv(const char *data, uint32_t size, FILE *fp)
{
#ifndef _WIN32
- char path[] = "/tmp/fileXXXXXX";
- char command[128];
- int fd;
+ char path[] = "/tmp/fileXXXXXX";
+ char command[128];
+ int fd;
- /* Dump the binary into a temporary file. */
- fd = mkstemp(path);
- if (fd < 0)
- return;
+ /* Dump the binary into a temporary file. */
+ fd = mkstemp(path);
+ if (fd < 0)
+ return;
- if (write(fd, data, size) == -1)
- goto fail;
+ if (write(fd, data, size) == -1)
+ goto fail;
- /* Disassemble using spirv-dis if installed. */
- sprintf(command, "spirv-dis %s", path);
- radv_dump_cmd(command, fp);
+ /* Disassemble using spirv-dis if installed. */
+ sprintf(command, "spirv-dis %s", path);
+ radv_dump_cmd(command, fp);
fail:
- close(fd);
- unlink(path);
+ close(fd);
+ unlink(path);
#endif
}
bool
radv_trap_handler_init(struct radv_device *device)
{
- struct radeon_winsys *ws = device->ws;
- VkResult result;
-
- /* Create the trap handler shader and upload it like other shaders. */
- device->trap_handler_shader = radv_create_trap_handler_shader(device);
- if (!device->trap_handler_shader) {
- fprintf(stderr, "radv: failed to create the trap handler shader.\n");
- return false;
- }
-
- result = ws->buffer_make_resident(ws, device->trap_handler_shader->bo, true);
- if (result != VK_SUCCESS)
- return false;
-
- device->tma_bo = ws->buffer_create(ws, TMA_BO_SIZE, 256,
- RADEON_DOMAIN_VRAM,
- RADEON_FLAG_CPU_ACCESS |
- RADEON_FLAG_NO_INTERPROCESS_SHARING |
- RADEON_FLAG_ZERO_VRAM |
- RADEON_FLAG_32BIT,
- RADV_BO_PRIORITY_SCRATCH);
- if (!device->tma_bo)
- return false;
-
- result = ws->buffer_make_resident(ws, device->tma_bo, true);
- if (result != VK_SUCCESS)
- return false;
-
- device->tma_ptr = ws->buffer_map(device->tma_bo);
- if (!device->tma_ptr)
- return false;
-
- /* Upload a buffer descriptor to store various info from the trap. */
- uint64_t tma_va = radv_buffer_get_va(device->tma_bo) + 16;
- uint32_t desc[4];
-
- desc[0] = tma_va;
- desc[1] = S_008F04_BASE_ADDRESS_HI(tma_va >> 32);
- desc[2] = TMA_BO_SIZE;
- desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
- S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
- S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
- S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
-
- memcpy(device->tma_ptr, desc, sizeof(desc));
-
- return true;
+ struct radeon_winsys *ws = device->ws;
+ VkResult result;
+
+ /* Create the trap handler shader and upload it like other shaders. */
+ device->trap_handler_shader = radv_create_trap_handler_shader(device);
+ if (!device->trap_handler_shader) {
+ fprintf(stderr, "radv: failed to create the trap handler shader.\n");
+ return false;
+ }
+
+ result = ws->buffer_make_resident(ws, device->trap_handler_shader->bo, true);
+ if (result != VK_SUCCESS)
+ return false;
+
+ device->tma_bo = ws->buffer_create(ws, TMA_BO_SIZE, 256, RADEON_DOMAIN_VRAM,
+ RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING |
+ RADEON_FLAG_ZERO_VRAM | RADEON_FLAG_32BIT,
+ RADV_BO_PRIORITY_SCRATCH);
+ if (!device->tma_bo)
+ return false;
+
+ result = ws->buffer_make_resident(ws, device->tma_bo, true);
+ if (result != VK_SUCCESS)
+ return false;
+
+ device->tma_ptr = ws->buffer_map(device->tma_bo);
+ if (!device->tma_ptr)
+ return false;
+
+ /* Upload a buffer descriptor to store various info from the trap. */
+ uint64_t tma_va = radv_buffer_get_va(device->tma_bo) + 16;
+ uint32_t desc[4];
+
+ desc[0] = tma_va;
+ desc[1] = S_008F04_BASE_ADDRESS_HI(tma_va >> 32);
+ desc[2] = TMA_BO_SIZE;
+ desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+
+ memcpy(device->tma_ptr, desc, sizeof(desc));
+
+ return true;
}
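The four-dword descriptor built at the end of radv_trap_handler_init() is an AMD buffer resource (V#): address in the first two dwords, size in the third, destination swizzles plus a 32-bit data format in the fourth. A self-contained sketch of that packing follows; the raw shifts and enum values are assumptions mirroring the S_008F04_ and S_008F0C_ field helpers, written out so the sketch stands alone.

#include <stdint.h>

/* Hedged sketch: pack a minimal buffer descriptor (V#) the way
 * radv_trap_handler_init() does. Field offsets and the SQ_SEL_X..W /
 * BUF_DATA_FORMAT_32 values below are assumptions, not taken from the
 * register headers directly. */
static void
pack_buffer_descriptor(uint32_t desc[4], uint64_t va, uint32_t size_bytes)
{
   const uint32_t sel_x = 4, sel_y = 5, sel_z = 6, sel_w = 7; /* assumed SQ_SEL_X..W */
   const uint32_t data_format_32 = 4;                         /* assumed BUF_DATA_FORMAT_32 */

   desc[0] = (uint32_t)va;                  /* low 32 address bits */
   desc[1] = (uint32_t)(va >> 32) & 0xffff; /* BASE_ADDRESS_HI field */
   desc[2] = size_bytes;                    /* NUM_RECORDS for a raw buffer */
   desc[3] = (sel_x << 0) | (sel_y << 3) | (sel_z << 6) | (sel_w << 9) |
             (data_format_32 << 15);        /* DST_SEL_XYZW + DATA_FORMAT */
}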
void
radv_trap_handler_finish(struct radv_device *device)
{
- struct radeon_winsys *ws = device->ws;
+ struct radeon_winsys *ws = device->ws;
- if (unlikely(device->trap_handler_shader)) {
- ws->buffer_make_resident(ws, device->trap_handler_shader->bo, false);
- radv_shader_variant_destroy(device, device->trap_handler_shader);
- }
+ if (unlikely(device->trap_handler_shader)) {
+ ws->buffer_make_resident(ws, device->trap_handler_shader->bo, false);
+ radv_shader_variant_destroy(device, device->trap_handler_shader);
+ }
- if (unlikely(device->tma_bo)) {
- ws->buffer_make_resident(ws, device->tma_bo, false);
- ws->buffer_destroy(ws, device->tma_bo);
- }
+ if (unlikely(device->tma_bo)) {
+ ws->buffer_make_resident(ws, device->tma_bo, false);
+ ws->buffer_destroy(ws, device->tma_bo);
+ }
}
static struct radv_shader_variant *
radv_get_faulty_shader(struct radv_device *device, uint64_t faulty_pc)
{
- struct radv_shader_variant *shader = NULL;
+ struct radv_shader_variant *shader = NULL;
- mtx_lock(&device->shader_slab_mutex);
+ mtx_lock(&device->shader_slab_mutex);
- list_for_each_entry(struct radv_shader_slab, slab, &device->shader_slabs, slabs) {
+ list_for_each_entry(struct radv_shader_slab, slab, &device->shader_slabs, slabs)
+ {
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
#endif
- list_for_each_entry(struct radv_shader_variant, s, &slab->shaders, slab_list) {
+ list_for_each_entry(struct radv_shader_variant, s, &slab->shaders, slab_list)
+ {
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
- uint64_t offset = align_u64(s->bo_offset + s->code_size, 256);
- uint64_t va = radv_buffer_get_va(s->bo);
-
- if (faulty_pc >= va + s->bo_offset && faulty_pc < va + offset) {
- mtx_unlock(&device->shader_slab_mutex);
- return s;
- }
- }
- }
- mtx_unlock(&device->shader_slab_mutex);
-
- return shader;
+ uint64_t offset = align_u64(s->bo_offset + s->code_size, 256);
+ uint64_t va = radv_buffer_get_va(s->bo);
+
+ if (faulty_pc >= va + s->bo_offset && faulty_pc < va + offset) {
+ mtx_unlock(&device->shader_slab_mutex);
+ return s;
+ }
+ }
+ }
+ mtx_unlock(&device->shader_slab_mutex);
+
+ return shader;
}
static void
radv_dump_faulty_shader(struct radv_device *device, uint64_t faulty_pc)
{
- struct radv_shader_variant *shader;
- uint64_t start_addr, end_addr;
- uint32_t instr_offset;
-
- shader = radv_get_faulty_shader(device, faulty_pc);
- if (!shader)
- return;
-
- start_addr = radv_buffer_get_va(shader->bo) + shader->bo_offset;
- end_addr = start_addr + shader->code_size;
- instr_offset = faulty_pc - start_addr;
-
- fprintf(stderr, "Faulty shader found "
- "VA=[0x%"PRIx64"-0x%"PRIx64"], instr_offset=%d\n",
- start_addr, end_addr, instr_offset);
-
- /* Get the list of instructions.
- * Buffer size / 4 is the upper bound of the instruction count.
- */
- unsigned num_inst = 0;
- struct radv_shader_inst *instructions =
- calloc(shader->code_size / 4, sizeof(struct radv_shader_inst));
-
- /* Split the disassembly string into instructions. */
- si_add_split_disasm(shader->disasm_string, start_addr, &num_inst, instructions);
-
- /* Print instructions with annotations. */
- for (unsigned i = 0; i < num_inst; i++) {
- struct radv_shader_inst *inst = &instructions[i];
-
- if (start_addr + inst->offset == faulty_pc) {
- fprintf(stderr, "\n!!! Faulty instruction below !!!\n");
- fprintf(stderr, "%s\n", inst->text);
- fprintf(stderr, "\n");
- } else {
- fprintf(stderr, "%s\n", inst->text);
- }
- }
-
- free(instructions);
+ struct radv_shader_variant *shader;
+ uint64_t start_addr, end_addr;
+ uint32_t instr_offset;
+
+ shader = radv_get_faulty_shader(device, faulty_pc);
+ if (!shader)
+ return;
+
+ start_addr = radv_buffer_get_va(shader->bo) + shader->bo_offset;
+ end_addr = start_addr + shader->code_size;
+ instr_offset = faulty_pc - start_addr;
+
+ fprintf(stderr,
+ "Faulty shader found "
+ "VA=[0x%" PRIx64 "-0x%" PRIx64 "], instr_offset=%d\n",
+ start_addr, end_addr, instr_offset);
+
+ /* Get the list of instructions.
+ * Buffer size / 4 is the upper bound of the instruction count.
+ */
+ unsigned num_inst = 0;
+ struct radv_shader_inst *instructions =
+ calloc(shader->code_size / 4, sizeof(struct radv_shader_inst));
+
+ /* Split the disassembly string into instructions. */
+ si_add_split_disasm(shader->disasm_string, start_addr, &num_inst, instructions);
+
+ /* Print instructions with annotations. */
+ for (unsigned i = 0; i < num_inst; i++) {
+ struct radv_shader_inst *inst = &instructions[i];
+
+ if (start_addr + inst->offset == faulty_pc) {
+ fprintf(stderr, "\n!!! Faulty instruction below !!!\n");
+ fprintf(stderr, "%s\n", inst->text);
+ fprintf(stderr, "\n");
+ } else {
+ fprintf(stderr, "%s\n", inst->text);
+ }
+ }
+
+ free(instructions);
}
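Taken together, radv_get_faulty_shader() and the dump above attribute a faulting PC to one shader by checking it against the shader's slab range and converting it to a byte offset. A compact sketch of that mapping, with names invented for the example and align256() restating align_u64(x, 256):

#include <stdbool.h>
#include <stdint.h>

/* Hedged sketch: the containment test used to attribute a faulting PC to a
 * shader. A shader occupies [slab_va + bo_offset,
 * slab_va + align(bo_offset + code_size, 256)) inside its slab BO. */
static uint64_t
align256(uint64_t x)
{
   return (x + 255) & ~255ull;
}

static bool
pc_in_shader(uint64_t slab_va, uint64_t bo_offset, uint32_t code_size,
             uint64_t faulty_pc, uint32_t *instr_offset)
{
   uint64_t start = slab_va + bo_offset;
   uint64_t end = slab_va + align256(bo_offset + code_size);

   if (faulty_pc < start || faulty_pc >= end)
      return false;

   *instr_offset = (uint32_t)(faulty_pc - start);
   return true;
}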
struct radv_sq_hw_reg {
- uint32_t status;
- uint32_t trap_sts;
- uint32_t hw_id;
- uint32_t ib_sts;
+ uint32_t status;
+ uint32_t trap_sts;
+ uint32_t hw_id;
+ uint32_t ib_sts;
};
static void
radv_dump_sq_hw_regs(struct radv_device *device)
{
- struct radv_sq_hw_reg *regs = (struct radv_sq_hw_reg *)&device->tma_ptr[6];
-
- fprintf(stderr, "\nHardware registers:\n");
- if (device->physical_device->rad_info.chip_class >= GFX10) {
- ac_dump_reg(stderr, device->physical_device->rad_info.chip_class,
- R_000408_SQ_WAVE_STATUS, regs->status, ~0);
- ac_dump_reg(stderr, device->physical_device->rad_info.chip_class,
- R_00040C_SQ_WAVE_TRAPSTS, regs->trap_sts, ~0);
- ac_dump_reg(stderr, device->physical_device->rad_info.chip_class,
- R_00045C_SQ_WAVE_HW_ID1, regs->hw_id, ~0);
- ac_dump_reg(stderr, device->physical_device->rad_info.chip_class,
- R_00041C_SQ_WAVE_IB_STS, regs->ib_sts, ~0);
- } else {
- ac_dump_reg(stderr, device->physical_device->rad_info.chip_class,
- R_000048_SQ_WAVE_STATUS, regs->status, ~0);
- ac_dump_reg(stderr, device->physical_device->rad_info.chip_class,
- R_00004C_SQ_WAVE_TRAPSTS, regs->trap_sts, ~0);
- ac_dump_reg(stderr, device->physical_device->rad_info.chip_class,
- R_000050_SQ_WAVE_HW_ID, regs->hw_id, ~0);
- ac_dump_reg(stderr, device->physical_device->rad_info.chip_class,
- R_00005C_SQ_WAVE_IB_STS, regs->ib_sts, ~0);
- }
- fprintf(stderr, "\n\n");
+ struct radv_sq_hw_reg *regs = (struct radv_sq_hw_reg *)&device->tma_ptr[6];
+
+ fprintf(stderr, "\nHardware registers:\n");
+ if (device->physical_device->rad_info.chip_class >= GFX10) {
+ ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_000408_SQ_WAVE_STATUS,
+ regs->status, ~0);
+ ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00040C_SQ_WAVE_TRAPSTS,
+ regs->trap_sts, ~0);
+ ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00045C_SQ_WAVE_HW_ID1,
+ regs->hw_id, ~0);
+ ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00041C_SQ_WAVE_IB_STS,
+ regs->ib_sts, ~0);
+ } else {
+ ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_000048_SQ_WAVE_STATUS,
+ regs->status, ~0);
+ ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00004C_SQ_WAVE_TRAPSTS,
+ regs->trap_sts, ~0);
+ ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_000050_SQ_WAVE_HW_ID,
+ regs->hw_id, ~0);
+ ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00005C_SQ_WAVE_IB_STS,
+ regs->ib_sts, ~0);
+ }
+ fprintf(stderr, "\n\n");
}
void
radv_check_trap_handler(struct radv_queue *queue)
{
- enum ring_type ring = radv_queue_family_to_ring(queue->queue_family_index);
- struct radv_device *device = queue->device;
- struct radeon_winsys *ws = device->ws;
+ enum ring_type ring = radv_queue_family_to_ring(queue->queue_family_index);
+ struct radv_device *device = queue->device;
+ struct radeon_winsys *ws = device->ws;
- /* Wait for the context to be idle in a finite time. */
- ws->ctx_wait_idle(queue->hw_ctx, ring, queue->queue_idx);
+ /* Wait for the context to be idle in a finite time. */
+ ws->ctx_wait_idle(queue->hw_ctx, ring, queue->queue_idx);
- /* Try to detect if the trap handler has been reached by the hw by
- * looking at ttmp0 which should be non-zero if a shader exception
- * happened.
- */
- if (!device->tma_ptr[4])
- return;
+ /* Try to detect if the trap handler has been reached by the hw by
+ * looking at ttmp0 which should be non-zero if a shader exception
+ * happened.
+ */
+ if (!device->tma_ptr[4])
+ return;
#if 0
fprintf(stderr, "tma_ptr:\n");
@@ -1032,27 +999,27 @@ radv_check_trap_handler(struct radv_queue *queue)
fprintf(stderr, "tma_ptr[%d]=0x%x\n", i, device->tma_ptr[i]);
#endif
- radv_dump_sq_hw_regs(device);
+ radv_dump_sq_hw_regs(device);
- uint32_t ttmp0 = device->tma_ptr[4];
- uint32_t ttmp1 = device->tma_ptr[5];
+ uint32_t ttmp0 = device->tma_ptr[4];
+ uint32_t ttmp1 = device->tma_ptr[5];
- /* According to the ISA docs, 3.10 Trap and Exception Registers:
- *
- * "{ttmp1, ttmp0} = {3'h0, pc_rewind[3:0], HT[0], trapID[7:0], PC[47:0]}"
- *
- * "When the trap handler is entered, the PC of the faulting
- * instruction is: (PC - PC_rewind * 4)."
- * */
- uint8_t trap_id = (ttmp1 >> 16) & 0xff;
- uint8_t ht = (ttmp1 >> 24) & 0x1;
- uint8_t pc_rewind = (ttmp1 >> 25) & 0xf;
- uint64_t pc = (ttmp0 | ((ttmp1 & 0x0000ffffull) << 32)) - (pc_rewind * 4);
+ /* According to the ISA docs, 3.10 Trap and Exception Registers:
+ *
+ * "{ttmp1, ttmp0} = {3'h0, pc_rewind[3:0], HT[0], trapID[7:0], PC[47:0]}"
+ *
+ * "When the trap handler is entered, the PC of the faulting
+ * instruction is: (PC - PC_rewind * 4)."
+ * */
+ uint8_t trap_id = (ttmp1 >> 16) & 0xff;
+ uint8_t ht = (ttmp1 >> 24) & 0x1;
+ uint8_t pc_rewind = (ttmp1 >> 25) & 0xf;
+ uint64_t pc = (ttmp0 | ((ttmp1 & 0x0000ffffull) << 32)) - (pc_rewind * 4);
- fprintf(stderr, "PC=0x%"PRIx64", trapID=%d, HT=%d, PC_rewind=%d\n",
- pc, trap_id, ht, pc_rewind);
+ fprintf(stderr, "PC=0x%" PRIx64 ", trapID=%d, HT=%d, PC_rewind=%d\n", pc, trap_id, ht,
+ pc_rewind);
- radv_dump_faulty_shader(device, pc);
+ radv_dump_faulty_shader(device, pc);
- abort();
+ abort();
}
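The bit slicing above follows the ISA comment quoted in the code: {ttmp1, ttmp0} packs pc_rewind, HT, trapID and a 48-bit PC. A standalone sketch of the same decode, handy for checking a captured ttmp pair by hand; the struct name is made up for the example.

#include <stdint.h>

/* Hedged sketch: decode the {ttmp1, ttmp0} pair exactly as
 * radv_check_trap_handler() does. trap_regs is a name invented here. */
struct trap_regs {
   uint8_t trap_id;
   uint8_t ht;
   uint8_t pc_rewind;
   uint64_t pc; /* PC of the faulting instruction */
};

static struct trap_regs
decode_ttmp(uint32_t ttmp0, uint32_t ttmp1)
{
   struct trap_regs r;

   r.trap_id = (ttmp1 >> 16) & 0xff;
   r.ht = (ttmp1 >> 24) & 0x1;
   r.pc_rewind = (ttmp1 >> 25) & 0xf;
   r.pc = (ttmp0 | ((ttmp1 & 0x0000ffffull) << 32)) - (uint64_t)r.pc_rewind * 4;
   return r;
}

For instance, ttmp1 = 0x02030000 with ttmp0 = 0x1000 decodes to trapID 3, HT 0, PC_rewind 1 and a faulting PC of 0xffc.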
diff --git a/src/amd/vulkan/radv_debug.h b/src/amd/vulkan/radv_debug.h
index 1569fcb108e..922b29af8e8 100644
--- a/src/amd/vulkan/radv_debug.h
+++ b/src/amd/vulkan/radv_debug.h
@@ -28,69 +28,65 @@
/* Please keep docs/envvars.rst up-to-date when you add/remove options. */
enum {
- RADV_DEBUG_NO_FAST_CLEARS = 1ull << 0,
- RADV_DEBUG_NO_DCC = 1ull << 1,
- RADV_DEBUG_DUMP_SHADERS = 1ull << 2,
- RADV_DEBUG_NO_CACHE = 1ull << 3,
- RADV_DEBUG_DUMP_SHADER_STATS = 1ull << 4,
- RADV_DEBUG_NO_HIZ = 1ull << 5,
- RADV_DEBUG_NO_COMPUTE_QUEUE = 1ull << 6,
- RADV_DEBUG_ALL_BOS = 1ull << 7,
- RADV_DEBUG_NO_IBS = 1ull << 8,
- RADV_DEBUG_DUMP_SPIRV = 1ull << 9,
- RADV_DEBUG_VM_FAULTS = 1ull << 10,
- RADV_DEBUG_ZERO_VRAM = 1ull << 11,
- RADV_DEBUG_SYNC_SHADERS = 1ull << 12,
- RADV_DEBUG_PREOPTIR = 1ull << 13,
- RADV_DEBUG_NO_DYNAMIC_BOUNDS = 1ull << 14,
- RADV_DEBUG_NO_OUT_OF_ORDER = 1ull << 15,
- RADV_DEBUG_INFO = 1ull << 16,
- RADV_DEBUG_ERRORS = 1ull << 17,
- RADV_DEBUG_STARTUP = 1ull << 18,
- RADV_DEBUG_CHECKIR = 1ull << 19,
- RADV_DEBUG_NOTHREADLLVM = 1ull << 20,
- RADV_DEBUG_NOBINNING = 1ull << 21,
- RADV_DEBUG_NO_NGG = 1ull << 22,
- RADV_DEBUG_DUMP_META_SHADERS = 1ull << 23,
- RADV_DEBUG_NO_MEMORY_CACHE = 1ull << 24,
- RADV_DEBUG_DISCARD_TO_DEMOTE = 1ull << 25,
- RADV_DEBUG_LLVM = 1ull << 26,
- RADV_DEBUG_FORCE_COMPRESS = 1ull << 27,
- RADV_DEBUG_HANG = 1ull << 28,
- RADV_DEBUG_IMG = 1ull << 29,
- RADV_DEBUG_NO_UMR = 1ull << 30,
- RADV_DEBUG_INVARIANT_GEOM = 1ull << 31,
- RADV_DEBUG_NO_DISPLAY_DCC = 1ull << 32,
- RADV_DEBUG_NO_TC_COMPAT_CMASK= 1ull << 33,
- RADV_DEBUG_NO_VRS_FLAT_SHADING = 1ull << 34,
+ RADV_DEBUG_NO_FAST_CLEARS = 1ull << 0,
+ RADV_DEBUG_NO_DCC = 1ull << 1,
+ RADV_DEBUG_DUMP_SHADERS = 1ull << 2,
+ RADV_DEBUG_NO_CACHE = 1ull << 3,
+ RADV_DEBUG_DUMP_SHADER_STATS = 1ull << 4,
+ RADV_DEBUG_NO_HIZ = 1ull << 5,
+ RADV_DEBUG_NO_COMPUTE_QUEUE = 1ull << 6,
+ RADV_DEBUG_ALL_BOS = 1ull << 7,
+ RADV_DEBUG_NO_IBS = 1ull << 8,
+ RADV_DEBUG_DUMP_SPIRV = 1ull << 9,
+ RADV_DEBUG_VM_FAULTS = 1ull << 10,
+ RADV_DEBUG_ZERO_VRAM = 1ull << 11,
+ RADV_DEBUG_SYNC_SHADERS = 1ull << 12,
+ RADV_DEBUG_PREOPTIR = 1ull << 13,
+ RADV_DEBUG_NO_DYNAMIC_BOUNDS = 1ull << 14,
+ RADV_DEBUG_NO_OUT_OF_ORDER = 1ull << 15,
+ RADV_DEBUG_INFO = 1ull << 16,
+ RADV_DEBUG_ERRORS = 1ull << 17,
+ RADV_DEBUG_STARTUP = 1ull << 18,
+ RADV_DEBUG_CHECKIR = 1ull << 19,
+ RADV_DEBUG_NOTHREADLLVM = 1ull << 20,
+ RADV_DEBUG_NOBINNING = 1ull << 21,
+ RADV_DEBUG_NO_NGG = 1ull << 22,
+ RADV_DEBUG_DUMP_META_SHADERS = 1ull << 23,
+ RADV_DEBUG_NO_MEMORY_CACHE = 1ull << 24,
+ RADV_DEBUG_DISCARD_TO_DEMOTE = 1ull << 25,
+ RADV_DEBUG_LLVM = 1ull << 26,
+ RADV_DEBUG_FORCE_COMPRESS = 1ull << 27,
+ RADV_DEBUG_HANG = 1ull << 28,
+ RADV_DEBUG_IMG = 1ull << 29,
+ RADV_DEBUG_NO_UMR = 1ull << 30,
+ RADV_DEBUG_INVARIANT_GEOM = 1ull << 31,
+ RADV_DEBUG_NO_DISPLAY_DCC = 1ull << 32,
+ RADV_DEBUG_NO_TC_COMPAT_CMASK = 1ull << 33,
+ RADV_DEBUG_NO_VRS_FLAT_SHADING = 1ull << 34,
};
enum {
- RADV_PERFTEST_LOCAL_BOS = 1u << 0,
- RADV_PERFTEST_DCC_MSAA = 1u << 1,
- RADV_PERFTEST_BO_LIST = 1u << 2,
- RADV_PERFTEST_TC_COMPAT_CMASK = 1u << 3,
- RADV_PERFTEST_CS_WAVE_32 = 1u << 4,
- RADV_PERFTEST_PS_WAVE_32 = 1u << 5,
- RADV_PERFTEST_GE_WAVE_32 = 1u << 6,
- RADV_PERFTEST_DFSM = 1u << 7,
- RADV_PERFTEST_NO_SAM = 1u << 8,
- RADV_PERFTEST_SAM = 1u << 9,
- RADV_PERFTEST_DCC_STORES = 1u << 10,
+ RADV_PERFTEST_LOCAL_BOS = 1u << 0,
+ RADV_PERFTEST_DCC_MSAA = 1u << 1,
+ RADV_PERFTEST_BO_LIST = 1u << 2,
+ RADV_PERFTEST_TC_COMPAT_CMASK = 1u << 3,
+ RADV_PERFTEST_CS_WAVE_32 = 1u << 4,
+ RADV_PERFTEST_PS_WAVE_32 = 1u << 5,
+ RADV_PERFTEST_GE_WAVE_32 = 1u << 6,
+ RADV_PERFTEST_DFSM = 1u << 7,
+ RADV_PERFTEST_NO_SAM = 1u << 8,
+ RADV_PERFTEST_SAM = 1u << 9,
+ RADV_PERFTEST_DCC_STORES = 1u << 10,
};
-bool
-radv_init_trace(struct radv_device *device);
+bool radv_init_trace(struct radv_device *device);
void radv_finish_trace(struct radv_device *device);
-void
-radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs);
+void radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs);
-void
-radv_print_spirv(const char *data, uint32_t size, FILE *fp);
+void radv_print_spirv(const char *data, uint32_t size, FILE *fp);
-void
-radv_dump_enabled_options(struct radv_device *device, FILE *f);
+void radv_dump_enabled_options(struct radv_device *device, FILE *f);
bool radv_trap_handler_init(struct radv_device *device);
void radv_trap_handler_finish(struct radv_device *device);
diff --git a/src/amd/vulkan/radv_descriptor_set.c b/src/amd/vulkan/radv_descriptor_set.c
index 0abe0422dbf..25b14939e8f 100644
--- a/src/amd/vulkan/radv_descriptor_set.c
+++ b/src/amd/vulkan/radv_descriptor_set.c
@@ -22,9 +22,9 @@
* IN THE SOFTWARE.
*/
#include <assert.h>
+#include <fcntl.h>
#include <stdbool.h>
#include <string.h>
-#include <fcntl.h>
#include "util/mesa-sha1.h"
#include "radv_private.h"
@@ -33,409 +33,413 @@
#include "vk_format.h"
#include "vk_util.h"
-
-static bool has_equal_immutable_samplers(const VkSampler *samplers, uint32_t count)
+static bool
+has_equal_immutable_samplers(const VkSampler *samplers, uint32_t count)
{
- if (!samplers)
- return false;
- for(uint32_t i = 1; i < count; ++i) {
- if (memcmp(radv_sampler_from_handle(samplers[0])->state,
- radv_sampler_from_handle(samplers[i])->state, 16)) {
- return false;
- }
- }
- return true;
+ if (!samplers)
+ return false;
+ for (uint32_t i = 1; i < count; ++i) {
+ if (memcmp(radv_sampler_from_handle(samplers[0])->state,
+ radv_sampler_from_handle(samplers[i])->state, 16)) {
+ return false;
+ }
+ }
+ return true;
}
-static bool radv_mutable_descriptor_type_size_alignment(const VkMutableDescriptorTypeListVALVE *list,
- uint64_t *out_size, uint64_t *out_align)
+static bool
+radv_mutable_descriptor_type_size_alignment(const VkMutableDescriptorTypeListVALVE *list,
+ uint64_t *out_size, uint64_t *out_align)
{
- uint32_t max_size = 0;
- uint32_t max_align = 0;
-
- for (uint32_t i = 0; i < list->descriptorTypeCount; i++) {
- uint32_t size = 0;
- uint32_t align = 0;
-
- switch (list->pDescriptorTypes[i]) {
- case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
- case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
- case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
- case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
- size = 16;
- align = 16;
- break;
- case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
- case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
- size = 64;
- align = 32;
- break;
- default:
- return false;
- }
-
- max_size = MAX2(max_size, size);
- max_align = MAX2(max_align, align);
- }
-
- *out_size = max_size;
- *out_align = max_align;
- return true;
+ uint32_t max_size = 0;
+ uint32_t max_align = 0;
+
+ for (uint32_t i = 0; i < list->descriptorTypeCount; i++) {
+ uint32_t size = 0;
+ uint32_t align = 0;
+
+ switch (list->pDescriptorTypes[i]) {
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ size = 16;
+ align = 16;
+ break;
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ size = 64;
+ align = 32;
+ break;
+ default:
+ return false;
+ }
+
+ max_size = MAX2(max_size, size);
+ max_align = MAX2(max_align, align);
+ }
+
+ *out_size = max_size;
+ *out_align = max_align;
+ return true;
}
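The switch above, and the larger ones in radv_CreateDescriptorSetLayout() and radv_GetDescriptorSetLayoutSupport() below, encode RADV's per-type descriptor footprints: 16 bytes at 16-byte alignment for buffers and lone samplers, 64/32 for image descriptors (main + fmask), 96/32 for combined image+sampler. A hedged standalone restatement of that table, with invented names, for reference while reading the sizing code that follows:

#include <stdbool.h>
#include <stdint.h>

/* Hedged sketch restating the descriptor footprints used throughout this
 * file; the enum and function are invented for the example. */
enum sketch_desc_kind { SKETCH_BUFFER, SKETCH_SAMPLER, SKETCH_IMAGE, SKETCH_COMBINED };

static bool
sketch_descriptor_footprint(enum sketch_desc_kind kind, uint32_t *size, uint32_t *align)
{
   switch (kind) {
   case SKETCH_BUFFER:
   case SKETCH_SAMPLER:
      *size = 16;  /* one 128-bit buffer resource or sampler state */
      *align = 16;
      return true;
   case SKETCH_IMAGE:
      *size = 64;  /* main descriptor + fmask descriptor */
      *align = 32;
      return true;
   case SKETCH_COMBINED:
      *size = 96;  /* main + fmask + sampler */
      *align = 32;
      return true;
   default:
      return false;
   }
}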
-VkResult radv_CreateDescriptorSetLayout(
- VkDevice _device,
- const VkDescriptorSetLayoutCreateInfo* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkDescriptorSetLayout* pSetLayout)
+VkResult
+radv_CreateDescriptorSetLayout(VkDevice _device, const VkDescriptorSetLayoutCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkDescriptorSetLayout *pSetLayout)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- struct radv_descriptor_set_layout *set_layout;
-
- assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO);
- const VkDescriptorSetLayoutBindingFlagsCreateInfo *variable_flags =
- vk_find_struct_const(pCreateInfo->pNext, DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO);
- const VkMutableDescriptorTypeCreateInfoVALVE *mutable_info =
- vk_find_struct_const(pCreateInfo->pNext, MUTABLE_DESCRIPTOR_TYPE_CREATE_INFO_VALVE);
-
- uint32_t num_bindings = 0;
- uint32_t immutable_sampler_count = 0;
- uint32_t ycbcr_sampler_count = 0;
- for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) {
- num_bindings = MAX2(num_bindings, pCreateInfo->pBindings[j].binding + 1);
- if ((pCreateInfo->pBindings[j].descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER ||
- pCreateInfo->pBindings[j].descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER) &&
- pCreateInfo->pBindings[j].pImmutableSamplers) {
- immutable_sampler_count += pCreateInfo->pBindings[j].descriptorCount;
-
- bool has_ycbcr_sampler = false;
- for (unsigned i = 0; i < pCreateInfo->pBindings[j].descriptorCount; ++i) {
- if (radv_sampler_from_handle(pCreateInfo->pBindings[j].pImmutableSamplers[i])->ycbcr_sampler)
- has_ycbcr_sampler = true;
- }
-
- if (has_ycbcr_sampler)
- ycbcr_sampler_count += pCreateInfo->pBindings[j].descriptorCount;
- }
- }
-
- uint32_t samplers_offset =
- offsetof(struct radv_descriptor_set_layout, binding[num_bindings]);
- size_t size = samplers_offset + immutable_sampler_count * 4 * sizeof(uint32_t);
- if (ycbcr_sampler_count > 0) {
- /* Store block of offsets first, followed by the conversion descriptors (padded to the struct alignment) */
- size += num_bindings * sizeof(uint32_t);
- size = ALIGN(size, alignof(struct radv_sampler_ycbcr_conversion));
- size += ycbcr_sampler_count * sizeof(struct radv_sampler_ycbcr_conversion);
- }
-
- set_layout = vk_zalloc2(&device->vk.alloc, pAllocator, size, 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (!set_layout)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
-
- vk_object_base_init(&device->vk, &set_layout->base,
- VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT);
-
- set_layout->flags = pCreateInfo->flags;
- set_layout->layout_size = size;
-
- /* We just allocate all the samplers at the end of the struct */
- uint32_t *samplers = (uint32_t*)&set_layout->binding[num_bindings];
- struct radv_sampler_ycbcr_conversion *ycbcr_samplers = NULL;
- uint32_t *ycbcr_sampler_offsets = NULL;
-
- if (ycbcr_sampler_count > 0) {
- ycbcr_sampler_offsets = samplers + 4 * immutable_sampler_count;
- set_layout->ycbcr_sampler_offsets_offset = (char*)ycbcr_sampler_offsets - (char*)set_layout;
-
- uintptr_t first_ycbcr_sampler_offset = (uintptr_t)ycbcr_sampler_offsets + sizeof(uint32_t) * num_bindings;
- first_ycbcr_sampler_offset = ALIGN(first_ycbcr_sampler_offset, alignof(struct radv_sampler_ycbcr_conversion));
- ycbcr_samplers = (struct radv_sampler_ycbcr_conversion *)first_ycbcr_sampler_offset;
- } else
- set_layout->ycbcr_sampler_offsets_offset = 0;
-
- VkDescriptorSetLayoutBinding *bindings = NULL;
- VkResult result = vk_create_sorted_bindings(pCreateInfo->pBindings,
- pCreateInfo->bindingCount,
- &bindings);
- if (result != VK_SUCCESS) {
- vk_object_base_finish(&set_layout->base);
- vk_free2(&device->vk.alloc, pAllocator, set_layout);
- return vk_error(device->instance, result);
- }
-
- set_layout->binding_count = num_bindings;
- set_layout->shader_stages = 0;
- set_layout->dynamic_shader_stages = 0;
- set_layout->has_immutable_samplers = false;
- set_layout->size = 0;
-
- uint32_t buffer_count = 0;
- uint32_t dynamic_offset_count = 0;
-
- for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) {
- const VkDescriptorSetLayoutBinding *binding = bindings + j;
- uint32_t b = binding->binding;
- uint32_t alignment = 0;
- unsigned binding_buffer_count = 0;
- uint32_t descriptor_count = binding->descriptorCount;
- bool has_ycbcr_sampler = false;
-
- /* main image + fmask */
- uint32_t max_sampled_image_descriptors = 2;
-
- if (binding->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER &&
- binding->pImmutableSamplers) {
- for (unsigned i = 0; i < binding->descriptorCount; ++i) {
- struct radv_sampler_ycbcr_conversion *conversion =
- radv_sampler_from_handle(binding->pImmutableSamplers[i])->ycbcr_sampler;
-
- if (conversion) {
- has_ycbcr_sampler = true;
- max_sampled_image_descriptors = MAX2(max_sampled_image_descriptors,
- vk_format_get_plane_count(conversion->format));
- }
- }
- }
-
- switch (binding->descriptorType) {
- case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
- case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
- assert(!(pCreateInfo->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
- set_layout->binding[b].dynamic_offset_count = 1;
- set_layout->dynamic_shader_stages |= binding->stageFlags;
- set_layout->binding[b].size = 0;
- binding_buffer_count = 1;
- alignment = 1;
- break;
- case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
- case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
- case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
- case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
- set_layout->binding[b].size = 16;
- binding_buffer_count = 1;
- alignment = 16;
- break;
- case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
- case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
- case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
- /* main descriptor + fmask descriptor */
- set_layout->binding[b].size = 64;
- binding_buffer_count = 1;
- alignment = 32;
- break;
- case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
- /* main descriptor + fmask descriptor + sampler */
- set_layout->binding[b].size = 96;
- binding_buffer_count = 1;
- alignment = 32;
- break;
- case VK_DESCRIPTOR_TYPE_SAMPLER:
- set_layout->binding[b].size = 16;
- alignment = 16;
- break;
- case VK_DESCRIPTOR_TYPE_MUTABLE_VALVE: {
- uint64_t mutable_size = 0, mutable_align = 0;
- radv_mutable_descriptor_type_size_alignment(&mutable_info->pMutableDescriptorTypeLists[j],
- &mutable_size, &mutable_align);
- assert(mutable_size && mutable_align);
- set_layout->binding[b].size = mutable_size;
- alignment = mutable_align;
- break;
- }
- case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
- alignment = 16;
- set_layout->binding[b].size = descriptor_count;
- descriptor_count = 1;
- break;
- default:
- break;
- }
-
- set_layout->size = align(set_layout->size, alignment);
- set_layout->binding[b].type = binding->descriptorType;
- set_layout->binding[b].array_size = descriptor_count;
- set_layout->binding[b].offset = set_layout->size;
- set_layout->binding[b].buffer_offset = buffer_count;
- set_layout->binding[b].dynamic_offset_offset = dynamic_offset_count;
-
- if (variable_flags && binding->binding < variable_flags->bindingCount &&
- (variable_flags->pBindingFlags[binding->binding] & VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT_EXT)) {
- assert(!binding->pImmutableSamplers); /* Terribly ill defined how many samplers are valid */
- assert(binding->binding == num_bindings - 1);
-
- set_layout->has_variable_descriptors = true;
- }
-
- if ((binding->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER ||
- binding->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER) &&
- binding->pImmutableSamplers) {
- set_layout->binding[b].immutable_samplers_offset = samplers_offset;
- set_layout->binding[b].immutable_samplers_equal =
- has_equal_immutable_samplers(binding->pImmutableSamplers, binding->descriptorCount);
- set_layout->has_immutable_samplers = true;
-
-
- for (uint32_t i = 0; i < binding->descriptorCount; i++)
- memcpy(samplers + 4 * i, &radv_sampler_from_handle(binding->pImmutableSamplers[i])->state, 16);
-
- /* Don't reserve space for the samplers if they're not accessed. */
- if (set_layout->binding[b].immutable_samplers_equal) {
- if (binding->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER &&
- max_sampled_image_descriptors <= 2)
- set_layout->binding[b].size -= 32;
- else if (binding->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER)
- set_layout->binding[b].size -= 16;
- }
- samplers += 4 * binding->descriptorCount;
- samplers_offset += 4 * sizeof(uint32_t) * binding->descriptorCount;
-
- if (has_ycbcr_sampler) {
- ycbcr_sampler_offsets[b] = (const char*)ycbcr_samplers - (const char*)set_layout;
- for (uint32_t i = 0; i < binding->descriptorCount; i++) {
- if (radv_sampler_from_handle(binding->pImmutableSamplers[i])->ycbcr_sampler)
- ycbcr_samplers[i] = *radv_sampler_from_handle(binding->pImmutableSamplers[i])->ycbcr_sampler;
- else
- ycbcr_samplers[i].format = VK_FORMAT_UNDEFINED;
- }
- ycbcr_samplers += binding->descriptorCount;
- }
- }
-
- set_layout->size += descriptor_count * set_layout->binding[b].size;
- buffer_count += descriptor_count * binding_buffer_count;
- dynamic_offset_count += descriptor_count *
- set_layout->binding[b].dynamic_offset_count;
- set_layout->shader_stages |= binding->stageFlags;
- }
-
- free(bindings);
-
- set_layout->buffer_count = buffer_count;
- set_layout->dynamic_offset_count = dynamic_offset_count;
-
- *pSetLayout = radv_descriptor_set_layout_to_handle(set_layout);
-
- return VK_SUCCESS;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ struct radv_descriptor_set_layout *set_layout;
+
+ assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO);
+ const VkDescriptorSetLayoutBindingFlagsCreateInfo *variable_flags =
+ vk_find_struct_const(pCreateInfo->pNext, DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO);
+ const VkMutableDescriptorTypeCreateInfoVALVE *mutable_info =
+ vk_find_struct_const(pCreateInfo->pNext, MUTABLE_DESCRIPTOR_TYPE_CREATE_INFO_VALVE);
+
+ uint32_t num_bindings = 0;
+ uint32_t immutable_sampler_count = 0;
+ uint32_t ycbcr_sampler_count = 0;
+ for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) {
+ num_bindings = MAX2(num_bindings, pCreateInfo->pBindings[j].binding + 1);
+ if ((pCreateInfo->pBindings[j].descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER ||
+ pCreateInfo->pBindings[j].descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER) &&
+ pCreateInfo->pBindings[j].pImmutableSamplers) {
+ immutable_sampler_count += pCreateInfo->pBindings[j].descriptorCount;
+
+ bool has_ycbcr_sampler = false;
+ for (unsigned i = 0; i < pCreateInfo->pBindings[j].descriptorCount; ++i) {
+ if (radv_sampler_from_handle(pCreateInfo->pBindings[j].pImmutableSamplers[i])
+ ->ycbcr_sampler)
+ has_ycbcr_sampler = true;
+ }
+
+ if (has_ycbcr_sampler)
+ ycbcr_sampler_count += pCreateInfo->pBindings[j].descriptorCount;
+ }
+ }
+
+ uint32_t samplers_offset = offsetof(struct radv_descriptor_set_layout, binding[num_bindings]);
+ size_t size = samplers_offset + immutable_sampler_count * 4 * sizeof(uint32_t);
+ if (ycbcr_sampler_count > 0) {
+ /* Store block of offsets first, followed by the conversion descriptors (padded to the struct
+ * alignment) */
+ size += num_bindings * sizeof(uint32_t);
+ size = ALIGN(size, alignof(struct radv_sampler_ycbcr_conversion));
+ size += ycbcr_sampler_count * sizeof(struct radv_sampler_ycbcr_conversion);
+ }
+
+ set_layout =
+ vk_zalloc2(&device->vk.alloc, pAllocator, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (!set_layout)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ vk_object_base_init(&device->vk, &set_layout->base, VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT);
+
+ set_layout->flags = pCreateInfo->flags;
+ set_layout->layout_size = size;
+
+ /* We just allocate all the samplers at the end of the struct */
+ uint32_t *samplers = (uint32_t *)&set_layout->binding[num_bindings];
+ struct radv_sampler_ycbcr_conversion *ycbcr_samplers = NULL;
+ uint32_t *ycbcr_sampler_offsets = NULL;
+
+ if (ycbcr_sampler_count > 0) {
+ ycbcr_sampler_offsets = samplers + 4 * immutable_sampler_count;
+ set_layout->ycbcr_sampler_offsets_offset = (char *)ycbcr_sampler_offsets - (char *)set_layout;
+
+ uintptr_t first_ycbcr_sampler_offset =
+ (uintptr_t)ycbcr_sampler_offsets + sizeof(uint32_t) * num_bindings;
+ first_ycbcr_sampler_offset =
+ ALIGN(first_ycbcr_sampler_offset, alignof(struct radv_sampler_ycbcr_conversion));
+ ycbcr_samplers = (struct radv_sampler_ycbcr_conversion *)first_ycbcr_sampler_offset;
+ } else
+ set_layout->ycbcr_sampler_offsets_offset = 0;
+
+ VkDescriptorSetLayoutBinding *bindings = NULL;
+ VkResult result =
+ vk_create_sorted_bindings(pCreateInfo->pBindings, pCreateInfo->bindingCount, &bindings);
+ if (result != VK_SUCCESS) {
+ vk_object_base_finish(&set_layout->base);
+ vk_free2(&device->vk.alloc, pAllocator, set_layout);
+ return vk_error(device->instance, result);
+ }
+
+ set_layout->binding_count = num_bindings;
+ set_layout->shader_stages = 0;
+ set_layout->dynamic_shader_stages = 0;
+ set_layout->has_immutable_samplers = false;
+ set_layout->size = 0;
+
+ uint32_t buffer_count = 0;
+ uint32_t dynamic_offset_count = 0;
+
+ for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) {
+ const VkDescriptorSetLayoutBinding *binding = bindings + j;
+ uint32_t b = binding->binding;
+ uint32_t alignment = 0;
+ unsigned binding_buffer_count = 0;
+ uint32_t descriptor_count = binding->descriptorCount;
+ bool has_ycbcr_sampler = false;
+
+ /* main image + fmask */
+ uint32_t max_sampled_image_descriptors = 2;
+
+ if (binding->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER &&
+ binding->pImmutableSamplers) {
+ for (unsigned i = 0; i < binding->descriptorCount; ++i) {
+ struct radv_sampler_ycbcr_conversion *conversion =
+ radv_sampler_from_handle(binding->pImmutableSamplers[i])->ycbcr_sampler;
+
+ if (conversion) {
+ has_ycbcr_sampler = true;
+ max_sampled_image_descriptors = MAX2(max_sampled_image_descriptors,
+ vk_format_get_plane_count(conversion->format));
+ }
+ }
+ }
+
+ switch (binding->descriptorType) {
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+ assert(!(pCreateInfo->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
+ set_layout->binding[b].dynamic_offset_count = 1;
+ set_layout->dynamic_shader_stages |= binding->stageFlags;
+ set_layout->binding[b].size = 0;
+ binding_buffer_count = 1;
+ alignment = 1;
+ break;
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ set_layout->binding[b].size = 16;
+ binding_buffer_count = 1;
+ alignment = 16;
+ break;
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+ /* main descriptor + fmask descriptor */
+ set_layout->binding[b].size = 64;
+ binding_buffer_count = 1;
+ alignment = 32;
+ break;
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ /* main descriptor + fmask descriptor + sampler */
+ set_layout->binding[b].size = 96;
+ binding_buffer_count = 1;
+ alignment = 32;
+ break;
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ set_layout->binding[b].size = 16;
+ alignment = 16;
+ break;
+ case VK_DESCRIPTOR_TYPE_MUTABLE_VALVE: {
+ uint64_t mutable_size = 0, mutable_align = 0;
+ radv_mutable_descriptor_type_size_alignment(&mutable_info->pMutableDescriptorTypeLists[j],
+ &mutable_size, &mutable_align);
+ assert(mutable_size && mutable_align);
+ set_layout->binding[b].size = mutable_size;
+ alignment = mutable_align;
+ break;
+ }
+ case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
+ alignment = 16;
+ set_layout->binding[b].size = descriptor_count;
+ descriptor_count = 1;
+ break;
+ default:
+ break;
+ }
+
+ set_layout->size = align(set_layout->size, alignment);
+ set_layout->binding[b].type = binding->descriptorType;
+ set_layout->binding[b].array_size = descriptor_count;
+ set_layout->binding[b].offset = set_layout->size;
+ set_layout->binding[b].buffer_offset = buffer_count;
+ set_layout->binding[b].dynamic_offset_offset = dynamic_offset_count;
+
+ if (variable_flags && binding->binding < variable_flags->bindingCount &&
+ (variable_flags->pBindingFlags[binding->binding] &
+ VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT_EXT)) {
+ assert(
+ !binding->pImmutableSamplers); /* Terribly ill defined how many samplers are valid */
+ assert(binding->binding == num_bindings - 1);
+
+ set_layout->has_variable_descriptors = true;
+ }
+
+ if ((binding->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER ||
+ binding->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER) &&
+ binding->pImmutableSamplers) {
+ set_layout->binding[b].immutable_samplers_offset = samplers_offset;
+ set_layout->binding[b].immutable_samplers_equal =
+ has_equal_immutable_samplers(binding->pImmutableSamplers, binding->descriptorCount);
+ set_layout->has_immutable_samplers = true;
+
+ for (uint32_t i = 0; i < binding->descriptorCount; i++)
+ memcpy(samplers + 4 * i,
+ &radv_sampler_from_handle(binding->pImmutableSamplers[i])->state, 16);
+
+ /* Don't reserve space for the samplers if they're not accessed. */
+ if (set_layout->binding[b].immutable_samplers_equal) {
+ if (binding->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER &&
+ max_sampled_image_descriptors <= 2)
+ set_layout->binding[b].size -= 32;
+ else if (binding->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER)
+ set_layout->binding[b].size -= 16;
+ }
+ samplers += 4 * binding->descriptorCount;
+ samplers_offset += 4 * sizeof(uint32_t) * binding->descriptorCount;
+
+ if (has_ycbcr_sampler) {
+ ycbcr_sampler_offsets[b] = (const char *)ycbcr_samplers - (const char *)set_layout;
+ for (uint32_t i = 0; i < binding->descriptorCount; i++) {
+ if (radv_sampler_from_handle(binding->pImmutableSamplers[i])->ycbcr_sampler)
+ ycbcr_samplers[i] =
+ *radv_sampler_from_handle(binding->pImmutableSamplers[i])->ycbcr_sampler;
+ else
+ ycbcr_samplers[i].format = VK_FORMAT_UNDEFINED;
+ }
+ ycbcr_samplers += binding->descriptorCount;
+ }
+ }
+
+ set_layout->size += descriptor_count * set_layout->binding[b].size;
+ buffer_count += descriptor_count * binding_buffer_count;
+ dynamic_offset_count += descriptor_count * set_layout->binding[b].dynamic_offset_count;
+ set_layout->shader_stages |= binding->stageFlags;
+ }
+
+ free(bindings);
+
+ set_layout->buffer_count = buffer_count;
+ set_layout->dynamic_offset_count = dynamic_offset_count;
+
+ *pSetLayout = radv_descriptor_set_layout_to_handle(set_layout);
+
+ return VK_SUCCESS;
}
-void radv_DestroyDescriptorSetLayout(
- VkDevice _device,
- VkDescriptorSetLayout _set_layout,
- const VkAllocationCallbacks* pAllocator)
+void
+radv_DestroyDescriptorSetLayout(VkDevice _device, VkDescriptorSetLayout _set_layout,
+ const VkAllocationCallbacks *pAllocator)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_descriptor_set_layout, set_layout, _set_layout);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_descriptor_set_layout, set_layout, _set_layout);
- if (!set_layout)
- return;
+ if (!set_layout)
+ return;
- vk_object_base_finish(&set_layout->base);
- vk_free2(&device->vk.alloc, pAllocator, set_layout);
+ vk_object_base_finish(&set_layout->base);
+ vk_free2(&device->vk.alloc, pAllocator, set_layout);
}
-void radv_GetDescriptorSetLayoutSupport(VkDevice device,
- const VkDescriptorSetLayoutCreateInfo* pCreateInfo,
- VkDescriptorSetLayoutSupport* pSupport)
+void
+radv_GetDescriptorSetLayoutSupport(VkDevice device,
+ const VkDescriptorSetLayoutCreateInfo *pCreateInfo,
+ VkDescriptorSetLayoutSupport *pSupport)
{
- VkDescriptorSetLayoutBinding *bindings = NULL;
- VkResult result = vk_create_sorted_bindings(pCreateInfo->pBindings,
- pCreateInfo->bindingCount,
- &bindings);
- if (result != VK_SUCCESS) {
- pSupport->supported = false;
- return;
- }
-
- const VkDescriptorSetLayoutBindingFlagsCreateInfo *variable_flags =
- vk_find_struct_const(pCreateInfo->pNext, DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO);
- VkDescriptorSetVariableDescriptorCountLayoutSupport *variable_count =
- vk_find_struct((void*)pCreateInfo->pNext, DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_LAYOUT_SUPPORT);
- const VkMutableDescriptorTypeCreateInfoVALVE *mutable_info =
- vk_find_struct_const(pCreateInfo->pNext, MUTABLE_DESCRIPTOR_TYPE_CREATE_INFO_VALVE);
- if (variable_count) {
- variable_count->maxVariableDescriptorCount = 0;
- }
-
- bool supported = true;
- uint64_t size = 0;
- for (uint32_t i = 0; i < pCreateInfo->bindingCount; i++) {
- const VkDescriptorSetLayoutBinding *binding = bindings + i;
-
- uint64_t descriptor_size = 0;
- uint64_t descriptor_alignment = 1;
- uint32_t descriptor_count = binding->descriptorCount;
- switch (binding->descriptorType) {
- case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
- case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
- break;
- case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
- case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
- case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
- case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
- descriptor_size = 16;
- descriptor_alignment = 16;
- break;
- case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
- case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
- case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
- descriptor_size = 64;
- descriptor_alignment = 32;
- break;
- case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
- if (!has_equal_immutable_samplers(binding->pImmutableSamplers, descriptor_count)) {
- descriptor_size = 64;
- } else {
- descriptor_size = 96;
- }
- descriptor_alignment = 32;
- break;
- case VK_DESCRIPTOR_TYPE_SAMPLER:
- if (!has_equal_immutable_samplers(binding->pImmutableSamplers, descriptor_count)) {
- descriptor_size = 16;
- descriptor_alignment = 16;
- }
- break;
- case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
- descriptor_alignment = 16;
- descriptor_size = descriptor_count;
- descriptor_count = 1;
- break;
- case VK_DESCRIPTOR_TYPE_MUTABLE_VALVE:
- if (!radv_mutable_descriptor_type_size_alignment(&mutable_info->pMutableDescriptorTypeLists[i],
- &descriptor_size, &descriptor_alignment)) {
- supported = false;
- }
- break;
- default:
- break;
- }
-
- if (size && !align_u64(size, descriptor_alignment)) {
- supported = false;
- }
- size = align_u64(size, descriptor_alignment);
-
- uint64_t max_count = INT32_MAX;
- if (binding->descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT)
- max_count = INT32_MAX - size;
- else if (descriptor_size)
- max_count = (INT32_MAX - size) / descriptor_size;
-
- if (max_count < descriptor_count) {
- supported = false;
- }
- if (variable_flags && binding->binding <variable_flags->bindingCount && variable_count &&
- (variable_flags->pBindingFlags[binding->binding] & VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT_EXT)) {
- variable_count->maxVariableDescriptorCount = MIN2(UINT32_MAX, max_count);
- }
- size += descriptor_count * descriptor_size;
- }
-
- free(bindings);
-
- pSupport->supported = supported;
+ VkDescriptorSetLayoutBinding *bindings = NULL;
+ VkResult result =
+ vk_create_sorted_bindings(pCreateInfo->pBindings, pCreateInfo->bindingCount, &bindings);
+ if (result != VK_SUCCESS) {
+ pSupport->supported = false;
+ return;
+ }
+
+ const VkDescriptorSetLayoutBindingFlagsCreateInfo *variable_flags =
+ vk_find_struct_const(pCreateInfo->pNext, DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO);
+ VkDescriptorSetVariableDescriptorCountLayoutSupport *variable_count = vk_find_struct(
+ (void *)pCreateInfo->pNext, DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_LAYOUT_SUPPORT);
+ const VkMutableDescriptorTypeCreateInfoVALVE *mutable_info =
+ vk_find_struct_const(pCreateInfo->pNext, MUTABLE_DESCRIPTOR_TYPE_CREATE_INFO_VALVE);
+ if (variable_count) {
+ variable_count->maxVariableDescriptorCount = 0;
+ }
+
+ bool supported = true;
+ uint64_t size = 0;
+ for (uint32_t i = 0; i < pCreateInfo->bindingCount; i++) {
+ const VkDescriptorSetLayoutBinding *binding = bindings + i;
+
+ uint64_t descriptor_size = 0;
+ uint64_t descriptor_alignment = 1;
+ uint32_t descriptor_count = binding->descriptorCount;
+ switch (binding->descriptorType) {
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+ break;
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ descriptor_size = 16;
+ descriptor_alignment = 16;
+ break;
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+ descriptor_size = 64;
+ descriptor_alignment = 32;
+ break;
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ if (!has_equal_immutable_samplers(binding->pImmutableSamplers, descriptor_count)) {
+ descriptor_size = 64;
+ } else {
+ descriptor_size = 96;
+ }
+ descriptor_alignment = 32;
+ break;
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ if (!has_equal_immutable_samplers(binding->pImmutableSamplers, descriptor_count)) {
+ descriptor_size = 16;
+ descriptor_alignment = 16;
+ }
+ break;
+ case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
+ descriptor_alignment = 16;
+ descriptor_size = descriptor_count;
+ descriptor_count = 1;
+ break;
+ case VK_DESCRIPTOR_TYPE_MUTABLE_VALVE:
+ if (!radv_mutable_descriptor_type_size_alignment(
+ &mutable_info->pMutableDescriptorTypeLists[i], &descriptor_size,
+ &descriptor_alignment)) {
+ supported = false;
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (size && !align_u64(size, descriptor_alignment)) {
+ supported = false;
+ }
+ size = align_u64(size, descriptor_alignment);
+
+ uint64_t max_count = INT32_MAX;
+ if (binding->descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT)
+ max_count = INT32_MAX - size;
+ else if (descriptor_size)
+ max_count = (INT32_MAX - size) / descriptor_size;
+
+ if (max_count < descriptor_count) {
+ supported = false;
+ }
+ if (variable_flags && binding->binding < variable_flags->bindingCount && variable_count &&
+ (variable_flags->pBindingFlags[binding->binding] &
+ VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT_EXT)) {
+ variable_count->maxVariableDescriptorCount = MIN2(UINT32_MAX, max_count);
+ }
+ size += descriptor_count * descriptor_size;
+ }
+
+ free(bindings);
+
+ pSupport->supported = supported;
}
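
The support query above is pure size accounting: each binding contributes a fixed per-type footprint (16 bytes for buffer and sampler descriptors, 64 for images and input attachments, 64 or 96 for combined image+sampler, the raw byte count for inline uniform blocks), and the layout is rejected once the aligned running total would exceed INT32_MAX. The stand-alone C sketch below mirrors that accounting with hypothetical names; the per-type special cases are assumed to already be folded into each binding's size and alignment (dynamic buffers contribute no GPU memory, inline uniform blocks count bytes rather than descriptors).

/* Minimal sketch of the size/overflow accounting above.  The byte sizes come
 * from the switch in radv_GetDescriptorSetLayoutSupport; struct and function
 * names are hypothetical and only for illustration. */
#include <stdbool.h>
#include <stdint.h>

struct example_binding {
   uint64_t descriptor_size;      /* bytes per descriptor, 0 for dynamic buffers */
   uint64_t descriptor_alignment; /* power-of-two alignment in bytes */
   uint32_t descriptor_count;
};

static uint64_t
example_align_u64(uint64_t v, uint64_t a)
{
   /* a must be a power of two, which holds for the 1/16/32 used above. */
   return (v + a - 1) & ~(a - 1);
}

static bool
example_layout_fits(const struct example_binding *bindings, unsigned count)
{
   uint64_t size = 0;

   for (unsigned i = 0; i < count; i++) {
      const struct example_binding *b = &bindings[i];

      /* Guard against wrap-around when aligning, mirroring the driver's check. */
      if (size && !example_align_u64(size, b->descriptor_alignment))
         return false;
      size = example_align_u64(size, b->descriptor_alignment);

      /* How many descriptors of this size still fit under the 31-bit budget? */
      uint64_t max_count = INT32_MAX;
      if (b->descriptor_size)
         max_count = (INT32_MAX - size) / b->descriptor_size;
      if (max_count < b->descriptor_count)
         return false;

      size += (uint64_t)b->descriptor_count * b->descriptor_size;
   }
   return true;
}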
/*
@@ -443,1094 +447,1057 @@ void radv_GetDescriptorSetLayoutSupport(VkDevice device,
* just multiple descriptor set layouts pasted together.
*/
-VkResult radv_CreatePipelineLayout(
- VkDevice _device,
- const VkPipelineLayoutCreateInfo* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkPipelineLayout* pPipelineLayout)
+VkResult
+radv_CreatePipelineLayout(VkDevice _device, const VkPipelineLayoutCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkPipelineLayout *pPipelineLayout)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- struct radv_pipeline_layout *layout;
- struct mesa_sha1 ctx;
-
- assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO);
-
- layout = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*layout), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (layout == NULL)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
-
- vk_object_base_init(&device->vk, &layout->base,
- VK_OBJECT_TYPE_PIPELINE_LAYOUT);
-
- layout->num_sets = pCreateInfo->setLayoutCount;
-
- unsigned dynamic_offset_count = 0;
- uint16_t dynamic_shader_stages = 0;
-
-
- _mesa_sha1_init(&ctx);
- for (uint32_t set = 0; set < pCreateInfo->setLayoutCount; set++) {
- RADV_FROM_HANDLE(radv_descriptor_set_layout, set_layout,
- pCreateInfo->pSetLayouts[set]);
- layout->set[set].layout = set_layout;
-
- layout->set[set].dynamic_offset_start = dynamic_offset_count;
- layout->set[set].dynamic_offset_count = 0;
- layout->set[set].dynamic_offset_stages = 0;
-
- for (uint32_t b = 0; b < set_layout->binding_count; b++) {
- layout->set[set].dynamic_offset_count +=
- set_layout->binding[b].array_size * set_layout->binding[b].dynamic_offset_count;
- layout->set[set].dynamic_offset_stages |= set_layout->dynamic_shader_stages;
- }
- dynamic_offset_count += layout->set[set].dynamic_offset_count;
- dynamic_shader_stages |= layout->set[set].dynamic_offset_stages;
-
- /* Hash the entire set layout except for the vk_object_base. The
- * rest of the set layout is carefully constructed to not have
- * pointers so a full hash instead of a per-field hash should be ok. */
- _mesa_sha1_update(&ctx,
- (const char*)set_layout + sizeof(struct vk_object_base),
- set_layout->layout_size - sizeof(struct vk_object_base));
- }
-
- layout->dynamic_offset_count = dynamic_offset_count;
- layout->dynamic_shader_stages = dynamic_shader_stages;
- layout->push_constant_size = 0;
-
- for (unsigned i = 0; i < pCreateInfo->pushConstantRangeCount; ++i) {
- const VkPushConstantRange *range = pCreateInfo->pPushConstantRanges + i;
- layout->push_constant_size = MAX2(layout->push_constant_size,
- range->offset + range->size);
- }
-
- layout->push_constant_size = align(layout->push_constant_size, 16);
- _mesa_sha1_update(&ctx, &layout->push_constant_size,
- sizeof(layout->push_constant_size));
- _mesa_sha1_final(&ctx, layout->sha1);
- *pPipelineLayout = radv_pipeline_layout_to_handle(layout);
-
- return VK_SUCCESS;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ struct radv_pipeline_layout *layout;
+ struct mesa_sha1 ctx;
+
+ assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO);
+
+ layout = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*layout), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (layout == NULL)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ vk_object_base_init(&device->vk, &layout->base, VK_OBJECT_TYPE_PIPELINE_LAYOUT);
+
+ layout->num_sets = pCreateInfo->setLayoutCount;
+
+ unsigned dynamic_offset_count = 0;
+ uint16_t dynamic_shader_stages = 0;
+
+ _mesa_sha1_init(&ctx);
+ for (uint32_t set = 0; set < pCreateInfo->setLayoutCount; set++) {
+ RADV_FROM_HANDLE(radv_descriptor_set_layout, set_layout, pCreateInfo->pSetLayouts[set]);
+ layout->set[set].layout = set_layout;
+
+ layout->set[set].dynamic_offset_start = dynamic_offset_count;
+ layout->set[set].dynamic_offset_count = 0;
+ layout->set[set].dynamic_offset_stages = 0;
+
+ for (uint32_t b = 0; b < set_layout->binding_count; b++) {
+ layout->set[set].dynamic_offset_count +=
+ set_layout->binding[b].array_size * set_layout->binding[b].dynamic_offset_count;
+ layout->set[set].dynamic_offset_stages |= set_layout->dynamic_shader_stages;
+ }
+ dynamic_offset_count += layout->set[set].dynamic_offset_count;
+ dynamic_shader_stages |= layout->set[set].dynamic_offset_stages;
+
+ /* Hash the entire set layout except for the vk_object_base. The
+ * rest of the set layout is carefully constructed to not have
+ * pointers so a full hash instead of a per-field hash should be ok. */
+ _mesa_sha1_update(&ctx, (const char *)set_layout + sizeof(struct vk_object_base),
+ set_layout->layout_size - sizeof(struct vk_object_base));
+ }
+
+ layout->dynamic_offset_count = dynamic_offset_count;
+ layout->dynamic_shader_stages = dynamic_shader_stages;
+ layout->push_constant_size = 0;
+
+ for (unsigned i = 0; i < pCreateInfo->pushConstantRangeCount; ++i) {
+ const VkPushConstantRange *range = pCreateInfo->pPushConstantRanges + i;
+ layout->push_constant_size = MAX2(layout->push_constant_size, range->offset + range->size);
+ }
+
+ layout->push_constant_size = align(layout->push_constant_size, 16);
+ _mesa_sha1_update(&ctx, &layout->push_constant_size, sizeof(layout->push_constant_size));
+ _mesa_sha1_final(&ctx, layout->sha1);
+ *pPipelineLayout = radv_pipeline_layout_to_handle(layout);
+
+ return VK_SUCCESS;
}
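
The pipeline-layout hash above only works because the set layout is stored as a flat, pointer-free blob, so hashing its raw bytes past the vk_object_base header is stable across processes. Below is a minimal sketch of the same "skip the object header, hash the rest" pattern; FNV-1a stands in for the SHA-1 the driver uses, the struct names are hypothetical, and it assumes the structs were zero-initialized so padding bytes are deterministic.

/* Hash everything after the object header of each layout, then mix in the
 * push-constant size, as the function above does. */
#include <stddef.h>
#include <stdint.h>

struct example_object_base {
   uint32_t type; /* stands in for vk_object_base */
};

struct example_set_layout {
   struct example_object_base base;
   uint32_t binding_count;
   uint32_t size;
   /* ... more plain, pointer-free fields ... */
};

static uint64_t
example_fnv1a(uint64_t h, const void *data, size_t len)
{
   const uint8_t *p = data;
   for (size_t i = 0; i < len; i++)
      h = (h ^ p[i]) * 0x100000001b3ull; /* FNV-1a prime */
   return h;
}

static uint64_t
example_hash_layouts(const struct example_set_layout *const *layouts, unsigned count,
                     uint32_t push_constant_size)
{
   uint64_t h = 0xcbf29ce484222325ull; /* FNV offset basis */

   for (unsigned i = 0; i < count; i++) {
      /* Skip the object header; the remaining bytes are a stable image. */
      h = example_fnv1a(h, (const char *)layouts[i] + sizeof(struct example_object_base),
                        sizeof(*layouts[i]) - sizeof(struct example_object_base));
   }
   return example_fnv1a(h, &push_constant_size, sizeof(push_constant_size));
}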
-void radv_DestroyPipelineLayout(
- VkDevice _device,
- VkPipelineLayout _pipelineLayout,
- const VkAllocationCallbacks* pAllocator)
+void
+radv_DestroyPipelineLayout(VkDevice _device, VkPipelineLayout _pipelineLayout,
+ const VkAllocationCallbacks *pAllocator)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_pipeline_layout, pipeline_layout, _pipelineLayout);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_pipeline_layout, pipeline_layout, _pipelineLayout);
- if (!pipeline_layout)
- return;
+ if (!pipeline_layout)
+ return;
- vk_object_base_finish(&pipeline_layout->base);
- vk_free2(&device->vk.alloc, pAllocator, pipeline_layout);
+ vk_object_base_finish(&pipeline_layout->base);
+ vk_free2(&device->vk.alloc, pAllocator, pipeline_layout);
}
static VkResult
-radv_descriptor_set_create(struct radv_device *device,
- struct radv_descriptor_pool *pool,
- const struct radv_descriptor_set_layout *layout,
- const uint32_t *variable_count,
- struct radv_descriptor_set **out_set)
+radv_descriptor_set_create(struct radv_device *device, struct radv_descriptor_pool *pool,
+ const struct radv_descriptor_set_layout *layout,
+ const uint32_t *variable_count, struct radv_descriptor_set **out_set)
{
- struct radv_descriptor_set *set;
- uint32_t buffer_count = layout->buffer_count;
- if (variable_count) {
- unsigned stride = 1;
- if (layout->binding[layout->binding_count - 1].type == VK_DESCRIPTOR_TYPE_SAMPLER ||
- layout->binding[layout->binding_count - 1].type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT)
- stride = 0;
- buffer_count = layout->binding[layout->binding_count - 1].buffer_offset +
- *variable_count * stride;
- }
- unsigned range_offset = sizeof(struct radv_descriptor_set_header) +
- sizeof(struct radeon_winsys_bo *) * buffer_count;
- unsigned mem_size = range_offset +
- sizeof(struct radv_descriptor_range) * layout->dynamic_offset_count;
-
- if (pool->host_memory_base) {
- if (pool->host_memory_end - pool->host_memory_ptr < mem_size)
- return vk_error(device->instance, VK_ERROR_OUT_OF_POOL_MEMORY);
-
- set = (struct radv_descriptor_set*)pool->host_memory_ptr;
- pool->host_memory_ptr += mem_size;
- memset(set->descriptors, 0, sizeof(struct radeon_winsys_bo *) * buffer_count);
- } else {
- set = vk_alloc2(&device->vk.alloc, NULL, mem_size, 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
-
- if (!set)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- }
-
- memset(set, 0, mem_size);
-
- vk_object_base_init(&device->vk, &set->header.base,
- VK_OBJECT_TYPE_DESCRIPTOR_SET);
-
- if (layout->dynamic_offset_count) {
- set->header.dynamic_descriptors = (struct radv_descriptor_range*)((uint8_t*)set + range_offset);
- }
-
- set->header.layout = layout;
- set->header.buffer_count = buffer_count;
- uint32_t layout_size = layout->size;
- if (variable_count) {
- assert(layout->has_variable_descriptors);
- uint32_t stride = layout->binding[layout->binding_count - 1].size;
- if (layout->binding[layout->binding_count - 1].type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT)
- stride = 1;
-
- layout_size = layout->binding[layout->binding_count - 1].offset +
- *variable_count * stride;
- }
- layout_size = align_u32(layout_size, 32);
- if (layout_size) {
- set->header.size = layout_size;
-
- if (!pool->host_memory_base && pool->entry_count == pool->max_entry_count) {
- vk_free2(&device->vk.alloc, NULL, set);
- return vk_error(device->instance, VK_ERROR_OUT_OF_POOL_MEMORY);
- }
-
- /* try to allocate linearly first, so that we don't spend
- * time looking for gaps if the app only allocates &
- * resets via the pool. */
- if (pool->current_offset + layout_size <= pool->size) {
- set->header.bo = pool->bo;
- set->header.mapped_ptr = (uint32_t*)(pool->mapped_ptr + pool->current_offset);
- set->header.va = pool->bo ? (radv_buffer_get_va(set->header.bo) + pool->current_offset) : 0;
- if (!pool->host_memory_base) {
- pool->entries[pool->entry_count].offset = pool->current_offset;
- pool->entries[pool->entry_count].size = layout_size;
- pool->entries[pool->entry_count].set = set;
- pool->entry_count++;
- }
- pool->current_offset += layout_size;
- } else if (!pool->host_memory_base) {
- uint64_t offset = 0;
- int index;
-
- for (index = 0; index < pool->entry_count; ++index) {
- if (pool->entries[index].offset - offset >= layout_size)
- break;
- offset = pool->entries[index].offset + pool->entries[index].size;
- }
-
- if (pool->size - offset < layout_size) {
- vk_free2(&device->vk.alloc, NULL, set);
- return vk_error(device->instance, VK_ERROR_OUT_OF_POOL_MEMORY);
- }
- set->header.bo = pool->bo;
- set->header.mapped_ptr = (uint32_t*)(pool->mapped_ptr + offset);
- set->header.va = pool->bo ? (radv_buffer_get_va(set->header.bo) + offset) : 0;
- memmove(&pool->entries[index + 1], &pool->entries[index],
- sizeof(pool->entries[0]) * (pool->entry_count - index));
- pool->entries[index].offset = offset;
- pool->entries[index].size = layout_size;
- pool->entries[index].set = set;
- pool->entry_count++;
- } else
- return vk_error(device->instance, VK_ERROR_OUT_OF_POOL_MEMORY);
- }
-
- if (layout->has_immutable_samplers) {
- for (unsigned i = 0; i < layout->binding_count; ++i) {
- if (!layout->binding[i].immutable_samplers_offset ||
- layout->binding[i].immutable_samplers_equal)
- continue;
-
- unsigned offset = layout->binding[i].offset / 4;
- if (layout->binding[i].type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
- offset += radv_combined_image_descriptor_sampler_offset(layout->binding + i) / 4;
-
- const uint32_t *samplers = (const uint32_t*)((const char*)layout + layout->binding[i].immutable_samplers_offset);
- for (unsigned j = 0; j < layout->binding[i].array_size; ++j) {
- memcpy(set->header.mapped_ptr + offset, samplers + 4 * j, 16);
- offset += layout->binding[i].size / 4;
- }
-
- }
- }
- *out_set = set;
- return VK_SUCCESS;
+ struct radv_descriptor_set *set;
+ uint32_t buffer_count = layout->buffer_count;
+ if (variable_count) {
+ unsigned stride = 1;
+ if (layout->binding[layout->binding_count - 1].type == VK_DESCRIPTOR_TYPE_SAMPLER ||
+ layout->binding[layout->binding_count - 1].type ==
+ VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT)
+ stride = 0;
+ buffer_count =
+ layout->binding[layout->binding_count - 1].buffer_offset + *variable_count * stride;
+ }
+ unsigned range_offset =
+ sizeof(struct radv_descriptor_set_header) + sizeof(struct radeon_winsys_bo *) * buffer_count;
+ unsigned mem_size =
+ range_offset + sizeof(struct radv_descriptor_range) * layout->dynamic_offset_count;
+
+ if (pool->host_memory_base) {
+ if (pool->host_memory_end - pool->host_memory_ptr < mem_size)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_POOL_MEMORY);
+
+ set = (struct radv_descriptor_set *)pool->host_memory_ptr;
+ pool->host_memory_ptr += mem_size;
+ memset(set->descriptors, 0, sizeof(struct radeon_winsys_bo *) * buffer_count);
+ } else {
+ set = vk_alloc2(&device->vk.alloc, NULL, mem_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+
+ if (!set)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+
+ memset(set, 0, mem_size);
+
+ vk_object_base_init(&device->vk, &set->header.base, VK_OBJECT_TYPE_DESCRIPTOR_SET);
+
+ if (layout->dynamic_offset_count) {
+ set->header.dynamic_descriptors =
+ (struct radv_descriptor_range *)((uint8_t *)set + range_offset);
+ }
+
+ set->header.layout = layout;
+ set->header.buffer_count = buffer_count;
+ uint32_t layout_size = layout->size;
+ if (variable_count) {
+ assert(layout->has_variable_descriptors);
+ uint32_t stride = layout->binding[layout->binding_count - 1].size;
+ if (layout->binding[layout->binding_count - 1].type ==
+ VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT)
+ stride = 1;
+
+ layout_size = layout->binding[layout->binding_count - 1].offset + *variable_count * stride;
+ }
+ layout_size = align_u32(layout_size, 32);
+ if (layout_size) {
+ set->header.size = layout_size;
+
+ if (!pool->host_memory_base && pool->entry_count == pool->max_entry_count) {
+ vk_free2(&device->vk.alloc, NULL, set);
+ return vk_error(device->instance, VK_ERROR_OUT_OF_POOL_MEMORY);
+ }
+
+ /* try to allocate linearly first, so that we don't spend
+ * time looking for gaps if the app only allocates &
+ * resets via the pool. */
+ if (pool->current_offset + layout_size <= pool->size) {
+ set->header.bo = pool->bo;
+ set->header.mapped_ptr = (uint32_t *)(pool->mapped_ptr + pool->current_offset);
+ set->header.va =
+ pool->bo ? (radv_buffer_get_va(set->header.bo) + pool->current_offset) : 0;
+ if (!pool->host_memory_base) {
+ pool->entries[pool->entry_count].offset = pool->current_offset;
+ pool->entries[pool->entry_count].size = layout_size;
+ pool->entries[pool->entry_count].set = set;
+ pool->entry_count++;
+ }
+ pool->current_offset += layout_size;
+ } else if (!pool->host_memory_base) {
+ uint64_t offset = 0;
+ int index;
+
+ for (index = 0; index < pool->entry_count; ++index) {
+ if (pool->entries[index].offset - offset >= layout_size)
+ break;
+ offset = pool->entries[index].offset + pool->entries[index].size;
+ }
+
+ if (pool->size - offset < layout_size) {
+ vk_free2(&device->vk.alloc, NULL, set);
+ return vk_error(device->instance, VK_ERROR_OUT_OF_POOL_MEMORY);
+ }
+ set->header.bo = pool->bo;
+ set->header.mapped_ptr = (uint32_t *)(pool->mapped_ptr + offset);
+ set->header.va = pool->bo ? (radv_buffer_get_va(set->header.bo) + offset) : 0;
+ memmove(&pool->entries[index + 1], &pool->entries[index],
+ sizeof(pool->entries[0]) * (pool->entry_count - index));
+ pool->entries[index].offset = offset;
+ pool->entries[index].size = layout_size;
+ pool->entries[index].set = set;
+ pool->entry_count++;
+ } else
+ return vk_error(device->instance, VK_ERROR_OUT_OF_POOL_MEMORY);
+ }
+
+ if (layout->has_immutable_samplers) {
+ for (unsigned i = 0; i < layout->binding_count; ++i) {
+ if (!layout->binding[i].immutable_samplers_offset ||
+ layout->binding[i].immutable_samplers_equal)
+ continue;
+
+ unsigned offset = layout->binding[i].offset / 4;
+ if (layout->binding[i].type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
+ offset += radv_combined_image_descriptor_sampler_offset(layout->binding + i) / 4;
+
+ const uint32_t *samplers =
+ (const uint32_t *)((const char *)layout + layout->binding[i].immutable_samplers_offset);
+ for (unsigned j = 0; j < layout->binding[i].array_size; ++j) {
+ memcpy(set->header.mapped_ptr + offset, samplers + 4 * j, 16);
+ offset += layout->binding[i].size / 4;
+ }
+ }
+ }
+ *out_set = set;
+ return VK_SUCCESS;
}
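
Descriptor-set allocation above is a two-stage strategy: bump-allocate at current_offset while the pool is used linearly, and only fall back to a first-fit scan of the offset-sorted entry list once sets have been freed out of order. Here is a stand-alone sketch of that first-fit path, with hypothetical names and a fixed entry capacity.

/* First-fit allocation over a sorted entry array, mirroring the gap search
 * above.  Capacity and names are hypothetical. */
#include <stdbool.h>
#include <stdint.h>
#include <string.h>

#define EXAMPLE_MAX_ENTRIES 64

struct example_entry {
   uint64_t offset;
   uint64_t size;
};

struct example_pool {
   uint64_t size; /* total pool size in bytes */
   unsigned entry_count;
   struct example_entry entries[EXAMPLE_MAX_ENTRIES]; /* sorted by offset */
};

/* Returns true and writes the chosen offset, or false if nothing fits. */
static bool
example_pool_alloc(struct example_pool *pool, uint64_t alloc_size, uint64_t *out_offset)
{
   uint64_t offset = 0;
   unsigned index;

   if (pool->entry_count == EXAMPLE_MAX_ENTRIES)
      return false;

   for (index = 0; index < pool->entry_count; ++index) {
      if (pool->entries[index].offset - offset >= alloc_size)
         break; /* found a hole in front of this entry */
      offset = pool->entries[index].offset + pool->entries[index].size;
   }

   if (pool->size - offset < alloc_size)
      return false;

   /* Keep the array sorted by shifting the tail up one slot. */
   memmove(&pool->entries[index + 1], &pool->entries[index],
           sizeof(pool->entries[0]) * (pool->entry_count - index));
   pool->entries[index].offset = offset;
   pool->entries[index].size = alloc_size;
   pool->entry_count++;

   *out_offset = offset;
   return true;
}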
static void
-radv_descriptor_set_destroy(struct radv_device *device,
- struct radv_descriptor_pool *pool,
- struct radv_descriptor_set *set,
- bool free_bo)
+radv_descriptor_set_destroy(struct radv_device *device, struct radv_descriptor_pool *pool,
+ struct radv_descriptor_set *set, bool free_bo)
{
- assert(!pool->host_memory_base);
-
- if (free_bo && set->header.size && !pool->host_memory_base) {
- uint32_t offset = (uint8_t*)set->header.mapped_ptr - pool->mapped_ptr;
- for (int i = 0; i < pool->entry_count; ++i) {
- if (pool->entries[i].offset == offset) {
- memmove(&pool->entries[i], &pool->entries[i+1],
- sizeof(pool->entries[i]) * (pool->entry_count - i - 1));
- --pool->entry_count;
- break;
- }
- }
- }
- vk_object_base_finish(&set->header.base);
- vk_free2(&device->vk.alloc, NULL, set);
+ assert(!pool->host_memory_base);
+
+ if (free_bo && set->header.size && !pool->host_memory_base) {
+ uint32_t offset = (uint8_t *)set->header.mapped_ptr - pool->mapped_ptr;
+ for (int i = 0; i < pool->entry_count; ++i) {
+ if (pool->entries[i].offset == offset) {
+ memmove(&pool->entries[i], &pool->entries[i + 1],
+ sizeof(pool->entries[i]) * (pool->entry_count - i - 1));
+ --pool->entry_count;
+ break;
+ }
+ }
+ }
+ vk_object_base_finish(&set->header.base);
+ vk_free2(&device->vk.alloc, NULL, set);
}
-static void radv_destroy_descriptor_pool(struct radv_device *device,
- const VkAllocationCallbacks *pAllocator,
- struct radv_descriptor_pool *pool)
+static void
+radv_destroy_descriptor_pool(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
+ struct radv_descriptor_pool *pool)
{
- if (!pool->host_memory_base) {
- for(int i = 0; i < pool->entry_count; ++i) {
- radv_descriptor_set_destroy(device, pool, pool->entries[i].set, false);
- }
- }
-
- if (pool->bo)
- device->ws->buffer_destroy(device->ws, pool->bo);
- if (pool->host_bo)
- vk_free2(&device->vk.alloc, pAllocator, pool->host_bo);
-
- vk_object_base_finish(&pool->base);
- vk_free2(&device->vk.alloc, pAllocator, pool);
+ if (!pool->host_memory_base) {
+ for (int i = 0; i < pool->entry_count; ++i) {
+ radv_descriptor_set_destroy(device, pool, pool->entries[i].set, false);
+ }
+ }
+
+ if (pool->bo)
+ device->ws->buffer_destroy(device->ws, pool->bo);
+ if (pool->host_bo)
+ vk_free2(&device->vk.alloc, pAllocator, pool->host_bo);
+
+ vk_object_base_finish(&pool->base);
+ vk_free2(&device->vk.alloc, pAllocator, pool);
}
-VkResult radv_CreateDescriptorPool(
- VkDevice _device,
- const VkDescriptorPoolCreateInfo* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkDescriptorPool* pDescriptorPool)
+VkResult
+radv_CreateDescriptorPool(VkDevice _device, const VkDescriptorPoolCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkDescriptorPool *pDescriptorPool)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- struct radv_descriptor_pool *pool;
- uint64_t size = sizeof(struct radv_descriptor_pool);
- uint64_t bo_size = 0, bo_count = 0, range_count = 0;
-
- const VkMutableDescriptorTypeCreateInfoVALVE *mutable_info =
- vk_find_struct_const(pCreateInfo->pNext, MUTABLE_DESCRIPTOR_TYPE_CREATE_INFO_VALVE);
-
- vk_foreach_struct(ext, pCreateInfo->pNext) {
- switch (ext->sType) {
- case VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_INLINE_UNIFORM_BLOCK_CREATE_INFO_EXT: {
- const struct VkDescriptorPoolInlineUniformBlockCreateInfoEXT *info =
- (const struct VkDescriptorPoolInlineUniformBlockCreateInfoEXT*)ext;
- /* the sizes are 4 aligned, and we need to align to at
- * most 32, which needs at most 28 bytes extra per
- * binding. */
- bo_size += 28llu * info->maxInlineUniformBlockBindings;
- break;
- }
- default:
- break;
- }
- }
-
- for (unsigned i = 0; i < pCreateInfo->poolSizeCount; ++i) {
- if (pCreateInfo->pPoolSizes[i].type != VK_DESCRIPTOR_TYPE_SAMPLER)
- bo_count += pCreateInfo->pPoolSizes[i].descriptorCount;
-
- switch(pCreateInfo->pPoolSizes[i].type) {
- case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
- case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
- range_count += pCreateInfo->pPoolSizes[i].descriptorCount;
- break;
- case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
- case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
- case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
- case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
- case VK_DESCRIPTOR_TYPE_SAMPLER:
- /* 32 as we may need to align for images */
- bo_size += 32 * pCreateInfo->pPoolSizes[i].descriptorCount;
- break;
- case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
- case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
- case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
- bo_size += 64 * pCreateInfo->pPoolSizes[i].descriptorCount;
- break;
- case VK_DESCRIPTOR_TYPE_MUTABLE_VALVE:
- /* Per spec, if a mutable descriptor type list is provided for the pool entry, we
- * allocate enough memory to hold any subset of that list.
- * If there is no mutable descriptor type list available,
- * we must allocate enough for any supported mutable descriptor type, i.e. 64 bytes. */
- if (mutable_info && i < mutable_info->mutableDescriptorTypeListCount) {
- uint64_t mutable_size, mutable_alignment;
- if (radv_mutable_descriptor_type_size_alignment(&mutable_info->pMutableDescriptorTypeLists[i],
- &mutable_size, &mutable_alignment)) {
- bo_size += mutable_size * pCreateInfo->pPoolSizes[i].descriptorCount;
- }
- } else {
- bo_size += 64 * pCreateInfo->pPoolSizes[i].descriptorCount;
- }
- break;
- case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
- bo_size += 96 * pCreateInfo->pPoolSizes[i].descriptorCount;
- break;
- case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
- bo_size += pCreateInfo->pPoolSizes[i].descriptorCount;
- break;
- default:
- break;
- }
- }
-
- if (!(pCreateInfo->flags & VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT)) {
- uint64_t host_size = pCreateInfo->maxSets * sizeof(struct radv_descriptor_set);
- host_size += sizeof(struct radeon_winsys_bo*) * bo_count;
- host_size += sizeof(struct radv_descriptor_range) * range_count;
- size += host_size;
- } else {
- size += sizeof(struct radv_descriptor_pool_entry) * pCreateInfo->maxSets;
- }
-
- pool = vk_alloc2(&device->vk.alloc, pAllocator, size, 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (!pool)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
-
- memset(pool, 0, sizeof(*pool));
-
- vk_object_base_init(&device->vk, &pool->base,
- VK_OBJECT_TYPE_DESCRIPTOR_POOL);
-
- if (!(pCreateInfo->flags & VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT)) {
- pool->host_memory_base = (uint8_t*)pool + sizeof(struct radv_descriptor_pool);
- pool->host_memory_ptr = pool->host_memory_base;
- pool->host_memory_end = (uint8_t*)pool + size;
- }
-
- if (bo_size) {
- if (!(pCreateInfo->flags & VK_DESCRIPTOR_POOL_CREATE_HOST_ONLY_BIT_VALVE)) {
- pool->bo = device->ws->buffer_create(device->ws, bo_size, 32,
- RADEON_DOMAIN_VRAM,
- RADEON_FLAG_NO_INTERPROCESS_SHARING |
- RADEON_FLAG_READ_ONLY |
- RADEON_FLAG_32BIT,
- RADV_BO_PRIORITY_DESCRIPTOR);
- if (!pool->bo) {
- radv_destroy_descriptor_pool(device, pAllocator, pool);
- return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
- }
- pool->mapped_ptr = (uint8_t*)device->ws->buffer_map(pool->bo);
- if (!pool->mapped_ptr) {
- radv_destroy_descriptor_pool(device, pAllocator, pool);
- return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
- }
- } else {
- pool->host_bo = vk_alloc2(&device->vk.alloc, pAllocator, bo_size, 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (!pool->host_bo) {
- radv_destroy_descriptor_pool(device, pAllocator, pool);
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- }
- pool->mapped_ptr = pool->host_bo;
- }
- }
- pool->size = bo_size;
- pool->max_entry_count = pCreateInfo->maxSets;
-
- *pDescriptorPool = radv_descriptor_pool_to_handle(pool);
- return VK_SUCCESS;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ struct radv_descriptor_pool *pool;
+ uint64_t size = sizeof(struct radv_descriptor_pool);
+ uint64_t bo_size = 0, bo_count = 0, range_count = 0;
+
+ const VkMutableDescriptorTypeCreateInfoVALVE *mutable_info =
+ vk_find_struct_const(pCreateInfo->pNext, MUTABLE_DESCRIPTOR_TYPE_CREATE_INFO_VALVE);
+
+ vk_foreach_struct(ext, pCreateInfo->pNext)
+ {
+ switch (ext->sType) {
+ case VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_INLINE_UNIFORM_BLOCK_CREATE_INFO_EXT: {
+ const struct VkDescriptorPoolInlineUniformBlockCreateInfoEXT *info =
+ (const struct VkDescriptorPoolInlineUniformBlockCreateInfoEXT *)ext;
+ /* the sizes are 4 aligned, and we need to align to at
+ * most 32, which needs at most 28 bytes extra per
+ * binding. */
+ bo_size += 28llu * info->maxInlineUniformBlockBindings;
+ break;
+ }
+ default:
+ break;
+ }
+ }
+
+ for (unsigned i = 0; i < pCreateInfo->poolSizeCount; ++i) {
+ if (pCreateInfo->pPoolSizes[i].type != VK_DESCRIPTOR_TYPE_SAMPLER)
+ bo_count += pCreateInfo->pPoolSizes[i].descriptorCount;
+
+ switch (pCreateInfo->pPoolSizes[i].type) {
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+ range_count += pCreateInfo->pPoolSizes[i].descriptorCount;
+ break;
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ /* 32 as we may need to align for images */
+ bo_size += 32 * pCreateInfo->pPoolSizes[i].descriptorCount;
+ break;
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+ bo_size += 64 * pCreateInfo->pPoolSizes[i].descriptorCount;
+ break;
+ case VK_DESCRIPTOR_TYPE_MUTABLE_VALVE:
+ /* Per spec, if a mutable descriptor type list is provided for the pool entry, we
+ * allocate enough memory to hold any subset of that list.
+ * If there is no mutable descriptor type list available,
+ * we must allocate enough for any supported mutable descriptor type, i.e. 64 bytes. */
+ if (mutable_info && i < mutable_info->mutableDescriptorTypeListCount) {
+ uint64_t mutable_size, mutable_alignment;
+ if (radv_mutable_descriptor_type_size_alignment(
+ &mutable_info->pMutableDescriptorTypeLists[i], &mutable_size,
+ &mutable_alignment)) {
+ bo_size += mutable_size * pCreateInfo->pPoolSizes[i].descriptorCount;
+ }
+ } else {
+ bo_size += 64 * pCreateInfo->pPoolSizes[i].descriptorCount;
+ }
+ break;
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ bo_size += 96 * pCreateInfo->pPoolSizes[i].descriptorCount;
+ break;
+ case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
+ bo_size += pCreateInfo->pPoolSizes[i].descriptorCount;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (!(pCreateInfo->flags & VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT)) {
+ uint64_t host_size = pCreateInfo->maxSets * sizeof(struct radv_descriptor_set);
+ host_size += sizeof(struct radeon_winsys_bo *) * bo_count;
+ host_size += sizeof(struct radv_descriptor_range) * range_count;
+ size += host_size;
+ } else {
+ size += sizeof(struct radv_descriptor_pool_entry) * pCreateInfo->maxSets;
+ }
+
+ pool = vk_alloc2(&device->vk.alloc, pAllocator, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (!pool)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ memset(pool, 0, sizeof(*pool));
+
+ vk_object_base_init(&device->vk, &pool->base, VK_OBJECT_TYPE_DESCRIPTOR_POOL);
+
+ if (!(pCreateInfo->flags & VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT)) {
+ pool->host_memory_base = (uint8_t *)pool + sizeof(struct radv_descriptor_pool);
+ pool->host_memory_ptr = pool->host_memory_base;
+ pool->host_memory_end = (uint8_t *)pool + size;
+ }
+
+ if (bo_size) {
+ if (!(pCreateInfo->flags & VK_DESCRIPTOR_POOL_CREATE_HOST_ONLY_BIT_VALVE)) {
+ pool->bo = device->ws->buffer_create(
+ device->ws, bo_size, 32, RADEON_DOMAIN_VRAM,
+ RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY | RADEON_FLAG_32BIT,
+ RADV_BO_PRIORITY_DESCRIPTOR);
+ if (!pool->bo) {
+ radv_destroy_descriptor_pool(device, pAllocator, pool);
+ return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ }
+ pool->mapped_ptr = (uint8_t *)device->ws->buffer_map(pool->bo);
+ if (!pool->mapped_ptr) {
+ radv_destroy_descriptor_pool(device, pAllocator, pool);
+ return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ }
+ } else {
+ pool->host_bo =
+ vk_alloc2(&device->vk.alloc, pAllocator, bo_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (!pool->host_bo) {
+ radv_destroy_descriptor_pool(device, pAllocator, pool);
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+ pool->mapped_ptr = pool->host_bo;
+ }
+ }
+ pool->size = bo_size;
+ pool->max_entry_count = pCreateInfo->maxSets;
+
+ *pDescriptorPool = radv_descriptor_pool_to_handle(pool);
+ return VK_SUCCESS;
}
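
The pool's backing buffer is sized pessimistically up front from per-type byte costs, plus 28 bytes of alignment slack for every potential inline uniform block binding (the data is 4-byte granular but each binding starts 32-byte aligned). A rough sketch of that estimate follows, assuming the same footprints as the switch above; the type enum and names are hypothetical, and the mutable-descriptor refinement is omitted.

/* Worst-case pool sizing, folded down to the byte costs used above. */
#include <stdint.h>

enum example_desc_type {
   EXAMPLE_DESC_BUFFER,         /* (texel) buffers and samplers: 16B, padded to 32 */
   EXAMPLE_DESC_IMAGE,          /* sampled/storage images, input attachments: 64B */
   EXAMPLE_DESC_COMBINED,       /* combined image+sampler: 96B */
   EXAMPLE_DESC_INLINE_UNIFORM, /* inline uniform block: 1B per data byte */
};

static uint64_t
example_pool_bo_size(const enum example_desc_type *types, const uint32_t *counts,
                     unsigned pool_size_count, uint32_t max_inline_block_bindings)
{
   uint64_t bo_size = 0;

   /* Up to 28 bytes of padding per inline uniform block binding to reach
    * 32-byte alignment from 4-byte granular sizes. */
   bo_size += 28ull * max_inline_block_bindings;

   for (unsigned i = 0; i < pool_size_count; i++) {
      switch (types[i]) {
      case EXAMPLE_DESC_BUFFER:
         bo_size += 32ull * counts[i];
         break;
      case EXAMPLE_DESC_IMAGE:
         bo_size += 64ull * counts[i];
         break;
      case EXAMPLE_DESC_COMBINED:
         bo_size += 96ull * counts[i];
         break;
      case EXAMPLE_DESC_INLINE_UNIFORM:
         bo_size += counts[i];
         break;
      }
   }
   return bo_size;
}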
-void radv_DestroyDescriptorPool(
- VkDevice _device,
- VkDescriptorPool _pool,
- const VkAllocationCallbacks* pAllocator)
+void
+radv_DestroyDescriptorPool(VkDevice _device, VkDescriptorPool _pool,
+ const VkAllocationCallbacks *pAllocator)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_descriptor_pool, pool, _pool);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_descriptor_pool, pool, _pool);
- if (!pool)
- return;
+ if (!pool)
+ return;
- radv_destroy_descriptor_pool(device, pAllocator, pool);
+ radv_destroy_descriptor_pool(device, pAllocator, pool);
}
-VkResult radv_ResetDescriptorPool(
- VkDevice _device,
- VkDescriptorPool descriptorPool,
- VkDescriptorPoolResetFlags flags)
+VkResult
+radv_ResetDescriptorPool(VkDevice _device, VkDescriptorPool descriptorPool,
+ VkDescriptorPoolResetFlags flags)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_descriptor_pool, pool, descriptorPool);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_descriptor_pool, pool, descriptorPool);
- if (!pool->host_memory_base) {
- for(int i = 0; i < pool->entry_count; ++i) {
- radv_descriptor_set_destroy(device, pool, pool->entries[i].set, false);
- }
- pool->entry_count = 0;
- }
+ if (!pool->host_memory_base) {
+ for (int i = 0; i < pool->entry_count; ++i) {
+ radv_descriptor_set_destroy(device, pool, pool->entries[i].set, false);
+ }
+ pool->entry_count = 0;
+ }
- pool->current_offset = 0;
- pool->host_memory_ptr = pool->host_memory_base;
+ pool->current_offset = 0;
+ pool->host_memory_ptr = pool->host_memory_base;
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
-VkResult radv_AllocateDescriptorSets(
- VkDevice _device,
- const VkDescriptorSetAllocateInfo* pAllocateInfo,
- VkDescriptorSet* pDescriptorSets)
+VkResult
+radv_AllocateDescriptorSets(VkDevice _device, const VkDescriptorSetAllocateInfo *pAllocateInfo,
+ VkDescriptorSet *pDescriptorSets)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_descriptor_pool, pool, pAllocateInfo->descriptorPool);
-
- VkResult result = VK_SUCCESS;
- uint32_t i;
- struct radv_descriptor_set *set = NULL;
-
- const VkDescriptorSetVariableDescriptorCountAllocateInfo *variable_counts =
- vk_find_struct_const(pAllocateInfo->pNext, DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_ALLOCATE_INFO);
- const uint32_t zero = 0;
-
- /* allocate a set of buffers for each shader to contain descriptors */
- for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) {
- RADV_FROM_HANDLE(radv_descriptor_set_layout, layout,
- pAllocateInfo->pSetLayouts[i]);
-
- const uint32_t *variable_count = NULL;
- if (variable_counts) {
- if (i < variable_counts->descriptorSetCount)
- variable_count = variable_counts->pDescriptorCounts + i;
- else
- variable_count = &zero;
- }
-
- assert(!(layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
-
- result = radv_descriptor_set_create(device, pool, layout, variable_count, &set);
- if (result != VK_SUCCESS)
- break;
-
- pDescriptorSets[i] = radv_descriptor_set_to_handle(set);
- }
-
- if (result != VK_SUCCESS) {
- radv_FreeDescriptorSets(_device, pAllocateInfo->descriptorPool,
- i, pDescriptorSets);
- for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) {
- pDescriptorSets[i] = VK_NULL_HANDLE;
- }
- }
- return result;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_descriptor_pool, pool, pAllocateInfo->descriptorPool);
+
+ VkResult result = VK_SUCCESS;
+ uint32_t i;
+ struct radv_descriptor_set *set = NULL;
+
+ const VkDescriptorSetVariableDescriptorCountAllocateInfo *variable_counts = vk_find_struct_const(
+ pAllocateInfo->pNext, DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_ALLOCATE_INFO);
+ const uint32_t zero = 0;
+
+ /* allocate a set of buffers for each shader to contain descriptors */
+ for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) {
+ RADV_FROM_HANDLE(radv_descriptor_set_layout, layout, pAllocateInfo->pSetLayouts[i]);
+
+ const uint32_t *variable_count = NULL;
+ if (variable_counts) {
+ if (i < variable_counts->descriptorSetCount)
+ variable_count = variable_counts->pDescriptorCounts + i;
+ else
+ variable_count = &zero;
+ }
+
+ assert(!(layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
+
+ result = radv_descriptor_set_create(device, pool, layout, variable_count, &set);
+ if (result != VK_SUCCESS)
+ break;
+
+ pDescriptorSets[i] = radv_descriptor_set_to_handle(set);
+ }
+
+ if (result != VK_SUCCESS) {
+ radv_FreeDescriptorSets(_device, pAllocateInfo->descriptorPool, i, pDescriptorSets);
+ for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) {
+ pDescriptorSets[i] = VK_NULL_HANDLE;
+ }
+ }
+ return result;
}
-VkResult radv_FreeDescriptorSets(
- VkDevice _device,
- VkDescriptorPool descriptorPool,
- uint32_t count,
- const VkDescriptorSet* pDescriptorSets)
+VkResult
+radv_FreeDescriptorSets(VkDevice _device, VkDescriptorPool descriptorPool, uint32_t count,
+ const VkDescriptorSet *pDescriptorSets)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_descriptor_pool, pool, descriptorPool);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_descriptor_pool, pool, descriptorPool);
- for (uint32_t i = 0; i < count; i++) {
- RADV_FROM_HANDLE(radv_descriptor_set, set, pDescriptorSets[i]);
+ for (uint32_t i = 0; i < count; i++) {
+ RADV_FROM_HANDLE(radv_descriptor_set, set, pDescriptorSets[i]);
- if (set && !pool->host_memory_base)
- radv_descriptor_set_destroy(device, pool, set, true);
- }
- return VK_SUCCESS;
+ if (set && !pool->host_memory_base)
+ radv_descriptor_set_destroy(device, pool, set, true);
+ }
+ return VK_SUCCESS;
}
-static void write_texel_buffer_descriptor(struct radv_device *device,
- struct radv_cmd_buffer *cmd_buffer,
- unsigned *dst,
- struct radeon_winsys_bo **buffer_list,
- const VkBufferView _buffer_view)
+static void
+write_texel_buffer_descriptor(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer,
+ unsigned *dst, struct radeon_winsys_bo **buffer_list,
+ const VkBufferView _buffer_view)
{
- RADV_FROM_HANDLE(radv_buffer_view, buffer_view, _buffer_view);
+ RADV_FROM_HANDLE(radv_buffer_view, buffer_view, _buffer_view);
- if (!buffer_view) {
- memset(dst, 0, 4 * 4);
- if (!cmd_buffer)
- *buffer_list = NULL;
- return;
- }
+ if (!buffer_view) {
+ memset(dst, 0, 4 * 4);
+ if (!cmd_buffer)
+ *buffer_list = NULL;
+ return;
+ }
- memcpy(dst, buffer_view->state, 4 * 4);
+ memcpy(dst, buffer_view->state, 4 * 4);
- if (cmd_buffer)
- radv_cs_add_buffer(device->ws, cmd_buffer->cs, buffer_view->bo);
- else
- *buffer_list = buffer_view->bo;
+ if (cmd_buffer)
+ radv_cs_add_buffer(device->ws, cmd_buffer->cs, buffer_view->bo);
+ else
+ *buffer_list = buffer_view->bo;
}
-static void write_buffer_descriptor(struct radv_device *device,
- struct radv_cmd_buffer *cmd_buffer,
- unsigned *dst,
- struct radeon_winsys_bo **buffer_list,
- const VkDescriptorBufferInfo *buffer_info)
+static void
+write_buffer_descriptor(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer,
+ unsigned *dst, struct radeon_winsys_bo **buffer_list,
+ const VkDescriptorBufferInfo *buffer_info)
{
- RADV_FROM_HANDLE(radv_buffer, buffer, buffer_info->buffer);
-
- if (!buffer) {
- memset(dst, 0, 4 * 4);
- if (!cmd_buffer)
- *buffer_list = NULL;
- return;
- }
-
- uint64_t va = radv_buffer_get_va(buffer->bo);
- uint32_t range = buffer_info->range;
-
- if (buffer_info->range == VK_WHOLE_SIZE)
- range = buffer->size - buffer_info->offset;
-
- /* robustBufferAccess is relaxed enough to allow this (in combination
- * with the alignment/size we return from vkGetBufferMemoryRequirements)
- * and this allows the shader compiler to create more efficient 8/16-bit
- * buffer accesses. */
- range = align(range, 4);
-
- va += buffer_info->offset + buffer->offset;
-
- uint32_t rsrc_word3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
- S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
- S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
- S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
-
- if (device->physical_device->rad_info.chip_class >= GFX10) {
- rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
- S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
- S_008F0C_RESOURCE_LEVEL(1);
- } else {
- rsrc_word3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
- }
-
- dst[0] = va;
- dst[1] = S_008F04_BASE_ADDRESS_HI(va >> 32);
- dst[2] = range;
- dst[3] = rsrc_word3;
-
- if (cmd_buffer)
- radv_cs_add_buffer(device->ws, cmd_buffer->cs, buffer->bo);
- else
- *buffer_list = buffer->bo;
+ RADV_FROM_HANDLE(radv_buffer, buffer, buffer_info->buffer);
+
+ if (!buffer) {
+ memset(dst, 0, 4 * 4);
+ if (!cmd_buffer)
+ *buffer_list = NULL;
+ return;
+ }
+
+ uint64_t va = radv_buffer_get_va(buffer->bo);
+ uint32_t range = buffer_info->range;
+
+ if (buffer_info->range == VK_WHOLE_SIZE)
+ range = buffer->size - buffer_info->offset;
+
+ /* robustBufferAccess is relaxed enough to allow this (in combination
+ * with the alignment/size we return from vkGetBufferMemoryRequirements)
+ * and this allows the shader compiler to create more efficient 8/16-bit
+ * buffer accesses. */
+ range = align(range, 4);
+
+ va += buffer_info->offset + buffer->offset;
+
+ uint32_t rsrc_word3 =
+ S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+
+ if (device->physical_device->rad_info.chip_class >= GFX10) {
+ rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+ S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
+ } else {
+ rsrc_word3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+ }
+
+ dst[0] = va;
+ dst[1] = S_008F04_BASE_ADDRESS_HI(va >> 32);
+ dst[2] = range;
+ dst[3] = rsrc_word3;
+
+ if (cmd_buffer)
+ radv_cs_add_buffer(device->ws, cmd_buffer->cs, buffer->bo);
+ else
+ *buffer_list = buffer->bo;
}
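
write_buffer_descriptor above, like write_dynamic_buffer_descriptor below, resolves VK_WHOLE_SIZE against the buffer size, rounds the range up to 4 bytes (legal under the relaxed robustBufferAccess guarantees noted in the comments, and friendlier to 8/16-bit loads), and splits the virtual address across the first two descriptor dwords. A simplified sketch of that packing follows; the chip-specific format/swizzle word is taken as a parameter rather than rebuilt from the S_008F0C_* macros, and all names are hypothetical.

/* Pack a simplified 4-dword buffer descriptor: address low/high, byte range,
 * and an opaque hardware word.  This is an illustration, not the driver's
 * exact encoding. */
#include <stdint.h>

#define EXAMPLE_WHOLE_SIZE (~0ull)

static uint32_t
example_align_u32(uint32_t v, uint32_t a)
{
   return (v + a - 1) & ~(a - 1);
}

static void
example_pack_buffer_descriptor(uint32_t dst[4], uint64_t buffer_va, uint64_t buffer_size,
                               uint64_t offset, uint64_t range, uint32_t rsrc_word3)
{
   if (range == EXAMPLE_WHOLE_SIZE)
      range = buffer_size - offset;

   /* Round the range up to 4 so 8/16-bit accesses stay in bounds. */
   range = example_align_u32((uint32_t)range, 4);

   uint64_t va = buffer_va + offset;

   dst[0] = (uint32_t)va;         /* low 32 bits of the address */
   dst[1] = (uint32_t)(va >> 32); /* high bits; the real code masks these into BASE_ADDRESS_HI */
   dst[2] = (uint32_t)range;      /* size in bytes */
   dst[3] = rsrc_word3;           /* dst_sel/format bits, chip dependent */
}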
-static void write_block_descriptor(struct radv_device *device,
- struct radv_cmd_buffer *cmd_buffer,
- void *dst,
- const VkWriteDescriptorSet *writeset)
+static void
+write_block_descriptor(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer, void *dst,
+ const VkWriteDescriptorSet *writeset)
{
- const VkWriteDescriptorSetInlineUniformBlockEXT *inline_ub =
- vk_find_struct_const(writeset->pNext, WRITE_DESCRIPTOR_SET_INLINE_UNIFORM_BLOCK_EXT);
+ const VkWriteDescriptorSetInlineUniformBlockEXT *inline_ub =
+ vk_find_struct_const(writeset->pNext, WRITE_DESCRIPTOR_SET_INLINE_UNIFORM_BLOCK_EXT);
- memcpy(dst, inline_ub->pData, inline_ub->dataSize);
+ memcpy(dst, inline_ub->pData, inline_ub->dataSize);
}
-static void write_dynamic_buffer_descriptor(struct radv_device *device,
- struct radv_descriptor_range *range,
- struct radeon_winsys_bo **buffer_list,
- const VkDescriptorBufferInfo *buffer_info)
+static void
+write_dynamic_buffer_descriptor(struct radv_device *device, struct radv_descriptor_range *range,
+ struct radeon_winsys_bo **buffer_list,
+ const VkDescriptorBufferInfo *buffer_info)
{
- RADV_FROM_HANDLE(radv_buffer, buffer, buffer_info->buffer);
- uint64_t va;
- unsigned size;
+ RADV_FROM_HANDLE(radv_buffer, buffer, buffer_info->buffer);
+ uint64_t va;
+ unsigned size;
- if (!buffer) {
- range->va = 0;
- *buffer_list = NULL;
- return;
- }
+ if (!buffer) {
+ range->va = 0;
+ *buffer_list = NULL;
+ return;
+ }
- va = radv_buffer_get_va(buffer->bo);
- size = buffer_info->range;
+ va = radv_buffer_get_va(buffer->bo);
+ size = buffer_info->range;
- if (buffer_info->range == VK_WHOLE_SIZE)
- size = buffer->size - buffer_info->offset;
+ if (buffer_info->range == VK_WHOLE_SIZE)
+ size = buffer->size - buffer_info->offset;
- /* robustBufferAccess is relaxed enough to allow this (in combination
- * with the alignment/size we return from vkGetBufferMemoryRequirements)
- * and this allows the shader compiler to create more efficient 8/16-bit
- * buffer accesses. */
- size = align(size, 4);
+ /* robustBufferAccess is relaxed enough to allow this (in combination
+ * with the alignment/size we return from vkGetBufferMemoryRequirements)
+ * and this allows the shader compiler to create more efficient 8/16-bit
+ * buffer accesses. */
+ size = align(size, 4);
- va += buffer_info->offset + buffer->offset;
- range->va = va;
- range->size = size;
+ va += buffer_info->offset + buffer->offset;
+ range->va = va;
+ range->size = size;
- *buffer_list = buffer->bo;
+ *buffer_list = buffer->bo;
}
static void
-write_image_descriptor(struct radv_device *device,
- struct radv_cmd_buffer *cmd_buffer,
- unsigned size, unsigned *dst,
- struct radeon_winsys_bo **buffer_list,
- VkDescriptorType descriptor_type,
- const VkDescriptorImageInfo *image_info)
+write_image_descriptor(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer,
+ unsigned size, unsigned *dst, struct radeon_winsys_bo **buffer_list,
+ VkDescriptorType descriptor_type, const VkDescriptorImageInfo *image_info)
{
- RADV_FROM_HANDLE(radv_image_view, iview, image_info->imageView);
- union radv_descriptor *descriptor;
-
- if (!iview) {
- memset(dst, 0, size);
- if (!cmd_buffer)
- *buffer_list = NULL;
- return;
- }
-
- if (descriptor_type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) {
- descriptor = &iview->storage_descriptor;
- } else {
- descriptor = &iview->descriptor;
- }
-
- memcpy(dst, descriptor, size);
-
- if (cmd_buffer)
- radv_cs_add_buffer(device->ws, cmd_buffer->cs, iview->bo);
- else
- *buffer_list = iview->bo;
+ RADV_FROM_HANDLE(radv_image_view, iview, image_info->imageView);
+ union radv_descriptor *descriptor;
+
+ if (!iview) {
+ memset(dst, 0, size);
+ if (!cmd_buffer)
+ *buffer_list = NULL;
+ return;
+ }
+
+ if (descriptor_type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE) {
+ descriptor = &iview->storage_descriptor;
+ } else {
+ descriptor = &iview->descriptor;
+ }
+
+ memcpy(dst, descriptor, size);
+
+ if (cmd_buffer)
+ radv_cs_add_buffer(device->ws, cmd_buffer->cs, iview->bo);
+ else
+ *buffer_list = iview->bo;
}
static void
write_combined_image_sampler_descriptor(struct radv_device *device,
- struct radv_cmd_buffer *cmd_buffer,
- unsigned sampler_offset,
- unsigned *dst,
- struct radeon_winsys_bo **buffer_list,
- VkDescriptorType descriptor_type,
- const VkDescriptorImageInfo *image_info,
- bool has_sampler)
+ struct radv_cmd_buffer *cmd_buffer, unsigned sampler_offset,
+ unsigned *dst, struct radeon_winsys_bo **buffer_list,
+ VkDescriptorType descriptor_type,
+ const VkDescriptorImageInfo *image_info, bool has_sampler)
{
- RADV_FROM_HANDLE(radv_sampler, sampler, image_info->sampler);
-
- write_image_descriptor(device, cmd_buffer, sampler_offset, dst, buffer_list,
- descriptor_type, image_info);
- /* copy over sampler state */
- if (has_sampler) {
- memcpy(dst + sampler_offset / sizeof(*dst), sampler->state, 16);
- }
+ RADV_FROM_HANDLE(radv_sampler, sampler, image_info->sampler);
+
+ write_image_descriptor(device, cmd_buffer, sampler_offset, dst, buffer_list, descriptor_type,
+ image_info);
+ /* copy over sampler state */
+ if (has_sampler) {
+ memcpy(dst + sampler_offset / sizeof(*dst), sampler->state, 16);
+ }
}
static void
-write_sampler_descriptor(struct radv_device *device,
- unsigned *dst,
- const VkDescriptorImageInfo *image_info)
+write_sampler_descriptor(struct radv_device *device, unsigned *dst,
+ const VkDescriptorImageInfo *image_info)
{
- RADV_FROM_HANDLE(radv_sampler, sampler, image_info->sampler);
+ RADV_FROM_HANDLE(radv_sampler, sampler, image_info->sampler);
- memcpy(dst, sampler->state, 16);
+ memcpy(dst, sampler->state, 16);
}
-void radv_update_descriptor_sets(
- struct radv_device* device,
- struct radv_cmd_buffer* cmd_buffer,
- VkDescriptorSet dstSetOverride,
- uint32_t descriptorWriteCount,
- const VkWriteDescriptorSet* pDescriptorWrites,
- uint32_t descriptorCopyCount,
- const VkCopyDescriptorSet* pDescriptorCopies)
+void
+radv_update_descriptor_sets(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer,
+ VkDescriptorSet dstSetOverride, uint32_t descriptorWriteCount,
+ const VkWriteDescriptorSet *pDescriptorWrites,
+ uint32_t descriptorCopyCount,
+ const VkCopyDescriptorSet *pDescriptorCopies)
{
- uint32_t i, j;
- for (i = 0; i < descriptorWriteCount; i++) {
- const VkWriteDescriptorSet *writeset = &pDescriptorWrites[i];
- RADV_FROM_HANDLE(radv_descriptor_set, set,
- dstSetOverride ? dstSetOverride : writeset->dstSet);
- const struct radv_descriptor_set_binding_layout *binding_layout =
- set->header.layout->binding + writeset->dstBinding;
- uint32_t *ptr = set->header.mapped_ptr;
- struct radeon_winsys_bo **buffer_list = set->descriptors;
- /* Immutable samplers are not copied into push descriptors when they are
- * allocated, so if we are writing push descriptors we have to copy the
- * immutable samplers into them now.
- */
- const bool copy_immutable_samplers = cmd_buffer &&
- binding_layout->immutable_samplers_offset && !binding_layout->immutable_samplers_equal;
- const uint32_t *samplers = radv_immutable_samplers(set->header.layout, binding_layout);
-
- ptr += binding_layout->offset / 4;
-
- if (writeset->descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
- write_block_descriptor(device, cmd_buffer, (uint8_t*)ptr + writeset->dstArrayElement, writeset);
- continue;
- }
-
- ptr += binding_layout->size * writeset->dstArrayElement / 4;
- buffer_list += binding_layout->buffer_offset;
- buffer_list += writeset->dstArrayElement;
- for (j = 0; j < writeset->descriptorCount; ++j) {
- switch(writeset->descriptorType) {
- case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
- case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
- unsigned idx = writeset->dstArrayElement + j;
- idx += binding_layout->dynamic_offset_offset;
- assert(!(set->header.layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
- write_dynamic_buffer_descriptor(device, set->header.dynamic_descriptors + idx,
- buffer_list, writeset->pBufferInfo + j);
- break;
- }
- case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
- case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
- write_buffer_descriptor(device, cmd_buffer, ptr, buffer_list,
- writeset->pBufferInfo + j);
- break;
- case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
- case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
- write_texel_buffer_descriptor(device, cmd_buffer, ptr, buffer_list,
- writeset->pTexelBufferView[j]);
- break;
- case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
- case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
- case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
- write_image_descriptor(device, cmd_buffer, 64, ptr, buffer_list,
- writeset->descriptorType,
- writeset->pImageInfo + j);
- break;
- case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: {
- unsigned sampler_offset = radv_combined_image_descriptor_sampler_offset(binding_layout);
- write_combined_image_sampler_descriptor(device, cmd_buffer, sampler_offset,
- ptr, buffer_list,
- writeset->descriptorType,
- writeset->pImageInfo + j,
- !binding_layout->immutable_samplers_offset);
- if (copy_immutable_samplers) {
- const unsigned idx = writeset->dstArrayElement + j;
- memcpy((char*)ptr + sampler_offset, samplers + 4 * idx, 16);
- }
- break;
- }
- case VK_DESCRIPTOR_TYPE_SAMPLER:
- if (!binding_layout->immutable_samplers_offset) {
- write_sampler_descriptor(device, ptr,
- writeset->pImageInfo + j);
- } else if (copy_immutable_samplers) {
- unsigned idx = writeset->dstArrayElement + j;
- memcpy(ptr, samplers + 4 * idx, 16);
- }
- break;
- default:
- break;
- }
- ptr += binding_layout->size / 4;
- ++buffer_list;
- }
-
- }
-
- for (i = 0; i < descriptorCopyCount; i++) {
- const VkCopyDescriptorSet *copyset = &pDescriptorCopies[i];
- RADV_FROM_HANDLE(radv_descriptor_set, src_set,
- copyset->srcSet);
- RADV_FROM_HANDLE(radv_descriptor_set, dst_set,
- copyset->dstSet);
- const struct radv_descriptor_set_binding_layout *src_binding_layout =
- src_set->header.layout->binding + copyset->srcBinding;
- const struct radv_descriptor_set_binding_layout *dst_binding_layout =
- dst_set->header.layout->binding + copyset->dstBinding;
- uint32_t *src_ptr = src_set->header.mapped_ptr;
- uint32_t *dst_ptr = dst_set->header.mapped_ptr;
- struct radeon_winsys_bo **src_buffer_list = src_set->descriptors;
- struct radeon_winsys_bo **dst_buffer_list = dst_set->descriptors;
-
- src_ptr += src_binding_layout->offset / 4;
- dst_ptr += dst_binding_layout->offset / 4;
-
- if (src_binding_layout->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
- src_ptr += copyset->srcArrayElement / 4;
- dst_ptr += copyset->dstArrayElement / 4;
-
- memcpy(dst_ptr, src_ptr, copyset->descriptorCount);
- continue;
- }
-
- src_ptr += src_binding_layout->size * copyset->srcArrayElement / 4;
- dst_ptr += dst_binding_layout->size * copyset->dstArrayElement / 4;
-
- src_buffer_list += src_binding_layout->buffer_offset;
- src_buffer_list += copyset->srcArrayElement;
-
- dst_buffer_list += dst_binding_layout->buffer_offset;
- dst_buffer_list += copyset->dstArrayElement;
-
- /* In case of copies between mutable descriptor types
- * and non-mutable descriptor types. */
- size_t copy_size = MIN2(src_binding_layout->size, dst_binding_layout->size);
-
- for (j = 0; j < copyset->descriptorCount; ++j) {
- switch (src_binding_layout->type) {
- case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
- case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
- unsigned src_idx = copyset->srcArrayElement + j;
- unsigned dst_idx = copyset->dstArrayElement + j;
- struct radv_descriptor_range *src_range, *dst_range;
- src_idx += src_binding_layout->dynamic_offset_offset;
- dst_idx += dst_binding_layout->dynamic_offset_offset;
-
- src_range = src_set->header.dynamic_descriptors + src_idx;
- dst_range = dst_set->header.dynamic_descriptors + dst_idx;
- *dst_range = *src_range;
- break;
- }
- default:
- memcpy(dst_ptr, src_ptr, copy_size);
- }
- src_ptr += src_binding_layout->size / 4;
- dst_ptr += dst_binding_layout->size / 4;
-
- if (src_binding_layout->type != VK_DESCRIPTOR_TYPE_SAMPLER) {
- /* Sampler descriptors don't have a buffer list. */
- dst_buffer_list[j] = src_buffer_list[j];
- }
- }
- }
+ uint32_t i, j;
+ for (i = 0; i < descriptorWriteCount; i++) {
+ const VkWriteDescriptorSet *writeset = &pDescriptorWrites[i];
+ RADV_FROM_HANDLE(radv_descriptor_set, set,
+ dstSetOverride ? dstSetOverride : writeset->dstSet);
+ const struct radv_descriptor_set_binding_layout *binding_layout =
+ set->header.layout->binding + writeset->dstBinding;
+ uint32_t *ptr = set->header.mapped_ptr;
+ struct radeon_winsys_bo **buffer_list = set->descriptors;
+ /* Immutable samplers are not copied into push descriptors when they are
+ * allocated, so if we are writing push descriptors we have to copy the
+ * immutable samplers into them now.
+ */
+ const bool copy_immutable_samplers = cmd_buffer &&
+ binding_layout->immutable_samplers_offset &&
+ !binding_layout->immutable_samplers_equal;
+ const uint32_t *samplers = radv_immutable_samplers(set->header.layout, binding_layout);
+
+ ptr += binding_layout->offset / 4;
+
+ if (writeset->descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
+ write_block_descriptor(device, cmd_buffer, (uint8_t *)ptr + writeset->dstArrayElement,
+ writeset);
+ continue;
+ }
+
+ ptr += binding_layout->size * writeset->dstArrayElement / 4;
+ buffer_list += binding_layout->buffer_offset;
+ buffer_list += writeset->dstArrayElement;
+ for (j = 0; j < writeset->descriptorCount; ++j) {
+ switch (writeset->descriptorType) {
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
+ unsigned idx = writeset->dstArrayElement + j;
+ idx += binding_layout->dynamic_offset_offset;
+ assert(!(set->header.layout->flags &
+ VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
+ write_dynamic_buffer_descriptor(device, set->header.dynamic_descriptors + idx,
+ buffer_list, writeset->pBufferInfo + j);
+ break;
+ }
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ write_buffer_descriptor(device, cmd_buffer, ptr, buffer_list,
+ writeset->pBufferInfo + j);
+ break;
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ write_texel_buffer_descriptor(device, cmd_buffer, ptr, buffer_list,
+ writeset->pTexelBufferView[j]);
+ break;
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+ write_image_descriptor(device, cmd_buffer, 64, ptr, buffer_list,
+ writeset->descriptorType, writeset->pImageInfo + j);
+ break;
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: {
+ unsigned sampler_offset = radv_combined_image_descriptor_sampler_offset(binding_layout);
+ write_combined_image_sampler_descriptor(
+ device, cmd_buffer, sampler_offset, ptr, buffer_list, writeset->descriptorType,
+ writeset->pImageInfo + j, !binding_layout->immutable_samplers_offset);
+ if (copy_immutable_samplers) {
+ const unsigned idx = writeset->dstArrayElement + j;
+ memcpy((char *)ptr + sampler_offset, samplers + 4 * idx, 16);
+ }
+ break;
+ }
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ if (!binding_layout->immutable_samplers_offset) {
+ write_sampler_descriptor(device, ptr, writeset->pImageInfo + j);
+ } else if (copy_immutable_samplers) {
+ unsigned idx = writeset->dstArrayElement + j;
+ memcpy(ptr, samplers + 4 * idx, 16);
+ }
+ break;
+ default:
+ break;
+ }
+ ptr += binding_layout->size / 4;
+ ++buffer_list;
+ }
+ }
+
+ for (i = 0; i < descriptorCopyCount; i++) {
+ const VkCopyDescriptorSet *copyset = &pDescriptorCopies[i];
+ RADV_FROM_HANDLE(radv_descriptor_set, src_set, copyset->srcSet);
+ RADV_FROM_HANDLE(radv_descriptor_set, dst_set, copyset->dstSet);
+ const struct radv_descriptor_set_binding_layout *src_binding_layout =
+ src_set->header.layout->binding + copyset->srcBinding;
+ const struct radv_descriptor_set_binding_layout *dst_binding_layout =
+ dst_set->header.layout->binding + copyset->dstBinding;
+ uint32_t *src_ptr = src_set->header.mapped_ptr;
+ uint32_t *dst_ptr = dst_set->header.mapped_ptr;
+ struct radeon_winsys_bo **src_buffer_list = src_set->descriptors;
+ struct radeon_winsys_bo **dst_buffer_list = dst_set->descriptors;
+
+ src_ptr += src_binding_layout->offset / 4;
+ dst_ptr += dst_binding_layout->offset / 4;
+
+ if (src_binding_layout->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
+ src_ptr += copyset->srcArrayElement / 4;
+ dst_ptr += copyset->dstArrayElement / 4;
+
+ memcpy(dst_ptr, src_ptr, copyset->descriptorCount);
+ continue;
+ }
+
+ src_ptr += src_binding_layout->size * copyset->srcArrayElement / 4;
+ dst_ptr += dst_binding_layout->size * copyset->dstArrayElement / 4;
+
+ src_buffer_list += src_binding_layout->buffer_offset;
+ src_buffer_list += copyset->srcArrayElement;
+
+ dst_buffer_list += dst_binding_layout->buffer_offset;
+ dst_buffer_list += copyset->dstArrayElement;
+
+ /* In case of copies between mutable descriptor types
+ * and non-mutable descriptor types. */
+ size_t copy_size = MIN2(src_binding_layout->size, dst_binding_layout->size);
+
+ for (j = 0; j < copyset->descriptorCount; ++j) {
+ switch (src_binding_layout->type) {
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
+ unsigned src_idx = copyset->srcArrayElement + j;
+ unsigned dst_idx = copyset->dstArrayElement + j;
+ struct radv_descriptor_range *src_range, *dst_range;
+ src_idx += src_binding_layout->dynamic_offset_offset;
+ dst_idx += dst_binding_layout->dynamic_offset_offset;
+
+ src_range = src_set->header.dynamic_descriptors + src_idx;
+ dst_range = dst_set->header.dynamic_descriptors + dst_idx;
+ *dst_range = *src_range;
+ break;
+ }
+ default:
+ memcpy(dst_ptr, src_ptr, copy_size);
+ }
+ src_ptr += src_binding_layout->size / 4;
+ dst_ptr += dst_binding_layout->size / 4;
+
+ if (src_binding_layout->type != VK_DESCRIPTOR_TYPE_SAMPLER) {
+ /* Sampler descriptors don't have a buffer list. */
+ dst_buffer_list[j] = src_buffer_list[j];
+ }
+ }
+ }
}
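
The reformatted radv_update_descriptor_sets above steps a uint32_t pointer through the host-mapped set (offset/4, then size/4 per array element) and, on the copy path, clamps the per-descriptor copy width to the smaller of the two binding sizes to cope with mutable descriptor bindings. A rough standalone sketch of that arithmetic follows, with toy structs standing in for the radv types (nothing below is the driver's real layout):

/* Sketch of the offset arithmetic used when writing descriptors into a
 * host-mapped set. The structs are toy stand-ins, not the radv types. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct toy_binding_layout {
   uint32_t offset; /* byte offset of the binding inside the set */
   uint32_t size;   /* byte size of one descriptor in this binding */
};

#define MIN2(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
   uint32_t set_memory[64] = {0}; /* pretend mapped_ptr */
   struct toy_binding_layout binding = {.offset = 32, .size = 16};
   uint32_t first_element = 2, count = 3;

   /* Same stepping as the loop above: offset/4 and size/4 because the
    * mapping is addressed as 32-bit words. */
   uint32_t *ptr = set_memory + binding.offset / 4;
   ptr += binding.size * first_element / 4;

   for (uint32_t j = 0; j < count; ++j) {
      memset(ptr, 0xab, binding.size); /* stand-in for a descriptor write */
      ptr += binding.size / 4;
   }

   /* Copy path: when source and destination bindings disagree on size
    * (mutable vs. non-mutable descriptors), only the smaller size is copied. */
   struct toy_binding_layout src = {.offset = 0, .size = 32};
   struct toy_binding_layout dst = {.offset = 0, .size = 16};
   size_t copy_size = MIN2(src.size, dst.size);
   printf("copy %zu bytes per descriptor\n", copy_size);
   return 0;
}

Dividing by four everywhere simply reflects that the set memory is walked as an array of dwords.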
-void radv_UpdateDescriptorSets(
- VkDevice _device,
- uint32_t descriptorWriteCount,
- const VkWriteDescriptorSet* pDescriptorWrites,
- uint32_t descriptorCopyCount,
- const VkCopyDescriptorSet* pDescriptorCopies)
+void
+radv_UpdateDescriptorSets(VkDevice _device, uint32_t descriptorWriteCount,
+ const VkWriteDescriptorSet *pDescriptorWrites,
+ uint32_t descriptorCopyCount,
+ const VkCopyDescriptorSet *pDescriptorCopies)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_device, device, _device);
- radv_update_descriptor_sets(device, NULL, VK_NULL_HANDLE, descriptorWriteCount, pDescriptorWrites,
- descriptorCopyCount, pDescriptorCopies);
+ radv_update_descriptor_sets(device, NULL, VK_NULL_HANDLE, descriptorWriteCount,
+ pDescriptorWrites, descriptorCopyCount, pDescriptorCopies);
}
-VkResult radv_CreateDescriptorUpdateTemplate(VkDevice _device,
- const VkDescriptorUpdateTemplateCreateInfo *pCreateInfo,
- const VkAllocationCallbacks *pAllocator,
- VkDescriptorUpdateTemplate *pDescriptorUpdateTemplate)
+VkResult
+radv_CreateDescriptorUpdateTemplate(VkDevice _device,
+ const VkDescriptorUpdateTemplateCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkDescriptorUpdateTemplate *pDescriptorUpdateTemplate)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_descriptor_set_layout, set_layout, pCreateInfo->descriptorSetLayout);
- const uint32_t entry_count = pCreateInfo->descriptorUpdateEntryCount;
- const size_t size = sizeof(struct radv_descriptor_update_template) +
- sizeof(struct radv_descriptor_update_template_entry) * entry_count;
- struct radv_descriptor_update_template *templ;
- uint32_t i;
-
- templ = vk_alloc2(&device->vk.alloc, pAllocator, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (!templ)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
-
- vk_object_base_init(&device->vk, &templ->base,
- VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE);
-
- templ->entry_count = entry_count;
-
- if (pCreateInfo->templateType == VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR) {
- RADV_FROM_HANDLE(radv_pipeline_layout, pipeline_layout, pCreateInfo->pipelineLayout);
-
- /* descriptorSetLayout should be ignored for push descriptors
- * and instead it refers to pipelineLayout and set.
- */
- assert(pCreateInfo->set < MAX_SETS);
- set_layout = pipeline_layout->set[pCreateInfo->set].layout;
-
- templ->bind_point = pCreateInfo->pipelineBindPoint;
- }
-
- for (i = 0; i < entry_count; i++) {
- const VkDescriptorUpdateTemplateEntry *entry = &pCreateInfo->pDescriptorUpdateEntries[i];
- const struct radv_descriptor_set_binding_layout *binding_layout =
- set_layout->binding + entry->dstBinding;
- const uint32_t buffer_offset = binding_layout->buffer_offset + entry->dstArrayElement;
- const uint32_t *immutable_samplers = NULL;
- uint32_t dst_offset;
- uint32_t dst_stride;
-
- /* dst_offset is an offset into dynamic_descriptors when the descriptor
- is dynamic, and an offset into mapped_ptr otherwise */
- switch (entry->descriptorType) {
- case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
- case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
- assert(pCreateInfo->templateType == VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET);
- dst_offset = binding_layout->dynamic_offset_offset + entry->dstArrayElement;
- dst_stride = 0; /* Not used */
- break;
- default:
- switch (entry->descriptorType) {
- case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
- case VK_DESCRIPTOR_TYPE_SAMPLER:
- /* Immutable samplers are copied into push descriptors when they are pushed */
- if (pCreateInfo->templateType == VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR &&
- binding_layout->immutable_samplers_offset && !binding_layout->immutable_samplers_equal) {
- immutable_samplers = radv_immutable_samplers(set_layout, binding_layout) + entry->dstArrayElement * 4;
- }
- break;
- default:
- break;
- }
- dst_offset = binding_layout->offset / 4;
- if (entry->descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT)
- dst_offset += entry->dstArrayElement / 4;
- else
- dst_offset += binding_layout->size * entry->dstArrayElement / 4;
-
- dst_stride = binding_layout->size / 4;
- break;
- }
-
- templ->entry[i] = (struct radv_descriptor_update_template_entry) {
- .descriptor_type = entry->descriptorType,
- .descriptor_count = entry->descriptorCount,
- .src_offset = entry->offset,
- .src_stride = entry->stride,
- .dst_offset = dst_offset,
- .dst_stride = dst_stride,
- .buffer_offset = buffer_offset,
- .has_sampler = !binding_layout->immutable_samplers_offset,
- .sampler_offset = radv_combined_image_descriptor_sampler_offset(binding_layout),
- .immutable_samplers = immutable_samplers
- };
- }
-
- *pDescriptorUpdateTemplate = radv_descriptor_update_template_to_handle(templ);
- return VK_SUCCESS;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_descriptor_set_layout, set_layout, pCreateInfo->descriptorSetLayout);
+ const uint32_t entry_count = pCreateInfo->descriptorUpdateEntryCount;
+ const size_t size = sizeof(struct radv_descriptor_update_template) +
+ sizeof(struct radv_descriptor_update_template_entry) * entry_count;
+ struct radv_descriptor_update_template *templ;
+ uint32_t i;
+
+ templ = vk_alloc2(&device->vk.alloc, pAllocator, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (!templ)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ vk_object_base_init(&device->vk, &templ->base, VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE);
+
+ templ->entry_count = entry_count;
+
+ if (pCreateInfo->templateType == VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR) {
+ RADV_FROM_HANDLE(radv_pipeline_layout, pipeline_layout, pCreateInfo->pipelineLayout);
+
+ /* descriptorSetLayout should be ignored for push descriptors
+ * and instead it refers to pipelineLayout and set.
+ */
+ assert(pCreateInfo->set < MAX_SETS);
+ set_layout = pipeline_layout->set[pCreateInfo->set].layout;
+
+ templ->bind_point = pCreateInfo->pipelineBindPoint;
+ }
+
+ for (i = 0; i < entry_count; i++) {
+ const VkDescriptorUpdateTemplateEntry *entry = &pCreateInfo->pDescriptorUpdateEntries[i];
+ const struct radv_descriptor_set_binding_layout *binding_layout =
+ set_layout->binding + entry->dstBinding;
+ const uint32_t buffer_offset = binding_layout->buffer_offset + entry->dstArrayElement;
+ const uint32_t *immutable_samplers = NULL;
+ uint32_t dst_offset;
+ uint32_t dst_stride;
+
+ /* dst_offset is an offset into dynamic_descriptors when the descriptor
+ is dynamic, and an offset into mapped_ptr otherwise */
+ switch (entry->descriptorType) {
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+ assert(pCreateInfo->templateType == VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET);
+ dst_offset = binding_layout->dynamic_offset_offset + entry->dstArrayElement;
+ dst_stride = 0; /* Not used */
+ break;
+ default:
+ switch (entry->descriptorType) {
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ /* Immutable samplers are copied into push descriptors when they are pushed */
+ if (pCreateInfo->templateType ==
+ VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR &&
+ binding_layout->immutable_samplers_offset &&
+ !binding_layout->immutable_samplers_equal) {
+ immutable_samplers =
+ radv_immutable_samplers(set_layout, binding_layout) + entry->dstArrayElement * 4;
+ }
+ break;
+ default:
+ break;
+ }
+ dst_offset = binding_layout->offset / 4;
+ if (entry->descriptorType == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT)
+ dst_offset += entry->dstArrayElement / 4;
+ else
+ dst_offset += binding_layout->size * entry->dstArrayElement / 4;
+
+ dst_stride = binding_layout->size / 4;
+ break;
+ }
+
+ templ->entry[i] = (struct radv_descriptor_update_template_entry){
+ .descriptor_type = entry->descriptorType,
+ .descriptor_count = entry->descriptorCount,
+ .src_offset = entry->offset,
+ .src_stride = entry->stride,
+ .dst_offset = dst_offset,
+ .dst_stride = dst_stride,
+ .buffer_offset = buffer_offset,
+ .has_sampler = !binding_layout->immutable_samplers_offset,
+ .sampler_offset = radv_combined_image_descriptor_sampler_offset(binding_layout),
+ .immutable_samplers = immutable_samplers};
+ }
+
+ *pDescriptorUpdateTemplate = radv_descriptor_update_template_to_handle(templ);
+ return VK_SUCCESS;
}
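
radv_CreateDescriptorUpdateTemplate above precomputes a destination offset and stride per template entry: dynamic buffers get an index into dynamic_descriptors with an unused stride, everything else gets a dword offset into mapped_ptr and a stride of binding size / 4, with inline uniform blocks addressed byte-wise. A hedged, self-contained sketch of that branch structure, using invented types rather than the driver's:

/* Toy model of how a template entry's destination offset/stride could be
 * derived from a binding layout; mirrors the branches above but uses
 * made-up types and names. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct toy_binding {
   uint32_t offset;                /* byte offset of the binding */
   uint32_t size;                  /* byte size of one descriptor */
   uint32_t dynamic_offset_offset; /* index into the dynamic descriptor array */
};

static void
compute_dst(const struct toy_binding *b, uint32_t dst_array_element, bool is_dynamic,
            bool is_inline_uniform_block, uint32_t *dst_offset, uint32_t *dst_stride)
{
   if (is_dynamic) {
      *dst_offset = b->dynamic_offset_offset + dst_array_element; /* array index */
      *dst_stride = 0;                                            /* not used */
   } else {
      *dst_offset = b->offset / 4;
      if (is_inline_uniform_block)
         *dst_offset += dst_array_element / 4; /* inline blocks count bytes, not descriptors */
      else
         *dst_offset += b->size * dst_array_element / 4;
      *dst_stride = b->size / 4;
   }
}

int main(void)
{
   struct toy_binding b = {.offset = 64, .size = 16, .dynamic_offset_offset = 1};
   uint32_t off, stride;
   compute_dst(&b, 3, false, false, &off, &stride);
   printf("dst_offset=%u dwords, dst_stride=%u dwords\n", off, stride);
   return 0;
}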
-void radv_DestroyDescriptorUpdateTemplate(VkDevice _device,
- VkDescriptorUpdateTemplate descriptorUpdateTemplate,
- const VkAllocationCallbacks *pAllocator)
+void
+radv_DestroyDescriptorUpdateTemplate(VkDevice _device,
+ VkDescriptorUpdateTemplate descriptorUpdateTemplate,
+ const VkAllocationCallbacks *pAllocator)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_descriptor_update_template, templ, descriptorUpdateTemplate);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_descriptor_update_template, templ, descriptorUpdateTemplate);
- if (!templ)
- return;
+ if (!templ)
+ return;
- vk_object_base_finish(&templ->base);
- vk_free2(&device->vk.alloc, pAllocator, templ);
+ vk_object_base_finish(&templ->base);
+ vk_free2(&device->vk.alloc, pAllocator, templ);
}
-void radv_update_descriptor_set_with_template(struct radv_device *device,
- struct radv_cmd_buffer *cmd_buffer,
- struct radv_descriptor_set *set,
- VkDescriptorUpdateTemplate descriptorUpdateTemplate,
- const void *pData)
+void
+radv_update_descriptor_set_with_template(struct radv_device *device,
+ struct radv_cmd_buffer *cmd_buffer,
+ struct radv_descriptor_set *set,
+ VkDescriptorUpdateTemplate descriptorUpdateTemplate,
+ const void *pData)
{
- RADV_FROM_HANDLE(radv_descriptor_update_template, templ, descriptorUpdateTemplate);
- uint32_t i;
-
- for (i = 0; i < templ->entry_count; ++i) {
- struct radeon_winsys_bo **buffer_list = set->descriptors + templ->entry[i].buffer_offset;
- uint32_t *pDst = set->header.mapped_ptr + templ->entry[i].dst_offset;
- const uint8_t *pSrc = ((const uint8_t *) pData) + templ->entry[i].src_offset;
- uint32_t j;
-
- if (templ->entry[i].descriptor_type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
- memcpy((uint8_t*)pDst, pSrc, templ->entry[i].descriptor_count);
- continue;
- }
-
- for (j = 0; j < templ->entry[i].descriptor_count; ++j) {
- switch (templ->entry[i].descriptor_type) {
- case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
- case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
- const unsigned idx = templ->entry[i].dst_offset + j;
- assert(!(set->header.layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
- write_dynamic_buffer_descriptor(device, set->header.dynamic_descriptors + idx,
- buffer_list, (struct VkDescriptorBufferInfo *) pSrc);
- break;
- }
- case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
- case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
- write_buffer_descriptor(device, cmd_buffer, pDst, buffer_list,
- (struct VkDescriptorBufferInfo *) pSrc);
- break;
- case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
- case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
- write_texel_buffer_descriptor(device, cmd_buffer, pDst, buffer_list,
- *(VkBufferView *) pSrc);
- break;
- case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
- case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
- case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
- write_image_descriptor(device, cmd_buffer, 64, pDst, buffer_list,
- templ->entry[i].descriptor_type,
- (struct VkDescriptorImageInfo *) pSrc);
- break;
- case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
- write_combined_image_sampler_descriptor(device, cmd_buffer, templ->entry[i].sampler_offset,
- pDst, buffer_list, templ->entry[i].descriptor_type,
- (struct VkDescriptorImageInfo *) pSrc,
- templ->entry[i].has_sampler);
- if (templ->entry[i].immutable_samplers) {
- memcpy((char*)pDst + templ->entry[i].sampler_offset, templ->entry[i].immutable_samplers + 4 * j, 16);
- }
- break;
- case VK_DESCRIPTOR_TYPE_SAMPLER:
- if (templ->entry[i].has_sampler)
- write_sampler_descriptor(device, pDst,
- (struct VkDescriptorImageInfo *) pSrc);
- else if (templ->entry[i].immutable_samplers)
- memcpy(pDst, templ->entry[i].immutable_samplers + 4 * j, 16);
- break;
- default:
- break;
- }
- pSrc += templ->entry[i].src_stride;
- pDst += templ->entry[i].dst_stride;
- ++buffer_list;
- }
- }
+ RADV_FROM_HANDLE(radv_descriptor_update_template, templ, descriptorUpdateTemplate);
+ uint32_t i;
+
+ for (i = 0; i < templ->entry_count; ++i) {
+ struct radeon_winsys_bo **buffer_list = set->descriptors + templ->entry[i].buffer_offset;
+ uint32_t *pDst = set->header.mapped_ptr + templ->entry[i].dst_offset;
+ const uint8_t *pSrc = ((const uint8_t *)pData) + templ->entry[i].src_offset;
+ uint32_t j;
+
+ if (templ->entry[i].descriptor_type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
+ memcpy((uint8_t *)pDst, pSrc, templ->entry[i].descriptor_count);
+ continue;
+ }
+
+ for (j = 0; j < templ->entry[i].descriptor_count; ++j) {
+ switch (templ->entry[i].descriptor_type) {
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
+ const unsigned idx = templ->entry[i].dst_offset + j;
+ assert(!(set->header.layout->flags &
+ VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
+ write_dynamic_buffer_descriptor(device, set->header.dynamic_descriptors + idx,
+ buffer_list, (struct VkDescriptorBufferInfo *)pSrc);
+ break;
+ }
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ write_buffer_descriptor(device, cmd_buffer, pDst, buffer_list,
+ (struct VkDescriptorBufferInfo *)pSrc);
+ break;
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ write_texel_buffer_descriptor(device, cmd_buffer, pDst, buffer_list,
+ *(VkBufferView *)pSrc);
+ break;
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+ write_image_descriptor(device, cmd_buffer, 64, pDst, buffer_list,
+ templ->entry[i].descriptor_type,
+ (struct VkDescriptorImageInfo *)pSrc);
+ break;
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ write_combined_image_sampler_descriptor(
+ device, cmd_buffer, templ->entry[i].sampler_offset, pDst, buffer_list,
+ templ->entry[i].descriptor_type, (struct VkDescriptorImageInfo *)pSrc,
+ templ->entry[i].has_sampler);
+ if (templ->entry[i].immutable_samplers) {
+ memcpy((char *)pDst + templ->entry[i].sampler_offset,
+ templ->entry[i].immutable_samplers + 4 * j, 16);
+ }
+ break;
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ if (templ->entry[i].has_sampler)
+ write_sampler_descriptor(device, pDst, (struct VkDescriptorImageInfo *)pSrc);
+ else if (templ->entry[i].immutable_samplers)
+ memcpy(pDst, templ->entry[i].immutable_samplers + 4 * j, 16);
+ break;
+ default:
+ break;
+ }
+ pSrc += templ->entry[i].src_stride;
+ pDst += templ->entry[i].dst_stride;
+ ++buffer_list;
+ }
+ }
}
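
radv_update_descriptor_set_with_template above consumes application memory using the src_offset and src_stride captured at template creation time. A minimal application-side sketch of how such strided source data might be laid out and walked; the struct and the values are hypothetical, only the stride-walking pattern mirrors the loop above:

/* Application-side view: the template's src_offset/src_stride describe where
 * each descriptor's data lives inside one opaque pData blob. Toy types only. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct toy_buffer_info { /* stand-in for VkDescriptorBufferInfo */
   uint64_t buffer;
   uint64_t offset;
   uint64_t range;
};

int main(void)
{
   /* The application packs three buffer infos with padding between them. */
   struct entry { struct toy_buffer_info info; uint32_t pad; } data[3];
   for (int i = 0; i < 3; i++)
      data[i].info = (struct toy_buffer_info){.buffer = 0x1000 + i, .offset = 0, .range = 256};

   const uint32_t src_offset = 0;                 /* would come from the template entry */
   const uint32_t src_stride = sizeof(struct entry);

   /* The consumer walks pData exactly like the template update loop does. */
   const uint8_t *pSrc = (const uint8_t *)data + src_offset;
   for (int j = 0; j < 3; j++) {
      struct toy_buffer_info info;
      memcpy(&info, pSrc, sizeof(info));
      printf("descriptor %d -> buffer 0x%llx\n", j, (unsigned long long)info.buffer);
      pSrc += src_stride;
   }
   return 0;
}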
-void radv_UpdateDescriptorSetWithTemplate(VkDevice _device,
- VkDescriptorSet descriptorSet,
- VkDescriptorUpdateTemplate descriptorUpdateTemplate,
- const void *pData)
+void
+radv_UpdateDescriptorSetWithTemplate(VkDevice _device, VkDescriptorSet descriptorSet,
+ VkDescriptorUpdateTemplate descriptorUpdateTemplate,
+ const void *pData)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_descriptor_set, set, descriptorSet);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_descriptor_set, set, descriptorSet);
- radv_update_descriptor_set_with_template(device, NULL, set, descriptorUpdateTemplate, pData);
+ radv_update_descriptor_set_with_template(device, NULL, set, descriptorUpdateTemplate, pData);
}
-
-VkResult radv_CreateSamplerYcbcrConversion(VkDevice _device,
- const VkSamplerYcbcrConversionCreateInfo* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkSamplerYcbcrConversion* pYcbcrConversion)
+VkResult
+radv_CreateSamplerYcbcrConversion(VkDevice _device,
+ const VkSamplerYcbcrConversionCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkSamplerYcbcrConversion *pYcbcrConversion)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- struct radv_sampler_ycbcr_conversion *conversion = NULL;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ struct radv_sampler_ycbcr_conversion *conversion = NULL;
- conversion = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*conversion), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ conversion = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*conversion), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (conversion == NULL)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ if (conversion == NULL)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- vk_object_base_init(&device->vk, &conversion->base,
- VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION);
+ vk_object_base_init(&device->vk, &conversion->base, VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION);
- conversion->format = pCreateInfo->format;
- conversion->ycbcr_model = pCreateInfo->ycbcrModel;
- conversion->ycbcr_range = pCreateInfo->ycbcrRange;
- conversion->components = pCreateInfo->components;
- conversion->chroma_offsets[0] = pCreateInfo->xChromaOffset;
- conversion->chroma_offsets[1] = pCreateInfo->yChromaOffset;
- conversion->chroma_filter = pCreateInfo->chromaFilter;
+ conversion->format = pCreateInfo->format;
+ conversion->ycbcr_model = pCreateInfo->ycbcrModel;
+ conversion->ycbcr_range = pCreateInfo->ycbcrRange;
+ conversion->components = pCreateInfo->components;
+ conversion->chroma_offsets[0] = pCreateInfo->xChromaOffset;
+ conversion->chroma_offsets[1] = pCreateInfo->yChromaOffset;
+ conversion->chroma_filter = pCreateInfo->chromaFilter;
- *pYcbcrConversion = radv_sampler_ycbcr_conversion_to_handle(conversion);
- return VK_SUCCESS;
+ *pYcbcrConversion = radv_sampler_ycbcr_conversion_to_handle(conversion);
+ return VK_SUCCESS;
}
-
-void radv_DestroySamplerYcbcrConversion(VkDevice _device,
- VkSamplerYcbcrConversion ycbcrConversion,
- const VkAllocationCallbacks* pAllocator)
+void
+radv_DestroySamplerYcbcrConversion(VkDevice _device, VkSamplerYcbcrConversion ycbcrConversion,
+ const VkAllocationCallbacks *pAllocator)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_sampler_ycbcr_conversion, ycbcr_conversion, ycbcrConversion);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_sampler_ycbcr_conversion, ycbcr_conversion, ycbcrConversion);
- if (!ycbcr_conversion)
- return;
+ if (!ycbcr_conversion)
+ return;
- vk_object_base_finish(&ycbcr_conversion->base);
- vk_free2(&device->vk.alloc, pAllocator, ycbcr_conversion);
+ vk_object_base_finish(&ycbcr_conversion->base);
+ vk_free2(&device->vk.alloc, pAllocator, ycbcr_conversion);
}
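
radv_CreateSamplerYcbcrConversion above simply copies the create-info fields onto the conversion object. For context, here is a sketch of the application-side VkSamplerYcbcrConversionCreateInfo those fields come from; the program only fills and prints the structure (no device and no vkCreateSamplerYcbcrConversion call), and the chosen format and settings are purely illustrative:

/* Application-side sketch: the fields copied by the driver above originate in
 * a VkSamplerYcbcrConversionCreateInfo like this one. Runs without a GPU
 * because no Vulkan entry point is called. */
#include <stdio.h>
#include <vulkan/vulkan.h>

int main(void)
{
   VkSamplerYcbcrConversionCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_CREATE_INFO,
      .format = VK_FORMAT_G8_B8R8_2PLANE_420_UNORM,
      .ycbcrModel = VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709,
      .ycbcrRange = VK_SAMPLER_YCBCR_RANGE_ITU_NARROW,
      .components = {VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
                     VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY},
      .xChromaOffset = VK_CHROMA_LOCATION_MIDPOINT,
      .yChromaOffset = VK_CHROMA_LOCATION_MIDPOINT,
      .chromaFilter = VK_FILTER_LINEAR,
      .forceExplicitReconstruction = VK_FALSE,
   };

   /* These are exactly the fields the driver stores on its conversion object. */
   printf("format=%d model=%d range=%d filter=%d\n", info.format, info.ycbcrModel,
          info.ycbcrRange, info.chromaFilter);
   return 0;
}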
diff --git a/src/amd/vulkan/radv_descriptor_set.h b/src/amd/vulkan/radv_descriptor_set.h
index fdaaca07318..65d33f19dc3 100644
--- a/src/amd/vulkan/radv_descriptor_set.h
+++ b/src/amd/vulkan/radv_descriptor_set.h
@@ -104,27 +104,30 @@ struct radv_pipeline_layout {
static inline const uint32_t *
radv_immutable_samplers(const struct radv_descriptor_set_layout *set,
- const struct radv_descriptor_set_binding_layout *binding) {
- return (const uint32_t*)((const char*)set + binding->immutable_samplers_offset);
+ const struct radv_descriptor_set_binding_layout *binding)
+{
+ return (const uint32_t *)((const char *)set + binding->immutable_samplers_offset);
}
static inline unsigned
-radv_combined_image_descriptor_sampler_offset(const struct radv_descriptor_set_binding_layout *binding)
+radv_combined_image_descriptor_sampler_offset(
+ const struct radv_descriptor_set_binding_layout *binding)
{
- return binding->size - ((!binding->immutable_samplers_equal) ? 16 : 0);
+ return binding->size - ((!binding->immutable_samplers_equal) ? 16 : 0);
}
static inline const struct radv_sampler_ycbcr_conversion *
-radv_immutable_ycbcr_samplers(const struct radv_descriptor_set_layout *set,
- unsigned binding_index)
+radv_immutable_ycbcr_samplers(const struct radv_descriptor_set_layout *set, unsigned binding_index)
{
- if (!set->ycbcr_sampler_offsets_offset)
- return NULL;
+ if (!set->ycbcr_sampler_offsets_offset)
+ return NULL;
- const uint32_t *offsets = (const uint32_t*)((const char*)set + set->ycbcr_sampler_offsets_offset);
+ const uint32_t *offsets =
+ (const uint32_t *)((const char *)set + set->ycbcr_sampler_offsets_offset);
- if (offsets[binding_index] == 0)
- return NULL;
- return (const struct radv_sampler_ycbcr_conversion *)((const char*)set + offsets[binding_index]);
+ if (offsets[binding_index] == 0)
+ return NULL;
+ return (const struct radv_sampler_ycbcr_conversion *)((const char *)set +
+ offsets[binding_index]);
}
#endif /* RADV_DESCRIPTOR_SET_H */
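
The header hunk above reformats inline helpers that locate immutable sampler and ycbcr-sampler data by adding byte offsets stored in the layout to the layout's own base address. A toy illustration of that "fixed header plus trailing data located by a stored offset" pattern; the struct and field names below are made up:

/* Toy illustration of the offset-based lookups in radv_descriptor_set.h:
 * variable-length data is appended after a fixed header in one allocation and
 * found again by adding a stored byte offset to the object's base address. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct toy_layout {
   uint32_t samplers_offset; /* byte offset of the sampler words, 0 if none */
   /* sampler words follow the struct in the same allocation */
};

static const uint32_t *
toy_immutable_samplers(const struct toy_layout *layout)
{
   if (!layout->samplers_offset)
      return NULL;
   return (const uint32_t *)((const char *)layout + layout->samplers_offset);
}

int main(void)
{
   /* One allocation: header + 4 sampler dwords. */
   size_t size = sizeof(struct toy_layout) + 4 * sizeof(uint32_t);
   struct toy_layout *layout = calloc(1, size);
   layout->samplers_offset = sizeof(struct toy_layout);

   uint32_t samplers[4] = {0x11, 0x22, 0x33, 0x44};
   memcpy((char *)layout + layout->samplers_offset, samplers, sizeof(samplers));

   const uint32_t *found = toy_immutable_samplers(layout);
   printf("first sampler dword: 0x%" PRIx32 "\n", found ? found[0] : 0u);
   free(layout);
   return 0;
}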
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index 25af86c3950..863a33e2279 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -25,41 +25,41 @@
* IN THE SOFTWARE.
*/
+#include <fcntl.h>
#include <stdbool.h>
#include <string.h>
-#include <fcntl.h>
+#include "util/disk_cache.h"
+#include "radv_cs.h"
#include "radv_debug.h"
#include "radv_private.h"
#include "radv_shader.h"
-#include "radv_cs.h"
-#include "util/disk_cache.h"
#include "vk_util.h"
#ifdef _WIN32
-typedef void* drmDevicePtr;
+typedef void *drmDevicePtr;
#else
-#include <xf86drm.h>
#include <amdgpu.h>
+#include <xf86drm.h>
#include "drm-uapi/amdgpu_drm.h"
#include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
#endif
-#include "winsys/null/radv_null_winsys_public.h"
-#include "ac_llvm_util.h"
-#include "vk_format.h"
-#include "sid.h"
-#include "git_sha1.h"
#include "util/build_id.h"
#include "util/debug.h"
+#include "util/driconf.h"
#include "util/mesa-sha1.h"
#include "util/timespec.h"
#include "util/u_atomic.h"
-#include "util/driconf.h"
+#include "winsys/null/radv_null_winsys_public.h"
+#include "ac_llvm_util.h"
+#include "git_sha1.h"
+#include "sid.h"
+#include "vk_format.h"
/* The number of IBs per submit isn't infinite, it depends on the ring type
* (ie. some initial setup needed for a submit) and the number of IBs (4 DW).
* This limit is arbitrary but should be safe for now. Ideally, we should get
* this limit from the KMD.
-*/
+ */
#define RADV_MAX_IBS_PER_SUBMIT 192
/* The "RAW" clocks on Linux are called "FAST" on FreeBSD */
@@ -68,257 +68,251 @@ typedef void* drmDevicePtr;
#endif
static struct radv_timeline_point *
-radv_timeline_find_point_at_least_locked(struct radv_device *device,
- struct radv_timeline *timeline,
+radv_timeline_find_point_at_least_locked(struct radv_device *device, struct radv_timeline *timeline,
uint64_t p);
-static struct radv_timeline_point *
-radv_timeline_add_point_locked(struct radv_device *device,
- struct radv_timeline *timeline,
- uint64_t p);
+static struct radv_timeline_point *radv_timeline_add_point_locked(struct radv_device *device,
+ struct radv_timeline *timeline,
+ uint64_t p);
-static void
-radv_timeline_trigger_waiters_locked(struct radv_timeline *timeline,
- struct list_head *processing_list);
+static void radv_timeline_trigger_waiters_locked(struct radv_timeline *timeline,
+ struct list_head *processing_list);
-static
-void radv_destroy_semaphore_part(struct radv_device *device,
- struct radv_semaphore_part *part);
+static void radv_destroy_semaphore_part(struct radv_device *device,
+ struct radv_semaphore_part *part);
-uint64_t radv_get_current_time(void)
+uint64_t
+radv_get_current_time(void)
{
- return os_time_get_nano();
+ return os_time_get_nano();
}
-static uint64_t radv_get_absolute_timeout(uint64_t timeout)
+static uint64_t
+radv_get_absolute_timeout(uint64_t timeout)
{
- uint64_t current_time = radv_get_current_time();
+ uint64_t current_time = radv_get_current_time();
- timeout = MIN2(UINT64_MAX - current_time, timeout);
+ timeout = MIN2(UINT64_MAX - current_time, timeout);
- return current_time + timeout;
+ return current_time + timeout;
}
static int
radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
{
- struct mesa_sha1 ctx;
- unsigned char sha1[20];
- unsigned ptr_size = sizeof(void*);
+ struct mesa_sha1 ctx;
+ unsigned char sha1[20];
+ unsigned ptr_size = sizeof(void *);
- memset(uuid, 0, VK_UUID_SIZE);
- _mesa_sha1_init(&ctx);
+ memset(uuid, 0, VK_UUID_SIZE);
+ _mesa_sha1_init(&ctx);
- if (!disk_cache_get_function_identifier(radv_device_get_cache_uuid, &ctx) ||
- !disk_cache_get_function_identifier(LLVMInitializeAMDGPUTargetInfo, &ctx))
- return -1;
+ if (!disk_cache_get_function_identifier(radv_device_get_cache_uuid, &ctx) ||
+ !disk_cache_get_function_identifier(LLVMInitializeAMDGPUTargetInfo, &ctx))
+ return -1;
- _mesa_sha1_update(&ctx, &family, sizeof(family));
- _mesa_sha1_update(&ctx, &ptr_size, sizeof(ptr_size));
- _mesa_sha1_final(&ctx, sha1);
+ _mesa_sha1_update(&ctx, &family, sizeof(family));
+ _mesa_sha1_update(&ctx, &ptr_size, sizeof(ptr_size));
+ _mesa_sha1_final(&ctx, sha1);
- memcpy(uuid, sha1, VK_UUID_SIZE);
- return 0;
+ memcpy(uuid, sha1, VK_UUID_SIZE);
+ return 0;
}
static void
radv_get_driver_uuid(void *uuid)
{
- ac_compute_driver_uuid(uuid, VK_UUID_SIZE);
+ ac_compute_driver_uuid(uuid, VK_UUID_SIZE);
}
static void
radv_get_device_uuid(struct radeon_info *info, void *uuid)
{
- ac_compute_device_uuid(info, uuid, VK_UUID_SIZE);
+ ac_compute_device_uuid(info, uuid, VK_UUID_SIZE);
}
static uint64_t
radv_get_adjusted_vram_size(struct radv_physical_device *device)
{
- int ov = driQueryOptioni(&device->instance->dri_options,
- "override_vram_size");
- if (ov >= 0)
- return MIN2(device->rad_info.vram_size, (uint64_t)ov << 20);
- return device->rad_info.vram_size;
+ int ov = driQueryOptioni(&device->instance->dri_options, "override_vram_size");
+ if (ov >= 0)
+ return MIN2(device->rad_info.vram_size, (uint64_t)ov << 20);
+ return device->rad_info.vram_size;
}
static uint64_t
radv_get_visible_vram_size(struct radv_physical_device *device)
{
- return MIN2(radv_get_adjusted_vram_size(device) , device->rad_info.vram_vis_size);
+ return MIN2(radv_get_adjusted_vram_size(device), device->rad_info.vram_vis_size);
}
static uint64_t
radv_get_vram_size(struct radv_physical_device *device)
{
- uint64_t total_size = radv_get_adjusted_vram_size(device);
- return total_size - MIN2(total_size, device->rad_info.vram_vis_size);
+ uint64_t total_size = radv_get_adjusted_vram_size(device);
+ return total_size - MIN2(total_size, device->rad_info.vram_vis_size);
}
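
The helpers above split adjusted VRAM into a CPU-visible part and a non-visible remainder, optionally capped by the override_vram_size option. A small worked example of the same MIN2-based split, with invented sizes:

/* Worked example of the VRAM split above, using made-up numbers. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define MIN2(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
   uint64_t vram_size = 8ull << 30;       /* 8 GiB total, hypothetical */
   uint64_t vram_vis_size = 256ull << 20; /* 256 MiB visible BAR, hypothetical */
   int override_mib = -1;                 /* override_vram_size unset */

   uint64_t adjusted = override_mib >= 0 ? MIN2(vram_size, (uint64_t)override_mib << 20)
                                         : vram_size;
   uint64_t visible = MIN2(adjusted, vram_vis_size);
   uint64_t invisible = adjusted - MIN2(adjusted, vram_vis_size);

   printf("visible=%" PRIu64 " MiB, invisible=%" PRIu64 " MiB\n",
          visible >> 20, invisible >> 20);
   return 0;
}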
enum radv_heap {
- RADV_HEAP_VRAM = 1 << 0,
- RADV_HEAP_GTT = 1 << 1,
- RADV_HEAP_VRAM_VIS = 1 << 2,
- RADV_HEAP_MAX = 1 << 3,
+ RADV_HEAP_VRAM = 1 << 0,
+ RADV_HEAP_GTT = 1 << 1,
+ RADV_HEAP_VRAM_VIS = 1 << 2,
+ RADV_HEAP_MAX = 1 << 3,
};
static void
radv_physical_device_init_mem_types(struct radv_physical_device *device)
{
- uint64_t visible_vram_size = radv_get_visible_vram_size(device);
- uint64_t vram_size = radv_get_vram_size(device);
- int vram_index = -1, visible_vram_index = -1, gart_index = -1;
- device->memory_properties.memoryHeapCount = 0;
- device->heaps = 0;
-
- /* Only get a VRAM heap if it is significant, not if it is a 16 MiB
- * remainder above visible VRAM. */
- if (vram_size > 0 && vram_size * 9 >= visible_vram_size) {
- vram_index = device->memory_properties.memoryHeapCount++;
- device->heaps |= RADV_HEAP_VRAM;
- device->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap) {
- .size = vram_size,
- .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
- };
- }
-
- if (device->rad_info.gart_size > 0) {
- gart_index = device->memory_properties.memoryHeapCount++;
- device->heaps |= RADV_HEAP_GTT;
- device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap) {
- .size = device->rad_info.gart_size,
- .flags = 0,
- };
- }
-
- if (visible_vram_size) {
- visible_vram_index = device->memory_properties.memoryHeapCount++;
- device->heaps |= RADV_HEAP_VRAM_VIS;
- device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap) {
- .size = visible_vram_size,
- .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
- };
- }
-
- unsigned type_count = 0;
-
- if (vram_index >= 0 || visible_vram_index >= 0) {
- device->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
- device->memory_flags[type_count] = RADEON_FLAG_NO_CPU_ACCESS;
- device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
- .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
- .heapIndex = vram_index >= 0 ? vram_index : visible_vram_index,
- };
- }
-
- if (gart_index >= 0) {
- device->memory_domains[type_count] = RADEON_DOMAIN_GTT;
- device->memory_flags[type_count] = RADEON_FLAG_GTT_WC | RADEON_FLAG_CPU_ACCESS;
- device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
- .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
- VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
- .heapIndex = gart_index,
- };
- }
- if (visible_vram_index >= 0) {
- device->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
- device->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS;
- device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
- .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
- VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
- VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
- .heapIndex = visible_vram_index,
- };
- }
-
- if (gart_index >= 0) {
- device->memory_domains[type_count] = RADEON_DOMAIN_GTT;
- device->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS;
- device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
- .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
- VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
- VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
- .heapIndex = gart_index,
- };
- }
- device->memory_properties.memoryTypeCount = type_count;
-
- if (device->rad_info.has_l2_uncached) {
- for (int i = 0; i < device->memory_properties.memoryTypeCount; i++) {
- VkMemoryType mem_type = device->memory_properties.memoryTypes[i];
-
- if ((mem_type.propertyFlags & (VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
- VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) ||
- mem_type.propertyFlags == VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) {
-
- VkMemoryPropertyFlags property_flags = mem_type.propertyFlags |
- VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD |
- VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD;
-
- device->memory_domains[type_count] = device->memory_domains[i];
- device->memory_flags[type_count] = device->memory_flags[i] | RADEON_FLAG_VA_UNCACHED;
- device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
- .propertyFlags = property_flags,
- .heapIndex = mem_type.heapIndex,
- };
- }
- }
- device->memory_properties.memoryTypeCount = type_count;
- }
+ uint64_t visible_vram_size = radv_get_visible_vram_size(device);
+ uint64_t vram_size = radv_get_vram_size(device);
+ int vram_index = -1, visible_vram_index = -1, gart_index = -1;
+ device->memory_properties.memoryHeapCount = 0;
+ device->heaps = 0;
+
+ /* Only get a VRAM heap if it is significant, not if it is a 16 MiB
+ * remainder above visible VRAM. */
+ if (vram_size > 0 && vram_size * 9 >= visible_vram_size) {
+ vram_index = device->memory_properties.memoryHeapCount++;
+ device->heaps |= RADV_HEAP_VRAM;
+ device->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap){
+ .size = vram_size,
+ .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
+ };
+ }
+
+ if (device->rad_info.gart_size > 0) {
+ gart_index = device->memory_properties.memoryHeapCount++;
+ device->heaps |= RADV_HEAP_GTT;
+ device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap){
+ .size = device->rad_info.gart_size,
+ .flags = 0,
+ };
+ }
+
+ if (visible_vram_size) {
+ visible_vram_index = device->memory_properties.memoryHeapCount++;
+ device->heaps |= RADV_HEAP_VRAM_VIS;
+ device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap){
+ .size = visible_vram_size,
+ .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
+ };
+ }
+
+ unsigned type_count = 0;
+
+ if (vram_index >= 0 || visible_vram_index >= 0) {
+ device->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
+ device->memory_flags[type_count] = RADEON_FLAG_NO_CPU_ACCESS;
+ device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
+ .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
+ .heapIndex = vram_index >= 0 ? vram_index : visible_vram_index,
+ };
+ }
+
+ if (gart_index >= 0) {
+ device->memory_domains[type_count] = RADEON_DOMAIN_GTT;
+ device->memory_flags[type_count] = RADEON_FLAG_GTT_WC | RADEON_FLAG_CPU_ACCESS;
+ device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
+ .propertyFlags =
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+ .heapIndex = gart_index,
+ };
+ }
+ if (visible_vram_index >= 0) {
+ device->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
+ device->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS;
+ device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
+ .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+ VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+ .heapIndex = visible_vram_index,
+ };
+ }
+
+ if (gart_index >= 0) {
+ device->memory_domains[type_count] = RADEON_DOMAIN_GTT;
+ device->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS;
+ device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
+ .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+ VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
+ .heapIndex = gart_index,
+ };
+ }
+ device->memory_properties.memoryTypeCount = type_count;
+
+ if (device->rad_info.has_l2_uncached) {
+ for (int i = 0; i < device->memory_properties.memoryTypeCount; i++) {
+ VkMemoryType mem_type = device->memory_properties.memoryTypes[i];
+
+ if ((mem_type.propertyFlags &
+ (VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) ||
+ mem_type.propertyFlags == VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) {
+
+ VkMemoryPropertyFlags property_flags = mem_type.propertyFlags |
+ VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD |
+ VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD;
+
+ device->memory_domains[type_count] = device->memory_domains[i];
+ device->memory_flags[type_count] = device->memory_flags[i] | RADEON_FLAG_VA_UNCACHED;
+ device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
+ .propertyFlags = property_flags,
+ .heapIndex = mem_type.heapIndex,
+ };
+ }
+ }
+ device->memory_properties.memoryTypeCount = type_count;
+ }
}
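
radv_physical_device_init_mem_types above fills the VkPhysicalDeviceMemoryProperties table (heaps first, then types pointing at heaps) that applications later search when allocating memory. As a reminder of the consumer side, here is the usual "find a memory type whose flags contain the requested bits" loop, run against a hand-built table so it needs no GPU; the table contents are invented:

/* Standard Vulkan memory-type selection against a hand-built properties table;
 * the table contents are fictional, the search loop is the common pattern. */
#include <stdint.h>
#include <stdio.h>
#include <vulkan/vulkan.h>

static int32_t
find_memory_type(const VkPhysicalDeviceMemoryProperties *props, uint32_t type_bits,
                 VkMemoryPropertyFlags required)
{
   for (uint32_t i = 0; i < props->memoryTypeCount; i++) {
      if ((type_bits & (1u << i)) &&
          (props->memoryTypes[i].propertyFlags & required) == required)
         return (int32_t)i;
   }
   return -1;
}

int main(void)
{
   VkPhysicalDeviceMemoryProperties props = {
      .memoryTypeCount = 2,
      .memoryTypes = {
         {.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, .heapIndex = 0},
         {.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                           VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
          .heapIndex = 1},
      },
      .memoryHeapCount = 2,
      .memoryHeaps = {{.size = 8ull << 30, .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT},
                      {.size = 16ull << 30, .flags = 0}},
   };

   int32_t idx = find_memory_type(&props, 0x3 /* all types allowed */,
                                  VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                                     VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
   printf("host-visible memory type index: %d\n", idx);
   return 0;
}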
static const char *
radv_get_compiler_string(struct radv_physical_device *pdevice)
{
- if (!pdevice->use_llvm) {
- /* Some games like SotTR apply shader workarounds if the LLVM
- * version is too old or if the LLVM version string is
- * missing. This gives 2-5% performance with SotTR and ACO.
- */
- if (driQueryOptionb(&pdevice->instance->dri_options,
- "radv_report_llvm9_version_string")) {
- return "ACO/LLVM 9.0.1";
- }
+ if (!pdevice->use_llvm) {
+ /* Some games like SotTR apply shader workarounds if the LLVM
+ * version is too old or if the LLVM version string is
+ * missing. This gives 2-5% performance with SotTR and ACO.
+ */
+ if (driQueryOptionb(&pdevice->instance->dri_options, "radv_report_llvm9_version_string")) {
+ return "ACO/LLVM 9.0.1";
+ }
- return "ACO";
- }
+ return "ACO";
+ }
- return "LLVM " MESA_LLVM_VERSION_STRING;
+ return "LLVM " MESA_LLVM_VERSION_STRING;
}
int
radv_get_int_debug_option(const char *name, int default_value)
{
- const char *str;
- int result;
+ const char *str;
+ int result;
- str = getenv(name);
- if (!str) {
- result = default_value;
- } else {
- char *endptr;
+ str = getenv(name);
+ if (!str) {
+ result = default_value;
+ } else {
+ char *endptr;
- result = strtol(str, &endptr, 0);
- if (str == endptr) {
- /* No digits founs. */
- result = default_value;
- }
- }
+ result = strtol(str, &endptr, 0);
+ if (str == endptr) {
+         /* No digits found. */
+ result = default_value;
+ }
+ }
- return result;
+ return result;
}
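
radv_get_int_debug_option above falls back to the default when getenv returns nothing or strtol consumes no digits. The same pattern as a standalone program:

/* Same getenv + strtol pattern as above, as a self-contained program. */
#include <stdio.h>
#include <stdlib.h>

static int
get_int_option(const char *name, int default_value)
{
   const char *str = getenv(name);
   if (!str)
      return default_value;

   char *endptr;
   long result = strtol(str, &endptr, 0);
   if (str == endptr) /* no digits found */
      return default_value;
   return (int)result;
}

int main(void)
{
   /* e.g. RADV_THREAD_TRACE=4096 ./a.out */
   printf("RADV_THREAD_TRACE = %d\n", get_int_option("RADV_THREAD_TRACE", -1));
   return 0;
}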
-static bool radv_thread_trace_enabled()
+static bool
+radv_thread_trace_enabled()
{
- return radv_get_int_debug_option("RADV_THREAD_TRACE", -1) >= 0 ||
- getenv("RADV_THREAD_TRACE_TRIGGER");
+ return radv_get_int_debug_option("RADV_THREAD_TRACE", -1) >= 0 ||
+ getenv("RADV_THREAD_TRACE_TRIGGER");
}
-#if defined(VK_USE_PLATFORM_WAYLAND_KHR) || \
- defined(VK_USE_PLATFORM_XCB_KHR) || \
- defined(VK_USE_PLATFORM_XLIB_KHR) || \
- defined(VK_USE_PLATFORM_DISPLAY_KHR)
+#if defined(VK_USE_PLATFORM_WAYLAND_KHR) || defined(VK_USE_PLATFORM_XCB_KHR) || \
+ defined(VK_USE_PLATFORM_XLIB_KHR) || defined(VK_USE_PLATFORM_DISPLAY_KHR)
#define RADV_USE_WSI_PLATFORM
#endif
@@ -328,42 +322,43 @@ static bool radv_thread_trace_enabled()
#define RADV_API_VERSION VK_MAKE_VERSION(1, 2, VK_HEADER_VERSION)
#endif
-VkResult radv_EnumerateInstanceVersion(uint32_t* pApiVersion)
+VkResult
+radv_EnumerateInstanceVersion(uint32_t *pApiVersion)
{
- *pApiVersion = RADV_API_VERSION;
- return VK_SUCCESS;
+ *pApiVersion = RADV_API_VERSION;
+ return VK_SUCCESS;
}
static const struct vk_instance_extension_table radv_instance_extensions_supported = {
- .KHR_device_group_creation = true,
- .KHR_external_fence_capabilities = true,
- .KHR_external_memory_capabilities = true,
- .KHR_external_semaphore_capabilities = true,
- .KHR_get_physical_device_properties2 = true,
- .EXT_debug_report = true,
+ .KHR_device_group_creation = true,
+ .KHR_external_fence_capabilities = true,
+ .KHR_external_memory_capabilities = true,
+ .KHR_external_semaphore_capabilities = true,
+ .KHR_get_physical_device_properties2 = true,
+ .EXT_debug_report = true,
#ifdef RADV_USE_WSI_PLATFORM
- .KHR_get_surface_capabilities2 = true,
- .KHR_surface = true,
- .KHR_surface_protected_capabilities = true,
+ .KHR_get_surface_capabilities2 = true,
+ .KHR_surface = true,
+ .KHR_surface_protected_capabilities = true,
#endif
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
- .KHR_wayland_surface = true,
+ .KHR_wayland_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XCB_KHR
- .KHR_xcb_surface = true,
+ .KHR_xcb_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_KHR
- .KHR_xlib_surface = true,
+ .KHR_xlib_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
- .EXT_acquire_xlib_display = true,
+ .EXT_acquire_xlib_display = true,
#endif
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
- .KHR_display = true,
- .KHR_get_display_properties2 = true,
- .EXT_direct_mode_display = true,
- .EXT_display_surface_counter = true,
+ .KHR_display = true,
+ .KHR_get_display_properties2 = true,
+ .EXT_direct_mode_display = true,
+ .EXT_display_surface_counter = true,
#endif
};
@@ -371,3190 +366,3066 @@ static void
radv_physical_device_get_supported_extensions(const struct radv_physical_device *device,
struct vk_device_extension_table *ext)
{
- *ext = (struct vk_device_extension_table) {
- .KHR_8bit_storage = true,
- .KHR_16bit_storage = true,
- .KHR_bind_memory2 = true,
- .KHR_buffer_device_address = true,
- .KHR_copy_commands2 = true,
- .KHR_create_renderpass2 = true,
- .KHR_dedicated_allocation = true,
- .KHR_deferred_host_operations = true,
- .KHR_depth_stencil_resolve = true,
- .KHR_descriptor_update_template = true,
- .KHR_device_group = true,
- .KHR_draw_indirect_count = true,
- .KHR_driver_properties = true,
- .KHR_external_fence = true,
- .KHR_external_fence_fd = true,
- .KHR_external_memory = true,
- .KHR_external_memory_fd = true,
- .KHR_external_semaphore = true,
- .KHR_external_semaphore_fd = true,
- .KHR_fragment_shading_rate = device->rad_info.chip_class >= GFX10_3,
- .KHR_get_memory_requirements2 = true,
- .KHR_image_format_list = true,
- .KHR_imageless_framebuffer = true,
+ *ext = (struct vk_device_extension_table){
+ .KHR_8bit_storage = true,
+ .KHR_16bit_storage = true,
+ .KHR_bind_memory2 = true,
+ .KHR_buffer_device_address = true,
+ .KHR_copy_commands2 = true,
+ .KHR_create_renderpass2 = true,
+ .KHR_dedicated_allocation = true,
+ .KHR_deferred_host_operations = true,
+ .KHR_depth_stencil_resolve = true,
+ .KHR_descriptor_update_template = true,
+ .KHR_device_group = true,
+ .KHR_draw_indirect_count = true,
+ .KHR_driver_properties = true,
+ .KHR_external_fence = true,
+ .KHR_external_fence_fd = true,
+ .KHR_external_memory = true,
+ .KHR_external_memory_fd = true,
+ .KHR_external_semaphore = true,
+ .KHR_external_semaphore_fd = true,
+ .KHR_fragment_shading_rate = device->rad_info.chip_class >= GFX10_3,
+ .KHR_get_memory_requirements2 = true,
+ .KHR_image_format_list = true,
+ .KHR_imageless_framebuffer = true,
#ifdef RADV_USE_WSI_PLATFORM
- .KHR_incremental_present = true,
+ .KHR_incremental_present = true,
#endif
- .KHR_maintenance1 = true,
- .KHR_maintenance2 = true,
- .KHR_maintenance3 = true,
- .KHR_multiview = true,
- .KHR_pipeline_executable_properties = true,
- .KHR_push_descriptor = true,
- .KHR_relaxed_block_layout = true,
- .KHR_sampler_mirror_clamp_to_edge = true,
- .KHR_sampler_ycbcr_conversion = true,
- .KHR_separate_depth_stencil_layouts = true,
- .KHR_shader_atomic_int64 = LLVM_VERSION_MAJOR >= 9 || !device->use_llvm,
- .KHR_shader_clock = true,
- .KHR_shader_draw_parameters = true,
- .KHR_shader_float16_int8 = true,
- .KHR_shader_float_controls = true,
- .KHR_shader_non_semantic_info = true,
- .KHR_shader_subgroup_extended_types = true,
- .KHR_shader_terminate_invocation = true,
- .KHR_spirv_1_4 = true,
- .KHR_storage_buffer_storage_class = true,
+ .KHR_maintenance1 = true,
+ .KHR_maintenance2 = true,
+ .KHR_maintenance3 = true,
+ .KHR_multiview = true,
+ .KHR_pipeline_executable_properties = true,
+ .KHR_push_descriptor = true,
+ .KHR_relaxed_block_layout = true,
+ .KHR_sampler_mirror_clamp_to_edge = true,
+ .KHR_sampler_ycbcr_conversion = true,
+ .KHR_separate_depth_stencil_layouts = true,
+ .KHR_shader_atomic_int64 = LLVM_VERSION_MAJOR >= 9 || !device->use_llvm,
+ .KHR_shader_clock = true,
+ .KHR_shader_draw_parameters = true,
+ .KHR_shader_float16_int8 = true,
+ .KHR_shader_float_controls = true,
+ .KHR_shader_non_semantic_info = true,
+ .KHR_shader_subgroup_extended_types = true,
+ .KHR_shader_terminate_invocation = true,
+ .KHR_spirv_1_4 = true,
+ .KHR_storage_buffer_storage_class = true,
#ifdef RADV_USE_WSI_PLATFORM
- .KHR_swapchain = true,
- .KHR_swapchain_mutable_format = true,
+ .KHR_swapchain = true,
+ .KHR_swapchain_mutable_format = true,
#endif
- .KHR_timeline_semaphore = true,
- .KHR_uniform_buffer_standard_layout = true,
- .KHR_variable_pointers = true,
- .KHR_vulkan_memory_model = true,
- .KHR_workgroup_memory_explicit_layout = true,
- .KHR_zero_initialize_workgroup_memory = true,
- .EXT_4444_formats = true,
- .EXT_buffer_device_address = true,
- .EXT_calibrated_timestamps = RADV_SUPPORT_CALIBRATED_TIMESTAMPS,
- .EXT_conditional_rendering = true,
- .EXT_conservative_rasterization = device->rad_info.chip_class >= GFX9,
- .EXT_custom_border_color = true,
- .EXT_debug_marker = radv_thread_trace_enabled(),
- .EXT_depth_clip_enable = true,
- .EXT_depth_range_unrestricted = true,
- .EXT_descriptor_indexing = true,
- .EXT_discard_rectangles = true,
+ .KHR_timeline_semaphore = true,
+ .KHR_uniform_buffer_standard_layout = true,
+ .KHR_variable_pointers = true,
+ .KHR_vulkan_memory_model = true,
+ .KHR_workgroup_memory_explicit_layout = true,
+ .KHR_zero_initialize_workgroup_memory = true,
+ .EXT_4444_formats = true,
+ .EXT_buffer_device_address = true,
+ .EXT_calibrated_timestamps = RADV_SUPPORT_CALIBRATED_TIMESTAMPS,
+ .EXT_conditional_rendering = true,
+ .EXT_conservative_rasterization = device->rad_info.chip_class >= GFX9,
+ .EXT_custom_border_color = true,
+ .EXT_debug_marker = radv_thread_trace_enabled(),
+ .EXT_depth_clip_enable = true,
+ .EXT_depth_range_unrestricted = true,
+ .EXT_descriptor_indexing = true,
+ .EXT_discard_rectangles = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
- .EXT_display_control = true,
+ .EXT_display_control = true,
#endif
- .EXT_extended_dynamic_state = true,
- .EXT_external_memory_dma_buf = true,
- .EXT_external_memory_host = device->rad_info.has_userptr,
- .EXT_global_priority = true,
- .EXT_host_query_reset = true,
- .EXT_image_drm_format_modifier = device->rad_info.chip_class >= GFX9,
- .EXT_image_robustness = true,
- .EXT_index_type_uint8 = device->rad_info.chip_class >= GFX8,
- .EXT_inline_uniform_block = true,
- .EXT_line_rasterization = true,
- .EXT_memory_budget = true,
- .EXT_memory_priority = true,
- .EXT_pci_bus_info = true,
- .EXT_pipeline_creation_cache_control = true,
- .EXT_pipeline_creation_feedback = true,
- .EXT_post_depth_coverage = device->rad_info.chip_class >= GFX10,
- .EXT_private_data = true,
- .EXT_queue_family_foreign = true,
- .EXT_robustness2 = true,
- .EXT_sample_locations = device->rad_info.chip_class < GFX10,
- .EXT_sampler_filter_minmax = true,
- .EXT_scalar_block_layout = device->rad_info.chip_class >= GFX7,
- .EXT_shader_atomic_float = true,
- .EXT_shader_demote_to_helper_invocation = LLVM_VERSION_MAJOR >= 9 || !device->use_llvm,
- .EXT_shader_image_atomic_int64 = LLVM_VERSION_MAJOR >= 11 || !device->use_llvm,
- .EXT_shader_stencil_export = true,
- .EXT_shader_subgroup_ballot = true,
- .EXT_shader_subgroup_vote = true,
- .EXT_shader_viewport_index_layer = true,
- .EXT_subgroup_size_control = true,
- .EXT_texel_buffer_alignment = true,
- .EXT_transform_feedback = true,
- .EXT_vertex_attribute_divisor = true,
- .EXT_ycbcr_image_arrays = true,
- .AMD_buffer_marker = true,
- .AMD_device_coherent_memory = true,
- .AMD_draw_indirect_count = true,
- .AMD_gcn_shader = true,
- .AMD_gpu_shader_half_float = device->rad_info.has_packed_math_16bit,
- .AMD_gpu_shader_int16 = device->rad_info.has_packed_math_16bit,
- .AMD_memory_overallocation_behavior = true,
- .AMD_mixed_attachment_samples = true,
- .AMD_rasterization_order = device->rad_info.has_out_of_order_rast,
- .AMD_shader_ballot = true,
- .AMD_shader_core_properties = true,
- .AMD_shader_core_properties2 = true,
- .AMD_shader_explicit_vertex_parameter = true,
- .AMD_shader_fragment_mask = true,
- .AMD_shader_image_load_store_lod = true,
- .AMD_shader_info = true,
- .AMD_shader_trinary_minmax = true,
- .AMD_texture_gather_bias_lod = true,
+ .EXT_extended_dynamic_state = true,
+ .EXT_external_memory_dma_buf = true,
+ .EXT_external_memory_host = device->rad_info.has_userptr,
+ .EXT_global_priority = true,
+ .EXT_host_query_reset = true,
+ .EXT_image_drm_format_modifier = device->rad_info.chip_class >= GFX9,
+ .EXT_image_robustness = true,
+ .EXT_index_type_uint8 = device->rad_info.chip_class >= GFX8,
+ .EXT_inline_uniform_block = true,
+ .EXT_line_rasterization = true,
+ .EXT_memory_budget = true,
+ .EXT_memory_priority = true,
+ .EXT_pci_bus_info = true,
+ .EXT_pipeline_creation_cache_control = true,
+ .EXT_pipeline_creation_feedback = true,
+ .EXT_post_depth_coverage = device->rad_info.chip_class >= GFX10,
+ .EXT_private_data = true,
+ .EXT_queue_family_foreign = true,
+ .EXT_robustness2 = true,
+ .EXT_sample_locations = device->rad_info.chip_class < GFX10,
+ .EXT_sampler_filter_minmax = true,
+ .EXT_scalar_block_layout = device->rad_info.chip_class >= GFX7,
+ .EXT_shader_atomic_float = true,
+ .EXT_shader_demote_to_helper_invocation = LLVM_VERSION_MAJOR >= 9 || !device->use_llvm,
+ .EXT_shader_image_atomic_int64 = LLVM_VERSION_MAJOR >= 11 || !device->use_llvm,
+ .EXT_shader_stencil_export = true,
+ .EXT_shader_subgroup_ballot = true,
+ .EXT_shader_subgroup_vote = true,
+ .EXT_shader_viewport_index_layer = true,
+ .EXT_subgroup_size_control = true,
+ .EXT_texel_buffer_alignment = true,
+ .EXT_transform_feedback = true,
+ .EXT_vertex_attribute_divisor = true,
+ .EXT_ycbcr_image_arrays = true,
+ .AMD_buffer_marker = true,
+ .AMD_device_coherent_memory = true,
+ .AMD_draw_indirect_count = true,
+ .AMD_gcn_shader = true,
+ .AMD_gpu_shader_half_float = device->rad_info.has_packed_math_16bit,
+ .AMD_gpu_shader_int16 = device->rad_info.has_packed_math_16bit,
+ .AMD_memory_overallocation_behavior = true,
+ .AMD_mixed_attachment_samples = true,
+ .AMD_rasterization_order = device->rad_info.has_out_of_order_rast,
+ .AMD_shader_ballot = true,
+ .AMD_shader_core_properties = true,
+ .AMD_shader_core_properties2 = true,
+ .AMD_shader_explicit_vertex_parameter = true,
+ .AMD_shader_fragment_mask = true,
+ .AMD_shader_image_load_store_lod = true,
+ .AMD_shader_info = true,
+ .AMD_shader_trinary_minmax = true,
+ .AMD_texture_gather_bias_lod = true,
#ifdef ANDROID
- .ANDROID_external_memory_android_hardware_buffer = RADV_SUPPORT_ANDROID_HARDWARE_BUFFER,
- .ANDROID_native_buffer = true,
+ .ANDROID_external_memory_android_hardware_buffer = RADV_SUPPORT_ANDROID_HARDWARE_BUFFER,
+ .ANDROID_native_buffer = true,
#endif
- .GOOGLE_decorate_string = true,
- .GOOGLE_hlsl_functionality1 = true,
- .GOOGLE_user_type = true,
- .NV_compute_shader_derivatives = true,
- .VALVE_mutable_descriptor_type = true,
+ .GOOGLE_decorate_string = true,
+ .GOOGLE_hlsl_functionality1 = true,
+ .GOOGLE_user_type = true,
+ .NV_compute_shader_derivatives = true,
+ .VALVE_mutable_descriptor_type = true,
};
}
static VkResult
-radv_physical_device_try_create(struct radv_instance *instance,
- drmDevicePtr drm_device,
- struct radv_physical_device **device_out)
+radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm_device,
+ struct radv_physical_device **device_out)
{
- VkResult result;
- int fd = -1;
- int master_fd = -1;
+ VkResult result;
+ int fd = -1;
+ int master_fd = -1;
#ifdef _WIN32
- assert(drm_device == NULL);
+ assert(drm_device == NULL);
#else
- if (drm_device) {
- const char *path = drm_device->nodes[DRM_NODE_RENDER];
- drmVersionPtr version;
+ if (drm_device) {
+ const char *path = drm_device->nodes[DRM_NODE_RENDER];
+ drmVersionPtr version;
- fd = open(path, O_RDWR | O_CLOEXEC);
- if (fd < 0) {
- if (instance->debug_flags & RADV_DEBUG_STARTUP)
- radv_logi("Could not open device '%s'", path);
+ fd = open(path, O_RDWR | O_CLOEXEC);
+ if (fd < 0) {
+ if (instance->debug_flags & RADV_DEBUG_STARTUP)
+ radv_logi("Could not open device '%s'", path);
- return vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER);
- }
+ return vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER);
+ }
- version = drmGetVersion(fd);
- if (!version) {
- close(fd);
+ version = drmGetVersion(fd);
+ if (!version) {
+ close(fd);
- if (instance->debug_flags & RADV_DEBUG_STARTUP)
- radv_logi("Could not get the kernel driver version for device '%s'", path);
+ if (instance->debug_flags & RADV_DEBUG_STARTUP)
+ radv_logi("Could not get the kernel driver version for device '%s'", path);
- return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
- "failed to get version %s: %m", path);
- }
+ return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER, "failed to get version %s: %m",
+ path);
+ }
- if (strcmp(version->name, "amdgpu")) {
- drmFreeVersion(version);
- close(fd);
+ if (strcmp(version->name, "amdgpu")) {
+ drmFreeVersion(version);
+ close(fd);
- if (instance->debug_flags & RADV_DEBUG_STARTUP)
- radv_logi("Device '%s' is not using the amdgpu kernel driver.", path);
+ if (instance->debug_flags & RADV_DEBUG_STARTUP)
+ radv_logi("Device '%s' is not using the amdgpu kernel driver.", path);
- return VK_ERROR_INCOMPATIBLE_DRIVER;
- }
- drmFreeVersion(version);
+ return VK_ERROR_INCOMPATIBLE_DRIVER;
+ }
+ drmFreeVersion(version);
- if (instance->debug_flags & RADV_DEBUG_STARTUP)
- radv_logi("Found compatible device '%s'.", path);
- }
+ if (instance->debug_flags & RADV_DEBUG_STARTUP)
+ radv_logi("Found compatible device '%s'.", path);
+ }
#endif
- struct radv_physical_device *device =
- vk_zalloc2(&instance->vk.alloc, NULL, sizeof(*device), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
- if (!device) {
- result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- goto fail_fd;
- }
+ struct radv_physical_device *device = vk_zalloc2(&instance->vk.alloc, NULL, sizeof(*device), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
+ if (!device) {
+ result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ goto fail_fd;
+ }
- struct vk_physical_device_dispatch_table dispatch_table;
- vk_physical_device_dispatch_table_from_entrypoints(
- &dispatch_table, &radv_physical_device_entrypoints, true);
+ struct vk_physical_device_dispatch_table dispatch_table;
+ vk_physical_device_dispatch_table_from_entrypoints(&dispatch_table,
+ &radv_physical_device_entrypoints, true);
- result = vk_physical_device_init(&device->vk, &instance->vk, NULL,
- &dispatch_table);
- if (result != VK_SUCCESS) {
- goto fail_alloc;
- }
+ result = vk_physical_device_init(&device->vk, &instance->vk, NULL, &dispatch_table);
+ if (result != VK_SUCCESS) {
+ goto fail_alloc;
+ }
- device->instance = instance;
+ device->instance = instance;
#ifdef _WIN32
- device->ws = radv_null_winsys_create();
+ device->ws = radv_null_winsys_create();
#else
- if (drm_device) {
- device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags,
- instance->perftest_flags);
- } else {
- device->ws = radv_null_winsys_create();
- }
+ if (drm_device) {
+ device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags, instance->perftest_flags);
+ } else {
+ device->ws = radv_null_winsys_create();
+ }
#endif
- if (!device->ws) {
- result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
- "failed to initialize winsys");
- goto fail_base;
- }
+ if (!device->ws) {
+ result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "failed to initialize winsys");
+ goto fail_base;
+ }
#ifndef _WIN32
- if (drm_device && instance->vk.enabled_extensions.KHR_display) {
- master_fd = open(drm_device->nodes[DRM_NODE_PRIMARY], O_RDWR | O_CLOEXEC);
- if (master_fd >= 0) {
- uint32_t accel_working = 0;
- struct drm_amdgpu_info request = {
- .return_pointer = (uintptr_t)&accel_working,
- .return_size = sizeof(accel_working),
- .query = AMDGPU_INFO_ACCEL_WORKING
- };
-
- if (drmCommandWrite(master_fd, DRM_AMDGPU_INFO, &request, sizeof (struct drm_amdgpu_info)) < 0 || !accel_working) {
- close(master_fd);
- master_fd = -1;
- }
- }
- }
+ if (drm_device && instance->vk.enabled_extensions.KHR_display) {
+ master_fd = open(drm_device->nodes[DRM_NODE_PRIMARY], O_RDWR | O_CLOEXEC);
+ if (master_fd >= 0) {
+ uint32_t accel_working = 0;
+ struct drm_amdgpu_info request = {.return_pointer = (uintptr_t)&accel_working,
+ .return_size = sizeof(accel_working),
+ .query = AMDGPU_INFO_ACCEL_WORKING};
+
+ if (drmCommandWrite(master_fd, DRM_AMDGPU_INFO, &request, sizeof(struct drm_amdgpu_info)) <
+ 0 ||
+ !accel_working) {
+ close(master_fd);
+ master_fd = -1;
+ }
+ }
+ }
#endif
- device->master_fd = master_fd;
- device->local_fd = fd;
- device->ws->query_info(device->ws, &device->rad_info);
+ device->master_fd = master_fd;
+ device->local_fd = fd;
+ device->ws->query_info(device->ws, &device->rad_info);
- device->use_llvm = instance->debug_flags & RADV_DEBUG_LLVM;
+ device->use_llvm = instance->debug_flags & RADV_DEBUG_LLVM;
- snprintf(device->name, sizeof(device->name),
- "AMD RADV %s (%s)",
- device->rad_info.name, radv_get_compiler_string(device));
+ snprintf(device->name, sizeof(device->name), "AMD RADV %s (%s)", device->rad_info.name,
+ radv_get_compiler_string(device));
#ifdef ENABLE_SHADER_CACHE
- if (radv_device_get_cache_uuid(device->rad_info.family, device->cache_uuid)) {
- result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
- "cannot generate UUID");
- goto fail_wsi;
- }
-
- /* These flags affect shader compilation. */
- uint64_t shader_env_flags = (device->use_llvm ? 0 : 0x2);
-
- /* The gpu id is already embedded in the uuid so we just pass "radv"
- * when creating the cache.
- */
- char buf[VK_UUID_SIZE * 2 + 1];
- disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
- device->disk_cache = disk_cache_create(device->name, buf, shader_env_flags);
+ if (radv_device_get_cache_uuid(device->rad_info.family, device->cache_uuid)) {
+ result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "cannot generate UUID");
+ goto fail_wsi;
+ }
+
+ /* These flags affect shader compilation. */
+ uint64_t shader_env_flags = (device->use_llvm ? 0 : 0x2);
+
+ /* The gpu id is already embedded in the uuid so we just pass "radv"
+ * when creating the cache.
+ */
+ char buf[VK_UUID_SIZE * 2 + 1];
+ disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
+ device->disk_cache = disk_cache_create(device->name, buf, shader_env_flags);
#endif
- if (device->rad_info.chip_class < GFX8 ||
- device->rad_info.chip_class > GFX10)
- fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
+ if (device->rad_info.chip_class < GFX8 || device->rad_info.chip_class > GFX10)
+ fprintf(stderr,
+ "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
- radv_get_driver_uuid(&device->driver_uuid);
- radv_get_device_uuid(&device->rad_info, &device->device_uuid);
+ radv_get_driver_uuid(&device->driver_uuid);
+ radv_get_device_uuid(&device->rad_info, &device->device_uuid);
- device->out_of_order_rast_allowed = device->rad_info.has_out_of_order_rast &&
- !(device->instance->debug_flags & RADV_DEBUG_NO_OUT_OF_ORDER);
+ device->out_of_order_rast_allowed =
+ device->rad_info.has_out_of_order_rast &&
+ !(device->instance->debug_flags & RADV_DEBUG_NO_OUT_OF_ORDER);
- device->dcc_msaa_allowed =
- (device->instance->perftest_flags & RADV_PERFTEST_DCC_MSAA);
+ device->dcc_msaa_allowed = (device->instance->perftest_flags & RADV_PERFTEST_DCC_MSAA);
- device->use_ngg = device->rad_info.chip_class >= GFX10 &&
- device->rad_info.family != CHIP_NAVI14 &&
- !(device->instance->debug_flags & RADV_DEBUG_NO_NGG);
+ device->use_ngg = device->rad_info.chip_class >= GFX10 &&
+ device->rad_info.family != CHIP_NAVI14 &&
+ !(device->instance->debug_flags & RADV_DEBUG_NO_NGG);
- device->use_ngg_streamout = false;
+ device->use_ngg_streamout = false;
- /* Determine the number of threads per wave for all stages. */
- device->cs_wave_size = 64;
- device->ps_wave_size = 64;
- device->ge_wave_size = 64;
+ /* Determine the number of threads per wave for all stages. */
+ device->cs_wave_size = 64;
+ device->ps_wave_size = 64;
+ device->ge_wave_size = 64;
- if (device->rad_info.chip_class >= GFX10) {
- if (device->instance->perftest_flags & RADV_PERFTEST_CS_WAVE_32)
- device->cs_wave_size = 32;
+ if (device->rad_info.chip_class >= GFX10) {
+ if (device->instance->perftest_flags & RADV_PERFTEST_CS_WAVE_32)
+ device->cs_wave_size = 32;
- /* For pixel shaders, wave64 is recommanded. */
- if (device->instance->perftest_flags & RADV_PERFTEST_PS_WAVE_32)
- device->ps_wave_size = 32;
+      /* For pixel shaders, wave64 is recommended. */
+ if (device->instance->perftest_flags & RADV_PERFTEST_PS_WAVE_32)
+ device->ps_wave_size = 32;
- if (device->instance->perftest_flags & RADV_PERFTEST_GE_WAVE_32)
- device->ge_wave_size = 32;
- }
+ if (device->instance->perftest_flags & RADV_PERFTEST_GE_WAVE_32)
+ device->ge_wave_size = 32;
+ }
- radv_physical_device_init_mem_types(device);
+ radv_physical_device_init_mem_types(device);
- radv_physical_device_get_supported_extensions(device,
- &device->vk.supported_extensions);
+ radv_physical_device_get_supported_extensions(device, &device->vk.supported_extensions);
#ifndef _WIN32
- if (drm_device)
- device->bus_info = *drm_device->businfo.pci;
+ if (drm_device)
+ device->bus_info = *drm_device->businfo.pci;
#endif
- if ((device->instance->debug_flags & RADV_DEBUG_INFO))
- ac_print_gpu_info(&device->rad_info, stdout);
+ if ((device->instance->debug_flags & RADV_DEBUG_INFO))
+ ac_print_gpu_info(&device->rad_info, stdout);
- /* The WSI is structured as a layer on top of the driver, so this has
- * to be the last part of initialization (at least until we get other
- * semi-layers).
- */
- result = radv_init_wsi(device);
- if (result != VK_SUCCESS) {
- vk_error(instance, result);
- goto fail_disk_cache;
- }
+ /* The WSI is structured as a layer on top of the driver, so this has
+ * to be the last part of initialization (at least until we get other
+ * semi-layers).
+ */
+ result = radv_init_wsi(device);
+ if (result != VK_SUCCESS) {
+ vk_error(instance, result);
+ goto fail_disk_cache;
+ }
- *device_out = device;
+ *device_out = device;
- return VK_SUCCESS;
+ return VK_SUCCESS;
fail_disk_cache:
- disk_cache_destroy(device->disk_cache);
+ disk_cache_destroy(device->disk_cache);
fail_wsi:
- device->ws->destroy(device->ws);
+ device->ws->destroy(device->ws);
fail_base:
- vk_physical_device_finish(&device->vk);
+ vk_physical_device_finish(&device->vk);
fail_alloc:
- vk_free(&instance->vk.alloc, device);
+ vk_free(&instance->vk.alloc, device);
fail_fd:
- if (fd != -1)
- close(fd);
- if (master_fd != -1)
- close(master_fd);
- return result;
+ if (fd != -1)
+ close(fd);
+ if (master_fd != -1)
+ close(master_fd);
+ return result;
}
static void
radv_physical_device_destroy(struct radv_physical_device *device)
{
- radv_finish_wsi(device);
- device->ws->destroy(device->ws);
- disk_cache_destroy(device->disk_cache);
- if (device->local_fd != -1)
- close(device->local_fd);
- if (device->master_fd != -1)
- close(device->master_fd);
- vk_physical_device_finish(&device->vk);
- vk_free(&device->instance->vk.alloc, device);
+ radv_finish_wsi(device);
+ device->ws->destroy(device->ws);
+ disk_cache_destroy(device->disk_cache);
+ if (device->local_fd != -1)
+ close(device->local_fd);
+ if (device->master_fd != -1)
+ close(device->master_fd);
+ vk_physical_device_finish(&device->vk);
+ vk_free(&device->instance->vk.alloc, device);
}
static void *
default_alloc_func(void *pUserData, size_t size, size_t align,
VkSystemAllocationScope allocationScope)
{
- return malloc(size);
+ return malloc(size);
}
static void *
-default_realloc_func(void *pUserData, void *pOriginal, size_t size,
- size_t align, VkSystemAllocationScope allocationScope)
+default_realloc_func(void *pUserData, void *pOriginal, size_t size, size_t align,
+ VkSystemAllocationScope allocationScope)
{
- return realloc(pOriginal, size);
+ return realloc(pOriginal, size);
}
static void
default_free_func(void *pUserData, void *pMemory)
{
- free(pMemory);
+ free(pMemory);
}
static const VkAllocationCallbacks default_alloc = {
- .pUserData = NULL,
- .pfnAllocation = default_alloc_func,
- .pfnReallocation = default_realloc_func,
- .pfnFree = default_free_func,
+ .pUserData = NULL,
+ .pfnAllocation = default_alloc_func,
+ .pfnReallocation = default_realloc_func,
+ .pfnFree = default_free_func,
};
static const struct debug_control radv_debug_options[] = {
- {"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
- {"nodcc", RADV_DEBUG_NO_DCC},
- {"shaders", RADV_DEBUG_DUMP_SHADERS},
- {"nocache", RADV_DEBUG_NO_CACHE},
- {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
- {"nohiz", RADV_DEBUG_NO_HIZ},
- {"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
- {"allbos", RADV_DEBUG_ALL_BOS},
- {"noibs", RADV_DEBUG_NO_IBS},
- {"spirv", RADV_DEBUG_DUMP_SPIRV},
- {"vmfaults", RADV_DEBUG_VM_FAULTS},
- {"zerovram", RADV_DEBUG_ZERO_VRAM},
- {"syncshaders", RADV_DEBUG_SYNC_SHADERS},
- {"preoptir", RADV_DEBUG_PREOPTIR},
- {"nodynamicbounds", RADV_DEBUG_NO_DYNAMIC_BOUNDS},
- {"nooutoforder", RADV_DEBUG_NO_OUT_OF_ORDER},
- {"info", RADV_DEBUG_INFO},
- {"errors", RADV_DEBUG_ERRORS},
- {"startup", RADV_DEBUG_STARTUP},
- {"checkir", RADV_DEBUG_CHECKIR},
- {"nothreadllvm", RADV_DEBUG_NOTHREADLLVM},
- {"nobinning", RADV_DEBUG_NOBINNING},
- {"nongg", RADV_DEBUG_NO_NGG},
- {"metashaders", RADV_DEBUG_DUMP_META_SHADERS},
- {"nomemorycache", RADV_DEBUG_NO_MEMORY_CACHE},
- {"discardtodemote", RADV_DEBUG_DISCARD_TO_DEMOTE},
- {"llvm", RADV_DEBUG_LLVM},
- {"forcecompress", RADV_DEBUG_FORCE_COMPRESS},
- {"hang", RADV_DEBUG_HANG},
- {"img", RADV_DEBUG_IMG},
- {"noumr", RADV_DEBUG_NO_UMR},
- {"invariantgeom", RADV_DEBUG_INVARIANT_GEOM},
- {"nodisplaydcc", RADV_DEBUG_NO_DISPLAY_DCC},
- {"notccompatcmask", RADV_DEBUG_NO_TC_COMPAT_CMASK},
- {"novrsflatshading", RADV_DEBUG_NO_VRS_FLAT_SHADING},
- {NULL, 0}
-};
+ {"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
+ {"nodcc", RADV_DEBUG_NO_DCC},
+ {"shaders", RADV_DEBUG_DUMP_SHADERS},
+ {"nocache", RADV_DEBUG_NO_CACHE},
+ {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
+ {"nohiz", RADV_DEBUG_NO_HIZ},
+ {"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
+ {"allbos", RADV_DEBUG_ALL_BOS},
+ {"noibs", RADV_DEBUG_NO_IBS},
+ {"spirv", RADV_DEBUG_DUMP_SPIRV},
+ {"vmfaults", RADV_DEBUG_VM_FAULTS},
+ {"zerovram", RADV_DEBUG_ZERO_VRAM},
+ {"syncshaders", RADV_DEBUG_SYNC_SHADERS},
+ {"preoptir", RADV_DEBUG_PREOPTIR},
+ {"nodynamicbounds", RADV_DEBUG_NO_DYNAMIC_BOUNDS},
+ {"nooutoforder", RADV_DEBUG_NO_OUT_OF_ORDER},
+ {"info", RADV_DEBUG_INFO},
+ {"errors", RADV_DEBUG_ERRORS},
+ {"startup", RADV_DEBUG_STARTUP},
+ {"checkir", RADV_DEBUG_CHECKIR},
+ {"nothreadllvm", RADV_DEBUG_NOTHREADLLVM},
+ {"nobinning", RADV_DEBUG_NOBINNING},
+ {"nongg", RADV_DEBUG_NO_NGG},
+ {"metashaders", RADV_DEBUG_DUMP_META_SHADERS},
+ {"nomemorycache", RADV_DEBUG_NO_MEMORY_CACHE},
+ {"discardtodemote", RADV_DEBUG_DISCARD_TO_DEMOTE},
+ {"llvm", RADV_DEBUG_LLVM},
+ {"forcecompress", RADV_DEBUG_FORCE_COMPRESS},
+ {"hang", RADV_DEBUG_HANG},
+ {"img", RADV_DEBUG_IMG},
+ {"noumr", RADV_DEBUG_NO_UMR},
+ {"invariantgeom", RADV_DEBUG_INVARIANT_GEOM},
+ {"nodisplaydcc", RADV_DEBUG_NO_DISPLAY_DCC},
+ {"notccompatcmask", RADV_DEBUG_NO_TC_COMPAT_CMASK},
+ {"novrsflatshading", RADV_DEBUG_NO_VRS_FLAT_SHADING},
+ {NULL, 0}};
const char *
radv_get_debug_option_name(int id)
{
- assert(id < ARRAY_SIZE(radv_debug_options) - 1);
- return radv_debug_options[id].string;
+ assert(id < ARRAY_SIZE(radv_debug_options) - 1);
+ return radv_debug_options[id].string;
}
static const struct debug_control radv_perftest_options[] = {
- {"localbos", RADV_PERFTEST_LOCAL_BOS},
- {"dccmsaa", RADV_PERFTEST_DCC_MSAA},
- {"bolist", RADV_PERFTEST_BO_LIST},
- {"tccompatcmask", RADV_PERFTEST_TC_COMPAT_CMASK},
- {"cswave32", RADV_PERFTEST_CS_WAVE_32},
- {"pswave32", RADV_PERFTEST_PS_WAVE_32},
- {"gewave32", RADV_PERFTEST_GE_WAVE_32},
- {"dfsm", RADV_PERFTEST_DFSM},
- {"nosam", RADV_PERFTEST_NO_SAM},
- {"sam", RADV_PERFTEST_SAM},
- {"dccstores", RADV_PERFTEST_DCC_STORES},
- {NULL, 0}
-};
+ {"localbos", RADV_PERFTEST_LOCAL_BOS}, {"dccmsaa", RADV_PERFTEST_DCC_MSAA},
+ {"bolist", RADV_PERFTEST_BO_LIST}, {"tccompatcmask", RADV_PERFTEST_TC_COMPAT_CMASK},
+ {"cswave32", RADV_PERFTEST_CS_WAVE_32}, {"pswave32", RADV_PERFTEST_PS_WAVE_32},
+ {"gewave32", RADV_PERFTEST_GE_WAVE_32}, {"dfsm", RADV_PERFTEST_DFSM},
+ {"nosam", RADV_PERFTEST_NO_SAM}, {"sam", RADV_PERFTEST_SAM},
+ {"dccstores", RADV_PERFTEST_DCC_STORES}, {NULL, 0}};
const char *
radv_get_perftest_option_name(int id)
{
- assert(id < ARRAY_SIZE(radv_perftest_options) - 1);
- return radv_perftest_options[id].string;
+ assert(id < ARRAY_SIZE(radv_perftest_options) - 1);
+ return radv_perftest_options[id].string;
}
static void
-radv_handle_per_app_options(struct radv_instance *instance,
- const VkApplicationInfo *info)
-{
- const char *name = info ? info->pApplicationName : NULL;
- const char *engine_name = info ? info->pEngineName : NULL;
-
- if (name) {
- if (!strcmp(name, "DOOM_VFR")) {
- /* Work around a Doom VFR game bug */
- instance->debug_flags |= RADV_DEBUG_NO_DYNAMIC_BOUNDS;
- } else if (!strcmp(name, "Fledge")) {
- /*
- * Zero VRAM for "The Surge 2"
- *
- * This avoid a hang when when rendering any level. Likely
- * uninitialized data in an indirect draw.
- */
- instance->debug_flags |= RADV_DEBUG_ZERO_VRAM;
- } else if (!strcmp(name, "No Man's Sky")) {
- /* Work around a NMS game bug */
- instance->debug_flags |= RADV_DEBUG_DISCARD_TO_DEMOTE;
- } else if (!strcmp(name, "DOOMEternal")) {
- /* Zero VRAM for Doom Eternal to fix rendering issues. */
- instance->debug_flags |= RADV_DEBUG_ZERO_VRAM;
- } else if (!strcmp(name, "ShadowOfTheTomb")) {
- /* Work around flickering foliage for native Shadow of the Tomb Raider
- * on GFX10.3 */
- instance->debug_flags |= RADV_DEBUG_INVARIANT_GEOM;
- }
- }
-
- if (engine_name) {
- if (!strcmp(engine_name, "vkd3d")) {
- /* Zero VRAM for all VKD3D (DX12->VK) games to fix
- * rendering issues.
- */
- instance->debug_flags |= RADV_DEBUG_ZERO_VRAM;
- } else if (!strcmp(engine_name, "Quantic Dream Engine")) {
- /* Fix various artifacts in Detroit: Become Human */
- instance->debug_flags |= RADV_DEBUG_ZERO_VRAM |
- RADV_DEBUG_DISCARD_TO_DEMOTE;
-
- /* Fix rendering issues in Detroit: Become Human
- * because the game uses render loops (it
- * samples/renders from/to the same depth/stencil
- * texture inside the same draw) without input
- * attachments and that is invalid Vulkan usage.
- */
- instance->disable_tc_compat_htile_in_general = true;
- }
- }
-
- instance->enable_mrt_output_nan_fixup =
- driQueryOptionb(&instance->dri_options,
- "radv_enable_mrt_output_nan_fixup");
-
- instance->disable_shrink_image_store =
- driQueryOptionb(&instance->dri_options,
- "radv_disable_shrink_image_store");
-
- if (driQueryOptionb(&instance->dri_options, "radv_no_dynamic_bounds"))
- instance->debug_flags |= RADV_DEBUG_NO_DYNAMIC_BOUNDS;
+radv_handle_per_app_options(struct radv_instance *instance, const VkApplicationInfo *info)
+{
+ const char *name = info ? info->pApplicationName : NULL;
+ const char *engine_name = info ? info->pEngineName : NULL;
+
+ if (name) {
+ if (!strcmp(name, "DOOM_VFR")) {
+ /* Work around a Doom VFR game bug */
+ instance->debug_flags |= RADV_DEBUG_NO_DYNAMIC_BOUNDS;
+ } else if (!strcmp(name, "Fledge")) {
+ /*
+ * Zero VRAM for "The Surge 2"
+ *
+          * This avoids a hang when rendering any level. Likely
+ * uninitialized data in an indirect draw.
+ */
+ instance->debug_flags |= RADV_DEBUG_ZERO_VRAM;
+ } else if (!strcmp(name, "No Man's Sky")) {
+ /* Work around a NMS game bug */
+ instance->debug_flags |= RADV_DEBUG_DISCARD_TO_DEMOTE;
+ } else if (!strcmp(name, "DOOMEternal")) {
+ /* Zero VRAM for Doom Eternal to fix rendering issues. */
+ instance->debug_flags |= RADV_DEBUG_ZERO_VRAM;
+ } else if (!strcmp(name, "ShadowOfTheTomb")) {
+ /* Work around flickering foliage for native Shadow of the Tomb Raider
+ * on GFX10.3 */
+ instance->debug_flags |= RADV_DEBUG_INVARIANT_GEOM;
+ }
+ }
+
+ if (engine_name) {
+ if (!strcmp(engine_name, "vkd3d")) {
+ /* Zero VRAM for all VKD3D (DX12->VK) games to fix
+ * rendering issues.
+ */
+ instance->debug_flags |= RADV_DEBUG_ZERO_VRAM;
+ } else if (!strcmp(engine_name, "Quantic Dream Engine")) {
+ /* Fix various artifacts in Detroit: Become Human */
+ instance->debug_flags |= RADV_DEBUG_ZERO_VRAM | RADV_DEBUG_DISCARD_TO_DEMOTE;
+
+ /* Fix rendering issues in Detroit: Become Human
+ * because the game uses render loops (it
+ * samples/renders from/to the same depth/stencil
+ * texture inside the same draw) without input
+ * attachments and that is invalid Vulkan usage.
+ */
+ instance->disable_tc_compat_htile_in_general = true;
+ }
+ }
+
+ instance->enable_mrt_output_nan_fixup =
+ driQueryOptionb(&instance->dri_options, "radv_enable_mrt_output_nan_fixup");
+
+ instance->disable_shrink_image_store =
+ driQueryOptionb(&instance->dri_options, "radv_disable_shrink_image_store");
+
+ if (driQueryOptionb(&instance->dri_options, "radv_no_dynamic_bounds"))
+ instance->debug_flags |= RADV_DEBUG_NO_DYNAMIC_BOUNDS;
}
static const driOptionDescription radv_dri_options[] = {
- DRI_CONF_SECTION_PERFORMANCE
- DRI_CONF_ADAPTIVE_SYNC(true)
- DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
- DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
- DRI_CONF_VK_X11_ENSURE_MIN_IMAGE_COUNT(false)
- DRI_CONF_RADV_REPORT_LLVM9_VERSION_STRING(false)
- DRI_CONF_RADV_ENABLE_MRT_OUTPUT_NAN_FIXUP(false)
- DRI_CONF_RADV_DISABLE_SHRINK_IMAGE_STORE(false)
- DRI_CONF_RADV_NO_DYNAMIC_BOUNDS(false)
- DRI_CONF_RADV_OVERRIDE_UNIFORM_OFFSET_ALIGNMENT(0)
- DRI_CONF_SECTION_END
-
- DRI_CONF_SECTION_DEBUG
- DRI_CONF_OVERRIDE_VRAM_SIZE()
- DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false)
- DRI_CONF_SECTION_END
-};
+ DRI_CONF_SECTION_PERFORMANCE DRI_CONF_ADAPTIVE_SYNC(
+ true) DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0) DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
+ DRI_CONF_VK_X11_ENSURE_MIN_IMAGE_COUNT(false) DRI_CONF_RADV_REPORT_LLVM9_VERSION_STRING(false)
+ DRI_CONF_RADV_ENABLE_MRT_OUTPUT_NAN_FIXUP(false)
+ DRI_CONF_RADV_DISABLE_SHRINK_IMAGE_STORE(false) DRI_CONF_RADV_NO_DYNAMIC_BOUNDS(false)
+ DRI_CONF_RADV_OVERRIDE_UNIFORM_OFFSET_ALIGNMENT(0) DRI_CONF_SECTION_END
+
+ DRI_CONF_SECTION_DEBUG DRI_CONF_OVERRIDE_VRAM_SIZE()
+ DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false) DRI_CONF_SECTION_END};
-static void radv_init_dri_options(struct radv_instance *instance)
+static void
+radv_init_dri_options(struct radv_instance *instance)
{
- driParseOptionInfo(&instance->available_dri_options, radv_dri_options, ARRAY_SIZE(radv_dri_options));
- driParseConfigFiles(&instance->dri_options,
- &instance->available_dri_options,
- 0, "radv", NULL,
- instance->vk.app_info.app_name,
- instance->vk.app_info.app_version,
- instance->vk.app_info.engine_name,
- instance->vk.app_info.engine_version);
+ driParseOptionInfo(&instance->available_dri_options, radv_dri_options,
+ ARRAY_SIZE(radv_dri_options));
+ driParseConfigFiles(&instance->dri_options, &instance->available_dri_options, 0, "radv", NULL,
+ instance->vk.app_info.app_name, instance->vk.app_info.app_version,
+ instance->vk.app_info.engine_name, instance->vk.app_info.engine_version);
+}
+
+VkResult
+radv_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkInstance *pInstance)
+{
+ struct radv_instance *instance;
+ VkResult result;
+
+ if (!pAllocator)
+ pAllocator = &default_alloc;
+
+ instance = vk_zalloc(pAllocator, sizeof(*instance), 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
+ if (!instance)
+ return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ struct vk_instance_dispatch_table dispatch_table;
+ vk_instance_dispatch_table_from_entrypoints(&dispatch_table, &radv_instance_entrypoints, true);
+ result = vk_instance_init(&instance->vk, &radv_instance_extensions_supported, &dispatch_table,
+ pCreateInfo, pAllocator);
+ if (result != VK_SUCCESS) {
+ vk_free(pAllocator, instance);
+ return vk_error(instance, result);
+ }
+
+ instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"), radv_debug_options);
+
+ const char *radv_perftest_str = getenv("RADV_PERFTEST");
+ instance->perftest_flags = parse_debug_string(radv_perftest_str, radv_perftest_options);
+
+ if (radv_perftest_str) {
+ /* Output warnings for famous RADV_PERFTEST options that no
+ * longer exist or are deprecated.
+ */
+ if (strstr(radv_perftest_str, "aco")) {
+ fprintf(
+ stderr,
+ "*******************************************************************************\n");
+ fprintf(
+ stderr,
+ "* WARNING: Unknown option RADV_PERFTEST='aco'. ACO is enabled by default now. *\n");
+ fprintf(
+ stderr,
+ "*******************************************************************************\n");
+ }
+ if (strstr(radv_perftest_str, "llvm")) {
+ fprintf(
+ stderr,
+ "*********************************************************************************\n");
+ fprintf(
+ stderr,
+ "* WARNING: Unknown option 'RADV_PERFTEST=llvm'. Did you mean 'RADV_DEBUG=llvm'? *\n");
+ fprintf(
+ stderr,
+ "*********************************************************************************\n");
+ abort();
+ }
+ }
+
+ if (instance->debug_flags & RADV_DEBUG_STARTUP)
+ radv_logi("Created an instance");
+
+ instance->physical_devices_enumerated = false;
+ list_inithead(&instance->physical_devices);
+
+ VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
+
+ radv_init_dri_options(instance);
+ radv_handle_per_app_options(instance, pCreateInfo->pApplicationInfo);
+
+ *pInstance = radv_instance_to_handle(instance);
+
+ return VK_SUCCESS;
}
-VkResult radv_CreateInstance(
- const VkInstanceCreateInfo* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkInstance* pInstance)
+void
+radv_DestroyInstance(VkInstance _instance, const VkAllocationCallbacks *pAllocator)
{
- struct radv_instance *instance;
- VkResult result;
+ RADV_FROM_HANDLE(radv_instance, instance, _instance);
- if (!pAllocator)
- pAllocator = &default_alloc;
+ if (!instance)
+ return;
- instance = vk_zalloc(pAllocator, sizeof(*instance), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
- if (!instance)
- return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+ list_for_each_entry_safe(struct radv_physical_device, pdevice, &instance->physical_devices, link)
+ {
+ radv_physical_device_destroy(pdevice);
+ }
- struct vk_instance_dispatch_table dispatch_table;
- vk_instance_dispatch_table_from_entrypoints(
- &dispatch_table, &radv_instance_entrypoints, true);
- result = vk_instance_init(&instance->vk,
- &radv_instance_extensions_supported,
- &dispatch_table,
- pCreateInfo, pAllocator);
- if (result != VK_SUCCESS) {
- vk_free(pAllocator, instance);
- return vk_error(instance, result);
- }
+ VG(VALGRIND_DESTROY_MEMPOOL(instance));
- instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
- radv_debug_options);
+ driDestroyOptionCache(&instance->dri_options);
+ driDestroyOptionInfo(&instance->available_dri_options);
- const char *radv_perftest_str = getenv("RADV_PERFTEST");
- instance->perftest_flags = parse_debug_string(radv_perftest_str,
- radv_perftest_options);
+ vk_instance_finish(&instance->vk);
+ vk_free(&instance->vk.alloc, instance);
+}
- if (radv_perftest_str) {
- /* Output warnings for famous RADV_PERFTEST options that no
- * longer exist or are deprecated.
- */
- if (strstr(radv_perftest_str, "aco")) {
- fprintf(stderr, "*******************************************************************************\n");
- fprintf(stderr, "* WARNING: Unknown option RADV_PERFTEST='aco'. ACO is enabled by default now. *\n");
- fprintf(stderr, "*******************************************************************************\n");
- }
- if (strstr(radv_perftest_str, "llvm")) {
- fprintf(stderr, "*********************************************************************************\n");
- fprintf(stderr, "* WARNING: Unknown option 'RADV_PERFTEST=llvm'. Did you mean 'RADV_DEBUG=llvm'? *\n");
- fprintf(stderr, "*********************************************************************************\n");
- abort();
- }
- }
+static VkResult
+radv_enumerate_physical_devices(struct radv_instance *instance)
+{
+ if (instance->physical_devices_enumerated)
+ return VK_SUCCESS;
- if (instance->debug_flags & RADV_DEBUG_STARTUP)
- radv_logi("Created an instance");
+ instance->physical_devices_enumerated = true;
- instance->physical_devices_enumerated = false;
- list_inithead(&instance->physical_devices);
+ VkResult result = VK_SUCCESS;
- VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
+ if (getenv("RADV_FORCE_FAMILY")) {
+      /* When RADV_FORCE_FAMILY is set, the driver creates a null
+       * device that allows testing the compiler without having an
+ * AMDGPU instance.
+ */
+ struct radv_physical_device *pdevice;
- radv_init_dri_options(instance);
- radv_handle_per_app_options(instance, pCreateInfo->pApplicationInfo);
+ result = radv_physical_device_try_create(instance, NULL, &pdevice);
+ if (result != VK_SUCCESS)
+ return result;
- *pInstance = radv_instance_to_handle(instance);
+ list_addtail(&pdevice->link, &instance->physical_devices);
+ return VK_SUCCESS;
+ }
- return VK_SUCCESS;
+#ifndef _WIN32
+ /* TODO: Check for more devices ? */
+ drmDevicePtr devices[8];
+ int max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
+
+ if (instance->debug_flags & RADV_DEBUG_STARTUP)
+ radv_logi("Found %d drm nodes", max_devices);
+
+ if (max_devices < 1)
+ return vk_error(instance, VK_SUCCESS);
+
+ for (unsigned i = 0; i < (unsigned)max_devices; i++) {
+ if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
+ devices[i]->bustype == DRM_BUS_PCI &&
+ devices[i]->deviceinfo.pci->vendor_id == ATI_VENDOR_ID) {
+
+ struct radv_physical_device *pdevice;
+ result = radv_physical_device_try_create(instance, devices[i], &pdevice);
+ /* Incompatible DRM device, skip. */
+ if (result == VK_ERROR_INCOMPATIBLE_DRIVER) {
+ result = VK_SUCCESS;
+ continue;
+ }
+
+ /* Error creating the physical device, report the error. */
+ if (result != VK_SUCCESS)
+ break;
+
+ list_addtail(&pdevice->link, &instance->physical_devices);
+ }
+ }
+ drmFreeDevices(devices, max_devices);
+#endif
+
+ /* If we successfully enumerated any devices, call it success */
+ return result;
}
-void radv_DestroyInstance(
- VkInstance _instance,
- const VkAllocationCallbacks* pAllocator)
+VkResult
+radv_EnumeratePhysicalDevices(VkInstance _instance, uint32_t *pPhysicalDeviceCount,
+ VkPhysicalDevice *pPhysicalDevices)
{
- RADV_FROM_HANDLE(radv_instance, instance, _instance);
-
- if (!instance)
- return;
+ RADV_FROM_HANDLE(radv_instance, instance, _instance);
+ VK_OUTARRAY_MAKE_TYPED(VkPhysicalDevice, out, pPhysicalDevices, pPhysicalDeviceCount);
- list_for_each_entry_safe(struct radv_physical_device, pdevice,
- &instance->physical_devices, link) {
- radv_physical_device_destroy(pdevice);
- }
+ VkResult result = radv_enumerate_physical_devices(instance);
+ if (result != VK_SUCCESS)
+ return result;
- VG(VALGRIND_DESTROY_MEMPOOL(instance));
+ list_for_each_entry(struct radv_physical_device, pdevice, &instance->physical_devices, link)
+ {
+ vk_outarray_append_typed(VkPhysicalDevice, &out, i)
+ {
+ *i = radv_physical_device_to_handle(pdevice);
+ }
+ }
- driDestroyOptionCache(&instance->dri_options);
- driDestroyOptionInfo(&instance->available_dri_options);
-
- vk_instance_finish(&instance->vk);
- vk_free(&instance->vk.alloc, instance);
+ return vk_outarray_status(&out);
}
-static VkResult
-radv_enumerate_physical_devices(struct radv_instance *instance)
+VkResult
+radv_EnumeratePhysicalDeviceGroups(VkInstance _instance, uint32_t *pPhysicalDeviceGroupCount,
+ VkPhysicalDeviceGroupProperties *pPhysicalDeviceGroupProperties)
{
- if (instance->physical_devices_enumerated)
- return VK_SUCCESS;
-
- instance->physical_devices_enumerated = true;
-
- VkResult result = VK_SUCCESS;
+ RADV_FROM_HANDLE(radv_instance, instance, _instance);
+ VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceGroupProperties, out, pPhysicalDeviceGroupProperties,
+ pPhysicalDeviceGroupCount);
- if (getenv("RADV_FORCE_FAMILY")) {
- /* When RADV_FORCE_FAMILY is set, the driver creates a nul
- * device that allows to test the compiler without having an
- * AMDGPU instance.
- */
- struct radv_physical_device *pdevice;
+ VkResult result = radv_enumerate_physical_devices(instance);
+ if (result != VK_SUCCESS)
+ return result;
- result = radv_physical_device_try_create(instance, NULL, &pdevice);
- if (result != VK_SUCCESS)
- return result;
+ list_for_each_entry(struct radv_physical_device, pdevice, &instance->physical_devices, link)
+ {
+ vk_outarray_append_typed(VkPhysicalDeviceGroupProperties, &out, p)
+ {
+ p->physicalDeviceCount = 1;
+ memset(p->physicalDevices, 0, sizeof(p->physicalDevices));
+ p->physicalDevices[0] = radv_physical_device_to_handle(pdevice);
+ p->subsetAllocation = false;
+ }
+ }
- list_addtail(&pdevice->link, &instance->physical_devices);
- return VK_SUCCESS;
- }
-
-#ifndef _WIN32
- /* TODO: Check for more devices ? */
- drmDevicePtr devices[8];
- int max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
-
- if (instance->debug_flags & RADV_DEBUG_STARTUP)
- radv_logi("Found %d drm nodes", max_devices);
-
- if (max_devices < 1)
- return vk_error(instance, VK_SUCCESS);
-
- for (unsigned i = 0; i < (unsigned)max_devices; i++) {
- if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
- devices[i]->bustype == DRM_BUS_PCI &&
- devices[i]->deviceinfo.pci->vendor_id == ATI_VENDOR_ID) {
-
- struct radv_physical_device *pdevice;
- result = radv_physical_device_try_create(instance, devices[i],
- &pdevice);
- /* Incompatible DRM device, skip. */
- if (result == VK_ERROR_INCOMPATIBLE_DRIVER) {
- result = VK_SUCCESS;
- continue;
- }
-
- /* Error creating the physical device, report the error. */
- if (result != VK_SUCCESS)
- break;
-
- list_addtail(&pdevice->link, &instance->physical_devices);
- }
- }
- drmFreeDevices(devices, max_devices);
-#endif
+ return vk_outarray_status(&out);
+}
- /* If we successfully enumerated any devices, call it success */
- return result;
-}
-
-VkResult radv_EnumeratePhysicalDevices(
- VkInstance _instance,
- uint32_t* pPhysicalDeviceCount,
- VkPhysicalDevice* pPhysicalDevices)
-{
- RADV_FROM_HANDLE(radv_instance, instance, _instance);
- VK_OUTARRAY_MAKE_TYPED(VkPhysicalDevice, out, pPhysicalDevices,
- pPhysicalDeviceCount);
-
- VkResult result = radv_enumerate_physical_devices(instance);
- if (result != VK_SUCCESS)
- return result;
-
- list_for_each_entry(struct radv_physical_device, pdevice,
- &instance->physical_devices, link) {
- vk_outarray_append_typed(VkPhysicalDevice , &out, i) {
- *i = radv_physical_device_to_handle(pdevice);
- }
- }
-
- return vk_outarray_status(&out);
-}
-
-VkResult radv_EnumeratePhysicalDeviceGroups(
- VkInstance _instance,
- uint32_t* pPhysicalDeviceGroupCount,
- VkPhysicalDeviceGroupProperties* pPhysicalDeviceGroupProperties)
-{
- RADV_FROM_HANDLE(radv_instance, instance, _instance);
- VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceGroupProperties, out,
- pPhysicalDeviceGroupProperties,
- pPhysicalDeviceGroupCount);
-
- VkResult result = radv_enumerate_physical_devices(instance);
- if (result != VK_SUCCESS)
- return result;
-
- list_for_each_entry(struct radv_physical_device, pdevice,
- &instance->physical_devices, link) {
- vk_outarray_append_typed(VkPhysicalDeviceGroupProperties, &out, p) {
- p->physicalDeviceCount = 1;
- memset(p->physicalDevices, 0, sizeof(p->physicalDevices));
- p->physicalDevices[0] = radv_physical_device_to_handle(pdevice);
- p->subsetAllocation = false;
- }
- }
-
- return vk_outarray_status(&out);
-}
-
-void radv_GetPhysicalDeviceFeatures(
- VkPhysicalDevice physicalDevice,
- VkPhysicalDeviceFeatures* pFeatures)
-{
- RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
- memset(pFeatures, 0, sizeof(*pFeatures));
-
- *pFeatures = (VkPhysicalDeviceFeatures) {
- .robustBufferAccess = true,
- .fullDrawIndexUint32 = true,
- .imageCubeArray = true,
- .independentBlend = true,
- .geometryShader = true,
- .tessellationShader = true,
- .sampleRateShading = true,
- .dualSrcBlend = true,
- .logicOp = true,
- .multiDrawIndirect = true,
- .drawIndirectFirstInstance = true,
- .depthClamp = true,
- .depthBiasClamp = true,
- .fillModeNonSolid = true,
- .depthBounds = true,
- .wideLines = true,
- .largePoints = true,
- .alphaToOne = false,
- .multiViewport = true,
- .samplerAnisotropy = true,
- .textureCompressionETC2 = radv_device_supports_etc(pdevice),
- .textureCompressionASTC_LDR = false,
- .textureCompressionBC = true,
- .occlusionQueryPrecise = true,
- .pipelineStatisticsQuery = true,
- .vertexPipelineStoresAndAtomics = true,
- .fragmentStoresAndAtomics = true,
- .shaderTessellationAndGeometryPointSize = true,
- .shaderImageGatherExtended = true,
- .shaderStorageImageExtendedFormats = true,
- .shaderStorageImageMultisample = true,
- .shaderUniformBufferArrayDynamicIndexing = true,
- .shaderSampledImageArrayDynamicIndexing = true,
- .shaderStorageBufferArrayDynamicIndexing = true,
- .shaderStorageImageArrayDynamicIndexing = true,
- .shaderStorageImageReadWithoutFormat = true,
- .shaderStorageImageWriteWithoutFormat = true,
- .shaderClipDistance = true,
- .shaderCullDistance = true,
- .shaderFloat64 = true,
- .shaderInt64 = true,
- .shaderInt16 = true,
- .sparseBinding = true,
- .sparseResidencyBuffer = pdevice->rad_info.family >= CHIP_POLARIS10,
- .sparseResidencyImage2D = pdevice->rad_info.family >= CHIP_POLARIS10,
- .sparseResidencyAliased = pdevice->rad_info.family >= CHIP_POLARIS10,
- .variableMultisampleRate = true,
- .shaderResourceMinLod = true,
- .shaderResourceResidency = true,
- .inheritedQueries = true,
- };
+void
+radv_GetPhysicalDeviceFeatures(VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures *pFeatures)
+{
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
+ memset(pFeatures, 0, sizeof(*pFeatures));
+
+ *pFeatures = (VkPhysicalDeviceFeatures){
+ .robustBufferAccess = true,
+ .fullDrawIndexUint32 = true,
+ .imageCubeArray = true,
+ .independentBlend = true,
+ .geometryShader = true,
+ .tessellationShader = true,
+ .sampleRateShading = true,
+ .dualSrcBlend = true,
+ .logicOp = true,
+ .multiDrawIndirect = true,
+ .drawIndirectFirstInstance = true,
+ .depthClamp = true,
+ .depthBiasClamp = true,
+ .fillModeNonSolid = true,
+ .depthBounds = true,
+ .wideLines = true,
+ .largePoints = true,
+ .alphaToOne = false,
+ .multiViewport = true,
+ .samplerAnisotropy = true,
+ .textureCompressionETC2 = radv_device_supports_etc(pdevice),
+ .textureCompressionASTC_LDR = false,
+ .textureCompressionBC = true,
+ .occlusionQueryPrecise = true,
+ .pipelineStatisticsQuery = true,
+ .vertexPipelineStoresAndAtomics = true,
+ .fragmentStoresAndAtomics = true,
+ .shaderTessellationAndGeometryPointSize = true,
+ .shaderImageGatherExtended = true,
+ .shaderStorageImageExtendedFormats = true,
+ .shaderStorageImageMultisample = true,
+ .shaderUniformBufferArrayDynamicIndexing = true,
+ .shaderSampledImageArrayDynamicIndexing = true,
+ .shaderStorageBufferArrayDynamicIndexing = true,
+ .shaderStorageImageArrayDynamicIndexing = true,
+ .shaderStorageImageReadWithoutFormat = true,
+ .shaderStorageImageWriteWithoutFormat = true,
+ .shaderClipDistance = true,
+ .shaderCullDistance = true,
+ .shaderFloat64 = true,
+ .shaderInt64 = true,
+ .shaderInt16 = true,
+ .sparseBinding = true,
+ .sparseResidencyBuffer = pdevice->rad_info.family >= CHIP_POLARIS10,
+ .sparseResidencyImage2D = pdevice->rad_info.family >= CHIP_POLARIS10,
+ .sparseResidencyAliased = pdevice->rad_info.family >= CHIP_POLARIS10,
+ .variableMultisampleRate = true,
+ .shaderResourceMinLod = true,
+ .shaderResourceResidency = true,
+ .inheritedQueries = true,
+ };
}
static void
radv_get_physical_device_features_1_1(struct radv_physical_device *pdevice,
- VkPhysicalDeviceVulkan11Features *f)
+ VkPhysicalDeviceVulkan11Features *f)
{
- assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES);
+ assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES);
- f->storageBuffer16BitAccess = true;
- f->uniformAndStorageBuffer16BitAccess = true;
- f->storagePushConstant16 = true;
- f->storageInputOutput16 = pdevice->rad_info.has_packed_math_16bit && (LLVM_VERSION_MAJOR >= 9 || !pdevice->use_llvm);
- f->multiview = true;
- f->multiviewGeometryShader = true;
- f->multiviewTessellationShader = true;
- f->variablePointersStorageBuffer = true;
- f->variablePointers = true;
- f->protectedMemory = false;
- f->samplerYcbcrConversion = true;
- f->shaderDrawParameters = true;
+ f->storageBuffer16BitAccess = true;
+ f->uniformAndStorageBuffer16BitAccess = true;
+ f->storagePushConstant16 = true;
+ f->storageInputOutput16 =
+ pdevice->rad_info.has_packed_math_16bit && (LLVM_VERSION_MAJOR >= 9 || !pdevice->use_llvm);
+ f->multiview = true;
+ f->multiviewGeometryShader = true;
+ f->multiviewTessellationShader = true;
+ f->variablePointersStorageBuffer = true;
+ f->variablePointers = true;
+ f->protectedMemory = false;
+ f->samplerYcbcrConversion = true;
+ f->shaderDrawParameters = true;
}
static void
radv_get_physical_device_features_1_2(struct radv_physical_device *pdevice,
- VkPhysicalDeviceVulkan12Features *f)
-{
- assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES);
-
- f->samplerMirrorClampToEdge = true;
- f->drawIndirectCount = true;
- f->storageBuffer8BitAccess = true;
- f->uniformAndStorageBuffer8BitAccess = true;
- f->storagePushConstant8 = true;
- f->shaderBufferInt64Atomics = LLVM_VERSION_MAJOR >= 9 || !pdevice->use_llvm;
- f->shaderSharedInt64Atomics = LLVM_VERSION_MAJOR >= 9 || !pdevice->use_llvm;
- f->shaderFloat16 = pdevice->rad_info.has_packed_math_16bit;
- f->shaderInt8 = true;
-
- f->descriptorIndexing = true;
- f->shaderInputAttachmentArrayDynamicIndexing = true;
- f->shaderUniformTexelBufferArrayDynamicIndexing = true;
- f->shaderStorageTexelBufferArrayDynamicIndexing = true;
- f->shaderUniformBufferArrayNonUniformIndexing = true;
- f->shaderSampledImageArrayNonUniformIndexing = true;
- f->shaderStorageBufferArrayNonUniformIndexing = true;
- f->shaderStorageImageArrayNonUniformIndexing = true;
- f->shaderInputAttachmentArrayNonUniformIndexing = true;
- f->shaderUniformTexelBufferArrayNonUniformIndexing = true;
- f->shaderStorageTexelBufferArrayNonUniformIndexing = true;
- f->descriptorBindingUniformBufferUpdateAfterBind = true;
- f->descriptorBindingSampledImageUpdateAfterBind = true;
- f->descriptorBindingStorageImageUpdateAfterBind = true;
- f->descriptorBindingStorageBufferUpdateAfterBind = true;
- f->descriptorBindingUniformTexelBufferUpdateAfterBind = true;
- f->descriptorBindingStorageTexelBufferUpdateAfterBind = true;
- f->descriptorBindingUpdateUnusedWhilePending = true;
- f->descriptorBindingPartiallyBound = true;
- f->descriptorBindingVariableDescriptorCount = true;
- f->runtimeDescriptorArray = true;
-
- f->samplerFilterMinmax = true;
- f->scalarBlockLayout = pdevice->rad_info.chip_class >= GFX7;
- f->imagelessFramebuffer = true;
- f->uniformBufferStandardLayout = true;
- f->shaderSubgroupExtendedTypes = true;
- f->separateDepthStencilLayouts = true;
- f->hostQueryReset = true;
- f->timelineSemaphore = true,
- f->bufferDeviceAddress = true;
- f->bufferDeviceAddressCaptureReplay = false;
- f->bufferDeviceAddressMultiDevice = false;
- f->vulkanMemoryModel = true;
- f->vulkanMemoryModelDeviceScope = true;
- f->vulkanMemoryModelAvailabilityVisibilityChains = false;
- f->shaderOutputViewportIndex = true;
- f->shaderOutputLayer = true;
- f->subgroupBroadcastDynamicId = true;
-}
-
-void radv_GetPhysicalDeviceFeatures2(
- VkPhysicalDevice physicalDevice,
- VkPhysicalDeviceFeatures2 *pFeatures)
-{
- RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
- radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
-
- VkPhysicalDeviceVulkan11Features core_1_1 = {
- .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES,
- };
- radv_get_physical_device_features_1_1(pdevice, &core_1_1);
-
- VkPhysicalDeviceVulkan12Features core_1_2 = {
- .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
- };
- radv_get_physical_device_features_1_2(pdevice, &core_1_2);
-
-#define CORE_FEATURE(major, minor, feature) \
- features->feature = core_##major##_##minor.feature
-
- vk_foreach_struct(ext, pFeatures->pNext) {
- switch (ext->sType) {
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES: {
- VkPhysicalDeviceVariablePointersFeatures *features = (void *)ext;
- CORE_FEATURE(1, 1, variablePointersStorageBuffer);
- CORE_FEATURE(1, 1, variablePointers);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES: {
- VkPhysicalDeviceMultiviewFeatures *features = (VkPhysicalDeviceMultiviewFeatures*)ext;
- CORE_FEATURE(1, 1, multiview);
- CORE_FEATURE(1, 1, multiviewGeometryShader);
- CORE_FEATURE(1, 1, multiviewTessellationShader);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETERS_FEATURES: {
- VkPhysicalDeviceShaderDrawParametersFeatures *features =
- (VkPhysicalDeviceShaderDrawParametersFeatures*)ext;
- CORE_FEATURE(1, 1, shaderDrawParameters);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: {
- VkPhysicalDeviceProtectedMemoryFeatures *features =
- (VkPhysicalDeviceProtectedMemoryFeatures*)ext;
- CORE_FEATURE(1, 1, protectedMemory);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: {
- VkPhysicalDevice16BitStorageFeatures *features =
- (VkPhysicalDevice16BitStorageFeatures*)ext;
- CORE_FEATURE(1, 1, storageBuffer16BitAccess);
- CORE_FEATURE(1, 1, uniformAndStorageBuffer16BitAccess);
- CORE_FEATURE(1, 1, storagePushConstant16);
- CORE_FEATURE(1, 1, storageInputOutput16);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: {
- VkPhysicalDeviceSamplerYcbcrConversionFeatures *features =
- (VkPhysicalDeviceSamplerYcbcrConversionFeatures*)ext;
- CORE_FEATURE(1, 1, samplerYcbcrConversion);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES: {
- VkPhysicalDeviceDescriptorIndexingFeatures *features =
- (VkPhysicalDeviceDescriptorIndexingFeatures*)ext;
- CORE_FEATURE(1, 2, shaderInputAttachmentArrayDynamicIndexing);
- CORE_FEATURE(1, 2, shaderUniformTexelBufferArrayDynamicIndexing);
- CORE_FEATURE(1, 2, shaderStorageTexelBufferArrayDynamicIndexing);
- CORE_FEATURE(1, 2, shaderUniformBufferArrayNonUniformIndexing);
- CORE_FEATURE(1, 2, shaderSampledImageArrayNonUniformIndexing);
- CORE_FEATURE(1, 2, shaderStorageBufferArrayNonUniformIndexing);
- CORE_FEATURE(1, 2, shaderStorageImageArrayNonUniformIndexing);
- CORE_FEATURE(1, 2, shaderInputAttachmentArrayNonUniformIndexing);
- CORE_FEATURE(1, 2, shaderUniformTexelBufferArrayNonUniformIndexing);
- CORE_FEATURE(1, 2, shaderStorageTexelBufferArrayNonUniformIndexing);
- CORE_FEATURE(1, 2, descriptorBindingUniformBufferUpdateAfterBind);
- CORE_FEATURE(1, 2, descriptorBindingSampledImageUpdateAfterBind);
- CORE_FEATURE(1, 2, descriptorBindingStorageImageUpdateAfterBind);
- CORE_FEATURE(1, 2, descriptorBindingStorageBufferUpdateAfterBind);
- CORE_FEATURE(1, 2, descriptorBindingUniformTexelBufferUpdateAfterBind);
- CORE_FEATURE(1, 2, descriptorBindingStorageTexelBufferUpdateAfterBind);
- CORE_FEATURE(1, 2, descriptorBindingUpdateUnusedWhilePending);
- CORE_FEATURE(1, 2, descriptorBindingPartiallyBound);
- CORE_FEATURE(1, 2, descriptorBindingVariableDescriptorCount);
- CORE_FEATURE(1, 2, runtimeDescriptorArray);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: {
- VkPhysicalDeviceConditionalRenderingFeaturesEXT *features =
- (VkPhysicalDeviceConditionalRenderingFeaturesEXT*)ext;
- features->conditionalRendering = true;
- features->inheritedConditionalRendering = false;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: {
- VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *features =
- (VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *)ext;
- features->vertexAttributeInstanceRateDivisor = true;
- features->vertexAttributeInstanceRateZeroDivisor = true;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT: {
- VkPhysicalDeviceTransformFeedbackFeaturesEXT *features =
- (VkPhysicalDeviceTransformFeedbackFeaturesEXT*)ext;
- features->transformFeedback = true;
- features->geometryStreams = !pdevice->use_ngg_streamout;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES: {
- VkPhysicalDeviceScalarBlockLayoutFeatures *features =
- (VkPhysicalDeviceScalarBlockLayoutFeatures *)ext;
- CORE_FEATURE(1, 2, scalarBlockLayout);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PRIORITY_FEATURES_EXT: {
- VkPhysicalDeviceMemoryPriorityFeaturesEXT *features =
- (VkPhysicalDeviceMemoryPriorityFeaturesEXT *)ext;
- features->memoryPriority = true;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_EXT: {
- VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *features =
- (VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *)ext;
- features->bufferDeviceAddress = true;
- features->bufferDeviceAddressCaptureReplay = false;
- features->bufferDeviceAddressMultiDevice = false;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES: {
- VkPhysicalDeviceBufferDeviceAddressFeatures *features =
- (VkPhysicalDeviceBufferDeviceAddressFeatures *)ext;
- CORE_FEATURE(1, 2, bufferDeviceAddress);
- CORE_FEATURE(1, 2, bufferDeviceAddressCaptureReplay);
- CORE_FEATURE(1, 2, bufferDeviceAddressMultiDevice);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT: {
- VkPhysicalDeviceDepthClipEnableFeaturesEXT *features =
- (VkPhysicalDeviceDepthClipEnableFeaturesEXT *)ext;
- features->depthClipEnable = true;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES: {
- VkPhysicalDeviceHostQueryResetFeatures *features =
- (VkPhysicalDeviceHostQueryResetFeatures *)ext;
- CORE_FEATURE(1, 2, hostQueryReset);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES: {
- VkPhysicalDevice8BitStorageFeatures *features =
- (VkPhysicalDevice8BitStorageFeatures *)ext;
- CORE_FEATURE(1, 2, storageBuffer8BitAccess);
- CORE_FEATURE(1, 2, uniformAndStorageBuffer8BitAccess);
- CORE_FEATURE(1, 2, storagePushConstant8);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES: {
- VkPhysicalDeviceShaderFloat16Int8Features *features =
- (VkPhysicalDeviceShaderFloat16Int8Features*)ext;
- CORE_FEATURE(1, 2, shaderFloat16);
- CORE_FEATURE(1, 2, shaderInt8);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES: {
- VkPhysicalDeviceShaderAtomicInt64Features *features =
- (VkPhysicalDeviceShaderAtomicInt64Features *)ext;
- CORE_FEATURE(1, 2, shaderBufferInt64Atomics);
- CORE_FEATURE(1, 2, shaderSharedInt64Atomics);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT: {
- VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *features =
- (VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *)ext;
- features->shaderDemoteToHelperInvocation = LLVM_VERSION_MAJOR >= 9 || !pdevice->use_llvm;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES_EXT: {
- VkPhysicalDeviceInlineUniformBlockFeaturesEXT *features =
- (VkPhysicalDeviceInlineUniformBlockFeaturesEXT *)ext;
-
- features->inlineUniformBlock = true;
- features->descriptorBindingInlineUniformBlockUpdateAfterBind = true;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_NV: {
- VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *features =
- (VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *)ext;
- features->computeDerivativeGroupQuads = false;
- features->computeDerivativeGroupLinear = true;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_YCBCR_IMAGE_ARRAYS_FEATURES_EXT: {
- VkPhysicalDeviceYcbcrImageArraysFeaturesEXT *features =
- (VkPhysicalDeviceYcbcrImageArraysFeaturesEXT*)ext;
- features->ycbcrImageArrays = true;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES: {
- VkPhysicalDeviceUniformBufferStandardLayoutFeatures *features =
- (VkPhysicalDeviceUniformBufferStandardLayoutFeatures *)ext;
- CORE_FEATURE(1, 2, uniformBufferStandardLayout);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT: {
- VkPhysicalDeviceIndexTypeUint8FeaturesEXT *features =
- (VkPhysicalDeviceIndexTypeUint8FeaturesEXT *)ext;
- features->indexTypeUint8 = pdevice->rad_info.chip_class >= GFX8;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGELESS_FRAMEBUFFER_FEATURES: {
- VkPhysicalDeviceImagelessFramebufferFeatures *features =
- (VkPhysicalDeviceImagelessFramebufferFeatures *)ext;
- CORE_FEATURE(1, 2, imagelessFramebuffer);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR: {
- VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *features =
- (VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *)ext;
- features->pipelineExecutableInfo = true;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CLOCK_FEATURES_KHR: {
- VkPhysicalDeviceShaderClockFeaturesKHR *features =
- (VkPhysicalDeviceShaderClockFeaturesKHR *)ext;
- features->shaderSubgroupClock = true;
- features->shaderDeviceClock = pdevice->rad_info.chip_class >= GFX8;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT: {
- VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *features =
- (VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *)ext;
- features->texelBufferAlignment = true;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES: {
- VkPhysicalDeviceTimelineSemaphoreFeatures *features =
- (VkPhysicalDeviceTimelineSemaphoreFeatures *) ext;
- CORE_FEATURE(1, 2, timelineSemaphore);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT: {
- VkPhysicalDeviceSubgroupSizeControlFeaturesEXT *features =
- (VkPhysicalDeviceSubgroupSizeControlFeaturesEXT *)ext;
- features->subgroupSizeControl = true;
- features->computeFullSubgroups = true;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COHERENT_MEMORY_FEATURES_AMD: {
- VkPhysicalDeviceCoherentMemoryFeaturesAMD *features =
- (VkPhysicalDeviceCoherentMemoryFeaturesAMD *)ext;
- features->deviceCoherentMemory = pdevice->rad_info.has_l2_uncached;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_EXTENDED_TYPES_FEATURES: {
- VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures *features =
- (VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures *)ext;
- CORE_FEATURE(1, 2, shaderSubgroupExtendedTypes);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SEPARATE_DEPTH_STENCIL_LAYOUTS_FEATURES_KHR: {
- VkPhysicalDeviceSeparateDepthStencilLayoutsFeaturesKHR *features =
- (VkPhysicalDeviceSeparateDepthStencilLayoutsFeaturesKHR *)ext;
- CORE_FEATURE(1, 2, separateDepthStencilLayouts);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES: {
- radv_get_physical_device_features_1_1(pdevice, (void *)ext);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES: {
- radv_get_physical_device_features_1_2(pdevice, (void *)ext);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT: {
- VkPhysicalDeviceLineRasterizationFeaturesEXT *features =
- (VkPhysicalDeviceLineRasterizationFeaturesEXT *)ext;
- features->rectangularLines = false;
- features->bresenhamLines = true;
- features->smoothLines = false;
- features->stippledRectangularLines = false;
- /* FIXME: Some stippled Bresenham CTS fails on Vega10
- * but work on Raven.
- */
- features->stippledBresenhamLines = pdevice->rad_info.chip_class != GFX9;
- features->stippledSmoothLines = false;
- break;
- }
- case VK_STRUCTURE_TYPE_DEVICE_MEMORY_OVERALLOCATION_CREATE_INFO_AMD: {
- VkDeviceMemoryOverallocationCreateInfoAMD *features =
- (VkDeviceMemoryOverallocationCreateInfoAMD *)ext;
- features->overallocationBehavior = true;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: {
- VkPhysicalDeviceRobustness2FeaturesEXT *features =
- (VkPhysicalDeviceRobustness2FeaturesEXT *)ext;
- features->robustBufferAccess2 = true;
- features->robustImageAccess2 = true;
- features->nullDescriptor = true;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {
- VkPhysicalDeviceCustomBorderColorFeaturesEXT *features =
- (VkPhysicalDeviceCustomBorderColorFeaturesEXT *)ext;
- features->customBorderColors = true;
- features->customBorderColorWithoutFormat = true;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIVATE_DATA_FEATURES_EXT: {
- VkPhysicalDevicePrivateDataFeaturesEXT *features =
- (VkPhysicalDevicePrivateDataFeaturesEXT *)ext;
- features->privateData = true;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_CREATION_CACHE_CONTROL_FEATURES_EXT: {
- VkPhysicalDevicePipelineCreationCacheControlFeaturesEXT *features =
- (VkPhysicalDevicePipelineCreationCacheControlFeaturesEXT *)ext;
- features-> pipelineCreationCacheControl = true;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_MEMORY_MODEL_FEATURES_KHR: {
- VkPhysicalDeviceVulkanMemoryModelFeaturesKHR *features =
- (VkPhysicalDeviceVulkanMemoryModelFeaturesKHR *)ext;
- CORE_FEATURE(1, 2, vulkanMemoryModel);
- CORE_FEATURE(1, 2, vulkanMemoryModelDeviceScope);
- CORE_FEATURE(1, 2, vulkanMemoryModelAvailabilityVisibilityChains);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT: {
- VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *features =
- (VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *) ext;
- features->extendedDynamicState = true;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_ROBUSTNESS_FEATURES_EXT: {
- VkPhysicalDeviceImageRobustnessFeaturesEXT *features =
- (VkPhysicalDeviceImageRobustnessFeaturesEXT *)ext;
- features->robustImageAccess = true;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT: {
- VkPhysicalDeviceShaderAtomicFloatFeaturesEXT *features =
- (VkPhysicalDeviceShaderAtomicFloatFeaturesEXT *)ext;
- features->shaderBufferFloat32Atomics = true;
- features->shaderBufferFloat32AtomicAdd = false;
- features->shaderBufferFloat64Atomics = true;
- features->shaderBufferFloat64AtomicAdd = false;
- features->shaderSharedFloat32Atomics = true;
- features->shaderSharedFloat32AtomicAdd = pdevice->rad_info.chip_class >= GFX8 &&
- (!pdevice->use_llvm || LLVM_VERSION_MAJOR >= 10);
- features->shaderSharedFloat64Atomics = true;
- features->shaderSharedFloat64AtomicAdd = false;
- features->shaderImageFloat32Atomics = true;
- features->shaderImageFloat32AtomicAdd = false;
- features->sparseImageFloat32Atomics = true;
- features->sparseImageFloat32AtomicAdd = false;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_4444_FORMATS_FEATURES_EXT: {
- VkPhysicalDevice4444FormatsFeaturesEXT *features =
- (VkPhysicalDevice4444FormatsFeaturesEXT *)ext;
- features->formatA4R4G4B4 = true;
- features->formatA4B4G4R4 = true;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_TERMINATE_INVOCATION_FEATURES_KHR: {
- VkPhysicalDeviceShaderTerminateInvocationFeaturesKHR *features =
- (VkPhysicalDeviceShaderTerminateInvocationFeaturesKHR *)ext;
- features->shaderTerminateInvocation = true;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_IMAGE_ATOMIC_INT64_FEATURES_EXT: {
- VkPhysicalDeviceShaderImageAtomicInt64FeaturesEXT *features =
- (VkPhysicalDeviceShaderImageAtomicInt64FeaturesEXT *)ext;
- features->shaderImageInt64Atomics = LLVM_VERSION_MAJOR >= 11 || !pdevice->use_llvm;
- features->sparseImageInt64Atomics = LLVM_VERSION_MAJOR >= 11 || !pdevice->use_llvm;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MUTABLE_DESCRIPTOR_TYPE_FEATURES_VALVE: {
- VkPhysicalDeviceMutableDescriptorTypeFeaturesVALVE *features =
- (VkPhysicalDeviceMutableDescriptorTypeFeaturesVALVE *)ext;
- features->mutableDescriptorType = true;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR: {
- VkPhysicalDeviceFragmentShadingRateFeaturesKHR *features =
- (VkPhysicalDeviceFragmentShadingRateFeaturesKHR *)ext;
- features->pipelineFragmentShadingRate = true;
- features->primitiveFragmentShadingRate = true;
- features->attachmentFragmentShadingRate = false; /* TODO */
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR: {
- VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR *features =
- (VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR *)ext;
- features->workgroupMemoryExplicitLayout = true;
- features->workgroupMemoryExplicitLayoutScalarBlockLayout = true;
- features->workgroupMemoryExplicitLayout8BitAccess = true;
- features->workgroupMemoryExplicitLayout16BitAccess = true;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ZERO_INITIALIZE_WORKGROUP_MEMORY_FEATURES_KHR: {
- VkPhysicalDeviceZeroInitializeWorkgroupMemoryFeaturesKHR *features =
- (VkPhysicalDeviceZeroInitializeWorkgroupMemoryFeaturesKHR*)ext;
- features->shaderZeroInitializeWorkgroupMemory = true;
- break;
- }
- default:
- break;
- }
- }
+ VkPhysicalDeviceVulkan12Features *f)
+{
+ assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES);
+
+ f->samplerMirrorClampToEdge = true;
+ f->drawIndirectCount = true;
+ f->storageBuffer8BitAccess = true;
+ f->uniformAndStorageBuffer8BitAccess = true;
+ f->storagePushConstant8 = true;
+ f->shaderBufferInt64Atomics = LLVM_VERSION_MAJOR >= 9 || !pdevice->use_llvm;
+ f->shaderSharedInt64Atomics = LLVM_VERSION_MAJOR >= 9 || !pdevice->use_llvm;
+ f->shaderFloat16 = pdevice->rad_info.has_packed_math_16bit;
+ f->shaderInt8 = true;
+
+ f->descriptorIndexing = true;
+ f->shaderInputAttachmentArrayDynamicIndexing = true;
+ f->shaderUniformTexelBufferArrayDynamicIndexing = true;
+ f->shaderStorageTexelBufferArrayDynamicIndexing = true;
+ f->shaderUniformBufferArrayNonUniformIndexing = true;
+ f->shaderSampledImageArrayNonUniformIndexing = true;
+ f->shaderStorageBufferArrayNonUniformIndexing = true;
+ f->shaderStorageImageArrayNonUniformIndexing = true;
+ f->shaderInputAttachmentArrayNonUniformIndexing = true;
+ f->shaderUniformTexelBufferArrayNonUniformIndexing = true;
+ f->shaderStorageTexelBufferArrayNonUniformIndexing = true;
+ f->descriptorBindingUniformBufferUpdateAfterBind = true;
+ f->descriptorBindingSampledImageUpdateAfterBind = true;
+ f->descriptorBindingStorageImageUpdateAfterBind = true;
+ f->descriptorBindingStorageBufferUpdateAfterBind = true;
+ f->descriptorBindingUniformTexelBufferUpdateAfterBind = true;
+ f->descriptorBindingStorageTexelBufferUpdateAfterBind = true;
+ f->descriptorBindingUpdateUnusedWhilePending = true;
+ f->descriptorBindingPartiallyBound = true;
+ f->descriptorBindingVariableDescriptorCount = true;
+ f->runtimeDescriptorArray = true;
+
+ f->samplerFilterMinmax = true;
+ f->scalarBlockLayout = pdevice->rad_info.chip_class >= GFX7;
+ f->imagelessFramebuffer = true;
+ f->uniformBufferStandardLayout = true;
+ f->shaderSubgroupExtendedTypes = true;
+ f->separateDepthStencilLayouts = true;
+ f->hostQueryReset = true;
+ f->timelineSemaphore = true;
+ f->bufferDeviceAddress = true;
+ f->bufferDeviceAddressCaptureReplay = false;
+ f->bufferDeviceAddressMultiDevice = false;
+ f->vulkanMemoryModel = true;
+ f->vulkanMemoryModelDeviceScope = true;
+ f->vulkanMemoryModelAvailabilityVisibilityChains = false;
+ f->shaderOutputViewportIndex = true;
+ f->shaderOutputLayer = true;
+ f->subgroupBroadcastDynamicId = true;
+}
+
+void
+radv_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
+ VkPhysicalDeviceFeatures2 *pFeatures)
+{
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
+ radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
+
+ VkPhysicalDeviceVulkan11Features core_1_1 = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES,
+ };
+ radv_get_physical_device_features_1_1(pdevice, &core_1_1);
+
+ VkPhysicalDeviceVulkan12Features core_1_2 = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
+ };
+ radv_get_physical_device_features_1_2(pdevice, &core_1_2);
+
+#define CORE_FEATURE(major, minor, feature) features->feature = core_##major##_##minor.feature
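/* Illustration: through token pasting, a call like CORE_FEATURE(1, 2, scalarBlockLayout);
 * expands to
 *    features->scalarBlockLayout = core_1_2.scalarBlockLayout;
 * so each extension feature struct chained below is filled from the core 1.1/1.2
 * structs queried above. */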
+
+ vk_foreach_struct(ext, pFeatures->pNext)
+ {
+ switch (ext->sType) {
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES: {
+ VkPhysicalDeviceVariablePointersFeatures *features = (void *)ext;
+ CORE_FEATURE(1, 1, variablePointersStorageBuffer);
+ CORE_FEATURE(1, 1, variablePointers);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES: {
+ VkPhysicalDeviceMultiviewFeatures *features = (VkPhysicalDeviceMultiviewFeatures *)ext;
+ CORE_FEATURE(1, 1, multiview);
+ CORE_FEATURE(1, 1, multiviewGeometryShader);
+ CORE_FEATURE(1, 1, multiviewTessellationShader);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETERS_FEATURES: {
+ VkPhysicalDeviceShaderDrawParametersFeatures *features =
+ (VkPhysicalDeviceShaderDrawParametersFeatures *)ext;
+ CORE_FEATURE(1, 1, shaderDrawParameters);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: {
+ VkPhysicalDeviceProtectedMemoryFeatures *features =
+ (VkPhysicalDeviceProtectedMemoryFeatures *)ext;
+ CORE_FEATURE(1, 1, protectedMemory);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: {
+ VkPhysicalDevice16BitStorageFeatures *features =
+ (VkPhysicalDevice16BitStorageFeatures *)ext;
+ CORE_FEATURE(1, 1, storageBuffer16BitAccess);
+ CORE_FEATURE(1, 1, uniformAndStorageBuffer16BitAccess);
+ CORE_FEATURE(1, 1, storagePushConstant16);
+ CORE_FEATURE(1, 1, storageInputOutput16);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: {
+ VkPhysicalDeviceSamplerYcbcrConversionFeatures *features =
+ (VkPhysicalDeviceSamplerYcbcrConversionFeatures *)ext;
+ CORE_FEATURE(1, 1, samplerYcbcrConversion);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES: {
+ VkPhysicalDeviceDescriptorIndexingFeatures *features =
+ (VkPhysicalDeviceDescriptorIndexingFeatures *)ext;
+ CORE_FEATURE(1, 2, shaderInputAttachmentArrayDynamicIndexing);
+ CORE_FEATURE(1, 2, shaderUniformTexelBufferArrayDynamicIndexing);
+ CORE_FEATURE(1, 2, shaderStorageTexelBufferArrayDynamicIndexing);
+ CORE_FEATURE(1, 2, shaderUniformBufferArrayNonUniformIndexing);
+ CORE_FEATURE(1, 2, shaderSampledImageArrayNonUniformIndexing);
+ CORE_FEATURE(1, 2, shaderStorageBufferArrayNonUniformIndexing);
+ CORE_FEATURE(1, 2, shaderStorageImageArrayNonUniformIndexing);
+ CORE_FEATURE(1, 2, shaderInputAttachmentArrayNonUniformIndexing);
+ CORE_FEATURE(1, 2, shaderUniformTexelBufferArrayNonUniformIndexing);
+ CORE_FEATURE(1, 2, shaderStorageTexelBufferArrayNonUniformIndexing);
+ CORE_FEATURE(1, 2, descriptorBindingUniformBufferUpdateAfterBind);
+ CORE_FEATURE(1, 2, descriptorBindingSampledImageUpdateAfterBind);
+ CORE_FEATURE(1, 2, descriptorBindingStorageImageUpdateAfterBind);
+ CORE_FEATURE(1, 2, descriptorBindingStorageBufferUpdateAfterBind);
+ CORE_FEATURE(1, 2, descriptorBindingUniformTexelBufferUpdateAfterBind);
+ CORE_FEATURE(1, 2, descriptorBindingStorageTexelBufferUpdateAfterBind);
+ CORE_FEATURE(1, 2, descriptorBindingUpdateUnusedWhilePending);
+ CORE_FEATURE(1, 2, descriptorBindingPartiallyBound);
+ CORE_FEATURE(1, 2, descriptorBindingVariableDescriptorCount);
+ CORE_FEATURE(1, 2, runtimeDescriptorArray);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: {
+ VkPhysicalDeviceConditionalRenderingFeaturesEXT *features =
+ (VkPhysicalDeviceConditionalRenderingFeaturesEXT *)ext;
+ features->conditionalRendering = true;
+ features->inheritedConditionalRendering = false;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: {
+ VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *features =
+ (VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *)ext;
+ features->vertexAttributeInstanceRateDivisor = true;
+ features->vertexAttributeInstanceRateZeroDivisor = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT: {
+ VkPhysicalDeviceTransformFeedbackFeaturesEXT *features =
+ (VkPhysicalDeviceTransformFeedbackFeaturesEXT *)ext;
+ features->transformFeedback = true;
+ features->geometryStreams = !pdevice->use_ngg_streamout;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES: {
+ VkPhysicalDeviceScalarBlockLayoutFeatures *features =
+ (VkPhysicalDeviceScalarBlockLayoutFeatures *)ext;
+ CORE_FEATURE(1, 2, scalarBlockLayout);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PRIORITY_FEATURES_EXT: {
+ VkPhysicalDeviceMemoryPriorityFeaturesEXT *features =
+ (VkPhysicalDeviceMemoryPriorityFeaturesEXT *)ext;
+ features->memoryPriority = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_EXT: {
+ VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *features =
+ (VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *)ext;
+ features->bufferDeviceAddress = true;
+ features->bufferDeviceAddressCaptureReplay = false;
+ features->bufferDeviceAddressMultiDevice = false;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES: {
+ VkPhysicalDeviceBufferDeviceAddressFeatures *features =
+ (VkPhysicalDeviceBufferDeviceAddressFeatures *)ext;
+ CORE_FEATURE(1, 2, bufferDeviceAddress);
+ CORE_FEATURE(1, 2, bufferDeviceAddressCaptureReplay);
+ CORE_FEATURE(1, 2, bufferDeviceAddressMultiDevice);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT: {
+ VkPhysicalDeviceDepthClipEnableFeaturesEXT *features =
+ (VkPhysicalDeviceDepthClipEnableFeaturesEXT *)ext;
+ features->depthClipEnable = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES: {
+ VkPhysicalDeviceHostQueryResetFeatures *features =
+ (VkPhysicalDeviceHostQueryResetFeatures *)ext;
+ CORE_FEATURE(1, 2, hostQueryReset);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES: {
+ VkPhysicalDevice8BitStorageFeatures *features = (VkPhysicalDevice8BitStorageFeatures *)ext;
+ CORE_FEATURE(1, 2, storageBuffer8BitAccess);
+ CORE_FEATURE(1, 2, uniformAndStorageBuffer8BitAccess);
+ CORE_FEATURE(1, 2, storagePushConstant8);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES: {
+ VkPhysicalDeviceShaderFloat16Int8Features *features =
+ (VkPhysicalDeviceShaderFloat16Int8Features *)ext;
+ CORE_FEATURE(1, 2, shaderFloat16);
+ CORE_FEATURE(1, 2, shaderInt8);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES: {
+ VkPhysicalDeviceShaderAtomicInt64Features *features =
+ (VkPhysicalDeviceShaderAtomicInt64Features *)ext;
+ CORE_FEATURE(1, 2, shaderBufferInt64Atomics);
+ CORE_FEATURE(1, 2, shaderSharedInt64Atomics);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT: {
+ VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *features =
+ (VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *)ext;
+ features->shaderDemoteToHelperInvocation = LLVM_VERSION_MAJOR >= 9 || !pdevice->use_llvm;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES_EXT: {
+ VkPhysicalDeviceInlineUniformBlockFeaturesEXT *features =
+ (VkPhysicalDeviceInlineUniformBlockFeaturesEXT *)ext;
+
+ features->inlineUniformBlock = true;
+ features->descriptorBindingInlineUniformBlockUpdateAfterBind = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_NV: {
+ VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *features =
+ (VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *)ext;
+ features->computeDerivativeGroupQuads = false;
+ features->computeDerivativeGroupLinear = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_YCBCR_IMAGE_ARRAYS_FEATURES_EXT: {
+ VkPhysicalDeviceYcbcrImageArraysFeaturesEXT *features =
+ (VkPhysicalDeviceYcbcrImageArraysFeaturesEXT *)ext;
+ features->ycbcrImageArrays = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES: {
+ VkPhysicalDeviceUniformBufferStandardLayoutFeatures *features =
+ (VkPhysicalDeviceUniformBufferStandardLayoutFeatures *)ext;
+ CORE_FEATURE(1, 2, uniformBufferStandardLayout);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT: {
+ VkPhysicalDeviceIndexTypeUint8FeaturesEXT *features =
+ (VkPhysicalDeviceIndexTypeUint8FeaturesEXT *)ext;
+ features->indexTypeUint8 = pdevice->rad_info.chip_class >= GFX8;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGELESS_FRAMEBUFFER_FEATURES: {
+ VkPhysicalDeviceImagelessFramebufferFeatures *features =
+ (VkPhysicalDeviceImagelessFramebufferFeatures *)ext;
+ CORE_FEATURE(1, 2, imagelessFramebuffer);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR: {
+ VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *features =
+ (VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *)ext;
+ features->pipelineExecutableInfo = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CLOCK_FEATURES_KHR: {
+ VkPhysicalDeviceShaderClockFeaturesKHR *features =
+ (VkPhysicalDeviceShaderClockFeaturesKHR *)ext;
+ features->shaderSubgroupClock = true;
+ features->shaderDeviceClock = pdevice->rad_info.chip_class >= GFX8;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT: {
+ VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *features =
+ (VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *)ext;
+ features->texelBufferAlignment = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES: {
+ VkPhysicalDeviceTimelineSemaphoreFeatures *features =
+ (VkPhysicalDeviceTimelineSemaphoreFeatures *)ext;
+ CORE_FEATURE(1, 2, timelineSemaphore);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT: {
+ VkPhysicalDeviceSubgroupSizeControlFeaturesEXT *features =
+ (VkPhysicalDeviceSubgroupSizeControlFeaturesEXT *)ext;
+ features->subgroupSizeControl = true;
+ features->computeFullSubgroups = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COHERENT_MEMORY_FEATURES_AMD: {
+ VkPhysicalDeviceCoherentMemoryFeaturesAMD *features =
+ (VkPhysicalDeviceCoherentMemoryFeaturesAMD *)ext;
+ features->deviceCoherentMemory = pdevice->rad_info.has_l2_uncached;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_EXTENDED_TYPES_FEATURES: {
+ VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures *features =
+ (VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures *)ext;
+ CORE_FEATURE(1, 2, shaderSubgroupExtendedTypes);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SEPARATE_DEPTH_STENCIL_LAYOUTS_FEATURES_KHR: {
+ VkPhysicalDeviceSeparateDepthStencilLayoutsFeaturesKHR *features =
+ (VkPhysicalDeviceSeparateDepthStencilLayoutsFeaturesKHR *)ext;
+ CORE_FEATURE(1, 2, separateDepthStencilLayouts);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES: {
+ radv_get_physical_device_features_1_1(pdevice, (void *)ext);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES: {
+ radv_get_physical_device_features_1_2(pdevice, (void *)ext);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT: {
+ VkPhysicalDeviceLineRasterizationFeaturesEXT *features =
+ (VkPhysicalDeviceLineRasterizationFeaturesEXT *)ext;
+ features->rectangularLines = false;
+ features->bresenhamLines = true;
+ features->smoothLines = false;
+ features->stippledRectangularLines = false;
+ /* FIXME: Some stippled Bresenham CTS tests fail on Vega10
+  * but pass on Raven.
+  */
+ features->stippledBresenhamLines = pdevice->rad_info.chip_class != GFX9;
+ features->stippledSmoothLines = false;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_DEVICE_MEMORY_OVERALLOCATION_CREATE_INFO_AMD: {
+ VkDeviceMemoryOverallocationCreateInfoAMD *features =
+ (VkDeviceMemoryOverallocationCreateInfoAMD *)ext;
+ features->overallocationBehavior = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: {
+ VkPhysicalDeviceRobustness2FeaturesEXT *features =
+ (VkPhysicalDeviceRobustness2FeaturesEXT *)ext;
+ features->robustBufferAccess2 = true;
+ features->robustImageAccess2 = true;
+ features->nullDescriptor = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {
+ VkPhysicalDeviceCustomBorderColorFeaturesEXT *features =
+ (VkPhysicalDeviceCustomBorderColorFeaturesEXT *)ext;
+ features->customBorderColors = true;
+ features->customBorderColorWithoutFormat = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIVATE_DATA_FEATURES_EXT: {
+ VkPhysicalDevicePrivateDataFeaturesEXT *features =
+ (VkPhysicalDevicePrivateDataFeaturesEXT *)ext;
+ features->privateData = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_CREATION_CACHE_CONTROL_FEATURES_EXT: {
+ VkPhysicalDevicePipelineCreationCacheControlFeaturesEXT *features =
+ (VkPhysicalDevicePipelineCreationCacheControlFeaturesEXT *)ext;
+ features->pipelineCreationCacheControl = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_MEMORY_MODEL_FEATURES_KHR: {
+ VkPhysicalDeviceVulkanMemoryModelFeaturesKHR *features =
+ (VkPhysicalDeviceVulkanMemoryModelFeaturesKHR *)ext;
+ CORE_FEATURE(1, 2, vulkanMemoryModel);
+ CORE_FEATURE(1, 2, vulkanMemoryModelDeviceScope);
+ CORE_FEATURE(1, 2, vulkanMemoryModelAvailabilityVisibilityChains);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT: {
+ VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *features =
+ (VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *)ext;
+ features->extendedDynamicState = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_ROBUSTNESS_FEATURES_EXT: {
+ VkPhysicalDeviceImageRobustnessFeaturesEXT *features =
+ (VkPhysicalDeviceImageRobustnessFeaturesEXT *)ext;
+ features->robustImageAccess = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT: {
+ VkPhysicalDeviceShaderAtomicFloatFeaturesEXT *features =
+ (VkPhysicalDeviceShaderAtomicFloatFeaturesEXT *)ext;
+ features->shaderBufferFloat32Atomics = true;
+ features->shaderBufferFloat32AtomicAdd = false;
+ features->shaderBufferFloat64Atomics = true;
+ features->shaderBufferFloat64AtomicAdd = false;
+ features->shaderSharedFloat32Atomics = true;
+ features->shaderSharedFloat32AtomicAdd = pdevice->rad_info.chip_class >= GFX8 &&
+ (!pdevice->use_llvm || LLVM_VERSION_MAJOR >= 10);
+ features->shaderSharedFloat64Atomics = true;
+ features->shaderSharedFloat64AtomicAdd = false;
+ features->shaderImageFloat32Atomics = true;
+ features->shaderImageFloat32AtomicAdd = false;
+ features->sparseImageFloat32Atomics = true;
+ features->sparseImageFloat32AtomicAdd = false;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_4444_FORMATS_FEATURES_EXT: {
+ VkPhysicalDevice4444FormatsFeaturesEXT *features =
+ (VkPhysicalDevice4444FormatsFeaturesEXT *)ext;
+ features->formatA4R4G4B4 = true;
+ features->formatA4B4G4R4 = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_TERMINATE_INVOCATION_FEATURES_KHR: {
+ VkPhysicalDeviceShaderTerminateInvocationFeaturesKHR *features =
+ (VkPhysicalDeviceShaderTerminateInvocationFeaturesKHR *)ext;
+ features->shaderTerminateInvocation = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_IMAGE_ATOMIC_INT64_FEATURES_EXT: {
+ VkPhysicalDeviceShaderImageAtomicInt64FeaturesEXT *features =
+ (VkPhysicalDeviceShaderImageAtomicInt64FeaturesEXT *)ext;
+ features->shaderImageInt64Atomics = LLVM_VERSION_MAJOR >= 11 || !pdevice->use_llvm;
+ features->sparseImageInt64Atomics = LLVM_VERSION_MAJOR >= 11 || !pdevice->use_llvm;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MUTABLE_DESCRIPTOR_TYPE_FEATURES_VALVE: {
+ VkPhysicalDeviceMutableDescriptorTypeFeaturesVALVE *features =
+ (VkPhysicalDeviceMutableDescriptorTypeFeaturesVALVE *)ext;
+ features->mutableDescriptorType = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR: {
+ VkPhysicalDeviceFragmentShadingRateFeaturesKHR *features =
+ (VkPhysicalDeviceFragmentShadingRateFeaturesKHR *)ext;
+ features->pipelineFragmentShadingRate = true;
+ features->primitiveFragmentShadingRate = true;
+ features->attachmentFragmentShadingRate = false; /* TODO */
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR: {
+ VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR *features =
+ (VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR *)ext;
+ features->workgroupMemoryExplicitLayout = true;
+ features->workgroupMemoryExplicitLayoutScalarBlockLayout = true;
+ features->workgroupMemoryExplicitLayout8BitAccess = true;
+ features->workgroupMemoryExplicitLayout16BitAccess = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ZERO_INITIALIZE_WORKGROUP_MEMORY_FEATURES_KHR: {
+ VkPhysicalDeviceZeroInitializeWorkgroupMemoryFeaturesKHR *features =
+ (VkPhysicalDeviceZeroInitializeWorkgroupMemoryFeaturesKHR *)ext;
+ features->shaderZeroInitializeWorkgroupMemory = true;
+ break;
+ }
+ default:
+ break;
+ }
+ }
#undef CORE_FEATURE
}
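For context, radv_GetPhysicalDeviceFeatures2 above is the driver entry point behind the core vkGetPhysicalDeviceFeatures2 call. A minimal application-side sketch (assuming vulkan.h is included and physical_device is a valid VkPhysicalDevice handle; names come from the core Vulkan 1.2 API, not from this commit):

   VkPhysicalDeviceVulkan12Features vk12 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
   };
   VkPhysicalDeviceFeatures2 features2 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
      .pNext = &vk12,
   };
   /* Dispatches to radv_GetPhysicalDeviceFeatures2, which fills features2.features
    * and then walks the pNext chain, hitting the VULKAN_1_2_FEATURES case. */
   vkGetPhysicalDeviceFeatures2(physical_device, &features2);
   /* vk12.timelineSemaphore, vk12.bufferDeviceAddress, etc. now hold the driver's answers. */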
static size_t
radv_max_descriptor_set_size()
{
- /* make sure that the entire descriptor set is addressable with a signed
- * 32-bit int. So the sum of all limits scaled by descriptor size has to
- * be at most 2 GiB. the combined image & samples object count as one of
- * both. This limit is for the pipeline layout, not for the set layout, but
- * there is no set limit, so we just set a pipeline limit. I don't think
- * any app is going to hit this soon. */
- return ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS
- - MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_INLINE_UNIFORM_BLOCK_COUNT) /
- (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
- 32 /* storage buffer, 32 due to potential space wasted on alignment */ +
- 32 /* sampler, largest when combined with image */ +
- 64 /* sampled image */ +
- 64 /* storage image */);
+ /* Make sure that the entire descriptor set is addressable with a signed
+  * 32-bit int. So the sum of all limits scaled by descriptor size has to
+  * be at most 2 GiB. A combined image & sampler object counts as one of
+  * each. This limit is for the pipeline layout, not for the set layout, but
+  * there is no set limit, so we just set a pipeline limit. I don't think
+  * any app is going to hit this soon. */
+ return ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS -
+ MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_INLINE_UNIFORM_BLOCK_COUNT) /
+ (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
+ 32 /* storage buffer, 32 due to potential space wasted on alignment */ +
+ 32 /* sampler, largest when combined with image */ + 64 /* sampled image */ +
+ 64 /* storage image */);
}
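/* Worked out, the divisor above is 32 + 32 + 32 + 64 + 64 = 224 bytes per
 * worst-case descriptor, so the returned limit is roughly (2 GiB minus the
 * space reserved for dynamic buffers and inline uniform blocks) / 224
 * descriptors. */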
static uint32_t
radv_uniform_buffer_offset_alignment(const struct radv_physical_device *pdevice)
{
- uint32_t uniform_offset_alignment = driQueryOptioni(&pdevice->instance->dri_options,
- "radv_override_uniform_offset_alignment");
- if (!util_is_power_of_two_or_zero(uniform_offset_alignment)) {
- fprintf(stderr, "ERROR: invalid radv_override_uniform_offset_alignment setting %d:"
- "not a power of two\n", uniform_offset_alignment);
- uniform_offset_alignment = 0;
- }
-
- /* Take at least the hardware limit. */
- return MAX2(uniform_offset_alignment, 4);
-}
-
-void radv_GetPhysicalDeviceProperties(
- VkPhysicalDevice physicalDevice,
- VkPhysicalDeviceProperties* pProperties)
-{
- RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
- VkSampleCountFlags sample_counts = 0xf;
-
- size_t max_descriptor_set_size = radv_max_descriptor_set_size();
-
- VkPhysicalDeviceLimits limits = {
- .maxImageDimension1D = (1 << 14),
- .maxImageDimension2D = (1 << 14),
- .maxImageDimension3D = (1 << 11),
- .maxImageDimensionCube = (1 << 14),
- .maxImageArrayLayers = (1 << 11),
- .maxTexelBufferElements = UINT32_MAX,
- .maxUniformBufferRange = UINT32_MAX,
- .maxStorageBufferRange = UINT32_MAX,
- .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
- .maxMemoryAllocationCount = UINT32_MAX,
- .maxSamplerAllocationCount = 64 * 1024,
- .bufferImageGranularity = 64, /* A cache line */
- .sparseAddressSpaceSize = RADV_MAX_MEMORY_ALLOCATION_SIZE, /* buffer max size */
- .maxBoundDescriptorSets = MAX_SETS,
- .maxPerStageDescriptorSamplers = max_descriptor_set_size,
- .maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,
- .maxPerStageDescriptorStorageBuffers = max_descriptor_set_size,
- .maxPerStageDescriptorSampledImages = max_descriptor_set_size,
- .maxPerStageDescriptorStorageImages = max_descriptor_set_size,
- .maxPerStageDescriptorInputAttachments = max_descriptor_set_size,
- .maxPerStageResources = max_descriptor_set_size,
- .maxDescriptorSetSamplers = max_descriptor_set_size,
- .maxDescriptorSetUniformBuffers = max_descriptor_set_size,
- .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
- .maxDescriptorSetStorageBuffers = max_descriptor_set_size,
- .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
- .maxDescriptorSetSampledImages = max_descriptor_set_size,
- .maxDescriptorSetStorageImages = max_descriptor_set_size,
- .maxDescriptorSetInputAttachments = max_descriptor_set_size,
- .maxVertexInputAttributes = MAX_VERTEX_ATTRIBS,
- .maxVertexInputBindings = MAX_VBS,
- .maxVertexInputAttributeOffset = 2047,
- .maxVertexInputBindingStride = 2048,
- .maxVertexOutputComponents = 128,
- .maxTessellationGenerationLevel = 64,
- .maxTessellationPatchSize = 32,
- .maxTessellationControlPerVertexInputComponents = 128,
- .maxTessellationControlPerVertexOutputComponents = 128,
- .maxTessellationControlPerPatchOutputComponents = 120,
- .maxTessellationControlTotalOutputComponents = 4096,
- .maxTessellationEvaluationInputComponents = 128,
- .maxTessellationEvaluationOutputComponents = 128,
- .maxGeometryShaderInvocations = 127,
- .maxGeometryInputComponents = 64,
- .maxGeometryOutputComponents = 128,
- .maxGeometryOutputVertices = 256,
- .maxGeometryTotalOutputComponents = 1024,
- .maxFragmentInputComponents = 128,
- .maxFragmentOutputAttachments = 8,
- .maxFragmentDualSrcAttachments = 1,
- .maxFragmentCombinedOutputResources = 8,
- .maxComputeSharedMemorySize = 32768,
- .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
- .maxComputeWorkGroupInvocations = 1024,
- .maxComputeWorkGroupSize = {
- 1024,
- 1024,
- 1024
- },
- .subPixelPrecisionBits = 8,
- .subTexelPrecisionBits = 8,
- .mipmapPrecisionBits = 8,
- .maxDrawIndexedIndexValue = UINT32_MAX,
- .maxDrawIndirectCount = UINT32_MAX,
- .maxSamplerLodBias = 16,
- .maxSamplerAnisotropy = 16,
- .maxViewports = MAX_VIEWPORTS,
- .maxViewportDimensions = { (1 << 14), (1 << 14) },
- .viewportBoundsRange = { INT16_MIN, INT16_MAX },
- .viewportSubPixelBits = 8,
- .minMemoryMapAlignment = 4096, /* A page */
- .minTexelBufferOffsetAlignment = 4,
- .minUniformBufferOffsetAlignment = radv_uniform_buffer_offset_alignment(pdevice),
- .minStorageBufferOffsetAlignment = 4,
- .minTexelOffset = -32,
- .maxTexelOffset = 31,
- .minTexelGatherOffset = -32,
- .maxTexelGatherOffset = 31,
- .minInterpolationOffset = -2,
- .maxInterpolationOffset = 2,
- .subPixelInterpolationOffsetBits = 8,
- .maxFramebufferWidth = (1 << 14),
- .maxFramebufferHeight = (1 << 14),
- .maxFramebufferLayers = (1 << 10),
- .framebufferColorSampleCounts = sample_counts,
- .framebufferDepthSampleCounts = sample_counts,
- .framebufferStencilSampleCounts = sample_counts,
- .framebufferNoAttachmentsSampleCounts = sample_counts,
- .maxColorAttachments = MAX_RTS,
- .sampledImageColorSampleCounts = sample_counts,
- .sampledImageIntegerSampleCounts = sample_counts,
- .sampledImageDepthSampleCounts = sample_counts,
- .sampledImageStencilSampleCounts = sample_counts,
- .storageImageSampleCounts = sample_counts,
- .maxSampleMaskWords = 1,
- .timestampComputeAndGraphics = true,
- .timestampPeriod = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
- .maxClipDistances = 8,
- .maxCullDistances = 8,
- .maxCombinedClipAndCullDistances = 8,
- .discreteQueuePriorities = 2,
- .pointSizeRange = { 0.0, 8191.875 },
- .lineWidthRange = { 0.0, 8191.875 },
- .pointSizeGranularity = (1.0 / 8.0),
- .lineWidthGranularity = (1.0 / 8.0),
- .strictLines = false, /* FINISHME */
- .standardSampleLocations = true,
- .optimalBufferCopyOffsetAlignment = 128,
- .optimalBufferCopyRowPitchAlignment = 128,
- .nonCoherentAtomSize = 64,
- };
-
- *pProperties = (VkPhysicalDeviceProperties) {
- .apiVersion = RADV_API_VERSION,
- .driverVersion = vk_get_driver_version(),
- .vendorID = ATI_VENDOR_ID,
- .deviceID = pdevice->rad_info.pci_id,
- .deviceType = pdevice->rad_info.has_dedicated_vram ? VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU : VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
- .limits = limits,
- .sparseProperties = {
- .residencyNonResidentStrict = pdevice->rad_info.family >= CHIP_POLARIS10,
- .residencyStandard2DBlockShape = pdevice->rad_info.family >= CHIP_POLARIS10,
- },
- };
-
- strcpy(pProperties->deviceName, pdevice->name);
- memcpy(pProperties->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);
+ uint32_t uniform_offset_alignment =
+ driQueryOptioni(&pdevice->instance->dri_options, "radv_override_uniform_offset_alignment");
+ if (!util_is_power_of_two_or_zero(uniform_offset_alignment)) {
+ fprintf(stderr,
+ "ERROR: invalid radv_override_uniform_offset_alignment setting %d: "
+ "not a power of two\n",
+ uniform_offset_alignment);
+ uniform_offset_alignment = 0;
+ }
+
+ /* Take at least the hardware limit. */
+ return MAX2(uniform_offset_alignment, 4);
+}
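/* Example behaviour, assuming the driconf default for the option is 0: 0 passes
 * util_is_power_of_two_or_zero, and MAX2(0, 4) keeps the 4-byte hardware minimum.
 * A hypothetical override of 256 yields MAX2(256, 4) = 256, while a
 * non-power-of-two value such as 96 is rejected above and also falls back to 4. */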
+
+void
+radv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
+ VkPhysicalDeviceProperties *pProperties)
+{
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
+ VkSampleCountFlags sample_counts = 0xf;
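/* 0xf == VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT |
 * VK_SAMPLE_COUNT_8_BIT, i.e. 1/2/4/8 samples are advertised for every
 * sample-count limit below. */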
+
+ size_t max_descriptor_set_size = radv_max_descriptor_set_size();
+
+ VkPhysicalDeviceLimits limits = {
+ .maxImageDimension1D = (1 << 14),
+ .maxImageDimension2D = (1 << 14),
+ .maxImageDimension3D = (1 << 11),
+ .maxImageDimensionCube = (1 << 14),
+ .maxImageArrayLayers = (1 << 11),
+ .maxTexelBufferElements = UINT32_MAX,
+ .maxUniformBufferRange = UINT32_MAX,
+ .maxStorageBufferRange = UINT32_MAX,
+ .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
+ .maxMemoryAllocationCount = UINT32_MAX,
+ .maxSamplerAllocationCount = 64 * 1024,
+ .bufferImageGranularity = 64, /* A cache line */
+ .sparseAddressSpaceSize = RADV_MAX_MEMORY_ALLOCATION_SIZE, /* buffer max size */
+ .maxBoundDescriptorSets = MAX_SETS,
+ .maxPerStageDescriptorSamplers = max_descriptor_set_size,
+ .maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,
+ .maxPerStageDescriptorStorageBuffers = max_descriptor_set_size,
+ .maxPerStageDescriptorSampledImages = max_descriptor_set_size,
+ .maxPerStageDescriptorStorageImages = max_descriptor_set_size,
+ .maxPerStageDescriptorInputAttachments = max_descriptor_set_size,
+ .maxPerStageResources = max_descriptor_set_size,
+ .maxDescriptorSetSamplers = max_descriptor_set_size,
+ .maxDescriptorSetUniformBuffers = max_descriptor_set_size,
+ .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
+ .maxDescriptorSetStorageBuffers = max_descriptor_set_size,
+ .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
+ .maxDescriptorSetSampledImages = max_descriptor_set_size,
+ .maxDescriptorSetStorageImages = max_descriptor_set_size,
+ .maxDescriptorSetInputAttachments = max_descriptor_set_size,
+ .maxVertexInputAttributes = MAX_VERTEX_ATTRIBS,
+ .maxVertexInputBindings = MAX_VBS,
+ .maxVertexInputAttributeOffset = 2047,
+ .maxVertexInputBindingStride = 2048,
+ .maxVertexOutputComponents = 128,
+ .maxTessellationGenerationLevel = 64,
+ .maxTessellationPatchSize = 32,
+ .maxTessellationControlPerVertexInputComponents = 128,
+ .maxTessellationControlPerVertexOutputComponents = 128,
+ .maxTessellationControlPerPatchOutputComponents = 120,
+ .maxTessellationControlTotalOutputComponents = 4096,
+ .maxTessellationEvaluationInputComponents = 128,
+ .maxTessellationEvaluationOutputComponents = 128,
+ .maxGeometryShaderInvocations = 127,
+ .maxGeometryInputComponents = 64,
+ .maxGeometryOutputComponents = 128,
+ .maxGeometryOutputVertices = 256,
+ .maxGeometryTotalOutputComponents = 1024,
+ .maxFragmentInputComponents = 128,
+ .maxFragmentOutputAttachments = 8,
+ .maxFragmentDualSrcAttachments = 1,
+ .maxFragmentCombinedOutputResources = 8,
+ .maxComputeSharedMemorySize = 32768,
+ .maxComputeWorkGroupCount = {65535, 65535, 65535},
+ .maxComputeWorkGroupInvocations = 1024,
+ .maxComputeWorkGroupSize = {1024, 1024, 1024},
+ .subPixelPrecisionBits = 8,
+ .subTexelPrecisionBits = 8,
+ .mipmapPrecisionBits = 8,
+ .maxDrawIndexedIndexValue = UINT32_MAX,
+ .maxDrawIndirectCount = UINT32_MAX,
+ .maxSamplerLodBias = 16,
+ .maxSamplerAnisotropy = 16,
+ .maxViewports = MAX_VIEWPORTS,
+ .maxViewportDimensions = {(1 << 14), (1 << 14)},
+ .viewportBoundsRange = {INT16_MIN, INT16_MAX},
+ .viewportSubPixelBits = 8,
+ .minMemoryMapAlignment = 4096, /* A page */
+ .minTexelBufferOffsetAlignment = 4,
+ .minUniformBufferOffsetAlignment = radv_uniform_buffer_offset_alignment(pdevice),
+ .minStorageBufferOffsetAlignment = 4,
+ .minTexelOffset = -32,
+ .maxTexelOffset = 31,
+ .minTexelGatherOffset = -32,
+ .maxTexelGatherOffset = 31,
+ .minInterpolationOffset = -2,
+ .maxInterpolationOffset = 2,
+ .subPixelInterpolationOffsetBits = 8,
+ .maxFramebufferWidth = (1 << 14),
+ .maxFramebufferHeight = (1 << 14),
+ .maxFramebufferLayers = (1 << 10),
+ .framebufferColorSampleCounts = sample_counts,
+ .framebufferDepthSampleCounts = sample_counts,
+ .framebufferStencilSampleCounts = sample_counts,
+ .framebufferNoAttachmentsSampleCounts = sample_counts,
+ .maxColorAttachments = MAX_RTS,
+ .sampledImageColorSampleCounts = sample_counts,
+ .sampledImageIntegerSampleCounts = sample_counts,
+ .sampledImageDepthSampleCounts = sample_counts,
+ .sampledImageStencilSampleCounts = sample_counts,
+ .storageImageSampleCounts = sample_counts,
+ .maxSampleMaskWords = 1,
+ .timestampComputeAndGraphics = true,
+ .timestampPeriod = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
+ .maxClipDistances = 8,
+ .maxCullDistances = 8,
+ .maxCombinedClipAndCullDistances = 8,
+ .discreteQueuePriorities = 2,
+ .pointSizeRange = {0.0, 8191.875},
+ .lineWidthRange = {0.0, 8191.875},
+ .pointSizeGranularity = (1.0 / 8.0),
+ .lineWidthGranularity = (1.0 / 8.0),
+ .strictLines = false, /* FINISHME */
+ .standardSampleLocations = true,
+ .optimalBufferCopyOffsetAlignment = 128,
+ .optimalBufferCopyRowPitchAlignment = 128,
+ .nonCoherentAtomSize = 64,
+ };
+
+ *pProperties = (VkPhysicalDeviceProperties){
+ .apiVersion = RADV_API_VERSION,
+ .driverVersion = vk_get_driver_version(),
+ .vendorID = ATI_VENDOR_ID,
+ .deviceID = pdevice->rad_info.pci_id,
+ .deviceType = pdevice->rad_info.has_dedicated_vram ? VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU
+ : VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
+ .limits = limits,
+ .sparseProperties =
+ {
+ .residencyNonResidentStrict = pdevice->rad_info.family >= CHIP_POLARIS10,
+ .residencyStandard2DBlockShape = pdevice->rad_info.family >= CHIP_POLARIS10,
+ },
+ };
+
+ strcpy(pProperties->deviceName, pdevice->name);
+ memcpy(pProperties->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);
}
static void
radv_get_physical_device_properties_1_1(struct radv_physical_device *pdevice,
- VkPhysicalDeviceVulkan11Properties *p)
-{
- assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES);
-
- memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
- memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
- memset(p->deviceLUID, 0, VK_LUID_SIZE);
- /* The LUID is for Windows. */
- p->deviceLUIDValid = false;
- p->deviceNodeMask = 0;
-
- p->subgroupSize = RADV_SUBGROUP_SIZE;
- p->subgroupSupportedStages = VK_SHADER_STAGE_ALL_GRAPHICS |
- VK_SHADER_STAGE_COMPUTE_BIT;
- p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
- VK_SUBGROUP_FEATURE_VOTE_BIT |
- VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
- VK_SUBGROUP_FEATURE_BALLOT_BIT |
- VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
- VK_SUBGROUP_FEATURE_QUAD_BIT |
- VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
- VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT;
- p->subgroupQuadOperationsInAllStages = true;
-
- p->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES;
- p->maxMultiviewViewCount = MAX_VIEWS;
- p->maxMultiviewInstanceIndex = INT_MAX;
- p->protectedNoFault = false;
- p->maxPerSetDescriptors = RADV_MAX_PER_SET_DESCRIPTORS;
- p->maxMemoryAllocationSize = RADV_MAX_MEMORY_ALLOCATION_SIZE;
+ VkPhysicalDeviceVulkan11Properties *p)
+{
+ assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES);
+
+ memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
+ memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
+ memset(p->deviceLUID, 0, VK_LUID_SIZE);
+ /* The LUID is for Windows. */
+ p->deviceLUIDValid = false;
+ p->deviceNodeMask = 0;
+
+ p->subgroupSize = RADV_SUBGROUP_SIZE;
+ p->subgroupSupportedStages = VK_SHADER_STAGE_ALL_GRAPHICS | VK_SHADER_STAGE_COMPUTE_BIT;
+ p->subgroupSupportedOperations =
+ VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_VOTE_BIT |
+ VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT |
+ VK_SUBGROUP_FEATURE_CLUSTERED_BIT | VK_SUBGROUP_FEATURE_QUAD_BIT |
+ VK_SUBGROUP_FEATURE_SHUFFLE_BIT | VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT;
+ p->subgroupQuadOperationsInAllStages = true;
+
+ p->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES;
+ p->maxMultiviewViewCount = MAX_VIEWS;
+ p->maxMultiviewInstanceIndex = INT_MAX;
+ p->protectedNoFault = false;
+ p->maxPerSetDescriptors = RADV_MAX_PER_SET_DESCRIPTORS;
+ p->maxMemoryAllocationSize = RADV_MAX_MEMORY_ALLOCATION_SIZE;
}
static void
radv_get_physical_device_properties_1_2(struct radv_physical_device *pdevice,
- VkPhysicalDeviceVulkan12Properties *p)
-{
- assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES);
-
- p->driverID = VK_DRIVER_ID_MESA_RADV;
- snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE, "radv");
- snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
- "Mesa " PACKAGE_VERSION MESA_GIT_SHA1 " (%s)",
- radv_get_compiler_string(pdevice));
- p->conformanceVersion = (VkConformanceVersion) {
- .major = 1,
- .minor = 2,
- .subminor = 3,
- .patch = 0,
- };
-
- /* On AMD hardware, denormals and rounding modes for fp16/fp64 are
- * controlled by the same config register.
- */
- if (pdevice->rad_info.has_packed_math_16bit) {
- p->denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY_KHR;
- p->roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY_KHR;
- } else {
- p->denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR;
- p->roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR;
- }
-
- /* With LLVM, do not allow both preserving and flushing denorms because
- * different shaders in the same pipeline can have different settings and
- * this won't work for merged shaders. To make it work, this requires LLVM
- * support for changing the register. The same logic applies for the
- * rounding modes because they are configured with the same config
- * register.
- */
- p->shaderDenormFlushToZeroFloat32 = true;
- p->shaderDenormPreserveFloat32 = !pdevice->use_llvm;
- p->shaderRoundingModeRTEFloat32 = true;
- p->shaderRoundingModeRTZFloat32 = !pdevice->use_llvm;
- p->shaderSignedZeroInfNanPreserveFloat32 = true;
-
- p->shaderDenormFlushToZeroFloat16 = pdevice->rad_info.has_packed_math_16bit && !pdevice->use_llvm;
- p->shaderDenormPreserveFloat16 = pdevice->rad_info.has_packed_math_16bit;
- p->shaderRoundingModeRTEFloat16 = pdevice->rad_info.has_packed_math_16bit;
- p->shaderRoundingModeRTZFloat16 = pdevice->rad_info.has_packed_math_16bit && !pdevice->use_llvm;
- p->shaderSignedZeroInfNanPreserveFloat16 = pdevice->rad_info.has_packed_math_16bit;
-
- p->shaderDenormFlushToZeroFloat64 = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_llvm;
- p->shaderDenormPreserveFloat64 = pdevice->rad_info.chip_class >= GFX8;
- p->shaderRoundingModeRTEFloat64 = pdevice->rad_info.chip_class >= GFX8;
- p->shaderRoundingModeRTZFloat64 = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_llvm;
- p->shaderSignedZeroInfNanPreserveFloat64 = pdevice->rad_info.chip_class >= GFX8;
-
- p->maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX / 64;
- p->shaderUniformBufferArrayNonUniformIndexingNative = false;
- p->shaderSampledImageArrayNonUniformIndexingNative = false;
- p->shaderStorageBufferArrayNonUniformIndexingNative = false;
- p->shaderStorageImageArrayNonUniformIndexingNative = false;
- p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
- p->robustBufferAccessUpdateAfterBind = true;
- p->quadDivergentImplicitLod = false;
-
- size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS -
- MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_INLINE_UNIFORM_BLOCK_COUNT) /
- (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
- 32 /* storage buffer, 32 due to potential space wasted on alignment */ +
- 32 /* sampler, largest when combined with image */ +
- 64 /* sampled image */ +
- 64 /* storage image */);
- p->maxPerStageDescriptorUpdateAfterBindSamplers = max_descriptor_set_size;
- p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = max_descriptor_set_size;
- p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = max_descriptor_set_size;
- p->maxPerStageDescriptorUpdateAfterBindSampledImages = max_descriptor_set_size;
- p->maxPerStageDescriptorUpdateAfterBindStorageImages = max_descriptor_set_size;
- p->maxPerStageDescriptorUpdateAfterBindInputAttachments = max_descriptor_set_size;
- p->maxPerStageUpdateAfterBindResources = max_descriptor_set_size;
- p->maxDescriptorSetUpdateAfterBindSamplers = max_descriptor_set_size;
- p->maxDescriptorSetUpdateAfterBindUniformBuffers = max_descriptor_set_size;
- p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
- p->maxDescriptorSetUpdateAfterBindStorageBuffers = max_descriptor_set_size;
- p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
- p->maxDescriptorSetUpdateAfterBindSampledImages = max_descriptor_set_size;
- p->maxDescriptorSetUpdateAfterBindStorageImages = max_descriptor_set_size;
- p->maxDescriptorSetUpdateAfterBindInputAttachments = max_descriptor_set_size;
-
- /* We support all of the depth resolve modes */
- p->supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR |
- VK_RESOLVE_MODE_AVERAGE_BIT_KHR |
- VK_RESOLVE_MODE_MIN_BIT_KHR |
- VK_RESOLVE_MODE_MAX_BIT_KHR;
-
- /* Average doesn't make sense for stencil so we don't support that */
- p->supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR |
- VK_RESOLVE_MODE_MIN_BIT_KHR |
- VK_RESOLVE_MODE_MAX_BIT_KHR;
-
- p->independentResolveNone = true;
- p->independentResolve = true;
-
- /* GFX6-8 only support single channel min/max filter. */
- p->filterMinmaxImageComponentMapping = pdevice->rad_info.chip_class >= GFX9;
- p->filterMinmaxSingleComponentFormats = true;
-
- p->maxTimelineSemaphoreValueDifference = UINT64_MAX;
-
- p->framebufferIntegerColorSampleCounts = VK_SAMPLE_COUNT_1_BIT;
-}
-
-void radv_GetPhysicalDeviceProperties2(
- VkPhysicalDevice physicalDevice,
- VkPhysicalDeviceProperties2 *pProperties)
-{
- RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
- radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
-
- VkPhysicalDeviceVulkan11Properties core_1_1 = {
- .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES,
- };
- radv_get_physical_device_properties_1_1(pdevice, &core_1_1);
-
- VkPhysicalDeviceVulkan12Properties core_1_2 = {
- .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES,
- };
- radv_get_physical_device_properties_1_2(pdevice, &core_1_2);
-
-#define CORE_RENAMED_PROPERTY(major, minor, ext_property, core_property) \
- memcpy(&properties->ext_property, &core_##major##_##minor.core_property, \
+ VkPhysicalDeviceVulkan12Properties *p)
+{
+ assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES);
+
+ p->driverID = VK_DRIVER_ID_MESA_RADV;
+ snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE, "radv");
+ snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE, "Mesa " PACKAGE_VERSION MESA_GIT_SHA1 " (%s)",
+ radv_get_compiler_string(pdevice));
+ p->conformanceVersion = (VkConformanceVersion){
+ .major = 1,
+ .minor = 2,
+ .subminor = 3,
+ .patch = 0,
+ };
+
+ /* On AMD hardware, denormals and rounding modes for fp16/fp64 are
+ * controlled by the same config register.
+ */
+ if (pdevice->rad_info.has_packed_math_16bit) {
+ p->denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY_KHR;
+ p->roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY_KHR;
+ } else {
+ p->denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR;
+ p->roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR;
+ }
+
+ /* With LLVM, do not allow both preserving and flushing denorms because
+ * different shaders in the same pipeline can have different settings and
+ * this won't work for merged shaders. To make it work, this requires LLVM
+ * support for changing the register. The same logic applies for the
+ * rounding modes because they are configured with the same config
+ * register.
+ */
+ p->shaderDenormFlushToZeroFloat32 = true;
+ p->shaderDenormPreserveFloat32 = !pdevice->use_llvm;
+ p->shaderRoundingModeRTEFloat32 = true;
+ p->shaderRoundingModeRTZFloat32 = !pdevice->use_llvm;
+ p->shaderSignedZeroInfNanPreserveFloat32 = true;
+
+ p->shaderDenormFlushToZeroFloat16 =
+ pdevice->rad_info.has_packed_math_16bit && !pdevice->use_llvm;
+ p->shaderDenormPreserveFloat16 = pdevice->rad_info.has_packed_math_16bit;
+ p->shaderRoundingModeRTEFloat16 = pdevice->rad_info.has_packed_math_16bit;
+ p->shaderRoundingModeRTZFloat16 = pdevice->rad_info.has_packed_math_16bit && !pdevice->use_llvm;
+ p->shaderSignedZeroInfNanPreserveFloat16 = pdevice->rad_info.has_packed_math_16bit;
+
+ p->shaderDenormFlushToZeroFloat64 = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_llvm;
+ p->shaderDenormPreserveFloat64 = pdevice->rad_info.chip_class >= GFX8;
+ p->shaderRoundingModeRTEFloat64 = pdevice->rad_info.chip_class >= GFX8;
+ p->shaderRoundingModeRTZFloat64 = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_llvm;
+ p->shaderSignedZeroInfNanPreserveFloat64 = pdevice->rad_info.chip_class >= GFX8;
+
+ p->maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX / 64;
+ p->shaderUniformBufferArrayNonUniformIndexingNative = false;
+ p->shaderSampledImageArrayNonUniformIndexingNative = false;
+ p->shaderStorageBufferArrayNonUniformIndexingNative = false;
+ p->shaderStorageImageArrayNonUniformIndexingNative = false;
+ p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
+ p->robustBufferAccessUpdateAfterBind = true;
+ p->quadDivergentImplicitLod = false;
+
+ size_t max_descriptor_set_size =
+ ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS -
+ MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_INLINE_UNIFORM_BLOCK_COUNT) /
+ (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
+ 32 /* storage buffer, 32 due to potential space wasted on alignment */ +
+ 32 /* sampler, largest when combined with image */ + 64 /* sampled image */ +
+ 64 /* storage image */);
+ p->maxPerStageDescriptorUpdateAfterBindSamplers = max_descriptor_set_size;
+ p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = max_descriptor_set_size;
+ p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = max_descriptor_set_size;
+ p->maxPerStageDescriptorUpdateAfterBindSampledImages = max_descriptor_set_size;
+ p->maxPerStageDescriptorUpdateAfterBindStorageImages = max_descriptor_set_size;
+ p->maxPerStageDescriptorUpdateAfterBindInputAttachments = max_descriptor_set_size;
+ p->maxPerStageUpdateAfterBindResources = max_descriptor_set_size;
+ p->maxDescriptorSetUpdateAfterBindSamplers = max_descriptor_set_size;
+ p->maxDescriptorSetUpdateAfterBindUniformBuffers = max_descriptor_set_size;
+ p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
+ p->maxDescriptorSetUpdateAfterBindStorageBuffers = max_descriptor_set_size;
+ p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
+ p->maxDescriptorSetUpdateAfterBindSampledImages = max_descriptor_set_size;
+ p->maxDescriptorSetUpdateAfterBindStorageImages = max_descriptor_set_size;
+ p->maxDescriptorSetUpdateAfterBindInputAttachments = max_descriptor_set_size;
+
+ /* We support all of the depth resolve modes */
+ p->supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR |
+ VK_RESOLVE_MODE_AVERAGE_BIT_KHR | VK_RESOLVE_MODE_MIN_BIT_KHR |
+ VK_RESOLVE_MODE_MAX_BIT_KHR;
+
+ /* Average doesn't make sense for stencil, so we don't support that. */
+ p->supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR |
+ VK_RESOLVE_MODE_MIN_BIT_KHR | VK_RESOLVE_MODE_MAX_BIT_KHR;
+
+ p->independentResolveNone = true;
+ p->independentResolve = true;
+
+ /* GFX6-8 only support single channel min/max filter. */
+ p->filterMinmaxImageComponentMapping = pdevice->rad_info.chip_class >= GFX9;
+ p->filterMinmaxSingleComponentFormats = true;
+
+ p->maxTimelineSemaphoreValueDifference = UINT64_MAX;
+
+ p->framebufferIntegerColorSampleCounts = VK_SAMPLE_COUNT_1_BIT;
+}
+
+void
+radv_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
+ VkPhysicalDeviceProperties2 *pProperties)
+{
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
+ radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
+
+ VkPhysicalDeviceVulkan11Properties core_1_1 = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES,
+ };
+ radv_get_physical_device_properties_1_1(pdevice, &core_1_1);
+
+ VkPhysicalDeviceVulkan12Properties core_1_2 = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES,
+ };
+ radv_get_physical_device_properties_1_2(pdevice, &core_1_2);
+
+#define CORE_RENAMED_PROPERTY(major, minor, ext_property, core_property) \
+ memcpy(&properties->ext_property, &core_##major##_##minor.core_property, \
sizeof(core_##major##_##minor.core_property))
-#define CORE_PROPERTY(major, minor, property) \
+#define CORE_PROPERTY(major, minor, property) \
CORE_RENAMED_PROPERTY(major, minor, property, property)
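/* Illustration: CORE_PROPERTY(1, 1, deviceUUID); expands to
 *    memcpy(&properties->deviceUUID, &core_1_1.deviceUUID, sizeof(core_1_1.deviceUUID));
 * The memcpy form (unlike CORE_FEATURE's plain assignment) also copies array members
 * such as deviceUUID, and CORE_RENAMED_PROPERTY covers fields whose extension-struct
 * name differs from the core name, e.g. supportedStages vs subgroupSupportedStages. */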
- vk_foreach_struct(ext, pProperties->pNext) {
- switch (ext->sType) {
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
- VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
- (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
- properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES: {
- VkPhysicalDeviceIDProperties *properties = (VkPhysicalDeviceIDProperties*)ext;
- CORE_PROPERTY(1, 1, deviceUUID);
- CORE_PROPERTY(1, 1, driverUUID);
- CORE_PROPERTY(1, 1, deviceLUID);
- CORE_PROPERTY(1, 1, deviceLUIDValid);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES: {
- VkPhysicalDeviceMultiviewProperties *properties = (VkPhysicalDeviceMultiviewProperties*)ext;
- CORE_PROPERTY(1, 1, maxMultiviewViewCount);
- CORE_PROPERTY(1, 1, maxMultiviewInstanceIndex);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES: {
- VkPhysicalDevicePointClippingProperties *properties =
- (VkPhysicalDevicePointClippingProperties*)ext;
- CORE_PROPERTY(1, 1, pointClippingBehavior);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DISCARD_RECTANGLE_PROPERTIES_EXT: {
- VkPhysicalDeviceDiscardRectanglePropertiesEXT *properties =
- (VkPhysicalDeviceDiscardRectanglePropertiesEXT*)ext;
- properties->maxDiscardRectangles = MAX_DISCARD_RECTANGLES;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT: {
- VkPhysicalDeviceExternalMemoryHostPropertiesEXT *properties =
- (VkPhysicalDeviceExternalMemoryHostPropertiesEXT *) ext;
- properties->minImportedHostPointerAlignment = 4096;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES: {
- VkPhysicalDeviceSubgroupProperties *properties =
- (VkPhysicalDeviceSubgroupProperties*)ext;
- CORE_PROPERTY(1, 1, subgroupSize);
- CORE_RENAMED_PROPERTY(1, 1, supportedStages,
- subgroupSupportedStages);
- CORE_RENAMED_PROPERTY(1, 1, supportedOperations,
- subgroupSupportedOperations);
- CORE_RENAMED_PROPERTY(1, 1, quadOperationsInAllStages,
- subgroupQuadOperationsInAllStages);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES: {
- VkPhysicalDeviceMaintenance3Properties *properties =
- (VkPhysicalDeviceMaintenance3Properties*)ext;
- CORE_PROPERTY(1, 1, maxPerSetDescriptors);
- CORE_PROPERTY(1, 1, maxMemoryAllocationSize);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_FILTER_MINMAX_PROPERTIES: {
- VkPhysicalDeviceSamplerFilterMinmaxProperties *properties =
- (VkPhysicalDeviceSamplerFilterMinmaxProperties *)ext;
- CORE_PROPERTY(1, 2, filterMinmaxImageComponentMapping);
- CORE_PROPERTY(1, 2, filterMinmaxSingleComponentFormats);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_AMD: {
- VkPhysicalDeviceShaderCorePropertiesAMD *properties =
- (VkPhysicalDeviceShaderCorePropertiesAMD *)ext;
-
- /* Shader engines. */
- properties->shaderEngineCount =
- pdevice->rad_info.max_se;
- properties->shaderArraysPerEngineCount =
- pdevice->rad_info.max_sa_per_se;
- properties->computeUnitsPerShaderArray =
- pdevice->rad_info.min_good_cu_per_sa;
- properties->simdPerComputeUnit =
- pdevice->rad_info.num_simd_per_compute_unit;
- properties->wavefrontsPerSimd =
- pdevice->rad_info.max_wave64_per_simd;
- properties->wavefrontSize = 64;
-
- /* SGPR. */
- properties->sgprsPerSimd =
- pdevice->rad_info.num_physical_sgprs_per_simd;
- properties->minSgprAllocation =
- pdevice->rad_info.min_sgpr_alloc;
- properties->maxSgprAllocation =
- pdevice->rad_info.max_sgpr_alloc;
- properties->sgprAllocationGranularity =
- pdevice->rad_info.sgpr_alloc_granularity;
-
- /* VGPR. */
- properties->vgprsPerSimd =
- pdevice->rad_info.num_physical_wave64_vgprs_per_simd;
- properties->minVgprAllocation =
- pdevice->rad_info.min_wave64_vgpr_alloc;
- properties->maxVgprAllocation =
- pdevice->rad_info.max_vgpr_alloc;
- properties->vgprAllocationGranularity =
- pdevice->rad_info.wave64_vgpr_alloc_granularity;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_2_AMD: {
- VkPhysicalDeviceShaderCoreProperties2AMD *properties =
- (VkPhysicalDeviceShaderCoreProperties2AMD *)ext;
-
- properties->shaderCoreFeatures = 0;
- properties->activeComputeUnitCount =
- pdevice->rad_info.num_good_compute_units;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {
- VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *properties =
- (VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext;
- properties->maxVertexAttribDivisor = UINT32_MAX;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES: {
- VkPhysicalDeviceDescriptorIndexingProperties *properties =
- (VkPhysicalDeviceDescriptorIndexingProperties*)ext;
- CORE_PROPERTY(1, 2, maxUpdateAfterBindDescriptorsInAllPools);
- CORE_PROPERTY(1, 2, shaderUniformBufferArrayNonUniformIndexingNative);
- CORE_PROPERTY(1, 2, shaderSampledImageArrayNonUniformIndexingNative);
- CORE_PROPERTY(1, 2, shaderStorageBufferArrayNonUniformIndexingNative);
- CORE_PROPERTY(1, 2, shaderStorageImageArrayNonUniformIndexingNative);
- CORE_PROPERTY(1, 2, shaderInputAttachmentArrayNonUniformIndexingNative);
- CORE_PROPERTY(1, 2, robustBufferAccessUpdateAfterBind);
- CORE_PROPERTY(1, 2, quadDivergentImplicitLod);
- CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindSamplers);
- CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindUniformBuffers);
- CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindStorageBuffers);
- CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindSampledImages);
- CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindStorageImages);
- CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindInputAttachments);
- CORE_PROPERTY(1, 2, maxPerStageUpdateAfterBindResources);
- CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindSamplers);
- CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindUniformBuffers);
- CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindUniformBuffersDynamic);
- CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindStorageBuffers);
- CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindStorageBuffersDynamic);
- CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindSampledImages);
- CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindStorageImages);
- CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindInputAttachments);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_PROPERTIES: {
- VkPhysicalDeviceProtectedMemoryProperties *properties =
- (VkPhysicalDeviceProtectedMemoryProperties *)ext;
- CORE_PROPERTY(1, 1, protectedNoFault);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONSERVATIVE_RASTERIZATION_PROPERTIES_EXT: {
- VkPhysicalDeviceConservativeRasterizationPropertiesEXT *properties =
- (VkPhysicalDeviceConservativeRasterizationPropertiesEXT *)ext;
- properties->primitiveOverestimationSize = 0;
- properties->maxExtraPrimitiveOverestimationSize = 0;
- properties->extraPrimitiveOverestimationSizeGranularity = 0;
- properties->primitiveUnderestimation = false;
- properties->conservativePointAndLineRasterization = false;
- properties->degenerateTrianglesRasterized = true;
- properties->degenerateLinesRasterized = false;
- properties->fullyCoveredFragmentShaderInputVariable = false;
- properties->conservativeRasterizationPostDepthCoverage = false;
- break;
- }
+ vk_foreach_struct(ext, pProperties->pNext)
+ {
+ switch (ext->sType) {
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
+ VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
+ (VkPhysicalDevicePushDescriptorPropertiesKHR *)ext;
+ properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES: {
+ VkPhysicalDeviceIDProperties *properties = (VkPhysicalDeviceIDProperties *)ext;
+ CORE_PROPERTY(1, 1, deviceUUID);
+ CORE_PROPERTY(1, 1, driverUUID);
+ CORE_PROPERTY(1, 1, deviceLUID);
+ CORE_PROPERTY(1, 1, deviceLUIDValid);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES: {
+ VkPhysicalDeviceMultiviewProperties *properties =
+ (VkPhysicalDeviceMultiviewProperties *)ext;
+ CORE_PROPERTY(1, 1, maxMultiviewViewCount);
+ CORE_PROPERTY(1, 1, maxMultiviewInstanceIndex);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES: {
+ VkPhysicalDevicePointClippingProperties *properties =
+ (VkPhysicalDevicePointClippingProperties *)ext;
+ CORE_PROPERTY(1, 1, pointClippingBehavior);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DISCARD_RECTANGLE_PROPERTIES_EXT: {
+ VkPhysicalDeviceDiscardRectanglePropertiesEXT *properties =
+ (VkPhysicalDeviceDiscardRectanglePropertiesEXT *)ext;
+ properties->maxDiscardRectangles = MAX_DISCARD_RECTANGLES;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT: {
+ VkPhysicalDeviceExternalMemoryHostPropertiesEXT *properties =
+ (VkPhysicalDeviceExternalMemoryHostPropertiesEXT *)ext;
+ properties->minImportedHostPointerAlignment = 4096;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES: {
+ VkPhysicalDeviceSubgroupProperties *properties = (VkPhysicalDeviceSubgroupProperties *)ext;
+ CORE_PROPERTY(1, 1, subgroupSize);
+ CORE_RENAMED_PROPERTY(1, 1, supportedStages, subgroupSupportedStages);
+ CORE_RENAMED_PROPERTY(1, 1, supportedOperations, subgroupSupportedOperations);
+ CORE_RENAMED_PROPERTY(1, 1, quadOperationsInAllStages, subgroupQuadOperationsInAllStages);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES: {
+ VkPhysicalDeviceMaintenance3Properties *properties =
+ (VkPhysicalDeviceMaintenance3Properties *)ext;
+ CORE_PROPERTY(1, 1, maxPerSetDescriptors);
+ CORE_PROPERTY(1, 1, maxMemoryAllocationSize);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_FILTER_MINMAX_PROPERTIES: {
+ VkPhysicalDeviceSamplerFilterMinmaxProperties *properties =
+ (VkPhysicalDeviceSamplerFilterMinmaxProperties *)ext;
+ CORE_PROPERTY(1, 2, filterMinmaxImageComponentMapping);
+ CORE_PROPERTY(1, 2, filterMinmaxSingleComponentFormats);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_AMD: {
+ VkPhysicalDeviceShaderCorePropertiesAMD *properties =
+ (VkPhysicalDeviceShaderCorePropertiesAMD *)ext;
+
+ /* Shader engines. */
+ properties->shaderEngineCount = pdevice->rad_info.max_se;
+ properties->shaderArraysPerEngineCount = pdevice->rad_info.max_sa_per_se;
+ properties->computeUnitsPerShaderArray = pdevice->rad_info.min_good_cu_per_sa;
+ properties->simdPerComputeUnit = pdevice->rad_info.num_simd_per_compute_unit;
+ properties->wavefrontsPerSimd = pdevice->rad_info.max_wave64_per_simd;
+ properties->wavefrontSize = 64;
+
+ /* SGPR. */
+ properties->sgprsPerSimd = pdevice->rad_info.num_physical_sgprs_per_simd;
+ properties->minSgprAllocation = pdevice->rad_info.min_sgpr_alloc;
+ properties->maxSgprAllocation = pdevice->rad_info.max_sgpr_alloc;
+ properties->sgprAllocationGranularity = pdevice->rad_info.sgpr_alloc_granularity;
+
+ /* VGPR. */
+ properties->vgprsPerSimd = pdevice->rad_info.num_physical_wave64_vgprs_per_simd;
+ properties->minVgprAllocation = pdevice->rad_info.min_wave64_vgpr_alloc;
+ properties->maxVgprAllocation = pdevice->rad_info.max_vgpr_alloc;
+ properties->vgprAllocationGranularity = pdevice->rad_info.wave64_vgpr_alloc_granularity;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_2_AMD: {
+ VkPhysicalDeviceShaderCoreProperties2AMD *properties =
+ (VkPhysicalDeviceShaderCoreProperties2AMD *)ext;
+
+ properties->shaderCoreFeatures = 0;
+ properties->activeComputeUnitCount = pdevice->rad_info.num_good_compute_units;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {
+ VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *properties =
+ (VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext;
+ properties->maxVertexAttribDivisor = UINT32_MAX;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES: {
+ VkPhysicalDeviceDescriptorIndexingProperties *properties =
+ (VkPhysicalDeviceDescriptorIndexingProperties *)ext;
+ CORE_PROPERTY(1, 2, maxUpdateAfterBindDescriptorsInAllPools);
+ CORE_PROPERTY(1, 2, shaderUniformBufferArrayNonUniformIndexingNative);
+ CORE_PROPERTY(1, 2, shaderSampledImageArrayNonUniformIndexingNative);
+ CORE_PROPERTY(1, 2, shaderStorageBufferArrayNonUniformIndexingNative);
+ CORE_PROPERTY(1, 2, shaderStorageImageArrayNonUniformIndexingNative);
+ CORE_PROPERTY(1, 2, shaderInputAttachmentArrayNonUniformIndexingNative);
+ CORE_PROPERTY(1, 2, robustBufferAccessUpdateAfterBind);
+ CORE_PROPERTY(1, 2, quadDivergentImplicitLod);
+ CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindSamplers);
+ CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindUniformBuffers);
+ CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindStorageBuffers);
+ CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindSampledImages);
+ CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindStorageImages);
+ CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindInputAttachments);
+ CORE_PROPERTY(1, 2, maxPerStageUpdateAfterBindResources);
+ CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindSamplers);
+ CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindUniformBuffers);
+ CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindUniformBuffersDynamic);
+ CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindStorageBuffers);
+ CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindStorageBuffersDynamic);
+ CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindSampledImages);
+ CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindStorageImages);
+ CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindInputAttachments);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_PROPERTIES: {
+ VkPhysicalDeviceProtectedMemoryProperties *properties =
+ (VkPhysicalDeviceProtectedMemoryProperties *)ext;
+ CORE_PROPERTY(1, 1, protectedNoFault);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONSERVATIVE_RASTERIZATION_PROPERTIES_EXT: {
+ VkPhysicalDeviceConservativeRasterizationPropertiesEXT *properties =
+ (VkPhysicalDeviceConservativeRasterizationPropertiesEXT *)ext;
+ properties->primitiveOverestimationSize = 0;
+ properties->maxExtraPrimitiveOverestimationSize = 0;
+ properties->extraPrimitiveOverestimationSizeGranularity = 0;
+ properties->primitiveUnderestimation = false;
+ properties->conservativePointAndLineRasterization = false;
+ properties->degenerateTrianglesRasterized = true;
+ properties->degenerateLinesRasterized = false;
+ properties->fullyCoveredFragmentShaderInputVariable = false;
+ properties->conservativeRasterizationPostDepthCoverage = false;
+ break;
+ }
#ifndef _WIN32
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT: {
- VkPhysicalDevicePCIBusInfoPropertiesEXT *properties =
- (VkPhysicalDevicePCIBusInfoPropertiesEXT *)ext;
- properties->pciDomain = pdevice->bus_info.domain;
- properties->pciBus = pdevice->bus_info.bus;
- properties->pciDevice = pdevice->bus_info.dev;
- properties->pciFunction = pdevice->bus_info.func;
- break;
- }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT: {
+ VkPhysicalDevicePCIBusInfoPropertiesEXT *properties =
+ (VkPhysicalDevicePCIBusInfoPropertiesEXT *)ext;
+ properties->pciDomain = pdevice->bus_info.domain;
+ properties->pciBus = pdevice->bus_info.bus;
+ properties->pciDevice = pdevice->bus_info.dev;
+ properties->pciFunction = pdevice->bus_info.func;
+ break;
+ }
#endif
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES: {
- VkPhysicalDeviceDriverProperties *properties =
- (VkPhysicalDeviceDriverProperties *) ext;
- CORE_PROPERTY(1, 2, driverID);
- CORE_PROPERTY(1, 2, driverName);
- CORE_PROPERTY(1, 2, driverInfo);
- CORE_PROPERTY(1, 2, conformanceVersion);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: {
- VkPhysicalDeviceTransformFeedbackPropertiesEXT *properties =
- (VkPhysicalDeviceTransformFeedbackPropertiesEXT *)ext;
- properties->maxTransformFeedbackStreams = MAX_SO_STREAMS;
- properties->maxTransformFeedbackBuffers = MAX_SO_BUFFERS;
- properties->maxTransformFeedbackBufferSize = UINT32_MAX;
- properties->maxTransformFeedbackStreamDataSize = 512;
- properties->maxTransformFeedbackBufferDataSize = 512;
- properties->maxTransformFeedbackBufferDataStride = 512;
- properties->transformFeedbackQueries = !pdevice->use_ngg_streamout;
- properties->transformFeedbackStreamsLinesTriangles = !pdevice->use_ngg_streamout;
- properties->transformFeedbackRasterizationStreamSelect = false;
- properties->transformFeedbackDraw = true;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_PROPERTIES_EXT: {
- VkPhysicalDeviceInlineUniformBlockPropertiesEXT *props =
- (VkPhysicalDeviceInlineUniformBlockPropertiesEXT *)ext;
-
- props->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
- props->maxPerStageDescriptorInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
- props->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
- props->maxDescriptorSetInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;
- props->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: {
- VkPhysicalDeviceSampleLocationsPropertiesEXT *properties =
- (VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext;
-
- VkSampleCountFlagBits supported_samples = VK_SAMPLE_COUNT_2_BIT |
- VK_SAMPLE_COUNT_4_BIT;
- if (pdevice->rad_info.chip_class < GFX10) {
- /* FIXME: Some MSAA8x tests fail for weird
- * reasons on GFX10+ when the same pattern is
- * used inside the same render pass.
- */
- supported_samples |= VK_SAMPLE_COUNT_8_BIT;
- }
-
- properties->sampleLocationSampleCounts = supported_samples;
- properties->maxSampleLocationGridSize = (VkExtent2D){ 2 , 2 };
- properties->sampleLocationCoordinateRange[0] = 0.0f;
- properties->sampleLocationCoordinateRange[1] = 0.9375f;
- properties->sampleLocationSubPixelBits = 4;
- properties->variableSampleLocations = false;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_STENCIL_RESOLVE_PROPERTIES: {
- VkPhysicalDeviceDepthStencilResolveProperties *properties =
- (VkPhysicalDeviceDepthStencilResolveProperties *)ext;
- CORE_PROPERTY(1, 2, supportedDepthResolveModes);
- CORE_PROPERTY(1, 2, supportedStencilResolveModes);
- CORE_PROPERTY(1, 2, independentResolveNone);
- CORE_PROPERTY(1, 2, independentResolve);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_PROPERTIES_EXT: {
- VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *properties =
- (VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *)ext;
- properties->storageTexelBufferOffsetAlignmentBytes = 4;
- properties->storageTexelBufferOffsetSingleTexelAlignment = true;
- properties->uniformTexelBufferOffsetAlignmentBytes = 4;
- properties->uniformTexelBufferOffsetSingleTexelAlignment = true;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES : {
- VkPhysicalDeviceFloatControlsProperties *properties =
- (VkPhysicalDeviceFloatControlsProperties *)ext;
- CORE_PROPERTY(1, 2, denormBehaviorIndependence);
- CORE_PROPERTY(1, 2, roundingModeIndependence);
- CORE_PROPERTY(1, 2, shaderDenormFlushToZeroFloat16);
- CORE_PROPERTY(1, 2, shaderDenormPreserveFloat16);
- CORE_PROPERTY(1, 2, shaderRoundingModeRTEFloat16);
- CORE_PROPERTY(1, 2, shaderRoundingModeRTZFloat16);
- CORE_PROPERTY(1, 2, shaderSignedZeroInfNanPreserveFloat16);
- CORE_PROPERTY(1, 2, shaderDenormFlushToZeroFloat32);
- CORE_PROPERTY(1, 2, shaderDenormPreserveFloat32);
- CORE_PROPERTY(1, 2, shaderRoundingModeRTEFloat32);
- CORE_PROPERTY(1, 2, shaderRoundingModeRTZFloat32);
- CORE_PROPERTY(1, 2, shaderSignedZeroInfNanPreserveFloat32);
- CORE_PROPERTY(1, 2, shaderDenormFlushToZeroFloat64);
- CORE_PROPERTY(1, 2, shaderDenormPreserveFloat64);
- CORE_PROPERTY(1, 2, shaderRoundingModeRTEFloat64);
- CORE_PROPERTY(1, 2, shaderRoundingModeRTZFloat64);
- CORE_PROPERTY(1, 2, shaderSignedZeroInfNanPreserveFloat64);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_PROPERTIES: {
- VkPhysicalDeviceTimelineSemaphoreProperties *properties =
- (VkPhysicalDeviceTimelineSemaphoreProperties *) ext;
- CORE_PROPERTY(1, 2, maxTimelineSemaphoreValueDifference);
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT: {
- VkPhysicalDeviceSubgroupSizeControlPropertiesEXT *props =
- (VkPhysicalDeviceSubgroupSizeControlPropertiesEXT *)ext;
- props->minSubgroupSize = 64;
- props->maxSubgroupSize = 64;
- props->maxComputeWorkgroupSubgroups = UINT32_MAX;
- props->requiredSubgroupSizeStages = 0;
-
- if (pdevice->rad_info.chip_class >= GFX10) {
- /* Only GFX10+ supports wave32. */
- props->minSubgroupSize = 32;
- props->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT;
- }
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES:
- radv_get_physical_device_properties_1_1(pdevice, (void *)ext);
- break;
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES:
- radv_get_physical_device_properties_1_2(pdevice, (void *)ext);
- break;
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_PROPERTIES_EXT: {
- VkPhysicalDeviceLineRasterizationPropertiesEXT *props =
- (VkPhysicalDeviceLineRasterizationPropertiesEXT *)ext;
- props->lineSubPixelPrecisionBits = 4;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_PROPERTIES_EXT: {
- VkPhysicalDeviceRobustness2PropertiesEXT *properties =
- (VkPhysicalDeviceRobustness2PropertiesEXT *)ext;
- properties->robustStorageBufferAccessSizeAlignment = 4;
- properties->robustUniformBufferAccessSizeAlignment = 4;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_PROPERTIES_EXT: {
- VkPhysicalDeviceCustomBorderColorPropertiesEXT *props =
- (VkPhysicalDeviceCustomBorderColorPropertiesEXT *)ext;
- props->maxCustomBorderColorSamplers = RADV_BORDER_COLOR_COUNT;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR: {
- VkPhysicalDeviceFragmentShadingRatePropertiesKHR *props =
- (VkPhysicalDeviceFragmentShadingRatePropertiesKHR *)ext;
- props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 0, 0 };
- props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 0, 0 };
- props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 0;
- props->primitiveFragmentShadingRateWithMultipleViewports = true;
- props->layeredShadingRateAttachments = false;
- props->fragmentShadingRateNonTrivialCombinerOps = true;
- props->maxFragmentSize = (VkExtent2D) { 2, 2 };
- props->maxFragmentSizeAspectRatio = 1;
- props->maxFragmentShadingRateCoverageSamples = 2 * 2;
- props->maxFragmentShadingRateRasterizationSamples = VK_SAMPLE_COUNT_8_BIT;
- props->fragmentShadingRateWithShaderDepthStencilWrites = false;
- props->fragmentShadingRateWithSampleMask = true;
- props->fragmentShadingRateWithShaderSampleMask = false;
- props->fragmentShadingRateWithConservativeRasterization = true;
- props->fragmentShadingRateWithFragmentShaderInterlock = false;
- props->fragmentShadingRateWithCustomSampleLocations = true;
- props->fragmentShadingRateStrictMultiplyCombiner = true;
- break;
- }
- default:
- break;
- }
- }
-}
-
-static void radv_get_physical_device_queue_family_properties(
- struct radv_physical_device* pdevice,
- uint32_t* pCount,
- VkQueueFamilyProperties** pQueueFamilyProperties)
-{
- int num_queue_families = 1;
- int idx;
- if (pdevice->rad_info.num_rings[RING_COMPUTE] > 0 &&
- !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
- num_queue_families++;
-
- if (pQueueFamilyProperties == NULL) {
- *pCount = num_queue_families;
- return;
- }
-
- if (!*pCount)
- return;
-
- idx = 0;
- if (*pCount >= 1) {
- *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
- .queueFlags = VK_QUEUE_GRAPHICS_BIT |
- VK_QUEUE_COMPUTE_BIT |
- VK_QUEUE_TRANSFER_BIT |
- VK_QUEUE_SPARSE_BINDING_BIT,
- .queueCount = 1,
- .timestampValidBits = 64,
- .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
- };
- idx++;
- }
-
- if (pdevice->rad_info.num_rings[RING_COMPUTE] > 0 &&
- !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
- if (*pCount > idx) {
- *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
- .queueFlags = VK_QUEUE_COMPUTE_BIT |
- VK_QUEUE_TRANSFER_BIT |
- VK_QUEUE_SPARSE_BINDING_BIT,
- .queueCount = pdevice->rad_info.num_rings[RING_COMPUTE],
- .timestampValidBits = 64,
- .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
- };
- idx++;
- }
- }
- *pCount = idx;
-}
-
-void radv_GetPhysicalDeviceQueueFamilyProperties(
- VkPhysicalDevice physicalDevice,
- uint32_t* pCount,
- VkQueueFamilyProperties* pQueueFamilyProperties)
-{
- RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
- if (!pQueueFamilyProperties) {
- radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
- return;
- }
- VkQueueFamilyProperties *properties[] = {
- pQueueFamilyProperties + 0,
- pQueueFamilyProperties + 1,
- pQueueFamilyProperties + 2,
- };
- radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
- assert(*pCount <= 3);
-}
-
-void radv_GetPhysicalDeviceQueueFamilyProperties2(
- VkPhysicalDevice physicalDevice,
- uint32_t* pCount,
- VkQueueFamilyProperties2 *pQueueFamilyProperties)
-{
- RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
- if (!pQueueFamilyProperties) {
- radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
- return;
- }
- VkQueueFamilyProperties *properties[] = {
- &pQueueFamilyProperties[0].queueFamilyProperties,
- &pQueueFamilyProperties[1].queueFamilyProperties,
- &pQueueFamilyProperties[2].queueFamilyProperties,
- };
- radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
- assert(*pCount <= 3);
-}
-
-void radv_GetPhysicalDeviceMemoryProperties(
- VkPhysicalDevice physicalDevice,
- VkPhysicalDeviceMemoryProperties *pMemoryProperties)
-{
- RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
-
- *pMemoryProperties = physical_device->memory_properties;
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES: {
+ VkPhysicalDeviceDriverProperties *properties = (VkPhysicalDeviceDriverProperties *)ext;
+ CORE_PROPERTY(1, 2, driverID);
+ CORE_PROPERTY(1, 2, driverName);
+ CORE_PROPERTY(1, 2, driverInfo);
+ CORE_PROPERTY(1, 2, conformanceVersion);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: {
+ VkPhysicalDeviceTransformFeedbackPropertiesEXT *properties =
+ (VkPhysicalDeviceTransformFeedbackPropertiesEXT *)ext;
+ properties->maxTransformFeedbackStreams = MAX_SO_STREAMS;
+ properties->maxTransformFeedbackBuffers = MAX_SO_BUFFERS;
+ properties->maxTransformFeedbackBufferSize = UINT32_MAX;
+ properties->maxTransformFeedbackStreamDataSize = 512;
+ properties->maxTransformFeedbackBufferDataSize = 512;
+ properties->maxTransformFeedbackBufferDataStride = 512;
+ properties->transformFeedbackQueries = !pdevice->use_ngg_streamout;
+ properties->transformFeedbackStreamsLinesTriangles = !pdevice->use_ngg_streamout;
+ properties->transformFeedbackRasterizationStreamSelect = false;
+ properties->transformFeedbackDraw = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_PROPERTIES_EXT: {
+ VkPhysicalDeviceInlineUniformBlockPropertiesEXT *props =
+ (VkPhysicalDeviceInlineUniformBlockPropertiesEXT *)ext;
+
+ props->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
+ props->maxPerStageDescriptorInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
+ props->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks =
+ MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
+ props->maxDescriptorSetInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;
+ props->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: {
+ VkPhysicalDeviceSampleLocationsPropertiesEXT *properties =
+ (VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext;
+
+ VkSampleCountFlagBits supported_samples = VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT;
+ if (pdevice->rad_info.chip_class < GFX10) {
+ /* FIXME: Some MSAA8x tests fail for weird
+ * reasons on GFX10+ when the same pattern is
+ * used inside the same render pass.
+ */
+ supported_samples |= VK_SAMPLE_COUNT_8_BIT;
+ }
+
+ properties->sampleLocationSampleCounts = supported_samples;
+ properties->maxSampleLocationGridSize = (VkExtent2D){2, 2};
+ properties->sampleLocationCoordinateRange[0] = 0.0f;
+ properties->sampleLocationCoordinateRange[1] = 0.9375f;
+ properties->sampleLocationSubPixelBits = 4;
+ properties->variableSampleLocations = false;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_STENCIL_RESOLVE_PROPERTIES: {
+ VkPhysicalDeviceDepthStencilResolveProperties *properties =
+ (VkPhysicalDeviceDepthStencilResolveProperties *)ext;
+ CORE_PROPERTY(1, 2, supportedDepthResolveModes);
+ CORE_PROPERTY(1, 2, supportedStencilResolveModes);
+ CORE_PROPERTY(1, 2, independentResolveNone);
+ CORE_PROPERTY(1, 2, independentResolve);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_PROPERTIES_EXT: {
+ VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *properties =
+ (VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *)ext;
+ properties->storageTexelBufferOffsetAlignmentBytes = 4;
+ properties->storageTexelBufferOffsetSingleTexelAlignment = true;
+ properties->uniformTexelBufferOffsetAlignmentBytes = 4;
+ properties->uniformTexelBufferOffsetSingleTexelAlignment = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES: {
+ VkPhysicalDeviceFloatControlsProperties *properties =
+ (VkPhysicalDeviceFloatControlsProperties *)ext;
+ CORE_PROPERTY(1, 2, denormBehaviorIndependence);
+ CORE_PROPERTY(1, 2, roundingModeIndependence);
+ CORE_PROPERTY(1, 2, shaderDenormFlushToZeroFloat16);
+ CORE_PROPERTY(1, 2, shaderDenormPreserveFloat16);
+ CORE_PROPERTY(1, 2, shaderRoundingModeRTEFloat16);
+ CORE_PROPERTY(1, 2, shaderRoundingModeRTZFloat16);
+ CORE_PROPERTY(1, 2, shaderSignedZeroInfNanPreserveFloat16);
+ CORE_PROPERTY(1, 2, shaderDenormFlushToZeroFloat32);
+ CORE_PROPERTY(1, 2, shaderDenormPreserveFloat32);
+ CORE_PROPERTY(1, 2, shaderRoundingModeRTEFloat32);
+ CORE_PROPERTY(1, 2, shaderRoundingModeRTZFloat32);
+ CORE_PROPERTY(1, 2, shaderSignedZeroInfNanPreserveFloat32);
+ CORE_PROPERTY(1, 2, shaderDenormFlushToZeroFloat64);
+ CORE_PROPERTY(1, 2, shaderDenormPreserveFloat64);
+ CORE_PROPERTY(1, 2, shaderRoundingModeRTEFloat64);
+ CORE_PROPERTY(1, 2, shaderRoundingModeRTZFloat64);
+ CORE_PROPERTY(1, 2, shaderSignedZeroInfNanPreserveFloat64);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_PROPERTIES: {
+ VkPhysicalDeviceTimelineSemaphoreProperties *properties =
+ (VkPhysicalDeviceTimelineSemaphoreProperties *)ext;
+ CORE_PROPERTY(1, 2, maxTimelineSemaphoreValueDifference);
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT: {
+ VkPhysicalDeviceSubgroupSizeControlPropertiesEXT *props =
+ (VkPhysicalDeviceSubgroupSizeControlPropertiesEXT *)ext;
+ props->minSubgroupSize = 64;
+ props->maxSubgroupSize = 64;
+ props->maxComputeWorkgroupSubgroups = UINT32_MAX;
+ props->requiredSubgroupSizeStages = 0;
+
+ if (pdevice->rad_info.chip_class >= GFX10) {
+ /* Only GFX10+ supports wave32. */
+ props->minSubgroupSize = 32;
+ props->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT;
+ }
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES:
+ radv_get_physical_device_properties_1_1(pdevice, (void *)ext);
+ break;
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES:
+ radv_get_physical_device_properties_1_2(pdevice, (void *)ext);
+ break;
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_PROPERTIES_EXT: {
+ VkPhysicalDeviceLineRasterizationPropertiesEXT *props =
+ (VkPhysicalDeviceLineRasterizationPropertiesEXT *)ext;
+ props->lineSubPixelPrecisionBits = 4;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_PROPERTIES_EXT: {
+ VkPhysicalDeviceRobustness2PropertiesEXT *properties =
+ (VkPhysicalDeviceRobustness2PropertiesEXT *)ext;
+ properties->robustStorageBufferAccessSizeAlignment = 4;
+ properties->robustUniformBufferAccessSizeAlignment = 4;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_PROPERTIES_EXT: {
+ VkPhysicalDeviceCustomBorderColorPropertiesEXT *props =
+ (VkPhysicalDeviceCustomBorderColorPropertiesEXT *)ext;
+ props->maxCustomBorderColorSamplers = RADV_BORDER_COLOR_COUNT;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR: {
+ VkPhysicalDeviceFragmentShadingRatePropertiesKHR *props =
+ (VkPhysicalDeviceFragmentShadingRatePropertiesKHR *)ext;
+ props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D){0, 0};
+ props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D){0, 0};
+ props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 0;
+ props->primitiveFragmentShadingRateWithMultipleViewports = true;
+ props->layeredShadingRateAttachments = false;
+ props->fragmentShadingRateNonTrivialCombinerOps = true;
+ props->maxFragmentSize = (VkExtent2D){2, 2};
+ props->maxFragmentSizeAspectRatio = 1;
+ props->maxFragmentShadingRateCoverageSamples = 2 * 2;
+ props->maxFragmentShadingRateRasterizationSamples = VK_SAMPLE_COUNT_8_BIT;
+ props->fragmentShadingRateWithShaderDepthStencilWrites = false;
+ props->fragmentShadingRateWithSampleMask = true;
+ props->fragmentShadingRateWithShaderSampleMask = false;
+ props->fragmentShadingRateWithConservativeRasterization = true;
+ props->fragmentShadingRateWithFragmentShaderInterlock = false;
+ props->fragmentShadingRateWithCustomSampleLocations = true;
+ props->fragmentShadingRateStrictMultiplyCombiner = true;
+ break;
+ }
+ default:
+ break;
+ }
+ }
+}
+
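The pNext walk above only fills structs that the caller chains in. As an illustrative sketch (not part of the patch), an application could query the AMD shader-core properties reported here roughly as follows; the handle name `gpu` and the helper name are assumptions:

#include <vulkan/vulkan.h>

/* Illustrative sketch: query VkPhysicalDeviceShaderCorePropertiesAMD through
 * the pNext chain of vkGetPhysicalDeviceProperties2. 'gpu' is assumed to be a
 * valid VkPhysicalDevice whose driver exposes VK_AMD_shader_core_properties. */
static void
query_shader_core_properties(VkPhysicalDevice gpu)
{
   VkPhysicalDeviceShaderCorePropertiesAMD core = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_AMD,
   };
   VkPhysicalDeviceProperties2 props = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
      .pNext = &core,
   };

   vkGetPhysicalDeviceProperties2(gpu, &props);

   /* core.shaderEngineCount, core.wavefrontSize, etc. now hold the values
    * written by the corresponding switch case above. */
}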
+static void
+radv_get_physical_device_queue_family_properties(struct radv_physical_device *pdevice,
+ uint32_t *pCount,
+ VkQueueFamilyProperties **pQueueFamilyProperties)
+{
+ int num_queue_families = 1;
+ int idx;
+ if (pdevice->rad_info.num_rings[RING_COMPUTE] > 0 &&
+ !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
+ num_queue_families++;
+
+ if (pQueueFamilyProperties == NULL) {
+ *pCount = num_queue_families;
+ return;
+ }
+
+ if (!*pCount)
+ return;
+
+ idx = 0;
+ if (*pCount >= 1) {
+ *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
+ .queueFlags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT |
+ VK_QUEUE_SPARSE_BINDING_BIT,
+ .queueCount = 1,
+ .timestampValidBits = 64,
+ .minImageTransferGranularity = (VkExtent3D){1, 1, 1},
+ };
+ idx++;
+ }
+
+ if (pdevice->rad_info.num_rings[RING_COMPUTE] > 0 &&
+ !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
+ if (*pCount > idx) {
+ *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
+ .queueFlags =
+ VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT | VK_QUEUE_SPARSE_BINDING_BIT,
+ .queueCount = pdevice->rad_info.num_rings[RING_COMPUTE],
+ .timestampValidBits = 64,
+ .minImageTransferGranularity = (VkExtent3D){1, 1, 1},
+ };
+ idx++;
+ }
+ }
+ *pCount = idx;
+}
+
+void
+radv_GetPhysicalDeviceQueueFamilyProperties(VkPhysicalDevice physicalDevice, uint32_t *pCount,
+ VkQueueFamilyProperties *pQueueFamilyProperties)
+{
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
+ if (!pQueueFamilyProperties) {
+ radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
+ return;
+ }
+ VkQueueFamilyProperties *properties[] = {
+ pQueueFamilyProperties + 0,
+ pQueueFamilyProperties + 1,
+ pQueueFamilyProperties + 2,
+ };
+ radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
+ assert(*pCount <= 3);
+}
+
+void
+radv_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice, uint32_t *pCount,
+ VkQueueFamilyProperties2 *pQueueFamilyProperties)
+{
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
+ if (!pQueueFamilyProperties) {
+ radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
+ return;
+ }
+ VkQueueFamilyProperties *properties[] = {
+ &pQueueFamilyProperties[0].queueFamilyProperties,
+ &pQueueFamilyProperties[1].queueFamilyProperties,
+ &pQueueFamilyProperties[2].queueFamilyProperties,
+ };
+ radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
+ assert(*pCount <= 3);
+}
+
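The helper above implements the usual two-call enumeration contract (count query, then fill). A minimal application-side sketch of that idiom, for illustration only; `gpu` and `get_queue_families` are assumed names:

#include <stdlib.h>
#include <vulkan/vulkan.h>

/* Illustrative sketch: count/fill enumeration of queue families. The caller
 * owns and frees the returned array. */
static VkQueueFamilyProperties *
get_queue_families(VkPhysicalDevice gpu, uint32_t *count)
{
   vkGetPhysicalDeviceQueueFamilyProperties(gpu, count, NULL);     /* query count */

   VkQueueFamilyProperties *props = calloc(*count, sizeof(*props));
   if (props)
      vkGetPhysicalDeviceQueueFamilyProperties(gpu, count, props); /* fill array */

   return props;
}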
+void
+radv_GetPhysicalDeviceMemoryProperties(VkPhysicalDevice physicalDevice,
+ VkPhysicalDeviceMemoryProperties *pMemoryProperties)
+{
+ RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
+
+ *pMemoryProperties = physical_device->memory_properties;
}
static void
radv_get_memory_budget_properties(VkPhysicalDevice physicalDevice,
- VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)
-{
- RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
- VkPhysicalDeviceMemoryProperties *memory_properties = &device->memory_properties;
-
- /* For all memory heaps, the computation of budget is as follow:
- * heap_budget = heap_size - global_heap_usage + app_heap_usage
- *
- * The Vulkan spec 1.1.97 says that the budget should include any
- * currently allocated device memory.
- *
- * Note that the application heap usages are not really accurate (eg.
- * in presence of shared buffers).
- */
- unsigned mask = device->heaps;
- unsigned heap = 0;
- while (mask) {
- uint64_t internal_usage = 0, total_usage = 0;
- unsigned type = 1u << u_bit_scan(&mask);
-
- switch(type) {
- case RADV_HEAP_VRAM:
- internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM);
- total_usage = device->ws->query_value(device->ws, RADEON_VRAM_USAGE);
- break;
- case RADV_HEAP_VRAM_VIS:
- internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM_VIS);
- if (!(device->heaps & RADV_HEAP_VRAM))
- internal_usage += device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM);
- total_usage = device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE);
- break;
- case RADV_HEAP_GTT:
- internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_GTT);
- total_usage = device->ws->query_value(device->ws, RADEON_GTT_USAGE);
- break;
- }
-
- uint64_t free_space = device->memory_properties.memoryHeaps[heap].size -
- MIN2(device->memory_properties.memoryHeaps[heap].size,
- total_usage);
- memoryBudget->heapBudget[heap] = free_space + internal_usage;
- memoryBudget->heapUsage[heap] = internal_usage;
- ++heap;
- }
-
- assert(heap == memory_properties->memoryHeapCount);
-
- /* The heapBudget and heapUsage values must be zero for array elements
- * greater than or equal to
- * VkPhysicalDeviceMemoryProperties::memoryHeapCount.
- */
- for (uint32_t i = memory_properties->memoryHeapCount; i < VK_MAX_MEMORY_HEAPS; i++) {
- memoryBudget->heapBudget[i] = 0;
- memoryBudget->heapUsage[i] = 0;
- }
-}
-
-void radv_GetPhysicalDeviceMemoryProperties2(
- VkPhysicalDevice physicalDevice,
- VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
-{
- radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
- &pMemoryProperties->memoryProperties);
-
- VkPhysicalDeviceMemoryBudgetPropertiesEXT *memory_budget =
- vk_find_struct(pMemoryProperties->pNext,
- PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT);
- if (memory_budget)
- radv_get_memory_budget_properties(physicalDevice, memory_budget);
-}
-
-VkResult radv_GetMemoryHostPointerPropertiesEXT(
- VkDevice _device,
- VkExternalMemoryHandleTypeFlagBits handleType,
- const void *pHostPointer,
- VkMemoryHostPointerPropertiesEXT *pMemoryHostPointerProperties)
-{
- RADV_FROM_HANDLE(radv_device, device, _device);
-
- switch (handleType)
- {
- case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: {
- const struct radv_physical_device *physical_device = device->physical_device;
- uint32_t memoryTypeBits = 0;
- for (int i = 0; i < physical_device->memory_properties.memoryTypeCount; i++) {
- if (physical_device->memory_domains[i] == RADEON_DOMAIN_GTT &&
- !(physical_device->memory_flags[i] & RADEON_FLAG_GTT_WC)) {
- memoryTypeBits = (1 << i);
- break;
- }
- }
- pMemoryHostPointerProperties->memoryTypeBits = memoryTypeBits;
- return VK_SUCCESS;
- }
- default:
- return VK_ERROR_INVALID_EXTERNAL_HANDLE;
- }
+ VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)
+{
+ RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
+ VkPhysicalDeviceMemoryProperties *memory_properties = &device->memory_properties;
+
+ /* For all memory heaps, the computation of budget is as follows:
+ * heap_budget = heap_size - global_heap_usage + app_heap_usage
+ *
+ * The Vulkan spec 1.1.97 says that the budget should include any
+ * currently allocated device memory.
+ *
+ * Note that the application heap usages are not really accurate (e.g.
+ * in the presence of shared buffers).
+ */
+ unsigned mask = device->heaps;
+ unsigned heap = 0;
+ while (mask) {
+ uint64_t internal_usage = 0, total_usage = 0;
+ unsigned type = 1u << u_bit_scan(&mask);
+
+ switch (type) {
+ case RADV_HEAP_VRAM:
+ internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM);
+ total_usage = device->ws->query_value(device->ws, RADEON_VRAM_USAGE);
+ break;
+ case RADV_HEAP_VRAM_VIS:
+ internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM_VIS);
+ if (!(device->heaps & RADV_HEAP_VRAM))
+ internal_usage += device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM);
+ total_usage = device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE);
+ break;
+ case RADV_HEAP_GTT:
+ internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_GTT);
+ total_usage = device->ws->query_value(device->ws, RADEON_GTT_USAGE);
+ break;
+ }
+
+ uint64_t free_space = device->memory_properties.memoryHeaps[heap].size -
+ MIN2(device->memory_properties.memoryHeaps[heap].size, total_usage);
+ memoryBudget->heapBudget[heap] = free_space + internal_usage;
+ memoryBudget->heapUsage[heap] = internal_usage;
+ ++heap;
+ }
+
+ assert(heap == memory_properties->memoryHeapCount);
+
+ /* The heapBudget and heapUsage values must be zero for array elements
+ * greater than or equal to
+ * VkPhysicalDeviceMemoryProperties::memoryHeapCount.
+ */
+ for (uint32_t i = memory_properties->memoryHeapCount; i < VK_MAX_MEMORY_HEAPS; i++) {
+ memoryBudget->heapBudget[i] = 0;
+ memoryBudget->heapUsage[i] = 0;
+ }
+}
+
+void
+radv_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice physicalDevice,
+ VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
+{
+ radv_GetPhysicalDeviceMemoryProperties(physicalDevice, &pMemoryProperties->memoryProperties);
+
+ VkPhysicalDeviceMemoryBudgetPropertiesEXT *memory_budget =
+ vk_find_struct(pMemoryProperties->pNext, PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT);
+ if (memory_budget)
+ radv_get_memory_budget_properties(physicalDevice, memory_budget);
+}
+
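The budget reported above follows heap_budget = heap_size - global_heap_usage + app_heap_usage, so it shrinks as other processes allocate VRAM/GTT. A hedged application-side sketch of reading it through VK_EXT_memory_budget; `gpu` and the function name are assumptions:

#include <vulkan/vulkan.h>

/* Illustrative sketch: read per-heap budget and usage via the
 * VK_EXT_memory_budget pNext chain. */
static void
query_memory_budget(VkPhysicalDevice gpu)
{
   VkPhysicalDeviceMemoryBudgetPropertiesEXT budget = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT,
   };
   VkPhysicalDeviceMemoryProperties2 mem = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2,
      .pNext = &budget,
   };

   vkGetPhysicalDeviceMemoryProperties2(gpu, &mem);

   for (uint32_t i = 0; i < mem.memoryProperties.memoryHeapCount; i++) {
      /* budget.heapBudget[i]: how much this process may still allocate from heap i.
       * budget.heapUsage[i]:  how much it has already allocated from heap i. */
   }
}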
+VkResult
+radv_GetMemoryHostPointerPropertiesEXT(
+ VkDevice _device, VkExternalMemoryHandleTypeFlagBits handleType, const void *pHostPointer,
+ VkMemoryHostPointerPropertiesEXT *pMemoryHostPointerProperties)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+
+ switch (handleType) {
+ case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: {
+ const struct radv_physical_device *physical_device = device->physical_device;
+ uint32_t memoryTypeBits = 0;
+ for (int i = 0; i < physical_device->memory_properties.memoryTypeCount; i++) {
+ if (physical_device->memory_domains[i] == RADEON_DOMAIN_GTT &&
+ !(physical_device->memory_flags[i] & RADEON_FLAG_GTT_WC)) {
+ memoryTypeBits = (1 << i);
+ break;
+ }
+ }
+ pMemoryHostPointerProperties->memoryTypeBits = memoryTypeBits;
+ return VK_SUCCESS;
+ }
+ default:
+ return VK_ERROR_INVALID_EXTERNAL_HANDLE;
+ }
}
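The handler above backs VK_EXT_external_memory_host, whose minimum import alignment is reported as 4096 bytes earlier in this file. A hedged sketch of the matching import path on the application side; `dev`, `ptr`, `size` and the helper name are assumptions, and the extension entry point is fetched through vkGetDeviceProcAddr as usual:

#include <vulkan/vulkan.h>

/* Illustrative sketch: import a page-aligned host allocation as VkDeviceMemory
 * using VK_EXT_external_memory_host. */
static VkResult
import_host_pointer(VkDevice dev, void *ptr, VkDeviceSize size, VkDeviceMemory *out_mem)
{
   PFN_vkGetMemoryHostPointerPropertiesEXT get_props =
      (PFN_vkGetMemoryHostPointerPropertiesEXT)vkGetDeviceProcAddr(
         dev, "vkGetMemoryHostPointerPropertiesEXT");
   if (!get_props)
      return VK_ERROR_EXTENSION_NOT_PRESENT;

   VkMemoryHostPointerPropertiesEXT props = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT,
   };
   VkResult result = get_props(dev, VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
                               ptr, &props);
   if (result != VK_SUCCESS)
      return result;
   if (!props.memoryTypeBits)
      return VK_ERROR_INVALID_EXTERNAL_HANDLE;

   uint32_t type_index = 0;
   while (!(props.memoryTypeBits & (1u << type_index)))
      type_index++; /* pick the lowest memory type the driver reported */

   const VkImportMemoryHostPointerInfoEXT import = {
      .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT,
      .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
      .pHostPointer = ptr,
   };
   const VkMemoryAllocateInfo alloc = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
      .pNext = &import,
      .allocationSize = size, /* must respect the reported minimum alignment */
      .memoryTypeIndex = type_index,
   };
   return vkAllocateMemory(dev, &alloc, NULL, out_mem);
}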
static enum radeon_ctx_priority
radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoEXT *pObj)
{
- /* Default to MEDIUM when a specific global priority isn't requested */
- if (!pObj)
- return RADEON_CTX_PRIORITY_MEDIUM;
-
- switch(pObj->globalPriority) {
- case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT:
- return RADEON_CTX_PRIORITY_REALTIME;
- case VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT:
- return RADEON_CTX_PRIORITY_HIGH;
- case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT:
- return RADEON_CTX_PRIORITY_MEDIUM;
- case VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT:
- return RADEON_CTX_PRIORITY_LOW;
- default:
- unreachable("Illegal global priority value");
- return RADEON_CTX_PRIORITY_INVALID;
- }
+ /* Default to MEDIUM when a specific global priority isn't requested */
+ if (!pObj)
+ return RADEON_CTX_PRIORITY_MEDIUM;
+
+ switch (pObj->globalPriority) {
+ case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT:
+ return RADEON_CTX_PRIORITY_REALTIME;
+ case VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT:
+ return RADEON_CTX_PRIORITY_HIGH;
+ case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT:
+ return RADEON_CTX_PRIORITY_MEDIUM;
+ case VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT:
+ return RADEON_CTX_PRIORITY_LOW;
+ default:
+ unreachable("Illegal global priority value");
+ return RADEON_CTX_PRIORITY_INVALID;
+ }
}
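The translation above consumes VkDeviceQueueGlobalPriorityCreateInfoEXT, which the application chains into its VkDeviceQueueCreateInfo at device creation. A minimal sketch, assuming VK_EXT_global_priority is enabled; the variable names are illustrative, and HIGH/REALTIME requests may still fail with VK_ERROR_NOT_PERMITTED_EXT without sufficient privileges:

#include <vulkan/vulkan.h>

/* Illustrative sketch: request a HIGH global priority on queue family 0. */
static const float queue_priority = 1.0f;

static const VkDeviceQueueGlobalPriorityCreateInfoEXT global_priority_info = {
   .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT,
   .globalPriority = VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT,
};

static const VkDeviceQueueCreateInfo queue_info = {
   .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
   .pNext = &global_priority_info,
   .queueFamilyIndex = 0,
   .queueCount = 1,
   .pQueuePriorities = &queue_priority,
};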
static int
-radv_queue_init(struct radv_device *device, struct radv_queue *queue,
- uint32_t queue_family_index, int idx,
- VkDeviceQueueCreateFlags flags,
- const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority)
+radv_queue_init(struct radv_device *device, struct radv_queue *queue, uint32_t queue_family_index,
+ int idx, VkDeviceQueueCreateFlags flags,
+ const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority)
{
- queue->device = device;
- queue->queue_family_index = queue_family_index;
- queue->queue_idx = idx;
- queue->priority = radv_get_queue_global_priority(global_priority);
- queue->flags = flags;
- queue->hw_ctx = device->hw_ctx[queue->priority];
+ queue->device = device;
+ queue->queue_family_index = queue_family_index;
+ queue->queue_idx = idx;
+ queue->priority = radv_get_queue_global_priority(global_priority);
+ queue->flags = flags;
+ queue->hw_ctx = device->hw_ctx[queue->priority];
- vk_object_base_init(&device->vk, &queue->base, VK_OBJECT_TYPE_QUEUE);
+ vk_object_base_init(&device->vk, &queue->base, VK_OBJECT_TYPE_QUEUE);
- list_inithead(&queue->pending_submissions);
- mtx_init(&queue->pending_mutex, mtx_plain);
+ list_inithead(&queue->pending_submissions);
+ mtx_init(&queue->pending_mutex, mtx_plain);
- mtx_init(&queue->thread_mutex, mtx_plain);
- if (u_cnd_monotonic_init(&queue->thread_cond)) {
- vk_object_base_finish(&queue->base);
- return vk_error(device->instance, VK_ERROR_INITIALIZATION_FAILED);
- }
- queue->cond_created = true;
+ mtx_init(&queue->thread_mutex, mtx_plain);
+ if (u_cnd_monotonic_init(&queue->thread_cond)) {
+ vk_object_base_finish(&queue->base);
+ return vk_error(device->instance, VK_ERROR_INITIALIZATION_FAILED);
+ }
+ queue->cond_created = true;
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
static void
radv_queue_finish(struct radv_queue *queue)
{
- if (queue->hw_ctx) {
- if (queue->cond_created) {
- if (queue->thread_running) {
- p_atomic_set(&queue->thread_exit, true);
- u_cnd_monotonic_broadcast(&queue->thread_cond);
- thrd_join(queue->submission_thread, NULL);
- }
-
- u_cnd_monotonic_destroy(&queue->thread_cond);
- }
-
- mtx_destroy(&queue->pending_mutex);
- mtx_destroy(&queue->thread_mutex);
- }
-
- if (queue->initial_full_flush_preamble_cs)
- queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
- if (queue->initial_preamble_cs)
- queue->device->ws->cs_destroy(queue->initial_preamble_cs);
- if (queue->continue_preamble_cs)
- queue->device->ws->cs_destroy(queue->continue_preamble_cs);
- if (queue->descriptor_bo)
- queue->device->ws->buffer_destroy(queue->device->ws, queue->descriptor_bo);
- if (queue->scratch_bo)
- queue->device->ws->buffer_destroy(queue->device->ws, queue->scratch_bo);
- if (queue->esgs_ring_bo)
- queue->device->ws->buffer_destroy(queue->device->ws, queue->esgs_ring_bo);
- if (queue->gsvs_ring_bo)
- queue->device->ws->buffer_destroy(queue->device->ws, queue->gsvs_ring_bo);
- if (queue->tess_rings_bo)
- queue->device->ws->buffer_destroy(queue->device->ws, queue->tess_rings_bo);
- if (queue->gds_bo)
- queue->device->ws->buffer_destroy(queue->device->ws, queue->gds_bo);
- if (queue->gds_oa_bo)
- queue->device->ws->buffer_destroy(queue->device->ws, queue->gds_oa_bo);
- if (queue->compute_scratch_bo)
- queue->device->ws->buffer_destroy(queue->device->ws, queue->compute_scratch_bo);
-
- vk_object_base_finish(&queue->base);
+ if (queue->hw_ctx) {
+ if (queue->cond_created) {
+ if (queue->thread_running) {
+ p_atomic_set(&queue->thread_exit, true);
+ u_cnd_monotonic_broadcast(&queue->thread_cond);
+ thrd_join(queue->submission_thread, NULL);
+ }
+
+ u_cnd_monotonic_destroy(&queue->thread_cond);
+ }
+
+ mtx_destroy(&queue->pending_mutex);
+ mtx_destroy(&queue->thread_mutex);
+ }
+
+ if (queue->initial_full_flush_preamble_cs)
+ queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
+ if (queue->initial_preamble_cs)
+ queue->device->ws->cs_destroy(queue->initial_preamble_cs);
+ if (queue->continue_preamble_cs)
+ queue->device->ws->cs_destroy(queue->continue_preamble_cs);
+ if (queue->descriptor_bo)
+ queue->device->ws->buffer_destroy(queue->device->ws, queue->descriptor_bo);
+ if (queue->scratch_bo)
+ queue->device->ws->buffer_destroy(queue->device->ws, queue->scratch_bo);
+ if (queue->esgs_ring_bo)
+ queue->device->ws->buffer_destroy(queue->device->ws, queue->esgs_ring_bo);
+ if (queue->gsvs_ring_bo)
+ queue->device->ws->buffer_destroy(queue->device->ws, queue->gsvs_ring_bo);
+ if (queue->tess_rings_bo)
+ queue->device->ws->buffer_destroy(queue->device->ws, queue->tess_rings_bo);
+ if (queue->gds_bo)
+ queue->device->ws->buffer_destroy(queue->device->ws, queue->gds_bo);
+ if (queue->gds_oa_bo)
+ queue->device->ws->buffer_destroy(queue->device->ws, queue->gds_oa_bo);
+ if (queue->compute_scratch_bo)
+ queue->device->ws->buffer_destroy(queue->device->ws, queue->compute_scratch_bo);
+
+ vk_object_base_finish(&queue->base);
}
static void
radv_device_init_gs_info(struct radv_device *device)
{
- device->gs_table_depth = ac_get_gs_table_depth(device->physical_device->rad_info.chip_class,
- device->physical_device->rad_info.family);
+ device->gs_table_depth = ac_get_gs_table_depth(device->physical_device->rad_info.chip_class,
+ device->physical_device->rad_info.family);
}
static VkResult
check_physical_device_features(VkPhysicalDevice physicalDevice,
- const VkPhysicalDeviceFeatures *features)
+ const VkPhysicalDeviceFeatures *features)
{
- RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
- VkPhysicalDeviceFeatures supported_features;
- radv_GetPhysicalDeviceFeatures(physicalDevice, &supported_features);
- VkBool32 *supported_feature = (VkBool32 *)&supported_features;
- VkBool32 *enabled_feature = (VkBool32 *)features;
- unsigned num_features = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32);
- for (uint32_t i = 0; i < num_features; i++) {
- if (enabled_feature[i] && !supported_feature[i])
- return vk_error(physical_device->instance, VK_ERROR_FEATURE_NOT_PRESENT);
- }
+ RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
+ VkPhysicalDeviceFeatures supported_features;
+ radv_GetPhysicalDeviceFeatures(physicalDevice, &supported_features);
+ VkBool32 *supported_feature = (VkBool32 *)&supported_features;
+ VkBool32 *enabled_feature = (VkBool32 *)features;
+ unsigned num_features = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32);
+ for (uint32_t i = 0; i < num_features; i++) {
+ if (enabled_feature[i] && !supported_feature[i])
+ return vk_error(physical_device->instance, VK_ERROR_FEATURE_NOT_PRESENT);
+ }
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
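check_physical_device_features works because VkPhysicalDeviceFeatures is a flat struct of VkBool32 members and can therefore be walked as an array. The complementary application-side pattern, clamping a feature request to what the device supports, might look like the following illustrative sketch; `gpu` and the helper name are assumptions:

#include <vulkan/vulkan.h>

/* Illustrative sketch: drop any requested feature the device does not support. */
static void
clamp_features(VkPhysicalDevice gpu, VkPhysicalDeviceFeatures *wanted)
{
   VkPhysicalDeviceFeatures supported;
   vkGetPhysicalDeviceFeatures(gpu, &supported);

   VkBool32 *want = (VkBool32 *)wanted;
   const VkBool32 *have = (const VkBool32 *)&supported;
   for (unsigned i = 0; i < sizeof(supported) / sizeof(VkBool32); i++)
      want[i] = want[i] && have[i];
}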
-static VkResult radv_device_init_border_color(struct radv_device *device)
+static VkResult
+radv_device_init_border_color(struct radv_device *device)
{
- VkResult result;
+ VkResult result;
- device->border_color_data.bo =
- device->ws->buffer_create(device->ws,
- RADV_BORDER_COLOR_BUFFER_SIZE,
- 4096,
- RADEON_DOMAIN_VRAM,
- RADEON_FLAG_CPU_ACCESS |
- RADEON_FLAG_READ_ONLY |
- RADEON_FLAG_NO_INTERPROCESS_SHARING,
- RADV_BO_PRIORITY_SHADER);
+ device->border_color_data.bo = device->ws->buffer_create(
+ device->ws, RADV_BORDER_COLOR_BUFFER_SIZE, 4096, RADEON_DOMAIN_VRAM,
+ RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_READ_ONLY | RADEON_FLAG_NO_INTERPROCESS_SHARING,
+ RADV_BO_PRIORITY_SHADER);
- if (device->border_color_data.bo == NULL)
- return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ if (device->border_color_data.bo == NULL)
+ return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
- result = device->ws->buffer_make_resident(device->ws, device->border_color_data.bo, true);
- if (result != VK_SUCCESS)
- return vk_error(device->physical_device->instance, result);
+ result = device->ws->buffer_make_resident(device->ws, device->border_color_data.bo, true);
+ if (result != VK_SUCCESS)
+ return vk_error(device->physical_device->instance, result);
- device->border_color_data.colors_gpu_ptr =
- device->ws->buffer_map(device->border_color_data.bo);
- if (!device->border_color_data.colors_gpu_ptr)
- return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
- mtx_init(&device->border_color_data.mutex, mtx_plain);
+ device->border_color_data.colors_gpu_ptr = device->ws->buffer_map(device->border_color_data.bo);
+ if (!device->border_color_data.colors_gpu_ptr)
+ return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ mtx_init(&device->border_color_data.mutex, mtx_plain);
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
-static void radv_device_finish_border_color(struct radv_device *device)
+static void
+radv_device_finish_border_color(struct radv_device *device)
{
- if (device->border_color_data.bo) {
- device->ws->buffer_make_resident(device->ws, device->border_color_data.bo, false);
- device->ws->buffer_destroy(device->ws, device->border_color_data.bo);
+ if (device->border_color_data.bo) {
+ device->ws->buffer_make_resident(device->ws, device->border_color_data.bo, false);
+ device->ws->buffer_destroy(device->ws, device->border_color_data.bo);
- mtx_destroy(&device->border_color_data.mutex);
- }
+ mtx_destroy(&device->border_color_data.mutex);
+ }
}
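The buffer set up above backs VK_EXT_custom_border_color (up to RADV_BORDER_COLOR_COUNT samplers). For context, a hedged sketch of the sampler-side usage an application would pair with it; `dev`, the chosen color, and the helper name are assumptions:

#include <vulkan/vulkan.h>

/* Illustrative sketch: create a sampler with a custom float border color. */
static VkResult
create_custom_border_sampler(VkDevice dev, VkSampler *sampler)
{
   const VkSamplerCustomBorderColorCreateInfoEXT border = {
      .sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT,
      .customBorderColor = { .float32 = { 1.0f, 0.5f, 0.0f, 1.0f } },
      .format = VK_FORMAT_R8G8B8A8_UNORM,
   };
   const VkSamplerCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
      .pNext = &border,
      .magFilter = VK_FILTER_LINEAR,
      .minFilter = VK_FILTER_LINEAR,
      .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
      .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
      .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
      .borderColor = VK_BORDER_COLOR_FLOAT_CUSTOM_EXT,
   };
   return vkCreateSampler(dev, &info, NULL, sampler);
}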
VkResult
-_radv_device_set_lost(struct radv_device *device,
- const char *file, int line,
- const char *msg, ...)
-{
- VkResult err;
- va_list ap;
-
- p_atomic_inc(&device->lost);
-
- va_start(ap, msg);
- err = __vk_errorv(device->physical_device->instance, device,
- VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT,
- VK_ERROR_DEVICE_LOST, file, line, msg, ap);
- va_end(ap);
-
- return err;
-}
-
-VkResult radv_CreateDevice(
- VkPhysicalDevice physicalDevice,
- const VkDeviceCreateInfo* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkDevice* pDevice)
-{
- RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
- VkResult result;
- struct radv_device *device;
-
- bool keep_shader_info = false;
- bool robust_buffer_access = false;
- bool robust_buffer_access2 = false;
- bool overallocation_disallowed = false;
- bool custom_border_colors = false;
- bool vrs_enabled = false;
-
- /* Check enabled features */
- if (pCreateInfo->pEnabledFeatures) {
- result = check_physical_device_features(physicalDevice,
- pCreateInfo->pEnabledFeatures);
- if (result != VK_SUCCESS)
- return result;
-
- if (pCreateInfo->pEnabledFeatures->robustBufferAccess)
- robust_buffer_access = true;
- }
-
- vk_foreach_struct_const(ext, pCreateInfo->pNext) {
- switch (ext->sType) {
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2: {
- const VkPhysicalDeviceFeatures2 *features = (const void *)ext;
- result = check_physical_device_features(physicalDevice,
- &features->features);
- if (result != VK_SUCCESS)
- return result;
-
- if (features->features.robustBufferAccess)
- robust_buffer_access = true;
- break;
- }
- case VK_STRUCTURE_TYPE_DEVICE_MEMORY_OVERALLOCATION_CREATE_INFO_AMD: {
- const VkDeviceMemoryOverallocationCreateInfoAMD *overallocation = (const void *)ext;
- if (overallocation->overallocationBehavior == VK_MEMORY_OVERALLOCATION_BEHAVIOR_DISALLOWED_AMD)
- overallocation_disallowed = true;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {
- const VkPhysicalDeviceCustomBorderColorFeaturesEXT *border_color_features = (const void *)ext;
- custom_border_colors = border_color_features->customBorderColors;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR: {
- const VkPhysicalDeviceFragmentShadingRateFeaturesKHR *vrs = (const void *)ext;
- vrs_enabled = vrs->pipelineFragmentShadingRate ||
- vrs->primitiveFragmentShadingRate ||
- vrs->attachmentFragmentShadingRate;
- break;
- }
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: {
- const VkPhysicalDeviceRobustness2FeaturesEXT *features = (const void *)ext;
- if (features->robustBufferAccess2)
- robust_buffer_access2 = true;
- break;
- }
- default:
- break;
- }
- }
-
- device = vk_zalloc2(&physical_device->instance->vk.alloc, pAllocator,
- sizeof(*device), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
- if (!device)
- return vk_error(physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
-
- struct vk_device_dispatch_table dispatch_table;
-
- if (radv_thread_trace_enabled()) {
- vk_device_dispatch_table_from_entrypoints(&dispatch_table,
- &sqtt_device_entrypoints, true);
- vk_device_dispatch_table_from_entrypoints(&dispatch_table,
- &radv_device_entrypoints, false);
- } else {
- vk_device_dispatch_table_from_entrypoints(&dispatch_table,
- &radv_device_entrypoints, true);
- }
-
- result = vk_device_init(&device->vk, &physical_device->vk,
- &dispatch_table, pCreateInfo, pAllocator);
- if (result != VK_SUCCESS) {
- vk_free(&device->vk.alloc, device);
- return result;
- }
-
- device->instance = physical_device->instance;
- device->physical_device = physical_device;
-
- device->ws = physical_device->ws;
-
- keep_shader_info = device->vk.enabled_extensions.AMD_shader_info;
-
- /* With update after bind we can't attach bo's to the command buffer
- * from the descriptor set anymore, so we have to use a global BO list.
- */
- device->use_global_bo_list =
- (device->instance->perftest_flags & RADV_PERFTEST_BO_LIST) ||
- device->vk.enabled_extensions.EXT_descriptor_indexing ||
- device->vk.enabled_extensions.EXT_buffer_device_address ||
- device->vk.enabled_extensions.KHR_buffer_device_address;
-
- device->robust_buffer_access = robust_buffer_access || robust_buffer_access2;
- device->robust_buffer_access2 = robust_buffer_access2;
-
- device->adjust_frag_coord_z = (vrs_enabled ||
- device->vk.enabled_extensions.KHR_fragment_shading_rate ||
- device->force_vrs != RADV_FORCE_VRS_NONE) &&
- (device->physical_device->rad_info.family == CHIP_SIENNA_CICHLID ||
- device->physical_device->rad_info.family == CHIP_NAVY_FLOUNDER ||
- device->physical_device->rad_info.family == CHIP_VANGOGH);
-
- mtx_init(&device->shader_slab_mutex, mtx_plain);
- list_inithead(&device->shader_slabs);
-
- device->overallocation_disallowed = overallocation_disallowed;
- mtx_init(&device->overallocation_mutex, mtx_plain);
-
- /* Create one context per queue priority. */
- for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
- const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
- const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority =
- vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);
- enum radeon_ctx_priority priority = radv_get_queue_global_priority(global_priority);
-
- if (device->hw_ctx[priority])
- continue;
-
- result = device->ws->ctx_create(device->ws, priority,
- &device->hw_ctx[priority]);
- if (result != VK_SUCCESS)
- goto fail;
- }
-
- for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
- const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
- uint32_t qfi = queue_create->queueFamilyIndex;
- const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority =
- vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);
-
- device->queues[qfi] = vk_alloc(&device->vk.alloc,
- queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
- if (!device->queues[qfi]) {
- result = VK_ERROR_OUT_OF_HOST_MEMORY;
- goto fail;
- }
-
- memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));
-
- device->queue_count[qfi] = queue_create->queueCount;
-
- for (unsigned q = 0; q < queue_create->queueCount; q++) {
- result = radv_queue_init(device, &device->queues[qfi][q],
- qfi, q, queue_create->flags,
- global_priority);
- if (result != VK_SUCCESS)
- goto fail;
- }
- }
-
- device->pbb_allowed = device->physical_device->rad_info.chip_class >= GFX9 &&
- !(device->instance->debug_flags & RADV_DEBUG_NOBINNING);
-
- /* Disable DFSM by default. As of 2019-09-15 Talos on Low is still 3% slower on Raven. */
- device->dfsm_allowed = device->pbb_allowed &&
- (device->instance->perftest_flags & RADV_PERFTEST_DFSM);
-
- /* The maximum number of scratch waves. Scratch space isn't divided
- * evenly between CUs. The number is only a function of the number of CUs.
- * We can decrease the constant to decrease the scratch buffer size.
- *
- * sctx->scratch_waves must be >= the maximum possible size of
- * 1 threadgroup, so that the hw doesn't hang from being unable
- * to start any.
- *
- * The recommended value is 4 per CU at most. Higher numbers don't
- * bring much benefit, but they still occupy chip resources (think
- * async compute). I've seen ~2% performance difference between 4 and 32.
- */
- uint32_t max_threads_per_block = 2048;
- device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
- max_threads_per_block / 64);
-
- device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1);
-
- if (device->physical_device->rad_info.chip_class >= GFX7) {
- /* If the KMD allows it (there is a KMD hw register for it),
- * allow launching waves out-of-order.
- */
- device->dispatch_initiator |= S_00B800_ORDER_MODE(1);
- }
-
- radv_device_init_gs_info(device);
-
- device->tess_offchip_block_dw_size =
- device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;
-
- if (getenv("RADV_TRACE_FILE")) {
- fprintf(stderr, "***********************************************************************************\n");
- fprintf(stderr, "* WARNING: RADV_TRACE_FILE=<file> is deprecated and replaced by RADV_DEBUG=hang *\n");
- fprintf(stderr, "***********************************************************************************\n");
- abort();
- }
-
- if (device->instance->debug_flags & RADV_DEBUG_HANG) {
- /* Enable GPU hangs detection and dump logs if a GPU hang is
- * detected.
- */
- keep_shader_info = true;
-
- if (!radv_init_trace(device))
- goto fail;
-
- fprintf(stderr, "*****************************************************************************\n");
- fprintf(stderr, "* WARNING: RADV_DEBUG=hang is costly and should only be used for debugging! *\n");
- fprintf(stderr, "*****************************************************************************\n");
-
- /* Wait for idle after every draw/dispatch to identify the
- * first bad call.
- */
- device->instance->debug_flags |= RADV_DEBUG_SYNC_SHADERS;
-
- radv_dump_enabled_options(device, stderr);
- }
-
- if (radv_thread_trace_enabled()) {
- fprintf(stderr, "*************************************************\n");
- fprintf(stderr, "* WARNING: Thread trace support is experimental *\n");
- fprintf(stderr, "*************************************************\n");
-
- if (device->physical_device->rad_info.chip_class < GFX8 ||
- device->physical_device->rad_info.chip_class > GFX10_3) {
- fprintf(stderr, "GPU hardware not supported: refer to "
- "the RGP documentation for the list of "
- "supported GPUs!\n");
- abort();
- }
-
- if (!radv_thread_trace_init(device))
- goto fail;
- }
-
- if (getenv("RADV_TRAP_HANDLER")) {
- /* TODO: Add support for more hardware. */
- assert(device->physical_device->rad_info.chip_class == GFX8);
-
- fprintf(stderr, "**********************************************************************\n");
- fprintf(stderr, "* WARNING: RADV_TRAP_HANDLER is experimental and only for debugging! *\n");
- fprintf(stderr, "**********************************************************************\n");
-
- /* To get the disassembly of the faulty shaders, we have to
- * keep some shader info around.
- */
- keep_shader_info = true;
-
- if (!radv_trap_handler_init(device))
- goto fail;
- }
-
- if (getenv("RADV_FORCE_VRS")) {
- const char *vrs_rates = getenv("RADV_FORCE_VRS");
-
- if (device->physical_device->rad_info.chip_class < GFX10_3)
- fprintf(stderr, "radv: VRS is only supported on RDNA2+\n");
- else if (device->physical_device->use_llvm)
- fprintf(stderr, "radv: Forcing VRS rates is only supported with ACO\n");
- else if (!strcmp(vrs_rates, "2x2"))
- device->force_vrs = RADV_FORCE_VRS_2x2;
- else if (!strcmp(vrs_rates, "2x1"))
- device->force_vrs = RADV_FORCE_VRS_2x1;
- else if (!strcmp(vrs_rates, "1x2"))
- device->force_vrs = RADV_FORCE_VRS_1x2;
- else
- fprintf(stderr, "radv: Invalid VRS rates specified "
- "(valid values are 2x2, 2x1 and 1x2)\n");
- }
-
- device->keep_shader_info = keep_shader_info;
- result = radv_device_init_meta(device);
- if (result != VK_SUCCESS)
- goto fail;
-
- radv_device_init_msaa(device);
-
- /* If the border color extension is enabled, let's create the buffer we need. */
- if (custom_border_colors) {
- result = radv_device_init_border_color(device);
- if (result != VK_SUCCESS)
- goto fail;
- }
-
- for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
- device->empty_cs[family] = device->ws->cs_create(device->ws, family);
- if (!device->empty_cs[family])
- goto fail;
-
- switch (family) {
- case RADV_QUEUE_GENERAL:
- radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
- radeon_emit(device->empty_cs[family], CC0_UPDATE_LOAD_ENABLES(1));
- radeon_emit(device->empty_cs[family], CC1_UPDATE_SHADOW_ENABLES(1));
- break;
- case RADV_QUEUE_COMPUTE:
- radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
- radeon_emit(device->empty_cs[family], 0);
- break;
- }
-
- result = device->ws->cs_finalize(device->empty_cs[family]);
- if (result != VK_SUCCESS)
- goto fail;
- }
-
- if (device->physical_device->rad_info.chip_class >= GFX7)
- cik_create_gfx_config(device);
-
- VkPipelineCacheCreateInfo ci;
- ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
- ci.pNext = NULL;
- ci.flags = 0;
- ci.pInitialData = NULL;
- ci.initialDataSize = 0;
- VkPipelineCache pc;
- result = radv_CreatePipelineCache(radv_device_to_handle(device),
- &ci, NULL, &pc);
- if (result != VK_SUCCESS)
- goto fail_meta;
-
- device->mem_cache = radv_pipeline_cache_from_handle(pc);
-
- if (u_cnd_monotonic_init(&device->timeline_cond)) {
- result = VK_ERROR_INITIALIZATION_FAILED;
- goto fail_mem_cache;
- }
-
- device->force_aniso =
- MIN2(16, radv_get_int_debug_option("RADV_TEX_ANISO", -1));
- if (device->force_aniso >= 0) {
- fprintf(stderr, "radv: Forcing anisotropy filter to %ix\n",
- 1 << util_logbase2(device->force_aniso));
- }
-
- *pDevice = radv_device_to_handle(device);
- return VK_SUCCESS;
+_radv_device_set_lost(struct radv_device *device, const char *file, int line, const char *msg, ...)
+{
+ VkResult err;
+ va_list ap;
+
+ p_atomic_inc(&device->lost);
+
+ va_start(ap, msg);
+ err =
+ __vk_errorv(device->physical_device->instance, device, VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT,
+ VK_ERROR_DEVICE_LOST, file, line, msg, ap);
+ va_end(ap);
+
+ return err;
+}
+
+VkResult
+radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkDevice *pDevice)
+{
+ RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
+ VkResult result;
+ struct radv_device *device;
+
+ bool keep_shader_info = false;
+ bool robust_buffer_access = false;
+ bool robust_buffer_access2 = false;
+ bool overallocation_disallowed = false;
+ bool custom_border_colors = false;
+ bool vrs_enabled = false;
+
+ /* Check enabled features */
+ if (pCreateInfo->pEnabledFeatures) {
+ result = check_physical_device_features(physicalDevice, pCreateInfo->pEnabledFeatures);
+ if (result != VK_SUCCESS)
+ return result;
+
+ if (pCreateInfo->pEnabledFeatures->robustBufferAccess)
+ robust_buffer_access = true;
+ }
+
+ vk_foreach_struct_const(ext, pCreateInfo->pNext)
+ {
+ switch (ext->sType) {
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2: {
+ const VkPhysicalDeviceFeatures2 *features = (const void *)ext;
+ result = check_physical_device_features(physicalDevice, &features->features);
+ if (result != VK_SUCCESS)
+ return result;
+
+ if (features->features.robustBufferAccess)
+ robust_buffer_access = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_DEVICE_MEMORY_OVERALLOCATION_CREATE_INFO_AMD: {
+ const VkDeviceMemoryOverallocationCreateInfoAMD *overallocation = (const void *)ext;
+ if (overallocation->overallocationBehavior ==
+ VK_MEMORY_OVERALLOCATION_BEHAVIOR_DISALLOWED_AMD)
+ overallocation_disallowed = true;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {
+ const VkPhysicalDeviceCustomBorderColorFeaturesEXT *border_color_features =
+ (const void *)ext;
+ custom_border_colors = border_color_features->customBorderColors;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR: {
+ const VkPhysicalDeviceFragmentShadingRateFeaturesKHR *vrs = (const void *)ext;
+ vrs_enabled = vrs->pipelineFragmentShadingRate || vrs->primitiveFragmentShadingRate ||
+ vrs->attachmentFragmentShadingRate;
+ break;
+ }
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: {
+ const VkPhysicalDeviceRobustness2FeaturesEXT *features = (const void *)ext;
+ if (features->robustBufferAccess2)
+ robust_buffer_access2 = true;
+ break;
+ }
+ default:
+ break;
+ }
+ }
+
+ device = vk_zalloc2(&physical_device->instance->vk.alloc, pAllocator, sizeof(*device), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+ if (!device)
+ return vk_error(physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ struct vk_device_dispatch_table dispatch_table;
+
+ if (radv_thread_trace_enabled()) {
+ vk_device_dispatch_table_from_entrypoints(&dispatch_table, &sqtt_device_entrypoints, true);
+ vk_device_dispatch_table_from_entrypoints(&dispatch_table, &radv_device_entrypoints, false);
+ } else {
+ vk_device_dispatch_table_from_entrypoints(&dispatch_table, &radv_device_entrypoints, true);
+ }
+
+ result =
+ vk_device_init(&device->vk, &physical_device->vk, &dispatch_table, pCreateInfo, pAllocator);
+ if (result != VK_SUCCESS) {
+ vk_free(&device->vk.alloc, device);
+ return result;
+ }
+
+ device->instance = physical_device->instance;
+ device->physical_device = physical_device;
+
+ device->ws = physical_device->ws;
+
+ keep_shader_info = device->vk.enabled_extensions.AMD_shader_info;
+
+ /* With update after bind we can't attach bo's to the command buffer
+ * from the descriptor set anymore, so we have to use a global BO list.
+ */
+ device->use_global_bo_list = (device->instance->perftest_flags & RADV_PERFTEST_BO_LIST) ||
+ device->vk.enabled_extensions.EXT_descriptor_indexing ||
+ device->vk.enabled_extensions.EXT_buffer_device_address ||
+ device->vk.enabled_extensions.KHR_buffer_device_address;
+
+ device->robust_buffer_access = robust_buffer_access || robust_buffer_access2;
+ device->robust_buffer_access2 = robust_buffer_access2;
+
+ device->adjust_frag_coord_z =
+ (vrs_enabled || device->vk.enabled_extensions.KHR_fragment_shading_rate ||
+ device->force_vrs != RADV_FORCE_VRS_NONE) &&
+ (device->physical_device->rad_info.family == CHIP_SIENNA_CICHLID ||
+ device->physical_device->rad_info.family == CHIP_NAVY_FLOUNDER ||
+ device->physical_device->rad_info.family == CHIP_VANGOGH);
+
+ mtx_init(&device->shader_slab_mutex, mtx_plain);
+ list_inithead(&device->shader_slabs);
+
+ device->overallocation_disallowed = overallocation_disallowed;
+ mtx_init(&device->overallocation_mutex, mtx_plain);
+
+ /* Create one context per queue priority. */
+ for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
+ const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
+ const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority =
+ vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);
+ enum radeon_ctx_priority priority = radv_get_queue_global_priority(global_priority);
+
+ if (device->hw_ctx[priority])
+ continue;
+
+ result = device->ws->ctx_create(device->ws, priority, &device->hw_ctx[priority]);
+ if (result != VK_SUCCESS)
+ goto fail;
+ }
+
+ for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
+ const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
+ uint32_t qfi = queue_create->queueFamilyIndex;
+ const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority =
+ vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);
+
+ device->queues[qfi] =
+ vk_alloc(&device->vk.alloc, queue_create->queueCount * sizeof(struct radv_queue), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+ if (!device->queues[qfi]) {
+ result = VK_ERROR_OUT_OF_HOST_MEMORY;
+ goto fail;
+ }
+
+ memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));
+
+ device->queue_count[qfi] = queue_create->queueCount;
+
+ for (unsigned q = 0; q < queue_create->queueCount; q++) {
+ result = radv_queue_init(device, &device->queues[qfi][q], qfi, q, queue_create->flags,
+ global_priority);
+ if (result != VK_SUCCESS)
+ goto fail;
+ }
+ }
+
+ device->pbb_allowed = device->physical_device->rad_info.chip_class >= GFX9 &&
+ !(device->instance->debug_flags & RADV_DEBUG_NOBINNING);
+
+ /* Disable DFSM by default. As of 2019-09-15 Talos on Low is still 3% slower on Raven. */
+ device->dfsm_allowed =
+ device->pbb_allowed && (device->instance->perftest_flags & RADV_PERFTEST_DFSM);
+
+ /* The maximum number of scratch waves. Scratch space isn't divided
+ * evenly between CUs. The number is only a function of the number of CUs.
+ * We can decrease the constant to decrease the scratch buffer size.
+ *
+ * sctx->scratch_waves must be >= the maximum possible size of
+ * 1 threadgroup, so that the hw doesn't hang from being unable
+ * to start any.
+ *
+ * The recommended value is 4 per CU at most. Higher numbers don't
+ * bring much benefit, but they still occupy chip resources (think
+ * async compute). I've seen ~2% performance difference between 4 and 32.
+ */
+ uint32_t max_threads_per_block = 2048;
+ device->scratch_waves =
+ MAX2(32 * physical_device->rad_info.num_good_compute_units, max_threads_per_block / 64);
+
+ device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1);
+
+ if (device->physical_device->rad_info.chip_class >= GFX7) {
+ /* If the KMD allows it (there is a KMD hw register for it),
+ * allow launching waves out-of-order.
+ */
+ device->dispatch_initiator |= S_00B800_ORDER_MODE(1);
+ }
+
+ radv_device_init_gs_info(device);
+
+ device->tess_offchip_block_dw_size =
+ device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;
+
+ if (getenv("RADV_TRACE_FILE")) {
+ fprintf(
+ stderr,
+ "***********************************************************************************\n");
+ fprintf(
+ stderr,
+ "* WARNING: RADV_TRACE_FILE=<file> is deprecated and replaced by RADV_DEBUG=hang *\n");
+ fprintf(
+ stderr,
+ "***********************************************************************************\n");
+ abort();
+ }
+
+ if (device->instance->debug_flags & RADV_DEBUG_HANG) {
+   /* Enable GPU hang detection and dump logs if a GPU hang is
+ * detected.
+ */
+ keep_shader_info = true;
+
+ if (!radv_init_trace(device))
+ goto fail;
+
+ fprintf(stderr,
+ "*****************************************************************************\n");
+ fprintf(stderr,
+ "* WARNING: RADV_DEBUG=hang is costly and should only be used for debugging! *\n");
+ fprintf(stderr,
+ "*****************************************************************************\n");
+
+ /* Wait for idle after every draw/dispatch to identify the
+ * first bad call.
+ */
+ device->instance->debug_flags |= RADV_DEBUG_SYNC_SHADERS;
+
+ radv_dump_enabled_options(device, stderr);
+ }
+
+ if (radv_thread_trace_enabled()) {
+ fprintf(stderr, "*************************************************\n");
+ fprintf(stderr, "* WARNING: Thread trace support is experimental *\n");
+ fprintf(stderr, "*************************************************\n");
+
+ if (device->physical_device->rad_info.chip_class < GFX8 ||
+ device->physical_device->rad_info.chip_class > GFX10_3) {
+ fprintf(stderr, "GPU hardware not supported: refer to "
+ "the RGP documentation for the list of "
+ "supported GPUs!\n");
+ abort();
+ }
+
+ if (!radv_thread_trace_init(device))
+ goto fail;
+ }
+
+ if (getenv("RADV_TRAP_HANDLER")) {
+ /* TODO: Add support for more hardware. */
+ assert(device->physical_device->rad_info.chip_class == GFX8);
+
+ fprintf(stderr, "**********************************************************************\n");
+ fprintf(stderr, "* WARNING: RADV_TRAP_HANDLER is experimental and only for debugging! *\n");
+ fprintf(stderr, "**********************************************************************\n");
+
+ /* To get the disassembly of the faulty shaders, we have to
+ * keep some shader info around.
+ */
+ keep_shader_info = true;
+
+ if (!radv_trap_handler_init(device))
+ goto fail;
+ }
+
+ if (getenv("RADV_FORCE_VRS")) {
+ const char *vrs_rates = getenv("RADV_FORCE_VRS");
+
+ if (device->physical_device->rad_info.chip_class < GFX10_3)
+ fprintf(stderr, "radv: VRS is only supported on RDNA2+\n");
+ else if (device->physical_device->use_llvm)
+ fprintf(stderr, "radv: Forcing VRS rates is only supported with ACO\n");
+ else if (!strcmp(vrs_rates, "2x2"))
+ device->force_vrs = RADV_FORCE_VRS_2x2;
+ else if (!strcmp(vrs_rates, "2x1"))
+ device->force_vrs = RADV_FORCE_VRS_2x1;
+ else if (!strcmp(vrs_rates, "1x2"))
+ device->force_vrs = RADV_FORCE_VRS_1x2;
+ else
+ fprintf(stderr, "radv: Invalid VRS rates specified "
+ "(valid values are 2x2, 2x1 and 1x2)\n");
+ }
+
+ device->keep_shader_info = keep_shader_info;
+ result = radv_device_init_meta(device);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ radv_device_init_msaa(device);
+
+ /* If the border color extension is enabled, let's create the buffer we need. */
+ if (custom_border_colors) {
+ result = radv_device_init_border_color(device);
+ if (result != VK_SUCCESS)
+ goto fail;
+ }
+
+ for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
+ device->empty_cs[family] = device->ws->cs_create(device->ws, family);
+ if (!device->empty_cs[family])
+ goto fail;
+
+ switch (family) {
+ case RADV_QUEUE_GENERAL:
+ radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
+ radeon_emit(device->empty_cs[family], CC0_UPDATE_LOAD_ENABLES(1));
+ radeon_emit(device->empty_cs[family], CC1_UPDATE_SHADOW_ENABLES(1));
+ break;
+ case RADV_QUEUE_COMPUTE:
+ radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
+ radeon_emit(device->empty_cs[family], 0);
+ break;
+ }
+
+ result = device->ws->cs_finalize(device->empty_cs[family]);
+ if (result != VK_SUCCESS)
+ goto fail;
+ }
+
+ if (device->physical_device->rad_info.chip_class >= GFX7)
+ cik_create_gfx_config(device);
+
+ VkPipelineCacheCreateInfo ci;
+ ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
+ ci.pNext = NULL;
+ ci.flags = 0;
+ ci.pInitialData = NULL;
+ ci.initialDataSize = 0;
+ VkPipelineCache pc;
+ result = radv_CreatePipelineCache(radv_device_to_handle(device), &ci, NULL, &pc);
+ if (result != VK_SUCCESS)
+ goto fail_meta;
+
+ device->mem_cache = radv_pipeline_cache_from_handle(pc);
+
+ if (u_cnd_monotonic_init(&device->timeline_cond)) {
+ result = VK_ERROR_INITIALIZATION_FAILED;
+ goto fail_mem_cache;
+ }
+
+ device->force_aniso = MIN2(16, radv_get_int_debug_option("RADV_TEX_ANISO", -1));
+ if (device->force_aniso >= 0) {
+ fprintf(stderr, "radv: Forcing anisotropy filter to %ix\n",
+ 1 << util_logbase2(device->force_aniso));
+ }
+
+ *pDevice = radv_device_to_handle(device);
+ return VK_SUCCESS;
fail_mem_cache:
- radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
+ radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
fail_meta:
- radv_device_finish_meta(device);
+ radv_device_finish_meta(device);
fail:
- radv_thread_trace_finish(device);
- free(device->thread_trace.trigger_file);
+ radv_thread_trace_finish(device);
+ free(device->thread_trace.trigger_file);
- radv_trap_handler_finish(device);
- radv_finish_trace(device);
+ radv_trap_handler_finish(device);
+ radv_finish_trace(device);
- if (device->gfx_init)
- device->ws->buffer_destroy(device->ws, device->gfx_init);
+ if (device->gfx_init)
+ device->ws->buffer_destroy(device->ws, device->gfx_init);
- radv_device_finish_border_color(device);
+ radv_device_finish_border_color(device);
- for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
- for (unsigned q = 0; q < device->queue_count[i]; q++)
- radv_queue_finish(&device->queues[i][q]);
- if (device->queue_count[i])
- vk_free(&device->vk.alloc, device->queues[i]);
- }
+ for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
+ for (unsigned q = 0; q < device->queue_count[i]; q++)
+ radv_queue_finish(&device->queues[i][q]);
+ if (device->queue_count[i])
+ vk_free(&device->vk.alloc, device->queues[i]);
+ }
- for (unsigned i = 0; i < RADV_NUM_HW_CTX; i++) {
- if (device->hw_ctx[i])
- device->ws->ctx_destroy(device->hw_ctx[i]);
- }
+ for (unsigned i = 0; i < RADV_NUM_HW_CTX; i++) {
+ if (device->hw_ctx[i])
+ device->ws->ctx_destroy(device->hw_ctx[i]);
+ }
- vk_device_finish(&device->vk);
- vk_free(&device->vk.alloc, device);
- return result;
+ vk_device_finish(&device->vk);
+ vk_free(&device->vk.alloc, device);
+ return result;
}
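
As a sanity check on the scratch-wave sizing above, the same arithmetic in a standalone sketch (the CU count is a made-up example, not a value taken from this change):

   #include <stdint.h>
   #include <stdio.h>

   #define MAX2(a, b) ((a) > (b) ? (a) : (b))

   int main(void)
   {
      uint32_t num_good_compute_units = 40; /* hypothetical GPU */
      uint32_t max_threads_per_block = 2048;

      /* At least one full 2048-thread threadgroup (2048 / 64 = 32 wave64s)
       * must be able to start, otherwise budget 32 waves per CU. */
      uint32_t scratch_waves =
         MAX2(32 * num_good_compute_units, max_threads_per_block / 64);

      printf("scratch_waves = %u\n", scratch_waves); /* 1280 for 40 CUs */
      return 0;
   }
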
-void radv_DestroyDevice(
- VkDevice _device,
- const VkAllocationCallbacks* pAllocator)
+void
+radv_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_device, device, _device);
- if (!device)
- return;
+ if (!device)
+ return;
- if (device->gfx_init)
- device->ws->buffer_destroy(device->ws, device->gfx_init);
+ if (device->gfx_init)
+ device->ws->buffer_destroy(device->ws, device->gfx_init);
- radv_device_finish_border_color(device);
+ radv_device_finish_border_color(device);
- for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
- for (unsigned q = 0; q < device->queue_count[i]; q++)
- radv_queue_finish(&device->queues[i][q]);
- if (device->queue_count[i])
- vk_free(&device->vk.alloc, device->queues[i]);
- if (device->empty_cs[i])
- device->ws->cs_destroy(device->empty_cs[i]);
- }
+ for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
+ for (unsigned q = 0; q < device->queue_count[i]; q++)
+ radv_queue_finish(&device->queues[i][q]);
+ if (device->queue_count[i])
+ vk_free(&device->vk.alloc, device->queues[i]);
+ if (device->empty_cs[i])
+ device->ws->cs_destroy(device->empty_cs[i]);
+ }
- for (unsigned i = 0; i < RADV_NUM_HW_CTX; i++) {
- if (device->hw_ctx[i])
- device->ws->ctx_destroy(device->hw_ctx[i]);
- }
+ for (unsigned i = 0; i < RADV_NUM_HW_CTX; i++) {
+ if (device->hw_ctx[i])
+ device->ws->ctx_destroy(device->hw_ctx[i]);
+ }
- radv_device_finish_meta(device);
+ radv_device_finish_meta(device);
- VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
- radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
+ VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
+ radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
- radv_trap_handler_finish(device);
- radv_finish_trace(device);
+ radv_trap_handler_finish(device);
+ radv_finish_trace(device);
- radv_destroy_shader_slabs(device);
+ radv_destroy_shader_slabs(device);
- u_cnd_monotonic_destroy(&device->timeline_cond);
+ u_cnd_monotonic_destroy(&device->timeline_cond);
- free(device->thread_trace.trigger_file);
- radv_thread_trace_finish(device);
+ free(device->thread_trace.trigger_file);
+ radv_thread_trace_finish(device);
- vk_device_finish(&device->vk);
- vk_free(&device->vk.alloc, device);
+ vk_device_finish(&device->vk);
+ vk_free(&device->vk.alloc, device);
}
-VkResult radv_EnumerateInstanceLayerProperties(
- uint32_t* pPropertyCount,
- VkLayerProperties* pProperties)
+VkResult
+radv_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount, VkLayerProperties *pProperties)
{
- if (pProperties == NULL) {
- *pPropertyCount = 0;
- return VK_SUCCESS;
- }
+ if (pProperties == NULL) {
+ *pPropertyCount = 0;
+ return VK_SUCCESS;
+ }
- /* None supported at this time */
- return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
+ /* None supported at this time */
+ return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
}
-VkResult radv_EnumerateDeviceLayerProperties(
- VkPhysicalDevice physicalDevice,
- uint32_t* pPropertyCount,
- VkLayerProperties* pProperties)
+VkResult
+radv_EnumerateDeviceLayerProperties(VkPhysicalDevice physicalDevice, uint32_t *pPropertyCount,
+ VkLayerProperties *pProperties)
{
- if (pProperties == NULL) {
- *pPropertyCount = 0;
- return VK_SUCCESS;
- }
+ if (pProperties == NULL) {
+ *pPropertyCount = 0;
+ return VK_SUCCESS;
+ }
- /* None supported at this time */
- return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
+ /* None supported at this time */
+ return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
}
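
Both enumeration entry points above implement the provider half of the standard Vulkan two-call idiom: a NULL pProperties pointer means "report the count only". From the application side that looks roughly like this (plain Vulkan API, no radv internals; the early return on a zero count is just the usual convention):

   #include <stdlib.h>
   #include <vulkan/vulkan.h>

   static void
   list_instance_layers(void)
   {
      uint32_t count = 0;
      vkEnumerateInstanceLayerProperties(&count, NULL); /* first call: query count */
      if (count == 0)
         return; /* radv itself exposes no layers */

      VkLayerProperties *props = calloc(count, sizeof(*props));
      if (!props)
         return;
      vkEnumerateInstanceLayerProperties(&count, props); /* second call: fill array */
      free(props);
   }
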
-void radv_GetDeviceQueue2(
- VkDevice _device,
- const VkDeviceQueueInfo2* pQueueInfo,
- VkQueue* pQueue)
+void
+radv_GetDeviceQueue2(VkDevice _device, const VkDeviceQueueInfo2 *pQueueInfo, VkQueue *pQueue)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- struct radv_queue *queue;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ struct radv_queue *queue;
- queue = &device->queues[pQueueInfo->queueFamilyIndex][pQueueInfo->queueIndex];
- if (pQueueInfo->flags != queue->flags) {
- /* From the Vulkan 1.1.70 spec:
- *
- * "The queue returned by vkGetDeviceQueue2 must have the same
- * flags value from this structure as that used at device
- * creation time in a VkDeviceQueueCreateInfo instance. If no
- * matching flags were specified at device creation time then
- * pQueue will return VK_NULL_HANDLE."
- */
- *pQueue = VK_NULL_HANDLE;
- return;
- }
+ queue = &device->queues[pQueueInfo->queueFamilyIndex][pQueueInfo->queueIndex];
+ if (pQueueInfo->flags != queue->flags) {
+ /* From the Vulkan 1.1.70 spec:
+ *
+ * "The queue returned by vkGetDeviceQueue2 must have the same
+ * flags value from this structure as that used at device
+ * creation time in a VkDeviceQueueCreateInfo instance. If no
+ * matching flags were specified at device creation time then
+ * pQueue will return VK_NULL_HANDLE."
+ */
+ *pQueue = VK_NULL_HANDLE;
+ return;
+ }
- *pQueue = radv_queue_to_handle(queue);
+ *pQueue = radv_queue_to_handle(queue);
}
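
Seen from the application, the flags rule quoted above means a mismatched request simply yields VK_NULL_HANDLE. A minimal sketch using the public Vulkan API (the protected-queue flag is only an illustrative assumption about how the device was created):

   #include <vulkan/vulkan.h>

   static VkQueue
   get_protected_queue(VkDevice device)
   {
      const VkDeviceQueueInfo2 info = {
         .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_INFO_2,
         .flags = VK_DEVICE_QUEUE_CREATE_PROTECTED_BIT, /* must match device creation */
         .queueFamilyIndex = 0,
         .queueIndex = 0,
      };
      VkQueue queue = VK_NULL_HANDLE;
      vkGetDeviceQueue2(device, &info, &queue);
      /* VK_NULL_HANDLE here means no queue in family 0 was created with
       * these flags, exactly as the spec excerpt above requires. */
      return queue;
   }
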
-void radv_GetDeviceQueue(
- VkDevice _device,
- uint32_t queueFamilyIndex,
- uint32_t queueIndex,
- VkQueue* pQueue)
+void
+radv_GetDeviceQueue(VkDevice _device, uint32_t queueFamilyIndex, uint32_t queueIndex,
+ VkQueue *pQueue)
{
- const VkDeviceQueueInfo2 info = (VkDeviceQueueInfo2) {
- .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_INFO_2,
- .queueFamilyIndex = queueFamilyIndex,
- .queueIndex = queueIndex
- };
+ const VkDeviceQueueInfo2 info =
+ (VkDeviceQueueInfo2){.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_INFO_2,
+ .queueFamilyIndex = queueFamilyIndex,
+ .queueIndex = queueIndex};
- radv_GetDeviceQueue2(_device, &info, pQueue);
+ radv_GetDeviceQueue2(_device, &info, pQueue);
}
static void
-fill_geom_tess_rings(struct radv_queue *queue,
- uint32_t *map,
- bool add_sample_positions,
- uint32_t esgs_ring_size,
- struct radeon_winsys_bo *esgs_ring_bo,
- uint32_t gsvs_ring_size,
- struct radeon_winsys_bo *gsvs_ring_bo,
- uint32_t tess_factor_ring_size,
- uint32_t tess_offchip_ring_offset,
- uint32_t tess_offchip_ring_size,
- struct radeon_winsys_bo *tess_rings_bo)
-{
- uint32_t *desc = &map[4];
-
- if (esgs_ring_bo) {
- uint64_t esgs_va = radv_buffer_get_va(esgs_ring_bo);
-
- /* stride 0, num records - size, add tid, swizzle, elsize4,
- index stride 64 */
- desc[0] = esgs_va;
- desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
- S_008F04_SWIZZLE_ENABLE(true);
- desc[2] = esgs_ring_size;
- desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
- S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
- S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
- S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
- S_008F0C_INDEX_STRIDE(3) |
- S_008F0C_ADD_TID_ENABLE(1);
-
- if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
- desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
- S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) |
- S_008F0C_RESOURCE_LEVEL(1);
- } else {
- desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
- S_008F0C_ELEMENT_SIZE(1);
- }
-
- /* GS entry for ES->GS ring */
- /* stride 0, num records - size, elsize0,
- index stride 0 */
- desc[4] = esgs_va;
- desc[5] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32);
- desc[6] = esgs_ring_size;
- desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
- S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
- S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
- S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
-
- if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
- desc[7] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
- S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) |
- S_008F0C_RESOURCE_LEVEL(1);
- } else {
- desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
- }
- }
-
- desc += 8;
-
- if (gsvs_ring_bo) {
- uint64_t gsvs_va = radv_buffer_get_va(gsvs_ring_bo);
-
- /* VS entry for GS->VS ring */
- /* stride 0, num records - size, elsize0,
- index stride 0 */
- desc[0] = gsvs_va;
- desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32);
- desc[2] = gsvs_ring_size;
- desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
- S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
- S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
- S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
-
- if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
- desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
- S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) |
- S_008F0C_RESOURCE_LEVEL(1);
- } else {
- desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
- }
-
- /* stride gsvs_itemsize, num records 64
- elsize 4, index stride 16 */
- /* shader will patch stride and desc[2] */
- desc[4] = gsvs_va;
- desc[5] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32) |
- S_008F04_SWIZZLE_ENABLE(1);
- desc[6] = 0;
- desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
- S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
- S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
- S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
- S_008F0C_INDEX_STRIDE(1) |
- S_008F0C_ADD_TID_ENABLE(true);
-
- if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
- desc[7] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
- S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) |
- S_008F0C_RESOURCE_LEVEL(1);
- } else {
- desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
- S_008F0C_ELEMENT_SIZE(1);
- }
-
- }
-
- desc += 8;
-
- if (tess_rings_bo) {
- uint64_t tess_va = radv_buffer_get_va(tess_rings_bo);
- uint64_t tess_offchip_va = tess_va + tess_offchip_ring_offset;
-
- desc[0] = tess_va;
- desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32);
- desc[2] = tess_factor_ring_size;
- desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
- S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
- S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
- S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
-
- if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
- desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
- S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
- S_008F0C_RESOURCE_LEVEL(1);
- } else {
- desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
- }
-
- desc[4] = tess_offchip_va;
- desc[5] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32);
- desc[6] = tess_offchip_ring_size;
- desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
- S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
- S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
- S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
-
- if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
- desc[7] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
- S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
- S_008F0C_RESOURCE_LEVEL(1);
- } else {
- desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
- }
- }
-
- desc += 8;
-
- if (add_sample_positions) {
- /* add sample positions after all rings */
- memcpy(desc, queue->device->sample_locations_1x, 8);
- desc += 2;
- memcpy(desc, queue->device->sample_locations_2x, 16);
- desc += 4;
- memcpy(desc, queue->device->sample_locations_4x, 32);
- desc += 8;
- memcpy(desc, queue->device->sample_locations_8x, 64);
- }
+fill_geom_tess_rings(struct radv_queue *queue, uint32_t *map, bool add_sample_positions,
+ uint32_t esgs_ring_size, struct radeon_winsys_bo *esgs_ring_bo,
+ uint32_t gsvs_ring_size, struct radeon_winsys_bo *gsvs_ring_bo,
+ uint32_t tess_factor_ring_size, uint32_t tess_offchip_ring_offset,
+ uint32_t tess_offchip_ring_size, struct radeon_winsys_bo *tess_rings_bo)
+{
+ uint32_t *desc = &map[4];
+
+ if (esgs_ring_bo) {
+ uint64_t esgs_va = radv_buffer_get_va(esgs_ring_bo);
+
+ /* stride 0, num records - size, add tid, swizzle, elsize4,
+ index stride 64 */
+ desc[0] = esgs_va;
+ desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) | S_008F04_SWIZZLE_ENABLE(true);
+ desc[2] = esgs_ring_size;
+ desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+ S_008F0C_INDEX_STRIDE(3) | S_008F0C_ADD_TID_ENABLE(1);
+
+ if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
+ desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+ S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
+ } else {
+ desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | S_008F0C_ELEMENT_SIZE(1);
+ }
+
+ /* GS entry for ES->GS ring */
+ /* stride 0, num records - size, elsize0,
+ index stride 0 */
+ desc[4] = esgs_va;
+ desc[5] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32);
+ desc[6] = esgs_ring_size;
+ desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+
+ if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
+ desc[7] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+ S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
+ } else {
+ desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+ }
+ }
+
+ desc += 8;
+
+ if (gsvs_ring_bo) {
+ uint64_t gsvs_va = radv_buffer_get_va(gsvs_ring_bo);
+
+ /* VS entry for GS->VS ring */
+ /* stride 0, num records - size, elsize0,
+ index stride 0 */
+ desc[0] = gsvs_va;
+ desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32);
+ desc[2] = gsvs_ring_size;
+ desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+
+ if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
+ desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+ S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
+ } else {
+ desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+ }
+
+ /* stride gsvs_itemsize, num records 64
+ elsize 4, index stride 16 */
+ /* shader will patch stride and desc[2] */
+ desc[4] = gsvs_va;
+ desc[5] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32) | S_008F04_SWIZZLE_ENABLE(1);
+ desc[6] = 0;
+ desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+ S_008F0C_INDEX_STRIDE(1) | S_008F0C_ADD_TID_ENABLE(true);
+
+ if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
+ desc[7] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+ S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
+ } else {
+ desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | S_008F0C_ELEMENT_SIZE(1);
+ }
+ }
+
+ desc += 8;
+
+ if (tess_rings_bo) {
+ uint64_t tess_va = radv_buffer_get_va(tess_rings_bo);
+ uint64_t tess_offchip_va = tess_va + tess_offchip_ring_offset;
+
+ desc[0] = tess_va;
+ desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32);
+ desc[2] = tess_factor_ring_size;
+ desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+
+ if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
+ desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+ S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
+ } else {
+ desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+ }
+
+ desc[4] = tess_offchip_va;
+ desc[5] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32);
+ desc[6] = tess_offchip_ring_size;
+ desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+
+ if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
+ desc[7] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+ S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
+ } else {
+ desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+ }
+ }
+
+ desc += 8;
+
+ if (add_sample_positions) {
+ /* add sample positions after all rings */
+ memcpy(desc, queue->device->sample_locations_1x, 8);
+ desc += 2;
+ memcpy(desc, queue->device->sample_locations_2x, 16);
+ desc += 4;
+ memcpy(desc, queue->device->sample_locations_4x, 32);
+ desc += 8;
+ memcpy(desc, queue->device->sample_locations_8x, 64);
+ }
}
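
Each ring slot written above is a 4-dword AMD buffer descriptor. A condensed sketch of the pre-GFX10 pattern the function repeats, using the same register macros as the code (the helper name is hypothetical):

   static void
   fill_raw_buffer_descriptor(uint32_t *desc, uint64_t va, uint32_t size)
   {
      desc[0] = va;                                 /* base address, low 32 bits */
      desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32); /* base address, high bits */
      desc[2] = size;                               /* num_records: bytes when stride is 0 */
      desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
                S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
                S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
                S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
   }

The GFX10 branches above swap the NUM_FORMAT/DATA_FORMAT pair for S_008F0C_FORMAT plus the OOB_SELECT and RESOURCE_LEVEL fields, and the swizzled ring entries additionally set SWIZZLE_ENABLE, INDEX_STRIDE and ADD_TID_ENABLE.
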
static unsigned
radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buffers_p)
{
- bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= GFX7 &&
- device->physical_device->rad_info.family != CHIP_CARRIZO &&
- device->physical_device->rad_info.family != CHIP_STONEY;
- unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
- unsigned max_offchip_buffers;
- unsigned offchip_granularity;
- unsigned hs_offchip_param;
-
- /*
- * Per RadeonSI:
- * This must be one less than the maximum number due to a hw limitation.
-	 * Various hardware bugs need this.
- *
- * Per AMDVLK:
- * Vega10 should limit max_offchip_buffers to 508 (4 * 127).
- * Gfx7 should limit max_offchip_buffers to 508
- * Gfx6 should limit max_offchip_buffers to 126 (2 * 63)
- *
- * Follow AMDVLK here.
- */
- if (device->physical_device->rad_info.chip_class >= GFX10) {
- max_offchip_buffers_per_se = 128;
- } else if (device->physical_device->rad_info.family == CHIP_VEGA10 ||
- device->physical_device->rad_info.chip_class == GFX7 ||
- device->physical_device->rad_info.chip_class == GFX6)
- --max_offchip_buffers_per_se;
-
- max_offchip_buffers = max_offchip_buffers_per_se *
- device->physical_device->rad_info.max_se;
-
- /* Hawaii has a bug with offchip buffers > 256 that can be worked
- * around by setting 4K granularity.
- */
- if (device->tess_offchip_block_dw_size == 4096) {
- assert(device->physical_device->rad_info.family == CHIP_HAWAII);
- offchip_granularity = V_03093C_X_4K_DWORDS;
- } else {
- assert(device->tess_offchip_block_dw_size == 8192);
- offchip_granularity = V_03093C_X_8K_DWORDS;
- }
-
- switch (device->physical_device->rad_info.chip_class) {
- case GFX6:
- max_offchip_buffers = MIN2(max_offchip_buffers, 126);
- break;
- case GFX7:
- case GFX8:
- case GFX9:
- max_offchip_buffers = MIN2(max_offchip_buffers, 508);
- break;
- case GFX10:
- break;
- default:
- break;
- }
-
- *max_offchip_buffers_p = max_offchip_buffers;
- if (device->physical_device->rad_info.chip_class >= GFX10_3) {
- hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX103(max_offchip_buffers - 1) |
- S_03093C_OFFCHIP_GRANULARITY_GFX103(offchip_granularity);
- } else if (device->physical_device->rad_info.chip_class >= GFX7) {
- if (device->physical_device->rad_info.chip_class >= GFX8)
- --max_offchip_buffers;
- hs_offchip_param =
- S_03093C_OFFCHIP_BUFFERING_GFX7(max_offchip_buffers) |
- S_03093C_OFFCHIP_GRANULARITY_GFX7(offchip_granularity);
- } else {
- hs_offchip_param =
- S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
- }
- return hs_offchip_param;
+ bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= GFX7 &&
+ device->physical_device->rad_info.family != CHIP_CARRIZO &&
+ device->physical_device->rad_info.family != CHIP_STONEY;
+ unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
+ unsigned max_offchip_buffers;
+ unsigned offchip_granularity;
+ unsigned hs_offchip_param;
+
+ /*
+ * Per RadeonSI:
+ * This must be one less than the maximum number due to a hw limitation.
+    * Various hardware bugs need this.
+ *
+ * Per AMDVLK:
+ * Vega10 should limit max_offchip_buffers to 508 (4 * 127).
+ * Gfx7 should limit max_offchip_buffers to 508
+ * Gfx6 should limit max_offchip_buffers to 126 (2 * 63)
+ *
+ * Follow AMDVLK here.
+ */
+ if (device->physical_device->rad_info.chip_class >= GFX10) {
+ max_offchip_buffers_per_se = 128;
+ } else if (device->physical_device->rad_info.family == CHIP_VEGA10 ||
+ device->physical_device->rad_info.chip_class == GFX7 ||
+ device->physical_device->rad_info.chip_class == GFX6)
+ --max_offchip_buffers_per_se;
+
+ max_offchip_buffers = max_offchip_buffers_per_se * device->physical_device->rad_info.max_se;
+
+ /* Hawaii has a bug with offchip buffers > 256 that can be worked
+ * around by setting 4K granularity.
+ */
+ if (device->tess_offchip_block_dw_size == 4096) {
+ assert(device->physical_device->rad_info.family == CHIP_HAWAII);
+ offchip_granularity = V_03093C_X_4K_DWORDS;
+ } else {
+ assert(device->tess_offchip_block_dw_size == 8192);
+ offchip_granularity = V_03093C_X_8K_DWORDS;
+ }
+
+ switch (device->physical_device->rad_info.chip_class) {
+ case GFX6:
+ max_offchip_buffers = MIN2(max_offchip_buffers, 126);
+ break;
+ case GFX7:
+ case GFX8:
+ case GFX9:
+ max_offchip_buffers = MIN2(max_offchip_buffers, 508);
+ break;
+ case GFX10:
+ break;
+ default:
+ break;
+ }
+
+ *max_offchip_buffers_p = max_offchip_buffers;
+ if (device->physical_device->rad_info.chip_class >= GFX10_3) {
+ hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX103(max_offchip_buffers - 1) |
+ S_03093C_OFFCHIP_GRANULARITY_GFX103(offchip_granularity);
+ } else if (device->physical_device->rad_info.chip_class >= GFX7) {
+ if (device->physical_device->rad_info.chip_class >= GFX8)
+ --max_offchip_buffers;
+ hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX7(max_offchip_buffers) |
+ S_03093C_OFFCHIP_GRANULARITY_GFX7(offchip_granularity);
+ } else {
+ hs_offchip_param = S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
+ }
+ return hs_offchip_param;
}
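
Worked through for a hypothetical non-Vega10 GFX9 part with 4 shader engines and the default 8K-dword offchip blocks: double_offchip_buffers is true, so 128 buffers per SE give 4 * 128 = 512; the GFX9 case clamps that to 508, which is what *max_offchip_buffers_p reports; the GFX8+ branch then subtracts one before packing, so the register value encodes 507 together with V_03093C_X_8K_DWORDS granularity.
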
static void
radv_emit_gs_ring_sizes(struct radv_queue *queue, struct radeon_cmdbuf *cs,
- struct radeon_winsys_bo *esgs_ring_bo,
- uint32_t esgs_ring_size,
- struct radeon_winsys_bo *gsvs_ring_bo,
- uint32_t gsvs_ring_size)
+ struct radeon_winsys_bo *esgs_ring_bo, uint32_t esgs_ring_size,
+ struct radeon_winsys_bo *gsvs_ring_bo, uint32_t gsvs_ring_size)
{
- if (!esgs_ring_bo && !gsvs_ring_bo)
- return;
+ if (!esgs_ring_bo && !gsvs_ring_bo)
+ return;
- if (esgs_ring_bo)
- radv_cs_add_buffer(queue->device->ws, cs, esgs_ring_bo);
+ if (esgs_ring_bo)
+ radv_cs_add_buffer(queue->device->ws, cs, esgs_ring_bo);
- if (gsvs_ring_bo)
- radv_cs_add_buffer(queue->device->ws, cs, gsvs_ring_bo);
+ if (gsvs_ring_bo)
+ radv_cs_add_buffer(queue->device->ws, cs, gsvs_ring_bo);
- if (queue->device->physical_device->rad_info.chip_class >= GFX7) {
- radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
- radeon_emit(cs, esgs_ring_size >> 8);
- radeon_emit(cs, gsvs_ring_size >> 8);
- } else {
- radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
- radeon_emit(cs, esgs_ring_size >> 8);
- radeon_emit(cs, gsvs_ring_size >> 8);
- }
+ if (queue->device->physical_device->rad_info.chip_class >= GFX7) {
+ radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
+ radeon_emit(cs, esgs_ring_size >> 8);
+ radeon_emit(cs, gsvs_ring_size >> 8);
+ } else {
+ radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
+ radeon_emit(cs, esgs_ring_size >> 8);
+ radeon_emit(cs, gsvs_ring_size >> 8);
+ }
}
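
The >> 8 reflects that the VGT_ESGS_RING_SIZE / VGT_GSVS_RING_SIZE registers are programmed in 256-byte units rather than bytes. For example, with a hypothetical 1 MiB ESGS ring:

   uint32_t esgs_ring_size = 1u << 20;       /* 1 MiB */
   uint32_t reg_value = esgs_ring_size >> 8; /* 0x1000 = 4096 blocks of 256 bytes */
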
static void
radv_emit_tess_factor_ring(struct radv_queue *queue, struct radeon_cmdbuf *cs,
- unsigned hs_offchip_param, unsigned tf_ring_size,
- struct radeon_winsys_bo *tess_rings_bo)
-{
- uint64_t tf_va;
-
- if (!tess_rings_bo)
- return;
-
- tf_va = radv_buffer_get_va(tess_rings_bo);
-
- radv_cs_add_buffer(queue->device->ws, cs, tess_rings_bo);
-
- if (queue->device->physical_device->rad_info.chip_class >= GFX7) {
- radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE,
- S_030938_SIZE(tf_ring_size / 4));
- radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE,
- tf_va >> 8);
-
- if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
- radeon_set_uconfig_reg(cs, R_030984_VGT_TF_MEMORY_BASE_HI_UMD,
- S_030984_BASE_HI(tf_va >> 40));
- } else if (queue->device->physical_device->rad_info.chip_class == GFX9) {
- radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI,
- S_030944_BASE_HI(tf_va >> 40));
- }
- radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM,
- hs_offchip_param);
- } else {
- radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE,
- S_008988_SIZE(tf_ring_size / 4));
- radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE,
- tf_va >> 8);
- radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM,
- hs_offchip_param);
- }
+ unsigned hs_offchip_param, unsigned tf_ring_size,
+ struct radeon_winsys_bo *tess_rings_bo)
+{
+ uint64_t tf_va;
+
+ if (!tess_rings_bo)
+ return;
+
+ tf_va = radv_buffer_get_va(tess_rings_bo);
+
+ radv_cs_add_buffer(queue->device->ws, cs, tess_rings_bo);
+
+ if (queue->device->physical_device->rad_info.chip_class >= GFX7) {
+ radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE, S_030938_SIZE(tf_ring_size / 4));
+ radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE, tf_va >> 8);
+
+ if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
+ radeon_set_uconfig_reg(cs, R_030984_VGT_TF_MEMORY_BASE_HI_UMD,
+ S_030984_BASE_HI(tf_va >> 40));
+ } else if (queue->device->physical_device->rad_info.chip_class == GFX9) {
+ radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI, S_030944_BASE_HI(tf_va >> 40));
+ }
+ radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, hs_offchip_param);
+ } else {
+ radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE, S_008988_SIZE(tf_ring_size / 4));
+ radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE, tf_va >> 8);
+ radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM, hs_offchip_param);
+ }
}
static void
@@ -3562,17 +3433,17 @@ radv_emit_graphics_scratch(struct radv_queue *queue, struct radeon_cmdbuf *cs,
uint32_t size_per_wave, uint32_t waves,
struct radeon_winsys_bo *scratch_bo)
{
- if (queue->queue_family_index != RADV_QUEUE_GENERAL)
- return;
+ if (queue->queue_family_index != RADV_QUEUE_GENERAL)
+ return;
- if (!scratch_bo)
- return;
+ if (!scratch_bo)
+ return;
- radv_cs_add_buffer(queue->device->ws, cs, scratch_bo);
+ radv_cs_add_buffer(queue->device->ws, cs, scratch_bo);
- radeon_set_context_reg(cs, R_0286E8_SPI_TMPRING_SIZE,
- S_0286E8_WAVES(waves) |
- S_0286E8_WAVESIZE(round_up_u32(size_per_wave, 1024)));
+ radeon_set_context_reg(
+ cs, R_0286E8_SPI_TMPRING_SIZE,
+ S_0286E8_WAVES(waves) | S_0286E8_WAVESIZE(round_up_u32(size_per_wave, 1024)));
}
static void
@@ -3580,3163 +3451,2965 @@ radv_emit_compute_scratch(struct radv_queue *queue, struct radeon_cmdbuf *cs,
uint32_t size_per_wave, uint32_t waves,
struct radeon_winsys_bo *compute_scratch_bo)
{
- uint64_t scratch_va;
+ uint64_t scratch_va;
- if (!compute_scratch_bo)
- return;
+ if (!compute_scratch_bo)
+ return;
- scratch_va = radv_buffer_get_va(compute_scratch_bo);
+ scratch_va = radv_buffer_get_va(compute_scratch_bo);
- radv_cs_add_buffer(queue->device->ws, cs, compute_scratch_bo);
+ radv_cs_add_buffer(queue->device->ws, cs, compute_scratch_bo);
- radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
- radeon_emit(cs, scratch_va);
- radeon_emit(cs, S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
- S_008F04_SWIZZLE_ENABLE(1));
+ radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
+ radeon_emit(cs, scratch_va);
+ radeon_emit(cs, S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) | S_008F04_SWIZZLE_ENABLE(1));
- radeon_set_sh_reg(cs, R_00B860_COMPUTE_TMPRING_SIZE,
- S_00B860_WAVES(waves) |
- S_00B860_WAVESIZE(round_up_u32(size_per_wave, 1024)));
+ radeon_set_sh_reg(cs, R_00B860_COMPUTE_TMPRING_SIZE,
+ S_00B860_WAVES(waves) | S_00B860_WAVESIZE(round_up_u32(size_per_wave, 1024)));
}
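
The two user-data dwords emitted above form the low half of a scratch buffer resource: the low 32 bits of the buffer VA, then the high address bits plus the swizzle bit. A quick numeric illustration with a made-up address:

   uint64_t scratch_va = 0x0000800100040000ull; /* hypothetical GPU VA */
   uint32_t dword0 = (uint32_t)scratch_va;      /* 0x00040000 */
   uint32_t dword1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) | /* 0x8001 */
                     S_008F04_SWIZZLE_ENABLE(1);
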
static void
-radv_emit_global_shader_pointers(struct radv_queue *queue,
- struct radeon_cmdbuf *cs,
- struct radeon_winsys_bo *descriptor_bo)
-{
- uint64_t va;
-
- if (!descriptor_bo)
- return;
-
- va = radv_buffer_get_va(descriptor_bo);
-
- radv_cs_add_buffer(queue->device->ws, cs, descriptor_bo);
-
- if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
- uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
- R_00B130_SPI_SHADER_USER_DATA_VS_0,
- R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
- R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
-
- for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
- radv_emit_shader_pointer(queue->device, cs, regs[i],
- va, true);
- }
- } else if (queue->device->physical_device->rad_info.chip_class == GFX9) {
- uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
- R_00B130_SPI_SHADER_USER_DATA_VS_0,
- R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
- R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
-
- for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
- radv_emit_shader_pointer(queue->device, cs, regs[i],
- va, true);
- }
- } else {
- uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
- R_00B130_SPI_SHADER_USER_DATA_VS_0,
- R_00B230_SPI_SHADER_USER_DATA_GS_0,
- R_00B330_SPI_SHADER_USER_DATA_ES_0,
- R_00B430_SPI_SHADER_USER_DATA_HS_0,
- R_00B530_SPI_SHADER_USER_DATA_LS_0};
-
- for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
- radv_emit_shader_pointer(queue->device, cs, regs[i],
- va, true);
- }
- }
+radv_emit_global_shader_pointers(struct radv_queue *queue, struct radeon_cmdbuf *cs,
+ struct radeon_winsys_bo *descriptor_bo)
+{
+ uint64_t va;
+
+ if (!descriptor_bo)
+ return;
+
+ va = radv_buffer_get_va(descriptor_bo);
+
+ radv_cs_add_buffer(queue->device->ws, cs, descriptor_bo);
+
+ if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
+ uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B130_SPI_SHADER_USER_DATA_VS_0,
+ R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
+ R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
+
+ for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
+ radv_emit_shader_pointer(queue->device, cs, regs[i], va, true);
+ }
+ } else if (queue->device->physical_device->rad_info.chip_class == GFX9) {
+ uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B130_SPI_SHADER_USER_DATA_VS_0,
+ R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
+ R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
+
+ for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
+ radv_emit_shader_pointer(queue->device, cs, regs[i], va, true);
+ }
+ } else {
+ uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B130_SPI_SHADER_USER_DATA_VS_0,
+ R_00B230_SPI_SHADER_USER_DATA_GS_0, R_00B330_SPI_SHADER_USER_DATA_ES_0,
+ R_00B430_SPI_SHADER_USER_DATA_HS_0, R_00B530_SPI_SHADER_USER_DATA_LS_0};
+
+ for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
+ radv_emit_shader_pointer(queue->device, cs, regs[i], va, true);
+ }
+ }
}
static void
radv_init_graphics_state(struct radeon_cmdbuf *cs, struct radv_queue *queue)
{
- struct radv_device *device = queue->device;
+ struct radv_device *device = queue->device;
- if (device->gfx_init) {
- uint64_t va = radv_buffer_get_va(device->gfx_init);
+ if (device->gfx_init) {
+ uint64_t va = radv_buffer_get_va(device->gfx_init);
- radeon_emit(cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- radeon_emit(cs, device->gfx_init_size_dw & 0xffff);
+ radeon_emit(cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ radeon_emit(cs, device->gfx_init_size_dw & 0xffff);
- radv_cs_add_buffer(device->ws, cs, device->gfx_init);
- } else {
- si_emit_graphics(device, cs);
- }
+ radv_cs_add_buffer(device->ws, cs, device->gfx_init);
+ } else {
+ si_emit_graphics(device, cs);
+ }
}
static void
radv_init_compute_state(struct radeon_cmdbuf *cs, struct radv_queue *queue)
{
- si_emit_compute(queue->device, cs);
+ si_emit_compute(queue->device, cs);
}
static VkResult
-radv_get_preamble_cs(struct radv_queue *queue,
- uint32_t scratch_size_per_wave,
- uint32_t scratch_waves,
- uint32_t compute_scratch_size_per_wave,
- uint32_t compute_scratch_waves,
- uint32_t esgs_ring_size,
- uint32_t gsvs_ring_size,
- bool needs_tess_rings,
- bool needs_gds,
- bool needs_gds_oa,
- bool needs_sample_positions,
- struct radeon_cmdbuf **initial_full_flush_preamble_cs,
+radv_get_preamble_cs(struct radv_queue *queue, uint32_t scratch_size_per_wave,
+ uint32_t scratch_waves, uint32_t compute_scratch_size_per_wave,
+ uint32_t compute_scratch_waves, uint32_t esgs_ring_size,
+ uint32_t gsvs_ring_size, bool needs_tess_rings, bool needs_gds,
+ bool needs_gds_oa, bool needs_sample_positions,
+ struct radeon_cmdbuf **initial_full_flush_preamble_cs,
struct radeon_cmdbuf **initial_preamble_cs,
struct radeon_cmdbuf **continue_preamble_cs)
{
- struct radeon_winsys_bo *scratch_bo = NULL;
- struct radeon_winsys_bo *descriptor_bo = NULL;
- struct radeon_winsys_bo *compute_scratch_bo = NULL;
- struct radeon_winsys_bo *esgs_ring_bo = NULL;
- struct radeon_winsys_bo *gsvs_ring_bo = NULL;
- struct radeon_winsys_bo *tess_rings_bo = NULL;
- struct radeon_winsys_bo *gds_bo = NULL;
- struct radeon_winsys_bo *gds_oa_bo = NULL;
- struct radeon_cmdbuf *dest_cs[3] = {0};
- bool add_tess_rings = false, add_gds = false, add_gds_oa = false, add_sample_positions = false;
- unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
- unsigned max_offchip_buffers;
- unsigned hs_offchip_param = 0;
- unsigned tess_offchip_ring_offset;
- uint32_t ring_bo_flags = RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING;
- if (!queue->has_tess_rings) {
- if (needs_tess_rings)
- add_tess_rings = true;
- }
- if (!queue->has_gds) {
- if (needs_gds)
- add_gds = true;
- }
- if (!queue->has_gds_oa) {
- if (needs_gds_oa)
- add_gds_oa = true;
- }
- if (!queue->has_sample_positions) {
- if (needs_sample_positions)
- add_sample_positions = true;
- }
- tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se;
- hs_offchip_param = radv_get_hs_offchip_param(queue->device,
- &max_offchip_buffers);
- tess_offchip_ring_offset = align(tess_factor_ring_size, 64 * 1024);
- tess_offchip_ring_size = max_offchip_buffers *
- queue->device->tess_offchip_block_dw_size * 4;
-
- scratch_size_per_wave = MAX2(scratch_size_per_wave, queue->scratch_size_per_wave);
- if (scratch_size_per_wave)
- scratch_waves = MIN2(scratch_waves, UINT32_MAX / scratch_size_per_wave);
- else
- scratch_waves = 0;
-
- compute_scratch_size_per_wave = MAX2(compute_scratch_size_per_wave, queue->compute_scratch_size_per_wave);
- if (compute_scratch_size_per_wave)
- compute_scratch_waves = MIN2(compute_scratch_waves, UINT32_MAX / compute_scratch_size_per_wave);
- else
- compute_scratch_waves = 0;
-
- if (scratch_size_per_wave <= queue->scratch_size_per_wave &&
- scratch_waves <= queue->scratch_waves &&
- compute_scratch_size_per_wave <= queue->compute_scratch_size_per_wave &&
- compute_scratch_waves <= queue->compute_scratch_waves &&
- esgs_ring_size <= queue->esgs_ring_size &&
- gsvs_ring_size <= queue->gsvs_ring_size &&
- !add_tess_rings && !add_gds && !add_gds_oa && !add_sample_positions &&
- queue->initial_preamble_cs) {
- *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
- *initial_preamble_cs = queue->initial_preamble_cs;
- *continue_preamble_cs = queue->continue_preamble_cs;
- if (!scratch_size_per_wave && !compute_scratch_size_per_wave &&
- !esgs_ring_size && !gsvs_ring_size && !needs_tess_rings &&
- !needs_gds && !needs_gds_oa && !needs_sample_positions)
- *continue_preamble_cs = NULL;
- return VK_SUCCESS;
- }
-
- uint32_t scratch_size = scratch_size_per_wave * scratch_waves;
- uint32_t queue_scratch_size = queue->scratch_size_per_wave * queue->scratch_waves;
- if (scratch_size > queue_scratch_size) {
- scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
- scratch_size,
- 4096,
- RADEON_DOMAIN_VRAM,
- ring_bo_flags,
- RADV_BO_PRIORITY_SCRATCH);
- if (!scratch_bo)
- goto fail;
- } else
- scratch_bo = queue->scratch_bo;
-
- uint32_t compute_scratch_size = compute_scratch_size_per_wave * compute_scratch_waves;
- uint32_t compute_queue_scratch_size = queue->compute_scratch_size_per_wave * queue->compute_scratch_waves;
- if (compute_scratch_size > compute_queue_scratch_size) {
- compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
- compute_scratch_size,
- 4096,
- RADEON_DOMAIN_VRAM,
- ring_bo_flags,
- RADV_BO_PRIORITY_SCRATCH);
- if (!compute_scratch_bo)
- goto fail;
-
- } else
- compute_scratch_bo = queue->compute_scratch_bo;
-
- if (esgs_ring_size > queue->esgs_ring_size) {
- esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
- esgs_ring_size,
- 4096,
- RADEON_DOMAIN_VRAM,
- ring_bo_flags,
- RADV_BO_PRIORITY_SCRATCH);
- if (!esgs_ring_bo)
- goto fail;
- } else {
- esgs_ring_bo = queue->esgs_ring_bo;
- esgs_ring_size = queue->esgs_ring_size;
- }
-
- if (gsvs_ring_size > queue->gsvs_ring_size) {
- gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
- gsvs_ring_size,
- 4096,
- RADEON_DOMAIN_VRAM,
- ring_bo_flags,
- RADV_BO_PRIORITY_SCRATCH);
- if (!gsvs_ring_bo)
- goto fail;
- } else {
- gsvs_ring_bo = queue->gsvs_ring_bo;
- gsvs_ring_size = queue->gsvs_ring_size;
- }
-
- if (add_tess_rings) {
- tess_rings_bo = queue->device->ws->buffer_create(queue->device->ws,
- tess_offchip_ring_offset + tess_offchip_ring_size,
- 256,
- RADEON_DOMAIN_VRAM,
- ring_bo_flags,
- RADV_BO_PRIORITY_SCRATCH);
- if (!tess_rings_bo)
- goto fail;
- } else {
- tess_rings_bo = queue->tess_rings_bo;
- }
-
- if (add_gds) {
- assert(queue->device->physical_device->rad_info.chip_class >= GFX10);
-
- /* 4 streamout GDS counters.
- * We need 256B (64 dw) of GDS, otherwise streamout hangs.
- */
- gds_bo = queue->device->ws->buffer_create(queue->device->ws,
- 256, 4,
- RADEON_DOMAIN_GDS,
- ring_bo_flags,
- RADV_BO_PRIORITY_SCRATCH);
- if (!gds_bo)
- goto fail;
- } else {
- gds_bo = queue->gds_bo;
- }
-
- if (add_gds_oa) {
- assert(queue->device->physical_device->rad_info.chip_class >= GFX10);
-
- gds_oa_bo = queue->device->ws->buffer_create(queue->device->ws,
- 4, 1,
- RADEON_DOMAIN_OA,
- ring_bo_flags,
- RADV_BO_PRIORITY_SCRATCH);
- if (!gds_oa_bo)
- goto fail;
- } else {
- gds_oa_bo = queue->gds_oa_bo;
- }
-
- if (scratch_bo != queue->scratch_bo ||
- esgs_ring_bo != queue->esgs_ring_bo ||
- gsvs_ring_bo != queue->gsvs_ring_bo ||
- tess_rings_bo != queue->tess_rings_bo ||
- add_sample_positions) {
- uint32_t size = 0;
- if (gsvs_ring_bo || esgs_ring_bo ||
- tess_rings_bo || add_sample_positions) {
-         size = 112; /* (2 + 2 padding + 4 * 6) dwords = 28 dwords = 112 bytes */
-         if (add_sample_positions)
-            size += 128; /* sample positions: 64 + 32 + 16 + 8 = 120 bytes, allocated as 128 */
- }
- else if (scratch_bo)
- size = 8; /* 2 dword */
-
- descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
- size,
- 4096,
- RADEON_DOMAIN_VRAM,
- RADEON_FLAG_CPU_ACCESS |
- RADEON_FLAG_NO_INTERPROCESS_SHARING |
- RADEON_FLAG_READ_ONLY,
- RADV_BO_PRIORITY_DESCRIPTOR);
- if (!descriptor_bo)
- goto fail;
- } else
- descriptor_bo = queue->descriptor_bo;
-
- if (descriptor_bo != queue->descriptor_bo) {
- uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);
- if (!map)
- goto fail;
-
- if (scratch_bo) {
- uint64_t scratch_va = radv_buffer_get_va(scratch_bo);
- uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
- S_008F04_SWIZZLE_ENABLE(1);
- map[0] = scratch_va;
- map[1] = rsrc1;
- }
-
- if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo || add_sample_positions)
- fill_geom_tess_rings(queue, map, add_sample_positions,
- esgs_ring_size, esgs_ring_bo,
- gsvs_ring_size, gsvs_ring_bo,
- tess_factor_ring_size,
- tess_offchip_ring_offset,
- tess_offchip_ring_size,
- tess_rings_bo);
-
- queue->device->ws->buffer_unmap(descriptor_bo);
- }
-
- for(int i = 0; i < 3; ++i) {
- enum rgp_flush_bits sqtt_flush_bits = 0;
- struct radeon_cmdbuf *cs = NULL;
- cs = queue->device->ws->cs_create(queue->device->ws,
- queue->queue_family_index ? RING_COMPUTE : RING_GFX);
- if (!cs)
- goto fail;
-
- dest_cs[i] = cs;
-
- if (scratch_bo)
- radv_cs_add_buffer(queue->device->ws, cs, scratch_bo);
-
- /* Emit initial configuration. */
- switch (queue->queue_family_index) {
- case RADV_QUEUE_GENERAL:
- radv_init_graphics_state(cs, queue);
- break;
- case RADV_QUEUE_COMPUTE:
- radv_init_compute_state(cs, queue);
- break;
- case RADV_QUEUE_TRANSFER:
- break;
- }
-
- if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
-
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
- }
-
- radv_emit_gs_ring_sizes(queue, cs, esgs_ring_bo, esgs_ring_size,
- gsvs_ring_bo, gsvs_ring_size);
- radv_emit_tess_factor_ring(queue, cs, hs_offchip_param,
- tess_factor_ring_size, tess_rings_bo);
- radv_emit_global_shader_pointers(queue, cs, descriptor_bo);
- radv_emit_compute_scratch(queue, cs, compute_scratch_size_per_wave,
- compute_scratch_waves, compute_scratch_bo);
- radv_emit_graphics_scratch(queue, cs, scratch_size_per_wave,
- scratch_waves, scratch_bo);
-
- if (gds_bo)
- radv_cs_add_buffer(queue->device->ws, cs, gds_bo);
- if (gds_oa_bo)
- radv_cs_add_buffer(queue->device->ws, cs, gds_oa_bo);
-
- if (i == 0) {
- si_cs_emit_cache_flush(cs,
- queue->device->physical_device->rad_info.chip_class,
- NULL, 0,
- queue->queue_family_index == RING_COMPUTE &&
- queue->device->physical_device->rad_info.chip_class >= GFX7,
- (queue->queue_family_index == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
- RADV_CMD_FLAG_INV_ICACHE |
- RADV_CMD_FLAG_INV_SCACHE |
- RADV_CMD_FLAG_INV_VCACHE |
- RADV_CMD_FLAG_INV_L2 |
- RADV_CMD_FLAG_START_PIPELINE_STATS, &sqtt_flush_bits, 0);
- } else if (i == 1) {
- si_cs_emit_cache_flush(cs,
- queue->device->physical_device->rad_info.chip_class,
- NULL, 0,
- queue->queue_family_index == RING_COMPUTE &&
- queue->device->physical_device->rad_info.chip_class >= GFX7,
- RADV_CMD_FLAG_INV_ICACHE |
- RADV_CMD_FLAG_INV_SCACHE |
- RADV_CMD_FLAG_INV_VCACHE |
- RADV_CMD_FLAG_INV_L2 |
- RADV_CMD_FLAG_START_PIPELINE_STATS, &sqtt_flush_bits, 0);
- }
-
- if (queue->device->ws->cs_finalize(cs) != VK_SUCCESS)
- goto fail;
- }
-
- if (queue->initial_full_flush_preamble_cs)
- queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
-
- if (queue->initial_preamble_cs)
- queue->device->ws->cs_destroy(queue->initial_preamble_cs);
-
- if (queue->continue_preamble_cs)
- queue->device->ws->cs_destroy(queue->continue_preamble_cs);
-
- queue->initial_full_flush_preamble_cs = dest_cs[0];
- queue->initial_preamble_cs = dest_cs[1];
- queue->continue_preamble_cs = dest_cs[2];
-
- if (scratch_bo != queue->scratch_bo) {
- if (queue->scratch_bo)
- queue->device->ws->buffer_destroy(queue->device->ws, queue->scratch_bo);
- queue->scratch_bo = scratch_bo;
- }
- queue->scratch_size_per_wave = scratch_size_per_wave;
- queue->scratch_waves = scratch_waves;
-
- if (compute_scratch_bo != queue->compute_scratch_bo) {
- if (queue->compute_scratch_bo)
- queue->device->ws->buffer_destroy(queue->device->ws, queue->compute_scratch_bo);
- queue->compute_scratch_bo = compute_scratch_bo;
- }
- queue->compute_scratch_size_per_wave = compute_scratch_size_per_wave;
- queue->compute_scratch_waves = compute_scratch_waves;
-
- if (esgs_ring_bo != queue->esgs_ring_bo) {
- if (queue->esgs_ring_bo)
- queue->device->ws->buffer_destroy(queue->device->ws, queue->esgs_ring_bo);
- queue->esgs_ring_bo = esgs_ring_bo;
- queue->esgs_ring_size = esgs_ring_size;
- }
-
- if (gsvs_ring_bo != queue->gsvs_ring_bo) {
- if (queue->gsvs_ring_bo)
- queue->device->ws->buffer_destroy(queue->device->ws, queue->gsvs_ring_bo);
- queue->gsvs_ring_bo = gsvs_ring_bo;
- queue->gsvs_ring_size = gsvs_ring_size;
- }
-
- if (tess_rings_bo != queue->tess_rings_bo) {
- queue->tess_rings_bo = tess_rings_bo;
- queue->has_tess_rings = true;
- }
-
- if (gds_bo != queue->gds_bo) {
- queue->gds_bo = gds_bo;
- queue->has_gds = true;
- }
-
- if (gds_oa_bo != queue->gds_oa_bo) {
- queue->gds_oa_bo = gds_oa_bo;
- queue->has_gds_oa = true;
- }
-
- if (descriptor_bo != queue->descriptor_bo) {
- if (queue->descriptor_bo)
- queue->device->ws->buffer_destroy(queue->device->ws, queue->descriptor_bo);
-
- queue->descriptor_bo = descriptor_bo;
- }
-
- if (add_sample_positions)
- queue->has_sample_positions = true;
-
- *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
- *initial_preamble_cs = queue->initial_preamble_cs;
- *continue_preamble_cs = queue->continue_preamble_cs;
- if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
- *continue_preamble_cs = NULL;
- return VK_SUCCESS;
+ struct radeon_winsys_bo *scratch_bo = NULL;
+ struct radeon_winsys_bo *descriptor_bo = NULL;
+ struct radeon_winsys_bo *compute_scratch_bo = NULL;
+ struct radeon_winsys_bo *esgs_ring_bo = NULL;
+ struct radeon_winsys_bo *gsvs_ring_bo = NULL;
+ struct radeon_winsys_bo *tess_rings_bo = NULL;
+ struct radeon_winsys_bo *gds_bo = NULL;
+ struct radeon_winsys_bo *gds_oa_bo = NULL;
+ struct radeon_cmdbuf *dest_cs[3] = {0};
+ bool add_tess_rings = false, add_gds = false, add_gds_oa = false, add_sample_positions = false;
+ unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
+ unsigned max_offchip_buffers;
+ unsigned hs_offchip_param = 0;
+ unsigned tess_offchip_ring_offset;
+ uint32_t ring_bo_flags = RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING;
+ if (!queue->has_tess_rings) {
+ if (needs_tess_rings)
+ add_tess_rings = true;
+ }
+ if (!queue->has_gds) {
+ if (needs_gds)
+ add_gds = true;
+ }
+ if (!queue->has_gds_oa) {
+ if (needs_gds_oa)
+ add_gds_oa = true;
+ }
+ if (!queue->has_sample_positions) {
+ if (needs_sample_positions)
+ add_sample_positions = true;
+ }
+ tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se;
+ hs_offchip_param = radv_get_hs_offchip_param(queue->device, &max_offchip_buffers);
+ tess_offchip_ring_offset = align(tess_factor_ring_size, 64 * 1024);
+ tess_offchip_ring_size = max_offchip_buffers * queue->device->tess_offchip_block_dw_size * 4;
+
+ scratch_size_per_wave = MAX2(scratch_size_per_wave, queue->scratch_size_per_wave);
+ if (scratch_size_per_wave)
+ scratch_waves = MIN2(scratch_waves, UINT32_MAX / scratch_size_per_wave);
+ else
+ scratch_waves = 0;
+
+ compute_scratch_size_per_wave =
+ MAX2(compute_scratch_size_per_wave, queue->compute_scratch_size_per_wave);
+ if (compute_scratch_size_per_wave)
+ compute_scratch_waves =
+ MIN2(compute_scratch_waves, UINT32_MAX / compute_scratch_size_per_wave);
+ else
+ compute_scratch_waves = 0;
+
+ if (scratch_size_per_wave <= queue->scratch_size_per_wave &&
+ scratch_waves <= queue->scratch_waves &&
+ compute_scratch_size_per_wave <= queue->compute_scratch_size_per_wave &&
+ compute_scratch_waves <= queue->compute_scratch_waves &&
+ esgs_ring_size <= queue->esgs_ring_size && gsvs_ring_size <= queue->gsvs_ring_size &&
+ !add_tess_rings && !add_gds && !add_gds_oa && !add_sample_positions &&
+ queue->initial_preamble_cs) {
+ *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
+ *initial_preamble_cs = queue->initial_preamble_cs;
+ *continue_preamble_cs = queue->continue_preamble_cs;
+ if (!scratch_size_per_wave && !compute_scratch_size_per_wave && !esgs_ring_size &&
+ !gsvs_ring_size && !needs_tess_rings && !needs_gds && !needs_gds_oa &&
+ !needs_sample_positions)
+ *continue_preamble_cs = NULL;
+ return VK_SUCCESS;
+ }
+
+ uint32_t scratch_size = scratch_size_per_wave * scratch_waves;
+ uint32_t queue_scratch_size = queue->scratch_size_per_wave * queue->scratch_waves;
+ if (scratch_size > queue_scratch_size) {
+ scratch_bo =
+ queue->device->ws->buffer_create(queue->device->ws, scratch_size, 4096, RADEON_DOMAIN_VRAM,
+ ring_bo_flags, RADV_BO_PRIORITY_SCRATCH);
+ if (!scratch_bo)
+ goto fail;
+ } else
+ scratch_bo = queue->scratch_bo;
+
+ uint32_t compute_scratch_size = compute_scratch_size_per_wave * compute_scratch_waves;
+ uint32_t compute_queue_scratch_size =
+ queue->compute_scratch_size_per_wave * queue->compute_scratch_waves;
+ if (compute_scratch_size > compute_queue_scratch_size) {
+ compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws, compute_scratch_size,
+ 4096, RADEON_DOMAIN_VRAM, ring_bo_flags,
+ RADV_BO_PRIORITY_SCRATCH);
+ if (!compute_scratch_bo)
+ goto fail;
+
+ } else
+ compute_scratch_bo = queue->compute_scratch_bo;
+
+ if (esgs_ring_size > queue->esgs_ring_size) {
+ esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws, esgs_ring_size, 4096,
+ RADEON_DOMAIN_VRAM, ring_bo_flags,
+ RADV_BO_PRIORITY_SCRATCH);
+ if (!esgs_ring_bo)
+ goto fail;
+ } else {
+ esgs_ring_bo = queue->esgs_ring_bo;
+ esgs_ring_size = queue->esgs_ring_size;
+ }
+
+ if (gsvs_ring_size > queue->gsvs_ring_size) {
+ gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws, gsvs_ring_size, 4096,
+ RADEON_DOMAIN_VRAM, ring_bo_flags,
+ RADV_BO_PRIORITY_SCRATCH);
+ if (!gsvs_ring_bo)
+ goto fail;
+ } else {
+ gsvs_ring_bo = queue->gsvs_ring_bo;
+ gsvs_ring_size = queue->gsvs_ring_size;
+ }
+
+ if (add_tess_rings) {
+ tess_rings_bo = queue->device->ws->buffer_create(
+ queue->device->ws, tess_offchip_ring_offset + tess_offchip_ring_size, 256,
+ RADEON_DOMAIN_VRAM, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH);
+ if (!tess_rings_bo)
+ goto fail;
+ } else {
+ tess_rings_bo = queue->tess_rings_bo;
+ }
+
+ if (add_gds) {
+ assert(queue->device->physical_device->rad_info.chip_class >= GFX10);
+
+ /* 4 streamout GDS counters.
+ * We need 256B (64 dw) of GDS, otherwise streamout hangs.
+ */
+ gds_bo = queue->device->ws->buffer_create(queue->device->ws, 256, 4, RADEON_DOMAIN_GDS,
+ ring_bo_flags, RADV_BO_PRIORITY_SCRATCH);
+ if (!gds_bo)
+ goto fail;
+ } else {
+ gds_bo = queue->gds_bo;
+ }
+
+ if (add_gds_oa) {
+ assert(queue->device->physical_device->rad_info.chip_class >= GFX10);
+
+ gds_oa_bo = queue->device->ws->buffer_create(queue->device->ws, 4, 1, RADEON_DOMAIN_OA,
+ ring_bo_flags, RADV_BO_PRIORITY_SCRATCH);
+ if (!gds_oa_bo)
+ goto fail;
+ } else {
+ gds_oa_bo = queue->gds_oa_bo;
+ }
+
+ if (scratch_bo != queue->scratch_bo || esgs_ring_bo != queue->esgs_ring_bo ||
+ gsvs_ring_bo != queue->gsvs_ring_bo || tess_rings_bo != queue->tess_rings_bo ||
+ add_sample_positions) {
+ uint32_t size = 0;
+ if (gsvs_ring_bo || esgs_ring_bo || tess_rings_bo || add_sample_positions) {
+         size = 112; /* (2 + 2 padding + 4 * 6) dwords = 28 dwords = 112 bytes */
+         if (add_sample_positions)
+            size += 128; /* sample positions: 64 + 32 + 16 + 8 = 120 bytes, allocated as 128 */
+ } else if (scratch_bo)
+ size = 8; /* 2 dword */
+
+ descriptor_bo = queue->device->ws->buffer_create(
+ queue->device->ws, size, 4096, RADEON_DOMAIN_VRAM,
+ RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY,
+ RADV_BO_PRIORITY_DESCRIPTOR);
+ if (!descriptor_bo)
+ goto fail;
+ } else
+ descriptor_bo = queue->descriptor_bo;
+
+ if (descriptor_bo != queue->descriptor_bo) {
+ uint32_t *map = (uint32_t *)queue->device->ws->buffer_map(descriptor_bo);
+ if (!map)
+ goto fail;
+
+ if (scratch_bo) {
+ uint64_t scratch_va = radv_buffer_get_va(scratch_bo);
+ uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) | S_008F04_SWIZZLE_ENABLE(1);
+ map[0] = scratch_va;
+ map[1] = rsrc1;
+ }
+
+ if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo || add_sample_positions)
+ fill_geom_tess_rings(queue, map, add_sample_positions, esgs_ring_size, esgs_ring_bo,
+ gsvs_ring_size, gsvs_ring_bo, tess_factor_ring_size,
+ tess_offchip_ring_offset, tess_offchip_ring_size, tess_rings_bo);
+
+ queue->device->ws->buffer_unmap(descriptor_bo);
+ }
+
+ for (int i = 0; i < 3; ++i) {
+ enum rgp_flush_bits sqtt_flush_bits = 0;
+ struct radeon_cmdbuf *cs = NULL;
+ cs = queue->device->ws->cs_create(queue->device->ws,
+ queue->queue_family_index ? RING_COMPUTE : RING_GFX);
+ if (!cs)
+ goto fail;
+
+ dest_cs[i] = cs;
+
+ if (scratch_bo)
+ radv_cs_add_buffer(queue->device->ws, cs, scratch_bo);
+
+ /* Emit initial configuration. */
+ switch (queue->queue_family_index) {
+ case RADV_QUEUE_GENERAL:
+ radv_init_graphics_state(cs, queue);
+ break;
+ case RADV_QUEUE_COMPUTE:
+ radv_init_compute_state(cs, queue);
+ break;
+ case RADV_QUEUE_TRANSFER:
+ break;
+ }
+
+ if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
+
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
+ }
+
+ radv_emit_gs_ring_sizes(queue, cs, esgs_ring_bo, esgs_ring_size, gsvs_ring_bo,
+ gsvs_ring_size);
+ radv_emit_tess_factor_ring(queue, cs, hs_offchip_param, tess_factor_ring_size, tess_rings_bo);
+ radv_emit_global_shader_pointers(queue, cs, descriptor_bo);
+ radv_emit_compute_scratch(queue, cs, compute_scratch_size_per_wave, compute_scratch_waves,
+ compute_scratch_bo);
+ radv_emit_graphics_scratch(queue, cs, scratch_size_per_wave, scratch_waves, scratch_bo);
+
+ if (gds_bo)
+ radv_cs_add_buffer(queue->device->ws, cs, gds_bo);
+ if (gds_oa_bo)
+ radv_cs_add_buffer(queue->device->ws, cs, gds_oa_bo);
+
+ if (i == 0) {
+ si_cs_emit_cache_flush(
+ cs, queue->device->physical_device->rad_info.chip_class, NULL, 0,
+ queue->queue_family_index == RING_COMPUTE &&
+ queue->device->physical_device->rad_info.chip_class >= GFX7,
+ (queue->queue_family_index == RADV_QUEUE_COMPUTE
+ ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH
+ : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
+ RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE | RADV_CMD_FLAG_INV_VCACHE |
+ RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_START_PIPELINE_STATS,
+ &sqtt_flush_bits, 0);
+ } else if (i == 1) {
+ si_cs_emit_cache_flush(cs, queue->device->physical_device->rad_info.chip_class, NULL, 0,
+ queue->queue_family_index == RING_COMPUTE &&
+ queue->device->physical_device->rad_info.chip_class >= GFX7,
+ RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE |
+ RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_L2 |
+ RADV_CMD_FLAG_START_PIPELINE_STATS,
+ &sqtt_flush_bits, 0);
+ }
+
+ if (queue->device->ws->cs_finalize(cs) != VK_SUCCESS)
+ goto fail;
+ }
+
+ if (queue->initial_full_flush_preamble_cs)
+ queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
+
+ if (queue->initial_preamble_cs)
+ queue->device->ws->cs_destroy(queue->initial_preamble_cs);
+
+ if (queue->continue_preamble_cs)
+ queue->device->ws->cs_destroy(queue->continue_preamble_cs);
+
+ queue->initial_full_flush_preamble_cs = dest_cs[0];
+ queue->initial_preamble_cs = dest_cs[1];
+ queue->continue_preamble_cs = dest_cs[2];
+
+ if (scratch_bo != queue->scratch_bo) {
+ if (queue->scratch_bo)
+ queue->device->ws->buffer_destroy(queue->device->ws, queue->scratch_bo);
+ queue->scratch_bo = scratch_bo;
+ }
+ queue->scratch_size_per_wave = scratch_size_per_wave;
+ queue->scratch_waves = scratch_waves;
+
+ if (compute_scratch_bo != queue->compute_scratch_bo) {
+ if (queue->compute_scratch_bo)
+ queue->device->ws->buffer_destroy(queue->device->ws, queue->compute_scratch_bo);
+ queue->compute_scratch_bo = compute_scratch_bo;
+ }
+ queue->compute_scratch_size_per_wave = compute_scratch_size_per_wave;
+ queue->compute_scratch_waves = compute_scratch_waves;
+
+ if (esgs_ring_bo != queue->esgs_ring_bo) {
+ if (queue->esgs_ring_bo)
+ queue->device->ws->buffer_destroy(queue->device->ws, queue->esgs_ring_bo);
+ queue->esgs_ring_bo = esgs_ring_bo;
+ queue->esgs_ring_size = esgs_ring_size;
+ }
+
+ if (gsvs_ring_bo != queue->gsvs_ring_bo) {
+ if (queue->gsvs_ring_bo)
+ queue->device->ws->buffer_destroy(queue->device->ws, queue->gsvs_ring_bo);
+ queue->gsvs_ring_bo = gsvs_ring_bo;
+ queue->gsvs_ring_size = gsvs_ring_size;
+ }
+
+ if (tess_rings_bo != queue->tess_rings_bo) {
+ queue->tess_rings_bo = tess_rings_bo;
+ queue->has_tess_rings = true;
+ }
+
+ if (gds_bo != queue->gds_bo) {
+ queue->gds_bo = gds_bo;
+ queue->has_gds = true;
+ }
+
+ if (gds_oa_bo != queue->gds_oa_bo) {
+ queue->gds_oa_bo = gds_oa_bo;
+ queue->has_gds_oa = true;
+ }
+
+ if (descriptor_bo != queue->descriptor_bo) {
+ if (queue->descriptor_bo)
+ queue->device->ws->buffer_destroy(queue->device->ws, queue->descriptor_bo);
+
+ queue->descriptor_bo = descriptor_bo;
+ }
+
+ if (add_sample_positions)
+ queue->has_sample_positions = true;
+
+ *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
+ *initial_preamble_cs = queue->initial_preamble_cs;
+ *continue_preamble_cs = queue->continue_preamble_cs;
+ if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
+ *continue_preamble_cs = NULL;
+ return VK_SUCCESS;
fail:
- for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
- if (dest_cs[i])
- queue->device->ws->cs_destroy(dest_cs[i]);
- if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
- queue->device->ws->buffer_destroy(queue->device->ws, descriptor_bo);
- if (scratch_bo && scratch_bo != queue->scratch_bo)
- queue->device->ws->buffer_destroy(queue->device->ws, scratch_bo);
- if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
- queue->device->ws->buffer_destroy(queue->device->ws, compute_scratch_bo);
- if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
- queue->device->ws->buffer_destroy(queue->device->ws, esgs_ring_bo);
- if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
- queue->device->ws->buffer_destroy(queue->device->ws, gsvs_ring_bo);
- if (tess_rings_bo && tess_rings_bo != queue->tess_rings_bo)
- queue->device->ws->buffer_destroy(queue->device->ws, tess_rings_bo);
- if (gds_bo && gds_bo != queue->gds_bo)
- queue->device->ws->buffer_destroy(queue->device->ws, gds_bo);
- if (gds_oa_bo && gds_oa_bo != queue->gds_oa_bo)
- queue->device->ws->buffer_destroy(queue->device->ws, gds_oa_bo);
-
- return vk_error(queue->device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
-}
-
-static VkResult radv_alloc_sem_counts(struct radv_device *device,
- struct radv_winsys_sem_counts *counts,
- int num_sems,
- struct radv_semaphore_part **sems,
- const uint64_t *timeline_values,
- VkFence _fence,
- bool is_signal)
-{
- int syncobj_idx = 0, non_reset_idx = 0, timeline_idx = 0;
-
- if (num_sems == 0 && _fence == VK_NULL_HANDLE)
- return VK_SUCCESS;
-
- for (uint32_t i = 0; i < num_sems; i++) {
- switch(sems[i]->kind) {
- case RADV_SEMAPHORE_SYNCOBJ:
- counts->syncobj_count++;
- counts->syncobj_reset_count++;
- break;
- case RADV_SEMAPHORE_NONE:
- break;
- case RADV_SEMAPHORE_TIMELINE:
- counts->syncobj_count++;
- break;
- case RADV_SEMAPHORE_TIMELINE_SYNCOBJ:
- counts->timeline_syncobj_count++;
- break;
- }
- }
-
- if (_fence != VK_NULL_HANDLE)
- counts->syncobj_count++;
-
- if (counts->syncobj_count || counts->timeline_syncobj_count) {
- counts->points = (uint64_t *)malloc(
- sizeof(*counts->syncobj) * counts->syncobj_count +
- (sizeof(*counts->syncobj) + sizeof(*counts->points)) * counts->timeline_syncobj_count);
- if (!counts->points)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- counts->syncobj = (uint32_t*)(counts->points + counts->timeline_syncobj_count);
- }
-
- non_reset_idx = counts->syncobj_reset_count;
-
- for (uint32_t i = 0; i < num_sems; i++) {
- switch(sems[i]->kind) {
- case RADV_SEMAPHORE_NONE:
- unreachable("Empty semaphore");
- break;
- case RADV_SEMAPHORE_SYNCOBJ:
- counts->syncobj[syncobj_idx++] = sems[i]->syncobj;
- break;
- case RADV_SEMAPHORE_TIMELINE: {
- mtx_lock(&sems[i]->timeline.mutex);
- struct radv_timeline_point *point = NULL;
- if (is_signal) {
- point = radv_timeline_add_point_locked(device, &sems[i]->timeline, timeline_values[i]);
- } else {
- point = radv_timeline_find_point_at_least_locked(device, &sems[i]->timeline, timeline_values[i]);
- }
-
- mtx_unlock(&sems[i]->timeline.mutex);
-
- if (point) {
- counts->syncobj[non_reset_idx++] = point->syncobj;
- } else {
-            /* Explicitly remove the semaphore so that we do not find
-             * a point for it later, post-submit. */
- sems[i] = NULL;
- }
- break;
- }
- case RADV_SEMAPHORE_TIMELINE_SYNCOBJ:
- counts->syncobj[counts->syncobj_count + timeline_idx] = sems[i]->syncobj;
- counts->points[timeline_idx] = timeline_values[i];
- ++timeline_idx;
- break;
- }
- }
-
- if (_fence != VK_NULL_HANDLE) {
- RADV_FROM_HANDLE(radv_fence, fence, _fence);
-
- struct radv_fence_part *part =
- fence->temporary.kind != RADV_FENCE_NONE ?
- &fence->temporary : &fence->permanent;
- counts->syncobj[non_reset_idx++] = part->syncobj;
- }
-
- assert(MAX2(syncobj_idx, non_reset_idx) <= counts->syncobj_count);
- counts->syncobj_count = MAX2(syncobj_idx, non_reset_idx);
-
- return VK_SUCCESS;
+ for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
+ if (dest_cs[i])
+ queue->device->ws->cs_destroy(dest_cs[i]);
+ if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
+ queue->device->ws->buffer_destroy(queue->device->ws, descriptor_bo);
+ if (scratch_bo && scratch_bo != queue->scratch_bo)
+ queue->device->ws->buffer_destroy(queue->device->ws, scratch_bo);
+ if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
+ queue->device->ws->buffer_destroy(queue->device->ws, compute_scratch_bo);
+ if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
+ queue->device->ws->buffer_destroy(queue->device->ws, esgs_ring_bo);
+ if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
+ queue->device->ws->buffer_destroy(queue->device->ws, gsvs_ring_bo);
+ if (tess_rings_bo && tess_rings_bo != queue->tess_rings_bo)
+ queue->device->ws->buffer_destroy(queue->device->ws, tess_rings_bo);
+ if (gds_bo && gds_bo != queue->gds_bo)
+ queue->device->ws->buffer_destroy(queue->device->ws, gds_bo);
+ if (gds_oa_bo && gds_oa_bo != queue->gds_oa_bo)
+ queue->device->ws->buffer_destroy(queue->device->ws, gds_oa_bo);
+
+ return vk_error(queue->device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+}
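
radv_get_preamble_cs above clamps each wave count with MIN2(waves, UINT32_MAX / size_per_wave) so that the later 32-bit product size_per_wave * waves cannot overflow. A minimal standalone sketch of that pattern, not part of this patch, with invented helper names:

   #include <stdint.h>

   /* Clamp the wave count so that size_per_wave * waves fits in a uint32_t. */
   static uint32_t
   clamp_scratch_waves(uint32_t size_per_wave, uint32_t waves)
   {
      if (!size_per_wave)
         return 0; /* no scratch requested, so no waves need scratch */
      if (waves > UINT32_MAX / size_per_wave)
         waves = UINT32_MAX / size_per_wave;
      return waves;
   }

   /* Total allocation size; cannot overflow after the clamp above. */
   static uint32_t
   scratch_bytes(uint32_t size_per_wave, uint32_t waves)
   {
      return size_per_wave * clamp_scratch_waves(size_per_wave, waves);
   }

The same clamp is applied separately to the graphics and the compute scratch sizes before either buffer is created.
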
+
+static VkResult
+radv_alloc_sem_counts(struct radv_device *device, struct radv_winsys_sem_counts *counts,
+ int num_sems, struct radv_semaphore_part **sems,
+ const uint64_t *timeline_values, VkFence _fence, bool is_signal)
+{
+ int syncobj_idx = 0, non_reset_idx = 0, timeline_idx = 0;
+
+ if (num_sems == 0 && _fence == VK_NULL_HANDLE)
+ return VK_SUCCESS;
+
+ for (uint32_t i = 0; i < num_sems; i++) {
+ switch (sems[i]->kind) {
+ case RADV_SEMAPHORE_SYNCOBJ:
+ counts->syncobj_count++;
+ counts->syncobj_reset_count++;
+ break;
+ case RADV_SEMAPHORE_NONE:
+ break;
+ case RADV_SEMAPHORE_TIMELINE:
+ counts->syncobj_count++;
+ break;
+ case RADV_SEMAPHORE_TIMELINE_SYNCOBJ:
+ counts->timeline_syncobj_count++;
+ break;
+ }
+ }
+
+ if (_fence != VK_NULL_HANDLE)
+ counts->syncobj_count++;
+
+ if (counts->syncobj_count || counts->timeline_syncobj_count) {
+ counts->points = (uint64_t *)malloc(sizeof(*counts->syncobj) * counts->syncobj_count +
+ (sizeof(*counts->syncobj) + sizeof(*counts->points)) *
+ counts->timeline_syncobj_count);
+ if (!counts->points)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ counts->syncobj = (uint32_t *)(counts->points + counts->timeline_syncobj_count);
+ }
+
+ non_reset_idx = counts->syncobj_reset_count;
+
+ for (uint32_t i = 0; i < num_sems; i++) {
+ switch (sems[i]->kind) {
+ case RADV_SEMAPHORE_NONE:
+ unreachable("Empty semaphore");
+ break;
+ case RADV_SEMAPHORE_SYNCOBJ:
+ counts->syncobj[syncobj_idx++] = sems[i]->syncobj;
+ break;
+ case RADV_SEMAPHORE_TIMELINE: {
+ mtx_lock(&sems[i]->timeline.mutex);
+ struct radv_timeline_point *point = NULL;
+ if (is_signal) {
+ point = radv_timeline_add_point_locked(device, &sems[i]->timeline, timeline_values[i]);
+ } else {
+ point = radv_timeline_find_point_at_least_locked(device, &sems[i]->timeline,
+ timeline_values[i]);
+ }
+
+ mtx_unlock(&sems[i]->timeline.mutex);
+
+ if (point) {
+ counts->syncobj[non_reset_idx++] = point->syncobj;
+ } else {
+            /* Explicitly remove the semaphore so that we do not find
+             * a point for it later, post-submit. */
+ sems[i] = NULL;
+ }
+ break;
+ }
+ case RADV_SEMAPHORE_TIMELINE_SYNCOBJ:
+ counts->syncobj[counts->syncobj_count + timeline_idx] = sems[i]->syncobj;
+ counts->points[timeline_idx] = timeline_values[i];
+ ++timeline_idx;
+ break;
+ }
+ }
+
+ if (_fence != VK_NULL_HANDLE) {
+ RADV_FROM_HANDLE(radv_fence, fence, _fence);
+
+ struct radv_fence_part *part =
+ fence->temporary.kind != RADV_FENCE_NONE ? &fence->temporary : &fence->permanent;
+ counts->syncobj[non_reset_idx++] = part->syncobj;
+ }
+
+ assert(MAX2(syncobj_idx, non_reset_idx) <= counts->syncobj_count);
+ counts->syncobj_count = MAX2(syncobj_idx, non_reset_idx);
+
+ return VK_SUCCESS;
}
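
radv_alloc_sem_counts above packs the uint64_t timeline points and the uint32_t syncobj handles into one malloc, with the handle array starting right after the points; freeing the points pointer therefore releases both, which is what radv_free_sem_info below does. A simplified sketch of that layout, not part of this patch, with invented names:

   #include <stdint.h>
   #include <stdlib.h>

   struct sem_arrays {
      uint64_t *points;  /* one wait/signal value per timeline syncobj */
      uint32_t *syncobj; /* binary syncobjs first, timeline syncobjs appended */
   };

   /* One allocation backs both arrays; free(arr->points) releases everything. */
   static int
   sem_arrays_alloc(struct sem_arrays *arr, unsigned syncobj_count, unsigned timeline_count)
   {
      size_t bytes = sizeof(*arr->syncobj) * syncobj_count +
                     (sizeof(*arr->syncobj) + sizeof(*arr->points)) * timeline_count;

      arr->points = malloc(bytes);
      if (!arr->points)
         return -1;
      arr->syncobj = (uint32_t *)(arr->points + timeline_count);
      return 0;
   }
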
static void
radv_free_sem_info(struct radv_winsys_sem_info *sem_info)
{
- free(sem_info->wait.points);
- free(sem_info->signal.points);
+ free(sem_info->wait.points);
+ free(sem_info->signal.points);
}
-
-static void radv_free_temp_syncobjs(struct radv_device *device,
- int num_sems,
- struct radv_semaphore_part *sems)
+static void
+radv_free_temp_syncobjs(struct radv_device *device, int num_sems, struct radv_semaphore_part *sems)
{
- for (uint32_t i = 0; i < num_sems; i++) {
- radv_destroy_semaphore_part(device, sems + i);
- }
+ for (uint32_t i = 0; i < num_sems; i++) {
+ radv_destroy_semaphore_part(device, sems + i);
+ }
}
static VkResult
-radv_alloc_sem_info(struct radv_device *device,
- struct radv_winsys_sem_info *sem_info,
- int num_wait_sems,
- struct radv_semaphore_part **wait_sems,
- const uint64_t *wait_values,
- int num_signal_sems,
- struct radv_semaphore_part **signal_sems,
- const uint64_t *signal_values,
- VkFence fence)
-{
- VkResult ret;
-
- ret = radv_alloc_sem_counts(device, &sem_info->wait, num_wait_sems, wait_sems, wait_values, VK_NULL_HANDLE, false);
- if (ret)
- return ret;
- ret = radv_alloc_sem_counts(device, &sem_info->signal, num_signal_sems, signal_sems, signal_values, fence, true);
- if (ret)
- radv_free_sem_info(sem_info);
-
- /* caller can override these */
- sem_info->cs_emit_wait = true;
- sem_info->cs_emit_signal = true;
- return ret;
+radv_alloc_sem_info(struct radv_device *device, struct radv_winsys_sem_info *sem_info,
+ int num_wait_sems, struct radv_semaphore_part **wait_sems,
+ const uint64_t *wait_values, int num_signal_sems,
+ struct radv_semaphore_part **signal_sems, const uint64_t *signal_values,
+ VkFence fence)
+{
+ VkResult ret;
+
+ ret = radv_alloc_sem_counts(device, &sem_info->wait, num_wait_sems, wait_sems, wait_values,
+ VK_NULL_HANDLE, false);
+ if (ret)
+ return ret;
+ ret = radv_alloc_sem_counts(device, &sem_info->signal, num_signal_sems, signal_sems,
+ signal_values, fence, true);
+ if (ret)
+ radv_free_sem_info(sem_info);
+
+ /* caller can override these */
+ sem_info->cs_emit_wait = true;
+ sem_info->cs_emit_signal = true;
+ return ret;
}
static void
-radv_finalize_timelines(struct radv_device *device,
- uint32_t num_wait_sems,
- struct radv_semaphore_part **wait_sems,
- const uint64_t *wait_values,
- uint32_t num_signal_sems,
- struct radv_semaphore_part **signal_sems,
- const uint64_t *signal_values,
- struct list_head *processing_list)
-{
- for (uint32_t i = 0; i < num_wait_sems; ++i) {
- if (wait_sems[i] && wait_sems[i]->kind == RADV_SEMAPHORE_TIMELINE) {
- mtx_lock(&wait_sems[i]->timeline.mutex);
- struct radv_timeline_point *point =
- radv_timeline_find_point_at_least_locked(device, &wait_sems[i]->timeline, wait_values[i]);
- point->wait_count -= 2;
- mtx_unlock(&wait_sems[i]->timeline.mutex);
- }
- }
- for (uint32_t i = 0; i < num_signal_sems; ++i) {
- if (signal_sems[i] && signal_sems[i]->kind == RADV_SEMAPHORE_TIMELINE) {
- mtx_lock(&signal_sems[i]->timeline.mutex);
- struct radv_timeline_point *point =
- radv_timeline_find_point_at_least_locked(device, &signal_sems[i]->timeline, signal_values[i]);
- signal_sems[i]->timeline.highest_submitted =
- MAX2(signal_sems[i]->timeline.highest_submitted, point->value);
- point->wait_count -= 2;
- radv_timeline_trigger_waiters_locked(&signal_sems[i]->timeline, processing_list);
- mtx_unlock(&signal_sems[i]->timeline.mutex);
- } else if (signal_sems[i] && signal_sems[i]->kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ) {
- signal_sems[i]->timeline_syncobj.max_point =
- MAX2(signal_sems[i]->timeline_syncobj.max_point, signal_values[i]);
- }
- }
+radv_finalize_timelines(struct radv_device *device, uint32_t num_wait_sems,
+ struct radv_semaphore_part **wait_sems, const uint64_t *wait_values,
+ uint32_t num_signal_sems, struct radv_semaphore_part **signal_sems,
+ const uint64_t *signal_values, struct list_head *processing_list)
+{
+ for (uint32_t i = 0; i < num_wait_sems; ++i) {
+ if (wait_sems[i] && wait_sems[i]->kind == RADV_SEMAPHORE_TIMELINE) {
+ mtx_lock(&wait_sems[i]->timeline.mutex);
+ struct radv_timeline_point *point = radv_timeline_find_point_at_least_locked(
+ device, &wait_sems[i]->timeline, wait_values[i]);
+ point->wait_count -= 2;
+ mtx_unlock(&wait_sems[i]->timeline.mutex);
+ }
+ }
+ for (uint32_t i = 0; i < num_signal_sems; ++i) {
+ if (signal_sems[i] && signal_sems[i]->kind == RADV_SEMAPHORE_TIMELINE) {
+ mtx_lock(&signal_sems[i]->timeline.mutex);
+ struct radv_timeline_point *point = radv_timeline_find_point_at_least_locked(
+ device, &signal_sems[i]->timeline, signal_values[i]);
+ signal_sems[i]->timeline.highest_submitted =
+ MAX2(signal_sems[i]->timeline.highest_submitted, point->value);
+ point->wait_count -= 2;
+ radv_timeline_trigger_waiters_locked(&signal_sems[i]->timeline, processing_list);
+ mtx_unlock(&signal_sems[i]->timeline.mutex);
+ } else if (signal_sems[i] && signal_sems[i]->kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ) {
+ signal_sems[i]->timeline_syncobj.max_point =
+ MAX2(signal_sems[i]->timeline_syncobj.max_point, signal_values[i]);
+ }
+ }
}
static VkResult
-radv_sparse_buffer_bind_memory(struct radv_device *device,
- const VkSparseBufferMemoryBindInfo *bind)
+radv_sparse_buffer_bind_memory(struct radv_device *device, const VkSparseBufferMemoryBindInfo *bind)
{
- RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer);
- VkResult result;
+ RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer);
+ VkResult result;
- for (uint32_t i = 0; i < bind->bindCount; ++i) {
- struct radv_device_memory *mem = NULL;
+ for (uint32_t i = 0; i < bind->bindCount; ++i) {
+ struct radv_device_memory *mem = NULL;
- if (bind->pBinds[i].memory != VK_NULL_HANDLE)
- mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
+ if (bind->pBinds[i].memory != VK_NULL_HANDLE)
+ mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
- result = device->ws->buffer_virtual_bind(device->ws,
- buffer->bo,
- bind->pBinds[i].resourceOffset,
- bind->pBinds[i].size,
- mem ? mem->bo : NULL,
- bind->pBinds[i].memoryOffset);
- if (result != VK_SUCCESS)
- return result;
- }
+ result = device->ws->buffer_virtual_bind(device->ws, buffer->bo,
+ bind->pBinds[i].resourceOffset, bind->pBinds[i].size,
+ mem ? mem->bo : NULL, bind->pBinds[i].memoryOffset);
+ if (result != VK_SUCCESS)
+ return result;
+ }
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
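
radv_sparse_buffer_bind_memory above is the driver side of vkQueueBindSparse: each VkSparseMemoryBind maps a byte range of the sparse buffer to device memory, or unmaps it when memory is VK_NULL_HANDLE. A minimal application-side sketch that would end up in this path; it assumes queue, sparse_buffer and memory were created elsewhere with sparse binding enabled, and the wrapper name is invented:

   #include <vulkan/vulkan.h>

   /* Bind `size` bytes of `memory` at the start of a sparse buffer. */
   static VkResult
   bind_sparse_buffer_range(VkQueue queue, VkBuffer sparse_buffer, VkDeviceMemory memory,
                            VkDeviceSize size)
   {
      VkSparseMemoryBind bind = {
         .resourceOffset = 0,
         .size = size,     /* must be a multiple of the sparse block size */
         .memory = memory, /* VK_NULL_HANDLE would unbind the range instead */
         .memoryOffset = 0,
      };
      VkSparseBufferMemoryBindInfo buffer_bind = {
         .buffer = sparse_buffer,
         .bindCount = 1,
         .pBinds = &bind,
      };
      VkBindSparseInfo bind_info = {
         .sType = VK_STRUCTURE_TYPE_BIND_SPARSE_INFO,
         .bufferBindCount = 1,
         .pBufferBinds = &buffer_bind,
      };
      return vkQueueBindSparse(queue, 1, &bind_info, VK_NULL_HANDLE);
   }
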
static VkResult
radv_sparse_image_opaque_bind_memory(struct radv_device *device,
const VkSparseImageOpaqueMemoryBindInfo *bind)
{
- RADV_FROM_HANDLE(radv_image, image, bind->image);
- VkResult result;
+ RADV_FROM_HANDLE(radv_image, image, bind->image);
+ VkResult result;
- for (uint32_t i = 0; i < bind->bindCount; ++i) {
- struct radv_device_memory *mem = NULL;
+ for (uint32_t i = 0; i < bind->bindCount; ++i) {
+ struct radv_device_memory *mem = NULL;
- if (bind->pBinds[i].memory != VK_NULL_HANDLE)
- mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
+ if (bind->pBinds[i].memory != VK_NULL_HANDLE)
+ mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
- result = device->ws->buffer_virtual_bind(device->ws,
- image->bo,
- bind->pBinds[i].resourceOffset,
- bind->pBinds[i].size,
- mem ? mem->bo : NULL,
- bind->pBinds[i].memoryOffset);
- if (result != VK_SUCCESS)
- return result;
- }
+ result = device->ws->buffer_virtual_bind(device->ws, image->bo,
+ bind->pBinds[i].resourceOffset, bind->pBinds[i].size,
+ mem ? mem->bo : NULL, bind->pBinds[i].memoryOffset);
+ if (result != VK_SUCCESS)
+ return result;
+ }
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
static VkResult
-radv_sparse_image_bind_memory(struct radv_device *device,
- const VkSparseImageMemoryBindInfo *bind)
-{
- RADV_FROM_HANDLE(radv_image, image, bind->image);
- struct radeon_surf *surface = &image->planes[0].surface;
- uint32_t bs = vk_format_get_blocksize(image->vk_format);
- VkResult result;
-
- for (uint32_t i = 0; i < bind->bindCount; ++i) {
- struct radv_device_memory *mem = NULL;
- uint32_t offset, pitch;
- uint32_t mem_offset = bind->pBinds[i].memoryOffset;
- const uint32_t layer = bind->pBinds[i].subresource.arrayLayer;
- const uint32_t level = bind->pBinds[i].subresource.mipLevel;
-
- VkExtent3D bind_extent = bind->pBinds[i].extent;
- bind_extent.width = DIV_ROUND_UP(bind_extent.width, vk_format_get_blockwidth(image->vk_format));
- bind_extent.height = DIV_ROUND_UP(bind_extent.height, vk_format_get_blockheight(image->vk_format));
-
- VkOffset3D bind_offset = bind->pBinds[i].offset;
- bind_offset.x /= vk_format_get_blockwidth(image->vk_format);
- bind_offset.y /= vk_format_get_blockheight(image->vk_format);
-
- if (bind->pBinds[i].memory != VK_NULL_HANDLE)
- mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
-
- if (device->physical_device->rad_info.chip_class >= GFX9) {
- offset = surface->u.gfx9.surf_slice_size * layer +
- surface->u.gfx9.prt_level_offset[level];
- pitch = surface->u.gfx9.prt_level_pitch[level];
- } else {
- offset = surface->u.legacy.level[level].offset +
- surface->u.legacy.level[level].slice_size_dw * 4 * layer;
- pitch = surface->u.legacy.level[level].nblk_x;
- }
-
- offset += (bind_offset.y * pitch * bs) +
- (bind_offset.x * surface->prt_tile_height * bs);
-
- uint32_t aligned_extent_width = ALIGN(bind_extent.width,
- surface->prt_tile_width);
-
- bool whole_subres = bind_offset.x == 0 &&
- aligned_extent_width == pitch;
-
- if (whole_subres) {
- uint32_t aligned_extent_height = ALIGN(bind_extent.height,
- surface->prt_tile_height);
-
- uint32_t size = aligned_extent_width * aligned_extent_height * bs;
- result = device->ws->buffer_virtual_bind(device->ws,
- image->bo,
- offset,
- size,
- mem ? mem->bo : NULL,
- mem_offset);
- if (result != VK_SUCCESS)
- return result;
- } else {
- uint32_t img_increment = pitch * bs;
- uint32_t mem_increment = aligned_extent_width * bs;
- uint32_t size = mem_increment * surface->prt_tile_height;
- for (unsigned y = 0; y < bind_extent.height; y += surface->prt_tile_height) {
- result = device->ws->buffer_virtual_bind(device->ws,
- image->bo,
- offset + img_increment * y,
- size,
- mem ? mem->bo : NULL,
- mem_offset + mem_increment * y);
- if (result != VK_SUCCESS)
- return result;
- }
- }
- }
-
- return VK_SUCCESS;
+radv_sparse_image_bind_memory(struct radv_device *device, const VkSparseImageMemoryBindInfo *bind)
+{
+ RADV_FROM_HANDLE(radv_image, image, bind->image);
+ struct radeon_surf *surface = &image->planes[0].surface;
+ uint32_t bs = vk_format_get_blocksize(image->vk_format);
+ VkResult result;
+
+ for (uint32_t i = 0; i < bind->bindCount; ++i) {
+ struct radv_device_memory *mem = NULL;
+ uint32_t offset, pitch;
+ uint32_t mem_offset = bind->pBinds[i].memoryOffset;
+ const uint32_t layer = bind->pBinds[i].subresource.arrayLayer;
+ const uint32_t level = bind->pBinds[i].subresource.mipLevel;
+
+ VkExtent3D bind_extent = bind->pBinds[i].extent;
+ bind_extent.width =
+ DIV_ROUND_UP(bind_extent.width, vk_format_get_blockwidth(image->vk_format));
+ bind_extent.height =
+ DIV_ROUND_UP(bind_extent.height, vk_format_get_blockheight(image->vk_format));
+
+ VkOffset3D bind_offset = bind->pBinds[i].offset;
+ bind_offset.x /= vk_format_get_blockwidth(image->vk_format);
+ bind_offset.y /= vk_format_get_blockheight(image->vk_format);
+
+ if (bind->pBinds[i].memory != VK_NULL_HANDLE)
+ mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
+
+ if (device->physical_device->rad_info.chip_class >= GFX9) {
+ offset = surface->u.gfx9.surf_slice_size * layer + surface->u.gfx9.prt_level_offset[level];
+ pitch = surface->u.gfx9.prt_level_pitch[level];
+ } else {
+ offset = surface->u.legacy.level[level].offset +
+ surface->u.legacy.level[level].slice_size_dw * 4 * layer;
+ pitch = surface->u.legacy.level[level].nblk_x;
+ }
+
+ offset += (bind_offset.y * pitch * bs) + (bind_offset.x * surface->prt_tile_height * bs);
+
+ uint32_t aligned_extent_width = ALIGN(bind_extent.width, surface->prt_tile_width);
+
+ bool whole_subres = bind_offset.x == 0 && aligned_extent_width == pitch;
+
+ if (whole_subres) {
+ uint32_t aligned_extent_height = ALIGN(bind_extent.height, surface->prt_tile_height);
+
+ uint32_t size = aligned_extent_width * aligned_extent_height * bs;
+ result = device->ws->buffer_virtual_bind(device->ws, image->bo, offset, size,
+ mem ? mem->bo : NULL, mem_offset);
+ if (result != VK_SUCCESS)
+ return result;
+ } else {
+ uint32_t img_increment = pitch * bs;
+ uint32_t mem_increment = aligned_extent_width * bs;
+ uint32_t size = mem_increment * surface->prt_tile_height;
+ for (unsigned y = 0; y < bind_extent.height; y += surface->prt_tile_height) {
+ result = device->ws->buffer_virtual_bind(
+ device->ws, image->bo, offset + img_increment * y, size, mem ? mem->bo : NULL,
+ mem_offset + mem_increment * y);
+ if (result != VK_SUCCESS)
+ return result;
+ }
+ }
+ }
+
+ return VK_SUCCESS;
}
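
When the bound rectangle does not cover the full pitch of the subresource, radv_sparse_image_bind_memory above cannot map it as a single contiguous range, so it issues one virtual bind per row of tiles: the image-side offset steps by pitch * bs per texel row while the memory-side offset steps by the tile-aligned width of the rectangle. A standalone sketch of that address arithmetic, not part of this patch, with invented names and printf standing in for the winsys bind call:

   #include <stdint.h>
   #include <stdio.h>

   /* Print the per-tile-row ranges a rectangular sparse bind splits into.
    * pitch is the subresource width in texels, bs the bytes per texel,
    * tile_h the PRT tile height in texels; x/y/width/height describe the
    * bound rectangle and are assumed to be tile aligned. */
   static void
   split_rect_bind(uint32_t pitch, uint32_t bs, uint32_t tile_h,
                   uint32_t x, uint32_t y, uint32_t width, uint32_t height)
   {
      uint64_t img_base = (uint64_t)y * pitch * bs + (uint64_t)x * tile_h * bs;
      uint32_t img_increment = pitch * bs;    /* bytes per texel row in the image */
      uint32_t mem_increment = width * bs;    /* bytes per texel row in the bound memory */
      uint32_t size = mem_increment * tile_h; /* one row of tiles */

      for (uint32_t row = 0; row < height; row += tile_h)
         printf("image +%llu <- memory +%llu (%u bytes)\n",
                (unsigned long long)(img_base + (uint64_t)img_increment * row),
                (unsigned long long)((uint64_t)mem_increment * row), (unsigned)size);
   }
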
static VkResult
-radv_get_preambles(struct radv_queue *queue,
- const VkCommandBuffer *cmd_buffers,
- uint32_t cmd_buffer_count,
- struct radeon_cmdbuf **initial_full_flush_preamble_cs,
+radv_get_preambles(struct radv_queue *queue, const VkCommandBuffer *cmd_buffers,
+ uint32_t cmd_buffer_count, struct radeon_cmdbuf **initial_full_flush_preamble_cs,
struct radeon_cmdbuf **initial_preamble_cs,
struct radeon_cmdbuf **continue_preamble_cs)
{
- uint32_t scratch_size_per_wave = 0, waves_wanted = 0;
- uint32_t compute_scratch_size_per_wave = 0, compute_waves_wanted = 0;
- uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
- bool tess_rings_needed = false;
- bool gds_needed = false;
- bool gds_oa_needed = false;
- bool sample_positions_needed = false;
-
- for (uint32_t j = 0; j < cmd_buffer_count; j++) {
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
- cmd_buffers[j]);
-
- scratch_size_per_wave = MAX2(scratch_size_per_wave, cmd_buffer->scratch_size_per_wave_needed);
- waves_wanted = MAX2(waves_wanted, cmd_buffer->scratch_waves_wanted);
- compute_scratch_size_per_wave = MAX2(compute_scratch_size_per_wave,
- cmd_buffer->compute_scratch_size_per_wave_needed);
- compute_waves_wanted = MAX2(compute_waves_wanted,
- cmd_buffer->compute_scratch_waves_wanted);
- esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
- gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
- tess_rings_needed |= cmd_buffer->tess_rings_needed;
- gds_needed |= cmd_buffer->gds_needed;
- gds_oa_needed |= cmd_buffer->gds_oa_needed;
- sample_positions_needed |= cmd_buffer->sample_positions_needed;
- }
-
- return radv_get_preamble_cs(queue, scratch_size_per_wave, waves_wanted,
- compute_scratch_size_per_wave, compute_waves_wanted,
- esgs_ring_size, gsvs_ring_size, tess_rings_needed,
- gds_needed, gds_oa_needed, sample_positions_needed,
- initial_full_flush_preamble_cs,
- initial_preamble_cs, continue_preamble_cs);
+ uint32_t scratch_size_per_wave = 0, waves_wanted = 0;
+ uint32_t compute_scratch_size_per_wave = 0, compute_waves_wanted = 0;
+ uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
+ bool tess_rings_needed = false;
+ bool gds_needed = false;
+ bool gds_oa_needed = false;
+ bool sample_positions_needed = false;
+
+ for (uint32_t j = 0; j < cmd_buffer_count; j++) {
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, cmd_buffers[j]);
+
+ scratch_size_per_wave = MAX2(scratch_size_per_wave, cmd_buffer->scratch_size_per_wave_needed);
+ waves_wanted = MAX2(waves_wanted, cmd_buffer->scratch_waves_wanted);
+ compute_scratch_size_per_wave =
+ MAX2(compute_scratch_size_per_wave, cmd_buffer->compute_scratch_size_per_wave_needed);
+ compute_waves_wanted = MAX2(compute_waves_wanted, cmd_buffer->compute_scratch_waves_wanted);
+ esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
+ gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
+ tess_rings_needed |= cmd_buffer->tess_rings_needed;
+ gds_needed |= cmd_buffer->gds_needed;
+ gds_oa_needed |= cmd_buffer->gds_oa_needed;
+ sample_positions_needed |= cmd_buffer->sample_positions_needed;
+ }
+
+ return radv_get_preamble_cs(queue, scratch_size_per_wave, waves_wanted,
+ compute_scratch_size_per_wave, compute_waves_wanted, esgs_ring_size,
+ gsvs_ring_size, tess_rings_needed, gds_needed, gds_oa_needed,
+ sample_positions_needed, initial_full_flush_preamble_cs,
+ initial_preamble_cs, continue_preamble_cs);
}
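
radv_get_preambles above reduces the per-command-buffer requirements to one worst case before building preambles: sizes are combined with MAX2 and the booleans with |=, so a single preamble set can serve every command buffer in the submission. A trimmed-down sketch of that reduction, using an invented stand-in struct rather than radv_cmd_buffer:

   #include <stdbool.h>
   #include <stdint.h>

   #define MAX2(a, b) ((a) > (b) ? (a) : (b))

   /* Invented stand-in for the few fields a command buffer tracks. */
   struct cs_requirements {
      uint32_t scratch_size_per_wave;
      uint32_t esgs_ring_size;
      bool tess_rings_needed;
   };

   static struct cs_requirements
   merge_requirements(const struct cs_requirements *reqs, uint32_t count)
   {
      struct cs_requirements out = {0};
      for (uint32_t i = 0; i < count; i++) {
         out.scratch_size_per_wave = MAX2(out.scratch_size_per_wave, reqs[i].scratch_size_per_wave);
         out.esgs_ring_size = MAX2(out.esgs_ring_size, reqs[i].esgs_ring_size);
         out.tess_rings_needed |= reqs[i].tess_rings_needed;
      }
      return out;
   }
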
struct radv_deferred_queue_submission {
- struct radv_queue *queue;
- VkCommandBuffer *cmd_buffers;
- uint32_t cmd_buffer_count;
-
- /* Sparse bindings that happen on a queue. */
- VkSparseBufferMemoryBindInfo *buffer_binds;
- uint32_t buffer_bind_count;
- VkSparseImageOpaqueMemoryBindInfo *image_opaque_binds;
- uint32_t image_opaque_bind_count;
- VkSparseImageMemoryBindInfo *image_binds;
- uint32_t image_bind_count;
-
- bool flush_caches;
- VkShaderStageFlags wait_dst_stage_mask;
- struct radv_semaphore_part **wait_semaphores;
- uint32_t wait_semaphore_count;
- struct radv_semaphore_part **signal_semaphores;
- uint32_t signal_semaphore_count;
- VkFence fence;
-
- uint64_t *wait_values;
- uint64_t *signal_values;
-
- struct radv_semaphore_part *temporary_semaphore_parts;
- uint32_t temporary_semaphore_part_count;
-
- struct list_head queue_pending_list;
- uint32_t submission_wait_count;
- struct radv_timeline_waiter *wait_nodes;
-
- struct list_head processing_list;
+ struct radv_queue *queue;
+ VkCommandBuffer *cmd_buffers;
+ uint32_t cmd_buffer_count;
+
+ /* Sparse bindings that happen on a queue. */
+ VkSparseBufferMemoryBindInfo *buffer_binds;
+ uint32_t buffer_bind_count;
+ VkSparseImageOpaqueMemoryBindInfo *image_opaque_binds;
+ uint32_t image_opaque_bind_count;
+ VkSparseImageMemoryBindInfo *image_binds;
+ uint32_t image_bind_count;
+
+ bool flush_caches;
+ VkShaderStageFlags wait_dst_stage_mask;
+ struct radv_semaphore_part **wait_semaphores;
+ uint32_t wait_semaphore_count;
+ struct radv_semaphore_part **signal_semaphores;
+ uint32_t signal_semaphore_count;
+ VkFence fence;
+
+ uint64_t *wait_values;
+ uint64_t *signal_values;
+
+ struct radv_semaphore_part *temporary_semaphore_parts;
+ uint32_t temporary_semaphore_part_count;
+
+ struct list_head queue_pending_list;
+ uint32_t submission_wait_count;
+ struct radv_timeline_waiter *wait_nodes;
+
+ struct list_head processing_list;
};
struct radv_queue_submission {
- const VkCommandBuffer *cmd_buffers;
- uint32_t cmd_buffer_count;
-
- /* Sparse bindings that happen on a queue. */
- const VkSparseBufferMemoryBindInfo *buffer_binds;
- uint32_t buffer_bind_count;
- const VkSparseImageOpaqueMemoryBindInfo *image_opaque_binds;
- uint32_t image_opaque_bind_count;
- const VkSparseImageMemoryBindInfo *image_binds;
- uint32_t image_bind_count;
-
- bool flush_caches;
- VkPipelineStageFlags wait_dst_stage_mask;
- const VkSemaphore *wait_semaphores;
- uint32_t wait_semaphore_count;
- const VkSemaphore *signal_semaphores;
- uint32_t signal_semaphore_count;
- VkFence fence;
-
- const uint64_t *wait_values;
- uint32_t wait_value_count;
- const uint64_t *signal_values;
- uint32_t signal_value_count;
+ const VkCommandBuffer *cmd_buffers;
+ uint32_t cmd_buffer_count;
+
+ /* Sparse bindings that happen on a queue. */
+ const VkSparseBufferMemoryBindInfo *buffer_binds;
+ uint32_t buffer_bind_count;
+ const VkSparseImageOpaqueMemoryBindInfo *image_opaque_binds;
+ uint32_t image_opaque_bind_count;
+ const VkSparseImageMemoryBindInfo *image_binds;
+ uint32_t image_bind_count;
+
+ bool flush_caches;
+ VkPipelineStageFlags wait_dst_stage_mask;
+ const VkSemaphore *wait_semaphores;
+ uint32_t wait_semaphore_count;
+ const VkSemaphore *signal_semaphores;
+ uint32_t signal_semaphore_count;
+ VkFence fence;
+
+ const uint64_t *wait_values;
+ uint32_t wait_value_count;
+ const uint64_t *signal_values;
+ uint32_t signal_value_count;
};
-static VkResult
-radv_queue_trigger_submission(struct radv_deferred_queue_submission *submission,
- uint32_t decrement,
- struct list_head *processing_list);
+static VkResult radv_queue_trigger_submission(struct radv_deferred_queue_submission *submission,
+ uint32_t decrement,
+ struct list_head *processing_list);
static VkResult
radv_create_deferred_submission(struct radv_queue *queue,
const struct radv_queue_submission *submission,
struct radv_deferred_queue_submission **out)
{
- struct radv_deferred_queue_submission *deferred = NULL;
- size_t size = sizeof(struct radv_deferred_queue_submission);
-
- uint32_t temporary_count = 0;
- for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
- RADV_FROM_HANDLE(radv_semaphore, semaphore, submission->wait_semaphores[i]);
- if (semaphore->temporary.kind != RADV_SEMAPHORE_NONE)
- ++temporary_count;
- }
-
- size += submission->cmd_buffer_count * sizeof(VkCommandBuffer);
- size += submission->buffer_bind_count * sizeof(VkSparseBufferMemoryBindInfo);
- size += submission->image_opaque_bind_count * sizeof(VkSparseImageOpaqueMemoryBindInfo);
- size += submission->image_bind_count * sizeof(VkSparseImageMemoryBindInfo);
-
- for (uint32_t i = 0; i < submission->image_bind_count; ++i)
- size += submission->image_binds[i].bindCount * sizeof(VkSparseImageMemoryBind);
-
- size += submission->wait_semaphore_count * sizeof(struct radv_semaphore_part *);
- size += temporary_count * sizeof(struct radv_semaphore_part);
- size += submission->signal_semaphore_count * sizeof(struct radv_semaphore_part *);
- size += submission->wait_value_count * sizeof(uint64_t);
- size += submission->signal_value_count * sizeof(uint64_t);
- size += submission->wait_semaphore_count * sizeof(struct radv_timeline_waiter);
-
- deferred = calloc(1, size);
- if (!deferred)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
-
- deferred->queue = queue;
-
- deferred->cmd_buffers = (void*)(deferred + 1);
- deferred->cmd_buffer_count = submission->cmd_buffer_count;
- if (submission->cmd_buffer_count) {
- memcpy(deferred->cmd_buffers, submission->cmd_buffers,
- submission->cmd_buffer_count * sizeof(*deferred->cmd_buffers));
- }
-
- deferred->buffer_binds = (void*)(deferred->cmd_buffers + submission->cmd_buffer_count);
- deferred->buffer_bind_count = submission->buffer_bind_count;
- if (submission->buffer_bind_count) {
- memcpy(deferred->buffer_binds, submission->buffer_binds,
- submission->buffer_bind_count * sizeof(*deferred->buffer_binds));
- }
-
- deferred->image_opaque_binds = (void*)(deferred->buffer_binds + submission->buffer_bind_count);
- deferred->image_opaque_bind_count = submission->image_opaque_bind_count;
- if (submission->image_opaque_bind_count) {
- memcpy(deferred->image_opaque_binds, submission->image_opaque_binds,
- submission->image_opaque_bind_count * sizeof(*deferred->image_opaque_binds));
- }
-
- deferred->image_binds = (void*)(deferred->image_opaque_binds + deferred->image_opaque_bind_count);
- deferred->image_bind_count = submission->image_bind_count;
-
- VkSparseImageMemoryBind *sparse_image_binds = (void*)(deferred->image_binds + deferred->image_bind_count);
- for (uint32_t i = 0; i < deferred->image_bind_count; ++i) {
- deferred->image_binds[i] = submission->image_binds[i];
- deferred->image_binds[i].pBinds = sparse_image_binds;
-
- for (uint32_t j = 0; j < deferred->image_binds[i].bindCount; ++j)
- *sparse_image_binds++ = submission->image_binds[i].pBinds[j];
- }
-
- deferred->flush_caches = submission->flush_caches;
- deferred->wait_dst_stage_mask = submission->wait_dst_stage_mask;
-
- deferred->wait_semaphores = (void*)sparse_image_binds;
- deferred->wait_semaphore_count = submission->wait_semaphore_count;
-
- deferred->signal_semaphores = (void*)(deferred->wait_semaphores + deferred->wait_semaphore_count);
- deferred->signal_semaphore_count = submission->signal_semaphore_count;
-
- deferred->fence = submission->fence;
-
- deferred->temporary_semaphore_parts = (void*)(deferred->signal_semaphores + deferred->signal_semaphore_count);
- deferred->temporary_semaphore_part_count = temporary_count;
-
- uint32_t temporary_idx = 0;
- for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
- RADV_FROM_HANDLE(radv_semaphore, semaphore, submission->wait_semaphores[i]);
- if (semaphore->temporary.kind != RADV_SEMAPHORE_NONE) {
- deferred->wait_semaphores[i] = &deferred->temporary_semaphore_parts[temporary_idx];
- deferred->temporary_semaphore_parts[temporary_idx] = semaphore->temporary;
- semaphore->temporary.kind = RADV_SEMAPHORE_NONE;
- ++temporary_idx;
- } else
- deferred->wait_semaphores[i] = &semaphore->permanent;
- }
-
- for (uint32_t i = 0; i < submission->signal_semaphore_count; ++i) {
- RADV_FROM_HANDLE(radv_semaphore, semaphore, submission->signal_semaphores[i]);
- if (semaphore->temporary.kind != RADV_SEMAPHORE_NONE) {
- deferred->signal_semaphores[i] = &semaphore->temporary;
- } else {
- deferred->signal_semaphores[i] = &semaphore->permanent;
- }
- }
-
- deferred->wait_values = (void*)(deferred->temporary_semaphore_parts + temporary_count);
- if (submission->wait_value_count) {
- memcpy(deferred->wait_values, submission->wait_values, submission->wait_value_count * sizeof(uint64_t));
- }
- deferred->signal_values = deferred->wait_values + submission->wait_value_count;
- if (submission->signal_value_count) {
- memcpy(deferred->signal_values, submission->signal_values, submission->signal_value_count * sizeof(uint64_t));
- }
-
- deferred->wait_nodes = (void*)(deferred->signal_values + submission->signal_value_count);
-   /* This is the worst case; radv_queue_enqueue_submission will refine the count, but this
-    * ensures the submission is not accidentally triggered early when adding wait timelines. */
- deferred->submission_wait_count = 1 + submission->wait_semaphore_count;
-
- *out = deferred;
- return VK_SUCCESS;
+ struct radv_deferred_queue_submission *deferred = NULL;
+ size_t size = sizeof(struct radv_deferred_queue_submission);
+
+ uint32_t temporary_count = 0;
+ for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
+ RADV_FROM_HANDLE(radv_semaphore, semaphore, submission->wait_semaphores[i]);
+ if (semaphore->temporary.kind != RADV_SEMAPHORE_NONE)
+ ++temporary_count;
+ }
+
+ size += submission->cmd_buffer_count * sizeof(VkCommandBuffer);
+ size += submission->buffer_bind_count * sizeof(VkSparseBufferMemoryBindInfo);
+ size += submission->image_opaque_bind_count * sizeof(VkSparseImageOpaqueMemoryBindInfo);
+ size += submission->image_bind_count * sizeof(VkSparseImageMemoryBindInfo);
+
+ for (uint32_t i = 0; i < submission->image_bind_count; ++i)
+ size += submission->image_binds[i].bindCount * sizeof(VkSparseImageMemoryBind);
+
+ size += submission->wait_semaphore_count * sizeof(struct radv_semaphore_part *);
+ size += temporary_count * sizeof(struct radv_semaphore_part);
+ size += submission->signal_semaphore_count * sizeof(struct radv_semaphore_part *);
+ size += submission->wait_value_count * sizeof(uint64_t);
+ size += submission->signal_value_count * sizeof(uint64_t);
+ size += submission->wait_semaphore_count * sizeof(struct radv_timeline_waiter);
+
+ deferred = calloc(1, size);
+ if (!deferred)
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+ deferred->queue = queue;
+
+ deferred->cmd_buffers = (void *)(deferred + 1);
+ deferred->cmd_buffer_count = submission->cmd_buffer_count;
+ if (submission->cmd_buffer_count) {
+ memcpy(deferred->cmd_buffers, submission->cmd_buffers,
+ submission->cmd_buffer_count * sizeof(*deferred->cmd_buffers));
+ }
+
+ deferred->buffer_binds = (void *)(deferred->cmd_buffers + submission->cmd_buffer_count);
+ deferred->buffer_bind_count = submission->buffer_bind_count;
+ if (submission->buffer_bind_count) {
+ memcpy(deferred->buffer_binds, submission->buffer_binds,
+ submission->buffer_bind_count * sizeof(*deferred->buffer_binds));
+ }
+
+ deferred->image_opaque_binds = (void *)(deferred->buffer_binds + submission->buffer_bind_count);
+ deferred->image_opaque_bind_count = submission->image_opaque_bind_count;
+ if (submission->image_opaque_bind_count) {
+ memcpy(deferred->image_opaque_binds, submission->image_opaque_binds,
+ submission->image_opaque_bind_count * sizeof(*deferred->image_opaque_binds));
+ }
+
+ deferred->image_binds =
+ (void *)(deferred->image_opaque_binds + deferred->image_opaque_bind_count);
+ deferred->image_bind_count = submission->image_bind_count;
+
+ VkSparseImageMemoryBind *sparse_image_binds =
+ (void *)(deferred->image_binds + deferred->image_bind_count);
+ for (uint32_t i = 0; i < deferred->image_bind_count; ++i) {
+ deferred->image_binds[i] = submission->image_binds[i];
+ deferred->image_binds[i].pBinds = sparse_image_binds;
+
+ for (uint32_t j = 0; j < deferred->image_binds[i].bindCount; ++j)
+ *sparse_image_binds++ = submission->image_binds[i].pBinds[j];
+ }
+
+ deferred->flush_caches = submission->flush_caches;
+ deferred->wait_dst_stage_mask = submission->wait_dst_stage_mask;
+
+ deferred->wait_semaphores = (void *)sparse_image_binds;
+ deferred->wait_semaphore_count = submission->wait_semaphore_count;
+
+ deferred->signal_semaphores =
+ (void *)(deferred->wait_semaphores + deferred->wait_semaphore_count);
+ deferred->signal_semaphore_count = submission->signal_semaphore_count;
+
+ deferred->fence = submission->fence;
+
+ deferred->temporary_semaphore_parts =
+ (void *)(deferred->signal_semaphores + deferred->signal_semaphore_count);
+ deferred->temporary_semaphore_part_count = temporary_count;
+
+ uint32_t temporary_idx = 0;
+ for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
+ RADV_FROM_HANDLE(radv_semaphore, semaphore, submission->wait_semaphores[i]);
+ if (semaphore->temporary.kind != RADV_SEMAPHORE_NONE) {
+ deferred->wait_semaphores[i] = &deferred->temporary_semaphore_parts[temporary_idx];
+ deferred->temporary_semaphore_parts[temporary_idx] = semaphore->temporary;
+ semaphore->temporary.kind = RADV_SEMAPHORE_NONE;
+ ++temporary_idx;
+ } else
+ deferred->wait_semaphores[i] = &semaphore->permanent;
+ }
+
+ for (uint32_t i = 0; i < submission->signal_semaphore_count; ++i) {
+ RADV_FROM_HANDLE(radv_semaphore, semaphore, submission->signal_semaphores[i]);
+ if (semaphore->temporary.kind != RADV_SEMAPHORE_NONE) {
+ deferred->signal_semaphores[i] = &semaphore->temporary;
+ } else {
+ deferred->signal_semaphores[i] = &semaphore->permanent;
+ }
+ }
+
+ deferred->wait_values = (void *)(deferred->temporary_semaphore_parts + temporary_count);
+ if (submission->wait_value_count) {
+ memcpy(deferred->wait_values, submission->wait_values,
+ submission->wait_value_count * sizeof(uint64_t));
+ }
+ deferred->signal_values = deferred->wait_values + submission->wait_value_count;
+ if (submission->signal_value_count) {
+ memcpy(deferred->signal_values, submission->signal_values,
+ submission->signal_value_count * sizeof(uint64_t));
+ }
+
+ deferred->wait_nodes = (void *)(deferred->signal_values + submission->signal_value_count);
+ /* This is worst-case. radv_queue_enqueue_submission will fill in further, but this
+ * ensures the submission is not accidentally triggered early when adding wait timelines. */
+ deferred->submission_wait_count = 1 + submission->wait_semaphore_count;
+
+ *out = deferred;
+ return VK_SUCCESS;
}
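
radv_create_deferred_submission above copies everything it needs into a single heap block: the struct comes first and the arrays (command buffers, binds, semaphores, values, wait nodes) are laid out immediately after it, so one free() releases the whole submission. Below is a minimal sketch of that trailing-allocation pattern, using hypothetical names rather than the RADV types:

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

struct deferred {              /* stand-in for radv_deferred_queue_submission */
   uint32_t  value_count;
   uint64_t *values;           /* points into the same allocation, right after the struct */
};

static struct deferred *
deferred_create(const uint64_t *values, uint32_t count)
{
   /* Header and payload in one malloc, so teardown is a single free(d). */
   struct deferred *d = malloc(sizeof(*d) + count * sizeof(uint64_t));
   if (!d)
      return NULL;

   d->values = (uint64_t *)(d + 1);   /* payload begins right after the header */
   d->value_count = count;
   memcpy(d->values, values, count * sizeof(uint64_t));
   return d;
}
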
static VkResult
radv_queue_enqueue_submission(struct radv_deferred_queue_submission *submission,
struct list_head *processing_list)
{
- uint32_t wait_cnt = 0;
- struct radv_timeline_waiter *waiter = submission->wait_nodes;
- for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
- if (submission->wait_semaphores[i]->kind == RADV_SEMAPHORE_TIMELINE) {
- mtx_lock(&submission->wait_semaphores[i]->timeline.mutex);
- if (submission->wait_semaphores[i]->timeline.highest_submitted < submission->wait_values[i]) {
- ++wait_cnt;
- waiter->value = submission->wait_values[i];
- waiter->submission = submission;
- list_addtail(&waiter->list, &submission->wait_semaphores[i]->timeline.waiters);
- ++waiter;
- }
- mtx_unlock(&submission->wait_semaphores[i]->timeline.mutex);
- }
- }
+ uint32_t wait_cnt = 0;
+ struct radv_timeline_waiter *waiter = submission->wait_nodes;
+ for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
+ if (submission->wait_semaphores[i]->kind == RADV_SEMAPHORE_TIMELINE) {
+ mtx_lock(&submission->wait_semaphores[i]->timeline.mutex);
+ if (submission->wait_semaphores[i]->timeline.highest_submitted <
+ submission->wait_values[i]) {
+ ++wait_cnt;
+ waiter->value = submission->wait_values[i];
+ waiter->submission = submission;
+ list_addtail(&waiter->list, &submission->wait_semaphores[i]->timeline.waiters);
+ ++waiter;
+ }
+ mtx_unlock(&submission->wait_semaphores[i]->timeline.mutex);
+ }
+ }
- mtx_lock(&submission->queue->pending_mutex);
+ mtx_lock(&submission->queue->pending_mutex);
- bool is_first = list_is_empty(&submission->queue->pending_submissions);
- list_addtail(&submission->queue_pending_list, &submission->queue->pending_submissions);
+ bool is_first = list_is_empty(&submission->queue->pending_submissions);
+ list_addtail(&submission->queue_pending_list, &submission->queue->pending_submissions);
- mtx_unlock(&submission->queue->pending_mutex);
+ mtx_unlock(&submission->queue->pending_mutex);
- /* If there is already a submission in the queue, that will decrement the counter by 1 when
- * submitted, but if the queue was empty, we decrement ourselves as there is no previous
- * submission. */
- uint32_t decrement = submission->wait_semaphore_count - wait_cnt + (is_first ? 1 : 0);
+ /* If there is already a submission in the queue, that will decrement the counter by 1 when
+ * submitted, but if the queue was empty, we decrement ourselves as there is no previous
+ * submission. */
+ uint32_t decrement = submission->wait_semaphore_count - wait_cnt + (is_first ? 1 : 0);
- /* if decrement is zero, then we don't have a refcounted reference to the
- * submission anymore, so it is not safe to access the submission. */
- if (!decrement)
- return VK_SUCCESS;
+ /* if decrement is zero, then we don't have a refcounted reference to the
+ * submission anymore, so it is not safe to access the submission. */
+ if (!decrement)
+ return VK_SUCCESS;
- return radv_queue_trigger_submission(submission, decrement, processing_list);
+ return radv_queue_trigger_submission(submission, decrement, processing_list);
}
static void
radv_queue_submission_update_queue(struct radv_deferred_queue_submission *submission,
struct list_head *processing_list)
{
- mtx_lock(&submission->queue->pending_mutex);
- list_del(&submission->queue_pending_list);
+ mtx_lock(&submission->queue->pending_mutex);
+ list_del(&submission->queue_pending_list);
- /* trigger the next submission in the queue. */
- if (!list_is_empty(&submission->queue->pending_submissions)) {
- struct radv_deferred_queue_submission *next_submission =
- list_first_entry(&submission->queue->pending_submissions,
- struct radv_deferred_queue_submission,
- queue_pending_list);
- radv_queue_trigger_submission(next_submission, 1, processing_list);
- }
- mtx_unlock(&submission->queue->pending_mutex);
+ /* trigger the next submission in the queue. */
+ if (!list_is_empty(&submission->queue->pending_submissions)) {
+ struct radv_deferred_queue_submission *next_submission =
+ list_first_entry(&submission->queue->pending_submissions,
+ struct radv_deferred_queue_submission, queue_pending_list);
+ radv_queue_trigger_submission(next_submission, 1, processing_list);
+ }
+ mtx_unlock(&submission->queue->pending_mutex);
- u_cnd_monotonic_broadcast(&submission->queue->device->timeline_cond);
+ u_cnd_monotonic_broadcast(&submission->queue->device->timeline_cond);
}
static VkResult
radv_queue_submit_deferred(struct radv_deferred_queue_submission *submission,
struct list_head *processing_list)
{
- struct radv_queue *queue = submission->queue;
- struct radeon_winsys_ctx *ctx = queue->hw_ctx;
- uint32_t max_cs_submission = queue->device->trace_bo ? 1 : RADV_MAX_IBS_PER_SUBMIT;
- bool do_flush = submission->flush_caches || submission->wait_dst_stage_mask;
- bool can_patch = true;
- uint32_t advance;
- struct radv_winsys_sem_info sem_info = {0};
- VkResult result;
- struct radeon_cmdbuf *initial_preamble_cs = NULL;
- struct radeon_cmdbuf *initial_flush_preamble_cs = NULL;
- struct radeon_cmdbuf *continue_preamble_cs = NULL;
-
- result = radv_get_preambles(queue, submission->cmd_buffers,
- submission->cmd_buffer_count,
- &initial_preamble_cs,
- &initial_flush_preamble_cs,
- &continue_preamble_cs);
- if (result != VK_SUCCESS)
- goto fail;
-
- result = radv_alloc_sem_info(queue->device,
- &sem_info,
- submission->wait_semaphore_count,
- submission->wait_semaphores,
- submission->wait_values,
- submission->signal_semaphore_count,
- submission->signal_semaphores,
- submission->signal_values,
- submission->fence);
- if (result != VK_SUCCESS)
- goto fail;
-
- for (uint32_t i = 0; i < submission->buffer_bind_count; ++i) {
- result = radv_sparse_buffer_bind_memory(queue->device,
- submission->buffer_binds + i);
- if (result != VK_SUCCESS)
- goto fail;
- }
-
- for (uint32_t i = 0; i < submission->image_opaque_bind_count; ++i) {
- result = radv_sparse_image_opaque_bind_memory(queue->device,
- submission->image_opaque_binds + i);
- if (result != VK_SUCCESS)
- goto fail;
- }
-
- for (uint32_t i = 0; i < submission->image_bind_count; ++i) {
- result = radv_sparse_image_bind_memory(queue->device,
- submission->image_binds + i);
- if (result != VK_SUCCESS)
- goto fail;
- }
-
- if (!submission->cmd_buffer_count) {
- result = queue->device->ws->cs_submit(ctx, queue->queue_idx,
- &queue->device->empty_cs[queue->queue_family_index],
- 1, NULL, NULL,
- &sem_info,
- false);
- if (result != VK_SUCCESS)
- goto fail;
- } else {
- struct radeon_cmdbuf **cs_array = malloc(sizeof(struct radeon_cmdbuf *) *
- (submission->cmd_buffer_count));
-
- for (uint32_t j = 0; j < submission->cmd_buffer_count; j++) {
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, submission->cmd_buffers[j]);
- assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
-
- cs_array[j] = cmd_buffer->cs;
- if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
- can_patch = false;
-
- cmd_buffer->status = RADV_CMD_BUFFER_STATUS_PENDING;
- }
-
- for (uint32_t j = 0; j < submission->cmd_buffer_count; j += advance) {
- struct radeon_cmdbuf *initial_preamble = (do_flush && !j) ? initial_flush_preamble_cs : initial_preamble_cs;
- advance = MIN2(max_cs_submission,
- submission->cmd_buffer_count - j);
-
- if (queue->device->trace_bo)
- *queue->device->trace_id_ptr = 0;
-
- sem_info.cs_emit_wait = j == 0;
- sem_info.cs_emit_signal = j + advance == submission->cmd_buffer_count;
-
- result = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
- advance, initial_preamble, continue_preamble_cs,
- &sem_info,
- can_patch);
- if (result != VK_SUCCESS) {
- free(cs_array);
- goto fail;
- }
-
- if (queue->device->trace_bo) {
- radv_check_gpu_hangs(queue, cs_array[j]);
- }
-
- if (queue->device->tma_bo) {
- radv_check_trap_handler(queue);
- }
- }
-
- free(cs_array);
- }
-
- radv_finalize_timelines(queue->device,
- submission->wait_semaphore_count,
- submission->wait_semaphores,
- submission->wait_values,
- submission->signal_semaphore_count,
- submission->signal_semaphores,
- submission->signal_values,
- processing_list);
- /* Has to happen after timeline finalization to make sure the
- * condition variable is only triggered when timelines and queue have
- * been updated. */
- radv_queue_submission_update_queue(submission, processing_list);
+ struct radv_queue *queue = submission->queue;
+ struct radeon_winsys_ctx *ctx = queue->hw_ctx;
+ uint32_t max_cs_submission = queue->device->trace_bo ? 1 : RADV_MAX_IBS_PER_SUBMIT;
+ bool do_flush = submission->flush_caches || submission->wait_dst_stage_mask;
+ bool can_patch = true;
+ uint32_t advance;
+ struct radv_winsys_sem_info sem_info = {0};
+ VkResult result;
+ struct radeon_cmdbuf *initial_preamble_cs = NULL;
+ struct radeon_cmdbuf *initial_flush_preamble_cs = NULL;
+ struct radeon_cmdbuf *continue_preamble_cs = NULL;
+
+ result =
+ radv_get_preambles(queue, submission->cmd_buffers, submission->cmd_buffer_count,
+ &initial_preamble_cs, &initial_flush_preamble_cs, &continue_preamble_cs);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ result = radv_alloc_sem_info(queue->device, &sem_info, submission->wait_semaphore_count,
+ submission->wait_semaphores, submission->wait_values,
+ submission->signal_semaphore_count, submission->signal_semaphores,
+ submission->signal_values, submission->fence);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ for (uint32_t i = 0; i < submission->buffer_bind_count; ++i) {
+ result = radv_sparse_buffer_bind_memory(queue->device, submission->buffer_binds + i);
+ if (result != VK_SUCCESS)
+ goto fail;
+ }
+
+ for (uint32_t i = 0; i < submission->image_opaque_bind_count; ++i) {
+ result =
+ radv_sparse_image_opaque_bind_memory(queue->device, submission->image_opaque_binds + i);
+ if (result != VK_SUCCESS)
+ goto fail;
+ }
+
+ for (uint32_t i = 0; i < submission->image_bind_count; ++i) {
+ result = radv_sparse_image_bind_memory(queue->device, submission->image_binds + i);
+ if (result != VK_SUCCESS)
+ goto fail;
+ }
+
+ if (!submission->cmd_buffer_count) {
+ result = queue->device->ws->cs_submit(ctx, queue->queue_idx,
+ &queue->device->empty_cs[queue->queue_family_index], 1,
+ NULL, NULL, &sem_info, false);
+ if (result != VK_SUCCESS)
+ goto fail;
+ } else {
+ struct radeon_cmdbuf **cs_array =
+ malloc(sizeof(struct radeon_cmdbuf *) * (submission->cmd_buffer_count));
+
+ for (uint32_t j = 0; j < submission->cmd_buffer_count; j++) {
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, submission->cmd_buffers[j]);
+ assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
+
+ cs_array[j] = cmd_buffer->cs;
+ if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
+ can_patch = false;
+
+ cmd_buffer->status = RADV_CMD_BUFFER_STATUS_PENDING;
+ }
+
+ for (uint32_t j = 0; j < submission->cmd_buffer_count; j += advance) {
+ struct radeon_cmdbuf *initial_preamble =
+ (do_flush && !j) ? initial_flush_preamble_cs : initial_preamble_cs;
+ advance = MIN2(max_cs_submission, submission->cmd_buffer_count - j);
+
+ if (queue->device->trace_bo)
+ *queue->device->trace_id_ptr = 0;
+
+ sem_info.cs_emit_wait = j == 0;
+ sem_info.cs_emit_signal = j + advance == submission->cmd_buffer_count;
+
+ result = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j, advance,
+ initial_preamble, continue_preamble_cs, &sem_info,
+ can_patch);
+ if (result != VK_SUCCESS) {
+ free(cs_array);
+ goto fail;
+ }
+
+ if (queue->device->trace_bo) {
+ radv_check_gpu_hangs(queue, cs_array[j]);
+ }
+
+ if (queue->device->tma_bo) {
+ radv_check_trap_handler(queue);
+ }
+ }
+
+ free(cs_array);
+ }
+
+ radv_finalize_timelines(queue->device, submission->wait_semaphore_count,
+ submission->wait_semaphores, submission->wait_values,
+ submission->signal_semaphore_count, submission->signal_semaphores,
+ submission->signal_values, processing_list);
+ /* Has to happen after timeline finalization to make sure the
+ * condition variable is only triggered when timelines and queue have
+ * been updated. */
+ radv_queue_submission_update_queue(submission, processing_list);
fail:
- if (result != VK_SUCCESS && result != VK_ERROR_DEVICE_LOST) {
- /* When something bad happened during the submission, such as
- * an out of memory issue, it might be hard to recover from
- * this inconsistent state. To avoid this sort of problem, we
- * assume that we are in a really bad situation and return
- * VK_ERROR_DEVICE_LOST to ensure the clients do not attempt
- * to submit the same job again to this device.
- */
- result = radv_device_set_lost(queue->device, "vkQueueSubmit() failed");
- }
-
- radv_free_temp_syncobjs(queue->device,
- submission->temporary_semaphore_part_count,
- submission->temporary_semaphore_parts);
- radv_free_sem_info(&sem_info);
- free(submission);
- return result;
+ if (result != VK_SUCCESS && result != VK_ERROR_DEVICE_LOST) {
+ /* When something bad happened during the submission, such as
+ * an out of memory issue, it might be hard to recover from
+ * this inconsistent state. To avoid this sort of problem, we
+ * assume that we are in a really bad situation and return
+ * VK_ERROR_DEVICE_LOST to ensure the clients do not attempt
+ * to submit the same job again to this device.
+ */
+ result = radv_device_set_lost(queue->device, "vkQueueSubmit() failed");
+ }
+
+ radv_free_temp_syncobjs(queue->device, submission->temporary_semaphore_part_count,
+ submission->temporary_semaphore_parts);
+ radv_free_sem_info(&sem_info);
+ free(submission);
+ return result;
}
static VkResult
radv_process_submissions(struct list_head *processing_list)
{
- while(!list_is_empty(processing_list)) {
- struct radv_deferred_queue_submission *submission =
- list_first_entry(processing_list, struct radv_deferred_queue_submission, processing_list);
- list_del(&submission->processing_list);
+ while (!list_is_empty(processing_list)) {
+ struct radv_deferred_queue_submission *submission =
+ list_first_entry(processing_list, struct radv_deferred_queue_submission, processing_list);
+ list_del(&submission->processing_list);
- VkResult result = radv_queue_submit_deferred(submission, processing_list);
- if (result != VK_SUCCESS)
- return result;
- }
- return VK_SUCCESS;
+ VkResult result = radv_queue_submit_deferred(submission, processing_list);
+ if (result != VK_SUCCESS)
+ return result;
+ }
+ return VK_SUCCESS;
}
static VkResult
wait_for_submission_timelines_available(struct radv_deferred_queue_submission *submission,
uint64_t timeout)
{
- struct radv_device *device = submission->queue->device;
- uint32_t syncobj_count = 0;
- uint32_t syncobj_idx = 0;
+ struct radv_device *device = submission->queue->device;
+ uint32_t syncobj_count = 0;
+ uint32_t syncobj_idx = 0;
- for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
- if (submission->wait_semaphores[i]->kind != RADV_SEMAPHORE_TIMELINE_SYNCOBJ)
- continue;
+ for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
+ if (submission->wait_semaphores[i]->kind != RADV_SEMAPHORE_TIMELINE_SYNCOBJ)
+ continue;
- if (submission->wait_semaphores[i]->timeline_syncobj.max_point >= submission->wait_values[i])
- continue;
- ++syncobj_count;
- }
+ if (submission->wait_semaphores[i]->timeline_syncobj.max_point >= submission->wait_values[i])
+ continue;
+ ++syncobj_count;
+ }
- if (!syncobj_count)
- return VK_SUCCESS;
+ if (!syncobj_count)
+ return VK_SUCCESS;
- uint64_t *points = malloc((sizeof(uint64_t) + sizeof(uint32_t)) * syncobj_count);
- if (!points)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ uint64_t *points = malloc((sizeof(uint64_t) + sizeof(uint32_t)) * syncobj_count);
+ if (!points)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- uint32_t *syncobj = (uint32_t*)(points + syncobj_count);
+ uint32_t *syncobj = (uint32_t *)(points + syncobj_count);
- for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
- if (submission->wait_semaphores[i]->kind != RADV_SEMAPHORE_TIMELINE_SYNCOBJ)
- continue;
+ for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
+ if (submission->wait_semaphores[i]->kind != RADV_SEMAPHORE_TIMELINE_SYNCOBJ)
+ continue;
- if (submission->wait_semaphores[i]->timeline_syncobj.max_point >= submission->wait_values[i])
- continue;
+ if (submission->wait_semaphores[i]->timeline_syncobj.max_point >= submission->wait_values[i])
+ continue;
- syncobj[syncobj_idx] = submission->wait_semaphores[i]->syncobj;
- points[syncobj_idx] = submission->wait_values[i];
- ++syncobj_idx;
- }
- bool success = device->ws->wait_timeline_syncobj(device->ws, syncobj, points, syncobj_idx, true, true, timeout);
+ syncobj[syncobj_idx] = submission->wait_semaphores[i]->syncobj;
+ points[syncobj_idx] = submission->wait_values[i];
+ ++syncobj_idx;
+ }
+ bool success = device->ws->wait_timeline_syncobj(device->ws, syncobj, points, syncobj_idx, true,
+ true, timeout);
- free(points);
- return success ? VK_SUCCESS : VK_TIMEOUT;
+ free(points);
+ return success ? VK_SUCCESS : VK_TIMEOUT;
}
-static int radv_queue_submission_thread_run(void *q)
-{
- struct radv_queue *queue = q;
-
- mtx_lock(&queue->thread_mutex);
- while (!p_atomic_read(&queue->thread_exit)) {
- struct radv_deferred_queue_submission *submission = queue->thread_submission;
- struct list_head processing_list;
- VkResult result = VK_SUCCESS;
- if (!submission) {
- u_cnd_monotonic_wait(&queue->thread_cond, &queue->thread_mutex);
- continue;
- }
- mtx_unlock(&queue->thread_mutex);
-
- /* Wait at most 5 seconds so we have a chance to notice shutdown when
- * a semaphore never gets signaled. If it takes longer we just retry
- * the wait next iteration. */
- result = wait_for_submission_timelines_available(submission,
- radv_get_absolute_timeout(5000000000));
- if (result != VK_SUCCESS) {
- mtx_lock(&queue->thread_mutex);
- continue;
- }
-
- /* The lock isn't held but nobody will add one until we finish
- * the current submission. */
- p_atomic_set(&queue->thread_submission, NULL);
-
- list_inithead(&processing_list);
- list_addtail(&submission->processing_list, &processing_list);
- result = radv_process_submissions(&processing_list);
-
- mtx_lock(&queue->thread_mutex);
- }
- mtx_unlock(&queue->thread_mutex);
- return 0;
+static int
+radv_queue_submission_thread_run(void *q)
+{
+ struct radv_queue *queue = q;
+
+ mtx_lock(&queue->thread_mutex);
+ while (!p_atomic_read(&queue->thread_exit)) {
+ struct radv_deferred_queue_submission *submission = queue->thread_submission;
+ struct list_head processing_list;
+ VkResult result = VK_SUCCESS;
+ if (!submission) {
+ u_cnd_monotonic_wait(&queue->thread_cond, &queue->thread_mutex);
+ continue;
+ }
+ mtx_unlock(&queue->thread_mutex);
+
+ /* Wait at most 5 seconds so we have a chance to notice shutdown when
+ * a semaphore never gets signaled. If it takes longer we just retry
+ * the wait next iteration. */
+ result =
+ wait_for_submission_timelines_available(submission, radv_get_absolute_timeout(5000000000));
+ if (result != VK_SUCCESS) {
+ mtx_lock(&queue->thread_mutex);
+ continue;
+ }
+
+ /* The lock isn't held but nobody will add one until we finish
+ * the current submission. */
+ p_atomic_set(&queue->thread_submission, NULL);
+
+ list_inithead(&processing_list);
+ list_addtail(&submission->processing_list, &processing_list);
+ result = radv_process_submissions(&processing_list);
+
+ mtx_lock(&queue->thread_mutex);
+ }
+ mtx_unlock(&queue->thread_mutex);
+ return 0;
}
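
The submission thread above sleeps on a condition variable until a submission is handed over or thread_exit is set, re-checks its predicate in a loop, and drops the mutex while it processes work. A compact sketch of that wait loop with C11 threads, leaving out the timed syncobj wait (illustrative names only):

#include <stdbool.h>
#include <threads.h>

struct worker {
   mtx_t mutex;
   cnd_t cond;
   bool  exit;
   void *job;   /* at most one pending job, handed over under the mutex */
};

static int
worker_run(void *arg)
{
   struct worker *w = arg;

   mtx_lock(&w->mutex);
   while (!w->exit) {
      void *job = w->job;
      if (!job) {
         /* Releases the mutex while sleeping and re-acquires it on wakeup. */
         cnd_wait(&w->cond, &w->mutex);
         continue;
      }
      w->job = NULL;
      mtx_unlock(&w->mutex);

      (void)job;   /* ... process the job outside the lock ... */

      mtx_lock(&w->mutex);
   }
   mtx_unlock(&w->mutex);
   return 0;
}
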
static VkResult
-radv_queue_trigger_submission(struct radv_deferred_queue_submission *submission,
- uint32_t decrement,
+radv_queue_trigger_submission(struct radv_deferred_queue_submission *submission, uint32_t decrement,
struct list_head *processing_list)
{
- struct radv_queue *queue = submission->queue;
- int ret;
- if (p_atomic_add_return(&submission->submission_wait_count, -decrement))
- return VK_SUCCESS;
+ struct radv_queue *queue = submission->queue;
+ int ret;
+ if (p_atomic_add_return(&submission->submission_wait_count, -decrement))
+ return VK_SUCCESS;
- if (wait_for_submission_timelines_available(submission, radv_get_absolute_timeout(0)) == VK_SUCCESS) {
- list_addtail(&submission->processing_list, processing_list);
- return VK_SUCCESS;
- }
+ if (wait_for_submission_timelines_available(submission, radv_get_absolute_timeout(0)) ==
+ VK_SUCCESS) {
+ list_addtail(&submission->processing_list, processing_list);
+ return VK_SUCCESS;
+ }
- mtx_lock(&queue->thread_mutex);
+ mtx_lock(&queue->thread_mutex);
- /* A submission can only be ready for the thread if it doesn't have
- * any predecessors in the same queue, so there can only be one such
- * submission at a time. */
- assert(queue->thread_submission == NULL);
+ /* A submission can only be ready for the thread if it doesn't have
+ * any predecessors in the same queue, so there can only be one such
+ * submission at a time. */
+ assert(queue->thread_submission == NULL);
- /* Only start the thread on demand to save resources for the many games
- * which only use binary semaphores. */
- if (!queue->thread_running) {
- ret = thrd_create(&queue->submission_thread,
- radv_queue_submission_thread_run, queue);
- if (ret) {
- mtx_unlock(&queue->thread_mutex);
- return vk_errorf(queue->device->instance,
- VK_ERROR_DEVICE_LOST,
- "Failed to start submission thread");
- }
- queue->thread_running = true;
- }
+ /* Only start the thread on demand to save resources for the many games
+ * which only use binary semaphores. */
+ if (!queue->thread_running) {
+ ret = thrd_create(&queue->submission_thread, radv_queue_submission_thread_run, queue);
+ if (ret) {
+ mtx_unlock(&queue->thread_mutex);
+ return vk_errorf(queue->device->instance, VK_ERROR_DEVICE_LOST,
+ "Failed to start submission thread");
+ }
+ queue->thread_running = true;
+ }
- queue->thread_submission = submission;
- mtx_unlock(&queue->thread_mutex);
+ queue->thread_submission = submission;
+ mtx_unlock(&queue->thread_mutex);
- u_cnd_monotonic_signal(&queue->thread_cond);
- return VK_SUCCESS;
+ u_cnd_monotonic_signal(&queue->thread_cond);
+ return VK_SUCCESS;
}
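
The wait counter is the key to radv_queue_trigger_submission: each dependency (plus the queue predecessor) holds one reference, and only the caller that drops the count to zero may process, or even touch, the submission afterwards. A minimal sketch of that countdown-to-zero handoff with C11 atomics (a hypothetical helper, not the p_atomic_* wrappers):

#include <stdatomic.h>
#include <stdbool.h>

struct pending {
   atomic_uint wait_count;   /* outstanding dependencies + the queue predecessor */
};

/* Returns true when the caller dropped the last reference and now owns the
 * submission; any caller that gets false must not touch it afterwards. */
static bool
pending_release(struct pending *p, unsigned decrement)
{
   /* atomic_fetch_sub returns the previous value, so seeing exactly
    * `decrement` means the counter just reached zero. */
   return atomic_fetch_sub(&p->wait_count, decrement) == decrement;
}
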
-static VkResult radv_queue_submit(struct radv_queue *queue,
- const struct radv_queue_submission *submission)
+static VkResult
+radv_queue_submit(struct radv_queue *queue, const struct radv_queue_submission *submission)
{
- struct radv_deferred_queue_submission *deferred = NULL;
+ struct radv_deferred_queue_submission *deferred = NULL;
- VkResult result = radv_create_deferred_submission(queue, submission, &deferred);
- if (result != VK_SUCCESS)
- return result;
+ VkResult result = radv_create_deferred_submission(queue, submission, &deferred);
+ if (result != VK_SUCCESS)
+ return result;
- struct list_head processing_list;
- list_inithead(&processing_list);
+ struct list_head processing_list;
+ list_inithead(&processing_list);
- result = radv_queue_enqueue_submission(deferred, &processing_list);
- if (result != VK_SUCCESS) {
- /* If anything is in the list we leak. */
- assert(list_is_empty(&processing_list));
- return result;
- }
- return radv_process_submissions(&processing_list);
+ result = radv_queue_enqueue_submission(deferred, &processing_list);
+ if (result != VK_SUCCESS) {
+ /* If anything is in the list we leak. */
+ assert(list_is_empty(&processing_list));
+ return result;
+ }
+ return radv_process_submissions(&processing_list);
}
bool
radv_queue_internal_submit(struct radv_queue *queue, struct radeon_cmdbuf *cs)
{
- struct radeon_winsys_ctx *ctx = queue->hw_ctx;
- struct radv_winsys_sem_info sem_info = {0};
- VkResult result;
+ struct radeon_winsys_ctx *ctx = queue->hw_ctx;
+ struct radv_winsys_sem_info sem_info = {0};
+ VkResult result;
- result = radv_alloc_sem_info(queue->device, &sem_info, 0, NULL, 0, 0,
- 0, NULL, VK_NULL_HANDLE);
- if (result != VK_SUCCESS)
- return false;
+ result = radv_alloc_sem_info(queue->device, &sem_info, 0, NULL, 0, 0, 0, NULL, VK_NULL_HANDLE);
+ if (result != VK_SUCCESS)
+ return false;
- result = queue->device->ws->cs_submit(ctx, queue->queue_idx, &cs, 1,
- NULL, NULL, &sem_info,
- false);
- radv_free_sem_info(&sem_info);
- if (result != VK_SUCCESS)
- return false;
-
- return true;
+ result =
+ queue->device->ws->cs_submit(ctx, queue->queue_idx, &cs, 1, NULL, NULL, &sem_info, false);
+ radv_free_sem_info(&sem_info);
+ if (result != VK_SUCCESS)
+ return false;
+ return true;
}
/* Signals fence as soon as all the work currently put on the queue is done. */
-static VkResult radv_signal_fence(struct radv_queue *queue,
- VkFence fence)
-{
- return radv_queue_submit(queue, &(struct radv_queue_submission) {
- .fence = fence
- });
-}
-
-static bool radv_submit_has_effects(const VkSubmitInfo *info)
-{
- return info->commandBufferCount ||
- info->waitSemaphoreCount ||
- info->signalSemaphoreCount;
-}
-
-VkResult radv_QueueSubmit(
- VkQueue _queue,
- uint32_t submitCount,
- const VkSubmitInfo* pSubmits,
- VkFence fence)
-{
- RADV_FROM_HANDLE(radv_queue, queue, _queue);
- VkResult result;
- uint32_t fence_idx = 0;
- bool flushed_caches = false;
-
- if (radv_device_is_lost(queue->device))
- return VK_ERROR_DEVICE_LOST;
-
- if (fence != VK_NULL_HANDLE) {
- for (uint32_t i = 0; i < submitCount; ++i)
- if (radv_submit_has_effects(pSubmits + i))
- fence_idx = i;
- } else
- fence_idx = UINT32_MAX;
-
- for (uint32_t i = 0; i < submitCount; i++) {
- if (!radv_submit_has_effects(pSubmits + i) && fence_idx != i)
- continue;
-
- VkPipelineStageFlags wait_dst_stage_mask = 0;
- for (unsigned j = 0; j < pSubmits[i].waitSemaphoreCount; ++j) {
- wait_dst_stage_mask |= pSubmits[i].pWaitDstStageMask[j];
- }
-
- const VkTimelineSemaphoreSubmitInfo *timeline_info =
- vk_find_struct_const(pSubmits[i].pNext, TIMELINE_SEMAPHORE_SUBMIT_INFO);
-
- result = radv_queue_submit(queue, &(struct radv_queue_submission) {
- .cmd_buffers = pSubmits[i].pCommandBuffers,
- .cmd_buffer_count = pSubmits[i].commandBufferCount,
- .wait_dst_stage_mask = wait_dst_stage_mask,
- .flush_caches = !flushed_caches,
- .wait_semaphores = pSubmits[i].pWaitSemaphores,
- .wait_semaphore_count = pSubmits[i].waitSemaphoreCount,
- .signal_semaphores = pSubmits[i].pSignalSemaphores,
- .signal_semaphore_count = pSubmits[i].signalSemaphoreCount,
- .fence = i == fence_idx ? fence : VK_NULL_HANDLE,
- .wait_values = timeline_info ? timeline_info->pWaitSemaphoreValues : NULL,
- .wait_value_count = timeline_info && timeline_info->pWaitSemaphoreValues ? timeline_info->waitSemaphoreValueCount : 0,
- .signal_values = timeline_info ? timeline_info->pSignalSemaphoreValues : NULL,
- .signal_value_count = timeline_info && timeline_info->pSignalSemaphoreValues ? timeline_info->signalSemaphoreValueCount : 0,
- });
- if (result != VK_SUCCESS)
- return result;
-
- flushed_caches = true;
- }
-
- if (fence != VK_NULL_HANDLE && !submitCount) {
- result = radv_signal_fence(queue, fence);
- if (result != VK_SUCCESS)
- return result;
- }
-
- return VK_SUCCESS;
+static VkResult
+radv_signal_fence(struct radv_queue *queue, VkFence fence)
+{
+ return radv_queue_submit(queue, &(struct radv_queue_submission){.fence = fence});
+}
+
+static bool
+radv_submit_has_effects(const VkSubmitInfo *info)
+{
+ return info->commandBufferCount || info->waitSemaphoreCount || info->signalSemaphoreCount;
+}
+
+VkResult
+radv_QueueSubmit(VkQueue _queue, uint32_t submitCount, const VkSubmitInfo *pSubmits, VkFence fence)
+{
+ RADV_FROM_HANDLE(radv_queue, queue, _queue);
+ VkResult result;
+ uint32_t fence_idx = 0;
+ bool flushed_caches = false;
+
+ if (radv_device_is_lost(queue->device))
+ return VK_ERROR_DEVICE_LOST;
+
+ if (fence != VK_NULL_HANDLE) {
+ for (uint32_t i = 0; i < submitCount; ++i)
+ if (radv_submit_has_effects(pSubmits + i))
+ fence_idx = i;
+ } else
+ fence_idx = UINT32_MAX;
+
+ for (uint32_t i = 0; i < submitCount; i++) {
+ if (!radv_submit_has_effects(pSubmits + i) && fence_idx != i)
+ continue;
+
+ VkPipelineStageFlags wait_dst_stage_mask = 0;
+ for (unsigned j = 0; j < pSubmits[i].waitSemaphoreCount; ++j) {
+ wait_dst_stage_mask |= pSubmits[i].pWaitDstStageMask[j];
+ }
+
+ const VkTimelineSemaphoreSubmitInfo *timeline_info =
+ vk_find_struct_const(pSubmits[i].pNext, TIMELINE_SEMAPHORE_SUBMIT_INFO);
+
+ result = radv_queue_submit(
+ queue, &(struct radv_queue_submission){
+ .cmd_buffers = pSubmits[i].pCommandBuffers,
+ .cmd_buffer_count = pSubmits[i].commandBufferCount,
+ .wait_dst_stage_mask = wait_dst_stage_mask,
+ .flush_caches = !flushed_caches,
+ .wait_semaphores = pSubmits[i].pWaitSemaphores,
+ .wait_semaphore_count = pSubmits[i].waitSemaphoreCount,
+ .signal_semaphores = pSubmits[i].pSignalSemaphores,
+ .signal_semaphore_count = pSubmits[i].signalSemaphoreCount,
+ .fence = i == fence_idx ? fence : VK_NULL_HANDLE,
+ .wait_values = timeline_info ? timeline_info->pWaitSemaphoreValues : NULL,
+ .wait_value_count = timeline_info && timeline_info->pWaitSemaphoreValues
+ ? timeline_info->waitSemaphoreValueCount
+ : 0,
+ .signal_values = timeline_info ? timeline_info->pSignalSemaphoreValues : NULL,
+ .signal_value_count = timeline_info && timeline_info->pSignalSemaphoreValues
+ ? timeline_info->signalSemaphoreValueCount
+ : 0,
+ });
+ if (result != VK_SUCCESS)
+ return result;
+
+ flushed_caches = true;
+ }
+
+ if (fence != VK_NULL_HANDLE && !submitCount) {
+ result = radv_signal_fence(queue, fence);
+ if (result != VK_SUCCESS)
+ return result;
+ }
+
+ return VK_SUCCESS;
}
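
One detail worth noting in radv_QueueSubmit: the fence rides on the last VkSubmitInfo that actually does something, and if no batch has effects it stays on batch 0 (or, when submitCount is zero, gets signaled through an empty submission). A stripped-down sketch of that selection, with an illustrative struct standing in for VkSubmitInfo:

#include <stdbool.h>
#include <stdint.h>

struct submit_desc {   /* illustrative stand-in for VkSubmitInfo */
   uint32_t cmd_buffer_count;
   uint32_t wait_semaphore_count;
   uint32_t signal_semaphore_count;
};

static bool
submit_has_effects(const struct submit_desc *s)
{
   return s->cmd_buffer_count || s->wait_semaphore_count || s->signal_semaphore_count;
}

/* Index of the batch that should carry the fence when a fence is provided. */
static uint32_t
pick_fence_index(const struct submit_desc *submits, uint32_t count)
{
   uint32_t idx = 0;
   for (uint32_t i = 0; i < count; ++i)
      if (submit_has_effects(&submits[i]))
         idx = i;
   return idx;
}
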
static const char *
radv_get_queue_family_name(struct radv_queue *queue)
{
- switch (queue->queue_family_index) {
- case RADV_QUEUE_GENERAL:
- return "graphics";
- case RADV_QUEUE_COMPUTE:
- return "compute";
- case RADV_QUEUE_TRANSFER:
- return "transfer";
- default:
- unreachable("Unknown queue family");
- }
+ switch (queue->queue_family_index) {
+ case RADV_QUEUE_GENERAL:
+ return "graphics";
+ case RADV_QUEUE_COMPUTE:
+ return "compute";
+ case RADV_QUEUE_TRANSFER:
+ return "transfer";
+ default:
+ unreachable("Unknown queue family");
+ }
}
-VkResult radv_QueueWaitIdle(
- VkQueue _queue)
+VkResult
+radv_QueueWaitIdle(VkQueue _queue)
{
- RADV_FROM_HANDLE(radv_queue, queue, _queue);
+ RADV_FROM_HANDLE(radv_queue, queue, _queue);
- if (radv_device_is_lost(queue->device))
- return VK_ERROR_DEVICE_LOST;
+ if (radv_device_is_lost(queue->device))
+ return VK_ERROR_DEVICE_LOST;
- mtx_lock(&queue->pending_mutex);
- while (!list_is_empty(&queue->pending_submissions)) {
- u_cnd_monotonic_wait(&queue->device->timeline_cond, &queue->pending_mutex);
- }
- mtx_unlock(&queue->pending_mutex);
+ mtx_lock(&queue->pending_mutex);
+ while (!list_is_empty(&queue->pending_submissions)) {
+ u_cnd_monotonic_wait(&queue->device->timeline_cond, &queue->pending_mutex);
+ }
+ mtx_unlock(&queue->pending_mutex);
- if (!queue->device->ws->ctx_wait_idle(queue->hw_ctx,
- radv_queue_family_to_ring(queue->queue_family_index),
- queue->queue_idx)) {
- return radv_device_set_lost(queue->device,
- "Failed to wait for a '%s' queue "
- "to be idle. GPU hang ?",
- radv_get_queue_family_name(queue));
- }
+ if (!queue->device->ws->ctx_wait_idle(
+ queue->hw_ctx, radv_queue_family_to_ring(queue->queue_family_index), queue->queue_idx)) {
+ return radv_device_set_lost(queue->device,
+ "Failed to wait for a '%s' queue "
+ "to be idle. GPU hang ?",
+ radv_get_queue_family_name(queue));
+ }
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
-VkResult radv_DeviceWaitIdle(
- VkDevice _device)
+VkResult
+radv_DeviceWaitIdle(VkDevice _device)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_device, device, _device);
- for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
- for (unsigned q = 0; q < device->queue_count[i]; q++) {
- VkResult result =
- radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
+ for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
+ for (unsigned q = 0; q < device->queue_count[i]; q++) {
+ VkResult result = radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));
- if (result != VK_SUCCESS)
- return result;
- }
- }
- return VK_SUCCESS;
+ if (result != VK_SUCCESS)
+ return result;
+ }
+ }
+ return VK_SUCCESS;
}
-VkResult radv_EnumerateInstanceExtensionProperties(
- const char* pLayerName,
- uint32_t* pPropertyCount,
- VkExtensionProperties* pProperties)
+VkResult
+radv_EnumerateInstanceExtensionProperties(const char *pLayerName, uint32_t *pPropertyCount,
+ VkExtensionProperties *pProperties)
{
- if (pLayerName)
- return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
+ if (pLayerName)
+ return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
- return vk_enumerate_instance_extension_properties(
- &radv_instance_extensions_supported,
- pPropertyCount, pProperties);
+ return vk_enumerate_instance_extension_properties(&radv_instance_extensions_supported,
+ pPropertyCount, pProperties);
}
-PFN_vkVoidFunction radv_GetInstanceProcAddr(
- VkInstance _instance,
- const char* pName)
+PFN_vkVoidFunction
+radv_GetInstanceProcAddr(VkInstance _instance, const char *pName)
{
- RADV_FROM_HANDLE(radv_instance, instance, _instance);
+ RADV_FROM_HANDLE(radv_instance, instance, _instance);
- /* The Vulkan 1.0 spec for vkGetInstanceProcAddr has a table of exactly
- * when we have to return valid function pointers, NULL, or it's left
- * undefined. See the table for exact details.
- */
- if (pName == NULL)
- return NULL;
+ /* The Vulkan 1.0 spec for vkGetInstanceProcAddr has a table of exactly
+ * when we have to return valid function pointers, NULL, or it's left
+ * undefined. See the table for exact details.
+ */
+ if (pName == NULL)
+ return NULL;
-#define LOOKUP_RADV_ENTRYPOINT(entrypoint) \
- if (strcmp(pName, "vk" #entrypoint) == 0) \
- return (PFN_vkVoidFunction)radv_##entrypoint
+#define LOOKUP_RADV_ENTRYPOINT(entrypoint) \
+ if (strcmp(pName, "vk" #entrypoint) == 0) \
+ return (PFN_vkVoidFunction)radv_##entrypoint
- LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceExtensionProperties);
- LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceLayerProperties);
- LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceVersion);
- LOOKUP_RADV_ENTRYPOINT(CreateInstance);
+ LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceExtensionProperties);
+ LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceLayerProperties);
+ LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceVersion);
+ LOOKUP_RADV_ENTRYPOINT(CreateInstance);
- /* GetInstanceProcAddr() can also be called with a NULL instance.
- * See https://gitlab.khronos.org/vulkan/vulkan/issues/2057
- */
- LOOKUP_RADV_ENTRYPOINT(GetInstanceProcAddr);
+ /* GetInstanceProcAddr() can also be called with a NULL instance.
+ * See https://gitlab.khronos.org/vulkan/vulkan/issues/2057
+ */
+ LOOKUP_RADV_ENTRYPOINT(GetInstanceProcAddr);
#undef LOOKUP_RADV_ENTRYPOINT
- if (instance == NULL)
- return NULL;
+ if (instance == NULL)
+ return NULL;
- return vk_instance_get_proc_addr(&instance->vk,
- &radv_instance_entrypoints,
- pName);
+ return vk_instance_get_proc_addr(&instance->vk, &radv_instance_entrypoints, pName);
}
/* The loader wants us to expose a second GetInstanceProcAddr function
* to work around certain LD_PRELOAD issues seen in apps.
*/
PUBLIC
-VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
- VkInstance instance,
- const char* pName)
+VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
+vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName)
{
- return radv_GetInstanceProcAddr(instance, pName);
+ return radv_GetInstanceProcAddr(instance, pName);
}
PUBLIC
-VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetPhysicalDeviceProcAddr(
- VkInstance _instance,
- const char* pName)
+VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
+vk_icdGetPhysicalDeviceProcAddr(VkInstance _instance, const char *pName)
{
- RADV_FROM_HANDLE(radv_instance, instance, _instance);
- return vk_instance_get_physical_device_proc_addr(&instance->vk, pName);
+ RADV_FROM_HANDLE(radv_instance, instance, _instance);
+ return vk_instance_get_physical_device_proc_addr(&instance->vk, pName);
}
-bool radv_get_memory_fd(struct radv_device *device,
- struct radv_device_memory *memory,
- int *pFD)
+bool
+radv_get_memory_fd(struct radv_device *device, struct radv_device_memory *memory, int *pFD)
{
- /* Only set BO metadata for the first plane */
- if (memory->image && memory->image->offset == 0) {
- struct radeon_bo_metadata metadata;
- radv_init_metadata(device, memory->image, &metadata);
- device->ws->buffer_set_metadata(device->ws, memory->bo, &metadata);
- }
+ /* Only set BO metadata for the first plane */
+ if (memory->image && memory->image->offset == 0) {
+ struct radeon_bo_metadata metadata;
+ radv_init_metadata(device, memory->image, &metadata);
+ device->ws->buffer_set_metadata(device->ws, memory->bo, &metadata);
+ }
- return device->ws->buffer_get_fd(device->ws, memory->bo,
- pFD);
+ return device->ws->buffer_get_fd(device->ws, memory->bo, pFD);
}
-
void
-radv_free_memory(struct radv_device *device,
- const VkAllocationCallbacks* pAllocator,
- struct radv_device_memory *mem)
+radv_free_memory(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
+ struct radv_device_memory *mem)
{
- if (mem == NULL)
- return;
+ if (mem == NULL)
+ return;
#if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER
- if (mem->android_hardware_buffer)
- AHardwareBuffer_release(mem->android_hardware_buffer);
+ if (mem->android_hardware_buffer)
+ AHardwareBuffer_release(mem->android_hardware_buffer);
#endif
- if (mem->bo) {
- if (device->overallocation_disallowed) {
- mtx_lock(&device->overallocation_mutex);
- device->allocated_memory_size[mem->heap_index] -= mem->alloc_size;
- mtx_unlock(&device->overallocation_mutex);
- }
-
- if (device->use_global_bo_list)
- device->ws->buffer_make_resident(device->ws, mem->bo, false);
- device->ws->buffer_destroy(device->ws, mem->bo);
- mem->bo = NULL;
- }
-
- vk_object_base_finish(&mem->base);
- vk_free2(&device->vk.alloc, pAllocator, mem);
-}
-
-static VkResult radv_alloc_memory(struct radv_device *device,
- const VkMemoryAllocateInfo* pAllocateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkDeviceMemory* pMem)
-{
- struct radv_device_memory *mem;
- VkResult result;
- enum radeon_bo_domain domain;
- uint32_t flags = 0;
-
- assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
-
- const VkImportMemoryFdInfoKHR *import_info =
- vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
- const VkMemoryDedicatedAllocateInfo *dedicate_info =
- vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO);
- const VkExportMemoryAllocateInfo *export_info =
- vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO);
- const struct VkImportAndroidHardwareBufferInfoANDROID *ahb_import_info =
- vk_find_struct_const(pAllocateInfo->pNext,
- IMPORT_ANDROID_HARDWARE_BUFFER_INFO_ANDROID);
- const VkImportMemoryHostPointerInfoEXT *host_ptr_info =
- vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_HOST_POINTER_INFO_EXT);
-
- const struct wsi_memory_allocate_info *wsi_info =
- vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);
-
- if (pAllocateInfo->allocationSize == 0 && !ahb_import_info &&
- !(export_info && (export_info->handleTypes & VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID))) {
- /* Apparently, this is allowed */
- *pMem = VK_NULL_HANDLE;
- return VK_SUCCESS;
- }
-
- mem = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*mem), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (mem == NULL)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
-
- vk_object_base_init(&device->vk, &mem->base,
- VK_OBJECT_TYPE_DEVICE_MEMORY);
-
- if (wsi_info && wsi_info->implicit_sync)
- flags |= RADEON_FLAG_IMPLICIT_SYNC;
-
- if (dedicate_info) {
- mem->image = radv_image_from_handle(dedicate_info->image);
- mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
- } else {
- mem->image = NULL;
- mem->buffer = NULL;
- }
-
- float priority_float = 0.5;
- const struct VkMemoryPriorityAllocateInfoEXT *priority_ext =
- vk_find_struct_const(pAllocateInfo->pNext,
- MEMORY_PRIORITY_ALLOCATE_INFO_EXT);
- if (priority_ext)
- priority_float = priority_ext->priority;
-
- unsigned priority = MIN2(RADV_BO_PRIORITY_APPLICATION_MAX - 1,
- (int)(priority_float * RADV_BO_PRIORITY_APPLICATION_MAX));
-
- mem->user_ptr = NULL;
- mem->bo = NULL;
+ if (mem->bo) {
+ if (device->overallocation_disallowed) {
+ mtx_lock(&device->overallocation_mutex);
+ device->allocated_memory_size[mem->heap_index] -= mem->alloc_size;
+ mtx_unlock(&device->overallocation_mutex);
+ }
+
+ if (device->use_global_bo_list)
+ device->ws->buffer_make_resident(device->ws, mem->bo, false);
+ device->ws->buffer_destroy(device->ws, mem->bo);
+ mem->bo = NULL;
+ }
+
+ vk_object_base_finish(&mem->base);
+ vk_free2(&device->vk.alloc, pAllocator, mem);
+}
+
+static VkResult
+radv_alloc_memory(struct radv_device *device, const VkMemoryAllocateInfo *pAllocateInfo,
+ const VkAllocationCallbacks *pAllocator, VkDeviceMemory *pMem)
+{
+ struct radv_device_memory *mem;
+ VkResult result;
+ enum radeon_bo_domain domain;
+ uint32_t flags = 0;
+
+ assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
+
+ const VkImportMemoryFdInfoKHR *import_info =
+ vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
+ const VkMemoryDedicatedAllocateInfo *dedicate_info =
+ vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO);
+ const VkExportMemoryAllocateInfo *export_info =
+ vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO);
+ const struct VkImportAndroidHardwareBufferInfoANDROID *ahb_import_info =
+ vk_find_struct_const(pAllocateInfo->pNext, IMPORT_ANDROID_HARDWARE_BUFFER_INFO_ANDROID);
+ const VkImportMemoryHostPointerInfoEXT *host_ptr_info =
+ vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_HOST_POINTER_INFO_EXT);
+
+ const struct wsi_memory_allocate_info *wsi_info =
+ vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);
+
+ if (pAllocateInfo->allocationSize == 0 && !ahb_import_info &&
+ !(export_info && (export_info->handleTypes &
+ VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID))) {
+ /* Apparently, this is allowed */
+ *pMem = VK_NULL_HANDLE;
+ return VK_SUCCESS;
+ }
+
+ mem =
+ vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*mem), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (mem == NULL)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ vk_object_base_init(&device->vk, &mem->base, VK_OBJECT_TYPE_DEVICE_MEMORY);
+
+ if (wsi_info && wsi_info->implicit_sync)
+ flags |= RADEON_FLAG_IMPLICIT_SYNC;
+
+ if (dedicate_info) {
+ mem->image = radv_image_from_handle(dedicate_info->image);
+ mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
+ } else {
+ mem->image = NULL;
+ mem->buffer = NULL;
+ }
+
+ float priority_float = 0.5;
+ const struct VkMemoryPriorityAllocateInfoEXT *priority_ext =
+ vk_find_struct_const(pAllocateInfo->pNext, MEMORY_PRIORITY_ALLOCATE_INFO_EXT);
+ if (priority_ext)
+ priority_float = priority_ext->priority;
+
+ unsigned priority = MIN2(RADV_BO_PRIORITY_APPLICATION_MAX - 1,
+ (int)(priority_float * RADV_BO_PRIORITY_APPLICATION_MAX));
+
+ mem->user_ptr = NULL;
+ mem->bo = NULL;
#if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER
- mem->android_hardware_buffer = NULL;
+ mem->android_hardware_buffer = NULL;
#endif
- if (ahb_import_info) {
- result = radv_import_ahb_memory(device, mem, priority, ahb_import_info);
- if (result != VK_SUCCESS)
- goto fail;
- } else if(export_info && (export_info->handleTypes & VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID)) {
- result = radv_create_ahb_memory(device, mem, priority, pAllocateInfo);
- if (result != VK_SUCCESS)
- goto fail;
- } else if (import_info) {
- assert(import_info->handleType ==
- VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
- import_info->handleType ==
- VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
- mem->bo = device->ws->buffer_from_fd(device->ws, import_info->fd,
- priority, NULL);
- if (!mem->bo) {
- result = VK_ERROR_INVALID_EXTERNAL_HANDLE;
- goto fail;
- } else {
- close(import_info->fd);
- }
-
- if (mem->image && mem->image->plane_count == 1 &&
- !vk_format_is_depth_or_stencil(mem->image->vk_format) &&
- mem->image->info.samples == 1 &&
- mem->image->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
- struct radeon_bo_metadata metadata;
- device->ws->buffer_get_metadata(device->ws, mem->bo, &metadata);
-
- struct radv_image_create_info create_info = {
- .no_metadata_planes = true,
- .bo_metadata = &metadata
- };
-
- /* This gives a basic ability to import radeonsi images
- * that don't have DCC. This is not guaranteed by any
- * spec and can be removed after we support modifiers. */
- result = radv_image_create_layout(device, create_info, NULL,
- mem->image);
- if (result != VK_SUCCESS) {
- device->ws->buffer_destroy(device->ws, mem->bo);
- goto fail;
- }
- }
- } else if (host_ptr_info) {
- assert(host_ptr_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
- mem->bo = device->ws->buffer_from_ptr(device->ws, host_ptr_info->pHostPointer,
- pAllocateInfo->allocationSize,
- priority);
- if (!mem->bo) {
- result = VK_ERROR_INVALID_EXTERNAL_HANDLE;
- goto fail;
- } else {
- mem->user_ptr = host_ptr_info->pHostPointer;
- }
- } else {
- uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
- uint32_t heap_index;
-
- heap_index = device->physical_device->memory_properties.memoryTypes[pAllocateInfo->memoryTypeIndex].heapIndex;
- domain = device->physical_device->memory_domains[pAllocateInfo->memoryTypeIndex];
- flags |= device->physical_device->memory_flags[pAllocateInfo->memoryTypeIndex];
-
- if (!import_info && (!export_info || !export_info->handleTypes)) {
- flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
- if (device->use_global_bo_list) {
- flags |= RADEON_FLAG_PREFER_LOCAL_BO;
- }
- }
-
- if (device->overallocation_disallowed) {
- uint64_t total_size =
- device->physical_device->memory_properties.memoryHeaps[heap_index].size;
-
- mtx_lock(&device->overallocation_mutex);
- if (device->allocated_memory_size[heap_index] + alloc_size > total_size) {
- mtx_unlock(&device->overallocation_mutex);
- result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
- goto fail;
- }
- device->allocated_memory_size[heap_index] += alloc_size;
- mtx_unlock(&device->overallocation_mutex);
- }
-
- mem->bo = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment,
- domain, flags, priority);
-
- if (!mem->bo) {
- if (device->overallocation_disallowed) {
- mtx_lock(&device->overallocation_mutex);
- device->allocated_memory_size[heap_index] -= alloc_size;
- mtx_unlock(&device->overallocation_mutex);
- }
- result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
- goto fail;
- }
-
- mem->heap_index = heap_index;
- mem->alloc_size = alloc_size;
- }
-
- if (!wsi_info) {
- if (device->use_global_bo_list) {
- result = device->ws->buffer_make_resident(device->ws, mem->bo, true);
- if (result != VK_SUCCESS)
- goto fail;
- }
- }
-
- *pMem = radv_device_memory_to_handle(mem);
-
- return VK_SUCCESS;
+ if (ahb_import_info) {
+ result = radv_import_ahb_memory(device, mem, priority, ahb_import_info);
+ if (result != VK_SUCCESS)
+ goto fail;
+ } else if (export_info && (export_info->handleTypes &
+ VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID)) {
+ result = radv_create_ahb_memory(device, mem, priority, pAllocateInfo);
+ if (result != VK_SUCCESS)
+ goto fail;
+ } else if (import_info) {
+ assert(import_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
+ import_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
+ mem->bo = device->ws->buffer_from_fd(device->ws, import_info->fd, priority, NULL);
+ if (!mem->bo) {
+ result = VK_ERROR_INVALID_EXTERNAL_HANDLE;
+ goto fail;
+ } else {
+ close(import_info->fd);
+ }
+
+ if (mem->image && mem->image->plane_count == 1 &&
+ !vk_format_is_depth_or_stencil(mem->image->vk_format) && mem->image->info.samples == 1 &&
+ mem->image->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
+ struct radeon_bo_metadata metadata;
+ device->ws->buffer_get_metadata(device->ws, mem->bo, &metadata);
+
+ struct radv_image_create_info create_info = {.no_metadata_planes = true,
+ .bo_metadata = &metadata};
+
+ /* This gives a basic ability to import radeonsi images
+ * that don't have DCC. This is not guaranteed by any
+ * spec and can be removed after we support modifiers. */
+ result = radv_image_create_layout(device, create_info, NULL, mem->image);
+ if (result != VK_SUCCESS) {
+ device->ws->buffer_destroy(device->ws, mem->bo);
+ goto fail;
+ }
+ }
+ } else if (host_ptr_info) {
+ assert(host_ptr_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
+ mem->bo = device->ws->buffer_from_ptr(device->ws, host_ptr_info->pHostPointer,
+ pAllocateInfo->allocationSize, priority);
+ if (!mem->bo) {
+ result = VK_ERROR_INVALID_EXTERNAL_HANDLE;
+ goto fail;
+ } else {
+ mem->user_ptr = host_ptr_info->pHostPointer;
+ }
+ } else {
+ uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
+ uint32_t heap_index;
+
+ heap_index =
+ device->physical_device->memory_properties.memoryTypes[pAllocateInfo->memoryTypeIndex]
+ .heapIndex;
+ domain = device->physical_device->memory_domains[pAllocateInfo->memoryTypeIndex];
+ flags |= device->physical_device->memory_flags[pAllocateInfo->memoryTypeIndex];
+
+ if (!import_info && (!export_info || !export_info->handleTypes)) {
+ flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
+ if (device->use_global_bo_list) {
+ flags |= RADEON_FLAG_PREFER_LOCAL_BO;
+ }
+ }
+
+ if (device->overallocation_disallowed) {
+ uint64_t total_size =
+ device->physical_device->memory_properties.memoryHeaps[heap_index].size;
+
+ mtx_lock(&device->overallocation_mutex);
+ if (device->allocated_memory_size[heap_index] + alloc_size > total_size) {
+ mtx_unlock(&device->overallocation_mutex);
+ result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
+ goto fail;
+ }
+ device->allocated_memory_size[heap_index] += alloc_size;
+ mtx_unlock(&device->overallocation_mutex);
+ }
+
+ mem->bo = device->ws->buffer_create(device->ws, alloc_size,
+ device->physical_device->rad_info.max_alignment, domain,
+ flags, priority);
+
+ if (!mem->bo) {
+ if (device->overallocation_disallowed) {
+ mtx_lock(&device->overallocation_mutex);
+ device->allocated_memory_size[heap_index] -= alloc_size;
+ mtx_unlock(&device->overallocation_mutex);
+ }
+ result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
+ goto fail;
+ }
+
+ mem->heap_index = heap_index;
+ mem->alloc_size = alloc_size;
+ }
+
+ if (!wsi_info) {
+ if (device->use_global_bo_list) {
+ result = device->ws->buffer_make_resident(device->ws, mem->bo, true);
+ if (result != VK_SUCCESS)
+ goto fail;
+ }
+ }
+
+ *pMem = radv_device_memory_to_handle(mem);
+
+ return VK_SUCCESS;
fail:
- radv_free_memory(device, pAllocator,mem);
+ radv_free_memory(device, pAllocator, mem);
- return result;
+ return result;
}
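
When overallocation is disallowed, radv_alloc_memory charges the heap under a mutex before creating the buffer and rolls the charge back if the allocation fails; radv_free_memory undoes the charge again when the memory is eventually released. A minimal sketch of that budget bookkeeping, with hypothetical types rather than the radv_device fields:

#include <stdbool.h>
#include <stdint.h>
#include <threads.h>

struct heap_budget {
   mtx_t    mutex;
   uint64_t total;       /* heap size reported to the application */
   uint64_t allocated;   /* bytes currently charged against this heap */
};

/* Reserve `size` bytes, failing instead of exceeding the heap budget. */
static bool
budget_reserve(struct heap_budget *h, uint64_t size)
{
   mtx_lock(&h->mutex);
   bool ok = h->allocated + size <= h->total;
   if (ok)
      h->allocated += size;
   mtx_unlock(&h->mutex);
   return ok;
}

/* Undo a reservation, e.g. when creating the BO itself fails. */
static void
budget_release(struct heap_budget *h, uint64_t size)
{
   mtx_lock(&h->mutex);
   h->allocated -= size;
   mtx_unlock(&h->mutex);
}
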
-VkResult radv_AllocateMemory(
- VkDevice _device,
- const VkMemoryAllocateInfo* pAllocateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkDeviceMemory* pMem)
+VkResult
+radv_AllocateMemory(VkDevice _device, const VkMemoryAllocateInfo *pAllocateInfo,
+ const VkAllocationCallbacks *pAllocator, VkDeviceMemory *pMem)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- return radv_alloc_memory(device, pAllocateInfo, pAllocator, pMem);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ return radv_alloc_memory(device, pAllocateInfo, pAllocator, pMem);
}
-void radv_FreeMemory(
- VkDevice _device,
- VkDeviceMemory _mem,
- const VkAllocationCallbacks* pAllocator)
+void
+radv_FreeMemory(VkDevice _device, VkDeviceMemory _mem, const VkAllocationCallbacks *pAllocator)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
- radv_free_memory(device, pAllocator, mem);
+ radv_free_memory(device, pAllocator, mem);
}
-VkResult radv_MapMemory(
- VkDevice _device,
- VkDeviceMemory _memory,
- VkDeviceSize offset,
- VkDeviceSize size,
- VkMemoryMapFlags flags,
- void** ppData)
+VkResult
+radv_MapMemory(VkDevice _device, VkDeviceMemory _memory, VkDeviceSize offset, VkDeviceSize size,
+ VkMemoryMapFlags flags, void **ppData)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
- if (mem == NULL) {
- *ppData = NULL;
- return VK_SUCCESS;
- }
+ if (mem == NULL) {
+ *ppData = NULL;
+ return VK_SUCCESS;
+ }
- if (mem->user_ptr)
- *ppData = mem->user_ptr;
- else
- *ppData = device->ws->buffer_map(mem->bo);
+ if (mem->user_ptr)
+ *ppData = mem->user_ptr;
+ else
+ *ppData = device->ws->buffer_map(mem->bo);
- if (*ppData) {
- *ppData = (uint8_t*)*ppData + offset;
- return VK_SUCCESS;
- }
+ if (*ppData) {
+ *ppData = (uint8_t *)*ppData + offset;
+ return VK_SUCCESS;
+ }
- return vk_error(device->instance, VK_ERROR_MEMORY_MAP_FAILED);
+ return vk_error(device->instance, VK_ERROR_MEMORY_MAP_FAILED);
}
-void radv_UnmapMemory(
- VkDevice _device,
- VkDeviceMemory _memory)
+void
+radv_UnmapMemory(VkDevice _device, VkDeviceMemory _memory)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
- if (mem == NULL)
- return;
+ if (mem == NULL)
+ return;
- if (mem->user_ptr == NULL)
- device->ws->buffer_unmap(mem->bo);
+ if (mem->user_ptr == NULL)
+ device->ws->buffer_unmap(mem->bo);
}
-VkResult radv_FlushMappedMemoryRanges(
- VkDevice _device,
- uint32_t memoryRangeCount,
- const VkMappedMemoryRange* pMemoryRanges)
+VkResult
+radv_FlushMappedMemoryRanges(VkDevice _device, uint32_t memoryRangeCount,
+ const VkMappedMemoryRange *pMemoryRanges)
{
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
-VkResult radv_InvalidateMappedMemoryRanges(
- VkDevice _device,
- uint32_t memoryRangeCount,
- const VkMappedMemoryRange* pMemoryRanges)
+VkResult
+radv_InvalidateMappedMemoryRanges(VkDevice _device, uint32_t memoryRangeCount,
+ const VkMappedMemoryRange *pMemoryRanges)
{
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
-void radv_GetBufferMemoryRequirements(
- VkDevice _device,
- VkBuffer _buffer,
- VkMemoryRequirements* pMemoryRequirements)
+void
+radv_GetBufferMemoryRequirements(VkDevice _device, VkBuffer _buffer,
+ VkMemoryRequirements *pMemoryRequirements)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
- pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
+ pMemoryRequirements->memoryTypeBits =
+ (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
- if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
- pMemoryRequirements->alignment = 4096;
- else
- pMemoryRequirements->alignment = 16;
+ if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
+ pMemoryRequirements->alignment = 4096;
+ else
+ pMemoryRequirements->alignment = 16;
- pMemoryRequirements->size = align64(buffer->size, pMemoryRequirements->alignment);
+ pMemoryRequirements->size = align64(buffer->size, pMemoryRequirements->alignment);
}
-void radv_GetBufferMemoryRequirements2(
- VkDevice device,
- const VkBufferMemoryRequirementsInfo2 *pInfo,
- VkMemoryRequirements2 *pMemoryRequirements)
+void
+radv_GetBufferMemoryRequirements2(VkDevice device, const VkBufferMemoryRequirementsInfo2 *pInfo,
+ VkMemoryRequirements2 *pMemoryRequirements)
+{
+ radv_GetBufferMemoryRequirements(device, pInfo->buffer,
+ &pMemoryRequirements->memoryRequirements);
+ vk_foreach_struct(ext, pMemoryRequirements->pNext)
+ {
+ switch (ext->sType) {
+ case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
+ VkMemoryDedicatedRequirements *req = (VkMemoryDedicatedRequirements *)ext;
+ req->requiresDedicatedAllocation = false;
+ req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
+ break;
+ }
+ default:
+ break;
+ }
+ }
+}
+
+void
+radv_GetImageMemoryRequirements(VkDevice _device, VkImage _image,
+ VkMemoryRequirements *pMemoryRequirements)
{
- radv_GetBufferMemoryRequirements(device, pInfo->buffer,
- &pMemoryRequirements->memoryRequirements);
- vk_foreach_struct(ext, pMemoryRequirements->pNext) {
- switch (ext->sType) {
- case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
- VkMemoryDedicatedRequirements *req =
- (VkMemoryDedicatedRequirements *) ext;
- req->requiresDedicatedAllocation = false;
- req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
- break;
- }
- default:
- break;
- }
- }
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_image, image, _image);
+
+ pMemoryRequirements->memoryTypeBits =
+ (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
+
+ pMemoryRequirements->size = image->size;
+ pMemoryRequirements->alignment = image->alignment;
}
-void radv_GetImageMemoryRequirements(
- VkDevice _device,
- VkImage _image,
- VkMemoryRequirements* pMemoryRequirements)
+void
+radv_GetImageMemoryRequirements2(VkDevice device, const VkImageMemoryRequirementsInfo2 *pInfo,
+ VkMemoryRequirements2 *pMemoryRequirements)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_image, image, _image);
+ radv_GetImageMemoryRequirements(device, pInfo->image, &pMemoryRequirements->memoryRequirements);
- pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
+ RADV_FROM_HANDLE(radv_image, image, pInfo->image);
- pMemoryRequirements->size = image->size;
- pMemoryRequirements->alignment = image->alignment;
+ vk_foreach_struct(ext, pMemoryRequirements->pNext)
+ {
+ switch (ext->sType) {
+ case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
+ VkMemoryDedicatedRequirements *req = (VkMemoryDedicatedRequirements *)ext;
+ req->requiresDedicatedAllocation =
+ image->shareable && image->tiling != VK_IMAGE_TILING_LINEAR;
+ req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
+ break;
+ }
+ default:
+ break;
+ }
+ }
}
-void radv_GetImageMemoryRequirements2(
- VkDevice device,
- const VkImageMemoryRequirementsInfo2 *pInfo,
- VkMemoryRequirements2 *pMemoryRequirements)
+void
+radv_GetDeviceMemoryCommitment(VkDevice device, VkDeviceMemory memory,
+ VkDeviceSize *pCommittedMemoryInBytes)
{
- radv_GetImageMemoryRequirements(device, pInfo->image,
- &pMemoryRequirements->memoryRequirements);
+ *pCommittedMemoryInBytes = 0;
+}
- RADV_FROM_HANDLE(radv_image, image, pInfo->image);
+VkResult
+radv_BindBufferMemory2(VkDevice device, uint32_t bindInfoCount,
+ const VkBindBufferMemoryInfo *pBindInfos)
+{
+ for (uint32_t i = 0; i < bindInfoCount; ++i) {
+ RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
+ RADV_FROM_HANDLE(radv_buffer, buffer, pBindInfos[i].buffer);
- vk_foreach_struct(ext, pMemoryRequirements->pNext) {
- switch (ext->sType) {
- case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
- VkMemoryDedicatedRequirements *req =
- (VkMemoryDedicatedRequirements *) ext;
- req->requiresDedicatedAllocation = image->shareable &&
- image->tiling != VK_IMAGE_TILING_LINEAR;
- req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
- break;
- }
- default:
- break;
- }
- }
+ if (mem) {
+ buffer->bo = mem->bo;
+ buffer->offset = pBindInfos[i].memoryOffset;
+ } else {
+ buffer->bo = NULL;
+ }
+ }
+ return VK_SUCCESS;
}
-void radv_GetDeviceMemoryCommitment(
- VkDevice device,
- VkDeviceMemory memory,
- VkDeviceSize* pCommittedMemoryInBytes)
+VkResult
+radv_BindBufferMemory(VkDevice device, VkBuffer buffer, VkDeviceMemory memory,
+ VkDeviceSize memoryOffset)
{
- *pCommittedMemoryInBytes = 0;
+ const VkBindBufferMemoryInfo info = {.sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
+ .buffer = buffer,
+ .memory = memory,
+ .memoryOffset = memoryOffset};
+
+ return radv_BindBufferMemory2(device, 1, &info);
}
-VkResult radv_BindBufferMemory2(VkDevice device,
- uint32_t bindInfoCount,
- const VkBindBufferMemoryInfo *pBindInfos)
+VkResult
+radv_BindImageMemory2(VkDevice device, uint32_t bindInfoCount,
+ const VkBindImageMemoryInfo *pBindInfos)
{
- for (uint32_t i = 0; i < bindInfoCount; ++i) {
- RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
- RADV_FROM_HANDLE(radv_buffer, buffer, pBindInfos[i].buffer);
+ for (uint32_t i = 0; i < bindInfoCount; ++i) {
+ RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
+ RADV_FROM_HANDLE(radv_image, image, pBindInfos[i].image);
- if (mem) {
- buffer->bo = mem->bo;
- buffer->offset = pBindInfos[i].memoryOffset;
- } else {
- buffer->bo = NULL;
- }
- }
- return VK_SUCCESS;
-}
-
-VkResult radv_BindBufferMemory(
- VkDevice device,
- VkBuffer buffer,
- VkDeviceMemory memory,
- VkDeviceSize memoryOffset)
-{
- const VkBindBufferMemoryInfo info = {
- .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
- .buffer = buffer,
- .memory = memory,
- .memoryOffset = memoryOffset
- };
-
- return radv_BindBufferMemory2(device, 1, &info);
-}
-
-VkResult radv_BindImageMemory2(VkDevice device,
- uint32_t bindInfoCount,
- const VkBindImageMemoryInfo *pBindInfos)
-{
- for (uint32_t i = 0; i < bindInfoCount; ++i) {
- RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
- RADV_FROM_HANDLE(radv_image, image, pBindInfos[i].image);
-
- if (mem) {
- image->bo = mem->bo;
- image->offset = pBindInfos[i].memoryOffset;
- } else {
- image->bo = NULL;
- image->offset = 0;
- }
- }
- return VK_SUCCESS;
-}
-
-
-VkResult radv_BindImageMemory(
- VkDevice device,
- VkImage image,
- VkDeviceMemory memory,
- VkDeviceSize memoryOffset)
-{
- const VkBindImageMemoryInfo info = {
- .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
- .image = image,
- .memory = memory,
- .memoryOffset = memoryOffset
- };
-
- return radv_BindImageMemory2(device, 1, &info);
-}
-
-static bool radv_sparse_bind_has_effects(const VkBindSparseInfo *info)
-{
- return info->bufferBindCount ||
- info->imageOpaqueBindCount ||
- info->imageBindCount ||
- info->waitSemaphoreCount ||
- info->signalSemaphoreCount;
-}
-
- VkResult radv_QueueBindSparse(
- VkQueue _queue,
- uint32_t bindInfoCount,
- const VkBindSparseInfo* pBindInfo,
- VkFence fence)
-{
- RADV_FROM_HANDLE(radv_queue, queue, _queue);
- uint32_t fence_idx = 0;
-
- if (radv_device_is_lost(queue->device))
- return VK_ERROR_DEVICE_LOST;
+ if (mem) {
+ image->bo = mem->bo;
+ image->offset = pBindInfos[i].memoryOffset;
+ } else {
+ image->bo = NULL;
+ image->offset = 0;
+ }
+ }
+ return VK_SUCCESS;
+}
- if (fence != VK_NULL_HANDLE) {
- for (uint32_t i = 0; i < bindInfoCount; ++i)
- if (radv_sparse_bind_has_effects(pBindInfo + i))
- fence_idx = i;
- } else
- fence_idx = UINT32_MAX;
-
- for (uint32_t i = 0; i < bindInfoCount; ++i) {
- if (i != fence_idx && !radv_sparse_bind_has_effects(pBindInfo + i))
- continue;
-
- const VkTimelineSemaphoreSubmitInfo *timeline_info =
- vk_find_struct_const(pBindInfo[i].pNext, TIMELINE_SEMAPHORE_SUBMIT_INFO);
-
- VkResult result = radv_queue_submit(queue, &(struct radv_queue_submission) {
- .buffer_binds = pBindInfo[i].pBufferBinds,
- .buffer_bind_count = pBindInfo[i].bufferBindCount,
- .image_opaque_binds = pBindInfo[i].pImageOpaqueBinds,
- .image_opaque_bind_count = pBindInfo[i].imageOpaqueBindCount,
- .image_binds = pBindInfo[i].pImageBinds,
- .image_bind_count = pBindInfo[i].imageBindCount,
- .wait_semaphores = pBindInfo[i].pWaitSemaphores,
- .wait_semaphore_count = pBindInfo[i].waitSemaphoreCount,
- .signal_semaphores = pBindInfo[i].pSignalSemaphores,
- .signal_semaphore_count = pBindInfo[i].signalSemaphoreCount,
- .fence = i == fence_idx ? fence : VK_NULL_HANDLE,
- .wait_values = timeline_info ? timeline_info->pWaitSemaphoreValues : NULL,
- .wait_value_count = timeline_info && timeline_info->pWaitSemaphoreValues ? timeline_info->waitSemaphoreValueCount : 0,
- .signal_values = timeline_info ? timeline_info->pSignalSemaphoreValues : NULL,
- .signal_value_count = timeline_info && timeline_info->pSignalSemaphoreValues ? timeline_info->signalSemaphoreValueCount : 0,
- });
-
- if (result != VK_SUCCESS)
- return result;
- }
-
- if (fence != VK_NULL_HANDLE && !bindInfoCount) {
- VkResult result = radv_signal_fence(queue, fence);
- if (result != VK_SUCCESS)
- return result;
- }
-
- return VK_SUCCESS;
+VkResult
+radv_BindImageMemory(VkDevice device, VkImage image, VkDeviceMemory memory,
+ VkDeviceSize memoryOffset)
+{
+   const VkBindImageMemoryInfo info = {.sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO,
+ .image = image,
+ .memory = memory,
+ .memoryOffset = memoryOffset};
+
+ return radv_BindImageMemory2(device, 1, &info);
+}
+
+static bool
+radv_sparse_bind_has_effects(const VkBindSparseInfo *info)
+{
+ return info->bufferBindCount || info->imageOpaqueBindCount || info->imageBindCount ||
+ info->waitSemaphoreCount || info->signalSemaphoreCount;
+}
+
+VkResult
+radv_QueueBindSparse(VkQueue _queue, uint32_t bindInfoCount, const VkBindSparseInfo *pBindInfo,
+ VkFence fence)
+{
+ RADV_FROM_HANDLE(radv_queue, queue, _queue);
+ uint32_t fence_idx = 0;
+
+ if (radv_device_is_lost(queue->device))
+ return VK_ERROR_DEVICE_LOST;
+
+ if (fence != VK_NULL_HANDLE) {
+ for (uint32_t i = 0; i < bindInfoCount; ++i)
+ if (radv_sparse_bind_has_effects(pBindInfo + i))
+ fence_idx = i;
+ } else
+ fence_idx = UINT32_MAX;
+
+ for (uint32_t i = 0; i < bindInfoCount; ++i) {
+ if (i != fence_idx && !radv_sparse_bind_has_effects(pBindInfo + i))
+ continue;
+
+ const VkTimelineSemaphoreSubmitInfo *timeline_info =
+ vk_find_struct_const(pBindInfo[i].pNext, TIMELINE_SEMAPHORE_SUBMIT_INFO);
+
+ VkResult result = radv_queue_submit(
+ queue, &(struct radv_queue_submission){
+ .buffer_binds = pBindInfo[i].pBufferBinds,
+ .buffer_bind_count = pBindInfo[i].bufferBindCount,
+ .image_opaque_binds = pBindInfo[i].pImageOpaqueBinds,
+ .image_opaque_bind_count = pBindInfo[i].imageOpaqueBindCount,
+ .image_binds = pBindInfo[i].pImageBinds,
+ .image_bind_count = pBindInfo[i].imageBindCount,
+ .wait_semaphores = pBindInfo[i].pWaitSemaphores,
+ .wait_semaphore_count = pBindInfo[i].waitSemaphoreCount,
+ .signal_semaphores = pBindInfo[i].pSignalSemaphores,
+ .signal_semaphore_count = pBindInfo[i].signalSemaphoreCount,
+ .fence = i == fence_idx ? fence : VK_NULL_HANDLE,
+ .wait_values = timeline_info ? timeline_info->pWaitSemaphoreValues : NULL,
+ .wait_value_count = timeline_info && timeline_info->pWaitSemaphoreValues
+ ? timeline_info->waitSemaphoreValueCount
+ : 0,
+ .signal_values = timeline_info ? timeline_info->pSignalSemaphoreValues : NULL,
+ .signal_value_count = timeline_info && timeline_info->pSignalSemaphoreValues
+ ? timeline_info->signalSemaphoreValueCount
+ : 0,
+ });
+
+ if (result != VK_SUCCESS)
+ return result;
+ }
+
+ if (fence != VK_NULL_HANDLE && !bindInfoCount) {
+ VkResult result = radv_signal_fence(queue, fence);
+ if (result != VK_SUCCESS)
+ return result;
+ }
+
+ return VK_SUCCESS;
}
static void
-radv_destroy_fence_part(struct radv_device *device,
- struct radv_fence_part *part)
+radv_destroy_fence_part(struct radv_device *device, struct radv_fence_part *part)
{
- if (part->kind != RADV_FENCE_NONE)
- device->ws->destroy_syncobj(device->ws, part->syncobj);
- part->kind = RADV_FENCE_NONE;
+ if (part->kind != RADV_FENCE_NONE)
+ device->ws->destroy_syncobj(device->ws, part->syncobj);
+ part->kind = RADV_FENCE_NONE;
}
static void
-radv_destroy_fence(struct radv_device *device,
- const VkAllocationCallbacks *pAllocator,
- struct radv_fence *fence)
+radv_destroy_fence(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
+ struct radv_fence *fence)
{
- radv_destroy_fence_part(device, &fence->temporary);
- radv_destroy_fence_part(device, &fence->permanent);
+ radv_destroy_fence_part(device, &fence->temporary);
+ radv_destroy_fence_part(device, &fence->permanent);
- vk_object_base_finish(&fence->base);
- vk_free2(&device->vk.alloc, pAllocator, fence);
+ vk_object_base_finish(&fence->base);
+ vk_free2(&device->vk.alloc, pAllocator, fence);
}
-VkResult radv_CreateFence(
- VkDevice _device,
- const VkFenceCreateInfo* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkFence* pFence)
+VkResult
+radv_CreateFence(VkDevice _device, const VkFenceCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkFence *pFence)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- bool create_signaled = false;
- struct radv_fence *fence;
- int ret;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ bool create_signaled = false;
+ struct radv_fence *fence;
+ int ret;
- fence = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*fence), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (!fence)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ fence = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*fence), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (!fence)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- vk_object_base_init(&device->vk, &fence->base, VK_OBJECT_TYPE_FENCE);
+ vk_object_base_init(&device->vk, &fence->base, VK_OBJECT_TYPE_FENCE);
- fence->permanent.kind = RADV_FENCE_SYNCOBJ;
+ fence->permanent.kind = RADV_FENCE_SYNCOBJ;
- if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT)
- create_signaled = true;
+ if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT)
+ create_signaled = true;
- ret = device->ws->create_syncobj(device->ws, create_signaled,
- &fence->permanent.syncobj);
- if (ret) {
- radv_destroy_fence(device, pAllocator, fence);
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- }
+ ret = device->ws->create_syncobj(device->ws, create_signaled, &fence->permanent.syncobj);
+ if (ret) {
+ radv_destroy_fence(device, pAllocator, fence);
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
- *pFence = radv_fence_to_handle(fence);
+ *pFence = radv_fence_to_handle(fence);
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
-
-void radv_DestroyFence(
- VkDevice _device,
- VkFence _fence,
- const VkAllocationCallbacks* pAllocator)
+void
+radv_DestroyFence(VkDevice _device, VkFence _fence, const VkAllocationCallbacks *pAllocator)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_fence, fence, _fence);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_fence, fence, _fence);
- if (!fence)
- return;
+ if (!fence)
+ return;
- radv_destroy_fence(device, pAllocator, fence);
+ radv_destroy_fence(device, pAllocator, fence);
}
-VkResult radv_WaitForFences(
- VkDevice _device,
- uint32_t fenceCount,
- const VkFence* pFences,
- VkBool32 waitAll,
- uint64_t timeout)
+VkResult
+radv_WaitForFences(VkDevice _device, uint32_t fenceCount, const VkFence *pFences, VkBool32 waitAll,
+ uint64_t timeout)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- uint32_t *handles;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ uint32_t *handles;
- if (radv_device_is_lost(device))
- return VK_ERROR_DEVICE_LOST;
+ if (radv_device_is_lost(device))
+ return VK_ERROR_DEVICE_LOST;
- timeout = radv_get_absolute_timeout(timeout);
+ timeout = radv_get_absolute_timeout(timeout);
- handles = malloc(sizeof(uint32_t) * fenceCount);
- if (!handles)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ handles = malloc(sizeof(uint32_t) * fenceCount);
+ if (!handles)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- for (uint32_t i = 0; i < fenceCount; ++i) {
- RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
+ for (uint32_t i = 0; i < fenceCount; ++i) {
+ RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
- struct radv_fence_part *part =
- fence->temporary.kind != RADV_FENCE_NONE ?
- &fence->temporary : &fence->permanent;
+ struct radv_fence_part *part =
+ fence->temporary.kind != RADV_FENCE_NONE ? &fence->temporary : &fence->permanent;
- assert(part->kind == RADV_FENCE_SYNCOBJ);
- handles[i] = part->syncobj;
- }
+ assert(part->kind == RADV_FENCE_SYNCOBJ);
+ handles[i] = part->syncobj;
+ }
- bool success = device->ws->wait_syncobj(device->ws, handles, fenceCount, waitAll, timeout);
- free(handles);
- return success ? VK_SUCCESS : VK_TIMEOUT;
+ bool success = device->ws->wait_syncobj(device->ws, handles, fenceCount, waitAll, timeout);
+ free(handles);
+ return success ? VK_SUCCESS : VK_TIMEOUT;
}
-VkResult radv_ResetFences(VkDevice _device,
- uint32_t fenceCount,
- const VkFence *pFences)
+VkResult
+radv_ResetFences(VkDevice _device, uint32_t fenceCount, const VkFence *pFences)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_device, device, _device);
- for (unsigned i = 0; i < fenceCount; ++i) {
- RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
+ for (unsigned i = 0; i < fenceCount; ++i) {
+ RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
- /* From the Vulkan 1.0.53 spec:
- *
- * "If any member of pFences currently has its payload
- * imported with temporary permanence, that fence’s prior
- * permanent payload is irst restored. The remaining
- * operations described therefore operate on the restored
- * payload."
- */
- if (fence->temporary.kind != RADV_FENCE_NONE)
- radv_destroy_fence_part(device, &fence->temporary);
+ /* From the Vulkan 1.0.53 spec:
+ *
+ * "If any member of pFences currently has its payload
+ * imported with temporary permanence, that fence’s prior
+       * permanent payload is first restored. The remaining
+ * operations described therefore operate on the restored
+ * payload."
+ */
+ if (fence->temporary.kind != RADV_FENCE_NONE)
+ radv_destroy_fence_part(device, &fence->temporary);
- device->ws->reset_syncobj(device->ws, fence->permanent.syncobj);
- }
+ device->ws->reset_syncobj(device->ws, fence->permanent.syncobj);
+ }
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
-VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
+VkResult
+radv_GetFenceStatus(VkDevice _device, VkFence _fence)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_fence, fence, _fence);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_fence, fence, _fence);
- struct radv_fence_part *part =
- fence->temporary.kind != RADV_FENCE_NONE ?
- &fence->temporary : &fence->permanent;
+ struct radv_fence_part *part =
+ fence->temporary.kind != RADV_FENCE_NONE ? &fence->temporary : &fence->permanent;
- if (radv_device_is_lost(device))
- return VK_ERROR_DEVICE_LOST;
+ if (radv_device_is_lost(device))
+ return VK_ERROR_DEVICE_LOST;
- bool success = device->ws->wait_syncobj(device->ws,
- &part->syncobj, 1, true, 0);
- return success ? VK_SUCCESS : VK_NOT_READY;
+ bool success = device->ws->wait_syncobj(device->ws, &part->syncobj, 1, true, 0);
+ return success ? VK_SUCCESS : VK_NOT_READY;
}
-
// Queue semaphore functions
static void
radv_create_timeline(struct radv_timeline *timeline, uint64_t value)
{
- timeline->highest_signaled = value;
- timeline->highest_submitted = value;
- list_inithead(&timeline->points);
- list_inithead(&timeline->free_points);
- list_inithead(&timeline->waiters);
- mtx_init(&timeline->mutex, mtx_plain);
+ timeline->highest_signaled = value;
+ timeline->highest_submitted = value;
+ list_inithead(&timeline->points);
+ list_inithead(&timeline->free_points);
+ list_inithead(&timeline->waiters);
+ mtx_init(&timeline->mutex, mtx_plain);
}
static void
-radv_destroy_timeline(struct radv_device *device,
- struct radv_timeline *timeline)
-{
- list_for_each_entry_safe(struct radv_timeline_point, point,
- &timeline->free_points, list) {
- list_del(&point->list);
- device->ws->destroy_syncobj(device->ws, point->syncobj);
- free(point);
- }
- list_for_each_entry_safe(struct radv_timeline_point, point,
- &timeline->points, list) {
- list_del(&point->list);
- device->ws->destroy_syncobj(device->ws, point->syncobj);
- free(point);
- }
- mtx_destroy(&timeline->mutex);
+radv_destroy_timeline(struct radv_device *device, struct radv_timeline *timeline)
+{
+ list_for_each_entry_safe(struct radv_timeline_point, point, &timeline->free_points, list)
+ {
+ list_del(&point->list);
+ device->ws->destroy_syncobj(device->ws, point->syncobj);
+ free(point);
+ }
+ list_for_each_entry_safe(struct radv_timeline_point, point, &timeline->points, list)
+ {
+ list_del(&point->list);
+ device->ws->destroy_syncobj(device->ws, point->syncobj);
+ free(point);
+ }
+ mtx_destroy(&timeline->mutex);
}
static void
-radv_timeline_gc_locked(struct radv_device *device,
- struct radv_timeline *timeline)
+radv_timeline_gc_locked(struct radv_device *device, struct radv_timeline *timeline)
{
- list_for_each_entry_safe(struct radv_timeline_point, point,
- &timeline->points, list) {
- if (point->wait_count || point->value > timeline->highest_submitted)
- return;
+ list_for_each_entry_safe(struct radv_timeline_point, point, &timeline->points, list)
+ {
+ if (point->wait_count || point->value > timeline->highest_submitted)
+ return;
- if (device->ws->wait_syncobj(device->ws, &point->syncobj, 1, true, 0)) {
- timeline->highest_signaled = point->value;
- list_del(&point->list);
- list_add(&point->list, &timeline->free_points);
- }
- }
+ if (device->ws->wait_syncobj(device->ws, &point->syncobj, 1, true, 0)) {
+ timeline->highest_signaled = point->value;
+ list_del(&point->list);
+ list_add(&point->list, &timeline->free_points);
+ }
+ }
}
static struct radv_timeline_point *
-radv_timeline_find_point_at_least_locked(struct radv_device *device,
- struct radv_timeline *timeline,
+radv_timeline_find_point_at_least_locked(struct radv_device *device, struct radv_timeline *timeline,
uint64_t p)
{
- radv_timeline_gc_locked(device, timeline);
+ radv_timeline_gc_locked(device, timeline);
- if (p <= timeline->highest_signaled)
- return NULL;
+ if (p <= timeline->highest_signaled)
+ return NULL;
- list_for_each_entry(struct radv_timeline_point, point,
- &timeline->points, list) {
- if (point->value >= p) {
- ++point->wait_count;
- return point;
- }
- }
- return NULL;
+ list_for_each_entry(struct radv_timeline_point, point, &timeline->points, list)
+ {
+ if (point->value >= p) {
+ ++point->wait_count;
+ return point;
+ }
+ }
+ return NULL;
}
static struct radv_timeline_point *
-radv_timeline_add_point_locked(struct radv_device *device,
- struct radv_timeline *timeline,
+radv_timeline_add_point_locked(struct radv_device *device, struct radv_timeline *timeline,
uint64_t p)
{
- radv_timeline_gc_locked(device, timeline);
+ radv_timeline_gc_locked(device, timeline);
- struct radv_timeline_point *ret = NULL;
- struct radv_timeline_point *prev = NULL;
- int r;
+ struct radv_timeline_point *ret = NULL;
+ struct radv_timeline_point *prev = NULL;
+ int r;
- if (p <= timeline->highest_signaled)
- return NULL;
+ if (p <= timeline->highest_signaled)
+ return NULL;
- list_for_each_entry(struct radv_timeline_point, point,
- &timeline->points, list) {
- if (point->value == p) {
- return NULL;
- }
+ list_for_each_entry(struct radv_timeline_point, point, &timeline->points, list)
+ {
+ if (point->value == p) {
+ return NULL;
+ }
- if (point->value < p)
- prev = point;
- }
+ if (point->value < p)
+ prev = point;
+ }
- if (list_is_empty(&timeline->free_points)) {
- ret = malloc(sizeof(struct radv_timeline_point));
- r = device->ws->create_syncobj(device->ws, false, &ret->syncobj);
- if (r) {
- free(ret);
- return NULL;
- }
- } else {
- ret = list_first_entry(&timeline->free_points, struct radv_timeline_point, list);
- list_del(&ret->list);
+ if (list_is_empty(&timeline->free_points)) {
+ ret = malloc(sizeof(struct radv_timeline_point));
+ r = device->ws->create_syncobj(device->ws, false, &ret->syncobj);
+ if (r) {
+ free(ret);
+ return NULL;
+ }
+ } else {
+ ret = list_first_entry(&timeline->free_points, struct radv_timeline_point, list);
+ list_del(&ret->list);
- device->ws->reset_syncobj(device->ws, ret->syncobj);
- }
+ device->ws->reset_syncobj(device->ws, ret->syncobj);
+ }
- ret->value = p;
- ret->wait_count = 1;
+ ret->value = p;
+ ret->wait_count = 1;
- if (prev) {
- list_add(&ret->list, &prev->list);
- } else {
- list_addtail(&ret->list, &timeline->points);
- }
- return ret;
+ if (prev) {
+ list_add(&ret->list, &prev->list);
+ } else {
+ list_addtail(&ret->list, &timeline->points);
+ }
+ return ret;
}
-
static VkResult
-radv_timeline_wait(struct radv_device *device,
- struct radv_timeline *timeline,
- uint64_t value,
+radv_timeline_wait(struct radv_device *device, struct radv_timeline *timeline, uint64_t value,
uint64_t abs_timeout)
{
- mtx_lock(&timeline->mutex);
+ mtx_lock(&timeline->mutex);
- while(timeline->highest_submitted < value) {
- struct timespec abstime;
- timespec_from_nsec(&abstime, abs_timeout);
+ while (timeline->highest_submitted < value) {
+ struct timespec abstime;
+ timespec_from_nsec(&abstime, abs_timeout);
- u_cnd_monotonic_timedwait(&device->timeline_cond, &timeline->mutex, &abstime);
+ u_cnd_monotonic_timedwait(&device->timeline_cond, &timeline->mutex, &abstime);
- if (radv_get_current_time() >= abs_timeout && timeline->highest_submitted < value) {
- mtx_unlock(&timeline->mutex);
- return VK_TIMEOUT;
- }
- }
+ if (radv_get_current_time() >= abs_timeout && timeline->highest_submitted < value) {
+ mtx_unlock(&timeline->mutex);
+ return VK_TIMEOUT;
+ }
+ }
- struct radv_timeline_point *point = radv_timeline_find_point_at_least_locked(device, timeline, value);
- mtx_unlock(&timeline->mutex);
- if (!point)
- return VK_SUCCESS;
+ struct radv_timeline_point *point =
+ radv_timeline_find_point_at_least_locked(device, timeline, value);
+ mtx_unlock(&timeline->mutex);
+ if (!point)
+ return VK_SUCCESS;
- bool success = device->ws->wait_syncobj(device->ws, &point->syncobj, 1, true, abs_timeout);
+ bool success = device->ws->wait_syncobj(device->ws, &point->syncobj, 1, true, abs_timeout);
- mtx_lock(&timeline->mutex);
- point->wait_count--;
- mtx_unlock(&timeline->mutex);
- return success ? VK_SUCCESS : VK_TIMEOUT;
+ mtx_lock(&timeline->mutex);
+ point->wait_count--;
+ mtx_unlock(&timeline->mutex);
+ return success ? VK_SUCCESS : VK_TIMEOUT;
}
static void
radv_timeline_trigger_waiters_locked(struct radv_timeline *timeline,
struct list_head *processing_list)
{
- list_for_each_entry_safe(struct radv_timeline_waiter, waiter,
- &timeline->waiters, list) {
- if (waiter->value > timeline->highest_submitted)
- continue;
+ list_for_each_entry_safe(struct radv_timeline_waiter, waiter, &timeline->waiters, list)
+ {
+ if (waiter->value > timeline->highest_submitted)
+ continue;
- radv_queue_trigger_submission(waiter->submission, 1, processing_list);
- list_del(&waiter->list);
- }
+ radv_queue_trigger_submission(waiter->submission, 1, processing_list);
+ list_del(&waiter->list);
+ }
}
-static
-void radv_destroy_semaphore_part(struct radv_device *device,
- struct radv_semaphore_part *part)
-{
- switch(part->kind) {
- case RADV_SEMAPHORE_NONE:
- break;
- case RADV_SEMAPHORE_TIMELINE:
- radv_destroy_timeline(device, &part->timeline);
- break;
- case RADV_SEMAPHORE_SYNCOBJ:
- case RADV_SEMAPHORE_TIMELINE_SYNCOBJ:
- device->ws->destroy_syncobj(device->ws, part->syncobj);
- break;
- }
- part->kind = RADV_SEMAPHORE_NONE;
+static void
+radv_destroy_semaphore_part(struct radv_device *device, struct radv_semaphore_part *part)
+{
+ switch (part->kind) {
+ case RADV_SEMAPHORE_NONE:
+ break;
+ case RADV_SEMAPHORE_TIMELINE:
+ radv_destroy_timeline(device, &part->timeline);
+ break;
+ case RADV_SEMAPHORE_SYNCOBJ:
+ case RADV_SEMAPHORE_TIMELINE_SYNCOBJ:
+ device->ws->destroy_syncobj(device->ws, part->syncobj);
+ break;
+ }
+ part->kind = RADV_SEMAPHORE_NONE;
}
static VkSemaphoreTypeKHR
radv_get_semaphore_type(const void *pNext, uint64_t *initial_value)
{
- const VkSemaphoreTypeCreateInfo *type_info =
- vk_find_struct_const(pNext, SEMAPHORE_TYPE_CREATE_INFO);
+ const VkSemaphoreTypeCreateInfo *type_info =
+ vk_find_struct_const(pNext, SEMAPHORE_TYPE_CREATE_INFO);
- if (!type_info)
- return VK_SEMAPHORE_TYPE_BINARY;
+ if (!type_info)
+ return VK_SEMAPHORE_TYPE_BINARY;
- if (initial_value)
- *initial_value = type_info->initialValue;
- return type_info->semaphoreType;
+ if (initial_value)
+ *initial_value = type_info->initialValue;
+ return type_info->semaphoreType;
}
static void
-radv_destroy_semaphore(struct radv_device *device,
- const VkAllocationCallbacks *pAllocator,
- struct radv_semaphore *sem)
-{
- radv_destroy_semaphore_part(device, &sem->temporary);
- radv_destroy_semaphore_part(device, &sem->permanent);
- vk_object_base_finish(&sem->base);
- vk_free2(&device->vk.alloc, pAllocator, sem);
-}
-
-VkResult radv_CreateSemaphore(
- VkDevice _device,
- const VkSemaphoreCreateInfo* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkSemaphore* pSemaphore)
-{
- RADV_FROM_HANDLE(radv_device, device, _device);
- uint64_t initial_value = 0;
- VkSemaphoreTypeKHR type = radv_get_semaphore_type(pCreateInfo->pNext, &initial_value);
-
- struct radv_semaphore *sem = vk_alloc2(&device->vk.alloc, pAllocator,
- sizeof(*sem), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (!sem)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
-
- vk_object_base_init(&device->vk, &sem->base,
- VK_OBJECT_TYPE_SEMAPHORE);
-
- sem->temporary.kind = RADV_SEMAPHORE_NONE;
- sem->permanent.kind = RADV_SEMAPHORE_NONE;
-
- if (type == VK_SEMAPHORE_TYPE_TIMELINE &&
- device->physical_device->rad_info.has_timeline_syncobj) {
- int ret = device->ws->create_syncobj(device->ws, false, &sem->permanent.syncobj);
- if (ret) {
- radv_destroy_semaphore(device, pAllocator, sem);
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- }
- device->ws->signal_syncobj(device->ws, sem->permanent.syncobj, initial_value);
- sem->permanent.timeline_syncobj.max_point = initial_value;
- sem->permanent.kind = RADV_SEMAPHORE_TIMELINE_SYNCOBJ;
- } else if (type == VK_SEMAPHORE_TYPE_TIMELINE) {
- radv_create_timeline(&sem->permanent.timeline, initial_value);
- sem->permanent.kind = RADV_SEMAPHORE_TIMELINE;
- } else {
- int ret = device->ws->create_syncobj(device->ws, false,
- &sem->permanent.syncobj);
- if (ret) {
- radv_destroy_semaphore(device, pAllocator, sem);
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- }
- sem->permanent.kind = RADV_SEMAPHORE_SYNCOBJ;
- }
-
- *pSemaphore = radv_semaphore_to_handle(sem);
- return VK_SUCCESS;
-}
-
-void radv_DestroySemaphore(
- VkDevice _device,
- VkSemaphore _semaphore,
- const VkAllocationCallbacks* pAllocator)
-{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_semaphore, sem, _semaphore);
- if (!_semaphore)
- return;
-
- radv_destroy_semaphore(device, pAllocator, sem);
+radv_destroy_semaphore(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
+ struct radv_semaphore *sem)
+{
+ radv_destroy_semaphore_part(device, &sem->temporary);
+ radv_destroy_semaphore_part(device, &sem->permanent);
+ vk_object_base_finish(&sem->base);
+ vk_free2(&device->vk.alloc, pAllocator, sem);
}
VkResult
-radv_GetSemaphoreCounterValue(VkDevice _device,
- VkSemaphore _semaphore,
- uint64_t* pValue)
-{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_semaphore, semaphore, _semaphore);
-
- if (radv_device_is_lost(device))
- return VK_ERROR_DEVICE_LOST;
-
- struct radv_semaphore_part *part =
- semaphore->temporary.kind != RADV_SEMAPHORE_NONE ? &semaphore->temporary : &semaphore->permanent;
-
- switch (part->kind) {
- case RADV_SEMAPHORE_TIMELINE: {
- mtx_lock(&part->timeline.mutex);
- radv_timeline_gc_locked(device, &part->timeline);
- *pValue = part->timeline.highest_signaled;
- mtx_unlock(&part->timeline.mutex);
- return VK_SUCCESS;
- }
- case RADV_SEMAPHORE_TIMELINE_SYNCOBJ: {
- return device->ws->query_syncobj(device->ws, part->syncobj, pValue);
- }
- case RADV_SEMAPHORE_NONE:
- case RADV_SEMAPHORE_SYNCOBJ:
- unreachable("Invalid semaphore type");
- }
- unreachable("Unhandled semaphore type");
+radv_CreateSemaphore(VkDevice _device, const VkSemaphoreCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkSemaphore *pSemaphore)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ uint64_t initial_value = 0;
+ VkSemaphoreTypeKHR type = radv_get_semaphore_type(pCreateInfo->pNext, &initial_value);
+
+ struct radv_semaphore *sem =
+ vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*sem), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (!sem)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ vk_object_base_init(&device->vk, &sem->base, VK_OBJECT_TYPE_SEMAPHORE);
+
+ sem->temporary.kind = RADV_SEMAPHORE_NONE;
+ sem->permanent.kind = RADV_SEMAPHORE_NONE;
+
+ if (type == VK_SEMAPHORE_TYPE_TIMELINE &&
+ device->physical_device->rad_info.has_timeline_syncobj) {
+ int ret = device->ws->create_syncobj(device->ws, false, &sem->permanent.syncobj);
+ if (ret) {
+ radv_destroy_semaphore(device, pAllocator, sem);
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+ device->ws->signal_syncobj(device->ws, sem->permanent.syncobj, initial_value);
+ sem->permanent.timeline_syncobj.max_point = initial_value;
+ sem->permanent.kind = RADV_SEMAPHORE_TIMELINE_SYNCOBJ;
+ } else if (type == VK_SEMAPHORE_TYPE_TIMELINE) {
+ radv_create_timeline(&sem->permanent.timeline, initial_value);
+ sem->permanent.kind = RADV_SEMAPHORE_TIMELINE;
+ } else {
+ int ret = device->ws->create_syncobj(device->ws, false, &sem->permanent.syncobj);
+ if (ret) {
+ radv_destroy_semaphore(device, pAllocator, sem);
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+ sem->permanent.kind = RADV_SEMAPHORE_SYNCOBJ;
+ }
+
+ *pSemaphore = radv_semaphore_to_handle(sem);
+ return VK_SUCCESS;
+}
+
+void
+radv_DestroySemaphore(VkDevice _device, VkSemaphore _semaphore,
+ const VkAllocationCallbacks *pAllocator)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_semaphore, sem, _semaphore);
+ if (!_semaphore)
+ return;
+
+ radv_destroy_semaphore(device, pAllocator, sem);
}
+VkResult
+radv_GetSemaphoreCounterValue(VkDevice _device, VkSemaphore _semaphore, uint64_t *pValue)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_semaphore, semaphore, _semaphore);
+
+ if (radv_device_is_lost(device))
+ return VK_ERROR_DEVICE_LOST;
+
+ struct radv_semaphore_part *part = semaphore->temporary.kind != RADV_SEMAPHORE_NONE
+ ? &semaphore->temporary
+ : &semaphore->permanent;
+
+ switch (part->kind) {
+ case RADV_SEMAPHORE_TIMELINE: {
+ mtx_lock(&part->timeline.mutex);
+ radv_timeline_gc_locked(device, &part->timeline);
+ *pValue = part->timeline.highest_signaled;
+ mtx_unlock(&part->timeline.mutex);
+ return VK_SUCCESS;
+ }
+ case RADV_SEMAPHORE_TIMELINE_SYNCOBJ: {
+ return device->ws->query_syncobj(device->ws, part->syncobj, pValue);
+ }
+ case RADV_SEMAPHORE_NONE:
+ case RADV_SEMAPHORE_SYNCOBJ:
+ unreachable("Invalid semaphore type");
+ }
+ unreachable("Unhandled semaphore type");
+}
static VkResult
-radv_wait_timelines(struct radv_device *device,
- const VkSemaphoreWaitInfo* pWaitInfo,
+radv_wait_timelines(struct radv_device *device, const VkSemaphoreWaitInfo *pWaitInfo,
uint64_t abs_timeout)
{
- if ((pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR) && pWaitInfo->semaphoreCount > 1) {
- for (;;) {
- for(uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) {
- RADV_FROM_HANDLE(radv_semaphore, semaphore, pWaitInfo->pSemaphores[i]);
- VkResult result = radv_timeline_wait(device, &semaphore->permanent.timeline, pWaitInfo->pValues[i], 0);
-
- if (result == VK_SUCCESS)
- return VK_SUCCESS;
- }
- if (radv_get_current_time() > abs_timeout)
- return VK_TIMEOUT;
- }
- }
-
- for(uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) {
- RADV_FROM_HANDLE(radv_semaphore, semaphore, pWaitInfo->pSemaphores[i]);
- VkResult result = radv_timeline_wait(device, &semaphore->permanent.timeline, pWaitInfo->pValues[i], abs_timeout);
-
- if (result != VK_SUCCESS)
- return result;
- }
- return VK_SUCCESS;
+ if ((pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR) && pWaitInfo->semaphoreCount > 1) {
+ for (;;) {
+ for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) {
+ RADV_FROM_HANDLE(radv_semaphore, semaphore, pWaitInfo->pSemaphores[i]);
+ VkResult result =
+ radv_timeline_wait(device, &semaphore->permanent.timeline, pWaitInfo->pValues[i], 0);
+
+ if (result == VK_SUCCESS)
+ return VK_SUCCESS;
+ }
+ if (radv_get_current_time() > abs_timeout)
+ return VK_TIMEOUT;
+ }
+ }
+
+ for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) {
+ RADV_FROM_HANDLE(radv_semaphore, semaphore, pWaitInfo->pSemaphores[i]);
+ VkResult result = radv_timeline_wait(device, &semaphore->permanent.timeline,
+ pWaitInfo->pValues[i], abs_timeout);
+
+ if (result != VK_SUCCESS)
+ return result;
+ }
+ return VK_SUCCESS;
}
VkResult
-radv_WaitSemaphores(VkDevice _device,
- const VkSemaphoreWaitInfo* pWaitInfo,
- uint64_t timeout)
+radv_WaitSemaphores(VkDevice _device, const VkSemaphoreWaitInfo *pWaitInfo, uint64_t timeout)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_device, device, _device);
- if (radv_device_is_lost(device))
- return VK_ERROR_DEVICE_LOST;
+ if (radv_device_is_lost(device))
+ return VK_ERROR_DEVICE_LOST;
- uint64_t abs_timeout = radv_get_absolute_timeout(timeout);
+ uint64_t abs_timeout = radv_get_absolute_timeout(timeout);
- if (radv_semaphore_from_handle(pWaitInfo->pSemaphores[0])->permanent.kind == RADV_SEMAPHORE_TIMELINE)
- return radv_wait_timelines(device, pWaitInfo, abs_timeout);
+ if (radv_semaphore_from_handle(pWaitInfo->pSemaphores[0])->permanent.kind ==
+ RADV_SEMAPHORE_TIMELINE)
+ return radv_wait_timelines(device, pWaitInfo, abs_timeout);
- if (pWaitInfo->semaphoreCount > UINT32_MAX / sizeof(uint32_t))
- return vk_errorf(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY, "semaphoreCount integer overflow");
+ if (pWaitInfo->semaphoreCount > UINT32_MAX / sizeof(uint32_t))
+ return vk_errorf(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY,
+ "semaphoreCount integer overflow");
- bool wait_all = !(pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR);
- uint32_t *handles = malloc(sizeof(*handles) * pWaitInfo->semaphoreCount);
- if (!handles)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ bool wait_all = !(pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR);
+ uint32_t *handles = malloc(sizeof(*handles) * pWaitInfo->semaphoreCount);
+ if (!handles)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) {
- RADV_FROM_HANDLE(radv_semaphore, semaphore, pWaitInfo->pSemaphores[i]);
- handles[i] = semaphore->permanent.syncobj;
- }
+ for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) {
+ RADV_FROM_HANDLE(radv_semaphore, semaphore, pWaitInfo->pSemaphores[i]);
+ handles[i] = semaphore->permanent.syncobj;
+ }
- bool success = device->ws->wait_timeline_syncobj(device->ws, handles, pWaitInfo->pValues,
- pWaitInfo->semaphoreCount, wait_all, false,
- abs_timeout);
- free(handles);
- return success ? VK_SUCCESS : VK_TIMEOUT;
+ bool success =
+ device->ws->wait_timeline_syncobj(device->ws, handles, pWaitInfo->pValues,
+ pWaitInfo->semaphoreCount, wait_all, false, abs_timeout);
+ free(handles);
+ return success ? VK_SUCCESS : VK_TIMEOUT;
}
VkResult
-radv_SignalSemaphore(VkDevice _device,
- const VkSemaphoreSignalInfo* pSignalInfo)
-{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_semaphore, semaphore, pSignalInfo->semaphore);
-
- struct radv_semaphore_part *part =
- semaphore->temporary.kind != RADV_SEMAPHORE_NONE ? &semaphore->temporary : &semaphore->permanent;
-
- switch(part->kind) {
- case RADV_SEMAPHORE_TIMELINE: {
- mtx_lock(&part->timeline.mutex);
- radv_timeline_gc_locked(device, &part->timeline);
- part->timeline.highest_submitted = MAX2(part->timeline.highest_submitted, pSignalInfo->value);
- part->timeline.highest_signaled = MAX2(part->timeline.highest_signaled, pSignalInfo->value);
-
- struct list_head processing_list;
- list_inithead(&processing_list);
- radv_timeline_trigger_waiters_locked(&part->timeline, &processing_list);
- mtx_unlock(&part->timeline.mutex);
-
- VkResult result = radv_process_submissions(&processing_list);
-
- /* This needs to happen after radv_process_submissions, so
- * that any submitted submissions that are now unblocked get
- * processed before we wake the application. This way we
- * ensure that any binary semaphores that are now unblocked
- * are usable by the application. */
- u_cnd_monotonic_broadcast(&device->timeline_cond);
-
- return result;
- }
- case RADV_SEMAPHORE_TIMELINE_SYNCOBJ: {
- part->timeline_syncobj.max_point = MAX2(part->timeline_syncobj.max_point, pSignalInfo->value);
- device->ws->signal_syncobj(device->ws, part->syncobj, pSignalInfo->value);
- break;
- }
- case RADV_SEMAPHORE_NONE:
- case RADV_SEMAPHORE_SYNCOBJ:
- unreachable("Invalid semaphore type");
- }
- return VK_SUCCESS;
+radv_SignalSemaphore(VkDevice _device, const VkSemaphoreSignalInfo *pSignalInfo)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_semaphore, semaphore, pSignalInfo->semaphore);
+
+ struct radv_semaphore_part *part = semaphore->temporary.kind != RADV_SEMAPHORE_NONE
+ ? &semaphore->temporary
+ : &semaphore->permanent;
+
+ switch (part->kind) {
+ case RADV_SEMAPHORE_TIMELINE: {
+ mtx_lock(&part->timeline.mutex);
+ radv_timeline_gc_locked(device, &part->timeline);
+ part->timeline.highest_submitted = MAX2(part->timeline.highest_submitted, pSignalInfo->value);
+ part->timeline.highest_signaled = MAX2(part->timeline.highest_signaled, pSignalInfo->value);
+
+ struct list_head processing_list;
+ list_inithead(&processing_list);
+ radv_timeline_trigger_waiters_locked(&part->timeline, &processing_list);
+ mtx_unlock(&part->timeline.mutex);
+
+ VkResult result = radv_process_submissions(&processing_list);
+
+ /* This needs to happen after radv_process_submissions, so
+       * that any queued submissions that are now unblocked get
+ * processed before we wake the application. This way we
+ * ensure that any binary semaphores that are now unblocked
+ * are usable by the application. */
+ u_cnd_monotonic_broadcast(&device->timeline_cond);
+
+ return result;
+ }
+ case RADV_SEMAPHORE_TIMELINE_SYNCOBJ: {
+ part->timeline_syncobj.max_point = MAX2(part->timeline_syncobj.max_point, pSignalInfo->value);
+ device->ws->signal_syncobj(device->ws, part->syncobj, pSignalInfo->value);
+ break;
+ }
+ case RADV_SEMAPHORE_NONE:
+ case RADV_SEMAPHORE_SYNCOBJ:
+ unreachable("Invalid semaphore type");
+ }
+ return VK_SUCCESS;
}
-static void radv_destroy_event(struct radv_device *device,
- const VkAllocationCallbacks* pAllocator,
- struct radv_event *event)
+static void
+radv_destroy_event(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
+ struct radv_event *event)
{
- if (event->bo)
- device->ws->buffer_destroy(device->ws, event->bo);
+ if (event->bo)
+ device->ws->buffer_destroy(device->ws, event->bo);
- vk_object_base_finish(&event->base);
- vk_free2(&device->vk.alloc, pAllocator, event);
+ vk_object_base_finish(&event->base);
+ vk_free2(&device->vk.alloc, pAllocator, event);
}
-VkResult radv_CreateEvent(
- VkDevice _device,
- const VkEventCreateInfo* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkEvent* pEvent)
+VkResult
+radv_CreateEvent(VkDevice _device, const VkEventCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkEvent *pEvent)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- struct radv_event *event = vk_alloc2(&device->vk.alloc, pAllocator,
- sizeof(*event), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ struct radv_event *event = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*event), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (!event)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ if (!event)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- vk_object_base_init(&device->vk, &event->base, VK_OBJECT_TYPE_EVENT);
+ vk_object_base_init(&device->vk, &event->base, VK_OBJECT_TYPE_EVENT);
- event->bo = device->ws->buffer_create(device->ws, 8, 8,
- RADEON_DOMAIN_GTT,
- RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING,
- RADV_BO_PRIORITY_FENCE);
- if (!event->bo) {
- radv_destroy_event(device, pAllocator, event);
- return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
- }
+ event->bo = device->ws->buffer_create(
+ device->ws, 8, 8, RADEON_DOMAIN_GTT,
+ RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING,
+ RADV_BO_PRIORITY_FENCE);
+ if (!event->bo) {
+ radv_destroy_event(device, pAllocator, event);
+ return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ }
- event->map = (uint64_t*)device->ws->buffer_map(event->bo);
- if (!event->map) {
- radv_destroy_event(device, pAllocator, event);
- return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
- }
+ event->map = (uint64_t *)device->ws->buffer_map(event->bo);
+ if (!event->map) {
+ radv_destroy_event(device, pAllocator, event);
+ return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ }
- *pEvent = radv_event_to_handle(event);
+ *pEvent = radv_event_to_handle(event);
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
-void radv_DestroyEvent(
- VkDevice _device,
- VkEvent _event,
- const VkAllocationCallbacks* pAllocator)
+void
+radv_DestroyEvent(VkDevice _device, VkEvent _event, const VkAllocationCallbacks *pAllocator)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_event, event, _event);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_event, event, _event);
- if (!event)
- return;
+ if (!event)
+ return;
- radv_destroy_event(device, pAllocator, event);
+ radv_destroy_event(device, pAllocator, event);
}
-VkResult radv_GetEventStatus(
- VkDevice _device,
- VkEvent _event)
+VkResult
+radv_GetEventStatus(VkDevice _device, VkEvent _event)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_event, event, _event);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_event, event, _event);
- if (radv_device_is_lost(device))
- return VK_ERROR_DEVICE_LOST;
+ if (radv_device_is_lost(device))
+ return VK_ERROR_DEVICE_LOST;
- if (*event->map == 1)
- return VK_EVENT_SET;
- return VK_EVENT_RESET;
+ if (*event->map == 1)
+ return VK_EVENT_SET;
+ return VK_EVENT_RESET;
}
-VkResult radv_SetEvent(
- VkDevice _device,
- VkEvent _event)
+VkResult
+radv_SetEvent(VkDevice _device, VkEvent _event)
{
- RADV_FROM_HANDLE(radv_event, event, _event);
- *event->map = 1;
+ RADV_FROM_HANDLE(radv_event, event, _event);
+ *event->map = 1;
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
-VkResult radv_ResetEvent(
- VkDevice _device,
- VkEvent _event)
+VkResult
+radv_ResetEvent(VkDevice _device, VkEvent _event)
{
- RADV_FROM_HANDLE(radv_event, event, _event);
- *event->map = 0;
+ RADV_FROM_HANDLE(radv_event, event, _event);
+ *event->map = 0;
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
static void
-radv_destroy_buffer(struct radv_device *device,
- const VkAllocationCallbacks *pAllocator,
- struct radv_buffer *buffer)
+radv_destroy_buffer(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
+ struct radv_buffer *buffer)
{
- if ((buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) && buffer->bo)
- device->ws->buffer_destroy(device->ws, buffer->bo);
+ if ((buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) && buffer->bo)
+ device->ws->buffer_destroy(device->ws, buffer->bo);
- vk_object_base_finish(&buffer->base);
- vk_free2(&device->vk.alloc, pAllocator, buffer);
+ vk_object_base_finish(&buffer->base);
+ vk_free2(&device->vk.alloc, pAllocator, buffer);
}
-VkResult radv_CreateBuffer(
- VkDevice _device,
- const VkBufferCreateInfo* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkBuffer* pBuffer)
+VkResult
+radv_CreateBuffer(VkDevice _device, const VkBufferCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkBuffer *pBuffer)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- struct radv_buffer *buffer;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ struct radv_buffer *buffer;
- if (pCreateInfo->size > RADV_MAX_MEMORY_ALLOCATION_SIZE)
- return VK_ERROR_OUT_OF_DEVICE_MEMORY;
+ if (pCreateInfo->size > RADV_MAX_MEMORY_ALLOCATION_SIZE)
+ return VK_ERROR_OUT_OF_DEVICE_MEMORY;
- assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
+ assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
- buffer = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*buffer), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (buffer == NULL)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ buffer = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*buffer), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (buffer == NULL)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- vk_object_base_init(&device->vk, &buffer->base, VK_OBJECT_TYPE_BUFFER);
+ vk_object_base_init(&device->vk, &buffer->base, VK_OBJECT_TYPE_BUFFER);
- buffer->size = pCreateInfo->size;
- buffer->usage = pCreateInfo->usage;
- buffer->bo = NULL;
- buffer->offset = 0;
- buffer->flags = pCreateInfo->flags;
+ buffer->size = pCreateInfo->size;
+ buffer->usage = pCreateInfo->usage;
+ buffer->bo = NULL;
+ buffer->offset = 0;
+ buffer->flags = pCreateInfo->flags;
- buffer->shareable = vk_find_struct_const(pCreateInfo->pNext,
- EXTERNAL_MEMORY_BUFFER_CREATE_INFO) != NULL;
+ buffer->shareable =
+ vk_find_struct_const(pCreateInfo->pNext, EXTERNAL_MEMORY_BUFFER_CREATE_INFO) != NULL;
- if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
- buffer->bo = device->ws->buffer_create(device->ws,
- align64(buffer->size, 4096),
- 4096, 0, RADEON_FLAG_VIRTUAL,
- RADV_BO_PRIORITY_VIRTUAL);
- if (!buffer->bo) {
- radv_destroy_buffer(device, pAllocator, buffer);
- return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
- }
- }
+ if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
+ buffer->bo = device->ws->buffer_create(device->ws, align64(buffer->size, 4096), 4096, 0,
+ RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL);
+ if (!buffer->bo) {
+ radv_destroy_buffer(device, pAllocator, buffer);
+ return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ }
+ }
- *pBuffer = radv_buffer_to_handle(buffer);
+ *pBuffer = radv_buffer_to_handle(buffer);
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
-void radv_DestroyBuffer(
- VkDevice _device,
- VkBuffer _buffer,
- const VkAllocationCallbacks* pAllocator)
+void
+radv_DestroyBuffer(VkDevice _device, VkBuffer _buffer, const VkAllocationCallbacks *pAllocator)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
- if (!buffer)
- return;
+ if (!buffer)
+ return;
- radv_destroy_buffer(device, pAllocator, buffer);
+ radv_destroy_buffer(device, pAllocator, buffer);
}
-VkDeviceAddress radv_GetBufferDeviceAddress(
- VkDevice device,
- const VkBufferDeviceAddressInfo* pInfo)
+VkDeviceAddress
+radv_GetBufferDeviceAddress(VkDevice device, const VkBufferDeviceAddressInfo *pInfo)
{
- RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
- return radv_buffer_get_va(buffer->bo) + buffer->offset;
+ RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
+ return radv_buffer_get_va(buffer->bo) + buffer->offset;
}
-
-uint64_t radv_GetBufferOpaqueCaptureAddress(VkDevice device,
- const VkBufferDeviceAddressInfo* pInfo)
+uint64_t
+radv_GetBufferOpaqueCaptureAddress(VkDevice device, const VkBufferDeviceAddressInfo *pInfo)
{
- return 0;
+ return 0;
}
-uint64_t radv_GetDeviceMemoryOpaqueCaptureAddress(VkDevice device,
- const VkDeviceMemoryOpaqueCaptureAddressInfo* pInfo)
+uint64_t
+radv_GetDeviceMemoryOpaqueCaptureAddress(VkDevice device,
+ const VkDeviceMemoryOpaqueCaptureAddressInfo *pInfo)
{
- return 0;
+ return 0;
}
static inline unsigned
si_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil)
{
- if (stencil)
- return plane->surface.u.legacy.stencil_tiling_index[level];
- else
- return plane->surface.u.legacy.tiling_index[level];
+ if (stencil)
+ return plane->surface.u.legacy.stencil_tiling_index[level];
+ else
+ return plane->surface.u.legacy.tiling_index[level];
}
-static uint32_t radv_surface_max_layer_count(struct radv_image_view *iview)
+static uint32_t
+radv_surface_max_layer_count(struct radv_image_view *iview)
{
- return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth : (iview->base_layer + iview->layer_count);
+ return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth
+ : (iview->base_layer + iview->layer_count);
}
static unsigned
get_dcc_max_uncompressed_block_size(const struct radv_device *device,
const struct radv_image_view *iview)
{
- if (device->physical_device->rad_info.chip_class < GFX10 &&
- iview->image->info.samples > 1) {
- if (iview->image->planes[0].surface.bpe == 1)
- return V_028C78_MAX_BLOCK_SIZE_64B;
- else if (iview->image->planes[0].surface.bpe == 2)
- return V_028C78_MAX_BLOCK_SIZE_128B;
- }
+ if (device->physical_device->rad_info.chip_class < GFX10 && iview->image->info.samples > 1) {
+ if (iview->image->planes[0].surface.bpe == 1)
+ return V_028C78_MAX_BLOCK_SIZE_64B;
+ else if (iview->image->planes[0].surface.bpe == 2)
+ return V_028C78_MAX_BLOCK_SIZE_128B;
+ }
- return V_028C78_MAX_BLOCK_SIZE_256B;
+ return V_028C78_MAX_BLOCK_SIZE_256B;
}
static unsigned
get_dcc_min_compressed_block_size(const struct radv_device *device)
{
- if (!device->physical_device->rad_info.has_dedicated_vram) {
- /* amdvlk: [min-compressed-block-size] should be set to 32 for
- * dGPU and 64 for APU because all of our APUs to date use
- * DIMMs which have a request granularity size of 64B while all
- * other chips have a 32B request size.
- */
- return V_028C78_MIN_BLOCK_SIZE_64B;
- }
+ if (!device->physical_device->rad_info.has_dedicated_vram) {
+ /* amdvlk: [min-compressed-block-size] should be set to 32 for
+ * dGPU and 64 for APU because all of our APUs to date use
+ * DIMMs which have a request granularity size of 64B while all
+ * other chips have a 32B request size.
+ */
+ return V_028C78_MIN_BLOCK_SIZE_64B;
+ }
- return V_028C78_MIN_BLOCK_SIZE_32B;
+ return V_028C78_MIN_BLOCK_SIZE_32B;
}
static uint32_t
-radv_init_dcc_control_reg(struct radv_device *device,
- struct radv_image_view *iview)
-{
- unsigned max_uncompressed_block_size = get_dcc_max_uncompressed_block_size(device, iview);
- unsigned min_compressed_block_size = get_dcc_min_compressed_block_size(device);
- unsigned max_compressed_block_size;
- unsigned independent_128b_blocks;
- unsigned independent_64b_blocks;
-
- if (!radv_dcc_enabled(iview->image, iview->base_mip))
- return 0;
-
- /* For GFX9+ ac_surface computes values for us (except min_compressed
- * and max_uncompressed) */
- if (device->physical_device->rad_info.chip_class >= GFX9) {
- max_compressed_block_size = iview->image->planes[0].surface.u.gfx9.dcc.max_compressed_block_size;
- independent_128b_blocks = iview->image->planes[0].surface.u.gfx9.dcc.independent_128B_blocks;
- independent_64b_blocks = iview->image->planes[0].surface.u.gfx9.dcc.independent_64B_blocks;
- } else {
- independent_128b_blocks = 0;
-
- if (iview->image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
- VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
- VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) {
- /* If this DCC image is potentially going to be used in texture
- * fetches, we need some special settings.
- */
- independent_64b_blocks = 1;
- max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
- } else {
- /* MAX_UNCOMPRESSED_BLOCK_SIZE must be >=
- * MAX_COMPRESSED_BLOCK_SIZE. Set MAX_COMPRESSED_BLOCK_SIZE as
- * big as possible for better compression state.
- */
- independent_64b_blocks = 0;
- max_compressed_block_size = max_uncompressed_block_size;
- }
- }
-
- return S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
- S_028C78_MAX_COMPRESSED_BLOCK_SIZE(max_compressed_block_size) |
- S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
- S_028C78_INDEPENDENT_64B_BLOCKS(independent_64b_blocks) |
- S_028C78_INDEPENDENT_128B_BLOCKS(independent_128b_blocks);
+radv_init_dcc_control_reg(struct radv_device *device, struct radv_image_view *iview)
+{
+ unsigned max_uncompressed_block_size = get_dcc_max_uncompressed_block_size(device, iview);
+ unsigned min_compressed_block_size = get_dcc_min_compressed_block_size(device);
+ unsigned max_compressed_block_size;
+ unsigned independent_128b_blocks;
+ unsigned independent_64b_blocks;
+
+ if (!radv_dcc_enabled(iview->image, iview->base_mip))
+ return 0;
+
+ /* For GFX9+ ac_surface computes values for us (except min_compressed
+ * and max_uncompressed) */
+ if (device->physical_device->rad_info.chip_class >= GFX9) {
+ max_compressed_block_size =
+ iview->image->planes[0].surface.u.gfx9.dcc.max_compressed_block_size;
+ independent_128b_blocks = iview->image->planes[0].surface.u.gfx9.dcc.independent_128B_blocks;
+ independent_64b_blocks = iview->image->planes[0].surface.u.gfx9.dcc.independent_64B_blocks;
+ } else {
+ independent_128b_blocks = 0;
+
+ if (iview->image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
+ VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) {
+ /* If this DCC image is potentially going to be used in texture
+ * fetches, we need some special settings.
+ */
+ independent_64b_blocks = 1;
+ max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
+ } else {
+ /* MAX_UNCOMPRESSED_BLOCK_SIZE must be >=
+ * MAX_COMPRESSED_BLOCK_SIZE. Set MAX_COMPRESSED_BLOCK_SIZE as
+ * big as possible for better compression state.
+ */
+ independent_64b_blocks = 0;
+ max_compressed_block_size = max_uncompressed_block_size;
+ }
+ }
+
+ return S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
+ S_028C78_MAX_COMPRESSED_BLOCK_SIZE(max_compressed_block_size) |
+ S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
+ S_028C78_INDEPENDENT_64B_BLOCKS(independent_64b_blocks) |
+ S_028C78_INDEPENDENT_128B_BLOCKS(independent_128b_blocks);
}
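/* Illustrative sketch of where this value ends up (the actual emit path lives
 * in radv_cmd_buffer.c and may differ in detail; the register name and the
 * 0x3c per-target stride below are assumptions): the packed result is cached
 * in cb->cb_dcc_control and later written to the per-target DCC control
 * register when the framebuffer state is emitted, conceptually
 *
 *    radeon_set_context_reg(cs, R_028C78_CB_COLOR0_DCC_CONTROL + index * 0x3c,
 *                           cb->cb_dcc_control);
 */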
void
-radv_initialise_color_surface(struct radv_device *device,
- struct radv_color_buffer_info *cb,
- struct radv_image_view *iview)
-{
- const struct util_format_description *desc;
- unsigned ntype, format, swap, endian;
- unsigned blend_clamp = 0, blend_bypass = 0;
- uint64_t va;
- const struct radv_image_plane *plane = &iview->image->planes[iview->plane_id];
- const struct radeon_surf *surf = &plane->surface;
-
- desc = vk_format_description(iview->vk_format);
-
- memset(cb, 0, sizeof(*cb));
-
- /* Intensity is implemented as Red, so treat it that way. */
- cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == PIPE_SWIZZLE_1);
-
- va = radv_buffer_get_va(iview->bo) + iview->image->offset;
-
- cb->cb_color_base = va >> 8;
-
- if (device->physical_device->rad_info.chip_class >= GFX9) {
- if (device->physical_device->rad_info.chip_class >= GFX10) {
- cb->cb_color_attrib3 |= S_028EE0_COLOR_SW_MODE(surf->u.gfx9.surf.swizzle_mode) |
- S_028EE0_FMASK_SW_MODE(surf->u.gfx9.fmask.swizzle_mode) |
- S_028EE0_CMASK_PIPE_ALIGNED(1) |
- S_028EE0_DCC_PIPE_ALIGNED(surf->u.gfx9.dcc.pipe_aligned);
- } else {
- struct gfx9_surf_meta_flags meta = {
- .rb_aligned = 1,
- .pipe_aligned = 1,
- };
-
- if (surf->dcc_offset)
- meta = surf->u.gfx9.dcc;
-
- cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(surf->u.gfx9.surf.swizzle_mode) |
- S_028C74_FMASK_SW_MODE(surf->u.gfx9.fmask.swizzle_mode) |
- S_028C74_RB_ALIGNED(meta.rb_aligned) |
- S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
- cb->cb_mrt_epitch = S_0287A0_EPITCH(surf->u.gfx9.surf.epitch);
- }
-
- cb->cb_color_base += surf->u.gfx9.surf_offset >> 8;
- cb->cb_color_base |= surf->tile_swizzle;
- } else {
- const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip];
- unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
-
- cb->cb_color_base += level_info->offset >> 8;
- if (level_info->mode == RADEON_SURF_MODE_2D)
- cb->cb_color_base |= surf->tile_swizzle;
-
- pitch_tile_max = level_info->nblk_x / 8 - 1;
- slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
- tile_mode_index = si_tile_mode_index(plane, iview->base_mip, false);
-
- cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
- cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
- cb->cb_color_cmask_slice = surf->u.legacy.cmask_slice_tile_max;
-
- cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
-
- if (radv_image_has_fmask(iview->image)) {
- if (device->physical_device->rad_info.chip_class >= GFX7)
- cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(surf->u.legacy.fmask.pitch_in_pixels / 8 - 1);
- cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(surf->u.legacy.fmask.tiling_index);
- cb->cb_color_fmask_slice = S_028C88_TILE_MAX(surf->u.legacy.fmask.slice_tile_max);
- } else {
- /* This must be set for fast clear to work without FMASK. */
- if (device->physical_device->rad_info.chip_class >= GFX7)
- cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
- cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
- cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
- }
- }
-
- /* CMASK variables */
- va = radv_buffer_get_va(iview->bo) + iview->image->offset;
- va += surf->cmask_offset;
- cb->cb_color_cmask = va >> 8;
-
- va = radv_buffer_get_va(iview->bo) + iview->image->offset;
- va += surf->dcc_offset;
-
- if (radv_dcc_enabled(iview->image, iview->base_mip) &&
- device->physical_device->rad_info.chip_class <= GFX8)
- va += plane->surface.u.legacy.level[iview->base_mip].dcc_offset;
-
- unsigned dcc_tile_swizzle = surf->tile_swizzle;
- dcc_tile_swizzle &= (surf->dcc_alignment - 1) >> 8;
-
- cb->cb_dcc_base = va >> 8;
- cb->cb_dcc_base |= dcc_tile_swizzle;
-
- /* GFX10 field has the same base shift as the GFX6 field. */
- uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
- cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
- S_028C6C_SLICE_MAX_GFX10(max_slice);
-
- if (iview->image->info.samples > 1) {
- unsigned log_samples = util_logbase2(iview->image->info.samples);
-
- cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
- S_028C74_NUM_FRAGMENTS(log_samples);
- }
-
- if (radv_image_has_fmask(iview->image)) {
- va = radv_buffer_get_va(iview->bo) + iview->image->offset + surf->fmask_offset;
- cb->cb_color_fmask = va >> 8;
- cb->cb_color_fmask |= surf->fmask_tile_swizzle;
- } else {
- cb->cb_color_fmask = cb->cb_color_base;
- }
-
- ntype = radv_translate_color_numformat(iview->vk_format,
- desc,
- vk_format_get_first_non_void_channel(iview->vk_format));
- format = radv_translate_colorformat(iview->vk_format);
- if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
- radv_finishme("Illegal color\n");
- swap = radv_translate_colorswap(iview->vk_format, false);
- endian = radv_colorformat_endian_swap(format);
-
- /* blend clamp should be set for all NORM/SRGB types */
- if (ntype == V_028C70_NUMBER_UNORM ||
- ntype == V_028C70_NUMBER_SNORM ||
- ntype == V_028C70_NUMBER_SRGB)
- blend_clamp = 1;
-
- /* set blend bypass according to docs if SINT/UINT or
- 8/24 COLOR variants */
- if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
- format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
- format == V_028C70_COLOR_X24_8_32_FLOAT) {
- blend_clamp = 0;
- blend_bypass = 1;
- }
+radv_initialise_color_surface(struct radv_device *device, struct radv_color_buffer_info *cb,
+ struct radv_image_view *iview)
+{
+ const struct util_format_description *desc;
+ unsigned ntype, format, swap, endian;
+ unsigned blend_clamp = 0, blend_bypass = 0;
+ uint64_t va;
+ const struct radv_image_plane *plane = &iview->image->planes[iview->plane_id];
+ const struct radeon_surf *surf = &plane->surface;
+
+ desc = vk_format_description(iview->vk_format);
+
+ memset(cb, 0, sizeof(*cb));
+
+ /* Intensity is implemented as Red, so treat it that way. */
+ cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == PIPE_SWIZZLE_1);
+
+ va = radv_buffer_get_va(iview->bo) + iview->image->offset;
+
+ cb->cb_color_base = va >> 8;
+
+ if (device->physical_device->rad_info.chip_class >= GFX9) {
+ if (device->physical_device->rad_info.chip_class >= GFX10) {
+ cb->cb_color_attrib3 |= S_028EE0_COLOR_SW_MODE(surf->u.gfx9.surf.swizzle_mode) |
+ S_028EE0_FMASK_SW_MODE(surf->u.gfx9.fmask.swizzle_mode) |
+ S_028EE0_CMASK_PIPE_ALIGNED(1) |
+ S_028EE0_DCC_PIPE_ALIGNED(surf->u.gfx9.dcc.pipe_aligned);
+ } else {
+ struct gfx9_surf_meta_flags meta = {
+ .rb_aligned = 1,
+ .pipe_aligned = 1,
+ };
+
+ if (surf->dcc_offset)
+ meta = surf->u.gfx9.dcc;
+
+ cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(surf->u.gfx9.surf.swizzle_mode) |
+ S_028C74_FMASK_SW_MODE(surf->u.gfx9.fmask.swizzle_mode) |
+ S_028C74_RB_ALIGNED(meta.rb_aligned) |
+ S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
+ cb->cb_mrt_epitch = S_0287A0_EPITCH(surf->u.gfx9.surf.epitch);
+ }
+
+ cb->cb_color_base += surf->u.gfx9.surf_offset >> 8;
+ cb->cb_color_base |= surf->tile_swizzle;
+ } else {
+ const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip];
+ unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
+
+ cb->cb_color_base += level_info->offset >> 8;
+ if (level_info->mode == RADEON_SURF_MODE_2D)
+ cb->cb_color_base |= surf->tile_swizzle;
+
+ pitch_tile_max = level_info->nblk_x / 8 - 1;
+ slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
+ tile_mode_index = si_tile_mode_index(plane, iview->base_mip, false);
+
+ cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
+ cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
+ cb->cb_color_cmask_slice = surf->u.legacy.cmask_slice_tile_max;
+
+ cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
+
+ if (radv_image_has_fmask(iview->image)) {
+ if (device->physical_device->rad_info.chip_class >= GFX7)
+ cb->cb_color_pitch |=
+ S_028C64_FMASK_TILE_MAX(surf->u.legacy.fmask.pitch_in_pixels / 8 - 1);
+ cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(surf->u.legacy.fmask.tiling_index);
+ cb->cb_color_fmask_slice = S_028C88_TILE_MAX(surf->u.legacy.fmask.slice_tile_max);
+ } else {
+ /* This must be set for fast clear to work without FMASK. */
+ if (device->physical_device->rad_info.chip_class >= GFX7)
+ cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
+ cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
+ cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
+ }
+ }
+
+ /* CMASK variables */
+ va = radv_buffer_get_va(iview->bo) + iview->image->offset;
+ va += surf->cmask_offset;
+ cb->cb_color_cmask = va >> 8;
+
+ va = radv_buffer_get_va(iview->bo) + iview->image->offset;
+ va += surf->dcc_offset;
+
+ if (radv_dcc_enabled(iview->image, iview->base_mip) &&
+ device->physical_device->rad_info.chip_class <= GFX8)
+ va += plane->surface.u.legacy.level[iview->base_mip].dcc_offset;
+
+ unsigned dcc_tile_swizzle = surf->tile_swizzle;
+ dcc_tile_swizzle &= (surf->dcc_alignment - 1) >> 8;
+
+ cb->cb_dcc_base = va >> 8;
+ cb->cb_dcc_base |= dcc_tile_swizzle;
+
+ /* GFX10 field has the same base shift as the GFX6 field. */
+ uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
+ cb->cb_color_view =
+ S_028C6C_SLICE_START(iview->base_layer) | S_028C6C_SLICE_MAX_GFX10(max_slice);
+
+ if (iview->image->info.samples > 1) {
+ unsigned log_samples = util_logbase2(iview->image->info.samples);
+
+ cb->cb_color_attrib |=
+ S_028C74_NUM_SAMPLES(log_samples) | S_028C74_NUM_FRAGMENTS(log_samples);
+ }
+
+ if (radv_image_has_fmask(iview->image)) {
+ va = radv_buffer_get_va(iview->bo) + iview->image->offset + surf->fmask_offset;
+ cb->cb_color_fmask = va >> 8;
+ cb->cb_color_fmask |= surf->fmask_tile_swizzle;
+ } else {
+ cb->cb_color_fmask = cb->cb_color_base;
+ }
+
+ ntype = radv_translate_color_numformat(iview->vk_format, desc,
+ vk_format_get_first_non_void_channel(iview->vk_format));
+ format = radv_translate_colorformat(iview->vk_format);
+ if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
+ radv_finishme("Illegal color\n");
+ swap = radv_translate_colorswap(iview->vk_format, false);
+ endian = radv_colorformat_endian_swap(format);
+
+ /* blend clamp should be set for all NORM/SRGB types */
+ if (ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM ||
+ ntype == V_028C70_NUMBER_SRGB)
+ blend_clamp = 1;
+
+ /* set blend bypass according to docs if SINT/UINT or
+ 8/24 COLOR variants */
+ if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
+ format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
+ format == V_028C70_COLOR_X24_8_32_FLOAT) {
+ blend_clamp = 0;
+ blend_bypass = 1;
+ }
#if 0
if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
(format == V_028C70_COLOR_8 ||
@@ -6744,1268 +6417,1239 @@ radv_initialise_color_surface(struct radv_device *device,
format == V_028C70_COLOR_8_8_8_8))
->color_is_int8 = true;
#endif
- cb->cb_color_info = S_028C70_FORMAT(format) |
- S_028C70_COMP_SWAP(swap) |
- S_028C70_BLEND_CLAMP(blend_clamp) |
- S_028C70_BLEND_BYPASS(blend_bypass) |
- S_028C70_SIMPLE_FLOAT(1) |
- S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
- ntype != V_028C70_NUMBER_SNORM &&
- ntype != V_028C70_NUMBER_SRGB &&
- format != V_028C70_COLOR_8_24 &&
- format != V_028C70_COLOR_24_8) |
- S_028C70_NUMBER_TYPE(ntype) |
- S_028C70_ENDIAN(endian);
- if (radv_image_has_fmask(iview->image)) {
- cb->cb_color_info |= S_028C70_COMPRESSION(1);
- if (device->physical_device->rad_info.chip_class == GFX6) {
- unsigned fmask_bankh = util_logbase2(surf->u.legacy.fmask.bankh);
- cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
- }
-
- if (radv_image_is_tc_compat_cmask(iview->image)) {
- /* Allow the texture block to read FMASK directly
- * without decompressing it. This bit must be cleared
- * when performing FMASK_DECOMPRESS or DCC_COMPRESS,
- * otherwise the operation doesn't happen.
- */
- cb->cb_color_info |= S_028C70_FMASK_COMPRESS_1FRAG_ONLY(1);
-
- if (device->physical_device->rad_info.chip_class == GFX8) {
- /* Set CMASK into a tiling format that allows
- * the texture block to read it.
- */
- cb->cb_color_info |= S_028C70_CMASK_ADDR_TYPE(2);
- }
- }
- }
-
- if (radv_image_has_cmask(iview->image) &&
- !(device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
- cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
-
- if (radv_dcc_enabled(iview->image, iview->base_mip))
- cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
-
- cb->cb_dcc_control = radv_init_dcc_control_reg(device, iview);
-
- /* This must be set for fast clear to work without FMASK. */
- if (!radv_image_has_fmask(iview->image) &&
- device->physical_device->rad_info.chip_class == GFX6) {
- unsigned bankh = util_logbase2(surf->u.legacy.bankh);
- cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
- }
-
- if (device->physical_device->rad_info.chip_class >= GFX9) {
- unsigned mip0_depth = iview->image->type == VK_IMAGE_TYPE_3D ?
- (iview->extent.depth - 1) : (iview->image->info.array_size - 1);
- unsigned width = vk_format_get_plane_width(iview->image->vk_format,
- iview->plane_id, iview->extent.width);
- unsigned height = vk_format_get_plane_height(iview->image->vk_format,
- iview->plane_id, iview->extent.height);
-
- if (device->physical_device->rad_info.chip_class >= GFX10) {
- cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX10(iview->base_mip);
-
- cb->cb_color_attrib3 |= S_028EE0_MIP0_DEPTH(mip0_depth) |
- S_028EE0_RESOURCE_TYPE(surf->u.gfx9.resource_type) |
- S_028EE0_RESOURCE_LEVEL(1);
- } else {
- cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX9(iview->base_mip);
- cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
- S_028C74_RESOURCE_TYPE(surf->u.gfx9.resource_type);
- }
-
- cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(width - 1) |
- S_028C68_MIP0_HEIGHT(height - 1) |
- S_028C68_MAX_MIP(iview->image->info.levels - 1);
- }
+ cb->cb_color_info =
+ S_028C70_FORMAT(format) | S_028C70_COMP_SWAP(swap) | S_028C70_BLEND_CLAMP(blend_clamp) |
+ S_028C70_BLEND_BYPASS(blend_bypass) | S_028C70_SIMPLE_FLOAT(1) |
+ S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM && ntype != V_028C70_NUMBER_SNORM &&
+ ntype != V_028C70_NUMBER_SRGB && format != V_028C70_COLOR_8_24 &&
+ format != V_028C70_COLOR_24_8) |
+ S_028C70_NUMBER_TYPE(ntype) | S_028C70_ENDIAN(endian);
+ if (radv_image_has_fmask(iview->image)) {
+ cb->cb_color_info |= S_028C70_COMPRESSION(1);
+ if (device->physical_device->rad_info.chip_class == GFX6) {
+ unsigned fmask_bankh = util_logbase2(surf->u.legacy.fmask.bankh);
+ cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
+ }
+
+ if (radv_image_is_tc_compat_cmask(iview->image)) {
+ /* Allow the texture block to read FMASK directly
+ * without decompressing it. This bit must be cleared
+ * when performing FMASK_DECOMPRESS or DCC_COMPRESS,
+ * otherwise the operation doesn't happen.
+ */
+ cb->cb_color_info |= S_028C70_FMASK_COMPRESS_1FRAG_ONLY(1);
+
+ if (device->physical_device->rad_info.chip_class == GFX8) {
+ /* Set CMASK into a tiling format that allows
+ * the texture block to read it.
+ */
+ cb->cb_color_info |= S_028C70_CMASK_ADDR_TYPE(2);
+ }
+ }
+ }
+
+ if (radv_image_has_cmask(iview->image) &&
+ !(device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
+ cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
+
+ if (radv_dcc_enabled(iview->image, iview->base_mip))
+ cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
+
+ cb->cb_dcc_control = radv_init_dcc_control_reg(device, iview);
+
+ /* This must be set for fast clear to work without FMASK. */
+ if (!radv_image_has_fmask(iview->image) &&
+ device->physical_device->rad_info.chip_class == GFX6) {
+ unsigned bankh = util_logbase2(surf->u.legacy.bankh);
+ cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
+ }
+
+ if (device->physical_device->rad_info.chip_class >= GFX9) {
+ unsigned mip0_depth = iview->image->type == VK_IMAGE_TYPE_3D
+ ? (iview->extent.depth - 1)
+ : (iview->image->info.array_size - 1);
+ unsigned width =
+ vk_format_get_plane_width(iview->image->vk_format, iview->plane_id, iview->extent.width);
+ unsigned height =
+ vk_format_get_plane_height(iview->image->vk_format, iview->plane_id, iview->extent.height);
+
+ if (device->physical_device->rad_info.chip_class >= GFX10) {
+ cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX10(iview->base_mip);
+
+ cb->cb_color_attrib3 |= S_028EE0_MIP0_DEPTH(mip0_depth) |
+ S_028EE0_RESOURCE_TYPE(surf->u.gfx9.resource_type) |
+ S_028EE0_RESOURCE_LEVEL(1);
+ } else {
+ cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX9(iview->base_mip);
+ cb->cb_color_attrib |=
+ S_028C74_MIP0_DEPTH(mip0_depth) | S_028C74_RESOURCE_TYPE(surf->u.gfx9.resource_type);
+ }
+
+ cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(width - 1) | S_028C68_MIP0_HEIGHT(height - 1) |
+ S_028C68_MAX_MIP(iview->image->info.levels - 1);
+ }
}
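/* Rough note on the address math above: the ">> 8" shifts store GPU virtual
 * addresses in units of 256 bytes, which is the granularity the CB base
 * address registers expect, i.e. effectively
 *
 *    cb->cb_color_base = (radv_buffer_get_va(iview->bo) +
 *                         iview->image->offset + surf_offset) >> 8;
 *
 * where surf_offset stands in for whichever per-plane/per-level offset
 * applies on the given generation.
 */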
static unsigned
-radv_calc_decompress_on_z_planes(struct radv_device *device,
- struct radv_image_view *iview)
-{
- unsigned max_zplanes = 0;
-
- assert(radv_image_is_tc_compat_htile(iview->image));
-
- if (device->physical_device->rad_info.chip_class >= GFX9) {
- /* Default value for 32-bit depth surfaces. */
- max_zplanes = 4;
-
- if (iview->vk_format == VK_FORMAT_D16_UNORM &&
- iview->image->info.samples > 1)
- max_zplanes = 2;
-
- max_zplanes = max_zplanes + 1;
- } else {
- if (iview->vk_format == VK_FORMAT_D16_UNORM) {
- /* Do not enable Z plane compression for 16-bit depth
-    * surfaces because it isn't supported on GFX8. Only
- * 32-bit depth surfaces are supported by the hardware.
-    * This allows us to maintain shader compatibility and to
- * reduce the number of depth decompressions.
- */
- max_zplanes = 1;
- } else {
- if (iview->image->info.samples <= 1)
- max_zplanes = 5;
- else if (iview->image->info.samples <= 4)
- max_zplanes = 3;
- else
- max_zplanes = 2;
- }
- }
-
- return max_zplanes;
+radv_calc_decompress_on_z_planes(struct radv_device *device, struct radv_image_view *iview)
+{
+ unsigned max_zplanes = 0;
+
+ assert(radv_image_is_tc_compat_htile(iview->image));
+
+ if (device->physical_device->rad_info.chip_class >= GFX9) {
+ /* Default value for 32-bit depth surfaces. */
+ max_zplanes = 4;
+
+ if (iview->vk_format == VK_FORMAT_D16_UNORM && iview->image->info.samples > 1)
+ max_zplanes = 2;
+
+ max_zplanes = max_zplanes + 1;
+ } else {
+ if (iview->vk_format == VK_FORMAT_D16_UNORM) {
+ /* Do not enable Z plane compression for 16-bit depth
+          * surfaces because it isn't supported on GFX8. Only
+ * 32-bit depth surfaces are supported by the hardware.
+          * This allows us to maintain shader compatibility and to
+ * reduce the number of depth decompressions.
+ */
+ max_zplanes = 1;
+ } else {
+ if (iview->image->info.samples <= 1)
+ max_zplanes = 5;
+ else if (iview->image->info.samples <= 4)
+ max_zplanes = 3;
+ else
+ max_zplanes = 2;
+ }
+ }
+
+ return max_zplanes;
}
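/* Summary of the values produced above, for reference:
 *
 *    GFX9+:  D16 with MSAA        -> 2 + 1 = 3
 *            everything else     -> 4 + 1 = 5
 *    GFX8-:  D16                 -> 1 (Z plane compression effectively off)
 *            1 sample            -> 5
 *            2-4 samples         -> 3
 *            8 samples           -> 2
 */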
void
-radv_initialise_ds_surface(struct radv_device *device,
- struct radv_ds_buffer_info *ds,
- struct radv_image_view *iview)
-{
- unsigned level = iview->base_mip;
- unsigned format, stencil_format;
- uint64_t va, s_offs, z_offs;
- bool stencil_only = false;
- const struct radv_image_plane *plane = &iview->image->planes[0];
- const struct radeon_surf *surf = &plane->surface;
-
- assert(vk_format_get_plane_count(iview->image->vk_format) == 1);
-
- memset(ds, 0, sizeof(*ds));
- switch (iview->image->vk_format) {
- case VK_FORMAT_D24_UNORM_S8_UINT:
- case VK_FORMAT_X8_D24_UNORM_PACK32:
- ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
- break;
- case VK_FORMAT_D16_UNORM:
- case VK_FORMAT_D16_UNORM_S8_UINT:
- ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
- break;
- case VK_FORMAT_D32_SFLOAT:
- case VK_FORMAT_D32_SFLOAT_S8_UINT:
- ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
- S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
- break;
- case VK_FORMAT_S8_UINT:
- stencil_only = true;
- break;
- default:
- break;
- }
-
- format = radv_translate_dbformat(iview->image->vk_format);
- stencil_format = surf->has_stencil ?
- V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;
-
- uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
- ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
- S_028008_SLICE_MAX(max_slice);
- if (device->physical_device->rad_info.chip_class >= GFX10) {
- ds->db_depth_view |= S_028008_SLICE_START_HI(iview->base_layer >> 11) |
- S_028008_SLICE_MAX_HI(max_slice >> 11);
- }
-
- ds->db_htile_data_base = 0;
- ds->db_htile_surface = 0;
-
- va = radv_buffer_get_va(iview->bo) + iview->image->offset;
- s_offs = z_offs = va;
-
- if (device->physical_device->rad_info.chip_class >= GFX9) {
- assert(surf->u.gfx9.surf_offset == 0);
- s_offs += surf->u.gfx9.stencil_offset;
-
- ds->db_z_info = S_028038_FORMAT(format) |
- S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) |
- S_028038_SW_MODE(surf->u.gfx9.surf.swizzle_mode) |
- S_028038_MAXMIP(iview->image->info.levels - 1) |
- S_028038_ZRANGE_PRECISION(1);
- ds->db_stencil_info = S_02803C_FORMAT(stencil_format) |
- S_02803C_SW_MODE(surf->u.gfx9.stencil.swizzle_mode);
-
- if (device->physical_device->rad_info.chip_class == GFX9) {
- ds->db_z_info2 = S_028068_EPITCH(surf->u.gfx9.surf.epitch);
- ds->db_stencil_info2 = S_02806C_EPITCH(surf->u.gfx9.stencil.epitch);
- }
-
- ds->db_depth_view |= S_028008_MIPID(level);
- ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |
- S_02801C_Y_MAX(iview->image->info.height - 1);
-
- if (radv_htile_enabled(iview->image, level)) {
- ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);
-
- if (radv_image_is_tc_compat_htile(iview->image)) {
- unsigned max_zplanes =
- radv_calc_decompress_on_z_planes(device, iview);
-
- ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
-
- if (device->physical_device->rad_info.chip_class >= GFX10) {
- ds->db_z_info |= S_028040_ITERATE_FLUSH(1);
- ds->db_stencil_info |= S_028044_ITERATE_FLUSH(1);
- } else {
- ds->db_z_info |= S_028038_ITERATE_FLUSH(1);
- ds->db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
- }
- }
-
- if (radv_image_tile_stencil_disabled(device, iview->image)) {
- ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
- }
-
- va = radv_buffer_get_va(iview->bo) + iview->image->offset +
- surf->htile_offset;
- ds->db_htile_data_base = va >> 8;
- ds->db_htile_surface = S_028ABC_FULL_CACHE(1) |
- S_028ABC_PIPE_ALIGNED(1);
-
- if (device->physical_device->rad_info.chip_class == GFX9) {
- ds->db_htile_surface |= S_028ABC_RB_ALIGNED(1);
- }
- }
- } else {
- const struct legacy_surf_level *level_info = &surf->u.legacy.level[level];
-
- if (stencil_only)
- level_info = &surf->u.legacy.stencil_level[level];
-
- z_offs += surf->u.legacy.level[level].offset;
- s_offs += surf->u.legacy.stencil_level[level].offset;
-
- ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!radv_image_is_tc_compat_htile(iview->image));
- ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
- ds->db_stencil_info = S_028044_FORMAT(stencil_format);
-
- if (iview->image->info.samples > 1)
- ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));
-
- if (device->physical_device->rad_info.chip_class >= GFX7) {
- struct radeon_info *info = &device->physical_device->rad_info;
- unsigned tiling_index = surf->u.legacy.tiling_index[level];
- unsigned stencil_index = surf->u.legacy.stencil_tiling_index[level];
- unsigned macro_index = surf->u.legacy.macro_tile_index;
- unsigned tile_mode = info->si_tile_mode_array[tiling_index];
- unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
- unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
-
- if (stencil_only)
- tile_mode = stencil_tile_mode;
-
- ds->db_depth_info |=
- S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
- S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
- S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
- S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
- S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
- S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
- ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
- ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
- } else {
- unsigned tile_mode_index = si_tile_mode_index(&iview->image->planes[0], level, false);
- ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
- tile_mode_index = si_tile_mode_index(&iview->image->planes[0], level, true);
- ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
- if (stencil_only)
- ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
- }
-
- ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
- S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
- ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
-
- if (radv_htile_enabled(iview->image, level)) {
- ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);
-
- if (radv_image_tile_stencil_disabled(device, iview->image)) {
- ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
- }
-
- va = radv_buffer_get_va(iview->bo) + iview->image->offset +
- surf->htile_offset;
- ds->db_htile_data_base = va >> 8;
- ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
-
- if (radv_image_is_tc_compat_htile(iview->image)) {
- unsigned max_zplanes =
- radv_calc_decompress_on_z_planes(device, iview);
-
- ds->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
- ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
- }
- }
- }
-
- ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
- ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
-}
-
-VkResult radv_CreateFramebuffer(
- VkDevice _device,
- const VkFramebufferCreateInfo* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkFramebuffer* pFramebuffer)
-{
- RADV_FROM_HANDLE(radv_device, device, _device);
- struct radv_framebuffer *framebuffer;
- const VkFramebufferAttachmentsCreateInfo *imageless_create_info =
- vk_find_struct_const(pCreateInfo->pNext,
- FRAMEBUFFER_ATTACHMENTS_CREATE_INFO);
-
- assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
-
- size_t size = sizeof(*framebuffer);
- if (!imageless_create_info)
- size += sizeof(struct radv_image_view*) * pCreateInfo->attachmentCount;
- framebuffer = vk_alloc2(&device->vk.alloc, pAllocator, size, 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (framebuffer == NULL)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
-
- vk_object_base_init(&device->vk, &framebuffer->base,
- VK_OBJECT_TYPE_FRAMEBUFFER);
-
- framebuffer->attachment_count = pCreateInfo->attachmentCount;
- framebuffer->width = pCreateInfo->width;
- framebuffer->height = pCreateInfo->height;
- framebuffer->layers = pCreateInfo->layers;
- framebuffer->imageless = !!imageless_create_info;
-
- if (!imageless_create_info) {
- for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
- VkImageView _iview = pCreateInfo->pAttachments[i];
- struct radv_image_view *iview = radv_image_view_from_handle(_iview);
- framebuffer->attachments[i] = iview;
- }
- }
-
- *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
- return VK_SUCCESS;
-}
-
-void radv_DestroyFramebuffer(
- VkDevice _device,
- VkFramebuffer _fb,
- const VkAllocationCallbacks* pAllocator)
-{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
-
- if (!fb)
- return;
- vk_object_base_finish(&fb->base);
- vk_free2(&device->vk.alloc, pAllocator, fb);
-}
-
-static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
-{
- switch (address_mode) {
- case VK_SAMPLER_ADDRESS_MODE_REPEAT:
- return V_008F30_SQ_TEX_WRAP;
- case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
- return V_008F30_SQ_TEX_MIRROR;
- case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
- return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
- case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
- return V_008F30_SQ_TEX_CLAMP_BORDER;
- case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
- return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
- default:
- unreachable("illegal tex wrap mode");
- break;
- }
+radv_initialise_ds_surface(struct radv_device *device, struct radv_ds_buffer_info *ds,
+ struct radv_image_view *iview)
+{
+ unsigned level = iview->base_mip;
+ unsigned format, stencil_format;
+ uint64_t va, s_offs, z_offs;
+ bool stencil_only = false;
+ const struct radv_image_plane *plane = &iview->image->planes[0];
+ const struct radeon_surf *surf = &plane->surface;
+
+ assert(vk_format_get_plane_count(iview->image->vk_format) == 1);
+
+ memset(ds, 0, sizeof(*ds));
+ switch (iview->image->vk_format) {
+ case VK_FORMAT_D24_UNORM_S8_UINT:
+ case VK_FORMAT_X8_D24_UNORM_PACK32:
+ ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
+ break;
+ case VK_FORMAT_D16_UNORM:
+ case VK_FORMAT_D16_UNORM_S8_UINT:
+ ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
+ break;
+ case VK_FORMAT_D32_SFLOAT:
+ case VK_FORMAT_D32_SFLOAT_S8_UINT:
+ ds->pa_su_poly_offset_db_fmt_cntl =
+ S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) | S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
+ break;
+ case VK_FORMAT_S8_UINT:
+ stencil_only = true;
+ break;
+ default:
+ break;
+ }
+
+ format = radv_translate_dbformat(iview->image->vk_format);
+ stencil_format = surf->has_stencil ? V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;
+
+ uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
+ ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) | S_028008_SLICE_MAX(max_slice);
+ if (device->physical_device->rad_info.chip_class >= GFX10) {
+ ds->db_depth_view |=
+ S_028008_SLICE_START_HI(iview->base_layer >> 11) | S_028008_SLICE_MAX_HI(max_slice >> 11);
+ }
+
+ ds->db_htile_data_base = 0;
+ ds->db_htile_surface = 0;
+
+ va = radv_buffer_get_va(iview->bo) + iview->image->offset;
+ s_offs = z_offs = va;
+
+ if (device->physical_device->rad_info.chip_class >= GFX9) {
+ assert(surf->u.gfx9.surf_offset == 0);
+ s_offs += surf->u.gfx9.stencil_offset;
+
+ ds->db_z_info = S_028038_FORMAT(format) |
+ S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) |
+ S_028038_SW_MODE(surf->u.gfx9.surf.swizzle_mode) |
+ S_028038_MAXMIP(iview->image->info.levels - 1) | S_028038_ZRANGE_PRECISION(1);
+ ds->db_stencil_info =
+ S_02803C_FORMAT(stencil_format) | S_02803C_SW_MODE(surf->u.gfx9.stencil.swizzle_mode);
+
+ if (device->physical_device->rad_info.chip_class == GFX9) {
+ ds->db_z_info2 = S_028068_EPITCH(surf->u.gfx9.surf.epitch);
+ ds->db_stencil_info2 = S_02806C_EPITCH(surf->u.gfx9.stencil.epitch);
+ }
+
+ ds->db_depth_view |= S_028008_MIPID(level);
+ ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |
+ S_02801C_Y_MAX(iview->image->info.height - 1);
+
+ if (radv_htile_enabled(iview->image, level)) {
+ ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);
+
+ if (radv_image_is_tc_compat_htile(iview->image)) {
+ unsigned max_zplanes = radv_calc_decompress_on_z_planes(device, iview);
+
+ ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
+
+ if (device->physical_device->rad_info.chip_class >= GFX10) {
+ ds->db_z_info |= S_028040_ITERATE_FLUSH(1);
+ ds->db_stencil_info |= S_028044_ITERATE_FLUSH(1);
+ } else {
+ ds->db_z_info |= S_028038_ITERATE_FLUSH(1);
+ ds->db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
+ }
+ }
+
+ if (radv_image_tile_stencil_disabled(device, iview->image)) {
+ ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
+ }
+
+ va = radv_buffer_get_va(iview->bo) + iview->image->offset + surf->htile_offset;
+ ds->db_htile_data_base = va >> 8;
+ ds->db_htile_surface = S_028ABC_FULL_CACHE(1) | S_028ABC_PIPE_ALIGNED(1);
+
+ if (device->physical_device->rad_info.chip_class == GFX9) {
+ ds->db_htile_surface |= S_028ABC_RB_ALIGNED(1);
+ }
+ }
+ } else {
+ const struct legacy_surf_level *level_info = &surf->u.legacy.level[level];
+
+ if (stencil_only)
+ level_info = &surf->u.legacy.stencil_level[level];
+
+ z_offs += surf->u.legacy.level[level].offset;
+ s_offs += surf->u.legacy.stencil_level[level].offset;
+
+ ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!radv_image_is_tc_compat_htile(iview->image));
+ ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
+ ds->db_stencil_info = S_028044_FORMAT(stencil_format);
+
+ if (iview->image->info.samples > 1)
+ ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));
+
+ if (device->physical_device->rad_info.chip_class >= GFX7) {
+ struct radeon_info *info = &device->physical_device->rad_info;
+ unsigned tiling_index = surf->u.legacy.tiling_index[level];
+ unsigned stencil_index = surf->u.legacy.stencil_tiling_index[level];
+ unsigned macro_index = surf->u.legacy.macro_tile_index;
+ unsigned tile_mode = info->si_tile_mode_array[tiling_index];
+ unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
+ unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
+
+ if (stencil_only)
+ tile_mode = stencil_tile_mode;
+
+ ds->db_depth_info |= S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
+ S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
+ S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
+ S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
+ S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
+ S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
+ ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
+ ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
+ } else {
+ unsigned tile_mode_index = si_tile_mode_index(&iview->image->planes[0], level, false);
+ ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
+ tile_mode_index = si_tile_mode_index(&iview->image->planes[0], level, true);
+ ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
+ if (stencil_only)
+ ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
+ }
+
+ ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
+ S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
+ ds->db_depth_slice =
+ S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
+
+ if (radv_htile_enabled(iview->image, level)) {
+ ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);
+
+ if (radv_image_tile_stencil_disabled(device, iview->image)) {
+ ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
+ }
+
+ va = radv_buffer_get_va(iview->bo) + iview->image->offset + surf->htile_offset;
+ ds->db_htile_data_base = va >> 8;
+ ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
+
+ if (radv_image_is_tc_compat_htile(iview->image)) {
+ unsigned max_zplanes = radv_calc_decompress_on_z_planes(device, iview);
+
+ ds->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
+ ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
+ }
+ }
+ }
+
+ ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
+ ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
+}
+
+VkResult
+radv_CreateFramebuffer(VkDevice _device, const VkFramebufferCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkFramebuffer *pFramebuffer)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ struct radv_framebuffer *framebuffer;
+ const VkFramebufferAttachmentsCreateInfo *imageless_create_info =
+ vk_find_struct_const(pCreateInfo->pNext, FRAMEBUFFER_ATTACHMENTS_CREATE_INFO);
+
+ assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
+
+ size_t size = sizeof(*framebuffer);
+ if (!imageless_create_info)
+ size += sizeof(struct radv_image_view *) * pCreateInfo->attachmentCount;
+ framebuffer =
+ vk_alloc2(&device->vk.alloc, pAllocator, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (framebuffer == NULL)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ vk_object_base_init(&device->vk, &framebuffer->base, VK_OBJECT_TYPE_FRAMEBUFFER);
+
+ framebuffer->attachment_count = pCreateInfo->attachmentCount;
+ framebuffer->width = pCreateInfo->width;
+ framebuffer->height = pCreateInfo->height;
+ framebuffer->layers = pCreateInfo->layers;
+ framebuffer->imageless = !!imageless_create_info;
+
+ if (!imageless_create_info) {
+ for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
+ VkImageView _iview = pCreateInfo->pAttachments[i];
+ struct radv_image_view *iview = radv_image_view_from_handle(_iview);
+ framebuffer->attachments[i] = iview;
+ }
+ }
+
+ *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
+ return VK_SUCCESS;
+}
+
+void
+radv_DestroyFramebuffer(VkDevice _device, VkFramebuffer _fb,
+ const VkAllocationCallbacks *pAllocator)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
+
+ if (!fb)
+ return;
+ vk_object_base_finish(&fb->base);
+ vk_free2(&device->vk.alloc, pAllocator, fb);
+}
+
+static unsigned
+radv_tex_wrap(VkSamplerAddressMode address_mode)
+{
+ switch (address_mode) {
+ case VK_SAMPLER_ADDRESS_MODE_REPEAT:
+ return V_008F30_SQ_TEX_WRAP;
+ case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
+ return V_008F30_SQ_TEX_MIRROR;
+ case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
+ return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
+ case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
+ return V_008F30_SQ_TEX_CLAMP_BORDER;
+ case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
+ return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
+ default:
+ unreachable("illegal tex wrap mode");
+ break;
+ }
}
static unsigned
radv_tex_compare(VkCompareOp op)
{
- switch (op) {
- case VK_COMPARE_OP_NEVER:
- return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
- case VK_COMPARE_OP_LESS:
- return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
- case VK_COMPARE_OP_EQUAL:
- return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
- case VK_COMPARE_OP_LESS_OR_EQUAL:
- return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
- case VK_COMPARE_OP_GREATER:
- return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
- case VK_COMPARE_OP_NOT_EQUAL:
- return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
- case VK_COMPARE_OP_GREATER_OR_EQUAL:
- return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
- case VK_COMPARE_OP_ALWAYS:
- return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
- default:
- unreachable("illegal compare mode");
- break;
- }
+ switch (op) {
+ case VK_COMPARE_OP_NEVER:
+ return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
+ case VK_COMPARE_OP_LESS:
+ return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
+ case VK_COMPARE_OP_EQUAL:
+ return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
+ case VK_COMPARE_OP_LESS_OR_EQUAL:
+ return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
+ case VK_COMPARE_OP_GREATER:
+ return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
+ case VK_COMPARE_OP_NOT_EQUAL:
+ return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
+ case VK_COMPARE_OP_GREATER_OR_EQUAL:
+ return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
+ case VK_COMPARE_OP_ALWAYS:
+ return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
+ default:
+ unreachable("illegal compare mode");
+ break;
+ }
}
static unsigned
radv_tex_filter(VkFilter filter, unsigned max_ansio)
{
- switch (filter) {
- case VK_FILTER_NEAREST:
- return (max_ansio > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
- V_008F38_SQ_TEX_XY_FILTER_POINT);
- case VK_FILTER_LINEAR:
- return (max_ansio > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
- V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
- case VK_FILTER_CUBIC_IMG:
- default:
- fprintf(stderr, "illegal texture filter");
- return 0;
- }
+ switch (filter) {
+ case VK_FILTER_NEAREST:
+ return (max_ansio > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT
+ : V_008F38_SQ_TEX_XY_FILTER_POINT);
+ case VK_FILTER_LINEAR:
+ return (max_ansio > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR
+ : V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
+ case VK_FILTER_CUBIC_IMG:
+ default:
+ fprintf(stderr, "illegal texture filter");
+ return 0;
+ }
}
static unsigned
radv_tex_mipfilter(VkSamplerMipmapMode mode)
{
- switch (mode) {
- case VK_SAMPLER_MIPMAP_MODE_NEAREST:
- return V_008F38_SQ_TEX_Z_FILTER_POINT;
- case VK_SAMPLER_MIPMAP_MODE_LINEAR:
- return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
- default:
- return V_008F38_SQ_TEX_Z_FILTER_NONE;
- }
+ switch (mode) {
+ case VK_SAMPLER_MIPMAP_MODE_NEAREST:
+ return V_008F38_SQ_TEX_Z_FILTER_POINT;
+ case VK_SAMPLER_MIPMAP_MODE_LINEAR:
+ return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
+ default:
+ return V_008F38_SQ_TEX_Z_FILTER_NONE;
+ }
}
static unsigned
radv_tex_bordercolor(VkBorderColor bcolor)
{
- switch (bcolor) {
- case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
- case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
- return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
- case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
- case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
- return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
- case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
- case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
- return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
- case VK_BORDER_COLOR_FLOAT_CUSTOM_EXT:
- case VK_BORDER_COLOR_INT_CUSTOM_EXT:
- return V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER;
- default:
- break;
- }
- return 0;
+ switch (bcolor) {
+ case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
+ case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
+ return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
+ case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
+ case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
+ return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
+ case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
+ case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
+ return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
+ case VK_BORDER_COLOR_FLOAT_CUSTOM_EXT:
+ case VK_BORDER_COLOR_INT_CUSTOM_EXT:
+ return V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER;
+ default:
+ break;
+ }
+ return 0;
}
static unsigned
radv_tex_aniso_filter(unsigned filter)
{
- if (filter < 2)
- return 0;
- if (filter < 4)
- return 1;
- if (filter < 8)
- return 2;
- if (filter < 16)
- return 3;
- return 4;
+ if (filter < 2)
+ return 0;
+ if (filter < 4)
+ return 1;
+ if (filter < 8)
+ return 2;
+ if (filter < 16)
+ return 3;
+ return 4;
}
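/* The bucket returned above is roughly log2 of the requested maxAnisotropy:
 * 1x -> 0, 2x-3x -> 1, 4x-7x -> 2, 8x-15x -> 3, 16x (and up) -> 4. It feeds
 * MAX_ANISO_RATIO / ANISO_THRESHOLD / ANISO_BIAS in radv_init_sampler()
 * below.
 */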
static unsigned
radv_tex_filter_mode(VkSamplerReductionMode mode)
{
- switch (mode) {
- case VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT:
- return V_008F30_SQ_IMG_FILTER_MODE_BLEND;
- case VK_SAMPLER_REDUCTION_MODE_MIN_EXT:
- return V_008F30_SQ_IMG_FILTER_MODE_MIN;
- case VK_SAMPLER_REDUCTION_MODE_MAX_EXT:
- return V_008F30_SQ_IMG_FILTER_MODE_MAX;
- default:
- break;
- }
- return 0;
+ switch (mode) {
+ case VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT:
+ return V_008F30_SQ_IMG_FILTER_MODE_BLEND;
+ case VK_SAMPLER_REDUCTION_MODE_MIN_EXT:
+ return V_008F30_SQ_IMG_FILTER_MODE_MIN;
+ case VK_SAMPLER_REDUCTION_MODE_MAX_EXT:
+ return V_008F30_SQ_IMG_FILTER_MODE_MAX;
+ default:
+ break;
+ }
+ return 0;
}
static uint32_t
-radv_get_max_anisotropy(struct radv_device *device,
- const VkSamplerCreateInfo *pCreateInfo)
+radv_get_max_anisotropy(struct radv_device *device, const VkSamplerCreateInfo *pCreateInfo)
{
- if (device->force_aniso >= 0)
- return device->force_aniso;
+ if (device->force_aniso >= 0)
+ return device->force_aniso;
- if (pCreateInfo->anisotropyEnable &&
- pCreateInfo->maxAnisotropy > 1.0f)
- return (uint32_t)pCreateInfo->maxAnisotropy;
+ if (pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0f)
+ return (uint32_t)pCreateInfo->maxAnisotropy;
- return 0;
+ return 0;
}
-static inline int S_FIXED(float value, unsigned frac_bits)
+static inline int
+S_FIXED(float value, unsigned frac_bits)
{
- return value * (1 << frac_bits);
+ return value * (1 << frac_bits);
}
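/* S_FIXED() is a plain fixed-point conversion, e.g. with 8 fractional bits
 * S_FIXED(1.25f, 8) == (int)(1.25 * 256) == 320; this is the encoding used
 * for MIN_LOD/MAX_LOD and LOD_BIAS in the sampler state below.
 */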
-static uint32_t radv_register_border_color(struct radv_device *device,
- VkClearColorValue value)
+static uint32_t
+radv_register_border_color(struct radv_device *device, VkClearColorValue value)
{
- uint32_t slot;
+ uint32_t slot;
- mtx_lock(&device->border_color_data.mutex);
+ mtx_lock(&device->border_color_data.mutex);
- for (slot = 0; slot < RADV_BORDER_COLOR_COUNT; slot++) {
- if (!device->border_color_data.used[slot]) {
- /* Copy to the GPU wrt endian-ness. */
- util_memcpy_cpu_to_le32(&device->border_color_data.colors_gpu_ptr[slot],
- &value,
- sizeof(VkClearColorValue));
+ for (slot = 0; slot < RADV_BORDER_COLOR_COUNT; slot++) {
+ if (!device->border_color_data.used[slot]) {
+ /* Copy to the GPU wrt endian-ness. */
+ util_memcpy_cpu_to_le32(&device->border_color_data.colors_gpu_ptr[slot], &value,
+ sizeof(VkClearColorValue));
- device->border_color_data.used[slot] = true;
- break;
- }
- }
+ device->border_color_data.used[slot] = true;
+ break;
+ }
+ }
- mtx_unlock(&device->border_color_data.mutex);
+ mtx_unlock(&device->border_color_data.mutex);
- return slot;
+ return slot;
}
-static void radv_unregister_border_color(struct radv_device *device,
- uint32_t slot)
+static void
+radv_unregister_border_color(struct radv_device *device, uint32_t slot)
{
- mtx_lock(&device->border_color_data.mutex);
+ mtx_lock(&device->border_color_data.mutex);
- device->border_color_data.used[slot] = false;
+ device->border_color_data.used[slot] = false;
- mtx_unlock(&device->border_color_data.mutex);
+ mtx_unlock(&device->border_color_data.mutex);
}
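/* Usage sketch (simplified; the real pairing is in radv_init_sampler() and
 * radv_DestroySampler() below, and "state" is just a placeholder for
 * sampler->state[3]):
 *
 *    uint32_t slot = radv_register_border_color(device, value);
 *    if (slot != RADV_BORDER_COLOR_COUNT)
 *       state |= S_008F3C_BORDER_COLOR_PTR(slot);
 *    ...
 *    radv_unregister_border_color(device, slot);
 */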
static void
-radv_init_sampler(struct radv_device *device,
- struct radv_sampler *sampler,
- const VkSamplerCreateInfo *pCreateInfo)
-{
- uint32_t max_aniso = radv_get_max_anisotropy(device, pCreateInfo);
- uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
- bool compat_mode = device->physical_device->rad_info.chip_class == GFX8 ||
- device->physical_device->rad_info.chip_class == GFX9;
- unsigned filter_mode = V_008F30_SQ_IMG_FILTER_MODE_BLEND;
- unsigned depth_compare_func = V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
- bool trunc_coord = pCreateInfo->minFilter == VK_FILTER_NEAREST && pCreateInfo->magFilter == VK_FILTER_NEAREST;
- bool uses_border_color = pCreateInfo->addressModeU == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
- pCreateInfo->addressModeV == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
- pCreateInfo->addressModeW == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
- VkBorderColor border_color = uses_border_color ? pCreateInfo->borderColor : VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
- uint32_t border_color_ptr;
-
- const struct VkSamplerReductionModeCreateInfo *sampler_reduction =
- vk_find_struct_const(pCreateInfo->pNext,
- SAMPLER_REDUCTION_MODE_CREATE_INFO);
- if (sampler_reduction)
- filter_mode = radv_tex_filter_mode(sampler_reduction->reductionMode);
-
- if (pCreateInfo->compareEnable)
- depth_compare_func = radv_tex_compare(pCreateInfo->compareOp);
-
- sampler->border_color_slot = RADV_BORDER_COLOR_COUNT;
-
- if (border_color == VK_BORDER_COLOR_FLOAT_CUSTOM_EXT || border_color == VK_BORDER_COLOR_INT_CUSTOM_EXT) {
- const VkSamplerCustomBorderColorCreateInfoEXT *custom_border_color =
- vk_find_struct_const(pCreateInfo->pNext,
- SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT);
-
- assert(custom_border_color);
-
- sampler->border_color_slot =
- radv_register_border_color(device, custom_border_color->customBorderColor);
-
- /* Did we fail to find a slot? */
- if (sampler->border_color_slot == RADV_BORDER_COLOR_COUNT) {
- fprintf(stderr, "WARNING: no free border color slots, defaulting to TRANS_BLACK.\n");
- border_color = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
- }
- }
-
- /* If we don't have a custom color, set the ptr to 0 */
- border_color_ptr = sampler->border_color_slot != RADV_BORDER_COLOR_COUNT
- ? sampler->border_color_slot
- : 0;
-
- sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
- S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
- S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
- S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
- S_008F30_DEPTH_COMPARE_FUNC(depth_compare_func) |
- S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
- S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
- S_008F30_ANISO_BIAS(max_aniso_ratio) |
- S_008F30_DISABLE_CUBE_WRAP(0) |
- S_008F30_COMPAT_MODE(compat_mode) |
- S_008F30_FILTER_MODE(filter_mode) |
- S_008F30_TRUNC_COORD(trunc_coord));
- sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
- S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
- S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
- sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
- S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
- S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
- S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
- S_008F38_MIP_POINT_PRECLAMP(0));
- sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(border_color_ptr) |
- S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(border_color)));
-
- if (device->physical_device->rad_info.chip_class >= GFX10) {
- sampler->state[2] |= S_008F38_ANISO_OVERRIDE_GFX10(1);
- } else {
- sampler->state[2] |=
- S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.chip_class <= GFX8) |
- S_008F38_FILTER_PREC_FIX(1) |
- S_008F38_ANISO_OVERRIDE_GFX8(device->physical_device->rad_info.chip_class >= GFX8);
- }
-}
-
-VkResult radv_CreateSampler(
- VkDevice _device,
- const VkSamplerCreateInfo* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkSampler* pSampler)
-{
- RADV_FROM_HANDLE(radv_device, device, _device);
- struct radv_sampler *sampler;
-
- const struct VkSamplerYcbcrConversionInfo *ycbcr_conversion =
- vk_find_struct_const(pCreateInfo->pNext,
- SAMPLER_YCBCR_CONVERSION_INFO);
-
- assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
-
- sampler = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*sampler), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (!sampler)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
-
- vk_object_base_init(&device->vk, &sampler->base,
- VK_OBJECT_TYPE_SAMPLER);
-
- radv_init_sampler(device, sampler, pCreateInfo);
-
- sampler->ycbcr_sampler = ycbcr_conversion ? radv_sampler_ycbcr_conversion_from_handle(ycbcr_conversion->conversion): NULL;
- *pSampler = radv_sampler_to_handle(sampler);
-
- return VK_SUCCESS;
-}
-
-void radv_DestroySampler(
- VkDevice _device,
- VkSampler _sampler,
- const VkAllocationCallbacks* pAllocator)
-{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
+radv_init_sampler(struct radv_device *device, struct radv_sampler *sampler,
+ const VkSamplerCreateInfo *pCreateInfo)
+{
+ uint32_t max_aniso = radv_get_max_anisotropy(device, pCreateInfo);
+ uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
+ bool compat_mode = device->physical_device->rad_info.chip_class == GFX8 ||
+ device->physical_device->rad_info.chip_class == GFX9;
+ unsigned filter_mode = V_008F30_SQ_IMG_FILTER_MODE_BLEND;
+ unsigned depth_compare_func = V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
+ bool trunc_coord =
+ pCreateInfo->minFilter == VK_FILTER_NEAREST && pCreateInfo->magFilter == VK_FILTER_NEAREST;
+ bool uses_border_color = pCreateInfo->addressModeU == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
+ pCreateInfo->addressModeV == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
+ pCreateInfo->addressModeW == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
+ VkBorderColor border_color =
+ uses_border_color ? pCreateInfo->borderColor : VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
+ uint32_t border_color_ptr;
+
+ const struct VkSamplerReductionModeCreateInfo *sampler_reduction =
+ vk_find_struct_const(pCreateInfo->pNext, SAMPLER_REDUCTION_MODE_CREATE_INFO);
+ if (sampler_reduction)
+ filter_mode = radv_tex_filter_mode(sampler_reduction->reductionMode);
+
+ if (pCreateInfo->compareEnable)
+ depth_compare_func = radv_tex_compare(pCreateInfo->compareOp);
+
+ sampler->border_color_slot = RADV_BORDER_COLOR_COUNT;
+
+ if (border_color == VK_BORDER_COLOR_FLOAT_CUSTOM_EXT ||
+ border_color == VK_BORDER_COLOR_INT_CUSTOM_EXT) {
+ const VkSamplerCustomBorderColorCreateInfoEXT *custom_border_color =
+ vk_find_struct_const(pCreateInfo->pNext, SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT);
+
+ assert(custom_border_color);
+
+ sampler->border_color_slot =
+ radv_register_border_color(device, custom_border_color->customBorderColor);
+
+ /* Did we fail to find a slot? */
+ if (sampler->border_color_slot == RADV_BORDER_COLOR_COUNT) {
+ fprintf(stderr, "WARNING: no free border color slots, defaulting to TRANS_BLACK.\n");
+ border_color = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
+ }
+ }
+
+ /* If we don't have a custom color, set the ptr to 0 */
+ border_color_ptr =
+ sampler->border_color_slot != RADV_BORDER_COLOR_COUNT ? sampler->border_color_slot : 0;
+
+ sampler->state[0] =
+ (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
+ S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
+ S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
+ S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) | S_008F30_DEPTH_COMPARE_FUNC(depth_compare_func) |
+ S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
+ S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) | S_008F30_ANISO_BIAS(max_aniso_ratio) |
+ S_008F30_DISABLE_CUBE_WRAP(0) | S_008F30_COMPAT_MODE(compat_mode) |
+ S_008F30_FILTER_MODE(filter_mode) | S_008F30_TRUNC_COORD(trunc_coord));
+ sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
+ S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
+ S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
+ sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
+ S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
+ S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
+ S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
+ S_008F38_MIP_POINT_PRECLAMP(0));
+ sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(border_color_ptr) |
+ S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(border_color)));
+
+ if (device->physical_device->rad_info.chip_class >= GFX10) {
+ sampler->state[2] |= S_008F38_ANISO_OVERRIDE_GFX10(1);
+ } else {
+ sampler->state[2] |=
+ S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.chip_class <= GFX8) |
+ S_008F38_FILTER_PREC_FIX(1) |
+ S_008F38_ANISO_OVERRIDE_GFX8(device->physical_device->rad_info.chip_class >= GFX8);
+ }
+}
+
+VkResult
+radv_CreateSampler(VkDevice _device, const VkSamplerCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkSampler *pSampler)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ struct radv_sampler *sampler;
+
+ const struct VkSamplerYcbcrConversionInfo *ycbcr_conversion =
+ vk_find_struct_const(pCreateInfo->pNext, SAMPLER_YCBCR_CONVERSION_INFO);
+
+ assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
+
+ sampler = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*sampler), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (!sampler)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ vk_object_base_init(&device->vk, &sampler->base, VK_OBJECT_TYPE_SAMPLER);
+
+ radv_init_sampler(device, sampler, pCreateInfo);
+
+ sampler->ycbcr_sampler =
+ ycbcr_conversion ? radv_sampler_ycbcr_conversion_from_handle(ycbcr_conversion->conversion)
+ : NULL;
+ *pSampler = radv_sampler_to_handle(sampler);
+
+ return VK_SUCCESS;
+}
+
+void
+radv_DestroySampler(VkDevice _device, VkSampler _sampler, const VkAllocationCallbacks *pAllocator)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
+
+ if (!sampler)
+ return;
- if (!sampler)
- return;
-
- if (sampler->border_color_slot != RADV_BORDER_COLOR_COUNT)
- radv_unregister_border_color(device, sampler->border_color_slot);
-
- vk_object_base_finish(&sampler->base);
- vk_free2(&device->vk.alloc, pAllocator, sampler);
+ if (sampler->border_color_slot != RADV_BORDER_COLOR_COUNT)
+ radv_unregister_border_color(device, sampler->border_color_slot);
+
+ vk_object_base_finish(&sampler->base);
+ vk_free2(&device->vk.alloc, pAllocator, sampler);
}
PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
{
- /* For the full details on loader interface versioning, see
- * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
- * What follows is a condensed summary, to help you navigate the large and
- * confusing official doc.
- *
- * - Loader interface v0 is incompatible with later versions. We don't
- * support it.
- *
- * - In loader interface v1:
- * - The first ICD entrypoint called by the loader is
- * vk_icdGetInstanceProcAddr(). The ICD must statically expose this
- * entrypoint.
- * - The ICD must statically expose no other Vulkan symbol unless it is
- * linked with -Bsymbolic.
- * - Each dispatchable Vulkan handle created by the ICD must be
- * a pointer to a struct whose first member is VK_LOADER_DATA. The
- * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
- * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
- * vkDestroySurfaceKHR(). The ICD must be capable of working with
- * such loader-managed surfaces.
- *
- * - Loader interface v2 differs from v1 in:
- * - The first ICD entrypoint called by the loader is
- * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
- * statically expose this entrypoint.
- *
- * - Loader interface v3 differs from v2 in:
- * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
- * vkDestroySurfaceKHR(), and other APIs that use VkSurfaceKHR,
- * because the loader no longer does so.
- */
- *pSupportedVersion = MIN2(*pSupportedVersion, 4u);
- return VK_SUCCESS;
-}
-
-VkResult radv_GetMemoryFdKHR(VkDevice _device,
- const VkMemoryGetFdInfoKHR *pGetFdInfo,
- int *pFD)
-{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_device_memory, memory, pGetFdInfo->memory);
-
- assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
-
- /* At the moment, we support only the below handle types. */
- assert(pGetFdInfo->handleType ==
- VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
- pGetFdInfo->handleType ==
- VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
-
- bool ret = radv_get_memory_fd(device, memory, pFD);
- if (ret == false)
- return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
- return VK_SUCCESS;
-}
-
-static uint32_t radv_compute_valid_memory_types_attempt(struct radv_physical_device *dev,
- enum radeon_bo_domain domains,
- enum radeon_bo_flag flags,
- enum radeon_bo_flag ignore_flags)
-{
- /* Don't count GTT/CPU as relevant:
- *
- * - We're not fully consistent between the two.
- * - Sometimes VRAM gets VRAM|GTT.
- */
- const enum radeon_bo_domain relevant_domains = RADEON_DOMAIN_VRAM |
- RADEON_DOMAIN_GDS |
- RADEON_DOMAIN_OA;
- uint32_t bits = 0;
- for (unsigned i = 0; i < dev->memory_properties.memoryTypeCount; ++i) {
- if ((domains & relevant_domains) != (dev->memory_domains[i] & relevant_domains))
- continue;
-
- if ((flags & ~ignore_flags) != (dev->memory_flags[i] & ~ignore_flags))
- continue;
-
- bits |= 1u << i;
- }
-
- return bits;
-}
-
-static uint32_t radv_compute_valid_memory_types(struct radv_physical_device *dev,
- enum radeon_bo_domain domains,
- enum radeon_bo_flag flags)
-{
- enum radeon_bo_flag ignore_flags = ~(RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_GTT_WC);
- uint32_t bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, ignore_flags);
-
- if (!bits) {
- ignore_flags |= RADEON_FLAG_GTT_WC;
- bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, ignore_flags);
- }
-
- if (!bits) {
- ignore_flags |= RADEON_FLAG_NO_CPU_ACCESS;
- bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, ignore_flags);
- }
-
- return bits;
-}
-VkResult radv_GetMemoryFdPropertiesKHR(VkDevice _device,
- VkExternalMemoryHandleTypeFlagBits handleType,
- int fd,
- VkMemoryFdPropertiesKHR *pMemoryFdProperties)
-{
- RADV_FROM_HANDLE(radv_device, device, _device);
-
- switch (handleType) {
- case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT: {
- enum radeon_bo_domain domains;
- enum radeon_bo_flag flags;
- if (!device->ws->buffer_get_flags_from_fd(device->ws, fd, &domains, &flags))
- return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
-
- pMemoryFdProperties->memoryTypeBits = radv_compute_valid_memory_types(device->physical_device, domains, flags);
- return VK_SUCCESS;
- }
- default:
- /* The valid usage section for this function says:
- *
- * "handleType must not be one of the handle types defined as
- * opaque."
- *
- * So opaque handle types fall into the default "unsupported" case.
- */
- return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
- }
-}
-
-static VkResult radv_import_opaque_fd(struct radv_device *device,
- int fd,
- uint32_t *syncobj)
-{
- uint32_t syncobj_handle = 0;
- int ret = device->ws->import_syncobj(device->ws, fd, &syncobj_handle);
- if (ret != 0)
- return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
-
- if (*syncobj)
- device->ws->destroy_syncobj(device->ws, *syncobj);
-
- *syncobj = syncobj_handle;
- close(fd);
-
- return VK_SUCCESS;
-}
-
-static VkResult radv_import_sync_fd(struct radv_device *device,
- int fd,
- uint32_t *syncobj)
-{
- /* If we create a syncobj we do it locally so that if we have an error, we don't
- * leave a syncobj in an undetermined state in the fence. */
- uint32_t syncobj_handle = *syncobj;
- if (!syncobj_handle) {
- bool create_signaled = fd == -1 ? true : false;
-
- int ret = device->ws->create_syncobj(device->ws, create_signaled,
- &syncobj_handle);
- if (ret) {
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- }
- } else {
- if (fd == -1)
- device->ws->signal_syncobj(device->ws, syncobj_handle, 0);
- }
-
- if (fd != -1) {
- int ret = device->ws->import_syncobj_from_sync_file(device->ws, syncobj_handle, fd);
- if (ret)
- return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
- close(fd);
- }
-
- *syncobj = syncobj_handle;
-
- return VK_SUCCESS;
-}
-
-VkResult radv_ImportSemaphoreFdKHR(VkDevice _device,
- const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo)
-{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_semaphore, sem, pImportSemaphoreFdInfo->semaphore);
- VkResult result;
- struct radv_semaphore_part *dst = NULL;
- bool timeline = sem->permanent.kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ;
-
- if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT) {
- assert(!timeline);
- dst = &sem->temporary;
- } else {
- dst = &sem->permanent;
- }
-
- uint32_t syncobj = (dst->kind == RADV_SEMAPHORE_SYNCOBJ ||
- dst->kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ) ? dst->syncobj : 0;
-
- switch(pImportSemaphoreFdInfo->handleType) {
- case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
- result = radv_import_opaque_fd(device, pImportSemaphoreFdInfo->fd, &syncobj);
- break;
- case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
- assert(!timeline);
- result = radv_import_sync_fd(device, pImportSemaphoreFdInfo->fd, &syncobj);
- break;
- default:
- unreachable("Unhandled semaphore handle type");
- }
-
- if (result == VK_SUCCESS) {
- dst->syncobj = syncobj;
- dst->kind = RADV_SEMAPHORE_SYNCOBJ;
- if (timeline) {
- dst->kind = RADV_SEMAPHORE_TIMELINE_SYNCOBJ;
- dst->timeline_syncobj.max_point = 0;
- }
- }
-
- return result;
-}
-
-VkResult radv_GetSemaphoreFdKHR(VkDevice _device,
- const VkSemaphoreGetFdInfoKHR *pGetFdInfo,
- int *pFd)
-{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_semaphore, sem, pGetFdInfo->semaphore);
- int ret;
- uint32_t syncobj_handle;
-
- if (sem->temporary.kind != RADV_SEMAPHORE_NONE) {
- assert(sem->temporary.kind == RADV_SEMAPHORE_SYNCOBJ ||
- sem->temporary.kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ);
- syncobj_handle = sem->temporary.syncobj;
- } else {
- assert(sem->permanent.kind == RADV_SEMAPHORE_SYNCOBJ ||
- sem->permanent.kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ);
- syncobj_handle = sem->permanent.syncobj;
- }
-
- switch(pGetFdInfo->handleType) {
- case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
- ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
- if (ret)
- return vk_error(device->instance, VK_ERROR_TOO_MANY_OBJECTS);
- break;
- case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
- ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd);
- if (ret)
- return vk_error(device->instance, VK_ERROR_TOO_MANY_OBJECTS);
-
- if (sem->temporary.kind != RADV_SEMAPHORE_NONE) {
- radv_destroy_semaphore_part(device, &sem->temporary);
- } else {
- device->ws->reset_syncobj(device->ws, syncobj_handle);
- }
- break;
- default:
- unreachable("Unhandled semaphore handle type");
- }
-
- return VK_SUCCESS;
-}
-
-void radv_GetPhysicalDeviceExternalSemaphoreProperties(
- VkPhysicalDevice physicalDevice,
- const VkPhysicalDeviceExternalSemaphoreInfo *pExternalSemaphoreInfo,
- VkExternalSemaphoreProperties *pExternalSemaphoreProperties)
-{
- RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
- VkSemaphoreTypeKHR type = radv_get_semaphore_type(pExternalSemaphoreInfo->pNext, NULL);
-
- if (type == VK_SEMAPHORE_TYPE_TIMELINE && pdevice->rad_info.has_timeline_syncobj &&
- pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) {
- pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
- pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
- pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
- VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
- } else if (type == VK_SEMAPHORE_TYPE_TIMELINE) {
- pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
- pExternalSemaphoreProperties->compatibleHandleTypes = 0;
- pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
- } else if (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT ||
- pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT) {
- pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
- pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
- pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
- VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
- } else if (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) {
- pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
- pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
- pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
- VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
- } else {
- pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
- pExternalSemaphoreProperties->compatibleHandleTypes = 0;
- pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
- }
-}
-
-VkResult radv_ImportFenceFdKHR(VkDevice _device,
- const VkImportFenceFdInfoKHR *pImportFenceFdInfo)
-{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_fence, fence, pImportFenceFdInfo->fence);
- struct radv_fence_part *dst = NULL;
- VkResult result;
-
- if (pImportFenceFdInfo->flags & VK_FENCE_IMPORT_TEMPORARY_BIT) {
- dst = &fence->temporary;
- } else {
- dst = &fence->permanent;
- }
-
- uint32_t syncobj = dst->kind == RADV_FENCE_SYNCOBJ ? dst->syncobj : 0;
-
- switch(pImportFenceFdInfo->handleType) {
- case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:
- result = radv_import_opaque_fd(device, pImportFenceFdInfo->fd, &syncobj);
- break;
- case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT:
- result = radv_import_sync_fd(device, pImportFenceFdInfo->fd, &syncobj);
- break;
- default:
- unreachable("Unhandled fence handle type");
- }
-
- if (result == VK_SUCCESS) {
- dst->syncobj = syncobj;
- dst->kind = RADV_FENCE_SYNCOBJ;
- }
-
- return result;
-}
-
-VkResult radv_GetFenceFdKHR(VkDevice _device,
- const VkFenceGetFdInfoKHR *pGetFdInfo,
- int *pFd)
-{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_fence, fence, pGetFdInfo->fence);
- int ret;
-
- struct radv_fence_part *part =
- fence->temporary.kind != RADV_FENCE_NONE ?
- &fence->temporary : &fence->permanent;
-
- switch(pGetFdInfo->handleType) {
- case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:
- ret = device->ws->export_syncobj(device->ws, part->syncobj, pFd);
- if (ret)
- return vk_error(device->instance, VK_ERROR_TOO_MANY_OBJECTS);
- break;
- case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT:
- ret = device->ws->export_syncobj_to_sync_file(device->ws,
- part->syncobj, pFd);
- if (ret)
- return vk_error(device->instance, VK_ERROR_TOO_MANY_OBJECTS);
-
- if (part == &fence->temporary) {
- radv_destroy_fence_part(device, part);
- } else {
- device->ws->reset_syncobj(device->ws, part->syncobj);
- }
- break;
- default:
- unreachable("Unhandled fence handle type");
- }
-
- return VK_SUCCESS;
-}
-
-void radv_GetPhysicalDeviceExternalFenceProperties(
- VkPhysicalDevice physicalDevice,
- const VkPhysicalDeviceExternalFenceInfo *pExternalFenceInfo,
- VkExternalFenceProperties *pExternalFenceProperties)
-{
- if (pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT ||
- pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT) {
- pExternalFenceProperties->exportFromImportedHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
- pExternalFenceProperties->compatibleHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
- pExternalFenceProperties->externalFenceFeatures = VK_EXTERNAL_FENCE_FEATURE_EXPORTABLE_BIT |
- VK_EXTERNAL_FENCE_FEATURE_IMPORTABLE_BIT;
- } else {
- pExternalFenceProperties->exportFromImportedHandleTypes = 0;
- pExternalFenceProperties->compatibleHandleTypes = 0;
- pExternalFenceProperties->externalFenceFeatures = 0;
- }
+ /* For the full details on loader interface versioning, see
+ * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
+ * What follows is a condensed summary, to help you navigate the large and
+ * confusing official doc.
+ *
+ * - Loader interface v0 is incompatible with later versions. We don't
+ * support it.
+ *
+ * - In loader interface v1:
+ * - The first ICD entrypoint called by the loader is
+ * vk_icdGetInstanceProcAddr(). The ICD must statically expose this
+ * entrypoint.
+ * - The ICD must statically expose no other Vulkan symbol unless it is
+ * linked with -Bsymbolic.
+ * - Each dispatchable Vulkan handle created by the ICD must be
+ * a pointer to a struct whose first member is VK_LOADER_DATA. The
+ * ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
+ * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
+ * vkDestroySurfaceKHR(). The ICD must be capable of working with
+ * such loader-managed surfaces.
+ *
+ * - Loader interface v2 differs from v1 in:
+ * - The first ICD entrypoint called by the loader is
+ * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
+ * statically expose this entrypoint.
+ *
+ * - Loader interface v3 differs from v2 in:
+ * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
+ * vkDestroySurfaceKHR(), and other APIs that use VkSurfaceKHR,
+ * because the loader no longer does so.
+ */
+ *pSupportedVersion = MIN2(*pSupportedVersion, 4u);
+ return VK_SUCCESS;
+}
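
The negotiation above boils down to a clamp: the loader passes in the newest interface version it knows, and the ICD lowers it to the newest version it implements (4 here). A hedged sketch of that handshake from the caller's point of view, with an invented loader version to show the clamp (icd_negotiate stands in for vk_icdNegotiateLoaderICDInterfaceVersion and is not a real entry point):

   #include <stdint.h>
   #include <assert.h>

   #define ICD_MAX_LOADER_INTERFACE_VERSION 4u

   /* Stand-in for the negotiate entrypoint: the ICD never raises the
    * version, it only lowers it to the highest version it supports. */
   static void
   icd_negotiate(uint32_t *version)
   {
      if (*version > ICD_MAX_LOADER_INTERFACE_VERSION)
         *version = ICD_MAX_LOADER_INTERFACE_VERSION;
   }

   int
   main(void)
   {
      uint32_t version = 6; /* hypothetical newer loader */
      icd_negotiate(&version);
      assert(version == 4); /* both sides proceed with v4 */
      return 0;
   }
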
+
+VkResult
+radv_GetMemoryFdKHR(VkDevice _device, const VkMemoryGetFdInfoKHR *pGetFdInfo, int *pFD)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_device_memory, memory, pGetFdInfo->memory);
+
+ assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
+
+ /* At the moment, we support only the below handle types. */
+ assert(pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
+ pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
+
+ bool ret = radv_get_memory_fd(device, memory, pFD);
+ if (ret == false)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ return VK_SUCCESS;
+}
+
+static uint32_t
+radv_compute_valid_memory_types_attempt(struct radv_physical_device *dev,
+ enum radeon_bo_domain domains, enum radeon_bo_flag flags,
+ enum radeon_bo_flag ignore_flags)
+{
+ /* Don't count GTT/CPU as relevant:
+ *
+ * - We're not fully consistent between the two.
+ * - Sometimes VRAM gets VRAM|GTT.
+ */
+ const enum radeon_bo_domain relevant_domains =
+ RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GDS | RADEON_DOMAIN_OA;
+ uint32_t bits = 0;
+ for (unsigned i = 0; i < dev->memory_properties.memoryTypeCount; ++i) {
+ if ((domains & relevant_domains) != (dev->memory_domains[i] & relevant_domains))
+ continue;
+
+ if ((flags & ~ignore_flags) != (dev->memory_flags[i] & ~ignore_flags))
+ continue;
+
+ bits |= 1u << i;
+ }
+
+ return bits;
+}
+
+static uint32_t
+radv_compute_valid_memory_types(struct radv_physical_device *dev, enum radeon_bo_domain domains,
+ enum radeon_bo_flag flags)
+{
+ enum radeon_bo_flag ignore_flags = ~(RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_GTT_WC);
+ uint32_t bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, ignore_flags);
+
+ if (!bits) {
+ ignore_flags |= RADEON_FLAG_GTT_WC;
+ bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, ignore_flags);
+ }
+
+ if (!bits) {
+ ignore_flags |= RADEON_FLAG_NO_CPU_ACCESS;
+ bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, ignore_flags);
+ }
+
+ return bits;
+}
+VkResult
+radv_GetMemoryFdPropertiesKHR(VkDevice _device, VkExternalMemoryHandleTypeFlagBits handleType,
+ int fd, VkMemoryFdPropertiesKHR *pMemoryFdProperties)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+
+ switch (handleType) {
+ case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT: {
+ enum radeon_bo_domain domains;
+ enum radeon_bo_flag flags;
+ if (!device->ws->buffer_get_flags_from_fd(device->ws, fd, &domains, &flags))
+ return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
+
+ pMemoryFdProperties->memoryTypeBits =
+ radv_compute_valid_memory_types(device->physical_device, domains, flags);
+ return VK_SUCCESS;
+ }
+ default:
+ /* The valid usage section for this function says:
+ *
+ * "handleType must not be one of the handle types defined as
+ * opaque."
+ *
+ * So opaque handle types fall into the default "unsupported" case.
+ */
+ return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
+ }
+}
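
On the application side, the memoryTypeBits mask returned for a dma-buf is consumed the usual way: pick a set bit whose memory type also has the property flags you need. A small sketch of that selection, assuming a VkPhysicalDeviceMemoryProperties struct queried beforehand (the helper name is illustrative):

   #include <vulkan/vulkan.h>

   /* Return the first memory type index that is allowed by the import's
    * memoryTypeBits mask and has all of the requested property flags,
    * or -1 if none matches. */
   static int
   pick_memory_type(const VkPhysicalDeviceMemoryProperties *props,
                    uint32_t memoryTypeBits, VkMemoryPropertyFlags required)
   {
      for (uint32_t i = 0; i < props->memoryTypeCount; i++) {
         if (!(memoryTypeBits & (1u << i)))
            continue;
         if ((props->memoryTypes[i].propertyFlags & required) == required)
            return (int)i;
      }
      return -1;
   }
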
+
+static VkResult
+radv_import_opaque_fd(struct radv_device *device, int fd, uint32_t *syncobj)
+{
+ uint32_t syncobj_handle = 0;
+ int ret = device->ws->import_syncobj(device->ws, fd, &syncobj_handle);
+ if (ret != 0)
+ return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
+
+ if (*syncobj)
+ device->ws->destroy_syncobj(device->ws, *syncobj);
+
+ *syncobj = syncobj_handle;
+ close(fd);
+
+ return VK_SUCCESS;
+}
+
+static VkResult
+radv_import_sync_fd(struct radv_device *device, int fd, uint32_t *syncobj)
+{
+ /* If we create a syncobj we do it locally so that if we have an error, we don't
+ * leave a syncobj in an undetermined state in the fence. */
+ uint32_t syncobj_handle = *syncobj;
+ if (!syncobj_handle) {
+ bool create_signaled = fd == -1 ? true : false;
+
+ int ret = device->ws->create_syncobj(device->ws, create_signaled, &syncobj_handle);
+ if (ret) {
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+ } else {
+ if (fd == -1)
+ device->ws->signal_syncobj(device->ws, syncobj_handle, 0);
+ }
+
+ if (fd != -1) {
+ int ret = device->ws->import_syncobj_from_sync_file(device->ws, syncobj_handle, fd);
+ if (ret)
+ return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
+ close(fd);
+ }
+
+ *syncobj = syncobj_handle;
+
+ return VK_SUCCESS;
+}
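
Seen from the API, a sync-file import is always temporary and an fd of -1 means "already signaled", which is why the helper above creates the syncobj pre-signaled in that case. A caller-side sketch, with the extension entry point passed in explicitly since an application would normally resolve it through vkGetDeviceProcAddr (the wrapper name is illustrative):

   #include <vulkan/vulkan.h>

   /* Hand a sync-file fd to a binary semaphore. On success the driver takes
    * ownership of the fd and closes it; fd == -1 imports an already
    * signaled payload. */
   static VkResult
   import_sync_fd_into_semaphore(PFN_vkImportSemaphoreFdKHR import_fd,
                                 VkDevice device, VkSemaphore sem, int fd)
   {
      const VkImportSemaphoreFdInfoKHR info = {
         .sType = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR,
         .semaphore = sem,
         /* The spec requires sync-fd imports to be temporary. */
         .flags = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT,
         .handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT,
         .fd = fd,
      };
      return import_fd(device, &info);
   }
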
+
+VkResult
+radv_ImportSemaphoreFdKHR(VkDevice _device,
+ const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_semaphore, sem, pImportSemaphoreFdInfo->semaphore);
+ VkResult result;
+ struct radv_semaphore_part *dst = NULL;
+ bool timeline = sem->permanent.kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ;
+
+ if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT) {
+ assert(!timeline);
+ dst = &sem->temporary;
+ } else {
+ dst = &sem->permanent;
+ }
+
+ uint32_t syncobj =
+ (dst->kind == RADV_SEMAPHORE_SYNCOBJ || dst->kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ)
+ ? dst->syncobj
+ : 0;
+
+ switch (pImportSemaphoreFdInfo->handleType) {
+ case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
+ result = radv_import_opaque_fd(device, pImportSemaphoreFdInfo->fd, &syncobj);
+ break;
+ case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
+ assert(!timeline);
+ result = radv_import_sync_fd(device, pImportSemaphoreFdInfo->fd, &syncobj);
+ break;
+ default:
+ unreachable("Unhandled semaphore handle type");
+ }
+
+ if (result == VK_SUCCESS) {
+ dst->syncobj = syncobj;
+ dst->kind = RADV_SEMAPHORE_SYNCOBJ;
+ if (timeline) {
+ dst->kind = RADV_SEMAPHORE_TIMELINE_SYNCOBJ;
+ dst->timeline_syncobj.max_point = 0;
+ }
+ }
+
+ return result;
+}
+
+VkResult
+radv_GetSemaphoreFdKHR(VkDevice _device, const VkSemaphoreGetFdInfoKHR *pGetFdInfo, int *pFd)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_semaphore, sem, pGetFdInfo->semaphore);
+ int ret;
+ uint32_t syncobj_handle;
+
+ if (sem->temporary.kind != RADV_SEMAPHORE_NONE) {
+ assert(sem->temporary.kind == RADV_SEMAPHORE_SYNCOBJ ||
+ sem->temporary.kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ);
+ syncobj_handle = sem->temporary.syncobj;
+ } else {
+ assert(sem->permanent.kind == RADV_SEMAPHORE_SYNCOBJ ||
+ sem->permanent.kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ);
+ syncobj_handle = sem->permanent.syncobj;
+ }
+
+ switch (pGetFdInfo->handleType) {
+ case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
+ ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
+ if (ret)
+ return vk_error(device->instance, VK_ERROR_TOO_MANY_OBJECTS);
+ break;
+ case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
+ ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd);
+ if (ret)
+ return vk_error(device->instance, VK_ERROR_TOO_MANY_OBJECTS);
+
+ if (sem->temporary.kind != RADV_SEMAPHORE_NONE) {
+ radv_destroy_semaphore_part(device, &sem->temporary);
+ } else {
+ device->ws->reset_syncobj(device->ws, syncobj_handle);
+ }
+ break;
+ default:
+ unreachable("Unhandled semaphore handle type");
+ }
+
+ return VK_SUCCESS;
}
void
-radv_GetDeviceGroupPeerMemoryFeatures(
- VkDevice device,
- uint32_t heapIndex,
- uint32_t localDeviceIndex,
- uint32_t remoteDeviceIndex,
- VkPeerMemoryFeatureFlags* pPeerMemoryFeatures)
+radv_GetPhysicalDeviceExternalSemaphoreProperties(
+ VkPhysicalDevice physicalDevice,
+ const VkPhysicalDeviceExternalSemaphoreInfo *pExternalSemaphoreInfo,
+ VkExternalSemaphoreProperties *pExternalSemaphoreProperties)
+{
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
+ VkSemaphoreTypeKHR type = radv_get_semaphore_type(pExternalSemaphoreInfo->pNext, NULL);
+
+ if (type == VK_SEMAPHORE_TYPE_TIMELINE && pdevice->rad_info.has_timeline_syncobj &&
+ pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) {
+ pExternalSemaphoreProperties->exportFromImportedHandleTypes =
+ VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
+ pExternalSemaphoreProperties->compatibleHandleTypes =
+ VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
+ pExternalSemaphoreProperties->externalSemaphoreFeatures =
+ VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
+ VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
+ } else if (type == VK_SEMAPHORE_TYPE_TIMELINE) {
+ pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
+ pExternalSemaphoreProperties->compatibleHandleTypes = 0;
+ pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
+ } else if (pExternalSemaphoreInfo->handleType ==
+ VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT ||
+ pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT) {
+ pExternalSemaphoreProperties->exportFromImportedHandleTypes =
+ VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT |
+ VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
+ pExternalSemaphoreProperties->compatibleHandleTypes =
+ VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT |
+ VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
+ pExternalSemaphoreProperties->externalSemaphoreFeatures =
+ VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
+ VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
+ } else if (pExternalSemaphoreInfo->handleType ==
+ VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) {
+ pExternalSemaphoreProperties->exportFromImportedHandleTypes =
+ VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
+ pExternalSemaphoreProperties->compatibleHandleTypes =
+ VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
+ pExternalSemaphoreProperties->externalSemaphoreFeatures =
+ VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
+ VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
+ } else {
+ pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
+ pExternalSemaphoreProperties->compatibleHandleTypes = 0;
+ pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
+ }
+}
+
+VkResult
+radv_ImportFenceFdKHR(VkDevice _device, const VkImportFenceFdInfoKHR *pImportFenceFdInfo)
{
- assert(localDeviceIndex == remoteDeviceIndex);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_fence, fence, pImportFenceFdInfo->fence);
+ struct radv_fence_part *dst = NULL;
+ VkResult result;
+
+ if (pImportFenceFdInfo->flags & VK_FENCE_IMPORT_TEMPORARY_BIT) {
+ dst = &fence->temporary;
+ } else {
+ dst = &fence->permanent;
+ }
+
+ uint32_t syncobj = dst->kind == RADV_FENCE_SYNCOBJ ? dst->syncobj : 0;
+
+ switch (pImportFenceFdInfo->handleType) {
+ case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:
+ result = radv_import_opaque_fd(device, pImportFenceFdInfo->fd, &syncobj);
+ break;
+ case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT:
+ result = radv_import_sync_fd(device, pImportFenceFdInfo->fd, &syncobj);
+ break;
+ default:
+ unreachable("Unhandled fence handle type");
+ }
- *pPeerMemoryFeatures = VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT |
- VK_PEER_MEMORY_FEATURE_COPY_DST_BIT |
- VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT |
- VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
+ if (result == VK_SUCCESS) {
+ dst->syncobj = syncobj;
+ dst->kind = RADV_FENCE_SYNCOBJ;
+ }
+
+ return result;
+}
+
+VkResult
+radv_GetFenceFdKHR(VkDevice _device, const VkFenceGetFdInfoKHR *pGetFdInfo, int *pFd)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_fence, fence, pGetFdInfo->fence);
+ int ret;
+
+ struct radv_fence_part *part =
+ fence->temporary.kind != RADV_FENCE_NONE ? &fence->temporary : &fence->permanent;
+
+ switch (pGetFdInfo->handleType) {
+ case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:
+ ret = device->ws->export_syncobj(device->ws, part->syncobj, pFd);
+ if (ret)
+ return vk_error(device->instance, VK_ERROR_TOO_MANY_OBJECTS);
+ break;
+ case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT:
+ ret = device->ws->export_syncobj_to_sync_file(device->ws, part->syncobj, pFd);
+ if (ret)
+ return vk_error(device->instance, VK_ERROR_TOO_MANY_OBJECTS);
+
+ if (part == &fence->temporary) {
+ radv_destroy_fence_part(device, part);
+ } else {
+ device->ws->reset_syncobj(device->ws, part->syncobj);
+ }
+ break;
+ default:
+ unreachable("Unhandled fence handle type");
+ }
+
+ return VK_SUCCESS;
+}
+
+void
+radv_GetPhysicalDeviceExternalFenceProperties(
+ VkPhysicalDevice physicalDevice, const VkPhysicalDeviceExternalFenceInfo *pExternalFenceInfo,
+ VkExternalFenceProperties *pExternalFenceProperties)
+{
+ if (pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT ||
+ pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT) {
+ pExternalFenceProperties->exportFromImportedHandleTypes =
+ VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
+ pExternalFenceProperties->compatibleHandleTypes =
+ VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
+ pExternalFenceProperties->externalFenceFeatures =
+ VK_EXTERNAL_FENCE_FEATURE_EXPORTABLE_BIT | VK_EXTERNAL_FENCE_FEATURE_IMPORTABLE_BIT;
+ } else {
+ pExternalFenceProperties->exportFromImportedHandleTypes = 0;
+ pExternalFenceProperties->compatibleHandleTypes = 0;
+ pExternalFenceProperties->externalFenceFeatures = 0;
+ }
+}
+
+void
+radv_GetDeviceGroupPeerMemoryFeatures(VkDevice device, uint32_t heapIndex,
+ uint32_t localDeviceIndex, uint32_t remoteDeviceIndex,
+ VkPeerMemoryFeatureFlags *pPeerMemoryFeatures)
+{
+ assert(localDeviceIndex == remoteDeviceIndex);
+
+ *pPeerMemoryFeatures =
+ VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT | VK_PEER_MEMORY_FEATURE_COPY_DST_BIT |
+ VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT | VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
}
static const VkTimeDomainEXT radv_time_domains[] = {
- VK_TIME_DOMAIN_DEVICE_EXT,
- VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
+ VK_TIME_DOMAIN_DEVICE_EXT,
+ VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
#ifdef CLOCK_MONOTONIC_RAW
- VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
+ VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
#endif
};
-VkResult radv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(
- VkPhysicalDevice physicalDevice,
- uint32_t *pTimeDomainCount,
- VkTimeDomainEXT *pTimeDomains)
+VkResult
+radv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(VkPhysicalDevice physicalDevice,
+ uint32_t *pTimeDomainCount,
+ VkTimeDomainEXT *pTimeDomains)
{
- int d;
- VK_OUTARRAY_MAKE_TYPED(VkTimeDomainEXT, out, pTimeDomains,
- pTimeDomainCount);
+ int d;
+ VK_OUTARRAY_MAKE_TYPED(VkTimeDomainEXT, out, pTimeDomains, pTimeDomainCount);
- for (d = 0; d < ARRAY_SIZE(radv_time_domains); d++) {
- vk_outarray_append_typed(VkTimeDomainEXT, &out, i) {
- *i = radv_time_domains[d];
- }
- }
+ for (d = 0; d < ARRAY_SIZE(radv_time_domains); d++) {
+ vk_outarray_append_typed(VkTimeDomainEXT, &out, i)
+ {
+ *i = radv_time_domains[d];
+ }
+ }
- return vk_outarray_status(&out);
+ return vk_outarray_status(&out);
}
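
Like every Vulkan pCount/pArray query, this one follows the two-call idiom: first ask for the count with a NULL array, then allocate and fetch. A hedged caller-side sketch; the entry point is an EXT function, so it is passed in as a pointer the application is assumed to have resolved via vkGetInstanceProcAddr:

   #include <stdlib.h>
   #include <vulkan/vulkan.h>

   /* Two-call enumeration for VK_EXT_calibrated_timestamps: query the
    * count, allocate, then fetch the domains. Returns NULL on failure. */
   static VkTimeDomainEXT *
   query_time_domains(PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT get_domains,
                      VkPhysicalDevice pdev, uint32_t *count)
   {
      if (get_domains(pdev, count, NULL) != VK_SUCCESS || *count == 0)
         return NULL;

      VkTimeDomainEXT *domains = malloc(*count * sizeof(*domains));
      if (!domains)
         return NULL;

      /* VK_INCOMPLETE is a positive status, not an error. */
      if (get_domains(pdev, count, domains) < VK_SUCCESS) {
         free(domains);
         return NULL;
      }
      return domains;
   }
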
#ifndef _WIN32
static uint64_t
radv_clock_gettime(clockid_t clock_id)
{
- struct timespec current;
- int ret;
+ struct timespec current;
+ int ret;
- ret = clock_gettime(clock_id, &current);
+ ret = clock_gettime(clock_id, &current);
#ifdef CLOCK_MONOTONIC_RAW
- if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
- ret = clock_gettime(CLOCK_MONOTONIC, &current);
+ if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
+ ret = clock_gettime(CLOCK_MONOTONIC, &current);
#endif
- if (ret < 0)
- return 0;
+ if (ret < 0)
+ return 0;
- return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec;
+ return (uint64_t)current.tv_sec * 1000000000ULL + current.tv_nsec;
}
-VkResult radv_GetCalibratedTimestampsEXT(
- VkDevice _device,
- uint32_t timestampCount,
- const VkCalibratedTimestampInfoEXT *pTimestampInfos,
- uint64_t *pTimestamps,
- uint64_t *pMaxDeviation)
+VkResult
+radv_GetCalibratedTimestampsEXT(VkDevice _device, uint32_t timestampCount,
+ const VkCalibratedTimestampInfoEXT *pTimestampInfos,
+ uint64_t *pTimestamps, uint64_t *pMaxDeviation)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- uint32_t clock_crystal_freq = device->physical_device->rad_info.clock_crystal_freq;
- int d;
- uint64_t begin, end;
- uint64_t max_clock_period = 0;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ uint32_t clock_crystal_freq = device->physical_device->rad_info.clock_crystal_freq;
+ int d;
+ uint64_t begin, end;
+ uint64_t max_clock_period = 0;
#ifdef CLOCK_MONOTONIC_RAW
- begin = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
+ begin = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
#else
- begin = radv_clock_gettime(CLOCK_MONOTONIC);
+ begin = radv_clock_gettime(CLOCK_MONOTONIC);
#endif
- for (d = 0; d < timestampCount; d++) {
- switch (pTimestampInfos[d].timeDomain) {
- case VK_TIME_DOMAIN_DEVICE_EXT:
- pTimestamps[d] = device->ws->query_value(device->ws,
- RADEON_TIMESTAMP);
- uint64_t device_period = DIV_ROUND_UP(1000000, clock_crystal_freq);
- max_clock_period = MAX2(max_clock_period, device_period);
- break;
- case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
- pTimestamps[d] = radv_clock_gettime(CLOCK_MONOTONIC);
- max_clock_period = MAX2(max_clock_period, 1);
- break;
+ for (d = 0; d < timestampCount; d++) {
+ switch (pTimestampInfos[d].timeDomain) {
+ case VK_TIME_DOMAIN_DEVICE_EXT:
+ pTimestamps[d] = device->ws->query_value(device->ws, RADEON_TIMESTAMP);
+ uint64_t device_period = DIV_ROUND_UP(1000000, clock_crystal_freq);
+ max_clock_period = MAX2(max_clock_period, device_period);
+ break;
+ case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
+ pTimestamps[d] = radv_clock_gettime(CLOCK_MONOTONIC);
+ max_clock_period = MAX2(max_clock_period, 1);
+ break;
#ifdef CLOCK_MONOTONIC_RAW
- case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
- pTimestamps[d] = begin;
- break;
+ case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
+ pTimestamps[d] = begin;
+ break;
#endif
- default:
- pTimestamps[d] = 0;
- break;
- }
- }
+ default:
+ pTimestamps[d] = 0;
+ break;
+ }
+ }
#ifdef CLOCK_MONOTONIC_RAW
- end = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
+ end = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
#else
- end = radv_clock_gettime(CLOCK_MONOTONIC);
+ end = radv_clock_gettime(CLOCK_MONOTONIC);
#endif
- /*
- * The maximum deviation is the sum of the interval over which we
- * perform the sampling and the maximum period of any sampled
- * clock. That's because the maximum skew between any two sampled
- * clock edges is when the sampled clock with the largest period is
- * sampled at the end of that period but right at the beginning of the
- * sampling interval and some other clock is sampled right at the
- * beginning of its sampling period and right at the end of the
- * sampling interval. Let's assume the GPU has the longest clock
- * period and that the application is sampling GPU and monotonic:
- *
- * s e
- * w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
- * Raw -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
- *
- * g
- * 0 1 2 3
- * GPU -----_____-----_____-----_____-----_____
- *
- * m
- * x y z 0 1 2 3 4 5 6 7 8 9 a b c
- * Monotonic -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
- *
- * Interval <----------------->
- * Deviation <-------------------------->
- *
- * s = read(raw) 2
- * g = read(GPU) 1
- * m = read(monotonic) 2
- * e = read(raw) b
- *
- * We round the sample interval up by one tick to cover sampling error
- * in the interval clock
- */
-
- uint64_t sample_interval = end - begin + 1;
-
- *pMaxDeviation = sample_interval + max_clock_period;
-
- return VK_SUCCESS;
+ /*
+ * The maximum deviation is the sum of the interval over which we
+ * perform the sampling and the maximum period of any sampled
+ * clock. That's because the maximum skew between any two sampled
+ * clock edges is when the sampled clock with the largest period is
+ * sampled at the end of that period but right at the beginning of the
+ * sampling interval and some other clock is sampled right at the
+ * beginning of its sampling period and right at the end of the
+ * sampling interval. Let's assume the GPU has the longest clock
+ * period and that the application is sampling GPU and monotonic:
+ *
+ * s e
+ * w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
+ * Raw -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
+ *
+ * g
+ * 0 1 2 3
+ * GPU -----_____-----_____-----_____-----_____
+ *
+ * m
+ * x y z 0 1 2 3 4 5 6 7 8 9 a b c
+ * Monotonic -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
+ *
+ * Interval <----------------->
+ * Deviation <-------------------------->
+ *
+ * s = read(raw) 2
+ * g = read(GPU) 1
+ * m = read(monotonic) 2
+ * e = read(raw) b
+ *
+ * We round the sample interval up by one tick to cover sampling error
+ * in the interval clock
+ */
+
+ uint64_t sample_interval = end - begin + 1;
+
+ *pMaxDeviation = sample_interval + max_clock_period;
+
+ return VK_SUCCESS;
}
#endif
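
As a concrete instance of the bound derived above: with a 100 MHz GPU reference clock and a 3 µs CPU-side sampling window, the reported deviation is the window, plus one tick of the interval clock for rounding, plus one GPU clock period. The numbers below are made up for illustration; the only unit assumption is that clock_crystal_freq is in kHz, which the 1000000 numerator in DIV_ROUND_UP suggests.

   #include <stdint.h>
   #include <stdio.h>

   #define DIV_ROUND_UP(a, b) (((a) + (b)-1) / (b))
   #define MAX2(a, b)         ((a) > (b) ? (a) : (b))

   int
   main(void)
   {
      /* Illustrative values only: a 100 MHz reference clock reported as
       * 100000 kHz and a 3000 ns sampling window on the CPU side. */
      const uint64_t clock_crystal_freq = 100000;   /* kHz (assumed unit) */
      const uint64_t begin = 1000000, end = 1003000; /* ns */

      uint64_t device_period = DIV_ROUND_UP(1000000, clock_crystal_freq); /* 10 ns */
      uint64_t max_clock_period = MAX2((uint64_t)1, device_period);
      uint64_t sample_interval = end - begin + 1; /* 3001 ns */

      /* Prints 3011: 3000 ns window + 1 ns rounding + 10 ns GPU period. */
      printf("maxDeviation = %llu ns\n",
             (unsigned long long)(sample_interval + max_clock_period));
      return 0;
   }
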
-void radv_GetPhysicalDeviceMultisamplePropertiesEXT(
- VkPhysicalDevice physicalDevice,
- VkSampleCountFlagBits samples,
- VkMultisamplePropertiesEXT* pMultisampleProperties)
-{
- RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
- VkSampleCountFlagBits supported_samples = VK_SAMPLE_COUNT_2_BIT |
- VK_SAMPLE_COUNT_4_BIT;
-
- if (physical_device->rad_info.chip_class < GFX10)
- supported_samples |= VK_SAMPLE_COUNT_8_BIT;
-
- if (samples & supported_samples) {
- pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 2, 2 };
- } else {
- pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 0, 0 };
- }
-}
-
-VkResult radv_GetPhysicalDeviceFragmentShadingRatesKHR(
- VkPhysicalDevice physicalDevice,
- uint32_t* pFragmentShadingRateCount,
- VkPhysicalDeviceFragmentShadingRateKHR* pFragmentShadingRates)
-{
- VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceFragmentShadingRateKHR, out, pFragmentShadingRates, pFragmentShadingRateCount);
-
-#define append_rate(w, h, s) { \
- VkPhysicalDeviceFragmentShadingRateKHR rate = { \
- .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR,\
- .sampleCounts = s, \
- .fragmentSize = { .width = w, .height = h }, \
- }; \
- vk_outarray_append_typed(VkPhysicalDeviceFragmentShadingRateKHR, &out, r) *r = rate; \
-}
-
- for (uint32_t x = 2; x >= 1; x--) {
- for (uint32_t y = 2; y >= 1; y--) {
- append_rate(x, y, VK_SAMPLE_COUNT_1_BIT |
- VK_SAMPLE_COUNT_2_BIT |
- VK_SAMPLE_COUNT_4_BIT |
- VK_SAMPLE_COUNT_8_BIT);
- }
- }
+void
+radv_GetPhysicalDeviceMultisamplePropertiesEXT(VkPhysicalDevice physicalDevice,
+ VkSampleCountFlagBits samples,
+ VkMultisamplePropertiesEXT *pMultisampleProperties)
+{
+ RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
+ VkSampleCountFlagBits supported_samples = VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT;
+
+ if (physical_device->rad_info.chip_class < GFX10)
+ supported_samples |= VK_SAMPLE_COUNT_8_BIT;
+
+ if (samples & supported_samples) {
+ pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){2, 2};
+ } else {
+ pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){0, 0};
+ }
+}
+
+VkResult
+radv_GetPhysicalDeviceFragmentShadingRatesKHR(
+ VkPhysicalDevice physicalDevice, uint32_t *pFragmentShadingRateCount,
+ VkPhysicalDeviceFragmentShadingRateKHR *pFragmentShadingRates)
+{
+ VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceFragmentShadingRateKHR, out, pFragmentShadingRates,
+ pFragmentShadingRateCount);
+
+#define append_rate(w, h, s) \
+ { \
+ VkPhysicalDeviceFragmentShadingRateKHR rate = { \
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR, \
+ .sampleCounts = s, \
+ .fragmentSize = {.width = w, .height = h}, \
+ }; \
+ vk_outarray_append_typed(VkPhysicalDeviceFragmentShadingRateKHR, &out, r) *r = rate; \
+ }
+
+ for (uint32_t x = 2; x >= 1; x--) {
+ for (uint32_t y = 2; y >= 1; y--) {
+ append_rate(x, y,
+ VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT |
+ VK_SAMPLE_COUNT_8_BIT);
+ }
+ }
#undef append_rate
- return vk_outarray_status(&out);
+ return vk_outarray_status(&out);
}
diff --git a/src/amd/vulkan/radv_formats.c b/src/amd/vulkan/radv_formats.c
index cbb6d5e9373..925444d3fd0 100644
--- a/src/amd/vulkan/radv_formats.c
+++ b/src/amd/vulkan/radv_formats.c
@@ -22,1999 +22,1947 @@
* IN THE SOFTWARE.
*/
-#include "radv_private.h"
#include "radv_debug.h"
+#include "radv_private.h"
-#include "vk_format.h"
#include "sid.h"
+#include "vk_format.h"
#include "vk_util.h"
#include "drm-uapi/drm_fourcc.h"
-#include "util/half_float.h"
-#include "util/format_srgb.h"
#include "util/format_r11g11b10f.h"
#include "util/format_rgb9e5.h"
+#include "util/format_srgb.h"
+#include "util/half_float.h"
#include "vulkan/util/vk_format.h"
-uint32_t radv_translate_buffer_dataformat(const struct util_format_description *desc,
- int first_non_void)
+uint32_t
+radv_translate_buffer_dataformat(const struct util_format_description *desc, int first_non_void)
{
- unsigned type;
- int i;
-
- assert(util_format_get_num_planes(desc->format) == 1);
-
- if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT)
- return V_008F0C_BUF_DATA_FORMAT_10_11_11;
-
- if (first_non_void < 0)
- return V_008F0C_BUF_DATA_FORMAT_INVALID;
- type = desc->channel[first_non_void].type;
-
- if (type == UTIL_FORMAT_TYPE_FIXED)
- return V_008F0C_BUF_DATA_FORMAT_INVALID;
- if (desc->nr_channels == 4 &&
- desc->channel[0].size == 10 &&
- desc->channel[1].size == 10 &&
- desc->channel[2].size == 10 &&
- desc->channel[3].size == 2)
- return V_008F0C_BUF_DATA_FORMAT_2_10_10_10;
-
- /* See whether the components are of the same size. */
- for (i = 0; i < desc->nr_channels; i++) {
- if (desc->channel[first_non_void].size != desc->channel[i].size)
- return V_008F0C_BUF_DATA_FORMAT_INVALID;
- }
-
- switch (desc->channel[first_non_void].size) {
- case 8:
- switch (desc->nr_channels) {
- case 1:
- return V_008F0C_BUF_DATA_FORMAT_8;
- case 2:
- return V_008F0C_BUF_DATA_FORMAT_8_8;
- case 4:
- return V_008F0C_BUF_DATA_FORMAT_8_8_8_8;
- }
- break;
- case 16:
- switch (desc->nr_channels) {
- case 1:
- return V_008F0C_BUF_DATA_FORMAT_16;
- case 2:
- return V_008F0C_BUF_DATA_FORMAT_16_16;
- case 4:
- return V_008F0C_BUF_DATA_FORMAT_16_16_16_16;
- }
- break;
- case 32:
- /* From the Southern Islands ISA documentation about MTBUF:
- * 'Memory reads of data in memory that is 32 or 64 bits do not
- * undergo any format conversion.'
- */
- if (type != UTIL_FORMAT_TYPE_FLOAT &&
- !desc->channel[first_non_void].pure_integer)
- return V_008F0C_BUF_DATA_FORMAT_INVALID;
-
- switch (desc->nr_channels) {
- case 1:
- return V_008F0C_BUF_DATA_FORMAT_32;
- case 2:
- return V_008F0C_BUF_DATA_FORMAT_32_32;
- case 3:
- return V_008F0C_BUF_DATA_FORMAT_32_32_32;
- case 4:
- return V_008F0C_BUF_DATA_FORMAT_32_32_32_32;
- }
- break;
- case 64:
- if (desc->nr_channels == 1)
- return V_008F0C_BUF_DATA_FORMAT_32_32;
- }
-
- return V_008F0C_BUF_DATA_FORMAT_INVALID;
+ unsigned type;
+ int i;
+
+ assert(util_format_get_num_planes(desc->format) == 1);
+
+ if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT)
+ return V_008F0C_BUF_DATA_FORMAT_10_11_11;
+
+ if (first_non_void < 0)
+ return V_008F0C_BUF_DATA_FORMAT_INVALID;
+ type = desc->channel[first_non_void].type;
+
+ if (type == UTIL_FORMAT_TYPE_FIXED)
+ return V_008F0C_BUF_DATA_FORMAT_INVALID;
+ if (desc->nr_channels == 4 && desc->channel[0].size == 10 && desc->channel[1].size == 10 &&
+ desc->channel[2].size == 10 && desc->channel[3].size == 2)
+ return V_008F0C_BUF_DATA_FORMAT_2_10_10_10;
+
+ /* See whether the components are of the same size. */
+ for (i = 0; i < desc->nr_channels; i++) {
+ if (desc->channel[first_non_void].size != desc->channel[i].size)
+ return V_008F0C_BUF_DATA_FORMAT_INVALID;
+ }
+
+ switch (desc->channel[first_non_void].size) {
+ case 8:
+ switch (desc->nr_channels) {
+ case 1:
+ return V_008F0C_BUF_DATA_FORMAT_8;
+ case 2:
+ return V_008F0C_BUF_DATA_FORMAT_8_8;
+ case 4:
+ return V_008F0C_BUF_DATA_FORMAT_8_8_8_8;
+ }
+ break;
+ case 16:
+ switch (desc->nr_channels) {
+ case 1:
+ return V_008F0C_BUF_DATA_FORMAT_16;
+ case 2:
+ return V_008F0C_BUF_DATA_FORMAT_16_16;
+ case 4:
+ return V_008F0C_BUF_DATA_FORMAT_16_16_16_16;
+ }
+ break;
+ case 32:
+ /* From the Southern Islands ISA documentation about MTBUF:
+ * 'Memory reads of data in memory that is 32 or 64 bits do not
+ * undergo any format conversion.'
+ */
+ if (type != UTIL_FORMAT_TYPE_FLOAT && !desc->channel[first_non_void].pure_integer)
+ return V_008F0C_BUF_DATA_FORMAT_INVALID;
+
+ switch (desc->nr_channels) {
+ case 1:
+ return V_008F0C_BUF_DATA_FORMAT_32;
+ case 2:
+ return V_008F0C_BUF_DATA_FORMAT_32_32;
+ case 3:
+ return V_008F0C_BUF_DATA_FORMAT_32_32_32;
+ case 4:
+ return V_008F0C_BUF_DATA_FORMAT_32_32_32_32;
+ }
+ break;
+ case 64:
+ if (desc->nr_channels == 1)
+ return V_008F0C_BUF_DATA_FORMAT_32_32;
+ }
+
+ return V_008F0C_BUF_DATA_FORMAT_INVALID;
}
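
For a plain three-channel 32-bit float vertex layout the translation above ends in the size-32 branch and yields the 32_32_32 data format. A small sketch of driving it through the format-description helpers already used in this file; util_format_description() and util_format_get_first_non_void_channel() are the usual Mesa utilities, so treat this as something that would only compile inside the radv tree:

   /* Expected to return V_008F0C_BUF_DATA_FORMAT_32_32_32 for a vec3 of
    * 32-bit floats, per the size-32 / three-channel case above. */
   static uint32_t
   example_vec3_float_dataformat(void)
   {
      const enum pipe_format fmt = PIPE_FORMAT_R32G32B32_FLOAT;
      const struct util_format_description *desc = util_format_description(fmt);
      int first_non_void = util_format_get_first_non_void_channel(fmt);

      return radv_translate_buffer_dataformat(desc, first_non_void);
   }
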
-uint32_t radv_translate_buffer_numformat(const struct util_format_description *desc,
- int first_non_void)
+uint32_t
+radv_translate_buffer_numformat(const struct util_format_description *desc, int first_non_void)
{
- assert(util_format_get_num_planes(desc->format) == 1);
-
- if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT)
- return V_008F0C_BUF_NUM_FORMAT_FLOAT;
-
- if (first_non_void < 0)
- return ~0;
-
- switch (desc->channel[first_non_void].type) {
- case UTIL_FORMAT_TYPE_SIGNED:
- if (desc->channel[first_non_void].normalized)
- return V_008F0C_BUF_NUM_FORMAT_SNORM;
- else if (desc->channel[first_non_void].pure_integer)
- return V_008F0C_BUF_NUM_FORMAT_SINT;
- else
- return V_008F0C_BUF_NUM_FORMAT_SSCALED;
- break;
- case UTIL_FORMAT_TYPE_UNSIGNED:
- if (desc->channel[first_non_void].normalized)
- return V_008F0C_BUF_NUM_FORMAT_UNORM;
- else if (desc->channel[first_non_void].pure_integer)
- return V_008F0C_BUF_NUM_FORMAT_UINT;
- else
- return V_008F0C_BUF_NUM_FORMAT_USCALED;
- break;
- case UTIL_FORMAT_TYPE_FLOAT:
- default:
- return V_008F0C_BUF_NUM_FORMAT_FLOAT;
- }
+ assert(util_format_get_num_planes(desc->format) == 1);
+
+ if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT)
+ return V_008F0C_BUF_NUM_FORMAT_FLOAT;
+
+ if (first_non_void < 0)
+ return ~0;
+
+ switch (desc->channel[first_non_void].type) {
+ case UTIL_FORMAT_TYPE_SIGNED:
+ if (desc->channel[first_non_void].normalized)
+ return V_008F0C_BUF_NUM_FORMAT_SNORM;
+ else if (desc->channel[first_non_void].pure_integer)
+ return V_008F0C_BUF_NUM_FORMAT_SINT;
+ else
+ return V_008F0C_BUF_NUM_FORMAT_SSCALED;
+ break;
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ if (desc->channel[first_non_void].normalized)
+ return V_008F0C_BUF_NUM_FORMAT_UNORM;
+ else if (desc->channel[first_non_void].pure_integer)
+ return V_008F0C_BUF_NUM_FORMAT_UINT;
+ else
+ return V_008F0C_BUF_NUM_FORMAT_USCALED;
+ break;
+ case UTIL_FORMAT_TYPE_FLOAT:
+ default:
+ return V_008F0C_BUF_NUM_FORMAT_FLOAT;
+ }
}
-uint32_t radv_translate_tex_dataformat(VkFormat format,
- const struct util_format_description *desc,
- int first_non_void)
+uint32_t
+radv_translate_tex_dataformat(VkFormat format, const struct util_format_description *desc,
+ int first_non_void)
{
- bool uniform = true;
- int i;
-
- assert(vk_format_get_plane_count(format) == 1);
-
- if (!desc)
- return ~0;
- /* Colorspace (return non-RGB formats directly). */
- switch (desc->colorspace) {
- /* Depth stencil formats */
- case UTIL_FORMAT_COLORSPACE_ZS:
- switch (format) {
- case VK_FORMAT_D16_UNORM:
- return V_008F14_IMG_DATA_FORMAT_16;
- case VK_FORMAT_D24_UNORM_S8_UINT:
- case VK_FORMAT_X8_D24_UNORM_PACK32:
- return V_008F14_IMG_DATA_FORMAT_8_24;
- case VK_FORMAT_S8_UINT:
- return V_008F14_IMG_DATA_FORMAT_8;
- case VK_FORMAT_D32_SFLOAT:
- return V_008F14_IMG_DATA_FORMAT_32;
- case VK_FORMAT_D32_SFLOAT_S8_UINT:
- return V_008F14_IMG_DATA_FORMAT_X24_8_32;
- default:
- goto out_unknown;
- }
-
- case UTIL_FORMAT_COLORSPACE_YUV:
- goto out_unknown; /* TODO */
-
- case UTIL_FORMAT_COLORSPACE_SRGB:
- if (desc->nr_channels != 4 && desc->nr_channels != 1)
- goto out_unknown;
- break;
-
- default:
- break;
- }
-
- if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
- switch(format) {
- /* Don't ask me why this looks inverted. PAL does the same. */
- case VK_FORMAT_G8B8G8R8_422_UNORM:
- return V_008F14_IMG_DATA_FORMAT_BG_RG;
- case VK_FORMAT_B8G8R8G8_422_UNORM:
- return V_008F14_IMG_DATA_FORMAT_GB_GR;
- default:
- goto out_unknown;
- }
- }
-
- if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
- switch(format) {
- case VK_FORMAT_BC4_UNORM_BLOCK:
- case VK_FORMAT_BC4_SNORM_BLOCK:
- return V_008F14_IMG_DATA_FORMAT_BC4;
- case VK_FORMAT_BC5_UNORM_BLOCK:
- case VK_FORMAT_BC5_SNORM_BLOCK:
- return V_008F14_IMG_DATA_FORMAT_BC5;
- default:
- break;
- }
- }
-
- if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
- switch(format) {
- case VK_FORMAT_BC1_RGB_UNORM_BLOCK:
- case VK_FORMAT_BC1_RGB_SRGB_BLOCK:
- case VK_FORMAT_BC1_RGBA_UNORM_BLOCK:
- case VK_FORMAT_BC1_RGBA_SRGB_BLOCK:
- return V_008F14_IMG_DATA_FORMAT_BC1;
- case VK_FORMAT_BC2_UNORM_BLOCK:
- case VK_FORMAT_BC2_SRGB_BLOCK:
- return V_008F14_IMG_DATA_FORMAT_BC2;
- case VK_FORMAT_BC3_UNORM_BLOCK:
- case VK_FORMAT_BC3_SRGB_BLOCK:
- return V_008F14_IMG_DATA_FORMAT_BC3;
- default:
- break;
- }
- }
-
- if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) {
- switch(format) {
- case VK_FORMAT_BC6H_UFLOAT_BLOCK:
- case VK_FORMAT_BC6H_SFLOAT_BLOCK:
- return V_008F14_IMG_DATA_FORMAT_BC6;
- case VK_FORMAT_BC7_UNORM_BLOCK:
- case VK_FORMAT_BC7_SRGB_BLOCK:
- return V_008F14_IMG_DATA_FORMAT_BC7;
- default:
- break;
- }
- }
-
- if (desc->layout == UTIL_FORMAT_LAYOUT_ETC) {
- switch (format) {
- case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:
- case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
- return V_008F14_IMG_DATA_FORMAT_ETC2_RGB;
- case VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK:
- case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
- return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA1;
- case VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK:
- case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
- return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA;
- case VK_FORMAT_EAC_R11_UNORM_BLOCK:
- case VK_FORMAT_EAC_R11_SNORM_BLOCK:
- return V_008F14_IMG_DATA_FORMAT_ETC2_R;
- case VK_FORMAT_EAC_R11G11_UNORM_BLOCK:
- case VK_FORMAT_EAC_R11G11_SNORM_BLOCK:
- return V_008F14_IMG_DATA_FORMAT_ETC2_RG;
- default:
- break;
- }
- }
-
- if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) {
- return V_008F14_IMG_DATA_FORMAT_5_9_9_9;
- } else if (format == VK_FORMAT_B10G11R11_UFLOAT_PACK32) {
- return V_008F14_IMG_DATA_FORMAT_10_11_11;
- }
-
- /* R8G8Bx_SNORM - TODO CxV8U8 */
-
- /* hw cannot support mixed formats (except depth/stencil, since only
- * depth is read). */
- if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
- goto out_unknown;
-
- /* See whether the components are of the same size. */
- for (i = 1; i < desc->nr_channels; i++) {
- uniform = uniform && desc->channel[0].size == desc->channel[i].size;
- }
-
- /* Non-uniform formats. */
- if (!uniform) {
- switch(desc->nr_channels) {
- case 3:
- if (desc->channel[0].size == 5 &&
- desc->channel[1].size == 6 &&
- desc->channel[2].size == 5) {
- return V_008F14_IMG_DATA_FORMAT_5_6_5;
- }
- goto out_unknown;
- case 4:
- if (desc->channel[0].size == 5 &&
- desc->channel[1].size == 5 &&
- desc->channel[2].size == 5 &&
- desc->channel[3].size == 1) {
- return V_008F14_IMG_DATA_FORMAT_1_5_5_5;
- }
- if (desc->channel[0].size == 1 &&
- desc->channel[1].size == 5 &&
- desc->channel[2].size == 5 &&
- desc->channel[3].size == 5) {
- return V_008F14_IMG_DATA_FORMAT_5_5_5_1;
- }
- if (desc->channel[0].size == 10 &&
- desc->channel[1].size == 10 &&
- desc->channel[2].size == 10 &&
- desc->channel[3].size == 2) {
- /* The closed-source VK driver does this too: no 2/10/10/10 SNORM. */
- if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED &&
- desc->channel[0].normalized)
- goto out_unknown;
- return V_008F14_IMG_DATA_FORMAT_2_10_10_10;
- }
- goto out_unknown;
- }
- goto out_unknown;
- }
-
- if (first_non_void < 0 || first_non_void > 3)
- goto out_unknown;
-
- /* uniform formats */
- switch (desc->channel[first_non_void].size) {
- case 4:
- switch (desc->nr_channels) {
+ bool uniform = true;
+ int i;
+
+ assert(vk_format_get_plane_count(format) == 1);
+
+ if (!desc)
+ return ~0;
+ /* Colorspace (return non-RGB formats directly). */
+ switch (desc->colorspace) {
+ /* Depth stencil formats */
+ case UTIL_FORMAT_COLORSPACE_ZS:
+ switch (format) {
+ case VK_FORMAT_D16_UNORM:
+ return V_008F14_IMG_DATA_FORMAT_16;
+ case VK_FORMAT_D24_UNORM_S8_UINT:
+ case VK_FORMAT_X8_D24_UNORM_PACK32:
+ return V_008F14_IMG_DATA_FORMAT_8_24;
+ case VK_FORMAT_S8_UINT:
+ return V_008F14_IMG_DATA_FORMAT_8;
+ case VK_FORMAT_D32_SFLOAT:
+ return V_008F14_IMG_DATA_FORMAT_32;
+ case VK_FORMAT_D32_SFLOAT_S8_UINT:
+ return V_008F14_IMG_DATA_FORMAT_X24_8_32;
+ default:
+ goto out_unknown;
+ }
+
+ case UTIL_FORMAT_COLORSPACE_YUV:
+ goto out_unknown; /* TODO */
+
+ case UTIL_FORMAT_COLORSPACE_SRGB:
+ if (desc->nr_channels != 4 && desc->nr_channels != 1)
+ goto out_unknown;
+ break;
+
+ default:
+ break;
+ }
+
+ if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
+ switch (format) {
+ /* Don't ask me why this looks inverted. PAL does the same. */
+ case VK_FORMAT_G8B8G8R8_422_UNORM:
+ return V_008F14_IMG_DATA_FORMAT_BG_RG;
+ case VK_FORMAT_B8G8R8G8_422_UNORM:
+ return V_008F14_IMG_DATA_FORMAT_GB_GR;
+ default:
+ goto out_unknown;
+ }
+ }
+
+ if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
+ switch (format) {
+ case VK_FORMAT_BC4_UNORM_BLOCK:
+ case VK_FORMAT_BC4_SNORM_BLOCK:
+ return V_008F14_IMG_DATA_FORMAT_BC4;
+ case VK_FORMAT_BC5_UNORM_BLOCK:
+ case VK_FORMAT_BC5_SNORM_BLOCK:
+ return V_008F14_IMG_DATA_FORMAT_BC5;
+ default:
+ break;
+ }
+ }
+
+ if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
+ switch (format) {
+ case VK_FORMAT_BC1_RGB_UNORM_BLOCK:
+ case VK_FORMAT_BC1_RGB_SRGB_BLOCK:
+ case VK_FORMAT_BC1_RGBA_UNORM_BLOCK:
+ case VK_FORMAT_BC1_RGBA_SRGB_BLOCK:
+ return V_008F14_IMG_DATA_FORMAT_BC1;
+ case VK_FORMAT_BC2_UNORM_BLOCK:
+ case VK_FORMAT_BC2_SRGB_BLOCK:
+ return V_008F14_IMG_DATA_FORMAT_BC2;
+ case VK_FORMAT_BC3_UNORM_BLOCK:
+ case VK_FORMAT_BC3_SRGB_BLOCK:
+ return V_008F14_IMG_DATA_FORMAT_BC3;
+ default:
+ break;
+ }
+ }
+
+ if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) {
+ switch (format) {
+ case VK_FORMAT_BC6H_UFLOAT_BLOCK:
+ case VK_FORMAT_BC6H_SFLOAT_BLOCK:
+ return V_008F14_IMG_DATA_FORMAT_BC6;
+ case VK_FORMAT_BC7_UNORM_BLOCK:
+ case VK_FORMAT_BC7_SRGB_BLOCK:
+ return V_008F14_IMG_DATA_FORMAT_BC7;
+ default:
+ break;
+ }
+ }
+
+ if (desc->layout == UTIL_FORMAT_LAYOUT_ETC) {
+ switch (format) {
+ case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:
+ case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
+ return V_008F14_IMG_DATA_FORMAT_ETC2_RGB;
+ case VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK:
+ case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
+ return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA1;
+ case VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK:
+ case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
+ return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA;
+ case VK_FORMAT_EAC_R11_UNORM_BLOCK:
+ case VK_FORMAT_EAC_R11_SNORM_BLOCK:
+ return V_008F14_IMG_DATA_FORMAT_ETC2_R;
+ case VK_FORMAT_EAC_R11G11_UNORM_BLOCK:
+ case VK_FORMAT_EAC_R11G11_SNORM_BLOCK:
+ return V_008F14_IMG_DATA_FORMAT_ETC2_RG;
+ default:
+ break;
+ }
+ }
+
+ if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) {
+ return V_008F14_IMG_DATA_FORMAT_5_9_9_9;
+ } else if (format == VK_FORMAT_B10G11R11_UFLOAT_PACK32) {
+ return V_008F14_IMG_DATA_FORMAT_10_11_11;
+ }
+
+ /* R8G8Bx_SNORM - TODO CxV8U8 */
+
+ /* hw cannot support mixed formats (except depth/stencil, since only
+ * depth is read). */
+ if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
+ goto out_unknown;
+
+ /* See whether the components are of the same size. */
+ for (i = 1; i < desc->nr_channels; i++) {
+ uniform = uniform && desc->channel[0].size == desc->channel[i].size;
+ }
+
+ /* Non-uniform formats. */
+ if (!uniform) {
+ switch (desc->nr_channels) {
+ case 3:
+ if (desc->channel[0].size == 5 && desc->channel[1].size == 6 &&
+ desc->channel[2].size == 5) {
+ return V_008F14_IMG_DATA_FORMAT_5_6_5;
+ }
+ goto out_unknown;
+ case 4:
+ if (desc->channel[0].size == 5 && desc->channel[1].size == 5 &&
+ desc->channel[2].size == 5 && desc->channel[3].size == 1) {
+ return V_008F14_IMG_DATA_FORMAT_1_5_5_5;
+ }
+ if (desc->channel[0].size == 1 && desc->channel[1].size == 5 &&
+ desc->channel[2].size == 5 && desc->channel[3].size == 5) {
+ return V_008F14_IMG_DATA_FORMAT_5_5_5_1;
+ }
+ if (desc->channel[0].size == 10 && desc->channel[1].size == 10 &&
+ desc->channel[2].size == 10 && desc->channel[3].size == 2) {
+ /* The closed-source VK driver does this too: no 2/10/10/10 SNORM. */
+ if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED && desc->channel[0].normalized)
+ goto out_unknown;
+ return V_008F14_IMG_DATA_FORMAT_2_10_10_10;
+ }
+ goto out_unknown;
+ }
+ goto out_unknown;
+ }
+
+ if (first_non_void < 0 || first_non_void > 3)
+ goto out_unknown;
+
+ /* uniform formats */
+ switch (desc->channel[first_non_void].size) {
+ case 4:
+ switch (desc->nr_channels) {
#if 0 /* Not supported for render targets */
case 2:
return V_008F14_IMG_DATA_FORMAT_4_4;
#endif
- case 4:
- return V_008F14_IMG_DATA_FORMAT_4_4_4_4;
- }
- break;
- case 8:
- switch (desc->nr_channels) {
- case 1:
- return V_008F14_IMG_DATA_FORMAT_8;
- case 2:
- return V_008F14_IMG_DATA_FORMAT_8_8;
- case 4:
- return V_008F14_IMG_DATA_FORMAT_8_8_8_8;
- }
- break;
- case 16:
- switch (desc->nr_channels) {
- case 1:
- return V_008F14_IMG_DATA_FORMAT_16;
- case 2:
- return V_008F14_IMG_DATA_FORMAT_16_16;
- case 4:
- return V_008F14_IMG_DATA_FORMAT_16_16_16_16;
- }
- break;
- case 32:
- switch (desc->nr_channels) {
- case 1:
- return V_008F14_IMG_DATA_FORMAT_32;
- case 2:
- return V_008F14_IMG_DATA_FORMAT_32_32;
- case 3:
- return V_008F14_IMG_DATA_FORMAT_32_32_32;
- case 4:
- return V_008F14_IMG_DATA_FORMAT_32_32_32_32;
- }
- break;
- case 64:
- if (desc->nr_channels == 1)
- return V_008F14_IMG_DATA_FORMAT_32_32;
- break;
- }
+ case 4:
+ return V_008F14_IMG_DATA_FORMAT_4_4_4_4;
+ }
+ break;
+ case 8:
+ switch (desc->nr_channels) {
+ case 1:
+ return V_008F14_IMG_DATA_FORMAT_8;
+ case 2:
+ return V_008F14_IMG_DATA_FORMAT_8_8;
+ case 4:
+ return V_008F14_IMG_DATA_FORMAT_8_8_8_8;
+ }
+ break;
+ case 16:
+ switch (desc->nr_channels) {
+ case 1:
+ return V_008F14_IMG_DATA_FORMAT_16;
+ case 2:
+ return V_008F14_IMG_DATA_FORMAT_16_16;
+ case 4:
+ return V_008F14_IMG_DATA_FORMAT_16_16_16_16;
+ }
+ break;
+ case 32:
+ switch (desc->nr_channels) {
+ case 1:
+ return V_008F14_IMG_DATA_FORMAT_32;
+ case 2:
+ return V_008F14_IMG_DATA_FORMAT_32_32;
+ case 3:
+ return V_008F14_IMG_DATA_FORMAT_32_32_32;
+ case 4:
+ return V_008F14_IMG_DATA_FORMAT_32_32_32_32;
+ }
+ break;
+ case 64:
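+ /* A 64-bit single channel is described to the HW as two 32-bit channels. */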
+ if (desc->nr_channels == 1)
+ return V_008F14_IMG_DATA_FORMAT_32_32;
+ break;
+ }
out_unknown:
- /* R600_ERR("Unable to handle texformat %d %s\n", format, vk_format_name(format)); */
- return ~0;
+ /* R600_ERR("Unable to handle texformat %d %s\n", format, vk_format_name(format)); */
+ return ~0;
}
-uint32_t radv_translate_tex_numformat(VkFormat format,
- const struct util_format_description *desc,
- int first_non_void)
+uint32_t
+radv_translate_tex_numformat(VkFormat format, const struct util_format_description *desc,
+ int first_non_void)
{
- assert(vk_format_get_plane_count(format) == 1);
-
- switch (format) {
- case VK_FORMAT_D24_UNORM_S8_UINT:
- return V_008F14_IMG_NUM_FORMAT_UNORM;
- default:
- if (first_non_void < 0) {
- if (vk_format_is_compressed(format)) {
- switch (format) {
- case VK_FORMAT_BC1_RGB_SRGB_BLOCK:
- case VK_FORMAT_BC1_RGBA_SRGB_BLOCK:
- case VK_FORMAT_BC2_SRGB_BLOCK:
- case VK_FORMAT_BC3_SRGB_BLOCK:
- case VK_FORMAT_BC7_SRGB_BLOCK:
- case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
- case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
- case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
- return V_008F14_IMG_NUM_FORMAT_SRGB;
- case VK_FORMAT_BC4_SNORM_BLOCK:
- case VK_FORMAT_BC5_SNORM_BLOCK:
- case VK_FORMAT_BC6H_SFLOAT_BLOCK:
- case VK_FORMAT_EAC_R11_SNORM_BLOCK:
- case VK_FORMAT_EAC_R11G11_SNORM_BLOCK:
- return V_008F14_IMG_NUM_FORMAT_SNORM;
- default:
- return V_008F14_IMG_NUM_FORMAT_UNORM;
- }
- } else if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
- return V_008F14_IMG_NUM_FORMAT_UNORM;
- } else {
- return V_008F14_IMG_NUM_FORMAT_FLOAT;
- }
- } else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
- return V_008F14_IMG_NUM_FORMAT_SRGB;
- } else {
- switch (desc->channel[first_non_void].type) {
- case UTIL_FORMAT_TYPE_FLOAT:
- return V_008F14_IMG_NUM_FORMAT_FLOAT;
- case UTIL_FORMAT_TYPE_SIGNED:
- if (desc->channel[first_non_void].normalized)
- return V_008F14_IMG_NUM_FORMAT_SNORM;
- else if (desc->channel[first_non_void].pure_integer)
- return V_008F14_IMG_NUM_FORMAT_SINT;
- else
- return V_008F14_IMG_NUM_FORMAT_SSCALED;
- case UTIL_FORMAT_TYPE_UNSIGNED:
- if (desc->channel[first_non_void].normalized)
- return V_008F14_IMG_NUM_FORMAT_UNORM;
- else if (desc->channel[first_non_void].pure_integer)
- return V_008F14_IMG_NUM_FORMAT_UINT;
- else
- return V_008F14_IMG_NUM_FORMAT_USCALED;
- default:
- return V_008F14_IMG_NUM_FORMAT_UNORM;
- }
- }
- }
+ assert(vk_format_get_plane_count(format) == 1);
+
+ switch (format) {
+ case VK_FORMAT_D24_UNORM_S8_UINT:
+ return V_008F14_IMG_NUM_FORMAT_UNORM;
+ default:
+ if (first_non_void < 0) {
+ if (vk_format_is_compressed(format)) {
+ switch (format) {
+ case VK_FORMAT_BC1_RGB_SRGB_BLOCK:
+ case VK_FORMAT_BC1_RGBA_SRGB_BLOCK:
+ case VK_FORMAT_BC2_SRGB_BLOCK:
+ case VK_FORMAT_BC3_SRGB_BLOCK:
+ case VK_FORMAT_BC7_SRGB_BLOCK:
+ case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
+ case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
+ case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
+ return V_008F14_IMG_NUM_FORMAT_SRGB;
+ case VK_FORMAT_BC4_SNORM_BLOCK:
+ case VK_FORMAT_BC5_SNORM_BLOCK:
+ case VK_FORMAT_BC6H_SFLOAT_BLOCK:
+ case VK_FORMAT_EAC_R11_SNORM_BLOCK:
+ case VK_FORMAT_EAC_R11G11_SNORM_BLOCK:
+ return V_008F14_IMG_NUM_FORMAT_SNORM;
+ default:
+ return V_008F14_IMG_NUM_FORMAT_UNORM;
+ }
+ } else if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
+ return V_008F14_IMG_NUM_FORMAT_UNORM;
+ } else {
+ return V_008F14_IMG_NUM_FORMAT_FLOAT;
+ }
+ } else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
+ return V_008F14_IMG_NUM_FORMAT_SRGB;
+ } else {
+ switch (desc->channel[first_non_void].type) {
+ case UTIL_FORMAT_TYPE_FLOAT:
+ return V_008F14_IMG_NUM_FORMAT_FLOAT;
+ case UTIL_FORMAT_TYPE_SIGNED:
+ if (desc->channel[first_non_void].normalized)
+ return V_008F14_IMG_NUM_FORMAT_SNORM;
+ else if (desc->channel[first_non_void].pure_integer)
+ return V_008F14_IMG_NUM_FORMAT_SINT;
+ else
+ return V_008F14_IMG_NUM_FORMAT_SSCALED;
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ if (desc->channel[first_non_void].normalized)
+ return V_008F14_IMG_NUM_FORMAT_UNORM;
+ else if (desc->channel[first_non_void].pure_integer)
+ return V_008F14_IMG_NUM_FORMAT_UINT;
+ else
+ return V_008F14_IMG_NUM_FORMAT_USCALED;
+ default:
+ return V_008F14_IMG_NUM_FORMAT_UNORM;
+ }
+ }
+ }
}
-uint32_t radv_translate_color_numformat(VkFormat format,
- const struct util_format_description *desc,
- int first_non_void)
+uint32_t
+radv_translate_color_numformat(VkFormat format, const struct util_format_description *desc,
+ int first_non_void)
{
- unsigned ntype;
-
- assert(vk_format_get_plane_count(format) == 1);
-
- if (first_non_void == -1 || desc->channel[first_non_void].type == UTIL_FORMAT_TYPE_FLOAT)
- ntype = V_028C70_NUMBER_FLOAT;
- else {
- ntype = V_028C70_NUMBER_UNORM;
- if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
- ntype = V_028C70_NUMBER_SRGB;
- else if (desc->channel[first_non_void].type == UTIL_FORMAT_TYPE_SIGNED) {
- if (desc->channel[first_non_void].pure_integer) {
- ntype = V_028C70_NUMBER_SINT;
- } else if (desc->channel[first_non_void].normalized) {
- ntype = V_028C70_NUMBER_SNORM;
- } else
- ntype = ~0u;
- } else if (desc->channel[first_non_void].type == UTIL_FORMAT_TYPE_UNSIGNED) {
- if (desc->channel[first_non_void].pure_integer) {
- ntype = V_028C70_NUMBER_UINT;
- } else if (desc->channel[first_non_void].normalized) {
- ntype = V_028C70_NUMBER_UNORM;
- } else
- ntype = ~0u;
- }
- }
- return ntype;
+ unsigned ntype;
+
+ assert(vk_format_get_plane_count(format) == 1);
+
+ if (first_non_void == -1 || desc->channel[first_non_void].type == UTIL_FORMAT_TYPE_FLOAT)
+ ntype = V_028C70_NUMBER_FLOAT;
+ else {
+ ntype = V_028C70_NUMBER_UNORM;
+ if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
+ ntype = V_028C70_NUMBER_SRGB;
+ else if (desc->channel[first_non_void].type == UTIL_FORMAT_TYPE_SIGNED) {
+ if (desc->channel[first_non_void].pure_integer) {
+ ntype = V_028C70_NUMBER_SINT;
+ } else if (desc->channel[first_non_void].normalized) {
+ ntype = V_028C70_NUMBER_SNORM;
+ } else
+ ntype = ~0u;
+ } else if (desc->channel[first_non_void].type == UTIL_FORMAT_TYPE_UNSIGNED) {
+ if (desc->channel[first_non_void].pure_integer) {
+ ntype = V_028C70_NUMBER_UINT;
+ } else if (desc->channel[first_non_void].normalized) {
+ ntype = V_028C70_NUMBER_UNORM;
+ } else
+ ntype = ~0u;
+ }
+ }
+ return ntype;
}
-static bool radv_is_sampler_format_supported(VkFormat format, bool *linear_sampling)
+static bool
+radv_is_sampler_format_supported(VkFormat format, bool *linear_sampling)
{
- const struct util_format_description *desc = vk_format_description(format);
- uint32_t num_format;
- if (!desc || format == VK_FORMAT_UNDEFINED ||
- format == VK_FORMAT_R64_UINT || format == VK_FORMAT_R64_SINT ||
- format == VK_FORMAT_R64_SFLOAT)
- return false;
- num_format = radv_translate_tex_numformat(format, desc,
- vk_format_get_first_non_void_channel(format));
-
- if (num_format == V_008F14_IMG_NUM_FORMAT_USCALED ||
- num_format == V_008F14_IMG_NUM_FORMAT_SSCALED)
- return false;
-
- if (num_format == V_008F14_IMG_NUM_FORMAT_UNORM ||
- num_format == V_008F14_IMG_NUM_FORMAT_SNORM ||
- num_format == V_008F14_IMG_NUM_FORMAT_FLOAT ||
- num_format == V_008F14_IMG_NUM_FORMAT_SRGB)
- *linear_sampling = true;
- else
- *linear_sampling = false;
- return radv_translate_tex_dataformat(format, vk_format_description(format),
- vk_format_get_first_non_void_channel(format)) != ~0U;
+ const struct util_format_description *desc = vk_format_description(format);
+ uint32_t num_format;
+ if (!desc || format == VK_FORMAT_UNDEFINED || format == VK_FORMAT_R64_UINT ||
+ format == VK_FORMAT_R64_SINT || format == VK_FORMAT_R64_SFLOAT)
+ return false;
+ num_format =
+ radv_translate_tex_numformat(format, desc, vk_format_get_first_non_void_channel(format));
+
+ if (num_format == V_008F14_IMG_NUM_FORMAT_USCALED ||
+ num_format == V_008F14_IMG_NUM_FORMAT_SSCALED)
+ return false;
+
+ if (num_format == V_008F14_IMG_NUM_FORMAT_UNORM || num_format == V_008F14_IMG_NUM_FORMAT_SNORM ||
+ num_format == V_008F14_IMG_NUM_FORMAT_FLOAT || num_format == V_008F14_IMG_NUM_FORMAT_SRGB)
+ *linear_sampling = true;
+ else
+ *linear_sampling = false;
+ return radv_translate_tex_dataformat(format, vk_format_description(format),
+ vk_format_get_first_non_void_channel(format)) != ~0U;
}
bool
radv_is_atomic_format_supported(VkFormat format)
{
- return format == VK_FORMAT_R32_UINT ||
- format == VK_FORMAT_R32_SINT ||
- format == VK_FORMAT_R32_SFLOAT ||
- format == VK_FORMAT_R64_UINT ||
- format == VK_FORMAT_R64_SINT;
+ return format == VK_FORMAT_R32_UINT || format == VK_FORMAT_R32_SINT ||
+ format == VK_FORMAT_R32_SFLOAT || format == VK_FORMAT_R64_UINT ||
+ format == VK_FORMAT_R64_SINT;
}
-static bool radv_is_storage_image_format_supported(struct radv_physical_device *physical_device,
- VkFormat format)
+static bool
+radv_is_storage_image_format_supported(struct radv_physical_device *physical_device,
+ VkFormat format)
{
- const struct util_format_description *desc = vk_format_description(format);
- unsigned data_format, num_format;
- if (!desc || format == VK_FORMAT_UNDEFINED)
- return false;
-
- data_format = radv_translate_tex_dataformat(format, desc,
- vk_format_get_first_non_void_channel(format));
- num_format = radv_translate_tex_numformat(format, desc,
- vk_format_get_first_non_void_channel(format));
-
- if(data_format == ~0 || num_format == ~0)
- return false;
-
- /* Extracted from the GCN3 ISA document. */
- switch(num_format) {
- case V_008F14_IMG_NUM_FORMAT_UNORM:
- case V_008F14_IMG_NUM_FORMAT_SNORM:
- case V_008F14_IMG_NUM_FORMAT_UINT:
- case V_008F14_IMG_NUM_FORMAT_SINT:
- case V_008F14_IMG_NUM_FORMAT_FLOAT:
- break;
- default:
- return false;
- }
-
- switch(data_format) {
- case V_008F14_IMG_DATA_FORMAT_8:
- case V_008F14_IMG_DATA_FORMAT_16:
- case V_008F14_IMG_DATA_FORMAT_8_8:
- case V_008F14_IMG_DATA_FORMAT_32:
- case V_008F14_IMG_DATA_FORMAT_16_16:
- case V_008F14_IMG_DATA_FORMAT_10_11_11:
- case V_008F14_IMG_DATA_FORMAT_11_11_10:
- case V_008F14_IMG_DATA_FORMAT_10_10_10_2:
- case V_008F14_IMG_DATA_FORMAT_2_10_10_10:
- case V_008F14_IMG_DATA_FORMAT_8_8_8_8:
- case V_008F14_IMG_DATA_FORMAT_32_32:
- case V_008F14_IMG_DATA_FORMAT_16_16_16_16:
- case V_008F14_IMG_DATA_FORMAT_32_32_32_32:
- case V_008F14_IMG_DATA_FORMAT_5_6_5:
- case V_008F14_IMG_DATA_FORMAT_1_5_5_5:
- case V_008F14_IMG_DATA_FORMAT_5_5_5_1:
- case V_008F14_IMG_DATA_FORMAT_4_4_4_4:
- /* TODO: FMASK formats. */
- return true;
- default:
- return false;
- }
+ const struct util_format_description *desc = vk_format_description(format);
+ unsigned data_format, num_format;
+ if (!desc || format == VK_FORMAT_UNDEFINED)
+ return false;
+
+ data_format =
+ radv_translate_tex_dataformat(format, desc, vk_format_get_first_non_void_channel(format));
+ num_format =
+ radv_translate_tex_numformat(format, desc, vk_format_get_first_non_void_channel(format));
+
+ if (data_format == ~0 || num_format == ~0)
+ return false;
+
+ /* Extracted from the GCN3 ISA document. */
+ switch (num_format) {
+ case V_008F14_IMG_NUM_FORMAT_UNORM:
+ case V_008F14_IMG_NUM_FORMAT_SNORM:
+ case V_008F14_IMG_NUM_FORMAT_UINT:
+ case V_008F14_IMG_NUM_FORMAT_SINT:
+ case V_008F14_IMG_NUM_FORMAT_FLOAT:
+ break;
+ default:
+ return false;
+ }
+
+ switch (data_format) {
+ case V_008F14_IMG_DATA_FORMAT_8:
+ case V_008F14_IMG_DATA_FORMAT_16:
+ case V_008F14_IMG_DATA_FORMAT_8_8:
+ case V_008F14_IMG_DATA_FORMAT_32:
+ case V_008F14_IMG_DATA_FORMAT_16_16:
+ case V_008F14_IMG_DATA_FORMAT_10_11_11:
+ case V_008F14_IMG_DATA_FORMAT_11_11_10:
+ case V_008F14_IMG_DATA_FORMAT_10_10_10_2:
+ case V_008F14_IMG_DATA_FORMAT_2_10_10_10:
+ case V_008F14_IMG_DATA_FORMAT_8_8_8_8:
+ case V_008F14_IMG_DATA_FORMAT_32_32:
+ case V_008F14_IMG_DATA_FORMAT_16_16_16_16:
+ case V_008F14_IMG_DATA_FORMAT_32_32_32_32:
+ case V_008F14_IMG_DATA_FORMAT_5_6_5:
+ case V_008F14_IMG_DATA_FORMAT_1_5_5_5:
+ case V_008F14_IMG_DATA_FORMAT_5_5_5_1:
+ case V_008F14_IMG_DATA_FORMAT_4_4_4_4:
+ /* TODO: FMASK formats. */
+ return true;
+ default:
+ return false;
+ }
}
-bool radv_is_buffer_format_supported(VkFormat format, bool *scaled)
+bool
+radv_is_buffer_format_supported(VkFormat format, bool *scaled)
{
- const struct util_format_description *desc = vk_format_description(format);
- unsigned data_format, num_format;
- if (!desc || format == VK_FORMAT_UNDEFINED)
- return false;
-
- data_format = radv_translate_buffer_dataformat(desc,
- vk_format_get_first_non_void_channel(format));
- num_format = radv_translate_buffer_numformat(desc,
- vk_format_get_first_non_void_channel(format));
-
- if (scaled)
- *scaled = (num_format == V_008F0C_BUF_NUM_FORMAT_SSCALED) || (num_format == V_008F0C_BUF_NUM_FORMAT_USCALED);
- return data_format != V_008F0C_BUF_DATA_FORMAT_INVALID &&
- num_format != ~0;
+ const struct util_format_description *desc = vk_format_description(format);
+ unsigned data_format, num_format;
+ if (!desc || format == VK_FORMAT_UNDEFINED)
+ return false;
+
+ data_format =
+ radv_translate_buffer_dataformat(desc, vk_format_get_first_non_void_channel(format));
+ num_format = radv_translate_buffer_numformat(desc, vk_format_get_first_non_void_channel(format));
+
+ if (scaled)
+ *scaled = (num_format == V_008F0C_BUF_NUM_FORMAT_SSCALED) ||
+ (num_format == V_008F0C_BUF_NUM_FORMAT_USCALED);
+ return data_format != V_008F0C_BUF_DATA_FORMAT_INVALID && num_format != ~0;
}
-bool radv_is_colorbuffer_format_supported(const struct radv_physical_device *pdevice,
- VkFormat format, bool *blendable)
+bool
+radv_is_colorbuffer_format_supported(const struct radv_physical_device *pdevice, VkFormat format,
+ bool *blendable)
{
- const struct util_format_description *desc = vk_format_description(format);
- uint32_t color_format = radv_translate_colorformat(format);
- uint32_t color_swap = radv_translate_colorswap(format, false);
- uint32_t color_num_format = radv_translate_color_numformat(format,
- desc,
- vk_format_get_first_non_void_channel(format));
-
- if (color_num_format == V_028C70_NUMBER_UINT || color_num_format == V_028C70_NUMBER_SINT ||
- color_format == V_028C70_COLOR_8_24 || color_format == V_028C70_COLOR_24_8 ||
- color_format == V_028C70_COLOR_X24_8_32_FLOAT) {
- *blendable = false;
- } else
- *blendable = true;
-
- if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32 && pdevice->rad_info.chip_class < GFX10_3)
- return false;
-
- return color_format != V_028C70_COLOR_INVALID &&
- color_swap != ~0U &&
- color_num_format != ~0;
+ const struct util_format_description *desc = vk_format_description(format);
+ uint32_t color_format = radv_translate_colorformat(format);
+ uint32_t color_swap = radv_translate_colorswap(format, false);
+ uint32_t color_num_format =
+ radv_translate_color_numformat(format, desc, vk_format_get_first_non_void_channel(format));
+
+ if (color_num_format == V_028C70_NUMBER_UINT || color_num_format == V_028C70_NUMBER_SINT ||
+ color_format == V_028C70_COLOR_8_24 || color_format == V_028C70_COLOR_24_8 ||
+ color_format == V_028C70_COLOR_X24_8_32_FLOAT) {
+ *blendable = false;
+ } else
+ *blendable = true;
+
+ if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32 && pdevice->rad_info.chip_class < GFX10_3)
+ return false;
+
+ return color_format != V_028C70_COLOR_INVALID && color_swap != ~0U && color_num_format != ~0;
}
-static bool radv_is_zs_format_supported(VkFormat format)
+static bool
+radv_is_zs_format_supported(VkFormat format)
{
- return radv_translate_dbformat(format) != V_028040_Z_INVALID || format == VK_FORMAT_S8_UINT;
+ return radv_translate_dbformat(format) != V_028040_Z_INVALID || format == VK_FORMAT_S8_UINT;
}
-static bool radv_is_filter_minmax_format_supported(VkFormat format)
+static bool
+radv_is_filter_minmax_format_supported(VkFormat format)
{
- /* From the Vulkan spec 1.1.71:
- *
- * "The following formats must support the
- * VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_MINMAX_BIT feature with
- * VK_IMAGE_TILING_OPTIMAL, if they support
- * VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT."
- */
- /* TODO: enable more formats. */
- switch (format) {
- case VK_FORMAT_R8_UNORM:
- case VK_FORMAT_R8_SNORM:
- case VK_FORMAT_R16_UNORM:
- case VK_FORMAT_R16_SNORM:
- case VK_FORMAT_R16_SFLOAT:
- case VK_FORMAT_R32_SFLOAT:
- case VK_FORMAT_D16_UNORM:
- case VK_FORMAT_X8_D24_UNORM_PACK32:
- case VK_FORMAT_D32_SFLOAT:
- case VK_FORMAT_D16_UNORM_S8_UINT:
- case VK_FORMAT_D24_UNORM_S8_UINT:
- case VK_FORMAT_D32_SFLOAT_S8_UINT:
- return true;
- default:
- return false;
- }
+ /* From the Vulkan spec 1.1.71:
+ *
+ * "The following formats must support the
+ * VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_MINMAX_BIT feature with
+ * VK_IMAGE_TILING_OPTIMAL, if they support
+ * VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT."
+ */
+ /* TODO: enable more formats. */
+ switch (format) {
+ case VK_FORMAT_R8_UNORM:
+ case VK_FORMAT_R8_SNORM:
+ case VK_FORMAT_R16_UNORM:
+ case VK_FORMAT_R16_SNORM:
+ case VK_FORMAT_R16_SFLOAT:
+ case VK_FORMAT_R32_SFLOAT:
+ case VK_FORMAT_D16_UNORM:
+ case VK_FORMAT_X8_D24_UNORM_PACK32:
+ case VK_FORMAT_D32_SFLOAT:
+ case VK_FORMAT_D16_UNORM_S8_UINT:
+ case VK_FORMAT_D24_UNORM_S8_UINT:
+ case VK_FORMAT_D32_SFLOAT_S8_UINT:
+ return true;
+ default:
+ return false;
+ }
}
bool
radv_device_supports_etc(struct radv_physical_device *physical_device)
{
- return physical_device->rad_info.family == CHIP_VEGA10 ||
- physical_device->rad_info.family == CHIP_RAVEN ||
- physical_device->rad_info.family == CHIP_RAVEN2 ||
- physical_device->rad_info.family == CHIP_STONEY;
+ return physical_device->rad_info.family == CHIP_VEGA10 ||
+ physical_device->rad_info.family == CHIP_RAVEN ||
+ physical_device->rad_info.family == CHIP_RAVEN2 ||
+ physical_device->rad_info.family == CHIP_STONEY;
}
static void
radv_physical_device_get_format_properties(struct radv_physical_device *physical_device,
- VkFormat format,
- VkFormatProperties *out_properties)
+ VkFormat format, VkFormatProperties *out_properties)
{
- VkFormatFeatureFlags linear = 0, tiled = 0, buffer = 0;
- const struct util_format_description *desc = vk_format_description(format);
- bool blendable;
- bool scaled = false;
- /* TODO: implement some software emulation of SUBSAMPLED formats. */
- if (!desc || vk_format_to_pipe_format(format) == PIPE_FORMAT_NONE ||
- desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
- out_properties->linearTilingFeatures = linear;
- out_properties->optimalTilingFeatures = tiled;
- out_properties->bufferFeatures = buffer;
- return;
- }
-
- if (desc->layout == UTIL_FORMAT_LAYOUT_ETC &&
- !radv_device_supports_etc(physical_device)) {
- out_properties->linearTilingFeatures = linear;
- out_properties->optimalTilingFeatures = tiled;
- out_properties->bufferFeatures = buffer;
- return;
- }
-
- if (vk_format_get_plane_count(format) > 1 ||
- desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
- uint32_t tiling = VK_FORMAT_FEATURE_TRANSFER_SRC_BIT |
- VK_FORMAT_FEATURE_TRANSFER_DST_BIT |
- VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT |
- VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT |
- VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT;
-
- /* The subsampled formats have no support for linear filters. */
- if (desc->layout != UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
- tiling |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_YCBCR_CONVERSION_LINEAR_FILTER_BIT;
- }
-
- /* Fails for unknown reasons with linear tiling & subsampled formats. */
- out_properties->linearTilingFeatures = desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED ? 0 : tiling;
- out_properties->optimalTilingFeatures = tiling;
- out_properties->bufferFeatures = 0;
- return;
- }
-
- if (radv_is_storage_image_format_supported(physical_device, format)) {
- tiled |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT;
- linear |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT;
- }
-
- if (radv_is_buffer_format_supported(format, &scaled)) {
- if (format != VK_FORMAT_R64_UINT && format != VK_FORMAT_R64_SINT) {
- buffer |= VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT;
- if (!scaled)
- buffer |= VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT;
- }
- buffer |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT;
- }
-
- if (vk_format_is_depth_or_stencil(format)) {
- if (radv_is_zs_format_supported(format)) {
- tiled |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT;
- tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT;
- tiled |= VK_FORMAT_FEATURE_BLIT_SRC_BIT |
- VK_FORMAT_FEATURE_BLIT_DST_BIT;
- tiled |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT |
- VK_FORMAT_FEATURE_TRANSFER_DST_BIT;
-
- if (radv_is_filter_minmax_format_supported(format))
- tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_MINMAX_BIT;
-
- if (vk_format_has_depth(format))
- tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
-
- /* Don't support blitting surfaces with depth/stencil. */
- if (vk_format_has_depth(format) && vk_format_has_stencil(format))
- tiled &= ~VK_FORMAT_FEATURE_BLIT_DST_BIT;
-
- /* Don't support linear depth surfaces */
- linear = 0;
- }
- } else {
- bool linear_sampling;
- if (radv_is_sampler_format_supported(format, &linear_sampling)) {
- linear |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT |
- VK_FORMAT_FEATURE_BLIT_SRC_BIT;
- tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT |
- VK_FORMAT_FEATURE_BLIT_SRC_BIT;
-
- if (radv_is_filter_minmax_format_supported(format))
- tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_MINMAX_BIT;
-
- if (linear_sampling) {
- linear |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
- tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
- }
-
- /* Don't support blitting for R32G32B32 formats. */
- if (format == VK_FORMAT_R32G32B32_SFLOAT ||
- format == VK_FORMAT_R32G32B32_UINT ||
- format == VK_FORMAT_R32G32B32_SINT) {
- linear &= ~VK_FORMAT_FEATURE_BLIT_SRC_BIT;
- }
- }
- if (radv_is_colorbuffer_format_supported(physical_device, format, &blendable)) {
- linear |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
- tiled |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
- if (blendable) {
- linear |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT;
- tiled |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT;
- }
- }
- if (tiled && !scaled) {
- tiled |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT |
- VK_FORMAT_FEATURE_TRANSFER_DST_BIT;
- }
-
- /* Tiled formatting does not support NPOT pixel sizes */
- if (!util_is_power_of_two_or_zero(vk_format_get_blocksize(format)))
- tiled = 0;
- }
-
- if (linear && !scaled) {
- linear |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT |
- VK_FORMAT_FEATURE_TRANSFER_DST_BIT;
- }
-
- if (radv_is_atomic_format_supported(format)) {
- buffer |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT;
- linear |= VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT;
- tiled |= VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT;
- }
-
- switch(format) {
- case VK_FORMAT_A2R10G10B10_SNORM_PACK32:
- case VK_FORMAT_A2B10G10R10_SNORM_PACK32:
- case VK_FORMAT_A2R10G10B10_SSCALED_PACK32:
- case VK_FORMAT_A2B10G10R10_SSCALED_PACK32:
- case VK_FORMAT_A2R10G10B10_SINT_PACK32:
- case VK_FORMAT_A2B10G10R10_SINT_PACK32:
- buffer &= ~(VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT |
- VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT);
- linear = 0;
- tiled = 0;
- break;
- default:
- break;
- }
-
- /* addrlib does not support linear compressed textures. */
- if (vk_format_is_compressed(format))
- linear = 0;
-
- out_properties->linearTilingFeatures = linear;
- out_properties->optimalTilingFeatures = tiled;
- out_properties->bufferFeatures = buffer;
+ VkFormatFeatureFlags linear = 0, tiled = 0, buffer = 0;
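+ /* Feature flags accumulated below: linear tiling, optimal tiling and buffer usage. */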
+ const struct util_format_description *desc = vk_format_description(format);
+ bool blendable;
+ bool scaled = false;
+ /* TODO: implement some software emulation of SUBSAMPLED formats. */
+ if (!desc || vk_format_to_pipe_format(format) == PIPE_FORMAT_NONE ||
+ desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
+ out_properties->linearTilingFeatures = linear;
+ out_properties->optimalTilingFeatures = tiled;
+ out_properties->bufferFeatures = buffer;
+ return;
+ }
+
+ if (desc->layout == UTIL_FORMAT_LAYOUT_ETC && !radv_device_supports_etc(physical_device)) {
+ out_properties->linearTilingFeatures = linear;
+ out_properties->optimalTilingFeatures = tiled;
+ out_properties->bufferFeatures = buffer;
+ return;
+ }
+
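+ /* Multi-plane and subsampled (YCbCr) formats get a fixed sampling/transfer feature set and no buffer features. */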
+ if (vk_format_get_plane_count(format) > 1 || desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
+ uint32_t tiling = VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT |
+ VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT |
+ VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT |
+ VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT;
+
+ /* The subsampled formats have no support for linear filters. */
+ if (desc->layout != UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
+ tiling |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_YCBCR_CONVERSION_LINEAR_FILTER_BIT;
+ }
+
+ /* Fails for unknown reasons with linear tiling & subsampled formats. */
+ out_properties->linearTilingFeatures =
+ desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED ? 0 : tiling;
+ out_properties->optimalTilingFeatures = tiling;
+ out_properties->bufferFeatures = 0;
+ return;
+ }
+
+ if (radv_is_storage_image_format_supported(physical_device, format)) {
+ tiled |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT;
+ linear |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT;
+ }
+
+ if (radv_is_buffer_format_supported(format, &scaled)) {
+ if (format != VK_FORMAT_R64_UINT && format != VK_FORMAT_R64_SINT) {
+ buffer |= VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT;
+ if (!scaled)
+ buffer |= VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT;
+ }
+ buffer |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT;
+ }
+
+ if (vk_format_is_depth_or_stencil(format)) {
+ if (radv_is_zs_format_supported(format)) {
+ tiled |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT;
+ tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT;
+ tiled |= VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
+ tiled |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT;
+
+ if (radv_is_filter_minmax_format_supported(format))
+ tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_MINMAX_BIT;
+
+ if (vk_format_has_depth(format))
+ tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
+
+ /* Don't support blitting surfaces with depth/stencil. */
+ if (vk_format_has_depth(format) && vk_format_has_stencil(format))
+ tiled &= ~VK_FORMAT_FEATURE_BLIT_DST_BIT;
+
+ /* Don't support linear depth surfaces */
+ linear = 0;
+ }
+ } else {
+ bool linear_sampling;
+ if (radv_is_sampler_format_supported(format, &linear_sampling)) {
+ linear |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_BLIT_SRC_BIT;
+ tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_BLIT_SRC_BIT;
+
+ if (radv_is_filter_minmax_format_supported(format))
+ tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_MINMAX_BIT;
+
+ if (linear_sampling) {
+ linear |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
+ tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
+ }
+
+ /* Don't support blitting for R32G32B32 formats. */
+ if (format == VK_FORMAT_R32G32B32_SFLOAT || format == VK_FORMAT_R32G32B32_UINT ||
+ format == VK_FORMAT_R32G32B32_SINT) {
+ linear &= ~VK_FORMAT_FEATURE_BLIT_SRC_BIT;
+ }
+ }
+ if (radv_is_colorbuffer_format_supported(physical_device, format, &blendable)) {
+ linear |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
+ tiled |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
+ if (blendable) {
+ linear |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT;
+ tiled |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT;
+ }
+ }
+ if (tiled && !scaled) {
+ tiled |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT;
+ }
+
+ /* Tiled formatting does not support NPOT pixel sizes */
+ if (!util_is_power_of_two_or_zero(vk_format_get_blocksize(format)))
+ tiled = 0;
+ }
+
+ if (linear && !scaled) {
+ linear |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT;
+ }
+
+ if (radv_is_atomic_format_supported(format)) {
+ buffer |= VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_ATOMIC_BIT;
+ linear |= VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT;
+ tiled |= VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT;
+ }
+
+ switch (format) {
+ case VK_FORMAT_A2R10G10B10_SNORM_PACK32:
+ case VK_FORMAT_A2B10G10R10_SNORM_PACK32:
+ case VK_FORMAT_A2R10G10B10_SSCALED_PACK32:
+ case VK_FORMAT_A2B10G10R10_SSCALED_PACK32:
+ case VK_FORMAT_A2R10G10B10_SINT_PACK32:
+ case VK_FORMAT_A2B10G10R10_SINT_PACK32:
+ buffer &=
+ ~(VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT);
+ linear = 0;
+ tiled = 0;
+ break;
+ default:
+ break;
+ }
+
+ /* addrlib does not support linear compressed textures. */
+ if (vk_format_is_compressed(format))
+ linear = 0;
+
+ out_properties->linearTilingFeatures = linear;
+ out_properties->optimalTilingFeatures = tiled;
+ out_properties->bufferFeatures = buffer;
}
-uint32_t radv_translate_colorformat(VkFormat format)
+uint32_t
+radv_translate_colorformat(VkFormat format)
{
- const struct util_format_description *desc = vk_format_description(format);
-
-#define HAS_SIZE(x,y,z,w) \
- (desc->channel[0].size == (x) && desc->channel[1].size == (y) && \
- desc->channel[2].size == (z) && desc->channel[3].size == (w))
-
- if (format == VK_FORMAT_B10G11R11_UFLOAT_PACK32) /* isn't plain */
- return V_028C70_COLOR_10_11_11;
-
- if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
- return V_028C70_COLOR_5_9_9_9;
-
- if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
- return V_028C70_COLOR_INVALID;
-
- /* hw cannot support mixed formats (except depth/stencil, since
- * stencil is not written to). */
- if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
- return V_028C70_COLOR_INVALID;
-
- switch (desc->nr_channels) {
- case 1:
- switch (desc->channel[0].size) {
- case 8:
- return V_028C70_COLOR_8;
- case 16:
- return V_028C70_COLOR_16;
- case 32:
- return V_028C70_COLOR_32;
- }
- break;
- case 2:
- if (desc->channel[0].size == desc->channel[1].size) {
- switch (desc->channel[0].size) {
- case 8:
- return V_028C70_COLOR_8_8;
- case 16:
- return V_028C70_COLOR_16_16;
- case 32:
- return V_028C70_COLOR_32_32;
- }
- } else if (HAS_SIZE(8,24,0,0)) {
- return V_028C70_COLOR_24_8;
- } else if (HAS_SIZE(24,8,0,0)) {
- return V_028C70_COLOR_8_24;
- }
- break;
- case 3:
- if (HAS_SIZE(5,6,5,0)) {
- return V_028C70_COLOR_5_6_5;
- } else if (HAS_SIZE(32,8,24,0)) {
- return V_028C70_COLOR_X24_8_32_FLOAT;
- }
- break;
- case 4:
- if (desc->channel[0].size == desc->channel[1].size &&
- desc->channel[0].size == desc->channel[2].size &&
- desc->channel[0].size == desc->channel[3].size) {
- switch (desc->channel[0].size) {
- case 4:
- return V_028C70_COLOR_4_4_4_4;
- case 8:
- return V_028C70_COLOR_8_8_8_8;
- case 16:
- return V_028C70_COLOR_16_16_16_16;
- case 32:
- return V_028C70_COLOR_32_32_32_32;
- }
- } else if (HAS_SIZE(5,5,5,1)) {
- return V_028C70_COLOR_1_5_5_5;
- } else if (HAS_SIZE(1,5,5,5)) {
- return V_028C70_COLOR_5_5_5_1;
- } else if (HAS_SIZE(10,10,10,2)) {
- return V_028C70_COLOR_2_10_10_10;
- }
- break;
- }
- return V_028C70_COLOR_INVALID;
+ const struct util_format_description *desc = vk_format_description(format);
+
+#define HAS_SIZE(x, y, z, w) \
+ (desc->channel[0].size == (x) && desc->channel[1].size == (y) && \
+ desc->channel[2].size == (z) && desc->channel[3].size == (w))
+
+ if (format == VK_FORMAT_B10G11R11_UFLOAT_PACK32) /* isn't plain */
+ return V_028C70_COLOR_10_11_11;
+
+ if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
+ return V_028C70_COLOR_5_9_9_9;
+
+ if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
+ return V_028C70_COLOR_INVALID;
+
+ /* The HW cannot support mixed formats (except depth/stencil, since
+  * stencil is not written to). */
+ if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
+ return V_028C70_COLOR_INVALID;
+
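+ /* Plain formats: derive the CB format from the channel count and per-channel bit sizes. */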
+ switch (desc->nr_channels) {
+ case 1:
+ switch (desc->channel[0].size) {
+ case 8:
+ return V_028C70_COLOR_8;
+ case 16:
+ return V_028C70_COLOR_16;
+ case 32:
+ return V_028C70_COLOR_32;
+ }
+ break;
+ case 2:
+ if (desc->channel[0].size == desc->channel[1].size) {
+ switch (desc->channel[0].size) {
+ case 8:
+ return V_028C70_COLOR_8_8;
+ case 16:
+ return V_028C70_COLOR_16_16;
+ case 32:
+ return V_028C70_COLOR_32_32;
+ }
+ } else if (HAS_SIZE(8, 24, 0, 0)) {
+ return V_028C70_COLOR_24_8;
+ } else if (HAS_SIZE(24, 8, 0, 0)) {
+ return V_028C70_COLOR_8_24;
+ }
+ break;
+ case 3:
+ if (HAS_SIZE(5, 6, 5, 0)) {
+ return V_028C70_COLOR_5_6_5;
+ } else if (HAS_SIZE(32, 8, 24, 0)) {
+ return V_028C70_COLOR_X24_8_32_FLOAT;
+ }
+ break;
+ case 4:
+ if (desc->channel[0].size == desc->channel[1].size &&
+ desc->channel[0].size == desc->channel[2].size &&
+ desc->channel[0].size == desc->channel[3].size) {
+ switch (desc->channel[0].size) {
+ case 4:
+ return V_028C70_COLOR_4_4_4_4;
+ case 8:
+ return V_028C70_COLOR_8_8_8_8;
+ case 16:
+ return V_028C70_COLOR_16_16_16_16;
+ case 32:
+ return V_028C70_COLOR_32_32_32_32;
+ }
+ } else if (HAS_SIZE(5, 5, 5, 1)) {
+ return V_028C70_COLOR_1_5_5_5;
+ } else if (HAS_SIZE(1, 5, 5, 5)) {
+ return V_028C70_COLOR_5_5_5_1;
+ } else if (HAS_SIZE(10, 10, 10, 2)) {
+ return V_028C70_COLOR_2_10_10_10;
+ }
+ break;
+ }
+ return V_028C70_COLOR_INVALID;
}
-uint32_t radv_colorformat_endian_swap(uint32_t colorformat)
+uint32_t
+radv_colorformat_endian_swap(uint32_t colorformat)
{
- if (0/*SI_BIG_ENDIAN*/) {
- switch(colorformat) {
- /* 8-bit buffers. */
- case V_028C70_COLOR_8:
- return V_028C70_ENDIAN_NONE;
-
- /* 16-bit buffers. */
- case V_028C70_COLOR_5_6_5:
- case V_028C70_COLOR_1_5_5_5:
- case V_028C70_COLOR_4_4_4_4:
- case V_028C70_COLOR_16:
- case V_028C70_COLOR_8_8:
- return V_028C70_ENDIAN_8IN16;
-
- /* 32-bit buffers. */
- case V_028C70_COLOR_8_8_8_8:
- case V_028C70_COLOR_2_10_10_10:
- case V_028C70_COLOR_8_24:
- case V_028C70_COLOR_24_8:
- case V_028C70_COLOR_16_16:
- return V_028C70_ENDIAN_8IN32;
-
- /* 64-bit buffers. */
- case V_028C70_COLOR_16_16_16_16:
- return V_028C70_ENDIAN_8IN16;
-
- case V_028C70_COLOR_32_32:
- return V_028C70_ENDIAN_8IN32;
-
- /* 128-bit buffers. */
- case V_028C70_COLOR_32_32_32_32:
- return V_028C70_ENDIAN_8IN32;
- default:
- return V_028C70_ENDIAN_NONE; /* Unsupported. */
- }
- } else {
- return V_028C70_ENDIAN_NONE;
- }
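+ /* The big-endian path below is compiled out, so this always returns ENDIAN_NONE. */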
+ if (0 /*SI_BIG_ENDIAN*/) {
+ switch (colorformat) {
+ /* 8-bit buffers. */
+ case V_028C70_COLOR_8:
+ return V_028C70_ENDIAN_NONE;
+
+ /* 16-bit buffers. */
+ case V_028C70_COLOR_5_6_5:
+ case V_028C70_COLOR_1_5_5_5:
+ case V_028C70_COLOR_4_4_4_4:
+ case V_028C70_COLOR_16:
+ case V_028C70_COLOR_8_8:
+ return V_028C70_ENDIAN_8IN16;
+
+ /* 32-bit buffers. */
+ case V_028C70_COLOR_8_8_8_8:
+ case V_028C70_COLOR_2_10_10_10:
+ case V_028C70_COLOR_8_24:
+ case V_028C70_COLOR_24_8:
+ case V_028C70_COLOR_16_16:
+ return V_028C70_ENDIAN_8IN32;
+
+ /* 64-bit buffers. */
+ case V_028C70_COLOR_16_16_16_16:
+ return V_028C70_ENDIAN_8IN16;
+
+ case V_028C70_COLOR_32_32:
+ return V_028C70_ENDIAN_8IN32;
+
+ /* 128-bit buffers. */
+ case V_028C70_COLOR_32_32_32_32:
+ return V_028C70_ENDIAN_8IN32;
+ default:
+ return V_028C70_ENDIAN_NONE; /* Unsupported. */
+ }
+ } else {
+ return V_028C70_ENDIAN_NONE;
+ }
}
-uint32_t radv_translate_dbformat(VkFormat format)
+uint32_t
+radv_translate_dbformat(VkFormat format)
{
- switch (format) {
- case VK_FORMAT_D16_UNORM:
- case VK_FORMAT_D16_UNORM_S8_UINT:
- return V_028040_Z_16;
- case VK_FORMAT_D32_SFLOAT:
- case VK_FORMAT_D32_SFLOAT_S8_UINT:
- return V_028040_Z_32_FLOAT;
- default:
- return V_028040_Z_INVALID;
- }
+ switch (format) {
+ case VK_FORMAT_D16_UNORM:
+ case VK_FORMAT_D16_UNORM_S8_UINT:
+ return V_028040_Z_16;
+ case VK_FORMAT_D32_SFLOAT:
+ case VK_FORMAT_D32_SFLOAT_S8_UINT:
+ return V_028040_Z_32_FLOAT;
+ default:
+ return V_028040_Z_INVALID;
+ }
}
-unsigned radv_translate_colorswap(VkFormat format, bool do_endian_swap)
+unsigned
+radv_translate_colorswap(VkFormat format, bool do_endian_swap)
{
- const struct util_format_description *desc = vk_format_description(format);
-
-#define HAS_SWIZZLE(chan,swz) (desc->swizzle[chan] == PIPE_SWIZZLE_##swz)
-
- if (format == VK_FORMAT_B10G11R11_UFLOAT_PACK32)
- return V_028C70_SWAP_STD;
-
- if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
- return V_028C70_SWAP_STD;
-
- if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
- return ~0U;
-
- switch (desc->nr_channels) {
- case 1:
- if (HAS_SWIZZLE(0,X))
- return V_028C70_SWAP_STD; /* X___ */
- else if (HAS_SWIZZLE(3,X))
- return V_028C70_SWAP_ALT_REV; /* ___X */
- break;
- case 2:
- if ((HAS_SWIZZLE(0,X) && HAS_SWIZZLE(1,Y)) ||
- (HAS_SWIZZLE(0,X) && HAS_SWIZZLE(1,NONE)) ||
- (HAS_SWIZZLE(0,NONE) && HAS_SWIZZLE(1,Y)))
- return V_028C70_SWAP_STD; /* XY__ */
- else if ((HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(1,X)) ||
- (HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(1,NONE)) ||
- (HAS_SWIZZLE(0,NONE) && HAS_SWIZZLE(1,X)))
- /* YX__ */
- return (do_endian_swap ? V_028C70_SWAP_STD : V_028C70_SWAP_STD_REV);
- else if (HAS_SWIZZLE(0,X) && HAS_SWIZZLE(3,Y))
- return V_028C70_SWAP_ALT; /* X__Y */
- else if (HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(3,X))
- return V_028C70_SWAP_ALT_REV; /* Y__X */
- break;
- case 3:
- if (HAS_SWIZZLE(0,X))
- return (do_endian_swap ? V_028C70_SWAP_STD_REV : V_028C70_SWAP_STD);
- else if (HAS_SWIZZLE(0,Z))
- return V_028C70_SWAP_STD_REV; /* ZYX */
- break;
- case 4:
- /* check the middle channels, the 1st and 4th channel can be NONE */
- if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,Z)) {
- return V_028C70_SWAP_STD; /* XYZW */
- } else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,Y)) {
- return V_028C70_SWAP_STD_REV; /* WZYX */
- } else if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,X)) {
- return V_028C70_SWAP_ALT; /* ZYXW */
- } else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,W)) {
- /* YZWX */
- if (desc->is_array)
- return V_028C70_SWAP_ALT_REV;
- else
- return (do_endian_swap ? V_028C70_SWAP_ALT : V_028C70_SWAP_ALT_REV);
- }
- break;
- }
- return ~0U;
+ const struct util_format_description *desc = vk_format_description(format);
+
+#define HAS_SWIZZLE(chan, swz) (desc->swizzle[chan] == PIPE_SWIZZLE_##swz)
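+ /* Pick the component swap that matches the format's swizzle; the inline comments show the matched channel order. */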
+
+ if (format == VK_FORMAT_B10G11R11_UFLOAT_PACK32)
+ return V_028C70_SWAP_STD;
+
+ if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
+ return V_028C70_SWAP_STD;
+
+ if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
+ return ~0U;
+
+ switch (desc->nr_channels) {
+ case 1:
+ if (HAS_SWIZZLE(0, X))
+ return V_028C70_SWAP_STD; /* X___ */
+ else if (HAS_SWIZZLE(3, X))
+ return V_028C70_SWAP_ALT_REV; /* ___X */
+ break;
+ case 2:
+ if ((HAS_SWIZZLE(0, X) && HAS_SWIZZLE(1, Y)) || (HAS_SWIZZLE(0, X) && HAS_SWIZZLE(1, NONE)) ||
+ (HAS_SWIZZLE(0, NONE) && HAS_SWIZZLE(1, Y)))
+ return V_028C70_SWAP_STD; /* XY__ */
+ else if ((HAS_SWIZZLE(0, Y) && HAS_SWIZZLE(1, X)) ||
+ (HAS_SWIZZLE(0, Y) && HAS_SWIZZLE(1, NONE)) ||
+ (HAS_SWIZZLE(0, NONE) && HAS_SWIZZLE(1, X)))
+ /* YX__ */
+ return (do_endian_swap ? V_028C70_SWAP_STD : V_028C70_SWAP_STD_REV);
+ else if (HAS_SWIZZLE(0, X) && HAS_SWIZZLE(3, Y))
+ return V_028C70_SWAP_ALT; /* X__Y */
+ else if (HAS_SWIZZLE(0, Y) && HAS_SWIZZLE(3, X))
+ return V_028C70_SWAP_ALT_REV; /* Y__X */
+ break;
+ case 3:
+ if (HAS_SWIZZLE(0, X))
+ return (do_endian_swap ? V_028C70_SWAP_STD_REV : V_028C70_SWAP_STD);
+ else if (HAS_SWIZZLE(0, Z))
+ return V_028C70_SWAP_STD_REV; /* ZYX */
+ break;
+ case 4:
+ /* Check the middle channels; the 1st and 4th channels can be NONE. */
+ if (HAS_SWIZZLE(1, Y) && HAS_SWIZZLE(2, Z)) {
+ return V_028C70_SWAP_STD; /* XYZW */
+ } else if (HAS_SWIZZLE(1, Z) && HAS_SWIZZLE(2, Y)) {
+ return V_028C70_SWAP_STD_REV; /* WZYX */
+ } else if (HAS_SWIZZLE(1, Y) && HAS_SWIZZLE(2, X)) {
+ return V_028C70_SWAP_ALT; /* ZYXW */
+ } else if (HAS_SWIZZLE(1, Z) && HAS_SWIZZLE(2, W)) {
+ /* YZWX */
+ if (desc->is_array)
+ return V_028C70_SWAP_ALT_REV;
+ else
+ return (do_endian_swap ? V_028C70_SWAP_ALT : V_028C70_SWAP_ALT_REV);
+ }
+ break;
+ }
+ return ~0U;
}
-bool radv_format_pack_clear_color(VkFormat format,
- uint32_t clear_vals[2],
- VkClearColorValue *value)
+bool
+radv_format_pack_clear_color(VkFormat format, uint32_t clear_vals[2], VkClearColorValue *value)
{
- const struct util_format_description *desc = vk_format_description(format);
-
- if (format == VK_FORMAT_B10G11R11_UFLOAT_PACK32) {
- clear_vals[0] = float3_to_r11g11b10f(value->float32);
- clear_vals[1] = 0;
- return true;
- } else if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) {
- clear_vals[0] = float3_to_rgb9e5(value->float32);
- clear_vals[1] = 0;
- return true;
- }
-
- if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
- fprintf(stderr, "failed to fast clear for non-plain format %d\n", format);
- return false;
- }
-
- if (!util_is_power_of_two_or_zero(desc->block.bits)) {
- fprintf(stderr, "failed to fast clear for NPOT format %d\n", format);
- return false;
- }
-
- if (desc->block.bits > 64) {
- /*
- * We have a 128 bits format, check if the first 3 components are the same.
- * Every elements has to be 32 bits since we don't support 64-bit formats,
- * and we can skip swizzling checks as alpha always comes last for these and
- * we do not care about the rest as they have to be the same.
- */
- if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT) {
- if (value->float32[0] != value->float32[1] ||
- value->float32[0] != value->float32[2])
- return false;
- } else {
- if (value->uint32[0] != value->uint32[1] ||
- value->uint32[0] != value->uint32[2])
- return false;
- }
- clear_vals[0] = value->uint32[0];
- clear_vals[1] = value->uint32[3];
- return true;
- }
- uint64_t clear_val = 0;
-
- for (unsigned c = 0; c < 4; ++c) {
- if (desc->swizzle[c] >= 4)
- continue;
-
- const struct util_format_channel_description *channel = &desc->channel[desc->swizzle[c]];
- assert(channel->size);
-
- uint64_t v = 0;
- if (channel->pure_integer) {
- v = value->uint32[c] & ((1ULL << channel->size) - 1);
- } else if (channel->normalized) {
- if (channel->type == UTIL_FORMAT_TYPE_UNSIGNED &&
- desc->swizzle[c] < 3 &&
- desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
- assert(channel->size == 8);
-
- v = util_format_linear_float_to_srgb_8unorm(value->float32[c]);
- } else {
- float f = MIN2(value->float32[c], 1.0f);
-
- if (channel->type == UTIL_FORMAT_TYPE_UNSIGNED) {
- f = MAX2(f, 0.0f) * ((1ULL << channel->size) - 1);
- } else {
- f = MAX2(f, -1.0f) * ((1ULL << (channel->size - 1)) - 1);
- }
-
- /* The hardware rounds before conversion. */
- if (f > 0)
- f += 0.5f;
- else
- f -= 0.5f;
-
- v = (uint64_t)f;
- }
- } else if (channel->type == UTIL_FORMAT_TYPE_FLOAT) {
- if (channel->size == 32) {
- memcpy(&v, &value->float32[c], 4);
- } else if(channel->size == 16) {
- v = _mesa_float_to_float16_rtz(value->float32[c]);
- } else {
- fprintf(stderr, "failed to fast clear for unhandled float size in format %d\n", format);
- return false;
- }
- } else {
- fprintf(stderr, "failed to fast clear for unhandled component type in format %d\n", format);
- return false;
- }
- clear_val |= (v & ((1ULL << channel->size) - 1)) << channel->shift;
- }
-
- clear_vals[0] = clear_val;
- clear_vals[1] = clear_val >> 32;
-
- return true;
+ const struct util_format_description *desc = vk_format_description(format);
+
+ if (format == VK_FORMAT_B10G11R11_UFLOAT_PACK32) {
+ clear_vals[0] = float3_to_r11g11b10f(value->float32);
+ clear_vals[1] = 0;
+ return true;
+ } else if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) {
+ clear_vals[0] = float3_to_rgb9e5(value->float32);
+ clear_vals[1] = 0;
+ return true;
+ }
+
+ if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
+ fprintf(stderr, "failed to fast clear for non-plain format %d\n", format);
+ return false;
+ }
+
+ if (!util_is_power_of_two_or_zero(desc->block.bits)) {
+ fprintf(stderr, "failed to fast clear for NPOT format %d\n", format);
+ return false;
+ }
+
+ if (desc->block.bits > 64) {
+ /*
+  * We have a 128-bit format; check whether the first 3 components are the same.
+  * Every element has to be 32 bits since we don't support 64-bit formats,
+  * and we can skip swizzle checks as alpha always comes last for these formats
+  * and the remaining components have to be the same anyway.
+  */
+ if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT) {
+ if (value->float32[0] != value->float32[1] || value->float32[0] != value->float32[2])
+ return false;
+ } else {
+ if (value->uint32[0] != value->uint32[1] || value->uint32[0] != value->uint32[2])
+ return false;
+ }
+ clear_vals[0] = value->uint32[0];
+ clear_vals[1] = value->uint32[3];
+ return true;
+ }
+ uint64_t clear_val = 0;
+
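+ /* Pack each written channel into clear_val at its shift, converting according to the channel type. */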
+ for (unsigned c = 0; c < 4; ++c) {
+ if (desc->swizzle[c] >= 4)
+ continue;
+
+ const struct util_format_channel_description *channel = &desc->channel[desc->swizzle[c]];
+ assert(channel->size);
+
+ uint64_t v = 0;
+ if (channel->pure_integer) {
+ v = value->uint32[c] & ((1ULL << channel->size) - 1);
+ } else if (channel->normalized) {
+ if (channel->type == UTIL_FORMAT_TYPE_UNSIGNED && desc->swizzle[c] < 3 &&
+ desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
+ assert(channel->size == 8);
+
+ v = util_format_linear_float_to_srgb_8unorm(value->float32[c]);
+ } else {
+ float f = MIN2(value->float32[c], 1.0f);
+
+ if (channel->type == UTIL_FORMAT_TYPE_UNSIGNED) {
+ f = MAX2(f, 0.0f) * ((1ULL << channel->size) - 1);
+ } else {
+ f = MAX2(f, -1.0f) * ((1ULL << (channel->size - 1)) - 1);
+ }
+
+ /* The hardware rounds before conversion. */
+ if (f > 0)
+ f += 0.5f;
+ else
+ f -= 0.5f;
+
+ v = (uint64_t)f;
+ }
+ } else if (channel->type == UTIL_FORMAT_TYPE_FLOAT) {
+ if (channel->size == 32) {
+ memcpy(&v, &value->float32[c], 4);
+ } else if (channel->size == 16) {
+ v = _mesa_float_to_float16_rtz(value->float32[c]);
+ } else {
+ fprintf(stderr, "failed to fast clear for unhandled float size in format %d\n", format);
+ return false;
+ }
+ } else {
+ fprintf(stderr, "failed to fast clear for unhandled component type in format %d\n",
+ format);
+ return false;
+ }
+ clear_val |= (v & ((1ULL << channel->size) - 1)) << channel->shift;
+ }
+
+ clear_vals[0] = clear_val;
+ clear_vals[1] = clear_val >> 32;
+
+ return true;
}
-void radv_GetPhysicalDeviceFormatProperties(
- VkPhysicalDevice physicalDevice,
- VkFormat format,
- VkFormatProperties* pFormatProperties)
+void
+radv_GetPhysicalDeviceFormatProperties(VkPhysicalDevice physicalDevice, VkFormat format,
+ VkFormatProperties *pFormatProperties)
{
- RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
+ RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
- radv_physical_device_get_format_properties(physical_device,
- format,
- pFormatProperties);
+ radv_physical_device_get_format_properties(physical_device, format, pFormatProperties);
}
static const struct ac_modifier_options radv_modifier_options = {
- .dcc = true,
- .dcc_retile = true,
+ .dcc = true,
+ .dcc_retile = true,
};
static VkFormatFeatureFlags
-radv_get_modifier_flags(struct radv_physical_device *dev,
- VkFormat format, uint64_t modifier,
+radv_get_modifier_flags(struct radv_physical_device *dev, VkFormat format, uint64_t modifier,
const VkFormatProperties *props)
{
- VkFormatFeatureFlags features;
+ VkFormatFeatureFlags features;
- if (vk_format_is_compressed(format) || vk_format_is_depth_or_stencil(format))
- return 0;
+ if (vk_format_is_compressed(format) || vk_format_is_depth_or_stencil(format))
+ return 0;
- if (modifier == DRM_FORMAT_MOD_LINEAR)
- features = props->linearTilingFeatures;
- else
- features = props->optimalTilingFeatures;
+ if (modifier == DRM_FORMAT_MOD_LINEAR)
+ features = props->linearTilingFeatures;
+ else
+ features = props->optimalTilingFeatures;
- if (modifier != DRM_FORMAT_MOD_LINEAR && vk_format_get_plane_count(format) > 1)
- return 0;
+ if (modifier != DRM_FORMAT_MOD_LINEAR && vk_format_get_plane_count(format) > 1)
+ return 0;
- if (ac_modifier_has_dcc(modifier)) {
- features &= ~VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT;
+ if (ac_modifier_has_dcc(modifier)) {
+ features &= ~VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT;
- if (dev->instance->debug_flags & (RADV_DEBUG_NO_DCC | RADV_DEBUG_NO_DISPLAY_DCC))
- return 0;
- }
+ if (dev->instance->debug_flags & (RADV_DEBUG_NO_DCC | RADV_DEBUG_NO_DISPLAY_DCC))
+ return 0;
+ }
- return features;
+ return features;
}
-static void radv_list_drm_format_modifiers(struct radv_physical_device *dev,
- VkFormat format,
- VkFormatProperties2 *pFormatProperties)
+static void
+radv_list_drm_format_modifiers(struct radv_physical_device *dev, VkFormat format,
+ VkFormatProperties2 *pFormatProperties)
{
- VkDrmFormatModifierPropertiesListEXT *mod_list =
- vk_find_struct(pFormatProperties, DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT);
- unsigned mod_count;
-
- if (!mod_list)
- return;
-
- if (vk_format_is_compressed(format) || vk_format_is_depth_or_stencil(format)) {
- mod_list->drmFormatModifierCount = 0;
- return;
- }
-
- ac_get_supported_modifiers(&dev->rad_info, &radv_modifier_options,
- vk_format_to_pipe_format(format), &mod_count, NULL);
- if (!mod_list->pDrmFormatModifierProperties) {
- mod_list->drmFormatModifierCount = mod_count;
- return;
- }
-
- mod_count = MIN2(mod_count, mod_list->drmFormatModifierCount);
-
- uint64_t *mods = malloc(mod_count * sizeof(uint64_t));
- if (!mods) {
- /* We can't return an error here ... */
- mod_list->drmFormatModifierCount = 0;
- return;
- }
- ac_get_supported_modifiers(&dev->rad_info, &radv_modifier_options,
- vk_format_to_pipe_format(format), &mod_count, mods);
-
- mod_list->drmFormatModifierCount = 0;
- for (unsigned i = 0; i < mod_count; ++i) {
- VkFormatFeatureFlags features =
- radv_get_modifier_flags(dev, format, mods[i], &pFormatProperties->formatProperties);
- unsigned planes = vk_format_get_plane_count(format);
- if (planes == 1) {
- if (ac_modifier_has_dcc_retile(mods[i]))
- planes = 3;
- else if (ac_modifier_has_dcc(mods[i]))
- planes = 2;
- }
-
- if (!features)
- continue;
-
- mod_list->pDrmFormatModifierProperties[mod_list->drmFormatModifierCount].drmFormatModifier = mods[i];
- mod_list->pDrmFormatModifierProperties[mod_list->drmFormatModifierCount].drmFormatModifierPlaneCount = planes;
- mod_list->pDrmFormatModifierProperties[mod_list->drmFormatModifierCount].drmFormatModifierTilingFeatures = features;
-
- ++mod_list->drmFormatModifierCount;
- }
-
- free(mods);
+ VkDrmFormatModifierPropertiesListEXT *mod_list =
+ vk_find_struct(pFormatProperties, DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT);
+ unsigned mod_count;
+
+ if (!mod_list)
+ return;
+
+ if (vk_format_is_compressed(format) || vk_format_is_depth_or_stencil(format)) {
+ mod_list->drmFormatModifierCount = 0;
+ return;
+ }
+
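+ /* First query only the number of supported modifiers. */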
+ ac_get_supported_modifiers(&dev->rad_info, &radv_modifier_options,
+ vk_format_to_pipe_format(format), &mod_count, NULL);
+ if (!mod_list->pDrmFormatModifierProperties) {
+ mod_list->drmFormatModifierCount = mod_count;
+ return;
+ }
+
+ mod_count = MIN2(mod_count, mod_list->drmFormatModifierCount);
+
+ uint64_t *mods = malloc(mod_count * sizeof(uint64_t));
+ if (!mods) {
+ /* We can't return an error here ... */
+ mod_list->drmFormatModifierCount = 0;
+ return;
+ }
+ ac_get_supported_modifiers(&dev->rad_info, &radv_modifier_options,
+ vk_format_to_pipe_format(format), &mod_count, mods);
+
+ mod_list->drmFormatModifierCount = 0;
+ for (unsigned i = 0; i < mod_count; ++i) {
+ VkFormatFeatureFlags features =
+ radv_get_modifier_flags(dev, format, mods[i], &pFormatProperties->formatProperties);
+ unsigned planes = vk_format_get_plane_count(format);
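+ /* DCC metadata is exposed through extra memory planes (one more when retiling is needed). */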
+ if (planes == 1) {
+ if (ac_modifier_has_dcc_retile(mods[i]))
+ planes = 3;
+ else if (ac_modifier_has_dcc(mods[i]))
+ planes = 2;
+ }
+
+ if (!features)
+ continue;
+
+ mod_list->pDrmFormatModifierProperties[mod_list->drmFormatModifierCount].drmFormatModifier =
+ mods[i];
+ mod_list->pDrmFormatModifierProperties[mod_list->drmFormatModifierCount]
+ .drmFormatModifierPlaneCount = planes;
+ mod_list->pDrmFormatModifierProperties[mod_list->drmFormatModifierCount]
+ .drmFormatModifierTilingFeatures = features;
+
+ ++mod_list->drmFormatModifierCount;
+ }
+
+ free(mods);
}
-
-static VkResult radv_check_modifier_support(struct radv_physical_device *dev,
- const VkPhysicalDeviceImageFormatInfo2 *info,
- VkImageFormatProperties *props,
- VkFormat format,
- uint64_t modifier)
+static VkResult
+radv_check_modifier_support(struct radv_physical_device *dev,
+ const VkPhysicalDeviceImageFormatInfo2 *info,
+ VkImageFormatProperties *props, VkFormat format, uint64_t modifier)
{
- if (info->type != VK_IMAGE_TYPE_2D)
- return VK_ERROR_FORMAT_NOT_SUPPORTED;
-
- /* We did not add modifiers for sparse textures. */
- if (info->flags & (VK_IMAGE_CREATE_SPARSE_BINDING_BIT |
- VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT |
- VK_IMAGE_CREATE_SPARSE_ALIASED_BIT))
- return VK_ERROR_FORMAT_NOT_SUPPORTED;
-
- /*
- * Need to check the modifier is supported in general:
- * "If the drmFormatModifier is incompatible with the parameters specified
- * in VkPhysicalDeviceImageFormatInfo2 and its pNext chain, then
- * vkGetPhysicalDeviceImageFormatProperties2 returns VK_ERROR_FORMAT_NOT_SUPPORTED.
- * The implementation must support the query of any drmFormatModifier,
- * including unknown and invalid modifier values."
- */
- VkDrmFormatModifierPropertiesListEXT mod_list = {
- .sType = VK_STRUCTURE_TYPE_DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT,
- };
-
- VkFormatProperties2 format_props2 = {
- .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
- .pNext = &mod_list
- };
-
- radv_GetPhysicalDeviceFormatProperties2(radv_physical_device_to_handle(dev), format, &format_props2);
-
- if (!mod_list.drmFormatModifierCount)
- return VK_ERROR_FORMAT_NOT_SUPPORTED;
-
- mod_list.pDrmFormatModifierProperties = calloc(mod_list.drmFormatModifierCount, sizeof(*mod_list.pDrmFormatModifierProperties));
- if (!mod_list.pDrmFormatModifierProperties)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
-
- radv_GetPhysicalDeviceFormatProperties2(radv_physical_device_to_handle(dev), format, &format_props2);
-
- bool found = false;
- for (uint32_t i = 0; i < mod_list.drmFormatModifierCount && !found; ++i)
- if (mod_list.pDrmFormatModifierProperties[i].drmFormatModifier == modifier)
- found = true;
-
- free(mod_list.pDrmFormatModifierProperties);
-
- if (!found)
- return VK_ERROR_FORMAT_NOT_SUPPORTED;
-
- if (ac_modifier_has_dcc(modifier) &&
- !radv_are_formats_dcc_compatible(dev, info->pNext, format, info->flags))
- return VK_ERROR_FORMAT_NOT_SUPPORTED;
-
- /* We can expand this as needed and implemented but there is not much demand
- * for more. */
- if (ac_modifier_has_dcc(modifier)) {
- props->maxMipLevels = 1;
- props->maxArrayLayers = 1;
- }
- /* We don't support MSAA for modifiers */
- props->sampleCounts &= VK_SAMPLE_COUNT_1_BIT;
- return VK_SUCCESS;
+ if (info->type != VK_IMAGE_TYPE_2D)
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+ /* We did not add modifiers for sparse textures. */
+ if (info->flags & (VK_IMAGE_CREATE_SPARSE_BINDING_BIT | VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT |
+ VK_IMAGE_CREATE_SPARSE_ALIASED_BIT))
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+ /*
+ * Need to check the modifier is supported in general:
+ * "If the drmFormatModifier is incompatible with the parameters specified
+ * in VkPhysicalDeviceImageFormatInfo2 and its pNext chain, then
+ * vkGetPhysicalDeviceImageFormatProperties2 returns VK_ERROR_FORMAT_NOT_SUPPORTED.
+ * The implementation must support the query of any drmFormatModifier,
+ * including unknown and invalid modifier values."
+ */
+ VkDrmFormatModifierPropertiesListEXT mod_list = {
+ .sType = VK_STRUCTURE_TYPE_DRM_FORMAT_MODIFIER_PROPERTIES_LIST_EXT,
+ };
+
+ VkFormatProperties2 format_props2 = {.sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
+ .pNext = &mod_list};
+
+ radv_GetPhysicalDeviceFormatProperties2(radv_physical_device_to_handle(dev), format,
+ &format_props2);
+
+ if (!mod_list.drmFormatModifierCount)
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+ mod_list.pDrmFormatModifierProperties =
+ calloc(mod_list.drmFormatModifierCount, sizeof(*mod_list.pDrmFormatModifierProperties));
+ if (!mod_list.pDrmFormatModifierProperties)
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+
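+ /* Query again now that storage for the modifier list is allocated. */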
+ radv_GetPhysicalDeviceFormatProperties2(radv_physical_device_to_handle(dev), format,
+ &format_props2);
+
+ bool found = false;
+ for (uint32_t i = 0; i < mod_list.drmFormatModifierCount && !found; ++i)
+ if (mod_list.pDrmFormatModifierProperties[i].drmFormatModifier == modifier)
+ found = true;
+
+ free(mod_list.pDrmFormatModifierProperties);
+
+ if (!found)
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+ if (ac_modifier_has_dcc(modifier) &&
+ !radv_are_formats_dcc_compatible(dev, info->pNext, format, info->flags))
+ return VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+ /* We can expand this as needed and implement it, but there is not much demand
+  * for more. */
+ if (ac_modifier_has_dcc(modifier)) {
+ props->maxMipLevels = 1;
+ props->maxArrayLayers = 1;
+ }
+ /* We don't support MSAA for modifiers */
+ props->sampleCounts &= VK_SAMPLE_COUNT_1_BIT;
+ return VK_SUCCESS;
}
-void radv_GetPhysicalDeviceFormatProperties2(
- VkPhysicalDevice physicalDevice,
- VkFormat format,
- VkFormatProperties2* pFormatProperties)
+void
+radv_GetPhysicalDeviceFormatProperties2(VkPhysicalDevice physicalDevice, VkFormat format,
+ VkFormatProperties2 *pFormatProperties)
{
- RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
+ RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
- radv_physical_device_get_format_properties(physical_device,
- format,
- &pFormatProperties->formatProperties);
+ radv_physical_device_get_format_properties(physical_device, format,
+ &pFormatProperties->formatProperties);
- radv_list_drm_format_modifiers(physical_device, format, pFormatProperties);
+ radv_list_drm_format_modifiers(physical_device, format, pFormatProperties);
}
-static VkResult radv_get_image_format_properties(struct radv_physical_device *physical_device,
- const VkPhysicalDeviceImageFormatInfo2 *info,
- VkFormat format,
- VkImageFormatProperties *pImageFormatProperties)
+static VkResult
+radv_get_image_format_properties(struct radv_physical_device *physical_device,
+ const VkPhysicalDeviceImageFormatInfo2 *info, VkFormat format,
+ VkImageFormatProperties *pImageFormatProperties)
{
- VkFormatProperties format_props;
- VkFormatFeatureFlags format_feature_flags;
- VkExtent3D maxExtent;
- uint32_t maxMipLevels;
- uint32_t maxArraySize;
- VkSampleCountFlags sampleCounts = VK_SAMPLE_COUNT_1_BIT;
- const struct util_format_description *desc = vk_format_description(format);
- enum chip_class chip_class = physical_device->rad_info.chip_class;
- VkImageTiling tiling = info->tiling;
- const VkPhysicalDeviceImageDrmFormatModifierInfoEXT *mod_info =
- vk_find_struct_const(info->pNext, PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT);
- VkResult result = VK_ERROR_FORMAT_NOT_SUPPORTED;
-
- radv_physical_device_get_format_properties(physical_device, format,
- &format_props);
- if (tiling == VK_IMAGE_TILING_LINEAR) {
- format_feature_flags = format_props.linearTilingFeatures;
- } else if (tiling == VK_IMAGE_TILING_OPTIMAL) {
- format_feature_flags = format_props.optimalTilingFeatures;
- } else if (tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
- format_feature_flags = radv_get_modifier_flags(physical_device, format,
- mod_info->drmFormatModifier,
- &format_props);
- } else {
- unreachable("bad VkImageTiling");
- }
-
- if (format_feature_flags == 0)
- goto unsupported;
-
- if (info->type != VK_IMAGE_TYPE_2D && vk_format_is_depth_or_stencil(format))
- goto unsupported;
-
- switch (info->type) {
- default:
- unreachable("bad vkimage type\n");
- case VK_IMAGE_TYPE_1D:
- maxExtent.width = 16384;
- maxExtent.height = 1;
- maxExtent.depth = 1;
- maxMipLevels = 15; /* log2(maxWidth) + 1 */
- maxArraySize = chip_class >= GFX10 ? 8192 : 2048;
- break;
- case VK_IMAGE_TYPE_2D:
- maxExtent.width = 16384;
- maxExtent.height = 16384;
- maxExtent.depth = 1;
- maxMipLevels = 15; /* log2(maxWidth) + 1 */
- maxArraySize = chip_class >= GFX10 ? 8192 : 2048;
- break;
- case VK_IMAGE_TYPE_3D:
- if (chip_class >= GFX10) {
- maxExtent.width = 8192;
- maxExtent.height = 8192;
- maxExtent.depth = 8192;
- } else {
- maxExtent.width = 2048;
- maxExtent.height = 2048;
- maxExtent.depth = 2048;
- }
- maxMipLevels = util_logbase2(maxExtent.width) + 1;
- maxArraySize = 1;
- break;
- }
-
- if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
- /* We might be able to support this, but the entire format support
- * is messy, so take the lazy way out. */
- maxArraySize = 1;
- }
-
- if (tiling == VK_IMAGE_TILING_OPTIMAL &&
- info->type == VK_IMAGE_TYPE_2D &&
- (format_feature_flags & (VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT |
- VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
- !(info->flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT)) {
- sampleCounts |= VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT;
- }
-
- if (tiling == VK_IMAGE_TILING_LINEAR &&
- (format == VK_FORMAT_R32G32B32_SFLOAT ||
- format == VK_FORMAT_R32G32B32_SINT ||
- format == VK_FORMAT_R32G32B32_UINT)) {
- /* R32G32B32 is a weird format and the driver currently only
- * supports the bare minimum.
- * TODO: Implement more if we really need to.
- */
- if (info->type == VK_IMAGE_TYPE_3D)
- goto unsupported;
- maxArraySize = 1;
- maxMipLevels = 1;
- }
-
-
- /* We can't create 3d compressed 128bpp images that can be rendered to on GFX9 */
- if (physical_device->rad_info.chip_class >= GFX9 &&
- info->type == VK_IMAGE_TYPE_3D &&
- vk_format_get_blocksizebits(format) == 128 &&
- vk_format_is_compressed(format) &&
- (info->flags & VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT) &&
- ((info->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT) ||
- (info->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT))) {
- goto unsupported;
- }
-
- if (info->usage & VK_IMAGE_USAGE_SAMPLED_BIT) {
- if (!(format_feature_flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) {
- goto unsupported;
- }
- }
-
- if (info->usage & VK_IMAGE_USAGE_STORAGE_BIT) {
- if (!(format_feature_flags & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT)) {
- goto unsupported;
- }
- }
-
- if (info->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) {
- if (!(format_feature_flags & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT)) {
- goto unsupported;
- }
- }
-
- if (info->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
- if (!(format_feature_flags & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) {
- goto unsupported;
- }
- }
-
- if (info->usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) {
- if (!(format_feature_flags & VK_FORMAT_FEATURE_TRANSFER_SRC_BIT)) {
- goto unsupported;
- }
- }
-
- if (info->usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) {
- if (!(format_feature_flags & VK_FORMAT_FEATURE_TRANSFER_DST_BIT)) {
- goto unsupported;
- }
- }
-
- if (info->usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) {
- if (!(format_feature_flags & (VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT |
- VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT))) {
- goto unsupported;
- }
- }
-
- /* Sparse resources with multi-planar formats are unsupported. */
- if (info->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
- if (vk_format_get_plane_count(format) > 1)
- goto unsupported;
- }
-
- if (info->flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT) {
- /* Sparse textures are only supported on GFX8+. */
- if (physical_device->rad_info.chip_class < GFX8)
- goto unsupported;
-
- if (vk_format_get_plane_count(format) > 1 || info->type != VK_IMAGE_TYPE_2D ||
- info->tiling != VK_IMAGE_TILING_OPTIMAL ||
- vk_format_is_depth_or_stencil(format))
- goto unsupported;
- }
-
- *pImageFormatProperties = (VkImageFormatProperties) {
- .maxExtent = maxExtent,
- .maxMipLevels = maxMipLevels,
- .maxArrayLayers = maxArraySize,
- .sampleCounts = sampleCounts,
-
- /* FINISHME: Accurately calculate
- * VkImageFormatProperties::maxResourceSize.
- */
- .maxResourceSize = UINT32_MAX,
- };
-
- if (mod_info) {
- result = radv_check_modifier_support(physical_device, info,
- pImageFormatProperties,
- format, mod_info->drmFormatModifier);
- if (result != VK_SUCCESS)
- goto unsupported;
- }
-
- return VK_SUCCESS;
+ VkFormatProperties format_props;
+ VkFormatFeatureFlags format_feature_flags;
+ VkExtent3D maxExtent;
+ uint32_t maxMipLevels;
+ uint32_t maxArraySize;
+ VkSampleCountFlags sampleCounts = VK_SAMPLE_COUNT_1_BIT;
+ const struct util_format_description *desc = vk_format_description(format);
+ enum chip_class chip_class = physical_device->rad_info.chip_class;
+ VkImageTiling tiling = info->tiling;
+ const VkPhysicalDeviceImageDrmFormatModifierInfoEXT *mod_info =
+ vk_find_struct_const(info->pNext, PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT);
+ VkResult result = VK_ERROR_FORMAT_NOT_SUPPORTED;
+
+ radv_physical_device_get_format_properties(physical_device, format, &format_props);
+ if (tiling == VK_IMAGE_TILING_LINEAR) {
+ format_feature_flags = format_props.linearTilingFeatures;
+ } else if (tiling == VK_IMAGE_TILING_OPTIMAL) {
+ format_feature_flags = format_props.optimalTilingFeatures;
+ } else if (tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
+ format_feature_flags = radv_get_modifier_flags(physical_device, format,
+ mod_info->drmFormatModifier, &format_props);
+ } else {
+ unreachable("bad VkImageTiling");
+ }
+
+ if (format_feature_flags == 0)
+ goto unsupported;
+
+ if (info->type != VK_IMAGE_TYPE_2D && vk_format_is_depth_or_stencil(format))
+ goto unsupported;
+
+ switch (info->type) {
+ default:
+ unreachable("bad vkimage type\n");
+ case VK_IMAGE_TYPE_1D:
+ maxExtent.width = 16384;
+ maxExtent.height = 1;
+ maxExtent.depth = 1;
+ maxMipLevels = 15; /* log2(maxWidth) + 1 */
+ maxArraySize = chip_class >= GFX10 ? 8192 : 2048;
+ break;
+ case VK_IMAGE_TYPE_2D:
+ maxExtent.width = 16384;
+ maxExtent.height = 16384;
+ maxExtent.depth = 1;
+ maxMipLevels = 15; /* log2(maxWidth) + 1 */
+ maxArraySize = chip_class >= GFX10 ? 8192 : 2048;
+ break;
+ case VK_IMAGE_TYPE_3D:
+ if (chip_class >= GFX10) {
+ maxExtent.width = 8192;
+ maxExtent.height = 8192;
+ maxExtent.depth = 8192;
+ } else {
+ maxExtent.width = 2048;
+ maxExtent.height = 2048;
+ maxExtent.depth = 2048;
+ }
+ maxMipLevels = util_logbase2(maxExtent.width) + 1;
+ maxArraySize = 1;
+ break;
+ }
+
+ if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
+ /* We might be able to support this, but the entire format support
+ * is messy, so take the lazy way out. */
+ maxArraySize = 1;
+ }
+
+ if (tiling == VK_IMAGE_TILING_OPTIMAL && info->type == VK_IMAGE_TYPE_2D &&
+ (format_feature_flags & (VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT |
+ VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
+ !(info->flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT)) {
+ sampleCounts |= VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT;
+ }
+
+ if (tiling == VK_IMAGE_TILING_LINEAR &&
+ (format == VK_FORMAT_R32G32B32_SFLOAT || format == VK_FORMAT_R32G32B32_SINT ||
+ format == VK_FORMAT_R32G32B32_UINT)) {
+ /* R32G32B32 is a weird format and the driver currently only
+ * supports the bare minimum.
+ * TODO: Implement more if we really need to.
+ */
+ if (info->type == VK_IMAGE_TYPE_3D)
+ goto unsupported;
+ maxArraySize = 1;
+ maxMipLevels = 1;
+ }
+
+ /* We can't create 3d compressed 128bpp images that can be rendered to on GFX9 */
+ if (physical_device->rad_info.chip_class >= GFX9 && info->type == VK_IMAGE_TYPE_3D &&
+ vk_format_get_blocksizebits(format) == 128 && vk_format_is_compressed(format) &&
+ (info->flags & VK_IMAGE_CREATE_BLOCK_TEXEL_VIEW_COMPATIBLE_BIT) &&
+ ((info->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT) ||
+ (info->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT))) {
+ goto unsupported;
+ }
+
+ if (info->usage & VK_IMAGE_USAGE_SAMPLED_BIT) {
+ if (!(format_feature_flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) {
+ goto unsupported;
+ }
+ }
+
+ if (info->usage & VK_IMAGE_USAGE_STORAGE_BIT) {
+ if (!(format_feature_flags & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT)) {
+ goto unsupported;
+ }
+ }
+
+ if (info->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) {
+ if (!(format_feature_flags & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT)) {
+ goto unsupported;
+ }
+ }
+
+ if (info->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
+ if (!(format_feature_flags & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) {
+ goto unsupported;
+ }
+ }
+
+ if (info->usage & VK_IMAGE_USAGE_TRANSFER_SRC_BIT) {
+ if (!(format_feature_flags & VK_FORMAT_FEATURE_TRANSFER_SRC_BIT)) {
+ goto unsupported;
+ }
+ }
+
+ if (info->usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) {
+ if (!(format_feature_flags & VK_FORMAT_FEATURE_TRANSFER_DST_BIT)) {
+ goto unsupported;
+ }
+ }
+
+ if (info->usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) {
+ if (!(format_feature_flags & (VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT |
+ VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT))) {
+ goto unsupported;
+ }
+ }
+
+ /* Sparse resources with multi-planar formats are unsupported. */
+ if (info->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
+ if (vk_format_get_plane_count(format) > 1)
+ goto unsupported;
+ }
+
+ if (info->flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT) {
+ /* Sparse textures are only supported on GFX8+. */
+ if (physical_device->rad_info.chip_class < GFX8)
+ goto unsupported;
+
+ if (vk_format_get_plane_count(format) > 1 || info->type != VK_IMAGE_TYPE_2D ||
+ info->tiling != VK_IMAGE_TILING_OPTIMAL || vk_format_is_depth_or_stencil(format))
+ goto unsupported;
+ }
+
+ *pImageFormatProperties = (VkImageFormatProperties){
+ .maxExtent = maxExtent,
+ .maxMipLevels = maxMipLevels,
+ .maxArrayLayers = maxArraySize,
+ .sampleCounts = sampleCounts,
+
+ /* FINISHME: Accurately calculate
+ * VkImageFormatProperties::maxResourceSize.
+ */
+ .maxResourceSize = UINT32_MAX,
+ };
+
+ if (mod_info) {
+ result = radv_check_modifier_support(physical_device, info, pImageFormatProperties, format,
+ mod_info->drmFormatModifier);
+ if (result != VK_SUCCESS)
+ goto unsupported;
+ }
+
+ return VK_SUCCESS;
unsupported:
- *pImageFormatProperties = (VkImageFormatProperties) {
- .maxExtent = { 0, 0, 0 },
- .maxMipLevels = 0,
- .maxArrayLayers = 0,
- .sampleCounts = 0,
- .maxResourceSize = 0,
- };
-
- return result;
+ *pImageFormatProperties = (VkImageFormatProperties){
+ .maxExtent = {0, 0, 0},
+ .maxMipLevels = 0,
+ .maxArrayLayers = 0,
+ .sampleCounts = 0,
+ .maxResourceSize = 0,
+ };
+
+ return result;
}
-VkResult radv_GetPhysicalDeviceImageFormatProperties(
- VkPhysicalDevice physicalDevice,
- VkFormat format,
- VkImageType type,
- VkImageTiling tiling,
- VkImageUsageFlags usage,
- VkImageCreateFlags createFlags,
- VkImageFormatProperties* pImageFormatProperties)
+VkResult
+radv_GetPhysicalDeviceImageFormatProperties(VkPhysicalDevice physicalDevice, VkFormat format,
+ VkImageType type, VkImageTiling tiling,
+ VkImageUsageFlags usage, VkImageCreateFlags createFlags,
+ VkImageFormatProperties *pImageFormatProperties)
{
- RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
-
- const VkPhysicalDeviceImageFormatInfo2 info = {
- .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
- .pNext = NULL,
- .format = format,
- .type = type,
- .tiling = tiling,
- .usage = usage,
- .flags = createFlags,
- };
-
- return radv_get_image_format_properties(physical_device, &info, format,
- pImageFormatProperties);
+ RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
+
+ const VkPhysicalDeviceImageFormatInfo2 info = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
+ .pNext = NULL,
+ .format = format,
+ .type = type,
+ .tiling = tiling,
+ .usage = usage,
+ .flags = createFlags,
+ };
+
+ return radv_get_image_format_properties(physical_device, &info, format, pImageFormatProperties);
}
static void
get_external_image_format_properties(struct radv_physical_device *physical_device,
- const VkPhysicalDeviceImageFormatInfo2 *pImageFormatInfo,
- VkExternalMemoryHandleTypeFlagBits handleType,
- VkExternalMemoryProperties *external_properties,
- VkImageFormatProperties *format_properties)
+ const VkPhysicalDeviceImageFormatInfo2 *pImageFormatInfo,
+ VkExternalMemoryHandleTypeFlagBits handleType,
+ VkExternalMemoryProperties *external_properties,
+ VkImageFormatProperties *format_properties)
{
- VkExternalMemoryFeatureFlagBits flags = 0;
- VkExternalMemoryHandleTypeFlags export_flags = 0;
- VkExternalMemoryHandleTypeFlags compat_flags = 0;
-
- if (pImageFormatInfo->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT)
- return;
-
- switch (handleType) {
- case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
- if (pImageFormatInfo->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
- break;
-
- switch (pImageFormatInfo->type) {
- case VK_IMAGE_TYPE_2D:
- flags = VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT |VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
-
- compat_flags = export_flags = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
- break;
- default:
- break;
- }
- break;
- case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT:
- switch (pImageFormatInfo->type) {
- case VK_IMAGE_TYPE_2D:
- flags = VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT |VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
- if (pImageFormatInfo->tiling != VK_IMAGE_TILING_LINEAR)
- flags |= VK_EXTERNAL_MEMORY_FEATURE_DEDICATED_ONLY_BIT;
-
- compat_flags = export_flags = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
- break;
- default:
- break;
- }
- break;
- case VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID:
- if (!physical_device->vk.supported_extensions.ANDROID_external_memory_android_hardware_buffer)
- break;
-
- if (!radv_android_gralloc_supports_format(pImageFormatInfo->format,
- pImageFormatInfo->usage))
- break;
-
- if (pImageFormatInfo->type != VK_IMAGE_TYPE_2D)
- break;
-
- format_properties->maxMipLevels = MIN2(1, format_properties->maxMipLevels);
- format_properties->maxArrayLayers = MIN2(1, format_properties->maxArrayLayers);
- format_properties->sampleCounts &= VK_SAMPLE_COUNT_1_BIT;
-
- flags = VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT|VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
- if (pImageFormatInfo->tiling != VK_IMAGE_TILING_LINEAR)
- flags |= VK_EXTERNAL_MEMORY_FEATURE_DEDICATED_ONLY_BIT;
-
- compat_flags = VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID;
- break;
- case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT:
- flags = VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
- compat_flags = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT;
- break;
- default:
- break;
- }
-
- *external_properties = (VkExternalMemoryProperties) {
- .externalMemoryFeatures = flags,
- .exportFromImportedHandleTypes = export_flags,
- .compatibleHandleTypes = compat_flags,
- };
+ VkExternalMemoryFeatureFlagBits flags = 0;
+ VkExternalMemoryHandleTypeFlags export_flags = 0;
+ VkExternalMemoryHandleTypeFlags compat_flags = 0;
+
+ if (pImageFormatInfo->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT)
+ return;
+
+ switch (handleType) {
+ case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
+ if (pImageFormatInfo->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
+ break;
+
+ switch (pImageFormatInfo->type) {
+ case VK_IMAGE_TYPE_2D:
+ flags =
+ VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT | VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
+
+ compat_flags = export_flags = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
+ break;
+ default:
+ break;
+ }
+ break;
+ case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT:
+ switch (pImageFormatInfo->type) {
+ case VK_IMAGE_TYPE_2D:
+ flags =
+ VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT | VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
+ if (pImageFormatInfo->tiling != VK_IMAGE_TILING_LINEAR)
+ flags |= VK_EXTERNAL_MEMORY_FEATURE_DEDICATED_ONLY_BIT;
+
+ compat_flags = export_flags = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
+ break;
+ default:
+ break;
+ }
+ break;
+ case VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID:
+ if (!physical_device->vk.supported_extensions.ANDROID_external_memory_android_hardware_buffer)
+ break;
+
+ if (!radv_android_gralloc_supports_format(pImageFormatInfo->format, pImageFormatInfo->usage))
+ break;
+
+ if (pImageFormatInfo->type != VK_IMAGE_TYPE_2D)
+ break;
+
+ format_properties->maxMipLevels = MIN2(1, format_properties->maxMipLevels);
+ format_properties->maxArrayLayers = MIN2(1, format_properties->maxArrayLayers);
+ format_properties->sampleCounts &= VK_SAMPLE_COUNT_1_BIT;
+
+ flags = VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT | VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
+ if (pImageFormatInfo->tiling != VK_IMAGE_TILING_LINEAR)
+ flags |= VK_EXTERNAL_MEMORY_FEATURE_DEDICATED_ONLY_BIT;
+
+ compat_flags = VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID;
+ break;
+ case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT:
+ flags = VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
+ compat_flags = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT;
+ break;
+ default:
+ break;
+ }
+
+ *external_properties = (VkExternalMemoryProperties){
+ .externalMemoryFeatures = flags,
+ .exportFromImportedHandleTypes = export_flags,
+ .compatibleHandleTypes = compat_flags,
+ };
}
-VkResult radv_GetPhysicalDeviceImageFormatProperties2(
- VkPhysicalDevice physicalDevice,
- const VkPhysicalDeviceImageFormatInfo2 *base_info,
- VkImageFormatProperties2 *base_props)
+VkResult
+radv_GetPhysicalDeviceImageFormatProperties2(VkPhysicalDevice physicalDevice,
+ const VkPhysicalDeviceImageFormatInfo2 *base_info,
+ VkImageFormatProperties2 *base_props)
{
- RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
- const VkPhysicalDeviceExternalImageFormatInfo *external_info = NULL;
- VkExternalImageFormatProperties *external_props = NULL;
- struct VkAndroidHardwareBufferUsageANDROID *android_usage = NULL;
- VkSamplerYcbcrConversionImageFormatProperties *ycbcr_props = NULL;
- VkTextureLODGatherFormatPropertiesAMD *texture_lod_props = NULL;
- VkResult result;
- VkFormat format = radv_select_android_external_format(base_info->pNext, base_info->format);
-
- result = radv_get_image_format_properties(physical_device, base_info, format,
- &base_props->imageFormatProperties);
- if (result != VK_SUCCESS)
- return result;
-
- /* Extract input structs */
- vk_foreach_struct_const(s, base_info->pNext) {
- switch (s->sType) {
- case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO:
- external_info = (const void *) s;
- break;
- default:
- break;
- }
- }
-
- /* Extract output structs */
- vk_foreach_struct(s, base_props->pNext) {
- switch (s->sType) {
- case VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES:
- external_props = (void *) s;
- break;
- case VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_IMAGE_FORMAT_PROPERTIES:
- ycbcr_props = (void *) s;
- break;
- case VK_STRUCTURE_TYPE_ANDROID_HARDWARE_BUFFER_USAGE_ANDROID:
- android_usage = (void *) s;
- break;
- case VK_STRUCTURE_TYPE_TEXTURE_LOD_GATHER_FORMAT_PROPERTIES_AMD:
- texture_lod_props = (void *) s;
- break;
- default:
- break;
- }
- }
-
- bool ahb_supported = physical_device->vk.supported_extensions.ANDROID_external_memory_android_hardware_buffer;
- if (android_usage && ahb_supported) {
+ RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
+ const VkPhysicalDeviceExternalImageFormatInfo *external_info = NULL;
+ VkExternalImageFormatProperties *external_props = NULL;
+ struct VkAndroidHardwareBufferUsageANDROID *android_usage = NULL;
+ VkSamplerYcbcrConversionImageFormatProperties *ycbcr_props = NULL;
+ VkTextureLODGatherFormatPropertiesAMD *texture_lod_props = NULL;
+ VkResult result;
+ VkFormat format = radv_select_android_external_format(base_info->pNext, base_info->format);
+
+ result = radv_get_image_format_properties(physical_device, base_info, format,
+ &base_props->imageFormatProperties);
+ if (result != VK_SUCCESS)
+ return result;
+
+ /* Extract input structs */
+ vk_foreach_struct_const(s, base_info->pNext)
+ {
+ switch (s->sType) {
+ case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO:
+ external_info = (const void *)s;
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* Extract output structs */
+ vk_foreach_struct(s, base_props->pNext)
+ {
+ switch (s->sType) {
+ case VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES:
+ external_props = (void *)s;
+ break;
+ case VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_IMAGE_FORMAT_PROPERTIES:
+ ycbcr_props = (void *)s;
+ break;
+ case VK_STRUCTURE_TYPE_ANDROID_HARDWARE_BUFFER_USAGE_ANDROID:
+ android_usage = (void *)s;
+ break;
+ case VK_STRUCTURE_TYPE_TEXTURE_LOD_GATHER_FORMAT_PROPERTIES_AMD:
+ texture_lod_props = (void *)s;
+ break;
+ default:
+ break;
+ }
+ }
+
+ bool ahb_supported =
+ physical_device->vk.supported_extensions.ANDROID_external_memory_android_hardware_buffer;
+ if (android_usage && ahb_supported) {
#if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER
- android_usage->androidHardwareBufferUsage =
- radv_ahb_usage_from_vk_usage(base_info->flags,
- base_info->usage);
+ android_usage->androidHardwareBufferUsage =
+ radv_ahb_usage_from_vk_usage(base_info->flags, base_info->usage);
#endif
- }
-
- /* From the Vulkan 1.0.97 spec:
- *
- * If handleType is 0, vkGetPhysicalDeviceImageFormatProperties2 will
- * behave as if VkPhysicalDeviceExternalImageFormatInfo was not
- * present and VkExternalImageFormatProperties will be ignored.
- */
- if (external_info && external_info->handleType != 0) {
- get_external_image_format_properties(physical_device, base_info, external_info->handleType,
- &external_props->externalMemoryProperties,
- &base_props->imageFormatProperties);
- if (!external_props->externalMemoryProperties.externalMemoryFeatures) {
- /* From the Vulkan 1.0.97 spec:
- *
- * If handleType is not compatible with the [parameters] specified
- * in VkPhysicalDeviceImageFormatInfo2, then
- * vkGetPhysicalDeviceImageFormatProperties2 returns
- * VK_ERROR_FORMAT_NOT_SUPPORTED.
- */
- result = vk_errorf(physical_device->instance, VK_ERROR_FORMAT_NOT_SUPPORTED,
- "unsupported VkExternalMemoryHandleTypeFlagBits 0x%x",
- external_info->handleType);
- goto fail;
- }
- }
-
- if (ycbcr_props) {
- ycbcr_props->combinedImageSamplerDescriptorCount = vk_format_get_plane_count(format);
- }
-
- if (texture_lod_props) {
- if (physical_device->rad_info.chip_class >= GFX9) {
- texture_lod_props->supportsTextureGatherLODBiasAMD = true;
- } else {
- texture_lod_props->supportsTextureGatherLODBiasAMD = !vk_format_is_int(format);
- }
- }
-
- return VK_SUCCESS;
+ }
+
+ /* From the Vulkan 1.0.97 spec:
+ *
+ * If handleType is 0, vkGetPhysicalDeviceImageFormatProperties2 will
+ * behave as if VkPhysicalDeviceExternalImageFormatInfo was not
+ * present and VkExternalImageFormatProperties will be ignored.
+ */
+ if (external_info && external_info->handleType != 0) {
+ get_external_image_format_properties(physical_device, base_info, external_info->handleType,
+ &external_props->externalMemoryProperties,
+ &base_props->imageFormatProperties);
+ if (!external_props->externalMemoryProperties.externalMemoryFeatures) {
+ /* From the Vulkan 1.0.97 spec:
+ *
+ * If handleType is not compatible with the [parameters] specified
+ * in VkPhysicalDeviceImageFormatInfo2, then
+ * vkGetPhysicalDeviceImageFormatProperties2 returns
+ * VK_ERROR_FORMAT_NOT_SUPPORTED.
+ */
+ result = vk_errorf(physical_device->instance, VK_ERROR_FORMAT_NOT_SUPPORTED,
+ "unsupported VkExternalMemoryHandleTypeFlagBits 0x%x",
+ external_info->handleType);
+ goto fail;
+ }
+ }
+
+ if (ycbcr_props) {
+ ycbcr_props->combinedImageSamplerDescriptorCount = vk_format_get_plane_count(format);
+ }
+
+ if (texture_lod_props) {
+ if (physical_device->rad_info.chip_class >= GFX9) {
+ texture_lod_props->supportsTextureGatherLODBiasAMD = true;
+ } else {
+ texture_lod_props->supportsTextureGatherLODBiasAMD = !vk_format_is_int(format);
+ }
+ }
+
+ return VK_SUCCESS;
fail:
- if (result == VK_ERROR_FORMAT_NOT_SUPPORTED) {
- /* From the Vulkan 1.0.97 spec:
- *
- * If the combination of parameters to
- * vkGetPhysicalDeviceImageFormatProperties2 is not supported by
- * the implementation for use in vkCreateImage, then all members of
- * imageFormatProperties will be filled with zero.
- */
- base_props->imageFormatProperties = (VkImageFormatProperties) {0};
- }
-
- return result;
+ if (result == VK_ERROR_FORMAT_NOT_SUPPORTED) {
+ /* From the Vulkan 1.0.97 spec:
+ *
+ * If the combination of parameters to
+ * vkGetPhysicalDeviceImageFormatProperties2 is not supported by
+ * the implementation for use in vkCreateImage, then all members of
+ * imageFormatProperties will be filled with zero.
+ */
+ base_props->imageFormatProperties = (VkImageFormatProperties){0};
+ }
+
+ return result;
}
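
A compact usage sketch of the external-memory path handled above, chaining
VkPhysicalDeviceExternalImageFormatInfo on the input and
VkExternalImageFormatProperties on the output for an opaque fd (the format,
usage and wrapper function are illustrative choices):

    #include <vulkan/vulkan.h>

    static VkResult
    query_opaque_fd_image_support(VkPhysicalDevice pdev)
    {
       const VkPhysicalDeviceExternalImageFormatInfo ext_info = {
          .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO,
          .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
       };
       const VkPhysicalDeviceImageFormatInfo2 info = {
          .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
          .pNext = &ext_info,
          .format = VK_FORMAT_B8G8R8A8_UNORM,
          .type = VK_IMAGE_TYPE_2D,
          .tiling = VK_IMAGE_TILING_OPTIMAL,
          .usage = VK_IMAGE_USAGE_SAMPLED_BIT,
       };
       VkExternalImageFormatProperties ext_props = {
          .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES,
       };
       VkImageFormatProperties2 props = {
          .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2,
          .pNext = &ext_props,
       };

       /* On success, ext_props.externalMemoryProperties should report the
        * exportable and importable bits for opaque fds on 2D optimal images,
        * per the handleType switch above. */
       return vkGetPhysicalDeviceImageFormatProperties2(pdev, &info, &props);
    }
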
-static void fill_sparse_image_format_properties(struct radv_physical_device *pdev,
- VkFormat format,
- VkSparseImageFormatProperties *prop)
+static void
+fill_sparse_image_format_properties(struct radv_physical_device *pdev, VkFormat format,
+ VkSparseImageFormatProperties *prop)
{
- prop->aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
- prop->flags = 0;
+ prop->aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+ prop->flags = 0;
- /* On GFX8 we first subdivide by level and then layer, leading to a single
- * miptail. On GFX9+ we first subdivide by layer and then level which results
- * in a miptail per layer. */
- if (pdev->rad_info.chip_class < GFX9)
- prop->flags |= VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT;
+ /* On GFX8 we first subdivide by level and then layer, leading to a single
+ * miptail. On GFX9+ we first subdivide by layer and then level which results
+ * in a miptail per layer. */
+ if (pdev->rad_info.chip_class < GFX9)
+ prop->flags |= VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT;
- /* This assumes the sparse image tile size is always 64 KiB (1 << 16) */
- unsigned l2_size = 16 - util_logbase2(vk_format_get_blocksize(format));
- unsigned w = (1u << ((l2_size + 1) / 2)) * vk_format_get_blockwidth(format);
- unsigned h = (1u << (l2_size / 2)) * vk_format_get_blockheight(format);
+ /* This assumes the sparse image tile size is always 64 KiB (1 << 16) */
+ unsigned l2_size = 16 - util_logbase2(vk_format_get_blocksize(format));
+ unsigned w = (1u << ((l2_size + 1) / 2)) * vk_format_get_blockwidth(format);
+ unsigned h = (1u << (l2_size / 2)) * vk_format_get_blockheight(format);
- prop->imageGranularity = (VkExtent3D) {w, h, 1};
+ prop->imageGranularity = (VkExtent3D){w, h, 1};
}
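
To make the shift arithmetic above concrete, the granularity can be checked by
hand for a couple of block sizes, assuming the 64 KiB tile size noted in the
comment (a standalone sketch, not driver code):

    #include <assert.h>

    static void
    check_sparse_granularity(void)
    {
       /* 4 bytes per texel, e.g. VK_FORMAT_R8G8B8A8_UNORM: log2(4) == 2. */
       unsigned l2_size = 16 - 2;
       unsigned w = 1u << ((l2_size + 1) / 2); /* 128 */
       unsigned h = 1u << (l2_size / 2);       /* 128 */
       assert(w * h * 4 == 65536);

       /* 8 bytes per texel, e.g. VK_FORMAT_R16G16B16A16_SFLOAT: log2(8) == 3. */
       l2_size = 16 - 3;
       w = 1u << ((l2_size + 1) / 2); /* 128 */
       h = 1u << (l2_size / 2);       /* 64 */
       assert(w * h * 8 == 65536);
    }
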
-void radv_GetPhysicalDeviceSparseImageFormatProperties2(
- VkPhysicalDevice physicalDevice,
- const VkPhysicalDeviceSparseImageFormatInfo2 *pFormatInfo,
- uint32_t *pPropertyCount,
- VkSparseImageFormatProperties2 *pProperties)
+void
+radv_GetPhysicalDeviceSparseImageFormatProperties2(
+ VkPhysicalDevice physicalDevice, const VkPhysicalDeviceSparseImageFormatInfo2 *pFormatInfo,
+ uint32_t *pPropertyCount, VkSparseImageFormatProperties2 *pProperties)
{
- RADV_FROM_HANDLE(radv_physical_device, pdev, physicalDevice);
- VkResult result;
-
- if (pFormatInfo->samples > VK_SAMPLE_COUNT_1_BIT) {
- *pPropertyCount = 0;
- return;
- }
-
- const VkPhysicalDeviceImageFormatInfo2 fmt_info = {
- .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
- .format = pFormatInfo->format,
- .type = pFormatInfo->type,
- .tiling = pFormatInfo->tiling,
- .usage = pFormatInfo->usage,
- .flags = VK_IMAGE_CREATE_SPARSE_BINDING_BIT |
- VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT
- };
-
- VkImageFormatProperties fmt_props;
- result = radv_get_image_format_properties(pdev, &fmt_info, pFormatInfo->format,
- &fmt_props);
- if (result != VK_SUCCESS) {
- *pPropertyCount = 0;
- return;
- }
-
- VK_OUTARRAY_MAKE_TYPED(VkSparseImageFormatProperties2, out, pProperties, pPropertyCount);
-
- vk_outarray_append_typed(VkSparseImageFormatProperties2 , &out, prop) {
- fill_sparse_image_format_properties(pdev, pFormatInfo->format, &prop->properties);
- };
+ RADV_FROM_HANDLE(radv_physical_device, pdev, physicalDevice);
+ VkResult result;
+
+ if (pFormatInfo->samples > VK_SAMPLE_COUNT_1_BIT) {
+ *pPropertyCount = 0;
+ return;
+ }
+
+ const VkPhysicalDeviceImageFormatInfo2 fmt_info = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2,
+ .format = pFormatInfo->format,
+ .type = pFormatInfo->type,
+ .tiling = pFormatInfo->tiling,
+ .usage = pFormatInfo->usage,
+ .flags = VK_IMAGE_CREATE_SPARSE_BINDING_BIT | VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT};
+
+ VkImageFormatProperties fmt_props;
+ result = radv_get_image_format_properties(pdev, &fmt_info, pFormatInfo->format, &fmt_props);
+ if (result != VK_SUCCESS) {
+ *pPropertyCount = 0;
+ return;
+ }
+
+ VK_OUTARRAY_MAKE_TYPED(VkSparseImageFormatProperties2, out, pProperties, pPropertyCount);
+
+ vk_outarray_append_typed(VkSparseImageFormatProperties2, &out, prop)
+ {
+ fill_sparse_image_format_properties(pdev, pFormatInfo->format, &prop->properties);
+ };
}
-void radv_GetPhysicalDeviceSparseImageFormatProperties(
- VkPhysicalDevice physicalDevice,
- VkFormat format,
- VkImageType type,
- uint32_t samples,
- VkImageUsageFlags usage,
- VkImageTiling tiling,
- uint32_t* pNumProperties,
- VkSparseImageFormatProperties* pProperties)
+void
+radv_GetPhysicalDeviceSparseImageFormatProperties(VkPhysicalDevice physicalDevice, VkFormat format,
+ VkImageType type, uint32_t samples,
+ VkImageUsageFlags usage, VkImageTiling tiling,
+ uint32_t *pNumProperties,
+ VkSparseImageFormatProperties *pProperties)
{
- const VkPhysicalDeviceSparseImageFormatInfo2 info = {
- .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SPARSE_IMAGE_FORMAT_INFO_2,
- .format = format,
- .type = type,
- .samples = samples,
- .usage = usage,
- .tiling = tiling
- };
-
- if (!pProperties) {
- radv_GetPhysicalDeviceSparseImageFormatProperties2(physicalDevice, &info,
- pNumProperties, NULL);
- return;
- }
-
- VkSparseImageFormatProperties2 props[4];
- uint32_t prop_cnt = MIN2(ARRAY_SIZE(props), *pNumProperties);
-
- memset(props, 0, sizeof(props));
- for (unsigned i = 0; i < ARRAY_SIZE(props); ++i)
- props[i].sType = VK_STRUCTURE_TYPE_SPARSE_IMAGE_FORMAT_PROPERTIES_2;
-
- radv_GetPhysicalDeviceSparseImageFormatProperties2(physicalDevice, &info,
- &prop_cnt, props);
-
- for (unsigned i = 0; i < prop_cnt; ++i)
- pProperties[i] = props[i].properties;
- *pNumProperties = prop_cnt;
+ const VkPhysicalDeviceSparseImageFormatInfo2 info = {
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SPARSE_IMAGE_FORMAT_INFO_2,
+ .format = format,
+ .type = type,
+ .samples = samples,
+ .usage = usage,
+ .tiling = tiling};
+
+ if (!pProperties) {
+ radv_GetPhysicalDeviceSparseImageFormatProperties2(physicalDevice, &info, pNumProperties,
+ NULL);
+ return;
+ }
+
+ VkSparseImageFormatProperties2 props[4];
+ uint32_t prop_cnt = MIN2(ARRAY_SIZE(props), *pNumProperties);
+
+ memset(props, 0, sizeof(props));
+ for (unsigned i = 0; i < ARRAY_SIZE(props); ++i)
+ props[i].sType = VK_STRUCTURE_TYPE_SPARSE_IMAGE_FORMAT_PROPERTIES_2;
+
+ radv_GetPhysicalDeviceSparseImageFormatProperties2(physicalDevice, &info, &prop_cnt, props);
+
+ for (unsigned i = 0; i < prop_cnt; ++i)
+ pProperties[i] = props[i].properties;
+ *pNumProperties = prop_cnt;
}
-void radv_GetImageSparseMemoryRequirements2(
- VkDevice _device,
- const VkImageSparseMemoryRequirementsInfo2 *pInfo,
- uint32_t* pSparseMemoryRequirementCount,
- VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements)
+void
+radv_GetImageSparseMemoryRequirements2(VkDevice _device,
+ const VkImageSparseMemoryRequirementsInfo2 *pInfo,
+ uint32_t *pSparseMemoryRequirementCount,
+ VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_image, image, pInfo->image);
-
- if (!(image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT)) {
- *pSparseMemoryRequirementCount = 0;
- return;
- }
-
- VK_OUTARRAY_MAKE_TYPED(VkSparseImageMemoryRequirements2, out, pSparseMemoryRequirements, pSparseMemoryRequirementCount);
-
- vk_outarray_append_typed(VkSparseImageMemoryRequirements2, &out, req) {
- fill_sparse_image_format_properties(device->physical_device,
- image->vk_format,
- &req->memoryRequirements.formatProperties);
- req->memoryRequirements.imageMipTailFirstLod = image->planes[0].surface.first_mip_tail_level;
-
- if (req->memoryRequirements.imageMipTailFirstLod < image->info.levels) {
- if (device->physical_device->rad_info.chip_class >= GFX9) {
- /* The tail is always a single tile per layer. */
- req->memoryRequirements.imageMipTailSize = 65536;
- req->memoryRequirements.imageMipTailOffset =
- image->planes[0].surface.u.gfx9.prt_level_offset[req->memoryRequirements.imageMipTailFirstLod] & ~65535;
- req->memoryRequirements.imageMipTailStride =
- image->planes[0].surface.u.gfx9.surf_slice_size;
- } else {
- req->memoryRequirements.imageMipTailOffset =
- image->planes[0].surface.u.legacy.level[req->memoryRequirements.imageMipTailFirstLod ].offset;
- req->memoryRequirements.imageMipTailSize =
- image->size - req->memoryRequirements.imageMipTailOffset;
- req->memoryRequirements.imageMipTailStride = 0;
- }
- } else {
- req->memoryRequirements.imageMipTailSize = 0;
- req->memoryRequirements.imageMipTailOffset = 0;
- req->memoryRequirements.imageMipTailStride = 0;
- }
- };
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_image, image, pInfo->image);
+
+ if (!(image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT)) {
+ *pSparseMemoryRequirementCount = 0;
+ return;
+ }
+
+ VK_OUTARRAY_MAKE_TYPED(VkSparseImageMemoryRequirements2, out, pSparseMemoryRequirements,
+ pSparseMemoryRequirementCount);
+
+ vk_outarray_append_typed(VkSparseImageMemoryRequirements2, &out, req)
+ {
+ fill_sparse_image_format_properties(device->physical_device, image->vk_format,
+ &req->memoryRequirements.formatProperties);
+ req->memoryRequirements.imageMipTailFirstLod = image->planes[0].surface.first_mip_tail_level;
+
+ if (req->memoryRequirements.imageMipTailFirstLod < image->info.levels) {
+ if (device->physical_device->rad_info.chip_class >= GFX9) {
+ /* The tail is always a single tile per layer. */
+ req->memoryRequirements.imageMipTailSize = 65536;
+ req->memoryRequirements.imageMipTailOffset =
+ image->planes[0]
+ .surface.u.gfx9.prt_level_offset[req->memoryRequirements.imageMipTailFirstLod] &
+ ~65535;
+ req->memoryRequirements.imageMipTailStride =
+ image->planes[0].surface.u.gfx9.surf_slice_size;
+ } else {
+ req->memoryRequirements.imageMipTailOffset =
+ image->planes[0]
+ .surface.u.legacy.level[req->memoryRequirements.imageMipTailFirstLod]
+ .offset;
+ req->memoryRequirements.imageMipTailSize =
+ image->size - req->memoryRequirements.imageMipTailOffset;
+ req->memoryRequirements.imageMipTailStride = 0;
+ }
+ } else {
+ req->memoryRequirements.imageMipTailSize = 0;
+ req->memoryRequirements.imageMipTailOffset = 0;
+ req->memoryRequirements.imageMipTailStride = 0;
+ }
+ };
}
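
On the GFX9+ branch above, the mask ~65535 rounds the first tail level's offset
down to a 64 KiB tile boundary, and the per-layer stride is the slice size. A
small worked check, using an arbitrary example offset chosen for illustration:

    #include <assert.h>
    #include <stdint.h>

    static void
    check_miptail_offset(void)
    {
       /* Arbitrary example: a first tail level starting at byte offset 0x2A340. */
       const uint64_t level_offset = 0x2A340;
       /* Rounded down to the containing 64 KiB tile, as the code above does. */
       assert((level_offset & ~(uint64_t)65535) == 0x20000);
    }
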
-void radv_GetImageSparseMemoryRequirements(
- VkDevice device,
- VkImage image,
- uint32_t* pSparseMemoryRequirementCount,
- VkSparseImageMemoryRequirements* pSparseMemoryRequirements)
+void
+radv_GetImageSparseMemoryRequirements(VkDevice device, VkImage image,
+ uint32_t *pSparseMemoryRequirementCount,
+ VkSparseImageMemoryRequirements *pSparseMemoryRequirements)
{
- const VkImageSparseMemoryRequirementsInfo2 info = {
- .sType = VK_STRUCTURE_TYPE_IMAGE_SPARSE_MEMORY_REQUIREMENTS_INFO_2,
- .image = image
- };
-
- if (!pSparseMemoryRequirements) {
- radv_GetImageSparseMemoryRequirements2(device, &info,
- pSparseMemoryRequirementCount, NULL);
- return;
- }
-
- VkSparseImageMemoryRequirements2 reqs[4];
- uint32_t reqs_cnt = MIN2(ARRAY_SIZE(reqs), *pSparseMemoryRequirementCount);
-
- memset(reqs, 0, sizeof(reqs));
- for (unsigned i = 0; i < ARRAY_SIZE(reqs); ++i)
- reqs[i].sType = VK_STRUCTURE_TYPE_SPARSE_IMAGE_MEMORY_REQUIREMENTS_2;
-
- radv_GetImageSparseMemoryRequirements2(device, &info,
- &reqs_cnt, reqs);
-
- for (unsigned i = 0; i < reqs_cnt; ++i)
- pSparseMemoryRequirements[i] = reqs[i].memoryRequirements;
- *pSparseMemoryRequirementCount = reqs_cnt;
+ const VkImageSparseMemoryRequirementsInfo2 info = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_SPARSE_MEMORY_REQUIREMENTS_INFO_2,
+ .image = image};
+
+ if (!pSparseMemoryRequirements) {
+ radv_GetImageSparseMemoryRequirements2(device, &info, pSparseMemoryRequirementCount, NULL);
+ return;
+ }
+
+ VkSparseImageMemoryRequirements2 reqs[4];
+ uint32_t reqs_cnt = MIN2(ARRAY_SIZE(reqs), *pSparseMemoryRequirementCount);
+
+ memset(reqs, 0, sizeof(reqs));
+ for (unsigned i = 0; i < ARRAY_SIZE(reqs); ++i)
+ reqs[i].sType = VK_STRUCTURE_TYPE_SPARSE_IMAGE_MEMORY_REQUIREMENTS_2;
+
+ radv_GetImageSparseMemoryRequirements2(device, &info, &reqs_cnt, reqs);
+
+ for (unsigned i = 0; i < reqs_cnt; ++i)
+ pSparseMemoryRequirements[i] = reqs[i].memoryRequirements;
+ *pSparseMemoryRequirementCount = reqs_cnt;
}
-void radv_GetPhysicalDeviceExternalBufferProperties(
- VkPhysicalDevice physicalDevice,
- const VkPhysicalDeviceExternalBufferInfo *pExternalBufferInfo,
- VkExternalBufferProperties *pExternalBufferProperties)
+void
+radv_GetPhysicalDeviceExternalBufferProperties(
+ VkPhysicalDevice physicalDevice, const VkPhysicalDeviceExternalBufferInfo *pExternalBufferInfo,
+ VkExternalBufferProperties *pExternalBufferProperties)
{
- VkExternalMemoryFeatureFlagBits flags = 0;
- VkExternalMemoryHandleTypeFlags export_flags = 0;
- VkExternalMemoryHandleTypeFlags compat_flags = 0;
- switch(pExternalBufferInfo->handleType) {
- case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT:
- case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
- flags = VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT |
- VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
- compat_flags = export_flags = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT |
- VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
- break;
- case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT:
- flags = VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
- compat_flags = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT;
- break;
- default:
- break;
- }
- pExternalBufferProperties->externalMemoryProperties = (VkExternalMemoryProperties) {
- .externalMemoryFeatures = flags,
- .exportFromImportedHandleTypes = export_flags,
- .compatibleHandleTypes = compat_flags,
- };
+ VkExternalMemoryFeatureFlagBits flags = 0;
+ VkExternalMemoryHandleTypeFlags export_flags = 0;
+ VkExternalMemoryHandleTypeFlags compat_flags = 0;
+ switch (pExternalBufferInfo->handleType) {
+ case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT:
+ case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
+ flags = VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT | VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
+ compat_flags = export_flags = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT |
+ VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
+ break;
+ case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT:
+ flags = VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
+ compat_flags = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT;
+ break;
+ default:
+ break;
+ }
+ pExternalBufferProperties->externalMemoryProperties = (VkExternalMemoryProperties){
+ .externalMemoryFeatures = flags,
+ .exportFromImportedHandleTypes = export_flags,
+ .compatibleHandleTypes = compat_flags,
+ };
}
/* DCC channel type categories within which formats can be reinterpreted
* while keeping the same DCC encoding. The swizzle must also match. */
enum dcc_channel_type {
- dcc_channel_float32,
- dcc_channel_uint32,
- dcc_channel_sint32,
- dcc_channel_float16,
- dcc_channel_uint16,
- dcc_channel_sint16,
- dcc_channel_uint_10_10_10_2,
- dcc_channel_uint8,
- dcc_channel_sint8,
- dcc_channel_incompatible,
+ dcc_channel_float32,
+ dcc_channel_uint32,
+ dcc_channel_sint32,
+ dcc_channel_float16,
+ dcc_channel_uint16,
+ dcc_channel_sint16,
+ dcc_channel_uint_10_10_10_2,
+ dcc_channel_uint8,
+ dcc_channel_sint8,
+ dcc_channel_incompatible,
};
/* Return the type of DCC encoding. */
static enum dcc_channel_type
radv_get_dcc_channel_type(const struct util_format_description *desc)
{
- int i;
-
- /* Find the first non-void channel. */
- for (i = 0; i < desc->nr_channels; i++)
- if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID)
- break;
- if (i == desc->nr_channels)
- return dcc_channel_incompatible;
-
- switch (desc->channel[i].size) {
- case 32:
- if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT)
- return dcc_channel_float32;
- if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED)
- return dcc_channel_uint32;
- return dcc_channel_sint32;
- case 16:
- if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT)
- return dcc_channel_float16;
- if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED)
- return dcc_channel_uint16;
- return dcc_channel_sint16;
- case 10:
- return dcc_channel_uint_10_10_10_2;
- case 8:
- if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED)
- return dcc_channel_uint8;
- return dcc_channel_sint8;
- default:
- return dcc_channel_incompatible;
- }
+ int i;
+
+ /* Find the first non-void channel. */
+ for (i = 0; i < desc->nr_channels; i++)
+ if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID)
+ break;
+ if (i == desc->nr_channels)
+ return dcc_channel_incompatible;
+
+ switch (desc->channel[i].size) {
+ case 32:
+ if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT)
+ return dcc_channel_float32;
+ if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED)
+ return dcc_channel_uint32;
+ return dcc_channel_sint32;
+ case 16:
+ if (desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT)
+ return dcc_channel_float16;
+ if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED)
+ return dcc_channel_uint16;
+ return dcc_channel_sint16;
+ case 10:
+ return dcc_channel_uint_10_10_10_2;
+ case 8:
+ if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED)
+ return dcc_channel_uint8;
+ return dcc_channel_sint8;
+ default:
+ return dcc_channel_incompatible;
+ }
}
/* Return if it's allowed to reinterpret one format as another with DCC enabled. */
-bool radv_dcc_formats_compatible(VkFormat format1,
- VkFormat format2)
+bool
+radv_dcc_formats_compatible(VkFormat format1, VkFormat format2)
{
- const struct util_format_description *desc1, *desc2;
- enum dcc_channel_type type1, type2;
- int i;
+ const struct util_format_description *desc1, *desc2;
+ enum dcc_channel_type type1, type2;
+ int i;
- if (format1 == format2)
- return true;
+ if (format1 == format2)
+ return true;
- desc1 = vk_format_description(format1);
- desc2 = vk_format_description(format2);
+ desc1 = vk_format_description(format1);
+ desc2 = vk_format_description(format2);
- if (desc1->nr_channels != desc2->nr_channels)
- return false;
+ if (desc1->nr_channels != desc2->nr_channels)
+ return false;
- /* Swizzles must be the same. */
- for (i = 0; i < desc1->nr_channels; i++)
- if (desc1->swizzle[i] <= PIPE_SWIZZLE_W &&
- desc2->swizzle[i] <= PIPE_SWIZZLE_W &&
- desc1->swizzle[i] != desc2->swizzle[i])
- return false;
+ /* Swizzles must be the same. */
+ for (i = 0; i < desc1->nr_channels; i++)
+ if (desc1->swizzle[i] <= PIPE_SWIZZLE_W && desc2->swizzle[i] <= PIPE_SWIZZLE_W &&
+ desc1->swizzle[i] != desc2->swizzle[i])
+ return false;
- type1 = radv_get_dcc_channel_type(desc1);
- type2 = radv_get_dcc_channel_type(desc2);
+ type1 = radv_get_dcc_channel_type(desc1);
+ type2 = radv_get_dcc_channel_type(desc2);
- return type1 != dcc_channel_incompatible &&
- type2 != dcc_channel_incompatible &&
- type1 == type2;
+ return type1 != dcc_channel_incompatible && type2 != dcc_channel_incompatible && type1 == type2;
}
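
A few concrete pairs worked through the rules above (equal channel counts,
matching swizzles, same channel-type category); the expected results are
derived from those rules and are illustrative, not exhaustive:

    /* radv_dcc_formats_compatible(VK_FORMAT_R8G8B8A8_UNORM, VK_FORMAT_R8G8B8A8_SRGB)  -> true
     *   (both resolve to dcc_channel_uint8 with identical swizzles)
     * radv_dcc_formats_compatible(VK_FORMAT_R8G8B8A8_UNORM, VK_FORMAT_R8G8B8A8_SNORM) -> false
     *   (uint8 vs. sint8 channel categories)
     * radv_dcc_formats_compatible(VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32_UINT)           -> false
     *   (float32 vs. uint32 channel categories)
     * radv_dcc_formats_compatible(VK_FORMAT_R16G16_SFLOAT, VK_FORMAT_R32_SFLOAT)      -> false
     *   (channel counts differ)
     */
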
-
diff --git a/src/amd/vulkan/radv_image.c b/src/amd/vulkan/radv_image.c
index 58324c36dde..deda051bd0c 100644
--- a/src/amd/vulkan/radv_image.c
+++ b/src/amd/vulkan/radv_image.c
@@ -26,257 +26,237 @@
*/
#include "drm-uapi/drm_fourcc.h"
+#include "util/debug.h"
+#include "util/u_atomic.h"
+#include "vulkan/util/vk_format.h"
#include "radv_debug.h"
#include "radv_private.h"
-#include "vk_format.h"
-#include "vk_util.h"
#include "radv_radeon_winsys.h"
#include "sid.h"
-#include "util/debug.h"
-#include "util/u_atomic.h"
-#include "vulkan/util/vk_format.h"
+#include "vk_format.h"
+#include "vk_util.h"
#include "gfx10_format_table.h"
-
static const VkImageUsageFlagBits RADV_IMAGE_USAGE_WRITE_BITS =
- VK_IMAGE_USAGE_TRANSFER_DST_BIT |
- VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
- VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT |
- VK_IMAGE_USAGE_STORAGE_BIT;
+ VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
+ VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_STORAGE_BIT;
static unsigned
-radv_choose_tiling(struct radv_device *device,
- const VkImageCreateInfo *pCreateInfo,
- VkFormat format)
+radv_choose_tiling(struct radv_device *device, const VkImageCreateInfo *pCreateInfo,
+ VkFormat format)
{
- if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) {
- assert(pCreateInfo->samples <= 1);
- return RADEON_SURF_MODE_LINEAR_ALIGNED;
- }
-
- /* MSAA resources must be 2D tiled. */
- if (pCreateInfo->samples > 1)
- return RADEON_SURF_MODE_2D;
-
- if (!vk_format_is_compressed(format) &&
- !vk_format_is_depth_or_stencil(format)
- && device->physical_device->rad_info.chip_class <= GFX8) {
- /* Choosing linear here causes hangs in some VK CTS tests on GFX9,
- * so only do it on GFX8 and older. */
- /* Textures with a very small height are recommended to be linear. */
- if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
- /* Only very thin and long 2D textures should benefit from
- * linear_aligned. */
- (pCreateInfo->extent.width > 8 && pCreateInfo->extent.height <= 2))
- return RADEON_SURF_MODE_LINEAR_ALIGNED;
- }
-
- return RADEON_SURF_MODE_2D;
+ if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) {
+ assert(pCreateInfo->samples <= 1);
+ return RADEON_SURF_MODE_LINEAR_ALIGNED;
+ }
+
+ /* MSAA resources must be 2D tiled. */
+ if (pCreateInfo->samples > 1)
+ return RADEON_SURF_MODE_2D;
+
+ if (!vk_format_is_compressed(format) && !vk_format_is_depth_or_stencil(format) &&
+ device->physical_device->rad_info.chip_class <= GFX8) {
+ /* Choosing linear here causes hangs in some VK CTS tests on GFX9,
+ * so only do it on GFX8 and older. */
+ /* Textures with a very small height are recommended to be linear. */
+ if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
+ /* Only very thin and long 2D textures should benefit from
+ * linear_aligned. */
+ (pCreateInfo->extent.width > 8 && pCreateInfo->extent.height <= 2))
+ return RADEON_SURF_MODE_LINEAR_ALIGNED;
+ }
+
+ return RADEON_SURF_MODE_2D;
}
static bool
-radv_use_tc_compat_htile_for_image(struct radv_device *device,
- const VkImageCreateInfo *pCreateInfo,
- VkFormat format)
+radv_use_tc_compat_htile_for_image(struct radv_device *device, const VkImageCreateInfo *pCreateInfo,
+ VkFormat format)
{
- /* TC-compat HTILE is only available for GFX8+. */
- if (device->physical_device->rad_info.chip_class < GFX8)
- return false;
-
- if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT))
- return false;
-
- if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
- return false;
-
- /* Do not enable TC-compatible HTILE if the image isn't readable by a
- * shader because no texture fetches will happen.
- */
- if (!(pCreateInfo->usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
- VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT |
- VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
- return false;
-
- if (device->physical_device->rad_info.chip_class < GFX9) {
- /* TC-compat HTILE for MSAA depth/stencil images is broken
- * on GFX8 because the tiling doesn't match.
- */
- if (pCreateInfo->samples >= 2 && format == VK_FORMAT_D32_SFLOAT_S8_UINT)
- return false;
-
- /* GFX9+ supports compression for both 32-bit and 16-bit depth
- * surfaces, while GFX8 only supports 32-bit natively. However,
- * the driver allows TC-compat HTILE for 16-bit depth surfaces
- * without Z-plane compression.
- */
- if (format != VK_FORMAT_D32_SFLOAT_S8_UINT &&
- format != VK_FORMAT_D32_SFLOAT &&
- format != VK_FORMAT_D16_UNORM)
- return false;
- }
-
- return true;
+ /* TC-compat HTILE is only available for GFX8+. */
+ if (device->physical_device->rad_info.chip_class < GFX8)
+ return false;
+
+ if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT))
+ return false;
+
+ if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
+ return false;
+
+ /* Do not enable TC-compatible HTILE if the image isn't readable by a
+ * shader because no texture fetches will happen.
+ */
+ if (!(pCreateInfo->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT |
+ VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
+ return false;
+
+ if (device->physical_device->rad_info.chip_class < GFX9) {
+ /* TC-compat HTILE for MSAA depth/stencil images is broken
+ * on GFX8 because the tiling doesn't match.
+ */
+ if (pCreateInfo->samples >= 2 && format == VK_FORMAT_D32_SFLOAT_S8_UINT)
+ return false;
+
+ /* GFX9+ supports compression for both 32-bit and 16-bit depth
+ * surfaces, while GFX8 only supports 32-bit natively. However,
+ * the driver allows TC-compat HTILE for 16-bit depth surfaces
+ * without Z-plane compression.
+ */
+ if (format != VK_FORMAT_D32_SFLOAT_S8_UINT && format != VK_FORMAT_D32_SFLOAT &&
+ format != VK_FORMAT_D16_UNORM)
+ return false;
+ }
+
+ return true;
}
static bool
radv_surface_has_scanout(struct radv_device *device, const struct radv_image_create_info *info)
{
- if (info->bo_metadata) {
- if (device->physical_device->rad_info.chip_class >= GFX9)
- return info->bo_metadata->u.gfx9.scanout;
- else
- return info->bo_metadata->u.legacy.scanout;
- }
-
- return info->scanout;
+ if (info->bo_metadata) {
+ if (device->physical_device->rad_info.chip_class >= GFX9)
+ return info->bo_metadata->u.gfx9.scanout;
+ else
+ return info->bo_metadata->u.legacy.scanout;
+ }
+
+ return info->scanout;
}
static bool
radv_image_use_fast_clear_for_image(const struct radv_device *device,
const struct radv_image *image)
{
- if (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS)
- return true;
-
- if (image->info.samples <= 1 &&
- image->info.width * image->info.height <= 512 * 512) {
- /* Do not enable CMASK or DCC for small surfaces where the cost
- * of the eliminate pass can be higher than the benefit of fast
- * clear. RadeonSI does this, but the image threshold is
- * different.
- */
- return false;
- }
-
- return image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT &&
- (image->exclusive ||
- /* Enable DCC for concurrent images if stores are
- * supported because that means we can keep DCC compressed on
- * all layouts/queues.
- */
- radv_image_use_dcc_image_stores(device, image));
+ if (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS)
+ return true;
+
+ if (image->info.samples <= 1 && image->info.width * image->info.height <= 512 * 512) {
+ /* Do not enable CMASK or DCC for small surfaces where the cost
+ * of the eliminate pass can be higher than the benefit of fast
+ * clear. RadeonSI does this, but the image threshold is
+ * different.
+ */
+ return false;
+ }
+
+ return image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT &&
+ (image->exclusive ||
+ /* Enable DCC for concurrent images if stores are
+ * supported because that means we can keep DCC compressed on
+ * all layouts/queues.
+ */
+ radv_image_use_dcc_image_stores(device, image));
}
bool
-radv_are_formats_dcc_compatible(const struct radv_physical_device *pdev,
- const void *pNext, VkFormat format,
- VkImageCreateFlags flags)
+radv_are_formats_dcc_compatible(const struct radv_physical_device *pdev, const void *pNext,
+ VkFormat format, VkImageCreateFlags flags)
{
- bool blendable;
-
- if (!radv_is_colorbuffer_format_supported(pdev,
- format, &blendable))
- return false;
-
- if (flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
- const struct VkImageFormatListCreateInfo *format_list =
- (const struct VkImageFormatListCreateInfo *)
- vk_find_struct_const(pNext,
- IMAGE_FORMAT_LIST_CREATE_INFO);
-
- /* We have to ignore the existence of the list if viewFormatCount = 0 */
- if (format_list && format_list->viewFormatCount) {
- /* compatibility is transitive, so we only need to check
- * one format with everything else. */
- for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
- if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
- continue;
-
- if (!radv_dcc_formats_compatible(format,
- format_list->pViewFormats[i]))
- return false;
- }
- } else {
- return false;
- }
- }
-
- return true;
+ bool blendable;
+
+ if (!radv_is_colorbuffer_format_supported(pdev, format, &blendable))
+ return false;
+
+ if (flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
+ const struct VkImageFormatListCreateInfo *format_list =
+ (const struct VkImageFormatListCreateInfo *)vk_find_struct_const(
+ pNext, IMAGE_FORMAT_LIST_CREATE_INFO);
+
+ /* We have to ignore the existence of the list if viewFormatCount = 0 */
+ if (format_list && format_list->viewFormatCount) {
+ /* compatibility is transitive, so we only need to check
+ * one format with everything else. */
+ for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
+ if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
+ continue;
+
+ if (!radv_dcc_formats_compatible(format, format_list->pViewFormats[i]))
+ return false;
+ }
+ } else {
+ return false;
+ }
+ }
+
+ return true;
}
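
A short application-side sketch of the case handled above: a MUTABLE_FORMAT
image that can still qualify for DCC because every view format in the explicit
list is DCC-compatible with the base format (the concrete formats and sizes are
illustrative choices):

    #include <vulkan/vulkan.h>

    static const VkFormat view_formats[] = {
       VK_FORMAT_R8G8B8A8_UNORM,
       VK_FORMAT_R8G8B8A8_SRGB, /* same channel type and swizzle -> DCC-compatible */
    };

    static const VkImageFormatListCreateInfo format_list = {
       .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO,
       .viewFormatCount = 2,
       .pViewFormats = view_formats,
    };

    static const VkImageCreateInfo image_info = {
       .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
       .pNext = &format_list,
       .flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT,
       .imageType = VK_IMAGE_TYPE_2D,
       .format = VK_FORMAT_R8G8B8A8_UNORM,
       .extent = {1024, 1024, 1},
       .mipLevels = 1,
       .arrayLayers = 1,
       .samples = VK_SAMPLE_COUNT_1_BIT,
       .tiling = VK_IMAGE_TILING_OPTIMAL,
       .usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
       .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
       .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
    };
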
static bool
-radv_formats_is_atomic_allowed(const void *pNext, VkFormat format,
- VkImageCreateFlags flags)
+radv_formats_is_atomic_allowed(const void *pNext, VkFormat format, VkImageCreateFlags flags)
{
- if (radv_is_atomic_format_supported(format))
- return true;
-
- if (flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
- const struct VkImageFormatListCreateInfo *format_list =
- (const struct VkImageFormatListCreateInfo *)
- vk_find_struct_const(pNext,
- IMAGE_FORMAT_LIST_CREATE_INFO);
-
- /* We have to ignore the existence of the list if viewFormatCount = 0 */
- if (format_list && format_list->viewFormatCount) {
- for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
- if (radv_is_atomic_format_supported(format_list->pViewFormats[i]))
- return true;
- }
- }
- }
-
- return false;
+ if (radv_is_atomic_format_supported(format))
+ return true;
+
+ if (flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
+ const struct VkImageFormatListCreateInfo *format_list =
+ (const struct VkImageFormatListCreateInfo *)vk_find_struct_const(
+ pNext, IMAGE_FORMAT_LIST_CREATE_INFO);
+
+ /* We have to ignore the existence of the list if viewFormatCount = 0 */
+ if (format_list && format_list->viewFormatCount) {
+ for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
+ if (radv_is_atomic_format_supported(format_list->pViewFormats[i]))
+ return true;
+ }
+ }
+ }
+
+ return false;
}
static bool
-radv_use_dcc_for_image(struct radv_device *device,
- const struct radv_image *image,
- const VkImageCreateInfo *pCreateInfo,
- VkFormat format)
+radv_use_dcc_for_image(struct radv_device *device, const struct radv_image *image,
+ const VkImageCreateInfo *pCreateInfo, VkFormat format)
{
- /* DCC (Delta Color Compression) is only available for GFX8+. */
- if (device->physical_device->rad_info.chip_class < GFX8)
- return false;
-
- if (device->instance->debug_flags & RADV_DEBUG_NO_DCC)
- return false;
-
- if (image->shareable && image->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
- return false;
-
- /*
- * TODO: Enable DCC for storage images on GFX9 and earlier.
- *
- * Also disable DCC with atomics because even when DCC stores are
- * supported atomics will always decompress. So if we are
- * decompressing a lot anyway we might as well not have DCC.
- */
- if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) &&
- (!radv_image_use_dcc_image_stores(device, image) ||
- radv_formats_is_atomic_allowed(pCreateInfo->pNext, format, pCreateInfo->flags)))
- return false;
-
- if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
- return false;
-
- if (vk_format_is_subsampled(format) ||
- vk_format_get_plane_count(format) > 1)
- return false;
-
- if (!radv_image_use_fast_clear_for_image(device, image) &&
- image->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
- return false;
-
- /* Do not enable DCC for mipmapped arrays because performance is worse. */
- if (pCreateInfo->arrayLayers > 1 && pCreateInfo->mipLevels > 1)
- return false;
-
- if (device->physical_device->rad_info.chip_class < GFX10) {
- /* TODO: Add support for DCC MSAA on GFX8-9. */
- if (pCreateInfo->samples > 1 &&
- !device->physical_device->dcc_msaa_allowed)
- return false;
-
- /* TODO: Add support for DCC layers/mipmaps on GFX9. */
- if ((pCreateInfo->arrayLayers > 1 || pCreateInfo->mipLevels > 1) &&
- device->physical_device->rad_info.chip_class == GFX9)
- return false;
- }
-
- return radv_are_formats_dcc_compatible(device->physical_device,
- pCreateInfo->pNext, format,
- pCreateInfo->flags);
+ /* DCC (Delta Color Compression) is only available for GFX8+. */
+ if (device->physical_device->rad_info.chip_class < GFX8)
+ return false;
+
+ if (device->instance->debug_flags & RADV_DEBUG_NO_DCC)
+ return false;
+
+ if (image->shareable && image->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
+ return false;
+
+ /*
+ * TODO: Enable DCC for storage images on GFX9 and earlier.
+ *
+ * Also disable DCC with atomics because even when DCC stores are
+ * supported atomics will always decompress. So if we are
+ * decompressing a lot anyway we might as well not have DCC.
+ */
+ if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) &&
+ (!radv_image_use_dcc_image_stores(device, image) ||
+ radv_formats_is_atomic_allowed(pCreateInfo->pNext, format, pCreateInfo->flags)))
+ return false;
+
+ if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
+ return false;
+
+ if (vk_format_is_subsampled(format) || vk_format_get_plane_count(format) > 1)
+ return false;
+
+ if (!radv_image_use_fast_clear_for_image(device, image) &&
+ image->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
+ return false;
+
+ /* Do not enable DCC for mipmapped arrays because performance is worse. */
+ if (pCreateInfo->arrayLayers > 1 && pCreateInfo->mipLevels > 1)
+ return false;
+
+ if (device->physical_device->rad_info.chip_class < GFX10) {
+ /* TODO: Add support for DCC MSAA on GFX8-9. */
+ if (pCreateInfo->samples > 1 && !device->physical_device->dcc_msaa_allowed)
+ return false;
+
+ /* TODO: Add support for DCC layers/mipmaps on GFX9. */
+ if ((pCreateInfo->arrayLayers > 1 || pCreateInfo->mipLevels > 1) &&
+ device->physical_device->rad_info.chip_class == GFX9)
+ return false;
+ }
+
+ return radv_are_formats_dcc_compatible(device->physical_device, pCreateInfo->pNext, format,
+ pCreateInfo->flags);
}
/*
@@ -289,18 +269,18 @@ radv_use_dcc_for_image(struct radv_device *device,
*
* This function assumes the image uses DCC compression.
*/
-bool radv_image_use_dcc_image_stores(const struct radv_device *device,
- const struct radv_image *image)
+bool
+radv_image_use_dcc_image_stores(const struct radv_device *device, const struct radv_image *image)
{
- /*
- * TODO: Enable on more HW. DIMGREY and VANGOGH need a workaround and
- * we need more perf analysis.
- * https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6796#note_643853
- */
- return device->physical_device->rad_info.chip_class == GFX10 ||
- (device->physical_device->rad_info.chip_class == GFX10_3 &&
- (device->instance->perftest_flags & RADV_PERFTEST_DCC_STORES) &&
- !device->physical_device->use_llvm);
+ /*
+ * TODO: Enable on more HW. DIMGREY and VANGOGH need a workaround and
+ * we need more perf analysis.
+ * https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6796#note_643853
+ */
+ return device->physical_device->rad_info.chip_class == GFX10 ||
+ (device->physical_device->rad_info.chip_class == GFX10_3 &&
+ (device->instance->perftest_flags & RADV_PERFTEST_DCC_STORES) &&
+ !device->physical_device->use_llvm);
}
/*
@@ -309,1584 +289,1485 @@ bool radv_image_use_dcc_image_stores(const struct radv_device *device,
*
* This function assumes the image uses DCC compression.
*/
-bool radv_image_use_dcc_predication(const struct radv_device *device,
- const struct radv_image *image)
+bool
+radv_image_use_dcc_predication(const struct radv_device *device, const struct radv_image *image)
{
- return !radv_image_use_dcc_image_stores(device, image);
+ return !radv_image_use_dcc_image_stores(device, image);
}
static inline bool
-radv_use_fmask_for_image(const struct radv_device *device,
- const struct radv_image *image)
+radv_use_fmask_for_image(const struct radv_device *device, const struct radv_image *image)
{
- return image->info.samples > 1 &&
- ((image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) ||
- (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS));
+ return image->info.samples > 1 && ((image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) ||
+ (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS));
}
static inline bool
-radv_use_htile_for_image(const struct radv_device *device,
- const struct radv_image *image)
+radv_use_htile_for_image(const struct radv_device *device, const struct radv_image *image)
{
- /* TODO:
-	 * - Investigate mips+layers.
- * - Enable on other gens.
- */
- bool use_htile_for_mips = image->info.array_size == 1 &&
- device->physical_device->rad_info.chip_class >= GFX10;
-
- return (image->info.levels == 1 || use_htile_for_mips) &&
- !image->shareable &&
- ((image->info.width * image->info.height >= 8 * 8) ||
- (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS));
+ /* TODO:
+    * - Investigate mips+layers.
+ * - Enable on other gens.
+ */
+ bool use_htile_for_mips =
+ image->info.array_size == 1 && device->physical_device->rad_info.chip_class >= GFX10;
+
+ return (image->info.levels == 1 || use_htile_for_mips) && !image->shareable &&
+ ((image->info.width * image->info.height >= 8 * 8) ||
+ (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS));
}
static bool
-radv_use_tc_compat_cmask_for_image(struct radv_device *device,
- struct radv_image *image)
+radv_use_tc_compat_cmask_for_image(struct radv_device *device, struct radv_image *image)
{
- /* TC-compat CMASK is only available for GFX8+. */
- if (device->physical_device->rad_info.chip_class < GFX8)
- return false;
-
- if (device->instance->debug_flags & RADV_DEBUG_NO_TC_COMPAT_CMASK)
- return false;
-
- /* TODO: Enable TC-compat CMASK on GFX8-9. */
- if (device->physical_device->rad_info.chip_class < GFX10 &&
- !(device->instance->perftest_flags & RADV_PERFTEST_TC_COMPAT_CMASK))
- return false;
-
- if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT)
- return false;
-
- /* Do not enable TC-compatible if the image isn't readable by a shader
- * because no texture fetches will happen.
- */
- if (!(image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
- VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT |
- VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
- return false;
-
- /* If the image doesn't have FMASK, it can't be fetchable. */
- if (!radv_image_has_fmask(image))
- return false;
-
- return true;
+ /* TC-compat CMASK is only available for GFX8+. */
+ if (device->physical_device->rad_info.chip_class < GFX8)
+ return false;
+
+ if (device->instance->debug_flags & RADV_DEBUG_NO_TC_COMPAT_CMASK)
+ return false;
+
+ /* TODO: Enable TC-compat CMASK on GFX8-9. */
+ if (device->physical_device->rad_info.chip_class < GFX10 &&
+ !(device->instance->perftest_flags & RADV_PERFTEST_TC_COMPAT_CMASK))
+ return false;
+
+ if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT)
+ return false;
+
+ /* Do not enable TC-compatible if the image isn't readable by a shader
+ * because no texture fetches will happen.
+ */
+ if (!(image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT |
+ VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
+ return false;
+
+ /* If the image doesn't have FMASK, it can't be fetchable. */
+ if (!radv_image_has_fmask(image))
+ return false;
+
+ return true;
}
-static uint32_t si_get_bo_metadata_word1(const struct radv_device *device)
+static uint32_t
+si_get_bo_metadata_word1(const struct radv_device *device)
{
- return (ATI_VENDOR_ID << 16) | device->physical_device->rad_info.pci_id;
+ return (ATI_VENDOR_ID << 16) | device->physical_device->rad_info.pci_id;
}
static bool
-radv_is_valid_opaque_metadata(const struct radv_device *device,
- const struct radeon_bo_metadata *md)
+radv_is_valid_opaque_metadata(const struct radv_device *device, const struct radeon_bo_metadata *md)
{
- if (md->metadata[0] != 1 ||
- md->metadata[1] != si_get_bo_metadata_word1(device))
- return false;
+ if (md->metadata[0] != 1 || md->metadata[1] != si_get_bo_metadata_word1(device))
+ return false;
- if (md->size_metadata < 40)
- return false;
+ if (md->size_metadata < 40)
+ return false;
- return true;
+ return true;
}
static void
-radv_patch_surface_from_metadata(struct radv_device *device,
- struct radeon_surf *surface,
+radv_patch_surface_from_metadata(struct radv_device *device, struct radeon_surf *surface,
const struct radeon_bo_metadata *md)
{
- surface->flags = RADEON_SURF_CLR(surface->flags, MODE);
-
- if (device->physical_device->rad_info.chip_class >= GFX9) {
- if (md->u.gfx9.swizzle_mode > 0)
- surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
- else
- surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);
-
- surface->u.gfx9.surf.swizzle_mode = md->u.gfx9.swizzle_mode;
- } else {
- surface->u.legacy.pipe_config = md->u.legacy.pipe_config;
- surface->u.legacy.bankw = md->u.legacy.bankw;
- surface->u.legacy.bankh = md->u.legacy.bankh;
- surface->u.legacy.tile_split = md->u.legacy.tile_split;
- surface->u.legacy.mtilea = md->u.legacy.mtilea;
- surface->u.legacy.num_banks = md->u.legacy.num_banks;
-
- if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
- surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
- else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
- surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_1D, MODE);
- else
- surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);
-
- }
+ surface->flags = RADEON_SURF_CLR(surface->flags, MODE);
+
+ if (device->physical_device->rad_info.chip_class >= GFX9) {
+ if (md->u.gfx9.swizzle_mode > 0)
+ surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
+ else
+ surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);
+
+ surface->u.gfx9.surf.swizzle_mode = md->u.gfx9.swizzle_mode;
+ } else {
+ surface->u.legacy.pipe_config = md->u.legacy.pipe_config;
+ surface->u.legacy.bankw = md->u.legacy.bankw;
+ surface->u.legacy.bankh = md->u.legacy.bankh;
+ surface->u.legacy.tile_split = md->u.legacy.tile_split;
+ surface->u.legacy.mtilea = md->u.legacy.mtilea;
+ surface->u.legacy.num_banks = md->u.legacy.num_banks;
+
+ if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
+ surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
+ else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
+ surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_1D, MODE);
+ else
+ surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);
+ }
}
static VkResult
-radv_patch_image_dimensions(struct radv_device *device,
- struct radv_image *image,
+radv_patch_image_dimensions(struct radv_device *device, struct radv_image *image,
const struct radv_image_create_info *create_info,
struct ac_surf_info *image_info)
{
- unsigned width = image->info.width;
- unsigned height = image->info.height;
-
- /*
- * minigbm sometimes allocates bigger images which is going to result in
-	 * weird strides and other properties. Let's be lenient where possible and
- * fail it on GFX10 (as we cannot cope there).
- *
- * Example hack: https://chromium-review.googlesource.com/c/chromiumos/platform/minigbm/+/1457777/
- */
- if (create_info->bo_metadata &&
- radv_is_valid_opaque_metadata(device, create_info->bo_metadata)) {
- const struct radeon_bo_metadata *md = create_info->bo_metadata;
-
- if (device->physical_device->rad_info.chip_class >= GFX10) {
- width = G_00A004_WIDTH_LO(md->metadata[3]) +
- (G_00A008_WIDTH_HI(md->metadata[4]) << 2) + 1;
- height = G_00A008_HEIGHT(md->metadata[4]) + 1;
- } else {
- width = G_008F18_WIDTH(md->metadata[4]) + 1;
- height = G_008F18_HEIGHT(md->metadata[4]) + 1;
- }
- }
-
- if (image->info.width == width && image->info.height == height)
- return VK_SUCCESS;
-
- if (width < image->info.width || height < image->info.height) {
- fprintf(stderr,
- "The imported image has smaller dimensions than the internal\n"
- "dimensions. Using it is going to fail badly, so we reject\n"
- "this import.\n"
- "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
- image->info.width, image->info.height, width, height);
- return VK_ERROR_INVALID_EXTERNAL_HANDLE;
- } else if (device->physical_device->rad_info.chip_class >= GFX10) {
- fprintf(stderr,
- "Tried to import an image with inconsistent width on GFX10.\n"
- "As GFX10 has no separate stride fields we cannot cope with\n"
- "an inconsistency in width and will fail this import.\n"
- "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
- image->info.width, image->info.height, width, height);
- return VK_ERROR_INVALID_EXTERNAL_HANDLE;
- } else {
- fprintf(stderr,
- "Tried to import an image with inconsistent width on pre-GFX10.\n"
- "As GFX10 has no separate stride fields we cannot cope with\n"
- "an inconsistency and would fail on GFX10.\n"
- "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
- image->info.width, image->info.height, width, height);
- }
- image_info->width = width;
- image_info->height = height;
-
- return VK_SUCCESS;
+ unsigned width = image->info.width;
+ unsigned height = image->info.height;
+
+ /*
+ * minigbm sometimes allocates bigger images which is going to result in
+    * weird strides and other properties. Let's be lenient where possible and
+ * fail it on GFX10 (as we cannot cope there).
+ *
+ * Example hack: https://chromium-review.googlesource.com/c/chromiumos/platform/minigbm/+/1457777/
+ */
+ if (create_info->bo_metadata &&
+ radv_is_valid_opaque_metadata(device, create_info->bo_metadata)) {
+ const struct radeon_bo_metadata *md = create_info->bo_metadata;
+
+ if (device->physical_device->rad_info.chip_class >= GFX10) {
+ width = G_00A004_WIDTH_LO(md->metadata[3]) + (G_00A008_WIDTH_HI(md->metadata[4]) << 2) + 1;
+ height = G_00A008_HEIGHT(md->metadata[4]) + 1;
+ } else {
+ width = G_008F18_WIDTH(md->metadata[4]) + 1;
+ height = G_008F18_HEIGHT(md->metadata[4]) + 1;
+ }
+ }
+
+ if (image->info.width == width && image->info.height == height)
+ return VK_SUCCESS;
+
+ if (width < image->info.width || height < image->info.height) {
+ fprintf(stderr,
+ "The imported image has smaller dimensions than the internal\n"
+ "dimensions. Using it is going to fail badly, so we reject\n"
+ "this import.\n"
+ "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
+ image->info.width, image->info.height, width, height);
+ return VK_ERROR_INVALID_EXTERNAL_HANDLE;
+ } else if (device->physical_device->rad_info.chip_class >= GFX10) {
+ fprintf(stderr,
+ "Tried to import an image with inconsistent width on GFX10.\n"
+ "As GFX10 has no separate stride fields we cannot cope with\n"
+ "an inconsistency in width and will fail this import.\n"
+ "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
+ image->info.width, image->info.height, width, height);
+ return VK_ERROR_INVALID_EXTERNAL_HANDLE;
+ } else {
+ fprintf(stderr,
+ "Tried to import an image with inconsistent width on pre-GFX10.\n"
+ "As GFX10 has no separate stride fields we cannot cope with\n"
+ "an inconsistency and would fail on GFX10.\n"
+ "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
+ image->info.width, image->info.height, width, height);
+ }
+ image_info->width = width;
+ image_info->height = height;
+
+ return VK_SUCCESS;
}
static VkResult
-radv_patch_image_from_extra_info(struct radv_device *device,
- struct radv_image *image,
+radv_patch_image_from_extra_info(struct radv_device *device, struct radv_image *image,
const struct radv_image_create_info *create_info,
struct ac_surf_info *image_info)
{
- VkResult result = radv_patch_image_dimensions(device, image, create_info, image_info);
- if (result != VK_SUCCESS)
- return result;
-
- for (unsigned plane = 0; plane < image->plane_count; ++plane) {
- if (create_info->bo_metadata) {
- radv_patch_surface_from_metadata(device, &image->planes[plane].surface,
- create_info->bo_metadata);
- }
-
- if (radv_surface_has_scanout(device, create_info)) {
- image->planes[plane].surface.flags |= RADEON_SURF_SCANOUT;
- if (device->instance->debug_flags & RADV_DEBUG_NO_DISPLAY_DCC)
- image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC;
-
- image->info.surf_index = NULL;
- }
- }
- return VK_SUCCESS;
+ VkResult result = radv_patch_image_dimensions(device, image, create_info, image_info);
+ if (result != VK_SUCCESS)
+ return result;
+
+ for (unsigned plane = 0; plane < image->plane_count; ++plane) {
+ if (create_info->bo_metadata) {
+ radv_patch_surface_from_metadata(device, &image->planes[plane].surface,
+ create_info->bo_metadata);
+ }
+
+ if (radv_surface_has_scanout(device, create_info)) {
+ image->planes[plane].surface.flags |= RADEON_SURF_SCANOUT;
+ if (device->instance->debug_flags & RADV_DEBUG_NO_DISPLAY_DCC)
+ image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC;
+
+ image->info.surf_index = NULL;
+ }
+ }
+ return VK_SUCCESS;
}
static uint64_t
-radv_get_surface_flags(struct radv_device *device,
- const struct radv_image *image,
- unsigned plane_id,
- const VkImageCreateInfo *pCreateInfo,
+radv_get_surface_flags(struct radv_device *device, const struct radv_image *image,
+ unsigned plane_id, const VkImageCreateInfo *pCreateInfo,
VkFormat image_format)
{
- uint64_t flags;
- unsigned array_mode = radv_choose_tiling(device, pCreateInfo, image_format);
- VkFormat format = vk_format_get_plane_format(image_format, plane_id);
- const struct util_format_description *desc = vk_format_description(format);
- bool is_depth, is_stencil;
-
- is_depth = util_format_has_depth(desc);
- is_stencil = util_format_has_stencil(desc);
-
- flags = RADEON_SURF_SET(array_mode, MODE);
-
- switch (pCreateInfo->imageType){
- case VK_IMAGE_TYPE_1D:
- if (pCreateInfo->arrayLayers > 1)
- flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE);
- else
- flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE);
- break;
- case VK_IMAGE_TYPE_2D:
- if (pCreateInfo->arrayLayers > 1)
- flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE);
- else
- flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE);
- break;
- case VK_IMAGE_TYPE_3D:
- flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE);
- break;
- default:
- unreachable("unhandled image type");
- }
-
- /* Required for clearing/initializing a specific layer on GFX8. */
- flags |= RADEON_SURF_CONTIGUOUS_DCC_LAYERS;
-
- if (is_depth) {
- flags |= RADEON_SURF_ZBUFFER;
-
- if (radv_use_htile_for_image(device, image) &&
- !(device->instance->debug_flags & RADV_DEBUG_NO_HIZ)) {
- if (radv_use_tc_compat_htile_for_image(device, pCreateInfo, image_format))
- flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
- } else {
- flags |= RADEON_SURF_NO_HTILE;
- }
- }
-
- if (is_stencil)
- flags |= RADEON_SURF_SBUFFER;
-
- if (device->physical_device->rad_info.chip_class >= GFX9 &&
- pCreateInfo->imageType == VK_IMAGE_TYPE_3D &&
- vk_format_get_blocksizebits(image_format) == 128 &&
- vk_format_is_compressed(image_format))
- flags |= RADEON_SURF_NO_RENDER_TARGET;
-
- if (!radv_use_dcc_for_image(device, image, pCreateInfo, image_format))
- flags |= RADEON_SURF_DISABLE_DCC;
-
- if (!radv_use_fmask_for_image(device, image))
- flags |= RADEON_SURF_NO_FMASK;
-
- if (pCreateInfo->flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT) {
- flags |= RADEON_SURF_PRT |
- RADEON_SURF_NO_FMASK |
- RADEON_SURF_NO_HTILE |
- RADEON_SURF_DISABLE_DCC;
- }
-
- return flags;
+ uint64_t flags;
+ unsigned array_mode = radv_choose_tiling(device, pCreateInfo, image_format);
+ VkFormat format = vk_format_get_plane_format(image_format, plane_id);
+ const struct util_format_description *desc = vk_format_description(format);
+ bool is_depth, is_stencil;
+
+ is_depth = util_format_has_depth(desc);
+ is_stencil = util_format_has_stencil(desc);
+
+ flags = RADEON_SURF_SET(array_mode, MODE);
+
+ switch (pCreateInfo->imageType) {
+ case VK_IMAGE_TYPE_1D:
+ if (pCreateInfo->arrayLayers > 1)
+ flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE);
+ else
+ flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE);
+ break;
+ case VK_IMAGE_TYPE_2D:
+ if (pCreateInfo->arrayLayers > 1)
+ flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE);
+ else
+ flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE);
+ break;
+ case VK_IMAGE_TYPE_3D:
+ flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE);
+ break;
+ default:
+ unreachable("unhandled image type");
+ }
+
+ /* Required for clearing/initializing a specific layer on GFX8. */
+ flags |= RADEON_SURF_CONTIGUOUS_DCC_LAYERS;
+
+ if (is_depth) {
+ flags |= RADEON_SURF_ZBUFFER;
+
+ if (radv_use_htile_for_image(device, image) &&
+ !(device->instance->debug_flags & RADV_DEBUG_NO_HIZ)) {
+ if (radv_use_tc_compat_htile_for_image(device, pCreateInfo, image_format))
+ flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
+ } else {
+ flags |= RADEON_SURF_NO_HTILE;
+ }
+ }
+
+ if (is_stencil)
+ flags |= RADEON_SURF_SBUFFER;
+
+ if (device->physical_device->rad_info.chip_class >= GFX9 &&
+ pCreateInfo->imageType == VK_IMAGE_TYPE_3D &&
+ vk_format_get_blocksizebits(image_format) == 128 && vk_format_is_compressed(image_format))
+ flags |= RADEON_SURF_NO_RENDER_TARGET;
+
+ if (!radv_use_dcc_for_image(device, image, pCreateInfo, image_format))
+ flags |= RADEON_SURF_DISABLE_DCC;
+
+ if (!radv_use_fmask_for_image(device, image))
+ flags |= RADEON_SURF_NO_FMASK;
+
+ if (pCreateInfo->flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT) {
+ flags |=
+ RADEON_SURF_PRT | RADEON_SURF_NO_FMASK | RADEON_SURF_NO_HTILE | RADEON_SURF_DISABLE_DCC;
+ }
+
+ return flags;
}
static inline unsigned
si_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil)
{
- if (stencil)
- return plane->surface.u.legacy.stencil_tiling_index[level];
- else
- return plane->surface.u.legacy.tiling_index[level];
+ if (stencil)
+ return plane->surface.u.legacy.stencil_tiling_index[level];
+ else
+ return plane->surface.u.legacy.tiling_index[level];
}
-static unsigned radv_map_swizzle(unsigned swizzle)
+static unsigned
+radv_map_swizzle(unsigned swizzle)
{
- switch (swizzle) {
- case PIPE_SWIZZLE_Y:
- return V_008F0C_SQ_SEL_Y;
- case PIPE_SWIZZLE_Z:
- return V_008F0C_SQ_SEL_Z;
- case PIPE_SWIZZLE_W:
- return V_008F0C_SQ_SEL_W;
- case PIPE_SWIZZLE_0:
- return V_008F0C_SQ_SEL_0;
- case PIPE_SWIZZLE_1:
- return V_008F0C_SQ_SEL_1;
- default: /* PIPE_SWIZZLE_X */
- return V_008F0C_SQ_SEL_X;
- }
+ switch (swizzle) {
+ case PIPE_SWIZZLE_Y:
+ return V_008F0C_SQ_SEL_Y;
+ case PIPE_SWIZZLE_Z:
+ return V_008F0C_SQ_SEL_Z;
+ case PIPE_SWIZZLE_W:
+ return V_008F0C_SQ_SEL_W;
+ case PIPE_SWIZZLE_0:
+ return V_008F0C_SQ_SEL_0;
+ case PIPE_SWIZZLE_1:
+ return V_008F0C_SQ_SEL_1;
+ default: /* PIPE_SWIZZLE_X */
+ return V_008F0C_SQ_SEL_X;
+ }
}
static void
-radv_compose_swizzle(const struct util_format_description *desc,
- const VkComponentMapping *mapping, enum pipe_swizzle swizzle[4])
+radv_compose_swizzle(const struct util_format_description *desc, const VkComponentMapping *mapping,
+ enum pipe_swizzle swizzle[4])
{
- if (desc->format == PIPE_FORMAT_R64_UINT || desc->format == PIPE_FORMAT_R64_SINT) {
- /* 64-bit formats only support storage images and storage images
- * require identity component mappings. We use 32-bit
- * instructions to access 64-bit images, so we need a special
- * case here.
- *
-	 * The zw components are 1,0 so that they can easily be used
- * by loads to create the w component, which has to be 0 for
- * NULL descriptors.
- */
- swizzle[0] = PIPE_SWIZZLE_X;
- swizzle[1] = PIPE_SWIZZLE_Y;
- swizzle[2] = PIPE_SWIZZLE_1;
- swizzle[3] = PIPE_SWIZZLE_0;
- } else if (!mapping) {
- for (unsigned i = 0; i < 4; i++)
- swizzle[i] = desc->swizzle[i];
- } else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
- const unsigned char swizzle_xxxx[4] = {
- PIPE_SWIZZLE_X, PIPE_SWIZZLE_0, PIPE_SWIZZLE_0, PIPE_SWIZZLE_1
- };
- vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
- } else {
- vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
- }
+ if (desc->format == PIPE_FORMAT_R64_UINT || desc->format == PIPE_FORMAT_R64_SINT) {
+ /* 64-bit formats only support storage images and storage images
+ * require identity component mappings. We use 32-bit
+ * instructions to access 64-bit images, so we need a special
+ * case here.
+ *
+       * The zw components are 1,0 so that they can easily be used
+ * by loads to create the w component, which has to be 0 for
+ * NULL descriptors.
+ */
+ swizzle[0] = PIPE_SWIZZLE_X;
+ swizzle[1] = PIPE_SWIZZLE_Y;
+ swizzle[2] = PIPE_SWIZZLE_1;
+ swizzle[3] = PIPE_SWIZZLE_0;
+ } else if (!mapping) {
+ for (unsigned i = 0; i < 4; i++)
+ swizzle[i] = desc->swizzle[i];
+ } else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
+ const unsigned char swizzle_xxxx[4] = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_0, PIPE_SWIZZLE_0,
+ PIPE_SWIZZLE_1};
+ vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
+ } else {
+ vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
+ }
}
static void
-radv_make_buffer_descriptor(struct radv_device *device,
- struct radv_buffer *buffer,
- VkFormat vk_format,
- unsigned offset,
- unsigned range,
- uint32_t *state)
+radv_make_buffer_descriptor(struct radv_device *device, struct radv_buffer *buffer,
+ VkFormat vk_format, unsigned offset, unsigned range, uint32_t *state)
{
- const struct util_format_description *desc;
- unsigned stride;
- uint64_t gpu_address = radv_buffer_get_va(buffer->bo);
- uint64_t va = gpu_address + buffer->offset;
- unsigned num_format, data_format;
- int first_non_void;
- enum pipe_swizzle swizzle[4];
- desc = vk_format_description(vk_format);
- first_non_void = vk_format_get_first_non_void_channel(vk_format);
- stride = desc->block.bits / 8;
-
- radv_compose_swizzle(desc, NULL, swizzle);
-
- va += offset;
- state[0] = va;
- state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
- S_008F04_STRIDE(stride);
-
- if (device->physical_device->rad_info.chip_class != GFX8 && stride) {
- range /= stride;
- }
-
- state[2] = range;
- state[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
- S_008F0C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
- S_008F0C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
- S_008F0C_DST_SEL_W(radv_map_swizzle(swizzle[3]));
-
- if (device->physical_device->rad_info.chip_class >= GFX10) {
- const struct gfx10_format *fmt = &gfx10_format_table[vk_format_to_pipe_format(vk_format)];
-
- /* OOB_SELECT chooses the out-of-bounds check:
- * - 0: (index >= NUM_RECORDS) || (offset >= STRIDE)
- * - 1: index >= NUM_RECORDS
- * - 2: NUM_RECORDS == 0
- * - 3: if SWIZZLE_ENABLE == 0: offset >= NUM_RECORDS
- * else: swizzle_address >= NUM_RECORDS
- */
- state[3] |= S_008F0C_FORMAT(fmt->img_format) |
- S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) |
- S_008F0C_RESOURCE_LEVEL(1);
- } else {
- num_format = radv_translate_buffer_numformat(desc, first_non_void);
- data_format = radv_translate_buffer_dataformat(desc, first_non_void);
-
- assert(data_format != V_008F0C_BUF_DATA_FORMAT_INVALID);
- assert(num_format != ~0);
-
- state[3] |= S_008F0C_NUM_FORMAT(num_format) |
- S_008F0C_DATA_FORMAT(data_format);
- }
+ const struct util_format_description *desc;
+ unsigned stride;
+ uint64_t gpu_address = radv_buffer_get_va(buffer->bo);
+ uint64_t va = gpu_address + buffer->offset;
+ unsigned num_format, data_format;
+ int first_non_void;
+ enum pipe_swizzle swizzle[4];
+ desc = vk_format_description(vk_format);
+ first_non_void = vk_format_get_first_non_void_channel(vk_format);
+ stride = desc->block.bits / 8;
+
+ radv_compose_swizzle(desc, NULL, swizzle);
+
+ va += offset;
+ state[0] = va;
+ state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride);
+
+ if (device->physical_device->rad_info.chip_class != GFX8 && stride) {
+ range /= stride;
+ }
+
+ state[2] = range;
+ state[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
+ S_008F0C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
+ S_008F0C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
+ S_008F0C_DST_SEL_W(radv_map_swizzle(swizzle[3]));
+
+ if (device->physical_device->rad_info.chip_class >= GFX10) {
+ const struct gfx10_format *fmt = &gfx10_format_table[vk_format_to_pipe_format(vk_format)];
+
+ /* OOB_SELECT chooses the out-of-bounds check:
+ * - 0: (index >= NUM_RECORDS) || (offset >= STRIDE)
+ * - 1: index >= NUM_RECORDS
+ * - 2: NUM_RECORDS == 0
+ * - 3: if SWIZZLE_ENABLE == 0: offset >= NUM_RECORDS
+ * else: swizzle_address >= NUM_RECORDS
+ */
+ state[3] |= S_008F0C_FORMAT(fmt->img_format) |
+ S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) |
+ S_008F0C_RESOURCE_LEVEL(1);
+ } else {
+ num_format = radv_translate_buffer_numformat(desc, first_non_void);
+ data_format = radv_translate_buffer_dataformat(desc, first_non_void);
+
+ assert(data_format != V_008F0C_BUF_DATA_FORMAT_INVALID);
+ assert(num_format != ~0);
+
+ state[3] |= S_008F0C_NUM_FORMAT(num_format) | S_008F0C_DATA_FORMAT(data_format);
+ }
}
static void
-si_set_mutable_tex_desc_fields(struct radv_device *device,
- struct radv_image *image,
- const struct legacy_surf_level *base_level_info,
- unsigned plane_id,
- unsigned base_level, unsigned first_level,
- unsigned block_width, bool is_stencil,
- bool is_storage_image, bool disable_compression, bool enable_write_compression,
- uint32_t *state)
+si_set_mutable_tex_desc_fields(struct radv_device *device, struct radv_image *image,
+ const struct legacy_surf_level *base_level_info, unsigned plane_id,
+ unsigned base_level, unsigned first_level, unsigned block_width,
+ bool is_stencil, bool is_storage_image, bool disable_compression,
+ bool enable_write_compression, uint32_t *state)
{
- struct radv_image_plane *plane = &image->planes[plane_id];
- uint64_t gpu_address = image->bo ? radv_buffer_get_va(image->bo) + image->offset : 0;
- uint64_t va = gpu_address;
- enum chip_class chip_class = device->physical_device->rad_info.chip_class;
- uint64_t meta_va = 0;
- if (chip_class >= GFX9) {
- if (is_stencil)
- va += plane->surface.u.gfx9.stencil_offset;
- else
- va += plane->surface.u.gfx9.surf_offset;
- } else
- va += base_level_info->offset;
-
- state[0] = va >> 8;
- if (chip_class >= GFX9 ||
- base_level_info->mode == RADEON_SURF_MODE_2D)
- state[0] |= plane->surface.tile_swizzle;
- state[1] &= C_008F14_BASE_ADDRESS_HI;
- state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
-
- if (chip_class >= GFX8) {
- state[6] &= C_008F28_COMPRESSION_EN;
- state[7] = 0;
- if (!disable_compression && radv_dcc_enabled(image, first_level)) {
- meta_va = gpu_address + plane->surface.dcc_offset;
- if (chip_class <= GFX8)
- meta_va += base_level_info->dcc_offset;
-
- unsigned dcc_tile_swizzle = plane->surface.tile_swizzle << 8;
- dcc_tile_swizzle &= plane->surface.dcc_alignment - 1;
- meta_va |= dcc_tile_swizzle;
- } else if (!disable_compression &&
- radv_image_is_tc_compat_htile(image)) {
- meta_va = gpu_address + plane->surface.htile_offset;
- }
-
- if (meta_va) {
- state[6] |= S_008F28_COMPRESSION_EN(1);
- if (chip_class <= GFX9)
- state[7] = meta_va >> 8;
- }
- }
-
- if (chip_class >= GFX10) {
- state[3] &= C_00A00C_SW_MODE;
-
- if (is_stencil) {
- state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.stencil.swizzle_mode);
- } else {
- state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.surf.swizzle_mode);
- }
-
- state[6] &= C_00A018_META_DATA_ADDRESS_LO &
- C_00A018_META_PIPE_ALIGNED;
-
- if (meta_va) {
- struct gfx9_surf_meta_flags meta = {
- .rb_aligned = 1,
- .pipe_aligned = 1,
- };
-
- if (plane->surface.dcc_offset)
- meta = plane->surface.u.gfx9.dcc;
-
- if (radv_dcc_enabled(image, first_level) &&
- is_storage_image && enable_write_compression)
- state[6] |= S_00A018_WRITE_COMPRESS_ENABLE(1);
-
- state[6] |= S_00A018_META_PIPE_ALIGNED(meta.pipe_aligned) |
- S_00A018_META_DATA_ADDRESS_LO(meta_va >> 8);
- }
-
- state[7] = meta_va >> 16;
- } else if (chip_class == GFX9) {
- state[3] &= C_008F1C_SW_MODE;
- state[4] &= C_008F20_PITCH;
-
- if (is_stencil) {
- state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.stencil.swizzle_mode);
- state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.stencil.epitch);
- } else {
- state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.surf.swizzle_mode);
- state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.surf.epitch);
- }
-
- state[5] &= C_008F24_META_DATA_ADDRESS &
- C_008F24_META_PIPE_ALIGNED &
- C_008F24_META_RB_ALIGNED;
- if (meta_va) {
- struct gfx9_surf_meta_flags meta = {
- .rb_aligned = 1,
- .pipe_aligned = 1,
- };
-
- if (plane->surface.dcc_offset)
- meta = plane->surface.u.gfx9.dcc;
-
- state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
- S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
- S_008F24_META_RB_ALIGNED(meta.rb_aligned);
- }
- } else {
- /* GFX6-GFX8 */
- unsigned pitch = base_level_info->nblk_x * block_width;
- unsigned index = si_tile_mode_index(plane, base_level, is_stencil);
-
- state[3] &= C_008F1C_TILING_INDEX;
- state[3] |= S_008F1C_TILING_INDEX(index);
- state[4] &= C_008F20_PITCH;
- state[4] |= S_008F20_PITCH(pitch - 1);
- }
+ struct radv_image_plane *plane = &image->planes[plane_id];
+ uint64_t gpu_address = image->bo ? radv_buffer_get_va(image->bo) + image->offset : 0;
+ uint64_t va = gpu_address;
+ enum chip_class chip_class = device->physical_device->rad_info.chip_class;
+ uint64_t meta_va = 0;
+ if (chip_class >= GFX9) {
+ if (is_stencil)
+ va += plane->surface.u.gfx9.stencil_offset;
+ else
+ va += plane->surface.u.gfx9.surf_offset;
+ } else
+ va += base_level_info->offset;
+
+ state[0] = va >> 8;
+ if (chip_class >= GFX9 || base_level_info->mode == RADEON_SURF_MODE_2D)
+ state[0] |= plane->surface.tile_swizzle;
+ state[1] &= C_008F14_BASE_ADDRESS_HI;
+ state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
+
+ if (chip_class >= GFX8) {
+ state[6] &= C_008F28_COMPRESSION_EN;
+ state[7] = 0;
+ if (!disable_compression && radv_dcc_enabled(image, first_level)) {
+ meta_va = gpu_address + plane->surface.dcc_offset;
+ if (chip_class <= GFX8)
+ meta_va += base_level_info->dcc_offset;
+
+ unsigned dcc_tile_swizzle = plane->surface.tile_swizzle << 8;
+ dcc_tile_swizzle &= plane->surface.dcc_alignment - 1;
+ meta_va |= dcc_tile_swizzle;
+ } else if (!disable_compression && radv_image_is_tc_compat_htile(image)) {
+ meta_va = gpu_address + plane->surface.htile_offset;
+ }
+
+ if (meta_va) {
+ state[6] |= S_008F28_COMPRESSION_EN(1);
+ if (chip_class <= GFX9)
+ state[7] = meta_va >> 8;
+ }
+ }
+
+ if (chip_class >= GFX10) {
+ state[3] &= C_00A00C_SW_MODE;
+
+ if (is_stencil) {
+ state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.stencil.swizzle_mode);
+ } else {
+ state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.surf.swizzle_mode);
+ }
+
+ state[6] &= C_00A018_META_DATA_ADDRESS_LO & C_00A018_META_PIPE_ALIGNED;
+
+ if (meta_va) {
+ struct gfx9_surf_meta_flags meta = {
+ .rb_aligned = 1,
+ .pipe_aligned = 1,
+ };
+
+ if (plane->surface.dcc_offset)
+ meta = plane->surface.u.gfx9.dcc;
+
+ if (radv_dcc_enabled(image, first_level) && is_storage_image && enable_write_compression)
+ state[6] |= S_00A018_WRITE_COMPRESS_ENABLE(1);
+
+ state[6] |= S_00A018_META_PIPE_ALIGNED(meta.pipe_aligned) |
+ S_00A018_META_DATA_ADDRESS_LO(meta_va >> 8);
+ }
+
+ state[7] = meta_va >> 16;
+ } else if (chip_class == GFX9) {
+ state[3] &= C_008F1C_SW_MODE;
+ state[4] &= C_008F20_PITCH;
+
+ if (is_stencil) {
+ state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.stencil.swizzle_mode);
+ state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.stencil.epitch);
+ } else {
+ state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.surf.swizzle_mode);
+ state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.surf.epitch);
+ }
+
+ state[5] &=
+ C_008F24_META_DATA_ADDRESS & C_008F24_META_PIPE_ALIGNED & C_008F24_META_RB_ALIGNED;
+ if (meta_va) {
+ struct gfx9_surf_meta_flags meta = {
+ .rb_aligned = 1,
+ .pipe_aligned = 1,
+ };
+
+ if (plane->surface.dcc_offset)
+ meta = plane->surface.u.gfx9.dcc;
+
+ state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
+ S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
+ S_008F24_META_RB_ALIGNED(meta.rb_aligned);
+ }
+ } else {
+ /* GFX6-GFX8 */
+ unsigned pitch = base_level_info->nblk_x * block_width;
+ unsigned index = si_tile_mode_index(plane, base_level, is_stencil);
+
+ state[3] &= C_008F1C_TILING_INDEX;
+ state[3] |= S_008F1C_TILING_INDEX(index);
+ state[4] &= C_008F20_PITCH;
+ state[4] |= S_008F20_PITCH(pitch - 1);
+ }
}
-static unsigned radv_tex_dim(VkImageType image_type, VkImageViewType view_type,
- unsigned nr_layers, unsigned nr_samples, bool is_storage_image, bool gfx9)
+static unsigned
+radv_tex_dim(VkImageType image_type, VkImageViewType view_type, unsigned nr_layers,
+ unsigned nr_samples, bool is_storage_image, bool gfx9)
{
- if (view_type == VK_IMAGE_VIEW_TYPE_CUBE || view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
- return is_storage_image ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_CUBE;
-
- /* GFX9 allocates 1D textures as 2D. */
- if (gfx9 && image_type == VK_IMAGE_TYPE_1D)
- image_type = VK_IMAGE_TYPE_2D;
- switch (image_type) {
- case VK_IMAGE_TYPE_1D:
- return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_1D_ARRAY : V_008F1C_SQ_RSRC_IMG_1D;
- case VK_IMAGE_TYPE_2D:
- if (nr_samples > 1)
- return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : V_008F1C_SQ_RSRC_IMG_2D_MSAA;
- else
- return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_2D;
- case VK_IMAGE_TYPE_3D:
- if (view_type == VK_IMAGE_VIEW_TYPE_3D)
- return V_008F1C_SQ_RSRC_IMG_3D;
- else
- return V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
- default:
- unreachable("illegal image type");
- }
+ if (view_type == VK_IMAGE_VIEW_TYPE_CUBE || view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
+ return is_storage_image ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_CUBE;
+
+ /* GFX9 allocates 1D textures as 2D. */
+ if (gfx9 && image_type == VK_IMAGE_TYPE_1D)
+ image_type = VK_IMAGE_TYPE_2D;
+ switch (image_type) {
+ case VK_IMAGE_TYPE_1D:
+ return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_1D_ARRAY : V_008F1C_SQ_RSRC_IMG_1D;
+ case VK_IMAGE_TYPE_2D:
+ if (nr_samples > 1)
+ return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : V_008F1C_SQ_RSRC_IMG_2D_MSAA;
+ else
+ return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_2D;
+ case VK_IMAGE_TYPE_3D:
+ if (view_type == VK_IMAGE_VIEW_TYPE_3D)
+ return V_008F1C_SQ_RSRC_IMG_3D;
+ else
+ return V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
+ default:
+ unreachable("illegal image type");
+ }
}
-static unsigned gfx9_border_color_swizzle(const enum pipe_swizzle swizzle[4])
+static unsigned
+gfx9_border_color_swizzle(const enum pipe_swizzle swizzle[4])
{
- unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
-
- if (swizzle[3] == PIPE_SWIZZLE_X) {
- /* For the pre-defined border color values (white, opaque
- * black, transparent black), the only thing that matters is
- * that the alpha channel winds up in the correct place
- * (because the RGB channels are all the same) so either of
- * these enumerations will work.
- */
- if (swizzle[2] == PIPE_SWIZZLE_Y)
- bc_swizzle = V_008F20_BC_SWIZZLE_WZYX;
- else
- bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ;
- } else if (swizzle[0] == PIPE_SWIZZLE_X) {
- if (swizzle[1] == PIPE_SWIZZLE_Y)
- bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
- else
- bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ;
- } else if (swizzle[1] == PIPE_SWIZZLE_X) {
- bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ;
- } else if (swizzle[2] == PIPE_SWIZZLE_X) {
- bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW;
- }
-
- return bc_swizzle;
+ unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
+
+ if (swizzle[3] == PIPE_SWIZZLE_X) {
+ /* For the pre-defined border color values (white, opaque
+ * black, transparent black), the only thing that matters is
+ * that the alpha channel winds up in the correct place
+ * (because the RGB channels are all the same) so either of
+ * these enumerations will work.
+ */
+ if (swizzle[2] == PIPE_SWIZZLE_Y)
+ bc_swizzle = V_008F20_BC_SWIZZLE_WZYX;
+ else
+ bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ;
+ } else if (swizzle[0] == PIPE_SWIZZLE_X) {
+ if (swizzle[1] == PIPE_SWIZZLE_Y)
+ bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
+ else
+ bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ;
+ } else if (swizzle[1] == PIPE_SWIZZLE_X) {
+ bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ;
+ } else if (swizzle[2] == PIPE_SWIZZLE_X) {
+ bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW;
+ }
+
+ return bc_swizzle;
}
-bool vi_alpha_is_on_msb(struct radv_device *device, VkFormat format)
+bool
+vi_alpha_is_on_msb(struct radv_device *device, VkFormat format)
{
- const struct util_format_description *desc = vk_format_description(format);
+ const struct util_format_description *desc = vk_format_description(format);
- if (device->physical_device->rad_info.chip_class >= GFX10 && desc->nr_channels == 1)
- return desc->swizzle[3] == PIPE_SWIZZLE_X;
+ if (device->physical_device->rad_info.chip_class >= GFX10 && desc->nr_channels == 1)
+ return desc->swizzle[3] == PIPE_SWIZZLE_X;
- return radv_translate_colorswap(format, false) <= 1;
+ return radv_translate_colorswap(format, false) <= 1;
}
/**
* Build the sampler view descriptor for a texture (GFX10).
*/
static void
-gfx10_make_texture_descriptor(struct radv_device *device,
- struct radv_image *image,
- bool is_storage_image,
- VkImageViewType view_type,
- VkFormat vk_format,
- const VkComponentMapping *mapping,
- unsigned first_level, unsigned last_level,
- unsigned first_layer, unsigned last_layer,
- unsigned width, unsigned height, unsigned depth,
- uint32_t *state,
- uint32_t *fmask_state)
+gfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *image,
+ bool is_storage_image, VkImageViewType view_type, VkFormat vk_format,
+ const VkComponentMapping *mapping, unsigned first_level,
+ unsigned last_level, unsigned first_layer, unsigned last_layer,
+ unsigned width, unsigned height, unsigned depth, uint32_t *state,
+ uint32_t *fmask_state)
{
- const struct util_format_description *desc;
- enum pipe_swizzle swizzle[4];
- unsigned img_format;
- unsigned type;
-
- desc = vk_format_description(vk_format);
- img_format = gfx10_format_table[vk_format_to_pipe_format(vk_format)].img_format;
-
- radv_compose_swizzle(desc, mapping, swizzle);
-
- type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
- is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
- if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
- height = 1;
- depth = image->info.array_size;
- } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
- type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
- if (view_type != VK_IMAGE_VIEW_TYPE_3D)
- depth = image->info.array_size;
- } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
- depth = image->info.array_size / 6;
-
- state[0] = 0;
- state[1] = S_00A004_FORMAT(img_format) |
- S_00A004_WIDTH_LO(width - 1);
- state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) |
- S_00A008_HEIGHT(height - 1) |
- S_00A008_RESOURCE_LEVEL(1);
- state[3] = S_00A00C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
- S_00A00C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
- S_00A00C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
- S_00A00C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
- S_00A00C_BASE_LEVEL(image->info.samples > 1 ?
- 0 : first_level) |
- S_00A00C_LAST_LEVEL(image->info.samples > 1 ?
- util_logbase2(image->info.samples) :
- last_level) |
- S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(swizzle)) |
- S_00A00C_TYPE(type);
-	/* Depth is the last accessible layer on gfx9+. The hw doesn't need
- * to know the total number of layers.
- */
- state[4] = S_00A010_DEPTH(type == V_008F1C_SQ_RSRC_IMG_3D ? depth - 1 : last_layer) |
- S_00A010_BASE_ARRAY(first_layer);
- state[5] = S_00A014_ARRAY_PITCH(0) |
- S_00A014_MAX_MIP(image->info.samples > 1 ?
- util_logbase2(image->info.samples) :
- image->info.levels - 1) |
- S_00A014_PERF_MOD(4);
- state[6] = 0;
- state[7] = 0;
-
- if (radv_dcc_enabled(image, first_level)) {
- state[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
- S_00A018_MAX_COMPRESSED_BLOCK_SIZE(image->planes[0].surface.u.gfx9.dcc.max_compressed_block_size) |
- S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
- }
-
- /* Initialize the sampler view for FMASK. */
- if (fmask_state) {
- if (radv_image_has_fmask(image)) {
- uint64_t gpu_address = radv_buffer_get_va(image->bo);
- uint32_t format;
- uint64_t va;
-
- assert(image->plane_count == 1);
-
- va = gpu_address + image->offset + image->planes[0].surface.fmask_offset;
-
- switch (image->info.samples) {
- case 2:
- format = V_008F0C_IMG_FORMAT_FMASK8_S2_F2;
- break;
- case 4:
- format = V_008F0C_IMG_FORMAT_FMASK8_S4_F4;
- break;
- case 8:
- format = V_008F0C_IMG_FORMAT_FMASK32_S8_F8;
- break;
- default:
- unreachable("invalid nr_samples");
- }
-
- fmask_state[0] = (va >> 8) | image->planes[0].surface.fmask_tile_swizzle;
- fmask_state[1] = S_00A004_BASE_ADDRESS_HI(va >> 40) |
- S_00A004_FORMAT(format) |
- S_00A004_WIDTH_LO(width - 1);
- fmask_state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) |
- S_00A008_HEIGHT(height - 1) |
- S_00A008_RESOURCE_LEVEL(1);
- fmask_state[3] = S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
- S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
- S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
- S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
- S_00A00C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode) |
- S_00A00C_TYPE(radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
- fmask_state[4] = S_00A010_DEPTH(last_layer) |
- S_00A010_BASE_ARRAY(first_layer);
- fmask_state[5] = 0;
- fmask_state[6] = S_00A018_META_PIPE_ALIGNED(1);
- fmask_state[7] = 0;
-
- if (radv_image_is_tc_compat_cmask(image)) {
- va = gpu_address + image->offset + image->planes[0].surface.cmask_offset;
-
- fmask_state[6] |= S_00A018_COMPRESSION_EN(1);
- fmask_state[6] |= S_00A018_META_DATA_ADDRESS_LO(va >> 8);
- fmask_state[7] |= va >> 16;
- }
- } else
- memset(fmask_state, 0, 8 * 4);
- }
+ const struct util_format_description *desc;
+ enum pipe_swizzle swizzle[4];
+ unsigned img_format;
+ unsigned type;
+
+ desc = vk_format_description(vk_format);
+ img_format = gfx10_format_table[vk_format_to_pipe_format(vk_format)].img_format;
+
+ radv_compose_swizzle(desc, mapping, swizzle);
+
+ type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
+ is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
+ if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
+ height = 1;
+ depth = image->info.array_size;
+ } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY || type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
+ if (view_type != VK_IMAGE_VIEW_TYPE_3D)
+ depth = image->info.array_size;
+ } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
+ depth = image->info.array_size / 6;
+
+ state[0] = 0;
+ state[1] = S_00A004_FORMAT(img_format) | S_00A004_WIDTH_LO(width - 1);
+ state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) | S_00A008_HEIGHT(height - 1) |
+ S_00A008_RESOURCE_LEVEL(1);
+ state[3] = S_00A00C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
+ S_00A00C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
+ S_00A00C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
+ S_00A00C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
+ S_00A00C_BASE_LEVEL(image->info.samples > 1 ? 0 : first_level) |
+ S_00A00C_LAST_LEVEL(image->info.samples > 1 ? util_logbase2(image->info.samples)
+ : last_level) |
+ S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(swizzle)) | S_00A00C_TYPE(type);
+   /* Depth is the last accessible layer on gfx9+. The hw doesn't need
+ * to know the total number of layers.
+ */
+ state[4] = S_00A010_DEPTH(type == V_008F1C_SQ_RSRC_IMG_3D ? depth - 1 : last_layer) |
+ S_00A010_BASE_ARRAY(first_layer);
+ state[5] = S_00A014_ARRAY_PITCH(0) |
+ S_00A014_MAX_MIP(image->info.samples > 1 ? util_logbase2(image->info.samples)
+ : image->info.levels - 1) |
+ S_00A014_PERF_MOD(4);
+ state[6] = 0;
+ state[7] = 0;
+
+ if (radv_dcc_enabled(image, first_level)) {
+ state[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
+ S_00A018_MAX_COMPRESSED_BLOCK_SIZE(
+ image->planes[0].surface.u.gfx9.dcc.max_compressed_block_size) |
+ S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
+ }
+
+ /* Initialize the sampler view for FMASK. */
+ if (fmask_state) {
+ if (radv_image_has_fmask(image)) {
+ uint64_t gpu_address = radv_buffer_get_va(image->bo);
+ uint32_t format;
+ uint64_t va;
+
+ assert(image->plane_count == 1);
+
+ va = gpu_address + image->offset + image->planes[0].surface.fmask_offset;
+
+ switch (image->info.samples) {
+ case 2:
+ format = V_008F0C_IMG_FORMAT_FMASK8_S2_F2;
+ break;
+ case 4:
+ format = V_008F0C_IMG_FORMAT_FMASK8_S4_F4;
+ break;
+ case 8:
+ format = V_008F0C_IMG_FORMAT_FMASK32_S8_F8;
+ break;
+ default:
+ unreachable("invalid nr_samples");
+ }
+
+ fmask_state[0] = (va >> 8) | image->planes[0].surface.fmask_tile_swizzle;
+ fmask_state[1] = S_00A004_BASE_ADDRESS_HI(va >> 40) | S_00A004_FORMAT(format) |
+ S_00A004_WIDTH_LO(width - 1);
+ fmask_state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) | S_00A008_HEIGHT(height - 1) |
+ S_00A008_RESOURCE_LEVEL(1);
+ fmask_state[3] =
+ S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X) | S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
+ S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
+ S_00A00C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode) |
+ S_00A00C_TYPE(
+ radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
+ fmask_state[4] = S_00A010_DEPTH(last_layer) | S_00A010_BASE_ARRAY(first_layer);
+ fmask_state[5] = 0;
+ fmask_state[6] = S_00A018_META_PIPE_ALIGNED(1);
+ fmask_state[7] = 0;
+
+ if (radv_image_is_tc_compat_cmask(image)) {
+ va = gpu_address + image->offset + image->planes[0].surface.cmask_offset;
+
+ fmask_state[6] |= S_00A018_COMPRESSION_EN(1);
+ fmask_state[6] |= S_00A018_META_DATA_ADDRESS_LO(va >> 8);
+ fmask_state[7] |= va >> 16;
+ }
+ } else
+ memset(fmask_state, 0, 8 * 4);
+ }
}
/**
* Build the sampler view descriptor for a texture (SI-GFX9)
*/
static void
-si_make_texture_descriptor(struct radv_device *device,
- struct radv_image *image,
- bool is_storage_image,
- VkImageViewType view_type,
- VkFormat vk_format,
- const VkComponentMapping *mapping,
- unsigned first_level, unsigned last_level,
- unsigned first_layer, unsigned last_layer,
- unsigned width, unsigned height, unsigned depth,
- uint32_t *state,
- uint32_t *fmask_state)
+si_make_texture_descriptor(struct radv_device *device, struct radv_image *image,
+ bool is_storage_image, VkImageViewType view_type, VkFormat vk_format,
+ const VkComponentMapping *mapping, unsigned first_level,
+ unsigned last_level, unsigned first_layer, unsigned last_layer,
+ unsigned width, unsigned height, unsigned depth, uint32_t *state,
+ uint32_t *fmask_state)
{
- const struct util_format_description *desc;
- enum pipe_swizzle swizzle[4];
- int first_non_void;
- unsigned num_format, data_format, type;
-
- desc = vk_format_description(vk_format);
-
- radv_compose_swizzle(desc, mapping, swizzle);
-
- first_non_void = vk_format_get_first_non_void_channel(vk_format);
-
- num_format = radv_translate_tex_numformat(vk_format, desc, first_non_void);
- if (num_format == ~0) {
- num_format = 0;
- }
-
- data_format = radv_translate_tex_dataformat(vk_format, desc, first_non_void);
- if (data_format == ~0) {
- data_format = 0;
- }
-
- /* S8 with either Z16 or Z32 HTILE need a special format. */
- if (device->physical_device->rad_info.chip_class == GFX9 &&
- vk_format == VK_FORMAT_S8_UINT &&
- radv_image_is_tc_compat_htile(image)) {
- if (image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT)
- data_format = V_008F14_IMG_DATA_FORMAT_S8_32;
- else if (image->vk_format == VK_FORMAT_D16_UNORM_S8_UINT)
- data_format = V_008F14_IMG_DATA_FORMAT_S8_16;
- }
- type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
- is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
- if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
- height = 1;
- depth = image->info.array_size;
- } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
- type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
- if (view_type != VK_IMAGE_VIEW_TYPE_3D)
- depth = image->info.array_size;
- } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
- depth = image->info.array_size / 6;
-
- state[0] = 0;
- state[1] = (S_008F14_DATA_FORMAT(data_format) |
- S_008F14_NUM_FORMAT(num_format));
- state[2] = (S_008F18_WIDTH(width - 1) |
- S_008F18_HEIGHT(height - 1) |
- S_008F18_PERF_MOD(4));
- state[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
- S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
- S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
- S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
- S_008F1C_BASE_LEVEL(image->info.samples > 1 ?
- 0 : first_level) |
- S_008F1C_LAST_LEVEL(image->info.samples > 1 ?
- util_logbase2(image->info.samples) :
- last_level) |
- S_008F1C_TYPE(type));
- state[4] = 0;
- state[5] = S_008F24_BASE_ARRAY(first_layer);
- state[6] = 0;
- state[7] = 0;
-
- if (device->physical_device->rad_info.chip_class == GFX9) {
- unsigned bc_swizzle = gfx9_border_color_swizzle(swizzle);
-
- /* Depth is the last accessible layer on Gfx9.
- * The hw doesn't need to know the total number of layers.
- */
- if (type == V_008F1C_SQ_RSRC_IMG_3D)
- state[4] |= S_008F20_DEPTH(depth - 1);
- else
- state[4] |= S_008F20_DEPTH(last_layer);
-
- state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle);
- state[5] |= S_008F24_MAX_MIP(image->info.samples > 1 ?
- util_logbase2(image->info.samples) :
- image->info.levels - 1);
- } else {
- state[3] |= S_008F1C_POW2_PAD(image->info.levels > 1);
- state[4] |= S_008F20_DEPTH(depth - 1);
- state[5] |= S_008F24_LAST_ARRAY(last_layer);
- }
- if (image->planes[0].surface.dcc_offset) {
- state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
- } else {
- /* The last dword is unused by hw. The shader uses it to clear
- * bits in the first dword of sampler state.
- */
- if (device->physical_device->rad_info.chip_class <= GFX7 && image->info.samples <= 1) {
- if (first_level == last_level)
- state[7] = C_008F30_MAX_ANISO_RATIO;
- else
- state[7] = 0xffffffff;
- }
- }
-
- /* Initialize the sampler view for FMASK. */
- if (fmask_state) {
- if (radv_image_has_fmask(image)) {
- uint32_t fmask_format;
- uint64_t gpu_address = radv_buffer_get_va(image->bo);
- uint64_t va;
-
- assert(image->plane_count == 1);
-
- va = gpu_address + image->offset + image->planes[0].surface.fmask_offset;
-
- if (device->physical_device->rad_info.chip_class == GFX9) {
- fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK;
- switch (image->info.samples) {
- case 2:
- num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_2_2;
- break;
- case 4:
- num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_4;
- break;
- case 8:
- num_format = V_008F14_IMG_NUM_FORMAT_FMASK_32_8_8;
- break;
- default:
- unreachable("invalid nr_samples");
- }
- } else {
- switch (image->info.samples) {
- case 2:
- fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
- break;
- case 4:
- fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
- break;
- case 8:
- fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
- break;
- default:
- assert(0);
- fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
- }
- num_format = V_008F14_IMG_NUM_FORMAT_UINT;
- }
-
- fmask_state[0] = va >> 8;
- fmask_state[0] |= image->planes[0].surface.fmask_tile_swizzle;
- fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
- S_008F14_DATA_FORMAT(fmask_format) |
- S_008F14_NUM_FORMAT(num_format);
- fmask_state[2] = S_008F18_WIDTH(width - 1) |
- S_008F18_HEIGHT(height - 1);
- fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
- S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
- S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
- S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
- S_008F1C_TYPE(radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
- fmask_state[4] = 0;
- fmask_state[5] = S_008F24_BASE_ARRAY(first_layer);
- fmask_state[6] = 0;
- fmask_state[7] = 0;
-
- if (device->physical_device->rad_info.chip_class == GFX9) {
- fmask_state[3] |= S_008F1C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode);
- fmask_state[4] |= S_008F20_DEPTH(last_layer) |
- S_008F20_PITCH(image->planes[0].surface.u.gfx9.fmask.epitch);
- fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(1) |
- S_008F24_META_RB_ALIGNED(1);
-
- if (radv_image_is_tc_compat_cmask(image)) {
- va = gpu_address + image->offset + image->planes[0].surface.cmask_offset;
-
- fmask_state[5] |= S_008F24_META_DATA_ADDRESS(va >> 40);
- fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
- fmask_state[7] |= va >> 8;
- }
- } else {
- fmask_state[3] |= S_008F1C_TILING_INDEX(image->planes[0].surface.u.legacy.fmask.tiling_index);
- fmask_state[4] |= S_008F20_DEPTH(depth - 1) |
- S_008F20_PITCH(image->planes[0].surface.u.legacy.fmask.pitch_in_pixels - 1);
- fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer);
-
- if (radv_image_is_tc_compat_cmask(image)) {
- va = gpu_address + image->offset + image->planes[0].surface.cmask_offset;
-
- fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
- fmask_state[7] |= va >> 8;
- }
- }
- } else
- memset(fmask_state, 0, 8 * 4);
- }
+ const struct util_format_description *desc;
+ enum pipe_swizzle swizzle[4];
+ int first_non_void;
+ unsigned num_format, data_format, type;
+
+ desc = vk_format_description(vk_format);
+
+ radv_compose_swizzle(desc, mapping, swizzle);
+
+ first_non_void = vk_format_get_first_non_void_channel(vk_format);
+
+ num_format = radv_translate_tex_numformat(vk_format, desc, first_non_void);
+ if (num_format == ~0) {
+ num_format = 0;
+ }
+
+ data_format = radv_translate_tex_dataformat(vk_format, desc, first_non_void);
+ if (data_format == ~0) {
+ data_format = 0;
+ }
+
+   /* S8 with either Z16 or Z32 HTILE needs a special format. */
+ if (device->physical_device->rad_info.chip_class == GFX9 && vk_format == VK_FORMAT_S8_UINT &&
+ radv_image_is_tc_compat_htile(image)) {
+ if (image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT)
+ data_format = V_008F14_IMG_DATA_FORMAT_S8_32;
+ else if (image->vk_format == VK_FORMAT_D16_UNORM_S8_UINT)
+ data_format = V_008F14_IMG_DATA_FORMAT_S8_16;
+ }
+ type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
+ is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
+ if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
+ height = 1;
+ depth = image->info.array_size;
+ } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY || type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
+ if (view_type != VK_IMAGE_VIEW_TYPE_3D)
+ depth = image->info.array_size;
+ } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
+ depth = image->info.array_size / 6;
+
+ state[0] = 0;
+ state[1] = (S_008F14_DATA_FORMAT(data_format) | S_008F14_NUM_FORMAT(num_format));
+ state[2] = (S_008F18_WIDTH(width - 1) | S_008F18_HEIGHT(height - 1) | S_008F18_PERF_MOD(4));
+ state[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
+ S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
+ S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
+ S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
+ S_008F1C_BASE_LEVEL(image->info.samples > 1 ? 0 : first_level) |
+ S_008F1C_LAST_LEVEL(image->info.samples > 1 ? util_logbase2(image->info.samples)
+ : last_level) |
+ S_008F1C_TYPE(type));
+ state[4] = 0;
+ state[5] = S_008F24_BASE_ARRAY(first_layer);
+ state[6] = 0;
+ state[7] = 0;
+
+ if (device->physical_device->rad_info.chip_class == GFX9) {
+ unsigned bc_swizzle = gfx9_border_color_swizzle(swizzle);
+
+ /* Depth is the last accessible layer on Gfx9.
+ * The hw doesn't need to know the total number of layers.
+ */
+ if (type == V_008F1C_SQ_RSRC_IMG_3D)
+ state[4] |= S_008F20_DEPTH(depth - 1);
+ else
+ state[4] |= S_008F20_DEPTH(last_layer);
+
+ state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle);
+ state[5] |= S_008F24_MAX_MIP(image->info.samples > 1 ? util_logbase2(image->info.samples)
+ : image->info.levels - 1);
+ } else {
+ state[3] |= S_008F1C_POW2_PAD(image->info.levels > 1);
+ state[4] |= S_008F20_DEPTH(depth - 1);
+ state[5] |= S_008F24_LAST_ARRAY(last_layer);
+ }
+ if (image->planes[0].surface.dcc_offset) {
+ state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
+ } else {
+ /* The last dword is unused by hw. The shader uses it to clear
+ * bits in the first dword of sampler state.
+ */
+ if (device->physical_device->rad_info.chip_class <= GFX7 && image->info.samples <= 1) {
+ if (first_level == last_level)
+ state[7] = C_008F30_MAX_ANISO_RATIO;
+ else
+ state[7] = 0xffffffff;
+ }
+ }
+
+ /* Initialize the sampler view for FMASK. */
+ if (fmask_state) {
+ if (radv_image_has_fmask(image)) {
+ uint32_t fmask_format;
+ uint64_t gpu_address = radv_buffer_get_va(image->bo);
+ uint64_t va;
+
+ assert(image->plane_count == 1);
+
+ va = gpu_address + image->offset + image->planes[0].surface.fmask_offset;
+
+ if (device->physical_device->rad_info.chip_class == GFX9) {
+ fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK;
+ switch (image->info.samples) {
+ case 2:
+ num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_2_2;
+ break;
+ case 4:
+ num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_4;
+ break;
+ case 8:
+ num_format = V_008F14_IMG_NUM_FORMAT_FMASK_32_8_8;
+ break;
+ default:
+ unreachable("invalid nr_samples");
+ }
+ } else {
+ switch (image->info.samples) {
+ case 2:
+ fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
+ break;
+ case 4:
+ fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
+ break;
+ case 8:
+ fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
+ break;
+ default:
+ assert(0);
+ fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
+ }
+ num_format = V_008F14_IMG_NUM_FORMAT_UINT;
+ }
+
+ fmask_state[0] = va >> 8;
+ fmask_state[0] |= image->planes[0].surface.fmask_tile_swizzle;
+ fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) | S_008F14_DATA_FORMAT(fmask_format) |
+ S_008F14_NUM_FORMAT(num_format);
+ fmask_state[2] = S_008F18_WIDTH(width - 1) | S_008F18_HEIGHT(height - 1);
+ fmask_state[3] =
+ S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) | S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
+ S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
+ S_008F1C_TYPE(
+ radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
+ fmask_state[4] = 0;
+ fmask_state[5] = S_008F24_BASE_ARRAY(first_layer);
+ fmask_state[6] = 0;
+ fmask_state[7] = 0;
+
+ if (device->physical_device->rad_info.chip_class == GFX9) {
+ fmask_state[3] |= S_008F1C_SW_MODE(image->planes[0].surface.u.gfx9.fmask.swizzle_mode);
+ fmask_state[4] |= S_008F20_DEPTH(last_layer) |
+ S_008F20_PITCH(image->planes[0].surface.u.gfx9.fmask.epitch);
+ fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(1) | S_008F24_META_RB_ALIGNED(1);
+
+ if (radv_image_is_tc_compat_cmask(image)) {
+ va = gpu_address + image->offset + image->planes[0].surface.cmask_offset;
+
+ fmask_state[5] |= S_008F24_META_DATA_ADDRESS(va >> 40);
+ fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
+ fmask_state[7] |= va >> 8;
+ }
+ } else {
+ fmask_state[3] |=
+ S_008F1C_TILING_INDEX(image->planes[0].surface.u.legacy.fmask.tiling_index);
+ fmask_state[4] |=
+ S_008F20_DEPTH(depth - 1) |
+ S_008F20_PITCH(image->planes[0].surface.u.legacy.fmask.pitch_in_pixels - 1);
+ fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer);
+
+ if (radv_image_is_tc_compat_cmask(image)) {
+ va = gpu_address + image->offset + image->planes[0].surface.cmask_offset;
+
+ fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
+ fmask_state[7] |= va >> 8;
+ }
+ }
+ } else
+ memset(fmask_state, 0, 8 * 4);
+ }
}
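A minimal sketch of the state[7] convention noted in the descriptor-building code above: on the GFX7-and-earlier single-sample path the last texture dword is populated so a shader can AND it into the first sampler dword, clearing the MAX_ANISO_RATIO field for single-level views. The standalone helper below is purely illustrative; its name and shape are assumptions.

#include <stdint.h>

/* Illustrative only: how a shader prolog could consume tex_desc[7] as a mask
 * for the sampler's first dword.  With first_level == last_level the mask is
 * C_008F30_MAX_ANISO_RATIO, which disables anisotropy; otherwise it is all
 * ones and the sampler state passes through unchanged. */
static inline void
example_apply_aniso_mask(uint32_t sampler_state[4], const uint32_t tex_desc[8])
{
   sampler_state[0] &= tex_desc[7];
}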
static void
-radv_make_texture_descriptor(struct radv_device *device,
- struct radv_image *image,
- bool is_storage_image,
- VkImageViewType view_type,
- VkFormat vk_format,
- const VkComponentMapping *mapping,
- unsigned first_level, unsigned last_level,
- unsigned first_layer, unsigned last_layer,
- unsigned width, unsigned height, unsigned depth,
- uint32_t *state,
- uint32_t *fmask_state)
+radv_make_texture_descriptor(struct radv_device *device, struct radv_image *image,
+ bool is_storage_image, VkImageViewType view_type, VkFormat vk_format,
+ const VkComponentMapping *mapping, unsigned first_level,
+ unsigned last_level, unsigned first_layer, unsigned last_layer,
+ unsigned width, unsigned height, unsigned depth, uint32_t *state,
+ uint32_t *fmask_state)
{
- if (device->physical_device->rad_info.chip_class >= GFX10) {
- gfx10_make_texture_descriptor(device, image, is_storage_image,
- view_type, vk_format, mapping,
- first_level, last_level,
- first_layer, last_layer,
- width, height, depth,
- state, fmask_state);
- } else {
- si_make_texture_descriptor(device, image, is_storage_image,
- view_type, vk_format, mapping,
- first_level, last_level,
- first_layer, last_layer,
- width, height, depth,
- state, fmask_state);
- }
+ if (device->physical_device->rad_info.chip_class >= GFX10) {
+ gfx10_make_texture_descriptor(device, image, is_storage_image, view_type, vk_format, mapping,
+ first_level, last_level, first_layer, last_layer, width, height,
+ depth, state, fmask_state);
+ } else {
+ si_make_texture_descriptor(device, image, is_storage_image, view_type, vk_format, mapping,
+ first_level, last_level, first_layer, last_layer, width, height,
+ depth, state, fmask_state);
+ }
}
static void
-radv_query_opaque_metadata(struct radv_device *device,
- struct radv_image *image,
- struct radeon_bo_metadata *md)
+radv_query_opaque_metadata(struct radv_device *device, struct radv_image *image,
+ struct radeon_bo_metadata *md)
{
- static const VkComponentMapping fixedmapping;
- uint32_t desc[8];
+ static const VkComponentMapping fixedmapping;
+ uint32_t desc[8];
- assert(image->plane_count == 1);
+ assert(image->plane_count == 1);
- radv_make_texture_descriptor(device, image, false,
- (VkImageViewType)image->type, image->vk_format,
- &fixedmapping, 0, image->info.levels - 1, 0,
- image->info.array_size - 1,
- image->info.width, image->info.height,
- image->info.depth,
- desc, NULL);
+ radv_make_texture_descriptor(device, image, false, (VkImageViewType)image->type,
+ image->vk_format, &fixedmapping, 0, image->info.levels - 1, 0,
+ image->info.array_size - 1, image->info.width, image->info.height,
+ image->info.depth, desc, NULL);
- si_set_mutable_tex_desc_fields(device, image, &image->planes[0].surface.u.legacy.level[0], 0, 0, 0,
- image->planes[0].surface.blk_w, false, false, false, false, desc);
+ si_set_mutable_tex_desc_fields(device, image, &image->planes[0].surface.u.legacy.level[0], 0, 0,
+ 0, image->planes[0].surface.blk_w, false, false, false, false,
+ desc);
- ac_surface_get_umd_metadata(&device->physical_device->rad_info, &image->planes[0].surface,
- image->info.levels, desc, &md->size_metadata, md->metadata);
+ ac_surface_get_umd_metadata(&device->physical_device->rad_info, &image->planes[0].surface,
+ image->info.levels, desc, &md->size_metadata, md->metadata);
}
void
-radv_init_metadata(struct radv_device *device,
- struct radv_image *image,
- struct radeon_bo_metadata *metadata)
+radv_init_metadata(struct radv_device *device, struct radv_image *image,
+ struct radeon_bo_metadata *metadata)
{
- struct radeon_surf *surface = &image->planes[0].surface;
-
- memset(metadata, 0, sizeof(*metadata));
-
- if (device->physical_device->rad_info.chip_class >= GFX9) {
- uint64_t dcc_offset = image->offset + (surface->display_dcc_offset ?
- surface->display_dcc_offset : surface->dcc_offset);
- metadata->u.gfx9.swizzle_mode = surface->u.gfx9.surf.swizzle_mode;
- metadata->u.gfx9.dcc_offset_256b = dcc_offset >> 8;
- metadata->u.gfx9.dcc_pitch_max = surface->u.gfx9.display_dcc_pitch_max;
- metadata->u.gfx9.dcc_independent_64b_blocks = surface->u.gfx9.dcc.independent_64B_blocks;
- metadata->u.gfx9.dcc_independent_128b_blocks = surface->u.gfx9.dcc.independent_128B_blocks;
- metadata->u.gfx9.dcc_max_compressed_block_size = surface->u.gfx9.dcc.max_compressed_block_size;
- metadata->u.gfx9.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
- } else {
- metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ?
- RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
- metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ?
- RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
- metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
- metadata->u.legacy.bankw = surface->u.legacy.bankw;
- metadata->u.legacy.bankh = surface->u.legacy.bankh;
- metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
- metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
- metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
- metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
- metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
- }
- radv_query_opaque_metadata(device, image, metadata);
+ struct radeon_surf *surface = &image->planes[0].surface;
+
+ memset(metadata, 0, sizeof(*metadata));
+
+ if (device->physical_device->rad_info.chip_class >= GFX9) {
+ uint64_t dcc_offset =
+ image->offset +
+ (surface->display_dcc_offset ? surface->display_dcc_offset : surface->dcc_offset);
+ metadata->u.gfx9.swizzle_mode = surface->u.gfx9.surf.swizzle_mode;
+ metadata->u.gfx9.dcc_offset_256b = dcc_offset >> 8;
+ metadata->u.gfx9.dcc_pitch_max = surface->u.gfx9.display_dcc_pitch_max;
+ metadata->u.gfx9.dcc_independent_64b_blocks = surface->u.gfx9.dcc.independent_64B_blocks;
+ metadata->u.gfx9.dcc_independent_128b_blocks = surface->u.gfx9.dcc.independent_128B_blocks;
+ metadata->u.gfx9.dcc_max_compressed_block_size =
+ surface->u.gfx9.dcc.max_compressed_block_size;
+ metadata->u.gfx9.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
+ } else {
+ metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D
+ ? RADEON_LAYOUT_TILED
+ : RADEON_LAYOUT_LINEAR;
+ metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D
+ ? RADEON_LAYOUT_TILED
+ : RADEON_LAYOUT_LINEAR;
+ metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
+ metadata->u.legacy.bankw = surface->u.legacy.bankw;
+ metadata->u.legacy.bankh = surface->u.legacy.bankh;
+ metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
+ metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
+ metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
+ metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
+ metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
+ }
+ radv_query_opaque_metadata(device, image, metadata);
}
void
-radv_image_override_offset_stride(struct radv_device *device,
- struct radv_image *image,
+radv_image_override_offset_stride(struct radv_device *device, struct radv_image *image,
uint64_t offset, uint32_t stride)
{
- ac_surface_override_offset_stride(&device->physical_device->rad_info,
- &image->planes[0].surface,
- image->info.levels, offset, stride);
+ ac_surface_override_offset_stride(&device->physical_device->rad_info, &image->planes[0].surface,
+ image->info.levels, offset, stride);
}
static void
radv_image_alloc_single_sample_cmask(const struct radv_device *device,
- const struct radv_image *image,
- struct radeon_surf *surf)
+ const struct radv_image *image, struct radeon_surf *surf)
{
- if (!surf->cmask_size || surf->cmask_offset || surf->bpe > 8 ||
- image->info.levels > 1 || image->info.depth > 1 ||
- radv_image_has_dcc(image) ||
- !radv_image_use_fast_clear_for_image(device, image) ||
- (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT))
- return;
-
- assert(image->info.storage_samples == 1);
-
- surf->cmask_offset = align64(surf->total_size, surf->cmask_alignment);
- surf->total_size = surf->cmask_offset + surf->cmask_size;
- surf->alignment = MAX2(surf->alignment, surf->cmask_alignment);
+ if (!surf->cmask_size || surf->cmask_offset || surf->bpe > 8 || image->info.levels > 1 ||
+ image->info.depth > 1 || radv_image_has_dcc(image) ||
+ !radv_image_use_fast_clear_for_image(device, image) ||
+ (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT))
+ return;
+
+ assert(image->info.storage_samples == 1);
+
+ surf->cmask_offset = align64(surf->total_size, surf->cmask_alignment);
+ surf->total_size = surf->cmask_offset + surf->cmask_size;
+ surf->alignment = MAX2(surf->alignment, surf->cmask_alignment);
}
static void
radv_image_alloc_values(const struct radv_device *device, struct radv_image *image)
{
- /* images with modifiers can be potentially imported */
- if (image->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
- return;
-
- if (radv_image_has_cmask(image) || radv_image_has_dcc(image)) {
- image->fce_pred_offset = image->size;
- image->size += 8 * image->info.levels;
- }
-
- if (radv_image_use_dcc_predication(device, image)) {
- image->dcc_pred_offset = image->size;
- image->size += 8 * image->info.levels;
- }
-
- if (radv_image_has_dcc(image) || radv_image_has_cmask(image) ||
- radv_image_has_htile(image)) {
- image->clear_value_offset = image->size;
- image->size += 8 * image->info.levels;
- }
-
- if (radv_image_is_tc_compat_htile(image) &&
- device->physical_device->rad_info.has_tc_compat_zrange_bug) {
- /* Metadata for the TC-compatible HTILE hardware bug which
-		 * has to be fixed by updating ZRANGE_PRECISION when doing
- * fast depth clears to 0.0f.
- */
- image->tc_compat_zrange_offset = image->size;
- image->size += image->info.levels * 4;
- }
+ /* images with modifiers can be potentially imported */
+ if (image->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
+ return;
+
+ if (radv_image_has_cmask(image) || radv_image_has_dcc(image)) {
+ image->fce_pred_offset = image->size;
+ image->size += 8 * image->info.levels;
+ }
+
+ if (radv_image_use_dcc_predication(device, image)) {
+ image->dcc_pred_offset = image->size;
+ image->size += 8 * image->info.levels;
+ }
+
+ if (radv_image_has_dcc(image) || radv_image_has_cmask(image) || radv_image_has_htile(image)) {
+ image->clear_value_offset = image->size;
+ image->size += 8 * image->info.levels;
+ }
+
+ if (radv_image_is_tc_compat_htile(image) &&
+ device->physical_device->rad_info.has_tc_compat_zrange_bug) {
+ /* Metadata for the TC-compatible HTILE hardware bug which
+       * has to be fixed by updating ZRANGE_PRECISION when doing
+ * fast depth clears to 0.0f.
+ */
+ image->tc_compat_zrange_offset = image->size;
+ image->size += image->info.levels * 4;
+ }
}
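As a worked example of the allocation rule above (one 64-bit slot per mip level for each metadata block, 32-bit for the zrange workaround), a condensed, hypothetical restatement:

#include <stdbool.h>
#include <stdint.h>

/* Hypothetical helper mirroring radv_image_alloc_values(): a 3-level color
 * image with CMASK/DCC (FCE predicate), DCC predication and clear values,
 * but no zrange workaround, appends 3 * (8 + 8 + 8) = 72 bytes. */
static uint64_t
example_image_metadata_size(unsigned levels, bool fce_pred, bool dcc_pred,
                            bool clear_values, bool zrange_workaround)
{
   uint64_t size = 0;
   if (fce_pred)
      size += 8 * levels; /* fast-clear-eliminate predicate */
   if (dcc_pred)
      size += 8 * levels; /* DCC decompression predicate */
   if (clear_values)
      size += 8 * levels; /* fast clear values */
   if (zrange_workaround)
      size += 4 * levels; /* TC-compat zrange metadata */
   return size;
}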
-
static void
radv_image_reset_layout(struct radv_image *image)
{
- image->size = 0;
- image->alignment = 1;
-
- image->tc_compatible_cmask = 0;
- image->fce_pred_offset = image->dcc_pred_offset = 0;
- image->clear_value_offset = image->tc_compat_zrange_offset = 0;
-
- for (unsigned i = 0; i < image->plane_count; ++i) {
- VkFormat format = vk_format_get_plane_format(image->vk_format, i);
-
- uint64_t flags = image->planes[i].surface.flags;
- uint64_t modifier = image->planes[i].surface.modifier;
- memset(image->planes + i, 0, sizeof(image->planes[i]));
-
- image->planes[i].surface.flags = flags;
- image->planes[i].surface.modifier = modifier;
- image->planes[i].surface.blk_w = vk_format_get_blockwidth(format);
- image->planes[i].surface.blk_h = vk_format_get_blockheight(format);
- image->planes[i].surface.bpe = vk_format_get_blocksize(vk_format_depth_only(format));
-
- /* align byte per element on dword */
- if (image->planes[i].surface.bpe == 3) {
- image->planes[i].surface.bpe = 4;
- }
- }
+ image->size = 0;
+ image->alignment = 1;
+
+ image->tc_compatible_cmask = 0;
+ image->fce_pred_offset = image->dcc_pred_offset = 0;
+ image->clear_value_offset = image->tc_compat_zrange_offset = 0;
+
+ for (unsigned i = 0; i < image->plane_count; ++i) {
+ VkFormat format = vk_format_get_plane_format(image->vk_format, i);
+
+ uint64_t flags = image->planes[i].surface.flags;
+ uint64_t modifier = image->planes[i].surface.modifier;
+ memset(image->planes + i, 0, sizeof(image->planes[i]));
+
+ image->planes[i].surface.flags = flags;
+ image->planes[i].surface.modifier = modifier;
+ image->planes[i].surface.blk_w = vk_format_get_blockwidth(format);
+ image->planes[i].surface.blk_h = vk_format_get_blockheight(format);
+ image->planes[i].surface.bpe = vk_format_get_blocksize(vk_format_depth_only(format));
+
+ /* align byte per element on dword */
+ if (image->planes[i].surface.bpe == 3) {
+ image->planes[i].surface.bpe = 4;
+ }
+ }
}
static VkResult
radv_image_init_retile_map(struct radv_device *device, struct radv_image *image)
{
- /* If we do a relayout we have to free the old buffer. */
- if(image->retile_map)
- device->ws->buffer_destroy(device->ws, image->retile_map);
-
- image->retile_map = NULL;
- if (!radv_image_has_dcc(image) || !image->planes[0].surface.display_dcc_offset ||
- image->planes[0].surface.display_dcc_offset == image->planes[0].surface.dcc_offset)
- return VK_SUCCESS;
-
- uint32_t retile_map_size = ac_surface_get_retile_map_size(&image->planes[0].surface);
- image->retile_map = device->ws->buffer_create(device->ws, retile_map_size, 4096,
- RADEON_DOMAIN_VRAM, RADEON_FLAG_READ_ONLY |
- RADEON_FLAG_NO_INTERPROCESS_SHARING,
- RADV_BO_PRIORITY_METADATA);
- if (!image->retile_map) {
- return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
- }
- void *data = device->ws->buffer_map(image->retile_map);
- if (!data) {
- device->ws->buffer_destroy(device->ws, image->retile_map);
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- }
-
- memcpy(data, image->planes[0].surface.u.gfx9.dcc_retile_map, retile_map_size);
- return VK_SUCCESS;
+ /* If we do a relayout we have to free the old buffer. */
+ if (image->retile_map)
+ device->ws->buffer_destroy(device->ws, image->retile_map);
+
+ image->retile_map = NULL;
+ if (!radv_image_has_dcc(image) || !image->planes[0].surface.display_dcc_offset ||
+ image->planes[0].surface.display_dcc_offset == image->planes[0].surface.dcc_offset)
+ return VK_SUCCESS;
+
+ uint32_t retile_map_size = ac_surface_get_retile_map_size(&image->planes[0].surface);
+ image->retile_map = device->ws->buffer_create(
+ device->ws, retile_map_size, 4096, RADEON_DOMAIN_VRAM,
+ RADEON_FLAG_READ_ONLY | RADEON_FLAG_NO_INTERPROCESS_SHARING, RADV_BO_PRIORITY_METADATA);
+ if (!image->retile_map) {
+ return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ }
+ void *data = device->ws->buffer_map(image->retile_map);
+ if (!data) {
+ device->ws->buffer_destroy(device->ws, image->retile_map);
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+
+ memcpy(data, image->planes[0].surface.u.gfx9.dcc_retile_map, retile_map_size);
+ return VK_SUCCESS;
}
VkResult
-radv_image_create_layout(struct radv_device *device,
- struct radv_image_create_info create_info,
+radv_image_create_layout(struct radv_device *device, struct radv_image_create_info create_info,
const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *mod_info,
struct radv_image *image)
{
- /* Clear the pCreateInfo pointer so we catch issues in the delayed case when we test in the
- * common internal case. */
- create_info.vk_info = NULL;
-
- struct ac_surf_info image_info = image->info;
- VkResult result = radv_patch_image_from_extra_info(device, image, &create_info, &image_info);
- if (result != VK_SUCCESS)
- return result;
-
- assert(!mod_info || mod_info->drmFormatModifierPlaneCount >= image->plane_count);
-
- radv_image_reset_layout(image);
-
- for (unsigned plane = 0; plane < image->plane_count; ++plane) {
- struct ac_surf_info info = image_info;
- uint64_t offset;
- unsigned stride;
-
- info.width = vk_format_get_plane_width(image->vk_format, plane, info.width);
- info.height = vk_format_get_plane_height(image->vk_format, plane, info.height);
-
- if (create_info.no_metadata_planes || image->plane_count > 1) {
- image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC |
- RADEON_SURF_NO_FMASK |
- RADEON_SURF_NO_HTILE;
- }
-
- device->ws->surface_init(device->ws, &info, &image->planes[plane].surface);
-
- if (create_info.bo_metadata && !mod_info &&
- !ac_surface_set_umd_metadata(&device->physical_device->rad_info,
- &image->planes[plane].surface,
- image_info.storage_samples, image_info.levels,
- create_info.bo_metadata->size_metadata,
- create_info.bo_metadata->metadata))
- return VK_ERROR_INVALID_EXTERNAL_HANDLE;
-
- if (!create_info.no_metadata_planes && !create_info.bo_metadata &&
- image->plane_count == 1 && !mod_info)
- radv_image_alloc_single_sample_cmask(device, image, &image->planes[plane].surface);
-
- if (mod_info) {
- if (mod_info->pPlaneLayouts[plane].rowPitch % image->planes[plane].surface.bpe ||
- !mod_info->pPlaneLayouts[plane].rowPitch)
- return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
-
- offset = mod_info->pPlaneLayouts[plane].offset;
- stride = mod_info->pPlaneLayouts[plane].rowPitch / image->planes[plane].surface.bpe;
- } else {
- offset = align(image->size, image->planes[plane].surface.alignment);
- stride = 0; /* 0 means no override */
- }
-
- if (!ac_surface_override_offset_stride(&device->physical_device->rad_info,
- &image->planes[plane].surface,
- image->info.levels,
- offset,
- stride))
- return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
-
- /* Validate DCC offsets in modifier layout. */
- if (image->plane_count == 1 && mod_info) {
- unsigned mem_planes = ac_surface_get_nplanes(&image->planes[plane].surface);
- if (mod_info->drmFormatModifierPlaneCount != mem_planes)
- return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
-
- for (unsigned i = 1; i < mem_planes; ++i) {
- if (ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class,
- &image->planes[plane].surface, i, 0) !=
- mod_info->pPlaneLayouts[i].offset)
- return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
- }
- }
-
- image->size = MAX2(image->size, offset + image->planes[plane].surface.total_size);
- image->alignment = MAX2(image->alignment, image->planes[plane].surface.alignment);
-
- image->planes[plane].format = vk_format_get_plane_format(image->vk_format, plane);
- }
-
- image->tc_compatible_cmask = radv_image_has_cmask(image) &&
- radv_use_tc_compat_cmask_for_image(device, image);
-
- radv_image_alloc_values(device, image);
-
- result = radv_image_init_retile_map(device, image);
- if (result != VK_SUCCESS)
- return result;
-
- assert(image->planes[0].surface.surf_size);
- assert(image->planes[0].surface.modifier == DRM_FORMAT_MOD_INVALID ||
- ac_modifier_has_dcc(image->planes[0].surface.modifier) == radv_image_has_dcc(image));
- return VK_SUCCESS;
+ /* Clear the pCreateInfo pointer so we catch issues in the delayed case when we test in the
+ * common internal case. */
+ create_info.vk_info = NULL;
+
+ struct ac_surf_info image_info = image->info;
+ VkResult result = radv_patch_image_from_extra_info(device, image, &create_info, &image_info);
+ if (result != VK_SUCCESS)
+ return result;
+
+ assert(!mod_info || mod_info->drmFormatModifierPlaneCount >= image->plane_count);
+
+ radv_image_reset_layout(image);
+
+ for (unsigned plane = 0; plane < image->plane_count; ++plane) {
+ struct ac_surf_info info = image_info;
+ uint64_t offset;
+ unsigned stride;
+
+ info.width = vk_format_get_plane_width(image->vk_format, plane, info.width);
+ info.height = vk_format_get_plane_height(image->vk_format, plane, info.height);
+
+ if (create_info.no_metadata_planes || image->plane_count > 1) {
+ image->planes[plane].surface.flags |=
+ RADEON_SURF_DISABLE_DCC | RADEON_SURF_NO_FMASK | RADEON_SURF_NO_HTILE;
+ }
+
+ device->ws->surface_init(device->ws, &info, &image->planes[plane].surface);
+
+ if (create_info.bo_metadata && !mod_info &&
+ !ac_surface_set_umd_metadata(&device->physical_device->rad_info,
+ &image->planes[plane].surface, image_info.storage_samples,
+ image_info.levels, create_info.bo_metadata->size_metadata,
+ create_info.bo_metadata->metadata))
+ return VK_ERROR_INVALID_EXTERNAL_HANDLE;
+
+ if (!create_info.no_metadata_planes && !create_info.bo_metadata && image->plane_count == 1 &&
+ !mod_info)
+ radv_image_alloc_single_sample_cmask(device, image, &image->planes[plane].surface);
+
+ if (mod_info) {
+ if (mod_info->pPlaneLayouts[plane].rowPitch % image->planes[plane].surface.bpe ||
+ !mod_info->pPlaneLayouts[plane].rowPitch)
+ return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
+
+ offset = mod_info->pPlaneLayouts[plane].offset;
+ stride = mod_info->pPlaneLayouts[plane].rowPitch / image->planes[plane].surface.bpe;
+ } else {
+ offset = align(image->size, image->planes[plane].surface.alignment);
+ stride = 0; /* 0 means no override */
+ }
+
+ if (!ac_surface_override_offset_stride(&device->physical_device->rad_info,
+ &image->planes[plane].surface, image->info.levels,
+ offset, stride))
+ return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
+
+ /* Validate DCC offsets in modifier layout. */
+ if (image->plane_count == 1 && mod_info) {
+ unsigned mem_planes = ac_surface_get_nplanes(&image->planes[plane].surface);
+ if (mod_info->drmFormatModifierPlaneCount != mem_planes)
+ return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
+
+ for (unsigned i = 1; i < mem_planes; ++i) {
+ if (ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class,
+ &image->planes[plane].surface, i,
+ 0) != mod_info->pPlaneLayouts[i].offset)
+ return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
+ }
+ }
+
+ image->size = MAX2(image->size, offset + image->planes[plane].surface.total_size);
+ image->alignment = MAX2(image->alignment, image->planes[plane].surface.alignment);
+
+ image->planes[plane].format = vk_format_get_plane_format(image->vk_format, plane);
+ }
+
+ image->tc_compatible_cmask =
+ radv_image_has_cmask(image) && radv_use_tc_compat_cmask_for_image(device, image);
+
+ radv_image_alloc_values(device, image);
+
+ result = radv_image_init_retile_map(device, image);
+ if (result != VK_SUCCESS)
+ return result;
+
+ assert(image->planes[0].surface.surf_size);
+ assert(image->planes[0].surface.modifier == DRM_FORMAT_MOD_INVALID ||
+ ac_modifier_has_dcc(image->planes[0].surface.modifier) == radv_image_has_dcc(image));
+ return VK_SUCCESS;
}
static void
-radv_destroy_image(struct radv_device *device,
- const VkAllocationCallbacks *pAllocator,
- struct radv_image *image)
+radv_destroy_image(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
+ struct radv_image *image)
{
- if ((image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) && image->bo)
- device->ws->buffer_destroy(device->ws, image->bo);
+ if ((image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) && image->bo)
+ device->ws->buffer_destroy(device->ws, image->bo);
- if(image->retile_map)
- device->ws->buffer_destroy(device->ws, image->retile_map);
+ if (image->retile_map)
+ device->ws->buffer_destroy(device->ws, image->retile_map);
- if (image->owned_memory != VK_NULL_HANDLE) {
- RADV_FROM_HANDLE(radv_device_memory, mem, image->owned_memory);
- radv_free_memory(device, pAllocator, mem);
- }
+ if (image->owned_memory != VK_NULL_HANDLE) {
+ RADV_FROM_HANDLE(radv_device_memory, mem, image->owned_memory);
+ radv_free_memory(device, pAllocator, mem);
+ }
- vk_object_base_finish(&image->base);
- vk_free2(&device->vk.alloc, pAllocator, image);
+ vk_object_base_finish(&image->base);
+ vk_free2(&device->vk.alloc, pAllocator, image);
}
static void
radv_image_print_info(struct radv_device *device, struct radv_image *image)
{
- fprintf(stderr, "Image:\n");
- fprintf(stderr, " Info: size=%" PRIu64 ", alignment=%" PRIu32 ", "
- "width=%" PRIu32 ", height=%" PRIu32 ", "
- "offset=%" PRIu64 ", array_size=%" PRIu32 "\n",
- image->size, image->alignment, image->info.width,
- image->info.height, image->offset, image->info.array_size);
- for (unsigned i = 0; i < image->plane_count; ++i) {
- const struct radv_image_plane *plane = &image->planes[i];
- const struct radeon_surf *surf = &plane->surface;
- const struct util_format_description *desc =
- vk_format_description(plane->format);
- uint64_t offset = ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class,
- &plane->surface, 0, 0);
-
- fprintf(stderr,
- " Plane[%u]: vkformat=%s, offset=%" PRIu64 "\n",
- i, desc->name, offset);
-
- ac_surface_print_info(stderr,
- &device->physical_device->rad_info,
- surf);
- }
+ fprintf(stderr, "Image:\n");
+ fprintf(stderr,
+ " Info: size=%" PRIu64 ", alignment=%" PRIu32 ", "
+ "width=%" PRIu32 ", height=%" PRIu32 ", "
+ "offset=%" PRIu64 ", array_size=%" PRIu32 "\n",
+ image->size, image->alignment, image->info.width, image->info.height, image->offset,
+ image->info.array_size);
+ for (unsigned i = 0; i < image->plane_count; ++i) {
+ const struct radv_image_plane *plane = &image->planes[i];
+ const struct radeon_surf *surf = &plane->surface;
+ const struct util_format_description *desc = vk_format_description(plane->format);
+ uint64_t offset = ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class,
+ &plane->surface, 0, 0);
+
+ fprintf(stderr, " Plane[%u]: vkformat=%s, offset=%" PRIu64 "\n", i, desc->name, offset);
+
+ ac_surface_print_info(stderr, &device->physical_device->rad_info, surf);
+ }
}
/**
* Determine if the given image can be fast cleared.
*/
static bool
-radv_image_can_fast_clear(const struct radv_device *device,
- const struct radv_image *image)
+radv_image_can_fast_clear(const struct radv_device *device, const struct radv_image *image)
{
- if (device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS)
- return false;
-
- if (vk_format_is_color(image->vk_format)) {
- if (!radv_image_has_cmask(image) && !radv_image_has_dcc(image))
- return false;
-
- /* RB+ doesn't work with CMASK fast clear on Stoney. */
- if (!radv_image_has_dcc(image) &&
- device->physical_device->rad_info.family == CHIP_STONEY)
- return false;
- } else {
- if (!radv_image_has_htile(image))
- return false;
- }
-
-	/* Do not fast clear 3D images. */
- if (image->type == VK_IMAGE_TYPE_3D)
- return false;
-
- return true;
+ if (device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS)
+ return false;
+
+ if (vk_format_is_color(image->vk_format)) {
+ if (!radv_image_has_cmask(image) && !radv_image_has_dcc(image))
+ return false;
+
+ /* RB+ doesn't work with CMASK fast clear on Stoney. */
+ if (!radv_image_has_dcc(image) && device->physical_device->rad_info.family == CHIP_STONEY)
+ return false;
+ } else {
+ if (!radv_image_has_htile(image))
+ return false;
+ }
+
+   /* Do not fast clear 3D images. */
+ if (image->type == VK_IMAGE_TYPE_3D)
+ return false;
+
+ return true;
}
static uint64_t
-radv_select_modifier(const struct radv_device *dev,
- VkFormat format,
+radv_select_modifier(const struct radv_device *dev, VkFormat format,
const struct VkImageDrmFormatModifierListCreateInfoEXT *mod_list)
{
- const struct radv_physical_device *pdev = dev->physical_device;
- unsigned mod_count;
-
- assert(mod_list->drmFormatModifierCount);
-
- /* We can allow everything here as it does not affect order and the application
- * is only allowed to specify modifiers that we support. */
- const struct ac_modifier_options modifier_options = {
- .dcc = true,
- .dcc_retile = true,
- };
-
- ac_get_supported_modifiers(&pdev->rad_info, &modifier_options,
- vk_format_to_pipe_format(format), &mod_count, NULL);
-
- uint64_t *mods = calloc(mod_count, sizeof(*mods));
-
- /* If allocations fail, fall back to a dumber solution. */
- if (!mods)
- return mod_list->pDrmFormatModifiers[0];
-
- ac_get_supported_modifiers(&pdev->rad_info, &modifier_options,
- vk_format_to_pipe_format(format), &mod_count, mods);
-
- for (unsigned i = 0; i < mod_count; ++i) {
- for (uint32_t j = 0; j < mod_list->drmFormatModifierCount; ++j) {
- if (mods[i] == mod_list->pDrmFormatModifiers[j]) {
- free(mods);
- return mod_list->pDrmFormatModifiers[j];
- }
- }
- }
- unreachable("App specified an invalid modifier");
+ const struct radv_physical_device *pdev = dev->physical_device;
+ unsigned mod_count;
+
+ assert(mod_list->drmFormatModifierCount);
+
+ /* We can allow everything here as it does not affect order and the application
+ * is only allowed to specify modifiers that we support. */
+ const struct ac_modifier_options modifier_options = {
+ .dcc = true,
+ .dcc_retile = true,
+ };
+
+ ac_get_supported_modifiers(&pdev->rad_info, &modifier_options, vk_format_to_pipe_format(format),
+ &mod_count, NULL);
+
+ uint64_t *mods = calloc(mod_count, sizeof(*mods));
+
+ /* If allocations fail, fall back to a dumber solution. */
+ if (!mods)
+ return mod_list->pDrmFormatModifiers[0];
+
+ ac_get_supported_modifiers(&pdev->rad_info, &modifier_options, vk_format_to_pipe_format(format),
+ &mod_count, mods);
+
+ for (unsigned i = 0; i < mod_count; ++i) {
+ for (uint32_t j = 0; j < mod_list->drmFormatModifierCount; ++j) {
+ if (mods[i] == mod_list->pDrmFormatModifiers[j]) {
+ free(mods);
+ return mod_list->pDrmFormatModifiers[j];
+ }
+ }
+ }
+ unreachable("App specified an invalid modifier");
}
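For context, a hypothetical caller-side sketch of the input this function selects from; an application would normally fill the list with modifiers previously reported via vkGetPhysicalDeviceFormatProperties2 / VkDrmFormatModifierPropertiesListEXT, and the first driver-preferred modifier that also appears in the list is chosen. The include paths and the single-entry list are assumptions for illustration.

#include <drm_fourcc.h> /* assumes libdrm's header is on the include path */
#include <vulkan/vulkan.h>

/* Illustrative only: one linear modifier; a real application would add
 * whatever modifiers it queried from the driver. */
static const uint64_t app_mods[] = {DRM_FORMAT_MOD_LINEAR};

static const VkImageDrmFormatModifierListCreateInfoEXT example_mod_list = {
   .sType = VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT,
   .drmFormatModifierCount = 1,
   .pDrmFormatModifiers = app_mods,
};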
VkResult
-radv_image_create(VkDevice _device,
- const struct radv_image_create_info *create_info,
- const VkAllocationCallbacks* alloc,
- VkImage *pImage)
+radv_image_create(VkDevice _device, const struct radv_image_create_info *create_info,
+ const VkAllocationCallbacks *alloc, VkImage *pImage)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
- uint64_t modifier = DRM_FORMAT_MOD_INVALID;
- struct radv_image *image = NULL;
- VkFormat format = radv_select_android_external_format(pCreateInfo->pNext,
- pCreateInfo->format);
- const struct VkImageDrmFormatModifierListCreateInfoEXT *mod_list =
- vk_find_struct_const(pCreateInfo->pNext, IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT);
- const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *explicit_mod =
- vk_find_struct_const(pCreateInfo->pNext, IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT);
- assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);
-
- const unsigned plane_count = vk_format_get_plane_count(format);
- const size_t image_struct_size = sizeof(*image) + sizeof(struct radv_image_plane) * plane_count;
-
- radv_assert(pCreateInfo->mipLevels > 0);
- radv_assert(pCreateInfo->arrayLayers > 0);
- radv_assert(pCreateInfo->samples > 0);
- radv_assert(pCreateInfo->extent.width > 0);
- radv_assert(pCreateInfo->extent.height > 0);
- radv_assert(pCreateInfo->extent.depth > 0);
-
- image = vk_zalloc2(&device->vk.alloc, alloc, image_struct_size, 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (!image)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
-
- vk_object_base_init(&device->vk, &image->base, VK_OBJECT_TYPE_IMAGE);
-
- image->type = pCreateInfo->imageType;
- image->info.width = pCreateInfo->extent.width;
- image->info.height = pCreateInfo->extent.height;
- image->info.depth = pCreateInfo->extent.depth;
- image->info.samples = pCreateInfo->samples;
- image->info.storage_samples = pCreateInfo->samples;
- image->info.array_size = pCreateInfo->arrayLayers;
- image->info.levels = pCreateInfo->mipLevels;
- image->info.num_channels = vk_format_get_nr_components(format);
-
- image->vk_format = format;
- image->tiling = pCreateInfo->tiling;
- image->usage = pCreateInfo->usage;
- image->flags = pCreateInfo->flags;
- image->plane_count = plane_count;
-
- image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
- if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
- for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i)
- if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL ||
- pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_FOREIGN_EXT)
- image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
- else
- image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i];
- }
-
- const VkExternalMemoryImageCreateInfo *external_info =
- vk_find_struct_const(pCreateInfo->pNext,
- EXTERNAL_MEMORY_IMAGE_CREATE_INFO) ;
-
- image->shareable = external_info;
- if (!vk_format_is_depth_or_stencil(format) && !image->shareable &&
- !(image->flags & VK_IMAGE_CREATE_SPARSE_ALIASED_BIT) &&
- pCreateInfo->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
- image->info.surf_index = &device->image_mrt_offset_counter;
- }
-
- if (mod_list)
- modifier = radv_select_modifier(device, format, mod_list);
- else if (explicit_mod)
- modifier = explicit_mod->drmFormatModifier;
-
- for (unsigned plane = 0; plane < image->plane_count; ++plane) {
- image->planes[plane].surface.flags =
- radv_get_surface_flags(device, image, plane, pCreateInfo, format);
- image->planes[plane].surface.modifier = modifier;
- }
-
- bool delay_layout = external_info &&
- (external_info->handleTypes & VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID);
-
- if (delay_layout) {
- *pImage = radv_image_to_handle(image);
- assert (!(image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT));
- return VK_SUCCESS;
- }
-
- VkResult result = radv_image_create_layout(device, *create_info, explicit_mod, image);
- if (result != VK_SUCCESS) {
- radv_destroy_image(device, alloc, image);
- return result;
- }
-
- if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
- image->alignment = MAX2(image->alignment, 4096);
- image->size = align64(image->size, image->alignment);
- image->offset = 0;
-
- image->bo = device->ws->buffer_create(device->ws, image->size, image->alignment,
- 0, RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL);
- if (!image->bo) {
- radv_destroy_image(device, alloc, image);
- return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
- }
- }
-
- if (device->instance->debug_flags & RADV_DEBUG_IMG) {
- radv_image_print_info(device, image);
- }
-
- *pImage = radv_image_to_handle(image);
-
- return VK_SUCCESS;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
+ uint64_t modifier = DRM_FORMAT_MOD_INVALID;
+ struct radv_image *image = NULL;
+ VkFormat format = radv_select_android_external_format(pCreateInfo->pNext, pCreateInfo->format);
+ const struct VkImageDrmFormatModifierListCreateInfoEXT *mod_list =
+ vk_find_struct_const(pCreateInfo->pNext, IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT);
+ const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *explicit_mod =
+ vk_find_struct_const(pCreateInfo->pNext, IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT);
+ assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);
+
+ const unsigned plane_count = vk_format_get_plane_count(format);
+ const size_t image_struct_size = sizeof(*image) + sizeof(struct radv_image_plane) * plane_count;
+
+ radv_assert(pCreateInfo->mipLevels > 0);
+ radv_assert(pCreateInfo->arrayLayers > 0);
+ radv_assert(pCreateInfo->samples > 0);
+ radv_assert(pCreateInfo->extent.width > 0);
+ radv_assert(pCreateInfo->extent.height > 0);
+ radv_assert(pCreateInfo->extent.depth > 0);
+
+ image =
+ vk_zalloc2(&device->vk.alloc, alloc, image_struct_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (!image)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ vk_object_base_init(&device->vk, &image->base, VK_OBJECT_TYPE_IMAGE);
+
+ image->type = pCreateInfo->imageType;
+ image->info.width = pCreateInfo->extent.width;
+ image->info.height = pCreateInfo->extent.height;
+ image->info.depth = pCreateInfo->extent.depth;
+ image->info.samples = pCreateInfo->samples;
+ image->info.storage_samples = pCreateInfo->samples;
+ image->info.array_size = pCreateInfo->arrayLayers;
+ image->info.levels = pCreateInfo->mipLevels;
+ image->info.num_channels = vk_format_get_nr_components(format);
+
+ image->vk_format = format;
+ image->tiling = pCreateInfo->tiling;
+ image->usage = pCreateInfo->usage;
+ image->flags = pCreateInfo->flags;
+ image->plane_count = plane_count;
+
+ image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
+ if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
+ for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i)
+ if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL ||
+ pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_FOREIGN_EXT)
+ image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
+ else
+ image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i];
+ }
+
+ const VkExternalMemoryImageCreateInfo *external_info =
+ vk_find_struct_const(pCreateInfo->pNext, EXTERNAL_MEMORY_IMAGE_CREATE_INFO);
+
+ image->shareable = external_info;
+ if (!vk_format_is_depth_or_stencil(format) && !image->shareable &&
+ !(image->flags & VK_IMAGE_CREATE_SPARSE_ALIASED_BIT) &&
+ pCreateInfo->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
+ image->info.surf_index = &device->image_mrt_offset_counter;
+ }
+
+ if (mod_list)
+ modifier = radv_select_modifier(device, format, mod_list);
+ else if (explicit_mod)
+ modifier = explicit_mod->drmFormatModifier;
+
+ for (unsigned plane = 0; plane < image->plane_count; ++plane) {
+ image->planes[plane].surface.flags =
+ radv_get_surface_flags(device, image, plane, pCreateInfo, format);
+ image->planes[plane].surface.modifier = modifier;
+ }
+
+ bool delay_layout =
+ external_info && (external_info->handleTypes &
+ VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID);
+
+ if (delay_layout) {
+ *pImage = radv_image_to_handle(image);
+ assert(!(image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT));
+ return VK_SUCCESS;
+ }
+
+ VkResult result = radv_image_create_layout(device, *create_info, explicit_mod, image);
+ if (result != VK_SUCCESS) {
+ radv_destroy_image(device, alloc, image);
+ return result;
+ }
+
+ if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
+ image->alignment = MAX2(image->alignment, 4096);
+ image->size = align64(image->size, image->alignment);
+ image->offset = 0;
+
+ image->bo = device->ws->buffer_create(device->ws, image->size, image->alignment, 0,
+ RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL);
+ if (!image->bo) {
+ radv_destroy_image(device, alloc, image);
+ return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ }
+ }
+
+ if (device->instance->debug_flags & RADV_DEBUG_IMG) {
+ radv_image_print_info(device, image);
+ }
+
+ *pImage = radv_image_to_handle(image);
+
+ return VK_SUCCESS;
}
static void
-radv_image_view_make_descriptor(struct radv_image_view *iview,
- struct radv_device *device,
- VkFormat vk_format,
- const VkComponentMapping *components,
- bool is_storage_image, bool disable_compression,
- unsigned plane_id, unsigned descriptor_plane_id)
+radv_image_view_make_descriptor(struct radv_image_view *iview, struct radv_device *device,
+ VkFormat vk_format, const VkComponentMapping *components,
+ bool is_storage_image, bool disable_compression, unsigned plane_id,
+ unsigned descriptor_plane_id)
{
- struct radv_image *image = iview->image;
- struct radv_image_plane *plane = &image->planes[plane_id];
- bool is_stencil = iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT;
- uint32_t blk_w;
- union radv_descriptor *descriptor;
- uint32_t hw_level = 0;
-
- if (is_storage_image) {
- descriptor = &iview->storage_descriptor;
- } else {
- descriptor = &iview->descriptor;
- }
-
- assert(vk_format_get_plane_count(vk_format) == 1);
- assert(plane->surface.blk_w % vk_format_get_blockwidth(plane->format) == 0);
- blk_w = plane->surface.blk_w / vk_format_get_blockwidth(plane->format) * vk_format_get_blockwidth(vk_format);
-
- if (device->physical_device->rad_info.chip_class >= GFX9)
- hw_level = iview->base_mip;
- radv_make_texture_descriptor(device, image, is_storage_image,
- iview->type,
- vk_format,
- components,
- hw_level, hw_level + iview->level_count - 1,
- iview->base_layer,
- iview->base_layer + iview->layer_count - 1,
- vk_format_get_plane_width(image->vk_format, plane_id, iview->extent.width),
- vk_format_get_plane_height(image->vk_format, plane_id, iview->extent.height),
- iview->extent.depth,
- descriptor->plane_descriptors[descriptor_plane_id],
- descriptor_plane_id ? NULL : descriptor->fmask_descriptor);
-
- const struct legacy_surf_level *base_level_info = NULL;
- if (device->physical_device->rad_info.chip_class <= GFX9) {
- if (is_stencil)
- base_level_info = &plane->surface.u.legacy.stencil_level[iview->base_mip];
- else
- base_level_info = &plane->surface.u.legacy.level[iview->base_mip];
- }
-
- bool enable_write_compression = radv_image_use_dcc_image_stores(device, image);
- if (is_storage_image && !enable_write_compression)
- disable_compression = true;
- si_set_mutable_tex_desc_fields(device, image,
- base_level_info,
- plane_id,
- iview->base_mip,
- iview->base_mip,
- blk_w, is_stencil, is_storage_image,
- disable_compression, enable_write_compression,
- descriptor->plane_descriptors[descriptor_plane_id]);
+ struct radv_image *image = iview->image;
+ struct radv_image_plane *plane = &image->planes[plane_id];
+ bool is_stencil = iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT;
+ uint32_t blk_w;
+ union radv_descriptor *descriptor;
+ uint32_t hw_level = 0;
+
+ if (is_storage_image) {
+ descriptor = &iview->storage_descriptor;
+ } else {
+ descriptor = &iview->descriptor;
+ }
+
+ assert(vk_format_get_plane_count(vk_format) == 1);
+ assert(plane->surface.blk_w % vk_format_get_blockwidth(plane->format) == 0);
+ blk_w = plane->surface.blk_w / vk_format_get_blockwidth(plane->format) *
+ vk_format_get_blockwidth(vk_format);
+
+ if (device->physical_device->rad_info.chip_class >= GFX9)
+ hw_level = iview->base_mip;
+ radv_make_texture_descriptor(
+ device, image, is_storage_image, iview->type, vk_format, components, hw_level,
+ hw_level + iview->level_count - 1, iview->base_layer,
+ iview->base_layer + iview->layer_count - 1,
+ vk_format_get_plane_width(image->vk_format, plane_id, iview->extent.width),
+ vk_format_get_plane_height(image->vk_format, plane_id, iview->extent.height),
+ iview->extent.depth, descriptor->plane_descriptors[descriptor_plane_id],
+ descriptor_plane_id ? NULL : descriptor->fmask_descriptor);
+
+ const struct legacy_surf_level *base_level_info = NULL;
+ if (device->physical_device->rad_info.chip_class <= GFX9) {
+ if (is_stencil)
+ base_level_info = &plane->surface.u.legacy.stencil_level[iview->base_mip];
+ else
+ base_level_info = &plane->surface.u.legacy.level[iview->base_mip];
+ }
+
+ bool enable_write_compression = radv_image_use_dcc_image_stores(device, image);
+ if (is_storage_image && !enable_write_compression)
+ disable_compression = true;
+ si_set_mutable_tex_desc_fields(device, image, base_level_info, plane_id, iview->base_mip,
+ iview->base_mip, blk_w, is_stencil, is_storage_image,
+ disable_compression, enable_write_compression,
+ descriptor->plane_descriptors[descriptor_plane_id]);
}
static unsigned
radv_plane_from_aspect(VkImageAspectFlags mask)
{
- switch(mask) {
- case VK_IMAGE_ASPECT_PLANE_1_BIT:
- case VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT:
- return 1;
- case VK_IMAGE_ASPECT_PLANE_2_BIT:
- case VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT:
- return 2;
- case VK_IMAGE_ASPECT_MEMORY_PLANE_3_BIT_EXT:
- return 3;
- default:
- return 0;
- }
+ switch (mask) {
+ case VK_IMAGE_ASPECT_PLANE_1_BIT:
+ case VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT:
+ return 1;
+ case VK_IMAGE_ASPECT_PLANE_2_BIT:
+ case VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT:
+ return 2;
+ case VK_IMAGE_ASPECT_MEMORY_PLANE_3_BIT_EXT:
+ return 3;
+ default:
+ return 0;
+ }
}
VkFormat
radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask)
{
- switch(mask) {
- case VK_IMAGE_ASPECT_PLANE_0_BIT:
- return image->planes[0].format;
- case VK_IMAGE_ASPECT_PLANE_1_BIT:
- return image->planes[1].format;
- case VK_IMAGE_ASPECT_PLANE_2_BIT:
- return image->planes[2].format;
- case VK_IMAGE_ASPECT_STENCIL_BIT:
- return vk_format_stencil_only(image->vk_format);
- case VK_IMAGE_ASPECT_DEPTH_BIT:
- return vk_format_depth_only(image->vk_format);
- case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
- return vk_format_depth_only(image->vk_format);
- default:
- return image->vk_format;
- }
+ switch (mask) {
+ case VK_IMAGE_ASPECT_PLANE_0_BIT:
+ return image->planes[0].format;
+ case VK_IMAGE_ASPECT_PLANE_1_BIT:
+ return image->planes[1].format;
+ case VK_IMAGE_ASPECT_PLANE_2_BIT:
+ return image->planes[2].format;
+ case VK_IMAGE_ASPECT_STENCIL_BIT:
+ return vk_format_stencil_only(image->vk_format);
+ case VK_IMAGE_ASPECT_DEPTH_BIT:
+ return vk_format_depth_only(image->vk_format);
+ case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
+ return vk_format_depth_only(image->vk_format);
+ default:
+ return image->vk_format;
+ }
}
/**
@@ -1894,495 +1775,458 @@ radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask)
*/
static bool
radv_image_view_can_fast_clear(const struct radv_device *device,
- const struct radv_image_view *iview)
+ const struct radv_image_view *iview)
{
- struct radv_image *image;
+ struct radv_image *image;
- if (!iview)
- return false;
- image = iview->image;
+ if (!iview)
+ return false;
+ image = iview->image;
- /* Only fast clear if the image itself can be fast cleared. */
- if (!radv_image_can_fast_clear(device, image))
- return false;
+ /* Only fast clear if the image itself can be fast cleared. */
+ if (!radv_image_can_fast_clear(device, image))
+ return false;
- /* Only fast clear if all layers are bound. */
- if (iview->base_layer > 0 ||
- iview->layer_count != image->info.array_size)
- return false;
+ /* Only fast clear if all layers are bound. */
+ if (iview->base_layer > 0 || iview->layer_count != image->info.array_size)
+ return false;
- /* Only fast clear if the view covers the whole image. */
- if (!radv_image_extent_compare(image, &iview->extent))
- return false;
+ /* Only fast clear if the view covers the whole image. */
+ if (!radv_image_extent_compare(image, &iview->extent))
+ return false;
- return true;
+ return true;
}
void
-radv_image_view_init(struct radv_image_view *iview,
- struct radv_device *device,
- const VkImageViewCreateInfo* pCreateInfo,
- const struct radv_image_view_extra_create_info* extra_create_info)
+radv_image_view_init(struct radv_image_view *iview, struct radv_device *device,
+ const VkImageViewCreateInfo *pCreateInfo,
+ const struct radv_image_view_extra_create_info *extra_create_info)
{
- RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
- const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;
-
- switch (image->type) {
- case VK_IMAGE_TYPE_1D:
- case VK_IMAGE_TYPE_2D:
- assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <= image->info.array_size);
- break;
- case VK_IMAGE_TYPE_3D:
- assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1
- <= radv_minify(image->info.depth, range->baseMipLevel));
- break;
- default:
- unreachable("bad VkImageType");
- }
- iview->image = image;
- iview->bo = image->bo;
- iview->type = pCreateInfo->viewType;
- iview->plane_id = radv_plane_from_aspect(pCreateInfo->subresourceRange.aspectMask);
- iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask;
- iview->multiple_planes = vk_format_get_plane_count(image->vk_format) > 1 && iview->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT;
-
- iview->base_layer = range->baseArrayLayer;
- iview->layer_count = radv_get_layerCount(image, range);
- iview->base_mip = range->baseMipLevel;
- iview->level_count = radv_get_levelCount(image, range);
-
- iview->vk_format = pCreateInfo->format;
-
- /* If the image has an Android external format, pCreateInfo->format will be
- * VK_FORMAT_UNDEFINED. */
- if (iview->vk_format == VK_FORMAT_UNDEFINED)
- iview->vk_format = image->vk_format;
-
- if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
- iview->vk_format = vk_format_stencil_only(iview->vk_format);
- } else if (iview->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
- iview->vk_format = vk_format_depth_only(iview->vk_format);
- }
-
- if (device->physical_device->rad_info.chip_class >= GFX9) {
- iview->extent = (VkExtent3D) {
- .width = image->info.width,
- .height = image->info.height,
- .depth = image->info.depth,
- };
- } else {
- iview->extent = (VkExtent3D) {
- .width = radv_minify(image->info.width , range->baseMipLevel),
- .height = radv_minify(image->info.height, range->baseMipLevel),
- .depth = radv_minify(image->info.depth , range->baseMipLevel),
- };
- }
-
- if (iview->vk_format != image->planes[iview->plane_id].format) {
- unsigned view_bw = vk_format_get_blockwidth(iview->vk_format);
- unsigned view_bh = vk_format_get_blockheight(iview->vk_format);
- unsigned img_bw = vk_format_get_blockwidth(image->vk_format);
- unsigned img_bh = vk_format_get_blockheight(image->vk_format);
-
- iview->extent.width = round_up_u32(iview->extent.width * view_bw, img_bw);
- iview->extent.height = round_up_u32(iview->extent.height * view_bh, img_bh);
-
- /* Comment ported from amdvlk -
- * If we have the following image:
- * Uncompressed pixels Compressed block sizes (4x4)
- * mip0: 22 x 22 6 x 6
- * mip1: 11 x 11 3 x 3
- * mip2: 5 x 5 2 x 2
- * mip3: 2 x 2 1 x 1
- * mip4: 1 x 1 1 x 1
- *
- * On GFX9 the descriptor is always programmed with the WIDTH and HEIGHT of the base level and the HW is
- * calculating the degradation of the block sizes down the mip-chain as follows (straight-up
- * divide-by-two integer math):
- * mip0: 6x6
- * mip1: 3x3
- * mip2: 1x1
- * mip3: 1x1
- *
- * This means that mip2 will be missing texels.
- *
- * Fix this by calculating the base mip's width and height, then convert
- * that, and round it back up to get the level 0 size. Clamp the
- * converted size between the original values, and the physical extent
- * of the base mipmap.
- *
- * On GFX10 we have to take care to not go over the physical extent
- * of the base mipmap as otherwise the GPU computes a different layout.
- * Note that the GPU does use the same base-mip dimensions for both a
- * block compatible format and the compressed format, so even if we take
- * the plain converted dimensions the physical layout is correct.
- */
- if (device->physical_device->rad_info.chip_class >= GFX9 &&
- vk_format_is_compressed(image->vk_format) &&
- !vk_format_is_compressed(iview->vk_format)) {
- /* If we have multiple levels in the view we should ideally take the last level,
- * but the mip calculation has a max(..., 1) so walking back to the base mip in an
- * useful way is hard. */
- if (iview->level_count > 1) {
- iview->extent.width = iview->image->planes[0].surface.u.gfx9.base_mip_width;
- iview->extent.height = iview->image->planes[0].surface.u.gfx9.base_mip_height;
- } else {
- unsigned lvl_width = radv_minify(image->info.width , range->baseMipLevel);
- unsigned lvl_height = radv_minify(image->info.height, range->baseMipLevel);
-
- lvl_width = round_up_u32(lvl_width * view_bw, img_bw);
- lvl_height = round_up_u32(lvl_height * view_bh, img_bh);
-
- lvl_width <<= range->baseMipLevel;
- lvl_height <<= range->baseMipLevel;
-
- iview->extent.width = CLAMP(lvl_width, iview->extent.width,
- iview->image->planes[0].surface.u.gfx9.base_mip_width);
- iview->extent.height = CLAMP(lvl_height, iview->extent.height,
- iview->image->planes[0].surface.u.gfx9.base_mip_height);
- }
- }
- }
-
- iview->support_fast_clear =
- radv_image_view_can_fast_clear(device, iview);
-
- bool disable_compression = extra_create_info ? extra_create_info->disable_compression: false;
- for (unsigned i = 0; i < (iview->multiple_planes ? vk_format_get_plane_count(image->vk_format) : 1); ++i) {
- VkFormat format = vk_format_get_plane_format(iview->vk_format, i);
- radv_image_view_make_descriptor(iview, device, format,
- &pCreateInfo->components,
- false, disable_compression,
- iview->plane_id + i, i);
- radv_image_view_make_descriptor(iview, device,
- format, &pCreateInfo->components,
- true, disable_compression,
- iview->plane_id + i, i);
- }
+ RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
+ const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;
+
+ switch (image->type) {
+ case VK_IMAGE_TYPE_1D:
+ case VK_IMAGE_TYPE_2D:
+ assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <=
+ image->info.array_size);
+ break;
+ case VK_IMAGE_TYPE_3D:
+ assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <=
+ radv_minify(image->info.depth, range->baseMipLevel));
+ break;
+ default:
+ unreachable("bad VkImageType");
+ }
+ iview->image = image;
+ iview->bo = image->bo;
+ iview->type = pCreateInfo->viewType;
+ iview->plane_id = radv_plane_from_aspect(pCreateInfo->subresourceRange.aspectMask);
+ iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask;
+ iview->multiple_planes = vk_format_get_plane_count(image->vk_format) > 1 &&
+ iview->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT;
+
+ iview->base_layer = range->baseArrayLayer;
+ iview->layer_count = radv_get_layerCount(image, range);
+ iview->base_mip = range->baseMipLevel;
+ iview->level_count = radv_get_levelCount(image, range);
+
+ iview->vk_format = pCreateInfo->format;
+
+ /* If the image has an Android external format, pCreateInfo->format will be
+ * VK_FORMAT_UNDEFINED. */
+ if (iview->vk_format == VK_FORMAT_UNDEFINED)
+ iview->vk_format = image->vk_format;
+
+ if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
+ iview->vk_format = vk_format_stencil_only(iview->vk_format);
+ } else if (iview->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
+ iview->vk_format = vk_format_depth_only(iview->vk_format);
+ }
+
+ if (device->physical_device->rad_info.chip_class >= GFX9) {
+ iview->extent = (VkExtent3D){
+ .width = image->info.width,
+ .height = image->info.height,
+ .depth = image->info.depth,
+ };
+ } else {
+ iview->extent = (VkExtent3D){
+ .width = radv_minify(image->info.width, range->baseMipLevel),
+ .height = radv_minify(image->info.height, range->baseMipLevel),
+ .depth = radv_minify(image->info.depth, range->baseMipLevel),
+ };
+ }
+
+ if (iview->vk_format != image->planes[iview->plane_id].format) {
+ unsigned view_bw = vk_format_get_blockwidth(iview->vk_format);
+ unsigned view_bh = vk_format_get_blockheight(iview->vk_format);
+ unsigned img_bw = vk_format_get_blockwidth(image->vk_format);
+ unsigned img_bh = vk_format_get_blockheight(image->vk_format);
+
+ iview->extent.width = round_up_u32(iview->extent.width * view_bw, img_bw);
+ iview->extent.height = round_up_u32(iview->extent.height * view_bh, img_bh);
+
+ /* Comment ported from amdvlk -
+ * If we have the following image:
+ * Uncompressed pixels Compressed block sizes (4x4)
+ * mip0: 22 x 22 6 x 6
+ * mip1: 11 x 11 3 x 3
+ * mip2: 5 x 5 2 x 2
+ * mip3: 2 x 2 1 x 1
+ * mip4: 1 x 1 1 x 1
+ *
+ * On GFX9 the descriptor is always programmed with the WIDTH and HEIGHT of the base level and
+ * the HW is calculating the degradation of the block sizes down the mip-chain as follows
+    * (straight-up divide-by-two integer math):
+    *   mip0: 6x6
+    *   mip1: 3x3
+    *   mip2: 1x1
+    *   mip3: 1x1
+ *
+ * This means that mip2 will be missing texels.
+ *
+ * Fix this by calculating the base mip's width and height, then convert
+ * that, and round it back up to get the level 0 size. Clamp the
+ * converted size between the original values, and the physical extent
+ * of the base mipmap.
+ *
+ * On GFX10 we have to take care to not go over the physical extent
+ * of the base mipmap as otherwise the GPU computes a different layout.
+ * Note that the GPU does use the same base-mip dimensions for both a
+ * block compatible format and the compressed format, so even if we take
+ * the plain converted dimensions the physical layout is correct.
+ */
+ if (device->physical_device->rad_info.chip_class >= GFX9 &&
+ vk_format_is_compressed(image->vk_format) && !vk_format_is_compressed(iview->vk_format)) {
+ /* If we have multiple levels in the view we should ideally take the last level,
+       * but the mip calculation has a max(..., 1) so walking back to the base mip in a
+       * useful way is hard. */
+ if (iview->level_count > 1) {
+ iview->extent.width = iview->image->planes[0].surface.u.gfx9.base_mip_width;
+ iview->extent.height = iview->image->planes[0].surface.u.gfx9.base_mip_height;
+ } else {
+ unsigned lvl_width = radv_minify(image->info.width, range->baseMipLevel);
+ unsigned lvl_height = radv_minify(image->info.height, range->baseMipLevel);
+
+ lvl_width = round_up_u32(lvl_width * view_bw, img_bw);
+ lvl_height = round_up_u32(lvl_height * view_bh, img_bh);
+
+ lvl_width <<= range->baseMipLevel;
+ lvl_height <<= range->baseMipLevel;
+
+ iview->extent.width = CLAMP(lvl_width, iview->extent.width,
+ iview->image->planes[0].surface.u.gfx9.base_mip_width);
+ iview->extent.height = CLAMP(lvl_height, iview->extent.height,
+ iview->image->planes[0].surface.u.gfx9.base_mip_height);
+ }
+ }
+ }
+
+ iview->support_fast_clear = radv_image_view_can_fast_clear(device, iview);
+
+ bool disable_compression = extra_create_info ? extra_create_info->disable_compression : false;
+ for (unsigned i = 0;
+ i < (iview->multiple_planes ? vk_format_get_plane_count(image->vk_format) : 1); ++i) {
+ VkFormat format = vk_format_get_plane_format(iview->vk_format, i);
+ radv_image_view_make_descriptor(iview, device, format, &pCreateInfo->components, false,
+ disable_compression, iview->plane_id + i, i);
+ radv_image_view_make_descriptor(iview, device, format, &pCreateInfo->components, true,
+ disable_compression, iview->plane_id + i, i);
+ }
}
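The amdvlk comment above is easier to follow with the numbers worked out. The standalone sketch below is not driver code: minify() and round_up() are local stand-ins assumed to match what radv_minify() and round_up_u32() do. For the 22x22 image with 4x4 blocks it prints the block count a correct per-level calculation gives next to the count GFX9 derives by halving the base level's block size, reproducing the missing-texels case at mip2 that the CLAMP above works around.

#include <stdio.h>

/* Assumed equivalents of radv_minify()/round_up_u32(): mip minification
 * clamped to 1, and a round-up (ceiling) division. */
static unsigned minify(unsigned v, unsigned level) { v >>= level; return v ? v : 1; }
static unsigned round_up(unsigned v, unsigned a) { return (v + a - 1) / a; }

int main(void)
{
   const unsigned base = 22, block = 4, levels = 5;

   for (unsigned l = 0; l < levels; l++) {
      unsigned texels = minify(base, l);              /* 22, 11, 5, 2, 1 */
      unsigned blocks = round_up(texels, block);      /*  6,  3, 2, 1, 1 */
      unsigned hw = minify(round_up(base, block), l); /*  6,  3, 1, 1, 1 */
      printf("mip%u: %2ux%-2u texels -> %u blocks, HW derives %u%s\n", l, texels, texels,
             blocks, hw, blocks != hw ? "  <-- missing texels" : "");
   }
   return 0;
}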
-bool radv_layout_is_htile_compressed(const struct radv_device *device,
- const struct radv_image *image,
- VkImageLayout layout,
- bool in_render_loop,
- unsigned queue_mask)
+bool
+radv_layout_is_htile_compressed(const struct radv_device *device, const struct radv_image *image,
+ VkImageLayout layout, bool in_render_loop, unsigned queue_mask)
{
- switch (layout) {
- case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
- case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL_KHR:
- case VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL_KHR:
- return radv_image_has_htile(image);
- case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
- return radv_image_has_htile(image) && queue_mask == (1u << RADV_QUEUE_GENERAL);
- case VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR:
- case VK_IMAGE_LAYOUT_GENERAL:
- /* It should be safe to enable TC-compat HTILE with
- * VK_IMAGE_LAYOUT_GENERAL if we are not in a render loop and
- * if the image doesn't have the storage bit set. This
- * improves performance for apps that use GENERAL for the main
- * depth pass because this allows compression and this reduces
- * the number of decompressions from/to GENERAL.
- */
- /* FIXME: Enabling TC-compat HTILE in GENERAL on the compute
- * queue is likely broken for eg. depth/stencil copies.
- */
- if (radv_image_is_tc_compat_htile(image) &&
- queue_mask & (1u << RADV_QUEUE_GENERAL) &&
- !in_render_loop &&
- !device->instance->disable_tc_compat_htile_in_general) {
- /* GFX10+ supports compressed writes to HTILE. */
- return device->physical_device->rad_info.chip_class >= GFX10 ||
- !(image->usage & VK_IMAGE_USAGE_STORAGE_BIT);
- } else {
- return false;
- }
- case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
- if (radv_image_is_tc_compat_htile(image) ||
- (radv_image_has_htile(image) &&
- !(image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
- VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)))) {
- /* Keep HTILE compressed if the image is only going to
- * be used as a depth/stencil read-only attachment.
- */
- return true;
- } else {
- return false;
- }
- break;
- default:
- return radv_image_is_tc_compat_htile(image);
- }
+ switch (layout) {
+ case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
+ case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL_KHR:
+ case VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL_KHR:
+ return radv_image_has_htile(image);
+ case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
+ return radv_image_has_htile(image) && queue_mask == (1u << RADV_QUEUE_GENERAL);
+ case VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR:
+ case VK_IMAGE_LAYOUT_GENERAL:
+ /* It should be safe to enable TC-compat HTILE with
+ * VK_IMAGE_LAYOUT_GENERAL if we are not in a render loop and
+ * if the image doesn't have the storage bit set. This
+ * improves performance for apps that use GENERAL for the main
+ * depth pass because this allows compression and this reduces
+ * the number of decompressions from/to GENERAL.
+ */
+ /* FIXME: Enabling TC-compat HTILE in GENERAL on the compute
+ * queue is likely broken for eg. depth/stencil copies.
+ */
+ if (radv_image_is_tc_compat_htile(image) && queue_mask & (1u << RADV_QUEUE_GENERAL) &&
+ !in_render_loop && !device->instance->disable_tc_compat_htile_in_general) {
+ /* GFX10+ supports compressed writes to HTILE. */
+ return device->physical_device->rad_info.chip_class >= GFX10 ||
+ !(image->usage & VK_IMAGE_USAGE_STORAGE_BIT);
+ } else {
+ return false;
+ }
+ case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
+ if (radv_image_is_tc_compat_htile(image) ||
+ (radv_image_has_htile(image) &&
+ !(image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)))) {
+ /* Keep HTILE compressed if the image is only going to
+ * be used as a depth/stencil read-only attachment.
+ */
+ return true;
+ } else {
+ return false;
+ }
+ break;
+ default:
+ return radv_image_is_tc_compat_htile(image);
+ }
}
-bool radv_layout_can_fast_clear(const struct radv_device *device,
- const struct radv_image *image,
- VkImageLayout layout,
- bool in_render_loop,
- unsigned queue_mask)
+bool
+radv_layout_can_fast_clear(const struct radv_device *device, const struct radv_image *image,
+ VkImageLayout layout, bool in_render_loop, unsigned queue_mask)
{
- if (radv_image_has_dcc(image) &&
- !radv_layout_dcc_compressed(device, image, layout, in_render_loop, queue_mask))
- return false;
+ if (radv_image_has_dcc(image) &&
+ !radv_layout_dcc_compressed(device, image, layout, in_render_loop, queue_mask))
+ return false;
- if (!(image->usage & RADV_IMAGE_USAGE_WRITE_BITS))
- return false;
+ if (!(image->usage & RADV_IMAGE_USAGE_WRITE_BITS))
+ return false;
- return layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL &&
- queue_mask == (1u << RADV_QUEUE_GENERAL);
+ return layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL &&
+ queue_mask == (1u << RADV_QUEUE_GENERAL);
}
-bool radv_layout_dcc_compressed(const struct radv_device *device,
- const struct radv_image *image,
- VkImageLayout layout,
- bool in_render_loop,
- unsigned queue_mask)
+bool
+radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_image *image,
+ VkImageLayout layout, bool in_render_loop, unsigned queue_mask)
{
- /* If the image is read-only, we can always just keep it compressed */
- if (!(image->usage & RADV_IMAGE_USAGE_WRITE_BITS) &&
- radv_image_has_dcc(image))
- return false;
-
- /* Don't compress compute transfer dst when image stores are not supported. */
- if ((layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL ||
- layout == VK_IMAGE_LAYOUT_GENERAL) &&
- (queue_mask & (1u << RADV_QUEUE_COMPUTE)) &&
- !radv_image_use_dcc_image_stores(device, image))
- return false;
-
- return radv_image_has_dcc(image) &&
- (device->physical_device->rad_info.chip_class >= GFX10 ||
- layout != VK_IMAGE_LAYOUT_GENERAL);
+ /* If the image is read-only, we can always just keep it compressed */
+ if (!(image->usage & RADV_IMAGE_USAGE_WRITE_BITS) && radv_image_has_dcc(image))
+ return false;
+
+ /* Don't compress compute transfer dst when image stores are not supported. */
+ if ((layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL || layout == VK_IMAGE_LAYOUT_GENERAL) &&
+ (queue_mask & (1u << RADV_QUEUE_COMPUTE)) && !radv_image_use_dcc_image_stores(device, image))
+ return false;
+
+ return radv_image_has_dcc(image) && (device->physical_device->rad_info.chip_class >= GFX10 ||
+ layout != VK_IMAGE_LAYOUT_GENERAL);
}
-bool radv_layout_fmask_compressed(const struct radv_device *device,
- const struct radv_image *image,
- VkImageLayout layout,
- unsigned queue_mask)
+bool
+radv_layout_fmask_compressed(const struct radv_device *device, const struct radv_image *image,
+ VkImageLayout layout, unsigned queue_mask)
{
- return radv_image_has_fmask(image) &&
- layout != VK_IMAGE_LAYOUT_GENERAL &&
- queue_mask == (1u << RADV_QUEUE_GENERAL);
+ return radv_image_has_fmask(image) && layout != VK_IMAGE_LAYOUT_GENERAL &&
+ queue_mask == (1u << RADV_QUEUE_GENERAL);
}
-unsigned radv_image_queue_family_mask(const struct radv_image *image, uint32_t family, uint32_t queue_family)
+unsigned
+radv_image_queue_family_mask(const struct radv_image *image, uint32_t family, uint32_t queue_family)
{
- if (!image->exclusive)
- return image->queue_family_mask;
- if (family == VK_QUEUE_FAMILY_EXTERNAL ||
- family == VK_QUEUE_FAMILY_FOREIGN_EXT)
- return ((1u << RADV_MAX_QUEUE_FAMILIES) - 1u) |
- (1u << RADV_QUEUE_FOREIGN);
- if (family == VK_QUEUE_FAMILY_IGNORED)
- return 1u << queue_family;
- return 1u << family;
+ if (!image->exclusive)
+ return image->queue_family_mask;
+ if (family == VK_QUEUE_FAMILY_EXTERNAL || family == VK_QUEUE_FAMILY_FOREIGN_EXT)
+ return ((1u << RADV_MAX_QUEUE_FAMILIES) - 1u) | (1u << RADV_QUEUE_FOREIGN);
+ if (family == VK_QUEUE_FAMILY_IGNORED)
+ return 1u << queue_family;
+ return 1u << family;
}
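For orientation, the masks this helper returns feed the queue_mask checks used throughout this file: radv_layout_can_fast_clear() and radv_layout_fmask_compressed() above both require exactly (1u << RADV_QUEUE_GENERAL). A tiny sketch with an assumed queue-family numbering (the enum values below are illustrative, not copied from the driver):

#include <stdio.h>

/* Illustrative numbering only. */
enum { Q_GENERAL = 0, Q_COMPUTE = 1, Q_TRANSFER = 2, MAX_QUEUE_FAMILIES = 3, Q_FOREIGN = 3 };

int main(void)
{
   /* Exclusive image shared with EXTERNAL/FOREIGN: every family plus the foreign bit. */
   unsigned external_mask = ((1u << MAX_QUEUE_FAMILIES) - 1u) | (1u << Q_FOREIGN);
   /* Exclusive image used on its own family (VK_QUEUE_FAMILY_IGNORED case). */
   unsigned general_mask = 1u << Q_GENERAL;

   printf("external: 0x%x, general-only: 0x%x\n", external_mask, general_mask); /* 0xf, 0x1 */
   return 0;
}

An externally shared image therefore never satisfies the general-queue-only checks, which keeps it out of the fast-clear and FMASK-compressed paths.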
VkResult
-radv_CreateImage(VkDevice device,
- const VkImageCreateInfo *pCreateInfo,
- const VkAllocationCallbacks *pAllocator,
- VkImage *pImage)
+radv_CreateImage(VkDevice device, const VkImageCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkImage *pImage)
{
#ifdef ANDROID
- const VkNativeBufferANDROID *gralloc_info =
- vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);
+ const VkNativeBufferANDROID *gralloc_info =
+ vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);
- if (gralloc_info)
- return radv_image_from_gralloc(device, pCreateInfo, gralloc_info,
- pAllocator, pImage);
+ if (gralloc_info)
+ return radv_image_from_gralloc(device, pCreateInfo, gralloc_info, pAllocator, pImage);
#endif
- const struct wsi_image_create_info *wsi_info =
- vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
- bool scanout = wsi_info && wsi_info->scanout;
-
- return radv_image_create(device,
- &(struct radv_image_create_info) {
- .vk_info = pCreateInfo,
- .scanout = scanout,
- },
- pAllocator,
- pImage);
+ const struct wsi_image_create_info *wsi_info =
+ vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
+ bool scanout = wsi_info && wsi_info->scanout;
+
+ return radv_image_create(device,
+ &(struct radv_image_create_info){
+ .vk_info = pCreateInfo,
+ .scanout = scanout,
+ },
+ pAllocator, pImage);
}
void
-radv_DestroyImage(VkDevice _device, VkImage _image,
- const VkAllocationCallbacks *pAllocator)
+radv_DestroyImage(VkDevice _device, VkImage _image, const VkAllocationCallbacks *pAllocator)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_image, image, _image);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_image, image, _image);
- if (!image)
- return;
+ if (!image)
+ return;
- radv_destroy_image(device, pAllocator, image);
+ radv_destroy_image(device, pAllocator, image);
}
-void radv_GetImageSubresourceLayout(
- VkDevice _device,
- VkImage _image,
- const VkImageSubresource* pSubresource,
- VkSubresourceLayout* pLayout)
+void
+radv_GetImageSubresourceLayout(VkDevice _device, VkImage _image,
+ const VkImageSubresource *pSubresource, VkSubresourceLayout *pLayout)
{
- RADV_FROM_HANDLE(radv_image, image, _image);
- RADV_FROM_HANDLE(radv_device, device, _device);
- int level = pSubresource->mipLevel;
- int layer = pSubresource->arrayLayer;
-
- unsigned plane_id = 0;
- if (vk_format_get_plane_count(image->vk_format) > 1)
- plane_id = radv_plane_from_aspect(pSubresource->aspectMask);
-
- struct radv_image_plane *plane = &image->planes[plane_id];
- struct radeon_surf *surface = &plane->surface;
-
- if (image->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
- unsigned mem_plane_id = radv_plane_from_aspect(pSubresource->aspectMask);
-
- assert(level == 0);
- assert(layer == 0);
-
- pLayout->offset = ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class,
- surface, mem_plane_id, 0);
- pLayout->rowPitch = ac_surface_get_plane_stride(device->physical_device->rad_info.chip_class,
- surface, mem_plane_id);
- pLayout->arrayPitch = 0;
- pLayout->depthPitch = 0;
- pLayout->size = ac_surface_get_plane_size(surface, mem_plane_id);
- } else if (device->physical_device->rad_info.chip_class >= GFX9) {
- uint64_t level_offset = surface->is_linear ? surface->u.gfx9.offset[level] : 0;
-
- pLayout->offset = ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class,
- &plane->surface, 0, layer) + level_offset;
- if (image->vk_format == VK_FORMAT_R32G32B32_UINT ||
- image->vk_format == VK_FORMAT_R32G32B32_SINT ||
- image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
- /* Adjust the number of bytes between each row because
- * the pitch is actually the number of components per
- * row.
- */
- pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe / 3;
- } else {
- uint32_t pitch = surface->is_linear ? surface->u.gfx9.pitch[level] : surface->u.gfx9.surf_pitch;
-
- assert(util_is_power_of_two_nonzero(surface->bpe));
- pLayout->rowPitch = pitch * surface->bpe;
- }
-
- pLayout->arrayPitch = surface->u.gfx9.surf_slice_size;
- pLayout->depthPitch = surface->u.gfx9.surf_slice_size;
- pLayout->size = surface->u.gfx9.surf_slice_size;
- if (image->type == VK_IMAGE_TYPE_3D)
- pLayout->size *= u_minify(image->info.depth, level);
- } else {
- pLayout->offset = surface->u.legacy.level[level].offset + (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4 * layer;
- pLayout->rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
- pLayout->arrayPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
- pLayout->depthPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
- pLayout->size = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
- if (image->type == VK_IMAGE_TYPE_3D)
- pLayout->size *= u_minify(image->info.depth, level);
- }
+ RADV_FROM_HANDLE(radv_image, image, _image);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ int level = pSubresource->mipLevel;
+ int layer = pSubresource->arrayLayer;
+
+ unsigned plane_id = 0;
+ if (vk_format_get_plane_count(image->vk_format) > 1)
+ plane_id = radv_plane_from_aspect(pSubresource->aspectMask);
+
+ struct radv_image_plane *plane = &image->planes[plane_id];
+ struct radeon_surf *surface = &plane->surface;
+
+ if (image->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
+ unsigned mem_plane_id = radv_plane_from_aspect(pSubresource->aspectMask);
+
+ assert(level == 0);
+ assert(layer == 0);
+
+ pLayout->offset = ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class,
+ surface, mem_plane_id, 0);
+ pLayout->rowPitch = ac_surface_get_plane_stride(device->physical_device->rad_info.chip_class,
+ surface, mem_plane_id);
+ pLayout->arrayPitch = 0;
+ pLayout->depthPitch = 0;
+ pLayout->size = ac_surface_get_plane_size(surface, mem_plane_id);
+ } else if (device->physical_device->rad_info.chip_class >= GFX9) {
+ uint64_t level_offset = surface->is_linear ? surface->u.gfx9.offset[level] : 0;
+
+ pLayout->offset = ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class,
+ &plane->surface, 0, layer) +
+ level_offset;
+ if (image->vk_format == VK_FORMAT_R32G32B32_UINT ||
+ image->vk_format == VK_FORMAT_R32G32B32_SINT ||
+ image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
+ /* Adjust the number of bytes between each row because
+ * the pitch is actually the number of components per
+ * row.
+ */
+ pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe / 3;
+ } else {
+ uint32_t pitch =
+ surface->is_linear ? surface->u.gfx9.pitch[level] : surface->u.gfx9.surf_pitch;
+
+ assert(util_is_power_of_two_nonzero(surface->bpe));
+ pLayout->rowPitch = pitch * surface->bpe;
+ }
+
+ pLayout->arrayPitch = surface->u.gfx9.surf_slice_size;
+ pLayout->depthPitch = surface->u.gfx9.surf_slice_size;
+ pLayout->size = surface->u.gfx9.surf_slice_size;
+ if (image->type == VK_IMAGE_TYPE_3D)
+ pLayout->size *= u_minify(image->info.depth, level);
+ } else {
+ pLayout->offset = surface->u.legacy.level[level].offset +
+ (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4 * layer;
+ pLayout->rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
+ pLayout->arrayPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
+ pLayout->depthPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
+ pLayout->size = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
+ if (image->type == VK_IMAGE_TYPE_3D)
+ pLayout->size *= u_minify(image->info.depth, level);
+ }
}
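From the application side, the fields filled in above are consumed through the standard vkGetImageSubresourceLayout() entry point. A minimal sketch, assuming a linearly tiled color image created elsewhere and a caller-supplied bytes-per-texel value:

#include <stdint.h>
#include <vulkan/vulkan.h>

/* Byte offset of texel (x, y) in array layer `layer`, mip 0, of a
 * VK_IMAGE_TILING_LINEAR image. */
static VkDeviceSize
linear_texel_offset(VkDevice device, VkImage image, uint32_t x, uint32_t y, uint32_t layer,
                    VkDeviceSize texel_size)
{
   const VkImageSubresource subresource = {
      .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
      .mipLevel = 0,
      .arrayLayer = 0, /* query once; arrayPitch is the stride to the other layers */
   };
   VkSubresourceLayout layout;

   vkGetImageSubresourceLayout(device, image, &subresource, &layout);

   return layout.offset + layer * layout.arrayPitch + (VkDeviceSize)y * layout.rowPitch +
          (VkDeviceSize)x * texel_size;
}

Note that even in the R32G32B32 special case above, the rowPitch the application sees is already expressed in bytes, so the same addressing formula applies.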
-VkResult radv_GetImageDrmFormatModifierPropertiesEXT(VkDevice _device,
- VkImage _image,
- VkImageDrmFormatModifierPropertiesEXT* pProperties)
+VkResult
+radv_GetImageDrmFormatModifierPropertiesEXT(VkDevice _device, VkImage _image,
+ VkImageDrmFormatModifierPropertiesEXT *pProperties)
{
- RADV_FROM_HANDLE(radv_image, image, _image);
+ RADV_FROM_HANDLE(radv_image, image, _image);
- pProperties->drmFormatModifier = image->planes[0].surface.modifier;
- return VK_SUCCESS;
+ pProperties->drmFormatModifier = image->planes[0].surface.modifier;
+ return VK_SUCCESS;
}
-
VkResult
-radv_CreateImageView(VkDevice _device,
- const VkImageViewCreateInfo *pCreateInfo,
- const VkAllocationCallbacks *pAllocator,
- VkImageView *pView)
+radv_CreateImageView(VkDevice _device, const VkImageViewCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkImageView *pView)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- struct radv_image_view *view;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ struct radv_image_view *view;
- view = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (view == NULL)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ view =
+ vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (view == NULL)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- vk_object_base_init(&device->vk, &view->base,
- VK_OBJECT_TYPE_IMAGE_VIEW);
+ vk_object_base_init(&device->vk, &view->base, VK_OBJECT_TYPE_IMAGE_VIEW);
- radv_image_view_init(view, device, pCreateInfo, NULL);
+ radv_image_view_init(view, device, pCreateInfo, NULL);
- *pView = radv_image_view_to_handle(view);
+ *pView = radv_image_view_to_handle(view);
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
void
-radv_DestroyImageView(VkDevice _device, VkImageView _iview,
- const VkAllocationCallbacks *pAllocator)
+radv_DestroyImageView(VkDevice _device, VkImageView _iview, const VkAllocationCallbacks *pAllocator)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_image_view, iview, _iview);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_image_view, iview, _iview);
- if (!iview)
- return;
+ if (!iview)
+ return;
- vk_object_base_finish(&iview->base);
- vk_free2(&device->vk.alloc, pAllocator, iview);
+ vk_object_base_finish(&iview->base);
+ vk_free2(&device->vk.alloc, pAllocator, iview);
}
-void radv_buffer_view_init(struct radv_buffer_view *view,
- struct radv_device *device,
- const VkBufferViewCreateInfo* pCreateInfo)
+void
+radv_buffer_view_init(struct radv_buffer_view *view, struct radv_device *device,
+ const VkBufferViewCreateInfo *pCreateInfo)
{
- RADV_FROM_HANDLE(radv_buffer, buffer, pCreateInfo->buffer);
+ RADV_FROM_HANDLE(radv_buffer, buffer, pCreateInfo->buffer);
- view->bo = buffer->bo;
- view->range = pCreateInfo->range == VK_WHOLE_SIZE ?
- buffer->size - pCreateInfo->offset : pCreateInfo->range;
- view->vk_format = pCreateInfo->format;
+ view->bo = buffer->bo;
+ view->range =
+ pCreateInfo->range == VK_WHOLE_SIZE ? buffer->size - pCreateInfo->offset : pCreateInfo->range;
+ view->vk_format = pCreateInfo->format;
- radv_make_buffer_descriptor(device, buffer, view->vk_format,
- pCreateInfo->offset, view->range, view->state);
+ radv_make_buffer_descriptor(device, buffer, view->vk_format, pCreateInfo->offset, view->range,
+ view->state);
}
VkResult
-radv_CreateBufferView(VkDevice _device,
- const VkBufferViewCreateInfo *pCreateInfo,
- const VkAllocationCallbacks *pAllocator,
- VkBufferView *pView)
+radv_CreateBufferView(VkDevice _device, const VkBufferViewCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkBufferView *pView)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- struct radv_buffer_view *view;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ struct radv_buffer_view *view;
- view = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (!view)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ view =
+ vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (!view)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- vk_object_base_init(&device->vk, &view->base,
- VK_OBJECT_TYPE_BUFFER_VIEW);
+ vk_object_base_init(&device->vk, &view->base, VK_OBJECT_TYPE_BUFFER_VIEW);
- radv_buffer_view_init(view, device, pCreateInfo);
+ radv_buffer_view_init(view, device, pCreateInfo);
- *pView = radv_buffer_view_to_handle(view);
+ *pView = radv_buffer_view_to_handle(view);
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
void
radv_DestroyBufferView(VkDevice _device, VkBufferView bufferView,
- const VkAllocationCallbacks *pAllocator)
+ const VkAllocationCallbacks *pAllocator)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_buffer_view, view, bufferView);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_buffer_view, view, bufferView);
- if (!view)
- return;
+ if (!view)
+ return;
- vk_object_base_finish(&view->base);
- vk_free2(&device->vk.alloc, pAllocator, view);
+ vk_object_base_finish(&view->base);
+ vk_free2(&device->vk.alloc, pAllocator, view);
}
diff --git a/src/amd/vulkan/radv_llvm_helper.cpp b/src/amd/vulkan/radv_llvm_helper.cpp
index 612548e4219..f5eed3545e4 100644
--- a/src/amd/vulkan/radv_llvm_helper.cpp
+++ b/src/amd/vulkan/radv_llvm_helper.cpp
@@ -20,131 +20,125 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
-#include "ac_llvm_util.h"
#include "ac_llvm_build.h"
+#include "ac_llvm_util.h"
#include "radv_shader_helper.h"
#include <list>
class radv_llvm_per_thread_info {
-public:
- radv_llvm_per_thread_info(enum radeon_family arg_family,
- enum ac_target_machine_options arg_tm_options,
- unsigned arg_wave_size)
- : family(arg_family), tm_options(arg_tm_options),
- wave_size(arg_wave_size), passes(NULL), passes_wave32(NULL) {}
-
- ~radv_llvm_per_thread_info()
- {
- ac_destroy_llvm_compiler(&llvm_info);
- }
-
- bool init(void)
- {
- if (!ac_init_llvm_compiler(&llvm_info,
- family,
- tm_options))
- return false;
-
- passes = ac_create_llvm_passes(llvm_info.tm);
- if (!passes)
- return false;
-
- if (llvm_info.tm_wave32) {
- passes_wave32 = ac_create_llvm_passes(llvm_info.tm_wave32);
- if (!passes_wave32)
- return false;
- }
-
- return true;
- }
-
- bool compile_to_memory_buffer(LLVMModuleRef module,
- char **pelf_buffer, size_t *pelf_size)
- {
- struct ac_compiler_passes *p = wave_size == 32 ? passes_wave32 : passes;
- return ac_compile_module_to_elf(p, module, pelf_buffer, pelf_size);
- }
-
- bool is_same(enum radeon_family arg_family,
- enum ac_target_machine_options arg_tm_options,
- unsigned arg_wave_size) {
- if (arg_family == family &&
- arg_tm_options == tm_options &&
- arg_wave_size == wave_size)
- return true;
- return false;
- }
- struct ac_llvm_compiler llvm_info;
-private:
- enum radeon_family family;
- enum ac_target_machine_options tm_options;
- unsigned wave_size;
- struct ac_compiler_passes *passes;
- struct ac_compiler_passes *passes_wave32;
+ public:
+ radv_llvm_per_thread_info(enum radeon_family arg_family,
+ enum ac_target_machine_options arg_tm_options, unsigned arg_wave_size)
+ : family(arg_family), tm_options(arg_tm_options), wave_size(arg_wave_size), passes(NULL),
+ passes_wave32(NULL)
+ {
+ }
+
+ ~radv_llvm_per_thread_info()
+ {
+ ac_destroy_llvm_compiler(&llvm_info);
+ }
+
+ bool init(void)
+ {
+ if (!ac_init_llvm_compiler(&llvm_info, family, tm_options))
+ return false;
+
+ passes = ac_create_llvm_passes(llvm_info.tm);
+ if (!passes)
+ return false;
+
+ if (llvm_info.tm_wave32) {
+ passes_wave32 = ac_create_llvm_passes(llvm_info.tm_wave32);
+ if (!passes_wave32)
+ return false;
+ }
+
+ return true;
+ }
+
+ bool compile_to_memory_buffer(LLVMModuleRef module, char **pelf_buffer, size_t *pelf_size)
+ {
+ struct ac_compiler_passes *p = wave_size == 32 ? passes_wave32 : passes;
+ return ac_compile_module_to_elf(p, module, pelf_buffer, pelf_size);
+ }
+
+ bool is_same(enum radeon_family arg_family, enum ac_target_machine_options arg_tm_options,
+ unsigned arg_wave_size)
+ {
+ if (arg_family == family && arg_tm_options == tm_options && arg_wave_size == wave_size)
+ return true;
+ return false;
+ }
+ struct ac_llvm_compiler llvm_info;
+
+ private:
+ enum radeon_family family;
+ enum ac_target_machine_options tm_options;
+ unsigned wave_size;
+ struct ac_compiler_passes *passes;
+ struct ac_compiler_passes *passes_wave32;
};
/* we have to store a linked list per thread due to the possibility of multiple GPUs being required */
static thread_local std::list<radv_llvm_per_thread_info> radv_llvm_per_thread_list;
-bool radv_compile_to_elf(struct ac_llvm_compiler *info,
- LLVMModuleRef module,
- char **pelf_buffer, size_t *pelf_size)
+bool
+radv_compile_to_elf(struct ac_llvm_compiler *info, LLVMModuleRef module, char **pelf_buffer,
+ size_t *pelf_size)
{
- radv_llvm_per_thread_info *thread_info = nullptr;
-
- for (auto &I : radv_llvm_per_thread_list) {
- if (I.llvm_info.tm == info->tm) {
- thread_info = &I;
- break;
- }
- }
-
- if (!thread_info) {
- struct ac_compiler_passes *passes = ac_create_llvm_passes(info->tm);
- bool ret = ac_compile_module_to_elf(passes, module, pelf_buffer, pelf_size);
- ac_destroy_llvm_passes(passes);
- return ret;
- }
-
- return thread_info->compile_to_memory_buffer(module, pelf_buffer, pelf_size);
+ radv_llvm_per_thread_info *thread_info = nullptr;
+
+ for (auto &I : radv_llvm_per_thread_list) {
+ if (I.llvm_info.tm == info->tm) {
+ thread_info = &I;
+ break;
+ }
+ }
+
+ if (!thread_info) {
+ struct ac_compiler_passes *passes = ac_create_llvm_passes(info->tm);
+ bool ret = ac_compile_module_to_elf(passes, module, pelf_buffer, pelf_size);
+ ac_destroy_llvm_passes(passes);
+ return ret;
+ }
+
+ return thread_info->compile_to_memory_buffer(module, pelf_buffer, pelf_size);
}
-bool radv_init_llvm_compiler(struct ac_llvm_compiler *info,
- bool thread_compiler,
- enum radeon_family family,
- enum ac_target_machine_options tm_options,
- unsigned wave_size)
+bool
+radv_init_llvm_compiler(struct ac_llvm_compiler *info, bool thread_compiler,
+ enum radeon_family family, enum ac_target_machine_options tm_options,
+ unsigned wave_size)
{
- if (thread_compiler) {
- for (auto &I : radv_llvm_per_thread_list) {
- if (I.is_same(family, tm_options, wave_size)) {
- *info = I.llvm_info;
- return true;
- }
- }
-
- radv_llvm_per_thread_list.emplace_back(family, tm_options, wave_size);
- radv_llvm_per_thread_info &tinfo = radv_llvm_per_thread_list.back();
-
- if (!tinfo.init()) {
- radv_llvm_per_thread_list.pop_back();
- return false;
- }
-
- *info = tinfo.llvm_info;
- return true;
- }
-
- if (!ac_init_llvm_compiler(info,
- family,
- tm_options))
- return false;
- return true;
+ if (thread_compiler) {
+ for (auto &I : radv_llvm_per_thread_list) {
+ if (I.is_same(family, tm_options, wave_size)) {
+ *info = I.llvm_info;
+ return true;
+ }
+ }
+
+ radv_llvm_per_thread_list.emplace_back(family, tm_options, wave_size);
+ radv_llvm_per_thread_info &tinfo = radv_llvm_per_thread_list.back();
+
+ if (!tinfo.init()) {
+ radv_llvm_per_thread_list.pop_back();
+ return false;
+ }
+
+ *info = tinfo.llvm_info;
+ return true;
+ }
+
+ if (!ac_init_llvm_compiler(info, family, tm_options))
+ return false;
+ return true;
}
-void radv_destroy_llvm_compiler(struct ac_llvm_compiler *info,
- bool thread_compiler)
+void
+radv_destroy_llvm_compiler(struct ac_llvm_compiler *info, bool thread_compiler)
{
- if (!thread_compiler)
- ac_destroy_llvm_compiler(info);
+ if (!thread_compiler)
+ ac_destroy_llvm_compiler(info);
}
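The helper above caches one initialized LLVM compiler per (family, target-machine options, wave size) tuple in a thread-local list, so each thread reuses its own target machines across compiles instead of re-creating them. Below is a standalone C analogue of that lookup-or-create pattern; all names and the key fields are invented for illustration.

#include <stdlib.h>

struct compiler_key {
   int family;
   int tm_options;
   unsigned wave_size;
};

struct compiler_entry {
   struct compiler_key key;
   void *compiler; /* stands in for struct ac_llvm_compiler */
   struct compiler_entry *next;
};

/* One list per thread, mirroring radv_llvm_per_thread_list. */
static _Thread_local struct compiler_entry *thread_compilers;

static void *
create_compiler(const struct compiler_key *key)
{
   (void)key;
   return malloc(1); /* placeholder for ac_init_llvm_compiler() + pass setup */
}

/* Return this thread's compiler for the given configuration, creating and
 * caching it on the first miss. */
static void *
get_thread_compiler(struct compiler_key key)
{
   for (struct compiler_entry *e = thread_compilers; e; e = e->next) {
      if (e->key.family == key.family && e->key.tm_options == key.tm_options &&
          e->key.wave_size == key.wave_size)
         return e->compiler;
   }

   struct compiler_entry *e = malloc(sizeof(*e));
   if (!e)
      return NULL;
   e->key = key;
   e->compiler = create_compiler(&key);
   e->next = thread_compilers;
   thread_compilers = e;
   return e->compiler;
}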
diff --git a/src/amd/vulkan/radv_meta.c b/src/amd/vulkan/radv_meta.c
index a09bfa1755c..a38c7911601 100644
--- a/src/amd/vulkan/radv_meta.c
+++ b/src/amd/vulkan/radv_meta.c
@@ -35,213 +35,205 @@
#include <sys/stat.h>
void
-radv_meta_save(struct radv_meta_saved_state *state,
- struct radv_cmd_buffer *cmd_buffer, uint32_t flags)
+radv_meta_save(struct radv_meta_saved_state *state, struct radv_cmd_buffer *cmd_buffer,
+ uint32_t flags)
{
- VkPipelineBindPoint bind_point =
- flags & RADV_META_SAVE_GRAPHICS_PIPELINE ?
- VK_PIPELINE_BIND_POINT_GRAPHICS :
- VK_PIPELINE_BIND_POINT_COMPUTE;
- struct radv_descriptor_state *descriptors_state =
- radv_get_descriptors_state(cmd_buffer, bind_point);
-
- assert(flags & (RADV_META_SAVE_GRAPHICS_PIPELINE |
- RADV_META_SAVE_COMPUTE_PIPELINE));
-
- state->flags = flags;
-
- if (state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE) {
- assert(!(state->flags & RADV_META_SAVE_COMPUTE_PIPELINE));
-
- state->old_pipeline = cmd_buffer->state.pipeline;
-
- /* Save all viewports. */
- state->viewport.count = cmd_buffer->state.dynamic.viewport.count;
- typed_memcpy(state->viewport.viewports,
- cmd_buffer->state.dynamic.viewport.viewports,
- MAX_VIEWPORTS);
-
- /* Save all scissors. */
- state->scissor.count = cmd_buffer->state.dynamic.scissor.count;
- typed_memcpy(state->scissor.scissors,
- cmd_buffer->state.dynamic.scissor.scissors,
- MAX_SCISSORS);
-
- state->cull_mode = cmd_buffer->state.dynamic.cull_mode;
- state->front_face = cmd_buffer->state.dynamic.front_face;
-
- state->primitive_topology = cmd_buffer->state.dynamic.primitive_topology;
-
- state->depth_test_enable = cmd_buffer->state.dynamic.depth_test_enable;
- state->depth_write_enable = cmd_buffer->state.dynamic.depth_write_enable;
- state->depth_compare_op = cmd_buffer->state.dynamic.depth_compare_op;
- state->depth_bounds_test_enable = cmd_buffer->state.dynamic.depth_bounds_test_enable;
- state->stencil_test_enable = cmd_buffer->state.dynamic.stencil_test_enable;
-
- state->stencil_op.front.compare_op = cmd_buffer->state.dynamic.stencil_op.front.compare_op;
- state->stencil_op.front.fail_op = cmd_buffer->state.dynamic.stencil_op.front.fail_op;
- state->stencil_op.front.pass_op = cmd_buffer->state.dynamic.stencil_op.front.pass_op;
- state->stencil_op.front.depth_fail_op = cmd_buffer->state.dynamic.stencil_op.front.depth_fail_op;
-
- state->stencil_op.back.compare_op = cmd_buffer->state.dynamic.stencil_op.back.compare_op;
- state->stencil_op.back.fail_op = cmd_buffer->state.dynamic.stencil_op.back.fail_op;
- state->stencil_op.back.pass_op = cmd_buffer->state.dynamic.stencil_op.back.pass_op;
- state->stencil_op.back.depth_fail_op = cmd_buffer->state.dynamic.stencil_op.back.depth_fail_op;
-
- state->fragment_shading_rate.size = cmd_buffer->state.dynamic.fragment_shading_rate.size;
- state->fragment_shading_rate.combiner_ops[0] = cmd_buffer->state.dynamic.fragment_shading_rate.combiner_ops[0];
- state->fragment_shading_rate.combiner_ops[1] = cmd_buffer->state.dynamic.fragment_shading_rate.combiner_ops[1];
- }
-
- if (state->flags & RADV_META_SAVE_SAMPLE_LOCATIONS) {
- typed_memcpy(&state->sample_location,
- &cmd_buffer->state.dynamic.sample_location, 1);
- }
-
- if (state->flags & RADV_META_SAVE_COMPUTE_PIPELINE) {
- assert(!(state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE));
-
- state->old_pipeline = cmd_buffer->state.compute_pipeline;
- }
-
- if (state->flags & RADV_META_SAVE_DESCRIPTORS) {
- state->old_descriptor_set0 = descriptors_state->sets[0];
- if (!(descriptors_state->valid & 1) || !state->old_descriptor_set0)
- state->flags &= ~RADV_META_SAVE_DESCRIPTORS;
- }
-
- if (state->flags & RADV_META_SAVE_CONSTANTS) {
- memcpy(state->push_constants, cmd_buffer->push_constants,
- MAX_PUSH_CONSTANTS_SIZE);
- }
-
- if (state->flags & RADV_META_SAVE_PASS) {
- state->pass = cmd_buffer->state.pass;
- state->subpass = cmd_buffer->state.subpass;
- state->framebuffer = cmd_buffer->state.framebuffer;
- state->attachments = cmd_buffer->state.attachments;
- state->render_area = cmd_buffer->state.render_area;
- }
+ VkPipelineBindPoint bind_point = flags & RADV_META_SAVE_GRAPHICS_PIPELINE
+ ? VK_PIPELINE_BIND_POINT_GRAPHICS
+ : VK_PIPELINE_BIND_POINT_COMPUTE;
+ struct radv_descriptor_state *descriptors_state =
+ radv_get_descriptors_state(cmd_buffer, bind_point);
+
+ assert(flags & (RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_COMPUTE_PIPELINE));
+
+ state->flags = flags;
+
+ if (state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE) {
+ assert(!(state->flags & RADV_META_SAVE_COMPUTE_PIPELINE));
+
+ state->old_pipeline = cmd_buffer->state.pipeline;
+
+ /* Save all viewports. */
+ state->viewport.count = cmd_buffer->state.dynamic.viewport.count;
+ typed_memcpy(state->viewport.viewports, cmd_buffer->state.dynamic.viewport.viewports,
+ MAX_VIEWPORTS);
+
+ /* Save all scissors. */
+ state->scissor.count = cmd_buffer->state.dynamic.scissor.count;
+ typed_memcpy(state->scissor.scissors, cmd_buffer->state.dynamic.scissor.scissors,
+ MAX_SCISSORS);
+
+ state->cull_mode = cmd_buffer->state.dynamic.cull_mode;
+ state->front_face = cmd_buffer->state.dynamic.front_face;
+
+ state->primitive_topology = cmd_buffer->state.dynamic.primitive_topology;
+
+ state->depth_test_enable = cmd_buffer->state.dynamic.depth_test_enable;
+ state->depth_write_enable = cmd_buffer->state.dynamic.depth_write_enable;
+ state->depth_compare_op = cmd_buffer->state.dynamic.depth_compare_op;
+ state->depth_bounds_test_enable = cmd_buffer->state.dynamic.depth_bounds_test_enable;
+ state->stencil_test_enable = cmd_buffer->state.dynamic.stencil_test_enable;
+
+ state->stencil_op.front.compare_op = cmd_buffer->state.dynamic.stencil_op.front.compare_op;
+ state->stencil_op.front.fail_op = cmd_buffer->state.dynamic.stencil_op.front.fail_op;
+ state->stencil_op.front.pass_op = cmd_buffer->state.dynamic.stencil_op.front.pass_op;
+ state->stencil_op.front.depth_fail_op =
+ cmd_buffer->state.dynamic.stencil_op.front.depth_fail_op;
+
+ state->stencil_op.back.compare_op = cmd_buffer->state.dynamic.stencil_op.back.compare_op;
+ state->stencil_op.back.fail_op = cmd_buffer->state.dynamic.stencil_op.back.fail_op;
+ state->stencil_op.back.pass_op = cmd_buffer->state.dynamic.stencil_op.back.pass_op;
+ state->stencil_op.back.depth_fail_op =
+ cmd_buffer->state.dynamic.stencil_op.back.depth_fail_op;
+
+ state->fragment_shading_rate.size = cmd_buffer->state.dynamic.fragment_shading_rate.size;
+ state->fragment_shading_rate.combiner_ops[0] =
+ cmd_buffer->state.dynamic.fragment_shading_rate.combiner_ops[0];
+ state->fragment_shading_rate.combiner_ops[1] =
+ cmd_buffer->state.dynamic.fragment_shading_rate.combiner_ops[1];
+ }
+
+ if (state->flags & RADV_META_SAVE_SAMPLE_LOCATIONS) {
+ typed_memcpy(&state->sample_location, &cmd_buffer->state.dynamic.sample_location, 1);
+ }
+
+ if (state->flags & RADV_META_SAVE_COMPUTE_PIPELINE) {
+ assert(!(state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE));
+
+ state->old_pipeline = cmd_buffer->state.compute_pipeline;
+ }
+
+ if (state->flags & RADV_META_SAVE_DESCRIPTORS) {
+ state->old_descriptor_set0 = descriptors_state->sets[0];
+ if (!(descriptors_state->valid & 1) || !state->old_descriptor_set0)
+ state->flags &= ~RADV_META_SAVE_DESCRIPTORS;
+ }
+
+ if (state->flags & RADV_META_SAVE_CONSTANTS) {
+ memcpy(state->push_constants, cmd_buffer->push_constants, MAX_PUSH_CONSTANTS_SIZE);
+ }
+
+ if (state->flags & RADV_META_SAVE_PASS) {
+ state->pass = cmd_buffer->state.pass;
+ state->subpass = cmd_buffer->state.subpass;
+ state->framebuffer = cmd_buffer->state.framebuffer;
+ state->attachments = cmd_buffer->state.attachments;
+ state->render_area = cmd_buffer->state.render_area;
+ }
}
void
-radv_meta_restore(const struct radv_meta_saved_state *state,
- struct radv_cmd_buffer *cmd_buffer)
+radv_meta_restore(const struct radv_meta_saved_state *state, struct radv_cmd_buffer *cmd_buffer)
{
- VkPipelineBindPoint bind_point =
- state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE ?
- VK_PIPELINE_BIND_POINT_GRAPHICS :
- VK_PIPELINE_BIND_POINT_COMPUTE;
-
- if (state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE) {
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_GRAPHICS,
- radv_pipeline_to_handle(state->old_pipeline));
-
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_PIPELINE;
-
- /* Restore all viewports. */
- cmd_buffer->state.dynamic.viewport.count = state->viewport.count;
- typed_memcpy(cmd_buffer->state.dynamic.viewport.viewports,
- state->viewport.viewports,
- MAX_VIEWPORTS);
-
- /* Restore all scissors. */
- cmd_buffer->state.dynamic.scissor.count = state->scissor.count;
- typed_memcpy(cmd_buffer->state.dynamic.scissor.scissors,
- state->scissor.scissors,
- MAX_SCISSORS);
-
- cmd_buffer->state.dynamic.cull_mode = state->cull_mode;
- cmd_buffer->state.dynamic.front_face = state->front_face;
-
- cmd_buffer->state.dynamic.primitive_topology = state->primitive_topology;
-
- cmd_buffer->state.dynamic.depth_test_enable = state->depth_test_enable;
- cmd_buffer->state.dynamic.depth_write_enable = state->depth_write_enable;
- cmd_buffer->state.dynamic.depth_compare_op = state->depth_compare_op;
- cmd_buffer->state.dynamic.depth_bounds_test_enable = state->depth_bounds_test_enable;
- cmd_buffer->state.dynamic.stencil_test_enable = state->stencil_test_enable;
-
- cmd_buffer->state.dynamic.stencil_op.front.compare_op = state->stencil_op.front.compare_op;
- cmd_buffer->state.dynamic.stencil_op.front.fail_op = state->stencil_op.front.fail_op;
- cmd_buffer->state.dynamic.stencil_op.front.pass_op = state->stencil_op.front.pass_op;
- cmd_buffer->state.dynamic.stencil_op.front.depth_fail_op = state->stencil_op.front.depth_fail_op;
-
- cmd_buffer->state.dynamic.stencil_op.back.compare_op = state->stencil_op.back.compare_op;
- cmd_buffer->state.dynamic.stencil_op.back.fail_op = state->stencil_op.back.fail_op;
- cmd_buffer->state.dynamic.stencil_op.back.pass_op = state->stencil_op.back.pass_op;
- cmd_buffer->state.dynamic.stencil_op.back.depth_fail_op = state->stencil_op.back.depth_fail_op;
-
- cmd_buffer->state.dynamic.fragment_shading_rate.size = state->fragment_shading_rate.size;
- cmd_buffer->state.dynamic.fragment_shading_rate.combiner_ops[0] = state->fragment_shading_rate.combiner_ops[0];
- cmd_buffer->state.dynamic.fragment_shading_rate.combiner_ops[1] = state->fragment_shading_rate.combiner_ops[1];
-
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_VIEWPORT |
- RADV_CMD_DIRTY_DYNAMIC_SCISSOR |
- RADV_CMD_DIRTY_DYNAMIC_CULL_MODE |
- RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE |
- RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY |
- RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE |
- RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE |
- RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP |
- RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE |
- RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE |
- RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP |
- RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE;
- }
-
- if (state->flags & RADV_META_SAVE_SAMPLE_LOCATIONS) {
- typed_memcpy(&cmd_buffer->state.dynamic.sample_location.locations,
- &state->sample_location.locations, 1);
-
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS;
- }
-
- if (state->flags & RADV_META_SAVE_COMPUTE_PIPELINE) {
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_COMPUTE,
- radv_pipeline_to_handle(state->old_pipeline));
- }
-
- if (state->flags & RADV_META_SAVE_DESCRIPTORS) {
- radv_set_descriptor_set(cmd_buffer, bind_point,
- state->old_descriptor_set0, 0);
- }
-
- if (state->flags & RADV_META_SAVE_CONSTANTS) {
- VkShaderStageFlags stages = VK_SHADER_STAGE_COMPUTE_BIT;
-
- if (state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE)
- stages |= VK_SHADER_STAGE_ALL_GRAPHICS;
-
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_NULL_HANDLE, stages, 0,
- MAX_PUSH_CONSTANTS_SIZE,
- state->push_constants);
- }
-
- if (state->flags & RADV_META_SAVE_PASS) {
- cmd_buffer->state.pass = state->pass;
- cmd_buffer->state.subpass = state->subpass;
- cmd_buffer->state.framebuffer = state->framebuffer;
- cmd_buffer->state.attachments = state->attachments;
- cmd_buffer->state.render_area = state->render_area;
- if (state->subpass)
- cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FRAMEBUFFER;
- }
+ VkPipelineBindPoint bind_point = state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE
+ ? VK_PIPELINE_BIND_POINT_GRAPHICS
+ : VK_PIPELINE_BIND_POINT_COMPUTE;
+
+ if (state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE) {
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS,
+ radv_pipeline_to_handle(state->old_pipeline));
+
+ cmd_buffer->state.dirty |= RADV_CMD_DIRTY_PIPELINE;
+
+ /* Restore all viewports. */
+ cmd_buffer->state.dynamic.viewport.count = state->viewport.count;
+ typed_memcpy(cmd_buffer->state.dynamic.viewport.viewports, state->viewport.viewports,
+ MAX_VIEWPORTS);
+
+ /* Restore all scissors. */
+ cmd_buffer->state.dynamic.scissor.count = state->scissor.count;
+ typed_memcpy(cmd_buffer->state.dynamic.scissor.scissors, state->scissor.scissors,
+ MAX_SCISSORS);
+
+ cmd_buffer->state.dynamic.cull_mode = state->cull_mode;
+ cmd_buffer->state.dynamic.front_face = state->front_face;
+
+ cmd_buffer->state.dynamic.primitive_topology = state->primitive_topology;
+
+ cmd_buffer->state.dynamic.depth_test_enable = state->depth_test_enable;
+ cmd_buffer->state.dynamic.depth_write_enable = state->depth_write_enable;
+ cmd_buffer->state.dynamic.depth_compare_op = state->depth_compare_op;
+ cmd_buffer->state.dynamic.depth_bounds_test_enable = state->depth_bounds_test_enable;
+ cmd_buffer->state.dynamic.stencil_test_enable = state->stencil_test_enable;
+
+ cmd_buffer->state.dynamic.stencil_op.front.compare_op = state->stencil_op.front.compare_op;
+ cmd_buffer->state.dynamic.stencil_op.front.fail_op = state->stencil_op.front.fail_op;
+ cmd_buffer->state.dynamic.stencil_op.front.pass_op = state->stencil_op.front.pass_op;
+ cmd_buffer->state.dynamic.stencil_op.front.depth_fail_op =
+ state->stencil_op.front.depth_fail_op;
+
+ cmd_buffer->state.dynamic.stencil_op.back.compare_op = state->stencil_op.back.compare_op;
+ cmd_buffer->state.dynamic.stencil_op.back.fail_op = state->stencil_op.back.fail_op;
+ cmd_buffer->state.dynamic.stencil_op.back.pass_op = state->stencil_op.back.pass_op;
+ cmd_buffer->state.dynamic.stencil_op.back.depth_fail_op =
+ state->stencil_op.back.depth_fail_op;
+
+ cmd_buffer->state.dynamic.fragment_shading_rate.size = state->fragment_shading_rate.size;
+ cmd_buffer->state.dynamic.fragment_shading_rate.combiner_ops[0] =
+ state->fragment_shading_rate.combiner_ops[0];
+ cmd_buffer->state.dynamic.fragment_shading_rate.combiner_ops[1] =
+ state->fragment_shading_rate.combiner_ops[1];
+
+ cmd_buffer->state.dirty |=
+ RADV_CMD_DIRTY_DYNAMIC_VIEWPORT | RADV_CMD_DIRTY_DYNAMIC_SCISSOR |
+ RADV_CMD_DIRTY_DYNAMIC_CULL_MODE | RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE |
+ RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY | RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE |
+ RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE | RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP |
+ RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE |
+ RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE | RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP |
+ RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE;
+ }
+
+ if (state->flags & RADV_META_SAVE_SAMPLE_LOCATIONS) {
+ typed_memcpy(&cmd_buffer->state.dynamic.sample_location.locations,
+ &state->sample_location.locations, 1);
+
+ cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS;
+ }
+
+ if (state->flags & RADV_META_SAVE_COMPUTE_PIPELINE) {
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
+ radv_pipeline_to_handle(state->old_pipeline));
+ }
+
+ if (state->flags & RADV_META_SAVE_DESCRIPTORS) {
+ radv_set_descriptor_set(cmd_buffer, bind_point, state->old_descriptor_set0, 0);
+ }
+
+ if (state->flags & RADV_META_SAVE_CONSTANTS) {
+ VkShaderStageFlags stages = VK_SHADER_STAGE_COMPUTE_BIT;
+
+ if (state->flags & RADV_META_SAVE_GRAPHICS_PIPELINE)
+ stages |= VK_SHADER_STAGE_ALL_GRAPHICS;
+
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), VK_NULL_HANDLE, stages, 0,
+ MAX_PUSH_CONSTANTS_SIZE, state->push_constants);
+ }
+
+ if (state->flags & RADV_META_SAVE_PASS) {
+ cmd_buffer->state.pass = state->pass;
+ cmd_buffer->state.subpass = state->subpass;
+ cmd_buffer->state.framebuffer = state->framebuffer;
+ cmd_buffer->state.attachments = state->attachments;
+ cmd_buffer->state.render_area = state->render_area;
+ if (state->subpass)
+ cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FRAMEBUFFER;
+ }
}
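The save/restore pair above is the bracketing every meta operation uses around its own pipeline, descriptor and push-constant setup. Illustrative only: the function name and the elided body below are placeholders, not real radv code; the flags and types are the ones used above.

static void
radv_meta_do_something(struct radv_cmd_buffer *cmd_buffer)
{
   struct radv_meta_saved_state saved_state;

   /* Snapshot only what the meta dispatch will clobber. */
   radv_meta_save(&saved_state, cmd_buffer,
                  RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS |
                     RADV_META_SAVE_CONSTANTS);

   /* ... bind the meta compute pipeline, push descriptors/constants, dispatch ... */

   /* Put the application's state back exactly as it was. */
   radv_meta_restore(&saved_state, cmd_buffer);
}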
VkImageViewType
radv_meta_get_view_type(const struct radv_image *image)
{
- switch (image->type) {
- case VK_IMAGE_TYPE_1D: return VK_IMAGE_VIEW_TYPE_1D;
- case VK_IMAGE_TYPE_2D: return VK_IMAGE_VIEW_TYPE_2D;
- case VK_IMAGE_TYPE_3D: return VK_IMAGE_VIEW_TYPE_3D;
- default:
- unreachable("bad VkImageViewType");
- }
+ switch (image->type) {
+ case VK_IMAGE_TYPE_1D:
+ return VK_IMAGE_VIEW_TYPE_1D;
+ case VK_IMAGE_TYPE_2D:
+ return VK_IMAGE_VIEW_TYPE_2D;
+ case VK_IMAGE_TYPE_3D:
+ return VK_IMAGE_VIEW_TYPE_3D;
+ default:
+ unreachable("bad VkImageViewType");
+ }
}
/**
@@ -250,80 +242,76 @@ radv_meta_get_view_type(const struct radv_image *image)
*/
uint32_t
radv_meta_get_iview_layer(const struct radv_image *dest_image,
- const VkImageSubresourceLayers *dest_subresource,
- const VkOffset3D *dest_offset)
+ const VkImageSubresourceLayers *dest_subresource,
+ const VkOffset3D *dest_offset)
{
- switch (dest_image->type) {
- case VK_IMAGE_TYPE_1D:
- case VK_IMAGE_TYPE_2D:
- return dest_subresource->baseArrayLayer;
- case VK_IMAGE_TYPE_3D:
- /* HACK: Vulkan does not allow attaching a 3D image to a framebuffer,
- * but meta does it anyway. When doing so, we translate the
- * destination's z offset into an array offset.
- */
- return dest_offset->z;
- default:
- assert(!"bad VkImageType");
- return 0;
- }
+ switch (dest_image->type) {
+ case VK_IMAGE_TYPE_1D:
+ case VK_IMAGE_TYPE_2D:
+ return dest_subresource->baseArrayLayer;
+ case VK_IMAGE_TYPE_3D:
+ /* HACK: Vulkan does not allow attaching a 3D image to a framebuffer,
+ * but meta does it anyway. When doing so, we translate the
+ * destination's z offset into an array offset.
+ */
+ return dest_offset->z;
+ default:
+ assert(!"bad VkImageType");
+ return 0;
+ }
}
static void *
-meta_alloc(void* _device, size_t size, size_t alignment,
- VkSystemAllocationScope allocationScope)
+meta_alloc(void *_device, size_t size, size_t alignment, VkSystemAllocationScope allocationScope)
{
- struct radv_device *device = _device;
- return device->vk.alloc.pfnAllocation(device->vk.alloc.pUserData, size, alignment,
- VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+ struct radv_device *device = _device;
+ return device->vk.alloc.pfnAllocation(device->vk.alloc.pUserData, size, alignment,
+ VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
}
static void *
-meta_realloc(void* _device, void *original, size_t size, size_t alignment,
+meta_realloc(void *_device, void *original, size_t size, size_t alignment,
VkSystemAllocationScope allocationScope)
{
- struct radv_device *device = _device;
- return device->vk.alloc.pfnReallocation(device->vk.alloc.pUserData, original,
- size, alignment,
- VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+ struct radv_device *device = _device;
+ return device->vk.alloc.pfnReallocation(device->vk.alloc.pUserData, original, size, alignment,
+ VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
}
static void
-meta_free(void* _device, void *data)
+meta_free(void *_device, void *data)
{
- struct radv_device *device = _device;
- device->vk.alloc.pfnFree(device->vk.alloc.pUserData, data);
+ struct radv_device *device = _device;
+ device->vk.alloc.pfnFree(device->vk.alloc.pUserData, data);
}
#ifndef _WIN32
static bool
radv_builtin_cache_path(char *path)
{
- char *xdg_cache_home = getenv("XDG_CACHE_HOME");
- const char *suffix = "/radv_builtin_shaders";
- const char *suffix2 = "/.cache/radv_builtin_shaders";
- struct passwd pwd, *result;
- char path2[PATH_MAX + 1]; /* PATH_MAX is not a real max,but suffices here. */
- int ret;
-
- if (xdg_cache_home) {
- ret = snprintf(path, PATH_MAX + 1, "%s%s%zd",
- xdg_cache_home, suffix, sizeof(void *) * 8);
- return ret > 0 && ret < PATH_MAX + 1;
- }
-
- getpwuid_r(getuid(), &pwd, path2, PATH_MAX - strlen(suffix2), &result);
- if (!result)
- return false;
-
- strcpy(path, pwd.pw_dir);
- strcat(path, "/.cache");
- if (mkdir(path, 0755) && errno != EEXIST)
- return false;
-
- ret = snprintf(path, PATH_MAX + 1, "%s%s%zd",
- pwd.pw_dir, suffix2, sizeof(void *) * 8);
- return ret > 0 && ret < PATH_MAX + 1;
+ char *xdg_cache_home = getenv("XDG_CACHE_HOME");
+ const char *suffix = "/radv_builtin_shaders";
+ const char *suffix2 = "/.cache/radv_builtin_shaders";
+ struct passwd pwd, *result;
+   char path2[PATH_MAX + 1]; /* PATH_MAX is not a real max, but suffices here. */
+ int ret;
+
+ if (xdg_cache_home) {
+ ret = snprintf(path, PATH_MAX + 1, "%s%s%zd", xdg_cache_home, suffix, sizeof(void *) * 8);
+ return ret > 0 && ret < PATH_MAX + 1;
+ }
+
+ getpwuid_r(getuid(), &pwd, path2, PATH_MAX - strlen(suffix2), &result);
+ if (!result)
+ return false;
+
+ strcpy(path, pwd.pw_dir);
+ strcat(path, "/.cache");
+ if (mkdir(path, 0755) && errno != EEXIST)
+ return false;
+
+ ret = snprintf(path, PATH_MAX + 1, "%s%s%zd", pwd.pw_dir, suffix2, sizeof(void *) * 8);
+ return ret > 0 && ret < PATH_MAX + 1;
}
#endif
@@ -331,32 +319,32 @@ static bool
radv_load_meta_pipeline(struct radv_device *device)
{
#ifdef _WIN32
- return false;
+ return false;
#else
- char path[PATH_MAX + 1];
- struct stat st;
- void *data = NULL;
- bool ret = false;
-
- if (!radv_builtin_cache_path(path))
- return false;
-
- int fd = open(path, O_RDONLY);
- if (fd < 0)
- return false;
- if (fstat(fd, &st))
- goto fail;
- data = malloc(st.st_size);
- if (!data)
- goto fail;
- if(read(fd, data, st.st_size) == -1)
- goto fail;
-
- ret = radv_pipeline_cache_load(&device->meta_state.cache, data, st.st_size);
+ char path[PATH_MAX + 1];
+ struct stat st;
+ void *data = NULL;
+ bool ret = false;
+
+ if (!radv_builtin_cache_path(path))
+ return false;
+
+ int fd = open(path, O_RDONLY);
+ if (fd < 0)
+ return false;
+ if (fstat(fd, &st))
+ goto fail;
+ data = malloc(st.st_size);
+ if (!data)
+ goto fail;
+ if (read(fd, data, st.st_size) == -1)
+ goto fail;
+
+ ret = radv_pipeline_cache_load(&device->meta_state.cache, data, st.st_size);
fail:
- free(data);
- close(fd);
- return ret;
+ free(data);
+ close(fd);
+ return ret;
#endif
}
@@ -364,320 +352,315 @@ static void
radv_store_meta_pipeline(struct radv_device *device)
{
#ifndef _WIN32
- char path[PATH_MAX + 1], path2[PATH_MAX + 7];
- size_t size;
- void *data = NULL;
-
- if (!device->meta_state.cache.modified)
- return;
-
- if (radv_GetPipelineCacheData(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- &size, NULL))
- return;
-
- if (!radv_builtin_cache_path(path))
- return;
-
- strcpy(path2, path);
- strcat(path2, "XXXXXX");
- int fd = mkstemp(path2);//open(path, O_WRONLY | O_CREAT, 0600);
- if (fd < 0)
- return;
- data = malloc(size);
- if (!data)
- goto fail;
-
- if (radv_GetPipelineCacheData(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- &size, data))
- goto fail;
- if(write(fd, data, size) == -1)
- goto fail;
-
- rename(path2, path);
+ char path[PATH_MAX + 1], path2[PATH_MAX + 7];
+ size_t size;
+ void *data = NULL;
+
+ if (!device->meta_state.cache.modified)
+ return;
+
+ if (radv_GetPipelineCacheData(radv_device_to_handle(device),
+ radv_pipeline_cache_to_handle(&device->meta_state.cache), &size,
+ NULL))
+ return;
+
+ if (!radv_builtin_cache_path(path))
+ return;
+
+ strcpy(path2, path);
+ strcat(path2, "XXXXXX");
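+   /* Write the cache to a unique temp file and rename() it into place, so a
+    * concurrent reader never sees a partially written file. */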
+ int fd = mkstemp(path2); // open(path, O_WRONLY | O_CREAT, 0600);
+ if (fd < 0)
+ return;
+ data = malloc(size);
+ if (!data)
+ goto fail;
+
+ if (radv_GetPipelineCacheData(radv_device_to_handle(device),
+ radv_pipeline_cache_to_handle(&device->meta_state.cache), &size,
+ data))
+ goto fail;
+ if (write(fd, data, size) == -1)
+ goto fail;
+
+ rename(path2, path);
fail:
- free(data);
- close(fd);
- unlink(path2);
+ free(data);
+ close(fd);
+ unlink(path2);
#endif
}
VkResult
radv_device_init_meta(struct radv_device *device)
{
- VkResult result;
+ VkResult result;
- memset(&device->meta_state, 0, sizeof(device->meta_state));
+ memset(&device->meta_state, 0, sizeof(device->meta_state));
- device->meta_state.alloc = (VkAllocationCallbacks) {
- .pUserData = device,
- .pfnAllocation = meta_alloc,
- .pfnReallocation = meta_realloc,
- .pfnFree = meta_free,
- };
+ device->meta_state.alloc = (VkAllocationCallbacks){
+ .pUserData = device,
+ .pfnAllocation = meta_alloc,
+ .pfnReallocation = meta_realloc,
+ .pfnFree = meta_free,
+ };
- device->meta_state.cache.alloc = device->meta_state.alloc;
- radv_pipeline_cache_init(&device->meta_state.cache, device);
- bool loaded_cache = radv_load_meta_pipeline(device);
- bool on_demand = !loaded_cache;
+ device->meta_state.cache.alloc = device->meta_state.alloc;
+ radv_pipeline_cache_init(&device->meta_state.cache, device);
+ bool loaded_cache = radv_load_meta_pipeline(device);
+ bool on_demand = !loaded_cache;
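+   /* With a warm on-disk cache, creating all meta pipelines up front is cheap;
+    * without one, the init helpers that take on_demand defer pipeline creation
+    * to first use. */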
- mtx_init(&device->meta_state.mtx, mtx_plain);
+ mtx_init(&device->meta_state.mtx, mtx_plain);
- result = radv_device_init_meta_clear_state(device, on_demand);
- if (result != VK_SUCCESS)
- goto fail_clear;
+ result = radv_device_init_meta_clear_state(device, on_demand);
+ if (result != VK_SUCCESS)
+ goto fail_clear;
- result = radv_device_init_meta_resolve_state(device, on_demand);
- if (result != VK_SUCCESS)
- goto fail_resolve;
+ result = radv_device_init_meta_resolve_state(device, on_demand);
+ if (result != VK_SUCCESS)
+ goto fail_resolve;
- result = radv_device_init_meta_blit_state(device, on_demand);
- if (result != VK_SUCCESS)
- goto fail_blit;
+ result = radv_device_init_meta_blit_state(device, on_demand);
+ if (result != VK_SUCCESS)
+ goto fail_blit;
- result = radv_device_init_meta_blit2d_state(device, on_demand);
- if (result != VK_SUCCESS)
- goto fail_blit2d;
+ result = radv_device_init_meta_blit2d_state(device, on_demand);
+ if (result != VK_SUCCESS)
+ goto fail_blit2d;
- result = radv_device_init_meta_bufimage_state(device);
- if (result != VK_SUCCESS)
- goto fail_bufimage;
+ result = radv_device_init_meta_bufimage_state(device);
+ if (result != VK_SUCCESS)
+ goto fail_bufimage;
- result = radv_device_init_meta_depth_decomp_state(device, on_demand);
- if (result != VK_SUCCESS)
- goto fail_depth_decomp;
+ result = radv_device_init_meta_depth_decomp_state(device, on_demand);
+ if (result != VK_SUCCESS)
+ goto fail_depth_decomp;
- result = radv_device_init_meta_buffer_state(device);
- if (result != VK_SUCCESS)
- goto fail_buffer;
+ result = radv_device_init_meta_buffer_state(device);
+ if (result != VK_SUCCESS)
+ goto fail_buffer;
- result = radv_device_init_meta_query_state(device, on_demand);
- if (result != VK_SUCCESS)
- goto fail_query;
+ result = radv_device_init_meta_query_state(device, on_demand);
+ if (result != VK_SUCCESS)
+ goto fail_query;
- result = radv_device_init_meta_fast_clear_flush_state(device, on_demand);
- if (result != VK_SUCCESS)
- goto fail_fast_clear;
+ result = radv_device_init_meta_fast_clear_flush_state(device, on_demand);
+ if (result != VK_SUCCESS)
+ goto fail_fast_clear;
- result = radv_device_init_meta_resolve_compute_state(device, on_demand);
- if (result != VK_SUCCESS)
- goto fail_resolve_compute;
+ result = radv_device_init_meta_resolve_compute_state(device, on_demand);
+ if (result != VK_SUCCESS)
+ goto fail_resolve_compute;
- result = radv_device_init_meta_resolve_fragment_state(device, on_demand);
- if (result != VK_SUCCESS)
- goto fail_resolve_fragment;
+ result = radv_device_init_meta_resolve_fragment_state(device, on_demand);
+ if (result != VK_SUCCESS)
+ goto fail_resolve_fragment;
- result = radv_device_init_meta_fmask_expand_state(device);
- if (result != VK_SUCCESS)
- goto fail_fmask_expand;
+ result = radv_device_init_meta_fmask_expand_state(device);
+ if (result != VK_SUCCESS)
+ goto fail_fmask_expand;
- if (!on_demand) {
- result = radv_device_init_meta_dcc_retile_state(device);
- if (result != VK_SUCCESS)
- goto fail_dcc_retile;
- }
+ if (!on_demand) {
+ result = radv_device_init_meta_dcc_retile_state(device);
+ if (result != VK_SUCCESS)
+ goto fail_dcc_retile;
+ }
- return VK_SUCCESS;
+ return VK_SUCCESS;
fail_dcc_retile:
- radv_device_finish_meta_fmask_expand_state(device);
+ radv_device_finish_meta_fmask_expand_state(device);
fail_fmask_expand:
- radv_device_finish_meta_resolve_fragment_state(device);
+ radv_device_finish_meta_resolve_fragment_state(device);
fail_resolve_fragment:
- radv_device_finish_meta_resolve_compute_state(device);
+ radv_device_finish_meta_resolve_compute_state(device);
fail_resolve_compute:
- radv_device_finish_meta_fast_clear_flush_state(device);
+ radv_device_finish_meta_fast_clear_flush_state(device);
fail_fast_clear:
- radv_device_finish_meta_query_state(device);
+ radv_device_finish_meta_query_state(device);
fail_query:
- radv_device_finish_meta_buffer_state(device);
+ radv_device_finish_meta_buffer_state(device);
fail_buffer:
- radv_device_finish_meta_depth_decomp_state(device);
+ radv_device_finish_meta_depth_decomp_state(device);
fail_depth_decomp:
- radv_device_finish_meta_bufimage_state(device);
+ radv_device_finish_meta_bufimage_state(device);
fail_bufimage:
- radv_device_finish_meta_blit2d_state(device);
+ radv_device_finish_meta_blit2d_state(device);
fail_blit2d:
- radv_device_finish_meta_blit_state(device);
+ radv_device_finish_meta_blit_state(device);
fail_blit:
- radv_device_finish_meta_resolve_state(device);
+ radv_device_finish_meta_resolve_state(device);
fail_resolve:
- radv_device_finish_meta_clear_state(device);
+ radv_device_finish_meta_clear_state(device);
fail_clear:
- mtx_destroy(&device->meta_state.mtx);
- radv_pipeline_cache_finish(&device->meta_state.cache);
- return result;
+ mtx_destroy(&device->meta_state.mtx);
+ radv_pipeline_cache_finish(&device->meta_state.cache);
+ return result;
}
void
radv_device_finish_meta(struct radv_device *device)
{
- radv_device_finish_meta_clear_state(device);
- radv_device_finish_meta_resolve_state(device);
- radv_device_finish_meta_blit_state(device);
- radv_device_finish_meta_blit2d_state(device);
- radv_device_finish_meta_bufimage_state(device);
- radv_device_finish_meta_depth_decomp_state(device);
- radv_device_finish_meta_query_state(device);
- radv_device_finish_meta_buffer_state(device);
- radv_device_finish_meta_fast_clear_flush_state(device);
- radv_device_finish_meta_resolve_compute_state(device);
- radv_device_finish_meta_resolve_fragment_state(device);
- radv_device_finish_meta_fmask_expand_state(device);
- radv_device_finish_meta_dcc_retile_state(device);
-
- radv_store_meta_pipeline(device);
- radv_pipeline_cache_finish(&device->meta_state.cache);
- mtx_destroy(&device->meta_state.mtx);
+ radv_device_finish_meta_clear_state(device);
+ radv_device_finish_meta_resolve_state(device);
+ radv_device_finish_meta_blit_state(device);
+ radv_device_finish_meta_blit2d_state(device);
+ radv_device_finish_meta_bufimage_state(device);
+ radv_device_finish_meta_depth_decomp_state(device);
+ radv_device_finish_meta_query_state(device);
+ radv_device_finish_meta_buffer_state(device);
+ radv_device_finish_meta_fast_clear_flush_state(device);
+ radv_device_finish_meta_resolve_compute_state(device);
+ radv_device_finish_meta_resolve_fragment_state(device);
+ radv_device_finish_meta_fmask_expand_state(device);
+ radv_device_finish_meta_dcc_retile_state(device);
+
+ radv_store_meta_pipeline(device);
+ radv_pipeline_cache_finish(&device->meta_state.cache);
+ mtx_destroy(&device->meta_state.mtx);
}
-nir_ssa_def *radv_meta_gen_rect_vertices_comp2(nir_builder *vs_b, nir_ssa_def *comp2)
+nir_ssa_def *
+radv_meta_gen_rect_vertices_comp2(nir_builder *vs_b, nir_ssa_def *comp2)
{
- nir_ssa_def *vertex_id = nir_load_vertex_id_zero_base(vs_b);
+ nir_ssa_def *vertex_id = nir_load_vertex_id_zero_base(vs_b);
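+   /* Meta draws use a rect-list primitive, so three corner vertices are enough
+    * to cover the whole rectangle: */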
- /* vertex 0 - -1.0, -1.0 */
- /* vertex 1 - -1.0, 1.0 */
- /* vertex 2 - 1.0, -1.0 */
- /* so channel 0 is vertex_id != 2 ? -1.0 : 1.0
- channel 1 is vertex id != 1 ? -1.0 : 1.0 */
+ /* vertex 0 - -1.0, -1.0 */
+ /* vertex 1 - -1.0, 1.0 */
+ /* vertex 2 - 1.0, -1.0 */
+ /* so channel 0 is vertex_id != 2 ? -1.0 : 1.0
+ channel 1 is vertex id != 1 ? -1.0 : 1.0 */
- nir_ssa_def *c0cmp = nir_ine(vs_b, vertex_id, nir_imm_int(vs_b, 2));
- nir_ssa_def *c1cmp = nir_ine(vs_b, vertex_id, nir_imm_int(vs_b, 1));
+ nir_ssa_def *c0cmp = nir_ine(vs_b, vertex_id, nir_imm_int(vs_b, 2));
+ nir_ssa_def *c1cmp = nir_ine(vs_b, vertex_id, nir_imm_int(vs_b, 1));
- nir_ssa_def *comp[4];
- comp[0] = nir_bcsel(vs_b, c0cmp,
- nir_imm_float(vs_b, -1.0),
- nir_imm_float(vs_b, 1.0));
+ nir_ssa_def *comp[4];
+ comp[0] = nir_bcsel(vs_b, c0cmp, nir_imm_float(vs_b, -1.0), nir_imm_float(vs_b, 1.0));
- comp[1] = nir_bcsel(vs_b, c1cmp,
- nir_imm_float(vs_b, -1.0),
- nir_imm_float(vs_b, 1.0));
- comp[2] = comp2;
- comp[3] = nir_imm_float(vs_b, 1.0);
- nir_ssa_def *outvec = nir_vec(vs_b, comp, 4);
+ comp[1] = nir_bcsel(vs_b, c1cmp, nir_imm_float(vs_b, -1.0), nir_imm_float(vs_b, 1.0));
+ comp[2] = comp2;
+ comp[3] = nir_imm_float(vs_b, 1.0);
+ nir_ssa_def *outvec = nir_vec(vs_b, comp, 4);
- return outvec;
+ return outvec;
}
-nir_ssa_def *radv_meta_gen_rect_vertices(nir_builder *vs_b)
+nir_ssa_def *
+radv_meta_gen_rect_vertices(nir_builder *vs_b)
{
- return radv_meta_gen_rect_vertices_comp2(vs_b, nir_imm_float(vs_b, 0.0));
+ return radv_meta_gen_rect_vertices_comp2(vs_b, nir_imm_float(vs_b, 0.0));
}
/* vertex shader that generates vertices */
nir_shader *
radv_meta_build_nir_vs_generate_vertices(void)
{
- const struct glsl_type *vec4 = glsl_vec4_type();
+ const struct glsl_type *vec4 = glsl_vec4_type();
- nir_variable *v_position;
+ nir_variable *v_position;
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, NULL, "meta_vs_gen_verts");
+ nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, NULL, "meta_vs_gen_verts");
- nir_ssa_def *outvec = radv_meta_gen_rect_vertices(&b);
+ nir_ssa_def *outvec = radv_meta_gen_rect_vertices(&b);
- v_position = nir_variable_create(b.shader, nir_var_shader_out, vec4,
- "gl_Position");
- v_position->data.location = VARYING_SLOT_POS;
+ v_position = nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position");
+ v_position->data.location = VARYING_SLOT_POS;
- nir_store_var(&b, v_position, outvec, 0xf);
+ nir_store_var(&b, v_position, outvec, 0xf);
- return b.shader;
+ return b.shader;
}
nir_shader *
radv_meta_build_nir_fs_noop(void)
{
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, "meta_noop_fs");
+ nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, "meta_noop_fs");
- return b.shader;
+ return b.shader;
}
-void radv_meta_build_resolve_shader_core(nir_builder *b,
- bool is_integer,
- int samples,
- nir_variable *input_img,
- nir_variable *color,
- nir_ssa_def *img_coord)
+void
+radv_meta_build_resolve_shader_core(nir_builder *b, bool is_integer, int samples,
+ nir_variable *input_img, nir_variable *color,
+ nir_ssa_def *img_coord)
{
- /* do a txf_ms on each sample */
- nir_ssa_def *tmp;
- bool inserted_if = false;
-
- nir_ssa_def *input_img_deref = &nir_build_deref_var(b, input_img)->dest.ssa;
-
- nir_tex_instr *tex = nir_tex_instr_create(b->shader, 3);
- tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
- tex->op = nir_texop_txf_ms;
- tex->src[0].src_type = nir_tex_src_coord;
- tex->src[0].src = nir_src_for_ssa(img_coord);
- tex->src[1].src_type = nir_tex_src_ms_index;
- tex->src[1].src = nir_src_for_ssa(nir_imm_int(b, 0));
- tex->src[2].src_type = nir_tex_src_texture_deref;
- tex->src[2].src = nir_src_for_ssa(input_img_deref);
- tex->dest_type = nir_type_float32;
- tex->is_array = false;
- tex->coord_components = 2;
-
- nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
- nir_builder_instr_insert(b, &tex->instr);
-
- tmp = &tex->dest.ssa;
-
- if (!is_integer && samples > 1) {
- nir_tex_instr *tex_all_same = nir_tex_instr_create(b->shader, 2);
- tex_all_same->sampler_dim = GLSL_SAMPLER_DIM_MS;
- tex_all_same->op = nir_texop_samples_identical;
- tex_all_same->src[0].src_type = nir_tex_src_coord;
- tex_all_same->src[0].src = nir_src_for_ssa(img_coord);
- tex_all_same->src[1].src_type = nir_tex_src_texture_deref;
- tex_all_same->src[1].src = nir_src_for_ssa(input_img_deref);
- tex_all_same->dest_type = nir_type_bool1;
- tex_all_same->is_array = false;
- tex_all_same->coord_components = 2;
-
- nir_ssa_dest_init(&tex_all_same->instr, &tex_all_same->dest, 1, 1, "tex");
- nir_builder_instr_insert(b, &tex_all_same->instr);
-
- nir_ssa_def *all_same = nir_ieq(b, &tex_all_same->dest.ssa, nir_imm_bool(b, false));
- nir_push_if(b, all_same);
- for (int i = 1; i < samples; i++) {
- nir_tex_instr *tex_add = nir_tex_instr_create(b->shader, 3);
- tex_add->sampler_dim = GLSL_SAMPLER_DIM_MS;
- tex_add->op = nir_texop_txf_ms;
- tex_add->src[0].src_type = nir_tex_src_coord;
- tex_add->src[0].src = nir_src_for_ssa(img_coord);
- tex_add->src[1].src_type = nir_tex_src_ms_index;
- tex_add->src[1].src = nir_src_for_ssa(nir_imm_int(b, i));
- tex_add->src[2].src_type = nir_tex_src_texture_deref;
- tex_add->src[2].src = nir_src_for_ssa(input_img_deref);
- tex_add->dest_type = nir_type_float32;
- tex_add->is_array = false;
- tex_add->coord_components = 2;
-
- nir_ssa_dest_init(&tex_add->instr, &tex_add->dest, 4, 32, "tex");
- nir_builder_instr_insert(b, &tex_add->instr);
-
- tmp = nir_fadd(b, tmp, &tex_add->dest.ssa);
- }
-
- tmp = nir_fdiv(b, tmp, nir_imm_float(b, samples));
- nir_store_var(b, color, tmp, 0xf);
- nir_push_else(b, NULL);
- inserted_if = true;
- }
- nir_store_var(b, color, &tex->dest.ssa, 0xf);
-
- if (inserted_if)
- nir_pop_if(b, NULL);
+ /* do a txf_ms on each sample */
+ nir_ssa_def *tmp;
+ bool inserted_if = false;
+
+ nir_ssa_def *input_img_deref = &nir_build_deref_var(b, input_img)->dest.ssa;
+
+ nir_tex_instr *tex = nir_tex_instr_create(b->shader, 3);
+ tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
+ tex->op = nir_texop_txf_ms;
+ tex->src[0].src_type = nir_tex_src_coord;
+ tex->src[0].src = nir_src_for_ssa(img_coord);
+ tex->src[1].src_type = nir_tex_src_ms_index;
+ tex->src[1].src = nir_src_for_ssa(nir_imm_int(b, 0));
+ tex->src[2].src_type = nir_tex_src_texture_deref;
+ tex->src[2].src = nir_src_for_ssa(input_img_deref);
+ tex->dest_type = nir_type_float32;
+ tex->is_array = false;
+ tex->coord_components = 2;
+
+ nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+ nir_builder_instr_insert(b, &tex->instr);
+
+ tmp = &tex->dest.ssa;
+
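+   /* For non-integer MSAA sources, nir_texop_samples_identical lets the shader
+    * skip the per-sample loop: only when the samples differ does it fetch the
+    * remaining samples and average them; otherwise sample 0 already holds the
+    * resolved value. */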
+ if (!is_integer && samples > 1) {
+ nir_tex_instr *tex_all_same = nir_tex_instr_create(b->shader, 2);
+ tex_all_same->sampler_dim = GLSL_SAMPLER_DIM_MS;
+ tex_all_same->op = nir_texop_samples_identical;
+ tex_all_same->src[0].src_type = nir_tex_src_coord;
+ tex_all_same->src[0].src = nir_src_for_ssa(img_coord);
+ tex_all_same->src[1].src_type = nir_tex_src_texture_deref;
+ tex_all_same->src[1].src = nir_src_for_ssa(input_img_deref);
+ tex_all_same->dest_type = nir_type_bool1;
+ tex_all_same->is_array = false;
+ tex_all_same->coord_components = 2;
+
+ nir_ssa_dest_init(&tex_all_same->instr, &tex_all_same->dest, 1, 1, "tex");
+ nir_builder_instr_insert(b, &tex_all_same->instr);
+
+ nir_ssa_def *all_same = nir_ieq(b, &tex_all_same->dest.ssa, nir_imm_bool(b, false));
+ nir_push_if(b, all_same);
+ for (int i = 1; i < samples; i++) {
+ nir_tex_instr *tex_add = nir_tex_instr_create(b->shader, 3);
+ tex_add->sampler_dim = GLSL_SAMPLER_DIM_MS;
+ tex_add->op = nir_texop_txf_ms;
+ tex_add->src[0].src_type = nir_tex_src_coord;
+ tex_add->src[0].src = nir_src_for_ssa(img_coord);
+ tex_add->src[1].src_type = nir_tex_src_ms_index;
+ tex_add->src[1].src = nir_src_for_ssa(nir_imm_int(b, i));
+ tex_add->src[2].src_type = nir_tex_src_texture_deref;
+ tex_add->src[2].src = nir_src_for_ssa(input_img_deref);
+ tex_add->dest_type = nir_type_float32;
+ tex_add->is_array = false;
+ tex_add->coord_components = 2;
+
+ nir_ssa_dest_init(&tex_add->instr, &tex_add->dest, 4, 32, "tex");
+ nir_builder_instr_insert(b, &tex_add->instr);
+
+ tmp = nir_fadd(b, tmp, &tex_add->dest.ssa);
+ }
+
+ tmp = nir_fdiv(b, tmp, nir_imm_float(b, samples));
+ nir_store_var(b, color, tmp, 0xf);
+ nir_push_else(b, NULL);
+ inserted_if = true;
+ }
+ nir_store_var(b, color, &tex->dest.ssa, 0xf);
+
+ if (inserted_if)
+ nir_pop_if(b, NULL);
}
nir_ssa_def *
radv_meta_load_descriptor(nir_builder *b, unsigned desc_set, unsigned binding)
{
- nir_ssa_def *rsrc = nir_vulkan_resource_index(
- b, 2, 32, nir_imm_int(b, 0), .desc_set=desc_set, .binding=binding);
- return nir_channel(b, rsrc, 0);
+ nir_ssa_def *rsrc = nir_vulkan_resource_index(b, 2, 32, nir_imm_int(b, 0), .desc_set = desc_set,
+ .binding = binding);
+ return nir_channel(b, rsrc, 0);
}
diff --git a/src/amd/vulkan/radv_meta.h b/src/amd/vulkan/radv_meta.h
index ce2c25f5850..f20e0b07a02 100644
--- a/src/amd/vulkan/radv_meta.h
+++ b/src/amd/vulkan/radv_meta.h
@@ -34,62 +34,62 @@ extern "C" {
#endif
enum radv_meta_save_flags {
- RADV_META_SAVE_PASS = (1 << 0),
- RADV_META_SAVE_CONSTANTS = (1 << 1),
- RADV_META_SAVE_DESCRIPTORS = (1 << 2),
- RADV_META_SAVE_GRAPHICS_PIPELINE = (1 << 3),
- RADV_META_SAVE_COMPUTE_PIPELINE = (1 << 4),
- RADV_META_SAVE_SAMPLE_LOCATIONS = (1 << 5),
+ RADV_META_SAVE_PASS = (1 << 0),
+ RADV_META_SAVE_CONSTANTS = (1 << 1),
+ RADV_META_SAVE_DESCRIPTORS = (1 << 2),
+ RADV_META_SAVE_GRAPHICS_PIPELINE = (1 << 3),
+ RADV_META_SAVE_COMPUTE_PIPELINE = (1 << 4),
+ RADV_META_SAVE_SAMPLE_LOCATIONS = (1 << 5),
};
struct radv_meta_saved_state {
- uint32_t flags;
-
- struct radv_descriptor_set *old_descriptor_set0;
- struct radv_pipeline *old_pipeline;
- struct radv_viewport_state viewport;
- struct radv_scissor_state scissor;
- struct radv_sample_locations_state sample_location;
-
- char push_constants[128];
-
- struct radv_render_pass *pass;
- const struct radv_subpass *subpass;
- struct radv_attachment_state *attachments;
- struct radv_framebuffer *framebuffer;
- VkRect2D render_area;
-
- VkCullModeFlags cull_mode;
- VkFrontFace front_face;
-
- unsigned primitive_topology;
-
- bool depth_test_enable;
- bool depth_write_enable;
- unsigned depth_compare_op;
- bool depth_bounds_test_enable;
- bool stencil_test_enable;
-
- struct {
- struct {
- VkStencilOp fail_op;
- VkStencilOp pass_op;
- VkStencilOp depth_fail_op;
- VkCompareOp compare_op;
- } front;
-
- struct {
- VkStencilOp fail_op;
- VkStencilOp pass_op;
- VkStencilOp depth_fail_op;
- VkCompareOp compare_op;
- } back;
- } stencil_op;
-
- struct {
- VkExtent2D size;
- VkFragmentShadingRateCombinerOpKHR combiner_ops[2];
- } fragment_shading_rate;
+ uint32_t flags;
+
+ struct radv_descriptor_set *old_descriptor_set0;
+ struct radv_pipeline *old_pipeline;
+ struct radv_viewport_state viewport;
+ struct radv_scissor_state scissor;
+ struct radv_sample_locations_state sample_location;
+
+ char push_constants[128];
+
+ struct radv_render_pass *pass;
+ const struct radv_subpass *subpass;
+ struct radv_attachment_state *attachments;
+ struct radv_framebuffer *framebuffer;
+ VkRect2D render_area;
+
+ VkCullModeFlags cull_mode;
+ VkFrontFace front_face;
+
+ unsigned primitive_topology;
+
+ bool depth_test_enable;
+ bool depth_write_enable;
+ unsigned depth_compare_op;
+ bool depth_bounds_test_enable;
+ bool stencil_test_enable;
+
+ struct {
+ struct {
+ VkStencilOp fail_op;
+ VkStencilOp pass_op;
+ VkStencilOp depth_fail_op;
+ VkCompareOp compare_op;
+ } front;
+
+ struct {
+ VkStencilOp fail_op;
+ VkStencilOp pass_op;
+ VkStencilOp depth_fail_op;
+ VkCompareOp compare_op;
+ } back;
+ } stencil_op;
+
+ struct {
+ VkExtent2D size;
+ VkFragmentShadingRateCombinerOpKHR combiner_ops[2];
+ } fragment_shading_rate;
};
VkResult radv_device_init_meta_clear_state(struct radv_device *device, bool on_demand);
@@ -128,135 +128,111 @@ void radv_device_finish_meta_fmask_expand_state(struct radv_device *device);
VkResult radv_device_init_meta_dcc_retile_state(struct radv_device *device);
void radv_device_finish_meta_dcc_retile_state(struct radv_device *device);
-void radv_meta_save(struct radv_meta_saved_state *saved_state,
- struct radv_cmd_buffer *cmd_buffer, uint32_t flags);
+void radv_meta_save(struct radv_meta_saved_state *saved_state, struct radv_cmd_buffer *cmd_buffer,
+ uint32_t flags);
void radv_meta_restore(const struct radv_meta_saved_state *state,
- struct radv_cmd_buffer *cmd_buffer);
+ struct radv_cmd_buffer *cmd_buffer);
VkImageViewType radv_meta_get_view_type(const struct radv_image *image);
uint32_t radv_meta_get_iview_layer(const struct radv_image *dest_image,
- const VkImageSubresourceLayers *dest_subresource,
- const VkOffset3D *dest_offset);
+ const VkImageSubresourceLayers *dest_subresource,
+ const VkOffset3D *dest_offset);
struct radv_meta_blit2d_surf {
- /** The size of an element in bytes. */
- uint8_t bs;
- VkFormat format;
-
- struct radv_image *image;
- unsigned level;
- unsigned layer;
- VkImageAspectFlags aspect_mask;
- VkImageLayout current_layout;
- bool disable_compression;
+ /** The size of an element in bytes. */
+ uint8_t bs;
+ VkFormat format;
+
+ struct radv_image *image;
+ unsigned level;
+ unsigned layer;
+ VkImageAspectFlags aspect_mask;
+ VkImageLayout current_layout;
+ bool disable_compression;
};
struct radv_meta_blit2d_buffer {
- struct radv_buffer *buffer;
- uint32_t offset;
- uint32_t pitch;
- uint8_t bs;
- VkFormat format;
+ struct radv_buffer *buffer;
+ uint32_t offset;
+ uint32_t pitch;
+ uint8_t bs;
+ VkFormat format;
};
struct radv_meta_blit2d_rect {
- uint32_t src_x, src_y;
- uint32_t dst_x, dst_y;
- uint32_t width, height;
+ uint32_t src_x, src_y;
+ uint32_t dst_x, dst_y;
+ uint32_t width, height;
};
-void radv_meta_begin_blit2d(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_saved_state *save);
+void radv_meta_begin_blit2d(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_saved_state *save);
-void radv_meta_blit2d(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_surf *src_img,
- struct radv_meta_blit2d_buffer *src_buf,
- struct radv_meta_blit2d_surf *dst,
- unsigned num_rects,
- struct radv_meta_blit2d_rect *rects);
-
-void radv_meta_end_blit2d(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_saved_state *save);
+void radv_meta_blit2d(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src_img,
+ struct radv_meta_blit2d_buffer *src_buf, struct radv_meta_blit2d_surf *dst,
+ unsigned num_rects, struct radv_meta_blit2d_rect *rects);
+void radv_meta_end_blit2d(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_saved_state *save);
VkResult radv_device_init_meta_bufimage_state(struct radv_device *device);
void radv_device_finish_meta_bufimage_state(struct radv_device *device);
void radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_surf *src,
- struct radv_meta_blit2d_buffer *dst,
- unsigned num_rects,
- struct radv_meta_blit2d_rect *rects);
+ struct radv_meta_blit2d_surf *src,
+ struct radv_meta_blit2d_buffer *dst, unsigned num_rects,
+ struct radv_meta_blit2d_rect *rects);
void radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_buffer *src,
- struct radv_meta_blit2d_surf *dst,
- unsigned num_rects,
- struct radv_meta_blit2d_rect *rects);
+ struct radv_meta_blit2d_buffer *src,
+ struct radv_meta_blit2d_surf *dst, unsigned num_rects,
+ struct radv_meta_blit2d_rect *rects);
void radv_meta_image_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_surf *src,
- struct radv_meta_blit2d_surf *dst,
- unsigned num_rects,
- struct radv_meta_blit2d_rect *rects);
-void radv_meta_clear_image_cs(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_surf *dst,
- const VkClearColorValue *clear_color);
-
-void radv_decompress_depth_stencil(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *subresourceRange,
- struct radv_sample_locations_state *sample_locs);
-void radv_resummarize_depth_stencil(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *subresourceRange,
- struct radv_sample_locations_state *sample_locs);
+ struct radv_meta_blit2d_surf *src,
+ struct radv_meta_blit2d_surf *dst, unsigned num_rects,
+ struct radv_meta_blit2d_rect *rects);
+void radv_meta_clear_image_cs(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *dst,
+ const VkClearColorValue *clear_color);
+
+void radv_decompress_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *subresourceRange,
+ struct radv_sample_locations_state *sample_locs);
+void radv_resummarize_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *subresourceRange,
+ struct radv_sample_locations_state *sample_locs);
void radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *subresourceRange);
-void radv_decompress_dcc(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *subresourceRange);
+ struct radv_image *image,
+ const VkImageSubresourceRange *subresourceRange);
+void radv_decompress_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *subresourceRange);
void radv_retile_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image);
-void radv_expand_fmask_image_inplace(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *subresourceRange);
+void radv_expand_fmask_image_inplace(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *subresourceRange);
void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *src_image,
- VkFormat src_format,
- VkImageLayout src_image_layout,
- struct radv_image *dest_image,
- VkFormat dest_format,
- VkImageLayout dest_image_layout,
- const VkImageResolve2KHR *region);
+ struct radv_image *src_image, VkFormat src_format,
+ VkImageLayout src_image_layout, struct radv_image *dest_image,
+ VkFormat dest_format, VkImageLayout dest_image_layout,
+ const VkImageResolve2KHR *region);
void radv_meta_resolve_fragment_image(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *src_image,
- VkImageLayout src_image_layout,
- struct radv_image *dest_image,
- VkImageLayout dest_image_layout,
- const VkImageResolve2KHR *region);
+ struct radv_image *src_image, VkImageLayout src_image_layout,
+ struct radv_image *dest_image,
+ VkImageLayout dest_image_layout,
+ const VkImageResolve2KHR *region);
void radv_decompress_resolve_subpass_src(struct radv_cmd_buffer *cmd_buffer);
-void radv_decompress_resolve_src(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *src_image,
- VkImageLayout src_image_layout,
- const VkImageResolve2KHR *region);
-
-uint32_t radv_clear_cmask(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *range, uint32_t value);
-uint32_t radv_clear_fmask(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *range, uint32_t value);
-uint32_t radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *range, uint32_t value);
-uint32_t radv_clear_htile(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_image *image,
- const VkImageSubresourceRange *range, uint32_t value);
+void radv_decompress_resolve_src(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,
+ VkImageLayout src_image_layout, const VkImageResolve2KHR *region);
+
+uint32_t radv_clear_cmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *range, uint32_t value);
+uint32_t radv_clear_fmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *range, uint32_t value);
+uint32_t radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *range, uint32_t value);
+uint32_t radv_clear_htile(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image,
+ const VkImageSubresourceRange *range, uint32_t value);
/**
* Return whether the bound pipeline is the FMASK decompress pass.
@@ -264,11 +240,11 @@ uint32_t radv_clear_htile(struct radv_cmd_buffer *cmd_buffer,
static inline bool
radv_is_fmask_decompress_pipeline(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_meta_state *meta_state = &cmd_buffer->device->meta_state;
- struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
+ struct radv_meta_state *meta_state = &cmd_buffer->device->meta_state;
+ struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
- return radv_pipeline_to_handle(pipeline) ==
- meta_state->fast_clear_flush.fmask_decompress_pipeline;
+ return radv_pipeline_to_handle(pipeline) ==
+ meta_state->fast_clear_flush.fmask_decompress_pipeline;
}
/**
@@ -277,11 +253,10 @@ radv_is_fmask_decompress_pipeline(struct radv_cmd_buffer *cmd_buffer)
static inline bool
radv_is_dcc_decompress_pipeline(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_meta_state *meta_state = &cmd_buffer->device->meta_state;
- struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
+ struct radv_meta_state *meta_state = &cmd_buffer->device->meta_state;
+ struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
- return radv_pipeline_to_handle(pipeline) ==
- meta_state->fast_clear_flush.dcc_decompress_pipeline;
+ return radv_pipeline_to_handle(pipeline) == meta_state->fast_clear_flush.dcc_decompress_pipeline;
}
/* common nir builder helpers */
@@ -292,15 +267,11 @@ nir_ssa_def *radv_meta_gen_rect_vertices_comp2(nir_builder *vs_b, nir_ssa_def *c
nir_shader *radv_meta_build_nir_vs_generate_vertices(void);
nir_shader *radv_meta_build_nir_fs_noop(void);
-void radv_meta_build_resolve_shader_core(nir_builder *b,
- bool is_integer,
- int samples,
- nir_variable *input_img,
- nir_variable *color,
- nir_ssa_def *img_coord);
+void radv_meta_build_resolve_shader_core(nir_builder *b, bool is_integer, int samples,
+ nir_variable *input_img, nir_variable *color,
+ nir_ssa_def *img_coord);
-nir_ssa_def *radv_meta_load_descriptor(nir_builder *b, unsigned desc_set,
- unsigned binding);
+nir_ssa_def *radv_meta_load_descriptor(nir_builder *b, unsigned desc_set, unsigned binding);
#ifdef __cplusplus
}
diff --git a/src/amd/vulkan/radv_meta_blit.c b/src/amd/vulkan/radv_meta_blit.c
index ff573ad7f04..03a4912f033 100644
--- a/src/amd/vulkan/radv_meta_blit.c
+++ b/src/amd/vulkan/radv_meta_blit.c
@@ -21,1198 +21,1147 @@
* IN THE SOFTWARE.
*/
-#include "radv_meta.h"
#include "nir/nir_builder.h"
+#include "radv_meta.h"
struct blit_region {
- VkOffset3D src_offset;
- VkExtent3D src_extent;
- VkOffset3D dest_offset;
- VkExtent3D dest_extent;
+ VkOffset3D src_offset;
+ VkExtent3D src_extent;
+ VkOffset3D dest_offset;
+ VkExtent3D dest_extent;
};
-static VkResult
-build_pipeline(struct radv_device *device,
- VkImageAspectFlagBits aspect,
- enum glsl_sampler_dim tex_dim,
- unsigned fs_key,
- VkPipeline *pipeline);
+static VkResult build_pipeline(struct radv_device *device, VkImageAspectFlagBits aspect,
+ enum glsl_sampler_dim tex_dim, unsigned fs_key,
+ VkPipeline *pipeline);
static nir_shader *
build_nir_vertex_shader(void)
{
- const struct glsl_type *vec4 = glsl_vec4_type();
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, NULL, "meta_blit_vs");
+ const struct glsl_type *vec4 = glsl_vec4_type();
+ nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, NULL, "meta_blit_vs");
- nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out,
- vec4, "gl_Position");
- pos_out->data.location = VARYING_SLOT_POS;
+ nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position");
+ pos_out->data.location = VARYING_SLOT_POS;
- nir_variable *tex_pos_out = nir_variable_create(b.shader, nir_var_shader_out,
- vec4, "v_tex_pos");
- tex_pos_out->data.location = VARYING_SLOT_VAR0;
- tex_pos_out->data.interpolation = INTERP_MODE_SMOOTH;
+ nir_variable *tex_pos_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "v_tex_pos");
+ tex_pos_out->data.location = VARYING_SLOT_VAR0;
+ tex_pos_out->data.interpolation = INTERP_MODE_SMOOTH;
- nir_ssa_def *outvec = radv_meta_gen_rect_vertices(&b);
+ nir_ssa_def *outvec = radv_meta_gen_rect_vertices(&b);
- nir_store_var(&b, pos_out, outvec, 0xf);
+ nir_store_var(&b, pos_out, outvec, 0xf);
- nir_ssa_def *src_box = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range=16);
- nir_ssa_def *src0_z = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base=16, .range=4);
+ nir_ssa_def *src_box = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16);
+ nir_ssa_def *src0_z =
+ nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 16, .range = 4);
- nir_ssa_def *vertex_id = nir_load_vertex_id_zero_base(&b);
+ nir_ssa_def *vertex_id = nir_load_vertex_id_zero_base(&b);
- /* vertex 0 - src0_x, src0_y, src0_z */
- /* vertex 1 - src0_x, src1_y, src0_z*/
- /* vertex 2 - src1_x, src0_y, src0_z */
- /* so channel 0 is vertex_id != 2 ? src_x : src_x + w
- channel 1 is vertex id != 1 ? src_y : src_y + w */
+ /* vertex 0 - src0_x, src0_y, src0_z */
+   /* vertex 1 - src0_x, src1_y, src0_z */
+   /* vertex 2 - src1_x, src0_y, src0_z */
+   /* so channel 0 is vertex_id != 2 ? src_x : src_x + w
+      channel 1 is vertex_id != 1 ? src_y : src_y + h */
- nir_ssa_def *c0cmp = nir_ine(&b, vertex_id, nir_imm_int(&b, 2));
- nir_ssa_def *c1cmp = nir_ine(&b, vertex_id, nir_imm_int(&b, 1));
+ nir_ssa_def *c0cmp = nir_ine(&b, vertex_id, nir_imm_int(&b, 2));
+ nir_ssa_def *c1cmp = nir_ine(&b, vertex_id, nir_imm_int(&b, 1));
- nir_ssa_def *comp[4];
- comp[0] = nir_bcsel(&b, c0cmp,
- nir_channel(&b, src_box, 0),
- nir_channel(&b, src_box, 2));
+ nir_ssa_def *comp[4];
+ comp[0] = nir_bcsel(&b, c0cmp, nir_channel(&b, src_box, 0), nir_channel(&b, src_box, 2));
- comp[1] = nir_bcsel(&b, c1cmp,
- nir_channel(&b, src_box, 1),
- nir_channel(&b, src_box, 3));
- comp[2] = src0_z;
- comp[3] = nir_imm_float(&b, 1.0);
- nir_ssa_def *out_tex_vec = nir_vec(&b, comp, 4);
- nir_store_var(&b, tex_pos_out, out_tex_vec, 0xf);
- return b.shader;
+ comp[1] = nir_bcsel(&b, c1cmp, nir_channel(&b, src_box, 1), nir_channel(&b, src_box, 3));
+ comp[2] = src0_z;
+ comp[3] = nir_imm_float(&b, 1.0);
+ nir_ssa_def *out_tex_vec = nir_vec(&b, comp, 4);
+ nir_store_var(&b, tex_pos_out, out_tex_vec, 0xf);
+ return b.shader;
}
static nir_shader *
build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim)
{
- const struct glsl_type *vec4 = glsl_vec4_type();
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, "meta_blit_fs.%d", tex_dim);
-
- nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
- vec4, "v_tex_pos");
- tex_pos_in->data.location = VARYING_SLOT_VAR0;
-
- /* Swizzle the array index which comes in as Z coordinate into the right
- * position.
- */
- unsigned swz[] = { 0, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 1), 2 };
- nir_ssa_def *const tex_pos =
- nir_swizzle(&b, nir_load_var(&b, tex_pos_in), swz,
- (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3));
-
- const struct glsl_type *sampler_type =
- glsl_sampler_type(tex_dim, false, tex_dim != GLSL_SAMPLER_DIM_3D,
- glsl_get_base_type(vec4));
- nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform,
- sampler_type, "s_tex");
- sampler->data.descriptor_set = 0;
- sampler->data.binding = 0;
-
- nir_ssa_def *tex_deref = &nir_build_deref_var(&b, sampler)->dest.ssa;
-
- nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
- tex->sampler_dim = tex_dim;
- tex->op = nir_texop_tex;
- tex->src[0].src_type = nir_tex_src_coord;
- tex->src[0].src = nir_src_for_ssa(tex_pos);
- tex->src[1].src_type = nir_tex_src_texture_deref;
- tex->src[1].src = nir_src_for_ssa(tex_deref);
- tex->src[2].src_type = nir_tex_src_sampler_deref;
- tex->src[2].src = nir_src_for_ssa(tex_deref);
- tex->dest_type = nir_type_float32; /* TODO */
- tex->is_array = glsl_sampler_type_is_array(sampler_type);
- tex->coord_components = tex_pos->num_components;
-
- nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
- nir_builder_instr_insert(&b, &tex->instr);
-
- nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
- vec4, "f_color");
- color_out->data.location = FRAG_RESULT_DATA0;
- nir_store_var(&b, color_out, &tex->dest.ssa, 0xf);
-
- return b.shader;
+ const struct glsl_type *vec4 = glsl_vec4_type();
+ nir_builder b =
+ nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, "meta_blit_fs.%d", tex_dim);
+
+ nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, vec4, "v_tex_pos");
+ tex_pos_in->data.location = VARYING_SLOT_VAR0;
+
+ /* Swizzle the array index which comes in as Z coordinate into the right
+ * position.
+ */
+ unsigned swz[] = {0, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 1), 2};
+ nir_ssa_def *const tex_pos =
+ nir_swizzle(&b, nir_load_var(&b, tex_pos_in), swz, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3));
+
+ const struct glsl_type *sampler_type =
+ glsl_sampler_type(tex_dim, false, tex_dim != GLSL_SAMPLER_DIM_3D, glsl_get_base_type(vec4));
+ nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
+ sampler->data.descriptor_set = 0;
+ sampler->data.binding = 0;
+
+ nir_ssa_def *tex_deref = &nir_build_deref_var(&b, sampler)->dest.ssa;
+
+ nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
+ tex->sampler_dim = tex_dim;
+ tex->op = nir_texop_tex;
+ tex->src[0].src_type = nir_tex_src_coord;
+ tex->src[0].src = nir_src_for_ssa(tex_pos);
+ tex->src[1].src_type = nir_tex_src_texture_deref;
+ tex->src[1].src = nir_src_for_ssa(tex_deref);
+ tex->src[2].src_type = nir_tex_src_sampler_deref;
+ tex->src[2].src = nir_src_for_ssa(tex_deref);
+ tex->dest_type = nir_type_float32; /* TODO */
+ tex->is_array = glsl_sampler_type_is_array(sampler_type);
+ tex->coord_components = tex_pos->num_components;
+
+ nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+ nir_builder_instr_insert(&b, &tex->instr);
+
+ nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color");
+ color_out->data.location = FRAG_RESULT_DATA0;
+ nir_store_var(&b, color_out, &tex->dest.ssa, 0xf);
+
+ return b.shader;
}
static nir_shader *
build_nir_copy_fragment_shader_depth(enum glsl_sampler_dim tex_dim)
{
- const struct glsl_type *vec4 = glsl_vec4_type();
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, "meta_blit_depth_fs.%d", tex_dim);
-
- nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
- vec4, "v_tex_pos");
- tex_pos_in->data.location = VARYING_SLOT_VAR0;
-
- /* Swizzle the array index which comes in as Z coordinate into the right
- * position.
- */
- unsigned swz[] = { 0, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 1), 2 };
- nir_ssa_def *const tex_pos =
- nir_swizzle(&b, nir_load_var(&b, tex_pos_in), swz,
- (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3));
-
- const struct glsl_type *sampler_type =
- glsl_sampler_type(tex_dim, false, tex_dim != GLSL_SAMPLER_DIM_3D,
- glsl_get_base_type(vec4));
- nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform,
- sampler_type, "s_tex");
- sampler->data.descriptor_set = 0;
- sampler->data.binding = 0;
-
- nir_ssa_def *tex_deref = &nir_build_deref_var(&b, sampler)->dest.ssa;
-
- nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
- tex->sampler_dim = tex_dim;
- tex->op = nir_texop_tex;
- tex->src[0].src_type = nir_tex_src_coord;
- tex->src[0].src = nir_src_for_ssa(tex_pos);
- tex->src[1].src_type = nir_tex_src_texture_deref;
- tex->src[1].src = nir_src_for_ssa(tex_deref);
- tex->src[2].src_type = nir_tex_src_sampler_deref;
- tex->src[2].src = nir_src_for_ssa(tex_deref);
- tex->dest_type = nir_type_float32; /* TODO */
- tex->is_array = glsl_sampler_type_is_array(sampler_type);
- tex->coord_components = tex_pos->num_components;
-
- nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
- nir_builder_instr_insert(&b, &tex->instr);
-
- nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
- vec4, "f_color");
- color_out->data.location = FRAG_RESULT_DEPTH;
- nir_store_var(&b, color_out, &tex->dest.ssa, 0x1);
-
- return b.shader;
+ const struct glsl_type *vec4 = glsl_vec4_type();
+ nir_builder b =
+ nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, "meta_blit_depth_fs.%d", tex_dim);
+
+ nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, vec4, "v_tex_pos");
+ tex_pos_in->data.location = VARYING_SLOT_VAR0;
+
+ /* Swizzle the array index which comes in as Z coordinate into the right
+ * position.
+ */
+ unsigned swz[] = {0, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 1), 2};
+ nir_ssa_def *const tex_pos =
+ nir_swizzle(&b, nir_load_var(&b, tex_pos_in), swz, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3));
+
+ const struct glsl_type *sampler_type =
+ glsl_sampler_type(tex_dim, false, tex_dim != GLSL_SAMPLER_DIM_3D, glsl_get_base_type(vec4));
+ nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
+ sampler->data.descriptor_set = 0;
+ sampler->data.binding = 0;
+
+ nir_ssa_def *tex_deref = &nir_build_deref_var(&b, sampler)->dest.ssa;
+
+ nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
+ tex->sampler_dim = tex_dim;
+ tex->op = nir_texop_tex;
+ tex->src[0].src_type = nir_tex_src_coord;
+ tex->src[0].src = nir_src_for_ssa(tex_pos);
+ tex->src[1].src_type = nir_tex_src_texture_deref;
+ tex->src[1].src = nir_src_for_ssa(tex_deref);
+ tex->src[2].src_type = nir_tex_src_sampler_deref;
+ tex->src[2].src = nir_src_for_ssa(tex_deref);
+ tex->dest_type = nir_type_float32; /* TODO */
+ tex->is_array = glsl_sampler_type_is_array(sampler_type);
+ tex->coord_components = tex_pos->num_components;
+
+ nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+ nir_builder_instr_insert(&b, &tex->instr);
+
+ nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color");
+ color_out->data.location = FRAG_RESULT_DEPTH;
+ nir_store_var(&b, color_out, &tex->dest.ssa, 0x1);
+
+ return b.shader;
}
static nir_shader *
build_nir_copy_fragment_shader_stencil(enum glsl_sampler_dim tex_dim)
{
- const struct glsl_type *vec4 = glsl_vec4_type();
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, "meta_blit_stencil_fs.%d", tex_dim);
-
- nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
- vec4, "v_tex_pos");
- tex_pos_in->data.location = VARYING_SLOT_VAR0;
-
- /* Swizzle the array index which comes in as Z coordinate into the right
- * position.
- */
- unsigned swz[] = { 0, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 1), 2 };
- nir_ssa_def *const tex_pos =
- nir_swizzle(&b, nir_load_var(&b, tex_pos_in), swz,
- (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3));
-
- const struct glsl_type *sampler_type =
- glsl_sampler_type(tex_dim, false, tex_dim != GLSL_SAMPLER_DIM_3D,
- glsl_get_base_type(vec4));
- nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform,
- sampler_type, "s_tex");
- sampler->data.descriptor_set = 0;
- sampler->data.binding = 0;
-
- nir_ssa_def *tex_deref = &nir_build_deref_var(&b, sampler)->dest.ssa;
-
- nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
- tex->sampler_dim = tex_dim;
- tex->op = nir_texop_tex;
- tex->src[0].src_type = nir_tex_src_coord;
- tex->src[0].src = nir_src_for_ssa(tex_pos);
- tex->src[1].src_type = nir_tex_src_texture_deref;
- tex->src[1].src = nir_src_for_ssa(tex_deref);
- tex->src[2].src_type = nir_tex_src_sampler_deref;
- tex->src[2].src = nir_src_for_ssa(tex_deref);
- tex->dest_type = nir_type_float32; /* TODO */
- tex->is_array = glsl_sampler_type_is_array(sampler_type);
- tex->coord_components = tex_pos->num_components;
-
- nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
- nir_builder_instr_insert(&b, &tex->instr);
-
- nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
- vec4, "f_color");
- color_out->data.location = FRAG_RESULT_STENCIL;
- nir_store_var(&b, color_out, &tex->dest.ssa, 0x1);
-
- return b.shader;
+ const struct glsl_type *vec4 = glsl_vec4_type();
+ nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL,
+ "meta_blit_stencil_fs.%d", tex_dim);
+
+ nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, vec4, "v_tex_pos");
+ tex_pos_in->data.location = VARYING_SLOT_VAR0;
+
+ /* Swizzle the array index which comes in as Z coordinate into the right
+ * position.
+ */
+ unsigned swz[] = {0, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 1), 2};
+ nir_ssa_def *const tex_pos =
+ nir_swizzle(&b, nir_load_var(&b, tex_pos_in), swz, (tex_dim == GLSL_SAMPLER_DIM_1D ? 2 : 3));
+
+ const struct glsl_type *sampler_type =
+ glsl_sampler_type(tex_dim, false, tex_dim != GLSL_SAMPLER_DIM_3D, glsl_get_base_type(vec4));
+ nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
+ sampler->data.descriptor_set = 0;
+ sampler->data.binding = 0;
+
+ nir_ssa_def *tex_deref = &nir_build_deref_var(&b, sampler)->dest.ssa;
+
+ nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
+ tex->sampler_dim = tex_dim;
+ tex->op = nir_texop_tex;
+ tex->src[0].src_type = nir_tex_src_coord;
+ tex->src[0].src = nir_src_for_ssa(tex_pos);
+ tex->src[1].src_type = nir_tex_src_texture_deref;
+ tex->src[1].src = nir_src_for_ssa(tex_deref);
+ tex->src[2].src_type = nir_tex_src_sampler_deref;
+ tex->src[2].src = nir_src_for_ssa(tex_deref);
+ tex->dest_type = nir_type_float32; /* TODO */
+ tex->is_array = glsl_sampler_type_is_array(sampler_type);
+ tex->coord_components = tex_pos->num_components;
+
+ nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+ nir_builder_instr_insert(&b, &tex->instr);
+
+ nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color");
+ color_out->data.location = FRAG_RESULT_STENCIL;
+ nir_store_var(&b, color_out, &tex->dest.ssa, 0x1);
+
+ return b.shader;
}
static enum glsl_sampler_dim
-translate_sampler_dim(VkImageType type) {
- switch(type) {
- case VK_IMAGE_TYPE_1D:
- return GLSL_SAMPLER_DIM_1D;
- case VK_IMAGE_TYPE_2D:
- return GLSL_SAMPLER_DIM_2D;
- case VK_IMAGE_TYPE_3D:
- return GLSL_SAMPLER_DIM_3D;
- default:
- unreachable("Unhandled image type");
- }
+translate_sampler_dim(VkImageType type)
+{
+ switch (type) {
+ case VK_IMAGE_TYPE_1D:
+ return GLSL_SAMPLER_DIM_1D;
+ case VK_IMAGE_TYPE_2D:
+ return GLSL_SAMPLER_DIM_2D;
+ case VK_IMAGE_TYPE_3D:
+ return GLSL_SAMPLER_DIM_3D;
+ default:
+ unreachable("Unhandled image type");
+ }
}
static void
-meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *src_image,
- struct radv_image_view *src_iview,
- VkImageLayout src_image_layout,
- float src_offset_0[3],
- float src_offset_1[3],
- struct radv_image *dest_image,
- struct radv_image_view *dest_iview,
- VkImageLayout dest_image_layout,
- VkOffset2D dest_offset_0,
- VkOffset2D dest_offset_1,
- VkRect2D dest_box,
+meta_emit_blit(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,
+ struct radv_image_view *src_iview, VkImageLayout src_image_layout,
+ float src_offset_0[3], float src_offset_1[3], struct radv_image *dest_image,
+ struct radv_image_view *dest_iview, VkImageLayout dest_image_layout,
+ VkOffset2D dest_offset_0, VkOffset2D dest_offset_1, VkRect2D dest_box,
VkSampler sampler)
{
- struct radv_device *device = cmd_buffer->device;
- uint32_t src_width = radv_minify(src_iview->image->info.width, src_iview->base_mip);
- uint32_t src_height = radv_minify(src_iview->image->info.height, src_iview->base_mip);
- uint32_t src_depth = radv_minify(src_iview->image->info.depth, src_iview->base_mip);
- uint32_t dst_width = radv_minify(dest_iview->image->info.width, dest_iview->base_mip);
- uint32_t dst_height = radv_minify(dest_iview->image->info.height, dest_iview->base_mip);
-
- assert(src_image->info.samples == dest_image->info.samples);
-
- float vertex_push_constants[5] = {
- src_offset_0[0] / (float)src_width,
- src_offset_0[1] / (float)src_height,
- src_offset_1[0] / (float)src_width,
- src_offset_1[1] / (float)src_height,
- src_offset_0[2] / (float)src_depth,
- };
-
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.blit.pipeline_layout,
- VK_SHADER_STAGE_VERTEX_BIT, 0, 20,
- vertex_push_constants);
-
- VkFramebuffer fb;
- radv_CreateFramebuffer(radv_device_to_handle(device),
- &(VkFramebufferCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
- .attachmentCount = 1,
- .pAttachments = (VkImageView[]) {
- radv_image_view_to_handle(dest_iview),
- },
- .width = dst_width,
- .height = dst_height,
- .layers = 1,
- }, &cmd_buffer->pool->alloc, &fb);
- VkPipeline* pipeline = NULL;
- unsigned fs_key = 0;
- switch (src_iview->aspect_mask) {
- case VK_IMAGE_ASPECT_COLOR_BIT: {
- unsigned dst_layout = radv_meta_dst_layout_from_layout(dest_image_layout);
- fs_key = radv_format_meta_fs_key(device, dest_image->vk_format);
-
- radv_cmd_buffer_begin_render_pass(cmd_buffer,
- &(VkRenderPassBeginInfo) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
- .renderPass = device->meta_state.blit.render_pass[fs_key][dst_layout],
- .framebuffer = fb,
- .renderArea = {
- .offset = { dest_box.offset.x, dest_box.offset.y },
- .extent = { dest_box.extent.width, dest_box.extent.height },
- },
- .clearValueCount = 0,
- .pClearValues = NULL,
- }, NULL);
- switch (src_image->type) {
- case VK_IMAGE_TYPE_1D:
- pipeline = &device->meta_state.blit.pipeline_1d_src[fs_key];
- break;
- case VK_IMAGE_TYPE_2D:
- pipeline = &device->meta_state.blit.pipeline_2d_src[fs_key];
- break;
- case VK_IMAGE_TYPE_3D:
- pipeline = &device->meta_state.blit.pipeline_3d_src[fs_key];
- break;
- default:
- unreachable("bad VkImageType");
- }
- break;
- }
- case VK_IMAGE_ASPECT_DEPTH_BIT: {
- enum radv_blit_ds_layout ds_layout = radv_meta_blit_ds_to_type(dest_image_layout);
- radv_cmd_buffer_begin_render_pass(cmd_buffer,
- &(VkRenderPassBeginInfo) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
- .renderPass = device->meta_state.blit.depth_only_rp[ds_layout],
- .framebuffer = fb,
- .renderArea = {
- .offset = { dest_box.offset.x, dest_box.offset.y },
- .extent = { dest_box.extent.width, dest_box.extent.height },
- },
- .clearValueCount = 0,
- .pClearValues = NULL,
- }, NULL);
- switch (src_image->type) {
- case VK_IMAGE_TYPE_1D:
- pipeline = &device->meta_state.blit.depth_only_1d_pipeline;
- break;
- case VK_IMAGE_TYPE_2D:
- pipeline = &device->meta_state.blit.depth_only_2d_pipeline;
- break;
- case VK_IMAGE_TYPE_3D:
- pipeline = &device->meta_state.blit.depth_only_3d_pipeline;
- break;
- default:
- unreachable("bad VkImageType");
- }
- break;
- }
- case VK_IMAGE_ASPECT_STENCIL_BIT: {
- enum radv_blit_ds_layout ds_layout = radv_meta_blit_ds_to_type(dest_image_layout);
- radv_cmd_buffer_begin_render_pass(cmd_buffer,
- &(VkRenderPassBeginInfo) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
- .renderPass = device->meta_state.blit.stencil_only_rp[ds_layout],
- .framebuffer = fb,
- .renderArea = {
- .offset = { dest_box.offset.x, dest_box.offset.y },
- .extent = { dest_box.extent.width, dest_box.extent.height },
- },
- .clearValueCount = 0,
- .pClearValues = NULL,
- }, NULL);
- switch (src_image->type) {
- case VK_IMAGE_TYPE_1D:
- pipeline = &device->meta_state.blit.stencil_only_1d_pipeline;
- break;
- case VK_IMAGE_TYPE_2D:
- pipeline = &device->meta_state.blit.stencil_only_2d_pipeline;
- break;
- case VK_IMAGE_TYPE_3D:
- pipeline = &device->meta_state.blit.stencil_only_3d_pipeline;
- break;
- default:
- unreachable("bad VkImageType");
- }
- break;
- }
- default:
- unreachable("bad VkImageType");
- }
-
- radv_cmd_buffer_set_subpass(cmd_buffer,
- &cmd_buffer->state.pass->subpasses[0]);
-
- if (!*pipeline) {
- VkResult ret = build_pipeline(device, src_iview->aspect_mask, translate_sampler_dim(src_image->type), fs_key, pipeline);
- if (ret != VK_SUCCESS) {
- cmd_buffer->record_result = ret;
- goto fail_pipeline;
- }
- }
-
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
-
- radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
- device->meta_state.blit.pipeline_layout,
- 0, /* set */
- 1, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
- .pImageInfo = (VkDescriptorImageInfo[]) {
- {
- .sampler = sampler,
- .imageView = radv_image_view_to_handle(src_iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- }
- }
- });
-
- radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
- .x = dest_offset_0.x,
- .y = dest_offset_0.y,
- .width = dest_offset_1.x - dest_offset_0.x,
- .height = dest_offset_1.y - dest_offset_0.y,
- .minDepth = 0.0f,
- .maxDepth = 1.0f
- });
-
- radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkRect2D) {
- .offset = (VkOffset2D) { MIN2(dest_offset_0.x, dest_offset_1.x), MIN2(dest_offset_0.y, dest_offset_1.y) },
- .extent = (VkExtent2D) {
- abs(dest_offset_1.x - dest_offset_0.x),
- abs(dest_offset_1.y - dest_offset_0.y)
- },
- });
-
- radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
+ struct radv_device *device = cmd_buffer->device;
+ uint32_t src_width = radv_minify(src_iview->image->info.width, src_iview->base_mip);
+ uint32_t src_height = radv_minify(src_iview->image->info.height, src_iview->base_mip);
+ uint32_t src_depth = radv_minify(src_iview->image->info.depth, src_iview->base_mip);
+ uint32_t dst_width = radv_minify(dest_iview->image->info.width, dest_iview->base_mip);
+ uint32_t dst_height = radv_minify(dest_iview->image->info.height, dest_iview->base_mip);
+
+ assert(src_image->info.samples == dest_image->info.samples);
+
+ float vertex_push_constants[5] = {
+ src_offset_0[0] / (float)src_width, src_offset_0[1] / (float)src_height,
+ src_offset_1[0] / (float)src_width, src_offset_1[1] / (float)src_height,
+ src_offset_0[2] / (float)src_depth,
+ };
+
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+ device->meta_state.blit.pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT, 0, 20,
+ vertex_push_constants);
+
+ VkFramebuffer fb;
+ radv_CreateFramebuffer(radv_device_to_handle(device),
+ &(VkFramebufferCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments =
+ (VkImageView[]){
+ radv_image_view_to_handle(dest_iview),
+ },
+ .width = dst_width,
+ .height = dst_height,
+ .layers = 1,
+ },
+ &cmd_buffer->pool->alloc, &fb);
+ VkPipeline *pipeline = NULL;
+ unsigned fs_key = 0;
+ switch (src_iview->aspect_mask) {
+ case VK_IMAGE_ASPECT_COLOR_BIT: {
+ unsigned dst_layout = radv_meta_dst_layout_from_layout(dest_image_layout);
+ fs_key = radv_format_meta_fs_key(device, dest_image->vk_format);
+
+ radv_cmd_buffer_begin_render_pass(
+ cmd_buffer,
+ &(VkRenderPassBeginInfo){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .renderPass = device->meta_state.blit.render_pass[fs_key][dst_layout],
+ .framebuffer = fb,
+ .renderArea =
+ {
+ .offset = {dest_box.offset.x, dest_box.offset.y},
+ .extent = {dest_box.extent.width, dest_box.extent.height},
+ },
+ .clearValueCount = 0,
+ .pClearValues = NULL,
+ },
+ NULL);
+ switch (src_image->type) {
+ case VK_IMAGE_TYPE_1D:
+ pipeline = &device->meta_state.blit.pipeline_1d_src[fs_key];
+ break;
+ case VK_IMAGE_TYPE_2D:
+ pipeline = &device->meta_state.blit.pipeline_2d_src[fs_key];
+ break;
+ case VK_IMAGE_TYPE_3D:
+ pipeline = &device->meta_state.blit.pipeline_3d_src[fs_key];
+ break;
+ default:
+ unreachable("bad VkImageType");
+ }
+ break;
+ }
+ case VK_IMAGE_ASPECT_DEPTH_BIT: {
+ enum radv_blit_ds_layout ds_layout = radv_meta_blit_ds_to_type(dest_image_layout);
+ radv_cmd_buffer_begin_render_pass(
+ cmd_buffer,
+ &(VkRenderPassBeginInfo){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .renderPass = device->meta_state.blit.depth_only_rp[ds_layout],
+ .framebuffer = fb,
+ .renderArea =
+ {
+ .offset = {dest_box.offset.x, dest_box.offset.y},
+ .extent = {dest_box.extent.width, dest_box.extent.height},
+ },
+ .clearValueCount = 0,
+ .pClearValues = NULL,
+ },
+ NULL);
+ switch (src_image->type) {
+ case VK_IMAGE_TYPE_1D:
+ pipeline = &device->meta_state.blit.depth_only_1d_pipeline;
+ break;
+ case VK_IMAGE_TYPE_2D:
+ pipeline = &device->meta_state.blit.depth_only_2d_pipeline;
+ break;
+ case VK_IMAGE_TYPE_3D:
+ pipeline = &device->meta_state.blit.depth_only_3d_pipeline;
+ break;
+ default:
+ unreachable("bad VkImageType");
+ }
+ break;
+ }
+ case VK_IMAGE_ASPECT_STENCIL_BIT: {
+ enum radv_blit_ds_layout ds_layout = radv_meta_blit_ds_to_type(dest_image_layout);
+ radv_cmd_buffer_begin_render_pass(
+ cmd_buffer,
+ &(VkRenderPassBeginInfo){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .renderPass = device->meta_state.blit.stencil_only_rp[ds_layout],
+ .framebuffer = fb,
+ .renderArea =
+ {
+ .offset = {dest_box.offset.x, dest_box.offset.y},
+ .extent = {dest_box.extent.width, dest_box.extent.height},
+ },
+ .clearValueCount = 0,
+ .pClearValues = NULL,
+ },
+ NULL);
+ switch (src_image->type) {
+ case VK_IMAGE_TYPE_1D:
+ pipeline = &device->meta_state.blit.stencil_only_1d_pipeline;
+ break;
+ case VK_IMAGE_TYPE_2D:
+ pipeline = &device->meta_state.blit.stencil_only_2d_pipeline;
+ break;
+ case VK_IMAGE_TYPE_3D:
+ pipeline = &device->meta_state.blit.stencil_only_3d_pipeline;
+ break;
+ default:
+ unreachable("bad VkImageType");
+ }
+ break;
+ }
+ default:
+      unreachable("bad aspect mask");
+ }
+
+ radv_cmd_buffer_set_subpass(cmd_buffer, &cmd_buffer->state.pass->subpasses[0]);
+
+ if (!*pipeline) {
+ VkResult ret = build_pipeline(device, src_iview->aspect_mask,
+ translate_sampler_dim(src_image->type), fs_key, pipeline);
+ if (ret != VK_SUCCESS) {
+ cmd_buffer->record_result = ret;
+ goto fail_pipeline;
+ }
+ }
+
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS,
+ *pipeline);
+
+ radv_meta_push_descriptor_set(
+ cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, device->meta_state.blit.pipeline_layout,
+ 0, /* set */
+ 1, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .pImageInfo = (VkDescriptorImageInfo[]){
+ {
+ .sampler = sampler,
+ .imageView = radv_image_view_to_handle(src_iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }}});
+
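+   /* The destination rectangle is applied as-is, so a flipped blit yields a
+    * negative viewport width/height here; the scissor below is normalized to
+    * a non-negative extent. */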
+ radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
+ &(VkViewport){.x = dest_offset_0.x,
+ .y = dest_offset_0.y,
+ .width = dest_offset_1.x - dest_offset_0.x,
+ .height = dest_offset_1.y - dest_offset_0.y,
+ .minDepth = 0.0f,
+ .maxDepth = 1.0f});
+
+ radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
+ &(VkRect2D){
+ .offset = (VkOffset2D){MIN2(dest_offset_0.x, dest_offset_1.x),
+ MIN2(dest_offset_0.y, dest_offset_1.y)},
+ .extent = (VkExtent2D){abs(dest_offset_1.x - dest_offset_0.x),
+ abs(dest_offset_1.y - dest_offset_0.y)},
+ });
+
+ radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
fail_pipeline:
- radv_cmd_buffer_end_render_pass(cmd_buffer);
+ radv_cmd_buffer_end_render_pass(cmd_buffer);
- /* At the point where we emit the draw call, all data from the
- * descriptor sets, etc. has been used. We are free to delete it.
- */
- /* TODO: above comment is not valid for at least descriptor sets/pools,
- * as we may not free them till after execution finishes. Check others. */
+ /* At the point where we emit the draw call, all data from the
+ * descriptor sets, etc. has been used. We are free to delete it.
+ */
+ /* TODO: above comment is not valid for at least descriptor sets/pools,
+ * as we may not free them till after execution finishes. Check others. */
- radv_DestroyFramebuffer(radv_device_to_handle(device), fb,
- &cmd_buffer->pool->alloc);
+ radv_DestroyFramebuffer(radv_device_to_handle(device), fb, &cmd_buffer->pool->alloc);
}
static bool
flip_coords(unsigned *src0, unsigned *src1, unsigned *dst0, unsigned *dst1)
{
- bool flip = false;
- if (*src0 > *src1) {
- unsigned tmp = *src0;
- *src0 = *src1;
- *src1 = tmp;
- flip = !flip;
- }
-
- if (*dst0 > *dst1) {
- unsigned tmp = *dst0;
- *dst0 = *dst1;
- *dst1 = tmp;
- flip = !flip;
- }
- return flip;
+ bool flip = false;
+ if (*src0 > *src1) {
+ unsigned tmp = *src0;
+ *src0 = *src1;
+ *src1 = tmp;
+ flip = !flip;
+ }
+
+ if (*dst0 > *dst1) {
+ unsigned tmp = *dst0;
+ *dst0 = *dst1;
+ *dst1 = tmp;
+ flip = !flip;
+ }
+ return flip;
}
static void
-blit_image(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *src_image,
- VkImageLayout src_image_layout,
- struct radv_image *dst_image,
- VkImageLayout dst_image_layout,
- const VkImageBlit2KHR *region,
- VkFilter filter)
+blit_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,
+ VkImageLayout src_image_layout, struct radv_image *dst_image,
+ VkImageLayout dst_image_layout, const VkImageBlit2KHR *region, VkFilter filter)
{
- const VkImageSubresourceLayers *src_res = &region->srcSubresource;
- const VkImageSubresourceLayers *dst_res = &region->dstSubresource;
- struct radv_device *device = cmd_buffer->device;
- struct radv_meta_saved_state saved_state;
- bool old_predicating;
- VkSampler sampler;
-
- /* From the Vulkan 1.0 spec:
- *
- * vkCmdBlitImage must not be used for multisampled source or
- * destination images. Use vkCmdResolveImage for this purpose.
- */
- assert(src_image->info.samples == 1);
- assert(dst_image->info.samples == 1);
-
- radv_CreateSampler(radv_device_to_handle(device),
- &(VkSamplerCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
- .magFilter = filter,
- .minFilter = filter,
- .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
- .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
- .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
- }, &cmd_buffer->pool->alloc, &sampler);
-
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_GRAPHICS_PIPELINE |
- RADV_META_SAVE_CONSTANTS |
- RADV_META_SAVE_DESCRIPTORS);
-
- /* VK_EXT_conditional_rendering says that blit commands should not be
- * affected by conditional rendering.
- */
- old_predicating = cmd_buffer->state.predicating;
- cmd_buffer->state.predicating = false;
-
- unsigned dst_start, dst_end;
- if (dst_image->type == VK_IMAGE_TYPE_3D) {
- assert(dst_res->baseArrayLayer == 0);
- dst_start = region->dstOffsets[0].z;
- dst_end = region->dstOffsets[1].z;
- } else {
- dst_start = dst_res->baseArrayLayer;
- dst_end = dst_start + dst_res->layerCount;
- }
-
- unsigned src_start, src_end;
- if (src_image->type == VK_IMAGE_TYPE_3D) {
- assert(src_res->baseArrayLayer == 0);
- src_start = region->srcOffsets[0].z;
- src_end = region->srcOffsets[1].z;
- } else {
- src_start = src_res->baseArrayLayer;
- src_end = src_start + src_res->layerCount;
- }
-
- bool flip_z = flip_coords(&src_start, &src_end, &dst_start, &dst_end);
- float src_z_step = (float)(src_end - src_start) /
- (float)(dst_end - dst_start);
-
- /* There is no interpolation to the pixel center during
- * rendering, so add the 0.5 offset ourselves here. */
- float depth_center_offset = 0;
- if (src_image->type == VK_IMAGE_TYPE_3D)
- depth_center_offset = 0.5 / (dst_end - dst_start) * (src_end - src_start);
-
- if (flip_z) {
- src_start = src_end;
- src_z_step *= -1;
- depth_center_offset *= -1;
- }
-
- unsigned src_x0 = region->srcOffsets[0].x;
- unsigned src_x1 = region->srcOffsets[1].x;
- unsigned dst_x0 = region->dstOffsets[0].x;
- unsigned dst_x1 = region->dstOffsets[1].x;
-
- unsigned src_y0 = region->srcOffsets[0].y;
- unsigned src_y1 = region->srcOffsets[1].y;
- unsigned dst_y0 = region->dstOffsets[0].y;
- unsigned dst_y1 = region->dstOffsets[1].y;
-
- VkRect2D dst_box;
- dst_box.offset.x = MIN2(dst_x0, dst_x1);
- dst_box.offset.y = MIN2(dst_y0, dst_y1);
- dst_box.extent.width = dst_x1 - dst_x0;
- dst_box.extent.height = dst_y1 - dst_y0;
-
- const unsigned num_layers = dst_end - dst_start;
- for (unsigned i = 0; i < num_layers; i++) {
- struct radv_image_view dst_iview, src_iview;
-
- const VkOffset2D dst_offset_0 = {
- .x = dst_x0,
- .y = dst_y0,
- };
- const VkOffset2D dst_offset_1 = {
- .x = dst_x1,
- .y = dst_y1,
- };
-
- float src_offset_0[3] = {
- src_x0,
- src_y0,
- src_start + i * src_z_step + depth_center_offset,
- };
- float src_offset_1[3] = {
- src_x1,
- src_y1,
- src_start + i * src_z_step + depth_center_offset,
- };
- const uint32_t dst_array_slice = dst_start + i;
-
- /* 3D images have just 1 layer */
- const uint32_t src_array_slice = src_image->type == VK_IMAGE_TYPE_3D ? 0 : src_start + i;
-
- radv_image_view_init(&dst_iview, cmd_buffer->device,
- &(VkImageViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(dst_image),
- .viewType = radv_meta_get_view_type(dst_image),
- .format = dst_image->vk_format,
- .subresourceRange = {
- .aspectMask = dst_res->aspectMask,
- .baseMipLevel = dst_res->mipLevel,
- .levelCount = 1,
- .baseArrayLayer = dst_array_slice,
- .layerCount = 1
- },
- }, NULL);
- radv_image_view_init(&src_iview, cmd_buffer->device,
- &(VkImageViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(src_image),
- .viewType = radv_meta_get_view_type(src_image),
- .format = src_image->vk_format,
- .subresourceRange = {
- .aspectMask = src_res->aspectMask,
- .baseMipLevel = src_res->mipLevel,
- .levelCount = 1,
- .baseArrayLayer = src_array_slice,
- .layerCount = 1
- },
- }, NULL);
- meta_emit_blit(cmd_buffer,
- src_image, &src_iview, src_image_layout,
- src_offset_0, src_offset_1,
- dst_image, &dst_iview, dst_image_layout,
- dst_offset_0, dst_offset_1,
- dst_box,
- sampler);
- }
-
- /* Restore conditional rendering. */
- cmd_buffer->state.predicating = old_predicating;
-
- radv_meta_restore(&saved_state, cmd_buffer);
-
- radv_DestroySampler(radv_device_to_handle(device), sampler,
- &cmd_buffer->pool->alloc);
+ const VkImageSubresourceLayers *src_res = &region->srcSubresource;
+ const VkImageSubresourceLayers *dst_res = &region->dstSubresource;
+ struct radv_device *device = cmd_buffer->device;
+ struct radv_meta_saved_state saved_state;
+ bool old_predicating;
+ VkSampler sampler;
+
+ /* From the Vulkan 1.0 spec:
+ *
+ * vkCmdBlitImage must not be used for multisampled source or
+ * destination images. Use vkCmdResolveImage for this purpose.
+ */
+ assert(src_image->info.samples == 1);
+ assert(dst_image->info.samples == 1);
+
+ radv_CreateSampler(radv_device_to_handle(device),
+ &(VkSamplerCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
+ .magFilter = filter,
+ .minFilter = filter,
+ .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
+ .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
+ .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
+ },
+ &cmd_buffer->pool->alloc, &sampler);
+
+ radv_meta_save(
+ &saved_state, cmd_buffer,
+ RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
+
+ /* VK_EXT_conditional_rendering says that blit commands should not be
+ * affected by conditional rendering.
+ */
+ old_predicating = cmd_buffer->state.predicating;
+ cmd_buffer->state.predicating = false;
+
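+   /* For 3D destinations, iterate over the destination z range rather than
+    * array layers. */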
+ unsigned dst_start, dst_end;
+ if (dst_image->type == VK_IMAGE_TYPE_3D) {
+ assert(dst_res->baseArrayLayer == 0);
+ dst_start = region->dstOffsets[0].z;
+ dst_end = region->dstOffsets[1].z;
+ } else {
+ dst_start = dst_res->baseArrayLayer;
+ dst_end = dst_start + dst_res->layerCount;
+ }
+
+ unsigned src_start, src_end;
+ if (src_image->type == VK_IMAGE_TYPE_3D) {
+ assert(src_res->baseArrayLayer == 0);
+ src_start = region->srcOffsets[0].z;
+ src_end = region->srcOffsets[1].z;
+ } else {
+ src_start = src_res->baseArrayLayer;
+ src_end = src_start + src_res->layerCount;
+ }
+
+ bool flip_z = flip_coords(&src_start, &src_end, &dst_start, &dst_end);
+ float src_z_step = (float)(src_end - src_start) / (float)(dst_end - dst_start);
+
+ /* There is no interpolation to the pixel center during
+ * rendering, so add the 0.5 offset ourselves here. */
+ float depth_center_offset = 0;
+ if (src_image->type == VK_IMAGE_TYPE_3D)
+ depth_center_offset = 0.5 / (dst_end - dst_start) * (src_end - src_start);
+
+ if (flip_z) {
+ src_start = src_end;
+ src_z_step *= -1;
+ depth_center_offset *= -1;
+ }
+
+ unsigned src_x0 = region->srcOffsets[0].x;
+ unsigned src_x1 = region->srcOffsets[1].x;
+ unsigned dst_x0 = region->dstOffsets[0].x;
+ unsigned dst_x1 = region->dstOffsets[1].x;
+
+ unsigned src_y0 = region->srcOffsets[0].y;
+ unsigned src_y1 = region->srcOffsets[1].y;
+ unsigned dst_y0 = region->dstOffsets[0].y;
+ unsigned dst_y1 = region->dstOffsets[1].y;
+
+ VkRect2D dst_box;
+ dst_box.offset.x = MIN2(dst_x0, dst_x1);
+ dst_box.offset.y = MIN2(dst_y0, dst_y1);
+ dst_box.extent.width = dst_x1 - dst_x0;
+ dst_box.extent.height = dst_y1 - dst_y0;
+
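+   /* Emit one blit per destination slice, each with its own single-layer
+    * source and destination image views. */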
+ const unsigned num_layers = dst_end - dst_start;
+ for (unsigned i = 0; i < num_layers; i++) {
+ struct radv_image_view dst_iview, src_iview;
+
+ const VkOffset2D dst_offset_0 = {
+ .x = dst_x0,
+ .y = dst_y0,
+ };
+ const VkOffset2D dst_offset_1 = {
+ .x = dst_x1,
+ .y = dst_y1,
+ };
+
+ float src_offset_0[3] = {
+ src_x0,
+ src_y0,
+ src_start + i * src_z_step + depth_center_offset,
+ };
+ float src_offset_1[3] = {
+ src_x1,
+ src_y1,
+ src_start + i * src_z_step + depth_center_offset,
+ };
+ const uint32_t dst_array_slice = dst_start + i;
+
+ /* 3D images have just 1 layer */
+ const uint32_t src_array_slice = src_image->type == VK_IMAGE_TYPE_3D ? 0 : src_start + i;
+
+ radv_image_view_init(&dst_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(dst_image),
+ .viewType = radv_meta_get_view_type(dst_image),
+ .format = dst_image->vk_format,
+ .subresourceRange = {.aspectMask = dst_res->aspectMask,
+ .baseMipLevel = dst_res->mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = dst_array_slice,
+ .layerCount = 1},
+ },
+ NULL);
+ radv_image_view_init(&src_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(src_image),
+ .viewType = radv_meta_get_view_type(src_image),
+ .format = src_image->vk_format,
+ .subresourceRange = {.aspectMask = src_res->aspectMask,
+ .baseMipLevel = src_res->mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = src_array_slice,
+ .layerCount = 1},
+ },
+ NULL);
+ meta_emit_blit(cmd_buffer, src_image, &src_iview, src_image_layout, src_offset_0,
+ src_offset_1, dst_image, &dst_iview, dst_image_layout, dst_offset_0,
+ dst_offset_1, dst_box, sampler);
+ }
+
+ /* Restore conditional rendering. */
+ cmd_buffer->state.predicating = old_predicating;
+
+ radv_meta_restore(&saved_state, cmd_buffer);
+
+ radv_DestroySampler(radv_device_to_handle(device), sampler, &cmd_buffer->pool->alloc);
}
-void radv_CmdBlitImage2KHR(
- VkCommandBuffer commandBuffer,
- const VkBlitImageInfo2KHR* pBlitImageInfo)
+void
+radv_CmdBlitImage2KHR(VkCommandBuffer commandBuffer, const VkBlitImageInfo2KHR *pBlitImageInfo)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_image, src_image, pBlitImageInfo->srcImage);
- RADV_FROM_HANDLE(radv_image, dst_image, pBlitImageInfo->dstImage);
-
- for (unsigned r = 0; r < pBlitImageInfo->regionCount; r++) {
- blit_image(cmd_buffer,
- src_image, pBlitImageInfo->srcImageLayout,
- dst_image, pBlitImageInfo->dstImageLayout,
- &pBlitImageInfo->pRegions[r],
- pBlitImageInfo->filter);
- }
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_image, src_image, pBlitImageInfo->srcImage);
+ RADV_FROM_HANDLE(radv_image, dst_image, pBlitImageInfo->dstImage);
+
+ for (unsigned r = 0; r < pBlitImageInfo->regionCount; r++) {
+ blit_image(cmd_buffer, src_image, pBlitImageInfo->srcImageLayout, dst_image,
+ pBlitImageInfo->dstImageLayout, &pBlitImageInfo->pRegions[r],
+ pBlitImageInfo->filter);
+ }
}
void
radv_device_finish_meta_blit_state(struct radv_device *device)
{
- struct radv_meta_state *state = &device->meta_state;
-
- for (unsigned i = 0; i < NUM_META_FS_KEYS; ++i) {
- for (unsigned j = 0; j < RADV_META_DST_LAYOUT_COUNT; ++j) {
- radv_DestroyRenderPass(radv_device_to_handle(device),
- state->blit.render_pass[i][j],
- &state->alloc);
- }
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->blit.pipeline_1d_src[i],
- &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->blit.pipeline_2d_src[i],
- &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->blit.pipeline_3d_src[i],
- &state->alloc);
- }
-
- for (enum radv_blit_ds_layout i = RADV_BLIT_DS_LAYOUT_TILE_ENABLE; i < RADV_BLIT_DS_LAYOUT_COUNT; i++) {
- radv_DestroyRenderPass(radv_device_to_handle(device),
- state->blit.depth_only_rp[i], &state->alloc);
- radv_DestroyRenderPass(radv_device_to_handle(device),
- state->blit.stencil_only_rp[i], &state->alloc);
- }
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->blit.depth_only_1d_pipeline, &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->blit.depth_only_2d_pipeline, &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->blit.depth_only_3d_pipeline, &state->alloc);
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->blit.stencil_only_1d_pipeline,
- &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->blit.stencil_only_2d_pipeline,
- &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->blit.stencil_only_3d_pipeline,
- &state->alloc);
-
-
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->blit.pipeline_layout, &state->alloc);
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- state->blit.ds_layout, &state->alloc);
+ struct radv_meta_state *state = &device->meta_state;
+
+ for (unsigned i = 0; i < NUM_META_FS_KEYS; ++i) {
+ for (unsigned j = 0; j < RADV_META_DST_LAYOUT_COUNT; ++j) {
+ radv_DestroyRenderPass(radv_device_to_handle(device), state->blit.render_pass[i][j],
+ &state->alloc);
+ }
+ radv_DestroyPipeline(radv_device_to_handle(device), state->blit.pipeline_1d_src[i],
+ &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->blit.pipeline_2d_src[i],
+ &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->blit.pipeline_3d_src[i],
+ &state->alloc);
+ }
+
+ for (enum radv_blit_ds_layout i = RADV_BLIT_DS_LAYOUT_TILE_ENABLE; i < RADV_BLIT_DS_LAYOUT_COUNT;
+ i++) {
+ radv_DestroyRenderPass(radv_device_to_handle(device), state->blit.depth_only_rp[i],
+ &state->alloc);
+ radv_DestroyRenderPass(radv_device_to_handle(device), state->blit.stencil_only_rp[i],
+ &state->alloc);
+ }
+
+ radv_DestroyPipeline(radv_device_to_handle(device), state->blit.depth_only_1d_pipeline,
+ &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->blit.depth_only_2d_pipeline,
+ &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->blit.depth_only_3d_pipeline,
+ &state->alloc);
+
+ radv_DestroyPipeline(radv_device_to_handle(device), state->blit.stencil_only_1d_pipeline,
+ &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->blit.stencil_only_2d_pipeline,
+ &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->blit.stencil_only_3d_pipeline,
+ &state->alloc);
+
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->blit.pipeline_layout,
+ &state->alloc);
+ radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->blit.ds_layout,
+ &state->alloc);
}
static VkResult
-build_pipeline(struct radv_device *device,
- VkImageAspectFlagBits aspect,
- enum glsl_sampler_dim tex_dim,
- unsigned fs_key,
- VkPipeline *pipeline)
+build_pipeline(struct radv_device *device, VkImageAspectFlagBits aspect,
+ enum glsl_sampler_dim tex_dim, unsigned fs_key, VkPipeline *pipeline)
{
- VkResult result = VK_SUCCESS;
-
- mtx_lock(&device->meta_state.mtx);
-
- if (*pipeline) {
- mtx_unlock(&device->meta_state.mtx);
- return VK_SUCCESS;
- }
-
- nir_shader *fs;
- nir_shader *vs = build_nir_vertex_shader();
- VkRenderPass rp;
-
- switch(aspect) {
- case VK_IMAGE_ASPECT_COLOR_BIT:
- fs = build_nir_copy_fragment_shader(tex_dim);
- rp = device->meta_state.blit.render_pass[fs_key][0];
- break;
- case VK_IMAGE_ASPECT_DEPTH_BIT:
- fs = build_nir_copy_fragment_shader_depth(tex_dim);
- rp = device->meta_state.blit.depth_only_rp[0];
- break;
- case VK_IMAGE_ASPECT_STENCIL_BIT:
- fs = build_nir_copy_fragment_shader_stencil(tex_dim);
- rp = device->meta_state.blit.stencil_only_rp[0];
- break;
- default:
- unreachable("Unhandled aspect");
- }
- VkPipelineVertexInputStateCreateInfo vi_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
- .vertexBindingDescriptionCount = 0,
- .vertexAttributeDescriptionCount = 0,
- };
-
- VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
- {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_VERTEX_BIT,
- .module = vk_shader_module_handle_from_nir(vs),
- .pName = "main",
- .pSpecializationInfo = NULL
- }, {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
- .module = vk_shader_module_handle_from_nir(fs),
- .pName = "main",
- .pSpecializationInfo = NULL
- },
- };
-
- VkGraphicsPipelineCreateInfo vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
- .stageCount = ARRAY_SIZE(pipeline_shader_stages),
- .pStages = pipeline_shader_stages,
- .pVertexInputState = &vi_create_info,
- .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
- .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
- .primitiveRestartEnable = false,
- },
- .pViewportState = &(VkPipelineViewportStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
- .viewportCount = 1,
- .scissorCount = 1,
- },
- .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
- .rasterizerDiscardEnable = false,
- .polygonMode = VK_POLYGON_MODE_FILL,
- .cullMode = VK_CULL_MODE_NONE,
- .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE
- },
- .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
- .rasterizationSamples = 1,
- .sampleShadingEnable = false,
- .pSampleMask = (VkSampleMask[]) { UINT32_MAX },
- },
- .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
- .dynamicStateCount = 4,
- .pDynamicStates = (VkDynamicState[]) {
- VK_DYNAMIC_STATE_VIEWPORT,
- VK_DYNAMIC_STATE_SCISSOR,
- VK_DYNAMIC_STATE_LINE_WIDTH,
- VK_DYNAMIC_STATE_BLEND_CONSTANTS,
- },
- },
- .flags = 0,
- .layout = device->meta_state.blit.pipeline_layout,
- .renderPass = rp,
- .subpass = 0,
- };
-
- VkPipelineColorBlendStateCreateInfo color_blend_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
- .attachmentCount = 1,
- .pAttachments = (VkPipelineColorBlendAttachmentState []) {
- {
- .colorWriteMask = VK_COLOR_COMPONENT_A_BIT |
- VK_COLOR_COMPONENT_R_BIT |
- VK_COLOR_COMPONENT_G_BIT |
- VK_COLOR_COMPONENT_B_BIT },
- }
- };
-
- VkPipelineDepthStencilStateCreateInfo depth_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
- .depthTestEnable = true,
- .depthWriteEnable = true,
- .depthCompareOp = VK_COMPARE_OP_ALWAYS,
- };
-
- VkPipelineDepthStencilStateCreateInfo stencil_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
- .depthTestEnable = false,
- .depthWriteEnable = false,
- .stencilTestEnable = true,
- .front = {
- .failOp = VK_STENCIL_OP_REPLACE,
- .passOp = VK_STENCIL_OP_REPLACE,
- .depthFailOp = VK_STENCIL_OP_REPLACE,
- .compareOp = VK_COMPARE_OP_ALWAYS,
- .compareMask = 0xff,
- .writeMask = 0xff,
- .reference = 0
- },
- .back = {
- .failOp = VK_STENCIL_OP_REPLACE,
- .passOp = VK_STENCIL_OP_REPLACE,
- .depthFailOp = VK_STENCIL_OP_REPLACE,
- .compareOp = VK_COMPARE_OP_ALWAYS,
- .compareMask = 0xff,
- .writeMask = 0xff,
- .reference = 0
- },
- .depthCompareOp = VK_COMPARE_OP_ALWAYS,
- };
-
- switch(aspect) {
- case VK_IMAGE_ASPECT_COLOR_BIT:
- vk_pipeline_info.pColorBlendState = &color_blend_info;
- break;
- case VK_IMAGE_ASPECT_DEPTH_BIT:
- vk_pipeline_info.pDepthStencilState = &depth_info;
- break;
- case VK_IMAGE_ASPECT_STENCIL_BIT:
- vk_pipeline_info.pDepthStencilState = &stencil_info;
- break;
- default:
- unreachable("Unhandled aspect");
- }
-
- const struct radv_graphics_pipeline_create_info radv_pipeline_info = {
- .use_rectlist = true
- };
-
- result = radv_graphics_pipeline_create(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- &vk_pipeline_info, &radv_pipeline_info,
- &device->meta_state.alloc, pipeline);
- ralloc_free(vs);
- ralloc_free(fs);
- mtx_unlock(&device->meta_state.mtx);
- return result;
+ VkResult result = VK_SUCCESS;
+
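+   /* Meta pipelines are compiled lazily: take the meta-state mutex and
+    * re-check so that concurrent first users only build the pipeline once. */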
+ mtx_lock(&device->meta_state.mtx);
+
+ if (*pipeline) {
+ mtx_unlock(&device->meta_state.mtx);
+ return VK_SUCCESS;
+ }
+
+ nir_shader *fs;
+ nir_shader *vs = build_nir_vertex_shader();
+ VkRenderPass rp;
+
+ switch (aspect) {
+ case VK_IMAGE_ASPECT_COLOR_BIT:
+ fs = build_nir_copy_fragment_shader(tex_dim);
+ rp = device->meta_state.blit.render_pass[fs_key][0];
+ break;
+ case VK_IMAGE_ASPECT_DEPTH_BIT:
+ fs = build_nir_copy_fragment_shader_depth(tex_dim);
+ rp = device->meta_state.blit.depth_only_rp[0];
+ break;
+ case VK_IMAGE_ASPECT_STENCIL_BIT:
+ fs = build_nir_copy_fragment_shader_stencil(tex_dim);
+ rp = device->meta_state.blit.stencil_only_rp[0];
+ break;
+ default:
+ unreachable("Unhandled aspect");
+ }
+ VkPipelineVertexInputStateCreateInfo vi_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .vertexBindingDescriptionCount = 0,
+ .vertexAttributeDescriptionCount = 0,
+ };
+
+ VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
+ {.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_VERTEX_BIT,
+ .module = vk_shader_module_handle_from_nir(vs),
+ .pName = "main",
+ .pSpecializationInfo = NULL},
+ {.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .module = vk_shader_module_handle_from_nir(fs),
+ .pName = "main",
+ .pSpecializationInfo = NULL},
+ };
+
+ VkGraphicsPipelineCreateInfo vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .stageCount = ARRAY_SIZE(pipeline_shader_stages),
+ .pStages = pipeline_shader_stages,
+ .pVertexInputState = &vi_create_info,
+ .pInputAssemblyState =
+ &(VkPipelineInputAssemblyStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
+ .primitiveRestartEnable = false,
+ },
+ .pViewportState =
+ &(VkPipelineViewportStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .viewportCount = 1,
+ .scissorCount = 1,
+ },
+ .pRasterizationState =
+ &(VkPipelineRasterizationStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .rasterizerDiscardEnable = false,
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .cullMode = VK_CULL_MODE_NONE,
+ .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE},
+ .pMultisampleState =
+ &(VkPipelineMultisampleStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .rasterizationSamples = 1,
+ .sampleShadingEnable = false,
+ .pSampleMask = (VkSampleMask[]){UINT32_MAX},
+ },
+ .pDynamicState =
+ &(VkPipelineDynamicStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .dynamicStateCount = 4,
+ .pDynamicStates =
+ (VkDynamicState[]){
+ VK_DYNAMIC_STATE_VIEWPORT,
+ VK_DYNAMIC_STATE_SCISSOR,
+ VK_DYNAMIC_STATE_LINE_WIDTH,
+ VK_DYNAMIC_STATE_BLEND_CONSTANTS,
+ },
+ },
+ .flags = 0,
+ .layout = device->meta_state.blit.pipeline_layout,
+ .renderPass = rp,
+ .subpass = 0,
+ };
+
+ VkPipelineColorBlendStateCreateInfo color_blend_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments = (VkPipelineColorBlendAttachmentState[]){
+ {.colorWriteMask = VK_COLOR_COMPONENT_A_BIT | VK_COLOR_COMPONENT_R_BIT |
+ VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT},
+ }};
+
+ VkPipelineDepthStencilStateCreateInfo depth_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
+ .depthTestEnable = true,
+ .depthWriteEnable = true,
+ .depthCompareOp = VK_COMPARE_OP_ALWAYS,
+ };
+
+ VkPipelineDepthStencilStateCreateInfo stencil_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
+ .depthTestEnable = false,
+ .depthWriteEnable = false,
+ .stencilTestEnable = true,
+ .front = {.failOp = VK_STENCIL_OP_REPLACE,
+ .passOp = VK_STENCIL_OP_REPLACE,
+ .depthFailOp = VK_STENCIL_OP_REPLACE,
+ .compareOp = VK_COMPARE_OP_ALWAYS,
+ .compareMask = 0xff,
+ .writeMask = 0xff,
+ .reference = 0},
+ .back = {.failOp = VK_STENCIL_OP_REPLACE,
+ .passOp = VK_STENCIL_OP_REPLACE,
+ .depthFailOp = VK_STENCIL_OP_REPLACE,
+ .compareOp = VK_COMPARE_OP_ALWAYS,
+ .compareMask = 0xff,
+ .writeMask = 0xff,
+ .reference = 0},
+ .depthCompareOp = VK_COMPARE_OP_ALWAYS,
+ };
+
+ switch (aspect) {
+ case VK_IMAGE_ASPECT_COLOR_BIT:
+ vk_pipeline_info.pColorBlendState = &color_blend_info;
+ break;
+ case VK_IMAGE_ASPECT_DEPTH_BIT:
+ vk_pipeline_info.pDepthStencilState = &depth_info;
+ break;
+ case VK_IMAGE_ASPECT_STENCIL_BIT:
+ vk_pipeline_info.pDepthStencilState = &stencil_info;
+ break;
+ default:
+ unreachable("Unhandled aspect");
+ }
+
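+   /* Meta blits draw a single rect-list primitive (the 3-vertex draw in
+    * meta_emit_blit()) instead of a full quad. */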
+ const struct radv_graphics_pipeline_create_info radv_pipeline_info = {.use_rectlist = true};
+
+ result = radv_graphics_pipeline_create(
+ radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache),
+ &vk_pipeline_info, &radv_pipeline_info, &device->meta_state.alloc, pipeline);
+ ralloc_free(vs);
+ ralloc_free(fs);
+ mtx_unlock(&device->meta_state.mtx);
+ return result;
}
static VkResult
radv_device_init_meta_blit_color(struct radv_device *device, bool on_demand)
{
- VkResult result;
-
- for (unsigned i = 0; i < NUM_META_FS_KEYS; ++i) {
- unsigned key = radv_format_meta_fs_key(device, radv_fs_key_format_exemplars[i]);
- for(unsigned j = 0; j < RADV_META_DST_LAYOUT_COUNT; ++j) {
- VkImageLayout layout = radv_meta_dst_layout_to_layout(j);
- result = radv_CreateRenderPass2(radv_device_to_handle(device),
- &(VkRenderPassCreateInfo2) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
- .attachmentCount = 1,
- .pAttachments = &(VkAttachmentDescription2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
- .format = radv_fs_key_format_exemplars[i],
- .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
- .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
- .initialLayout = layout,
- .finalLayout = layout,
- },
- .subpassCount = 1,
- .pSubpasses = &(VkSubpassDescription2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
- .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
- .inputAttachmentCount = 0,
- .colorAttachmentCount = 1,
- .pColorAttachments = &(VkAttachmentReference2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
- .attachment = 0,
- .layout = layout,
- },
- .pResolveAttachments = NULL,
- .pDepthStencilAttachment = &(VkAttachmentReference2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
- .attachment = VK_ATTACHMENT_UNUSED,
- .layout = VK_IMAGE_LAYOUT_GENERAL,
- },
- .preserveAttachmentCount = 0,
- .pPreserveAttachments = NULL,
- },
- .dependencyCount = 2,
- .pDependencies = (VkSubpassDependency2[]) {
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = VK_SUBPASS_EXTERNAL,
- .dstSubpass = 0,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- },
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = 0,
- .dstSubpass = VK_SUBPASS_EXTERNAL,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- }
- },
- }, &device->meta_state.alloc, &device->meta_state.blit.render_pass[key][j]);
- if (result != VK_SUCCESS)
- goto fail;
- }
-
- if (on_demand)
- continue;
-
- result = build_pipeline(device, VK_IMAGE_ASPECT_COLOR_BIT, GLSL_SAMPLER_DIM_1D, key, &device->meta_state.blit.pipeline_1d_src[key]);
- if (result != VK_SUCCESS)
- goto fail;
-
- result = build_pipeline(device, VK_IMAGE_ASPECT_COLOR_BIT, GLSL_SAMPLER_DIM_2D, key, &device->meta_state.blit.pipeline_2d_src[key]);
- if (result != VK_SUCCESS)
- goto fail;
-
- result = build_pipeline(device, VK_IMAGE_ASPECT_COLOR_BIT, GLSL_SAMPLER_DIM_3D, key, &device->meta_state.blit.pipeline_3d_src[key]);
- if (result != VK_SUCCESS)
- goto fail;
-
- }
-
- result = VK_SUCCESS;
+ VkResult result;
+
+ for (unsigned i = 0; i < NUM_META_FS_KEYS; ++i) {
+ unsigned key = radv_format_meta_fs_key(device, radv_fs_key_format_exemplars[i]);
+ for (unsigned j = 0; j < RADV_META_DST_LAYOUT_COUNT; ++j) {
+ VkImageLayout layout = radv_meta_dst_layout_to_layout(j);
+ result = radv_CreateRenderPass2(
+ radv_device_to_handle(device),
+ &(VkRenderPassCreateInfo2){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
+ .attachmentCount = 1,
+ .pAttachments =
+ &(VkAttachmentDescription2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
+ .format = radv_fs_key_format_exemplars[i],
+ .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = layout,
+ .finalLayout = layout,
+ },
+ .subpassCount = 1,
+ .pSubpasses =
+ &(VkSubpassDescription2){
+                     .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .colorAttachmentCount = 1,
+ .pColorAttachments =
+ &(VkAttachmentReference2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
+ .attachment = 0,
+ .layout = layout,
+ },
+ .pResolveAttachments = NULL,
+ .pDepthStencilAttachment =
+ &(VkAttachmentReference2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
+ .attachment = VK_ATTACHMENT_UNUSED,
+ .layout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ .preserveAttachmentCount = 0,
+ .pPreserveAttachments = NULL,
+ },
+ .dependencyCount = 2,
+ .pDependencies =
+ (VkSubpassDependency2[]){{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = VK_SUBPASS_EXTERNAL,
+ .dstSubpass = 0,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0},
+ {.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = 0,
+ .dstSubpass = VK_SUBPASS_EXTERNAL,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0}},
+ },
+ &device->meta_state.alloc, &device->meta_state.blit.render_pass[key][j]);
+ if (result != VK_SUCCESS)
+ goto fail;
+ }
+
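+      /* With on-demand compilation only the render passes are created up
+       * front; the pipelines are built on first use via build_pipeline(). */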
+ if (on_demand)
+ continue;
+
+ result = build_pipeline(device, VK_IMAGE_ASPECT_COLOR_BIT, GLSL_SAMPLER_DIM_1D, key,
+ &device->meta_state.blit.pipeline_1d_src[key]);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ result = build_pipeline(device, VK_IMAGE_ASPECT_COLOR_BIT, GLSL_SAMPLER_DIM_2D, key,
+ &device->meta_state.blit.pipeline_2d_src[key]);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ result = build_pipeline(device, VK_IMAGE_ASPECT_COLOR_BIT, GLSL_SAMPLER_DIM_3D, key,
+ &device->meta_state.blit.pipeline_3d_src[key]);
+ if (result != VK_SUCCESS)
+ goto fail;
+ }
+
+ result = VK_SUCCESS;
fail:
- return result;
+ return result;
}
static VkResult
radv_device_init_meta_blit_depth(struct radv_device *device, bool on_demand)
{
- VkResult result;
-
- for (enum radv_blit_ds_layout ds_layout = RADV_BLIT_DS_LAYOUT_TILE_ENABLE; ds_layout < RADV_BLIT_DS_LAYOUT_COUNT; ds_layout++) {
- VkImageLayout layout = radv_meta_blit_ds_to_layout(ds_layout);
- result = radv_CreateRenderPass2(radv_device_to_handle(device),
- &(VkRenderPassCreateInfo2) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
- .attachmentCount = 1,
- .pAttachments = &(VkAttachmentDescription2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
- .format = VK_FORMAT_D32_SFLOAT,
- .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
- .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
- .initialLayout = layout,
- .finalLayout = layout,
- },
- .subpassCount = 1,
- .pSubpasses = &(VkSubpassDescription2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
- .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
- .inputAttachmentCount = 0,
- .colorAttachmentCount = 0,
- .pColorAttachments = NULL,
- .pResolveAttachments = NULL,
- .pDepthStencilAttachment = &(VkAttachmentReference2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
- .attachment = 0,
- .layout = layout,
- },
- .preserveAttachmentCount = 0,
- .pPreserveAttachments = NULL,
- },
- .dependencyCount = 2,
- .pDependencies = (VkSubpassDependency2[]) {
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = VK_SUBPASS_EXTERNAL,
- .dstSubpass = 0,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- },
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = 0,
- .dstSubpass = VK_SUBPASS_EXTERNAL,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- }
- },
- }, &device->meta_state.alloc, &device->meta_state.blit.depth_only_rp[ds_layout]);
- if (result != VK_SUCCESS)
- goto fail;
- }
-
- if (on_demand)
- return VK_SUCCESS;
-
- result = build_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, GLSL_SAMPLER_DIM_1D, 0, &device->meta_state.blit.depth_only_1d_pipeline);
- if (result != VK_SUCCESS)
- goto fail;
-
- result = build_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, GLSL_SAMPLER_DIM_2D, 0, &device->meta_state.blit.depth_only_2d_pipeline);
- if (result != VK_SUCCESS)
- goto fail;
-
- result = build_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, GLSL_SAMPLER_DIM_3D, 0, &device->meta_state.blit.depth_only_3d_pipeline);
- if (result != VK_SUCCESS)
- goto fail;
+ VkResult result;
+
+ for (enum radv_blit_ds_layout ds_layout = RADV_BLIT_DS_LAYOUT_TILE_ENABLE;
+ ds_layout < RADV_BLIT_DS_LAYOUT_COUNT; ds_layout++) {
+ VkImageLayout layout = radv_meta_blit_ds_to_layout(ds_layout);
+ result = radv_CreateRenderPass2(
+ radv_device_to_handle(device),
+ &(VkRenderPassCreateInfo2){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
+ .attachmentCount = 1,
+ .pAttachments =
+ &(VkAttachmentDescription2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
+ .format = VK_FORMAT_D32_SFLOAT,
+ .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = layout,
+ .finalLayout = layout,
+ },
+ .subpassCount = 1,
+ .pSubpasses =
+ &(VkSubpassDescription2){
+                  .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .colorAttachmentCount = 0,
+ .pColorAttachments = NULL,
+ .pResolveAttachments = NULL,
+ .pDepthStencilAttachment =
+ &(VkAttachmentReference2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
+ .attachment = 0,
+ .layout = layout,
+ },
+ .preserveAttachmentCount = 0,
+ .pPreserveAttachments = NULL,
+ },
+ .dependencyCount = 2,
+ .pDependencies =
+ (VkSubpassDependency2[]){{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = VK_SUBPASS_EXTERNAL,
+ .dstSubpass = 0,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0},
+ {.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = 0,
+ .dstSubpass = VK_SUBPASS_EXTERNAL,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0}},
+ },
+ &device->meta_state.alloc, &device->meta_state.blit.depth_only_rp[ds_layout]);
+ if (result != VK_SUCCESS)
+ goto fail;
+ }
+
+ if (on_demand)
+ return VK_SUCCESS;
+
+ result = build_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, GLSL_SAMPLER_DIM_1D, 0,
+ &device->meta_state.blit.depth_only_1d_pipeline);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ result = build_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, GLSL_SAMPLER_DIM_2D, 0,
+ &device->meta_state.blit.depth_only_2d_pipeline);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ result = build_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, GLSL_SAMPLER_DIM_3D, 0,
+ &device->meta_state.blit.depth_only_3d_pipeline);
+ if (result != VK_SUCCESS)
+ goto fail;
fail:
- return result;
+ return result;
}
static VkResult
radv_device_init_meta_blit_stencil(struct radv_device *device, bool on_demand)
{
- VkResult result;
-
- for (enum radv_blit_ds_layout ds_layout = RADV_BLIT_DS_LAYOUT_TILE_ENABLE; ds_layout < RADV_BLIT_DS_LAYOUT_COUNT; ds_layout++) {
- VkImageLayout layout = radv_meta_blit_ds_to_layout(ds_layout);
- result = radv_CreateRenderPass2(radv_device_to_handle(device),
- &(VkRenderPassCreateInfo2) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
- .attachmentCount = 1,
- .pAttachments = &(VkAttachmentDescription2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
- .format = VK_FORMAT_S8_UINT,
- .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
- .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
- .initialLayout = layout,
- .finalLayout = layout,
- },
- .subpassCount = 1,
- .pSubpasses = &(VkSubpassDescription2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
- .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
- .inputAttachmentCount = 0,
- .colorAttachmentCount = 0,
- .pColorAttachments = NULL,
- .pResolveAttachments = NULL,
- .pDepthStencilAttachment = &(VkAttachmentReference2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
- .attachment = 0,
- .layout = layout,
- },
- .preserveAttachmentCount = 0,
- .pPreserveAttachments = NULL,
- },
- .dependencyCount = 2,
- .pDependencies = (VkSubpassDependency2[]) {
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = VK_SUBPASS_EXTERNAL,
- .dstSubpass = 0,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- },
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = 0,
- .dstSubpass = VK_SUBPASS_EXTERNAL,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- }
- },
-
- }, &device->meta_state.alloc, &device->meta_state.blit.stencil_only_rp[ds_layout]);
- }
- if (result != VK_SUCCESS)
- goto fail;
-
- if (on_demand)
- return VK_SUCCESS;
-
- result = build_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, GLSL_SAMPLER_DIM_1D, 0, &device->meta_state.blit.stencil_only_1d_pipeline);
- if (result != VK_SUCCESS)
- goto fail;
-
- result = build_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, GLSL_SAMPLER_DIM_2D, 0, &device->meta_state.blit.stencil_only_2d_pipeline);
- if (result != VK_SUCCESS)
- goto fail;
-
- result = build_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, GLSL_SAMPLER_DIM_3D, 0, &device->meta_state.blit.stencil_only_3d_pipeline);
- if (result != VK_SUCCESS)
- goto fail;
-
+ VkResult result;
+
+ for (enum radv_blit_ds_layout ds_layout = RADV_BLIT_DS_LAYOUT_TILE_ENABLE;
+ ds_layout < RADV_BLIT_DS_LAYOUT_COUNT; ds_layout++) {
+ VkImageLayout layout = radv_meta_blit_ds_to_layout(ds_layout);
+ result = radv_CreateRenderPass2(
+ radv_device_to_handle(device),
+ &(VkRenderPassCreateInfo2){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
+ .attachmentCount = 1,
+ .pAttachments =
+ &(VkAttachmentDescription2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
+ .format = VK_FORMAT_S8_UINT,
+ .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = layout,
+ .finalLayout = layout,
+ },
+ .subpassCount = 1,
+ .pSubpasses =
+ &(VkSubpassDescription2){
+                  .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .colorAttachmentCount = 0,
+ .pColorAttachments = NULL,
+ .pResolveAttachments = NULL,
+ .pDepthStencilAttachment =
+ &(VkAttachmentReference2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
+ .attachment = 0,
+ .layout = layout,
+ },
+ .preserveAttachmentCount = 0,
+ .pPreserveAttachments = NULL,
+ },
+ .dependencyCount = 2,
+ .pDependencies =
+ (VkSubpassDependency2[]){{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = VK_SUBPASS_EXTERNAL,
+ .dstSubpass = 0,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0},
+ {.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = 0,
+ .dstSubpass = VK_SUBPASS_EXTERNAL,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0}},
+ },
+ &device->meta_state.alloc, &device->meta_state.blit.stencil_only_rp[ds_layout]);
+ }
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ if (on_demand)
+ return VK_SUCCESS;
+
+ result = build_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, GLSL_SAMPLER_DIM_1D, 0,
+ &device->meta_state.blit.stencil_only_1d_pipeline);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ result = build_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, GLSL_SAMPLER_DIM_2D, 0,
+ &device->meta_state.blit.stencil_only_2d_pipeline);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ result = build_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, GLSL_SAMPLER_DIM_3D, 0,
+ &device->meta_state.blit.stencil_only_3d_pipeline);
+ if (result != VK_SUCCESS)
+ goto fail;
fail:
- return result;
+ return result;
}
VkResult
radv_device_init_meta_blit_state(struct radv_device *device, bool on_demand)
{
- VkResult result;
-
- VkDescriptorSetLayoutCreateInfo ds_layout_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 1,
- .pBindings = (VkDescriptorSetLayoutBinding[]) {
- {
- .binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
- .pImmutableSamplers = NULL
- },
- }
- };
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
- &ds_layout_info,
- &device->meta_state.alloc,
- &device->meta_state.blit.ds_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
- const VkPushConstantRange push_constant_range = {VK_SHADER_STAGE_VERTEX_BIT, 0, 20};
-
- result = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &(VkPipelineLayoutCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 1,
- .pSetLayouts = &device->meta_state.blit.ds_layout,
- .pushConstantRangeCount = 1,
- .pPushConstantRanges = &push_constant_range,
- },
- &device->meta_state.alloc, &device->meta_state.blit.pipeline_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
- result = radv_device_init_meta_blit_color(device, on_demand);
- if (result != VK_SUCCESS)
- goto fail;
-
- result = radv_device_init_meta_blit_depth(device, on_demand);
- if (result != VK_SUCCESS)
- goto fail;
-
- result = radv_device_init_meta_blit_stencil(device, on_demand);
+ VkResult result;
+
+ VkDescriptorSetLayoutCreateInfo ds_layout_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 1,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+ result =
+ radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_layout_info,
+ &device->meta_state.alloc, &device->meta_state.blit.ds_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
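+   /* 20 bytes of vertex push constants: the five floats for the normalized
+    * source rectangle set up in meta_emit_blit(). */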
+ const VkPushConstantRange push_constant_range = {VK_SHADER_STAGE_VERTEX_BIT, 0, 20};
+
+ result = radv_CreatePipelineLayout(radv_device_to_handle(device),
+ &(VkPipelineLayoutCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &device->meta_state.blit.ds_layout,
+ .pushConstantRangeCount = 1,
+ .pPushConstantRanges = &push_constant_range,
+ },
+ &device->meta_state.alloc,
+ &device->meta_state.blit.pipeline_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ result = radv_device_init_meta_blit_color(device, on_demand);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ result = radv_device_init_meta_blit_depth(device, on_demand);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ result = radv_device_init_meta_blit_stencil(device, on_demand);
fail:
- if (result != VK_SUCCESS)
- radv_device_finish_meta_blit_state(device);
- return result;
+ if (result != VK_SUCCESS)
+ radv_device_finish_meta_blit_state(device);
+ return result;
}
diff --git a/src/amd/vulkan/radv_meta_blit2d.c b/src/amd/vulkan/radv_meta_blit2d.c
index d2bd95bf757..b6ac95be413 100644
--- a/src/amd/vulkan/radv_meta_blit2d.c
+++ b/src/amd/vulkan/radv_meta_blit2d.c
@@ -24,1360 +24,1323 @@
* IN THE SOFTWARE.
*/
-#include "radv_meta.h"
#include "nir/nir_builder.h"
+#include "radv_meta.h"
#include "vk_format.h"
enum blit2d_src_type {
- BLIT2D_SRC_TYPE_IMAGE,
- BLIT2D_SRC_TYPE_IMAGE_3D,
- BLIT2D_SRC_TYPE_BUFFER,
- BLIT2D_NUM_SRC_TYPES,
+ BLIT2D_SRC_TYPE_IMAGE,
+ BLIT2D_SRC_TYPE_IMAGE_3D,
+ BLIT2D_SRC_TYPE_BUFFER,
+ BLIT2D_NUM_SRC_TYPES,
};
-static VkResult
-blit2d_init_color_pipeline(struct radv_device *device,
- enum blit2d_src_type src_type,
- VkFormat format,
- uint32_t log2_samples);
+static VkResult blit2d_init_color_pipeline(struct radv_device *device,
+ enum blit2d_src_type src_type, VkFormat format,
+ uint32_t log2_samples);
-static VkResult
-blit2d_init_depth_only_pipeline(struct radv_device *device,
- enum blit2d_src_type src_type,
- uint32_t log2_samples);
+static VkResult blit2d_init_depth_only_pipeline(struct radv_device *device,
+ enum blit2d_src_type src_type,
+ uint32_t log2_samples);
-static VkResult
-blit2d_init_stencil_only_pipeline(struct radv_device *device,
- enum blit2d_src_type src_type,
- uint32_t log2_samples);
+static VkResult blit2d_init_stencil_only_pipeline(struct radv_device *device,
+ enum blit2d_src_type src_type,
+ uint32_t log2_samples);
static void
-create_iview(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_surf *surf,
- struct radv_image_view *iview, VkFormat depth_format,
- VkImageAspectFlagBits aspects)
+create_iview(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *surf,
+ struct radv_image_view *iview, VkFormat depth_format, VkImageAspectFlagBits aspects)
{
- VkFormat format;
- VkImageViewType view_type = cmd_buffer->device->physical_device->rad_info.chip_class < GFX9 ? VK_IMAGE_VIEW_TYPE_2D :
- radv_meta_get_view_type(surf->image);
-
- if (depth_format)
- format = depth_format;
- else
- format = surf->format;
-
- radv_image_view_init(iview, cmd_buffer->device,
- &(VkImageViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(surf->image),
- .viewType = view_type,
- .format = format,
- .subresourceRange = {
- .aspectMask = aspects,
- .baseMipLevel = surf->level,
- .levelCount = 1,
- .baseArrayLayer = surf->layer,
- .layerCount = 1
- },
- }, NULL);
+ VkFormat format;
+ VkImageViewType view_type = cmd_buffer->device->physical_device->rad_info.chip_class < GFX9
+ ? VK_IMAGE_VIEW_TYPE_2D
+ : radv_meta_get_view_type(surf->image);
+
+ if (depth_format)
+ format = depth_format;
+ else
+ format = surf->format;
+
+ radv_image_view_init(iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(surf->image),
+ .viewType = view_type,
+ .format = format,
+ .subresourceRange = {.aspectMask = aspects,
+ .baseMipLevel = surf->level,
+ .levelCount = 1,
+ .baseArrayLayer = surf->layer,
+ .layerCount = 1},
+ },
+ NULL);
}
static void
-create_bview(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_buffer *src,
- struct radv_buffer_view *bview, VkFormat depth_format)
+create_bview(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_buffer *src,
+ struct radv_buffer_view *bview, VkFormat depth_format)
{
- VkFormat format;
-
- if (depth_format)
- format = depth_format;
- else
- format = src->format;
- radv_buffer_view_init(bview, cmd_buffer->device,
- &(VkBufferViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
- .flags = 0,
- .buffer = radv_buffer_to_handle(src->buffer),
- .format = format,
- .offset = src->offset,
- .range = VK_WHOLE_SIZE,
- });
-
+ VkFormat format;
+
+ if (depth_format)
+ format = depth_format;
+ else
+ format = src->format;
+ radv_buffer_view_init(bview, cmd_buffer->device,
+ &(VkBufferViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
+ .flags = 0,
+ .buffer = radv_buffer_to_handle(src->buffer),
+ .format = format,
+ .offset = src->offset,
+ .range = VK_WHOLE_SIZE,
+ });
}
struct blit2d_src_temps {
- struct radv_image_view iview;
- struct radv_buffer_view bview;
+ struct radv_image_view iview;
+ struct radv_buffer_view bview;
};
static void
-blit2d_bind_src(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_surf *src_img,
- struct radv_meta_blit2d_buffer *src_buf,
- struct blit2d_src_temps *tmp,
- enum blit2d_src_type src_type, VkFormat depth_format,
- VkImageAspectFlagBits aspects,
+blit2d_bind_src(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src_img,
+ struct radv_meta_blit2d_buffer *src_buf, struct blit2d_src_temps *tmp,
+ enum blit2d_src_type src_type, VkFormat depth_format, VkImageAspectFlagBits aspects,
uint32_t log2_samples)
{
- struct radv_device *device = cmd_buffer->device;
-
- if (src_type == BLIT2D_SRC_TYPE_BUFFER) {
- create_bview(cmd_buffer, src_buf, &tmp->bview, depth_format);
-
- radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
- device->meta_state.blit2d[log2_samples].p_layouts[src_type],
- 0, /* set */
- 1, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
- .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(&tmp->bview) }
- }
- });
-
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.blit2d[log2_samples].p_layouts[src_type],
- VK_SHADER_STAGE_FRAGMENT_BIT, 16, 4,
- &src_buf->pitch);
- } else {
- create_iview(cmd_buffer, src_img, &tmp->iview, depth_format, aspects);
-
- if (src_type == BLIT2D_SRC_TYPE_IMAGE_3D)
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.blit2d[log2_samples].p_layouts[src_type],
- VK_SHADER_STAGE_FRAGMENT_BIT, 16, 4,
- &src_img->layer);
-
- radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
- device->meta_state.blit2d[log2_samples].p_layouts[src_type],
- 0, /* set */
- 1, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]) {
- {
- .sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(&tmp->iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- }
- }
- });
- }
+ struct radv_device *device = cmd_buffer->device;
+
+ if (src_type == BLIT2D_SRC_TYPE_BUFFER) {
+ create_bview(cmd_buffer, src_buf, &tmp->bview, depth_format);
+
+ radv_meta_push_descriptor_set(
+ cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
+ device->meta_state.blit2d[log2_samples].p_layouts[src_type], 0, /* set */
+ 1, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){
+ {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
+ .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(&tmp->bview)}}});
+
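+      /* The buffer source path passes src_buf->pitch (the row pitch) at push-constant
+       * offset 16; build_nir_buffer_fetch() reads it from that offset to linearize the
+       * 2D texel coordinate. */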
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+ device->meta_state.blit2d[log2_samples].p_layouts[src_type],
+ VK_SHADER_STAGE_FRAGMENT_BIT, 16, 4, &src_buf->pitch);
+ } else {
+ create_iview(cmd_buffer, src_img, &tmp->iview, depth_format, aspects);
+
+ if (src_type == BLIT2D_SRC_TYPE_IMAGE_3D)
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+ device->meta_state.blit2d[log2_samples].p_layouts[src_type],
+ VK_SHADER_STAGE_FRAGMENT_BIT, 16, 4, &src_img->layer);
+
+ radv_meta_push_descriptor_set(
+ cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
+ device->meta_state.blit2d[log2_samples].p_layouts[src_type], 0, /* set */
+ 1, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .pImageInfo = (VkDescriptorImageInfo[]){
+ {
+ .sampler = VK_NULL_HANDLE,
+ .imageView = radv_image_view_to_handle(&tmp->iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }}});
+ }
}
struct blit2d_dst_temps {
- VkImage image;
- struct radv_image_view iview;
- VkFramebuffer fb;
+ VkImage image;
+ struct radv_image_view iview;
+ VkFramebuffer fb;
};
static void
-blit2d_bind_dst(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_surf *dst,
- uint32_t width,
- uint32_t height,
- VkFormat depth_format,
- struct blit2d_dst_temps *tmp,
- VkImageAspectFlagBits aspects)
+blit2d_bind_dst(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *dst,
+ uint32_t width, uint32_t height, VkFormat depth_format,
+ struct blit2d_dst_temps *tmp, VkImageAspectFlagBits aspects)
{
- create_iview(cmd_buffer, dst, &tmp->iview, depth_format, aspects);
-
- radv_CreateFramebuffer(radv_device_to_handle(cmd_buffer->device),
- &(VkFramebufferCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
- .attachmentCount = 1,
- .pAttachments = (VkImageView[]) {
- radv_image_view_to_handle(&tmp->iview),
- },
- .width = width,
- .height = height,
- .layers = 1
- }, &cmd_buffer->pool->alloc, &tmp->fb);
+ create_iview(cmd_buffer, dst, &tmp->iview, depth_format, aspects);
+
+ radv_CreateFramebuffer(
+ radv_device_to_handle(cmd_buffer->device),
+ &(VkFramebufferCreateInfo){.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments =
+ (VkImageView[]){
+ radv_image_view_to_handle(&tmp->iview),
+ },
+ .width = width,
+ .height = height,
+ .layers = 1},
+ &cmd_buffer->pool->alloc, &tmp->fb);
}
static void
-bind_pipeline(struct radv_cmd_buffer *cmd_buffer,
- enum blit2d_src_type src_type, unsigned fs_key,
+bind_pipeline(struct radv_cmd_buffer *cmd_buffer, enum blit2d_src_type src_type, unsigned fs_key,
uint32_t log2_samples)
{
- VkPipeline pipeline =
- cmd_buffer->device->meta_state.blit2d[log2_samples].pipelines[src_type][fs_key];
+ VkPipeline pipeline =
+ cmd_buffer->device->meta_state.blit2d[log2_samples].pipelines[src_type][fs_key];
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS,
+ pipeline);
}
static void
-bind_depth_pipeline(struct radv_cmd_buffer *cmd_buffer,
- enum blit2d_src_type src_type,
- uint32_t log2_samples)
+bind_depth_pipeline(struct radv_cmd_buffer *cmd_buffer, enum blit2d_src_type src_type,
+ uint32_t log2_samples)
{
- VkPipeline pipeline =
- cmd_buffer->device->meta_state.blit2d[log2_samples].depth_only_pipeline[src_type];
+ VkPipeline pipeline =
+ cmd_buffer->device->meta_state.blit2d[log2_samples].depth_only_pipeline[src_type];
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS,
+ pipeline);
}
static void
-bind_stencil_pipeline(struct radv_cmd_buffer *cmd_buffer,
- enum blit2d_src_type src_type,
- uint32_t log2_samples)
+bind_stencil_pipeline(struct radv_cmd_buffer *cmd_buffer, enum blit2d_src_type src_type,
+ uint32_t log2_samples)
{
- VkPipeline pipeline =
- cmd_buffer->device->meta_state.blit2d[log2_samples].stencil_only_pipeline[src_type];
+ VkPipeline pipeline =
+ cmd_buffer->device->meta_state.blit2d[log2_samples].stencil_only_pipeline[src_type];
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS,
+ pipeline);
}
static void
radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_surf *src_img,
- struct radv_meta_blit2d_buffer *src_buf,
- struct radv_meta_blit2d_surf *dst,
- unsigned num_rects,
- struct radv_meta_blit2d_rect *rects, enum blit2d_src_type src_type,
- uint32_t log2_samples)
+ struct radv_meta_blit2d_surf *src_img,
+ struct radv_meta_blit2d_buffer *src_buf,
+ struct radv_meta_blit2d_surf *dst, unsigned num_rects,
+ struct radv_meta_blit2d_rect *rects, enum blit2d_src_type src_type,
+ uint32_t log2_samples)
{
- struct radv_device *device = cmd_buffer->device;
-
- for (unsigned r = 0; r < num_rects; ++r) {
- u_foreach_bit(i, dst->aspect_mask) {
- unsigned aspect_mask = 1u << i;
- unsigned src_aspect_mask = aspect_mask;
- VkFormat depth_format = 0;
- if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
- depth_format = vk_format_stencil_only(dst->image->vk_format);
- else if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT)
- depth_format = vk_format_depth_only(dst->image->vk_format);
- else if (src_img)
- src_aspect_mask = src_img->aspect_mask;
-
- struct blit2d_src_temps src_temps;
- blit2d_bind_src(cmd_buffer, src_img, src_buf, &src_temps, src_type, depth_format, src_aspect_mask, log2_samples);
-
- struct blit2d_dst_temps dst_temps;
- blit2d_bind_dst(cmd_buffer, dst, rects[r].dst_x + rects[r].width,
- rects[r].dst_y + rects[r].height, depth_format, &dst_temps, aspect_mask);
-
- float vertex_push_constants[4] = {
- rects[r].src_x,
- rects[r].src_y,
- rects[r].src_x + rects[r].width,
- rects[r].src_y + rects[r].height,
- };
-
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.blit2d[log2_samples].p_layouts[src_type],
- VK_SHADER_STAGE_VERTEX_BIT, 0, 16,
- vertex_push_constants);
-
- if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT ||
- aspect_mask == VK_IMAGE_ASPECT_PLANE_0_BIT ||
- aspect_mask == VK_IMAGE_ASPECT_PLANE_1_BIT ||
- aspect_mask == VK_IMAGE_ASPECT_PLANE_2_BIT) {
- unsigned fs_key = radv_format_meta_fs_key(device, dst_temps.iview.vk_format);
- unsigned dst_layout = radv_meta_dst_layout_from_layout(dst->current_layout);
-
- if (device->meta_state.blit2d[log2_samples].pipelines[src_type][fs_key] == VK_NULL_HANDLE) {
- VkResult ret = blit2d_init_color_pipeline(device, src_type, radv_fs_key_format_exemplars[fs_key], log2_samples);
- if (ret != VK_SUCCESS) {
- cmd_buffer->record_result = ret;
- goto fail_pipeline;
- }
- }
-
- radv_cmd_buffer_begin_render_pass(cmd_buffer,
- &(VkRenderPassBeginInfo) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
- .renderPass = device->meta_state.blit2d_render_passes[fs_key][dst_layout],
- .framebuffer = dst_temps.fb,
- .renderArea = {
- .offset = { rects[r].dst_x, rects[r].dst_y, },
- .extent = { rects[r].width, rects[r].height },
- },
- .clearValueCount = 0,
- .pClearValues = NULL,
- }, &(struct radv_extra_render_pass_begin_info) {
- .disable_dcc = dst->disable_compression
- });
-
- radv_cmd_buffer_set_subpass(cmd_buffer,
- &cmd_buffer->state.pass->subpasses[0]);
-
- bind_pipeline(cmd_buffer, src_type, fs_key, log2_samples);
- } else if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
- enum radv_blit_ds_layout ds_layout = radv_meta_blit_ds_to_type(dst->current_layout);
-
- if (device->meta_state.blit2d[log2_samples].depth_only_pipeline[src_type] == VK_NULL_HANDLE) {
- VkResult ret = blit2d_init_depth_only_pipeline(device, src_type, log2_samples);
- if (ret != VK_SUCCESS) {
- cmd_buffer->record_result = ret;
- goto fail_pipeline;
- }
- }
-
- radv_cmd_buffer_begin_render_pass(cmd_buffer,
- &(VkRenderPassBeginInfo) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
- .renderPass = device->meta_state.blit2d_depth_only_rp[ds_layout],
- .framebuffer = dst_temps.fb,
- .renderArea = {
- .offset = { rects[r].dst_x, rects[r].dst_y, },
- .extent = { rects[r].width, rects[r].height },
- },
- .clearValueCount = 0,
- .pClearValues = NULL,
- }, NULL);
-
- radv_cmd_buffer_set_subpass(cmd_buffer,
- &cmd_buffer->state.pass->subpasses[0]);
-
- bind_depth_pipeline(cmd_buffer, src_type, log2_samples);
-
- } else if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
- enum radv_blit_ds_layout ds_layout = radv_meta_blit_ds_to_type(dst->current_layout);
-
- if (device->meta_state.blit2d[log2_samples].stencil_only_pipeline[src_type] == VK_NULL_HANDLE) {
- VkResult ret = blit2d_init_stencil_only_pipeline(device, src_type, log2_samples);
- if (ret != VK_SUCCESS) {
- cmd_buffer->record_result = ret;
- goto fail_pipeline;
- }
- }
-
- radv_cmd_buffer_begin_render_pass(cmd_buffer,
- &(VkRenderPassBeginInfo) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
- .renderPass = device->meta_state.blit2d_stencil_only_rp[ds_layout],
- .framebuffer = dst_temps.fb,
- .renderArea = {
- .offset = { rects[r].dst_x, rects[r].dst_y, },
- .extent = { rects[r].width, rects[r].height },
- },
- .clearValueCount = 0,
- .pClearValues = NULL,
- }, NULL);
-
- radv_cmd_buffer_set_subpass(cmd_buffer,
- &cmd_buffer->state.pass->subpasses[0]);
-
- bind_stencil_pipeline(cmd_buffer, src_type, log2_samples);
- } else
- unreachable("Processing blit2d with multiple aspects.");
-
- radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
- .x = rects[r].dst_x,
- .y = rects[r].dst_y,
- .width = rects[r].width,
- .height = rects[r].height,
- .minDepth = 0.0f,
- .maxDepth = 1.0f
- });
-
- radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkRect2D) {
- .offset = (VkOffset2D) { rects[r].dst_x, rects[r].dst_y },
- .extent = (VkExtent2D) { rects[r].width, rects[r].height },
- });
-
-
-
- radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
- radv_cmd_buffer_end_render_pass(cmd_buffer);
-
-fail_pipeline:
- /* At the point where we emit the draw call, all data from the
- * descriptor sets, etc. has been used. We are free to delete it.
- */
- radv_DestroyFramebuffer(radv_device_to_handle(device),
- dst_temps.fb,
- &cmd_buffer->pool->alloc);
- }
- }
+ struct radv_device *device = cmd_buffer->device;
+
+ for (unsigned r = 0; r < num_rects; ++r) {
+ u_foreach_bit(i, dst->aspect_mask)
+ {
+ unsigned aspect_mask = 1u << i;
+ unsigned src_aspect_mask = aspect_mask;
+ VkFormat depth_format = 0;
+ if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
+ depth_format = vk_format_stencil_only(dst->image->vk_format);
+ else if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT)
+ depth_format = vk_format_depth_only(dst->image->vk_format);
+ else if (src_img)
+ src_aspect_mask = src_img->aspect_mask;
+
+ struct blit2d_src_temps src_temps;
+ blit2d_bind_src(cmd_buffer, src_img, src_buf, &src_temps, src_type, depth_format,
+ src_aspect_mask, log2_samples);
+
+ struct blit2d_dst_temps dst_temps;
+ blit2d_bind_dst(cmd_buffer, dst, rects[r].dst_x + rects[r].width,
+ rects[r].dst_y + rects[r].height, depth_format, &dst_temps, aspect_mask);
+
+ float vertex_push_constants[4] = {
+ rects[r].src_x,
+ rects[r].src_y,
+ rects[r].src_x + rects[r].width,
+ rects[r].src_y + rects[r].height,
+ };
+
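+         /* Pass the source rectangle (x0, y0, x1, y1) to the vertex shader, which expands
+          * it into per-vertex texture coordinates (see build_nir_vertex_shader()). */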
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+ device->meta_state.blit2d[log2_samples].p_layouts[src_type],
+ VK_SHADER_STAGE_VERTEX_BIT, 0, 16, vertex_push_constants);
+
+ if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT ||
+ aspect_mask == VK_IMAGE_ASPECT_PLANE_0_BIT ||
+ aspect_mask == VK_IMAGE_ASPECT_PLANE_1_BIT ||
+ aspect_mask == VK_IMAGE_ASPECT_PLANE_2_BIT) {
+ unsigned fs_key = radv_format_meta_fs_key(device, dst_temps.iview.vk_format);
+ unsigned dst_layout = radv_meta_dst_layout_from_layout(dst->current_layout);
+
+ if (device->meta_state.blit2d[log2_samples].pipelines[src_type][fs_key] ==
+ VK_NULL_HANDLE) {
+ VkResult ret = blit2d_init_color_pipeline(
+ device, src_type, radv_fs_key_format_exemplars[fs_key], log2_samples);
+ if (ret != VK_SUCCESS) {
+ cmd_buffer->record_result = ret;
+ goto fail_pipeline;
+ }
+ }
+
+ radv_cmd_buffer_begin_render_pass(
+ cmd_buffer,
+ &(VkRenderPassBeginInfo){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .renderPass = device->meta_state.blit2d_render_passes[fs_key][dst_layout],
+ .framebuffer = dst_temps.fb,
+ .renderArea =
+ {
+ .offset =
+ {
+ rects[r].dst_x,
+ rects[r].dst_y,
+ },
+ .extent = {rects[r].width, rects[r].height},
+ },
+ .clearValueCount = 0,
+ .pClearValues = NULL,
+ },
+ &(struct radv_extra_render_pass_begin_info){.disable_dcc =
+ dst->disable_compression});
+
+ radv_cmd_buffer_set_subpass(cmd_buffer, &cmd_buffer->state.pass->subpasses[0]);
+
+ bind_pipeline(cmd_buffer, src_type, fs_key, log2_samples);
+ } else if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
+ enum radv_blit_ds_layout ds_layout = radv_meta_blit_ds_to_type(dst->current_layout);
+
+ if (device->meta_state.blit2d[log2_samples].depth_only_pipeline[src_type] ==
+ VK_NULL_HANDLE) {
+ VkResult ret = blit2d_init_depth_only_pipeline(device, src_type, log2_samples);
+ if (ret != VK_SUCCESS) {
+ cmd_buffer->record_result = ret;
+ goto fail_pipeline;
+ }
+ }
+
+ radv_cmd_buffer_begin_render_pass(
+ cmd_buffer,
+ &(VkRenderPassBeginInfo){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .renderPass = device->meta_state.blit2d_depth_only_rp[ds_layout],
+ .framebuffer = dst_temps.fb,
+ .renderArea =
+ {
+ .offset =
+ {
+ rects[r].dst_x,
+ rects[r].dst_y,
+ },
+ .extent = {rects[r].width, rects[r].height},
+ },
+ .clearValueCount = 0,
+ .pClearValues = NULL,
+ },
+ NULL);
+
+ radv_cmd_buffer_set_subpass(cmd_buffer, &cmd_buffer->state.pass->subpasses[0]);
+
+ bind_depth_pipeline(cmd_buffer, src_type, log2_samples);
+
+ } else if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
+ enum radv_blit_ds_layout ds_layout = radv_meta_blit_ds_to_type(dst->current_layout);
+
+ if (device->meta_state.blit2d[log2_samples].stencil_only_pipeline[src_type] ==
+ VK_NULL_HANDLE) {
+ VkResult ret = blit2d_init_stencil_only_pipeline(device, src_type, log2_samples);
+ if (ret != VK_SUCCESS) {
+ cmd_buffer->record_result = ret;
+ goto fail_pipeline;
+ }
+ }
+
+ radv_cmd_buffer_begin_render_pass(
+ cmd_buffer,
+ &(VkRenderPassBeginInfo){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .renderPass = device->meta_state.blit2d_stencil_only_rp[ds_layout],
+ .framebuffer = dst_temps.fb,
+ .renderArea =
+ {
+ .offset =
+ {
+ rects[r].dst_x,
+ rects[r].dst_y,
+ },
+ .extent = {rects[r].width, rects[r].height},
+ },
+ .clearValueCount = 0,
+ .pClearValues = NULL,
+ },
+ NULL);
+
+ radv_cmd_buffer_set_subpass(cmd_buffer, &cmd_buffer->state.pass->subpasses[0]);
+
+ bind_stencil_pipeline(cmd_buffer, src_type, log2_samples);
+ } else
+ unreachable("Processing blit2d with multiple aspects.");
+
+ radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
+ &(VkViewport){.x = rects[r].dst_x,
+ .y = rects[r].dst_y,
+ .width = rects[r].width,
+ .height = rects[r].height,
+ .minDepth = 0.0f,
+ .maxDepth = 1.0f});
+
+ radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
+ &(VkRect2D){
+ .offset = (VkOffset2D){rects[r].dst_x, rects[r].dst_y},
+ .extent = (VkExtent2D){rects[r].width, rects[r].height},
+ });
+
+ radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
+ radv_cmd_buffer_end_render_pass(cmd_buffer);
+
+ fail_pipeline:
+ /* At the point where we emit the draw call, all data from the
+ * descriptor sets, etc. has been used. We are free to delete it.
+ */
+ radv_DestroyFramebuffer(radv_device_to_handle(device), dst_temps.fb,
+ &cmd_buffer->pool->alloc);
+ }
+ }
}
void
-radv_meta_blit2d(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_surf *src_img,
- struct radv_meta_blit2d_buffer *src_buf,
- struct radv_meta_blit2d_surf *dst,
- unsigned num_rects,
- struct radv_meta_blit2d_rect *rects)
+radv_meta_blit2d(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src_img,
+ struct radv_meta_blit2d_buffer *src_buf, struct radv_meta_blit2d_surf *dst,
+ unsigned num_rects, struct radv_meta_blit2d_rect *rects)
{
- bool use_3d = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9 &&
- (src_img && src_img->image->type == VK_IMAGE_TYPE_3D);
- enum blit2d_src_type src_type = src_buf ? BLIT2D_SRC_TYPE_BUFFER :
- use_3d ? BLIT2D_SRC_TYPE_IMAGE_3D : BLIT2D_SRC_TYPE_IMAGE;
- radv_meta_blit2d_normal_dst(cmd_buffer, src_img, src_buf, dst,
- num_rects, rects, src_type,
- src_img ? util_logbase2(src_img->image->info.samples) : 0);
+ bool use_3d = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9 &&
+ (src_img && src_img->image->type == VK_IMAGE_TYPE_3D);
+ enum blit2d_src_type src_type = src_buf ? BLIT2D_SRC_TYPE_BUFFER
+ : use_3d ? BLIT2D_SRC_TYPE_IMAGE_3D
+ : BLIT2D_SRC_TYPE_IMAGE;
+ radv_meta_blit2d_normal_dst(cmd_buffer, src_img, src_buf, dst, num_rects, rects, src_type,
+ src_img ? util_logbase2(src_img->image->info.samples) : 0);
}
static nir_shader *
build_nir_vertex_shader(void)
{
- const struct glsl_type *vec4 = glsl_vec4_type();
- const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, NULL, "meta_blit2d_vs");
-
- nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out,
- vec4, "gl_Position");
- pos_out->data.location = VARYING_SLOT_POS;
-
- nir_variable *tex_pos_out = nir_variable_create(b.shader, nir_var_shader_out,
- vec2, "v_tex_pos");
- tex_pos_out->data.location = VARYING_SLOT_VAR0;
- tex_pos_out->data.interpolation = INTERP_MODE_SMOOTH;
-
- nir_ssa_def *outvec = radv_meta_gen_rect_vertices(&b);
- nir_store_var(&b, pos_out, outvec, 0xf);
-
- nir_ssa_def *src_box = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range=16);
- nir_ssa_def *vertex_id = nir_load_vertex_id_zero_base(&b);
-
- /* vertex 0 - src_x, src_y */
- /* vertex 1 - src_x, src_y+h */
- /* vertex 2 - src_x+w, src_y */
- /* so channel 0 is vertex_id != 2 ? src_x : src_x + w
-    channel 1 is vertex_id != 1 ? src_y : src_y + h */
-
- nir_ssa_def *c0cmp = nir_ine(&b, vertex_id,
- nir_imm_int(&b, 2));
- nir_ssa_def *c1cmp = nir_ine(&b, vertex_id,
- nir_imm_int(&b, 1));
-
- nir_ssa_def *comp[2];
- comp[0] = nir_bcsel(&b, c0cmp,
- nir_channel(&b, src_box, 0),
- nir_channel(&b, src_box, 2));
-
- comp[1] = nir_bcsel(&b, c1cmp,
- nir_channel(&b, src_box, 1),
- nir_channel(&b, src_box, 3));
- nir_ssa_def *out_tex_vec = nir_vec(&b, comp, 2);
- nir_store_var(&b, tex_pos_out, out_tex_vec, 0x3);
- return b.shader;
+ const struct glsl_type *vec4 = glsl_vec4_type();
+ const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
+ nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, NULL, "meta_blit2d_vs");
+
+ nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position");
+ pos_out->data.location = VARYING_SLOT_POS;
+
+ nir_variable *tex_pos_out = nir_variable_create(b.shader, nir_var_shader_out, vec2, "v_tex_pos");
+ tex_pos_out->data.location = VARYING_SLOT_VAR0;
+ tex_pos_out->data.interpolation = INTERP_MODE_SMOOTH;
+
+ nir_ssa_def *outvec = radv_meta_gen_rect_vertices(&b);
+ nir_store_var(&b, pos_out, outvec, 0xf);
+
+ nir_ssa_def *src_box = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16);
+ nir_ssa_def *vertex_id = nir_load_vertex_id_zero_base(&b);
+
+ /* vertex 0 - src_x, src_y */
+ /* vertex 1 - src_x, src_y+h */
+ /* vertex 2 - src_x+w, src_y */
+   /* so channel 0 is vertex_id != 2 ? src_x : src_x + w
+      channel 1 is vertex_id != 1 ? src_y : src_y + h */
+
+ nir_ssa_def *c0cmp = nir_ine(&b, vertex_id, nir_imm_int(&b, 2));
+ nir_ssa_def *c1cmp = nir_ine(&b, vertex_id, nir_imm_int(&b, 1));
+
+ nir_ssa_def *comp[2];
+ comp[0] = nir_bcsel(&b, c0cmp, nir_channel(&b, src_box, 0), nir_channel(&b, src_box, 2));
+
+ comp[1] = nir_bcsel(&b, c1cmp, nir_channel(&b, src_box, 1), nir_channel(&b, src_box, 3));
+ nir_ssa_def *out_tex_vec = nir_vec(&b, comp, 2);
+ nir_store_var(&b, tex_pos_out, out_tex_vec, 0x3);
+ return b.shader;
}
-typedef nir_ssa_def* (*texel_fetch_build_func)(struct nir_builder *,
- struct radv_device *,
+typedef nir_ssa_def *(*texel_fetch_build_func)(struct nir_builder *, struct radv_device *,
nir_ssa_def *, bool, bool);
static nir_ssa_def *
-build_nir_texel_fetch(struct nir_builder *b, struct radv_device *device,
- nir_ssa_def *tex_pos, bool is_3d, bool is_multisampled)
+build_nir_texel_fetch(struct nir_builder *b, struct radv_device *device, nir_ssa_def *tex_pos,
+ bool is_3d, bool is_multisampled)
{
- enum glsl_sampler_dim dim =
- is_3d ? GLSL_SAMPLER_DIM_3D : is_multisampled ? GLSL_SAMPLER_DIM_MS : GLSL_SAMPLER_DIM_2D;
- const struct glsl_type *sampler_type =
- glsl_sampler_type(dim, false, false, GLSL_TYPE_UINT);
- nir_variable *sampler = nir_variable_create(b->shader, nir_var_uniform,
- sampler_type, "s_tex");
- sampler->data.descriptor_set = 0;
- sampler->data.binding = 0;
-
- nir_ssa_def *tex_pos_3d = NULL;
- nir_ssa_def *sample_idx = NULL;
- if (is_3d) {
- nir_ssa_def *layer = nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0), .base=16, .range=4);
-
- nir_ssa_def *chans[3];
- chans[0] = nir_channel(b, tex_pos, 0);
- chans[1] = nir_channel(b, tex_pos, 1);
- chans[2] = layer;
- tex_pos_3d = nir_vec(b, chans, 3);
- }
- if (is_multisampled) {
- sample_idx = nir_load_sample_id(b);
- }
-
- nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;
-
- nir_tex_instr *tex = nir_tex_instr_create(b->shader, is_multisampled ? 4 : 3);
- tex->sampler_dim = dim;
- tex->op = is_multisampled ? nir_texop_txf_ms : nir_texop_txf;
- tex->src[0].src_type = nir_tex_src_coord;
- tex->src[0].src = nir_src_for_ssa(is_3d ? tex_pos_3d : tex_pos);
- tex->src[1].src_type = is_multisampled ? nir_tex_src_ms_index : nir_tex_src_lod;
- tex->src[1].src = nir_src_for_ssa(is_multisampled ? sample_idx : nir_imm_int(b, 0));
- tex->src[2].src_type = nir_tex_src_texture_deref;
- tex->src[2].src = nir_src_for_ssa(tex_deref);
- if (is_multisampled) {
- tex->src[3].src_type = nir_tex_src_lod;
- tex->src[3].src = nir_src_for_ssa(nir_imm_int(b, 0));
- }
- tex->dest_type = nir_type_uint32;
- tex->is_array = false;
- tex->coord_components = is_3d ? 3 : 2;
-
- nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
- nir_builder_instr_insert(b, &tex->instr);
-
- return &tex->dest.ssa;
+ enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D
+ : is_multisampled ? GLSL_SAMPLER_DIM_MS
+ : GLSL_SAMPLER_DIM_2D;
+ const struct glsl_type *sampler_type = glsl_sampler_type(dim, false, false, GLSL_TYPE_UINT);
+ nir_variable *sampler = nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex");
+ sampler->data.descriptor_set = 0;
+ sampler->data.binding = 0;
+
+ nir_ssa_def *tex_pos_3d = NULL;
+ nir_ssa_def *sample_idx = NULL;
+ if (is_3d) {
+ nir_ssa_def *layer =
+ nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0), .base = 16, .range = 4);
+
+ nir_ssa_def *chans[3];
+ chans[0] = nir_channel(b, tex_pos, 0);
+ chans[1] = nir_channel(b, tex_pos, 1);
+ chans[2] = layer;
+ tex_pos_3d = nir_vec(b, chans, 3);
+ }
+ if (is_multisampled) {
+ sample_idx = nir_load_sample_id(b);
+ }
+
+ nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;
+
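+   /* txf takes coord, lod and the texture deref (3 sources); the multisampled txf_ms
+    * variant replaces the lod source with the sample index and appends an explicit lod,
+    * hence 4 sources. */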
+ nir_tex_instr *tex = nir_tex_instr_create(b->shader, is_multisampled ? 4 : 3);
+ tex->sampler_dim = dim;
+ tex->op = is_multisampled ? nir_texop_txf_ms : nir_texop_txf;
+ tex->src[0].src_type = nir_tex_src_coord;
+ tex->src[0].src = nir_src_for_ssa(is_3d ? tex_pos_3d : tex_pos);
+ tex->src[1].src_type = is_multisampled ? nir_tex_src_ms_index : nir_tex_src_lod;
+ tex->src[1].src = nir_src_for_ssa(is_multisampled ? sample_idx : nir_imm_int(b, 0));
+ tex->src[2].src_type = nir_tex_src_texture_deref;
+ tex->src[2].src = nir_src_for_ssa(tex_deref);
+ if (is_multisampled) {
+ tex->src[3].src_type = nir_tex_src_lod;
+ tex->src[3].src = nir_src_for_ssa(nir_imm_int(b, 0));
+ }
+ tex->dest_type = nir_type_uint32;
+ tex->is_array = false;
+ tex->coord_components = is_3d ? 3 : 2;
+
+ nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+ nir_builder_instr_insert(b, &tex->instr);
+
+ return &tex->dest.ssa;
}
-
static nir_ssa_def *
-build_nir_buffer_fetch(struct nir_builder *b, struct radv_device *device,
- nir_ssa_def *tex_pos, bool is_3d, bool is_multisampled)
+build_nir_buffer_fetch(struct nir_builder *b, struct radv_device *device, nir_ssa_def *tex_pos,
+ bool is_3d, bool is_multisampled)
{
- const struct glsl_type *sampler_type =
- glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_UINT);
- nir_variable *sampler = nir_variable_create(b->shader, nir_var_uniform,
- sampler_type, "s_tex");
- sampler->data.descriptor_set = 0;
- sampler->data.binding = 0;
-
- nir_ssa_def *width = nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0), .base=16, .range=4);
-
- nir_ssa_def *pos_x = nir_channel(b, tex_pos, 0);
- nir_ssa_def *pos_y = nir_channel(b, tex_pos, 1);
- pos_y = nir_imul(b, pos_y, width);
- pos_x = nir_iadd(b, pos_x, pos_y);
-
- nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;
-
- nir_tex_instr *tex = nir_tex_instr_create(b->shader, 2);
- tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
- tex->op = nir_texop_txf;
- tex->src[0].src_type = nir_tex_src_coord;
- tex->src[0].src = nir_src_for_ssa(pos_x);
- tex->src[1].src_type = nir_tex_src_texture_deref;
- tex->src[1].src = nir_src_for_ssa(tex_deref);
- tex->dest_type = nir_type_uint32;
- tex->is_array = false;
- tex->coord_components = 1;
-
- nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
- nir_builder_instr_insert(b, &tex->instr);
-
- return &tex->dest.ssa;
+ const struct glsl_type *sampler_type =
+ glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_UINT);
+ nir_variable *sampler = nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex");
+ sampler->data.descriptor_set = 0;
+ sampler->data.binding = 0;
+
+ nir_ssa_def *width = nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0), .base = 16, .range = 4);
+
+ nir_ssa_def *pos_x = nir_channel(b, tex_pos, 0);
+ nir_ssa_def *pos_y = nir_channel(b, tex_pos, 1);
+ pos_y = nir_imul(b, pos_y, width);
+ pos_x = nir_iadd(b, pos_x, pos_y);
+
+ nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;
+
+ nir_tex_instr *tex = nir_tex_instr_create(b->shader, 2);
+ tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
+ tex->op = nir_texop_txf;
+ tex->src[0].src_type = nir_tex_src_coord;
+ tex->src[0].src = nir_src_for_ssa(pos_x);
+ tex->src[1].src_type = nir_tex_src_texture_deref;
+ tex->src[1].src = nir_src_for_ssa(tex_deref);
+ tex->dest_type = nir_type_uint32;
+ tex->is_array = false;
+ tex->coord_components = 1;
+
+ nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+ nir_builder_instr_insert(b, &tex->instr);
+
+ return &tex->dest.ssa;
}
static const VkPipelineVertexInputStateCreateInfo normal_vi_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
- .vertexBindingDescriptionCount = 0,
- .vertexAttributeDescriptionCount = 0,
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .vertexBindingDescriptionCount = 0,
+ .vertexAttributeDescriptionCount = 0,
};
static nir_shader *
-build_nir_copy_fragment_shader(struct radv_device *device,
- texel_fetch_build_func txf_func, const char* name, bool is_3d,
- bool is_multisampled)
+build_nir_copy_fragment_shader(struct radv_device *device, texel_fetch_build_func txf_func,
+ const char *name, bool is_3d, bool is_multisampled)
{
- const struct glsl_type *vec4 = glsl_vec4_type();
- const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, "%s", name);
+ const struct glsl_type *vec4 = glsl_vec4_type();
+ const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
+ nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, "%s", name);
- nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
- vec2, "v_tex_pos");
- tex_pos_in->data.location = VARYING_SLOT_VAR0;
+ nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, vec2, "v_tex_pos");
+ tex_pos_in->data.location = VARYING_SLOT_VAR0;
- nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
- vec4, "f_color");
- color_out->data.location = FRAG_RESULT_DATA0;
+ nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color");
+ color_out->data.location = FRAG_RESULT_DATA0;
- nir_ssa_def *pos_int = nir_f2i32(&b, nir_load_var(&b, tex_pos_in));
- nir_ssa_def *tex_pos = nir_channels(&b, pos_int, 0x3);
+ nir_ssa_def *pos_int = nir_f2i32(&b, nir_load_var(&b, tex_pos_in));
+ nir_ssa_def *tex_pos = nir_channels(&b, pos_int, 0x3);
- nir_ssa_def *color = txf_func(&b, device, tex_pos, is_3d, is_multisampled);
- nir_store_var(&b, color_out, color, 0xf);
+ nir_ssa_def *color = txf_func(&b, device, tex_pos, is_3d, is_multisampled);
+ nir_store_var(&b, color_out, color, 0xf);
- return b.shader;
+ return b.shader;
}
static nir_shader *
-build_nir_copy_fragment_shader_depth(struct radv_device *device,
- texel_fetch_build_func txf_func, const char* name, bool is_3d,
- bool is_multisampled)
+build_nir_copy_fragment_shader_depth(struct radv_device *device, texel_fetch_build_func txf_func,
+ const char *name, bool is_3d, bool is_multisampled)
{
- const struct glsl_type *vec4 = glsl_vec4_type();
- const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, "%s", name);
+ const struct glsl_type *vec4 = glsl_vec4_type();
+ const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
+ nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, "%s", name);
- nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
- vec2, "v_tex_pos");
- tex_pos_in->data.location = VARYING_SLOT_VAR0;
+ nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, vec2, "v_tex_pos");
+ tex_pos_in->data.location = VARYING_SLOT_VAR0;
- nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
- vec4, "f_color");
- color_out->data.location = FRAG_RESULT_DEPTH;
+ nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color");
+ color_out->data.location = FRAG_RESULT_DEPTH;
- nir_ssa_def *pos_int = nir_f2i32(&b, nir_load_var(&b, tex_pos_in));
- nir_ssa_def *tex_pos = nir_channels(&b, pos_int, 0x3);
+ nir_ssa_def *pos_int = nir_f2i32(&b, nir_load_var(&b, tex_pos_in));
+ nir_ssa_def *tex_pos = nir_channels(&b, pos_int, 0x3);
- nir_ssa_def *color = txf_func(&b, device, tex_pos, is_3d, is_multisampled);
- nir_store_var(&b, color_out, color, 0x1);
+ nir_ssa_def *color = txf_func(&b, device, tex_pos, is_3d, is_multisampled);
+ nir_store_var(&b, color_out, color, 0x1);
- return b.shader;
+ return b.shader;
}
static nir_shader *
-build_nir_copy_fragment_shader_stencil(struct radv_device *device,
- texel_fetch_build_func txf_func, const char* name, bool is_3d,
- bool is_multisampled)
+build_nir_copy_fragment_shader_stencil(struct radv_device *device, texel_fetch_build_func txf_func,
+ const char *name, bool is_3d, bool is_multisampled)
{
- const struct glsl_type *vec4 = glsl_vec4_type();
- const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, "%s", name);
+ const struct glsl_type *vec4 = glsl_vec4_type();
+ const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
+ nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, "%s", name);
- nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
- vec2, "v_tex_pos");
- tex_pos_in->data.location = VARYING_SLOT_VAR0;
+ nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in, vec2, "v_tex_pos");
+ tex_pos_in->data.location = VARYING_SLOT_VAR0;
- nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
- vec4, "f_color");
- color_out->data.location = FRAG_RESULT_STENCIL;
+ nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color");
+ color_out->data.location = FRAG_RESULT_STENCIL;
- nir_ssa_def *pos_int = nir_f2i32(&b, nir_load_var(&b, tex_pos_in));
- nir_ssa_def *tex_pos = nir_channels(&b, pos_int, 0x3);
+ nir_ssa_def *pos_int = nir_f2i32(&b, nir_load_var(&b, tex_pos_in));
+ nir_ssa_def *tex_pos = nir_channels(&b, pos_int, 0x3);
- nir_ssa_def *color = txf_func(&b, device, tex_pos, is_3d, is_multisampled);
- nir_store_var(&b, color_out, color, 0x1);
+ nir_ssa_def *color = txf_func(&b, device, tex_pos, is_3d, is_multisampled);
+ nir_store_var(&b, color_out, color, 0x1);
- return b.shader;
+ return b.shader;
}
void
radv_device_finish_meta_blit2d_state(struct radv_device *device)
{
- struct radv_meta_state *state = &device->meta_state;
-
- for(unsigned j = 0; j < NUM_META_FS_KEYS; ++j) {
- for (unsigned k = 0; k < RADV_META_DST_LAYOUT_COUNT; ++k) {
- radv_DestroyRenderPass(radv_device_to_handle(device),
- state->blit2d_render_passes[j][k],
- &state->alloc);
- }
- }
-
- for (enum radv_blit_ds_layout j = RADV_BLIT_DS_LAYOUT_TILE_ENABLE; j < RADV_BLIT_DS_LAYOUT_COUNT; j++) {
- radv_DestroyRenderPass(radv_device_to_handle(device),
- state->blit2d_depth_only_rp[j], &state->alloc);
- radv_DestroyRenderPass(radv_device_to_handle(device),
- state->blit2d_stencil_only_rp[j], &state->alloc);
- }
-
- for (unsigned log2_samples = 0; log2_samples < MAX_SAMPLES_LOG2; ++log2_samples) {
- for (unsigned src = 0; src < BLIT2D_NUM_SRC_TYPES; src++) {
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->blit2d[log2_samples].p_layouts[src],
- &state->alloc);
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- state->blit2d[log2_samples].ds_layouts[src],
- &state->alloc);
-
- for (unsigned j = 0; j < NUM_META_FS_KEYS; ++j) {
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->blit2d[log2_samples].pipelines[src][j],
- &state->alloc);
- }
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->blit2d[log2_samples].depth_only_pipeline[src],
- &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->blit2d[log2_samples].stencil_only_pipeline[src],
- &state->alloc);
- }
- }
+ struct radv_meta_state *state = &device->meta_state;
+
+ for (unsigned j = 0; j < NUM_META_FS_KEYS; ++j) {
+ for (unsigned k = 0; k < RADV_META_DST_LAYOUT_COUNT; ++k) {
+ radv_DestroyRenderPass(radv_device_to_handle(device), state->blit2d_render_passes[j][k],
+ &state->alloc);
+ }
+ }
+
+ for (enum radv_blit_ds_layout j = RADV_BLIT_DS_LAYOUT_TILE_ENABLE; j < RADV_BLIT_DS_LAYOUT_COUNT;
+ j++) {
+ radv_DestroyRenderPass(radv_device_to_handle(device), state->blit2d_depth_only_rp[j],
+ &state->alloc);
+ radv_DestroyRenderPass(radv_device_to_handle(device), state->blit2d_stencil_only_rp[j],
+ &state->alloc);
+ }
+
+ for (unsigned log2_samples = 0; log2_samples < MAX_SAMPLES_LOG2; ++log2_samples) {
+ for (unsigned src = 0; src < BLIT2D_NUM_SRC_TYPES; src++) {
+ radv_DestroyPipelineLayout(radv_device_to_handle(device),
+ state->blit2d[log2_samples].p_layouts[src], &state->alloc);
+ radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
+ state->blit2d[log2_samples].ds_layouts[src],
+ &state->alloc);
+
+ for (unsigned j = 0; j < NUM_META_FS_KEYS; ++j) {
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->blit2d[log2_samples].pipelines[src][j], &state->alloc);
+ }
+
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->blit2d[log2_samples].depth_only_pipeline[src], &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->blit2d[log2_samples].stencil_only_pipeline[src],
+ &state->alloc);
+ }
+ }
}
static VkResult
-blit2d_init_color_pipeline(struct radv_device *device,
- enum blit2d_src_type src_type,
- VkFormat format,
- uint32_t log2_samples)
+blit2d_init_color_pipeline(struct radv_device *device, enum blit2d_src_type src_type,
+ VkFormat format, uint32_t log2_samples)
{
- VkResult result;
- unsigned fs_key = radv_format_meta_fs_key(device, format);
- const char *name;
-
- mtx_lock(&device->meta_state.mtx);
- if (device->meta_state.blit2d[log2_samples].pipelines[src_type][fs_key]) {
- mtx_unlock(&device->meta_state.mtx);
- return VK_SUCCESS;
- }
-
- texel_fetch_build_func src_func;
- switch(src_type) {
- case BLIT2D_SRC_TYPE_IMAGE:
- src_func = build_nir_texel_fetch;
- name = "meta_blit2d_image_fs";
- break;
- case BLIT2D_SRC_TYPE_IMAGE_3D:
- src_func = build_nir_texel_fetch;
- name = "meta_blit3d_image_fs";
- break;
- case BLIT2D_SRC_TYPE_BUFFER:
- src_func = build_nir_buffer_fetch;
- name = "meta_blit2d_buffer_fs";
- break;
- default:
- unreachable("unknown blit src type\n");
- break;
- }
-
- const VkPipelineVertexInputStateCreateInfo *vi_create_info;
- nir_shader *fs = build_nir_copy_fragment_shader(device, src_func, name, src_type == BLIT2D_SRC_TYPE_IMAGE_3D, log2_samples > 0);
- nir_shader *vs = build_nir_vertex_shader();
-
- vi_create_info = &normal_vi_create_info;
-
- VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
- {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_VERTEX_BIT,
- .module = vk_shader_module_handle_from_nir(vs),
- .pName = "main",
- .pSpecializationInfo = NULL
- }, {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
- .module = vk_shader_module_handle_from_nir(fs),
- .pName = "main",
- .pSpecializationInfo = NULL
- },
- };
-
- for (unsigned dst_layout = 0; dst_layout < RADV_META_DST_LAYOUT_COUNT; ++dst_layout) {
- if (!device->meta_state.blit2d_render_passes[fs_key][dst_layout]) {
- VkImageLayout layout = radv_meta_dst_layout_to_layout(dst_layout);
-
- result = radv_CreateRenderPass2(radv_device_to_handle(device),
- &(VkRenderPassCreateInfo2) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
- .attachmentCount = 1,
- .pAttachments = &(VkAttachmentDescription2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
- .format = format,
- .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
- .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
- .initialLayout = layout,
- .finalLayout = layout,
- },
- .subpassCount = 1,
- .pSubpasses = &(VkSubpassDescription2) {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
- .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
- .inputAttachmentCount = 0,
- .colorAttachmentCount = 1,
- .pColorAttachments = &(VkAttachmentReference2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
- .attachment = 0,
- .layout = layout,
- },
- .pResolveAttachments = NULL,
- .pDepthStencilAttachment = &(VkAttachmentReference2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
- .attachment = VK_ATTACHMENT_UNUSED,
- .layout = layout,
- },
- .preserveAttachmentCount = 0,
- .pPreserveAttachments = NULL,
- },
- .dependencyCount = 2,
- .pDependencies = (VkSubpassDependency2[]) {
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = VK_SUBPASS_EXTERNAL,
- .dstSubpass = 0,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- },
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = 0,
- .dstSubpass = VK_SUBPASS_EXTERNAL,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- }
- },
- }, &device->meta_state.alloc, &device->meta_state.blit2d_render_passes[fs_key][dst_layout]);
- }
- }
-
- const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
- .stageCount = ARRAY_SIZE(pipeline_shader_stages),
- .pStages = pipeline_shader_stages,
- .pVertexInputState = vi_create_info,
- .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
- .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
- .primitiveRestartEnable = false,
- },
- .pViewportState = &(VkPipelineViewportStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
- .viewportCount = 1,
- .scissorCount = 1,
- },
- .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
- .rasterizerDiscardEnable = false,
- .polygonMode = VK_POLYGON_MODE_FILL,
- .cullMode = VK_CULL_MODE_NONE,
- .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE
- },
- .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
- .rasterizationSamples = 1 << log2_samples,
- .sampleShadingEnable = log2_samples > 1,
- .minSampleShading = 1.0,
- .pSampleMask = (VkSampleMask[]) { UINT32_MAX },
- },
- .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
- .attachmentCount = 1,
- .pAttachments = (VkPipelineColorBlendAttachmentState []) {
- { .colorWriteMask =
- VK_COLOR_COMPONENT_A_BIT |
- VK_COLOR_COMPONENT_R_BIT |
- VK_COLOR_COMPONENT_G_BIT |
- VK_COLOR_COMPONENT_B_BIT },
- }
- },
- .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
- .dynamicStateCount = 9,
- .pDynamicStates = (VkDynamicState[]) {
- VK_DYNAMIC_STATE_VIEWPORT,
- VK_DYNAMIC_STATE_SCISSOR,
- VK_DYNAMIC_STATE_LINE_WIDTH,
- VK_DYNAMIC_STATE_DEPTH_BIAS,
- VK_DYNAMIC_STATE_BLEND_CONSTANTS,
- VK_DYNAMIC_STATE_DEPTH_BOUNDS,
- VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
- VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
- VK_DYNAMIC_STATE_STENCIL_REFERENCE,
- },
- },
- .flags = 0,
- .layout = device->meta_state.blit2d[log2_samples].p_layouts[src_type],
- .renderPass = device->meta_state.blit2d_render_passes[fs_key][0],
- .subpass = 0,
- };
-
- const struct radv_graphics_pipeline_create_info radv_pipeline_info = {
- .use_rectlist = true
- };
-
- result = radv_graphics_pipeline_create(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- &vk_pipeline_info, &radv_pipeline_info,
- &device->meta_state.alloc,
- &device->meta_state.blit2d[log2_samples].pipelines[src_type][fs_key]);
-
-
- ralloc_free(vs);
- ralloc_free(fs);
-
- mtx_unlock(&device->meta_state.mtx);
- return result;
+ VkResult result;
+ unsigned fs_key = radv_format_meta_fs_key(device, format);
+ const char *name;
+
+ mtx_lock(&device->meta_state.mtx);
+ if (device->meta_state.blit2d[log2_samples].pipelines[src_type][fs_key]) {
+ mtx_unlock(&device->meta_state.mtx);
+ return VK_SUCCESS;
+ }
+
+ texel_fetch_build_func src_func;
+ switch (src_type) {
+ case BLIT2D_SRC_TYPE_IMAGE:
+ src_func = build_nir_texel_fetch;
+ name = "meta_blit2d_image_fs";
+ break;
+ case BLIT2D_SRC_TYPE_IMAGE_3D:
+ src_func = build_nir_texel_fetch;
+ name = "meta_blit3d_image_fs";
+ break;
+ case BLIT2D_SRC_TYPE_BUFFER:
+ src_func = build_nir_buffer_fetch;
+ name = "meta_blit2d_buffer_fs";
+ break;
+ default:
+ unreachable("unknown blit src type\n");
+ break;
+ }
+
+ const VkPipelineVertexInputStateCreateInfo *vi_create_info;
+ nir_shader *fs = build_nir_copy_fragment_shader(
+ device, src_func, name, src_type == BLIT2D_SRC_TYPE_IMAGE_3D, log2_samples > 0);
+ nir_shader *vs = build_nir_vertex_shader();
+
+ vi_create_info = &normal_vi_create_info;
+
+ VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
+ {.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_VERTEX_BIT,
+ .module = vk_shader_module_handle_from_nir(vs),
+ .pName = "main",
+ .pSpecializationInfo = NULL},
+ {.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .module = vk_shader_module_handle_from_nir(fs),
+ .pName = "main",
+ .pSpecializationInfo = NULL},
+ };
+
+ for (unsigned dst_layout = 0; dst_layout < RADV_META_DST_LAYOUT_COUNT; ++dst_layout) {
+ if (!device->meta_state.blit2d_render_passes[fs_key][dst_layout]) {
+ VkImageLayout layout = radv_meta_dst_layout_to_layout(dst_layout);
+
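+         /* Render passes are created lazily, one per destination layout; the attachment
+          * is loaded and stored in place (initial layout == final layout), so no layout
+          * transition occurs. */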
+ result = radv_CreateRenderPass2(
+ radv_device_to_handle(device),
+ &(VkRenderPassCreateInfo2){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
+ .attachmentCount = 1,
+ .pAttachments =
+ &(VkAttachmentDescription2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
+ .format = format,
+ .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = layout,
+ .finalLayout = layout,
+ },
+ .subpassCount = 1,
+ .pSubpasses =
+ &(VkSubpassDescription2){
+ .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .colorAttachmentCount = 1,
+ .pColorAttachments =
+ &(VkAttachmentReference2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
+ .attachment = 0,
+ .layout = layout,
+ },
+ .pResolveAttachments = NULL,
+ .pDepthStencilAttachment =
+ &(VkAttachmentReference2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
+ .attachment = VK_ATTACHMENT_UNUSED,
+ .layout = layout,
+ },
+ .preserveAttachmentCount = 0,
+ .pPreserveAttachments = NULL,
+ },
+ .dependencyCount = 2,
+ .pDependencies =
+ (VkSubpassDependency2[]){{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = VK_SUBPASS_EXTERNAL,
+ .dstSubpass = 0,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0},
+ {.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = 0,
+ .dstSubpass = VK_SUBPASS_EXTERNAL,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0}},
+ },
+ &device->meta_state.alloc,
+ &device->meta_state.blit2d_render_passes[fs_key][dst_layout]);
+ }
+ }
+
+ const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .stageCount = ARRAY_SIZE(pipeline_shader_stages),
+ .pStages = pipeline_shader_stages,
+ .pVertexInputState = vi_create_info,
+ .pInputAssemblyState =
+ &(VkPipelineInputAssemblyStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
+ .primitiveRestartEnable = false,
+ },
+ .pViewportState =
+ &(VkPipelineViewportStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .viewportCount = 1,
+ .scissorCount = 1,
+ },
+ .pRasterizationState =
+ &(VkPipelineRasterizationStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .rasterizerDiscardEnable = false,
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .cullMode = VK_CULL_MODE_NONE,
+ .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE},
+ .pMultisampleState =
+ &(VkPipelineMultisampleStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .rasterizationSamples = 1 << log2_samples,
+ .sampleShadingEnable = log2_samples > 1,
+ .minSampleShading = 1.0,
+ .pSampleMask = (VkSampleMask[]){UINT32_MAX},
+ },
+ .pColorBlendState =
+ &(VkPipelineColorBlendStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments =
+ (VkPipelineColorBlendAttachmentState[]){
+ {.colorWriteMask = VK_COLOR_COMPONENT_A_BIT | VK_COLOR_COMPONENT_R_BIT |
+ VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT},
+ }},
+ .pDynamicState =
+ &(VkPipelineDynamicStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .dynamicStateCount = 9,
+ .pDynamicStates =
+ (VkDynamicState[]){
+ VK_DYNAMIC_STATE_VIEWPORT,
+ VK_DYNAMIC_STATE_SCISSOR,
+ VK_DYNAMIC_STATE_LINE_WIDTH,
+ VK_DYNAMIC_STATE_DEPTH_BIAS,
+ VK_DYNAMIC_STATE_BLEND_CONSTANTS,
+ VK_DYNAMIC_STATE_DEPTH_BOUNDS,
+ VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
+ VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
+ VK_DYNAMIC_STATE_STENCIL_REFERENCE,
+ },
+ },
+ .flags = 0,
+ .layout = device->meta_state.blit2d[log2_samples].p_layouts[src_type],
+ .renderPass = device->meta_state.blit2d_render_passes[fs_key][0],
+ .subpass = 0,
+ };
+
+ const struct radv_graphics_pipeline_create_info radv_pipeline_info = {.use_rectlist = true};
+
+ result = radv_graphics_pipeline_create(
+ radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache),
+ &vk_pipeline_info, &radv_pipeline_info, &device->meta_state.alloc,
+ &device->meta_state.blit2d[log2_samples].pipelines[src_type][fs_key]);
+
+ ralloc_free(vs);
+ ralloc_free(fs);
+
+ mtx_unlock(&device->meta_state.mtx);
+ return result;
}
static VkResult
-blit2d_init_depth_only_pipeline(struct radv_device *device,
- enum blit2d_src_type src_type,
- uint32_t log2_samples)
+blit2d_init_depth_only_pipeline(struct radv_device *device, enum blit2d_src_type src_type,
+ uint32_t log2_samples)
{
- VkResult result;
- const char *name;
-
- mtx_lock(&device->meta_state.mtx);
- if (device->meta_state.blit2d[log2_samples].depth_only_pipeline[src_type]) {
- mtx_unlock(&device->meta_state.mtx);
- return VK_SUCCESS;
- }
-
- texel_fetch_build_func src_func;
- switch(src_type) {
- case BLIT2D_SRC_TYPE_IMAGE:
- src_func = build_nir_texel_fetch;
- name = "meta_blit2d_depth_image_fs";
- break;
- case BLIT2D_SRC_TYPE_IMAGE_3D:
- src_func = build_nir_texel_fetch;
- name = "meta_blit3d_depth_image_fs";
- break;
- case BLIT2D_SRC_TYPE_BUFFER:
- src_func = build_nir_buffer_fetch;
- name = "meta_blit2d_depth_buffer_fs";
- break;
- default:
- unreachable("unknown blit src type\n");
- break;
- }
-
- const VkPipelineVertexInputStateCreateInfo *vi_create_info;
- nir_shader *fs = build_nir_copy_fragment_shader_depth(device, src_func, name, src_type == BLIT2D_SRC_TYPE_IMAGE_3D, log2_samples > 0);
- nir_shader *vs = build_nir_vertex_shader();
-
- vi_create_info = &normal_vi_create_info;
-
- VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
- {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_VERTEX_BIT,
- .module = vk_shader_module_handle_from_nir(vs),
- .pName = "main",
- .pSpecializationInfo = NULL
- }, {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
- .module = vk_shader_module_handle_from_nir(fs),
- .pName = "main",
- .pSpecializationInfo = NULL
- },
- };
-
- for (enum radv_blit_ds_layout ds_layout = RADV_BLIT_DS_LAYOUT_TILE_ENABLE; ds_layout < RADV_BLIT_DS_LAYOUT_COUNT; ds_layout++) {
- if (!device->meta_state.blit2d_depth_only_rp[ds_layout]) {
- VkImageLayout layout = radv_meta_blit_ds_to_layout(ds_layout);
- result = radv_CreateRenderPass2(radv_device_to_handle(device),
- &(VkRenderPassCreateInfo2) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
- .attachmentCount = 1,
- .pAttachments = &(VkAttachmentDescription2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
- .format = VK_FORMAT_D32_SFLOAT,
- .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
- .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
- .initialLayout = layout,
- .finalLayout = layout,
- },
- .subpassCount = 1,
- .pSubpasses = &(VkSubpassDescription2) {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
- .inputAttachmentCount = 0,
- .colorAttachmentCount = 0,
- .pColorAttachments = NULL,
- .pResolveAttachments = NULL,
- .pDepthStencilAttachment = &(VkAttachmentReference2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
- .attachment = 0,
- .layout = layout,
- },
- .preserveAttachmentCount = 0,
- .pPreserveAttachments = NULL,
- },
- .dependencyCount = 2,
- .pDependencies = (VkSubpassDependency2[]) {
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = VK_SUBPASS_EXTERNAL,
- .dstSubpass = 0,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- },
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = 0,
- .dstSubpass = VK_SUBPASS_EXTERNAL,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- }
- },
- }, &device->meta_state.alloc, &device->meta_state.blit2d_depth_only_rp[ds_layout]);
- }
- }
-
- const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
- .stageCount = ARRAY_SIZE(pipeline_shader_stages),
- .pStages = pipeline_shader_stages,
- .pVertexInputState = vi_create_info,
- .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
- .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
- .primitiveRestartEnable = false,
- },
- .pViewportState = &(VkPipelineViewportStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
- .viewportCount = 1,
- .scissorCount = 1,
- },
- .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
- .rasterizerDiscardEnable = false,
- .polygonMode = VK_POLYGON_MODE_FILL,
- .cullMode = VK_CULL_MODE_NONE,
- .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE
- },
- .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
- .rasterizationSamples = 1 << log2_samples,
- .sampleShadingEnable = false,
- .pSampleMask = (VkSampleMask[]) { UINT32_MAX },
- },
- .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
- .attachmentCount = 0,
- .pAttachments = NULL,
- },
- .pDepthStencilState = &(VkPipelineDepthStencilStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
- .depthTestEnable = true,
- .depthWriteEnable = true,
- .depthCompareOp = VK_COMPARE_OP_ALWAYS,
- },
- .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
- .dynamicStateCount = 9,
- .pDynamicStates = (VkDynamicState[]) {
- VK_DYNAMIC_STATE_VIEWPORT,
- VK_DYNAMIC_STATE_SCISSOR,
- VK_DYNAMIC_STATE_LINE_WIDTH,
- VK_DYNAMIC_STATE_DEPTH_BIAS,
- VK_DYNAMIC_STATE_BLEND_CONSTANTS,
- VK_DYNAMIC_STATE_DEPTH_BOUNDS,
- VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
- VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
- VK_DYNAMIC_STATE_STENCIL_REFERENCE,
- },
- },
- .flags = 0,
- .layout = device->meta_state.blit2d[log2_samples].p_layouts[src_type],
- .renderPass = device->meta_state.blit2d_depth_only_rp[0],
- .subpass = 0,
- };
-
- const struct radv_graphics_pipeline_create_info radv_pipeline_info = {
- .use_rectlist = true
- };
-
- result = radv_graphics_pipeline_create(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- &vk_pipeline_info, &radv_pipeline_info,
- &device->meta_state.alloc,
- &device->meta_state.blit2d[log2_samples].depth_only_pipeline[src_type]);
-
-
- ralloc_free(vs);
- ralloc_free(fs);
-
- mtx_unlock(&device->meta_state.mtx);
- return result;
+ VkResult result;
+ const char *name;
+
+ mtx_lock(&device->meta_state.mtx);
+ if (device->meta_state.blit2d[log2_samples].depth_only_pipeline[src_type]) {
+ mtx_unlock(&device->meta_state.mtx);
+ return VK_SUCCESS;
+ }
+
+ texel_fetch_build_func src_func;
+ switch (src_type) {
+ case BLIT2D_SRC_TYPE_IMAGE:
+ src_func = build_nir_texel_fetch;
+ name = "meta_blit2d_depth_image_fs";
+ break;
+ case BLIT2D_SRC_TYPE_IMAGE_3D:
+ src_func = build_nir_texel_fetch;
+ name = "meta_blit3d_depth_image_fs";
+ break;
+ case BLIT2D_SRC_TYPE_BUFFER:
+ src_func = build_nir_buffer_fetch;
+ name = "meta_blit2d_depth_buffer_fs";
+ break;
+ default:
+ unreachable("unknown blit src type\n");
+ break;
+ }
+
+ const VkPipelineVertexInputStateCreateInfo *vi_create_info;
+ nir_shader *fs = build_nir_copy_fragment_shader_depth(
+ device, src_func, name, src_type == BLIT2D_SRC_TYPE_IMAGE_3D, log2_samples > 0);
+ nir_shader *vs = build_nir_vertex_shader();
+
+ vi_create_info = &normal_vi_create_info;
+
+ VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
+ {.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_VERTEX_BIT,
+ .module = vk_shader_module_handle_from_nir(vs),
+ .pName = "main",
+ .pSpecializationInfo = NULL},
+ {.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .module = vk_shader_module_handle_from_nir(fs),
+ .pName = "main",
+ .pSpecializationInfo = NULL},
+ };
+
+ for (enum radv_blit_ds_layout ds_layout = RADV_BLIT_DS_LAYOUT_TILE_ENABLE;
+ ds_layout < RADV_BLIT_DS_LAYOUT_COUNT; ds_layout++) {
+ if (!device->meta_state.blit2d_depth_only_rp[ds_layout]) {
+ VkImageLayout layout = radv_meta_blit_ds_to_layout(ds_layout);
+ result = radv_CreateRenderPass2(
+ radv_device_to_handle(device),
+ &(VkRenderPassCreateInfo2){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
+ .attachmentCount = 1,
+ .pAttachments =
+ &(VkAttachmentDescription2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
+ .format = VK_FORMAT_D32_SFLOAT,
+ .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = layout,
+ .finalLayout = layout,
+ },
+ .subpassCount = 1,
+ .pSubpasses =
+ &(VkSubpassDescription2){
+ .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
+ .inputAttachmentCount = 0,
+ .colorAttachmentCount = 0,
+ .pColorAttachments = NULL,
+ .pResolveAttachments = NULL,
+ .pDepthStencilAttachment =
+ &(VkAttachmentReference2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
+ .attachment = 0,
+ .layout = layout,
+ },
+ .preserveAttachmentCount = 0,
+ .pPreserveAttachments = NULL,
+ },
+ .dependencyCount = 2,
+ .pDependencies =
+ (VkSubpassDependency2[]){{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = VK_SUBPASS_EXTERNAL,
+ .dstSubpass = 0,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0},
+ {.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = 0,
+ .dstSubpass = VK_SUBPASS_EXTERNAL,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0}},
+ },
+ &device->meta_state.alloc, &device->meta_state.blit2d_depth_only_rp[ds_layout]);
+ }
+ }
+
+ const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .stageCount = ARRAY_SIZE(pipeline_shader_stages),
+ .pStages = pipeline_shader_stages,
+ .pVertexInputState = vi_create_info,
+ .pInputAssemblyState =
+ &(VkPipelineInputAssemblyStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
+ .primitiveRestartEnable = false,
+ },
+ .pViewportState =
+ &(VkPipelineViewportStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .viewportCount = 1,
+ .scissorCount = 1,
+ },
+ .pRasterizationState =
+ &(VkPipelineRasterizationStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .rasterizerDiscardEnable = false,
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .cullMode = VK_CULL_MODE_NONE,
+ .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE},
+ .pMultisampleState =
+ &(VkPipelineMultisampleStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .rasterizationSamples = 1 << log2_samples,
+ .sampleShadingEnable = false,
+ .pSampleMask = (VkSampleMask[]){UINT32_MAX},
+ },
+ .pColorBlendState =
+ &(VkPipelineColorBlendStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .attachmentCount = 0,
+ .pAttachments = NULL,
+ },
+ .pDepthStencilState =
+ &(VkPipelineDepthStencilStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
+ .depthTestEnable = true,
+ .depthWriteEnable = true,
+ .depthCompareOp = VK_COMPARE_OP_ALWAYS,
+ },
+ .pDynamicState =
+ &(VkPipelineDynamicStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .dynamicStateCount = 9,
+ .pDynamicStates =
+ (VkDynamicState[]){
+ VK_DYNAMIC_STATE_VIEWPORT,
+ VK_DYNAMIC_STATE_SCISSOR,
+ VK_DYNAMIC_STATE_LINE_WIDTH,
+ VK_DYNAMIC_STATE_DEPTH_BIAS,
+ VK_DYNAMIC_STATE_BLEND_CONSTANTS,
+ VK_DYNAMIC_STATE_DEPTH_BOUNDS,
+ VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
+ VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
+ VK_DYNAMIC_STATE_STENCIL_REFERENCE,
+ },
+ },
+ .flags = 0,
+ .layout = device->meta_state.blit2d[log2_samples].p_layouts[src_type],
+ .renderPass = device->meta_state.blit2d_depth_only_rp[0],
+ .subpass = 0,
+ };
+
+ const struct radv_graphics_pipeline_create_info radv_pipeline_info = {.use_rectlist = true};
+
+ result = radv_graphics_pipeline_create(
+ radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache),
+ &vk_pipeline_info, &radv_pipeline_info, &device->meta_state.alloc,
+ &device->meta_state.blit2d[log2_samples].depth_only_pipeline[src_type]);
+
+ ralloc_free(vs);
+ ralloc_free(fs);
+
+ mtx_unlock(&device->meta_state.mtx);
+ return result;
}
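The depth-only pipeline above (and the stencil-only one below) is created on demand: the handle is re-checked under meta_state.mtx so that concurrent first uses build it at most once. A minimal sketch of that pattern (illustrative helper using C11 threads, not radv API and not part of the patch):

#include <threads.h>
#include <vulkan/vulkan.h>

static VkResult
lazy_create_pipeline_example(mtx_t *mtx, VkPipeline *pipeline,
                             VkResult (*create)(VkPipeline *out))
{
   VkResult result = VK_SUCCESS;

   mtx_lock(mtx);
   if (*pipeline == VK_NULL_HANDLE) /* another thread may have created it already */
      result = create(pipeline);
   mtx_unlock(mtx);

   return result;
}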
static VkResult
-blit2d_init_stencil_only_pipeline(struct radv_device *device,
- enum blit2d_src_type src_type,
- uint32_t log2_samples)
+blit2d_init_stencil_only_pipeline(struct radv_device *device, enum blit2d_src_type src_type,
+ uint32_t log2_samples)
{
- VkResult result;
- const char *name;
-
- mtx_lock(&device->meta_state.mtx);
- if (device->meta_state.blit2d[log2_samples].stencil_only_pipeline[src_type]) {
- mtx_unlock(&device->meta_state.mtx);
- return VK_SUCCESS;
- }
-
- texel_fetch_build_func src_func;
- switch(src_type) {
- case BLIT2D_SRC_TYPE_IMAGE:
- src_func = build_nir_texel_fetch;
- name = "meta_blit2d_stencil_image_fs";
- break;
- case BLIT2D_SRC_TYPE_IMAGE_3D:
- src_func = build_nir_texel_fetch;
- name = "meta_blit3d_stencil_image_fs";
- break;
- case BLIT2D_SRC_TYPE_BUFFER:
- src_func = build_nir_buffer_fetch;
- name = "meta_blit2d_stencil_buffer_fs";
- break;
- default:
- unreachable("unknown blit src type\n");
- break;
- }
-
- const VkPipelineVertexInputStateCreateInfo *vi_create_info;
- nir_shader *fs = build_nir_copy_fragment_shader_stencil(device, src_func, name, src_type == BLIT2D_SRC_TYPE_IMAGE_3D, log2_samples > 0);
- nir_shader *vs = build_nir_vertex_shader();
-
- vi_create_info = &normal_vi_create_info;
-
- VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
- {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_VERTEX_BIT,
- .module = vk_shader_module_handle_from_nir(vs),
- .pName = "main",
- .pSpecializationInfo = NULL
- }, {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
- .module = vk_shader_module_handle_from_nir(fs),
- .pName = "main",
- .pSpecializationInfo = NULL
- },
- };
-
- for (enum radv_blit_ds_layout ds_layout = RADV_BLIT_DS_LAYOUT_TILE_ENABLE; ds_layout < RADV_BLIT_DS_LAYOUT_COUNT; ds_layout++) {
- if (!device->meta_state.blit2d_stencil_only_rp[ds_layout]) {
- VkImageLayout layout = radv_meta_blit_ds_to_layout(ds_layout);
- result = radv_CreateRenderPass2(radv_device_to_handle(device),
- &(VkRenderPassCreateInfo2) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
- .attachmentCount = 1,
- .pAttachments = &(VkAttachmentDescription2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
- .format = VK_FORMAT_S8_UINT,
- .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
- .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
- .initialLayout = layout,
- .finalLayout = layout,
- },
- .subpassCount = 1,
- .pSubpasses = &(VkSubpassDescription2) {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
- .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
- .inputAttachmentCount = 0,
- .colorAttachmentCount = 0,
- .pColorAttachments = NULL,
- .pResolveAttachments = NULL,
- .pDepthStencilAttachment = &(VkAttachmentReference2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
- .attachment = 0,
- .layout = layout,
- },
- .preserveAttachmentCount = 0,
- .pPreserveAttachments = NULL,
- },
- .dependencyCount = 2,
- .pDependencies = (VkSubpassDependency2[]) {
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = VK_SUBPASS_EXTERNAL,
- .dstSubpass = 0,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- },
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = 0,
- .dstSubpass = VK_SUBPASS_EXTERNAL,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- }
- },
- }, &device->meta_state.alloc, &device->meta_state.blit2d_stencil_only_rp[ds_layout]);
- }
- }
-
- const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
- .stageCount = ARRAY_SIZE(pipeline_shader_stages),
- .pStages = pipeline_shader_stages,
- .pVertexInputState = vi_create_info,
- .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
- .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
- .primitiveRestartEnable = false,
- },
- .pViewportState = &(VkPipelineViewportStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
- .viewportCount = 1,
- .scissorCount = 1,
- },
- .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
- .rasterizerDiscardEnable = false,
- .polygonMode = VK_POLYGON_MODE_FILL,
- .cullMode = VK_CULL_MODE_NONE,
- .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE
- },
- .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
- .rasterizationSamples = 1 << log2_samples,
- .sampleShadingEnable = false,
- .pSampleMask = (VkSampleMask[]) { UINT32_MAX },
- },
- .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
- .attachmentCount = 0,
- .pAttachments = NULL,
- },
- .pDepthStencilState = &(VkPipelineDepthStencilStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
- .depthTestEnable = false,
- .depthWriteEnable = false,
- .stencilTestEnable = true,
- .front = {
- .failOp = VK_STENCIL_OP_REPLACE,
- .passOp = VK_STENCIL_OP_REPLACE,
- .depthFailOp = VK_STENCIL_OP_REPLACE,
- .compareOp = VK_COMPARE_OP_ALWAYS,
- .compareMask = 0xff,
- .writeMask = 0xff,
- .reference = 0
- },
- .back = {
- .failOp = VK_STENCIL_OP_REPLACE,
- .passOp = VK_STENCIL_OP_REPLACE,
- .depthFailOp = VK_STENCIL_OP_REPLACE,
- .compareOp = VK_COMPARE_OP_ALWAYS,
- .compareMask = 0xff,
- .writeMask = 0xff,
- .reference = 0
- },
- .depthCompareOp = VK_COMPARE_OP_ALWAYS,
- },
- .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
- .dynamicStateCount = 6,
- .pDynamicStates = (VkDynamicState[]) {
- VK_DYNAMIC_STATE_VIEWPORT,
- VK_DYNAMIC_STATE_SCISSOR,
- VK_DYNAMIC_STATE_LINE_WIDTH,
- VK_DYNAMIC_STATE_DEPTH_BIAS,
- VK_DYNAMIC_STATE_BLEND_CONSTANTS,
- VK_DYNAMIC_STATE_DEPTH_BOUNDS,
- },
- },
- .flags = 0,
- .layout = device->meta_state.blit2d[log2_samples].p_layouts[src_type],
- .renderPass = device->meta_state.blit2d_stencil_only_rp[0],
- .subpass = 0,
- };
-
- const struct radv_graphics_pipeline_create_info radv_pipeline_info = {
- .use_rectlist = true
- };
-
- result = radv_graphics_pipeline_create(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- &vk_pipeline_info, &radv_pipeline_info,
- &device->meta_state.alloc,
- &device->meta_state.blit2d[log2_samples].stencil_only_pipeline[src_type]);
-
-
- ralloc_free(vs);
- ralloc_free(fs);
-
- mtx_unlock(&device->meta_state.mtx);
- return result;
+ VkResult result;
+ const char *name;
+
+ mtx_lock(&device->meta_state.mtx);
+ if (device->meta_state.blit2d[log2_samples].stencil_only_pipeline[src_type]) {
+ mtx_unlock(&device->meta_state.mtx);
+ return VK_SUCCESS;
+ }
+
+ texel_fetch_build_func src_func;
+ switch (src_type) {
+ case BLIT2D_SRC_TYPE_IMAGE:
+ src_func = build_nir_texel_fetch;
+ name = "meta_blit2d_stencil_image_fs";
+ break;
+ case BLIT2D_SRC_TYPE_IMAGE_3D:
+ src_func = build_nir_texel_fetch;
+ name = "meta_blit3d_stencil_image_fs";
+ break;
+ case BLIT2D_SRC_TYPE_BUFFER:
+ src_func = build_nir_buffer_fetch;
+ name = "meta_blit2d_stencil_buffer_fs";
+ break;
+ default:
+ unreachable("unknown blit src type\n");
+ break;
+ }
+
+ const VkPipelineVertexInputStateCreateInfo *vi_create_info;
+ nir_shader *fs = build_nir_copy_fragment_shader_stencil(
+ device, src_func, name, src_type == BLIT2D_SRC_TYPE_IMAGE_3D, log2_samples > 0);
+ nir_shader *vs = build_nir_vertex_shader();
+
+ vi_create_info = &normal_vi_create_info;
+
+ VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
+ {.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_VERTEX_BIT,
+ .module = vk_shader_module_handle_from_nir(vs),
+ .pName = "main",
+ .pSpecializationInfo = NULL},
+ {.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .module = vk_shader_module_handle_from_nir(fs),
+ .pName = "main",
+ .pSpecializationInfo = NULL},
+ };
+
+ for (enum radv_blit_ds_layout ds_layout = RADV_BLIT_DS_LAYOUT_TILE_ENABLE;
+ ds_layout < RADV_BLIT_DS_LAYOUT_COUNT; ds_layout++) {
+ if (!device->meta_state.blit2d_stencil_only_rp[ds_layout]) {
+ VkImageLayout layout = radv_meta_blit_ds_to_layout(ds_layout);
+ result = radv_CreateRenderPass2(
+ radv_device_to_handle(device),
+ &(VkRenderPassCreateInfo2){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
+ .attachmentCount = 1,
+ .pAttachments =
+ &(VkAttachmentDescription2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
+ .format = VK_FORMAT_S8_UINT,
+ .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = layout,
+ .finalLayout = layout,
+ },
+ .subpassCount = 1,
+ .pSubpasses =
+ &(VkSubpassDescription2){
+ .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .colorAttachmentCount = 0,
+ .pColorAttachments = NULL,
+ .pResolveAttachments = NULL,
+ .pDepthStencilAttachment =
+ &(VkAttachmentReference2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
+ .attachment = 0,
+ .layout = layout,
+ },
+ .preserveAttachmentCount = 0,
+ .pPreserveAttachments = NULL,
+ },
+ .dependencyCount = 2,
+ .pDependencies =
+ (VkSubpassDependency2[]){{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = VK_SUBPASS_EXTERNAL,
+ .dstSubpass = 0,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0},
+ {.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = 0,
+ .dstSubpass = VK_SUBPASS_EXTERNAL,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0}},
+ },
+ &device->meta_state.alloc, &device->meta_state.blit2d_stencil_only_rp[ds_layout]);
+ }
+ }
+
+ const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .stageCount = ARRAY_SIZE(pipeline_shader_stages),
+ .pStages = pipeline_shader_stages,
+ .pVertexInputState = vi_create_info,
+ .pInputAssemblyState =
+ &(VkPipelineInputAssemblyStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
+ .primitiveRestartEnable = false,
+ },
+ .pViewportState =
+ &(VkPipelineViewportStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .viewportCount = 1,
+ .scissorCount = 1,
+ },
+ .pRasterizationState =
+ &(VkPipelineRasterizationStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .rasterizerDiscardEnable = false,
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .cullMode = VK_CULL_MODE_NONE,
+ .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE},
+ .pMultisampleState =
+ &(VkPipelineMultisampleStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .rasterizationSamples = 1 << log2_samples,
+ .sampleShadingEnable = false,
+ .pSampleMask = (VkSampleMask[]){UINT32_MAX},
+ },
+ .pColorBlendState =
+ &(VkPipelineColorBlendStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .attachmentCount = 0,
+ .pAttachments = NULL,
+ },
+ .pDepthStencilState =
+ &(VkPipelineDepthStencilStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
+ .depthTestEnable = false,
+ .depthWriteEnable = false,
+ .stencilTestEnable = true,
+ .front = {.failOp = VK_STENCIL_OP_REPLACE,
+ .passOp = VK_STENCIL_OP_REPLACE,
+ .depthFailOp = VK_STENCIL_OP_REPLACE,
+ .compareOp = VK_COMPARE_OP_ALWAYS,
+ .compareMask = 0xff,
+ .writeMask = 0xff,
+ .reference = 0},
+ .back = {.failOp = VK_STENCIL_OP_REPLACE,
+ .passOp = VK_STENCIL_OP_REPLACE,
+ .depthFailOp = VK_STENCIL_OP_REPLACE,
+ .compareOp = VK_COMPARE_OP_ALWAYS,
+ .compareMask = 0xff,
+ .writeMask = 0xff,
+ .reference = 0},
+ .depthCompareOp = VK_COMPARE_OP_ALWAYS,
+ },
+ .pDynamicState =
+ &(VkPipelineDynamicStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .dynamicStateCount = 6,
+ .pDynamicStates =
+ (VkDynamicState[]){
+ VK_DYNAMIC_STATE_VIEWPORT,
+ VK_DYNAMIC_STATE_SCISSOR,
+ VK_DYNAMIC_STATE_LINE_WIDTH,
+ VK_DYNAMIC_STATE_DEPTH_BIAS,
+ VK_DYNAMIC_STATE_BLEND_CONSTANTS,
+ VK_DYNAMIC_STATE_DEPTH_BOUNDS,
+ },
+ },
+ .flags = 0,
+ .layout = device->meta_state.blit2d[log2_samples].p_layouts[src_type],
+ .renderPass = device->meta_state.blit2d_stencil_only_rp[0],
+ .subpass = 0,
+ };
+
+ const struct radv_graphics_pipeline_create_info radv_pipeline_info = {.use_rectlist = true};
+
+ result = radv_graphics_pipeline_create(
+ radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache),
+ &vk_pipeline_info, &radv_pipeline_info, &device->meta_state.alloc,
+ &device->meta_state.blit2d[log2_samples].stencil_only_pipeline[src_type]);
+
+ ralloc_free(vs);
+ ralloc_free(fs);
+
+ mtx_unlock(&device->meta_state.mtx);
+ return result;
}
static VkResult
-meta_blit2d_create_pipe_layout(struct radv_device *device,
- int idx,
- uint32_t log2_samples)
+meta_blit2d_create_pipe_layout(struct radv_device *device, int idx, uint32_t log2_samples)
{
- VkResult result;
- VkDescriptorType desc_type = (idx == BLIT2D_SRC_TYPE_BUFFER) ? VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
- const VkPushConstantRange push_constant_ranges[] = {
- {VK_SHADER_STAGE_VERTEX_BIT, 0, 16},
- {VK_SHADER_STAGE_FRAGMENT_BIT, 16, 4},
- };
- int num_push_constant_range = (idx != BLIT2D_SRC_TYPE_IMAGE || log2_samples > 0) ? 2 : 1;
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
- &(VkDescriptorSetLayoutCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 1,
- .pBindings = (VkDescriptorSetLayoutBinding[]) {
- {
- .binding = 0,
- .descriptorType = desc_type,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
- .pImmutableSamplers = NULL
- },
- }
- }, &device->meta_state.alloc, &device->meta_state.blit2d[log2_samples].ds_layouts[idx]);
- if (result != VK_SUCCESS)
- goto fail;
-
- result = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &(VkPipelineLayoutCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 1,
- .pSetLayouts = &device->meta_state.blit2d[log2_samples].ds_layouts[idx],
- .pushConstantRangeCount = num_push_constant_range,
- .pPushConstantRanges = push_constant_ranges,
- },
- &device->meta_state.alloc, &device->meta_state.blit2d[log2_samples].p_layouts[idx]);
- if (result != VK_SUCCESS)
- goto fail;
- return VK_SUCCESS;
+ VkResult result;
+ VkDescriptorType desc_type = (idx == BLIT2D_SRC_TYPE_BUFFER)
+ ? VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER
+ : VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
+ const VkPushConstantRange push_constant_ranges[] = {
+ {VK_SHADER_STAGE_VERTEX_BIT, 0, 16},
+ {VK_SHADER_STAGE_FRAGMENT_BIT, 16, 4},
+ };
+ int num_push_constant_range = (idx != BLIT2D_SRC_TYPE_IMAGE || log2_samples > 0) ? 2 : 1;
+
+ result = radv_CreateDescriptorSetLayout(
+ radv_device_to_handle(device),
+ &(VkDescriptorSetLayoutCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 1,
+ .pBindings =
+ (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = desc_type,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .pImmutableSamplers = NULL},
+ }},
+ &device->meta_state.alloc, &device->meta_state.blit2d[log2_samples].ds_layouts[idx]);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ result = radv_CreatePipelineLayout(
+ radv_device_to_handle(device),
+ &(VkPipelineLayoutCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &device->meta_state.blit2d[log2_samples].ds_layouts[idx],
+ .pushConstantRangeCount = num_push_constant_range,
+ .pPushConstantRanges = push_constant_ranges,
+ },
+ &device->meta_state.alloc, &device->meta_state.blit2d[log2_samples].p_layouts[idx]);
+ if (result != VK_SUCCESS)
+ goto fail;
+ return VK_SUCCESS;
fail:
- return result;
+ return result;
}
VkResult
radv_device_init_meta_blit2d_state(struct radv_device *device, bool on_demand)
{
- VkResult result;
- bool create_3d = device->physical_device->rad_info.chip_class >= GFX9;
+ VkResult result;
+ bool create_3d = device->physical_device->rad_info.chip_class >= GFX9;
- for (unsigned log2_samples = 0; log2_samples < MAX_SAMPLES_LOG2; log2_samples++) {
- for (unsigned src = 0; src < BLIT2D_NUM_SRC_TYPES; src++) {
- if (src == BLIT2D_SRC_TYPE_IMAGE_3D && !create_3d)
- continue;
+ for (unsigned log2_samples = 0; log2_samples < MAX_SAMPLES_LOG2; log2_samples++) {
+ for (unsigned src = 0; src < BLIT2D_NUM_SRC_TYPES; src++) {
+ if (src == BLIT2D_SRC_TYPE_IMAGE_3D && !create_3d)
+ continue;
- /* Don't need to handle copies between buffers and multisample images. */
- if (src == BLIT2D_SRC_TYPE_BUFFER && log2_samples > 0)
- continue;
+ /* Don't need to handle copies between buffers and multisample images. */
+ if (src == BLIT2D_SRC_TYPE_BUFFER && log2_samples > 0)
+ continue;
- result = meta_blit2d_create_pipe_layout(device, src, log2_samples);
- if (result != VK_SUCCESS)
- goto fail;
+ result = meta_blit2d_create_pipe_layout(device, src, log2_samples);
+ if (result != VK_SUCCESS)
+ goto fail;
- if (on_demand)
- continue;
+ if (on_demand)
+ continue;
- for (unsigned j = 0; j < NUM_META_FS_KEYS; ++j) {
- result = blit2d_init_color_pipeline(device, src, radv_fs_key_format_exemplars[j], log2_samples);
- if (result != VK_SUCCESS)
- goto fail;
- }
+ for (unsigned j = 0; j < NUM_META_FS_KEYS; ++j) {
+ result = blit2d_init_color_pipeline(device, src, radv_fs_key_format_exemplars[j],
+ log2_samples);
+ if (result != VK_SUCCESS)
+ goto fail;
+ }
- result = blit2d_init_depth_only_pipeline(device, src, log2_samples);
- if (result != VK_SUCCESS)
- goto fail;
+ result = blit2d_init_depth_only_pipeline(device, src, log2_samples);
+ if (result != VK_SUCCESS)
+ goto fail;
- result = blit2d_init_stencil_only_pipeline(device, src, log2_samples);
- if (result != VK_SUCCESS)
- goto fail;
- }
- }
+ result = blit2d_init_stencil_only_pipeline(device, src, log2_samples);
+ if (result != VK_SUCCESS)
+ goto fail;
+ }
+ }
- return VK_SUCCESS;
+ return VK_SUCCESS;
fail:
- radv_device_finish_meta_blit2d_state(device);
- return result;
+ radv_device_finish_meta_blit2d_state(device);
+ return result;
}
diff --git a/src/amd/vulkan/radv_meta_buffer.c b/src/amd/vulkan/radv_meta_buffer.c
index 1bfc15064f0..9c33dfe64ad 100644
--- a/src/amd/vulkan/radv_meta_buffer.c
+++ b/src/amd/vulkan/radv_meta_buffer.c
@@ -1,527 +1,450 @@
-#include "radv_meta.h"
#include "nir/nir_builder.h"
+#include "radv_meta.h"
-#include "sid.h"
#include "radv_cs.h"
+#include "sid.h"
static nir_shader *
build_buffer_fill_shader(struct radv_device *dev)
{
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL,
- "meta_buffer_fill");
- b.shader->info.cs.local_size[0] = 64;
- b.shader->info.cs.local_size[1] = 1;
- b.shader->info.cs.local_size[2] = 1;
+ nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_buffer_fill");
+ b.shader->info.cs.local_size[0] = 64;
+ b.shader->info.cs.local_size[1] = 1;
+ b.shader->info.cs.local_size[2] = 1;
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
- nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
- nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info.cs.local_size[0],
- b.shader->info.cs.local_size[1],
- b.shader->info.cs.local_size[2], 0);
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
+ nir_ssa_def *block_size =
+ nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1],
+ b.shader->info.cs.local_size[2], 0);
- nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+ nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
- nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
- offset = nir_channel(&b, offset, 0);
+ nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
+ offset = nir_channel(&b, offset, 0);
- nir_ssa_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
+ nir_ssa_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
- nir_ssa_def *load = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range=4);
- nir_ssa_def *swizzled_load = nir_swizzle(&b, load, (unsigned[]) { 0, 0, 0, 0}, 4);
+ nir_ssa_def *load = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range = 4);
+ nir_ssa_def *swizzled_load = nir_swizzle(&b, load, (unsigned[]){0, 0, 0, 0}, 4);
- nir_store_ssbo(&b, swizzled_load, dst_buf, offset, .write_mask=0xf,
- .access=ACCESS_NON_READABLE, .align_mul=16);
+ nir_store_ssbo(&b, swizzled_load, dst_buf, offset, .write_mask = 0xf,
+ .access = ACCESS_NON_READABLE, .align_mul = 16);
- return b.shader;
+ return b.shader;
}
static nir_shader *
build_buffer_copy_shader(struct radv_device *dev)
{
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL,
- "meta_buffer_copy");
- b.shader->info.cs.local_size[0] = 64;
- b.shader->info.cs.local_size[1] = 1;
- b.shader->info.cs.local_size[2] = 1;
+ nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_buffer_copy");
+ b.shader->info.cs.local_size[0] = 64;
+ b.shader->info.cs.local_size[1] = 1;
+ b.shader->info.cs.local_size[2] = 1;
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
- nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
- nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info.cs.local_size[0],
- b.shader->info.cs.local_size[1],
- b.shader->info.cs.local_size[2], 0);
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
+ nir_ssa_def *block_size =
+ nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1],
+ b.shader->info.cs.local_size[2], 0);
- nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+ nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
- nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
- offset = nir_channel(&b, offset, 0);
+ nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
+ offset = nir_channel(&b, offset, 0);
- nir_ssa_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
- nir_ssa_def *src_buf = radv_meta_load_descriptor(&b, 0, 1);
+ nir_ssa_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
+ nir_ssa_def *src_buf = radv_meta_load_descriptor(&b, 0, 1);
- nir_ssa_def *load = nir_load_ssbo(&b, 4, 32, src_buf, offset, .align_mul=16);
- nir_store_ssbo(&b, load, dst_buf, offset, .write_mask=0xf,
- .access=ACCESS_NON_READABLE, .align_mul=16);
+ nir_ssa_def *load = nir_load_ssbo(&b, 4, 32, src_buf, offset, .align_mul = 16);
+ nir_store_ssbo(&b, load, dst_buf, offset, .write_mask = 0xf, .access = ACCESS_NON_READABLE,
+ .align_mul = 16);
- return b.shader;
+ return b.shader;
}
-
-
-VkResult radv_device_init_meta_buffer_state(struct radv_device *device)
+VkResult
+radv_device_init_meta_buffer_state(struct radv_device *device)
{
- VkResult result;
- nir_shader *fill_cs = build_buffer_fill_shader(device);
- nir_shader *copy_cs = build_buffer_copy_shader(device);
-
- VkDescriptorSetLayoutCreateInfo fill_ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 1,
- .pBindings = (VkDescriptorSetLayoutBinding[]) {
- {
- .binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- }
- };
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
- &fill_ds_create_info,
- &device->meta_state.alloc,
- &device->meta_state.buffer.fill_ds_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
- VkDescriptorSetLayoutCreateInfo copy_ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 2,
- .pBindings = (VkDescriptorSetLayoutBinding[]) {
- {
- .binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- {
- .binding = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- }
- };
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
- &copy_ds_create_info,
- &device->meta_state.alloc,
- &device->meta_state.buffer.copy_ds_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
-
- VkPipelineLayoutCreateInfo fill_pl_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 1,
- .pSetLayouts = &device->meta_state.buffer.fill_ds_layout,
- .pushConstantRangeCount = 1,
- .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 4},
- };
-
- result = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &fill_pl_create_info,
- &device->meta_state.alloc,
- &device->meta_state.buffer.fill_p_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
- VkPipelineLayoutCreateInfo copy_pl_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 1,
- .pSetLayouts = &device->meta_state.buffer.copy_ds_layout,
- .pushConstantRangeCount = 0,
- };
-
- result = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &copy_pl_create_info,
- &device->meta_state.alloc,
- &device->meta_state.buffer.copy_p_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
- VkPipelineShaderStageCreateInfo fill_pipeline_shader_stage = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = vk_shader_module_handle_from_nir(fill_cs),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo fill_vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = fill_pipeline_shader_stage,
- .flags = 0,
- .layout = device->meta_state.buffer.fill_p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- 1, &fill_vk_pipeline_info, NULL,
- &device->meta_state.buffer.fill_pipeline);
- if (result != VK_SUCCESS)
- goto fail;
-
- VkPipelineShaderStageCreateInfo copy_pipeline_shader_stage = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = vk_shader_module_handle_from_nir(copy_cs),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo copy_vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = copy_pipeline_shader_stage,
- .flags = 0,
- .layout = device->meta_state.buffer.copy_p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- 1, &copy_vk_pipeline_info, NULL,
- &device->meta_state.buffer.copy_pipeline);
- if (result != VK_SUCCESS)
- goto fail;
-
- ralloc_free(fill_cs);
- ralloc_free(copy_cs);
- return VK_SUCCESS;
+ VkResult result;
+ nir_shader *fill_cs = build_buffer_fill_shader(device);
+ nir_shader *copy_cs = build_buffer_copy_shader(device);
+
+ VkDescriptorSetLayoutCreateInfo fill_ds_create_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 1,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &fill_ds_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.buffer.fill_ds_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ VkDescriptorSetLayoutCreateInfo copy_ds_create_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 2,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ {.binding = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &copy_ds_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.buffer.copy_ds_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineLayoutCreateInfo fill_pl_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &device->meta_state.buffer.fill_ds_layout,
+ .pushConstantRangeCount = 1,
+ .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 4},
+ };
+
+ result = radv_CreatePipelineLayout(radv_device_to_handle(device), &fill_pl_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.buffer.fill_p_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineLayoutCreateInfo copy_pl_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &device->meta_state.buffer.copy_ds_layout,
+ .pushConstantRangeCount = 0,
+ };
+
+ result = radv_CreatePipelineLayout(radv_device_to_handle(device), &copy_pl_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.buffer.copy_p_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineShaderStageCreateInfo fill_pipeline_shader_stage = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(fill_cs),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo fill_vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = fill_pipeline_shader_stage,
+ .flags = 0,
+ .layout = device->meta_state.buffer.fill_p_layout,
+ };
+
+ result = radv_CreateComputePipelines(
+ radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
+ &fill_vk_pipeline_info, NULL, &device->meta_state.buffer.fill_pipeline);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineShaderStageCreateInfo copy_pipeline_shader_stage = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(copy_cs),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo copy_vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = copy_pipeline_shader_stage,
+ .flags = 0,
+ .layout = device->meta_state.buffer.copy_p_layout,
+ };
+
+ result = radv_CreateComputePipelines(
+ radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
+ &copy_vk_pipeline_info, NULL, &device->meta_state.buffer.copy_pipeline);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ ralloc_free(fill_cs);
+ ralloc_free(copy_cs);
+ return VK_SUCCESS;
fail:
- radv_device_finish_meta_buffer_state(device);
- ralloc_free(fill_cs);
- ralloc_free(copy_cs);
- return result;
+ radv_device_finish_meta_buffer_state(device);
+ ralloc_free(fill_cs);
+ ralloc_free(copy_cs);
+ return result;
}
-void radv_device_finish_meta_buffer_state(struct radv_device *device)
+void
+radv_device_finish_meta_buffer_state(struct radv_device *device)
{
- struct radv_meta_state *state = &device->meta_state;
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->buffer.copy_pipeline, &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->buffer.fill_pipeline, &state->alloc);
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->buffer.copy_p_layout, &state->alloc);
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->buffer.fill_p_layout, &state->alloc);
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- state->buffer.copy_ds_layout,
- &state->alloc);
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- state->buffer.fill_ds_layout,
- &state->alloc);
+ struct radv_meta_state *state = &device->meta_state;
+
+ radv_DestroyPipeline(radv_device_to_handle(device), state->buffer.copy_pipeline, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->buffer.fill_pipeline, &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->buffer.copy_p_layout,
+ &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->buffer.fill_p_layout,
+ &state->alloc);
+ radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->buffer.copy_ds_layout,
+ &state->alloc);
+ radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->buffer.fill_ds_layout,
+ &state->alloc);
}
-static void fill_buffer_shader(struct radv_cmd_buffer *cmd_buffer,
- struct radeon_winsys_bo *bo,
- uint64_t offset, uint64_t size, uint32_t value)
+static void
+fill_buffer_shader(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *bo, uint64_t offset,
+ uint64_t size, uint32_t value)
{
- struct radv_device *device = cmd_buffer->device;
- uint64_t block_count = round_up_u64(size, 1024);
- struct radv_meta_saved_state saved_state;
-
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_COMPUTE_PIPELINE |
- RADV_META_SAVE_CONSTANTS |
- RADV_META_SAVE_DESCRIPTORS);
-
- struct radv_buffer dst_buffer = {
- .bo = bo,
- .offset = offset,
- .size = size
- };
-
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.buffer.fill_pipeline);
-
- radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.buffer.fill_p_layout,
- 0, /* set */
- 1, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .pBufferInfo = &(VkDescriptorBufferInfo) {
- .buffer = radv_buffer_to_handle(&dst_buffer),
- .offset = 0,
- .range = size
- }
- }
- });
-
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.buffer.fill_p_layout,
- VK_SHADER_STAGE_COMPUTE_BIT, 0, 4,
- &value);
-
- radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);
-
- radv_meta_restore(&saved_state, cmd_buffer);
+ struct radv_device *device = cmd_buffer->device;
+ uint64_t block_count = round_up_u64(size, 1024);
+ struct radv_meta_saved_state saved_state;
+
+ radv_meta_save(
+ &saved_state, cmd_buffer,
+ RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
+
+ struct radv_buffer dst_buffer = {.bo = bo, .offset = offset, .size = size};
+
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
+ device->meta_state.buffer.fill_pipeline);
+
+ radv_meta_push_descriptor_set(
+ cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.buffer.fill_p_layout,
+ 0, /* set */
+ 1, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){
+ {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .pBufferInfo = &(VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(&dst_buffer),
+ .offset = 0,
+ .range = size}}});
+
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+ device->meta_state.buffer.fill_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 4,
+ &value);
+
+ radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);
+
+ radv_meta_restore(&saved_state, cmd_buffer);
}
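The dispatch size above is simple arithmetic: each invocation of the fill shader stores one 16-byte vec4 and a workgroup is 64 invocations wide, so one group covers 64 * 16 = 1024 bytes and round_up_u64(size, 1024) is the resulting group count. A small illustrative helper (not part of the patch):

#include <stdint.h>

static uint64_t
fill_group_count_example(uint64_t size_bytes)
{
   const uint64_t bytes_per_invocation = 16;  /* one vec4 store per thread */
   const uint64_t invocations_per_group = 64; /* local_size[0] of the fill shader */
   const uint64_t bytes_per_group = bytes_per_invocation * invocations_per_group;

   /* e.g. 10000 bytes -> ceil(10000 / 1024) = 10 workgroups */
   return (size_bytes + bytes_per_group - 1) / bytes_per_group;
}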
-static void copy_buffer_shader(struct radv_cmd_buffer *cmd_buffer,
- struct radeon_winsys_bo *src_bo,
- struct radeon_winsys_bo *dst_bo,
- uint64_t src_offset, uint64_t dst_offset,
- uint64_t size)
+static void
+copy_buffer_shader(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *src_bo,
+ struct radeon_winsys_bo *dst_bo, uint64_t src_offset, uint64_t dst_offset,
+ uint64_t size)
{
- struct radv_device *device = cmd_buffer->device;
- uint64_t block_count = round_up_u64(size, 1024);
- struct radv_meta_saved_state saved_state;
-
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_COMPUTE_PIPELINE |
- RADV_META_SAVE_DESCRIPTORS);
-
- struct radv_buffer dst_buffer = {
- .bo = dst_bo,
- .offset = dst_offset,
- .size = size
- };
-
- struct radv_buffer src_buffer = {
- .bo = src_bo,
- .offset = src_offset,
- .size = size
- };
-
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.buffer.copy_pipeline);
-
- radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.buffer.copy_p_layout,
- 0, /* set */
- 2, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .pBufferInfo = &(VkDescriptorBufferInfo) {
- .buffer = radv_buffer_to_handle(&dst_buffer),
- .offset = 0,
- .range = size
- }
- },
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 1,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .pBufferInfo = &(VkDescriptorBufferInfo) {
- .buffer = radv_buffer_to_handle(&src_buffer),
- .offset = 0,
- .range = size
- }
- }
- });
-
- radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);
-
- radv_meta_restore(&saved_state, cmd_buffer);
+ struct radv_device *device = cmd_buffer->device;
+ uint64_t block_count = round_up_u64(size, 1024);
+ struct radv_meta_saved_state saved_state;
+
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS);
+
+ struct radv_buffer dst_buffer = {.bo = dst_bo, .offset = dst_offset, .size = size};
+
+ struct radv_buffer src_buffer = {.bo = src_bo, .offset = src_offset, .size = size};
+
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
+ device->meta_state.buffer.copy_pipeline);
+
+ radv_meta_push_descriptor_set(
+ cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.buffer.copy_p_layout,
+ 0, /* set */
+ 2, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){
+ {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .pBufferInfo = &(VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(&dst_buffer),
+ .offset = 0,
+ .range = size}},
+ {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 1,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .pBufferInfo = &(VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(&src_buffer),
+ .offset = 0,
+ .range = size}}});
+
+ radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);
+
+ radv_meta_restore(&saved_state, cmd_buffer);
}
static bool
-radv_prefer_compute_dma(const struct radv_device *device,
- uint64_t size,
- struct radeon_winsys_bo *src_bo,
- struct radeon_winsys_bo *dst_bo)
+radv_prefer_compute_dma(const struct radv_device *device, uint64_t size,
+ struct radeon_winsys_bo *src_bo, struct radeon_winsys_bo *dst_bo)
{
- bool use_compute = size >= RADV_BUFFER_OPS_CS_THRESHOLD;
-
- if (device->physical_device->rad_info.chip_class >= GFX10 &&
- device->physical_device->rad_info.has_dedicated_vram) {
- if ((src_bo && !(src_bo->initial_domain & RADEON_DOMAIN_VRAM)) ||
- !(dst_bo->initial_domain & RADEON_DOMAIN_VRAM)) {
-			/* Prefer CP DMA for GTT on dGPUs due to slow PCIe. */
- use_compute = false;
- }
- }
-
- return use_compute;
+ bool use_compute = size >= RADV_BUFFER_OPS_CS_THRESHOLD;
+
+ if (device->physical_device->rad_info.chip_class >= GFX10 &&
+ device->physical_device->rad_info.has_dedicated_vram) {
+ if ((src_bo && !(src_bo->initial_domain & RADEON_DOMAIN_VRAM)) ||
+ !(dst_bo->initial_domain & RADEON_DOMAIN_VRAM)) {
+         /* Prefer CP DMA for GTT on dGPUs due to slow PCIe. */
+ use_compute = false;
+ }
+ }
+
+ return use_compute;
}
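radv_prefer_compute_dma picks the compute path only for operations of at least RADV_BUFFER_OPS_CS_THRESHOLD bytes, and even then falls back to CP DMA when a GFX10+ dGPU would have to reach GTT memory over PCIe. A standalone sketch of that decision (illustrative only, not radv API; the numeric threshold is a placeholder for the real constant):

#include <stdbool.h>
#include <stdint.h>

static bool
prefer_compute_dma_example(uint64_t size, bool gfx10_plus_dgpu,
                           bool src_in_gtt, bool dst_in_gtt)
{
   /* Small copies/fills are cheaper on the CP DMA engine. */
   bool use_compute = size >= 4096; /* placeholder for RADV_BUFFER_OPS_CS_THRESHOLD */

   /* Shader access to GTT crosses PCIe on dGPUs, so keep those on CP DMA. */
   if (gfx10_plus_dgpu && (src_in_gtt || dst_in_gtt))
      use_compute = false;

   return use_compute;
}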
-uint32_t radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_image *image,
- struct radeon_winsys_bo *bo,
- uint64_t offset, uint64_t size, uint32_t value)
+uint32_t
+radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image,
+ struct radeon_winsys_bo *bo, uint64_t offset, uint64_t size, uint32_t value)
{
- bool use_compute = radv_prefer_compute_dma(cmd_buffer->device, size, NULL, bo);
- uint32_t flush_bits = 0;
+ bool use_compute = radv_prefer_compute_dma(cmd_buffer->device, size, NULL, bo);
+ uint32_t flush_bits = 0;
- assert(!(offset & 3));
- assert(!(size & 3));
+ assert(!(offset & 3));
+ assert(!(size & 3));
- if (use_compute) {
- cmd_buffer->state.flush_bits |=
- radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);
+ if (use_compute) {
+ cmd_buffer->state.flush_bits |=
+ radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);
- fill_buffer_shader(cmd_buffer, bo, offset, size, value);
+ fill_buffer_shader(cmd_buffer, bo, offset, size, value);
- flush_bits = RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_INV_VCACHE |
- radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);
- } else if (size) {
- uint64_t va = radv_buffer_get_va(bo);
- va += offset;
- radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, bo);
- si_cp_dma_clear_buffer(cmd_buffer, va, size, value);
- }
+ flush_bits = RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
+ radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);
+ } else if (size) {
+ uint64_t va = radv_buffer_get_va(bo);
+ va += offset;
+ radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, bo);
+ si_cp_dma_clear_buffer(cmd_buffer, va, size, value);
+ }
- return flush_bits;
+ return flush_bits;
}
-static
-void radv_copy_buffer(struct radv_cmd_buffer *cmd_buffer,
- struct radeon_winsys_bo *src_bo,
- struct radeon_winsys_bo *dst_bo,
- uint64_t src_offset, uint64_t dst_offset,
- uint64_t size)
+static void
+radv_copy_buffer(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *src_bo,
+ struct radeon_winsys_bo *dst_bo, uint64_t src_offset, uint64_t dst_offset,
+ uint64_t size)
{
- bool use_compute = !(size & 3) && !(src_offset & 3) && !(dst_offset & 3) &&
- radv_prefer_compute_dma(cmd_buffer->device, size, src_bo, dst_bo);
-
- if (use_compute)
- copy_buffer_shader(cmd_buffer, src_bo, dst_bo,
- src_offset, dst_offset, size);
- else if (size) {
- uint64_t src_va = radv_buffer_get_va(src_bo);
- uint64_t dst_va = radv_buffer_get_va(dst_bo);
- src_va += src_offset;
- dst_va += dst_offset;
-
- radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, src_bo);
- radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dst_bo);
-
- si_cp_dma_buffer_copy(cmd_buffer, src_va, dst_va, size);
- }
+ bool use_compute = !(size & 3) && !(src_offset & 3) && !(dst_offset & 3) &&
+ radv_prefer_compute_dma(cmd_buffer->device, size, src_bo, dst_bo);
+
+ if (use_compute)
+ copy_buffer_shader(cmd_buffer, src_bo, dst_bo, src_offset, dst_offset, size);
+ else if (size) {
+ uint64_t src_va = radv_buffer_get_va(src_bo);
+ uint64_t dst_va = radv_buffer_get_va(dst_bo);
+ src_va += src_offset;
+ dst_va += dst_offset;
+
+ radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, src_bo);
+ radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dst_bo);
+
+ si_cp_dma_buffer_copy(cmd_buffer, src_va, dst_va, size);
+ }
}
-void radv_CmdFillBuffer(
- VkCommandBuffer commandBuffer,
- VkBuffer dstBuffer,
- VkDeviceSize dstOffset,
- VkDeviceSize fillSize,
- uint32_t data)
+void
+radv_CmdFillBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset,
+ VkDeviceSize fillSize, uint32_t data)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
- if (fillSize == VK_WHOLE_SIZE)
- fillSize = (dst_buffer->size - dstOffset) & ~3ull;
+ if (fillSize == VK_WHOLE_SIZE)
+ fillSize = (dst_buffer->size - dstOffset) & ~3ull;
- radv_fill_buffer(cmd_buffer, NULL, dst_buffer->bo, dst_buffer->offset + dstOffset,
- fillSize, data);
+ radv_fill_buffer(cmd_buffer, NULL, dst_buffer->bo, dst_buffer->offset + dstOffset, fillSize,
+ data);
}
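With VK_WHOLE_SIZE, the remaining range is rounded down to a multiple of 4, matching the 4-byte alignment that radv_fill_buffer asserts. A worked example of that clamp (illustrative only, not part of the patch):

#include <stdint.h>

static uint64_t
whole_size_fill_bytes_example(uint64_t buffer_size, uint64_t dst_offset)
{
   /* e.g. a 1003-byte buffer at dstOffset 0 yields (1003 - 0) & ~3 = 1000 */
   return (buffer_size - dst_offset) & ~3ull;
}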
static void
-copy_buffer(struct radv_cmd_buffer *cmd_buffer,
- struct radv_buffer *src_buffer,
- struct radv_buffer *dst_buffer,
- const VkBufferCopy2KHR *region)
+copy_buffer(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *src_buffer,
+ struct radv_buffer *dst_buffer, const VkBufferCopy2KHR *region)
{
- bool old_predicating;
-
- /* VK_EXT_conditional_rendering says that copy commands should not be
- * affected by conditional rendering.
- */
- old_predicating = cmd_buffer->state.predicating;
- cmd_buffer->state.predicating = false;
-
- radv_copy_buffer(cmd_buffer,
- src_buffer->bo,
- dst_buffer->bo,
- src_buffer->offset + region->srcOffset,
- dst_buffer->offset + region->dstOffset,
- region->size);
-
- /* Restore conditional rendering. */
- cmd_buffer->state.predicating = old_predicating;
+ bool old_predicating;
+
+ /* VK_EXT_conditional_rendering says that copy commands should not be
+ * affected by conditional rendering.
+ */
+ old_predicating = cmd_buffer->state.predicating;
+ cmd_buffer->state.predicating = false;
+
+ radv_copy_buffer(cmd_buffer, src_buffer->bo, dst_buffer->bo,
+ src_buffer->offset + region->srcOffset, dst_buffer->offset + region->dstOffset,
+ region->size);
+
+ /* Restore conditional rendering. */
+ cmd_buffer->state.predicating = old_predicating;
}
-void radv_CmdCopyBuffer2KHR(
- VkCommandBuffer commandBuffer,
- const VkCopyBufferInfo2KHR* pCopyBufferInfo)
+void
+radv_CmdCopyBuffer2KHR(VkCommandBuffer commandBuffer, const VkCopyBufferInfo2KHR *pCopyBufferInfo)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_buffer, src_buffer, pCopyBufferInfo->srcBuffer);
- RADV_FROM_HANDLE(radv_buffer, dst_buffer, pCopyBufferInfo->dstBuffer);
-
- for (unsigned r = 0; r < pCopyBufferInfo->regionCount; r++) {
- copy_buffer(cmd_buffer, src_buffer, dst_buffer,
- &pCopyBufferInfo->pRegions[r]);
- }
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_buffer, src_buffer, pCopyBufferInfo->srcBuffer);
+ RADV_FROM_HANDLE(radv_buffer, dst_buffer, pCopyBufferInfo->dstBuffer);
+
+ for (unsigned r = 0; r < pCopyBufferInfo->regionCount; r++) {
+ copy_buffer(cmd_buffer, src_buffer, dst_buffer, &pCopyBufferInfo->pRegions[r]);
+ }
}
-void radv_CmdUpdateBuffer(
- VkCommandBuffer commandBuffer,
- VkBuffer dstBuffer,
- VkDeviceSize dstOffset,
- VkDeviceSize dataSize,
- const void* pData)
+void
+radv_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset,
+ VkDeviceSize dataSize, const void *pData)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
- bool mec = radv_cmd_buffer_uses_mec(cmd_buffer);
- uint64_t words = dataSize / 4;
- uint64_t va = radv_buffer_get_va(dst_buffer->bo);
- va += dstOffset + dst_buffer->offset;
-
- assert(!(dataSize & 3));
- assert(!(va & 3));
-
- if (!dataSize)
- return;
-
- if (dataSize < RADV_BUFFER_UPDATE_THRESHOLD) {
- si_emit_cache_flush(cmd_buffer);
-
- radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dst_buffer->bo);
-
- radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, words + 4);
-
- radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + words, 0));
- radeon_emit(cmd_buffer->cs, S_370_DST_SEL(mec ?
- V_370_MEM : V_370_MEM_GRBM) |
- S_370_WR_CONFIRM(1) |
- S_370_ENGINE_SEL(V_370_ME));
- radeon_emit(cmd_buffer->cs, va);
- radeon_emit(cmd_buffer->cs, va >> 32);
- radeon_emit_array(cmd_buffer->cs, pData, words);
-
- if (unlikely(cmd_buffer->device->trace_bo))
- radv_cmd_buffer_trace_emit(cmd_buffer);
- } else {
- uint32_t buf_offset;
- radv_cmd_buffer_upload_data(cmd_buffer, dataSize, pData, &buf_offset);
- radv_copy_buffer(cmd_buffer, cmd_buffer->upload.upload_bo, dst_buffer->bo,
- buf_offset, dstOffset + dst_buffer->offset, dataSize);
- }
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
+ bool mec = radv_cmd_buffer_uses_mec(cmd_buffer);
+ uint64_t words = dataSize / 4;
+ uint64_t va = radv_buffer_get_va(dst_buffer->bo);
+ va += dstOffset + dst_buffer->offset;
+
+ assert(!(dataSize & 3));
+ assert(!(va & 3));
+
+ if (!dataSize)
+ return;
+
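+   /* Small updates are inlined into the command stream with a WRITE_DATA
+    * packet; anything at or above RADV_BUFFER_UPDATE_THRESHOLD is staged in
+    * the upload BO and copied with radv_copy_buffer() instead.
+    */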
+ if (dataSize < RADV_BUFFER_UPDATE_THRESHOLD) {
+ si_emit_cache_flush(cmd_buffer);
+
+ radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dst_buffer->bo);
+
+ radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, words + 4);
+
+ radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + words, 0));
+ radeon_emit(cmd_buffer->cs, S_370_DST_SEL(mec ? V_370_MEM : V_370_MEM_GRBM) |
+ S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME));
+ radeon_emit(cmd_buffer->cs, va);
+ radeon_emit(cmd_buffer->cs, va >> 32);
+ radeon_emit_array(cmd_buffer->cs, pData, words);
+
+ if (unlikely(cmd_buffer->device->trace_bo))
+ radv_cmd_buffer_trace_emit(cmd_buffer);
+ } else {
+ uint32_t buf_offset;
+ radv_cmd_buffer_upload_data(cmd_buffer, dataSize, pData, &buf_offset);
+ radv_copy_buffer(cmd_buffer, cmd_buffer->upload.upload_bo, dst_buffer->bo, buf_offset,
+ dstOffset + dst_buffer->offset, dataSize);
+ }
}
diff --git a/src/amd/vulkan/radv_meta_bufimage.c b/src/amd/vulkan/radv_meta_bufimage.c
index c39be196ab7..da4884444b4 100644
--- a/src/amd/vulkan/radv_meta_bufimage.c
+++ b/src/amd/vulkan/radv_meta_bufimage.c
@@ -21,8 +21,8 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
-#include "radv_meta.h"
#include "nir/nir_builder.h"
+#include "radv_meta.h"
/*
* GFX queue: Compute shader implementation of image->buffer copy
@@ -35,2078 +35,1857 @@
static nir_shader *
build_nir_itob_compute_shader(struct radv_device *dev, bool is_3d)
{
- enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
- const struct glsl_type *sampler_type = glsl_sampler_type(dim,
- false,
- false,
- GLSL_TYPE_FLOAT);
- const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF,
- false,
- GLSL_TYPE_FLOAT);
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, is_3d ? "meta_itob_cs_3d" : "meta_itob_cs");
- b.shader->info.cs.local_size[0] = 8;
- b.shader->info.cs.local_size[1] = 8;
- b.shader->info.cs.local_size[2] = 1;
- nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
- sampler_type, "s_tex");
- input_img->data.descriptor_set = 0;
- input_img->data.binding = 0;
-
- nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
- img_type, "out_img");
- output_img->data.descriptor_set = 0;
- output_img->data.binding = 1;
-
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
- nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
- nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info.cs.local_size[0],
- b.shader->info.cs.local_size[1],
- b.shader->info.cs.local_size[2], 0);
-
- nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
-
- nir_ssa_def *offset = nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range=16);
- nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range=16);
-
- nir_ssa_def *img_coord = nir_iadd(&b, global_id, offset);
- nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
-
- nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
- tex->sampler_dim = dim;
- tex->op = nir_texop_txf;
- tex->src[0].src_type = nir_tex_src_coord;
- tex->src[0].src = nir_src_for_ssa(nir_channels(&b, img_coord, is_3d ? 0x7 : 0x3));
- tex->src[1].src_type = nir_tex_src_lod;
- tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
- tex->src[2].src_type = nir_tex_src_texture_deref;
- tex->src[2].src = nir_src_for_ssa(input_img_deref);
- tex->dest_type = nir_type_float32;
- tex->is_array = false;
- tex->coord_components = is_3d ? 3 : 2;
-
- nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
- nir_builder_instr_insert(&b, &tex->instr);
-
- nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
- nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);
-
- nir_ssa_def *tmp = nir_imul(&b, pos_y, stride);
- tmp = nir_iadd(&b, tmp, pos_x);
-
- nir_ssa_def *coord = nir_vec4(&b, tmp, tmp, tmp, tmp);
-
- nir_ssa_def *outval = &tex->dest.ssa;
- nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa,
- coord, nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0));
-
- return b.shader;
+ enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
+ const struct glsl_type *sampler_type = glsl_sampler_type(dim, false, false, GLSL_TYPE_FLOAT);
+ const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT);
+ nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL,
+ is_3d ? "meta_itob_cs_3d" : "meta_itob_cs");
+ b.shader->info.cs.local_size[0] = 8;
+ b.shader->info.cs.local_size[1] = 8;
+ b.shader->info.cs.local_size[2] = 1;
+ nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
+ input_img->data.descriptor_set = 0;
+ input_img->data.binding = 0;
+
+ nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
+ output_img->data.descriptor_set = 0;
+ output_img->data.binding = 1;
+
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
+ nir_ssa_def *block_size =
+ nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1],
+ b.shader->info.cs.local_size[2], 0);
+
+ nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+
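+   /* Push constants: the source image offset (two or three components) is at
+    * byte 0 and the destination buffer's row stride, in texels, at byte 12.
+    */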
+ nir_ssa_def *offset =
+ nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = 16);
+ nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);
+
+ nir_ssa_def *img_coord = nir_iadd(&b, global_id, offset);
+ nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
+
+ nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
+ tex->sampler_dim = dim;
+ tex->op = nir_texop_txf;
+ tex->src[0].src_type = nir_tex_src_coord;
+ tex->src[0].src = nir_src_for_ssa(nir_channels(&b, img_coord, is_3d ? 0x7 : 0x3));
+ tex->src[1].src_type = nir_tex_src_lod;
+ tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
+ tex->src[2].src_type = nir_tex_src_texture_deref;
+ tex->src[2].src = nir_src_for_ssa(input_img_deref);
+ tex->dest_type = nir_type_float32;
+ tex->is_array = false;
+ tex->coord_components = is_3d ? 3 : 2;
+
+ nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+ nir_builder_instr_insert(&b, &tex->instr);
+
+ nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
+ nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);
+
+ nir_ssa_def *tmp = nir_imul(&b, pos_y, stride);
+ tmp = nir_iadd(&b, tmp, pos_x);
+
+ nir_ssa_def *coord = nir_vec4(&b, tmp, tmp, tmp, tmp);
+
+ nir_ssa_def *outval = &tex->dest.ssa;
+ nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord,
+ nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0));
+
+ return b.shader;
}
/* Image to buffer - don't write use image accessors */
static VkResult
radv_device_init_meta_itob_state(struct radv_device *device)
{
- VkResult result;
- nir_shader *cs = build_nir_itob_compute_shader(device, false);
- nir_shader *cs_3d = NULL;
-
- if (device->physical_device->rad_info.chip_class >= GFX9)
- cs_3d = build_nir_itob_compute_shader(device, true);
-
- /*
- * two descriptors one for the image being sampled
- * one for the buffer being written.
- */
- VkDescriptorSetLayoutCreateInfo ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 2,
- .pBindings = (VkDescriptorSetLayoutBinding[]) {
- {
- .binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- {
- .binding = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- }
- };
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
- &ds_create_info,
- &device->meta_state.alloc,
- &device->meta_state.itob.img_ds_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
-
- VkPipelineLayoutCreateInfo pl_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 1,
- .pSetLayouts = &device->meta_state.itob.img_ds_layout,
- .pushConstantRangeCount = 1,
- .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
- };
-
- result = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &pl_create_info,
- &device->meta_state.alloc,
- &device->meta_state.itob.img_p_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
- /* compute shader */
-
- VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = vk_shader_module_handle_from_nir(cs),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = pipeline_shader_stage,
- .flags = 0,
- .layout = device->meta_state.itob.img_p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- 1, &vk_pipeline_info, NULL,
- &device->meta_state.itob.pipeline);
- if (result != VK_SUCCESS)
- goto fail;
-
- if (device->physical_device->rad_info.chip_class >= GFX9) {
- VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = vk_shader_module_handle_from_nir(cs_3d),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo vk_pipeline_info_3d = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = pipeline_shader_stage_3d,
- .flags = 0,
- .layout = device->meta_state.itob.img_p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- 1, &vk_pipeline_info_3d, NULL,
- &device->meta_state.itob.pipeline_3d);
- if (result != VK_SUCCESS)
- goto fail;
- ralloc_free(cs_3d);
- }
- ralloc_free(cs);
-
- return VK_SUCCESS;
+ VkResult result;
+ nir_shader *cs = build_nir_itob_compute_shader(device, false);
+ nir_shader *cs_3d = NULL;
+
+ if (device->physical_device->rad_info.chip_class >= GFX9)
+ cs_3d = build_nir_itob_compute_shader(device, true);
+
+   /*
+    * Two descriptors: one for the image being sampled and one for the
+    * buffer being written.
+    */
+ VkDescriptorSetLayoutCreateInfo ds_create_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 2,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ {.binding = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.itob.img_ds_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineLayoutCreateInfo pl_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &device->meta_state.itob.img_ds_layout,
+ .pushConstantRangeCount = 1,
+ .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
+ };
+
+ result =
+ radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
+ &device->meta_state.alloc, &device->meta_state.itob.img_p_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ /* compute shader */
+
+ VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(cs),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = pipeline_shader_stage,
+ .flags = 0,
+ .layout = device->meta_state.itob.img_p_layout,
+ };
+
+ result = radv_CreateComputePipelines(radv_device_to_handle(device),
+ radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
+ &vk_pipeline_info, NULL, &device->meta_state.itob.pipeline);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ if (device->physical_device->rad_info.chip_class >= GFX9) {
+ VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(cs_3d),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo vk_pipeline_info_3d = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = pipeline_shader_stage_3d,
+ .flags = 0,
+ .layout = device->meta_state.itob.img_p_layout,
+ };
+
+ result = radv_CreateComputePipelines(
+ radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
+ &vk_pipeline_info_3d, NULL, &device->meta_state.itob.pipeline_3d);
+ if (result != VK_SUCCESS)
+ goto fail;
+ ralloc_free(cs_3d);
+ }
+ ralloc_free(cs);
+
+ return VK_SUCCESS;
fail:
- ralloc_free(cs);
- ralloc_free(cs_3d);
- return result;
+ ralloc_free(cs);
+ ralloc_free(cs_3d);
+ return result;
}
static void
radv_device_finish_meta_itob_state(struct radv_device *device)
{
- struct radv_meta_state *state = &device->meta_state;
-
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->itob.img_p_layout, &state->alloc);
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- state->itob.img_ds_layout,
- &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->itob.pipeline, &state->alloc);
- if (device->physical_device->rad_info.chip_class >= GFX9)
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->itob.pipeline_3d, &state->alloc);
+ struct radv_meta_state *state = &device->meta_state;
+
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->itob.img_p_layout,
+ &state->alloc);
+ radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->itob.img_ds_layout,
+ &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->itob.pipeline, &state->alloc);
+ if (device->physical_device->rad_info.chip_class >= GFX9)
+ radv_DestroyPipeline(radv_device_to_handle(device), state->itob.pipeline_3d, &state->alloc);
}
static nir_shader *
build_nir_btoi_compute_shader(struct radv_device *dev, bool is_3d)
{
- enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
- const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
- false,
- false,
- GLSL_TYPE_FLOAT);
- const struct glsl_type *img_type = glsl_image_type(dim,
- false,
- GLSL_TYPE_FLOAT);
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, is_3d ? "meta_btoi_cs_3d" : "meta_btoi_cs");
- b.shader->info.cs.local_size[0] = 8;
- b.shader->info.cs.local_size[1] = 8;
- b.shader->info.cs.local_size[2] = 1;
- nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
- buf_type, "s_tex");
- input_img->data.descriptor_set = 0;
- input_img->data.binding = 0;
-
- nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
- img_type, "out_img");
- output_img->data.descriptor_set = 0;
- output_img->data.binding = 1;
-
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
- nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
- nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info.cs.local_size[0],
- b.shader->info.cs.local_size[1],
- b.shader->info.cs.local_size[2], 0);
-
- nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
-
- nir_ssa_def *offset = nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range=16);
- nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range=16);
-
- nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
- nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);
-
- nir_ssa_def *tmp = nir_imul(&b, pos_y, stride);
- tmp = nir_iadd(&b, tmp, pos_x);
-
- nir_ssa_def *buf_coord = nir_vec4(&b, tmp, tmp, tmp, tmp);
-
- nir_ssa_def *img_coord = nir_iadd(&b, global_id, offset);
- nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
-
- nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
- tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
- tex->op = nir_texop_txf;
- tex->src[0].src_type = nir_tex_src_coord;
- tex->src[0].src = nir_src_for_ssa(nir_channels(&b, buf_coord, 1));
- tex->src[1].src_type = nir_tex_src_lod;
- tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
- tex->src[2].src_type = nir_tex_src_texture_deref;
- tex->src[2].src = nir_src_for_ssa(input_img_deref);
- tex->dest_type = nir_type_float32;
- tex->is_array = false;
- tex->coord_components = 1;
-
- nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
- nir_builder_instr_insert(&b, &tex->instr);
-
- nir_ssa_def *outval = &tex->dest.ssa;
- nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa,
- img_coord, nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0));
-
- return b.shader;
+ enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
+ const struct glsl_type *buf_type =
+ glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT);
+ const struct glsl_type *img_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
+ nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL,
+ is_3d ? "meta_btoi_cs_3d" : "meta_btoi_cs");
+ b.shader->info.cs.local_size[0] = 8;
+ b.shader->info.cs.local_size[1] = 8;
+ b.shader->info.cs.local_size[2] = 1;
+ nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, buf_type, "s_tex");
+ input_img->data.descriptor_set = 0;
+ input_img->data.binding = 0;
+
+ nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
+ output_img->data.descriptor_set = 0;
+ output_img->data.binding = 1;
+
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
+ nir_ssa_def *block_size =
+ nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1],
+ b.shader->info.cs.local_size[2], 0);
+
+ nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+
+ nir_ssa_def *offset =
+ nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = 16);
+ nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);
+
+ nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
+ nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);
+
+ nir_ssa_def *tmp = nir_imul(&b, pos_y, stride);
+ tmp = nir_iadd(&b, tmp, pos_x);
+
+ nir_ssa_def *buf_coord = nir_vec4(&b, tmp, tmp, tmp, tmp);
+
+ nir_ssa_def *img_coord = nir_iadd(&b, global_id, offset);
+ nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
+
+ nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
+ tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
+ tex->op = nir_texop_txf;
+ tex->src[0].src_type = nir_tex_src_coord;
+ tex->src[0].src = nir_src_for_ssa(nir_channels(&b, buf_coord, 1));
+ tex->src[1].src_type = nir_tex_src_lod;
+ tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
+ tex->src[2].src_type = nir_tex_src_texture_deref;
+ tex->src[2].src = nir_src_for_ssa(input_img_deref);
+ tex->dest_type = nir_type_float32;
+ tex->is_array = false;
+ tex->coord_components = 1;
+
+ nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+ nir_builder_instr_insert(&b, &tex->instr);
+
+ nir_ssa_def *outval = &tex->dest.ssa;
+ nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord,
+ nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0));
+
+ return b.shader;
}
/* Buffer to image - don't write use image accessors */
static VkResult
radv_device_init_meta_btoi_state(struct radv_device *device)
{
- VkResult result;
- nir_shader *cs = build_nir_btoi_compute_shader(device, false);
- nir_shader *cs_3d = NULL;
- if (device->physical_device->rad_info.chip_class >= GFX9)
- cs_3d = build_nir_btoi_compute_shader(device, true);
- /*
- * two descriptors one for the image being sampled
- * one for the buffer being written.
- */
- VkDescriptorSetLayoutCreateInfo ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 2,
- .pBindings = (VkDescriptorSetLayoutBinding[]) {
- {
- .binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- {
- .binding = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- }
- };
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
- &ds_create_info,
- &device->meta_state.alloc,
- &device->meta_state.btoi.img_ds_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
-
- VkPipelineLayoutCreateInfo pl_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 1,
- .pSetLayouts = &device->meta_state.btoi.img_ds_layout,
- .pushConstantRangeCount = 1,
- .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
- };
-
- result = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &pl_create_info,
- &device->meta_state.alloc,
- &device->meta_state.btoi.img_p_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
- /* compute shader */
-
- VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = vk_shader_module_handle_from_nir(cs),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = pipeline_shader_stage,
- .flags = 0,
- .layout = device->meta_state.btoi.img_p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- 1, &vk_pipeline_info, NULL,
- &device->meta_state.btoi.pipeline);
- if (result != VK_SUCCESS)
- goto fail;
-
- if (device->physical_device->rad_info.chip_class >= GFX9) {
- VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = vk_shader_module_handle_from_nir(cs_3d),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo vk_pipeline_info_3d = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = pipeline_shader_stage_3d,
- .flags = 0,
- .layout = device->meta_state.btoi.img_p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- 1, &vk_pipeline_info_3d, NULL,
- &device->meta_state.btoi.pipeline_3d);
- ralloc_free(cs_3d);
- }
- ralloc_free(cs);
-
- return VK_SUCCESS;
+ VkResult result;
+ nir_shader *cs = build_nir_btoi_compute_shader(device, false);
+ nir_shader *cs_3d = NULL;
+ if (device->physical_device->rad_info.chip_class >= GFX9)
+ cs_3d = build_nir_btoi_compute_shader(device, true);
+   /*
+    * Two descriptors: one for the source buffer and one for the
+    * image being written.
+    */
+ VkDescriptorSetLayoutCreateInfo ds_create_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 2,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ {.binding = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.btoi.img_ds_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineLayoutCreateInfo pl_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &device->meta_state.btoi.img_ds_layout,
+ .pushConstantRangeCount = 1,
+ .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
+ };
+
+ result =
+ radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
+ &device->meta_state.alloc, &device->meta_state.btoi.img_p_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ /* compute shader */
+
+ VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(cs),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = pipeline_shader_stage,
+ .flags = 0,
+ .layout = device->meta_state.btoi.img_p_layout,
+ };
+
+ result = radv_CreateComputePipelines(radv_device_to_handle(device),
+ radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
+ &vk_pipeline_info, NULL, &device->meta_state.btoi.pipeline);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ if (device->physical_device->rad_info.chip_class >= GFX9) {
+ VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(cs_3d),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo vk_pipeline_info_3d = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = pipeline_shader_stage_3d,
+ .flags = 0,
+ .layout = device->meta_state.btoi.img_p_layout,
+ };
+
+      result = radv_CreateComputePipelines(
+         radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
+         &vk_pipeline_info_3d, NULL, &device->meta_state.btoi.pipeline_3d);
+      if (result != VK_SUCCESS)
+         goto fail;
+      ralloc_free(cs_3d);
+ }
+ ralloc_free(cs);
+
+ return VK_SUCCESS;
fail:
- ralloc_free(cs_3d);
- ralloc_free(cs);
- return result;
+ ralloc_free(cs_3d);
+ ralloc_free(cs);
+ return result;
}
static void
radv_device_finish_meta_btoi_state(struct radv_device *device)
{
- struct radv_meta_state *state = &device->meta_state;
-
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->btoi.img_p_layout, &state->alloc);
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- state->btoi.img_ds_layout,
- &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->btoi.pipeline, &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->btoi.pipeline_3d, &state->alloc);
+ struct radv_meta_state *state = &device->meta_state;
+
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->btoi.img_p_layout,
+ &state->alloc);
+ radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->btoi.img_ds_layout,
+ &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->btoi.pipeline, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->btoi.pipeline_3d, &state->alloc);
}
/* Buffer to image - special path for R32G32B32 */
static nir_shader *
build_nir_btoi_r32g32b32_compute_shader(struct radv_device *dev)
{
- const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
- false,
- false,
- GLSL_TYPE_FLOAT);
- const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF,
- false,
- GLSL_TYPE_FLOAT);
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_btoi_r32g32b32_cs");
- b.shader->info.cs.local_size[0] = 8;
- b.shader->info.cs.local_size[1] = 8;
- b.shader->info.cs.local_size[2] = 1;
- nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
- buf_type, "s_tex");
- input_img->data.descriptor_set = 0;
- input_img->data.binding = 0;
-
- nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
- img_type, "out_img");
- output_img->data.descriptor_set = 0;
- output_img->data.binding = 1;
-
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
- nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
- nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info.cs.local_size[0],
- b.shader->info.cs.local_size[1],
- b.shader->info.cs.local_size[2], 0);
-
- nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
-
- nir_ssa_def *offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range=16);
- nir_ssa_def *pitch = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 8), .range=16);
- nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range=16);
-
- nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
- nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);
-
- nir_ssa_def *tmp = nir_imul(&b, pos_y, stride);
- tmp = nir_iadd(&b, tmp, pos_x);
-
- nir_ssa_def *buf_coord = nir_vec4(&b, tmp, tmp, tmp, tmp);
-
- nir_ssa_def *img_coord = nir_iadd(&b, global_id, offset);
-
- nir_ssa_def *global_pos =
- nir_iadd(&b,
- nir_imul(&b, nir_channel(&b, img_coord, 1), pitch),
- nir_imul(&b, nir_channel(&b, img_coord, 0), nir_imm_int(&b, 3)));
-
- nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
-
- nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
- tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
- tex->op = nir_texop_txf;
- tex->src[0].src_type = nir_tex_src_coord;
- tex->src[0].src = nir_src_for_ssa(nir_channels(&b, buf_coord, 1));
- tex->src[1].src_type = nir_tex_src_lod;
- tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
- tex->src[2].src_type = nir_tex_src_texture_deref;
- tex->src[2].src = nir_src_for_ssa(input_img_deref);
- tex->dest_type = nir_type_float32;
- tex->is_array = false;
- tex->coord_components = 1;
- nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
- nir_builder_instr_insert(&b, &tex->instr);
-
- nir_ssa_def *outval = &tex->dest.ssa;
-
- for (int chan = 0; chan < 3; chan++) {
- nir_ssa_def *local_pos =
- nir_iadd(&b, global_pos, nir_imm_int(&b, chan));
-
- nir_ssa_def *coord =
- nir_vec4(&b, local_pos, local_pos, local_pos, local_pos);
-
- nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa,
- coord, nir_ssa_undef(&b, 1, 32),
- nir_channel(&b, outval, chan), nir_imm_int(&b, 0));
- }
-
- return b.shader;
+ const struct glsl_type *buf_type =
+ glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT);
+ const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT);
+ nir_builder b =
+ nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_btoi_r32g32b32_cs");
+ b.shader->info.cs.local_size[0] = 8;
+ b.shader->info.cs.local_size[1] = 8;
+ b.shader->info.cs.local_size[2] = 1;
+ nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, buf_type, "s_tex");
+ input_img->data.descriptor_set = 0;
+ input_img->data.binding = 0;
+
+ nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
+ output_img->data.descriptor_set = 0;
+ output_img->data.binding = 1;
+
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
+ nir_ssa_def *block_size =
+ nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1],
+ b.shader->info.cs.local_size[2], 0);
+
+ nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+
+ nir_ssa_def *offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 16);
+ nir_ssa_def *pitch = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 8), .range = 16);
+ nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);
+
+ nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
+ nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);
+
+ nir_ssa_def *tmp = nir_imul(&b, pos_y, stride);
+ tmp = nir_iadd(&b, tmp, pos_x);
+
+ nir_ssa_def *buf_coord = nir_vec4(&b, tmp, tmp, tmp, tmp);
+
+ nir_ssa_def *img_coord = nir_iadd(&b, global_id, offset);
+
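+   /* Each R32G32B32 pixel is written as three consecutive R32 texels, so the
+    * x coordinate is scaled by 3 and the pitch is expressed in R32 texels.
+    */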
+ nir_ssa_def *global_pos =
+ nir_iadd(&b, nir_imul(&b, nir_channel(&b, img_coord, 1), pitch),
+ nir_imul(&b, nir_channel(&b, img_coord, 0), nir_imm_int(&b, 3)));
+
+ nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
+
+ nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
+ tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
+ tex->op = nir_texop_txf;
+ tex->src[0].src_type = nir_tex_src_coord;
+ tex->src[0].src = nir_src_for_ssa(nir_channels(&b, buf_coord, 1));
+ tex->src[1].src_type = nir_tex_src_lod;
+ tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
+ tex->src[2].src_type = nir_tex_src_texture_deref;
+ tex->src[2].src = nir_src_for_ssa(input_img_deref);
+ tex->dest_type = nir_type_float32;
+ tex->is_array = false;
+ tex->coord_components = 1;
+ nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+ nir_builder_instr_insert(&b, &tex->instr);
+
+ nir_ssa_def *outval = &tex->dest.ssa;
+
+ for (int chan = 0; chan < 3; chan++) {
+ nir_ssa_def *local_pos = nir_iadd(&b, global_pos, nir_imm_int(&b, chan));
+
+ nir_ssa_def *coord = nir_vec4(&b, local_pos, local_pos, local_pos, local_pos);
+
+ nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord,
+ nir_ssa_undef(&b, 1, 32), nir_channel(&b, outval, chan),
+ nir_imm_int(&b, 0));
+ }
+
+ return b.shader;
}
static VkResult
radv_device_init_meta_btoi_r32g32b32_state(struct radv_device *device)
{
- VkResult result;
- nir_shader *cs = build_nir_btoi_r32g32b32_compute_shader(device);
-
- VkDescriptorSetLayoutCreateInfo ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 2,
- .pBindings = (VkDescriptorSetLayoutBinding[]) {
- {
- .binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- {
- .binding = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- }
- };
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
- &ds_create_info,
- &device->meta_state.alloc,
- &device->meta_state.btoi_r32g32b32.img_ds_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
-
- VkPipelineLayoutCreateInfo pl_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 1,
- .pSetLayouts = &device->meta_state.btoi_r32g32b32.img_ds_layout,
- .pushConstantRangeCount = 1,
- .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
- };
-
- result = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &pl_create_info,
- &device->meta_state.alloc,
- &device->meta_state.btoi_r32g32b32.img_p_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
- /* compute shader */
-
- VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = vk_shader_module_handle_from_nir(cs),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = pipeline_shader_stage,
- .flags = 0,
- .layout = device->meta_state.btoi_r32g32b32.img_p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- 1, &vk_pipeline_info, NULL,
- &device->meta_state.btoi_r32g32b32.pipeline);
+ VkResult result;
+ nir_shader *cs = build_nir_btoi_r32g32b32_compute_shader(device);
+
+ VkDescriptorSetLayoutCreateInfo ds_create_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 2,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ {.binding = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.btoi_r32g32b32.img_ds_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineLayoutCreateInfo pl_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &device->meta_state.btoi_r32g32b32.img_ds_layout,
+ .pushConstantRangeCount = 1,
+ .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
+ };
+
+ result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.btoi_r32g32b32.img_p_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ /* compute shader */
+
+ VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(cs),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = pipeline_shader_stage,
+ .flags = 0,
+ .layout = device->meta_state.btoi_r32g32b32.img_p_layout,
+ };
+
+ result = radv_CreateComputePipelines(
+ radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
+ &vk_pipeline_info, NULL, &device->meta_state.btoi_r32g32b32.pipeline);
fail:
- ralloc_free(cs);
- return result;
+ ralloc_free(cs);
+ return result;
}
static void
radv_device_finish_meta_btoi_r32g32b32_state(struct radv_device *device)
{
- struct radv_meta_state *state = &device->meta_state;
-
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->btoi_r32g32b32.img_p_layout, &state->alloc);
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- state->btoi_r32g32b32.img_ds_layout,
- &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->btoi_r32g32b32.pipeline, &state->alloc);
+ struct radv_meta_state *state = &device->meta_state;
+
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->btoi_r32g32b32.img_p_layout,
+ &state->alloc);
+ radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
+ state->btoi_r32g32b32.img_ds_layout, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->btoi_r32g32b32.pipeline,
+ &state->alloc);
}
static nir_shader *
build_nir_itoi_compute_shader(struct radv_device *dev, bool is_3d, int samples)
{
- bool is_multisampled = samples > 1;
- enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : is_multisampled ? GLSL_SAMPLER_DIM_MS : GLSL_SAMPLER_DIM_2D;
- const struct glsl_type *buf_type = glsl_sampler_type(dim,
- false,
- false,
- GLSL_TYPE_FLOAT);
- const struct glsl_type *img_type = glsl_image_type(dim,
- false,
- GLSL_TYPE_FLOAT);
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, is_3d ? "meta_itoi_cs_3d-%d" : "meta_itoi_cs-%d", samples);
- b.shader->info.cs.local_size[0] = 8;
- b.shader->info.cs.local_size[1] = 8;
- b.shader->info.cs.local_size[2] = 1;
- nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
- buf_type, "s_tex");
- input_img->data.descriptor_set = 0;
- input_img->data.binding = 0;
-
- nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
- img_type, "out_img");
- output_img->data.descriptor_set = 0;
- output_img->data.binding = 1;
-
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
- nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
- nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info.cs.local_size[0],
- b.shader->info.cs.local_size[1],
- b.shader->info.cs.local_size[2], 0);
-
- nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
-
- nir_ssa_def *src_offset = nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range=24);
- nir_ssa_def *dst_offset = nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 12), .range=24);
-
- nir_ssa_def *src_coord = nir_iadd(&b, global_id, src_offset);
- nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
-
- nir_ssa_def *dst_coord = nir_iadd(&b, global_id, dst_offset);
-
- nir_tex_instr *tex_instr[8];
- for (uint32_t i = 0; i < samples; i++) {
- tex_instr[i] = nir_tex_instr_create(b.shader, is_multisampled ? 4 : 3);
-
- nir_tex_instr *tex = tex_instr[i];
- tex->sampler_dim = dim;
- tex->op = is_multisampled ? nir_texop_txf_ms : nir_texop_txf;
- tex->src[0].src_type = nir_tex_src_coord;
- tex->src[0].src = nir_src_for_ssa(nir_channels(&b, src_coord, is_3d ? 0x7 : 0x3));
- tex->src[1].src_type = nir_tex_src_lod;
- tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
- tex->src[2].src_type = nir_tex_src_texture_deref;
- tex->src[2].src = nir_src_for_ssa(input_img_deref);
- if (is_multisampled) {
- tex->src[3].src_type = nir_tex_src_ms_index;
- tex->src[3].src = nir_src_for_ssa(nir_imm_int(&b, i));
- }
- tex->dest_type = nir_type_float32;
- tex->is_array = false;
- tex->coord_components = is_3d ? 3 : 2;
-
- nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
- nir_builder_instr_insert(&b, &tex->instr);
- }
-
- for (uint32_t i = 0; i < samples; i++) {
- nir_ssa_def *outval = &tex_instr[i]->dest.ssa;
- nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa,
- dst_coord, nir_imm_int(&b, i), outval, nir_imm_int(&b, 0));
- }
-
- return b.shader;
+ bool is_multisampled = samples > 1;
+ enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D
+ : is_multisampled ? GLSL_SAMPLER_DIM_MS
+ : GLSL_SAMPLER_DIM_2D;
+ const struct glsl_type *buf_type = glsl_sampler_type(dim, false, false, GLSL_TYPE_FLOAT);
+ const struct glsl_type *img_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
+ nir_builder b = nir_builder_init_simple_shader(
+ MESA_SHADER_COMPUTE, NULL, is_3d ? "meta_itoi_cs_3d-%d" : "meta_itoi_cs-%d", samples);
+ b.shader->info.cs.local_size[0] = 8;
+ b.shader->info.cs.local_size[1] = 8;
+ b.shader->info.cs.local_size[2] = 1;
+ nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, buf_type, "s_tex");
+ input_img->data.descriptor_set = 0;
+ input_img->data.binding = 0;
+
+ nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
+ output_img->data.descriptor_set = 0;
+ output_img->data.binding = 1;
+
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
+ nir_ssa_def *block_size =
+ nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1],
+ b.shader->info.cs.local_size[2], 0);
+
+ nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+
+ nir_ssa_def *src_offset =
+ nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = 24);
+ nir_ssa_def *dst_offset =
+ nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 12), .range = 24);
+
+ nir_ssa_def *src_coord = nir_iadd(&b, global_id, src_offset);
+ nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
+
+ nir_ssa_def *dst_coord = nir_iadd(&b, global_id, dst_offset);
+
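+   /* Fetch every sample of the source texel first, then store them into the
+    * destination; single-sampled images run one iteration of each loop.
+    */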
+ nir_tex_instr *tex_instr[8];
+ for (uint32_t i = 0; i < samples; i++) {
+ tex_instr[i] = nir_tex_instr_create(b.shader, is_multisampled ? 4 : 3);
+
+ nir_tex_instr *tex = tex_instr[i];
+ tex->sampler_dim = dim;
+ tex->op = is_multisampled ? nir_texop_txf_ms : nir_texop_txf;
+ tex->src[0].src_type = nir_tex_src_coord;
+ tex->src[0].src = nir_src_for_ssa(nir_channels(&b, src_coord, is_3d ? 0x7 : 0x3));
+ tex->src[1].src_type = nir_tex_src_lod;
+ tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
+ tex->src[2].src_type = nir_tex_src_texture_deref;
+ tex->src[2].src = nir_src_for_ssa(input_img_deref);
+ if (is_multisampled) {
+ tex->src[3].src_type = nir_tex_src_ms_index;
+ tex->src[3].src = nir_src_for_ssa(nir_imm_int(&b, i));
+ }
+ tex->dest_type = nir_type_float32;
+ tex->is_array = false;
+ tex->coord_components = is_3d ? 3 : 2;
+
+ nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+ nir_builder_instr_insert(&b, &tex->instr);
+ }
+
+ for (uint32_t i = 0; i < samples; i++) {
+ nir_ssa_def *outval = &tex_instr[i]->dest.ssa;
+ nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, dst_coord,
+ nir_imm_int(&b, i), outval, nir_imm_int(&b, 0));
+ }
+
+ return b.shader;
}
static VkResult
-create_itoi_pipeline(struct radv_device *device,
- int samples,
- VkPipeline *pipeline)
+create_itoi_pipeline(struct radv_device *device, int samples, VkPipeline *pipeline)
{
- struct radv_meta_state *state = &device->meta_state;
- nir_shader *cs = build_nir_itoi_compute_shader(device, false, samples);
- VkResult result;
-
- VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = vk_shader_module_handle_from_nir(cs),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = pipeline_shader_stage,
- .flags = 0,
- .layout = state->itoi.img_p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&state->cache),
- 1, &vk_pipeline_info, NULL,
- pipeline);
- ralloc_free(cs);
- return result;
+ struct radv_meta_state *state = &device->meta_state;
+ nir_shader *cs = build_nir_itoi_compute_shader(device, false, samples);
+ VkResult result;
+
+ VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(cs),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = pipeline_shader_stage,
+ .flags = 0,
+ .layout = state->itoi.img_p_layout,
+ };
+
+ result = radv_CreateComputePipelines(radv_device_to_handle(device),
+ radv_pipeline_cache_to_handle(&state->cache), 1,
+ &vk_pipeline_info, NULL, pipeline);
+ ralloc_free(cs);
+ return result;
}
/* image to image - don't write use image accessors */
static VkResult
radv_device_init_meta_itoi_state(struct radv_device *device)
{
- VkResult result;
-
- /*
- * two descriptors one for the image being sampled
- * one for the buffer being written.
- */
- VkDescriptorSetLayoutCreateInfo ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 2,
- .pBindings = (VkDescriptorSetLayoutBinding[]) {
- {
- .binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- {
- .binding = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- }
- };
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
- &ds_create_info,
- &device->meta_state.alloc,
- &device->meta_state.itoi.img_ds_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
-
- VkPipelineLayoutCreateInfo pl_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 1,
- .pSetLayouts = &device->meta_state.itoi.img_ds_layout,
- .pushConstantRangeCount = 1,
- .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 24},
- };
-
- result = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &pl_create_info,
- &device->meta_state.alloc,
- &device->meta_state.itoi.img_p_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
- for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; i++) {
- uint32_t samples = 1 << i;
- result = create_itoi_pipeline(device, samples,
- &device->meta_state.itoi.pipeline[i]);
- if (result != VK_SUCCESS)
- goto fail;
- }
-
- if (device->physical_device->rad_info.chip_class >= GFX9) {
- nir_shader *cs_3d = build_nir_itoi_compute_shader(device, true, 1);
-
- VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
-.stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = vk_shader_module_handle_from_nir(cs_3d),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo vk_pipeline_info_3d = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = pipeline_shader_stage_3d,
- .flags = 0,
- .layout = device->meta_state.itoi.img_p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- 1, &vk_pipeline_info_3d, NULL,
- &device->meta_state.itoi.pipeline_3d);
- ralloc_free(cs_3d);
- }
-
- return VK_SUCCESS;
+ VkResult result;
+
+   /*
+    * Two descriptors: one for the image being sampled and one for the
+    * image being written.
+    */
+ VkDescriptorSetLayoutCreateInfo ds_create_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 2,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ {.binding = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.itoi.img_ds_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineLayoutCreateInfo pl_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &device->meta_state.itoi.img_ds_layout,
+ .pushConstantRangeCount = 1,
+ .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 24},
+ };
+
+ result =
+ radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
+ &device->meta_state.alloc, &device->meta_state.itoi.img_p_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; i++) {
+ uint32_t samples = 1 << i;
+ result = create_itoi_pipeline(device, samples, &device->meta_state.itoi.pipeline[i]);
+ if (result != VK_SUCCESS)
+ goto fail;
+ }
+
+ if (device->physical_device->rad_info.chip_class >= GFX9) {
+ nir_shader *cs_3d = build_nir_itoi_compute_shader(device, true, 1);
+
+ VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(cs_3d),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo vk_pipeline_info_3d = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = pipeline_shader_stage_3d,
+ .flags = 0,
+ .layout = device->meta_state.itoi.img_p_layout,
+ };
+
+      result = radv_CreateComputePipelines(
+         radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
+         &vk_pipeline_info_3d, NULL, &device->meta_state.itoi.pipeline_3d);
+      ralloc_free(cs_3d);
+      if (result != VK_SUCCESS)
+         goto fail;
+ }
+
+ return VK_SUCCESS;
fail:
- return result;
+ return result;
}
static void
radv_device_finish_meta_itoi_state(struct radv_device *device)
{
- struct radv_meta_state *state = &device->meta_state;
-
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->itoi.img_p_layout, &state->alloc);
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- state->itoi.img_ds_layout,
- &state->alloc);
-
- for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->itoi.pipeline[i], &state->alloc);
- }
-
- if (device->physical_device->rad_info.chip_class >= GFX9)
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->itoi.pipeline_3d, &state->alloc);
+ struct radv_meta_state *state = &device->meta_state;
+
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->itoi.img_p_layout,
+ &state->alloc);
+ radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->itoi.img_ds_layout,
+ &state->alloc);
+
+ for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
+ radv_DestroyPipeline(radv_device_to_handle(device), state->itoi.pipeline[i], &state->alloc);
+ }
+
+ if (device->physical_device->rad_info.chip_class >= GFX9)
+ radv_DestroyPipeline(radv_device_to_handle(device), state->itoi.pipeline_3d, &state->alloc);
}
static nir_shader *
build_nir_itoi_r32g32b32_compute_shader(struct radv_device *dev)
{
- const struct glsl_type *type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
- false,
- false,
- GLSL_TYPE_FLOAT);
- const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF,
- false,
- GLSL_TYPE_FLOAT);
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_itoi_r32g32b32_cs");
- b.shader->info.cs.local_size[0] = 8;
- b.shader->info.cs.local_size[1] = 8;
- b.shader->info.cs.local_size[2] = 1;
- nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
- type, "input_img");
- input_img->data.descriptor_set = 0;
- input_img->data.binding = 0;
-
- nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
- img_type, "output_img");
- output_img->data.descriptor_set = 0;
- output_img->data.binding = 1;
-
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
- nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
- nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info.cs.local_size[0],
- b.shader->info.cs.local_size[1],
- b.shader->info.cs.local_size[2], 0);
-
- nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
-
- nir_ssa_def *src_offset = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 0), .range=24);
- nir_ssa_def *dst_offset = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 12), .range=24);
-
- nir_ssa_def *src_stride = nir_channel(&b, src_offset, 2);
- nir_ssa_def *dst_stride = nir_channel(&b, dst_offset, 2);
-
- nir_ssa_def *src_img_coord = nir_iadd(&b, global_id, src_offset);
- nir_ssa_def *dst_img_coord = nir_iadd(&b, global_id, dst_offset);
-
- nir_ssa_def *src_global_pos =
- nir_iadd(&b,
- nir_imul(&b, nir_channel(&b, src_img_coord, 1), src_stride),
- nir_imul(&b, nir_channel(&b, src_img_coord, 0), nir_imm_int(&b, 3)));
-
- nir_ssa_def *dst_global_pos =
- nir_iadd(&b,
- nir_imul(&b, nir_channel(&b, dst_img_coord, 1), dst_stride),
- nir_imul(&b, nir_channel(&b, dst_img_coord, 0), nir_imm_int(&b, 3)));
-
- for (int chan = 0; chan < 3; chan++) {
- /* src */
- nir_ssa_def *src_local_pos =
- nir_iadd(&b, src_global_pos, nir_imm_int(&b, chan));
-
- nir_ssa_def *src_coord =
- nir_vec4(&b, src_local_pos, src_local_pos,
- src_local_pos, src_local_pos);
-
- nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
-
- nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
- tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
- tex->op = nir_texop_txf;
- tex->src[0].src_type = nir_tex_src_coord;
- tex->src[0].src = nir_src_for_ssa(nir_channels(&b, src_coord, 1));
- tex->src[1].src_type = nir_tex_src_lod;
- tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
- tex->src[2].src_type = nir_tex_src_texture_deref;
- tex->src[2].src = nir_src_for_ssa(input_img_deref);
- tex->dest_type = nir_type_float32;
- tex->is_array = false;
- tex->coord_components = 1;
- nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
- nir_builder_instr_insert(&b, &tex->instr);
-
- nir_ssa_def *outval = &tex->dest.ssa;
-
- /* dst */
- nir_ssa_def *dst_local_pos =
- nir_iadd(&b, dst_global_pos, nir_imm_int(&b, chan));
-
- nir_ssa_def *dst_coord =
- nir_vec4(&b, dst_local_pos, dst_local_pos,
- dst_local_pos, dst_local_pos);
-
- nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa,
- dst_coord, nir_ssa_undef(&b, 1, 32),
- nir_channel(&b, outval, 0), nir_imm_int(&b, 0));
- }
-
- return b.shader;
+ const struct glsl_type *type =
+ glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT);
+ const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT);
+ nir_builder b =
+ nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_itoi_r32g32b32_cs");
+ b.shader->info.cs.local_size[0] = 8;
+ b.shader->info.cs.local_size[1] = 8;
+ b.shader->info.cs.local_size[2] = 1;
+ nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, type, "input_img");
+ input_img->data.descriptor_set = 0;
+ input_img->data.binding = 0;
+
+ nir_variable *output_img =
+ nir_variable_create(b.shader, nir_var_uniform, img_type, "output_img");
+ output_img->data.descriptor_set = 0;
+ output_img->data.binding = 1;
+
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
+ nir_ssa_def *block_size =
+ nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1],
+ b.shader->info.cs.local_size[2], 0);
+
+ nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+
+ nir_ssa_def *src_offset = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 0), .range = 24);
+ nir_ssa_def *dst_offset = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 12), .range = 24);
+
+ nir_ssa_def *src_stride = nir_channel(&b, src_offset, 2);
+ nir_ssa_def *dst_stride = nir_channel(&b, dst_offset, 2);
+
+ nir_ssa_def *src_img_coord = nir_iadd(&b, global_id, src_offset);
+ nir_ssa_def *dst_img_coord = nir_iadd(&b, global_id, dst_offset);
+
+ nir_ssa_def *src_global_pos =
+ nir_iadd(&b, nir_imul(&b, nir_channel(&b, src_img_coord, 1), src_stride),
+ nir_imul(&b, nir_channel(&b, src_img_coord, 0), nir_imm_int(&b, 3)));
+
+ nir_ssa_def *dst_global_pos =
+ nir_iadd(&b, nir_imul(&b, nir_channel(&b, dst_img_coord, 1), dst_stride),
+ nir_imul(&b, nir_channel(&b, dst_img_coord, 0), nir_imm_int(&b, 3)));
+
+ for (int chan = 0; chan < 3; chan++) {
+ /* src */
+ nir_ssa_def *src_local_pos = nir_iadd(&b, src_global_pos, nir_imm_int(&b, chan));
+
+ nir_ssa_def *src_coord =
+ nir_vec4(&b, src_local_pos, src_local_pos, src_local_pos, src_local_pos);
+
+ nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
+
+ nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
+ tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
+ tex->op = nir_texop_txf;
+ tex->src[0].src_type = nir_tex_src_coord;
+ tex->src[0].src = nir_src_for_ssa(nir_channels(&b, src_coord, 1));
+ tex->src[1].src_type = nir_tex_src_lod;
+ tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
+ tex->src[2].src_type = nir_tex_src_texture_deref;
+ tex->src[2].src = nir_src_for_ssa(input_img_deref);
+ tex->dest_type = nir_type_float32;
+ tex->is_array = false;
+ tex->coord_components = 1;
+ nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+ nir_builder_instr_insert(&b, &tex->instr);
+
+ nir_ssa_def *outval = &tex->dest.ssa;
+
+ /* dst */
+ nir_ssa_def *dst_local_pos = nir_iadd(&b, dst_global_pos, nir_imm_int(&b, chan));
+
+ nir_ssa_def *dst_coord =
+ nir_vec4(&b, dst_local_pos, dst_local_pos, dst_local_pos, dst_local_pos);
+
+ nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, dst_coord,
+ nir_ssa_undef(&b, 1, 32), nir_channel(&b, outval, 0),
+ nir_imm_int(&b, 0));
+ }
+
+ return b.shader;
}
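/* Illustrative sketch (hypothetical names, for reference only): the 24-byte
 * push-constant block the shader above consumes.  It corresponds to the
 * VkPushConstantRange of size 24 declared in
 * radv_device_init_meta_itoi_r32g32b32_state() below and to the unsigned
 * push_constants[6] filled in radv_meta_image_to_image_cs_r32g32b32():
 * the shader loads 3x32 bits at offset 0 (src) and at offset 12 (dst) and
 * uses the third component of each as the row stride in 32-bit elements.
 */
#include <stdint.h>

struct itoi_r32g32b32_push_constants {
   uint32_t src_x, src_y, src_stride; /* loaded at offset 0 */
   uint32_t dst_x, dst_y, dst_stride; /* loaded at offset 12 */
};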
/* Image to image - special path for R32G32B32 */
static VkResult
radv_device_init_meta_itoi_r32g32b32_state(struct radv_device *device)
{
- VkResult result;
- nir_shader *cs = build_nir_itoi_r32g32b32_compute_shader(device);
-
- VkDescriptorSetLayoutCreateInfo ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 2,
- .pBindings = (VkDescriptorSetLayoutBinding[]) {
- {
- .binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- {
- .binding = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- }
- };
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
- &ds_create_info,
- &device->meta_state.alloc,
- &device->meta_state.itoi_r32g32b32.img_ds_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
-
- VkPipelineLayoutCreateInfo pl_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 1,
- .pSetLayouts = &device->meta_state.itoi_r32g32b32.img_ds_layout,
- .pushConstantRangeCount = 1,
- .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 24},
- };
-
- result = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &pl_create_info,
- &device->meta_state.alloc,
- &device->meta_state.itoi_r32g32b32.img_p_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
- /* compute shader */
-
- VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = vk_shader_module_handle_from_nir(cs),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = pipeline_shader_stage,
- .flags = 0,
- .layout = device->meta_state.itoi_r32g32b32.img_p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- 1, &vk_pipeline_info, NULL,
- &device->meta_state.itoi_r32g32b32.pipeline);
+ VkResult result;
+ nir_shader *cs = build_nir_itoi_r32g32b32_compute_shader(device);
+
+ VkDescriptorSetLayoutCreateInfo ds_create_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 2,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ {.binding = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.itoi_r32g32b32.img_ds_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineLayoutCreateInfo pl_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &device->meta_state.itoi_r32g32b32.img_ds_layout,
+ .pushConstantRangeCount = 1,
+ .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 24},
+ };
+
+ result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.itoi_r32g32b32.img_p_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ /* compute shader */
+
+ VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(cs),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = pipeline_shader_stage,
+ .flags = 0,
+ .layout = device->meta_state.itoi_r32g32b32.img_p_layout,
+ };
+
+ result = radv_CreateComputePipelines(
+ radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
+ &vk_pipeline_info, NULL, &device->meta_state.itoi_r32g32b32.pipeline);
fail:
- ralloc_free(cs);
- return result;
+ ralloc_free(cs);
+ return result;
}
static void
radv_device_finish_meta_itoi_r32g32b32_state(struct radv_device *device)
{
- struct radv_meta_state *state = &device->meta_state;
-
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->itoi_r32g32b32.img_p_layout, &state->alloc);
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- state->itoi_r32g32b32.img_ds_layout,
- &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->itoi_r32g32b32.pipeline, &state->alloc);
+ struct radv_meta_state *state = &device->meta_state;
+
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->itoi_r32g32b32.img_p_layout,
+ &state->alloc);
+ radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
+ state->itoi_r32g32b32.img_ds_layout, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->itoi_r32g32b32.pipeline,
+ &state->alloc);
}
static nir_shader *
build_nir_cleari_compute_shader(struct radv_device *dev, bool is_3d, int samples)
{
- bool is_multisampled = samples > 1;
- enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : is_multisampled ? GLSL_SAMPLER_DIM_MS : GLSL_SAMPLER_DIM_2D;
- const struct glsl_type *img_type = glsl_image_type(dim,
- false,
- GLSL_TYPE_FLOAT);
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, is_3d ? "meta_cleari_cs_3d-%d" : "meta_cleari_cs-%d", samples);
- b.shader->info.cs.local_size[0] = 8;
- b.shader->info.cs.local_size[1] = 8;
- b.shader->info.cs.local_size[2] = 1;
-
- nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
- img_type, "out_img");
- output_img->data.descriptor_set = 0;
- output_img->data.binding = 0;
-
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
- nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
- nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info.cs.local_size[0],
- b.shader->info.cs.local_size[1],
- b.shader->info.cs.local_size[2], 0);
-
- nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
-
- nir_ssa_def *clear_val = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range=20);
- nir_ssa_def *layer = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 16), .range=20);
-
- nir_ssa_def *global_z = nir_iadd(&b, nir_channel(&b, global_id, 2), layer);
-
- nir_ssa_def *comps[4];
- comps[0] = nir_channel(&b, global_id, 0);
- comps[1] = nir_channel(&b, global_id, 1);
- comps[2] = global_z;
- comps[3] = nir_imm_int(&b, 0);
- global_id = nir_vec(&b, comps, 4);
-
- for (uint32_t i = 0; i < samples; i++) {
- nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa,
- global_id, nir_imm_int(&b, i), clear_val,
- nir_imm_int(&b, 0));
- }
-
- return b.shader;
+ bool is_multisampled = samples > 1;
+ enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D
+ : is_multisampled ? GLSL_SAMPLER_DIM_MS
+ : GLSL_SAMPLER_DIM_2D;
+ const struct glsl_type *img_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
+ nir_builder b = nir_builder_init_simple_shader(
+ MESA_SHADER_COMPUTE, NULL, is_3d ? "meta_cleari_cs_3d-%d" : "meta_cleari_cs-%d", samples);
+ b.shader->info.cs.local_size[0] = 8;
+ b.shader->info.cs.local_size[1] = 8;
+ b.shader->info.cs.local_size[2] = 1;
+
+ nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
+ output_img->data.descriptor_set = 0;
+ output_img->data.binding = 0;
+
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
+ nir_ssa_def *block_size =
+ nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1],
+ b.shader->info.cs.local_size[2], 0);
+
+ nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+
+ nir_ssa_def *clear_val = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 20);
+ nir_ssa_def *layer = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 16), .range = 20);
+
+ nir_ssa_def *global_z = nir_iadd(&b, nir_channel(&b, global_id, 2), layer);
+
+ nir_ssa_def *comps[4];
+ comps[0] = nir_channel(&b, global_id, 0);
+ comps[1] = nir_channel(&b, global_id, 1);
+ comps[2] = global_z;
+ comps[3] = nir_imm_int(&b, 0);
+ global_id = nir_vec(&b, comps, 4);
+
+ for (uint32_t i = 0; i < samples; i++) {
+ nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, global_id,
+ nir_imm_int(&b, i), clear_val, nir_imm_int(&b, 0));
+ }
+
+ return b.shader;
}
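/* Illustrative sketch (hypothetical names, for reference only): the 20-byte
 * push-constant block consumed by the clear shader above, matching the
 * VkPushConstantRange of size 20 declared in
 * radv_device_init_meta_cleari_state() below.  The shader loads the clear
 * value as a 4x32-bit vector at offset 0 and the base layer at offset 16.
 */
#include <stdint.h>

struct cleari_push_constants {
   uint32_t clear_val[4]; /* raw 32-bit words, offset 0 */
   uint32_t layer;        /* offset 16 */
};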
static VkResult
-create_cleari_pipeline(struct radv_device *device,
- int samples,
- VkPipeline *pipeline)
+create_cleari_pipeline(struct radv_device *device, int samples, VkPipeline *pipeline)
{
- nir_shader *cs = build_nir_cleari_compute_shader(device, false, samples);
- VkResult result;
-
- VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = vk_shader_module_handle_from_nir(cs),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = pipeline_shader_stage,
- .flags = 0,
- .layout = device->meta_state.cleari.img_p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- 1, &vk_pipeline_info, NULL,
- pipeline);
- ralloc_free(cs);
- return result;
+ nir_shader *cs = build_nir_cleari_compute_shader(device, false, samples);
+ VkResult result;
+
+ VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(cs),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = pipeline_shader_stage,
+ .flags = 0,
+ .layout = device->meta_state.cleari.img_p_layout,
+ };
+
+ result = radv_CreateComputePipelines(radv_device_to_handle(device),
+ radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
+ &vk_pipeline_info, NULL, pipeline);
+ ralloc_free(cs);
+ return result;
}
static VkResult
radv_device_init_meta_cleari_state(struct radv_device *device)
{
- VkResult result;
-
-   /* One descriptor: the storage image being cleared. */
- VkDescriptorSetLayoutCreateInfo ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 1,
- .pBindings = (VkDescriptorSetLayoutBinding[]) {
- {
- .binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- }
- };
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
- &ds_create_info,
- &device->meta_state.alloc,
- &device->meta_state.cleari.img_ds_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
-
- VkPipelineLayoutCreateInfo pl_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 1,
- .pSetLayouts = &device->meta_state.cleari.img_ds_layout,
- .pushConstantRangeCount = 1,
- .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 20},
- };
-
- result = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &pl_create_info,
- &device->meta_state.alloc,
- &device->meta_state.cleari.img_p_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
- for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; i++) {
- uint32_t samples = 1 << i;
- result = create_cleari_pipeline(device, samples,
- &device->meta_state.cleari.pipeline[i]);
- if (result != VK_SUCCESS)
- goto fail;
- }
-
- if (device->physical_device->rad_info.chip_class >= GFX9) {
- nir_shader *cs_3d = build_nir_cleari_compute_shader(device, true, 1);
-
- /* compute shader */
- VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = vk_shader_module_handle_from_nir(cs_3d),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo vk_pipeline_info_3d = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = pipeline_shader_stage_3d,
- .flags = 0,
- .layout = device->meta_state.cleari.img_p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- 1, &vk_pipeline_info_3d, NULL,
- &device->meta_state.cleari.pipeline_3d);
- ralloc_free(cs_3d);
- }
-
- return VK_SUCCESS;
+ VkResult result;
+
+   /* One descriptor: the storage image being cleared. */
+ VkDescriptorSetLayoutCreateInfo ds_create_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 1,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.cleari.img_ds_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineLayoutCreateInfo pl_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &device->meta_state.cleari.img_ds_layout,
+ .pushConstantRangeCount = 1,
+ .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 20},
+ };
+
+ result =
+ radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
+ &device->meta_state.alloc, &device->meta_state.cleari.img_p_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; i++) {
+ uint32_t samples = 1 << i;
+ result = create_cleari_pipeline(device, samples, &device->meta_state.cleari.pipeline[i]);
+ if (result != VK_SUCCESS)
+ goto fail;
+ }
+
+ if (device->physical_device->rad_info.chip_class >= GFX9) {
+ nir_shader *cs_3d = build_nir_cleari_compute_shader(device, true, 1);
+
+ /* compute shader */
+ VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(cs_3d),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo vk_pipeline_info_3d = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = pipeline_shader_stage_3d,
+ .flags = 0,
+ .layout = device->meta_state.cleari.img_p_layout,
+ };
+
+ result = radv_CreateComputePipelines(
+ radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
+ &vk_pipeline_info_3d, NULL, &device->meta_state.cleari.pipeline_3d);
+ ralloc_free(cs_3d);
+ }
+
+ return VK_SUCCESS;
fail:
- return result;
+ return result;
}
static void
radv_device_finish_meta_cleari_state(struct radv_device *device)
{
- struct radv_meta_state *state = &device->meta_state;
-
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->cleari.img_p_layout, &state->alloc);
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- state->cleari.img_ds_layout,
- &state->alloc);
+ struct radv_meta_state *state = &device->meta_state;
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->cleari.img_p_layout,
+ &state->alloc);
+ radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->cleari.img_ds_layout,
+ &state->alloc);
- for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->cleari.pipeline[i], &state->alloc);
- }
+ for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
+ radv_DestroyPipeline(radv_device_to_handle(device), state->cleari.pipeline[i], &state->alloc);
+ }
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->cleari.pipeline_3d, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->cleari.pipeline_3d, &state->alloc);
}
/* Special path for clearing R32G32B32 images using a compute shader. */
static nir_shader *
build_nir_cleari_r32g32b32_compute_shader(struct radv_device *dev)
{
- const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF,
- false,
- GLSL_TYPE_FLOAT);
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_cleari_r32g32b32_cs");
- b.shader->info.cs.local_size[0] = 8;
- b.shader->info.cs.local_size[1] = 8;
- b.shader->info.cs.local_size[2] = 1;
-
- nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
- img_type, "out_img");
- output_img->data.descriptor_set = 0;
- output_img->data.binding = 0;
-
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
- nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
- nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info.cs.local_size[0],
- b.shader->info.cs.local_size[1],
- b.shader->info.cs.local_size[2], 0);
-
- nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
-
- nir_ssa_def *clear_val = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 0), .range=16);
- nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range=16);
-
- nir_ssa_def *global_x = nir_channel(&b, global_id, 0);
- nir_ssa_def *global_y = nir_channel(&b, global_id, 1);
-
- nir_ssa_def *global_pos =
- nir_iadd(&b,
- nir_imul(&b, global_y, stride),
- nir_imul(&b, global_x, nir_imm_int(&b, 3)));
-
- for (unsigned chan = 0; chan < 3; chan++) {
- nir_ssa_def *local_pos =
- nir_iadd(&b, global_pos, nir_imm_int(&b, chan));
-
- nir_ssa_def *coord =
- nir_vec4(&b, local_pos, local_pos, local_pos, local_pos);
-
- nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa,
- coord, nir_ssa_undef(&b, 1, 32),
- nir_channel(&b, clear_val, chan), nir_imm_int(&b, 0));
- }
-
- return b.shader;
+ const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT);
+ nir_builder b =
+ nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_cleari_r32g32b32_cs");
+ b.shader->info.cs.local_size[0] = 8;
+ b.shader->info.cs.local_size[1] = 8;
+ b.shader->info.cs.local_size[2] = 1;
+
+ nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
+ output_img->data.descriptor_set = 0;
+ output_img->data.binding = 0;
+
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
+ nir_ssa_def *block_size =
+ nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1],
+ b.shader->info.cs.local_size[2], 0);
+
+ nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+
+ nir_ssa_def *clear_val = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 0), .range = 16);
+ nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);
+
+ nir_ssa_def *global_x = nir_channel(&b, global_id, 0);
+ nir_ssa_def *global_y = nir_channel(&b, global_id, 1);
+
+ nir_ssa_def *global_pos =
+ nir_iadd(&b, nir_imul(&b, global_y, stride), nir_imul(&b, global_x, nir_imm_int(&b, 3)));
+
+ for (unsigned chan = 0; chan < 3; chan++) {
+ nir_ssa_def *local_pos = nir_iadd(&b, global_pos, nir_imm_int(&b, chan));
+
+ nir_ssa_def *coord = nir_vec4(&b, local_pos, local_pos, local_pos, local_pos);
+
+ nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord,
+ nir_ssa_undef(&b, 1, 32), nir_channel(&b, clear_val, chan),
+ nir_imm_int(&b, 0));
+ }
+
+ return b.shader;
}
static VkResult
radv_device_init_meta_cleari_r32g32b32_state(struct radv_device *device)
{
- VkResult result;
- nir_shader *cs = build_nir_cleari_r32g32b32_compute_shader(device);
-
- VkDescriptorSetLayoutCreateInfo ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 1,
- .pBindings = (VkDescriptorSetLayoutBinding[]) {
- {
- .binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- }
- };
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
- &ds_create_info,
- &device->meta_state.alloc,
- &device->meta_state.cleari_r32g32b32.img_ds_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
- VkPipelineLayoutCreateInfo pl_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 1,
- .pSetLayouts = &device->meta_state.cleari_r32g32b32.img_ds_layout,
- .pushConstantRangeCount = 1,
- .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
- };
-
- result = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &pl_create_info,
- &device->meta_state.alloc,
- &device->meta_state.cleari_r32g32b32.img_p_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
- /* compute shader */
- VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = vk_shader_module_handle_from_nir(cs),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = pipeline_shader_stage,
- .flags = 0,
- .layout = device->meta_state.cleari_r32g32b32.img_p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- 1, &vk_pipeline_info, NULL,
- &device->meta_state.cleari_r32g32b32.pipeline);
+ VkResult result;
+ nir_shader *cs = build_nir_cleari_r32g32b32_compute_shader(device);
+
+ VkDescriptorSetLayoutCreateInfo ds_create_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 1,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.cleari_r32g32b32.img_ds_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineLayoutCreateInfo pl_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &device->meta_state.cleari_r32g32b32.img_ds_layout,
+ .pushConstantRangeCount = 1,
+ .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
+ };
+
+ result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.cleari_r32g32b32.img_p_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ /* compute shader */
+ VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(cs),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = pipeline_shader_stage,
+ .flags = 0,
+ .layout = device->meta_state.cleari_r32g32b32.img_p_layout,
+ };
+
+ result = radv_CreateComputePipelines(
+ radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
+ &vk_pipeline_info, NULL, &device->meta_state.cleari_r32g32b32.pipeline);
fail:
- ralloc_free(cs);
- return result;
+ ralloc_free(cs);
+ return result;
}
static void
radv_device_finish_meta_cleari_r32g32b32_state(struct radv_device *device)
{
- struct radv_meta_state *state = &device->meta_state;
-
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->cleari_r32g32b32.img_p_layout,
- &state->alloc);
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- state->cleari_r32g32b32.img_ds_layout,
- &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->cleari_r32g32b32.pipeline, &state->alloc);
+ struct radv_meta_state *state = &device->meta_state;
+
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->cleari_r32g32b32.img_p_layout,
+ &state->alloc);
+ radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
+ state->cleari_r32g32b32.img_ds_layout, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->cleari_r32g32b32.pipeline,
+ &state->alloc);
}
void
radv_device_finish_meta_bufimage_state(struct radv_device *device)
{
- radv_device_finish_meta_itob_state(device);
- radv_device_finish_meta_btoi_state(device);
- radv_device_finish_meta_btoi_r32g32b32_state(device);
- radv_device_finish_meta_itoi_state(device);
- radv_device_finish_meta_itoi_r32g32b32_state(device);
- radv_device_finish_meta_cleari_state(device);
- radv_device_finish_meta_cleari_r32g32b32_state(device);
+ radv_device_finish_meta_itob_state(device);
+ radv_device_finish_meta_btoi_state(device);
+ radv_device_finish_meta_btoi_r32g32b32_state(device);
+ radv_device_finish_meta_itoi_state(device);
+ radv_device_finish_meta_itoi_r32g32b32_state(device);
+ radv_device_finish_meta_cleari_state(device);
+ radv_device_finish_meta_cleari_r32g32b32_state(device);
}
VkResult
radv_device_init_meta_bufimage_state(struct radv_device *device)
{
- VkResult result;
+ VkResult result;
- result = radv_device_init_meta_itob_state(device);
- if (result != VK_SUCCESS)
- goto fail_itob;
+ result = radv_device_init_meta_itob_state(device);
+ if (result != VK_SUCCESS)
+ goto fail_itob;
- result = radv_device_init_meta_btoi_state(device);
- if (result != VK_SUCCESS)
- goto fail_btoi;
+ result = radv_device_init_meta_btoi_state(device);
+ if (result != VK_SUCCESS)
+ goto fail_btoi;
- result = radv_device_init_meta_btoi_r32g32b32_state(device);
- if (result != VK_SUCCESS)
- goto fail_btoi_r32g32b32;
+ result = radv_device_init_meta_btoi_r32g32b32_state(device);
+ if (result != VK_SUCCESS)
+ goto fail_btoi_r32g32b32;
- result = radv_device_init_meta_itoi_state(device);
- if (result != VK_SUCCESS)
- goto fail_itoi;
+ result = radv_device_init_meta_itoi_state(device);
+ if (result != VK_SUCCESS)
+ goto fail_itoi;
- result = radv_device_init_meta_itoi_r32g32b32_state(device);
- if (result != VK_SUCCESS)
- goto fail_itoi_r32g32b32;
+ result = radv_device_init_meta_itoi_r32g32b32_state(device);
+ if (result != VK_SUCCESS)
+ goto fail_itoi_r32g32b32;
- result = radv_device_init_meta_cleari_state(device);
- if (result != VK_SUCCESS)
- goto fail_cleari;
+ result = radv_device_init_meta_cleari_state(device);
+ if (result != VK_SUCCESS)
+ goto fail_cleari;
- result = radv_device_init_meta_cleari_r32g32b32_state(device);
- if (result != VK_SUCCESS)
- goto fail_cleari_r32g32b32;
+ result = radv_device_init_meta_cleari_r32g32b32_state(device);
+ if (result != VK_SUCCESS)
+ goto fail_cleari_r32g32b32;
- return VK_SUCCESS;
+ return VK_SUCCESS;
fail_cleari_r32g32b32:
- radv_device_finish_meta_cleari_r32g32b32_state(device);
+ radv_device_finish_meta_cleari_r32g32b32_state(device);
fail_cleari:
- radv_device_finish_meta_cleari_state(device);
+ radv_device_finish_meta_cleari_state(device);
fail_itoi_r32g32b32:
- radv_device_finish_meta_itoi_r32g32b32_state(device);
+ radv_device_finish_meta_itoi_r32g32b32_state(device);
fail_itoi:
- radv_device_finish_meta_itoi_state(device);
+ radv_device_finish_meta_itoi_state(device);
fail_btoi_r32g32b32:
- radv_device_finish_meta_btoi_r32g32b32_state(device);
+ radv_device_finish_meta_btoi_r32g32b32_state(device);
fail_btoi:
- radv_device_finish_meta_btoi_state(device);
+ radv_device_finish_meta_btoi_state(device);
fail_itob:
- radv_device_finish_meta_itob_state(device);
- return result;
+ radv_device_finish_meta_itob_state(device);
+ return result;
}
static void
-create_iview(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_surf *surf,
+create_iview(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *surf,
struct radv_image_view *iview)
{
- VkImageViewType view_type = cmd_buffer->device->physical_device->rad_info.chip_class < GFX9 ? VK_IMAGE_VIEW_TYPE_2D :
- radv_meta_get_view_type(surf->image);
- radv_image_view_init(iview, cmd_buffer->device,
- &(VkImageViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(surf->image),
- .viewType = view_type,
- .format = surf->format,
- .subresourceRange = {
- .aspectMask = surf->aspect_mask,
- .baseMipLevel = surf->level,
- .levelCount = 1,
- .baseArrayLayer = surf->layer,
- .layerCount = 1
- },
- }, &(struct radv_image_view_extra_create_info) {
- .disable_compression = surf->disable_compression,
- });
+ VkImageViewType view_type = cmd_buffer->device->physical_device->rad_info.chip_class < GFX9
+ ? VK_IMAGE_VIEW_TYPE_2D
+ : radv_meta_get_view_type(surf->image);
+ radv_image_view_init(iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(surf->image),
+ .viewType = view_type,
+ .format = surf->format,
+ .subresourceRange = {.aspectMask = surf->aspect_mask,
+ .baseMipLevel = surf->level,
+ .levelCount = 1,
+ .baseArrayLayer = surf->layer,
+ .layerCount = 1},
+ },
+ &(struct radv_image_view_extra_create_info){
+ .disable_compression = surf->disable_compression,
+ });
}
static void
-create_bview(struct radv_cmd_buffer *cmd_buffer,
- struct radv_buffer *buffer,
- unsigned offset,
- VkFormat format,
- struct radv_buffer_view *bview)
+create_bview(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer, unsigned offset,
+ VkFormat format, struct radv_buffer_view *bview)
{
- radv_buffer_view_init(bview, cmd_buffer->device,
- &(VkBufferViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
- .flags = 0,
- .buffer = radv_buffer_to_handle(buffer),
- .format = format,
- .offset = offset,
- .range = VK_WHOLE_SIZE,
- });
-
+ radv_buffer_view_init(bview, cmd_buffer->device,
+ &(VkBufferViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
+ .flags = 0,
+ .buffer = radv_buffer_to_handle(buffer),
+ .format = format,
+ .offset = offset,
+ .range = VK_WHOLE_SIZE,
+ });
}
static void
-create_buffer_from_image(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_surf *surf,
- VkBufferUsageFlagBits usage,
- VkBuffer *buffer)
+create_buffer_from_image(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *surf,
+ VkBufferUsageFlagBits usage, VkBuffer *buffer)
{
- struct radv_device *device = cmd_buffer->device;
- struct radv_device_memory mem = { .bo = surf->image->bo };
-
- radv_CreateBuffer(radv_device_to_handle(device),
- &(VkBufferCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
- .flags = 0,
- .size = surf->image->size,
- .usage = usage,
- .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
- }, NULL, buffer);
-
- radv_BindBufferMemory2(radv_device_to_handle(device), 1,
- (VkBindBufferMemoryInfo[]) {
- {
- .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
- .buffer = *buffer,
- .memory = radv_device_memory_to_handle(&mem),
- .memoryOffset = surf->image->offset,
- }
- });
+ struct radv_device *device = cmd_buffer->device;
+ struct radv_device_memory mem = {.bo = surf->image->bo};
+
+ radv_CreateBuffer(radv_device_to_handle(device),
+ &(VkBufferCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+ .flags = 0,
+ .size = surf->image->size,
+ .usage = usage,
+ .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+ },
+ NULL, buffer);
+
+ radv_BindBufferMemory2(radv_device_to_handle(device), 1,
+ (VkBindBufferMemoryInfo[]){{
+ .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
+ .buffer = *buffer,
+ .memory = radv_device_memory_to_handle(&mem),
+ .memoryOffset = surf->image->offset,
+ }});
}
static void
-create_bview_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
- struct radv_buffer *buffer,
- unsigned offset,
- VkFormat src_format,
- struct radv_buffer_view *bview)
+create_bview_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer,
+ unsigned offset, VkFormat src_format, struct radv_buffer_view *bview)
{
- VkFormat format;
-
- switch (src_format) {
- case VK_FORMAT_R32G32B32_UINT:
- format = VK_FORMAT_R32_UINT;
- break;
- case VK_FORMAT_R32G32B32_SINT:
- format = VK_FORMAT_R32_SINT;
- break;
- case VK_FORMAT_R32G32B32_SFLOAT:
- format = VK_FORMAT_R32_SFLOAT;
- break;
- default:
- unreachable("invalid R32G32B32 format");
- }
-
- radv_buffer_view_init(bview, cmd_buffer->device,
- &(VkBufferViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
- .flags = 0,
- .buffer = radv_buffer_to_handle(buffer),
- .format = format,
- .offset = offset,
- .range = VK_WHOLE_SIZE,
- });
+ VkFormat format;
+
+ switch (src_format) {
+ case VK_FORMAT_R32G32B32_UINT:
+ format = VK_FORMAT_R32_UINT;
+ break;
+ case VK_FORMAT_R32G32B32_SINT:
+ format = VK_FORMAT_R32_SINT;
+ break;
+ case VK_FORMAT_R32G32B32_SFLOAT:
+ format = VK_FORMAT_R32_SFLOAT;
+ break;
+ default:
+ unreachable("invalid R32G32B32 format");
+ }
+
+ radv_buffer_view_init(bview, cmd_buffer->device,
+ &(VkBufferViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
+ .flags = 0,
+ .buffer = radv_buffer_to_handle(buffer),
+ .format = format,
+ .offset = offset,
+ .range = VK_WHOLE_SIZE,
+ });
}
static unsigned
get_image_stride_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_surf *surf)
+ struct radv_meta_blit2d_surf *surf)
{
- unsigned stride;
+ unsigned stride;
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
- stride = surf->image->planes[0].surface.u.gfx9.surf_pitch;
- } else {
- stride = surf->image->planes[0].surface.u.legacy.level[0].nblk_x * 3;
- }
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
+ stride = surf->image->planes[0].surface.u.gfx9.surf_pitch;
+ } else {
+ stride = surf->image->planes[0].surface.u.legacy.level[0].nblk_x * 3;
+ }
- return stride;
+ return stride;
}
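/* Illustrative sketch (hypothetical name, for reference only): how the
 * R32G32B32 clear/copy shaders above address individual channels.  The image
 * is viewed as a flat array of 32-bit elements, three per pixel, and "stride"
 * is the per-row distance in 32-bit elements as returned by
 * get_image_stride_for_r32g32b32() above.
 */
#include <stdint.h>

static inline uint32_t
r32g32b32_elem_index(uint32_t x, uint32_t y, uint32_t stride, uint32_t chan)
{
   /* chan selects R (0), G (1) or B (2). */
   return y * stride + x * 3 + chan;
}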
static void
-itob_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image_view *src,
- struct radv_buffer_view *dst)
+itob_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src,
+ struct radv_buffer_view *dst)
{
- struct radv_device *device = cmd_buffer->device;
-
- radv_meta_push_descriptor_set(cmd_buffer,
- VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.itob.img_p_layout,
- 0, /* set */
- 2, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]) {
- {
- .sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(src),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- }
- },
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 1,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(dst) },
- }
- });
+ struct radv_device *device = cmd_buffer->device;
+
+ radv_meta_push_descriptor_set(
+ cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.itob.img_p_layout, 0, /* set */
+ 2, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){
+ {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .pImageInfo =
+ (VkDescriptorImageInfo[]){
+ {
+ .sampler = VK_NULL_HANDLE,
+ .imageView = radv_image_view_to_handle(src),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }},
+ {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 1,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+ .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(dst)},
+ }});
}
void
-radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_surf *src,
- struct radv_meta_blit2d_buffer *dst,
- unsigned num_rects,
- struct radv_meta_blit2d_rect *rects)
+radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src,
+ struct radv_meta_blit2d_buffer *dst, unsigned num_rects,
+ struct radv_meta_blit2d_rect *rects)
{
- VkPipeline pipeline = cmd_buffer->device->meta_state.itob.pipeline;
- struct radv_device *device = cmd_buffer->device;
- struct radv_image_view src_view;
- struct radv_buffer_view dst_view;
-
- create_iview(cmd_buffer, src, &src_view);
- create_bview(cmd_buffer, dst->buffer, dst->offset, dst->format, &dst_view);
- itob_bind_descriptors(cmd_buffer, &src_view, &dst_view);
-
- if (device->physical_device->rad_info.chip_class >= GFX9 &&
- src->image->type == VK_IMAGE_TYPE_3D)
- pipeline = cmd_buffer->device->meta_state.itob.pipeline_3d;
-
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
-
- for (unsigned r = 0; r < num_rects; ++r) {
- unsigned push_constants[4] = {
- rects[r].src_x,
- rects[r].src_y,
- src->layer,
- dst->pitch
- };
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.itob.img_p_layout,
- VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
- push_constants);
-
- radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
- }
+ VkPipeline pipeline = cmd_buffer->device->meta_state.itob.pipeline;
+ struct radv_device *device = cmd_buffer->device;
+ struct radv_image_view src_view;
+ struct radv_buffer_view dst_view;
+
+ create_iview(cmd_buffer, src, &src_view);
+ create_bview(cmd_buffer, dst->buffer, dst->offset, dst->format, &dst_view);
+ itob_bind_descriptors(cmd_buffer, &src_view, &dst_view);
+
+ if (device->physical_device->rad_info.chip_class >= GFX9 && src->image->type == VK_IMAGE_TYPE_3D)
+ pipeline = cmd_buffer->device->meta_state.itob.pipeline_3d;
+
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
+ pipeline);
+
+ for (unsigned r = 0; r < num_rects; ++r) {
+ unsigned push_constants[4] = {rects[r].src_x, rects[r].src_y, src->layer, dst->pitch};
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+ device->meta_state.itob.img_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0,
+ 16, push_constants);
+
+ radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
+ }
}
static void
-btoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
- struct radv_buffer_view *src,
- struct radv_buffer_view *dst)
+btoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer_view *src,
+ struct radv_buffer_view *dst)
{
- struct radv_device *device = cmd_buffer->device;
-
- radv_meta_push_descriptor_set(cmd_buffer,
- VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.btoi_r32g32b32.img_p_layout,
- 0, /* set */
- 2, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
- .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(src) },
- },
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 1,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(dst) },
- }
- });
+ struct radv_device *device = cmd_buffer->device;
+
+ radv_meta_push_descriptor_set(
+ cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.btoi_r32g32b32.img_p_layout,
+ 0, /* set */
+ 2, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){
+ {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
+ .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(src)},
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 1,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+ .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(dst)},
+ }});
}
static void
radv_meta_buffer_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_buffer *src,
- struct radv_meta_blit2d_surf *dst,
- unsigned num_rects,
- struct radv_meta_blit2d_rect *rects)
+ struct radv_meta_blit2d_buffer *src,
+ struct radv_meta_blit2d_surf *dst, unsigned num_rects,
+ struct radv_meta_blit2d_rect *rects)
{
- VkPipeline pipeline = cmd_buffer->device->meta_state.btoi_r32g32b32.pipeline;
- struct radv_device *device = cmd_buffer->device;
- struct radv_buffer_view src_view, dst_view;
- unsigned dst_offset = 0;
- unsigned stride;
- VkBuffer buffer;
-
- /* This special btoi path for R32G32B32 formats will write the linear
- * image as a buffer with the same underlying memory. The compute
-     * shader will copy all components separately using an R32 format.
- */
- create_buffer_from_image(cmd_buffer, dst,
- VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT,
- &buffer);
-
- create_bview(cmd_buffer, src->buffer, src->offset,
- src->format, &src_view);
- create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(buffer),
- dst_offset, dst->format, &dst_view);
- btoi_r32g32b32_bind_descriptors(cmd_buffer, &src_view, &dst_view);
-
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
-
- stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);
-
- for (unsigned r = 0; r < num_rects; ++r) {
- unsigned push_constants[4] = {
- rects[r].dst_x,
- rects[r].dst_y,
- stride,
- src->pitch,
- };
-
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.btoi_r32g32b32.img_p_layout,
- VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
- push_constants);
-
- radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
- }
-
- radv_DestroyBuffer(radv_device_to_handle(device), buffer, NULL);
+ VkPipeline pipeline = cmd_buffer->device->meta_state.btoi_r32g32b32.pipeline;
+ struct radv_device *device = cmd_buffer->device;
+ struct radv_buffer_view src_view, dst_view;
+ unsigned dst_offset = 0;
+ unsigned stride;
+ VkBuffer buffer;
+
+ /* This special btoi path for R32G32B32 formats will write the linear
+ * image as a buffer with the same underlying memory. The compute
+    * shader will copy all components separately using an R32 format.
+ */
+ create_buffer_from_image(cmd_buffer, dst, VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, &buffer);
+
+ create_bview(cmd_buffer, src->buffer, src->offset, src->format, &src_view);
+ create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(buffer), dst_offset, dst->format,
+ &dst_view);
+ btoi_r32g32b32_bind_descriptors(cmd_buffer, &src_view, &dst_view);
+
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
+ pipeline);
+
+ stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);
+
+ for (unsigned r = 0; r < num_rects; ++r) {
+ unsigned push_constants[4] = {
+ rects[r].dst_x,
+ rects[r].dst_y,
+ stride,
+ src->pitch,
+ };
+
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+ device->meta_state.btoi_r32g32b32.img_p_layout,
+ VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, push_constants);
+
+ radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
+ }
+
+ radv_DestroyBuffer(radv_device_to_handle(device), buffer, NULL);
}
static void
-btoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
- struct radv_buffer_view *src,
- struct radv_image_view *dst)
+btoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer_view *src,
+ struct radv_image_view *dst)
{
- struct radv_device *device = cmd_buffer->device;
-
- radv_meta_push_descriptor_set(cmd_buffer,
- VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.btoi.img_p_layout,
- 0, /* set */
- 2, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(src) },
- },
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 1,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]) {
- {
- .sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(dst),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- }
- }
- });
+ struct radv_device *device = cmd_buffer->device;
+
+ radv_meta_push_descriptor_set(
+ cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.btoi.img_p_layout, 0, /* set */
+ 2, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){
+ {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+ .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(src)},
+ },
+ {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 1,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .pImageInfo = (VkDescriptorImageInfo[]){
+ {
+ .sampler = VK_NULL_HANDLE,
+ .imageView = radv_image_view_to_handle(dst),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }}});
}
void
radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_buffer *src,
- struct radv_meta_blit2d_surf *dst,
- unsigned num_rects,
- struct radv_meta_blit2d_rect *rects)
+ struct radv_meta_blit2d_buffer *src, struct radv_meta_blit2d_surf *dst,
+ unsigned num_rects, struct radv_meta_blit2d_rect *rects)
{
- VkPipeline pipeline = cmd_buffer->device->meta_state.btoi.pipeline;
- struct radv_device *device = cmd_buffer->device;
- struct radv_buffer_view src_view;
- struct radv_image_view dst_view;
-
- if (dst->image->vk_format == VK_FORMAT_R32G32B32_UINT ||
- dst->image->vk_format == VK_FORMAT_R32G32B32_SINT ||
- dst->image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
- radv_meta_buffer_to_image_cs_r32g32b32(cmd_buffer, src, dst,
- num_rects, rects);
- return;
- }
-
- create_bview(cmd_buffer, src->buffer, src->offset, src->format, &src_view);
- create_iview(cmd_buffer, dst, &dst_view);
- btoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);
-
- if (device->physical_device->rad_info.chip_class >= GFX9 &&
- dst->image->type == VK_IMAGE_TYPE_3D)
- pipeline = cmd_buffer->device->meta_state.btoi.pipeline_3d;
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
-
- for (unsigned r = 0; r < num_rects; ++r) {
- unsigned push_constants[4] = {
- rects[r].dst_x,
- rects[r].dst_y,
- dst->layer,
- src->pitch,
- };
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.btoi.img_p_layout,
- VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
- push_constants);
-
- radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
- }
+ VkPipeline pipeline = cmd_buffer->device->meta_state.btoi.pipeline;
+ struct radv_device *device = cmd_buffer->device;
+ struct radv_buffer_view src_view;
+ struct radv_image_view dst_view;
+
+ if (dst->image->vk_format == VK_FORMAT_R32G32B32_UINT ||
+ dst->image->vk_format == VK_FORMAT_R32G32B32_SINT ||
+ dst->image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
+ radv_meta_buffer_to_image_cs_r32g32b32(cmd_buffer, src, dst, num_rects, rects);
+ return;
+ }
+
+ create_bview(cmd_buffer, src->buffer, src->offset, src->format, &src_view);
+ create_iview(cmd_buffer, dst, &dst_view);
+ btoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);
+
+ if (device->physical_device->rad_info.chip_class >= GFX9 && dst->image->type == VK_IMAGE_TYPE_3D)
+ pipeline = cmd_buffer->device->meta_state.btoi.pipeline_3d;
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
+ pipeline);
+
+ for (unsigned r = 0; r < num_rects; ++r) {
+ unsigned push_constants[4] = {
+ rects[r].dst_x,
+ rects[r].dst_y,
+ dst->layer,
+ src->pitch,
+ };
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+ device->meta_state.btoi.img_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0,
+ 16, push_constants);
+
+ radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
+ }
}
static void
-itoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
- struct radv_buffer_view *src,
- struct radv_buffer_view *dst)
+itoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer_view *src,
+ struct radv_buffer_view *dst)
{
- struct radv_device *device = cmd_buffer->device;
-
- radv_meta_push_descriptor_set(cmd_buffer,
- VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.itoi_r32g32b32.img_p_layout,
- 0, /* set */
- 2, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
- .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(src) },
- },
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 1,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(dst) },
- }
- });
+ struct radv_device *device = cmd_buffer->device;
+
+ radv_meta_push_descriptor_set(
+ cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.itoi_r32g32b32.img_p_layout,
+ 0, /* set */
+ 2, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){
+ {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
+ .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(src)},
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 1,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+ .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(dst)},
+ }});
}
static void
radv_meta_image_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_surf *src,
- struct radv_meta_blit2d_surf *dst,
- unsigned num_rects,
- struct radv_meta_blit2d_rect *rects)
+ struct radv_meta_blit2d_surf *src,
+ struct radv_meta_blit2d_surf *dst, unsigned num_rects,
+ struct radv_meta_blit2d_rect *rects)
{
- VkPipeline pipeline = cmd_buffer->device->meta_state.itoi_r32g32b32.pipeline;
- struct radv_device *device = cmd_buffer->device;
- struct radv_buffer_view src_view, dst_view;
- unsigned src_offset = 0, dst_offset = 0;
- unsigned src_stride, dst_stride;
- VkBuffer src_buffer, dst_buffer;
-
-   /* 96-bit formats are only compatible with themselves. */
- assert(dst->format == VK_FORMAT_R32G32B32_UINT ||
- dst->format == VK_FORMAT_R32G32B32_SINT ||
- dst->format == VK_FORMAT_R32G32B32_SFLOAT);
-
- /* This special itoi path for R32G32B32 formats will write the linear
- * image as a buffer with the same underlying memory. The compute
-     * shader will copy all components separately using an R32 format.
- */
- create_buffer_from_image(cmd_buffer, src,
- VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT,
- &src_buffer);
- create_buffer_from_image(cmd_buffer, dst,
- VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT,
- &dst_buffer);
-
- create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(src_buffer),
- src_offset, src->format, &src_view);
- create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(dst_buffer),
- dst_offset, dst->format, &dst_view);
- itoi_r32g32b32_bind_descriptors(cmd_buffer, &src_view, &dst_view);
-
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
-
- src_stride = get_image_stride_for_r32g32b32(cmd_buffer, src);
- dst_stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);
-
- for (unsigned r = 0; r < num_rects; ++r) {
- unsigned push_constants[6] = {
- rects[r].src_x,
- rects[r].src_y,
- src_stride,
- rects[r].dst_x,
- rects[r].dst_y,
- dst_stride,
- };
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.itoi_r32g32b32.img_p_layout,
- VK_SHADER_STAGE_COMPUTE_BIT, 0, 24,
- push_constants);
-
- radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
- }
-
- radv_DestroyBuffer(radv_device_to_handle(device), src_buffer, NULL);
- radv_DestroyBuffer(radv_device_to_handle(device), dst_buffer, NULL);
+ VkPipeline pipeline = cmd_buffer->device->meta_state.itoi_r32g32b32.pipeline;
+ struct radv_device *device = cmd_buffer->device;
+ struct radv_buffer_view src_view, dst_view;
+ unsigned src_offset = 0, dst_offset = 0;
+ unsigned src_stride, dst_stride;
+ VkBuffer src_buffer, dst_buffer;
+
+   /* 96-bit formats are only compatible with themselves. */
+ assert(dst->format == VK_FORMAT_R32G32B32_UINT || dst->format == VK_FORMAT_R32G32B32_SINT ||
+ dst->format == VK_FORMAT_R32G32B32_SFLOAT);
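+   /* The caller only takes this path when src is one of these formats, and
+    * copies require size-compatible formats, so checking dst is sufficient.
+    */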
+
+ /* This special itoi path for R32G32B32 formats will write the linear
+ * image as a buffer with the same underlying memory. The compute
+    * shader will copy all components separately using an R32 format.
+ */
+ create_buffer_from_image(cmd_buffer, src, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, &src_buffer);
+ create_buffer_from_image(cmd_buffer, dst, VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, &dst_buffer);
+
+ create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(src_buffer), src_offset,
+ src->format, &src_view);
+ create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(dst_buffer), dst_offset,
+ dst->format, &dst_view);
+ itoi_r32g32b32_bind_descriptors(cmd_buffer, &src_view, &dst_view);
+
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
+ pipeline);
+
+ src_stride = get_image_stride_for_r32g32b32(cmd_buffer, src);
+ dst_stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);
+
+ for (unsigned r = 0; r < num_rects; ++r) {
+ unsigned push_constants[6] = {
+ rects[r].src_x, rects[r].src_y, src_stride, rects[r].dst_x, rects[r].dst_y, dst_stride,
+ };
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+ device->meta_state.itoi_r32g32b32.img_p_layout,
+ VK_SHADER_STAGE_COMPUTE_BIT, 0, 24, push_constants);
+
+ radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
+ }
+
+ radv_DestroyBuffer(radv_device_to_handle(device), src_buffer, NULL);
+ radv_DestroyBuffer(radv_device_to_handle(device), dst_buffer, NULL);
}
static void
-itoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image_view *src,
- struct radv_image_view *dst)
+itoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src,
+ struct radv_image_view *dst)
{
- struct radv_device *device = cmd_buffer->device;
-
- radv_meta_push_descriptor_set(cmd_buffer,
- VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.itoi.img_p_layout,
- 0, /* set */
- 2, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]) {
- {
- .sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(src),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- }
- },
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 1,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]) {
- {
- .sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(dst),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- }
- }
- });
+ struct radv_device *device = cmd_buffer->device;
+
+ radv_meta_push_descriptor_set(
+ cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.itoi.img_p_layout, 0, /* set */
+ 2, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .pImageInfo =
+ (VkDescriptorImageInfo[]){
+ {
+ .sampler = VK_NULL_HANDLE,
+ .imageView = radv_image_view_to_handle(src),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }},
+ {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 1,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .pImageInfo = (VkDescriptorImageInfo[]){
+ {
+ .sampler = VK_NULL_HANDLE,
+ .imageView = radv_image_view_to_handle(dst),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }}});
}
void
-radv_meta_image_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_surf *src,
- struct radv_meta_blit2d_surf *dst,
- unsigned num_rects,
- struct radv_meta_blit2d_rect *rects)
+radv_meta_image_to_image_cs(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src,
+ struct radv_meta_blit2d_surf *dst, unsigned num_rects,
+ struct radv_meta_blit2d_rect *rects)
{
- struct radv_device *device = cmd_buffer->device;
- struct radv_image_view src_view, dst_view;
- uint32_t samples = src->image->info.samples;
- uint32_t samples_log2 = ffs(samples) - 1;
-
- if (src->format == VK_FORMAT_R32G32B32_UINT ||
- src->format == VK_FORMAT_R32G32B32_SINT ||
- src->format == VK_FORMAT_R32G32B32_SFLOAT) {
- radv_meta_image_to_image_cs_r32g32b32(cmd_buffer, src, dst,
- num_rects, rects);
- return;
- }
-
- create_iview(cmd_buffer, src, &src_view);
- create_iview(cmd_buffer, dst, &dst_view);
-
- itoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);
-
- VkPipeline pipeline = cmd_buffer->device->meta_state.itoi.pipeline[samples_log2];
- if (device->physical_device->rad_info.chip_class >= GFX9 &&
- (src->image->type == VK_IMAGE_TYPE_3D || dst->image->type == VK_IMAGE_TYPE_3D))
- pipeline = cmd_buffer->device->meta_state.itoi.pipeline_3d;
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
-
- for (unsigned r = 0; r < num_rects; ++r) {
- unsigned push_constants[6] = {
- rects[r].src_x,
- rects[r].src_y,
- src->layer,
- rects[r].dst_x,
- rects[r].dst_y,
- dst->layer,
- };
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.itoi.img_p_layout,
- VK_SHADER_STAGE_COMPUTE_BIT, 0, 24,
- push_constants);
-
- radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
- }
+ struct radv_device *device = cmd_buffer->device;
+ struct radv_image_view src_view, dst_view;
+ uint32_t samples = src->image->info.samples;
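+   /* Sample counts are powers of two, so ffs(samples) - 1 == log2(samples);
+    * it selects the pipeline variant for this sample count.
+    */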
+ uint32_t samples_log2 = ffs(samples) - 1;
+
+ if (src->format == VK_FORMAT_R32G32B32_UINT || src->format == VK_FORMAT_R32G32B32_SINT ||
+ src->format == VK_FORMAT_R32G32B32_SFLOAT) {
+ radv_meta_image_to_image_cs_r32g32b32(cmd_buffer, src, dst, num_rects, rects);
+ return;
+ }
+
+ create_iview(cmd_buffer, src, &src_view);
+ create_iview(cmd_buffer, dst, &dst_view);
+
+ itoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);
+
+ VkPipeline pipeline = cmd_buffer->device->meta_state.itoi.pipeline[samples_log2];
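+   /* GFX9+ addresses 3D images with a z coordinate instead of an array layer,
+    * so copies involving a 3D image need the dedicated 3D shader variant.
+    */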
+ if (device->physical_device->rad_info.chip_class >= GFX9 &&
+ (src->image->type == VK_IMAGE_TYPE_3D || dst->image->type == VK_IMAGE_TYPE_3D))
+ pipeline = cmd_buffer->device->meta_state.itoi.pipeline_3d;
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
+ pipeline);
+
+ for (unsigned r = 0; r < num_rects; ++r) {
+ unsigned push_constants[6] = {
+ rects[r].src_x, rects[r].src_y, src->layer, rects[r].dst_x, rects[r].dst_y, dst->layer,
+ };
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+ device->meta_state.itoi.img_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0,
+ 24, push_constants);
+
+ radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
+ }
}
static void
-cleari_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
- struct radv_buffer_view *view)
+cleari_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer_view *view)
{
- struct radv_device *device = cmd_buffer->device;
-
- radv_meta_push_descriptor_set(cmd_buffer,
- VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.cleari_r32g32b32.img_p_layout,
- 0, /* set */
- 1, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(view) },
- }
- });
+ struct radv_device *device = cmd_buffer->device;
+
+ radv_meta_push_descriptor_set(
+ cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.cleari_r32g32b32.img_p_layout,
+ 0, /* set */
+ 1, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){{
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+ .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(view)},
+ }});
}
static void
radv_meta_clear_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_surf *dst,
- const VkClearColorValue *clear_color)
+ struct radv_meta_blit2d_surf *dst,
+ const VkClearColorValue *clear_color)
{
- VkPipeline pipeline = cmd_buffer->device->meta_state.cleari_r32g32b32.pipeline;
- struct radv_device *device = cmd_buffer->device;
- struct radv_buffer_view dst_view;
- unsigned stride;
- VkBuffer buffer;
-
- /* This special clear path for R32G32B32 formats will write the linear
- * image as a buffer with the same underlying memory. The compute
- * shader will clear all components separately using a R32 format.
- */
- create_buffer_from_image(cmd_buffer, dst,
- VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT,
- &buffer);
-
- create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(buffer),
- 0, dst->format, &dst_view);
- cleari_r32g32b32_bind_descriptors(cmd_buffer, &dst_view);
-
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
-
- stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);
-
- unsigned push_constants[4] = {
- clear_color->uint32[0],
- clear_color->uint32[1],
- clear_color->uint32[2],
- stride,
- };
-
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.cleari_r32g32b32.img_p_layout,
- VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
- push_constants);
-
- radv_unaligned_dispatch(cmd_buffer, dst->image->info.width,
- dst->image->info.height, 1);
-
- radv_DestroyBuffer(radv_device_to_handle(device), buffer, NULL);
+ VkPipeline pipeline = cmd_buffer->device->meta_state.cleari_r32g32b32.pipeline;
+ struct radv_device *device = cmd_buffer->device;
+ struct radv_buffer_view dst_view;
+ unsigned stride;
+ VkBuffer buffer;
+
+ /* This special clear path for R32G32B32 formats will write the linear
+ * image as a buffer with the same underlying memory. The compute
+    * shader will clear all components separately using an R32 format.
+ */
+ create_buffer_from_image(cmd_buffer, dst, VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, &buffer);
+
+ create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(buffer), 0, dst->format,
+ &dst_view);
+ cleari_r32g32b32_bind_descriptors(cmd_buffer, &dst_view);
+
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
+ pipeline);
+
+ stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);
+
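+   /* Push the three color channels plus the stride; R32G32B32 has no alpha. */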
+ unsigned push_constants[4] = {
+ clear_color->uint32[0],
+ clear_color->uint32[1],
+ clear_color->uint32[2],
+ stride,
+ };
+
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+ device->meta_state.cleari_r32g32b32.img_p_layout,
+ VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, push_constants);
+
+ radv_unaligned_dispatch(cmd_buffer, dst->image->info.width, dst->image->info.height, 1);
+
+ radv_DestroyBuffer(radv_device_to_handle(device), buffer, NULL);
}
static void
-cleari_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image_view *dst_iview)
+cleari_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *dst_iview)
{
- struct radv_device *device = cmd_buffer->device;
-
- radv_meta_push_descriptor_set(cmd_buffer,
- VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.cleari.img_p_layout,
- 0, /* set */
- 1, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]) {
- {
- .sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(dst_iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- }
- },
- });
+ struct radv_device *device = cmd_buffer->device;
+
+ radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
+ device->meta_state.cleari.img_p_layout, 0, /* set */
+ 1, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){
+ {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .pImageInfo =
+ (VkDescriptorImageInfo[]){
+ {
+ .sampler = VK_NULL_HANDLE,
+ .imageView = radv_image_view_to_handle(dst_iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }},
+ });
}
void
-radv_meta_clear_image_cs(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_blit2d_surf *dst,
- const VkClearColorValue *clear_color)
+radv_meta_clear_image_cs(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *dst,
+ const VkClearColorValue *clear_color)
{
- struct radv_device *device = cmd_buffer->device;
- struct radv_image_view dst_iview;
- uint32_t samples = dst->image->info.samples;
- uint32_t samples_log2 = ffs(samples) - 1;
-
- if (dst->format == VK_FORMAT_R32G32B32_UINT ||
- dst->format == VK_FORMAT_R32G32B32_SINT ||
- dst->format == VK_FORMAT_R32G32B32_SFLOAT) {
- radv_meta_clear_image_cs_r32g32b32(cmd_buffer, dst, clear_color);
- return;
- }
-
- create_iview(cmd_buffer, dst, &dst_iview);
- cleari_bind_descriptors(cmd_buffer, &dst_iview);
-
- VkPipeline pipeline = cmd_buffer->device->meta_state.cleari.pipeline[samples_log2];
- if (device->physical_device->rad_info.chip_class >= GFX9 &&
- dst->image->type == VK_IMAGE_TYPE_3D)
- pipeline = cmd_buffer->device->meta_state.cleari.pipeline_3d;
-
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
-
- unsigned push_constants[5] = {
- clear_color->uint32[0],
- clear_color->uint32[1],
- clear_color->uint32[2],
- clear_color->uint32[3],
- dst->layer,
- };
-
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.cleari.img_p_layout,
- VK_SHADER_STAGE_COMPUTE_BIT, 0, 20,
- push_constants);
-
- radv_unaligned_dispatch(cmd_buffer, dst->image->info.width, dst->image->info.height, 1);
+ struct radv_device *device = cmd_buffer->device;
+ struct radv_image_view dst_iview;
+ uint32_t samples = dst->image->info.samples;
+ uint32_t samples_log2 = ffs(samples) - 1;
+
+ if (dst->format == VK_FORMAT_R32G32B32_UINT || dst->format == VK_FORMAT_R32G32B32_SINT ||
+ dst->format == VK_FORMAT_R32G32B32_SFLOAT) {
+ radv_meta_clear_image_cs_r32g32b32(cmd_buffer, dst, clear_color);
+ return;
+ }
+
+ create_iview(cmd_buffer, dst, &dst_iview);
+ cleari_bind_descriptors(cmd_buffer, &dst_iview);
+
+ VkPipeline pipeline = cmd_buffer->device->meta_state.cleari.pipeline[samples_log2];
+ if (device->physical_device->rad_info.chip_class >= GFX9 && dst->image->type == VK_IMAGE_TYPE_3D)
+ pipeline = cmd_buffer->device->meta_state.cleari.pipeline_3d;
+
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
+ pipeline);
+
+ unsigned push_constants[5] = {
+ clear_color->uint32[0],
+ clear_color->uint32[1],
+ clear_color->uint32[2],
+ clear_color->uint32[3],
+ dst->layer,
+ };
+
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+ device->meta_state.cleari.img_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 20,
+ push_constants);
+
+ radv_unaligned_dispatch(cmd_buffer, dst->image->info.width, dst->image->info.height, 1);
}
diff --git a/src/amd/vulkan/radv_meta_clear.c b/src/amd/vulkan/radv_meta_clear.c
index f49bf3b23e4..0acbfc12893 100644
--- a/src/amd/vulkan/radv_meta_clear.c
+++ b/src/amd/vulkan/radv_meta_clear.c
@@ -21,1870 +21,1696 @@
* IN THE SOFTWARE.
*/
+#include "nir/nir_builder.h"
#include "radv_debug.h"
#include "radv_meta.h"
#include "radv_private.h"
-#include "nir/nir_builder.h"
#include "util/format_rgb9e5.h"
#include "vk_format.h"
-enum {
- DEPTH_CLEAR_SLOW,
- DEPTH_CLEAR_FAST_EXPCLEAR,
- DEPTH_CLEAR_FAST_NO_EXPCLEAR
-};
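+/* Depth/stencil clear pipeline variants: SLOW leaves db_depth/stencil_clear
+ * unset, while the two FAST variants enable it and differ only in whether
+ * expclear is disabled (see create_depthstencil_pipeline).
+ */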
+enum { DEPTH_CLEAR_SLOW, DEPTH_CLEAR_FAST_EXPCLEAR, DEPTH_CLEAR_FAST_NO_EXPCLEAR };
static void
-build_color_shaders(struct nir_shader **out_vs,
- struct nir_shader **out_fs,
- uint32_t frag_output)
+build_color_shaders(struct nir_shader **out_vs, struct nir_shader **out_fs, uint32_t frag_output)
{
- nir_builder vs_b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, NULL, "meta_clear_color_vs");
- nir_builder fs_b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, "meta_clear_color_fs");
+ nir_builder vs_b =
+ nir_builder_init_simple_shader(MESA_SHADER_VERTEX, NULL, "meta_clear_color_vs");
+ nir_builder fs_b =
+ nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, "meta_clear_color_fs");
- const struct glsl_type *position_type = glsl_vec4_type();
- const struct glsl_type *color_type = glsl_vec4_type();
+ const struct glsl_type *position_type = glsl_vec4_type();
+ const struct glsl_type *color_type = glsl_vec4_type();
- nir_variable *vs_out_pos =
- nir_variable_create(vs_b.shader, nir_var_shader_out, position_type,
- "gl_Position");
- vs_out_pos->data.location = VARYING_SLOT_POS;
+ nir_variable *vs_out_pos =
+ nir_variable_create(vs_b.shader, nir_var_shader_out, position_type, "gl_Position");
+ vs_out_pos->data.location = VARYING_SLOT_POS;
- nir_ssa_def *in_color_load = nir_load_push_constant(&fs_b, 4, 32, nir_imm_int(&fs_b, 0), .range=16);
+ nir_ssa_def *in_color_load =
+ nir_load_push_constant(&fs_b, 4, 32, nir_imm_int(&fs_b, 0), .range = 16);
- nir_variable *fs_out_color =
- nir_variable_create(fs_b.shader, nir_var_shader_out, color_type,
- "f_color");
- fs_out_color->data.location = FRAG_RESULT_DATA0 + frag_output;
+ nir_variable *fs_out_color =
+ nir_variable_create(fs_b.shader, nir_var_shader_out, color_type, "f_color");
+ fs_out_color->data.location = FRAG_RESULT_DATA0 + frag_output;
- nir_store_var(&fs_b, fs_out_color, in_color_load, 0xf);
+ nir_store_var(&fs_b, fs_out_color, in_color_load, 0xf);
- nir_ssa_def *outvec = radv_meta_gen_rect_vertices(&vs_b);
- nir_store_var(&vs_b, vs_out_pos, outvec, 0xf);
+ nir_ssa_def *outvec = radv_meta_gen_rect_vertices(&vs_b);
+ nir_store_var(&vs_b, vs_out_pos, outvec, 0xf);
- const struct glsl_type *layer_type = glsl_int_type();
- nir_variable *vs_out_layer =
- nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type,
- "v_layer");
- vs_out_layer->data.location = VARYING_SLOT_LAYER;
- vs_out_layer->data.interpolation = INTERP_MODE_FLAT;
- nir_ssa_def *inst_id = nir_load_instance_id(&vs_b);
- nir_ssa_def *base_instance = nir_load_base_instance(&vs_b);
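+   /* The caller draws the clear instanced, one instance per layer (or per
+    * view), so the layer output is taken from the instance index.
+    */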
+ const struct glsl_type *layer_type = glsl_int_type();
+ nir_variable *vs_out_layer =
+ nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type, "v_layer");
+ vs_out_layer->data.location = VARYING_SLOT_LAYER;
+ vs_out_layer->data.interpolation = INTERP_MODE_FLAT;
+ nir_ssa_def *inst_id = nir_load_instance_id(&vs_b);
+ nir_ssa_def *base_instance = nir_load_base_instance(&vs_b);
- nir_ssa_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance);
- nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1);
+ nir_ssa_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance);
+ nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1);
- *out_vs = vs_b.shader;
- *out_fs = fs_b.shader;
+ *out_vs = vs_b.shader;
+ *out_fs = fs_b.shader;
}
static VkResult
-create_pipeline(struct radv_device *device,
- struct radv_render_pass *render_pass,
- uint32_t samples,
- struct nir_shader *vs_nir,
- struct nir_shader *fs_nir,
+create_pipeline(struct radv_device *device, struct radv_render_pass *render_pass, uint32_t samples,
+ struct nir_shader *vs_nir, struct nir_shader *fs_nir,
const VkPipelineVertexInputStateCreateInfo *vi_state,
const VkPipelineDepthStencilStateCreateInfo *ds_state,
- const VkPipelineColorBlendStateCreateInfo *cb_state,
- const VkPipelineLayout layout,
- const struct radv_graphics_pipeline_create_info *extra,
- const VkAllocationCallbacks *alloc,
- VkPipeline *pipeline)
+ const VkPipelineColorBlendStateCreateInfo *cb_state, const VkPipelineLayout layout,
+ const struct radv_graphics_pipeline_create_info *extra,
+ const VkAllocationCallbacks *alloc, VkPipeline *pipeline)
{
- VkDevice device_h = radv_device_to_handle(device);
- VkResult result;
-
- result = radv_graphics_pipeline_create(device_h,
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- &(VkGraphicsPipelineCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
- .stageCount = fs_nir ? 2 : 1,
- .pStages = (VkPipelineShaderStageCreateInfo[]) {
- {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_VERTEX_BIT,
- .module = vk_shader_module_handle_from_nir(vs_nir),
- .pName = "main",
- },
- {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
- .module = vk_shader_module_handle_from_nir(fs_nir),
- .pName = "main",
- },
- },
- .pVertexInputState = vi_state,
- .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
- .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
- .primitiveRestartEnable = false,
- },
- .pViewportState = &(VkPipelineViewportStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
- .viewportCount = 1,
- .scissorCount = 1,
- },
- .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
- .rasterizerDiscardEnable = false,
- .polygonMode = VK_POLYGON_MODE_FILL,
- .cullMode = VK_CULL_MODE_NONE,
- .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
- .depthBiasEnable = false,
- },
- .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
- .rasterizationSamples = samples,
- .sampleShadingEnable = false,
- .pSampleMask = NULL,
- .alphaToCoverageEnable = false,
- .alphaToOneEnable = false,
- },
- .pDepthStencilState = ds_state,
- .pColorBlendState = cb_state,
- .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
- /* The meta clear pipeline declares all state as dynamic.
- * As a consequence, vkCmdBindPipeline writes no dynamic state
- * to the cmd buffer. Therefore, at the end of the meta clear,
- * we need only restore dynamic state was vkCmdSet.
- */
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
- .dynamicStateCount = 8,
- .pDynamicStates = (VkDynamicState[]) {
- /* Everything except stencil write mask */
- VK_DYNAMIC_STATE_VIEWPORT,
- VK_DYNAMIC_STATE_SCISSOR,
- VK_DYNAMIC_STATE_LINE_WIDTH,
- VK_DYNAMIC_STATE_DEPTH_BIAS,
- VK_DYNAMIC_STATE_BLEND_CONSTANTS,
- VK_DYNAMIC_STATE_DEPTH_BOUNDS,
- VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
- VK_DYNAMIC_STATE_STENCIL_REFERENCE,
- },
- },
- .layout = layout,
- .flags = 0,
- .renderPass = radv_render_pass_to_handle(render_pass),
- .subpass = 0,
- },
- extra,
- alloc,
- pipeline);
-
- ralloc_free(vs_nir);
- ralloc_free(fs_nir);
-
- return result;
+ VkDevice device_h = radv_device_to_handle(device);
+ VkResult result;
+
+ result = radv_graphics_pipeline_create(
+ device_h, radv_pipeline_cache_to_handle(&device->meta_state.cache),
+ &(VkGraphicsPipelineCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .stageCount = fs_nir ? 2 : 1,
+ .pStages =
+ (VkPipelineShaderStageCreateInfo[]){
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_VERTEX_BIT,
+ .module = vk_shader_module_handle_from_nir(vs_nir),
+ .pName = "main",
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .module = vk_shader_module_handle_from_nir(fs_nir),
+ .pName = "main",
+ },
+ },
+ .pVertexInputState = vi_state,
+ .pInputAssemblyState =
+ &(VkPipelineInputAssemblyStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
+ .primitiveRestartEnable = false,
+ },
+ .pViewportState =
+ &(VkPipelineViewportStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .viewportCount = 1,
+ .scissorCount = 1,
+ },
+ .pRasterizationState =
+ &(VkPipelineRasterizationStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .rasterizerDiscardEnable = false,
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .cullMode = VK_CULL_MODE_NONE,
+ .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
+ .depthBiasEnable = false,
+ },
+ .pMultisampleState =
+ &(VkPipelineMultisampleStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .rasterizationSamples = samples,
+ .sampleShadingEnable = false,
+ .pSampleMask = NULL,
+ .alphaToCoverageEnable = false,
+ .alphaToOneEnable = false,
+ },
+ .pDepthStencilState = ds_state,
+ .pColorBlendState = cb_state,
+ .pDynamicState =
+ &(VkPipelineDynamicStateCreateInfo){
+ /* The meta clear pipeline declares all state as dynamic.
+ * As a consequence, vkCmdBindPipeline writes no dynamic state
+ * to the cmd buffer. Therefore, at the end of the meta clear,
+                * we need only restore the dynamic state that was set via vkCmdSet* calls.
+ */
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .dynamicStateCount = 8,
+ .pDynamicStates =
+ (VkDynamicState[]){
+ /* Everything except stencil write mask */
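+                      /* (the depth/stencil clears bake in a full write mask) */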
+ VK_DYNAMIC_STATE_VIEWPORT,
+ VK_DYNAMIC_STATE_SCISSOR,
+ VK_DYNAMIC_STATE_LINE_WIDTH,
+ VK_DYNAMIC_STATE_DEPTH_BIAS,
+ VK_DYNAMIC_STATE_BLEND_CONSTANTS,
+ VK_DYNAMIC_STATE_DEPTH_BOUNDS,
+ VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
+ VK_DYNAMIC_STATE_STENCIL_REFERENCE,
+ },
+ },
+ .layout = layout,
+ .flags = 0,
+ .renderPass = radv_render_pass_to_handle(render_pass),
+ .subpass = 0,
+ },
+ extra, alloc, pipeline);
+
+ ralloc_free(vs_nir);
+ ralloc_free(fs_nir);
+
+ return result;
}
static VkResult
-create_color_renderpass(struct radv_device *device,
- VkFormat vk_format,
- uint32_t samples,
- VkRenderPass *pass)
+create_color_renderpass(struct radv_device *device, VkFormat vk_format, uint32_t samples,
+ VkRenderPass *pass)
{
- mtx_lock(&device->meta_state.mtx);
- if (*pass) {
- mtx_unlock (&device->meta_state.mtx);
- return VK_SUCCESS;
- }
-
- VkResult result = radv_CreateRenderPass2(radv_device_to_handle(device),
- &(VkRenderPassCreateInfo2) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
- .attachmentCount = 1,
- .pAttachments = &(VkAttachmentDescription2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
- .format = vk_format,
- .samples = samples,
- .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
- .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
- .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
- .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- .subpassCount = 1,
- .pSubpasses = &(VkSubpassDescription2) {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
- .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
- .inputAttachmentCount = 0,
- .colorAttachmentCount = 1,
- .pColorAttachments = &(VkAttachmentReference2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
- .attachment = 0,
- .layout = VK_IMAGE_LAYOUT_GENERAL,
- },
- .pResolveAttachments = NULL,
- .pDepthStencilAttachment = &(VkAttachmentReference2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
- .attachment = VK_ATTACHMENT_UNUSED,
- .layout = VK_IMAGE_LAYOUT_GENERAL,
- },
- .preserveAttachmentCount = 0,
- .pPreserveAttachments = NULL,
- },
- .dependencyCount = 2,
- .pDependencies = (VkSubpassDependency2[]) {
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = VK_SUBPASS_EXTERNAL,
- .dstSubpass = 0,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- },
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = 0,
- .dstSubpass = VK_SUBPASS_EXTERNAL,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- }
- },
- }, &device->meta_state.alloc, pass);
- mtx_unlock(&device->meta_state.mtx);
- return result;
+ mtx_lock(&device->meta_state.mtx);
+ if (*pass) {
+ mtx_unlock(&device->meta_state.mtx);
+ return VK_SUCCESS;
+ }
+
+ VkResult result = radv_CreateRenderPass2(
+ radv_device_to_handle(device),
+ &(VkRenderPassCreateInfo2){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
+ .attachmentCount = 1,
+ .pAttachments =
+ &(VkAttachmentDescription2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
+ .format = vk_format,
+ .samples = samples,
+ .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ .subpassCount = 1,
+ .pSubpasses =
+ &(VkSubpassDescription2){
+ .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .colorAttachmentCount = 1,
+ .pColorAttachments =
+ &(VkAttachmentReference2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
+ .attachment = 0,
+ .layout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ .pResolveAttachments = NULL,
+ .pDepthStencilAttachment =
+ &(VkAttachmentReference2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
+ .attachment = VK_ATTACHMENT_UNUSED,
+ .layout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ .preserveAttachmentCount = 0,
+ .pPreserveAttachments = NULL,
+ },
+ .dependencyCount = 2,
+ .pDependencies =
+ (VkSubpassDependency2[]){{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = VK_SUBPASS_EXTERNAL,
+ .dstSubpass = 0,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0},
+ {.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = 0,
+ .dstSubpass = VK_SUBPASS_EXTERNAL,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0}},
+ },
+ &device->meta_state.alloc, pass);
+ mtx_unlock(&device->meta_state.mtx);
+ return result;
}
static VkResult
-create_color_pipeline(struct radv_device *device,
- uint32_t samples,
- uint32_t frag_output,
- VkPipeline *pipeline,
- VkRenderPass pass)
+create_color_pipeline(struct radv_device *device, uint32_t samples, uint32_t frag_output,
+ VkPipeline *pipeline, VkRenderPass pass)
{
- struct nir_shader *vs_nir;
- struct nir_shader *fs_nir;
- VkResult result;
-
- mtx_lock(&device->meta_state.mtx);
- if (*pipeline) {
- mtx_unlock(&device->meta_state.mtx);
- return VK_SUCCESS;
- }
-
- build_color_shaders(&vs_nir, &fs_nir, frag_output);
-
- const VkPipelineVertexInputStateCreateInfo vi_state = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
- .vertexBindingDescriptionCount = 0,
- .vertexAttributeDescriptionCount = 0,
- };
-
- const VkPipelineDepthStencilStateCreateInfo ds_state = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
- .depthTestEnable = false,
- .depthWriteEnable = false,
- .depthBoundsTestEnable = false,
- .stencilTestEnable = false,
- };
-
- VkPipelineColorBlendAttachmentState blend_attachment_state[MAX_RTS] = { 0 };
- blend_attachment_state[frag_output] = (VkPipelineColorBlendAttachmentState) {
- .blendEnable = false,
- .colorWriteMask = VK_COLOR_COMPONENT_A_BIT |
- VK_COLOR_COMPONENT_R_BIT |
- VK_COLOR_COMPONENT_G_BIT |
- VK_COLOR_COMPONENT_B_BIT,
- };
-
- const VkPipelineColorBlendStateCreateInfo cb_state = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
- .logicOpEnable = false,
- .attachmentCount = MAX_RTS,
- .pAttachments = blend_attachment_state
- };
-
-
- struct radv_graphics_pipeline_create_info extra = {
- .use_rectlist = true,
- };
- result = create_pipeline(device, radv_render_pass_from_handle(pass),
- samples, vs_nir, fs_nir, &vi_state, &ds_state, &cb_state,
- device->meta_state.clear_color_p_layout,
- &extra, &device->meta_state.alloc, pipeline);
-
- mtx_unlock(&device->meta_state.mtx);
- return result;
+ struct nir_shader *vs_nir;
+ struct nir_shader *fs_nir;
+ VkResult result;
+
+ mtx_lock(&device->meta_state.mtx);
+ if (*pipeline) {
+ mtx_unlock(&device->meta_state.mtx);
+ return VK_SUCCESS;
+ }
+
+ build_color_shaders(&vs_nir, &fs_nir, frag_output);
+
+ const VkPipelineVertexInputStateCreateInfo vi_state = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .vertexBindingDescriptionCount = 0,
+ .vertexAttributeDescriptionCount = 0,
+ };
+
+ const VkPipelineDepthStencilStateCreateInfo ds_state = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
+ .depthTestEnable = false,
+ .depthWriteEnable = false,
+ .depthBoundsTestEnable = false,
+ .stencilTestEnable = false,
+ };
+
+ VkPipelineColorBlendAttachmentState blend_attachment_state[MAX_RTS] = {0};
+ blend_attachment_state[frag_output] = (VkPipelineColorBlendAttachmentState){
+ .blendEnable = false,
+ .colorWriteMask = VK_COLOR_COMPONENT_A_BIT | VK_COLOR_COMPONENT_R_BIT |
+ VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT,
+ };
+
+ const VkPipelineColorBlendStateCreateInfo cb_state = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .logicOpEnable = false,
+ .attachmentCount = MAX_RTS,
+ .pAttachments = blend_attachment_state};
+
+ struct radv_graphics_pipeline_create_info extra = {
+ .use_rectlist = true,
+ };
+ result =
+ create_pipeline(device, radv_render_pass_from_handle(pass), samples, vs_nir, fs_nir,
+ &vi_state, &ds_state, &cb_state, device->meta_state.clear_color_p_layout,
+ &extra, &device->meta_state.alloc, pipeline);
+
+ mtx_unlock(&device->meta_state.mtx);
+ return result;
}
static void
finish_meta_clear_htile_mask_state(struct radv_device *device)
{
- struct radv_meta_state *state = &device->meta_state;
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->clear_htile_mask_pipeline,
- &state->alloc);
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->clear_htile_mask_p_layout,
- &state->alloc);
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- state->clear_htile_mask_ds_layout,
- &state->alloc);
+ struct radv_meta_state *state = &device->meta_state;
+
+ radv_DestroyPipeline(radv_device_to_handle(device), state->clear_htile_mask_pipeline,
+ &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_htile_mask_p_layout,
+ &state->alloc);
+ radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->clear_htile_mask_ds_layout,
+ &state->alloc);
}
void
radv_device_finish_meta_clear_state(struct radv_device *device)
{
- struct radv_meta_state *state = &device->meta_state;
-
- for (uint32_t i = 0; i < ARRAY_SIZE(state->clear); ++i) {
- for (uint32_t j = 0; j < ARRAY_SIZE(state->clear[i].color_pipelines); ++j) {
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->clear[i].color_pipelines[j],
- &state->alloc);
- radv_DestroyRenderPass(radv_device_to_handle(device),
- state->clear[i].render_pass[j],
- &state->alloc);
- }
-
- for (uint32_t j = 0; j < NUM_DEPTH_CLEAR_PIPELINES; j++) {
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->clear[i].depth_only_pipeline[j],
- &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->clear[i].stencil_only_pipeline[j],
- &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->clear[i].depthstencil_pipeline[j],
- &state->alloc);
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->clear[i].depth_only_unrestricted_pipeline[j],
- &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->clear[i].stencil_only_unrestricted_pipeline[j],
- &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->clear[i].depthstencil_unrestricted_pipeline[j],
- &state->alloc);
- }
- radv_DestroyRenderPass(radv_device_to_handle(device),
- state->clear[i].depthstencil_rp,
- &state->alloc);
- }
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->clear_color_p_layout,
- &state->alloc);
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->clear_depth_p_layout,
- &state->alloc);
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->clear_depth_unrestricted_p_layout,
- &state->alloc);
-
- finish_meta_clear_htile_mask_state(device);
+ struct radv_meta_state *state = &device->meta_state;
+
+ for (uint32_t i = 0; i < ARRAY_SIZE(state->clear); ++i) {
+ for (uint32_t j = 0; j < ARRAY_SIZE(state->clear[i].color_pipelines); ++j) {
+ radv_DestroyPipeline(radv_device_to_handle(device), state->clear[i].color_pipelines[j],
+ &state->alloc);
+ radv_DestroyRenderPass(radv_device_to_handle(device), state->clear[i].render_pass[j],
+ &state->alloc);
+ }
+
+ for (uint32_t j = 0; j < NUM_DEPTH_CLEAR_PIPELINES; j++) {
+ radv_DestroyPipeline(radv_device_to_handle(device), state->clear[i].depth_only_pipeline[j],
+ &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->clear[i].stencil_only_pipeline[j], &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->clear[i].depthstencil_pipeline[j], &state->alloc);
+
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->clear[i].depth_only_unrestricted_pipeline[j], &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->clear[i].stencil_only_unrestricted_pipeline[j], &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->clear[i].depthstencil_unrestricted_pipeline[j], &state->alloc);
+ }
+ radv_DestroyRenderPass(radv_device_to_handle(device), state->clear[i].depthstencil_rp,
+ &state->alloc);
+ }
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_color_p_layout,
+ &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_depth_p_layout,
+ &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device),
+ state->clear_depth_unrestricted_p_layout, &state->alloc);
+
+ finish_meta_clear_htile_mask_state(device);
}
static void
-emit_color_clear(struct radv_cmd_buffer *cmd_buffer,
- const VkClearAttachment *clear_att,
- const VkClearRect *clear_rect,
- uint32_t view_mask)
+emit_color_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *clear_att,
+ const VkClearRect *clear_rect, uint32_t view_mask)
{
- struct radv_device *device = cmd_buffer->device;
- const struct radv_subpass *subpass = cmd_buffer->state.subpass;
- const uint32_t subpass_att = clear_att->colorAttachment;
- const uint32_t pass_att = subpass->color_attachments[subpass_att].attachment;
- const struct radv_image_view *iview = cmd_buffer->state.attachments ?
- cmd_buffer->state.attachments[pass_att].iview : NULL;
- uint32_t samples, samples_log2;
- VkFormat format;
- unsigned fs_key;
- VkClearColorValue clear_value = clear_att->clearValue.color;
- VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
- VkPipeline pipeline;
-
- /* When a framebuffer is bound to the current command buffer, get the
- * number of samples from it. Otherwise, get the number of samples from
- * the render pass because it's likely a secondary command buffer.
- */
- if (iview) {
- samples = iview->image->info.samples;
- format = iview->vk_format;
- } else {
- samples = cmd_buffer->state.pass->attachments[pass_att].samples;
- format = cmd_buffer->state.pass->attachments[pass_att].format;
- }
-
- samples_log2 = ffs(samples) - 1;
- fs_key = radv_format_meta_fs_key(device, format);
-
- if (fs_key == -1) {
- radv_finishme("color clears incomplete");
- return;
- }
-
- if (device->meta_state.clear[samples_log2].render_pass[fs_key] == VK_NULL_HANDLE) {
- VkResult ret = create_color_renderpass(device, radv_fs_key_format_exemplars[fs_key],
- samples,
- &device->meta_state.clear[samples_log2].render_pass[fs_key]);
- if (ret != VK_SUCCESS) {
- cmd_buffer->record_result = ret;
- return;
- }
- }
-
- if (device->meta_state.clear[samples_log2].color_pipelines[fs_key] == VK_NULL_HANDLE) {
- VkResult ret = create_color_pipeline(device, samples, 0,
- &device->meta_state.clear[samples_log2].color_pipelines[fs_key],
- device->meta_state.clear[samples_log2].render_pass[fs_key]);
- if (ret != VK_SUCCESS) {
- cmd_buffer->record_result = ret;
- return;
- }
- }
-
- pipeline = device->meta_state.clear[samples_log2].color_pipelines[fs_key];
- if (!pipeline) {
- radv_finishme("color clears incomplete");
- return;
- }
- assert(samples_log2 < ARRAY_SIZE(device->meta_state.clear));
- assert(pipeline);
- assert(clear_att->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
- assert(clear_att->colorAttachment < subpass->color_count);
-
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.clear_color_p_layout,
- VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16,
- &clear_value);
-
- struct radv_subpass clear_subpass = {
- .color_count = 1,
- .color_attachments = (struct radv_subpass_attachment[]) {
- subpass->color_attachments[clear_att->colorAttachment]
- },
- .depth_stencil_attachment = NULL,
- };
-
- radv_cmd_buffer_set_subpass(cmd_buffer, &clear_subpass);
-
- radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
- pipeline);
-
- radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
- .x = clear_rect->rect.offset.x,
- .y = clear_rect->rect.offset.y,
- .width = clear_rect->rect.extent.width,
- .height = clear_rect->rect.extent.height,
- .minDepth = 0.0f,
- .maxDepth = 1.0f
- });
-
- radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &clear_rect->rect);
-
- if (view_mask) {
- u_foreach_bit(i, view_mask)
- radv_CmdDraw(cmd_buffer_h, 3, 1, 0, i);
- } else {
- radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, clear_rect->baseArrayLayer);
- }
-
- radv_cmd_buffer_set_subpass(cmd_buffer, subpass);
+ struct radv_device *device = cmd_buffer->device;
+ const struct radv_subpass *subpass = cmd_buffer->state.subpass;
+ const uint32_t subpass_att = clear_att->colorAttachment;
+ const uint32_t pass_att = subpass->color_attachments[subpass_att].attachment;
+ const struct radv_image_view *iview =
+ cmd_buffer->state.attachments ? cmd_buffer->state.attachments[pass_att].iview : NULL;
+ uint32_t samples, samples_log2;
+ VkFormat format;
+ unsigned fs_key;
+ VkClearColorValue clear_value = clear_att->clearValue.color;
+ VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
+ VkPipeline pipeline;
+
+ /* When a framebuffer is bound to the current command buffer, get the
+ * number of samples from it. Otherwise, get the number of samples from
+ * the render pass because it's likely a secondary command buffer.
+ */
+ if (iview) {
+ samples = iview->image->info.samples;
+ format = iview->vk_format;
+ } else {
+ samples = cmd_buffer->state.pass->attachments[pass_att].samples;
+ format = cmd_buffer->state.pass->attachments[pass_att].format;
+ }
+
+ samples_log2 = ffs(samples) - 1;
+ fs_key = radv_format_meta_fs_key(device, format);
+
+ if (fs_key == -1) {
+ radv_finishme("color clears incomplete");
+ return;
+ }
+
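+   /* Render passes and pipelines for clears are created lazily, keyed on the
+    * sample count and on an exemplar format that shares the same fs_key.
+    */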
+ if (device->meta_state.clear[samples_log2].render_pass[fs_key] == VK_NULL_HANDLE) {
+ VkResult ret =
+ create_color_renderpass(device, radv_fs_key_format_exemplars[fs_key], samples,
+ &device->meta_state.clear[samples_log2].render_pass[fs_key]);
+ if (ret != VK_SUCCESS) {
+ cmd_buffer->record_result = ret;
+ return;
+ }
+ }
+
+ if (device->meta_state.clear[samples_log2].color_pipelines[fs_key] == VK_NULL_HANDLE) {
+ VkResult ret = create_color_pipeline(
+ device, samples, 0, &device->meta_state.clear[samples_log2].color_pipelines[fs_key],
+ device->meta_state.clear[samples_log2].render_pass[fs_key]);
+ if (ret != VK_SUCCESS) {
+ cmd_buffer->record_result = ret;
+ return;
+ }
+ }
+
+ pipeline = device->meta_state.clear[samples_log2].color_pipelines[fs_key];
+ if (!pipeline) {
+ radv_finishme("color clears incomplete");
+ return;
+ }
+ assert(samples_log2 < ARRAY_SIZE(device->meta_state.clear));
+ assert(pipeline);
+ assert(clear_att->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
+ assert(clear_att->colorAttachment < subpass->color_count);
+
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+ device->meta_state.clear_color_p_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0,
+ 16, &clear_value);
+
+ struct radv_subpass clear_subpass = {
+ .color_count = 1,
+ .color_attachments =
+ (struct radv_subpass_attachment[]){subpass->color_attachments[clear_att->colorAttachment]},
+ .depth_stencil_attachment = NULL,
+ };
+
+ radv_cmd_buffer_set_subpass(cmd_buffer, &clear_subpass);
+
+ radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
+
+ radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
+ &(VkViewport){.x = clear_rect->rect.offset.x,
+ .y = clear_rect->rect.offset.y,
+ .width = clear_rect->rect.extent.width,
+ .height = clear_rect->rect.extent.height,
+ .minDepth = 0.0f,
+ .maxDepth = 1.0f});
+
+ radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &clear_rect->rect);
+
+ if (view_mask) {
+ u_foreach_bit(i, view_mask) radv_CmdDraw(cmd_buffer_h, 3, 1, 0, i);
+ } else {
+ radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, clear_rect->baseArrayLayer);
+ }
+
+ radv_cmd_buffer_set_subpass(cmd_buffer, subpass);
}
-
static void
-build_depthstencil_shader(struct nir_shader **out_vs,
- struct nir_shader **out_fs,
- bool unrestricted)
+build_depthstencil_shader(struct nir_shader **out_vs, struct nir_shader **out_fs, bool unrestricted)
{
- nir_builder vs_b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, NULL,
- unrestricted ? "meta_clear_depthstencil_unrestricted_vs"
- : "meta_clear_depthstencil_vs");
- nir_builder fs_b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL,
- unrestricted ? "meta_clear_depthstencil_unrestricted_fs"
- : "meta_clear_depthstencil_fs");
-
- const struct glsl_type *position_out_type = glsl_vec4_type();
-
- nir_variable *vs_out_pos =
- nir_variable_create(vs_b.shader, nir_var_shader_out, position_out_type,
- "gl_Position");
- vs_out_pos->data.location = VARYING_SLOT_POS;
-
- nir_ssa_def *z;
- if (unrestricted) {
- nir_ssa_def *in_color_load = nir_load_push_constant(&fs_b, 1, 32, nir_imm_int(&fs_b, 0), .range=4);
-
- nir_variable *fs_out_depth =
- nir_variable_create(fs_b.shader, nir_var_shader_out,
- glsl_int_type(), "f_depth");
- fs_out_depth->data.location = FRAG_RESULT_DEPTH;
- nir_store_var(&fs_b, fs_out_depth, in_color_load, 0x1);
-
- z = nir_imm_float(&vs_b, 0.0);
- } else {
- z = nir_load_push_constant(&vs_b, 1, 32, nir_imm_int(&vs_b, 0), .range=4);
- }
-
- nir_ssa_def *outvec = radv_meta_gen_rect_vertices_comp2(&vs_b, z);
- nir_store_var(&vs_b, vs_out_pos, outvec, 0xf);
-
- const struct glsl_type *layer_type = glsl_int_type();
- nir_variable *vs_out_layer =
- nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type,
- "v_layer");
- vs_out_layer->data.location = VARYING_SLOT_LAYER;
- vs_out_layer->data.interpolation = INTERP_MODE_FLAT;
- nir_ssa_def *inst_id = nir_load_instance_id(&vs_b);
- nir_ssa_def *base_instance = nir_load_base_instance(&vs_b);
-
- nir_ssa_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance);
- nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1);
-
- *out_vs = vs_b.shader;
- *out_fs = fs_b.shader;
+ nir_builder vs_b = nir_builder_init_simple_shader(
+ MESA_SHADER_VERTEX, NULL,
+ unrestricted ? "meta_clear_depthstencil_unrestricted_vs" : "meta_clear_depthstencil_vs");
+ nir_builder fs_b = nir_builder_init_simple_shader(
+ MESA_SHADER_FRAGMENT, NULL,
+ unrestricted ? "meta_clear_depthstencil_unrestricted_fs" : "meta_clear_depthstencil_fs");
+
+ const struct glsl_type *position_out_type = glsl_vec4_type();
+
+ nir_variable *vs_out_pos =
+ nir_variable_create(vs_b.shader, nir_var_shader_out, position_out_type, "gl_Position");
+ vs_out_pos->data.location = VARYING_SLOT_POS;
+
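+   /* With VK_EXT_depth_range_unrestricted the clear depth may lie outside
+    * [0, 1], so it is exported from the fragment shader instead of being
+    * baked into the vertex position.
+    */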
+ nir_ssa_def *z;
+ if (unrestricted) {
+ nir_ssa_def *in_color_load =
+ nir_load_push_constant(&fs_b, 1, 32, nir_imm_int(&fs_b, 0), .range = 4);
+
+ nir_variable *fs_out_depth =
+ nir_variable_create(fs_b.shader, nir_var_shader_out, glsl_int_type(), "f_depth");
+ fs_out_depth->data.location = FRAG_RESULT_DEPTH;
+ nir_store_var(&fs_b, fs_out_depth, in_color_load, 0x1);
+
+ z = nir_imm_float(&vs_b, 0.0);
+ } else {
+ z = nir_load_push_constant(&vs_b, 1, 32, nir_imm_int(&vs_b, 0), .range = 4);
+ }
+
+ nir_ssa_def *outvec = radv_meta_gen_rect_vertices_comp2(&vs_b, z);
+ nir_store_var(&vs_b, vs_out_pos, outvec, 0xf);
+
+ const struct glsl_type *layer_type = glsl_int_type();
+ nir_variable *vs_out_layer =
+ nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type, "v_layer");
+ vs_out_layer->data.location = VARYING_SLOT_LAYER;
+ vs_out_layer->data.interpolation = INTERP_MODE_FLAT;
+ nir_ssa_def *inst_id = nir_load_instance_id(&vs_b);
+ nir_ssa_def *base_instance = nir_load_base_instance(&vs_b);
+
+ nir_ssa_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance);
+ nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1);
+
+ *out_vs = vs_b.shader;
+ *out_fs = fs_b.shader;
}
static VkResult
-create_depthstencil_renderpass(struct radv_device *device,
- uint32_t samples,
- VkRenderPass *render_pass)
+create_depthstencil_renderpass(struct radv_device *device, uint32_t samples,
+ VkRenderPass *render_pass)
{
- mtx_lock(&device->meta_state.mtx);
- if (*render_pass) {
- mtx_unlock(&device->meta_state.mtx);
- return VK_SUCCESS;
- }
-
- VkResult result = radv_CreateRenderPass2(radv_device_to_handle(device),
- &(VkRenderPassCreateInfo2) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
- .attachmentCount = 1,
- .pAttachments = &(VkAttachmentDescription2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
- .format = VK_FORMAT_D32_SFLOAT_S8_UINT,
- .samples = samples,
- .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
- .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
- .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
- .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- .subpassCount = 1,
- .pSubpasses = &(VkSubpassDescription2) {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
- .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
- .inputAttachmentCount = 0,
- .colorAttachmentCount = 0,
- .pColorAttachments = NULL,
- .pResolveAttachments = NULL,
- .pDepthStencilAttachment = &(VkAttachmentReference2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
- .attachment = 0,
- .layout = VK_IMAGE_LAYOUT_GENERAL,
- },
- .preserveAttachmentCount = 0,
- .pPreserveAttachments = NULL,
- },
- .dependencyCount = 2,
- .pDependencies = (VkSubpassDependency2[]) {
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = VK_SUBPASS_EXTERNAL,
- .dstSubpass = 0,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- },
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = 0,
- .dstSubpass = VK_SUBPASS_EXTERNAL,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- }
- }
- }, &device->meta_state.alloc, render_pass);
- mtx_unlock(&device->meta_state.mtx);
- return result;
+ mtx_lock(&device->meta_state.mtx);
+ if (*render_pass) {
+ mtx_unlock(&device->meta_state.mtx);
+ return VK_SUCCESS;
+ }
+
+ VkResult result = radv_CreateRenderPass2(
+ radv_device_to_handle(device),
+ &(VkRenderPassCreateInfo2){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
+ .attachmentCount = 1,
+ .pAttachments =
+ &(VkAttachmentDescription2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
+ .format = VK_FORMAT_D32_SFLOAT_S8_UINT,
+ .samples = samples,
+ .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ .subpassCount = 1,
+ .pSubpasses =
+ &(VkSubpassDescription2){
+ .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .colorAttachmentCount = 0,
+ .pColorAttachments = NULL,
+ .pResolveAttachments = NULL,
+ .pDepthStencilAttachment =
+ &(VkAttachmentReference2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
+ .attachment = 0,
+ .layout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ .preserveAttachmentCount = 0,
+ .pPreserveAttachments = NULL,
+ },
+ .dependencyCount = 2,
+ .pDependencies =
+ (VkSubpassDependency2[]){{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = VK_SUBPASS_EXTERNAL,
+ .dstSubpass = 0,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0},
+ {.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = 0,
+ .dstSubpass = VK_SUBPASS_EXTERNAL,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0}}},
+ &device->meta_state.alloc, render_pass);
+ mtx_unlock(&device->meta_state.mtx);
+ return result;
}
static VkResult
-create_depthstencil_pipeline(struct radv_device *device,
- VkImageAspectFlags aspects,
- uint32_t samples,
- int index,
- bool unrestricted,
- VkPipeline *pipeline,
- VkRenderPass render_pass)
+create_depthstencil_pipeline(struct radv_device *device, VkImageAspectFlags aspects,
+ uint32_t samples, int index, bool unrestricted, VkPipeline *pipeline,
+ VkRenderPass render_pass)
{
- struct nir_shader *vs_nir, *fs_nir;
- VkResult result;
-
- mtx_lock(&device->meta_state.mtx);
- if (*pipeline) {
- mtx_unlock(&device->meta_state.mtx);
- return VK_SUCCESS;
- }
-
- build_depthstencil_shader(&vs_nir, &fs_nir, unrestricted);
-
- const VkPipelineVertexInputStateCreateInfo vi_state = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
- .vertexBindingDescriptionCount = 0,
- .vertexAttributeDescriptionCount = 0,
- };
-
- const VkPipelineDepthStencilStateCreateInfo ds_state = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
- .depthTestEnable = !!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT),
- .depthCompareOp = VK_COMPARE_OP_ALWAYS,
- .depthWriteEnable = !!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT),
- .depthBoundsTestEnable = false,
- .stencilTestEnable = !!(aspects & VK_IMAGE_ASPECT_STENCIL_BIT),
- .front = {
- .passOp = VK_STENCIL_OP_REPLACE,
- .compareOp = VK_COMPARE_OP_ALWAYS,
- .writeMask = UINT32_MAX,
- .reference = 0, /* dynamic */
- },
- .back = { 0 /* dont care */ },
- };
-
- const VkPipelineColorBlendStateCreateInfo cb_state = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
- .logicOpEnable = false,
- .attachmentCount = 0,
- .pAttachments = NULL,
- };
-
- struct radv_graphics_pipeline_create_info extra = {
- .use_rectlist = true,
- };
-
- if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
- extra.db_depth_clear = index == DEPTH_CLEAR_SLOW ? false : true;
- extra.db_depth_disable_expclear = index == DEPTH_CLEAR_FAST_NO_EXPCLEAR ? true : false;
- }
- if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
- extra.db_stencil_clear = index == DEPTH_CLEAR_SLOW ? false : true;
- extra.db_stencil_disable_expclear = index == DEPTH_CLEAR_FAST_NO_EXPCLEAR ? true : false;
- }
- result = create_pipeline(device, radv_render_pass_from_handle(render_pass),
- samples, vs_nir, fs_nir, &vi_state, &ds_state, &cb_state,
- device->meta_state.clear_depth_p_layout,
- &extra, &device->meta_state.alloc, pipeline);
-
- mtx_unlock(&device->meta_state.mtx);
- return result;
+ struct nir_shader *vs_nir, *fs_nir;
+ VkResult result;
+
+ mtx_lock(&device->meta_state.mtx);
+ if (*pipeline) {
+ mtx_unlock(&device->meta_state.mtx);
+ return VK_SUCCESS;
+ }
+
+ build_depthstencil_shader(&vs_nir, &fs_nir, unrestricted);
+
+ const VkPipelineVertexInputStateCreateInfo vi_state = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .vertexBindingDescriptionCount = 0,
+ .vertexAttributeDescriptionCount = 0,
+ };
+
+ const VkPipelineDepthStencilStateCreateInfo ds_state = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
+ .depthTestEnable = !!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT),
+ .depthCompareOp = VK_COMPARE_OP_ALWAYS,
+ .depthWriteEnable = !!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT),
+ .depthBoundsTestEnable = false,
+ .stencilTestEnable = !!(aspects & VK_IMAGE_ASPECT_STENCIL_BIT),
+ .front =
+ {
+ .passOp = VK_STENCIL_OP_REPLACE,
+ .compareOp = VK_COMPARE_OP_ALWAYS,
+ .writeMask = UINT32_MAX,
+ .reference = 0, /* dynamic */
+ },
+ .back = {0 /* don't care */},
+ };
+
+ const VkPipelineColorBlendStateCreateInfo cb_state = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .logicOpEnable = false,
+ .attachmentCount = 0,
+ .pAttachments = NULL,
+ };
+
+ struct radv_graphics_pipeline_create_info extra = {
+ .use_rectlist = true,
+ };
+
+ if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
+ extra.db_depth_clear = index == DEPTH_CLEAR_SLOW ? false : true;
+ extra.db_depth_disable_expclear = index == DEPTH_CLEAR_FAST_NO_EXPCLEAR ? true : false;
+ }
+ if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
+ extra.db_stencil_clear = index == DEPTH_CLEAR_SLOW ? false : true;
+ extra.db_stencil_disable_expclear = index == DEPTH_CLEAR_FAST_NO_EXPCLEAR ? true : false;
+ }
+ result =
+ create_pipeline(device, radv_render_pass_from_handle(render_pass), samples, vs_nir, fs_nir,
+ &vi_state, &ds_state, &cb_state, device->meta_state.clear_depth_p_layout,
+ &extra, &device->meta_state.alloc, pipeline);
+
+ mtx_unlock(&device->meta_state.mtx);
+ return result;
}
-static bool depth_view_can_fast_clear(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_image_view *iview,
- VkImageAspectFlags aspects,
- VkImageLayout layout,
- bool in_render_loop,
- const VkClearRect *clear_rect,
- VkClearDepthStencilValue clear_value)
+static bool
+depth_view_can_fast_clear(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
+ VkImageAspectFlags aspects, VkImageLayout layout, bool in_render_loop,
+ const VkClearRect *clear_rect, VkClearDepthStencilValue clear_value)
{
- if (!iview)
- return false;
-
- uint32_t queue_mask = radv_image_queue_family_mask(iview->image,
- cmd_buffer->queue_family_index,
- cmd_buffer->queue_family_index);
- if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||
- clear_rect->rect.extent.width != iview->extent.width ||
- clear_rect->rect.extent.height != iview->extent.height)
- return false;
- if (radv_image_is_tc_compat_htile(iview->image) &&
- (((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && clear_value.depth != 0.0 &&
- clear_value.depth != 1.0) ||
- ((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && clear_value.stencil != 0)))
- return false;
- if (radv_htile_enabled(iview->image, iview->base_mip) &&
- iview->base_mip == 0 &&
- iview->base_layer == 0 &&
- iview->layer_count == iview->image->info.array_size &&
- radv_layout_is_htile_compressed(cmd_buffer->device, iview->image, layout, in_render_loop, queue_mask) &&
- radv_image_extent_compare(iview->image, &iview->extent))
- return true;
- return false;
+ if (!iview)
+ return false;
+
+ uint32_t queue_mask = radv_image_queue_family_mask(iview->image, cmd_buffer->queue_family_index,
+ cmd_buffer->queue_family_index);
+ if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||
+ clear_rect->rect.extent.width != iview->extent.width ||
+ clear_rect->rect.extent.height != iview->extent.height)
+ return false;
+ if (radv_image_is_tc_compat_htile(iview->image) &&
+ (((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && clear_value.depth != 0.0 &&
+ clear_value.depth != 1.0) ||
+ ((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && clear_value.stencil != 0)))
+ return false;
+ if (radv_htile_enabled(iview->image, iview->base_mip) && iview->base_mip == 0 &&
+ iview->base_layer == 0 && iview->layer_count == iview->image->info.array_size &&
+ radv_layout_is_htile_compressed(cmd_buffer->device, iview->image, layout, in_render_loop,
+ queue_mask) &&
+ radv_image_extent_compare(iview->image, &iview->extent))
+ return true;
+ return false;
}
static VkPipeline
-pick_depthstencil_pipeline(struct radv_cmd_buffer *cmd_buffer,
- struct radv_meta_state *meta_state,
- const struct radv_image_view *iview,
- int samples_log2,
- VkImageAspectFlags aspects,
- VkImageLayout layout,
- bool in_render_loop,
- const VkClearRect *clear_rect,
- VkClearDepthStencilValue clear_value)
+pick_depthstencil_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_state *meta_state,
+ const struct radv_image_view *iview, int samples_log2,
+ VkImageAspectFlags aspects, VkImageLayout layout, bool in_render_loop,
+ const VkClearRect *clear_rect, VkClearDepthStencilValue clear_value)
{
- bool fast = depth_view_can_fast_clear(cmd_buffer, iview, aspects, layout,
- in_render_loop, clear_rect, clear_value);
- bool unrestricted = cmd_buffer->device->vk.enabled_extensions.EXT_depth_range_unrestricted;
- int index = DEPTH_CLEAR_SLOW;
- VkPipeline *pipeline;
-
- if (fast) {
- /* we don't know the previous clear values, so we always use
- * the NO_EXPCLEAR path */
- index = DEPTH_CLEAR_FAST_NO_EXPCLEAR;
- }
-
- switch (aspects) {
- case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
- pipeline = unrestricted ?
- &meta_state->clear[samples_log2].depthstencil_unrestricted_pipeline[index] :
- &meta_state->clear[samples_log2].depthstencil_pipeline[index];
- break;
- case VK_IMAGE_ASPECT_DEPTH_BIT:
- pipeline = unrestricted ?
- &meta_state->clear[samples_log2].depth_only_unrestricted_pipeline[index] :
- &meta_state->clear[samples_log2].depth_only_pipeline[index];
- break;
- case VK_IMAGE_ASPECT_STENCIL_BIT:
- pipeline = unrestricted ?
- &meta_state->clear[samples_log2].stencil_only_unrestricted_pipeline[index] :
- &meta_state->clear[samples_log2].stencil_only_pipeline[index];
- break;
- default:
- unreachable("expected depth or stencil aspect");
- }
-
- if (cmd_buffer->device->meta_state.clear[samples_log2].depthstencil_rp == VK_NULL_HANDLE) {
- VkResult ret = create_depthstencil_renderpass(cmd_buffer->device, 1u << samples_log2,
- &cmd_buffer->device->meta_state.clear[samples_log2].depthstencil_rp);
- if (ret != VK_SUCCESS) {
- cmd_buffer->record_result = ret;
- return VK_NULL_HANDLE;
- }
- }
-
- if (*pipeline == VK_NULL_HANDLE) {
- VkResult ret = create_depthstencil_pipeline(cmd_buffer->device, aspects, 1u << samples_log2, index, unrestricted,
- pipeline, cmd_buffer->device->meta_state.clear[samples_log2].depthstencil_rp);
- if (ret != VK_SUCCESS) {
- cmd_buffer->record_result = ret;
- return VK_NULL_HANDLE;
- }
- }
- return *pipeline;
+ bool fast = depth_view_can_fast_clear(cmd_buffer, iview, aspects, layout, in_render_loop,
+ clear_rect, clear_value);
+ bool unrestricted = cmd_buffer->device->vk.enabled_extensions.EXT_depth_range_unrestricted;
+ int index = DEPTH_CLEAR_SLOW;
+ VkPipeline *pipeline;
+
+ if (fast) {
+ /* we don't know the previous clear values, so we always use
+ * the NO_EXPCLEAR path */
+ index = DEPTH_CLEAR_FAST_NO_EXPCLEAR;
+ }
+
+ switch (aspects) {
+ case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
+ pipeline = unrestricted
+ ? &meta_state->clear[samples_log2].depthstencil_unrestricted_pipeline[index]
+ : &meta_state->clear[samples_log2].depthstencil_pipeline[index];
+ break;
+ case VK_IMAGE_ASPECT_DEPTH_BIT:
+ pipeline = unrestricted
+ ? &meta_state->clear[samples_log2].depth_only_unrestricted_pipeline[index]
+ : &meta_state->clear[samples_log2].depth_only_pipeline[index];
+ break;
+ case VK_IMAGE_ASPECT_STENCIL_BIT:
+ pipeline = unrestricted
+ ? &meta_state->clear[samples_log2].stencil_only_unrestricted_pipeline[index]
+ : &meta_state->clear[samples_log2].stencil_only_pipeline[index];
+ break;
+ default:
+ unreachable("expected depth or stencil aspect");
+ }
+
+ if (cmd_buffer->device->meta_state.clear[samples_log2].depthstencil_rp == VK_NULL_HANDLE) {
+ VkResult ret = create_depthstencil_renderpass(
+ cmd_buffer->device, 1u << samples_log2,
+ &cmd_buffer->device->meta_state.clear[samples_log2].depthstencil_rp);
+ if (ret != VK_SUCCESS) {
+ cmd_buffer->record_result = ret;
+ return VK_NULL_HANDLE;
+ }
+ }
+
+ if (*pipeline == VK_NULL_HANDLE) {
+ VkResult ret = create_depthstencil_pipeline(
+ cmd_buffer->device, aspects, 1u << samples_log2, index, unrestricted, pipeline,
+ cmd_buffer->device->meta_state.clear[samples_log2].depthstencil_rp);
+ if (ret != VK_SUCCESS) {
+ cmd_buffer->record_result = ret;
+ return VK_NULL_HANDLE;
+ }
+ }
+ return *pipeline;
}
static void
-emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer,
- const VkClearAttachment *clear_att,
- const VkClearRect *clear_rect,
- struct radv_subpass_attachment *ds_att,
+emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *clear_att,
+ const VkClearRect *clear_rect, struct radv_subpass_attachment *ds_att,
uint32_t view_mask)
{
- struct radv_device *device = cmd_buffer->device;
- struct radv_meta_state *meta_state = &device->meta_state;
- const struct radv_subpass *subpass = cmd_buffer->state.subpass;
- const uint32_t pass_att = ds_att->attachment;
- VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;
- VkImageAspectFlags aspects = clear_att->aspectMask;
- const struct radv_image_view *iview = cmd_buffer->state.attachments ?
- cmd_buffer->state.attachments[pass_att].iview : NULL;
- uint32_t samples, samples_log2;
- VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
-
- /* When a framebuffer is bound to the current command buffer, get the
- * number of samples from it. Otherwise, get the number of samples from
- * the render pass because it's likely a secondary command buffer.
- */
- if (iview) {
- samples = iview->image->info.samples;
- } else {
- samples = cmd_buffer->state.pass->attachments[pass_att].samples;
- }
-
- samples_log2 = ffs(samples) - 1;
-
- assert(pass_att != VK_ATTACHMENT_UNUSED);
-
- if (!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
- clear_value.depth = 1.0f;
-
- if (cmd_buffer->device->vk.enabled_extensions.EXT_depth_range_unrestricted) {
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.clear_depth_unrestricted_p_layout,
- VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4,
- &clear_value.depth);
- } else {
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.clear_depth_p_layout,
- VK_SHADER_STAGE_VERTEX_BIT, 0, 4,
- &clear_value.depth);
- }
-
- uint32_t prev_reference = cmd_buffer->state.dynamic.stencil_reference.front;
- if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
- radv_CmdSetStencilReference(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT,
- clear_value.stencil);
- }
-
- VkPipeline pipeline = pick_depthstencil_pipeline(cmd_buffer,
- meta_state,
- iview,
- samples_log2,
- aspects,
- ds_att->layout,
- ds_att->in_render_loop,
- clear_rect,
- clear_value);
- if (!pipeline)
- return;
-
- struct radv_subpass clear_subpass = {
- .color_count = 0,
- .color_attachments = NULL,
- .depth_stencil_attachment = ds_att,
- };
-
- radv_cmd_buffer_set_subpass(cmd_buffer, &clear_subpass);
-
- radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
- pipeline);
-
- if (depth_view_can_fast_clear(cmd_buffer, iview, aspects,
- ds_att->layout, ds_att->in_render_loop,
- clear_rect, clear_value))
- radv_update_ds_clear_metadata(cmd_buffer, iview,
- clear_value, aspects);
-
- radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
- .x = clear_rect->rect.offset.x,
- .y = clear_rect->rect.offset.y,
- .width = clear_rect->rect.extent.width,
- .height = clear_rect->rect.extent.height,
- .minDepth = 0.0f,
- .maxDepth = 1.0f
- });
-
- radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &clear_rect->rect);
-
- if (view_mask) {
- u_foreach_bit(i, view_mask)
- radv_CmdDraw(cmd_buffer_h, 3, 1, 0, i);
- } else {
- radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, clear_rect->baseArrayLayer);
- }
-
- if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
- radv_CmdSetStencilReference(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT,
- prev_reference);
- }
-
- radv_cmd_buffer_set_subpass(cmd_buffer, subpass);
+ struct radv_device *device = cmd_buffer->device;
+ struct radv_meta_state *meta_state = &device->meta_state;
+ const struct radv_subpass *subpass = cmd_buffer->state.subpass;
+ const uint32_t pass_att = ds_att->attachment;
+ VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;
+ VkImageAspectFlags aspects = clear_att->aspectMask;
+ const struct radv_image_view *iview =
+ cmd_buffer->state.attachments ? cmd_buffer->state.attachments[pass_att].iview : NULL;
+ uint32_t samples, samples_log2;
+ VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
+
+ /* When a framebuffer is bound to the current command buffer, get the
+ * number of samples from it. Otherwise, get the number of samples from
+ * the render pass because it's likely a secondary command buffer.
+ */
+ if (iview) {
+ samples = iview->image->info.samples;
+ } else {
+ samples = cmd_buffer->state.pass->attachments[pass_att].samples;
+ }
+
+ samples_log2 = ffs(samples) - 1;
+
+ assert(pass_att != VK_ATTACHMENT_UNUSED);
+
+ if (!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
+ clear_value.depth = 1.0f;
+
+ if (cmd_buffer->device->vk.enabled_extensions.EXT_depth_range_unrestricted) {
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+ device->meta_state.clear_depth_unrestricted_p_layout,
+ VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4, &clear_value.depth);
+ } else {
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+ device->meta_state.clear_depth_p_layout, VK_SHADER_STAGE_VERTEX_BIT, 0,
+ 4, &clear_value.depth);
+ }
+
+ uint32_t prev_reference = cmd_buffer->state.dynamic.stencil_reference.front;
+ if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
+ radv_CmdSetStencilReference(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT, clear_value.stencil);
+ }
+
+ VkPipeline pipeline =
+ pick_depthstencil_pipeline(cmd_buffer, meta_state, iview, samples_log2, aspects,
+ ds_att->layout, ds_att->in_render_loop, clear_rect, clear_value);
+ if (!pipeline)
+ return;
+
+ struct radv_subpass clear_subpass = {
+ .color_count = 0,
+ .color_attachments = NULL,
+ .depth_stencil_attachment = ds_att,
+ };
+
+ radv_cmd_buffer_set_subpass(cmd_buffer, &clear_subpass);
+
+ radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
+
+ if (depth_view_can_fast_clear(cmd_buffer, iview, aspects, ds_att->layout, ds_att->in_render_loop,
+ clear_rect, clear_value))
+ radv_update_ds_clear_metadata(cmd_buffer, iview, clear_value, aspects);
+
+ radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
+ &(VkViewport){.x = clear_rect->rect.offset.x,
+ .y = clear_rect->rect.offset.y,
+ .width = clear_rect->rect.extent.width,
+ .height = clear_rect->rect.extent.height,
+ .minDepth = 0.0f,
+ .maxDepth = 1.0f});
+
+ radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &clear_rect->rect);
+
+ if (view_mask) {
+ u_foreach_bit(i, view_mask) radv_CmdDraw(cmd_buffer_h, 3, 1, 0, i);
+ } else {
+ radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, clear_rect->baseArrayLayer);
+ }
+
+ if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
+ radv_CmdSetStencilReference(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT, prev_reference);
+ }
+
+ radv_cmd_buffer_set_subpass(cmd_buffer, subpass);
}
static uint32_t
clear_htile_mask(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image,
- struct radeon_winsys_bo *bo, uint64_t offset, uint64_t size,
- uint32_t htile_value, uint32_t htile_mask)
+ struct radeon_winsys_bo *bo, uint64_t offset, uint64_t size, uint32_t htile_value,
+ uint32_t htile_mask)
{
- struct radv_device *device = cmd_buffer->device;
- struct radv_meta_state *state = &device->meta_state;
- uint64_t block_count = round_up_u64(size, 1024);
- struct radv_meta_saved_state saved_state;
-
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_COMPUTE_PIPELINE |
- RADV_META_SAVE_CONSTANTS |
- RADV_META_SAVE_DESCRIPTORS);
-
- struct radv_buffer dst_buffer = {
- .bo = bo,
- .offset = offset,
- .size = size
- };
-
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_COMPUTE,
- state->clear_htile_mask_pipeline);
-
- radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
- state->clear_htile_mask_p_layout,
- 0, /* set */
- 1, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .pBufferInfo = &(VkDescriptorBufferInfo) {
- .buffer = radv_buffer_to_handle(&dst_buffer),
- .offset = 0,
- .range = size
- }
- }
- });
-
- const unsigned constants[2] = {
- htile_value & htile_mask,
- ~htile_mask,
- };
-
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- state->clear_htile_mask_p_layout,
- VK_SHADER_STAGE_COMPUTE_BIT, 0, 8,
- constants);
-
- radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);
-
- radv_meta_restore(&saved_state, cmd_buffer);
-
- return RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_INV_VCACHE |
- radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);
+ struct radv_device *device = cmd_buffer->device;
+ struct radv_meta_state *state = &device->meta_state;
+ uint64_t block_count = round_up_u64(size, 1024);
+ struct radv_meta_saved_state saved_state;
+
+ radv_meta_save(
+ &saved_state, cmd_buffer,
+ RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
+
+ struct radv_buffer dst_buffer = {.bo = bo, .offset = offset, .size = size};
+
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
+ state->clear_htile_mask_pipeline);
+
+ radv_meta_push_descriptor_set(
+ cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, state->clear_htile_mask_p_layout, 0, /* set */
+ 1, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){
+ {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .pBufferInfo = &(VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(&dst_buffer),
+ .offset = 0,
+ .range = size}}});
+
+ const unsigned constants[2] = {
+ htile_value & htile_mask,
+ ~htile_mask,
+ };
+
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), state->clear_htile_mask_p_layout,
+ VK_SHADER_STAGE_COMPUTE_BIT, 0, 8, constants);
+
+ radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);
+
+ radv_meta_restore(&saved_state, cmd_buffer);
+
+ return RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
+ radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);
}
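
A note on the dispatch sizing above: each invocation of the clear_htile_mask shader (built further down in this file) rewrites one 16-byte HTILE block, and a workgroup is 64 invocations wide, so one group covers 1024 bytes of HTILE, which is what the block_count computed above accounts for. A minimal standalone sketch of that arithmetic; the helper below is illustrative only and is not the driver's round_up_u64:

   #include <stdint.h>
   #include <stdio.h>

   /* Hypothetical helper mirroring the dispatch sizing: 64 threads per
    * group, 16 bytes per thread, so 1024 bytes of HTILE per workgroup. */
   static uint64_t htile_clear_group_count(uint64_t size)
   {
      const uint64_t bytes_per_group = 64 * 16;
      return (size + bytes_per_group - 1) / bytes_per_group;
   }

   int main(void)
   {
      printf("%llu\n", (unsigned long long)htile_clear_group_count(4096)); /* 4 */
      printf("%llu\n", (unsigned long long)htile_clear_group_count(5000)); /* 5 */
      return 0;
   }
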
static uint32_t
-radv_get_htile_fast_clear_value(const struct radv_device *device,
- const struct radv_image *image,
- VkClearDepthStencilValue value)
+radv_get_htile_fast_clear_value(const struct radv_device *device, const struct radv_image *image,
+ VkClearDepthStencilValue value)
{
- uint32_t clear_value;
+ uint32_t clear_value;
- if (radv_image_tile_stencil_disabled(device, image)) {
- clear_value = value.depth ? 0xfffffff0 : 0;
- } else {
- clear_value = value.depth ? 0xfffc00f0 : 0xf0;
- }
+ if (radv_image_tile_stencil_disabled(device, image)) {
+ clear_value = value.depth ? 0xfffffff0 : 0;
+ } else {
+ clear_value = value.depth ? 0xfffc00f0 : 0xf0;
+ }
- return clear_value;
+ return clear_value;
}
static uint32_t
-radv_get_htile_mask(const struct radv_device *device,
- const struct radv_image *image, VkImageAspectFlags aspects)
+radv_get_htile_mask(const struct radv_device *device, const struct radv_image *image,
+ VkImageAspectFlags aspects)
{
- uint32_t mask = 0;
-
- if (radv_image_tile_stencil_disabled(device, image)) {
- /* The whole HTILE buffer is used when there is no stencil. */
- mask = UINT32_MAX;
- } else {
- if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
- mask |= 0xfffffc0f;
- if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT)
- mask |= 0x000003f0;
- }
-
- return mask;
+ uint32_t mask = 0;
+
+ if (radv_image_tile_stencil_disabled(device, image)) {
+ /* The whole HTILE buffer is used when there is no stencil. */
+ mask = UINT32_MAX;
+ } else {
+ if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
+ mask |= 0xfffffc0f;
+ if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT)
+ mask |= 0x000003f0;
+ }
+
+ return mask;
}
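
As a quick check of the masks above: the depth and stencil aspects own disjoint HTILE bits, and together they cover the whole 32-bit word, which is why the no-stencil case can simply use UINT32_MAX. A minimal standalone sketch, assuming illustrative copies of the two constants rather than the driver's helper:

   #include <assert.h>
   #include <stdint.h>

   /* Illustrative copies of the per-aspect HTILE masks used above. */
   #define HTILE_DEPTH_MASK   0xfffffc0fu
   #define HTILE_STENCIL_MASK 0x000003f0u

   int main(void)
   {
      /* The two aspects select disjoint bits... */
      assert((HTILE_DEPTH_MASK & HTILE_STENCIL_MASK) == 0);
      /* ...and together they cover the whole 32-bit HTILE word. */
      assert((HTILE_DEPTH_MASK | HTILE_STENCIL_MASK) == UINT32_MAX);
      return 0;
   }
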
static bool
radv_is_fast_clear_depth_allowed(VkClearDepthStencilValue value)
{
- return value.depth == 1.0f || value.depth == 0.0f;
+ return value.depth == 1.0f || value.depth == 0.0f;
}
static bool
radv_is_fast_clear_stencil_allowed(VkClearDepthStencilValue value)
{
- return value.stencil == 0;
+ return value.stencil == 0;
}
static bool
-radv_can_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_image_view *iview,
- VkImageLayout image_layout,
- bool in_render_loop,
- VkImageAspectFlags aspects,
- const VkClearRect *clear_rect,
- const VkClearDepthStencilValue clear_value,
- uint32_t view_mask)
+radv_can_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
+ VkImageLayout image_layout, bool in_render_loop,
+ VkImageAspectFlags aspects, const VkClearRect *clear_rect,
+ const VkClearDepthStencilValue clear_value, uint32_t view_mask)
{
- if (!iview || !iview->support_fast_clear)
- return false;
-
- if (!radv_layout_is_htile_compressed(cmd_buffer->device, iview->image, image_layout, in_render_loop,
- radv_image_queue_family_mask(iview->image,
- cmd_buffer->queue_family_index,
- cmd_buffer->queue_family_index)))
- return false;
-
- if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||
- clear_rect->rect.extent.width != iview->image->info.width ||
- clear_rect->rect.extent.height != iview->image->info.height)
- return false;
-
- if (view_mask && (iview->image->info.array_size >= 32 ||
- (1u << iview->image->info.array_size) - 1u != view_mask))
- return false;
- if (!view_mask && clear_rect->baseArrayLayer != 0)
- return false;
- if (!view_mask && clear_rect->layerCount != iview->image->info.array_size)
- return false;
-
- if (((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
- !radv_is_fast_clear_depth_allowed(clear_value)) ||
- ((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
- !radv_is_fast_clear_stencil_allowed(clear_value)))
- return false;
-
- return true;
+ if (!iview || !iview->support_fast_clear)
+ return false;
+
+ if (!radv_layout_is_htile_compressed(
+ cmd_buffer->device, iview->image, image_layout, in_render_loop,
+ radv_image_queue_family_mask(iview->image, cmd_buffer->queue_family_index,
+ cmd_buffer->queue_family_index)))
+ return false;
+
+ if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||
+ clear_rect->rect.extent.width != iview->image->info.width ||
+ clear_rect->rect.extent.height != iview->image->info.height)
+ return false;
+
+ if (view_mask && (iview->image->info.array_size >= 32 ||
+ (1u << iview->image->info.array_size) - 1u != view_mask))
+ return false;
+ if (!view_mask && clear_rect->baseArrayLayer != 0)
+ return false;
+ if (!view_mask && clear_rect->layerCount != iview->image->info.array_size)
+ return false;
+
+ if (((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && !radv_is_fast_clear_depth_allowed(clear_value)) ||
+ ((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
+ !radv_is_fast_clear_stencil_allowed(clear_value)))
+ return false;
+
+ return true;
}
static void
-radv_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_image_view *iview,
- const VkClearAttachment *clear_att,
- enum radv_cmd_flush_bits *pre_flush,
- enum radv_cmd_flush_bits *post_flush)
+radv_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
+ const VkClearAttachment *clear_att, enum radv_cmd_flush_bits *pre_flush,
+ enum radv_cmd_flush_bits *post_flush)
{
- VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;
- VkImageAspectFlags aspects = clear_att->aspectMask;
- uint32_t clear_word, flush_bits;
-
- clear_word = radv_get_htile_fast_clear_value(cmd_buffer->device, iview->image, clear_value);
-
- if (pre_flush) {
- enum radv_cmd_flush_bits bits =
- radv_src_access_flush(cmd_buffer, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, iview->image) |
- radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, iview->image);
- cmd_buffer->state.flush_bits |= bits & ~ *pre_flush;
- *pre_flush |= cmd_buffer->state.flush_bits;
- }
-
- VkImageSubresourceRange range = {
- .aspectMask = aspects,
- .baseMipLevel = iview->base_mip,
- .levelCount = iview->level_count,
- .baseArrayLayer = iview->base_layer,
- .layerCount = iview->layer_count,
- };
-
- flush_bits = radv_clear_htile(cmd_buffer, iview->image, &range, clear_word);
-
- if (iview->image->planes[0].surface.has_stencil &&
- !(aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) {
- /* Synchronize after performing a depth-only or a stencil-only
- * fast clear because the driver uses an optimized path which
- * performs a read-modify-write operation, and the two separate
- * aspects might use the same HTILE memory.
- */
- cmd_buffer->state.flush_bits |= flush_bits;
- }
-
- radv_update_ds_clear_metadata(cmd_buffer, iview, clear_value, aspects);
- if (post_flush) {
- *post_flush |= flush_bits;
- }
+ VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;
+ VkImageAspectFlags aspects = clear_att->aspectMask;
+ uint32_t clear_word, flush_bits;
+
+ clear_word = radv_get_htile_fast_clear_value(cmd_buffer->device, iview->image, clear_value);
+
+ if (pre_flush) {
+ enum radv_cmd_flush_bits bits =
+ radv_src_access_flush(cmd_buffer, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
+ iview->image) |
+ radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, iview->image);
+ cmd_buffer->state.flush_bits |= bits & ~*pre_flush;
+ *pre_flush |= cmd_buffer->state.flush_bits;
+ }
+
+ VkImageSubresourceRange range = {
+ .aspectMask = aspects,
+ .baseMipLevel = iview->base_mip,
+ .levelCount = iview->level_count,
+ .baseArrayLayer = iview->base_layer,
+ .layerCount = iview->layer_count,
+ };
+
+ flush_bits = radv_clear_htile(cmd_buffer, iview->image, &range, clear_word);
+
+ if (iview->image->planes[0].surface.has_stencil &&
+ !(aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) {
+ /* Synchronize after performing a depth-only or a stencil-only
+ * fast clear because the driver uses an optimized path which
+ * performs a read-modify-write operation, and the two separate
+ * aspects might use the same HTILE memory.
+ */
+ cmd_buffer->state.flush_bits |= flush_bits;
+ }
+
+ radv_update_ds_clear_metadata(cmd_buffer, iview, clear_value, aspects);
+ if (post_flush) {
+ *post_flush |= flush_bits;
+ }
}
static nir_shader *
build_clear_htile_mask_shader()
{
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_clear_htile_mask");
- b.shader->info.cs.local_size[0] = 64;
- b.shader->info.cs.local_size[1] = 1;
- b.shader->info.cs.local_size[2] = 1;
+ nir_builder b =
+ nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_clear_htile_mask");
+ b.shader->info.cs.local_size[0] = 64;
+ b.shader->info.cs.local_size[1] = 1;
+ b.shader->info.cs.local_size[2] = 1;
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
- nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
- nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info.cs.local_size[0],
- b.shader->info.cs.local_size[1],
- b.shader->info.cs.local_size[2], 0);
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
+ nir_ssa_def *block_size =
+ nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1],
+ b.shader->info.cs.local_size[2], 0);
- nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+ nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
- nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
- offset = nir_channel(&b, offset, 0);
+ nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));
+ offset = nir_channel(&b, offset, 0);
- nir_ssa_def *buf = radv_meta_load_descriptor(&b, 0, 0);
+ nir_ssa_def *buf = radv_meta_load_descriptor(&b, 0, 0);
- nir_ssa_def *constants = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range=8);
+ nir_ssa_def *constants = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
- nir_ssa_def *load = nir_load_ssbo(&b, 4, 32, buf, offset, .align_mul=16);
+ nir_ssa_def *load = nir_load_ssbo(&b, 4, 32, buf, offset, .align_mul = 16);
- /* data = (data & ~htile_mask) | (htile_value & htile_mask) */
- nir_ssa_def *data = nir_iand(&b, load, nir_channel(&b, constants, 1));
- data = nir_ior(&b, data, nir_channel(&b, constants, 0));
+ /* data = (data & ~htile_mask) | (htile_value & htile_mask) */
+ nir_ssa_def *data = nir_iand(&b, load, nir_channel(&b, constants, 1));
+ data = nir_ior(&b, data, nir_channel(&b, constants, 0));
- nir_store_ssbo(&b, data, buf, offset, .write_mask=0xf,
- .access=ACCESS_NON_READABLE, .align_mul=16);
+ nir_store_ssbo(&b, data, buf, offset, .write_mask = 0xf, .access = ACCESS_NON_READABLE,
+ .align_mul = 16);
- return b.shader;
+ return b.shader;
}
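
The push constants set up in clear_htile_mask() pair with this shader: constant 0 is htile_value & htile_mask and constant 1 is ~htile_mask, so each 32-bit word becomes data = (data & ~htile_mask) | (htile_value & htile_mask). A minimal sketch of that update on an arbitrary example word; the values are illustrative only, not real HTILE contents:

   #include <assert.h>
   #include <stdint.h>

   /* Illustrative restatement of the per-word read-modify-write done by the
    * shader: keep the bits outside htile_mask, overwrite the bits inside it. */
   static uint32_t update_htile_word(uint32_t data, uint32_t htile_value, uint32_t htile_mask)
   {
      return (data & ~htile_mask) | (htile_value & htile_mask);
   }

   int main(void)
   {
      /* Stencil-only clear: rewrite the 0x000003f0 bits to the 0xf0 pattern
       * and leave the depth bits of the existing word untouched. */
      assert(update_htile_word(0x12345678u, 0xf0u, 0x000003f0u) == 0x123454f8u);
      return 0;
   }
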
static VkResult
init_meta_clear_htile_mask_state(struct radv_device *device)
{
- struct radv_meta_state *state = &device->meta_state;
- VkResult result;
- nir_shader *cs = build_clear_htile_mask_shader();
-
- VkDescriptorSetLayoutCreateInfo ds_layout_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 1,
- .pBindings = (VkDescriptorSetLayoutBinding[]) {
- {
- .binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- }
- };
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
- &ds_layout_info, &state->alloc,
- &state->clear_htile_mask_ds_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
- VkPipelineLayoutCreateInfo p_layout_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 1,
- .pSetLayouts = &state->clear_htile_mask_ds_layout,
- .pushConstantRangeCount = 1,
- .pPushConstantRanges = &(VkPushConstantRange){
- VK_SHADER_STAGE_COMPUTE_BIT, 0, 8,
- },
- };
-
- result = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &p_layout_info, &state->alloc,
- &state->clear_htile_mask_p_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
- VkPipelineShaderStageCreateInfo shader_stage = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = vk_shader_module_handle_from_nir(cs),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = shader_stage,
- .flags = 0,
- .layout = state->clear_htile_mask_p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&state->cache),
- 1, &pipeline_info, NULL,
- &state->clear_htile_mask_pipeline);
-
- ralloc_free(cs);
- return result;
+ struct radv_meta_state *state = &device->meta_state;
+ VkResult result;
+ nir_shader *cs = build_clear_htile_mask_shader();
+
+ VkDescriptorSetLayoutCreateInfo ds_layout_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 1,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_layout_info,
+ &state->alloc, &state->clear_htile_mask_ds_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineLayoutCreateInfo p_layout_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &state->clear_htile_mask_ds_layout,
+ .pushConstantRangeCount = 1,
+ .pPushConstantRanges =
+ &(VkPushConstantRange){
+ VK_SHADER_STAGE_COMPUTE_BIT,
+ 0,
+ 8,
+ },
+ };
+
+ result = radv_CreatePipelineLayout(radv_device_to_handle(device), &p_layout_info, &state->alloc,
+ &state->clear_htile_mask_p_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineShaderStageCreateInfo shader_stage = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(cs),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = shader_stage,
+ .flags = 0,
+ .layout = state->clear_htile_mask_p_layout,
+ };
+
+ result = radv_CreateComputePipelines(radv_device_to_handle(device),
+ radv_pipeline_cache_to_handle(&state->cache), 1,
+ &pipeline_info, NULL, &state->clear_htile_mask_pipeline);
+
+ ralloc_free(cs);
+ return result;
fail:
- ralloc_free(cs);
- return result;
+ ralloc_free(cs);
+ return result;
}
VkResult
radv_device_init_meta_clear_state(struct radv_device *device, bool on_demand)
{
- VkResult res;
- struct radv_meta_state *state = &device->meta_state;
-
- VkPipelineLayoutCreateInfo pl_color_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 0,
- .pushConstantRangeCount = 1,
- .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16},
- };
-
- res = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &pl_color_create_info,
- &device->meta_state.alloc,
- &device->meta_state.clear_color_p_layout);
- if (res != VK_SUCCESS)
- goto fail;
-
- VkPipelineLayoutCreateInfo pl_depth_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 0,
- .pushConstantRangeCount = 1,
- .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_VERTEX_BIT, 0, 4},
- };
-
- res = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &pl_depth_create_info,
- &device->meta_state.alloc,
- &device->meta_state.clear_depth_p_layout);
- if (res != VK_SUCCESS)
- goto fail;
-
- VkPipelineLayoutCreateInfo pl_depth_unrestricted_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 0,
- .pushConstantRangeCount = 1,
- .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4},
- };
-
- res = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &pl_depth_unrestricted_create_info,
- &device->meta_state.alloc,
- &device->meta_state.clear_depth_unrestricted_p_layout);
- if (res != VK_SUCCESS)
- goto fail;
-
- res = init_meta_clear_htile_mask_state(device);
- if (res != VK_SUCCESS)
- goto fail;
-
- if (on_demand)
- return VK_SUCCESS;
-
- for (uint32_t i = 0; i < ARRAY_SIZE(state->clear); ++i) {
- uint32_t samples = 1 << i;
- for (uint32_t j = 0; j < NUM_META_FS_KEYS; ++j) {
- VkFormat format = radv_fs_key_format_exemplars[j];
- unsigned fs_key = radv_format_meta_fs_key(device, format);
- assert(!state->clear[i].color_pipelines[fs_key]);
-
- res = create_color_renderpass(device, format, samples,
- &state->clear[i].render_pass[fs_key]);
- if (res != VK_SUCCESS)
- goto fail;
-
- res = create_color_pipeline(device, samples, 0, &state->clear[i].color_pipelines[fs_key],
- state->clear[i].render_pass[fs_key]);
- if (res != VK_SUCCESS)
- goto fail;
-
- }
-
- res = create_depthstencil_renderpass(device,
- samples,
- &state->clear[i].depthstencil_rp);
- if (res != VK_SUCCESS)
- goto fail;
-
- for (uint32_t j = 0; j < NUM_DEPTH_CLEAR_PIPELINES; j++) {
- res = create_depthstencil_pipeline(device,
- VK_IMAGE_ASPECT_DEPTH_BIT,
- samples,
- j,
- false,
- &state->clear[i].depth_only_pipeline[j],
- state->clear[i].depthstencil_rp);
- if (res != VK_SUCCESS)
- goto fail;
-
- res = create_depthstencil_pipeline(device,
- VK_IMAGE_ASPECT_STENCIL_BIT,
- samples,
- j,
- false,
- &state->clear[i].stencil_only_pipeline[j],
- state->clear[i].depthstencil_rp);
- if (res != VK_SUCCESS)
- goto fail;
-
- res = create_depthstencil_pipeline(device,
- VK_IMAGE_ASPECT_DEPTH_BIT |
- VK_IMAGE_ASPECT_STENCIL_BIT,
- samples,
- j,
- false,
- &state->clear[i].depthstencil_pipeline[j],
- state->clear[i].depthstencil_rp);
- if (res != VK_SUCCESS)
- goto fail;
-
- res = create_depthstencil_pipeline(device,
- VK_IMAGE_ASPECT_DEPTH_BIT,
- samples,
- j,
- true,
- &state->clear[i].depth_only_unrestricted_pipeline[j],
- state->clear[i].depthstencil_rp);
- if (res != VK_SUCCESS)
- goto fail;
-
- res = create_depthstencil_pipeline(device,
- VK_IMAGE_ASPECT_STENCIL_BIT,
- samples,
- j,
- true,
- &state->clear[i].stencil_only_unrestricted_pipeline[j],
- state->clear[i].depthstencil_rp);
- if (res != VK_SUCCESS)
- goto fail;
-
- res = create_depthstencil_pipeline(device,
- VK_IMAGE_ASPECT_DEPTH_BIT |
- VK_IMAGE_ASPECT_STENCIL_BIT,
- samples,
- j,
- true,
- &state->clear[i].depthstencil_unrestricted_pipeline[j],
- state->clear[i].depthstencil_rp);
- if (res != VK_SUCCESS)
- goto fail;
- }
- }
- return VK_SUCCESS;
+ VkResult res;
+ struct radv_meta_state *state = &device->meta_state;
+
+ VkPipelineLayoutCreateInfo pl_color_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 0,
+ .pushConstantRangeCount = 1,
+ .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16},
+ };
+
+ res = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_color_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.clear_color_p_layout);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineLayoutCreateInfo pl_depth_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 0,
+ .pushConstantRangeCount = 1,
+ .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_VERTEX_BIT, 0, 4},
+ };
+
+ res = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_depth_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.clear_depth_p_layout);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineLayoutCreateInfo pl_depth_unrestricted_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 0,
+ .pushConstantRangeCount = 1,
+ .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4},
+ };
+
+ res = radv_CreatePipelineLayout(radv_device_to_handle(device),
+ &pl_depth_unrestricted_create_info, &device->meta_state.alloc,
+ &device->meta_state.clear_depth_unrestricted_p_layout);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = init_meta_clear_htile_mask_state(device);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ if (on_demand)
+ return VK_SUCCESS;
+
+ for (uint32_t i = 0; i < ARRAY_SIZE(state->clear); ++i) {
+ uint32_t samples = 1 << i;
+ for (uint32_t j = 0; j < NUM_META_FS_KEYS; ++j) {
+ VkFormat format = radv_fs_key_format_exemplars[j];
+ unsigned fs_key = radv_format_meta_fs_key(device, format);
+ assert(!state->clear[i].color_pipelines[fs_key]);
+
+ res =
+ create_color_renderpass(device, format, samples, &state->clear[i].render_pass[fs_key]);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_color_pipeline(device, samples, 0, &state->clear[i].color_pipelines[fs_key],
+ state->clear[i].render_pass[fs_key]);
+ if (res != VK_SUCCESS)
+ goto fail;
+ }
+
+ res = create_depthstencil_renderpass(device, samples, &state->clear[i].depthstencil_rp);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ for (uint32_t j = 0; j < NUM_DEPTH_CLEAR_PIPELINES; j++) {
+ res = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, samples, j, false,
+ &state->clear[i].depth_only_pipeline[j],
+ state->clear[i].depthstencil_rp);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, samples, j, false,
+ &state->clear[i].stencil_only_pipeline[j],
+ state->clear[i].depthstencil_rp);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_depthstencil_pipeline(
+ device, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, samples, j, false,
+ &state->clear[i].depthstencil_pipeline[j], state->clear[i].depthstencil_rp);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, samples, j, true,
+ &state->clear[i].depth_only_unrestricted_pipeline[j],
+ state->clear[i].depthstencil_rp);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, samples, j, true,
+ &state->clear[i].stencil_only_unrestricted_pipeline[j],
+ state->clear[i].depthstencil_rp);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_depthstencil_pipeline(
+ device, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, samples, j, true,
+ &state->clear[i].depthstencil_unrestricted_pipeline[j],
+ state->clear[i].depthstencil_rp);
+ if (res != VK_SUCCESS)
+ goto fail;
+ }
+ }
+ return VK_SUCCESS;
fail:
- radv_device_finish_meta_clear_state(device);
- return res;
+ radv_device_finish_meta_clear_state(device);
+ return res;
}
static uint32_t
radv_get_cmask_fast_clear_value(const struct radv_image *image)
{
- uint32_t value = 0; /* Default value when no DCC. */
+ uint32_t value = 0; /* Default value when no DCC. */
- /* The fast-clear value is different for images that have both DCC and
- * CMASK metadata.
- */
- if (radv_image_has_dcc(image)) {
- /* DCC fast clear with MSAA should clear CMASK to 0xC. */
- return image->info.samples > 1 ? 0xcccccccc : 0xffffffff;
- }
+ /* The fast-clear value is different for images that have both DCC and
+ * CMASK metadata.
+ */
+ if (radv_image_has_dcc(image)) {
+ /* DCC fast clear with MSAA should clear CMASK to 0xC. */
+ return image->info.samples > 1 ? 0xcccccccc : 0xffffffff;
+ }
- return value;
+ return value;
}
uint32_t
-radv_clear_cmask(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *range, uint32_t value)
+radv_clear_cmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *range, uint32_t value)
{
- uint64_t offset = image->offset + image->planes[0].surface.cmask_offset;
- unsigned slice_size = image->planes[0].surface.cmask_slice_size;
- uint64_t size;
+ uint64_t offset = image->offset + image->planes[0].surface.cmask_offset;
+ unsigned slice_size = image->planes[0].surface.cmask_slice_size;
+ uint64_t size;
- offset += slice_size * range->baseArrayLayer;
- size = slice_size * radv_get_layerCount(image, range);
+ offset += slice_size * range->baseArrayLayer;
+ size = slice_size * radv_get_layerCount(image, range);
- return radv_fill_buffer(cmd_buffer, image, image->bo, offset, size, value);
+ return radv_fill_buffer(cmd_buffer, image, image->bo, offset, size, value);
}
-
uint32_t
-radv_clear_fmask(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *range, uint32_t value)
+radv_clear_fmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *range, uint32_t value)
{
- uint64_t offset = image->offset + image->planes[0].surface.fmask_offset;
- unsigned slice_size = image->planes[0].surface.fmask_slice_size;
- uint64_t size;
+ uint64_t offset = image->offset + image->planes[0].surface.fmask_offset;
+ unsigned slice_size = image->planes[0].surface.fmask_slice_size;
+ uint64_t size;
- /* MSAA images do not support mipmap levels. */
- assert(range->baseMipLevel == 0 &&
- radv_get_levelCount(image, range) == 1);
+ /* MSAA images do not support mipmap levels. */
+ assert(range->baseMipLevel == 0 && radv_get_levelCount(image, range) == 1);
- offset += slice_size * range->baseArrayLayer;
- size = slice_size * radv_get_layerCount(image, range);
+ offset += slice_size * range->baseArrayLayer;
+ size = slice_size * radv_get_layerCount(image, range);
- return radv_fill_buffer(cmd_buffer, image, image->bo, offset, size, value);
+ return radv_fill_buffer(cmd_buffer, image, image->bo, offset, size, value);
}
uint32_t
-radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *range, uint32_t value)
+radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *range, uint32_t value)
{
- uint32_t level_count = radv_get_levelCount(image, range);
- uint32_t layer_count = radv_get_layerCount(image, range);
- uint32_t flush_bits = 0;
-
- /* Mark the image as being compressed. */
- radv_update_dcc_metadata(cmd_buffer, image, range, true);
-
- for (uint32_t l = 0; l < level_count; l++) {
- uint64_t offset = image->offset + image->planes[0].surface.dcc_offset;
- uint32_t level = range->baseMipLevel + l;
- uint64_t size;
-
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
- /* DCC for mipmaps+layers is currently disabled. */
- offset += image->planes[0].surface.dcc_slice_size * range->baseArrayLayer +
- image->planes[0].surface.u.gfx9.dcc_levels[level].offset;
- size = image->planes[0].surface.u.gfx9.dcc_levels[level].size * layer_count;
- } else if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) {
- /* Mipmap levels and layers aren't implemented. */
- assert(level == 0);
- size = image->planes[0].surface.dcc_size;
- } else {
- const struct legacy_surf_level *surf_level =
- &image->planes[0].surface.u.legacy.level[level];
-
- /* If dcc_fast_clear_size is 0 (which might happen for
- * mipmaps) the fill buffer operation below is a no-op.
- * This can only happen during initialization as the
- * fast clear path falls back to slow clears if one
- * level can't be fast cleared.
- */
- offset += surf_level->dcc_offset +
- surf_level->dcc_slice_fast_clear_size * range->baseArrayLayer;
- size = surf_level->dcc_slice_fast_clear_size * radv_get_layerCount(image, range);
- }
-
- /* Do not clear this level if it can't be compressed. */
- if (!size)
- continue;
-
- flush_bits |= radv_fill_buffer(cmd_buffer, image, image->bo, offset,
- size, value);
- }
-
- return flush_bits;
+ uint32_t level_count = radv_get_levelCount(image, range);
+ uint32_t layer_count = radv_get_layerCount(image, range);
+ uint32_t flush_bits = 0;
+
+ /* Mark the image as being compressed. */
+ radv_update_dcc_metadata(cmd_buffer, image, range, true);
+
+ for (uint32_t l = 0; l < level_count; l++) {
+ uint64_t offset = image->offset + image->planes[0].surface.dcc_offset;
+ uint32_t level = range->baseMipLevel + l;
+ uint64_t size;
+
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
+ /* DCC for mipmaps+layers is currently disabled. */
+ offset += image->planes[0].surface.dcc_slice_size * range->baseArrayLayer +
+ image->planes[0].surface.u.gfx9.dcc_levels[level].offset;
+ size = image->planes[0].surface.u.gfx9.dcc_levels[level].size * layer_count;
+ } else if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) {
+ /* Mipmap levels and layers aren't implemented. */
+ assert(level == 0);
+ size = image->planes[0].surface.dcc_size;
+ } else {
+ const struct legacy_surf_level *surf_level =
+ &image->planes[0].surface.u.legacy.level[level];
+
+ /* If dcc_fast_clear_size is 0 (which might happen for
+ * mipmaps) the fill buffer operation below is a no-op.
+ * This can only happen during initialization as the
+ * fast clear path falls back to slow clears if one
+ * level can't be fast cleared.
+ */
+ offset +=
+ surf_level->dcc_offset + surf_level->dcc_slice_fast_clear_size * range->baseArrayLayer;
+ size = surf_level->dcc_slice_fast_clear_size * radv_get_layerCount(image, range);
+ }
+
+ /* Do not clear this level if it can't be compressed. */
+ if (!size)
+ continue;
+
+ flush_bits |= radv_fill_buffer(cmd_buffer, image, image->bo, offset, size, value);
+ }
+
+ return flush_bits;
}
uint32_t
-radv_clear_htile(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_image *image,
- const VkImageSubresourceRange *range,
- uint32_t value)
+radv_clear_htile(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image,
+ const VkImageSubresourceRange *range, uint32_t value)
{
- uint32_t level_count = radv_get_levelCount(image, range);
- uint32_t flush_bits = 0;
- uint32_t htile_mask;
-
- htile_mask = radv_get_htile_mask(cmd_buffer->device, image, range->aspectMask);
-
- if (level_count != image->info.levels) {
- assert(cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10);
-
- /* Clear individual levels separately. */
- for (uint32_t l = 0; l < level_count; l++) {
- uint32_t level = range->baseMipLevel + l;
- uint64_t offset = image->offset + image->planes[0].surface.htile_offset +
- image->planes[0].surface.u.gfx9.htile_levels[level].offset;
- uint32_t size = image->planes[0].surface.u.gfx9.htile_levels[level].size;
-
- /* Do not clear this level if it can't be compressed. */
- if (!size)
- continue;
-
- if (htile_mask == UINT_MAX) {
- /* Clear the whole HTILE buffer. */
- flush_bits = radv_fill_buffer(cmd_buffer, image, image->bo, offset,
- size, value);
- } else {
- /* Only clear depth or stencil bytes in the HTILE buffer. */
- flush_bits = clear_htile_mask(cmd_buffer, image, image->bo, offset,
- size, value, htile_mask);
- }
- }
- } else {
- unsigned layer_count = radv_get_layerCount(image, range);
- uint64_t size = image->planes[0].surface.htile_slice_size * layer_count;
- uint64_t offset = image->offset + image->planes[0].surface.htile_offset +
- image->planes[0].surface.htile_slice_size * range->baseArrayLayer;
-
- if (htile_mask == UINT_MAX) {
- /* Clear the whole HTILE buffer. */
- flush_bits = radv_fill_buffer(cmd_buffer, image, image->bo, offset,
- size, value);
- } else {
- /* Only clear depth or stencil bytes in the HTILE buffer. */
- flush_bits = clear_htile_mask(cmd_buffer, image, image->bo, offset,
- size, value, htile_mask);
- }
- }
-
- return flush_bits;
+ uint32_t level_count = radv_get_levelCount(image, range);
+ uint32_t flush_bits = 0;
+ uint32_t htile_mask;
+
+ htile_mask = radv_get_htile_mask(cmd_buffer->device, image, range->aspectMask);
+
+ if (level_count != image->info.levels) {
+ assert(cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10);
+
+ /* Clear individual levels separately. */
+ for (uint32_t l = 0; l < level_count; l++) {
+ uint32_t level = range->baseMipLevel + l;
+ uint64_t offset = image->offset + image->planes[0].surface.htile_offset +
+ image->planes[0].surface.u.gfx9.htile_levels[level].offset;
+ uint32_t size = image->planes[0].surface.u.gfx9.htile_levels[level].size;
+
+ /* Do not clear this level if it can't be compressed. */
+ if (!size)
+ continue;
+
+ if (htile_mask == UINT_MAX) {
+ /* Clear the whole HTILE buffer. */
+ flush_bits = radv_fill_buffer(cmd_buffer, image, image->bo, offset, size, value);
+ } else {
+ /* Only clear depth or stencil bytes in the HTILE buffer. */
+ flush_bits =
+ clear_htile_mask(cmd_buffer, image, image->bo, offset, size, value, htile_mask);
+ }
+ }
+ } else {
+ unsigned layer_count = radv_get_layerCount(image, range);
+ uint64_t size = image->planes[0].surface.htile_slice_size * layer_count;
+ uint64_t offset = image->offset + image->planes[0].surface.htile_offset +
+ image->planes[0].surface.htile_slice_size * range->baseArrayLayer;
+
+ if (htile_mask == UINT_MAX) {
+ /* Clear the whole HTILE buffer. */
+ flush_bits = radv_fill_buffer(cmd_buffer, image, image->bo, offset, size, value);
+ } else {
+ /* Only clear depth or stencil bytes in the HTILE buffer. */
+ flush_bits =
+ clear_htile_mask(cmd_buffer, image, image->bo, offset, size, value, htile_mask);
+ }
+ }
+
+ return flush_bits;
}
enum {
- RADV_DCC_CLEAR_REG = 0x20202020U,
- RADV_DCC_CLEAR_MAIN_1 = 0x80808080U,
- RADV_DCC_CLEAR_SECONDARY_1 = 0x40404040U
+ RADV_DCC_CLEAR_REG = 0x20202020U,
+ RADV_DCC_CLEAR_MAIN_1 = 0x80808080U,
+ RADV_DCC_CLEAR_SECONDARY_1 = 0x40404040U
};
-static void vi_get_fast_clear_parameters(struct radv_device *device,
- VkFormat image_format,
- VkFormat view_format,
- const VkClearColorValue *clear_value,
- uint32_t* reset_value,
- bool *can_avoid_fast_clear_elim)
+static void
+vi_get_fast_clear_parameters(struct radv_device *device, VkFormat image_format,
+ VkFormat view_format, const VkClearColorValue *clear_value,
+ uint32_t *reset_value, bool *can_avoid_fast_clear_elim)
{
- bool values[4] = {0};
- int extra_channel;
- bool main_value = false;
- bool extra_value = false;
- bool has_color = false;
- bool has_alpha = false;
- *can_avoid_fast_clear_elim = false;
-
- *reset_value = RADV_DCC_CLEAR_REG;
-
- const struct util_format_description *desc = vk_format_description(view_format);
- if (view_format == VK_FORMAT_B10G11R11_UFLOAT_PACK32 ||
- view_format == VK_FORMAT_R5G6B5_UNORM_PACK16 ||
- view_format == VK_FORMAT_B5G6R5_UNORM_PACK16)
- extra_channel = -1;
- else if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
- if (vi_alpha_is_on_msb(device, view_format))
- extra_channel = desc->nr_channels - 1;
- else
- extra_channel = 0;
- } else
- return;
-
- for (int i = 0; i < 4; i++) {
- int index = desc->swizzle[i] - PIPE_SWIZZLE_X;
- if (desc->swizzle[i] < PIPE_SWIZZLE_X ||
- desc->swizzle[i] > PIPE_SWIZZLE_W)
- continue;
-
- if (desc->channel[i].pure_integer &&
- desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
- /* Use the maximum value for clamping the clear color. */
- int max = u_bit_consecutive(0, desc->channel[i].size - 1);
-
- values[i] = clear_value->int32[i] != 0;
- if (clear_value->int32[i] != 0 && MIN2(clear_value->int32[i], max) != max)
- return;
- } else if (desc->channel[i].pure_integer &&
- desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
- /* Use the maximum value for clamping the clear color. */
- unsigned max = u_bit_consecutive(0, desc->channel[i].size);
-
- values[i] = clear_value->uint32[i] != 0U;
- if (clear_value->uint32[i] != 0U && MIN2(clear_value->uint32[i], max) != max)
- return;
- } else {
- values[i] = clear_value->float32[i] != 0.0F;
- if (clear_value->float32[i] != 0.0F && clear_value->float32[i] != 1.0F)
- return;
- }
-
- if (index == extra_channel) {
- extra_value = values[i];
- has_alpha = true;
- } else {
- main_value = values[i];
- has_color = true;
- }
- }
-
- /* If alpha isn't present, make it the same as color, and vice versa. */
- if (!has_alpha)
- extra_value = main_value;
- else if (!has_color)
- main_value = extra_value;
-
- for (int i = 0; i < 4; ++i)
- if (values[i] != main_value &&
- desc->swizzle[i] - PIPE_SWIZZLE_X != extra_channel &&
- desc->swizzle[i] >= PIPE_SWIZZLE_X &&
- desc->swizzle[i] <= PIPE_SWIZZLE_W)
- return;
-
- *can_avoid_fast_clear_elim = true;
- *reset_value = 0;
- if (main_value)
- *reset_value |= RADV_DCC_CLEAR_MAIN_1;
-
- if (extra_value)
- *reset_value |= RADV_DCC_CLEAR_SECONDARY_1;
- return;
+ bool values[4] = {0};
+ int extra_channel;
+ bool main_value = false;
+ bool extra_value = false;
+ bool has_color = false;
+ bool has_alpha = false;
+ *can_avoid_fast_clear_elim = false;
+
+ *reset_value = RADV_DCC_CLEAR_REG;
+
+ const struct util_format_description *desc = vk_format_description(view_format);
+ if (view_format == VK_FORMAT_B10G11R11_UFLOAT_PACK32 ||
+ view_format == VK_FORMAT_R5G6B5_UNORM_PACK16 || view_format == VK_FORMAT_B5G6R5_UNORM_PACK16)
+ extra_channel = -1;
+ else if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
+ if (vi_alpha_is_on_msb(device, view_format))
+ extra_channel = desc->nr_channels - 1;
+ else
+ extra_channel = 0;
+ } else
+ return;
+
+ for (int i = 0; i < 4; i++) {
+ int index = desc->swizzle[i] - PIPE_SWIZZLE_X;
+ if (desc->swizzle[i] < PIPE_SWIZZLE_X || desc->swizzle[i] > PIPE_SWIZZLE_W)
+ continue;
+
+ if (desc->channel[i].pure_integer && desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
+ /* Use the maximum value for clamping the clear color. */
+ int max = u_bit_consecutive(0, desc->channel[i].size - 1);
+
+ values[i] = clear_value->int32[i] != 0;
+ if (clear_value->int32[i] != 0 && MIN2(clear_value->int32[i], max) != max)
+ return;
+ } else if (desc->channel[i].pure_integer &&
+ desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
+ /* Use the maximum value for clamping the clear color. */
+ unsigned max = u_bit_consecutive(0, desc->channel[i].size);
+
+ values[i] = clear_value->uint32[i] != 0U;
+ if (clear_value->uint32[i] != 0U && MIN2(clear_value->uint32[i], max) != max)
+ return;
+ } else {
+ values[i] = clear_value->float32[i] != 0.0F;
+ if (clear_value->float32[i] != 0.0F && clear_value->float32[i] != 1.0F)
+ return;
+ }
+
+ if (index == extra_channel) {
+ extra_value = values[i];
+ has_alpha = true;
+ } else {
+ main_value = values[i];
+ has_color = true;
+ }
+ }
+
+ /* If alpha isn't present, make it the same as color, and vice versa. */
+ if (!has_alpha)
+ extra_value = main_value;
+ else if (!has_color)
+ main_value = extra_value;
+
+ for (int i = 0; i < 4; ++i)
+ if (values[i] != main_value && desc->swizzle[i] - PIPE_SWIZZLE_X != extra_channel &&
+ desc->swizzle[i] >= PIPE_SWIZZLE_X && desc->swizzle[i] <= PIPE_SWIZZLE_W)
+ return;
+
+ *can_avoid_fast_clear_elim = true;
+ *reset_value = 0;
+ if (main_value)
+ *reset_value |= RADV_DCC_CLEAR_MAIN_1;
+
+ if (extra_value)
+ *reset_value |= RADV_DCC_CLEAR_SECONDARY_1;
+ return;
}
static bool
-radv_can_fast_clear_color(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_image_view *iview,
- VkImageLayout image_layout,
- bool in_render_loop,
- const VkClearRect *clear_rect,
- VkClearColorValue clear_value,
- uint32_t view_mask)
+radv_can_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
+ VkImageLayout image_layout, bool in_render_loop,
+ const VkClearRect *clear_rect, VkClearColorValue clear_value,
+ uint32_t view_mask)
{
- uint32_t clear_color[2];
-
- if (!iview || !iview->support_fast_clear)
- return false;
-
- if (!radv_layout_can_fast_clear(cmd_buffer->device, iview->image, image_layout, in_render_loop,
- radv_image_queue_family_mask(iview->image,
- cmd_buffer->queue_family_index,
- cmd_buffer->queue_family_index)))
- return false;
-
- if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||
- clear_rect->rect.extent.width != iview->image->info.width ||
- clear_rect->rect.extent.height != iview->image->info.height)
- return false;
-
- if (view_mask && (iview->image->info.array_size >= 32 ||
- (1u << iview->image->info.array_size) - 1u != view_mask))
- return false;
- if (!view_mask && clear_rect->baseArrayLayer != 0)
- return false;
- if (!view_mask && clear_rect->layerCount != iview->image->info.array_size)
- return false;
-
- /* DCC */
- if (!radv_format_pack_clear_color(iview->vk_format,
- clear_color, &clear_value))
- return false;
-
- if (!radv_image_has_clear_value(iview->image) &&
- (clear_color[0] != 0 || clear_color[1] != 0))
- return false;
-
- if (radv_dcc_enabled(iview->image, iview->base_mip)) {
- bool can_avoid_fast_clear_elim;
- uint32_t reset_value;
-
- vi_get_fast_clear_parameters(cmd_buffer->device,
- iview->image->vk_format,
- iview->vk_format,
- &clear_value, &reset_value,
- &can_avoid_fast_clear_elim);
-
- if (iview->image->info.samples > 1) {
- /* DCC fast clear with MSAA should clear CMASK. */
- /* FIXME: This doesn't work for now. There is a
- * hardware bug with fast clears and DCC for MSAA
- * textures. AMDVLK has a workaround but it doesn't
- * seem to work here. Note that we might emit useless
- * CB flushes but that shouldn't matter.
- */
- if (!can_avoid_fast_clear_elim)
- return false;
- }
-
- if (iview->image->info.levels > 1 &&
- cmd_buffer->device->physical_device->rad_info.chip_class == GFX8) {
- for (uint32_t l = 0; l < iview->level_count; l++) {
- uint32_t level = iview->base_mip + l;
- struct legacy_surf_level *surf_level =
- &iview->image->planes[0].surface.u.legacy.level[level];
-
- /* Do not fast clears if one level can't be
- * fast cleared.
- */
- if (!surf_level->dcc_fast_clear_size)
- return false;
- }
- }
- }
-
- return true;
+ uint32_t clear_color[2];
+
+ if (!iview || !iview->support_fast_clear)
+ return false;
+
+ if (!radv_layout_can_fast_clear(
+ cmd_buffer->device, iview->image, image_layout, in_render_loop,
+ radv_image_queue_family_mask(iview->image, cmd_buffer->queue_family_index,
+ cmd_buffer->queue_family_index)))
+ return false;
+
+ if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||
+ clear_rect->rect.extent.width != iview->image->info.width ||
+ clear_rect->rect.extent.height != iview->image->info.height)
+ return false;
+
+ if (view_mask && (iview->image->info.array_size >= 32 ||
+ (1u << iview->image->info.array_size) - 1u != view_mask))
+ return false;
+ if (!view_mask && clear_rect->baseArrayLayer != 0)
+ return false;
+ if (!view_mask && clear_rect->layerCount != iview->image->info.array_size)
+ return false;
+
+ /* DCC */
+ if (!radv_format_pack_clear_color(iview->vk_format, clear_color, &clear_value))
+ return false;
+
+ if (!radv_image_has_clear_value(iview->image) && (clear_color[0] != 0 || clear_color[1] != 0))
+ return false;
+
+ if (radv_dcc_enabled(iview->image, iview->base_mip)) {
+ bool can_avoid_fast_clear_elim;
+ uint32_t reset_value;
+
+ vi_get_fast_clear_parameters(cmd_buffer->device, iview->image->vk_format, iview->vk_format,
+ &clear_value, &reset_value, &can_avoid_fast_clear_elim);
+
+ if (iview->image->info.samples > 1) {
+ /* DCC fast clear with MSAA should clear CMASK. */
+ /* FIXME: This doesn't work for now. There is a
+ * hardware bug with fast clears and DCC for MSAA
+ * textures. AMDVLK has a workaround but it doesn't
+ * seem to work here. Note that we might emit useless
+ * CB flushes but that shouldn't matter.
+ */
+ if (!can_avoid_fast_clear_elim)
+ return false;
+ }
+
+ if (iview->image->info.levels > 1 &&
+ cmd_buffer->device->physical_device->rad_info.chip_class == GFX8) {
+ for (uint32_t l = 0; l < iview->level_count; l++) {
+ uint32_t level = iview->base_mip + l;
+ struct legacy_surf_level *surf_level =
+ &iview->image->planes[0].surface.u.legacy.level[level];
+
+            /* Do not fast clear if one level can't be
+ * fast cleared.
+ */
+ if (!surf_level->dcc_fast_clear_size)
+ return false;
+ }
+ }
+ }
+
+ return true;
}
-
static void
-radv_fast_clear_color(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_image_view *iview,
- const VkClearAttachment *clear_att,
- uint32_t subpass_att,
- enum radv_cmd_flush_bits *pre_flush,
- enum radv_cmd_flush_bits *post_flush)
+radv_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
+ const VkClearAttachment *clear_att, uint32_t subpass_att,
+ enum radv_cmd_flush_bits *pre_flush, enum radv_cmd_flush_bits *post_flush)
{
- VkClearColorValue clear_value = clear_att->clearValue.color;
- uint32_t clear_color[2], flush_bits = 0;
- uint32_t cmask_clear_value;
- VkImageSubresourceRange range = {
- .aspectMask = iview->aspect_mask,
- .baseMipLevel = iview->base_mip,
- .levelCount = iview->level_count,
- .baseArrayLayer = iview->base_layer,
- .layerCount = iview->layer_count,
- };
-
- if (pre_flush) {
- enum radv_cmd_flush_bits bits =
- radv_src_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, iview->image) |
- radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, iview->image);
- cmd_buffer->state.flush_bits |= bits & ~ *pre_flush;
- *pre_flush |= cmd_buffer->state.flush_bits;
- }
-
- /* DCC */
- radv_format_pack_clear_color(iview->vk_format, clear_color, &clear_value);
-
- cmask_clear_value = radv_get_cmask_fast_clear_value(iview->image);
-
- /* clear cmask buffer */
- bool need_decompress_pass = false;
- if (radv_dcc_enabled(iview->image, iview->base_mip)) {
- uint32_t reset_value;
- bool can_avoid_fast_clear_elim;
-
- vi_get_fast_clear_parameters(cmd_buffer->device,
- iview->image->vk_format,
- iview->vk_format,
- &clear_value, &reset_value,
- &can_avoid_fast_clear_elim);
-
- if (radv_image_has_cmask(iview->image)) {
- flush_bits = radv_clear_cmask(cmd_buffer, iview->image,
- &range, cmask_clear_value);
- }
-
- if (!can_avoid_fast_clear_elim)
- need_decompress_pass = true;
-
- flush_bits |= radv_clear_dcc(cmd_buffer, iview->image, &range,
- reset_value);
- } else {
- flush_bits = radv_clear_cmask(cmd_buffer, iview->image,
- &range, cmask_clear_value);
-
- /* Fast clearing with CMASK should always be eliminated. */
- need_decompress_pass = true;
- }
-
- if (post_flush) {
- *post_flush |= flush_bits;
- }
-
- /* Update the FCE predicate to perform a fast-clear eliminate. */
- radv_update_fce_metadata(cmd_buffer, iview->image, &range,
- need_decompress_pass);
-
- radv_update_color_clear_metadata(cmd_buffer, iview, subpass_att,
- clear_color);
+ VkClearColorValue clear_value = clear_att->clearValue.color;
+ uint32_t clear_color[2], flush_bits = 0;
+ uint32_t cmask_clear_value;
+ VkImageSubresourceRange range = {
+ .aspectMask = iview->aspect_mask,
+ .baseMipLevel = iview->base_mip,
+ .levelCount = iview->level_count,
+ .baseArrayLayer = iview->base_layer,
+ .layerCount = iview->layer_count,
+ };
+
+ if (pre_flush) {
+ enum radv_cmd_flush_bits bits =
+ radv_src_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, iview->image) |
+ radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, iview->image);
+ cmd_buffer->state.flush_bits |= bits & ~*pre_flush;
+ *pre_flush |= cmd_buffer->state.flush_bits;
+ }
+
+ /* DCC */
+ radv_format_pack_clear_color(iview->vk_format, clear_color, &clear_value);
+
+ cmask_clear_value = radv_get_cmask_fast_clear_value(iview->image);
+
+ /* clear cmask buffer */
+ bool need_decompress_pass = false;
+ if (radv_dcc_enabled(iview->image, iview->base_mip)) {
+ uint32_t reset_value;
+ bool can_avoid_fast_clear_elim;
+
+ vi_get_fast_clear_parameters(cmd_buffer->device, iview->image->vk_format, iview->vk_format,
+ &clear_value, &reset_value, &can_avoid_fast_clear_elim);
+
+ if (radv_image_has_cmask(iview->image)) {
+ flush_bits = radv_clear_cmask(cmd_buffer, iview->image, &range, cmask_clear_value);
+ }
+
+ if (!can_avoid_fast_clear_elim)
+ need_decompress_pass = true;
+
+ flush_bits |= radv_clear_dcc(cmd_buffer, iview->image, &range, reset_value);
+ } else {
+ flush_bits = radv_clear_cmask(cmd_buffer, iview->image, &range, cmask_clear_value);
+
+ /* Fast clearing with CMASK should always be eliminated. */
+ need_decompress_pass = true;
+ }
+
+ if (post_flush) {
+ *post_flush |= flush_bits;
+ }
+
+ /* Update the FCE predicate to perform a fast-clear eliminate. */
+ radv_update_fce_metadata(cmd_buffer, iview->image, &range, need_decompress_pass);
+
+ radv_update_color_clear_metadata(cmd_buffer, iview, subpass_att, clear_color);
}
/**
 * The parameters mean the same as those in vkCmdClearAttachments.
*/
static void
-emit_clear(struct radv_cmd_buffer *cmd_buffer,
- const VkClearAttachment *clear_att,
- const VkClearRect *clear_rect,
- enum radv_cmd_flush_bits *pre_flush,
- enum radv_cmd_flush_bits *post_flush,
- uint32_t view_mask,
- bool ds_resolve_clear)
+emit_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *clear_att,
+ const VkClearRect *clear_rect, enum radv_cmd_flush_bits *pre_flush,
+ enum radv_cmd_flush_bits *post_flush, uint32_t view_mask, bool ds_resolve_clear)
{
- const struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
- const struct radv_subpass *subpass = cmd_buffer->state.subpass;
- VkImageAspectFlags aspects = clear_att->aspectMask;
-
- if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
- const uint32_t subpass_att = clear_att->colorAttachment;
- assert(subpass_att < subpass->color_count);
- const uint32_t pass_att = subpass->color_attachments[subpass_att].attachment;
- if (pass_att == VK_ATTACHMENT_UNUSED)
- return;
-
- VkImageLayout image_layout = subpass->color_attachments[subpass_att].layout;
- bool in_render_loop = subpass->color_attachments[subpass_att].in_render_loop;
- const struct radv_image_view *iview = fb ? cmd_buffer->state.attachments[pass_att].iview : NULL;
- VkClearColorValue clear_value = clear_att->clearValue.color;
-
- if (radv_can_fast_clear_color(cmd_buffer, iview, image_layout, in_render_loop,
- clear_rect, clear_value, view_mask)) {
- radv_fast_clear_color(cmd_buffer, iview, clear_att,
- subpass_att, pre_flush,
- post_flush);
- } else {
- emit_color_clear(cmd_buffer, clear_att, clear_rect, view_mask);
- }
- } else {
- struct radv_subpass_attachment *ds_att = subpass->depth_stencil_attachment;
-
- if (ds_resolve_clear)
- ds_att = subpass->ds_resolve_attachment;
-
- if (!ds_att || ds_att->attachment == VK_ATTACHMENT_UNUSED)
- return;
-
- VkImageLayout image_layout = ds_att->layout;
- bool in_render_loop = ds_att->in_render_loop;
- const struct radv_image_view *iview = fb ? cmd_buffer->state.attachments[ds_att->attachment].iview : NULL;
- VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;
-
- assert(aspects & (VK_IMAGE_ASPECT_DEPTH_BIT |
- VK_IMAGE_ASPECT_STENCIL_BIT));
-
- if (radv_can_fast_clear_depth(cmd_buffer, iview, image_layout,
- in_render_loop, aspects, clear_rect,
- clear_value, view_mask)) {
- radv_fast_clear_depth(cmd_buffer, iview, clear_att,
- pre_flush, post_flush);
- } else {
- emit_depthstencil_clear(cmd_buffer, clear_att, clear_rect,
- ds_att, view_mask);
- }
- }
+ const struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
+ const struct radv_subpass *subpass = cmd_buffer->state.subpass;
+ VkImageAspectFlags aspects = clear_att->aspectMask;
+
+ if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
+ const uint32_t subpass_att = clear_att->colorAttachment;
+ assert(subpass_att < subpass->color_count);
+ const uint32_t pass_att = subpass->color_attachments[subpass_att].attachment;
+ if (pass_att == VK_ATTACHMENT_UNUSED)
+ return;
+
+ VkImageLayout image_layout = subpass->color_attachments[subpass_att].layout;
+ bool in_render_loop = subpass->color_attachments[subpass_att].in_render_loop;
+ const struct radv_image_view *iview =
+ fb ? cmd_buffer->state.attachments[pass_att].iview : NULL;
+ VkClearColorValue clear_value = clear_att->clearValue.color;
+
+ if (radv_can_fast_clear_color(cmd_buffer, iview, image_layout, in_render_loop, clear_rect,
+ clear_value, view_mask)) {
+ radv_fast_clear_color(cmd_buffer, iview, clear_att, subpass_att, pre_flush, post_flush);
+ } else {
+ emit_color_clear(cmd_buffer, clear_att, clear_rect, view_mask);
+ }
+ } else {
+ struct radv_subpass_attachment *ds_att = subpass->depth_stencil_attachment;
+
+ if (ds_resolve_clear)
+ ds_att = subpass->ds_resolve_attachment;
+
+ if (!ds_att || ds_att->attachment == VK_ATTACHMENT_UNUSED)
+ return;
+
+ VkImageLayout image_layout = ds_att->layout;
+ bool in_render_loop = ds_att->in_render_loop;
+ const struct radv_image_view *iview =
+ fb ? cmd_buffer->state.attachments[ds_att->attachment].iview : NULL;
+ VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;
+
+ assert(aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT));
+
+ if (radv_can_fast_clear_depth(cmd_buffer, iview, image_layout, in_render_loop, aspects,
+ clear_rect, clear_value, view_mask)) {
+ radv_fast_clear_depth(cmd_buffer, iview, clear_att, pre_flush, post_flush);
+ } else {
+ emit_depthstencil_clear(cmd_buffer, clear_att, clear_rect, ds_att, view_mask);
+ }
+ }
}
static inline bool
radv_attachment_needs_clear(struct radv_cmd_state *cmd_state, uint32_t a)
{
- uint32_t view_mask = cmd_state->subpass->view_mask;
- return (a != VK_ATTACHMENT_UNUSED &&
- cmd_state->attachments[a].pending_clear_aspects &&
- (!view_mask || (view_mask & ~cmd_state->attachments[a].cleared_views)));
+ uint32_t view_mask = cmd_state->subpass->view_mask;
+ return (a != VK_ATTACHMENT_UNUSED && cmd_state->attachments[a].pending_clear_aspects &&
+ (!view_mask || (view_mask & ~cmd_state->attachments[a].cleared_views)));
}
static bool
radv_subpass_needs_clear(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_cmd_state *cmd_state = &cmd_buffer->state;
- uint32_t a;
+ struct radv_cmd_state *cmd_state = &cmd_buffer->state;
+ uint32_t a;
- if (!cmd_state->subpass)
- return false;
+ if (!cmd_state->subpass)
+ return false;
- for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) {
- a = cmd_state->subpass->color_attachments[i].attachment;
- if (radv_attachment_needs_clear(cmd_state, a))
- return true;
- }
+ for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) {
+ a = cmd_state->subpass->color_attachments[i].attachment;
+ if (radv_attachment_needs_clear(cmd_state, a))
+ return true;
+ }
- if (cmd_state->subpass->depth_stencil_attachment) {
- a = cmd_state->subpass->depth_stencil_attachment->attachment;
- if (radv_attachment_needs_clear(cmd_state, a))
- return true;
- }
+ if (cmd_state->subpass->depth_stencil_attachment) {
+ a = cmd_state->subpass->depth_stencil_attachment->attachment;
+ if (radv_attachment_needs_clear(cmd_state, a))
+ return true;
+ }
- if (!cmd_state->subpass->ds_resolve_attachment)
- return false;
+ if (!cmd_state->subpass->ds_resolve_attachment)
+ return false;
- a = cmd_state->subpass->ds_resolve_attachment->attachment;
- return radv_attachment_needs_clear(cmd_state, a);
+ a = cmd_state->subpass->ds_resolve_attachment->attachment;
+ return radv_attachment_needs_clear(cmd_state, a);
}
static void
radv_subpass_clear_attachment(struct radv_cmd_buffer *cmd_buffer,
- struct radv_attachment_state *attachment,
- const VkClearAttachment *clear_att,
- enum radv_cmd_flush_bits *pre_flush,
- enum radv_cmd_flush_bits *post_flush,
- bool ds_resolve_clear)
+ struct radv_attachment_state *attachment,
+ const VkClearAttachment *clear_att,
+ enum radv_cmd_flush_bits *pre_flush,
+ enum radv_cmd_flush_bits *post_flush, bool ds_resolve_clear)
{
- struct radv_cmd_state *cmd_state = &cmd_buffer->state;
- uint32_t view_mask = cmd_state->subpass->view_mask;
+ struct radv_cmd_state *cmd_state = &cmd_buffer->state;
+ uint32_t view_mask = cmd_state->subpass->view_mask;
- VkClearRect clear_rect = {
- .rect = cmd_state->render_area,
- .baseArrayLayer = 0,
- .layerCount = cmd_state->framebuffer->layers,
- };
+ VkClearRect clear_rect = {
+ .rect = cmd_state->render_area,
+ .baseArrayLayer = 0,
+ .layerCount = cmd_state->framebuffer->layers,
+ };
- radv_describe_begin_render_pass_clear(cmd_buffer, clear_att->aspectMask);
+ radv_describe_begin_render_pass_clear(cmd_buffer, clear_att->aspectMask);
- emit_clear(cmd_buffer, clear_att, &clear_rect, pre_flush, post_flush,
- view_mask & ~attachment->cleared_views, ds_resolve_clear);
- if (view_mask)
- attachment->cleared_views |= view_mask;
- else
- attachment->pending_clear_aspects = 0;
+ emit_clear(cmd_buffer, clear_att, &clear_rect, pre_flush, post_flush,
+ view_mask & ~attachment->cleared_views, ds_resolve_clear);
+ if (view_mask)
+ attachment->cleared_views |= view_mask;
+ else
+ attachment->pending_clear_aspects = 0;
- radv_describe_end_render_pass_clear(cmd_buffer);
+ radv_describe_end_render_pass_clear(cmd_buffer);
}
/**
@@ -1895,456 +1721,418 @@ radv_subpass_clear_attachment(struct radv_cmd_buffer *cmd_buffer,
void
radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_cmd_state *cmd_state = &cmd_buffer->state;
- struct radv_meta_saved_state saved_state;
- enum radv_cmd_flush_bits pre_flush = 0;
- enum radv_cmd_flush_bits post_flush = 0;
-
- if (!radv_subpass_needs_clear(cmd_buffer))
- return;
-
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_GRAPHICS_PIPELINE |
- RADV_META_SAVE_CONSTANTS);
-
- for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) {
- uint32_t a = cmd_state->subpass->color_attachments[i].attachment;
-
- if (!radv_attachment_needs_clear(cmd_state, a))
- continue;
-
- assert(cmd_state->attachments[a].pending_clear_aspects ==
- VK_IMAGE_ASPECT_COLOR_BIT);
-
- VkClearAttachment clear_att = {
- .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .colorAttachment = i, /* Use attachment index relative to subpass */
- .clearValue = cmd_state->attachments[a].clear_value,
- };
-
- radv_subpass_clear_attachment(cmd_buffer,
- &cmd_state->attachments[a],
- &clear_att, &pre_flush,
- &post_flush, false);
- }
-
- if (cmd_state->subpass->depth_stencil_attachment) {
- uint32_t ds = cmd_state->subpass->depth_stencil_attachment->attachment;
- if (radv_attachment_needs_clear(cmd_state, ds)) {
- VkClearAttachment clear_att = {
- .aspectMask = cmd_state->attachments[ds].pending_clear_aspects,
- .clearValue = cmd_state->attachments[ds].clear_value,
- };
-
- radv_subpass_clear_attachment(cmd_buffer,
- &cmd_state->attachments[ds],
- &clear_att, &pre_flush,
- &post_flush, false);
- }
- }
-
- if (cmd_state->subpass->ds_resolve_attachment) {
- uint32_t ds_resolve = cmd_state->subpass->ds_resolve_attachment->attachment;
- if (radv_attachment_needs_clear(cmd_state, ds_resolve)) {
- VkClearAttachment clear_att = {
- .aspectMask = cmd_state->attachments[ds_resolve].pending_clear_aspects,
- .clearValue = cmd_state->attachments[ds_resolve].clear_value,
- };
-
- radv_subpass_clear_attachment(cmd_buffer,
- &cmd_state->attachments[ds_resolve],
- &clear_att, &pre_flush,
- &post_flush, true);
- }
- }
-
- radv_meta_restore(&saved_state, cmd_buffer);
- cmd_buffer->state.flush_bits |= post_flush;
+ struct radv_cmd_state *cmd_state = &cmd_buffer->state;
+ struct radv_meta_saved_state saved_state;
+ enum radv_cmd_flush_bits pre_flush = 0;
+ enum radv_cmd_flush_bits post_flush = 0;
+
+ if (!radv_subpass_needs_clear(cmd_buffer))
+ return;
+
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS);
+
+ for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) {
+ uint32_t a = cmd_state->subpass->color_attachments[i].attachment;
+
+ if (!radv_attachment_needs_clear(cmd_state, a))
+ continue;
+
+ assert(cmd_state->attachments[a].pending_clear_aspects == VK_IMAGE_ASPECT_COLOR_BIT);
+
+ VkClearAttachment clear_att = {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .colorAttachment = i, /* Use attachment index relative to subpass */
+ .clearValue = cmd_state->attachments[a].clear_value,
+ };
+
+ radv_subpass_clear_attachment(cmd_buffer, &cmd_state->attachments[a], &clear_att, &pre_flush,
+ &post_flush, false);
+ }
+
+ if (cmd_state->subpass->depth_stencil_attachment) {
+ uint32_t ds = cmd_state->subpass->depth_stencil_attachment->attachment;
+ if (radv_attachment_needs_clear(cmd_state, ds)) {
+ VkClearAttachment clear_att = {
+ .aspectMask = cmd_state->attachments[ds].pending_clear_aspects,
+ .clearValue = cmd_state->attachments[ds].clear_value,
+ };
+
+ radv_subpass_clear_attachment(cmd_buffer, &cmd_state->attachments[ds], &clear_att,
+ &pre_flush, &post_flush, false);
+ }
+ }
+
+ if (cmd_state->subpass->ds_resolve_attachment) {
+ uint32_t ds_resolve = cmd_state->subpass->ds_resolve_attachment->attachment;
+ if (radv_attachment_needs_clear(cmd_state, ds_resolve)) {
+ VkClearAttachment clear_att = {
+ .aspectMask = cmd_state->attachments[ds_resolve].pending_clear_aspects,
+ .clearValue = cmd_state->attachments[ds_resolve].clear_value,
+ };
+
+ radv_subpass_clear_attachment(cmd_buffer, &cmd_state->attachments[ds_resolve], &clear_att,
+ &pre_flush, &post_flush, true);
+ }
+ }
+
+ radv_meta_restore(&saved_state, cmd_buffer);
+ cmd_buffer->state.flush_bits |= post_flush;
}
static void
-radv_clear_image_layer(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- VkImageLayout image_layout,
- const VkImageSubresourceRange *range,
- VkFormat format, int level, int layer,
- const VkClearValue *clear_val)
+radv_clear_image_layer(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ VkImageLayout image_layout, const VkImageSubresourceRange *range,
+ VkFormat format, int level, int layer, const VkClearValue *clear_val)
{
- VkDevice device_h = radv_device_to_handle(cmd_buffer->device);
- struct radv_image_view iview;
- uint32_t width = radv_minify(image->info.width, range->baseMipLevel + level);
- uint32_t height = radv_minify(image->info.height, range->baseMipLevel + level);
-
- radv_image_view_init(&iview, cmd_buffer->device,
- &(VkImageViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(image),
- .viewType = radv_meta_get_view_type(image),
- .format = format,
- .subresourceRange = {
- .aspectMask = range->aspectMask,
- .baseMipLevel = range->baseMipLevel + level,
- .levelCount = 1,
- .baseArrayLayer = range->baseArrayLayer + layer,
- .layerCount = 1
- },
- }, NULL);
-
- VkFramebuffer fb;
- radv_CreateFramebuffer(device_h,
- &(VkFramebufferCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
- .attachmentCount = 1,
- .pAttachments = (VkImageView[]) {
- radv_image_view_to_handle(&iview),
- },
- .width = width,
- .height = height,
- .layers = 1
- },
- &cmd_buffer->pool->alloc,
- &fb);
-
- VkAttachmentDescription2 att_desc = {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
- .format = iview.vk_format,
- .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
- .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
- .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
- .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
- .initialLayout = image_layout,
- .finalLayout = image_layout,
- };
-
- VkSubpassDescription2 subpass_desc = {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
- .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
- .inputAttachmentCount = 0,
- .colorAttachmentCount = 0,
- .pColorAttachments = NULL,
- .pResolveAttachments = NULL,
- .pDepthStencilAttachment = NULL,
- .preserveAttachmentCount = 0,
- .pPreserveAttachments = NULL,
- };
-
- const VkAttachmentReference2 att_ref = {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
- .attachment = 0,
- .layout = image_layout,
- };
-
- if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
- subpass_desc.colorAttachmentCount = 1;
- subpass_desc.pColorAttachments = &att_ref;
- } else {
- subpass_desc.pDepthStencilAttachment = &att_ref;
- }
-
- VkRenderPass pass;
- radv_CreateRenderPass2(device_h,
- &(VkRenderPassCreateInfo2) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
- .attachmentCount = 1,
- .pAttachments = &att_desc,
- .subpassCount = 1,
- .pSubpasses = &subpass_desc,
- .dependencyCount = 2,
- .pDependencies = (VkSubpassDependency2[]) {
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = VK_SUBPASS_EXTERNAL,
- .dstSubpass = 0,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- },
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = 0,
- .dstSubpass = VK_SUBPASS_EXTERNAL,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- }
- }
- },
- &cmd_buffer->pool->alloc,
- &pass);
-
- radv_cmd_buffer_begin_render_pass(cmd_buffer,
- &(VkRenderPassBeginInfo) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
- .renderArea = {
- .offset = { 0, 0, },
- .extent = {
- .width = width,
- .height = height,
- },
- },
- .renderPass = pass,
- .framebuffer = fb,
- .clearValueCount = 0,
- .pClearValues = NULL,
- }, NULL);
-
- radv_cmd_buffer_set_subpass(cmd_buffer,
- &cmd_buffer->state.pass->subpasses[0]);
-
- VkClearAttachment clear_att = {
- .aspectMask = range->aspectMask,
- .colorAttachment = 0,
- .clearValue = *clear_val,
- };
-
- VkClearRect clear_rect = {
- .rect = {
- .offset = { 0, 0 },
- .extent = { width, height },
- },
- .baseArrayLayer = range->baseArrayLayer,
- .layerCount = 1, /* FINISHME: clear multi-layer framebuffer */
- };
-
- emit_clear(cmd_buffer, &clear_att, &clear_rect, NULL, NULL, 0, false);
-
- radv_cmd_buffer_end_render_pass(cmd_buffer);
- radv_DestroyRenderPass(device_h, pass,
- &cmd_buffer->pool->alloc);
- radv_DestroyFramebuffer(device_h, fb,
- &cmd_buffer->pool->alloc);
+ VkDevice device_h = radv_device_to_handle(cmd_buffer->device);
+ struct radv_image_view iview;
+ uint32_t width = radv_minify(image->info.width, range->baseMipLevel + level);
+ uint32_t height = radv_minify(image->info.height, range->baseMipLevel + level);
+
+ radv_image_view_init(&iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(image),
+ .viewType = radv_meta_get_view_type(image),
+ .format = format,
+ .subresourceRange = {.aspectMask = range->aspectMask,
+ .baseMipLevel = range->baseMipLevel + level,
+ .levelCount = 1,
+ .baseArrayLayer = range->baseArrayLayer + layer,
+ .layerCount = 1},
+ },
+ NULL);
+
+ VkFramebuffer fb;
+ radv_CreateFramebuffer(
+ device_h,
+ &(VkFramebufferCreateInfo){.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments =
+ (VkImageView[]){
+ radv_image_view_to_handle(&iview),
+ },
+ .width = width,
+ .height = height,
+ .layers = 1},
+ &cmd_buffer->pool->alloc, &fb);
+
+ VkAttachmentDescription2 att_desc = {
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
+ .format = iview.vk_format,
+ .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = image_layout,
+ .finalLayout = image_layout,
+ };
+
+ VkSubpassDescription2 subpass_desc = {
+ .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .colorAttachmentCount = 0,
+ .pColorAttachments = NULL,
+ .pResolveAttachments = NULL,
+ .pDepthStencilAttachment = NULL,
+ .preserveAttachmentCount = 0,
+ .pPreserveAttachments = NULL,
+ };
+
+ const VkAttachmentReference2 att_ref = {
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
+ .attachment = 0,
+ .layout = image_layout,
+ };
+
+ if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
+ subpass_desc.colorAttachmentCount = 1;
+ subpass_desc.pColorAttachments = &att_ref;
+ } else {
+ subpass_desc.pDepthStencilAttachment = &att_ref;
+ }
+
+ VkRenderPass pass;
+ radv_CreateRenderPass2(
+ device_h,
+ &(VkRenderPassCreateInfo2){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
+ .attachmentCount = 1,
+ .pAttachments = &att_desc,
+ .subpassCount = 1,
+ .pSubpasses = &subpass_desc,
+ .dependencyCount = 2,
+ .pDependencies =
+ (VkSubpassDependency2[]){{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = VK_SUBPASS_EXTERNAL,
+ .dstSubpass = 0,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0},
+ {.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = 0,
+ .dstSubpass = VK_SUBPASS_EXTERNAL,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0}}},
+ &cmd_buffer->pool->alloc, &pass);
+
+ radv_cmd_buffer_begin_render_pass(cmd_buffer,
+ &(VkRenderPassBeginInfo){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .renderArea =
+ {
+ .offset =
+ {
+ 0,
+ 0,
+ },
+ .extent =
+ {
+ .width = width,
+ .height = height,
+ },
+ },
+ .renderPass = pass,
+ .framebuffer = fb,
+ .clearValueCount = 0,
+ .pClearValues = NULL,
+ },
+ NULL);
+
+ radv_cmd_buffer_set_subpass(cmd_buffer, &cmd_buffer->state.pass->subpasses[0]);
+
+ VkClearAttachment clear_att = {
+ .aspectMask = range->aspectMask,
+ .colorAttachment = 0,
+ .clearValue = *clear_val,
+ };
+
+ VkClearRect clear_rect = {
+ .rect =
+ {
+ .offset = {0, 0},
+ .extent = {width, height},
+ },
+ .baseArrayLayer = range->baseArrayLayer,
+ .layerCount = 1, /* FINISHME: clear multi-layer framebuffer */
+ };
+
+ emit_clear(cmd_buffer, &clear_att, &clear_rect, NULL, NULL, 0, false);
+
+ radv_cmd_buffer_end_render_pass(cmd_buffer);
+ radv_DestroyRenderPass(device_h, pass, &cmd_buffer->pool->alloc);
+ radv_DestroyFramebuffer(device_h, fb, &cmd_buffer->pool->alloc);
}
/**
* Return TRUE if a fast color or depth clear has been performed.
*/
static bool
-radv_fast_clear_range(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- VkFormat format,
- VkImageLayout image_layout,
- bool in_render_loop,
- const VkImageSubresourceRange *range,
- const VkClearValue *clear_val)
+radv_fast_clear_range(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkFormat format,
+ VkImageLayout image_layout, bool in_render_loop,
+ const VkImageSubresourceRange *range, const VkClearValue *clear_val)
{
- struct radv_image_view iview;
-
- radv_image_view_init(&iview, cmd_buffer->device,
- &(VkImageViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(image),
- .viewType = radv_meta_get_view_type(image),
- .format = image->vk_format,
- .subresourceRange = {
- .aspectMask = range->aspectMask,
- .baseMipLevel = range->baseMipLevel,
- .levelCount = range->levelCount,
- .baseArrayLayer = range->baseArrayLayer,
- .layerCount = range->layerCount,
- },
- }, NULL);
-
- VkClearRect clear_rect = {
- .rect = {
- .offset = { 0, 0 },
- .extent = {
- radv_minify(image->info.width, range->baseMipLevel),
- radv_minify(image->info.height, range->baseMipLevel),
- },
- },
- .baseArrayLayer = range->baseArrayLayer,
- .layerCount = range->layerCount,
- };
-
- VkClearAttachment clear_att = {
- .aspectMask = range->aspectMask,
- .colorAttachment = 0,
- .clearValue = *clear_val,
- };
-
- if (vk_format_is_color(format)) {
- if (radv_can_fast_clear_color(cmd_buffer, &iview, image_layout,
- in_render_loop, &clear_rect,
- clear_att.clearValue.color, 0)) {
- radv_fast_clear_color(cmd_buffer, &iview, &clear_att,
- clear_att.colorAttachment,
- NULL, NULL);
- return true;
- }
- } else {
- if (radv_can_fast_clear_depth(cmd_buffer, &iview, image_layout,
- in_render_loop,range->aspectMask,
- &clear_rect, clear_att.clearValue.depthStencil,
- 0)) {
- radv_fast_clear_depth(cmd_buffer, &iview, &clear_att,
- NULL, NULL);
- return true;
- }
- }
-
- return false;
+ struct radv_image_view iview;
+
+ radv_image_view_init(&iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(image),
+ .viewType = radv_meta_get_view_type(image),
+ .format = image->vk_format,
+ .subresourceRange =
+ {
+ .aspectMask = range->aspectMask,
+ .baseMipLevel = range->baseMipLevel,
+ .levelCount = range->levelCount,
+ .baseArrayLayer = range->baseArrayLayer,
+ .layerCount = range->layerCount,
+ },
+ },
+ NULL);
+
+ VkClearRect clear_rect = {
+ .rect =
+ {
+ .offset = {0, 0},
+ .extent =
+ {
+ radv_minify(image->info.width, range->baseMipLevel),
+ radv_minify(image->info.height, range->baseMipLevel),
+ },
+ },
+ .baseArrayLayer = range->baseArrayLayer,
+ .layerCount = range->layerCount,
+ };
+
+ VkClearAttachment clear_att = {
+ .aspectMask = range->aspectMask,
+ .colorAttachment = 0,
+ .clearValue = *clear_val,
+ };
+
+ if (vk_format_is_color(format)) {
+ if (radv_can_fast_clear_color(cmd_buffer, &iview, image_layout, in_render_loop, &clear_rect,
+ clear_att.clearValue.color, 0)) {
+ radv_fast_clear_color(cmd_buffer, &iview, &clear_att, clear_att.colorAttachment, NULL,
+ NULL);
+ return true;
+ }
+ } else {
+ if (radv_can_fast_clear_depth(cmd_buffer, &iview, image_layout, in_render_loop,
+ range->aspectMask, &clear_rect,
+ clear_att.clearValue.depthStencil, 0)) {
+ radv_fast_clear_depth(cmd_buffer, &iview, &clear_att, NULL, NULL);
+ return true;
+ }
+ }
+
+ return false;
}
static void
-radv_cmd_clear_image(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- VkImageLayout image_layout,
- const VkClearValue *clear_value,
- uint32_t range_count,
- const VkImageSubresourceRange *ranges,
- bool cs)
+radv_cmd_clear_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ VkImageLayout image_layout, const VkClearValue *clear_value,
+ uint32_t range_count, const VkImageSubresourceRange *ranges, bool cs)
{
- VkFormat format = image->vk_format;
- VkClearValue internal_clear_value;
-
- if (ranges->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT)
- internal_clear_value.color = clear_value->color;
- else
- internal_clear_value.depthStencil = clear_value->depthStencil;
-
- if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) {
- uint32_t value;
- format = VK_FORMAT_R32_UINT;
- value = float3_to_rgb9e5(clear_value->color.float32);
- internal_clear_value.color.uint32[0] = value;
- }
-
- if (format == VK_FORMAT_R4G4_UNORM_PACK8) {
- uint8_t r, g;
- format = VK_FORMAT_R8_UINT;
- r = float_to_ubyte(clear_value->color.float32[0]) >> 4;
- g = float_to_ubyte(clear_value->color.float32[1]) >> 4;
- internal_clear_value.color.uint32[0] = (r << 4) | (g & 0xf);
- }
-
- for (uint32_t r = 0; r < range_count; r++) {
- const VkImageSubresourceRange *range = &ranges[r];
-
- /* Try to perform a fast clear first, otherwise fallback to
- * the legacy path.
- */
- if (!cs &&
- radv_fast_clear_range(cmd_buffer, image, format,
- image_layout, false, range,
- &internal_clear_value)) {
- continue;
- }
-
- for (uint32_t l = 0; l < radv_get_levelCount(image, range); ++l) {
- const uint32_t layer_count = image->type == VK_IMAGE_TYPE_3D ?
- radv_minify(image->info.depth, range->baseMipLevel + l) :
- radv_get_layerCount(image, range);
- for (uint32_t s = 0; s < layer_count; ++s) {
-
- if (cs) {
- struct radv_meta_blit2d_surf surf;
- surf.format = format;
- surf.image = image;
- surf.level = range->baseMipLevel + l;
- surf.layer = range->baseArrayLayer + s;
- surf.aspect_mask = range->aspectMask;
- surf.disable_compression = true;
- radv_meta_clear_image_cs(cmd_buffer, &surf,
- &internal_clear_value.color);
- } else {
- radv_clear_image_layer(cmd_buffer, image, image_layout,
- range, format, l, s, &internal_clear_value);
- }
- }
- }
- }
+ VkFormat format = image->vk_format;
+ VkClearValue internal_clear_value;
+
+ if (ranges->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT)
+ internal_clear_value.color = clear_value->color;
+ else
+ internal_clear_value.depthStencil = clear_value->depthStencil;
+
+ if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) {
+ uint32_t value;
+ format = VK_FORMAT_R32_UINT;
+ value = float3_to_rgb9e5(clear_value->color.float32);
+ internal_clear_value.color.uint32[0] = value;
+ }
+
+ if (format == VK_FORMAT_R4G4_UNORM_PACK8) {
+ uint8_t r, g;
+ format = VK_FORMAT_R8_UINT;
+ r = float_to_ubyte(clear_value->color.float32[0]) >> 4;
+ g = float_to_ubyte(clear_value->color.float32[1]) >> 4;
+ internal_clear_value.color.uint32[0] = (r << 4) | (g & 0xf);
+ }
+
+ for (uint32_t r = 0; r < range_count; r++) {
+ const VkImageSubresourceRange *range = &ranges[r];
+
+      /* Try to perform a fast clear first, otherwise fall back to
+ * the legacy path.
+ */
+ if (!cs && radv_fast_clear_range(cmd_buffer, image, format, image_layout, false, range,
+ &internal_clear_value)) {
+ continue;
+ }
+
+ for (uint32_t l = 0; l < radv_get_levelCount(image, range); ++l) {
+ const uint32_t layer_count = image->type == VK_IMAGE_TYPE_3D
+ ? radv_minify(image->info.depth, range->baseMipLevel + l)
+ : radv_get_layerCount(image, range);
+ for (uint32_t s = 0; s < layer_count; ++s) {
+
+ if (cs) {
+ struct radv_meta_blit2d_surf surf;
+ surf.format = format;
+ surf.image = image;
+ surf.level = range->baseMipLevel + l;
+ surf.layer = range->baseArrayLayer + s;
+ surf.aspect_mask = range->aspectMask;
+ surf.disable_compression = true;
+ radv_meta_clear_image_cs(cmd_buffer, &surf, &internal_clear_value.color);
+ } else {
+ radv_clear_image_layer(cmd_buffer, image, image_layout, range, format, l, s,
+ &internal_clear_value);
+ }
+ }
+ }
+ }
}
-void radv_CmdClearColorImage(
- VkCommandBuffer commandBuffer,
- VkImage image_h,
- VkImageLayout imageLayout,
- const VkClearColorValue* pColor,
- uint32_t rangeCount,
- const VkImageSubresourceRange* pRanges)
+void
+radv_CmdClearColorImage(VkCommandBuffer commandBuffer, VkImage image_h, VkImageLayout imageLayout,
+ const VkClearColorValue *pColor, uint32_t rangeCount,
+ const VkImageSubresourceRange *pRanges)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_image, image, image_h);
- struct radv_meta_saved_state saved_state;
- bool cs;
-
- cs = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE ||
- !radv_image_is_renderable(cmd_buffer->device, image);
-
- if (cs) {
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_COMPUTE_PIPELINE |
- RADV_META_SAVE_CONSTANTS |
- RADV_META_SAVE_DESCRIPTORS);
- } else {
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_GRAPHICS_PIPELINE |
- RADV_META_SAVE_CONSTANTS);
- }
-
- radv_cmd_clear_image(cmd_buffer, image, imageLayout,
- (const VkClearValue *) pColor,
- rangeCount, pRanges, cs);
-
- radv_meta_restore(&saved_state, cmd_buffer);
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_image, image, image_h);
+ struct radv_meta_saved_state saved_state;
+ bool cs;
+
+ cs = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE ||
+ !radv_image_is_renderable(cmd_buffer->device, image);
+
+ if (cs) {
+ radv_meta_save(
+ &saved_state, cmd_buffer,
+ RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
+ } else {
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS);
+ }
+
+ radv_cmd_clear_image(cmd_buffer, image, imageLayout, (const VkClearValue *)pColor, rangeCount,
+ pRanges, cs);
+
+ radv_meta_restore(&saved_state, cmd_buffer);
}
-void radv_CmdClearDepthStencilImage(
- VkCommandBuffer commandBuffer,
- VkImage image_h,
- VkImageLayout imageLayout,
- const VkClearDepthStencilValue* pDepthStencil,
- uint32_t rangeCount,
- const VkImageSubresourceRange* pRanges)
+void
+radv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, VkImage image_h,
+ VkImageLayout imageLayout,
+ const VkClearDepthStencilValue *pDepthStencil, uint32_t rangeCount,
+ const VkImageSubresourceRange *pRanges)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_image, image, image_h);
- struct radv_meta_saved_state saved_state;
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_image, image, image_h);
+ struct radv_meta_saved_state saved_state;
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_GRAPHICS_PIPELINE |
- RADV_META_SAVE_CONSTANTS);
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS);
- radv_cmd_clear_image(cmd_buffer, image, imageLayout,
- (const VkClearValue *) pDepthStencil,
- rangeCount, pRanges, false);
+ radv_cmd_clear_image(cmd_buffer, image, imageLayout, (const VkClearValue *)pDepthStencil,
+ rangeCount, pRanges, false);
- radv_meta_restore(&saved_state, cmd_buffer);
+ radv_meta_restore(&saved_state, cmd_buffer);
}
-void radv_CmdClearAttachments(
- VkCommandBuffer commandBuffer,
- uint32_t attachmentCount,
- const VkClearAttachment* pAttachments,
- uint32_t rectCount,
- const VkClearRect* pRects)
+void
+radv_CmdClearAttachments(VkCommandBuffer commandBuffer, uint32_t attachmentCount,
+ const VkClearAttachment *pAttachments, uint32_t rectCount,
+ const VkClearRect *pRects)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- struct radv_meta_saved_state saved_state;
- enum radv_cmd_flush_bits pre_flush = 0;
- enum radv_cmd_flush_bits post_flush = 0;
-
- if (!cmd_buffer->state.subpass)
- return;
-
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_GRAPHICS_PIPELINE |
- RADV_META_SAVE_CONSTANTS);
-
- /* FINISHME: We can do better than this dumb loop. It thrashes too much
- * state.
- */
- for (uint32_t a = 0; a < attachmentCount; ++a) {
- for (uint32_t r = 0; r < rectCount; ++r) {
- emit_clear(cmd_buffer, &pAttachments[a], &pRects[r], &pre_flush, &post_flush,
- cmd_buffer->state.subpass->view_mask, false);
- }
- }
-
- radv_meta_restore(&saved_state, cmd_buffer);
- cmd_buffer->state.flush_bits |= post_flush;
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ struct radv_meta_saved_state saved_state;
+ enum radv_cmd_flush_bits pre_flush = 0;
+ enum radv_cmd_flush_bits post_flush = 0;
+
+ if (!cmd_buffer->state.subpass)
+ return;
+
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS);
+
+ /* FINISHME: We can do better than this dumb loop. It thrashes too much
+ * state.
+ */
+ for (uint32_t a = 0; a < attachmentCount; ++a) {
+ for (uint32_t r = 0; r < rectCount; ++r) {
+ emit_clear(cmd_buffer, &pAttachments[a], &pRects[r], &pre_flush, &post_flush,
+ cmd_buffer->state.subpass->view_mask, false);
+ }
+ }
+
+ radv_meta_restore(&saved_state, cmd_buffer);
+ cmd_buffer->state.flush_bits |= post_flush;
}
diff --git a/src/amd/vulkan/radv_meta_copy.c b/src/amd/vulkan/radv_meta_copy.c
index 44e61ca4b0a..224419de139 100644
--- a/src/amd/vulkan/radv_meta_copy.c
+++ b/src/amd/vulkan/radv_meta_copy.c
@@ -27,8 +27,8 @@
static VkExtent3D
meta_image_block_size(const struct radv_image *image)
{
- const struct util_format_description *desc = vk_format_description(image->vk_format);
- return (VkExtent3D) { desc->block.width, desc->block.height, 1 };
+ const struct util_format_description *desc = vk_format_description(image->vk_format);
+ return (VkExtent3D){desc->block.width, desc->block.height, 1};
}
/* Returns the user-provided VkBufferImageCopy::imageExtent in units of
@@ -36,16 +36,16 @@ meta_image_block_size(const struct radv_image *image)
* if Image is uncompressed or compressed, respectively.
*/
static struct VkExtent3D
-meta_region_extent_el(const struct radv_image *image,
- const VkImageType imageType,
+meta_region_extent_el(const struct radv_image *image, const VkImageType imageType,
const struct VkExtent3D *extent)
{
- const VkExtent3D block = meta_image_block_size(image);
- return radv_sanitize_image_extent(imageType, (VkExtent3D) {
- .width = DIV_ROUND_UP(extent->width , block.width),
- .height = DIV_ROUND_UP(extent->height, block.height),
- .depth = DIV_ROUND_UP(extent->depth , block.depth),
- });
+ const VkExtent3D block = meta_image_block_size(image);
+ return radv_sanitize_image_extent(imageType,
+ (VkExtent3D){
+ .width = DIV_ROUND_UP(extent->width, block.width),
+ .height = DIV_ROUND_UP(extent->height, block.height),
+ .depth = DIV_ROUND_UP(extent->depth, block.depth),
+ });
}
/* Returns the user-provided VkBufferImageCopy::imageOffset in units of
@@ -53,517 +53,481 @@ meta_region_extent_el(const struct radv_image *image,
* if Image is uncompressed or compressed, respectively.
*/
static struct VkOffset3D
-meta_region_offset_el(const struct radv_image *image,
- const struct VkOffset3D *offset)
+meta_region_offset_el(const struct radv_image *image, const struct VkOffset3D *offset)
{
- const VkExtent3D block = meta_image_block_size(image);
- return radv_sanitize_image_offset(image->type, (VkOffset3D) {
- .x = offset->x / block.width,
- .y = offset->y / block.height,
- .z = offset->z / block.depth,
- });
+ const VkExtent3D block = meta_image_block_size(image);
+ return radv_sanitize_image_offset(image->type, (VkOffset3D){
+ .x = offset->x / block.width,
+ .y = offset->y / block.height,
+ .z = offset->z / block.depth,
+ });
}
static VkFormat
vk_format_for_size(int bs)
{
- switch (bs) {
- case 1: return VK_FORMAT_R8_UINT;
- case 2: return VK_FORMAT_R8G8_UINT;
- case 4: return VK_FORMAT_R8G8B8A8_UINT;
- case 8: return VK_FORMAT_R16G16B16A16_UINT;
- case 12: return VK_FORMAT_R32G32B32_UINT;
- case 16: return VK_FORMAT_R32G32B32A32_UINT;
- default:
- unreachable("Invalid format block size");
- }
+ switch (bs) {
+ case 1:
+ return VK_FORMAT_R8_UINT;
+ case 2:
+ return VK_FORMAT_R8G8_UINT;
+ case 4:
+ return VK_FORMAT_R8G8B8A8_UINT;
+ case 8:
+ return VK_FORMAT_R16G16B16A16_UINT;
+ case 12:
+ return VK_FORMAT_R32G32B32_UINT;
+ case 16:
+ return VK_FORMAT_R32G32B32A32_UINT;
+ default:
+ unreachable("Invalid format block size");
+ }
}
static struct radv_meta_blit2d_surf
-blit_surf_for_image_level_layer(struct radv_image *image,
- VkImageLayout layout,
- const VkImageSubresourceLayers *subres,
- VkImageAspectFlags aspect_mask)
+blit_surf_for_image_level_layer(struct radv_image *image, VkImageLayout layout,
+ const VkImageSubresourceLayers *subres,
+ VkImageAspectFlags aspect_mask)
{
- VkFormat format = radv_get_aspect_format(image, aspect_mask);
-
- if (!radv_dcc_enabled(image, subres->mipLevel) &&
- !(radv_image_is_tc_compat_htile(image)))
- format = vk_format_for_size(vk_format_get_blocksize(format));
-
- format = vk_format_no_srgb(format);
-
- return (struct radv_meta_blit2d_surf) {
- .format = format,
- .bs = vk_format_get_blocksize(format),
- .level = subres->mipLevel,
- .layer = subres->baseArrayLayer,
- .image = image,
- .aspect_mask = aspect_mask,
- .current_layout = layout,
- };
+ VkFormat format = radv_get_aspect_format(image, aspect_mask);
+
+ if (!radv_dcc_enabled(image, subres->mipLevel) && !(radv_image_is_tc_compat_htile(image)))
+ format = vk_format_for_size(vk_format_get_blocksize(format));
+
+ format = vk_format_no_srgb(format);
+
+ return (struct radv_meta_blit2d_surf){
+ .format = format,
+ .bs = vk_format_get_blocksize(format),
+ .level = subres->mipLevel,
+ .layer = subres->baseArrayLayer,
+ .image = image,
+ .aspect_mask = aspect_mask,
+ .current_layout = layout,
+ };
}
bool
radv_image_is_renderable(struct radv_device *device, struct radv_image *image)
{
- if (image->vk_format == VK_FORMAT_R32G32B32_UINT ||
- image->vk_format == VK_FORMAT_R32G32B32_SINT ||
- image->vk_format == VK_FORMAT_R32G32B32_SFLOAT)
- return false;
-
- if (device->physical_device->rad_info.chip_class >= GFX9 &&
- image->type == VK_IMAGE_TYPE_3D &&
- vk_format_get_blocksizebits(image->vk_format) == 128 &&
- vk_format_is_compressed(image->vk_format))
- return false;
- return true;
+ if (image->vk_format == VK_FORMAT_R32G32B32_UINT ||
+ image->vk_format == VK_FORMAT_R32G32B32_SINT ||
+ image->vk_format == VK_FORMAT_R32G32B32_SFLOAT)
+ return false;
+
+ if (device->physical_device->rad_info.chip_class >= GFX9 && image->type == VK_IMAGE_TYPE_3D &&
+ vk_format_get_blocksizebits(image->vk_format) == 128 &&
+ vk_format_is_compressed(image->vk_format))
+ return false;
+ return true;
}
static void
-copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer,
- struct radv_buffer* buffer,
- struct radv_image* image,
- VkImageLayout layout,
- const VkBufferImageCopy2KHR* region)
+copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer,
+ struct radv_image *image, VkImageLayout layout,
+ const VkBufferImageCopy2KHR *region)
{
- struct radv_meta_saved_state saved_state;
- bool old_predicating;
- bool cs;
-
- /* The Vulkan 1.0 spec says "dstImage must have a sample count equal to
- * VK_SAMPLE_COUNT_1_BIT."
- */
- assert(image->info.samples == 1);
-
- cs = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE ||
- !radv_image_is_renderable(cmd_buffer->device, image);
-
- radv_meta_save(&saved_state, cmd_buffer,
- (cs ? RADV_META_SAVE_COMPUTE_PIPELINE :
- RADV_META_SAVE_GRAPHICS_PIPELINE) |
- RADV_META_SAVE_CONSTANTS |
- RADV_META_SAVE_DESCRIPTORS);
-
- /* VK_EXT_conditional_rendering says that copy commands should not be
- * affected by conditional rendering.
- */
- old_predicating = cmd_buffer->state.predicating;
- cmd_buffer->state.predicating = false;
-
- /**
- * From the Vulkan 1.0.6 spec: 18.3 Copying Data Between Images
- * extent is the size in texels of the source image to copy in width,
- * height and depth. 1D images use only x and width. 2D images use x, y,
- * width and height. 3D images use x, y, z, width, height and depth.
- *
- *
- * Also, convert the offsets and extent from units of texels to units of
- * blocks - which is the highest resolution accessible in this command.
- */
- const VkOffset3D img_offset_el =
- meta_region_offset_el(image, &region->imageOffset);
- const VkExtent3D bufferExtent = {
- .width = region->bufferRowLength ?
- region->bufferRowLength : region->imageExtent.width,
- .height = region->bufferImageHeight ?
- region->bufferImageHeight : region->imageExtent.height,
- };
- const VkExtent3D buf_extent_el =
- meta_region_extent_el(image, image->type, &bufferExtent);
-
- /* Start creating blit rect */
- const VkExtent3D img_extent_el =
- meta_region_extent_el(image, image->type, &region->imageExtent);
- struct radv_meta_blit2d_rect rect = {
- .width = img_extent_el.width,
- .height = img_extent_el.height,
- };
-
- /* Create blit surfaces */
- struct radv_meta_blit2d_surf img_bsurf =
- blit_surf_for_image_level_layer(image,
- layout,
- &region->imageSubresource,
- region->imageSubresource.aspectMask);
-
- if (!radv_is_buffer_format_supported(img_bsurf.format, NULL)) {
- uint32_t queue_mask = radv_image_queue_family_mask(image,
- cmd_buffer->queue_family_index,
- cmd_buffer->queue_family_index);
- bool compressed = radv_layout_dcc_compressed(cmd_buffer->device, image, layout, false, queue_mask);
- if (compressed) {
- radv_decompress_dcc(cmd_buffer, image, &(VkImageSubresourceRange) {
- .aspectMask = region->imageSubresource.aspectMask,
- .baseMipLevel = region->imageSubresource.mipLevel,
- .levelCount = 1,
- .baseArrayLayer = region->imageSubresource.baseArrayLayer,
- .layerCount = region->imageSubresource.layerCount,
- });
- img_bsurf.disable_compression = true;
- }
- img_bsurf.format = vk_format_for_size(vk_format_get_blocksize(img_bsurf.format));
- }
-
- struct radv_meta_blit2d_buffer buf_bsurf = {
- .bs = img_bsurf.bs,
- .format = img_bsurf.format,
- .buffer = buffer,
- .offset = region->bufferOffset,
- .pitch = buf_extent_el.width,
- };
-
- if (image->type == VK_IMAGE_TYPE_3D)
- img_bsurf.layer = img_offset_el.z;
- /* Loop through each 3D or array slice */
- unsigned num_slices_3d = img_extent_el.depth;
- unsigned num_slices_array = region->imageSubresource.layerCount;
- unsigned slice_3d = 0;
- unsigned slice_array = 0;
- while (slice_3d < num_slices_3d && slice_array < num_slices_array) {
-
- rect.dst_x = img_offset_el.x;
- rect.dst_y = img_offset_el.y;
-
-
- /* Perform Blit */
- if (cs) {
- radv_meta_buffer_to_image_cs(cmd_buffer, &buf_bsurf, &img_bsurf, 1, &rect);
- } else {
- radv_meta_blit2d(cmd_buffer, NULL, &buf_bsurf, &img_bsurf, 1, &rect);
- }
-
- /* Once we've done the blit, all of the actual information about
- * the image is embedded in the command buffer so we can just
- * increment the offset directly in the image effectively
- * re-binding it to different backing memory.
- */
- buf_bsurf.offset += buf_extent_el.width *
- buf_extent_el.height * buf_bsurf.bs;
- img_bsurf.layer++;
- if (image->type == VK_IMAGE_TYPE_3D)
- slice_3d++;
- else
- slice_array++;
- }
-
- /* Restore conditional rendering. */
- cmd_buffer->state.predicating = old_predicating;
-
- radv_meta_restore(&saved_state, cmd_buffer);
+ struct radv_meta_saved_state saved_state;
+ bool old_predicating;
+ bool cs;
+
+ /* The Vulkan 1.0 spec says "dstImage must have a sample count equal to
+ * VK_SAMPLE_COUNT_1_BIT."
+ */
+ assert(image->info.samples == 1);
+
+ cs = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE ||
+ !radv_image_is_renderable(cmd_buffer->device, image);
+
+ radv_meta_save(&saved_state, cmd_buffer,
+ (cs ? RADV_META_SAVE_COMPUTE_PIPELINE : RADV_META_SAVE_GRAPHICS_PIPELINE) |
+ RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
+
+ /* VK_EXT_conditional_rendering says that copy commands should not be
+ * affected by conditional rendering.
+ */
+ old_predicating = cmd_buffer->state.predicating;
+ cmd_buffer->state.predicating = false;
+
+ /**
+ * From the Vulkan 1.0.6 spec: 18.3 Copying Data Between Images
+ * extent is the size in texels of the source image to copy in width,
+ * height and depth. 1D images use only x and width. 2D images use x, y,
+ * width and height. 3D images use x, y, z, width, height and depth.
+ *
+ *
+ * Also, convert the offsets and extent from units of texels to units of
+ * blocks - which is the highest resolution accessible in this command.
+ */
+ const VkOffset3D img_offset_el = meta_region_offset_el(image, &region->imageOffset);
+ const VkExtent3D bufferExtent = {
+ .width = region->bufferRowLength ? region->bufferRowLength : region->imageExtent.width,
+ .height = region->bufferImageHeight ? region->bufferImageHeight : region->imageExtent.height,
+ };
+ const VkExtent3D buf_extent_el = meta_region_extent_el(image, image->type, &bufferExtent);
+
+ /* Start creating blit rect */
+ const VkExtent3D img_extent_el = meta_region_extent_el(image, image->type, &region->imageExtent);
+ struct radv_meta_blit2d_rect rect = {
+ .width = img_extent_el.width,
+ .height = img_extent_el.height,
+ };
+
+ /* Create blit surfaces */
+ struct radv_meta_blit2d_surf img_bsurf = blit_surf_for_image_level_layer(
+ image, layout, &region->imageSubresource, region->imageSubresource.aspectMask);
+
+ if (!radv_is_buffer_format_supported(img_bsurf.format, NULL)) {
+ uint32_t queue_mask = radv_image_queue_family_mask(image, cmd_buffer->queue_family_index,
+ cmd_buffer->queue_family_index);
+ bool compressed =
+ radv_layout_dcc_compressed(cmd_buffer->device, image, layout, false, queue_mask);
+ if (compressed) {
+ radv_decompress_dcc(cmd_buffer, image,
+ &(VkImageSubresourceRange){
+ .aspectMask = region->imageSubresource.aspectMask,
+ .baseMipLevel = region->imageSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = region->imageSubresource.baseArrayLayer,
+ .layerCount = region->imageSubresource.layerCount,
+ });
+ img_bsurf.disable_compression = true;
+ }
+ img_bsurf.format = vk_format_for_size(vk_format_get_blocksize(img_bsurf.format));
+ }
+
+ struct radv_meta_blit2d_buffer buf_bsurf = {
+ .bs = img_bsurf.bs,
+ .format = img_bsurf.format,
+ .buffer = buffer,
+ .offset = region->bufferOffset,
+ .pitch = buf_extent_el.width,
+ };
+
+ if (image->type == VK_IMAGE_TYPE_3D)
+ img_bsurf.layer = img_offset_el.z;
+ /* Loop through each 3D or array slice */
+ unsigned num_slices_3d = img_extent_el.depth;
+ unsigned num_slices_array = region->imageSubresource.layerCount;
+ unsigned slice_3d = 0;
+ unsigned slice_array = 0;
+ while (slice_3d < num_slices_3d && slice_array < num_slices_array) {
+
+ rect.dst_x = img_offset_el.x;
+ rect.dst_y = img_offset_el.y;
+
+ /* Perform Blit */
+ if (cs) {
+ radv_meta_buffer_to_image_cs(cmd_buffer, &buf_bsurf, &img_bsurf, 1, &rect);
+ } else {
+ radv_meta_blit2d(cmd_buffer, NULL, &buf_bsurf, &img_bsurf, 1, &rect);
+ }
+
+ /* Once we've done the blit, all of the actual information about
+ * the image is embedded in the command buffer so we can just
+ * increment the offset directly in the image effectively
+ * re-binding it to different backing memory.
+ */
+ buf_bsurf.offset += buf_extent_el.width * buf_extent_el.height * buf_bsurf.bs;
+ img_bsurf.layer++;
+ if (image->type == VK_IMAGE_TYPE_3D)
+ slice_3d++;
+ else
+ slice_array++;
+ }
+
+ /* Restore conditional rendering. */
+ cmd_buffer->state.predicating = old_predicating;
+
+ radv_meta_restore(&saved_state, cmd_buffer);
}
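/*
 * A standalone sketch of the addressing math used in copy_buffer_to_image()
 * above: texel extents are converted to block units for block-compressed
 * formats, and the buffer offset then advances by one tightly packed slice
 * per loop iteration. The struct, format and sizes below are hypothetical
 * illustrations, not RADV types or real surface parameters.
 */
#include <inttypes.h>
#include <stdio.h>

struct slice_layout {
   uint32_t row_length_el; /* bufferRowLength, in block units */
   uint32_t height_el;     /* bufferImageHeight, in block units */
   uint32_t block_size;    /* bytes per block ("bs" above) */
};

int
main(void)
{
   /* Assume a BC1-like format: 4x4 texel blocks, 8 bytes per block,
    * copied from a tightly packed 256x256 texel buffer. */
   struct slice_layout l = {
      .row_length_el = 256 / 4,
      .height_el = 256 / 4,
      .block_size = 8,
   };

   uint64_t offset = 0;
   for (unsigned slice = 0; slice < 3; ++slice) {
      printf("slice %u starts at byte offset %" PRIu64 "\n", slice, offset);
      offset += (uint64_t)l.row_length_el * l.height_el * l.block_size;
   }
   return 0;
}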
-void radv_CmdCopyBufferToImage2KHR(
- VkCommandBuffer commandBuffer,
- const VkCopyBufferToImageInfo2KHR* pCopyBufferToImageInfo)
+void
+radv_CmdCopyBufferToImage2KHR(VkCommandBuffer commandBuffer,
+ const VkCopyBufferToImageInfo2KHR *pCopyBufferToImageInfo)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_buffer, src_buffer, pCopyBufferToImageInfo->srcBuffer);
- RADV_FROM_HANDLE(radv_image, dst_image, pCopyBufferToImageInfo->dstImage);
-
- for (unsigned r = 0; r < pCopyBufferToImageInfo->regionCount; r++) {
- copy_buffer_to_image(cmd_buffer, src_buffer, dst_image,
- pCopyBufferToImageInfo->dstImageLayout,
- &pCopyBufferToImageInfo->pRegions[r]);
- }
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_buffer, src_buffer, pCopyBufferToImageInfo->srcBuffer);
+ RADV_FROM_HANDLE(radv_image, dst_image, pCopyBufferToImageInfo->dstImage);
+
+ for (unsigned r = 0; r < pCopyBufferToImageInfo->regionCount; r++) {
+ copy_buffer_to_image(cmd_buffer, src_buffer, dst_image,
+ pCopyBufferToImageInfo->dstImageLayout,
+ &pCopyBufferToImageInfo->pRegions[r]);
+ }
}
static void
-copy_image_to_buffer(struct radv_cmd_buffer *cmd_buffer,
- struct radv_buffer *buffer,
- struct radv_image *image,
- VkImageLayout layout,
- const VkBufferImageCopy2KHR *region)
+copy_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer,
+ struct radv_image *image, VkImageLayout layout,
+ const VkBufferImageCopy2KHR *region)
{
- struct radv_meta_saved_state saved_state;
- bool old_predicating;
-
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_COMPUTE_PIPELINE |
- RADV_META_SAVE_CONSTANTS |
- RADV_META_SAVE_DESCRIPTORS);
-
- /* VK_EXT_conditional_rendering says that copy commands should not be
- * affected by conditional rendering.
- */
- old_predicating = cmd_buffer->state.predicating;
- cmd_buffer->state.predicating = false;
-
- /**
- * From the Vulkan 1.0.6 spec: 18.3 Copying Data Between Images
- * extent is the size in texels of the source image to copy in width,
- * height and depth. 1D images use only x and width. 2D images use x, y,
- * width and height. 3D images use x, y, z, width, height and depth.
- *
- *
- * Also, convert the offsets and extent from units of texels to units of
- * blocks - which is the highest resolution accessible in this command.
- */
- const VkOffset3D img_offset_el =
- meta_region_offset_el(image, &region->imageOffset);
- const VkExtent3D bufferExtent = {
- .width = region->bufferRowLength ?
- region->bufferRowLength : region->imageExtent.width,
- .height = region->bufferImageHeight ?
- region->bufferImageHeight : region->imageExtent.height,
- };
- const VkExtent3D buf_extent_el =
- meta_region_extent_el(image, image->type, &bufferExtent);
-
- /* Start creating blit rect */
- const VkExtent3D img_extent_el =
- meta_region_extent_el(image, image->type, &region->imageExtent);
- struct radv_meta_blit2d_rect rect = {
- .width = img_extent_el.width,
- .height = img_extent_el.height,
- };
-
- /* Create blit surfaces */
- struct radv_meta_blit2d_surf img_info =
- blit_surf_for_image_level_layer(image,
- layout,
- &region->imageSubresource,
- region->imageSubresource.aspectMask);
-
- if (!radv_is_buffer_format_supported(img_info.format, NULL)) {
- uint32_t queue_mask = radv_image_queue_family_mask(image,
- cmd_buffer->queue_family_index,
- cmd_buffer->queue_family_index);
- bool compressed = radv_layout_dcc_compressed(cmd_buffer->device, image, layout, false, queue_mask);
- if (compressed) {
- radv_decompress_dcc(cmd_buffer, image, &(VkImageSubresourceRange) {
- .aspectMask = region->imageSubresource.aspectMask,
- .baseMipLevel = region->imageSubresource.mipLevel,
- .levelCount = 1,
- .baseArrayLayer = region->imageSubresource.baseArrayLayer,
- .layerCount = region->imageSubresource.layerCount,
- });
- img_info.disable_compression = true;
- }
- img_info.format = vk_format_for_size(vk_format_get_blocksize(img_info.format));
- }
-
- struct radv_meta_blit2d_buffer buf_info = {
- .bs = img_info.bs,
- .format = img_info.format,
- .buffer = buffer,
- .offset = region->bufferOffset,
- .pitch = buf_extent_el.width,
- };
-
- if (image->type == VK_IMAGE_TYPE_3D)
- img_info.layer = img_offset_el.z;
- /* Loop through each 3D or array slice */
- unsigned num_slices_3d = img_extent_el.depth;
- unsigned num_slices_array = region->imageSubresource.layerCount;
- unsigned slice_3d = 0;
- unsigned slice_array = 0;
- while (slice_3d < num_slices_3d && slice_array < num_slices_array) {
-
- rect.src_x = img_offset_el.x;
- rect.src_y = img_offset_el.y;
-
-
- /* Perform Blit */
- radv_meta_image_to_buffer(cmd_buffer, &img_info, &buf_info, 1, &rect);
-
- buf_info.offset += buf_extent_el.width *
- buf_extent_el.height * buf_info.bs;
- img_info.layer++;
- if (image->type == VK_IMAGE_TYPE_3D)
- slice_3d++;
- else
- slice_array++;
- }
-
- /* Restore conditional rendering. */
- cmd_buffer->state.predicating = old_predicating;
-
- radv_meta_restore(&saved_state, cmd_buffer);
+ struct radv_meta_saved_state saved_state;
+ bool old_predicating;
+
+ radv_meta_save(
+ &saved_state, cmd_buffer,
+ RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
+
+ /* VK_EXT_conditional_rendering says that copy commands should not be
+ * affected by conditional rendering.
+ */
+ old_predicating = cmd_buffer->state.predicating;
+ cmd_buffer->state.predicating = false;
+
+ /**
+ * From the Vulkan 1.0.6 spec: 18.3 Copying Data Between Images
+ * extent is the size in texels of the source image to copy in width,
+ * height and depth. 1D images use only x and width. 2D images use x, y,
+ * width and height. 3D images use x, y, z, width, height and depth.
+ *
+ *
+ * Also, convert the offsets and extent from units of texels to units of
+ * blocks - which is the highest resolution accessible in this command.
+ */
+ const VkOffset3D img_offset_el = meta_region_offset_el(image, &region->imageOffset);
+ const VkExtent3D bufferExtent = {
+ .width = region->bufferRowLength ? region->bufferRowLength : region->imageExtent.width,
+ .height = region->bufferImageHeight ? region->bufferImageHeight : region->imageExtent.height,
+ };
+ const VkExtent3D buf_extent_el = meta_region_extent_el(image, image->type, &bufferExtent);
+
+ /* Start creating blit rect */
+ const VkExtent3D img_extent_el = meta_region_extent_el(image, image->type, &region->imageExtent);
+ struct radv_meta_blit2d_rect rect = {
+ .width = img_extent_el.width,
+ .height = img_extent_el.height,
+ };
+
+ /* Create blit surfaces */
+ struct radv_meta_blit2d_surf img_info = blit_surf_for_image_level_layer(
+ image, layout, &region->imageSubresource, region->imageSubresource.aspectMask);
+
+ if (!radv_is_buffer_format_supported(img_info.format, NULL)) {
+ uint32_t queue_mask = radv_image_queue_family_mask(image, cmd_buffer->queue_family_index,
+ cmd_buffer->queue_family_index);
+ bool compressed =
+ radv_layout_dcc_compressed(cmd_buffer->device, image, layout, false, queue_mask);
+ if (compressed) {
+ radv_decompress_dcc(cmd_buffer, image,
+ &(VkImageSubresourceRange){
+ .aspectMask = region->imageSubresource.aspectMask,
+ .baseMipLevel = region->imageSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = region->imageSubresource.baseArrayLayer,
+ .layerCount = region->imageSubresource.layerCount,
+ });
+ img_info.disable_compression = true;
+ }
+ img_info.format = vk_format_for_size(vk_format_get_blocksize(img_info.format));
+ }
+
+ struct radv_meta_blit2d_buffer buf_info = {
+ .bs = img_info.bs,
+ .format = img_info.format,
+ .buffer = buffer,
+ .offset = region->bufferOffset,
+ .pitch = buf_extent_el.width,
+ };
+
+ if (image->type == VK_IMAGE_TYPE_3D)
+ img_info.layer = img_offset_el.z;
+ /* Loop through each 3D or array slice */
+ unsigned num_slices_3d = img_extent_el.depth;
+ unsigned num_slices_array = region->imageSubresource.layerCount;
+ unsigned slice_3d = 0;
+ unsigned slice_array = 0;
+ while (slice_3d < num_slices_3d && slice_array < num_slices_array) {
+
+ rect.src_x = img_offset_el.x;
+ rect.src_y = img_offset_el.y;
+
+ /* Perform Blit */
+ radv_meta_image_to_buffer(cmd_buffer, &img_info, &buf_info, 1, &rect);
+
+ buf_info.offset += buf_extent_el.width * buf_extent_el.height * buf_info.bs;
+ img_info.layer++;
+ if (image->type == VK_IMAGE_TYPE_3D)
+ slice_3d++;
+ else
+ slice_array++;
+ }
+
+ /* Restore conditional rendering. */
+ cmd_buffer->state.predicating = old_predicating;
+
+ radv_meta_restore(&saved_state, cmd_buffer);
}
-void radv_CmdCopyImageToBuffer2KHR(
- VkCommandBuffer commandBuffer,
- const VkCopyImageToBufferInfo2KHR* pCopyImageToBufferInfo)
+void
+radv_CmdCopyImageToBuffer2KHR(VkCommandBuffer commandBuffer,
+ const VkCopyImageToBufferInfo2KHR *pCopyImageToBufferInfo)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_image, src_image, pCopyImageToBufferInfo->srcImage);
- RADV_FROM_HANDLE(radv_buffer, dst_buffer, pCopyImageToBufferInfo->dstBuffer);
-
- for (unsigned r = 0; r < pCopyImageToBufferInfo->regionCount; r++) {
- copy_image_to_buffer(cmd_buffer, dst_buffer, src_image,
- pCopyImageToBufferInfo->srcImageLayout,
- &pCopyImageToBufferInfo->pRegions[r]);
- }
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_image, src_image, pCopyImageToBufferInfo->srcImage);
+ RADV_FROM_HANDLE(radv_buffer, dst_buffer, pCopyImageToBufferInfo->dstBuffer);
+
+ for (unsigned r = 0; r < pCopyImageToBufferInfo->regionCount; r++) {
+ copy_image_to_buffer(cmd_buffer, dst_buffer, src_image,
+ pCopyImageToBufferInfo->srcImageLayout,
+ &pCopyImageToBufferInfo->pRegions[r]);
+ }
}
static void
-copy_image(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *src_image,
- VkImageLayout src_image_layout,
- struct radv_image *dst_image,
- VkImageLayout dst_image_layout,
- const VkImageCopy2KHR *region)
+copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,
+ VkImageLayout src_image_layout, struct radv_image *dst_image,
+ VkImageLayout dst_image_layout, const VkImageCopy2KHR *region)
{
- struct radv_meta_saved_state saved_state;
- bool old_predicating;
- bool cs;
-
- /* From the Vulkan 1.0 spec:
- *
- * vkCmdCopyImage can be used to copy image data between multisample
- * images, but both images must have the same number of samples.
- */
- assert(src_image->info.samples == dst_image->info.samples);
-
- cs = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE ||
- !radv_image_is_renderable(cmd_buffer->device, dst_image);
-
- radv_meta_save(&saved_state, cmd_buffer,
- (cs ? RADV_META_SAVE_COMPUTE_PIPELINE :
- RADV_META_SAVE_GRAPHICS_PIPELINE) |
- RADV_META_SAVE_CONSTANTS |
- RADV_META_SAVE_DESCRIPTORS);
-
- /* VK_EXT_conditional_rendering says that copy commands should not be
- * affected by conditional rendering.
- */
- old_predicating = cmd_buffer->state.predicating;
- cmd_buffer->state.predicating = false;
-
- VkImageAspectFlags src_aspects[3] = {VK_IMAGE_ASPECT_PLANE_0_BIT, VK_IMAGE_ASPECT_PLANE_1_BIT, VK_IMAGE_ASPECT_PLANE_2_BIT};
- VkImageAspectFlags dst_aspects[3] = {VK_IMAGE_ASPECT_PLANE_0_BIT, VK_IMAGE_ASPECT_PLANE_1_BIT, VK_IMAGE_ASPECT_PLANE_2_BIT};
- unsigned aspect_count = region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT ? src_image->plane_count : 1;
- if (region->srcSubresource.aspectMask != VK_IMAGE_ASPECT_COLOR_BIT)
- src_aspects[0] = region->srcSubresource.aspectMask;
- if (region->dstSubresource.aspectMask != VK_IMAGE_ASPECT_COLOR_BIT)
- dst_aspects[0] = region->dstSubresource.aspectMask;
-
- for (unsigned a = 0; a < aspect_count; ++a) {
- /* Create blit surfaces */
- struct radv_meta_blit2d_surf b_src =
- blit_surf_for_image_level_layer(src_image,
- src_image_layout,
- &region->srcSubresource,
- src_aspects[a]);
-
- struct radv_meta_blit2d_surf b_dst =
- blit_surf_for_image_level_layer(dst_image,
- dst_image_layout,
- &region->dstSubresource,
- dst_aspects[a]);
-
- uint32_t dst_queue_mask = radv_image_queue_family_mask(dst_image,
- cmd_buffer->queue_family_index,
- cmd_buffer->queue_family_index);
- bool dst_compressed = radv_layout_dcc_compressed(cmd_buffer->device, dst_image, dst_image_layout, false, dst_queue_mask);
- uint32_t src_queue_mask = radv_image_queue_family_mask(src_image,
- cmd_buffer->queue_family_index,
- cmd_buffer->queue_family_index);
- bool src_compressed = radv_layout_dcc_compressed(cmd_buffer->device, src_image, src_image_layout, false, src_queue_mask);
-
- if (!src_compressed || radv_dcc_formats_compatible(b_src.format, b_dst.format)) {
- b_src.format = b_dst.format;
- } else if (!dst_compressed) {
- b_dst.format = b_src.format;
- } else {
- radv_decompress_dcc(cmd_buffer, dst_image, &(VkImageSubresourceRange) {
- .aspectMask = dst_aspects[a],
- .baseMipLevel = region->dstSubresource.mipLevel,
- .levelCount = 1,
- .baseArrayLayer = region->dstSubresource.baseArrayLayer,
- .layerCount = region->dstSubresource.layerCount,
- });
- b_dst.format = b_src.format;
- b_dst.disable_compression = true;
- }
-
-
- /**
- * From the Vulkan 1.0.6 spec: 18.4 Copying Data Between Buffers and Images
- * imageExtent is the size in texels of the image to copy in width, height
- * and depth. 1D images use only x and width. 2D images use x, y, width
- * and height. 3D images use x, y, z, width, height and depth.
- *
- * Also, convert the offsets and extent from units of texels to units of
- * blocks - which is the highest resolution accessible in this command.
- */
- const VkOffset3D dst_offset_el =
- meta_region_offset_el(dst_image, &region->dstOffset);
- const VkOffset3D src_offset_el =
- meta_region_offset_el(src_image, &region->srcOffset);
-
- /*
- * From Vulkan 1.0.68, "Copying Data Between Images":
- * "When copying between compressed and uncompressed formats
- * the extent members represent the texel dimensions of the
- * source image and not the destination."
- * However, we must use the destination image type to avoid
- * clamping depth when copying multiple layers of a 2D image to
- * a 3D image.
- */
- const VkExtent3D img_extent_el =
- meta_region_extent_el(src_image, dst_image->type, &region->extent);
-
- /* Start creating blit rect */
- struct radv_meta_blit2d_rect rect = {
- .width = img_extent_el.width,
- .height = img_extent_el.height,
- };
-
- if (src_image->type == VK_IMAGE_TYPE_3D)
- b_src.layer = src_offset_el.z;
-
- if (dst_image->type == VK_IMAGE_TYPE_3D)
- b_dst.layer = dst_offset_el.z;
-
- /* Loop through each 3D or array slice */
- unsigned num_slices_3d = img_extent_el.depth;
- unsigned num_slices_array = region->dstSubresource.layerCount;
- unsigned slice_3d = 0;
- unsigned slice_array = 0;
- while (slice_3d < num_slices_3d && slice_array < num_slices_array) {
-
- /* Finish creating blit rect */
- rect.dst_x = dst_offset_el.x;
- rect.dst_y = dst_offset_el.y;
- rect.src_x = src_offset_el.x;
- rect.src_y = src_offset_el.y;
-
- /* Perform Blit */
- if (cs) {
- radv_meta_image_to_image_cs(cmd_buffer, &b_src, &b_dst, 1, &rect);
- } else {
- radv_meta_blit2d(cmd_buffer, &b_src, NULL, &b_dst, 1, &rect);
- }
-
- b_src.layer++;
- b_dst.layer++;
- if (dst_image->type == VK_IMAGE_TYPE_3D)
- slice_3d++;
- else
- slice_array++;
- }
- }
-
- /* Restore conditional rendering. */
- cmd_buffer->state.predicating = old_predicating;
-
- radv_meta_restore(&saved_state, cmd_buffer);
+ struct radv_meta_saved_state saved_state;
+ bool old_predicating;
+ bool cs;
+
+ /* From the Vulkan 1.0 spec:
+ *
+ * vkCmdCopyImage can be used to copy image data between multisample
+ * images, but both images must have the same number of samples.
+ */
+ assert(src_image->info.samples == dst_image->info.samples);
+
+ cs = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE ||
+ !radv_image_is_renderable(cmd_buffer->device, dst_image);
+
+ radv_meta_save(&saved_state, cmd_buffer,
+ (cs ? RADV_META_SAVE_COMPUTE_PIPELINE : RADV_META_SAVE_GRAPHICS_PIPELINE) |
+ RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
+
+ /* VK_EXT_conditional_rendering says that copy commands should not be
+ * affected by conditional rendering.
+ */
+ old_predicating = cmd_buffer->state.predicating;
+ cmd_buffer->state.predicating = false;
+
+ VkImageAspectFlags src_aspects[3] = {VK_IMAGE_ASPECT_PLANE_0_BIT, VK_IMAGE_ASPECT_PLANE_1_BIT,
+ VK_IMAGE_ASPECT_PLANE_2_BIT};
+ VkImageAspectFlags dst_aspects[3] = {VK_IMAGE_ASPECT_PLANE_0_BIT, VK_IMAGE_ASPECT_PLANE_1_BIT,
+ VK_IMAGE_ASPECT_PLANE_2_BIT};
+ unsigned aspect_count =
+ region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT ? src_image->plane_count : 1;
+ if (region->srcSubresource.aspectMask != VK_IMAGE_ASPECT_COLOR_BIT)
+ src_aspects[0] = region->srcSubresource.aspectMask;
+ if (region->dstSubresource.aspectMask != VK_IMAGE_ASPECT_COLOR_BIT)
+ dst_aspects[0] = region->dstSubresource.aspectMask;
+
+ for (unsigned a = 0; a < aspect_count; ++a) {
+ /* Create blit surfaces */
+ struct radv_meta_blit2d_surf b_src = blit_surf_for_image_level_layer(
+ src_image, src_image_layout, &region->srcSubresource, src_aspects[a]);
+
+ struct radv_meta_blit2d_surf b_dst = blit_surf_for_image_level_layer(
+ dst_image, dst_image_layout, &region->dstSubresource, dst_aspects[a]);
+
+ uint32_t dst_queue_mask = radv_image_queue_family_mask(
+ dst_image, cmd_buffer->queue_family_index, cmd_buffer->queue_family_index);
+ bool dst_compressed = radv_layout_dcc_compressed(cmd_buffer->device, dst_image,
+ dst_image_layout, false, dst_queue_mask);
+ uint32_t src_queue_mask = radv_image_queue_family_mask(
+ src_image, cmd_buffer->queue_family_index, cmd_buffer->queue_family_index);
+ bool src_compressed = radv_layout_dcc_compressed(cmd_buffer->device, src_image,
+ src_image_layout, false, src_queue_mask);
+
+ if (!src_compressed || radv_dcc_formats_compatible(b_src.format, b_dst.format)) {
+ b_src.format = b_dst.format;
+ } else if (!dst_compressed) {
+ b_dst.format = b_src.format;
+ } else {
+ radv_decompress_dcc(cmd_buffer, dst_image,
+ &(VkImageSubresourceRange){
+ .aspectMask = dst_aspects[a],
+ .baseMipLevel = region->dstSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = region->dstSubresource.baseArrayLayer,
+ .layerCount = region->dstSubresource.layerCount,
+ });
+ b_dst.format = b_src.format;
+ b_dst.disable_compression = true;
+ }
+
+ /**
+ * From the Vulkan 1.0.6 spec: 18.4 Copying Data Between Buffers and Images
+ * imageExtent is the size in texels of the image to copy in width, height
+ * and depth. 1D images use only x and width. 2D images use x, y, width
+ * and height. 3D images use x, y, z, width, height and depth.
+ *
+ * Also, convert the offsets and extent from units of texels to units of
+ * blocks - which is the highest resolution accessible in this command.
+ */
+ const VkOffset3D dst_offset_el = meta_region_offset_el(dst_image, &region->dstOffset);
+ const VkOffset3D src_offset_el = meta_region_offset_el(src_image, &region->srcOffset);
+
+ /*
+ * From Vulkan 1.0.68, "Copying Data Between Images":
+ * "When copying between compressed and uncompressed formats
+ * the extent members represent the texel dimensions of the
+ * source image and not the destination."
+ * However, we must use the destination image type to avoid
+ * clamping depth when copying multiple layers of a 2D image to
+ * a 3D image.
+ */
+ const VkExtent3D img_extent_el =
+ meta_region_extent_el(src_image, dst_image->type, &region->extent);
+
+ /* Start creating blit rect */
+ struct radv_meta_blit2d_rect rect = {
+ .width = img_extent_el.width,
+ .height = img_extent_el.height,
+ };
+
+ if (src_image->type == VK_IMAGE_TYPE_3D)
+ b_src.layer = src_offset_el.z;
+
+ if (dst_image->type == VK_IMAGE_TYPE_3D)
+ b_dst.layer = dst_offset_el.z;
+
+ /* Loop through each 3D or array slice */
+ unsigned num_slices_3d = img_extent_el.depth;
+ unsigned num_slices_array = region->dstSubresource.layerCount;
+ unsigned slice_3d = 0;
+ unsigned slice_array = 0;
+ while (slice_3d < num_slices_3d && slice_array < num_slices_array) {
+
+ /* Finish creating blit rect */
+ rect.dst_x = dst_offset_el.x;
+ rect.dst_y = dst_offset_el.y;
+ rect.src_x = src_offset_el.x;
+ rect.src_y = src_offset_el.y;
+
+ /* Perform Blit */
+ if (cs) {
+ radv_meta_image_to_image_cs(cmd_buffer, &b_src, &b_dst, 1, &rect);
+ } else {
+ radv_meta_blit2d(cmd_buffer, &b_src, NULL, &b_dst, 1, &rect);
+ }
+
+ b_src.layer++;
+ b_dst.layer++;
+ if (dst_image->type == VK_IMAGE_TYPE_3D)
+ slice_3d++;
+ else
+ slice_array++;
+ }
+ }
+
+ /* Restore conditional rendering. */
+ cmd_buffer->state.predicating = old_predicating;
+
+ radv_meta_restore(&saved_state, cmd_buffer);
}
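/*
 * A condensed sketch of the format-selection policy in copy_image() above:
 * keep the destination format when the source is uncompressed or the DCC
 * formats are compatible, fall back to the source format when only the
 * destination is uncompressed, and otherwise decompress the destination
 * first. The enum and helper are hypothetical stand-ins for the RADV checks.
 */
#include <stdbool.h>
#include <stdio.h>

enum copy_path {
   USE_DST_FORMAT,      /* b_src.format = b_dst.format */
   USE_SRC_FORMAT,      /* b_dst.format = b_src.format */
   DECOMPRESS_DST_FIRST /* radv_decompress_dcc(), then copy uncompressed */
};

static enum copy_path
pick_copy_path(bool src_compressed, bool dst_compressed, bool dcc_compatible)
{
   if (!src_compressed || dcc_compatible)
      return USE_DST_FORMAT;
   if (!dst_compressed)
      return USE_SRC_FORMAT;
   return DECOMPRESS_DST_FIRST;
}

int
main(void)
{
   /* Both sides DCC-compressed with incompatible formats -> decompress. */
   printf("%d\n", pick_copy_path(true, true, false));
   return 0;
}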
-void radv_CmdCopyImage2KHR(
- VkCommandBuffer commandBuffer,
- const VkCopyImageInfo2KHR* pCopyImageInfo)
+void
+radv_CmdCopyImage2KHR(VkCommandBuffer commandBuffer, const VkCopyImageInfo2KHR *pCopyImageInfo)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_image, src_image, pCopyImageInfo->srcImage);
- RADV_FROM_HANDLE(radv_image, dst_image, pCopyImageInfo->dstImage);
-
- for (unsigned r = 0; r < pCopyImageInfo->regionCount; r++) {
- copy_image(cmd_buffer,
- src_image, pCopyImageInfo->srcImageLayout,
- dst_image, pCopyImageInfo->dstImageLayout,
- &pCopyImageInfo->pRegions[r]);
- }
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_image, src_image, pCopyImageInfo->srcImage);
+ RADV_FROM_HANDLE(radv_image, dst_image, pCopyImageInfo->dstImage);
+
+ for (unsigned r = 0; r < pCopyImageInfo->regionCount; r++) {
+ copy_image(cmd_buffer, src_image, pCopyImageInfo->srcImageLayout, dst_image,
+ pCopyImageInfo->dstImageLayout, &pCopyImageInfo->pRegions[r]);
+ }
}
diff --git a/src/amd/vulkan/radv_meta_dcc_retile.c b/src/amd/vulkan/radv_meta_dcc_retile.c
index 6153155e081..d2c2466c461 100644
--- a/src/amd/vulkan/radv_meta_dcc_retile.c
+++ b/src/amd/vulkan/radv_meta_dcc_retile.c
@@ -21,295 +21,271 @@
* IN THE SOFTWARE.
*/
-#include "radv_private.h"
#include "radv_meta.h"
+#include "radv_private.h"
static nir_shader *
build_dcc_retile_compute_shader(struct radv_device *dev)
{
- const struct glsl_type *buf_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF,
- false,
- GLSL_TYPE_UINT);
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "dcc_retile_compute");
-
- b.shader->info.cs.local_size[0] = 256;
- b.shader->info.cs.local_size[1] = 1;
- b.shader->info.cs.local_size[2] = 1;
-
- nir_variable *indices = nir_variable_create(b.shader, nir_var_uniform,
- buf_type, "indices_in");
- indices->data.descriptor_set = 0;
- indices->data.binding = 0;
- nir_variable *input_dcc = nir_variable_create(b.shader, nir_var_uniform,
- buf_type, "dcc_in");
- input_dcc->data.descriptor_set = 0;
- input_dcc->data.binding = 1;
- nir_variable *output_dcc = nir_variable_create(b.shader, nir_var_uniform,
- buf_type, "dcc_out");
- output_dcc->data.descriptor_set = 0;
- output_dcc->data.binding = 2;
-
- nir_ssa_def *indices_ref = &nir_build_deref_var(&b, indices)->dest.ssa;
- nir_ssa_def *input_dcc_ref = &nir_build_deref_var(&b, input_dcc)->dest.ssa;
- nir_ssa_def *output_dcc_ref = &nir_build_deref_var(&b, output_dcc)->dest.ssa;
-
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
- nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
- nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info.cs.local_size[0],
- 0, 0, 0);
-
- nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
-
- nir_intrinsic_instr *index_vals = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_load);
- index_vals->num_components = 2;
- index_vals->src[0] = nir_src_for_ssa(indices_ref);
- index_vals->src[1] = nir_src_for_ssa(global_id);
- index_vals->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
- index_vals->src[3] = nir_src_for_ssa(nir_imm_int(&b, 0));
- nir_ssa_dest_init(&index_vals->instr, &index_vals->dest, 2, 32, "indices");
- nir_builder_instr_insert(&b, &index_vals->instr);
-
- nir_ssa_def *src = nir_channels(&b, &index_vals->dest.ssa, 1);
- nir_ssa_def *dst = nir_channels(&b, &index_vals->dest.ssa, 2);
-
- nir_intrinsic_instr *dcc_val = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_load);
- dcc_val->num_components = 1;
- dcc_val->src[0] = nir_src_for_ssa(input_dcc_ref);
- dcc_val->src[1] = nir_src_for_ssa(nir_vec4(&b, src, src, src, src));
- dcc_val->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
- dcc_val->src[3] = nir_src_for_ssa(nir_imm_int(&b, 0));
- nir_ssa_dest_init(&dcc_val->instr, &dcc_val->dest, 1, 32, "dcc_val");
- nir_builder_instr_insert(&b, &dcc_val->instr);
-
- nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
- store->num_components = 1;
- store->src[0] = nir_src_for_ssa(output_dcc_ref);
- store->src[1] = nir_src_for_ssa(nir_vec4(&b, dst, dst, dst, dst));
- store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
- store->src[3] = nir_src_for_ssa(&dcc_val->dest.ssa);
- store->src[4] = nir_src_for_ssa(nir_imm_int(&b, 0));
-
- nir_builder_instr_insert(&b, &store->instr);
- return b.shader;
+ const struct glsl_type *buf_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_UINT);
+ nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "dcc_retile_compute");
+
+ b.shader->info.cs.local_size[0] = 256;
+ b.shader->info.cs.local_size[1] = 1;
+ b.shader->info.cs.local_size[2] = 1;
+
+ nir_variable *indices = nir_variable_create(b.shader, nir_var_uniform, buf_type, "indices_in");
+ indices->data.descriptor_set = 0;
+ indices->data.binding = 0;
+ nir_variable *input_dcc = nir_variable_create(b.shader, nir_var_uniform, buf_type, "dcc_in");
+ input_dcc->data.descriptor_set = 0;
+ input_dcc->data.binding = 1;
+ nir_variable *output_dcc = nir_variable_create(b.shader, nir_var_uniform, buf_type, "dcc_out");
+ output_dcc->data.descriptor_set = 0;
+ output_dcc->data.binding = 2;
+
+ nir_ssa_def *indices_ref = &nir_build_deref_var(&b, indices)->dest.ssa;
+ nir_ssa_def *input_dcc_ref = &nir_build_deref_var(&b, input_dcc)->dest.ssa;
+ nir_ssa_def *output_dcc_ref = &nir_build_deref_var(&b, output_dcc)->dest.ssa;
+
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
+ nir_ssa_def *block_size = nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], 0, 0, 0);
+
+ nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+
+ nir_intrinsic_instr *index_vals =
+ nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_load);
+ index_vals->num_components = 2;
+ index_vals->src[0] = nir_src_for_ssa(indices_ref);
+ index_vals->src[1] = nir_src_for_ssa(global_id);
+ index_vals->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
+ index_vals->src[3] = nir_src_for_ssa(nir_imm_int(&b, 0));
+ nir_ssa_dest_init(&index_vals->instr, &index_vals->dest, 2, 32, "indices");
+ nir_builder_instr_insert(&b, &index_vals->instr);
+
+ nir_ssa_def *src = nir_channels(&b, &index_vals->dest.ssa, 1);
+ nir_ssa_def *dst = nir_channels(&b, &index_vals->dest.ssa, 2);
+
+ nir_intrinsic_instr *dcc_val =
+ nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_load);
+ dcc_val->num_components = 1;
+ dcc_val->src[0] = nir_src_for_ssa(input_dcc_ref);
+ dcc_val->src[1] = nir_src_for_ssa(nir_vec4(&b, src, src, src, src));
+ dcc_val->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
+ dcc_val->src[3] = nir_src_for_ssa(nir_imm_int(&b, 0));
+ nir_ssa_dest_init(&dcc_val->instr, &dcc_val->dest, 1, 32, "dcc_val");
+ nir_builder_instr_insert(&b, &dcc_val->instr);
+
+ nir_intrinsic_instr *store =
+ nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
+ store->num_components = 1;
+ store->src[0] = nir_src_for_ssa(output_dcc_ref);
+ store->src[1] = nir_src_for_ssa(nir_vec4(&b, dst, dst, dst, dst));
+ store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
+ store->src[3] = nir_src_for_ssa(&dcc_val->dest.ssa);
+ store->src[4] = nir_src_for_ssa(nir_imm_int(&b, 0));
+
+ nir_builder_instr_insert(&b, &store->instr);
+ return b.shader;
}
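/*
 * What the NIR above computes, written as a plain C reference loop: each
 * invocation reads one (src, dst) pair from the retile map and moves a
 * single DCC byte. Buffer and parameter names are hypothetical; the real map
 * is a texel buffer of 16- or 32-bit pairs (dcc_retile_use_uint16), and
 * 32-bit pairs are assumed here.
 */
#include <stddef.h>
#include <stdint.h>

void
retile_dcc_reference(uint8_t *display_dcc, const uint8_t *dcc,
                     const uint32_t *retile_map, size_t num_pairs)
{
   for (size_t i = 0; i < num_pairs; ++i) {
      uint32_t src = retile_map[2 * i + 0]; /* indices_in.x */
      uint32_t dst = retile_map[2 * i + 1]; /* indices_in.y */
      display_dcc[dst] = dcc[src];          /* dcc_out[dst] = dcc_in[src] */
   }
}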
void
radv_device_finish_meta_dcc_retile_state(struct radv_device *device)
{
- struct radv_meta_state *state = &device->meta_state;
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->dcc_retile.pipeline,
- &state->alloc);
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->dcc_retile.p_layout,
- &state->alloc);
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- state->dcc_retile.ds_layout,
- &state->alloc);
-
- /* Reset for next finish. */
- memset(&state->dcc_retile, 0, sizeof(state->dcc_retile));
+ struct radv_meta_state *state = &device->meta_state;
+
+ radv_DestroyPipeline(radv_device_to_handle(device), state->dcc_retile.pipeline, &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->dcc_retile.p_layout,
+ &state->alloc);
+ radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->dcc_retile.ds_layout,
+ &state->alloc);
+
+ /* Reset for next finish. */
+ memset(&state->dcc_retile, 0, sizeof(state->dcc_retile));
}
VkResult
radv_device_init_meta_dcc_retile_state(struct radv_device *device)
{
- VkResult result = VK_SUCCESS;
- nir_shader *cs = build_dcc_retile_compute_shader(device);
-
- VkDescriptorSetLayoutCreateInfo ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 3,
- .pBindings = (VkDescriptorSetLayoutBinding[]) {
- {
- .binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- {
- .binding = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- {
- .binding = 2,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- }
- };
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
- &ds_create_info,
- &device->meta_state.alloc,
- &device->meta_state.dcc_retile.ds_layout);
- if (result != VK_SUCCESS)
- goto cleanup;
-
-
- VkPipelineLayoutCreateInfo pl_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 1,
- .pSetLayouts = &device->meta_state.dcc_retile.ds_layout,
- .pushConstantRangeCount = 0,
- };
-
- result = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &pl_create_info,
- &device->meta_state.alloc,
- &device->meta_state.dcc_retile.p_layout);
- if (result != VK_SUCCESS)
- goto cleanup;
-
- /* compute shader */
-
- VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = vk_shader_module_handle_from_nir(cs),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = pipeline_shader_stage,
- .flags = 0,
- .layout = device->meta_state.dcc_retile.p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- 1, &vk_pipeline_info, NULL,
- &device->meta_state.dcc_retile.pipeline);
- if (result != VK_SUCCESS)
- goto cleanup;
+ VkResult result = VK_SUCCESS;
+ nir_shader *cs = build_dcc_retile_compute_shader(device);
+
+ VkDescriptorSetLayoutCreateInfo ds_create_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 3,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ {.binding = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ {.binding = 2,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.dcc_retile.ds_layout);
+ if (result != VK_SUCCESS)
+ goto cleanup;
+
+ VkPipelineLayoutCreateInfo pl_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &device->meta_state.dcc_retile.ds_layout,
+ .pushConstantRangeCount = 0,
+ };
+
+ result =
+ radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
+ &device->meta_state.alloc, &device->meta_state.dcc_retile.p_layout);
+ if (result != VK_SUCCESS)
+ goto cleanup;
+
+ /* compute shader */
+
+ VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(cs),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = pipeline_shader_stage,
+ .flags = 0,
+ .layout = device->meta_state.dcc_retile.p_layout,
+ };
+
+ result = radv_CreateComputePipelines(
+ radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
+ &vk_pipeline_info, NULL, &device->meta_state.dcc_retile.pipeline);
+ if (result != VK_SUCCESS)
+ goto cleanup;
cleanup:
- if (result != VK_SUCCESS)
- radv_device_finish_meta_dcc_retile_state(device);
- ralloc_free(cs);
- return result;
+ if (result != VK_SUCCESS)
+ radv_device_finish_meta_dcc_retile_state(device);
+ ralloc_free(cs);
+ return result;
}
void
radv_retile_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image)
{
- struct radv_meta_saved_state saved_state;
- struct radv_device *device = cmd_buffer->device;
- uint32_t retile_map_size = ac_surface_get_retile_map_size(&image->planes[0].surface);
-
- assert(image->type == VK_IMAGE_TYPE_2D);
- assert(image->info.array_size == 1 && image->info.levels == 1);
-
- struct radv_cmd_state *state = &cmd_buffer->state;
-
- state->flush_bits |= radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_READ_BIT, image) |
- radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);
-
- /* Compile pipelines if not already done so. */
- if (!cmd_buffer->device->meta_state.dcc_retile.pipeline) {
- VkResult ret = radv_device_init_meta_dcc_retile_state(cmd_buffer->device);
- if (ret != VK_SUCCESS) {
- cmd_buffer->record_result = ret;
- return;
- }
- }
-
- radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_DESCRIPTORS |
- RADV_META_SAVE_COMPUTE_PIPELINE);
-
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.dcc_retile.pipeline);
-
- struct radv_buffer buffer = {
- .size = image->size,
- .bo = image->bo,
- .offset = image->offset
- };
-
- struct radv_buffer retile_buffer = {
- .size = retile_map_size,
- .bo = image->retile_map,
- .offset = 0
- };
-
- struct radv_buffer_view views[3];
- VkBufferView view_handles[3];
- radv_buffer_view_init(views + 0, cmd_buffer->device, &(VkBufferViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
- .buffer = radv_buffer_to_handle(&retile_buffer),
- .offset = 0,
- .range = retile_map_size,
- .format = image->planes[0].surface.u.gfx9.dcc_retile_use_uint16 ?
- VK_FORMAT_R16G16_UINT : VK_FORMAT_R32G32_UINT,
- });
- radv_buffer_view_init(views + 1, cmd_buffer->device, &(VkBufferViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
- .buffer = radv_buffer_to_handle(&buffer),
- .offset = image->planes[0].surface.dcc_offset,
- .range = image->planes[0].surface.dcc_size,
- .format = VK_FORMAT_R8_UINT,
- });
- radv_buffer_view_init(views + 2, cmd_buffer->device, &(VkBufferViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
- .buffer = radv_buffer_to_handle(&buffer),
- .offset = image->planes[0].surface.display_dcc_offset,
- .range = image->planes[0].surface.u.gfx9.display_dcc_size,
- .format = VK_FORMAT_R8_UINT,
- });
- for (unsigned i = 0; i < 3; ++i)
- view_handles[i] = radv_buffer_view_to_handle(&views[i]);
-
- radv_meta_push_descriptor_set(cmd_buffer,
- VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.dcc_retile.p_layout,
- 0, /* set */
- 3, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .pTexelBufferView = &view_handles[0],
- },
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 1,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .pTexelBufferView = &view_handles[1],
- },
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 2,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
- .pTexelBufferView = &view_handles[2],
- },
- });
-
- /* src+dst pairs count double, so the number of DCC bytes we move is
- * actually half of dcc_retile_num_elements. */
- radv_unaligned_dispatch(cmd_buffer, image->planes[0].surface.u.gfx9.dcc_retile_num_elements / 2, 1, 1);
-
- radv_meta_restore(&saved_state, cmd_buffer);
-
- state->flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
- radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);
+ struct radv_meta_saved_state saved_state;
+ struct radv_device *device = cmd_buffer->device;
+ uint32_t retile_map_size = ac_surface_get_retile_map_size(&image->planes[0].surface);
+
+ assert(image->type == VK_IMAGE_TYPE_2D);
+ assert(image->info.array_size == 1 && image->info.levels == 1);
+
+ struct radv_cmd_state *state = &cmd_buffer->state;
+
+ state->flush_bits |= radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_READ_BIT, image) |
+ radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);
+
+ /* Compile pipelines if not already done so. */
+ if (!cmd_buffer->device->meta_state.dcc_retile.pipeline) {
+ VkResult ret = radv_device_init_meta_dcc_retile_state(cmd_buffer->device);
+ if (ret != VK_SUCCESS) {
+ cmd_buffer->record_result = ret;
+ return;
+ }
+ }
+
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE);
+
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
+ device->meta_state.dcc_retile.pipeline);
+
+ struct radv_buffer buffer = {.size = image->size, .bo = image->bo, .offset = image->offset};
+
+ struct radv_buffer retile_buffer = {.size = retile_map_size,
+ .bo = image->retile_map,
+ .offset = 0};
+
+ struct radv_buffer_view views[3];
+ VkBufferView view_handles[3];
+ radv_buffer_view_init(
+ views + 0, cmd_buffer->device,
+ &(VkBufferViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
+ .buffer = radv_buffer_to_handle(&retile_buffer),
+ .offset = 0,
+ .range = retile_map_size,
+ .format = image->planes[0].surface.u.gfx9.dcc_retile_use_uint16 ? VK_FORMAT_R16G16_UINT
+ : VK_FORMAT_R32G32_UINT,
+ });
+ radv_buffer_view_init(views + 1, cmd_buffer->device,
+ &(VkBufferViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
+ .buffer = radv_buffer_to_handle(&buffer),
+ .offset = image->planes[0].surface.dcc_offset,
+ .range = image->planes[0].surface.dcc_size,
+ .format = VK_FORMAT_R8_UINT,
+ });
+ radv_buffer_view_init(views + 2, cmd_buffer->device,
+ &(VkBufferViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
+ .buffer = radv_buffer_to_handle(&buffer),
+ .offset = image->planes[0].surface.display_dcc_offset,
+ .range = image->planes[0].surface.u.gfx9.display_dcc_size,
+ .format = VK_FORMAT_R8_UINT,
+ });
+ for (unsigned i = 0; i < 3; ++i)
+ view_handles[i] = radv_buffer_view_to_handle(&views[i]);
+
+ radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
+ device->meta_state.dcc_retile.p_layout, 0, /* set */
+ 3, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){
+ {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+ .pTexelBufferView = &view_handles[0],
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 1,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+ .pTexelBufferView = &view_handles[1],
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 2,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
+ .pTexelBufferView = &view_handles[2],
+ },
+ });
+
+ /* src+dst pairs count double, so the number of DCC bytes we move is
+ * actually half of dcc_retile_num_elements. */
+ radv_unaligned_dispatch(cmd_buffer, image->planes[0].surface.u.gfx9.dcc_retile_num_elements / 2,
+ 1, 1);
+
+ radv_meta_restore(&saved_state, cmd_buffer);
+
+ state->flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
+ radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);
}
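/*
 * A small arithmetic sketch of the dispatch size above: the retile map stores
 * src and dst indices interleaved, so half of dcc_retile_num_elements is the
 * number of byte moves, and the 256-wide workgroup declared in
 * build_dcc_retile_compute_shader() gives the group count a rounded-up
 * dispatch would use. The element count below is hypothetical.
 */
#include <stdio.h>

int
main(void)
{
   unsigned dcc_retile_num_elements = 100000; /* hypothetical */
   unsigned moves = dcc_retile_num_elements / 2;
   unsigned wg_size = 256; /* local_size[0] in the compute shader above */
   unsigned groups = (moves + wg_size - 1) / wg_size;

   printf("%u byte moves in %u workgroups\n", moves, groups); /* 50000, 196 */
   return 0;
}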
-
diff --git a/src/amd/vulkan/radv_meta_decompress.c b/src/amd/vulkan/radv_meta_decompress.c
index 25a3ff8bba7..73922d1d751 100644
--- a/src/amd/vulkan/radv_meta_decompress.c
+++ b/src/amd/vulkan/radv_meta_decompress.c
@@ -29,542 +29,506 @@
#include "sid.h"
enum radv_depth_op {
- DEPTH_DECOMPRESS,
- DEPTH_RESUMMARIZE,
+ DEPTH_DECOMPRESS,
+ DEPTH_RESUMMARIZE,
};
enum radv_depth_decompress {
- DECOMPRESS_DEPTH_STENCIL,
- DECOMPRESS_DEPTH,
- DECOMPRESS_STENCIL,
+ DECOMPRESS_DEPTH_STENCIL,
+ DECOMPRESS_DEPTH,
+ DECOMPRESS_STENCIL,
};
static VkResult
-create_pass(struct radv_device *device,
- uint32_t samples,
- VkRenderPass *pass)
+create_pass(struct radv_device *device, uint32_t samples, VkRenderPass *pass)
{
- VkResult result;
- VkDevice device_h = radv_device_to_handle(device);
- const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
- VkAttachmentDescription2 attachment;
-
- attachment.sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2;
- attachment.flags = 0;
- attachment.format = VK_FORMAT_D32_SFLOAT_S8_UINT;
- attachment.samples = samples;
- attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
- attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
- attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
- attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
- attachment.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
- attachment.finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
-
- result = radv_CreateRenderPass2(device_h,
- &(VkRenderPassCreateInfo2) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
- .attachmentCount = 1,
- .pAttachments = &attachment,
- .subpassCount = 1,
- .pSubpasses = &(VkSubpassDescription2) {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
- .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
- .inputAttachmentCount = 0,
- .colorAttachmentCount = 0,
- .pColorAttachments = NULL,
- .pResolveAttachments = NULL,
- .pDepthStencilAttachment = &(VkAttachmentReference2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
- .attachment = 0,
- .layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
- },
- .preserveAttachmentCount = 0,
- .pPreserveAttachments = NULL,
- },
- .dependencyCount = 2,
- .pDependencies = (VkSubpassDependency2[]) {
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = VK_SUBPASS_EXTERNAL,
- .dstSubpass = 0,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- },
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = 0,
- .dstSubpass = VK_SUBPASS_EXTERNAL,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- }
- },
- },
- alloc,
- pass);
-
- return result;
+ VkResult result;
+ VkDevice device_h = radv_device_to_handle(device);
+ const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
+ VkAttachmentDescription2 attachment;
+
+ attachment.sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2;
+ attachment.flags = 0;
+ attachment.format = VK_FORMAT_D32_SFLOAT_S8_UINT;
+ attachment.samples = samples;
+ attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
+ attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
+ attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
+ attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
+ attachment.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
+ attachment.finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
+
+ result = radv_CreateRenderPass2(
+ device_h,
+ &(VkRenderPassCreateInfo2){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
+ .attachmentCount = 1,
+ .pAttachments = &attachment,
+ .subpassCount = 1,
+ .pSubpasses =
+ &(VkSubpassDescription2){
+ .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .colorAttachmentCount = 0,
+ .pColorAttachments = NULL,
+ .pResolveAttachments = NULL,
+ .pDepthStencilAttachment =
+ &(VkAttachmentReference2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
+ .attachment = 0,
+ .layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
+ },
+ .preserveAttachmentCount = 0,
+ .pPreserveAttachments = NULL,
+ },
+ .dependencyCount = 2,
+ .pDependencies =
+ (VkSubpassDependency2[]){{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = VK_SUBPASS_EXTERNAL,
+ .dstSubpass = 0,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0},
+ {.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = 0,
+ .dstSubpass = VK_SUBPASS_EXTERNAL,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0}},
+ },
+ alloc, pass);
+
+ return result;
}
static VkResult
create_pipeline_layout(struct radv_device *device, VkPipelineLayout *layout)
{
- VkPipelineLayoutCreateInfo pl_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 0,
- .pSetLayouts = NULL,
- .pushConstantRangeCount = 0,
- .pPushConstantRanges = NULL,
- };
-
- return radv_CreatePipelineLayout(radv_device_to_handle(device),
- &pl_create_info,
- &device->meta_state.alloc,
- layout);
+ VkPipelineLayoutCreateInfo pl_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 0,
+ .pSetLayouts = NULL,
+ .pushConstantRangeCount = 0,
+ .pPushConstantRanges = NULL,
+ };
+
+ return radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
+ &device->meta_state.alloc, layout);
}
static VkResult
-create_pipeline(struct radv_device *device,
- uint32_t samples,
- VkRenderPass pass,
- VkPipelineLayout layout,
- enum radv_depth_op op,
- enum radv_depth_decompress decompress,
- VkPipeline *pipeline)
+create_pipeline(struct radv_device *device, uint32_t samples, VkRenderPass pass,
+ VkPipelineLayout layout, enum radv_depth_op op,
+ enum radv_depth_decompress decompress, VkPipeline *pipeline)
{
- VkResult result;
- VkDevice device_h = radv_device_to_handle(device);
-
- mtx_lock(&device->meta_state.mtx);
- if (*pipeline) {
- mtx_unlock(&device->meta_state.mtx);
- return VK_SUCCESS;
- }
-
- nir_shader *vs_module = radv_meta_build_nir_vs_generate_vertices();
- nir_shader *fs_module = radv_meta_build_nir_fs_noop();
-
- if (!vs_module || !fs_module) {
- /* XXX: Need more accurate error */
- result = VK_ERROR_OUT_OF_HOST_MEMORY;
- goto cleanup;
- }
-
- const VkPipelineSampleLocationsStateCreateInfoEXT sample_locs_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT,
- .sampleLocationsEnable = false,
- };
-
- const VkGraphicsPipelineCreateInfo pipeline_create_info = {
- .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
- .stageCount = 2,
- .pStages = (VkPipelineShaderStageCreateInfo[]) {
- {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_VERTEX_BIT,
- .module = vk_shader_module_handle_from_nir(vs_module),
- .pName = "main",
- },
- {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
- .module = vk_shader_module_handle_from_nir(fs_module),
- .pName = "main",
- },
- },
- .pVertexInputState = &(VkPipelineVertexInputStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
- .vertexBindingDescriptionCount = 0,
- .vertexAttributeDescriptionCount = 0,
- },
- .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
- .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
- .primitiveRestartEnable = false,
- },
- .pViewportState = &(VkPipelineViewportStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
- .viewportCount = 1,
- .scissorCount = 1,
- },
- .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
- .depthClampEnable = false,
- .rasterizerDiscardEnable = false,
- .polygonMode = VK_POLYGON_MODE_FILL,
- .cullMode = VK_CULL_MODE_NONE,
- .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
- },
- .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
- .pNext = &sample_locs_create_info,
- .rasterizationSamples = samples,
- .sampleShadingEnable = false,
- .pSampleMask = NULL,
- .alphaToCoverageEnable = false,
- .alphaToOneEnable = false,
- },
- .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
- .logicOpEnable = false,
- .attachmentCount = 0,
- .pAttachments = NULL,
- },
- .pDepthStencilState = &(VkPipelineDepthStencilStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
- .depthTestEnable = false,
- .depthWriteEnable = false,
- .depthBoundsTestEnable = false,
- .stencilTestEnable = false,
- },
- .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
- .dynamicStateCount = 3,
- .pDynamicStates = (VkDynamicState[]) {
- VK_DYNAMIC_STATE_VIEWPORT,
- VK_DYNAMIC_STATE_SCISSOR,
- VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT,
- },
- },
- .layout = layout,
- .renderPass = pass,
- .subpass = 0,
- };
-
- struct radv_graphics_pipeline_create_info extra = {
- .use_rectlist = true,
- .depth_compress_disable = decompress == DECOMPRESS_DEPTH_STENCIL ||
- decompress == DECOMPRESS_DEPTH,
- .stencil_compress_disable = decompress == DECOMPRESS_DEPTH_STENCIL ||
- decompress == DECOMPRESS_STENCIL,
- .resummarize_enable = op == DEPTH_RESUMMARIZE,
- };
-
- result = radv_graphics_pipeline_create(device_h,
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- &pipeline_create_info, &extra,
- &device->meta_state.alloc,
- pipeline);
+ VkResult result;
+ VkDevice device_h = radv_device_to_handle(device);
+
+ mtx_lock(&device->meta_state.mtx);
+ if (*pipeline) {
+ mtx_unlock(&device->meta_state.mtx);
+ return VK_SUCCESS;
+ }
+
+ nir_shader *vs_module = radv_meta_build_nir_vs_generate_vertices();
+ nir_shader *fs_module = radv_meta_build_nir_fs_noop();
+
+ if (!vs_module || !fs_module) {
+ /* XXX: Need more accurate error */
+ result = VK_ERROR_OUT_OF_HOST_MEMORY;
+ goto cleanup;
+ }
+
+ const VkPipelineSampleLocationsStateCreateInfoEXT sample_locs_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT,
+ .sampleLocationsEnable = false,
+ };
+
+ const VkGraphicsPipelineCreateInfo pipeline_create_info = {
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .stageCount = 2,
+ .pStages =
+ (VkPipelineShaderStageCreateInfo[]){
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_VERTEX_BIT,
+ .module = vk_shader_module_handle_from_nir(vs_module),
+ .pName = "main",
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .module = vk_shader_module_handle_from_nir(fs_module),
+ .pName = "main",
+ },
+ },
+ .pVertexInputState =
+ &(VkPipelineVertexInputStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .vertexBindingDescriptionCount = 0,
+ .vertexAttributeDescriptionCount = 0,
+ },
+ .pInputAssemblyState =
+ &(VkPipelineInputAssemblyStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
+ .primitiveRestartEnable = false,
+ },
+ .pViewportState =
+ &(VkPipelineViewportStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .viewportCount = 1,
+ .scissorCount = 1,
+ },
+ .pRasterizationState =
+ &(VkPipelineRasterizationStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .depthClampEnable = false,
+ .rasterizerDiscardEnable = false,
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .cullMode = VK_CULL_MODE_NONE,
+ .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
+ },
+ .pMultisampleState =
+ &(VkPipelineMultisampleStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .pNext = &sample_locs_create_info,
+ .rasterizationSamples = samples,
+ .sampleShadingEnable = false,
+ .pSampleMask = NULL,
+ .alphaToCoverageEnable = false,
+ .alphaToOneEnable = false,
+ },
+ .pColorBlendState =
+ &(VkPipelineColorBlendStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .logicOpEnable = false,
+ .attachmentCount = 0,
+ .pAttachments = NULL,
+ },
+ .pDepthStencilState =
+ &(VkPipelineDepthStencilStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
+ .depthTestEnable = false,
+ .depthWriteEnable = false,
+ .depthBoundsTestEnable = false,
+ .stencilTestEnable = false,
+ },
+ .pDynamicState =
+ &(VkPipelineDynamicStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .dynamicStateCount = 3,
+ .pDynamicStates =
+ (VkDynamicState[]){
+ VK_DYNAMIC_STATE_VIEWPORT,
+ VK_DYNAMIC_STATE_SCISSOR,
+ VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT,
+ },
+ },
+ .layout = layout,
+ .renderPass = pass,
+ .subpass = 0,
+ };
+
+ struct radv_graphics_pipeline_create_info extra = {
+ .use_rectlist = true,
+ .depth_compress_disable =
+ decompress == DECOMPRESS_DEPTH_STENCIL || decompress == DECOMPRESS_DEPTH,
+ .stencil_compress_disable =
+ decompress == DECOMPRESS_DEPTH_STENCIL || decompress == DECOMPRESS_STENCIL,
+ .resummarize_enable = op == DEPTH_RESUMMARIZE,
+ };
+
+ result = radv_graphics_pipeline_create(
+ device_h, radv_pipeline_cache_to_handle(&device->meta_state.cache), &pipeline_create_info,
+ &extra, &device->meta_state.alloc, pipeline);
cleanup:
- ralloc_free(fs_module);
- ralloc_free(vs_module);
- mtx_unlock(&device->meta_state.mtx);
- return result;
+ ralloc_free(fs_module);
+ ralloc_free(vs_module);
+ mtx_unlock(&device->meta_state.mtx);
+ return result;
}
void
radv_device_finish_meta_depth_decomp_state(struct radv_device *device)
{
- struct radv_meta_state *state = &device->meta_state;
-
- for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) {
- radv_DestroyRenderPass(radv_device_to_handle(device),
- state->depth_decomp[i].pass,
- &state->alloc);
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->depth_decomp[i].p_layout,
- &state->alloc);
-
- for (uint32_t j = 0; j < NUM_DEPTH_DECOMPRESS_PIPELINES; j++) {
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->depth_decomp[i].decompress_pipeline[j],
- &state->alloc);
- }
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->depth_decomp[i].resummarize_pipeline,
- &state->alloc);
- }
+ struct radv_meta_state *state = &device->meta_state;
+
+ for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) {
+ radv_DestroyRenderPass(radv_device_to_handle(device), state->depth_decomp[i].pass,
+ &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->depth_decomp[i].p_layout,
+ &state->alloc);
+
+ for (uint32_t j = 0; j < NUM_DEPTH_DECOMPRESS_PIPELINES; j++) {
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->depth_decomp[i].decompress_pipeline[j], &state->alloc);
+ }
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->depth_decomp[i].resummarize_pipeline, &state->alloc);
+ }
}
VkResult
radv_device_init_meta_depth_decomp_state(struct radv_device *device, bool on_demand)
{
- struct radv_meta_state *state = &device->meta_state;
- VkResult res = VK_SUCCESS;
-
- for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) {
- uint32_t samples = 1 << i;
-
- res = create_pass(device, samples, &state->depth_decomp[i].pass);
- if (res != VK_SUCCESS)
- goto fail;
-
- res = create_pipeline_layout(device,
- &state->depth_decomp[i].p_layout);
- if (res != VK_SUCCESS)
- goto fail;
-
- if (on_demand)
- continue;
-
- for (uint32_t j = 0; j < NUM_DEPTH_DECOMPRESS_PIPELINES; j++) {
- res = create_pipeline(device, samples,
- state->depth_decomp[i].pass,
- state->depth_decomp[i].p_layout,
- DEPTH_DECOMPRESS,
- j,
- &state->depth_decomp[i].decompress_pipeline[j]);
- if (res != VK_SUCCESS)
- goto fail;
- }
-
- res = create_pipeline(device, samples,
- state->depth_decomp[i].pass,
- state->depth_decomp[i].p_layout,
- DEPTH_RESUMMARIZE,
- 0, /* unused */
- &state->depth_decomp[i].resummarize_pipeline);
- if (res != VK_SUCCESS)
- goto fail;
- }
-
- return VK_SUCCESS;
+ struct radv_meta_state *state = &device->meta_state;
+ VkResult res = VK_SUCCESS;
+
+ for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) {
+ uint32_t samples = 1 << i;
+
+ res = create_pass(device, samples, &state->depth_decomp[i].pass);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_pipeline_layout(device, &state->depth_decomp[i].p_layout);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ if (on_demand)
+ continue;
+
+ for (uint32_t j = 0; j < NUM_DEPTH_DECOMPRESS_PIPELINES; j++) {
+ res = create_pipeline(device, samples, state->depth_decomp[i].pass,
+ state->depth_decomp[i].p_layout, DEPTH_DECOMPRESS, j,
+ &state->depth_decomp[i].decompress_pipeline[j]);
+ if (res != VK_SUCCESS)
+ goto fail;
+ }
+
+ res = create_pipeline(device, samples, state->depth_decomp[i].pass,
+ state->depth_decomp[i].p_layout, DEPTH_RESUMMARIZE, 0, /* unused */
+ &state->depth_decomp[i].resummarize_pipeline);
+ if (res != VK_SUCCESS)
+ goto fail;
+ }
+
+ return VK_SUCCESS;
fail:
- radv_device_finish_meta_depth_decomp_state(device);
- return res;
+ radv_device_finish_meta_depth_decomp_state(device);
+ return res;
}
static VkPipeline *
-radv_get_depth_pipeline(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *subresourceRange,
- enum radv_depth_op op)
+radv_get_depth_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *subresourceRange, enum radv_depth_op op)
{
- struct radv_meta_state *state = &cmd_buffer->device->meta_state;
- uint32_t samples = image->info.samples;
- uint32_t samples_log2 = ffs(samples) - 1;
- enum radv_depth_decompress decompress;
- VkPipeline *pipeline;
-
- if (subresourceRange->aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT) {
- decompress = DECOMPRESS_DEPTH;
- } else if (subresourceRange->aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) {
- decompress = DECOMPRESS_STENCIL;
- } else {
- decompress = DECOMPRESS_DEPTH_STENCIL;
- }
-
- if (!state->depth_decomp[samples_log2].decompress_pipeline[decompress]) {
- VkResult ret;
-
- for (uint32_t i = 0; i < NUM_DEPTH_DECOMPRESS_PIPELINES; i++) {
- ret = create_pipeline(cmd_buffer->device, samples,
- state->depth_decomp[samples_log2].pass,
- state->depth_decomp[samples_log2].p_layout,
- DEPTH_DECOMPRESS,
- i,
- &state->depth_decomp[samples_log2].decompress_pipeline[i]);
- if (ret != VK_SUCCESS) {
- cmd_buffer->record_result = ret;
- return NULL;
- }
- }
-
- ret = create_pipeline(cmd_buffer->device, samples,
- state->depth_decomp[samples_log2].pass,
- state->depth_decomp[samples_log2].p_layout,
- DEPTH_RESUMMARIZE,
- 0, /* unused */
- &state->depth_decomp[samples_log2].resummarize_pipeline);
- if (ret != VK_SUCCESS) {
- cmd_buffer->record_result = ret;
- return NULL;
- }
- }
-
- switch (op) {
- case DEPTH_DECOMPRESS:
- pipeline = &state->depth_decomp[samples_log2].decompress_pipeline[decompress];
- break;
- case DEPTH_RESUMMARIZE:
- pipeline = &state->depth_decomp[samples_log2].resummarize_pipeline;
- break;
- default:
- unreachable("unknown operation");
- }
-
- return pipeline;
+ struct radv_meta_state *state = &cmd_buffer->device->meta_state;
+ uint32_t samples = image->info.samples;
+ uint32_t samples_log2 = ffs(samples) - 1;
+ enum radv_depth_decompress decompress;
+ VkPipeline *pipeline;
+
+ if (subresourceRange->aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT) {
+ decompress = DECOMPRESS_DEPTH;
+ } else if (subresourceRange->aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) {
+ decompress = DECOMPRESS_STENCIL;
+ } else {
+ decompress = DECOMPRESS_DEPTH_STENCIL;
+ }
+
+ if (!state->depth_decomp[samples_log2].decompress_pipeline[decompress]) {
+ VkResult ret;
+
+ for (uint32_t i = 0; i < NUM_DEPTH_DECOMPRESS_PIPELINES; i++) {
+ ret = create_pipeline(cmd_buffer->device, samples, state->depth_decomp[samples_log2].pass,
+ state->depth_decomp[samples_log2].p_layout, DEPTH_DECOMPRESS, i,
+ &state->depth_decomp[samples_log2].decompress_pipeline[i]);
+ if (ret != VK_SUCCESS) {
+ cmd_buffer->record_result = ret;
+ return NULL;
+ }
+ }
+
+ ret = create_pipeline(cmd_buffer->device, samples, state->depth_decomp[samples_log2].pass,
+ state->depth_decomp[samples_log2].p_layout, DEPTH_RESUMMARIZE,
+ 0, /* unused */
+ &state->depth_decomp[samples_log2].resummarize_pipeline);
+ if (ret != VK_SUCCESS) {
+ cmd_buffer->record_result = ret;
+ return NULL;
+ }
+ }
+
+ switch (op) {
+ case DEPTH_DECOMPRESS:
+ pipeline = &state->depth_decomp[samples_log2].decompress_pipeline[decompress];
+ break;
+ case DEPTH_RESUMMARIZE:
+ pipeline = &state->depth_decomp[samples_log2].resummarize_pipeline;
+ break;
+ default:
+ unreachable("unknown operation");
+ }
+
+ return pipeline;
}
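
The pipeline tables above are indexed by the log2 of the sample count, via ffs(samples) - 1. A small standalone sketch of that mapping (the loop bounds and printf are illustrative only, not taken from the driver):

#include <strings.h> /* ffs() */
#include <stdio.h>

int main(void)
{
   /* Power-of-two sample counts map to consecutive depth_decomp[] slots:
    * ffs() returns the 1-based position of the lowest set bit, so
    * 1 -> 0, 2 -> 1, 4 -> 2, 8 -> 3. */
   for (unsigned samples = 1; samples <= 8; samples *= 2) {
      unsigned samples_log2 = ffs(samples) - 1;
      printf("samples=%u -> depth_decomp[%u]\n", samples, samples_log2);
   }
   return 0;
}
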
static void
-radv_process_depth_image_layer(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *range,
- int level, int layer)
+radv_process_depth_image_layer(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *range, int level, int layer)
{
- struct radv_device *device = cmd_buffer->device;
- struct radv_meta_state *state = &device->meta_state;
- uint32_t samples_log2 = ffs(image->info.samples) - 1;
- struct radv_image_view iview;
- uint32_t width, height;
-
- width = radv_minify(image->info.width, range->baseMipLevel + level);
- height = radv_minify(image->info.height, range->baseMipLevel + level);
-
- radv_image_view_init(&iview, device,
- &(VkImageViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(image),
- .viewType = radv_meta_get_view_type(image),
- .format = image->vk_format,
- .subresourceRange = {
- .aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT,
- .baseMipLevel = range->baseMipLevel + level,
- .levelCount = 1,
- .baseArrayLayer = range->baseArrayLayer + layer,
- .layerCount = 1,
- },
- }, NULL);
-
-
- VkFramebuffer fb_h;
- radv_CreateFramebuffer(radv_device_to_handle(device),
- &(VkFramebufferCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
- .attachmentCount = 1,
- .pAttachments = (VkImageView[]) {
- radv_image_view_to_handle(&iview)
- },
- .width = width,
- .height = height,
- .layers = 1
- }, &cmd_buffer->pool->alloc, &fb_h);
-
- radv_cmd_buffer_begin_render_pass(cmd_buffer,
- &(VkRenderPassBeginInfo) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
- .renderPass = state->depth_decomp[samples_log2].pass,
- .framebuffer = fb_h,
- .renderArea = {
- .offset = {
- 0,
- 0,
- },
- .extent = {
- width,
- height,
- }
- },
- .clearValueCount = 0,
- .pClearValues = NULL,
- }, NULL);
- radv_cmd_buffer_set_subpass(cmd_buffer,
- &cmd_buffer->state.pass->subpasses[0]);
-
- radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
- radv_cmd_buffer_end_render_pass(cmd_buffer);
-
- radv_DestroyFramebuffer(radv_device_to_handle(device), fb_h,
- &cmd_buffer->pool->alloc);
+ struct radv_device *device = cmd_buffer->device;
+ struct radv_meta_state *state = &device->meta_state;
+ uint32_t samples_log2 = ffs(image->info.samples) - 1;
+ struct radv_image_view iview;
+ uint32_t width, height;
+
+ width = radv_minify(image->info.width, range->baseMipLevel + level);
+ height = radv_minify(image->info.height, range->baseMipLevel + level);
+
+ radv_image_view_init(&iview, device,
+ &(VkImageViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(image),
+ .viewType = radv_meta_get_view_type(image),
+ .format = image->vk_format,
+ .subresourceRange =
+ {
+ .aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT,
+ .baseMipLevel = range->baseMipLevel + level,
+ .levelCount = 1,
+ .baseArrayLayer = range->baseArrayLayer + layer,
+ .layerCount = 1,
+ },
+ },
+ NULL);
+
+ VkFramebuffer fb_h;
+ radv_CreateFramebuffer(
+ radv_device_to_handle(device),
+ &(VkFramebufferCreateInfo){.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments = (VkImageView[]){radv_image_view_to_handle(&iview)},
+ .width = width,
+ .height = height,
+ .layers = 1},
+ &cmd_buffer->pool->alloc, &fb_h);
+
+ radv_cmd_buffer_begin_render_pass(cmd_buffer,
+ &(VkRenderPassBeginInfo){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .renderPass = state->depth_decomp[samples_log2].pass,
+ .framebuffer = fb_h,
+ .renderArea = {.offset =
+ {
+ 0,
+ 0,
+ },
+ .extent =
+ {
+ width,
+ height,
+ }},
+ .clearValueCount = 0,
+ .pClearValues = NULL,
+ },
+ NULL);
+ radv_cmd_buffer_set_subpass(cmd_buffer, &cmd_buffer->state.pass->subpasses[0]);
+
+ radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
+ radv_cmd_buffer_end_render_pass(cmd_buffer);
+
+ radv_DestroyFramebuffer(radv_device_to_handle(device), fb_h, &cmd_buffer->pool->alloc);
}
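
The per-level width and height above come from radv_minify(); a minimal sketch of the conventional mip-chain arithmetic it stands for, assuming the usual clamp-to-1 behaviour (the level-0 extent here is made up):

#include <stdio.h>

/* Conventional mip minification: halve per level, never below 1. */
static unsigned minify(unsigned extent, unsigned level)
{
   unsigned e = extent >> level;
   return e ? e : 1;
}

int main(void)
{
   unsigned width = 1920, height = 1080; /* hypothetical level-0 extent */
   for (unsigned level = 0; level < 4; level++)
      printf("level %u: %ux%u\n", level, minify(width, level), minify(height, level));
   return 0;
}
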
-static void radv_process_depth_stencil(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *subresourceRange,
- struct radv_sample_locations_state *sample_locs,
- enum radv_depth_op op)
+static void
+radv_process_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *subresourceRange,
+ struct radv_sample_locations_state *sample_locs, enum radv_depth_op op)
{
- struct radv_meta_saved_state saved_state;
- VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
- VkPipeline *pipeline;
-
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_GRAPHICS_PIPELINE |
- RADV_META_SAVE_SAMPLE_LOCATIONS |
- RADV_META_SAVE_PASS);
-
- pipeline = radv_get_depth_pipeline(cmd_buffer, image,
- subresourceRange, op);
-
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
-
- if (sample_locs) {
- assert(image->flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT);
-
- /* Set the sample locations specified during explicit or
- * automatic layout transitions, otherwise the depth decompress
- * pass uses the default HW locations.
- */
- radv_CmdSetSampleLocationsEXT(cmd_buffer_h, &(VkSampleLocationsInfoEXT) {
- .sampleLocationsPerPixel = sample_locs->per_pixel,
- .sampleLocationGridSize = sample_locs->grid_size,
- .sampleLocationsCount = sample_locs->count,
- .pSampleLocations = sample_locs->locations,
- });
- }
-
- for (uint32_t l = 0; l < radv_get_levelCount(image, subresourceRange); ++l) {
-
- /* Do not decompress levels without HTILE. */
- if (!radv_htile_enabled(image, subresourceRange->baseMipLevel + l))
- continue;
-
- uint32_t width =
- radv_minify(image->info.width,
- subresourceRange->baseMipLevel + l);
- uint32_t height =
- radv_minify(image->info.height,
- subresourceRange->baseMipLevel + l);
-
- radv_CmdSetViewport(cmd_buffer_h, 0, 1,
- &(VkViewport) {
- .x = 0,
- .y = 0,
- .width = width,
- .height = height,
- .minDepth = 0.0f,
- .maxDepth = 1.0f
- });
-
- radv_CmdSetScissor(cmd_buffer_h, 0, 1,
- &(VkRect2D) {
- .offset = { 0, 0 },
- .extent = { width, height },
- });
-
- for (uint32_t s = 0; s < radv_get_layerCount(image, subresourceRange); s++) {
- radv_process_depth_image_layer(cmd_buffer, image,
- subresourceRange, l, s);
- }
- }
-
- radv_meta_restore(&saved_state, cmd_buffer);
+ struct radv_meta_saved_state saved_state;
+ VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
+ VkPipeline *pipeline;
+
+ radv_meta_save(
+ &saved_state, cmd_buffer,
+ RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_SAMPLE_LOCATIONS | RADV_META_SAVE_PASS);
+
+ pipeline = radv_get_depth_pipeline(cmd_buffer, image, subresourceRange, op);
+
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS,
+ *pipeline);
+
+ if (sample_locs) {
+ assert(image->flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT);
+
+ /* Set the sample locations specified during explicit or
+ * automatic layout transitions, otherwise the depth decompress
+ * pass uses the default HW locations.
+ */
+ radv_CmdSetSampleLocationsEXT(cmd_buffer_h,
+ &(VkSampleLocationsInfoEXT){
+ .sampleLocationsPerPixel = sample_locs->per_pixel,
+ .sampleLocationGridSize = sample_locs->grid_size,
+ .sampleLocationsCount = sample_locs->count,
+ .pSampleLocations = sample_locs->locations,
+ });
+ }
+
+ for (uint32_t l = 0; l < radv_get_levelCount(image, subresourceRange); ++l) {
+
+ /* Do not decompress levels without HTILE. */
+ if (!radv_htile_enabled(image, subresourceRange->baseMipLevel + l))
+ continue;
+
+ uint32_t width = radv_minify(image->info.width, subresourceRange->baseMipLevel + l);
+ uint32_t height = radv_minify(image->info.height, subresourceRange->baseMipLevel + l);
+
+ radv_CmdSetViewport(cmd_buffer_h, 0, 1,
+ &(VkViewport){.x = 0,
+ .y = 0,
+ .width = width,
+ .height = height,
+ .minDepth = 0.0f,
+ .maxDepth = 1.0f});
+
+ radv_CmdSetScissor(cmd_buffer_h, 0, 1,
+ &(VkRect2D){
+ .offset = {0, 0},
+ .extent = {width, height},
+ });
+
+ for (uint32_t s = 0; s < radv_get_layerCount(image, subresourceRange); s++) {
+ radv_process_depth_image_layer(cmd_buffer, image, subresourceRange, l, s);
+ }
+ }
+
+ radv_meta_restore(&saved_state, cmd_buffer);
}
-void radv_decompress_depth_stencil(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *subresourceRange,
- struct radv_sample_locations_state *sample_locs)
+void
+radv_decompress_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *subresourceRange,
+ struct radv_sample_locations_state *sample_locs)
{
- struct radv_barrier_data barrier = {0};
+ struct radv_barrier_data barrier = {0};
- barrier.layout_transitions.depth_stencil_expand = 1;
- radv_describe_layout_transition(cmd_buffer, &barrier);
+ barrier.layout_transitions.depth_stencil_expand = 1;
+ radv_describe_layout_transition(cmd_buffer, &barrier);
- assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
- radv_process_depth_stencil(cmd_buffer, image, subresourceRange,
- sample_locs, DEPTH_DECOMPRESS);
+ assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
+ radv_process_depth_stencil(cmd_buffer, image, subresourceRange, sample_locs, DEPTH_DECOMPRESS);
}
-void radv_resummarize_depth_stencil(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *subresourceRange,
- struct radv_sample_locations_state *sample_locs)
+void
+radv_resummarize_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *subresourceRange,
+ struct radv_sample_locations_state *sample_locs)
{
- struct radv_barrier_data barrier = {0};
+ struct radv_barrier_data barrier = {0};
- barrier.layout_transitions.depth_stencil_resummarize = 1;
- radv_describe_layout_transition(cmd_buffer, &barrier);
+ barrier.layout_transitions.depth_stencil_resummarize = 1;
+ radv_describe_layout_transition(cmd_buffer, &barrier);
- assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
- radv_process_depth_stencil(cmd_buffer, image, subresourceRange,
- sample_locs, DEPTH_RESUMMARIZE);
+ assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
+ radv_process_depth_stencil(cmd_buffer, image, subresourceRange, sample_locs, DEPTH_RESUMMARIZE);
}
diff --git a/src/amd/vulkan/radv_meta_fast_clear.c b/src/amd/vulkan/radv_meta_fast_clear.c
index ea29bdec9f2..9a62664fc95 100644
--- a/src/amd/vulkan/radv_meta_fast_clear.c
+++ b/src/amd/vulkan/radv_meta_fast_clear.c
@@ -28,955 +28,901 @@
#include "radv_private.h"
#include "sid.h"
-
static nir_shader *
build_dcc_decompress_compute_shader(struct radv_device *dev)
{
- const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
- false,
- false,
- GLSL_TYPE_FLOAT);
- const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D,
- false,
- GLSL_TYPE_FLOAT);
-
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "dcc_decompress_compute");
-
- /* We need at least 16/16/1 to cover an entire DCC block in a single workgroup. */
- b.shader->info.cs.local_size[0] = 16;
- b.shader->info.cs.local_size[1] = 16;
- b.shader->info.cs.local_size[2] = 1;
- nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
- buf_type, "s_tex");
- input_img->data.descriptor_set = 0;
- input_img->data.binding = 0;
-
- nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
- img_type, "out_img");
- output_img->data.descriptor_set = 0;
- output_img->data.binding = 1;
-
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
- nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
- nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info.cs.local_size[0],
- b.shader->info.cs.local_size[1],
- b.shader->info.cs.local_size[2], 0);
-
- nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
- nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
-
- nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
- tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
- tex->op = nir_texop_txf;
- tex->src[0].src_type = nir_tex_src_coord;
- tex->src[0].src = nir_src_for_ssa(nir_channels(&b, global_id, 3));
- tex->src[1].src_type = nir_tex_src_lod;
- tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
- tex->src[2].src_type = nir_tex_src_texture_deref;
- tex->src[2].src = nir_src_for_ssa(input_img_deref);
- tex->dest_type = nir_type_float32;
- tex->is_array = false;
- tex->coord_components = 2;
-
- nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
- nir_builder_instr_insert(&b, &tex->instr);
-
- nir_scoped_barrier(&b, .execution_scope=NIR_SCOPE_WORKGROUP,
- .memory_scope=NIR_SCOPE_WORKGROUP,
- .memory_semantics=NIR_MEMORY_ACQ_REL,
- .memory_modes=nir_var_mem_ssbo);
-
- nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa,
- global_id, nir_ssa_undef(&b, 1, 32), &tex->dest.ssa,
- nir_imm_int(&b, 0));
- return b.shader;
+ const struct glsl_type *buf_type =
+ glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false, GLSL_TYPE_FLOAT);
+ const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, false, GLSL_TYPE_FLOAT);
+
+ nir_builder b =
+ nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "dcc_decompress_compute");
+
+ /* We need at least 16/16/1 to cover an entire DCC block in a single workgroup. */
+ b.shader->info.cs.local_size[0] = 16;
+ b.shader->info.cs.local_size[1] = 16;
+ b.shader->info.cs.local_size[2] = 1;
+ nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, buf_type, "s_tex");
+ input_img->data.descriptor_set = 0;
+ input_img->data.binding = 0;
+
+ nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
+ output_img->data.descriptor_set = 0;
+ output_img->data.binding = 1;
+
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
+ nir_ssa_def *block_size =
+ nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1],
+ b.shader->info.cs.local_size[2], 0);
+
+ nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+ nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
+
+ nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
+ tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
+ tex->op = nir_texop_txf;
+ tex->src[0].src_type = nir_tex_src_coord;
+ tex->src[0].src = nir_src_for_ssa(nir_channels(&b, global_id, 3));
+ tex->src[1].src_type = nir_tex_src_lod;
+ tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
+ tex->src[2].src_type = nir_tex_src_texture_deref;
+ tex->src[2].src = nir_src_for_ssa(input_img_deref);
+ tex->dest_type = nir_type_float32;
+ tex->is_array = false;
+ tex->coord_components = 2;
+
+ nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+ nir_builder_instr_insert(&b, &tex->instr);
+
+ nir_scoped_barrier(&b, .execution_scope = NIR_SCOPE_WORKGROUP,
+ .memory_scope = NIR_SCOPE_WORKGROUP, .memory_semantics = NIR_MEMORY_ACQ_REL,
+ .memory_modes = nir_var_mem_ssbo);
+
+ nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, global_id,
+ nir_ssa_undef(&b, 1, 32), &tex->dest.ssa, nir_imm_int(&b, 0));
+ return b.shader;
}
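
Given the 16x16x1 local size declared above, covering one mip level with this shader is a ceiling division of the level's extent by the workgroup size. A standalone sketch of that dispatch math (the image extent is hypothetical):

#include <stdio.h>

static unsigned div_round_up(unsigned n, unsigned d)
{
   return (n + d - 1) / d;
}

int main(void)
{
   unsigned width = 1000, height = 600; /* hypothetical mip extent */
   unsigned local_x = 16, local_y = 16; /* matches cs.local_size[0..1] above */
   /* Each workgroup covers a 16x16 tile, so round each dimension up. */
   printf("dispatch %u x %u x 1 workgroups\n",
          div_round_up(width, local_x), div_round_up(height, local_y));
   return 0;
}
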
static VkResult
create_dcc_compress_compute(struct radv_device *device)
{
- VkResult result = VK_SUCCESS;
- nir_shader *cs = build_dcc_decompress_compute_shader(device);
-
- VkDescriptorSetLayoutCreateInfo ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 2,
- .pBindings = (VkDescriptorSetLayoutBinding[]) {
- {
- .binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- {
- .binding = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- }
- };
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
- &ds_create_info,
- &device->meta_state.alloc,
- &device->meta_state.fast_clear_flush.dcc_decompress_compute_ds_layout);
- if (result != VK_SUCCESS)
- goto cleanup;
-
-
- VkPipelineLayoutCreateInfo pl_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 1,
- .pSetLayouts = &device->meta_state.fast_clear_flush.dcc_decompress_compute_ds_layout,
- .pushConstantRangeCount = 0,
- .pPushConstantRanges = NULL,
- };
-
- result = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &pl_create_info,
- &device->meta_state.alloc,
- &device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout);
- if (result != VK_SUCCESS)
- goto cleanup;
-
- /* compute shader */
-
- VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = vk_shader_module_handle_from_nir(cs),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = pipeline_shader_stage,
- .flags = 0,
- .layout = device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- 1, &vk_pipeline_info, NULL,
- &device->meta_state.fast_clear_flush.dcc_decompress_compute_pipeline);
- if (result != VK_SUCCESS)
- goto cleanup;
+ VkResult result = VK_SUCCESS;
+ nir_shader *cs = build_dcc_decompress_compute_shader(device);
+
+ VkDescriptorSetLayoutCreateInfo ds_create_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 2,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ {.binding = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(
+ radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc,
+ &device->meta_state.fast_clear_flush.dcc_decompress_compute_ds_layout);
+ if (result != VK_SUCCESS)
+ goto cleanup;
+
+ VkPipelineLayoutCreateInfo pl_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &device->meta_state.fast_clear_flush.dcc_decompress_compute_ds_layout,
+ .pushConstantRangeCount = 0,
+ .pPushConstantRanges = NULL,
+ };
+
+ result = radv_CreatePipelineLayout(
+ radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc,
+ &device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout);
+ if (result != VK_SUCCESS)
+ goto cleanup;
+
+ /* compute shader */
+
+ VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(cs),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = pipeline_shader_stage,
+ .flags = 0,
+ .layout = device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout,
+ };
+
+ result = radv_CreateComputePipelines(
+ radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
+ &vk_pipeline_info, NULL,
+ &device->meta_state.fast_clear_flush.dcc_decompress_compute_pipeline);
+ if (result != VK_SUCCESS)
+ goto cleanup;
cleanup:
- ralloc_free(cs);
- return result;
+ ralloc_free(cs);
+ return result;
}
static VkResult
create_pass(struct radv_device *device)
{
- VkResult result;
- VkDevice device_h = radv_device_to_handle(device);
- const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
- VkAttachmentDescription2 attachment;
-
- attachment.sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2;
- attachment.format = VK_FORMAT_UNDEFINED;
- attachment.samples = 1;
- attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
- attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
- attachment.initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
- attachment.finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
-
- result = radv_CreateRenderPass2(device_h,
- &(VkRenderPassCreateInfo2) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
- .attachmentCount = 1,
- .pAttachments = &attachment,
- .subpassCount = 1,
- .pSubpasses = &(VkSubpassDescription2) {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
- .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
- .inputAttachmentCount = 0,
- .colorAttachmentCount = 1,
- .pColorAttachments = (VkAttachmentReference2[]) {
- {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
- .attachment = 0,
- .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
- },
- },
- .pResolveAttachments = NULL,
- .pDepthStencilAttachment = &(VkAttachmentReference2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
- .attachment = VK_ATTACHMENT_UNUSED,
- },
- .preserveAttachmentCount = 0,
- .pPreserveAttachments = NULL,
- },
- .dependencyCount = 2,
- .pDependencies = (VkSubpassDependency2[]) {
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = VK_SUBPASS_EXTERNAL,
- .dstSubpass = 0,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- },
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = 0,
- .dstSubpass = VK_SUBPASS_EXTERNAL,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- }
- },
- },
- alloc,
- &device->meta_state.fast_clear_flush.pass);
-
- return result;
+ VkResult result;
+ VkDevice device_h = radv_device_to_handle(device);
+ const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
+ VkAttachmentDescription2 attachment;
+
+ attachment.sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2;
+ attachment.format = VK_FORMAT_UNDEFINED;
+ attachment.samples = 1;
+ attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
+ attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
+ attachment.initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+ attachment.finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+
+ result = radv_CreateRenderPass2(
+ device_h,
+ &(VkRenderPassCreateInfo2){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
+ .attachmentCount = 1,
+ .pAttachments = &attachment,
+ .subpassCount = 1,
+ .pSubpasses =
+ &(VkSubpassDescription2){
+ .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .colorAttachmentCount = 1,
+ .pColorAttachments =
+ (VkAttachmentReference2[]){
+ {
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
+ .attachment = 0,
+ .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+ },
+ },
+ .pResolveAttachments = NULL,
+ .pDepthStencilAttachment =
+ &(VkAttachmentReference2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
+ .attachment = VK_ATTACHMENT_UNUSED,
+ },
+ .preserveAttachmentCount = 0,
+ .pPreserveAttachments = NULL,
+ },
+ .dependencyCount = 2,
+ .pDependencies =
+ (VkSubpassDependency2[]){{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = VK_SUBPASS_EXTERNAL,
+ .dstSubpass = 0,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0},
+ {.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = 0,
+ .dstSubpass = VK_SUBPASS_EXTERNAL,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0}},
+ },
+ alloc, &device->meta_state.fast_clear_flush.pass);
+
+ return result;
}
static VkResult
create_pipeline_layout(struct radv_device *device, VkPipelineLayout *layout)
{
- VkPipelineLayoutCreateInfo pl_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 0,
- .pSetLayouts = NULL,
- .pushConstantRangeCount = 0,
- .pPushConstantRanges = NULL,
- };
-
- return radv_CreatePipelineLayout(radv_device_to_handle(device),
- &pl_create_info,
- &device->meta_state.alloc,
- layout);
+ VkPipelineLayoutCreateInfo pl_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 0,
+ .pSetLayouts = NULL,
+ .pushConstantRangeCount = 0,
+ .pPushConstantRanges = NULL,
+ };
+
+ return radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
+ &device->meta_state.alloc, layout);
}
static VkResult
-create_pipeline(struct radv_device *device,
- VkShaderModule vs_module_h,
- VkPipelineLayout layout)
+create_pipeline(struct radv_device *device, VkShaderModule vs_module_h, VkPipelineLayout layout)
{
- VkResult result;
- VkDevice device_h = radv_device_to_handle(device);
-
- nir_shader *fs_module = radv_meta_build_nir_fs_noop();
-
- if (!fs_module) {
- /* XXX: Need more accurate error */
- result = VK_ERROR_OUT_OF_HOST_MEMORY;
- goto cleanup;
- }
-
- const VkPipelineShaderStageCreateInfo stages[2] = {
- {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_VERTEX_BIT,
- .module = vs_module_h,
- .pName = "main",
- },
- {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
- .module = vk_shader_module_handle_from_nir(fs_module),
- .pName = "main",
- },
- };
-
- const VkPipelineVertexInputStateCreateInfo vi_state = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
- .vertexBindingDescriptionCount = 0,
- .vertexAttributeDescriptionCount = 0,
- };
-
- const VkPipelineInputAssemblyStateCreateInfo ia_state = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
- .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
- .primitiveRestartEnable = false,
- };
-
- const VkPipelineColorBlendStateCreateInfo blend_state = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
- .logicOpEnable = false,
- .attachmentCount = 1,
- .pAttachments = (VkPipelineColorBlendAttachmentState []) {
- {
- .colorWriteMask = VK_COLOR_COMPONENT_R_BIT |
- VK_COLOR_COMPONENT_G_BIT |
- VK_COLOR_COMPONENT_B_BIT |
- VK_COLOR_COMPONENT_A_BIT,
- },
- }
- };
- const VkPipelineRasterizationStateCreateInfo rs_state = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
- .depthClampEnable = false,
- .rasterizerDiscardEnable = false,
- .polygonMode = VK_POLYGON_MODE_FILL,
- .cullMode = VK_CULL_MODE_NONE,
- .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
- };
-
- result = radv_graphics_pipeline_create(device_h,
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- &(VkGraphicsPipelineCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
- .stageCount = 2,
- .pStages = stages,
-
- .pVertexInputState = &vi_state,
- .pInputAssemblyState = &ia_state,
-
- .pViewportState = &(VkPipelineViewportStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
- .viewportCount = 1,
- .scissorCount = 1,
- },
- .pRasterizationState = &rs_state,
- .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
- .rasterizationSamples = 1,
- .sampleShadingEnable = false,
- .pSampleMask = NULL,
- .alphaToCoverageEnable = false,
- .alphaToOneEnable = false,
- },
- .pColorBlendState = &blend_state,
- .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
- .dynamicStateCount = 2,
- .pDynamicStates = (VkDynamicState[]) {
- VK_DYNAMIC_STATE_VIEWPORT,
- VK_DYNAMIC_STATE_SCISSOR,
- },
- },
- .layout = layout,
- .renderPass = device->meta_state.fast_clear_flush.pass,
- .subpass = 0,
- },
- &(struct radv_graphics_pipeline_create_info) {
- .use_rectlist = true,
- .custom_blend_mode = V_028808_CB_ELIMINATE_FAST_CLEAR,
- },
- &device->meta_state.alloc,
- &device->meta_state.fast_clear_flush.cmask_eliminate_pipeline);
- if (result != VK_SUCCESS)
- goto cleanup;
-
- result = radv_graphics_pipeline_create(device_h,
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- &(VkGraphicsPipelineCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
- .stageCount = 2,
- .pStages = stages,
-
- .pVertexInputState = &vi_state,
- .pInputAssemblyState = &ia_state,
-
- .pViewportState = &(VkPipelineViewportStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
- .viewportCount = 1,
- .scissorCount = 1,
- },
- .pRasterizationState = &rs_state,
- .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
- .rasterizationSamples = 1,
- .sampleShadingEnable = false,
- .pSampleMask = NULL,
- .alphaToCoverageEnable = false,
- .alphaToOneEnable = false,
- },
- .pColorBlendState = &blend_state,
- .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
- .dynamicStateCount = 2,
- .pDynamicStates = (VkDynamicState[]) {
- VK_DYNAMIC_STATE_VIEWPORT,
- VK_DYNAMIC_STATE_SCISSOR,
- },
- },
- .layout = layout,
- .renderPass = device->meta_state.fast_clear_flush.pass,
- .subpass = 0,
- },
- &(struct radv_graphics_pipeline_create_info) {
- .use_rectlist = true,
- .custom_blend_mode = V_028808_CB_FMASK_DECOMPRESS,
- },
- &device->meta_state.alloc,
- &device->meta_state.fast_clear_flush.fmask_decompress_pipeline);
- if (result != VK_SUCCESS)
- goto cleanup;
-
- result = radv_graphics_pipeline_create(device_h,
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- &(VkGraphicsPipelineCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
- .stageCount = 2,
- .pStages = stages,
-
- .pVertexInputState = &vi_state,
- .pInputAssemblyState = &ia_state,
-
- .pViewportState = &(VkPipelineViewportStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
- .viewportCount = 1,
- .scissorCount = 1,
- },
- .pRasterizationState = &rs_state,
- .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
- .rasterizationSamples = 1,
- .sampleShadingEnable = false,
- .pSampleMask = NULL,
- .alphaToCoverageEnable = false,
- .alphaToOneEnable = false,
- },
- .pColorBlendState = &blend_state,
- .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
- .dynamicStateCount = 2,
- .pDynamicStates = (VkDynamicState[]) {
- VK_DYNAMIC_STATE_VIEWPORT,
- VK_DYNAMIC_STATE_SCISSOR,
- },
- },
- .layout = layout,
- .renderPass = device->meta_state.fast_clear_flush.pass,
- .subpass = 0,
- },
- &(struct radv_graphics_pipeline_create_info) {
- .use_rectlist = true,
- .custom_blend_mode = V_028808_CB_DCC_DECOMPRESS,
- },
- &device->meta_state.alloc,
- &device->meta_state.fast_clear_flush.dcc_decompress_pipeline);
- if (result != VK_SUCCESS)
- goto cleanup;
-
- goto cleanup;
+ VkResult result;
+ VkDevice device_h = radv_device_to_handle(device);
+
+ nir_shader *fs_module = radv_meta_build_nir_fs_noop();
+
+ if (!fs_module) {
+ /* XXX: Need more accurate error */
+ result = VK_ERROR_OUT_OF_HOST_MEMORY;
+ goto cleanup;
+ }
+
+ const VkPipelineShaderStageCreateInfo stages[2] = {
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_VERTEX_BIT,
+ .module = vs_module_h,
+ .pName = "main",
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .module = vk_shader_module_handle_from_nir(fs_module),
+ .pName = "main",
+ },
+ };
+
+ const VkPipelineVertexInputStateCreateInfo vi_state = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .vertexBindingDescriptionCount = 0,
+ .vertexAttributeDescriptionCount = 0,
+ };
+
+ const VkPipelineInputAssemblyStateCreateInfo ia_state = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
+ .primitiveRestartEnable = false,
+ };
+
+ const VkPipelineColorBlendStateCreateInfo blend_state = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .logicOpEnable = false,
+ .attachmentCount = 1,
+ .pAttachments = (VkPipelineColorBlendAttachmentState[]){
+ {
+ .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
+ VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
+ },
+ }};
+ const VkPipelineRasterizationStateCreateInfo rs_state = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .depthClampEnable = false,
+ .rasterizerDiscardEnable = false,
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .cullMode = VK_CULL_MODE_NONE,
+ .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
+ };
+
+ result = radv_graphics_pipeline_create(
+ device_h, radv_pipeline_cache_to_handle(&device->meta_state.cache),
+ &(VkGraphicsPipelineCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .stageCount = 2,
+ .pStages = stages,
+
+ .pVertexInputState = &vi_state,
+ .pInputAssemblyState = &ia_state,
+
+ .pViewportState =
+ &(VkPipelineViewportStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .viewportCount = 1,
+ .scissorCount = 1,
+ },
+ .pRasterizationState = &rs_state,
+ .pMultisampleState =
+ &(VkPipelineMultisampleStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .rasterizationSamples = 1,
+ .sampleShadingEnable = false,
+ .pSampleMask = NULL,
+ .alphaToCoverageEnable = false,
+ .alphaToOneEnable = false,
+ },
+ .pColorBlendState = &blend_state,
+ .pDynamicState =
+ &(VkPipelineDynamicStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .dynamicStateCount = 2,
+ .pDynamicStates =
+ (VkDynamicState[]){
+ VK_DYNAMIC_STATE_VIEWPORT,
+ VK_DYNAMIC_STATE_SCISSOR,
+ },
+ },
+ .layout = layout,
+ .renderPass = device->meta_state.fast_clear_flush.pass,
+ .subpass = 0,
+ },
+ &(struct radv_graphics_pipeline_create_info){
+ .use_rectlist = true,
+ .custom_blend_mode = V_028808_CB_ELIMINATE_FAST_CLEAR,
+ },
+ &device->meta_state.alloc, &device->meta_state.fast_clear_flush.cmask_eliminate_pipeline);
+ if (result != VK_SUCCESS)
+ goto cleanup;
+
+ result = radv_graphics_pipeline_create(
+ device_h, radv_pipeline_cache_to_handle(&device->meta_state.cache),
+ &(VkGraphicsPipelineCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .stageCount = 2,
+ .pStages = stages,
+
+ .pVertexInputState = &vi_state,
+ .pInputAssemblyState = &ia_state,
+
+ .pViewportState =
+ &(VkPipelineViewportStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .viewportCount = 1,
+ .scissorCount = 1,
+ },
+ .pRasterizationState = &rs_state,
+ .pMultisampleState =
+ &(VkPipelineMultisampleStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .rasterizationSamples = 1,
+ .sampleShadingEnable = false,
+ .pSampleMask = NULL,
+ .alphaToCoverageEnable = false,
+ .alphaToOneEnable = false,
+ },
+ .pColorBlendState = &blend_state,
+ .pDynamicState =
+ &(VkPipelineDynamicStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .dynamicStateCount = 2,
+ .pDynamicStates =
+ (VkDynamicState[]){
+ VK_DYNAMIC_STATE_VIEWPORT,
+ VK_DYNAMIC_STATE_SCISSOR,
+ },
+ },
+ .layout = layout,
+ .renderPass = device->meta_state.fast_clear_flush.pass,
+ .subpass = 0,
+ },
+ &(struct radv_graphics_pipeline_create_info){
+ .use_rectlist = true,
+ .custom_blend_mode = V_028808_CB_FMASK_DECOMPRESS,
+ },
+ &device->meta_state.alloc, &device->meta_state.fast_clear_flush.fmask_decompress_pipeline);
+ if (result != VK_SUCCESS)
+ goto cleanup;
+
+ result = radv_graphics_pipeline_create(
+ device_h, radv_pipeline_cache_to_handle(&device->meta_state.cache),
+ &(VkGraphicsPipelineCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .stageCount = 2,
+ .pStages = stages,
+
+ .pVertexInputState = &vi_state,
+ .pInputAssemblyState = &ia_state,
+
+ .pViewportState =
+ &(VkPipelineViewportStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .viewportCount = 1,
+ .scissorCount = 1,
+ },
+ .pRasterizationState = &rs_state,
+ .pMultisampleState =
+ &(VkPipelineMultisampleStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .rasterizationSamples = 1,
+ .sampleShadingEnable = false,
+ .pSampleMask = NULL,
+ .alphaToCoverageEnable = false,
+ .alphaToOneEnable = false,
+ },
+ .pColorBlendState = &blend_state,
+ .pDynamicState =
+ &(VkPipelineDynamicStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .dynamicStateCount = 2,
+ .pDynamicStates =
+ (VkDynamicState[]){
+ VK_DYNAMIC_STATE_VIEWPORT,
+ VK_DYNAMIC_STATE_SCISSOR,
+ },
+ },
+ .layout = layout,
+ .renderPass = device->meta_state.fast_clear_flush.pass,
+ .subpass = 0,
+ },
+ &(struct radv_graphics_pipeline_create_info){
+ .use_rectlist = true,
+ .custom_blend_mode = V_028808_CB_DCC_DECOMPRESS,
+ },
+ &device->meta_state.alloc, &device->meta_state.fast_clear_flush.dcc_decompress_pipeline);
+ if (result != VK_SUCCESS)
+ goto cleanup;
+
+ goto cleanup;
cleanup:
- ralloc_free(fs_module);
- return result;
+ ralloc_free(fs_module);
+ return result;
}
void
radv_device_finish_meta_fast_clear_flush_state(struct radv_device *device)
{
- struct radv_meta_state *state = &device->meta_state;
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->fast_clear_flush.dcc_decompress_pipeline,
- &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->fast_clear_flush.fmask_decompress_pipeline,
- &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->fast_clear_flush.cmask_eliminate_pipeline,
- &state->alloc);
- radv_DestroyRenderPass(radv_device_to_handle(device),
- state->fast_clear_flush.pass, &state->alloc);
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->fast_clear_flush.p_layout,
- &state->alloc);
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->fast_clear_flush.dcc_decompress_compute_pipeline,
- &state->alloc);
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->fast_clear_flush.dcc_decompress_compute_p_layout,
- &state->alloc);
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- state->fast_clear_flush.dcc_decompress_compute_ds_layout,
- &state->alloc);
+ struct radv_meta_state *state = &device->meta_state;
+
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->fast_clear_flush.dcc_decompress_pipeline, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->fast_clear_flush.fmask_decompress_pipeline, &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->fast_clear_flush.cmask_eliminate_pipeline, &state->alloc);
+ radv_DestroyRenderPass(radv_device_to_handle(device), state->fast_clear_flush.pass,
+ &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->fast_clear_flush.p_layout,
+ &state->alloc);
+
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->fast_clear_flush.dcc_decompress_compute_pipeline, &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device),
+ state->fast_clear_flush.dcc_decompress_compute_p_layout,
+ &state->alloc);
+ radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
+ state->fast_clear_flush.dcc_decompress_compute_ds_layout,
+ &state->alloc);
}
static VkResult
radv_device_init_meta_fast_clear_flush_state_internal(struct radv_device *device)
{
- VkResult res = VK_SUCCESS;
-
- mtx_lock(&device->meta_state.mtx);
- if (device->meta_state.fast_clear_flush.cmask_eliminate_pipeline) {
- mtx_unlock(&device->meta_state.mtx);
- return VK_SUCCESS;
- }
-
- nir_shader *vs_module = radv_meta_build_nir_vs_generate_vertices();
- if (!vs_module) {
- /* XXX: Need more accurate error */
- res = VK_ERROR_OUT_OF_HOST_MEMORY;
- goto fail;
- }
-
- res = create_pass(device);
- if (res != VK_SUCCESS)
- goto fail;
-
- res = create_pipeline_layout(device,
- &device->meta_state.fast_clear_flush.p_layout);
- if (res != VK_SUCCESS)
- goto fail;
-
- VkShaderModule vs_module_h = vk_shader_module_handle_from_nir(vs_module);
- res = create_pipeline(device, vs_module_h,
- device->meta_state.fast_clear_flush.p_layout);
- if (res != VK_SUCCESS)
- goto fail;
-
- res = create_dcc_compress_compute(device);
- if (res != VK_SUCCESS)
- goto fail;
-
- goto cleanup;
+ VkResult res = VK_SUCCESS;
+
+ mtx_lock(&device->meta_state.mtx);
+ if (device->meta_state.fast_clear_flush.cmask_eliminate_pipeline) {
+ mtx_unlock(&device->meta_state.mtx);
+ return VK_SUCCESS;
+ }
+
+ nir_shader *vs_module = radv_meta_build_nir_vs_generate_vertices();
+ if (!vs_module) {
+ /* XXX: Need more accurate error */
+ res = VK_ERROR_OUT_OF_HOST_MEMORY;
+ goto fail;
+ }
+
+ res = create_pass(device);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_pipeline_layout(device, &device->meta_state.fast_clear_flush.p_layout);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ VkShaderModule vs_module_h = vk_shader_module_handle_from_nir(vs_module);
+ res = create_pipeline(device, vs_module_h, device->meta_state.fast_clear_flush.p_layout);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_dcc_compress_compute(device);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ goto cleanup;
fail:
- radv_device_finish_meta_fast_clear_flush_state(device);
+ radv_device_finish_meta_fast_clear_flush_state(device);
cleanup:
- ralloc_free(vs_module);
- mtx_unlock(&device->meta_state.mtx);
+ ralloc_free(vs_module);
+ mtx_unlock(&device->meta_state.mtx);
- return res;
+ return res;
}
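
The function above follows the usual on-demand pattern: take the meta-state mutex, return early if another thread already built the pipelines, otherwise create them and unlock. A minimal standalone sketch of that pattern, written with pthreads and illustrative names rather than the driver's own:

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t meta_mtx = PTHREAD_MUTEX_INITIALIZER;
static bool pipelines_ready;

static int ensure_meta_pipelines(void)
{
   pthread_mutex_lock(&meta_mtx);
   if (pipelines_ready) {
      /* Another thread won the race; nothing left to do. */
      pthread_mutex_unlock(&meta_mtx);
      return 0;
   }

   /* ... create the render pass, pipeline layout and pipelines here ... */
   pipelines_ready = true;

   pthread_mutex_unlock(&meta_mtx);
   return 0;
}
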
-
VkResult
radv_device_init_meta_fast_clear_flush_state(struct radv_device *device, bool on_demand)
{
- if (on_demand)
- return VK_SUCCESS;
+ if (on_demand)
+ return VK_SUCCESS;
- return radv_device_init_meta_fast_clear_flush_state_internal(device);
+ return radv_device_init_meta_fast_clear_flush_state_internal(device);
}
static void
radv_emit_set_predication_state_from_image(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- uint64_t pred_offset, bool value)
+ struct radv_image *image, uint64_t pred_offset,
+ bool value)
{
- uint64_t va = 0;
+ uint64_t va = 0;
- if (value) {
- va = radv_buffer_get_va(image->bo) + image->offset;
- va += pred_offset;
- }
+ if (value) {
+ va = radv_buffer_get_va(image->bo) + image->offset;
+ va += pred_offset;
+ }
- si_emit_set_predication_state(cmd_buffer, true, PREDICATION_OP_BOOL64, va);
+ si_emit_set_predication_state(cmd_buffer, true, PREDICATION_OP_BOOL64, va);
}
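
The predication address computed above is plain offset arithmetic: the GPU VA of the image's buffer object, plus the image's offset inside that BO, plus the metadata offset holding the 64-bit predicate. A worked example with made-up values:

#include <inttypes.h>
#include <stdio.h>

int main(void)
{
   uint64_t bo_va = 0x800000000000ull; /* hypothetical GPU address of image->bo */
   uint64_t image_offset = 0x2000;     /* hypothetical offset of the image in the BO */
   uint64_t pred_offset = 0x40;        /* hypothetical predicate metadata offset */

   printf("predication va = 0x%" PRIx64 "\n", bo_va + image_offset + pred_offset);
   return 0;
}
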
static void
-radv_process_color_image_layer(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *range,
- int level, int layer, bool flush_cb)
+radv_process_color_image_layer(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *range, int level, int layer,
+ bool flush_cb)
{
- struct radv_device *device = cmd_buffer->device;
- struct radv_image_view iview;
- uint32_t width, height;
-
- width = radv_minify(image->info.width, range->baseMipLevel + level);
- height = radv_minify(image->info.height, range->baseMipLevel + level);
-
- radv_image_view_init(&iview, device,
- &(VkImageViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(image),
- .viewType = radv_meta_get_view_type(image),
- .format = image->vk_format,
- .subresourceRange = {
- .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .baseMipLevel = range->baseMipLevel + level,
- .levelCount = 1,
- .baseArrayLayer = range->baseArrayLayer + layer,
- .layerCount = 1,
- },
- }, NULL);
-
- VkFramebuffer fb_h;
- radv_CreateFramebuffer(radv_device_to_handle(device),
- &(VkFramebufferCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
- .attachmentCount = 1,
- .pAttachments = (VkImageView[]) {
- radv_image_view_to_handle(&iview)
- },
- .width = width,
- .height = height,
- .layers = 1
- }, &cmd_buffer->pool->alloc, &fb_h);
-
- radv_cmd_buffer_begin_render_pass(cmd_buffer,
- &(VkRenderPassBeginInfo) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
- .renderPass = device->meta_state.fast_clear_flush.pass,
- .framebuffer = fb_h,
- .renderArea = {
- .offset = { 0, 0, },
- .extent = { width, height, }
- },
- .clearValueCount = 0,
- .pClearValues = NULL,
- }, NULL);
-
- radv_cmd_buffer_set_subpass(cmd_buffer,
- &cmd_buffer->state.pass->subpasses[0]);
-
- if (flush_cb)
- cmd_buffer->state.flush_bits |=
- radv_dst_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, image);
-
- radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
-
- if (flush_cb)
- cmd_buffer->state.flush_bits |=
- radv_src_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, image);
-
- radv_cmd_buffer_end_render_pass(cmd_buffer);
-
- radv_DestroyFramebuffer(radv_device_to_handle(device), fb_h,
- &cmd_buffer->pool->alloc);
+ struct radv_device *device = cmd_buffer->device;
+ struct radv_image_view iview;
+ uint32_t width, height;
+
+ width = radv_minify(image->info.width, range->baseMipLevel + level);
+ height = radv_minify(image->info.height, range->baseMipLevel + level);
+
+ radv_image_view_init(&iview, device,
+ &(VkImageViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(image),
+ .viewType = radv_meta_get_view_type(image),
+ .format = image->vk_format,
+ .subresourceRange =
+ {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = range->baseMipLevel + level,
+ .levelCount = 1,
+ .baseArrayLayer = range->baseArrayLayer + layer,
+ .layerCount = 1,
+ },
+ },
+ NULL);
+
+ VkFramebuffer fb_h;
+ radv_CreateFramebuffer(
+ radv_device_to_handle(device),
+ &(VkFramebufferCreateInfo){.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments = (VkImageView[]){radv_image_view_to_handle(&iview)},
+ .width = width,
+ .height = height,
+ .layers = 1},
+ &cmd_buffer->pool->alloc, &fb_h);
+
+ radv_cmd_buffer_begin_render_pass(cmd_buffer,
+ &(VkRenderPassBeginInfo){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .renderPass = device->meta_state.fast_clear_flush.pass,
+ .framebuffer = fb_h,
+ .renderArea = {.offset =
+ {
+ 0,
+ 0,
+ },
+ .extent =
+ {
+ width,
+ height,
+ }},
+ .clearValueCount = 0,
+ .pClearValues = NULL,
+ },
+ NULL);
+
+ radv_cmd_buffer_set_subpass(cmd_buffer, &cmd_buffer->state.pass->subpasses[0]);
+
+ if (flush_cb)
+ cmd_buffer->state.flush_bits |=
+ radv_dst_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, image);
+
+ radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
+
+ if (flush_cb)
+ cmd_buffer->state.flush_bits |=
+ radv_src_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, image);
+
+ radv_cmd_buffer_end_render_pass(cmd_buffer);
+
+ radv_DestroyFramebuffer(radv_device_to_handle(device), fb_h, &cmd_buffer->pool->alloc);
}
static void
-radv_process_color_image(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *subresourceRange,
- bool decompress_dcc)
+radv_process_color_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *subresourceRange, bool decompress_dcc)
{
- struct radv_device *device = cmd_buffer->device;
- struct radv_meta_saved_state saved_state;
- bool flush_cb = false;
- VkPipeline *pipeline;
-
- if (decompress_dcc && radv_dcc_enabled(image, subresourceRange->baseMipLevel)) {
- pipeline = &device->meta_state.fast_clear_flush.dcc_decompress_pipeline;
- } else if (radv_image_has_fmask(image) && !image->tc_compatible_cmask) {
- pipeline = &device->meta_state.fast_clear_flush.fmask_decompress_pipeline;
- } else {
- pipeline = &device->meta_state.fast_clear_flush.cmask_eliminate_pipeline;
- }
-
- if (!*pipeline) {
- VkResult ret;
-
- ret = radv_device_init_meta_fast_clear_flush_state_internal(device);
- if (ret != VK_SUCCESS) {
- cmd_buffer->record_result = ret;
- return;
- }
- }
-
- if (pipeline == &device->meta_state.fast_clear_flush.dcc_decompress_pipeline ||
- pipeline == &device->meta_state.fast_clear_flush.fmask_decompress_pipeline) {
- /* Flushing CB is required before and after DCC_DECOMPRESS or
- * FMASK_DECOMPRESS.
- */
- flush_cb = true;
- }
-
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_GRAPHICS_PIPELINE |
- RADV_META_SAVE_PASS);
-
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
-
- for (uint32_t l = 0; l < radv_get_levelCount(image, subresourceRange); ++l) {
- uint32_t width, height;
-
- /* Do not decompress levels without DCC. */
- if (decompress_dcc &&
- !radv_dcc_enabled(image, subresourceRange->baseMipLevel + l))
- continue;
-
- width = radv_minify(image->info.width,
- subresourceRange->baseMipLevel + l);
- height = radv_minify(image->info.height,
- subresourceRange->baseMipLevel + l);
-
- radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
- &(VkViewport) {
- .x = 0,
- .y = 0,
- .width = width,
- .height = height,
- .minDepth = 0.0f,
- .maxDepth = 1.0f
- });
-
- radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
- &(VkRect2D) {
- .offset = { 0, 0 },
- .extent = { width, height },
- });
-
- for (uint32_t s = 0; s < radv_get_layerCount(image, subresourceRange); s++) {
- radv_process_color_image_layer(cmd_buffer, image,
- subresourceRange, l, s,
- flush_cb);
- }
- }
-
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
- RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
-
- radv_meta_restore(&saved_state, cmd_buffer);
+ struct radv_device *device = cmd_buffer->device;
+ struct radv_meta_saved_state saved_state;
+ bool flush_cb = false;
+ VkPipeline *pipeline;
+
+ if (decompress_dcc && radv_dcc_enabled(image, subresourceRange->baseMipLevel)) {
+ pipeline = &device->meta_state.fast_clear_flush.dcc_decompress_pipeline;
+ } else if (radv_image_has_fmask(image) && !image->tc_compatible_cmask) {
+ pipeline = &device->meta_state.fast_clear_flush.fmask_decompress_pipeline;
+ } else {
+ pipeline = &device->meta_state.fast_clear_flush.cmask_eliminate_pipeline;
+ }
+
+ if (!*pipeline) {
+ VkResult ret;
+
+ ret = radv_device_init_meta_fast_clear_flush_state_internal(device);
+ if (ret != VK_SUCCESS) {
+ cmd_buffer->record_result = ret;
+ return;
+ }
+ }
+
+ if (pipeline == &device->meta_state.fast_clear_flush.dcc_decompress_pipeline ||
+ pipeline == &device->meta_state.fast_clear_flush.fmask_decompress_pipeline) {
+ /* Flushing CB is required before and after DCC_DECOMPRESS or
+ * FMASK_DECOMPRESS.
+ */
+ flush_cb = true;
+ }
+
+ radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_PASS);
+
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS,
+ *pipeline);
+
+ for (uint32_t l = 0; l < radv_get_levelCount(image, subresourceRange); ++l) {
+ uint32_t width, height;
+
+ /* Do not decompress levels without DCC. */
+ if (decompress_dcc && !radv_dcc_enabled(image, subresourceRange->baseMipLevel + l))
+ continue;
+
+ width = radv_minify(image->info.width, subresourceRange->baseMipLevel + l);
+ height = radv_minify(image->info.height, subresourceRange->baseMipLevel + l);
+
+ radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
+ &(VkViewport){.x = 0,
+ .y = 0,
+ .width = width,
+ .height = height,
+ .minDepth = 0.0f,
+ .maxDepth = 1.0f});
+
+ radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
+ &(VkRect2D){
+ .offset = {0, 0},
+ .extent = {width, height},
+ });
+
+ for (uint32_t s = 0; s < radv_get_layerCount(image, subresourceRange); s++) {
+ radv_process_color_image_layer(cmd_buffer, image, subresourceRange, l, s, flush_cb);
+ }
+ }
+
+ cmd_buffer->state.flush_bits |=
+ RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
+
+ radv_meta_restore(&saved_state, cmd_buffer);
}
static void
-radv_emit_color_decompress(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *subresourceRange,
- bool decompress_dcc)
+radv_emit_color_decompress(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *subresourceRange, bool decompress_dcc)
{
- bool use_predication = false;
- bool old_predicating = false;
-
- assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
-
- if ((decompress_dcc && radv_dcc_enabled(image, subresourceRange->baseMipLevel)) ||
- (!(radv_image_has_fmask(image) && !image->tc_compatible_cmask) && image->fce_pred_offset)) {
- use_predication = true;
- }
-
- /* If we are asked for DCC decompression without DCC predicates we cannot
- * use the FCE predicate. */
- if (decompress_dcc && image->dcc_pred_offset == 0)
- use_predication = false;
-
- if (radv_dcc_enabled(image, subresourceRange->baseMipLevel) &&
- (image->info.array_size != radv_get_layerCount(image, subresourceRange) ||
- subresourceRange->baseArrayLayer != 0)) {
- /* Only use predication if the image has DCC with mipmaps or
- * if the range of layers covers the whole image because the
- * predication is based on mip level.
- */
- use_predication = false;
- }
-
- if (use_predication) {
- uint64_t pred_offset = decompress_dcc ? image->dcc_pred_offset :
- image->fce_pred_offset;
- pred_offset += 8 * subresourceRange->baseMipLevel;
-
- old_predicating = cmd_buffer->state.predicating;
-
- radv_emit_set_predication_state_from_image(cmd_buffer, image, pred_offset, true);
- cmd_buffer->state.predicating = true;
- }
-
- radv_process_color_image(cmd_buffer, image, subresourceRange,
- decompress_dcc);
-
- if (use_predication) {
- uint64_t pred_offset = decompress_dcc ? image->dcc_pred_offset :
- image->fce_pred_offset;
- pred_offset += 8 * subresourceRange->baseMipLevel;
-
- cmd_buffer->state.predicating = old_predicating;
-
- radv_emit_set_predication_state_from_image(cmd_buffer, image, pred_offset, false);
-
- if (cmd_buffer->state.predication_type != -1) {
- /* Restore previous conditional rendering user state. */
- si_emit_set_predication_state(cmd_buffer,
- cmd_buffer->state.predication_type,
- cmd_buffer->state.predication_op,
- cmd_buffer->state.predication_va);
- }
- }
-
- if (image->fce_pred_offset != 0) {
- /* Clear the image's fast-clear eliminate predicate because
- * FMASK and DCC also imply a fast-clear eliminate.
- */
- radv_update_fce_metadata(cmd_buffer, image, subresourceRange, false);
- }
-
- if (radv_dcc_enabled(image, subresourceRange->baseMipLevel)) {
- /* Mark the image as being decompressed. */
- if (decompress_dcc)
- radv_update_dcc_metadata(cmd_buffer, image, subresourceRange, false);
- }
+ bool use_predication = false;
+ bool old_predicating = false;
+
+ assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
+
+ if ((decompress_dcc && radv_dcc_enabled(image, subresourceRange->baseMipLevel)) ||
+ (!(radv_image_has_fmask(image) && !image->tc_compatible_cmask) && image->fce_pred_offset)) {
+ use_predication = true;
+ }
+
+   /* If we are asked for DCC decompression without DCC predicates, we cannot
+    * use the FCE predicate. */
+ if (decompress_dcc && image->dcc_pred_offset == 0)
+ use_predication = false;
+
+ if (radv_dcc_enabled(image, subresourceRange->baseMipLevel) &&
+ (image->info.array_size != radv_get_layerCount(image, subresourceRange) ||
+ subresourceRange->baseArrayLayer != 0)) {
+ /* Only use predication if the image has DCC with mipmaps or
+ * if the range of layers covers the whole image because the
+ * predication is based on mip level.
+ */
+ use_predication = false;
+ }
+
+ if (use_predication) {
+ uint64_t pred_offset = decompress_dcc ? image->dcc_pred_offset : image->fce_pred_offset;
+ pred_offset += 8 * subresourceRange->baseMipLevel;
+
+ old_predicating = cmd_buffer->state.predicating;
+
+ radv_emit_set_predication_state_from_image(cmd_buffer, image, pred_offset, true);
+ cmd_buffer->state.predicating = true;
+ }
+
+ radv_process_color_image(cmd_buffer, image, subresourceRange, decompress_dcc);
+
+ if (use_predication) {
+ uint64_t pred_offset = decompress_dcc ? image->dcc_pred_offset : image->fce_pred_offset;
+ pred_offset += 8 * subresourceRange->baseMipLevel;
+
+ cmd_buffer->state.predicating = old_predicating;
+
+ radv_emit_set_predication_state_from_image(cmd_buffer, image, pred_offset, false);
+
+ if (cmd_buffer->state.predication_type != -1) {
+ /* Restore previous conditional rendering user state. */
+ si_emit_set_predication_state(cmd_buffer, cmd_buffer->state.predication_type,
+ cmd_buffer->state.predication_op,
+ cmd_buffer->state.predication_va);
+ }
+ }
+
+ if (image->fce_pred_offset != 0) {
+ /* Clear the image's fast-clear eliminate predicate because
+ * FMASK and DCC also imply a fast-clear eliminate.
+ */
+ radv_update_fce_metadata(cmd_buffer, image, subresourceRange, false);
+ }
+
+ if (radv_dcc_enabled(image, subresourceRange->baseMipLevel)) {
+ /* Mark the image as being decompressed. */
+ if (decompress_dcc)
+ radv_update_dcc_metadata(cmd_buffer, image, subresourceRange, false);
+ }
}
void
-radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
+radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
const VkImageSubresourceRange *subresourceRange)
{
- struct radv_barrier_data barrier = {0};
+ struct radv_barrier_data barrier = {0};
- if (radv_image_has_fmask(image) && !image->tc_compatible_cmask) {
- barrier.layout_transitions.fmask_decompress = 1;
- } else {
- barrier.layout_transitions.fast_clear_eliminate = 1;
- }
- radv_describe_layout_transition(cmd_buffer, &barrier);
+ if (radv_image_has_fmask(image) && !image->tc_compatible_cmask) {
+ barrier.layout_transitions.fmask_decompress = 1;
+ } else {
+ barrier.layout_transitions.fast_clear_eliminate = 1;
+ }
+ radv_describe_layout_transition(cmd_buffer, &barrier);
- assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
- radv_emit_color_decompress(cmd_buffer, image, subresourceRange, false);
+ assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
+ radv_emit_color_decompress(cmd_buffer, image, subresourceRange, false);
}
static void
-radv_decompress_dcc_gfx(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
+radv_decompress_dcc_gfx(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
const VkImageSubresourceRange *subresourceRange)
{
- radv_emit_color_decompress(cmd_buffer, image, subresourceRange, true);
+ radv_emit_color_decompress(cmd_buffer, image, subresourceRange, true);
}
static void
-radv_decompress_dcc_compute(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
+radv_decompress_dcc_compute(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
const VkImageSubresourceRange *subresourceRange)
{
- struct radv_meta_saved_state saved_state;
- struct radv_image_view load_iview = {0};
- struct radv_image_view store_iview = {0};
- struct radv_device *device = cmd_buffer->device;
-
- cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT,
- image);
-
- if (!cmd_buffer->device->meta_state.fast_clear_flush.cmask_eliminate_pipeline) {
- VkResult ret = radv_device_init_meta_fast_clear_flush_state_internal(cmd_buffer->device);
- if (ret != VK_SUCCESS) {
- cmd_buffer->record_result = ret;
- return;
- }
- }
-
- radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_DESCRIPTORS |
- RADV_META_SAVE_COMPUTE_PIPELINE);
-
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.fast_clear_flush.dcc_decompress_compute_pipeline);
-
- for (uint32_t l = 0; l < radv_get_levelCount(image, subresourceRange); l++) {
- uint32_t width, height;
-
- /* Do not decompress levels without DCC. */
- if (!radv_dcc_enabled(image, subresourceRange->baseMipLevel + l))
- continue;
-
- width = radv_minify(image->info.width,
- subresourceRange->baseMipLevel + l);
- height = radv_minify(image->info.height,
- subresourceRange->baseMipLevel + l);
-
- for (uint32_t s = 0; s < radv_get_layerCount(image, subresourceRange); s++) {
- radv_image_view_init(&load_iview, cmd_buffer->device,
- &(VkImageViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(image),
- .viewType = VK_IMAGE_VIEW_TYPE_2D,
- .format = image->vk_format,
- .subresourceRange = {
- .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .baseMipLevel = subresourceRange->baseMipLevel + l,
- .levelCount = 1,
- .baseArrayLayer = subresourceRange->baseArrayLayer + s,
- .layerCount = 1
- },
- }, NULL);
- radv_image_view_init(&store_iview, cmd_buffer->device,
- &(VkImageViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(image),
- .viewType = VK_IMAGE_VIEW_TYPE_2D,
- .format = image->vk_format,
- .subresourceRange = {
- .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .baseMipLevel = subresourceRange->baseMipLevel + l,
- .levelCount = 1,
- .baseArrayLayer = subresourceRange->baseArrayLayer + s,
- .layerCount = 1
- },
- }, &(struct radv_image_view_extra_create_info) {
- .disable_compression = true
- });
-
- radv_meta_push_descriptor_set(cmd_buffer,
- VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout,
- 0, /* set */
- 2, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]) {
- {
- .sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(&load_iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- }
- },
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 1,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]) {
- {
- .sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(&store_iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- }
- }
- });
-
- radv_unaligned_dispatch(cmd_buffer, width, height, 1);
- }
- }
-
- /* Mark this image as actually being decompressed. */
- radv_update_dcc_metadata(cmd_buffer, image, subresourceRange, false);
-
- radv_meta_restore(&saved_state, cmd_buffer);
-
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_INV_VCACHE |
- radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);
-
- /* Initialize the DCC metadata as "fully expanded". */
- cmd_buffer->state.flush_bits |=
- radv_init_dcc(cmd_buffer, image, subresourceRange, 0xffffffff);
+ struct radv_meta_saved_state saved_state;
+ struct radv_image_view load_iview = {0};
+ struct radv_image_view store_iview = {0};
+ struct radv_device *device = cmd_buffer->device;
+
+ cmd_buffer->state.flush_bits |=
+ radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);
+
+ if (!cmd_buffer->device->meta_state.fast_clear_flush.cmask_eliminate_pipeline) {
+ VkResult ret = radv_device_init_meta_fast_clear_flush_state_internal(cmd_buffer->device);
+ if (ret != VK_SUCCESS) {
+ cmd_buffer->record_result = ret;
+ return;
+ }
+ }
+
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE);
+
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
+ device->meta_state.fast_clear_flush.dcc_decompress_compute_pipeline);
+
+ for (uint32_t l = 0; l < radv_get_levelCount(image, subresourceRange); l++) {
+ uint32_t width, height;
+
+ /* Do not decompress levels without DCC. */
+ if (!radv_dcc_enabled(image, subresourceRange->baseMipLevel + l))
+ continue;
+
+ width = radv_minify(image->info.width, subresourceRange->baseMipLevel + l);
+ height = radv_minify(image->info.height, subresourceRange->baseMipLevel + l);
+
+ for (uint32_t s = 0; s < radv_get_layerCount(image, subresourceRange); s++) {
+ radv_image_view_init(
+ &load_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(image),
+ .viewType = VK_IMAGE_VIEW_TYPE_2D,
+ .format = image->vk_format,
+ .subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = subresourceRange->baseMipLevel + l,
+ .levelCount = 1,
+ .baseArrayLayer = subresourceRange->baseArrayLayer + s,
+ .layerCount = 1},
+ },
+ NULL);
+ radv_image_view_init(
+ &store_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(image),
+ .viewType = VK_IMAGE_VIEW_TYPE_2D,
+ .format = image->vk_format,
+ .subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = subresourceRange->baseMipLevel + l,
+ .levelCount = 1,
+ .baseArrayLayer = subresourceRange->baseArrayLayer + s,
+ .layerCount = 1},
+ },
+ &(struct radv_image_view_extra_create_info){.disable_compression = true});
+
+ radv_meta_push_descriptor_set(
+ cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
+ device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout, 0, /* set */
+ 2, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .pImageInfo =
+ (VkDescriptorImageInfo[]){
+ {
+ .sampler = VK_NULL_HANDLE,
+ .imageView = radv_image_view_to_handle(&load_iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }},
+ {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 1,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .pImageInfo = (VkDescriptorImageInfo[]){
+ {
+ .sampler = VK_NULL_HANDLE,
+ .imageView = radv_image_view_to_handle(&store_iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }}});
+
+ radv_unaligned_dispatch(cmd_buffer, width, height, 1);
+ }
+ }
+
+ /* Mark this image as actually being decompressed. */
+ radv_update_dcc_metadata(cmd_buffer, image, subresourceRange, false);
+
+ radv_meta_restore(&saved_state, cmd_buffer);
+
+ cmd_buffer->state.flush_bits |=
+ RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
+ radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);
+
+ /* Initialize the DCC metadata as "fully expanded". */
+ cmd_buffer->state.flush_bits |= radv_init_dcc(cmd_buffer, image, subresourceRange, 0xffffffff);
}
void
-radv_decompress_dcc(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
+radv_decompress_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
const VkImageSubresourceRange *subresourceRange)
{
- struct radv_barrier_data barrier = {0};
+ struct radv_barrier_data barrier = {0};
- barrier.layout_transitions.dcc_decompress = 1;
- radv_describe_layout_transition(cmd_buffer, &barrier);
+ barrier.layout_transitions.dcc_decompress = 1;
+ radv_describe_layout_transition(cmd_buffer, &barrier);
- if (cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL)
- radv_decompress_dcc_gfx(cmd_buffer, image, subresourceRange);
- else
- radv_decompress_dcc_compute(cmd_buffer, image, subresourceRange);
+ if (cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL)
+ radv_decompress_dcc_gfx(cmd_buffer, image, subresourceRange);
+ else
+ radv_decompress_dcc_compute(cmd_buffer, image, subresourceRange);
}
diff --git a/src/amd/vulkan/radv_meta_fmask_expand.c b/src/amd/vulkan/radv_meta_fmask_expand.c
index 4b7b685d6b0..dc2327162e1 100644
--- a/src/amd/vulkan/radv_meta_fmask_expand.c
+++ b/src/amd/vulkan/radv_meta_fmask_expand.c
@@ -29,279 +29,249 @@
static nir_shader *
build_fmask_expand_compute_shader(struct radv_device *device, int samples)
{
- const struct glsl_type *type =
- glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, true,
- GLSL_TYPE_FLOAT);
- const struct glsl_type *img_type =
- glsl_image_type(GLSL_SAMPLER_DIM_MS, true,
- GLSL_TYPE_FLOAT);
-
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_fmask_expand_cs-%d", samples);
- b.shader->info.cs.local_size[0] = 8;
- b.shader->info.cs.local_size[1] = 8;
- b.shader->info.cs.local_size[2] = 1;
-
- nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
- type, "s_tex");
- input_img->data.descriptor_set = 0;
- input_img->data.binding = 0;
-
- nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
- img_type, "out_img");
- output_img->data.descriptor_set = 0;
- output_img->data.binding = 1;
- output_img->data.access = ACCESS_NON_READABLE;
-
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
- nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
- nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info.cs.local_size[0],
- b.shader->info.cs.local_size[1],
- b.shader->info.cs.local_size[2], 0);
-
- nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
- nir_ssa_def *layer_id = nir_channel(&b, wg_id, 2);
-
- nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
- nir_ssa_def *output_img_deref = &nir_build_deref_var(&b, output_img)->dest.ssa;
-
- nir_ssa_def *tex_coord = nir_vec3(&b, nir_channel(&b, global_id, 0),
- nir_channel(&b, global_id, 1),
- layer_id);
-
- nir_tex_instr *tex_instr[8];
- for (uint32_t i = 0; i < samples; i++) {
- tex_instr[i] = nir_tex_instr_create(b.shader, 3);
-
- nir_tex_instr *tex = tex_instr[i];
- tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
- tex->op = nir_texop_txf_ms;
- tex->src[0].src_type = nir_tex_src_coord;
- tex->src[0].src = nir_src_for_ssa(tex_coord);
- tex->src[1].src_type = nir_tex_src_ms_index;
- tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i));
- tex->src[2].src_type = nir_tex_src_texture_deref;
- tex->src[2].src = nir_src_for_ssa(input_img_deref);
- tex->dest_type = nir_type_float32;
- tex->is_array = true;
- tex->coord_components = 3;
-
- nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
- nir_builder_instr_insert(&b, &tex->instr);
- }
-
- nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, tex_coord, 0),
- nir_channel(&b, tex_coord, 1),
- nir_channel(&b, tex_coord, 2),
- nir_imm_int(&b, 0));
-
- for (uint32_t i = 0; i < samples; i++) {
- nir_ssa_def *outval = &tex_instr[i]->dest.ssa;
-
- nir_image_deref_store(&b, output_img_deref, img_coord, nir_imm_int(&b, i),
- outval, nir_imm_int(&b, 0));
- }
-
- return b.shader;
+ const struct glsl_type *type =
+ glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, true, GLSL_TYPE_FLOAT);
+ const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_MS, true, GLSL_TYPE_FLOAT);
+
+ nir_builder b =
+ nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_fmask_expand_cs-%d", samples);
+ b.shader->info.cs.local_size[0] = 8;
+ b.shader->info.cs.local_size[1] = 8;
+ b.shader->info.cs.local_size[2] = 1;
+
+ nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, type, "s_tex");
+ input_img->data.descriptor_set = 0;
+ input_img->data.binding = 0;
+
+ nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
+ output_img->data.descriptor_set = 0;
+ output_img->data.binding = 1;
+ output_img->data.access = ACCESS_NON_READABLE;
+
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
+ nir_ssa_def *block_size =
+ nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1],
+ b.shader->info.cs.local_size[2], 0);
+
+ nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+ nir_ssa_def *layer_id = nir_channel(&b, wg_id, 2);
+
+ nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
+ nir_ssa_def *output_img_deref = &nir_build_deref_var(&b, output_img)->dest.ssa;
+
+ nir_ssa_def *tex_coord =
+ nir_vec3(&b, nir_channel(&b, global_id, 0), nir_channel(&b, global_id, 1), layer_id);
+
+ nir_tex_instr *tex_instr[8];
+ for (uint32_t i = 0; i < samples; i++) {
+ tex_instr[i] = nir_tex_instr_create(b.shader, 3);
+
+ nir_tex_instr *tex = tex_instr[i];
+ tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
+ tex->op = nir_texop_txf_ms;
+ tex->src[0].src_type = nir_tex_src_coord;
+ tex->src[0].src = nir_src_for_ssa(tex_coord);
+ tex->src[1].src_type = nir_tex_src_ms_index;
+ tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i));
+ tex->src[2].src_type = nir_tex_src_texture_deref;
+ tex->src[2].src = nir_src_for_ssa(input_img_deref);
+ tex->dest_type = nir_type_float32;
+ tex->is_array = true;
+ tex->coord_components = 3;
+
+ nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+ nir_builder_instr_insert(&b, &tex->instr);
+ }
+
+ nir_ssa_def *img_coord =
+ nir_vec4(&b, nir_channel(&b, tex_coord, 0), nir_channel(&b, tex_coord, 1),
+ nir_channel(&b, tex_coord, 2), nir_imm_int(&b, 0));
+
+ for (uint32_t i = 0; i < samples; i++) {
+ nir_ssa_def *outval = &tex_instr[i]->dest.ssa;
+
+ nir_image_deref_store(&b, output_img_deref, img_coord, nir_imm_int(&b, i), outval,
+ nir_imm_int(&b, 0));
+ }
+
+ return b.shader;
}
void
-radv_expand_fmask_image_inplace(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *subresourceRange)
+radv_expand_fmask_image_inplace(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *subresourceRange)
{
- struct radv_device *device = cmd_buffer->device;
- struct radv_meta_saved_state saved_state;
- const uint32_t samples = image->info.samples;
- const uint32_t samples_log2 = ffs(samples) - 1;
- unsigned layer_count = radv_get_layerCount(image, subresourceRange);
- struct radv_image_view iview;
-
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_COMPUTE_PIPELINE |
- RADV_META_SAVE_DESCRIPTORS);
-
- VkPipeline pipeline = device->meta_state.fmask_expand.pipeline[samples_log2];
-
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
-
- cmd_buffer->state.flush_bits |=
- radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_READ_BIT |
- VK_ACCESS_SHADER_WRITE_BIT, image);
-
- radv_image_view_init(&iview, device,
- &(VkImageViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(image),
- .viewType = radv_meta_get_view_type(image),
- .format = vk_format_no_srgb(image->vk_format),
- .subresourceRange = {
- .aspectMask = subresourceRange->aspectMask,
- .baseMipLevel = 0,
- .levelCount = 1,
- .baseArrayLayer = subresourceRange->baseArrayLayer,
- .layerCount = layer_count,
- },
- }, NULL);
-
- radv_meta_push_descriptor_set(cmd_buffer,
- VK_PIPELINE_BIND_POINT_COMPUTE,
- cmd_buffer->device->meta_state.fmask_expand.p_layout,
- 0, /* set */
- 2, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]) {
- {
- .sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(&iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL
- },
- }
- },
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 1,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]) {
- {
- .sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(&iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL
- },
- }
- }
- });
-
- radv_unaligned_dispatch(cmd_buffer, image->info.width, image->info.height, layer_count);
-
- radv_meta_restore(&saved_state, cmd_buffer);
-
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
- radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);
-
- /* Re-initialize FMASK in fully expanded mode. */
- cmd_buffer->state.flush_bits |=
- radv_init_fmask(cmd_buffer, image, subresourceRange);
+ struct radv_device *device = cmd_buffer->device;
+ struct radv_meta_saved_state saved_state;
+ const uint32_t samples = image->info.samples;
+ const uint32_t samples_log2 = ffs(samples) - 1;
+ unsigned layer_count = radv_get_layerCount(image, subresourceRange);
+ struct radv_image_view iview;
+
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS);
+
+ VkPipeline pipeline = device->meta_state.fmask_expand.pipeline[samples_log2];
+
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
+ pipeline);
+
+ cmd_buffer->state.flush_bits |= radv_dst_access_flush(
+ cmd_buffer, VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, image);
+
+ radv_image_view_init(&iview, device,
+ &(VkImageViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(image),
+ .viewType = radv_meta_get_view_type(image),
+ .format = vk_format_no_srgb(image->vk_format),
+ .subresourceRange =
+ {
+ .aspectMask = subresourceRange->aspectMask,
+ .baseMipLevel = 0,
+ .levelCount = 1,
+ .baseArrayLayer = subresourceRange->baseArrayLayer,
+ .layerCount = layer_count,
+ },
+ },
+ NULL);
+
+ radv_meta_push_descriptor_set(
+ cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
+ cmd_buffer->device->meta_state.fmask_expand.p_layout, 0, /* set */
+ 2, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .pImageInfo =
+ (VkDescriptorImageInfo[]){
+ {.sampler = VK_NULL_HANDLE,
+ .imageView = radv_image_view_to_handle(&iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
+ }},
+ {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 1,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .pImageInfo = (VkDescriptorImageInfo[]){
+ {.sampler = VK_NULL_HANDLE,
+ .imageView = radv_image_view_to_handle(&iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
+ }}});
+
+ radv_unaligned_dispatch(cmd_buffer, image->info.width, image->info.height, layer_count);
+
+ radv_meta_restore(&saved_state, cmd_buffer);
+
+ cmd_buffer->state.flush_bits |=
+ RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
+ radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);
+
+ /* Re-initialize FMASK in fully expanded mode. */
+ cmd_buffer->state.flush_bits |= radv_init_fmask(cmd_buffer, image, subresourceRange);
}
-void radv_device_finish_meta_fmask_expand_state(struct radv_device *device)
+void
+radv_device_finish_meta_fmask_expand_state(struct radv_device *device)
{
- struct radv_meta_state *state = &device->meta_state;
-
- for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->fmask_expand.pipeline[i],
- &state->alloc);
- }
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->fmask_expand.p_layout,
- &state->alloc);
-
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- state->fmask_expand.ds_layout,
- &state->alloc);
+ struct radv_meta_state *state = &device->meta_state;
+
+ for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
+ radv_DestroyPipeline(radv_device_to_handle(device), state->fmask_expand.pipeline[i],
+ &state->alloc);
+ }
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->fmask_expand.p_layout,
+ &state->alloc);
+
+ radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->fmask_expand.ds_layout,
+ &state->alloc);
}
static VkResult
-create_fmask_expand_pipeline(struct radv_device *device,
- int samples,
- VkPipeline *pipeline)
+create_fmask_expand_pipeline(struct radv_device *device, int samples, VkPipeline *pipeline)
{
- struct radv_meta_state *state = &device->meta_state;
- VkResult result;
- nir_shader *cs = build_fmask_expand_compute_shader(device, samples);;
-
- VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = vk_shader_module_handle_from_nir(cs),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = pipeline_shader_stage,
- .flags = 0,
- .layout = state->fmask_expand.p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&state->cache),
- 1, &vk_pipeline_info, NULL,
- pipeline);
-
- ralloc_free(cs);
- return result;
+ struct radv_meta_state *state = &device->meta_state;
+ VkResult result;
+   nir_shader *cs = build_fmask_expand_compute_shader(device, samples);
+
+ VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(cs),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = pipeline_shader_stage,
+ .flags = 0,
+ .layout = state->fmask_expand.p_layout,
+ };
+
+ result = radv_CreateComputePipelines(radv_device_to_handle(device),
+ radv_pipeline_cache_to_handle(&state->cache), 1,
+ &vk_pipeline_info, NULL, pipeline);
+
+ ralloc_free(cs);
+ return result;
}
VkResult
radv_device_init_meta_fmask_expand_state(struct radv_device *device)
{
- struct radv_meta_state *state = &device->meta_state;
- VkResult result;
-
- VkDescriptorSetLayoutCreateInfo ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 2,
- .pBindings = (VkDescriptorSetLayoutBinding[]) {
- {
- .binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- {
- .binding = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- }
- };
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
- &ds_create_info, &state->alloc,
- &state->fmask_expand.ds_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
- VkPipelineLayoutCreateInfo color_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 1,
- .pSetLayouts = &state->fmask_expand.ds_layout,
- .pushConstantRangeCount = 0,
- .pPushConstantRanges = NULL,
- };
-
- result = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &color_create_info, &state->alloc,
- &state->fmask_expand.p_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
- for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; i++) {
- uint32_t samples = 1 << i;
- result = create_fmask_expand_pipeline(device, samples,
- &state->fmask_expand.pipeline[i]);
- if (result != VK_SUCCESS)
- goto fail;
- }
-
- return VK_SUCCESS;
+ struct radv_meta_state *state = &device->meta_state;
+ VkResult result;
+
+ VkDescriptorSetLayoutCreateInfo ds_create_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 2,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ {.binding = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
+ &state->alloc, &state->fmask_expand.ds_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineLayoutCreateInfo color_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &state->fmask_expand.ds_layout,
+ .pushConstantRangeCount = 0,
+ .pPushConstantRanges = NULL,
+ };
+
+ result = radv_CreatePipelineLayout(radv_device_to_handle(device), &color_create_info,
+ &state->alloc, &state->fmask_expand.p_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; i++) {
+ uint32_t samples = 1 << i;
+ result = create_fmask_expand_pipeline(device, samples, &state->fmask_expand.pipeline[i]);
+ if (result != VK_SUCCESS)
+ goto fail;
+ }
+
+ return VK_SUCCESS;
fail:
- radv_device_finish_meta_fmask_expand_state(device);
- return result;
+ radv_device_finish_meta_fmask_expand_state(device);
+ return result;
}
diff --git a/src/amd/vulkan/radv_meta_resolve.c b/src/amd/vulkan/radv_meta_resolve.c
index 301725d19da..3a23f392249 100644
--- a/src/amd/vulkan/radv_meta_resolve.c
+++ b/src/amd/vulkan/radv_meta_resolve.c
@@ -24,751 +24,708 @@
#include <assert.h>
#include <stdbool.h>
+#include "nir/nir_builder.h"
#include "radv_meta.h"
#include "radv_private.h"
-#include "vk_format.h"
-#include "nir/nir_builder.h"
#include "sid.h"
+#include "vk_format.h"
/* emit 0, 0, 0, 1 */
static nir_shader *
build_nir_fs(void)
{
- const struct glsl_type *vec4 = glsl_vec4_type();
- nir_variable *f_color; /* vec4, fragment output color */
+ const struct glsl_type *vec4 = glsl_vec4_type();
+ nir_variable *f_color; /* vec4, fragment output color */
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, "meta_resolve_fs");
+ nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, "meta_resolve_fs");
- f_color = nir_variable_create(b.shader, nir_var_shader_out, vec4,
- "f_color");
- f_color->data.location = FRAG_RESULT_DATA0;
- nir_store_var(&b, f_color, nir_imm_vec4(&b, 0.0, 0.0, 0.0, 1.0), 0xf);
+ f_color = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color");
+ f_color->data.location = FRAG_RESULT_DATA0;
+ nir_store_var(&b, f_color, nir_imm_vec4(&b, 0.0, 0.0, 0.0, 1.0), 0xf);
- return b.shader;
+ return b.shader;
}
static VkResult
create_pass(struct radv_device *device, VkFormat vk_format, VkRenderPass *pass)
{
- VkResult result;
- VkDevice device_h = radv_device_to_handle(device);
- const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
- VkAttachmentDescription2 attachments[2];
- int i;
-
- for (i = 0; i < 2; i++) {
- attachments[i].sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2;
- attachments[i].format = vk_format;
- attachments[i].samples = 1;
- attachments[i].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
- attachments[i].storeOp = VK_ATTACHMENT_STORE_OP_STORE;
- }
- attachments[0].initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
- attachments[0].finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
- attachments[1].initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
- attachments[1].finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
-
- result = radv_CreateRenderPass2(device_h,
- &(VkRenderPassCreateInfo2) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
- .attachmentCount = 2,
- .pAttachments = attachments,
- .subpassCount = 1,
- .pSubpasses = &(VkSubpassDescription2) {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
- .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
- .inputAttachmentCount = 0,
- .colorAttachmentCount = 2,
- .pColorAttachments = (VkAttachmentReference2[]) {
- {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
- .attachment = 0,
- .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
- },
- {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
- .attachment = 1,
- .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
- },
- },
- .pResolveAttachments = NULL,
- .pDepthStencilAttachment = &(VkAttachmentReference2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
- .attachment = VK_ATTACHMENT_UNUSED,
- },
- .preserveAttachmentCount = 0,
- .pPreserveAttachments = NULL,
- },
- .dependencyCount = 2,
- .pDependencies = (VkSubpassDependency2[]) {
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = VK_SUBPASS_EXTERNAL,
- .dstSubpass = 0,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- },
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = 0,
- .dstSubpass = VK_SUBPASS_EXTERNAL,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- }
- },
- },
- alloc,
- pass);
-
- return result;
+ VkResult result;
+ VkDevice device_h = radv_device_to_handle(device);
+ const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
+ VkAttachmentDescription2 attachments[2];
+ int i;
+
+ for (i = 0; i < 2; i++) {
+ attachments[i].sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2;
+ attachments[i].format = vk_format;
+ attachments[i].samples = 1;
+ attachments[i].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
+ attachments[i].storeOp = VK_ATTACHMENT_STORE_OP_STORE;
+ }
+ attachments[0].initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+ attachments[0].finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+ attachments[1].initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+ attachments[1].finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+
+ result = radv_CreateRenderPass2(
+ device_h,
+ &(VkRenderPassCreateInfo2){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
+ .attachmentCount = 2,
+ .pAttachments = attachments,
+ .subpassCount = 1,
+ .pSubpasses =
+ &(VkSubpassDescription2){
+ .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .colorAttachmentCount = 2,
+ .pColorAttachments =
+ (VkAttachmentReference2[]){
+ {
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
+ .attachment = 0,
+ .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
+ .attachment = 1,
+ .layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
+ },
+ },
+ .pResolveAttachments = NULL,
+ .pDepthStencilAttachment =
+ &(VkAttachmentReference2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
+ .attachment = VK_ATTACHMENT_UNUSED,
+ },
+ .preserveAttachmentCount = 0,
+ .pPreserveAttachments = NULL,
+ },
+ .dependencyCount = 2,
+ .pDependencies =
+ (VkSubpassDependency2[]){{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = VK_SUBPASS_EXTERNAL,
+ .dstSubpass = 0,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0},
+ {.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = 0,
+ .dstSubpass = VK_SUBPASS_EXTERNAL,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0}},
+ },
+ alloc, pass);
+
+ return result;
}
static VkResult
-create_pipeline(struct radv_device *device,
- VkShaderModule vs_module_h,
- VkPipeline *pipeline,
- VkRenderPass pass)
+create_pipeline(struct radv_device *device, VkShaderModule vs_module_h, VkPipeline *pipeline,
+ VkRenderPass pass)
{
- VkResult result;
- VkDevice device_h = radv_device_to_handle(device);
-
- nir_shader *fs_module = build_nir_fs();
- if (!fs_module) {
- /* XXX: Need more accurate error */
- result = VK_ERROR_OUT_OF_HOST_MEMORY;
- goto cleanup;
- }
-
- VkPipelineLayoutCreateInfo pl_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 0,
- .pSetLayouts = NULL,
- .pushConstantRangeCount = 0,
- .pPushConstantRanges = NULL,
- };
-
- if (!device->meta_state.resolve.p_layout) {
- result = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &pl_create_info,
- &device->meta_state.alloc,
- &device->meta_state.resolve.p_layout);
- if (result != VK_SUCCESS)
- goto cleanup;
- }
-
- result = radv_graphics_pipeline_create(device_h,
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- &(VkGraphicsPipelineCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
- .stageCount = 2,
- .pStages = (VkPipelineShaderStageCreateInfo[]) {
- {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_VERTEX_BIT,
- .module = vs_module_h,
- .pName = "main",
- },
- {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
- .module = vk_shader_module_handle_from_nir(fs_module),
- .pName = "main",
- },
- },
- .pVertexInputState = &(VkPipelineVertexInputStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
- .vertexBindingDescriptionCount = 0,
- .vertexAttributeDescriptionCount = 0,
- },
- .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
- .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
- .primitiveRestartEnable = false,
- },
- .pViewportState = &(VkPipelineViewportStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
- .viewportCount = 1,
- .scissorCount = 1,
- },
- .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
- .depthClampEnable = false,
- .rasterizerDiscardEnable = false,
- .polygonMode = VK_POLYGON_MODE_FILL,
- .cullMode = VK_CULL_MODE_NONE,
- .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
- },
- .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
- .rasterizationSamples = 1,
- .sampleShadingEnable = false,
- .pSampleMask = NULL,
- .alphaToCoverageEnable = false,
- .alphaToOneEnable = false,
- },
- .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
- .logicOpEnable = false,
- .attachmentCount = 2,
- .pAttachments = (VkPipelineColorBlendAttachmentState []) {
- {
- .colorWriteMask = VK_COLOR_COMPONENT_R_BIT |
- VK_COLOR_COMPONENT_G_BIT |
- VK_COLOR_COMPONENT_B_BIT |
- VK_COLOR_COMPONENT_A_BIT,
- },
- {
- .colorWriteMask = 0,
-
- }
- },
- },
- .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
- .dynamicStateCount = 2,
- .pDynamicStates = (VkDynamicState[]) {
- VK_DYNAMIC_STATE_VIEWPORT,
- VK_DYNAMIC_STATE_SCISSOR,
- },
- },
- .layout = device->meta_state.resolve.p_layout,
- .renderPass = pass,
- .subpass = 0,
- },
- &(struct radv_graphics_pipeline_create_info) {
- .use_rectlist = true,
- .custom_blend_mode = V_028808_CB_RESOLVE,
- },
- &device->meta_state.alloc, pipeline);
- if (result != VK_SUCCESS)
- goto cleanup;
-
- goto cleanup;
+ VkResult result;
+ VkDevice device_h = radv_device_to_handle(device);
+
+ nir_shader *fs_module = build_nir_fs();
+ if (!fs_module) {
+ /* XXX: Need more accurate error */
+ result = VK_ERROR_OUT_OF_HOST_MEMORY;
+ goto cleanup;
+ }
+
+ VkPipelineLayoutCreateInfo pl_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 0,
+ .pSetLayouts = NULL,
+ .pushConstantRangeCount = 0,
+ .pPushConstantRanges = NULL,
+ };
+
+ if (!device->meta_state.resolve.p_layout) {
+ result =
+ radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
+ &device->meta_state.alloc, &device->meta_state.resolve.p_layout);
+ if (result != VK_SUCCESS)
+ goto cleanup;
+ }
+
+ result = radv_graphics_pipeline_create(
+ device_h, radv_pipeline_cache_to_handle(&device->meta_state.cache),
+ &(VkGraphicsPipelineCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .stageCount = 2,
+ .pStages =
+ (VkPipelineShaderStageCreateInfo[]){
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_VERTEX_BIT,
+ .module = vs_module_h,
+ .pName = "main",
+ },
+ {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .module = vk_shader_module_handle_from_nir(fs_module),
+ .pName = "main",
+ },
+ },
+ .pVertexInputState =
+ &(VkPipelineVertexInputStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .vertexBindingDescriptionCount = 0,
+ .vertexAttributeDescriptionCount = 0,
+ },
+ .pInputAssemblyState =
+ &(VkPipelineInputAssemblyStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
+ .primitiveRestartEnable = false,
+ },
+ .pViewportState =
+ &(VkPipelineViewportStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .viewportCount = 1,
+ .scissorCount = 1,
+ },
+ .pRasterizationState =
+ &(VkPipelineRasterizationStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .depthClampEnable = false,
+ .rasterizerDiscardEnable = false,
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .cullMode = VK_CULL_MODE_NONE,
+ .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
+ },
+ .pMultisampleState =
+ &(VkPipelineMultisampleStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .rasterizationSamples = 1,
+ .sampleShadingEnable = false,
+ .pSampleMask = NULL,
+ .alphaToCoverageEnable = false,
+ .alphaToOneEnable = false,
+ },
+ .pColorBlendState =
+ &(VkPipelineColorBlendStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .logicOpEnable = false,
+ .attachmentCount = 2,
+ .pAttachments =
+ (VkPipelineColorBlendAttachmentState[]){
+ {
+ .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
+ VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
+ },
+ {
+ .colorWriteMask = 0,
+
+ }},
+ },
+ .pDynamicState =
+ &(VkPipelineDynamicStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .dynamicStateCount = 2,
+ .pDynamicStates =
+ (VkDynamicState[]){
+ VK_DYNAMIC_STATE_VIEWPORT,
+ VK_DYNAMIC_STATE_SCISSOR,
+ },
+ },
+ .layout = device->meta_state.resolve.p_layout,
+ .renderPass = pass,
+ .subpass = 0,
+ },
+ &(struct radv_graphics_pipeline_create_info){
+ .use_rectlist = true,
+ .custom_blend_mode = V_028808_CB_RESOLVE,
+ },
+ &device->meta_state.alloc, pipeline);
+ if (result != VK_SUCCESS)
+ goto cleanup;
+
+ goto cleanup;
cleanup:
- ralloc_free(fs_module);
- return result;
+ ralloc_free(fs_module);
+ return result;
}
void
radv_device_finish_meta_resolve_state(struct radv_device *device)
{
- struct radv_meta_state *state = &device->meta_state;
-
- for (uint32_t j = 0; j < NUM_META_FS_KEYS; j++) {
- radv_DestroyRenderPass(radv_device_to_handle(device),
- state->resolve.pass[j], &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve.pipeline[j], &state->alloc);
- }
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->resolve.p_layout, &state->alloc);
-
+ struct radv_meta_state *state = &device->meta_state;
+
+ for (uint32_t j = 0; j < NUM_META_FS_KEYS; j++) {
+ radv_DestroyRenderPass(radv_device_to_handle(device), state->resolve.pass[j], &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device), state->resolve.pipeline[j],
+ &state->alloc);
+ }
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->resolve.p_layout,
+ &state->alloc);
}
VkResult
radv_device_init_meta_resolve_state(struct radv_device *device, bool on_demand)
{
- if (on_demand)
- return VK_SUCCESS;
-
- VkResult res = VK_SUCCESS;
- struct radv_meta_state *state = &device->meta_state;
- nir_shader *vs_module = radv_meta_build_nir_vs_generate_vertices();
- if (!vs_module) {
- /* XXX: Need more accurate error */
- res = VK_ERROR_OUT_OF_HOST_MEMORY;
- goto fail;
- }
-
- for (uint32_t i = 0; i < NUM_META_FS_KEYS; ++i) {
- VkFormat format = radv_fs_key_format_exemplars[i];
- unsigned fs_key = radv_format_meta_fs_key(device, format);
- res = create_pass(device, format, &state->resolve.pass[fs_key]);
- if (res != VK_SUCCESS)
- goto fail;
-
- VkShaderModule vs_module_h = vk_shader_module_handle_from_nir(vs_module);
- res = create_pipeline(device, vs_module_h,
- &state->resolve.pipeline[fs_key], state->resolve.pass[fs_key]);
- if (res != VK_SUCCESS)
- goto fail;
- }
-
- goto cleanup;
+ if (on_demand)
+ return VK_SUCCESS;
+
+ VkResult res = VK_SUCCESS;
+ struct radv_meta_state *state = &device->meta_state;
+ nir_shader *vs_module = radv_meta_build_nir_vs_generate_vertices();
+ if (!vs_module) {
+ /* XXX: Need more accurate error */
+ res = VK_ERROR_OUT_OF_HOST_MEMORY;
+ goto fail;
+ }
+
+ for (uint32_t i = 0; i < NUM_META_FS_KEYS; ++i) {
+ VkFormat format = radv_fs_key_format_exemplars[i];
+ unsigned fs_key = radv_format_meta_fs_key(device, format);
+ res = create_pass(device, format, &state->resolve.pass[fs_key]);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ VkShaderModule vs_module_h = vk_shader_module_handle_from_nir(vs_module);
+ res = create_pipeline(device, vs_module_h, &state->resolve.pipeline[fs_key],
+ state->resolve.pass[fs_key]);
+ if (res != VK_SUCCESS)
+ goto fail;
+ }
+
+ goto cleanup;
fail:
- radv_device_finish_meta_resolve_state(device);
+ radv_device_finish_meta_resolve_state(device);
cleanup:
- ralloc_free(vs_module);
+ ralloc_free(vs_module);
- return res;
+ return res;
}
static void
-emit_resolve(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_image *src_image,
- const struct radv_image *dst_image,
- VkFormat vk_format,
- const VkOffset2D *dest_offset,
+emit_resolve(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *src_image,
+ const struct radv_image *dst_image, VkFormat vk_format, const VkOffset2D *dest_offset,
const VkExtent2D *resolve_extent)
{
- struct radv_device *device = cmd_buffer->device;
- VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
- unsigned fs_key = radv_format_meta_fs_key(device, vk_format);
-
- cmd_buffer->state.flush_bits |=
- radv_src_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, src_image) |
- radv_dst_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_READ_BIT, src_image) |
- radv_dst_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, dst_image);
-
- radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
- device->meta_state.resolve.pipeline[fs_key]);
-
- radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
- .x = dest_offset->x,
- .y = dest_offset->y,
- .width = resolve_extent->width,
- .height = resolve_extent->height,
- .minDepth = 0.0f,
- .maxDepth = 1.0f
- });
-
- radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkRect2D) {
- .offset = *dest_offset,
- .extent = *resolve_extent,
- });
-
- radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0);
- cmd_buffer->state.flush_bits |=
- radv_src_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, dst_image);
+ struct radv_device *device = cmd_buffer->device;
+ VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
+ unsigned fs_key = radv_format_meta_fs_key(device, vk_format);
+
+ cmd_buffer->state.flush_bits |=
+ radv_src_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, src_image) |
+ radv_dst_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_READ_BIT, src_image) |
+ radv_dst_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, dst_image);
+
+ radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
+ device->meta_state.resolve.pipeline[fs_key]);
+
+ radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
+ &(VkViewport){.x = dest_offset->x,
+ .y = dest_offset->y,
+ .width = resolve_extent->width,
+ .height = resolve_extent->height,
+ .minDepth = 0.0f,
+ .maxDepth = 1.0f});
+
+ radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
+ &(VkRect2D){
+ .offset = *dest_offset,
+ .extent = *resolve_extent,
+ });
+
+ radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0);
+ cmd_buffer->state.flush_bits |=
+ radv_src_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, dst_image);
}
enum radv_resolve_method {
- RESOLVE_HW,
- RESOLVE_COMPUTE,
- RESOLVE_FRAGMENT,
+ RESOLVE_HW,
+ RESOLVE_COMPUTE,
+ RESOLVE_FRAGMENT,
};
-static bool image_hw_resolve_compat(const struct radv_device *device,
- struct radv_image *src_image,
- struct radv_image *dst_image)
+static bool
+image_hw_resolve_compat(const struct radv_device *device, struct radv_image *src_image,
+ struct radv_image *dst_image)
{
- if (device->physical_device->rad_info.chip_class >= GFX9) {
- return dst_image->planes[0].surface.u.gfx9.surf.swizzle_mode ==
- src_image->planes[0].surface.u.gfx9.surf.swizzle_mode;
- } else {
- return dst_image->planes[0].surface.micro_tile_mode ==
- src_image->planes[0].surface.micro_tile_mode;
- }
+ if (device->physical_device->rad_info.chip_class >= GFX9) {
+ return dst_image->planes[0].surface.u.gfx9.surf.swizzle_mode ==
+ src_image->planes[0].surface.u.gfx9.surf.swizzle_mode;
+ } else {
+ return dst_image->planes[0].surface.micro_tile_mode ==
+ src_image->planes[0].surface.micro_tile_mode;
+ }
}
-static void radv_pick_resolve_method_images(struct radv_device *device,
- struct radv_image *src_image,
- VkFormat src_format,
- struct radv_image *dest_image,
- VkImageLayout dest_image_layout,
- bool dest_render_loop,
- struct radv_cmd_buffer *cmd_buffer,
- enum radv_resolve_method *method)
+static void
+radv_pick_resolve_method_images(struct radv_device *device, struct radv_image *src_image,
+ VkFormat src_format, struct radv_image *dest_image,
+ VkImageLayout dest_image_layout, bool dest_render_loop,
+ struct radv_cmd_buffer *cmd_buffer,
+ enum radv_resolve_method *method)
{
- uint32_t queue_mask = radv_image_queue_family_mask(dest_image,
- cmd_buffer->queue_family_index,
- cmd_buffer->queue_family_index);
-
- if (vk_format_is_color(src_format)) {
-		/* Using the fragment resolve path is currently a hint to
-		 * avoid decompressing DCC for partial resolves and
-		 * re-initializing it after resolving using compute.
-		 * TODO: Add support for layered and int to the fragment path.
-		 */
- if (radv_layout_dcc_compressed(device, dest_image, dest_image_layout,
- dest_render_loop, queue_mask)) {
- *method = RESOLVE_FRAGMENT;
- } else if (!image_hw_resolve_compat(device, src_image, dest_image)) {
- /* The micro tile mode only needs to match for the HW
- * resolve path which is the default path for non-DCC
- * resolves.
- */
- *method = RESOLVE_COMPUTE;
- }
-
- if (src_format == VK_FORMAT_R16G16_UNORM ||
- src_format == VK_FORMAT_R16G16_SNORM)
- *method = RESOLVE_COMPUTE;
- else if (vk_format_is_int(src_format))
- *method = RESOLVE_COMPUTE;
- else if (src_image->info.array_size > 1 ||
- dest_image->info.array_size > 1)
- *method = RESOLVE_COMPUTE;
- } else {
- if (src_image->info.array_size > 1 ||
- dest_image->info.array_size > 1)
- *method = RESOLVE_COMPUTE;
- else
- *method = RESOLVE_FRAGMENT;
- }
+ uint32_t queue_mask = radv_image_queue_family_mask(dest_image, cmd_buffer->queue_family_index,
+ cmd_buffer->queue_family_index);
+
+ if (vk_format_is_color(src_format)) {
+      /* Using the fragment resolve path is currently a hint to
+       * avoid decompressing DCC for partial resolves and
+       * re-initializing it after resolving using compute.
+       * TODO: Add support for layered and int to the fragment path.
+       */
+ if (radv_layout_dcc_compressed(device, dest_image, dest_image_layout, dest_render_loop,
+ queue_mask)) {
+ *method = RESOLVE_FRAGMENT;
+ } else if (!image_hw_resolve_compat(device, src_image, dest_image)) {
+ /* The micro tile mode only needs to match for the HW
+ * resolve path which is the default path for non-DCC
+ * resolves.
+ */
+ *method = RESOLVE_COMPUTE;
+ }
+
+ if (src_format == VK_FORMAT_R16G16_UNORM || src_format == VK_FORMAT_R16G16_SNORM)
+ *method = RESOLVE_COMPUTE;
+ else if (vk_format_is_int(src_format))
+ *method = RESOLVE_COMPUTE;
+ else if (src_image->info.array_size > 1 || dest_image->info.array_size > 1)
+ *method = RESOLVE_COMPUTE;
+ } else {
+ if (src_image->info.array_size > 1 || dest_image->info.array_size > 1)
+ *method = RESOLVE_COMPUTE;
+ else
+ *method = RESOLVE_FRAGMENT;
+ }
}
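In summary, the selection above resolves to the following (a restatement of the code, not additional driver policy):

/* Color resolves:
 *   - destination is DCC-compressed in the target layout -> RESOLVE_FRAGMENT
 *   - otherwise, src/dst tiling modes do not match        -> RESOLVE_COMPUTE
 *   - then, regardless of the above:
 *       R16G16_UNORM / R16G16_SNORM formats               -> RESOLVE_COMPUTE
 *       integer formats                                   -> RESOLVE_COMPUTE
 *       layered (array) source or destination             -> RESOLVE_COMPUTE
 * Depth/stencil resolves:
 *   - layered source or destination                       -> RESOLVE_COMPUTE
 *   - otherwise                                           -> RESOLVE_FRAGMENT
 * If none of the rules fire, *method keeps the caller's initial value
 * (e.g. RESOLVE_HW for a single full-image vkCmdResolveImage2KHR).
 */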
static VkResult
-build_resolve_pipeline(struct radv_device *device,
- unsigned fs_key)
+build_resolve_pipeline(struct radv_device *device, unsigned fs_key)
{
- VkResult result = VK_SUCCESS;
+ VkResult result = VK_SUCCESS;
- if (device->meta_state.resolve.pipeline[fs_key])
- return result;
+ if (device->meta_state.resolve.pipeline[fs_key])
+ return result;
- mtx_lock(&device->meta_state.mtx);
- if (device->meta_state.resolve.pipeline[fs_key]) {
- mtx_unlock(&device->meta_state.mtx);
- return result;
- }
+ mtx_lock(&device->meta_state.mtx);
+ if (device->meta_state.resolve.pipeline[fs_key]) {
+ mtx_unlock(&device->meta_state.mtx);
+ return result;
+ }
- nir_shader *vs_module = radv_meta_build_nir_vs_generate_vertices();
+ nir_shader *vs_module = radv_meta_build_nir_vs_generate_vertices();
- result = create_pass(device, radv_fs_key_format_exemplars[fs_key], &device->meta_state.resolve.pass[fs_key]);
- if (result != VK_SUCCESS)
- goto fail;
+ result = create_pass(device, radv_fs_key_format_exemplars[fs_key],
+ &device->meta_state.resolve.pass[fs_key]);
+ if (result != VK_SUCCESS)
+ goto fail;
- VkShaderModule vs_module_h = vk_shader_module_handle_from_nir(vs_module);
- result = create_pipeline(device, vs_module_h, &device->meta_state.resolve.pipeline[fs_key], device->meta_state.resolve.pass[fs_key]);
+ VkShaderModule vs_module_h = vk_shader_module_handle_from_nir(vs_module);
+ result = create_pipeline(device, vs_module_h, &device->meta_state.resolve.pipeline[fs_key],
+ device->meta_state.resolve.pass[fs_key]);
fail:
- ralloc_free(vs_module);
- mtx_unlock(&device->meta_state.mtx);
- return result;
+ ralloc_free(vs_module);
+ mtx_unlock(&device->meta_state.mtx);
+ return result;
}
static void
-radv_meta_resolve_hardware_image(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *src_image,
- VkImageLayout src_image_layout,
- struct radv_image *dst_image,
- VkImageLayout dst_image_layout,
- const VkImageResolve2KHR *region)
+radv_meta_resolve_hardware_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,
+ VkImageLayout src_image_layout, struct radv_image *dst_image,
+ VkImageLayout dst_image_layout, const VkImageResolve2KHR *region)
{
- struct radv_device *device = cmd_buffer->device;
- struct radv_meta_saved_state saved_state;
-
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_GRAPHICS_PIPELINE);
-
- assert(src_image->info.samples > 1);
- if (src_image->info.samples <= 1) {
- /* this causes GPU hangs if we get past here */
- fprintf(stderr, "radv: Illegal resolve operation (src not multisampled), will hang GPU.");
- return;
- }
- assert(dst_image->info.samples == 1);
-
- if (src_image->info.array_size > 1)
- radv_finishme("vkCmdResolveImage: multisample array images");
-
- unsigned fs_key = radv_format_meta_fs_key(device, dst_image->vk_format);
-
- /* From the Vulkan 1.0 spec:
- *
- * - The aspectMask member of srcSubresource and dstSubresource must
- * only contain VK_IMAGE_ASPECT_COLOR_BIT
- *
- * - The layerCount member of srcSubresource and dstSubresource must
- * match
- */
- assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
- assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
- assert(region->srcSubresource.layerCount ==
- region->dstSubresource.layerCount);
-
- const uint32_t src_base_layer =
- radv_meta_get_iview_layer(src_image, &region->srcSubresource,
- &region->srcOffset);
-
- const uint32_t dst_base_layer =
- radv_meta_get_iview_layer(dst_image, &region->dstSubresource,
- &region->dstOffset);
-
- /**
- * From Vulkan 1.0.6 spec: 18.6 Resolving Multisample Images
- *
- * extent is the size in texels of the source image to resolve in width,
- * height and depth. 1D images use only x and width. 2D images use x, y,
- * width and height. 3D images use x, y, z, width, height and depth.
- *
- * srcOffset and dstOffset select the initial x, y, and z offsets in
- * texels of the sub-regions of the source and destination image data.
- * extent is the size in texels of the source image to resolve in width,
- * height and depth. 1D images use only x and width. 2D images use x, y,
- * width and height. 3D images use x, y, z, width, height and depth.
- */
- const struct VkExtent3D extent =
- radv_sanitize_image_extent(src_image->type, region->extent);
- const struct VkOffset3D dstOffset =
- radv_sanitize_image_offset(dst_image->type, region->dstOffset);
-
- if (radv_dcc_enabled(dst_image, region->dstSubresource.mipLevel)) {
- VkImageSubresourceRange range = {
- .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .baseMipLevel = region->dstSubresource.mipLevel,
- .levelCount = 1,
- .baseArrayLayer = dst_base_layer,
- .layerCount = region->dstSubresource.layerCount,
- };
-
- cmd_buffer->state.flush_bits |=
- radv_init_dcc(cmd_buffer, dst_image, &range, 0xffffffff);
- }
-
- for (uint32_t layer = 0; layer < region->srcSubresource.layerCount;
- ++layer) {
-
- VkResult ret = build_resolve_pipeline(device, fs_key);
- if (ret != VK_SUCCESS) {
- cmd_buffer->record_result = ret;
- break;
- }
-
- struct radv_image_view src_iview;
- radv_image_view_init(&src_iview, cmd_buffer->device,
- &(VkImageViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(src_image),
- .viewType = radv_meta_get_view_type(src_image),
- .format = src_image->vk_format,
- .subresourceRange = {
- .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .baseMipLevel = region->srcSubresource.mipLevel,
- .levelCount = 1,
- .baseArrayLayer = src_base_layer + layer,
- .layerCount = 1,
- },
- }, NULL);
-
- struct radv_image_view dst_iview;
- radv_image_view_init(&dst_iview, cmd_buffer->device,
- &(VkImageViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(dst_image),
- .viewType = radv_meta_get_view_type(dst_image),
- .format = dst_image->vk_format,
- .subresourceRange = {
- .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .baseMipLevel = region->dstSubresource.mipLevel,
- .levelCount = 1,
- .baseArrayLayer = dst_base_layer + layer,
- .layerCount = 1,
- },
- }, NULL);
-
- VkFramebuffer fb_h;
- radv_CreateFramebuffer(radv_device_to_handle(device),
- &(VkFramebufferCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
- .attachmentCount = 2,
- .pAttachments = (VkImageView[]) {
- radv_image_view_to_handle(&src_iview),
- radv_image_view_to_handle(&dst_iview),
- },
- .width = radv_minify(dst_image->info.width,
- region->dstSubresource.mipLevel),
- .height = radv_minify(dst_image->info.height,
- region->dstSubresource.mipLevel),
- .layers = 1
- },
- &cmd_buffer->pool->alloc,
- &fb_h);
-
- radv_cmd_buffer_begin_render_pass(cmd_buffer,
- &(VkRenderPassBeginInfo) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
- .renderPass = device->meta_state.resolve.pass[fs_key],
- .framebuffer = fb_h,
- .renderArea = {
- .offset = {
- dstOffset.x,
- dstOffset.y,
- },
- .extent = {
- extent.width,
- extent.height,
- }
- },
- .clearValueCount = 0,
- .pClearValues = NULL,
- }, NULL);
-
- radv_cmd_buffer_set_subpass(cmd_buffer,
- &cmd_buffer->state.pass->subpasses[0]);
-
- emit_resolve(cmd_buffer, src_image, dst_image,
- dst_iview.vk_format,
- &(VkOffset2D) {
- .x = dstOffset.x,
- .y = dstOffset.y,
- },
- &(VkExtent2D) {
- .width = extent.width,
- .height = extent.height,
- });
-
- radv_cmd_buffer_end_render_pass(cmd_buffer);
-
- radv_DestroyFramebuffer(radv_device_to_handle(device),
- fb_h, &cmd_buffer->pool->alloc);
- }
-
- radv_meta_restore(&saved_state, cmd_buffer);
+ struct radv_device *device = cmd_buffer->device;
+ struct radv_meta_saved_state saved_state;
+
+ radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_GRAPHICS_PIPELINE);
+
+ assert(src_image->info.samples > 1);
+ if (src_image->info.samples <= 1) {
+ /* this causes GPU hangs if we get past here */
+ fprintf(stderr, "radv: Illegal resolve operation (src not multisampled), will hang GPU.");
+ return;
+ }
+ assert(dst_image->info.samples == 1);
+
+ if (src_image->info.array_size > 1)
+ radv_finishme("vkCmdResolveImage: multisample array images");
+
+ unsigned fs_key = radv_format_meta_fs_key(device, dst_image->vk_format);
+
+ /* From the Vulkan 1.0 spec:
+ *
+ * - The aspectMask member of srcSubresource and dstSubresource must
+ * only contain VK_IMAGE_ASPECT_COLOR_BIT
+ *
+ * - The layerCount member of srcSubresource and dstSubresource must
+ * match
+ */
+ assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
+ assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
+ assert(region->srcSubresource.layerCount == region->dstSubresource.layerCount);
+
+ const uint32_t src_base_layer =
+ radv_meta_get_iview_layer(src_image, &region->srcSubresource, &region->srcOffset);
+
+ const uint32_t dst_base_layer =
+ radv_meta_get_iview_layer(dst_image, &region->dstSubresource, &region->dstOffset);
+
+ /**
+ * From Vulkan 1.0.6 spec: 18.6 Resolving Multisample Images
+ *
+ * extent is the size in texels of the source image to resolve in width,
+ * height and depth. 1D images use only x and width. 2D images use x, y,
+ * width and height. 3D images use x, y, z, width, height and depth.
+ *
+ * srcOffset and dstOffset select the initial x, y, and z offsets in
+ * texels of the sub-regions of the source and destination image data.
+ * extent is the size in texels of the source image to resolve in width,
+ * height and depth. 1D images use only x and width. 2D images use x, y,
+ * width and height. 3D images use x, y, z, width, height and depth.
+ */
+ const struct VkExtent3D extent = radv_sanitize_image_extent(src_image->type, region->extent);
+ const struct VkOffset3D dstOffset =
+ radv_sanitize_image_offset(dst_image->type, region->dstOffset);
+
+ if (radv_dcc_enabled(dst_image, region->dstSubresource.mipLevel)) {
+ VkImageSubresourceRange range = {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = region->dstSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = dst_base_layer,
+ .layerCount = region->dstSubresource.layerCount,
+ };
+
+ cmd_buffer->state.flush_bits |= radv_init_dcc(cmd_buffer, dst_image, &range, 0xffffffff);
+ }
+
+ for (uint32_t layer = 0; layer < region->srcSubresource.layerCount; ++layer) {
+
+ VkResult ret = build_resolve_pipeline(device, fs_key);
+ if (ret != VK_SUCCESS) {
+ cmd_buffer->record_result = ret;
+ break;
+ }
+
+ struct radv_image_view src_iview;
+ radv_image_view_init(&src_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(src_image),
+ .viewType = radv_meta_get_view_type(src_image),
+ .format = src_image->vk_format,
+ .subresourceRange =
+ {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = region->srcSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = src_base_layer + layer,
+ .layerCount = 1,
+ },
+ },
+ NULL);
+
+ struct radv_image_view dst_iview;
+ radv_image_view_init(&dst_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(dst_image),
+ .viewType = radv_meta_get_view_type(dst_image),
+ .format = dst_image->vk_format,
+ .subresourceRange =
+ {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = region->dstSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = dst_base_layer + layer,
+ .layerCount = 1,
+ },
+ },
+ NULL);
+
+ VkFramebuffer fb_h;
+ radv_CreateFramebuffer(
+ radv_device_to_handle(device),
+ &(VkFramebufferCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+ .attachmentCount = 2,
+ .pAttachments =
+ (VkImageView[]){
+ radv_image_view_to_handle(&src_iview),
+ radv_image_view_to_handle(&dst_iview),
+ },
+ .width = radv_minify(dst_image->info.width, region->dstSubresource.mipLevel),
+ .height = radv_minify(dst_image->info.height, region->dstSubresource.mipLevel),
+ .layers = 1},
+ &cmd_buffer->pool->alloc, &fb_h);
+
+ radv_cmd_buffer_begin_render_pass(cmd_buffer,
+ &(VkRenderPassBeginInfo){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .renderPass = device->meta_state.resolve.pass[fs_key],
+ .framebuffer = fb_h,
+ .renderArea = {.offset =
+ {
+ dstOffset.x,
+ dstOffset.y,
+ },
+ .extent =
+ {
+ extent.width,
+ extent.height,
+ }},
+ .clearValueCount = 0,
+ .pClearValues = NULL,
+ },
+ NULL);
+
+ radv_cmd_buffer_set_subpass(cmd_buffer, &cmd_buffer->state.pass->subpasses[0]);
+
+ emit_resolve(cmd_buffer, src_image, dst_image, dst_iview.vk_format,
+ &(VkOffset2D){
+ .x = dstOffset.x,
+ .y = dstOffset.y,
+ },
+ &(VkExtent2D){
+ .width = extent.width,
+ .height = extent.height,
+ });
+
+ radv_cmd_buffer_end_render_pass(cmd_buffer);
+
+ radv_DestroyFramebuffer(radv_device_to_handle(device), fb_h, &cmd_buffer->pool->alloc);
+ }
+
+ radv_meta_restore(&saved_state, cmd_buffer);
}
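For readability, the hardware resolve path above performs the following steps per array layer (an outline of the code, nothing new):

/*   1. build_resolve_pipeline(): lazily create the render pass + pipeline for
 *      the destination format's fs_key.
 *   2. Create single-layer, single-level color views of the source and
 *      destination subresources.
 *   3. Create a two-attachment framebuffer (source, destination) sized to the
 *      destination mip level.
 *   4. Begin the meta render pass over the destination rectangle and let
 *      emit_resolve() draw the full-screen triangle.
 *   5. End the render pass and destroy the temporary framebuffer.
 * DCC on the destination is re-initialized once, before the loop.
 */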
static void
-resolve_image(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *src_image,
- VkImageLayout src_image_layout,
- struct radv_image *dst_image,
- VkImageLayout dst_image_layout,
- const VkImageResolve2KHR *region,
- enum radv_resolve_method resolve_method)
+resolve_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,
+ VkImageLayout src_image_layout, struct radv_image *dst_image,
+ VkImageLayout dst_image_layout, const VkImageResolve2KHR *region,
+ enum radv_resolve_method resolve_method)
{
- switch (resolve_method) {
- case RESOLVE_HW:
- radv_meta_resolve_hardware_image(cmd_buffer,
- src_image,
- src_image_layout,
- dst_image,
- dst_image_layout,
- region);
- break;
- case RESOLVE_FRAGMENT:
- radv_meta_resolve_fragment_image(cmd_buffer,
- src_image,
- src_image_layout,
- dst_image,
- dst_image_layout,
- region);
- break;
- case RESOLVE_COMPUTE:
- radv_meta_resolve_compute_image(cmd_buffer,
- src_image,
- src_image->vk_format,
- src_image_layout,
- dst_image,
- dst_image->vk_format,
- dst_image_layout,
- region);
- break;
- default:
- assert(!"Invalid resolve method selected");
- }
+ switch (resolve_method) {
+ case RESOLVE_HW:
+ radv_meta_resolve_hardware_image(cmd_buffer, src_image, src_image_layout, dst_image,
+ dst_image_layout, region);
+ break;
+ case RESOLVE_FRAGMENT:
+ radv_meta_resolve_fragment_image(cmd_buffer, src_image, src_image_layout, dst_image,
+ dst_image_layout, region);
+ break;
+ case RESOLVE_COMPUTE:
+ radv_meta_resolve_compute_image(cmd_buffer, src_image, src_image->vk_format, src_image_layout,
+ dst_image, dst_image->vk_format, dst_image_layout, region);
+ break;
+ default:
+ assert(!"Invalid resolve method selected");
+ }
}
-void radv_CmdResolveImage2KHR(
- VkCommandBuffer commandBuffer,
- const VkResolveImageInfo2KHR* pResolveImageInfo)
+void
+radv_CmdResolveImage2KHR(VkCommandBuffer commandBuffer,
+ const VkResolveImageInfo2KHR *pResolveImageInfo)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_image, src_image, pResolveImageInfo->srcImage);
- RADV_FROM_HANDLE(radv_image, dst_image, pResolveImageInfo->dstImage);
- VkImageLayout src_image_layout = pResolveImageInfo->srcImageLayout;
- VkImageLayout dst_image_layout = pResolveImageInfo->dstImageLayout;
- enum radv_resolve_method resolve_method = RESOLVE_HW;
- /* we can use the hw resolve only for single full resolves */
- if (pResolveImageInfo->regionCount == 1) {
- if (pResolveImageInfo->pRegions[0].srcOffset.x ||
- pResolveImageInfo->pRegions[0].srcOffset.y ||
- pResolveImageInfo->pRegions[0].srcOffset.z)
- resolve_method = RESOLVE_COMPUTE;
- if (pResolveImageInfo->pRegions[0].dstOffset.x ||
- pResolveImageInfo->pRegions[0].dstOffset.y ||
- pResolveImageInfo->pRegions[0].dstOffset.z)
- resolve_method = RESOLVE_COMPUTE;
-
- if (pResolveImageInfo->pRegions[0].extent.width != src_image->info.width ||
- pResolveImageInfo->pRegions[0].extent.height != src_image->info.height ||
- pResolveImageInfo->pRegions[0].extent.depth != src_image->info.depth)
- resolve_method = RESOLVE_COMPUTE;
- } else
- resolve_method = RESOLVE_COMPUTE;
-
- radv_pick_resolve_method_images(cmd_buffer->device, src_image,
- src_image->vk_format, dst_image,
- dst_image_layout, false, cmd_buffer,
- &resolve_method);
-
- for (uint32_t r = 0; r < pResolveImageInfo->regionCount; r++) {
- resolve_image(cmd_buffer, src_image, src_image_layout,
- dst_image, dst_image_layout,
- &pResolveImageInfo->pRegions[r], resolve_method);
- }
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_image, src_image, pResolveImageInfo->srcImage);
+ RADV_FROM_HANDLE(radv_image, dst_image, pResolveImageInfo->dstImage);
+ VkImageLayout src_image_layout = pResolveImageInfo->srcImageLayout;
+ VkImageLayout dst_image_layout = pResolveImageInfo->dstImageLayout;
+ enum radv_resolve_method resolve_method = RESOLVE_HW;
+ /* we can use the hw resolve only for single full resolves */
+ if (pResolveImageInfo->regionCount == 1) {
+ if (pResolveImageInfo->pRegions[0].srcOffset.x ||
+ pResolveImageInfo->pRegions[0].srcOffset.y || pResolveImageInfo->pRegions[0].srcOffset.z)
+ resolve_method = RESOLVE_COMPUTE;
+ if (pResolveImageInfo->pRegions[0].dstOffset.x ||
+ pResolveImageInfo->pRegions[0].dstOffset.y || pResolveImageInfo->pRegions[0].dstOffset.z)
+ resolve_method = RESOLVE_COMPUTE;
+
+ if (pResolveImageInfo->pRegions[0].extent.width != src_image->info.width ||
+ pResolveImageInfo->pRegions[0].extent.height != src_image->info.height ||
+ pResolveImageInfo->pRegions[0].extent.depth != src_image->info.depth)
+ resolve_method = RESOLVE_COMPUTE;
+ } else
+ resolve_method = RESOLVE_COMPUTE;
+
+ radv_pick_resolve_method_images(cmd_buffer->device, src_image, src_image->vk_format, dst_image,
+ dst_image_layout, false, cmd_buffer, &resolve_method);
+
+ for (uint32_t r = 0; r < pResolveImageInfo->regionCount; r++) {
+ resolve_image(cmd_buffer, src_image, src_image_layout, dst_image, dst_image_layout,
+ &pResolveImageInfo->pRegions[r], resolve_method);
+ }
}
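As a usage illustration, the checks above mean a resolve can only stay on the HW path when it is a single region with zero offsets covering the whole source image (and the format/tiling checks in radv_pick_resolve_method_images() also pass). A minimal sketch, assuming cmd, msaa_image, resolved_image, width and height are valid application-side values:

const VkImageResolve2KHR region = {
   .sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2_KHR,
   .srcSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1},
   .srcOffset = {0, 0, 0},        /* any non-zero offset forces the compute path */
   .dstSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1},
   .dstOffset = {0, 0, 0},
   .extent = {width, height, 1},  /* must equal the full source image extent */
};
const VkResolveImageInfo2KHR info = {
   .sType = VK_STRUCTURE_TYPE_RESOLVE_IMAGE_INFO_2_KHR,
   .srcImage = msaa_image,
   .srcImageLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
   .dstImage = resolved_image,
   .dstImageLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
   .regionCount = 1,
   .pRegions = &region,
};
vkCmdResolveImage2KHR(cmd, &info);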
static void
radv_cmd_buffer_resolve_subpass_hw(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
- const struct radv_subpass *subpass = cmd_buffer->state.subpass;
- struct radv_meta_saved_state saved_state;
-
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_GRAPHICS_PIPELINE);
-
- for (uint32_t i = 0; i < subpass->color_count; ++i) {
- struct radv_subpass_attachment src_att = subpass->color_attachments[i];
- struct radv_subpass_attachment dest_att = subpass->resolve_attachments[i];
-
- if (dest_att.attachment == VK_ATTACHMENT_UNUSED)
- continue;
-
- struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview;
- struct radv_image *src_img = src_iview->image;
-
- struct radv_image_view *dest_iview = cmd_buffer->state.attachments[dest_att.attachment].iview;
- struct radv_image *dst_img = dest_iview->image;
-
- if (radv_dcc_enabled(dst_img, dest_iview->base_mip)) {
- VkImageSubresourceRange range = {
- .aspectMask = dest_iview->aspect_mask,
- .baseMipLevel = dest_iview->base_mip,
- .levelCount = dest_iview->level_count,
- .baseArrayLayer = dest_iview->base_layer,
- .layerCount = dest_iview->layer_count,
- };
-
- cmd_buffer->state.flush_bits |=
- radv_init_dcc(cmd_buffer, dst_img, &range, 0xffffffff);
- cmd_buffer->state.attachments[dest_att.attachment].current_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
- }
-
- struct radv_subpass resolve_subpass = {
- .color_count = 2,
- .color_attachments = (struct radv_subpass_attachment[]) { src_att, dest_att },
- .depth_stencil_attachment = NULL,
- };
-
- radv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass);
-
- VkResult ret = build_resolve_pipeline(cmd_buffer->device, radv_format_meta_fs_key(cmd_buffer->device, dest_iview->vk_format));
- if (ret != VK_SUCCESS) {
- cmd_buffer->record_result = ret;
- continue;
- }
-
- emit_resolve(cmd_buffer, src_img, dst_img,
- dest_iview->vk_format,
- &(VkOffset2D) { 0, 0 },
- &(VkExtent2D) { fb->width, fb->height });
- }
-
- radv_cmd_buffer_set_subpass(cmd_buffer, subpass);
-
- radv_meta_restore(&saved_state, cmd_buffer);
+ struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
+ const struct radv_subpass *subpass = cmd_buffer->state.subpass;
+ struct radv_meta_saved_state saved_state;
+
+ radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_GRAPHICS_PIPELINE);
+
+ for (uint32_t i = 0; i < subpass->color_count; ++i) {
+ struct radv_subpass_attachment src_att = subpass->color_attachments[i];
+ struct radv_subpass_attachment dest_att = subpass->resolve_attachments[i];
+
+ if (dest_att.attachment == VK_ATTACHMENT_UNUSED)
+ continue;
+
+ struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview;
+ struct radv_image *src_img = src_iview->image;
+
+ struct radv_image_view *dest_iview = cmd_buffer->state.attachments[dest_att.attachment].iview;
+ struct radv_image *dst_img = dest_iview->image;
+
+ if (radv_dcc_enabled(dst_img, dest_iview->base_mip)) {
+ VkImageSubresourceRange range = {
+ .aspectMask = dest_iview->aspect_mask,
+ .baseMipLevel = dest_iview->base_mip,
+ .levelCount = dest_iview->level_count,
+ .baseArrayLayer = dest_iview->base_layer,
+ .layerCount = dest_iview->layer_count,
+ };
+
+ cmd_buffer->state.flush_bits |= radv_init_dcc(cmd_buffer, dst_img, &range, 0xffffffff);
+ cmd_buffer->state.attachments[dest_att.attachment].current_layout =
+ VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+ }
+
+ struct radv_subpass resolve_subpass = {
+ .color_count = 2,
+ .color_attachments = (struct radv_subpass_attachment[]){src_att, dest_att},
+ .depth_stencil_attachment = NULL,
+ };
+
+ radv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass);
+
+ VkResult ret = build_resolve_pipeline(
+ cmd_buffer->device, radv_format_meta_fs_key(cmd_buffer->device, dest_iview->vk_format));
+ if (ret != VK_SUCCESS) {
+ cmd_buffer->record_result = ret;
+ continue;
+ }
+
+ emit_resolve(cmd_buffer, src_img, dst_img, dest_iview->vk_format, &(VkOffset2D){0, 0},
+ &(VkExtent2D){fb->width, fb->height});
+ }
+
+ radv_cmd_buffer_set_subpass(cmd_buffer, subpass);
+
+ radv_meta_restore(&saved_state, cmd_buffer);
}
/**
@@ -777,125 +734,114 @@ radv_cmd_buffer_resolve_subpass_hw(struct radv_cmd_buffer *cmd_buffer)
void
radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer)
{
- const struct radv_subpass *subpass = cmd_buffer->state.subpass;
- enum radv_resolve_method resolve_method = RESOLVE_HW;
-
- if (!subpass->has_color_resolve && !subpass->ds_resolve_attachment)
- return;
-
- radv_describe_begin_render_pass_resolve(cmd_buffer);
-
- if (subpass->ds_resolve_attachment) {
- struct radv_subpass_attachment src_att = *subpass->depth_stencil_attachment;
- struct radv_subpass_attachment dst_att = *subpass->ds_resolve_attachment;
- struct radv_image_view *src_iview =
- cmd_buffer->state.attachments[src_att.attachment].iview;
- struct radv_image_view *dst_iview =
- cmd_buffer->state.attachments[dst_att.attachment].iview;
-
- /* Make sure to not clear the depth/stencil attachment after resolves. */
- cmd_buffer->state.attachments[dst_att.attachment].pending_clear_aspects = 0;
-
- radv_pick_resolve_method_images(cmd_buffer->device,
- src_iview->image,
- src_iview->vk_format,
- dst_iview->image,
- dst_att.layout,
- dst_att.in_render_loop,
- cmd_buffer,
- &resolve_method);
-
- if ((src_iview->aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) &&
- subpass->depth_resolve_mode != VK_RESOLVE_MODE_NONE_KHR) {
- if (resolve_method == RESOLVE_FRAGMENT) {
- radv_depth_stencil_resolve_subpass_fs(cmd_buffer,
- VK_IMAGE_ASPECT_DEPTH_BIT,
- subpass->depth_resolve_mode);
- } else {
- assert(resolve_method == RESOLVE_COMPUTE);
- radv_depth_stencil_resolve_subpass_cs(cmd_buffer,
- VK_IMAGE_ASPECT_DEPTH_BIT,
- subpass->depth_resolve_mode);
- }
- }
-
- if ((src_iview->aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT) &&
- subpass->stencil_resolve_mode != VK_RESOLVE_MODE_NONE_KHR) {
- if (resolve_method == RESOLVE_FRAGMENT) {
- radv_depth_stencil_resolve_subpass_fs(cmd_buffer,
- VK_IMAGE_ASPECT_STENCIL_BIT,
- subpass->stencil_resolve_mode);
- } else {
- assert(resolve_method == RESOLVE_COMPUTE);
- radv_depth_stencil_resolve_subpass_cs(cmd_buffer,
- VK_IMAGE_ASPECT_STENCIL_BIT,
- subpass->stencil_resolve_mode);
- }
- }
-
- /* From the Vulkan spec 1.2.165:
- *
- * "VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT specifies
- * write access to a color, resolve, or depth/stencil
- * resolve attachment during a render pass or via
- * certain subpass load and store operations."
- *
- * Yes, it's counterintuitive but it makes sense because ds
- * resolve operations happen late at the end of the subpass.
- *
- * That said, RADV is wrong because it executes the subpass
- * end barrier *before* any subpass resolves instead of after.
- *
- * TODO: Fix this properly by executing subpass end barriers
- * after subpass resolves.
- */
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB;
- if (radv_image_has_htile(dst_iview->image))
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
- }
-
- if (subpass->has_color_resolve) {
- for (uint32_t i = 0; i < subpass->color_count; ++i) {
- struct radv_subpass_attachment src_att = subpass->color_attachments[i];
- struct radv_subpass_attachment dest_att = subpass->resolve_attachments[i];
-
- if (dest_att.attachment == VK_ATTACHMENT_UNUSED)
- continue;
-
- /* Make sure to not clear color attachments after resolves. */
- cmd_buffer->state.attachments[dest_att.attachment].pending_clear_aspects = 0;
-
- struct radv_image *dst_img = cmd_buffer->state.attachments[dest_att.attachment].iview->image;
- struct radv_image_view *src_iview= cmd_buffer->state.attachments[src_att.attachment].iview;
- struct radv_image *src_img = src_iview->image;
-
- radv_pick_resolve_method_images(cmd_buffer->device, src_img,
- src_iview->vk_format, dst_img,
- dest_att.layout,
- dest_att.in_render_loop,
- cmd_buffer, &resolve_method);
-
- if (resolve_method == RESOLVE_FRAGMENT) {
- break;
- }
- }
-
- switch (resolve_method) {
- case RESOLVE_HW:
- radv_cmd_buffer_resolve_subpass_hw(cmd_buffer);
- break;
- case RESOLVE_COMPUTE:
- radv_cmd_buffer_resolve_subpass_cs(cmd_buffer);
- break;
- case RESOLVE_FRAGMENT:
- radv_cmd_buffer_resolve_subpass_fs(cmd_buffer);
- break;
- default:
- unreachable("Invalid resolve method");
- }
- }
-
- radv_describe_end_render_pass_resolve(cmd_buffer);
+ const struct radv_subpass *subpass = cmd_buffer->state.subpass;
+ enum radv_resolve_method resolve_method = RESOLVE_HW;
+
+ if (!subpass->has_color_resolve && !subpass->ds_resolve_attachment)
+ return;
+
+ radv_describe_begin_render_pass_resolve(cmd_buffer);
+
+ if (subpass->ds_resolve_attachment) {
+ struct radv_subpass_attachment src_att = *subpass->depth_stencil_attachment;
+ struct radv_subpass_attachment dst_att = *subpass->ds_resolve_attachment;
+ struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview;
+ struct radv_image_view *dst_iview = cmd_buffer->state.attachments[dst_att.attachment].iview;
+
+ /* Make sure to not clear the depth/stencil attachment after resolves. */
+ cmd_buffer->state.attachments[dst_att.attachment].pending_clear_aspects = 0;
+
+ radv_pick_resolve_method_images(cmd_buffer->device, src_iview->image, src_iview->vk_format,
+ dst_iview->image, dst_att.layout, dst_att.in_render_loop,
+ cmd_buffer, &resolve_method);
+
+ if ((src_iview->aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) &&
+ subpass->depth_resolve_mode != VK_RESOLVE_MODE_NONE_KHR) {
+ if (resolve_method == RESOLVE_FRAGMENT) {
+ radv_depth_stencil_resolve_subpass_fs(cmd_buffer, VK_IMAGE_ASPECT_DEPTH_BIT,
+ subpass->depth_resolve_mode);
+ } else {
+ assert(resolve_method == RESOLVE_COMPUTE);
+ radv_depth_stencil_resolve_subpass_cs(cmd_buffer, VK_IMAGE_ASPECT_DEPTH_BIT,
+ subpass->depth_resolve_mode);
+ }
+ }
+
+ if ((src_iview->aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT) &&
+ subpass->stencil_resolve_mode != VK_RESOLVE_MODE_NONE_KHR) {
+ if (resolve_method == RESOLVE_FRAGMENT) {
+ radv_depth_stencil_resolve_subpass_fs(cmd_buffer, VK_IMAGE_ASPECT_STENCIL_BIT,
+ subpass->stencil_resolve_mode);
+ } else {
+ assert(resolve_method == RESOLVE_COMPUTE);
+ radv_depth_stencil_resolve_subpass_cs(cmd_buffer, VK_IMAGE_ASPECT_STENCIL_BIT,
+ subpass->stencil_resolve_mode);
+ }
+ }
+
+ /* From the Vulkan spec 1.2.165:
+ *
+ * "VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT specifies
+ * write access to a color, resolve, or depth/stencil
+ * resolve attachment during a render pass or via
+ * certain subpass load and store operations."
+ *
+ * Yes, it's counterintuitive but it makes sense because ds
+ * resolve operations happen late at the end of the subpass.
+ *
+ * That said, RADV is wrong because it executes the subpass
+ * end barrier *before* any subpass resolves instead of after.
+ *
+ * TODO: Fix this properly by executing subpass end barriers
+ * after subpass resolves.
+ */
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB;
+ if (radv_image_has_htile(dst_iview->image))
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
+ }
+
+ if (subpass->has_color_resolve) {
+ for (uint32_t i = 0; i < subpass->color_count; ++i) {
+ struct radv_subpass_attachment src_att = subpass->color_attachments[i];
+ struct radv_subpass_attachment dest_att = subpass->resolve_attachments[i];
+
+ if (dest_att.attachment == VK_ATTACHMENT_UNUSED)
+ continue;
+
+ /* Make sure to not clear color attachments after resolves. */
+ cmd_buffer->state.attachments[dest_att.attachment].pending_clear_aspects = 0;
+
+ struct radv_image *dst_img =
+ cmd_buffer->state.attachments[dest_att.attachment].iview->image;
+ struct radv_image_view *src_iview =
+ cmd_buffer->state.attachments[src_att.attachment].iview;
+ struct radv_image *src_img = src_iview->image;
+
+ radv_pick_resolve_method_images(cmd_buffer->device, src_img, src_iview->vk_format, dst_img,
+ dest_att.layout, dest_att.in_render_loop, cmd_buffer,
+ &resolve_method);
+
+ if (resolve_method == RESOLVE_FRAGMENT) {
+ break;
+ }
+ }
+
+ switch (resolve_method) {
+ case RESOLVE_HW:
+ radv_cmd_buffer_resolve_subpass_hw(cmd_buffer);
+ break;
+ case RESOLVE_COMPUTE:
+ radv_cmd_buffer_resolve_subpass_cs(cmd_buffer);
+ break;
+ case RESOLVE_FRAGMENT:
+ radv_cmd_buffer_resolve_subpass_fs(cmd_buffer);
+ break;
+ default:
+ unreachable("Invalid resolve method");
+ }
+ }
+
+ radv_describe_end_render_pass_resolve(cmd_buffer);
}
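In outline, the subpass resolve above proceeds as follows (a summary of the code, with the ordering caveat from the comment about the subpass end barrier):

/*   1. Depth/stencil: pick a method for the ds resolve attachment, run the
 *      depth aspect and the stencil aspect separately through the FS or CS
 *      path, then flush DB (+ DB metadata when HTILE is present) because the
 *      subpass end barrier has already been executed.
 *   2. Color: scan all resolve attachments and accumulate a single method;
 *      if any attachment needs the fragment path the whole subpass uses it,
 *      otherwise compute wins over the plain HW path.
 *   3. Dispatch to the HW, compute or fragment subpass resolve helper.
 */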
/**
@@ -905,94 +851,89 @@ radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer)
void
radv_decompress_resolve_subpass_src(struct radv_cmd_buffer *cmd_buffer)
{
- const struct radv_subpass *subpass = cmd_buffer->state.subpass;
- struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
- uint32_t layer_count = fb->layers;
+ const struct radv_subpass *subpass = cmd_buffer->state.subpass;
+ struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
+ uint32_t layer_count = fb->layers;
- if (subpass->view_mask)
- layer_count = util_last_bit(subpass->view_mask);
+ if (subpass->view_mask)
+ layer_count = util_last_bit(subpass->view_mask);
- for (uint32_t i = 0; i < subpass->color_count; ++i) {
- struct radv_subpass_attachment src_att = subpass->color_attachments[i];
- struct radv_subpass_attachment dest_att = subpass->resolve_attachments[i];
+ for (uint32_t i = 0; i < subpass->color_count; ++i) {
+ struct radv_subpass_attachment src_att = subpass->color_attachments[i];
+ struct radv_subpass_attachment dest_att = subpass->resolve_attachments[i];
- if (dest_att.attachment == VK_ATTACHMENT_UNUSED)
- continue;
+ if (dest_att.attachment == VK_ATTACHMENT_UNUSED)
+ continue;
- struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview;
- struct radv_image *src_image = src_iview->image;
+ struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview;
+ struct radv_image *src_image = src_iview->image;
- VkImageResolve2KHR region = {0};
- region.sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2_KHR;
- region.srcSubresource.aspectMask = src_iview->aspect_mask;
- region.srcSubresource.mipLevel = 0;
- region.srcSubresource.baseArrayLayer = src_iview->base_layer;
- region.srcSubresource.layerCount = layer_count;
+ VkImageResolve2KHR region = {0};
+ region.sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2_KHR;
+ region.srcSubresource.aspectMask = src_iview->aspect_mask;
+ region.srcSubresource.mipLevel = 0;
+ region.srcSubresource.baseArrayLayer = src_iview->base_layer;
+ region.srcSubresource.layerCount = layer_count;
- radv_decompress_resolve_src(cmd_buffer, src_image,
- src_att.layout, &region);
- }
+ radv_decompress_resolve_src(cmd_buffer, src_image, src_att.layout, &region);
+ }
}
static struct radv_sample_locations_state *
radv_get_resolve_sample_locations(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_cmd_state *state = &cmd_buffer->state;
- uint32_t subpass_id = radv_get_subpass_id(cmd_buffer);
+ struct radv_cmd_state *state = &cmd_buffer->state;
+ uint32_t subpass_id = radv_get_subpass_id(cmd_buffer);
- for (uint32_t i = 0; i < state->num_subpass_sample_locs; i++) {
- if (state->subpass_sample_locs[i].subpass_idx == subpass_id)
- return &state->subpass_sample_locs[i].sample_location;
- }
+ for (uint32_t i = 0; i < state->num_subpass_sample_locs; i++) {
+ if (state->subpass_sample_locs[i].subpass_idx == subpass_id)
+ return &state->subpass_sample_locs[i].sample_location;
+ }
- return NULL;
+ return NULL;
}
/**
* Decompress CMask/FMask before resolving a multisampled source image.
*/
void
-radv_decompress_resolve_src(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *src_image,
- VkImageLayout src_image_layout,
- const VkImageResolve2KHR *region)
+radv_decompress_resolve_src(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,
+ VkImageLayout src_image_layout, const VkImageResolve2KHR *region)
{
- const uint32_t src_base_layer =
- radv_meta_get_iview_layer(src_image, &region->srcSubresource,
- &region->srcOffset);
-
- VkImageMemoryBarrier barrier = {0};
- barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
- barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
- barrier.oldLayout = src_image_layout;
- barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
- barrier.image = radv_image_to_handle(src_image);
- barrier.subresourceRange = (VkImageSubresourceRange) {
- .aspectMask = region->srcSubresource.aspectMask,
- .baseMipLevel = region->srcSubresource.mipLevel,
- .levelCount = 1,
- .baseArrayLayer = src_base_layer,
- .layerCount = region->srcSubresource.layerCount,
- };
-
- if (src_image->flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT) {
- /* If the depth/stencil image uses different sample
- * locations, we need them during HTILE decompressions.
- */
- struct radv_sample_locations_state *sample_locs =
- radv_get_resolve_sample_locations(cmd_buffer);
-
- barrier.pNext = &(VkSampleLocationsInfoEXT) {
- .sType = VK_STRUCTURE_TYPE_SAMPLE_LOCATIONS_INFO_EXT,
- .sampleLocationsPerPixel = sample_locs->per_pixel,
- .sampleLocationGridSize = sample_locs->grid_size,
- .sampleLocationsCount = sample_locs->count,
- .pSampleLocations = sample_locs->locations,
- };
- }
-
- radv_CmdPipelineBarrier(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- false, 0, NULL, 0, NULL, 1, &barrier);
+ const uint32_t src_base_layer =
+ radv_meta_get_iview_layer(src_image, &region->srcSubresource, &region->srcOffset);
+
+ VkImageMemoryBarrier barrier = {0};
+ barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+ barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
+ barrier.oldLayout = src_image_layout;
+ barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
+ barrier.image = radv_image_to_handle(src_image);
+ barrier.subresourceRange = (VkImageSubresourceRange){
+ .aspectMask = region->srcSubresource.aspectMask,
+ .baseMipLevel = region->srcSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = src_base_layer,
+ .layerCount = region->srcSubresource.layerCount,
+ };
+
+ if (src_image->flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT) {
+ /* If the depth/stencil image uses different sample
+ * locations, we need them during HTILE decompressions.
+ */
+ struct radv_sample_locations_state *sample_locs =
+ radv_get_resolve_sample_locations(cmd_buffer);
+
+ barrier.pNext = &(VkSampleLocationsInfoEXT){
+ .sType = VK_STRUCTURE_TYPE_SAMPLE_LOCATIONS_INFO_EXT,
+ .sampleLocationsPerPixel = sample_locs->per_pixel,
+ .sampleLocationGridSize = sample_locs->grid_size,
+ .sampleLocationsCount = sample_locs->count,
+ .pSampleLocations = sample_locs->locations,
+ };
+ }
+
+ radv_CmdPipelineBarrier(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, false, 0, NULL, 0, NULL, 1,
+ &barrier);
}
diff --git a/src/amd/vulkan/radv_meta_resolve_cs.c b/src/amd/vulkan/radv_meta_resolve_cs.c
index b5167b0cf49..176063967e0 100644
--- a/src/amd/vulkan/radv_meta_resolve_cs.c
+++ b/src/amd/vulkan/radv_meta_resolve_cs.c
@@ -21,863 +21,760 @@
* IN THE SOFTWARE.
*/
-
#include <assert.h>
#include <stdbool.h>
+#include "nir/nir_builder.h"
#include "radv_meta.h"
#include "radv_private.h"
-#include "nir/nir_builder.h"
#include "sid.h"
#include "vk_format.h"
-static nir_ssa_def *radv_meta_build_resolve_srgb_conversion(nir_builder *b,
- nir_ssa_def *input)
+static nir_ssa_def *
+radv_meta_build_resolve_srgb_conversion(nir_builder *b, nir_ssa_def *input)
{
- unsigned i;
-
- nir_ssa_def *cmp[3];
- for (i = 0; i < 3; i++)
- cmp[i] = nir_flt(b, nir_channel(b, input, i),
- nir_imm_int(b, 0x3b4d2e1c));
-
- nir_ssa_def *ltvals[3];
- for (i = 0; i < 3; i++)
- ltvals[i] = nir_fmul(b, nir_channel(b, input, i),
- nir_imm_float(b, 12.92));
-
- nir_ssa_def *gtvals[3];
-
- for (i = 0; i < 3; i++) {
- gtvals[i] = nir_fpow(b, nir_channel(b, input, i),
- nir_imm_float(b, 1.0/2.4));
- gtvals[i] = nir_fmul(b, gtvals[i],
- nir_imm_float(b, 1.055));
- gtvals[i] = nir_fsub(b, gtvals[i],
- nir_imm_float(b, 0.055));
- }
-
- nir_ssa_def *comp[4];
- for (i = 0; i < 3; i++)
- comp[i] = nir_bcsel(b, cmp[i], ltvals[i], gtvals[i]);
- comp[3] = nir_channels(b, input, 1 << 3);
- return nir_vec(b, comp, 4);
+ unsigned i;
+
+ nir_ssa_def *cmp[3];
+ for (i = 0; i < 3; i++)
+ cmp[i] = nir_flt(b, nir_channel(b, input, i), nir_imm_int(b, 0x3b4d2e1c));
+
+ nir_ssa_def *ltvals[3];
+ for (i = 0; i < 3; i++)
+ ltvals[i] = nir_fmul(b, nir_channel(b, input, i), nir_imm_float(b, 12.92));
+
+ nir_ssa_def *gtvals[3];
+
+ for (i = 0; i < 3; i++) {
+ gtvals[i] = nir_fpow(b, nir_channel(b, input, i), nir_imm_float(b, 1.0 / 2.4));
+ gtvals[i] = nir_fmul(b, gtvals[i], nir_imm_float(b, 1.055));
+ gtvals[i] = nir_fsub(b, gtvals[i], nir_imm_float(b, 0.055));
+ }
+
+ nir_ssa_def *comp[4];
+ for (i = 0; i < 3; i++)
+ comp[i] = nir_bcsel(b, cmp[i], ltvals[i], gtvals[i]);
+ comp[3] = nir_channels(b, input, 1 << 3);
+ return nir_vec(b, comp, 4);
}
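The NIR built above is the standard linear-to-sRGB encoding applied to the RGB channels, with alpha passed through; the constant 0x3b4d2e1c is the IEEE-754 bit pattern of roughly 0.0031308, the usual sRGB breakpoint. A scalar reference version for illustration only (not driver code; needs <math.h> for powf):

static inline float
linear_to_srgb_channel(float x)
{
   return (x < 0.0031308f) ? 12.92f * x
                           : 1.055f * powf(x, 1.0f / 2.4f) - 0.055f;
}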
static nir_shader *
build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_srgb, int samples)
{
- const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS,
- false,
- false,
- GLSL_TYPE_FLOAT);
- const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D,
- false,
- GLSL_TYPE_FLOAT);
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL,
- "meta_resolve_cs-%d-%s",
- samples,
- is_integer ? "int" : (is_srgb ? "srgb" : "float"));
- b.shader->info.cs.local_size[0] = 8;
- b.shader->info.cs.local_size[1] = 8;
- b.shader->info.cs.local_size[2] = 1;
-
- nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
- sampler_type, "s_tex");
- input_img->data.descriptor_set = 0;
- input_img->data.binding = 0;
-
- nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
- img_type, "out_img");
- output_img->data.descriptor_set = 0;
- output_img->data.binding = 1;
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
- nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
- nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info.cs.local_size[0],
- b.shader->info.cs.local_size[1],
- b.shader->info.cs.local_size[2], 0);
-
- nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
-
- nir_ssa_def *src_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range=16);
- nir_ssa_def *dst_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 8), .range=16);
-
- nir_ssa_def *img_coord = nir_channels(&b, nir_iadd(&b, global_id, src_offset), 0x3);
- nir_variable *color = nir_local_variable_create(b.impl, glsl_vec4_type(), "color");
-
- radv_meta_build_resolve_shader_core(&b, is_integer, samples, input_img,
- color, img_coord);
-
- nir_ssa_def *outval = nir_load_var(&b, color);
- if (is_srgb)
- outval = radv_meta_build_resolve_srgb_conversion(&b, outval);
-
- nir_ssa_def *coord = nir_iadd(&b, global_id, dst_offset);
- nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa,
- coord, nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0));
- return b.shader;
+ const struct glsl_type *sampler_type =
+ glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, GLSL_TYPE_FLOAT);
+ const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, false, GLSL_TYPE_FLOAT);
+ nir_builder b =
+ nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_resolve_cs-%d-%s", samples,
+ is_integer ? "int" : (is_srgb ? "srgb" : "float"));
+ b.shader->info.cs.local_size[0] = 8;
+ b.shader->info.cs.local_size[1] = 8;
+ b.shader->info.cs.local_size[2] = 1;
+
+ nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
+ input_img->data.descriptor_set = 0;
+ input_img->data.binding = 0;
+
+ nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
+ output_img->data.descriptor_set = 0;
+ output_img->data.binding = 1;
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
+ nir_ssa_def *block_size =
+ nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1],
+ b.shader->info.cs.local_size[2], 0);
+
+ nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+
+ nir_ssa_def *src_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 16);
+ nir_ssa_def *dst_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 8), .range = 16);
+
+ nir_ssa_def *img_coord = nir_channels(&b, nir_iadd(&b, global_id, src_offset), 0x3);
+ nir_variable *color = nir_local_variable_create(b.impl, glsl_vec4_type(), "color");
+
+ radv_meta_build_resolve_shader_core(&b, is_integer, samples, input_img, color, img_coord);
+
+ nir_ssa_def *outval = nir_load_var(&b, color);
+ if (is_srgb)
+ outval = radv_meta_build_resolve_srgb_conversion(&b, outval);
+
+ nir_ssa_def *coord = nir_iadd(&b, global_id, dst_offset);
+ nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord,
+ nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0));
+ return b.shader;
}
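The two nir_load_push_constant() calls above read a 16-byte block matching the VkPushConstantRange created in create_layout(); a host-side sketch of that layout (the struct name is hypothetical, only the offsets are implied by the shader):

struct resolve_cs_push_constants {
   int32_t src_offset[2]; /* bytes 0..7:  added to the global id before sampling */
   int32_t dst_offset[2]; /* bytes 8..15: added to the global id before the store */
};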
enum {
- DEPTH_RESOLVE,
- STENCIL_RESOLVE,
+ DEPTH_RESOLVE,
+ STENCIL_RESOLVE,
};
static const char *
get_resolve_mode_str(VkResolveModeFlagBits resolve_mode)
{
- switch (resolve_mode) {
- case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR:
- return "zero";
- case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
- return "average";
- case VK_RESOLVE_MODE_MIN_BIT_KHR:
- return "min";
- case VK_RESOLVE_MODE_MAX_BIT_KHR:
- return "max";
- default:
- unreachable("invalid resolve mode");
- }
+ switch (resolve_mode) {
+ case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR:
+ return "zero";
+ case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
+ return "average";
+ case VK_RESOLVE_MODE_MIN_BIT_KHR:
+ return "min";
+ case VK_RESOLVE_MODE_MAX_BIT_KHR:
+ return "max";
+ default:
+ unreachable("invalid resolve mode");
+ }
}
static nir_shader *
-build_depth_stencil_resolve_compute_shader(struct radv_device *dev, int samples,
- int index,
- VkResolveModeFlagBits resolve_mode)
+build_depth_stencil_resolve_compute_shader(struct radv_device *dev, int samples, int index,
+ VkResolveModeFlagBits resolve_mode)
{
- const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS,
- false,
- true,
- GLSL_TYPE_FLOAT);
- const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D,
- true,
- GLSL_TYPE_FLOAT);
-
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL,
- "meta_resolve_cs_%s-%s-%d",
- index == DEPTH_RESOLVE ? "depth" : "stencil",
- get_resolve_mode_str(resolve_mode), samples);
- b.shader->info.cs.local_size[0] = 8;
- b.shader->info.cs.local_size[1] = 8;
- b.shader->info.cs.local_size[2] = 1;
-
- nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
- sampler_type, "s_tex");
- input_img->data.descriptor_set = 0;
- input_img->data.binding = 0;
-
- nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
- img_type, "out_img");
- output_img->data.descriptor_set = 0;
- output_img->data.binding = 1;
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
- nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
- nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info.cs.local_size[0],
- b.shader->info.cs.local_size[1],
- b.shader->info.cs.local_size[2], 0);
-
- nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
- nir_ssa_def *layer_id = nir_channel(&b, wg_id, 2);
-
- nir_ssa_def *img_coord = nir_vec3(&b, nir_channel(&b, global_id, 0),
- nir_channel(&b, global_id, 1),
- layer_id);
-
- nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
-
- nir_alu_type type = index == DEPTH_RESOLVE ? nir_type_float32 : nir_type_uint32;
-
- nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
- tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
- tex->op = nir_texop_txf_ms;
- tex->src[0].src_type = nir_tex_src_coord;
- tex->src[0].src = nir_src_for_ssa(img_coord);
- tex->src[1].src_type = nir_tex_src_ms_index;
- tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
- tex->src[2].src_type = nir_tex_src_texture_deref;
- tex->src[2].src = nir_src_for_ssa(input_img_deref);
- tex->dest_type = type;
- tex->is_array = true;
- tex->coord_components = 3;
-
- nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
- nir_builder_instr_insert(&b, &tex->instr);
-
- nir_ssa_def *outval = &tex->dest.ssa;
-
- if (resolve_mode != VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR) {
- for (int i = 1; i < samples; i++) {
- nir_tex_instr *tex_add = nir_tex_instr_create(b.shader, 3);
- tex_add->sampler_dim = GLSL_SAMPLER_DIM_MS;
- tex_add->op = nir_texop_txf_ms;
- tex_add->src[0].src_type = nir_tex_src_coord;
- tex_add->src[0].src = nir_src_for_ssa(img_coord);
- tex_add->src[1].src_type = nir_tex_src_ms_index;
- tex_add->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i));
- tex_add->src[2].src_type = nir_tex_src_texture_deref;
- tex_add->src[2].src = nir_src_for_ssa(input_img_deref);
- tex_add->dest_type = type;
- tex_add->is_array = true;
- tex_add->coord_components = 3;
-
- nir_ssa_dest_init(&tex_add->instr, &tex_add->dest, 4, 32, "tex");
- nir_builder_instr_insert(&b, &tex_add->instr);
-
- switch (resolve_mode) {
- case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
- assert(index == DEPTH_RESOLVE);
- outval = nir_fadd(&b, outval, &tex_add->dest.ssa);
- break;
- case VK_RESOLVE_MODE_MIN_BIT_KHR:
- if (index == DEPTH_RESOLVE)
- outval = nir_fmin(&b, outval, &tex_add->dest.ssa);
- else
- outval = nir_umin(&b, outval, &tex_add->dest.ssa);
- break;
- case VK_RESOLVE_MODE_MAX_BIT_KHR:
- if (index == DEPTH_RESOLVE)
- outval = nir_fmax(&b, outval, &tex_add->dest.ssa);
- else
- outval = nir_umax(&b, outval, &tex_add->dest.ssa);
- break;
- default:
- unreachable("invalid resolve mode");
- }
- }
-
- if (resolve_mode == VK_RESOLVE_MODE_AVERAGE_BIT_KHR)
- outval = nir_fdiv(&b, outval, nir_imm_float(&b, samples));
- }
-
- nir_ssa_def *coord = nir_vec4(&b, nir_channel(&b, img_coord, 0),
- nir_channel(&b, img_coord, 1),
- nir_channel(&b, img_coord, 2),
- nir_imm_int(&b, 0));
- nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa,
- coord, nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0));
- return b.shader;
+ const struct glsl_type *sampler_type =
+ glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, true, GLSL_TYPE_FLOAT);
+ const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, true, GLSL_TYPE_FLOAT);
+
+ nir_builder b = nir_builder_init_simple_shader(
+ MESA_SHADER_COMPUTE, NULL, "meta_resolve_cs_%s-%s-%d",
+ index == DEPTH_RESOLVE ? "depth" : "stencil", get_resolve_mode_str(resolve_mode), samples);
+ b.shader->info.cs.local_size[0] = 8;
+ b.shader->info.cs.local_size[1] = 8;
+ b.shader->info.cs.local_size[2] = 1;
+
+ nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
+ input_img->data.descriptor_set = 0;
+ input_img->data.binding = 0;
+
+ nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
+ output_img->data.descriptor_set = 0;
+ output_img->data.binding = 1;
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
+ nir_ssa_def *block_size =
+ nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1],
+ b.shader->info.cs.local_size[2], 0);
+
+ nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+ nir_ssa_def *layer_id = nir_channel(&b, wg_id, 2);
+
+ nir_ssa_def *img_coord =
+ nir_vec3(&b, nir_channel(&b, global_id, 0), nir_channel(&b, global_id, 1), layer_id);
+
+ nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
+
+ nir_alu_type type = index == DEPTH_RESOLVE ? nir_type_float32 : nir_type_uint32;
+
+ nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
+ tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
+ tex->op = nir_texop_txf_ms;
+ tex->src[0].src_type = nir_tex_src_coord;
+ tex->src[0].src = nir_src_for_ssa(img_coord);
+ tex->src[1].src_type = nir_tex_src_ms_index;
+ tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
+ tex->src[2].src_type = nir_tex_src_texture_deref;
+ tex->src[2].src = nir_src_for_ssa(input_img_deref);
+ tex->dest_type = type;
+ tex->is_array = true;
+ tex->coord_components = 3;
+
+ nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+ nir_builder_instr_insert(&b, &tex->instr);
+
+ nir_ssa_def *outval = &tex->dest.ssa;
+
+ if (resolve_mode != VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR) {
+ for (int i = 1; i < samples; i++) {
+ nir_tex_instr *tex_add = nir_tex_instr_create(b.shader, 3);
+ tex_add->sampler_dim = GLSL_SAMPLER_DIM_MS;
+ tex_add->op = nir_texop_txf_ms;
+ tex_add->src[0].src_type = nir_tex_src_coord;
+ tex_add->src[0].src = nir_src_for_ssa(img_coord);
+ tex_add->src[1].src_type = nir_tex_src_ms_index;
+ tex_add->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i));
+ tex_add->src[2].src_type = nir_tex_src_texture_deref;
+ tex_add->src[2].src = nir_src_for_ssa(input_img_deref);
+ tex_add->dest_type = type;
+ tex_add->is_array = true;
+ tex_add->coord_components = 3;
+
+ nir_ssa_dest_init(&tex_add->instr, &tex_add->dest, 4, 32, "tex");
+ nir_builder_instr_insert(&b, &tex_add->instr);
+
+ switch (resolve_mode) {
+ case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
+ assert(index == DEPTH_RESOLVE);
+ outval = nir_fadd(&b, outval, &tex_add->dest.ssa);
+ break;
+ case VK_RESOLVE_MODE_MIN_BIT_KHR:
+ if (index == DEPTH_RESOLVE)
+ outval = nir_fmin(&b, outval, &tex_add->dest.ssa);
+ else
+ outval = nir_umin(&b, outval, &tex_add->dest.ssa);
+ break;
+ case VK_RESOLVE_MODE_MAX_BIT_KHR:
+ if (index == DEPTH_RESOLVE)
+ outval = nir_fmax(&b, outval, &tex_add->dest.ssa);
+ else
+ outval = nir_umax(&b, outval, &tex_add->dest.ssa);
+ break;
+ default:
+ unreachable("invalid resolve mode");
+ }
+ }
+
+ if (resolve_mode == VK_RESOLVE_MODE_AVERAGE_BIT_KHR)
+ outval = nir_fdiv(&b, outval, nir_imm_float(&b, samples));
+ }
+
+ nir_ssa_def *coord = nir_vec4(&b, nir_channel(&b, img_coord, 0), nir_channel(&b, img_coord, 1),
+ nir_channel(&b, img_coord, 2), nir_imm_int(&b, 0));
+ nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord,
+ nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0));
+ return b.shader;
}
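Per texel, the shader generated above reduces the samples as sketched below (a paraphrase of the NIR construction, with min/max/avg used in the obvious way):

/* value = fetch(coord, sample 0);
 * if (mode != VK_RESOLVE_MODE_SAMPLE_ZERO_BIT) {
 *    for (i = 1; i < samples; i++) {
 *       s = fetch(coord, sample i);
 *       value = (mode == AVERAGE) ? value + s        // depth only
 *             : (mode == MIN)     ? min(value, s)    // fmin depth / umin stencil
 *                                 : max(value, s);   // fmax depth / umax stencil
 *    }
 *    if (mode == AVERAGE)
 *       value /= samples;
 * }
 * store(coord, value);
 */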
static VkResult
create_layout(struct radv_device *device)
{
- VkResult result;
-	/*
-	 * Two descriptors: one for the image being sampled and
-	 * one for the storage image being written.
-	 */
- VkDescriptorSetLayoutCreateInfo ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 2,
- .pBindings = (VkDescriptorSetLayoutBinding[]) {
- {
- .binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- {
- .binding = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- }
- };
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
- &ds_create_info,
- &device->meta_state.alloc,
- &device->meta_state.resolve_compute.ds_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
-
- VkPipelineLayoutCreateInfo pl_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 1,
- .pSetLayouts = &device->meta_state.resolve_compute.ds_layout,
- .pushConstantRangeCount = 1,
- .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
- };
-
- result = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &pl_create_info,
- &device->meta_state.alloc,
- &device->meta_state.resolve_compute.p_layout);
- if (result != VK_SUCCESS)
- goto fail;
- return VK_SUCCESS;
+ VkResult result;
+   /*
+    * Two descriptors: one for the image being sampled and
+    * one for the storage image being written.
+    */
+ VkDescriptorSetLayoutCreateInfo ds_create_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 2,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ {.binding = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.resolve_compute.ds_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineLayoutCreateInfo pl_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &device->meta_state.resolve_compute.ds_layout,
+ .pushConstantRangeCount = 1,
+ .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
+ };
+
+ result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.resolve_compute.p_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+ return VK_SUCCESS;
fail:
- return result;
+ return result;
}
static VkResult
-create_resolve_pipeline(struct radv_device *device,
- int samples,
- bool is_integer,
- bool is_srgb,
- VkPipeline *pipeline)
+create_resolve_pipeline(struct radv_device *device, int samples, bool is_integer, bool is_srgb,
+ VkPipeline *pipeline)
{
- VkResult result;
-
-
- mtx_lock(&device->meta_state.mtx);
- if (*pipeline) {
- mtx_unlock(&device->meta_state.mtx);
- return VK_SUCCESS;
- }
-
- nir_shader *cs = build_resolve_compute_shader(device, is_integer, is_srgb, samples);
-
- /* compute shader */
-
- VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = vk_shader_module_handle_from_nir(cs),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = pipeline_shader_stage,
- .flags = 0,
- .layout = device->meta_state.resolve_compute.p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- 1, &vk_pipeline_info, NULL,
- pipeline);
- if (result != VK_SUCCESS)
- goto fail;
-
- ralloc_free(cs);
- mtx_unlock(&device->meta_state.mtx);
- return VK_SUCCESS;
+ VkResult result;
+
+ mtx_lock(&device->meta_state.mtx);
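+   /* Another thread may already have created this pipeline while we waited for the lock. */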
+ if (*pipeline) {
+ mtx_unlock(&device->meta_state.mtx);
+ return VK_SUCCESS;
+ }
+
+ nir_shader *cs = build_resolve_compute_shader(device, is_integer, is_srgb, samples);
+
+ /* compute shader */
+
+ VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(cs),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = pipeline_shader_stage,
+ .flags = 0,
+ .layout = device->meta_state.resolve_compute.p_layout,
+ };
+
+ result = radv_CreateComputePipelines(radv_device_to_handle(device),
+ radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
+ &vk_pipeline_info, NULL, pipeline);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ ralloc_free(cs);
+ mtx_unlock(&device->meta_state.mtx);
+ return VK_SUCCESS;
fail:
- ralloc_free(cs);
- mtx_unlock(&device->meta_state.mtx);
- return result;
+ ralloc_free(cs);
+ mtx_unlock(&device->meta_state.mtx);
+ return result;
}
static VkResult
-create_depth_stencil_resolve_pipeline(struct radv_device *device,
- int samples,
- int index,
- VkResolveModeFlagBits resolve_mode,
- VkPipeline *pipeline)
+create_depth_stencil_resolve_pipeline(struct radv_device *device, int samples, int index,
+ VkResolveModeFlagBits resolve_mode, VkPipeline *pipeline)
{
- VkResult result;
-
- mtx_lock(&device->meta_state.mtx);
- if (*pipeline) {
- mtx_unlock(&device->meta_state.mtx);
- return VK_SUCCESS;
- }
-
- nir_shader *cs = build_depth_stencil_resolve_compute_shader(device, samples,
- index, resolve_mode);
-
- /* compute shader */
- VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = vk_shader_module_handle_from_nir(cs),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = pipeline_shader_stage,
- .flags = 0,
- .layout = device->meta_state.resolve_compute.p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- 1, &vk_pipeline_info, NULL,
- pipeline);
- if (result != VK_SUCCESS)
- goto fail;
-
- ralloc_free(cs);
- mtx_unlock(&device->meta_state.mtx);
- return VK_SUCCESS;
+ VkResult result;
+
+ mtx_lock(&device->meta_state.mtx);
+ if (*pipeline) {
+ mtx_unlock(&device->meta_state.mtx);
+ return VK_SUCCESS;
+ }
+
+ nir_shader *cs =
+ build_depth_stencil_resolve_compute_shader(device, samples, index, resolve_mode);
+
+ /* compute shader */
+ VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(cs),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = pipeline_shader_stage,
+ .flags = 0,
+ .layout = device->meta_state.resolve_compute.p_layout,
+ };
+
+ result = radv_CreateComputePipelines(radv_device_to_handle(device),
+ radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
+ &vk_pipeline_info, NULL, pipeline);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ ralloc_free(cs);
+ mtx_unlock(&device->meta_state.mtx);
+ return VK_SUCCESS;
fail:
- ralloc_free(cs);
- mtx_unlock(&device->meta_state.mtx);
- return result;
+ ralloc_free(cs);
+ mtx_unlock(&device->meta_state.mtx);
+ return result;
}
VkResult
radv_device_init_meta_resolve_compute_state(struct radv_device *device, bool on_demand)
{
- struct radv_meta_state *state = &device->meta_state;
- VkResult res;
-
- res = create_layout(device);
- if (res != VK_SUCCESS)
- goto fail;
-
- if (on_demand)
- return VK_SUCCESS;
-
- for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
- uint32_t samples = 1 << i;
-
- res = create_resolve_pipeline(device, samples, false, false,
- &state->resolve_compute.rc[i].pipeline);
- if (res != VK_SUCCESS)
- goto fail;
-
- res = create_resolve_pipeline(device, samples, true, false,
- &state->resolve_compute.rc[i].i_pipeline);
- if (res != VK_SUCCESS)
- goto fail;
-
- res = create_resolve_pipeline(device, samples, false, true,
- &state->resolve_compute.rc[i].srgb_pipeline);
- if (res != VK_SUCCESS)
- goto fail;
-
- res = create_depth_stencil_resolve_pipeline(device, samples,
- DEPTH_RESOLVE,
- VK_RESOLVE_MODE_AVERAGE_BIT_KHR,
- &state->resolve_compute.depth[i].average_pipeline);
- if (res != VK_SUCCESS)
- goto fail;
-
- res = create_depth_stencil_resolve_pipeline(device, samples,
- DEPTH_RESOLVE,
- VK_RESOLVE_MODE_MAX_BIT_KHR,
- &state->resolve_compute.depth[i].max_pipeline);
- if (res != VK_SUCCESS)
- goto fail;
-
- res = create_depth_stencil_resolve_pipeline(device, samples,
- DEPTH_RESOLVE,
- VK_RESOLVE_MODE_MIN_BIT_KHR,
- &state->resolve_compute.depth[i].min_pipeline);
- if (res != VK_SUCCESS)
- goto fail;
-
- res = create_depth_stencil_resolve_pipeline(device, samples,
- STENCIL_RESOLVE,
- VK_RESOLVE_MODE_MAX_BIT_KHR,
- &state->resolve_compute.stencil[i].max_pipeline);
- if (res != VK_SUCCESS)
- goto fail;
-
- res = create_depth_stencil_resolve_pipeline(device, samples,
- STENCIL_RESOLVE,
- VK_RESOLVE_MODE_MIN_BIT_KHR,
- &state->resolve_compute.stencil[i].min_pipeline);
- if (res != VK_SUCCESS)
- goto fail;
- }
-
- res = create_depth_stencil_resolve_pipeline(device, 0,
- DEPTH_RESOLVE,
- VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR,
- &state->resolve_compute.depth_zero_pipeline);
- if (res != VK_SUCCESS)
- goto fail;
-
- res = create_depth_stencil_resolve_pipeline(device, 0,
- STENCIL_RESOLVE,
- VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR,
- &state->resolve_compute.stencil_zero_pipeline);
- if (res != VK_SUCCESS)
- goto fail;
-
- return VK_SUCCESS;
+ struct radv_meta_state *state = &device->meta_state;
+ VkResult res;
+
+ res = create_layout(device);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ if (on_demand)
+ return VK_SUCCESS;
+
+ for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
+ uint32_t samples = 1 << i;
+
+ res = create_resolve_pipeline(device, samples, false, false,
+ &state->resolve_compute.rc[i].pipeline);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_resolve_pipeline(device, samples, true, false,
+ &state->resolve_compute.rc[i].i_pipeline);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_resolve_pipeline(device, samples, false, true,
+ &state->resolve_compute.rc[i].srgb_pipeline);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_depth_stencil_resolve_pipeline(
+ device, samples, DEPTH_RESOLVE, VK_RESOLVE_MODE_AVERAGE_BIT_KHR,
+ &state->resolve_compute.depth[i].average_pipeline);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_depth_stencil_resolve_pipeline(device, samples, DEPTH_RESOLVE,
+ VK_RESOLVE_MODE_MAX_BIT_KHR,
+ &state->resolve_compute.depth[i].max_pipeline);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_depth_stencil_resolve_pipeline(device, samples, DEPTH_RESOLVE,
+ VK_RESOLVE_MODE_MIN_BIT_KHR,
+ &state->resolve_compute.depth[i].min_pipeline);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_depth_stencil_resolve_pipeline(device, samples, STENCIL_RESOLVE,
+ VK_RESOLVE_MODE_MAX_BIT_KHR,
+ &state->resolve_compute.stencil[i].max_pipeline);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_depth_stencil_resolve_pipeline(device, samples, STENCIL_RESOLVE,
+ VK_RESOLVE_MODE_MIN_BIT_KHR,
+ &state->resolve_compute.stencil[i].min_pipeline);
+ if (res != VK_SUCCESS)
+ goto fail;
+ }
+
+ res = create_depth_stencil_resolve_pipeline(device, 0, DEPTH_RESOLVE,
+ VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR,
+ &state->resolve_compute.depth_zero_pipeline);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_depth_stencil_resolve_pipeline(device, 0, STENCIL_RESOLVE,
+ VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR,
+ &state->resolve_compute.stencil_zero_pipeline);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ return VK_SUCCESS;
fail:
- radv_device_finish_meta_resolve_compute_state(device);
- return res;
+ radv_device_finish_meta_resolve_compute_state(device);
+ return res;
}
void
radv_device_finish_meta_resolve_compute_state(struct radv_device *device)
{
- struct radv_meta_state *state = &device->meta_state;
- for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_compute.rc[i].pipeline,
- &state->alloc);
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_compute.rc[i].i_pipeline,
- &state->alloc);
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_compute.rc[i].srgb_pipeline,
- &state->alloc);
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_compute.depth[i].average_pipeline,
- &state->alloc);
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_compute.depth[i].max_pipeline,
- &state->alloc);
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_compute.depth[i].min_pipeline,
- &state->alloc);
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_compute.stencil[i].max_pipeline,
- &state->alloc);
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_compute.stencil[i].min_pipeline,
- &state->alloc);
- }
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_compute.depth_zero_pipeline,
- &state->alloc);
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_compute.stencil_zero_pipeline,
- &state->alloc);
-
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- state->resolve_compute.ds_layout,
- &state->alloc);
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->resolve_compute.p_layout,
- &state->alloc);
+ struct radv_meta_state *state = &device->meta_state;
+ for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
+ radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.rc[i].pipeline,
+ &state->alloc);
+
+ radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.rc[i].i_pipeline,
+ &state->alloc);
+
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->resolve_compute.rc[i].srgb_pipeline, &state->alloc);
+
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->resolve_compute.depth[i].average_pipeline, &state->alloc);
+
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->resolve_compute.depth[i].max_pipeline, &state->alloc);
+
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->resolve_compute.depth[i].min_pipeline, &state->alloc);
+
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->resolve_compute.stencil[i].max_pipeline, &state->alloc);
+
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->resolve_compute.stencil[i].min_pipeline, &state->alloc);
+ }
+
+ radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.depth_zero_pipeline,
+ &state->alloc);
+
+ radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.stencil_zero_pipeline,
+ &state->alloc);
+
+ radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->resolve_compute.ds_layout,
+ &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->resolve_compute.p_layout,
+ &state->alloc);
}
static VkPipeline *
-radv_get_resolve_pipeline(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image_view *src_iview)
+radv_get_resolve_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview)
{
- struct radv_device *device = cmd_buffer->device;
- struct radv_meta_state *state = &device->meta_state;
- uint32_t samples = src_iview->image->info.samples;
- uint32_t samples_log2 = ffs(samples) - 1;
- VkPipeline *pipeline;
-
- if (vk_format_is_int(src_iview->vk_format))
- pipeline = &state->resolve_compute.rc[samples_log2].i_pipeline;
- else if (vk_format_is_srgb(src_iview->vk_format))
- pipeline = &state->resolve_compute.rc[samples_log2].srgb_pipeline;
- else
- pipeline = &state->resolve_compute.rc[samples_log2].pipeline;
-
- if (!*pipeline) {
- VkResult ret;
-
- ret = create_resolve_pipeline(device, samples,
- vk_format_is_int(src_iview->vk_format),
- vk_format_is_srgb(src_iview->vk_format),
- pipeline);
- if (ret != VK_SUCCESS) {
- cmd_buffer->record_result = ret;
- return NULL;
- }
- }
-
- return pipeline;
+ struct radv_device *device = cmd_buffer->device;
+ struct radv_meta_state *state = &device->meta_state;
+ uint32_t samples = src_iview->image->info.samples;
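+   /* Sample counts are powers of two, so ffs(samples) - 1 == log2(samples). */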
+ uint32_t samples_log2 = ffs(samples) - 1;
+ VkPipeline *pipeline;
+
+ if (vk_format_is_int(src_iview->vk_format))
+ pipeline = &state->resolve_compute.rc[samples_log2].i_pipeline;
+ else if (vk_format_is_srgb(src_iview->vk_format))
+ pipeline = &state->resolve_compute.rc[samples_log2].srgb_pipeline;
+ else
+ pipeline = &state->resolve_compute.rc[samples_log2].pipeline;
+
+ if (!*pipeline) {
+ VkResult ret;
+
+ ret = create_resolve_pipeline(device, samples, vk_format_is_int(src_iview->vk_format),
+ vk_format_is_srgb(src_iview->vk_format), pipeline);
+ if (ret != VK_SUCCESS) {
+ cmd_buffer->record_result = ret;
+ return NULL;
+ }
+ }
+
+ return pipeline;
}
static void
-emit_resolve(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image_view *src_iview,
- struct radv_image_view *dest_iview,
- const VkOffset2D *src_offset,
- const VkOffset2D *dest_offset,
- const VkExtent2D *resolve_extent)
+emit_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview,
+ struct radv_image_view *dest_iview, const VkOffset2D *src_offset,
+ const VkOffset2D *dest_offset, const VkExtent2D *resolve_extent)
{
- struct radv_device *device = cmd_buffer->device;
- VkPipeline *pipeline;
-
- radv_meta_push_descriptor_set(cmd_buffer,
- VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.resolve_compute.p_layout,
- 0, /* set */
- 2, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]) {
- {
- .sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(src_iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL },
- }
- },
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 1,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]) {
- {
+ struct radv_device *device = cmd_buffer->device;
+ VkPipeline *pipeline;
+
+ radv_meta_push_descriptor_set(
+ cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.resolve_compute.p_layout,
+ 0, /* set */
+ 2, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .pImageInfo =
+ (VkDescriptorImageInfo[]){
+ {.sampler = VK_NULL_HANDLE,
+ .imageView = radv_image_view_to_handle(src_iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
+ }},
+ {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 1,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .pImageInfo = (VkDescriptorImageInfo[]){
+ {
.sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(dest_iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- }
- }
- });
-
- pipeline = radv_get_resolve_pipeline(cmd_buffer, src_iview);
-
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
-
- unsigned push_constants[4] = {
- src_offset->x,
- src_offset->y,
- dest_offset->x,
- dest_offset->y,
- };
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.resolve_compute.p_layout,
- VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
- push_constants);
- radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height, 1);
-
+ .imageView = radv_image_view_to_handle(dest_iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }}});
+
+ pipeline = radv_get_resolve_pipeline(cmd_buffer, src_iview);
+
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
+ *pipeline);
+
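+   /* These four values fill the 16-byte push constant range declared in create_layout(). */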
+ unsigned push_constants[4] = {
+ src_offset->x,
+ src_offset->y,
+ dest_offset->x,
+ dest_offset->y,
+ };
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+ device->meta_state.resolve_compute.p_layout, VK_SHADER_STAGE_COMPUTE_BIT,
+ 0, 16, push_constants);
+ radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height, 1);
}
static void
-emit_depth_stencil_resolve(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image_view *src_iview,
- struct radv_image_view *dest_iview,
- const VkExtent3D *resolve_extent,
- VkImageAspectFlags aspects,
- VkResolveModeFlagBits resolve_mode)
+emit_depth_stencil_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview,
+ struct radv_image_view *dest_iview, const VkExtent3D *resolve_extent,
+ VkImageAspectFlags aspects, VkResolveModeFlagBits resolve_mode)
{
- struct radv_device *device = cmd_buffer->device;
- const uint32_t samples = src_iview->image->info.samples;
- const uint32_t samples_log2 = ffs(samples) - 1;
- VkPipeline *pipeline;
-
- radv_meta_push_descriptor_set(cmd_buffer,
- VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.resolve_compute.p_layout,
- 0, /* set */
- 2, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]) {
- {
- .sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(src_iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL },
- }
- },
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 1,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]) {
- {
+ struct radv_device *device = cmd_buffer->device;
+ const uint32_t samples = src_iview->image->info.samples;
+ const uint32_t samples_log2 = ffs(samples) - 1;
+ VkPipeline *pipeline;
+
+ radv_meta_push_descriptor_set(
+ cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.resolve_compute.p_layout,
+ 0, /* set */
+ 2, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .pImageInfo =
+ (VkDescriptorImageInfo[]){
+ {.sampler = VK_NULL_HANDLE,
+ .imageView = radv_image_view_to_handle(src_iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
+ }},
+ {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 1,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+ .pImageInfo = (VkDescriptorImageInfo[]){
+ {
.sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(dest_iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- }
- }
- });
-
- switch (resolve_mode) {
- case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR:
- if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
- pipeline = &device->meta_state.resolve_compute.depth_zero_pipeline;
- else
- pipeline = &device->meta_state.resolve_compute.stencil_zero_pipeline;
- break;
- case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
- assert(aspects == VK_IMAGE_ASPECT_DEPTH_BIT);
- pipeline = &device->meta_state.resolve_compute.depth[samples_log2].average_pipeline;
- break;
- case VK_RESOLVE_MODE_MIN_BIT_KHR:
- if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
- pipeline = &device->meta_state.resolve_compute.depth[samples_log2].min_pipeline;
- else
- pipeline = &device->meta_state.resolve_compute.stencil[samples_log2].min_pipeline;
- break;
- case VK_RESOLVE_MODE_MAX_BIT_KHR:
- if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
- pipeline = &device->meta_state.resolve_compute.depth[samples_log2].max_pipeline;
- else
- pipeline = &device->meta_state.resolve_compute.stencil[samples_log2].max_pipeline;
- break;
- default:
- unreachable("invalid resolve mode");
- }
-
- if (!*pipeline) {
- int index = aspects == VK_IMAGE_ASPECT_DEPTH_BIT ? DEPTH_RESOLVE : STENCIL_RESOLVE;
- VkResult ret;
-
- ret = create_depth_stencil_resolve_pipeline(device, samples,
- index, resolve_mode,
- pipeline);
- if (ret != VK_SUCCESS) {
- cmd_buffer->record_result = ret;
- return;
- }
- }
-
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
-
- radv_unaligned_dispatch(cmd_buffer, resolve_extent->width,
- resolve_extent->height, resolve_extent->depth);
-
+ .imageView = radv_image_view_to_handle(dest_iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }}});
+
+ switch (resolve_mode) {
+ case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR:
+ if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
+ pipeline = &device->meta_state.resolve_compute.depth_zero_pipeline;
+ else
+ pipeline = &device->meta_state.resolve_compute.stencil_zero_pipeline;
+ break;
+ case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
+ assert(aspects == VK_IMAGE_ASPECT_DEPTH_BIT);
+ pipeline = &device->meta_state.resolve_compute.depth[samples_log2].average_pipeline;
+ break;
+ case VK_RESOLVE_MODE_MIN_BIT_KHR:
+ if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
+ pipeline = &device->meta_state.resolve_compute.depth[samples_log2].min_pipeline;
+ else
+ pipeline = &device->meta_state.resolve_compute.stencil[samples_log2].min_pipeline;
+ break;
+ case VK_RESOLVE_MODE_MAX_BIT_KHR:
+ if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
+ pipeline = &device->meta_state.resolve_compute.depth[samples_log2].max_pipeline;
+ else
+ pipeline = &device->meta_state.resolve_compute.stencil[samples_log2].max_pipeline;
+ break;
+ default:
+ unreachable("invalid resolve mode");
+ }
+
+ if (!*pipeline) {
+ int index = aspects == VK_IMAGE_ASPECT_DEPTH_BIT ? DEPTH_RESOLVE : STENCIL_RESOLVE;
+ VkResult ret;
+
+ ret = create_depth_stencil_resolve_pipeline(device, samples, index, resolve_mode, pipeline);
+ if (ret != VK_SUCCESS) {
+ cmd_buffer->record_result = ret;
+ return;
+ }
+ }
+
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
+ *pipeline);
+
+ radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height,
+ resolve_extent->depth);
}
-void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *src_image,
- VkFormat src_format,
- VkImageLayout src_image_layout,
- struct radv_image *dest_image,
- VkFormat dest_format,
- VkImageLayout dest_image_layout,
- const VkImageResolve2KHR *region)
+void
+radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,
+ VkFormat src_format, VkImageLayout src_image_layout,
+ struct radv_image *dest_image, VkFormat dest_format,
+ VkImageLayout dest_image_layout, const VkImageResolve2KHR *region)
{
- struct radv_meta_saved_state saved_state;
-
- radv_decompress_resolve_src(cmd_buffer, src_image, src_image_layout,
- region);
-
- /* For partial resolves, DCC should be decompressed before resolving
- * because the metadata is re-initialized to the uncompressed after.
- */
- uint32_t queue_mask = radv_image_queue_family_mask(dest_image,
- cmd_buffer->queue_family_index,
- cmd_buffer->queue_family_index);
-
- if (radv_layout_dcc_compressed(cmd_buffer->device, dest_image,
- dest_image_layout, false, queue_mask) &&
- (region->dstOffset.x ||
- region->dstOffset.y ||
- region->dstOffset.z ||
- region->extent.width != dest_image->info.width ||
- region->extent.height != dest_image->info.height ||
- region->extent.depth != dest_image->info.depth)) {
- radv_decompress_dcc(cmd_buffer, dest_image, &(VkImageSubresourceRange) {
- .aspectMask = region->dstSubresource.aspectMask,
- .baseMipLevel = region->dstSubresource.mipLevel,
- .levelCount = 1,
- .baseArrayLayer = region->dstSubresource.baseArrayLayer,
- .layerCount = region->dstSubresource.layerCount,
- });
- }
-
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_COMPUTE_PIPELINE |
- RADV_META_SAVE_CONSTANTS |
- RADV_META_SAVE_DESCRIPTORS);
-
- assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
- assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
- assert(region->srcSubresource.layerCount == region->dstSubresource.layerCount);
-
- const uint32_t src_base_layer =
- radv_meta_get_iview_layer(src_image, &region->srcSubresource,
- &region->srcOffset);
-
- const uint32_t dest_base_layer =
- radv_meta_get_iview_layer(dest_image, &region->dstSubresource,
- &region->dstOffset);
-
- const struct VkExtent3D extent =
- radv_sanitize_image_extent(src_image->type, region->extent);
- const struct VkOffset3D srcOffset =
- radv_sanitize_image_offset(src_image->type, region->srcOffset);
- const struct VkOffset3D dstOffset =
- radv_sanitize_image_offset(dest_image->type, region->dstOffset);
-
- for (uint32_t layer = 0; layer < region->srcSubresource.layerCount;
- ++layer) {
-
- struct radv_image_view src_iview;
- radv_image_view_init(&src_iview, cmd_buffer->device,
- &(VkImageViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(src_image),
- .viewType = radv_meta_get_view_type(src_image),
- .format = src_format,
- .subresourceRange = {
- .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .baseMipLevel = region->srcSubresource.mipLevel,
- .levelCount = 1,
- .baseArrayLayer = src_base_layer + layer,
- .layerCount = 1,
- },
- }, NULL);
-
- struct radv_image_view dest_iview;
- radv_image_view_init(&dest_iview, cmd_buffer->device,
- &(VkImageViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(dest_image),
- .viewType = radv_meta_get_view_type(dest_image),
- .format = vk_to_non_srgb_format(dest_format),
- .subresourceRange = {
- .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .baseMipLevel = region->dstSubresource.mipLevel,
- .levelCount = 1,
- .baseArrayLayer = dest_base_layer + layer,
- .layerCount = 1,
- },
- }, NULL);
-
- emit_resolve(cmd_buffer,
- &src_iview,
- &dest_iview,
- &(VkOffset2D) {srcOffset.x, srcOffset.y },
- &(VkOffset2D) {dstOffset.x, dstOffset.y },
- &(VkExtent2D) {extent.width, extent.height });
- }
-
- radv_meta_restore(&saved_state, cmd_buffer);
-
- if (!radv_image_use_dcc_image_stores(cmd_buffer->device, dest_image) &&
- radv_layout_dcc_compressed(cmd_buffer->device, dest_image,
- dest_image_layout, false, queue_mask)) {
-
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_INV_VCACHE;
-
- VkImageSubresourceRange range = {
- .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .baseMipLevel = region->dstSubresource.mipLevel,
- .levelCount = 1,
- .baseArrayLayer = dest_base_layer,
- .layerCount = region->dstSubresource.layerCount,
- };
-
- cmd_buffer->state.flush_bits |=
- radv_init_dcc(cmd_buffer, dest_image, &range, 0xffffffff);
- }
+ struct radv_meta_saved_state saved_state;
+
+ radv_decompress_resolve_src(cmd_buffer, src_image, src_image_layout, region);
+
+   /* For partial resolves, DCC should be decompressed before resolving
+    * because the metadata is re-initialized to the uncompressed state afterwards.
+    */
+ uint32_t queue_mask = radv_image_queue_family_mask(dest_image, cmd_buffer->queue_family_index,
+ cmd_buffer->queue_family_index);
+
+ if (radv_layout_dcc_compressed(cmd_buffer->device, dest_image, dest_image_layout, false,
+ queue_mask) &&
+ (region->dstOffset.x || region->dstOffset.y || region->dstOffset.z ||
+ region->extent.width != dest_image->info.width ||
+ region->extent.height != dest_image->info.height ||
+ region->extent.depth != dest_image->info.depth)) {
+ radv_decompress_dcc(cmd_buffer, dest_image,
+ &(VkImageSubresourceRange){
+ .aspectMask = region->dstSubresource.aspectMask,
+ .baseMipLevel = region->dstSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = region->dstSubresource.baseArrayLayer,
+ .layerCount = region->dstSubresource.layerCount,
+ });
+ }
+
+ radv_meta_save(
+ &saved_state, cmd_buffer,
+ RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
+
+ assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
+ assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
+ assert(region->srcSubresource.layerCount == region->dstSubresource.layerCount);
+
+ const uint32_t src_base_layer =
+ radv_meta_get_iview_layer(src_image, &region->srcSubresource, &region->srcOffset);
+
+ const uint32_t dest_base_layer =
+ radv_meta_get_iview_layer(dest_image, &region->dstSubresource, &region->dstOffset);
+
+ const struct VkExtent3D extent = radv_sanitize_image_extent(src_image->type, region->extent);
+ const struct VkOffset3D srcOffset =
+ radv_sanitize_image_offset(src_image->type, region->srcOffset);
+ const struct VkOffset3D dstOffset =
+ radv_sanitize_image_offset(dest_image->type, region->dstOffset);
+
+ for (uint32_t layer = 0; layer < region->srcSubresource.layerCount; ++layer) {
+
+ struct radv_image_view src_iview;
+ radv_image_view_init(&src_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(src_image),
+ .viewType = radv_meta_get_view_type(src_image),
+ .format = src_format,
+ .subresourceRange =
+ {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = region->srcSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = src_base_layer + layer,
+ .layerCount = 1,
+ },
+ },
+ NULL);
+
+ struct radv_image_view dest_iview;
+ radv_image_view_init(&dest_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(dest_image),
+ .viewType = radv_meta_get_view_type(dest_image),
+ .format = vk_to_non_srgb_format(dest_format),
+ .subresourceRange =
+ {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = region->dstSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = dest_base_layer + layer,
+ .layerCount = 1,
+ },
+ },
+ NULL);
+
+ emit_resolve(cmd_buffer, &src_iview, &dest_iview, &(VkOffset2D){srcOffset.x, srcOffset.y},
+ &(VkOffset2D){dstOffset.x, dstOffset.y},
+ &(VkExtent2D){extent.width, extent.height});
+ }
+
+ radv_meta_restore(&saved_state, cmd_buffer);
+
+ if (!radv_image_use_dcc_image_stores(cmd_buffer->device, dest_image) &&
+ radv_layout_dcc_compressed(cmd_buffer->device, dest_image, dest_image_layout, false,
+ queue_mask)) {
+
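+      /* Wait for the compute resolve and invalidate the vector cache
+       * before re-initializing the DCC metadata.
+       */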
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE;
+
+ VkImageSubresourceRange range = {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = region->dstSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = dest_base_layer,
+ .layerCount = region->dstSubresource.layerCount,
+ };
+
+ cmd_buffer->state.flush_bits |= radv_init_dcc(cmd_buffer, dest_image, &range, 0xffffffff);
+ }
}
/**
@@ -886,174 +783,164 @@ void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
void
radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
- const struct radv_subpass *subpass = cmd_buffer->state.subpass;
- struct radv_subpass_barrier barrier;
- uint32_t layer_count = fb->layers;
-
- if (subpass->view_mask)
- layer_count = util_last_bit(subpass->view_mask);
-
- /* Resolves happen before the end-of-subpass barriers get executed, so
- * we have to make the attachment shader-readable.
- */
- barrier.src_stage_mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
- barrier.src_access_mask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
- barrier.dst_access_mask = VK_ACCESS_SHADER_READ_BIT |
- VK_ACCESS_SHADER_WRITE_BIT;
- radv_subpass_barrier(cmd_buffer, &barrier);
-
- for (uint32_t i = 0; i < subpass->color_count; ++i) {
- struct radv_subpass_attachment src_att = subpass->color_attachments[i];
- struct radv_subpass_attachment dst_att = subpass->resolve_attachments[i];
-
- if (dst_att.attachment == VK_ATTACHMENT_UNUSED)
- continue;
-
- struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview;
- struct radv_image_view *dst_iview = cmd_buffer->state.attachments[dst_att.attachment].iview;
-
- VkImageResolve2KHR region = {
- .sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2_KHR,
- .extent = (VkExtent3D){ fb->width, fb->height, 1 },
- .srcSubresource = (VkImageSubresourceLayers) {
- .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .mipLevel = src_iview->base_mip,
- .baseArrayLayer = src_iview->base_layer,
- .layerCount = layer_count,
- },
- .dstSubresource = (VkImageSubresourceLayers) {
- .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .mipLevel = dst_iview->base_mip,
- .baseArrayLayer = dst_iview->base_layer,
- .layerCount = layer_count,
- },
- .srcOffset = (VkOffset3D){ 0, 0, 0 },
- .dstOffset = (VkOffset3D){ 0, 0, 0 },
- };
-
- radv_meta_resolve_compute_image(cmd_buffer,
- src_iview->image,
- src_iview->vk_format,
- src_att.layout,
- dst_iview->image,
- dst_iview->vk_format,
- dst_att.layout,
- &region);
- }
-
- cmd_buffer->state.flush_bits |=
- RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_INV_VCACHE |
- radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, NULL);
+ struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
+ const struct radv_subpass *subpass = cmd_buffer->state.subpass;
+ struct radv_subpass_barrier barrier;
+ uint32_t layer_count = fb->layers;
+
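+   /* With multiview, util_last_bit(view_mask) is the highest view index + 1. */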
+ if (subpass->view_mask)
+ layer_count = util_last_bit(subpass->view_mask);
+
+ /* Resolves happen before the end-of-subpass barriers get executed, so
+ * we have to make the attachment shader-readable.
+ */
+ barrier.src_stage_mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+ barrier.src_access_mask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+ barrier.dst_access_mask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
+ radv_subpass_barrier(cmd_buffer, &barrier);
+
+ for (uint32_t i = 0; i < subpass->color_count; ++i) {
+ struct radv_subpass_attachment src_att = subpass->color_attachments[i];
+ struct radv_subpass_attachment dst_att = subpass->resolve_attachments[i];
+
+ if (dst_att.attachment == VK_ATTACHMENT_UNUSED)
+ continue;
+
+ struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview;
+ struct radv_image_view *dst_iview = cmd_buffer->state.attachments[dst_att.attachment].iview;
+
+ VkImageResolve2KHR region = {
+ .sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2_KHR,
+ .extent = (VkExtent3D){fb->width, fb->height, 1},
+ .srcSubresource =
+ (VkImageSubresourceLayers){
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .mipLevel = src_iview->base_mip,
+ .baseArrayLayer = src_iview->base_layer,
+ .layerCount = layer_count,
+ },
+ .dstSubresource =
+ (VkImageSubresourceLayers){
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .mipLevel = dst_iview->base_mip,
+ .baseArrayLayer = dst_iview->base_layer,
+ .layerCount = layer_count,
+ },
+ .srcOffset = (VkOffset3D){0, 0, 0},
+ .dstOffset = (VkOffset3D){0, 0, 0},
+ };
+
+ radv_meta_resolve_compute_image(cmd_buffer, src_iview->image, src_iview->vk_format,
+ src_att.layout, dst_iview->image, dst_iview->vk_format,
+ dst_att.layout, &region);
+ }
+
+ cmd_buffer->state.flush_bits |=
+ RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
+ radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, NULL);
}
void
radv_depth_stencil_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer,
- VkImageAspectFlags aspects,
- VkResolveModeFlagBits resolve_mode)
+ VkImageAspectFlags aspects,
+ VkResolveModeFlagBits resolve_mode)
{
- struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
- const struct radv_subpass *subpass = cmd_buffer->state.subpass;
- struct radv_meta_saved_state saved_state;
- uint32_t layer_count = fb->layers;
-
- if (subpass->view_mask)
- layer_count = util_last_bit(subpass->view_mask);
-
- /* Resolves happen before the end-of-subpass barriers get executed, so
- * we have to make the attachment shader-readable.
- */
- cmd_buffer->state.flush_bits |=
- radv_src_access_flush(cmd_buffer, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, NULL) |
- radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_READ_BIT, NULL) |
- radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, NULL);
-
- struct radv_subpass_attachment src_att = *subpass->depth_stencil_attachment;
- struct radv_image_view *src_iview =
- cmd_buffer->state.attachments[src_att.attachment].iview;
- struct radv_image *src_image = src_iview->image;
-
- VkImageResolve2KHR region = {0};
- region.sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2_KHR;
- region.srcSubresource.aspectMask = aspects;
- region.srcSubresource.mipLevel = 0;
- region.srcSubresource.baseArrayLayer = src_iview->base_layer;
- region.srcSubresource.layerCount = layer_count;
-
- radv_decompress_resolve_src(cmd_buffer, src_image, src_att.layout, &region);
-
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_COMPUTE_PIPELINE |
- RADV_META_SAVE_DESCRIPTORS);
-
- struct radv_subpass_attachment dest_att = *subpass->ds_resolve_attachment;
- struct radv_image_view *dst_iview =
- cmd_buffer->state.attachments[dest_att.attachment].iview;
- struct radv_image *dst_image = dst_iview->image;
-
- struct radv_image_view tsrc_iview;
- radv_image_view_init(&tsrc_iview, cmd_buffer->device,
- &(VkImageViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(src_image),
- .viewType = radv_meta_get_view_type(src_image),
- .format = src_iview->vk_format,
- .subresourceRange = {
- .aspectMask = aspects,
- .baseMipLevel = src_iview->base_mip,
- .levelCount = 1,
- .baseArrayLayer = src_iview->base_layer,
- .layerCount = layer_count,
- },
- }, NULL);
-
- struct radv_image_view tdst_iview;
- radv_image_view_init(&tdst_iview, cmd_buffer->device,
- &(VkImageViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(dst_image),
- .viewType = radv_meta_get_view_type(dst_image),
- .format = dst_iview->vk_format,
- .subresourceRange = {
- .aspectMask = aspects,
- .baseMipLevel = dst_iview->base_mip,
- .levelCount = 1,
- .baseArrayLayer = dst_iview->base_layer,
- .layerCount = layer_count,
- },
- }, NULL);
-
- emit_depth_stencil_resolve(cmd_buffer, &tsrc_iview, &tdst_iview,
- &(VkExtent3D) { fb->width, fb->height, layer_count },
- aspects, resolve_mode);
-
- cmd_buffer->state.flush_bits |=
- RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_INV_VCACHE |
- radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, NULL);
-
- VkImageLayout layout =
- cmd_buffer->state.attachments[dest_att.attachment].current_layout;
- uint32_t queue_mask = radv_image_queue_family_mask(dst_image,
- cmd_buffer->queue_family_index,
- cmd_buffer->queue_family_index);
-
- if (radv_layout_is_htile_compressed(cmd_buffer->device, dst_image,
- layout, false, queue_mask)) {
- VkImageSubresourceRange range = {0};
- range.aspectMask = aspects;
- range.baseMipLevel = dst_iview->base_mip;
- range.levelCount = 1;
- range.baseArrayLayer = dst_iview->base_layer;
- range.layerCount = layer_count;
-
- uint32_t htile_value =
- radv_get_htile_initial_value(cmd_buffer->device, dst_image);
-
- cmd_buffer->state.flush_bits |=
- radv_clear_htile(cmd_buffer, dst_image, &range, htile_value);
- }
-
- radv_meta_restore(&saved_state, cmd_buffer);
+ struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
+ const struct radv_subpass *subpass = cmd_buffer->state.subpass;
+ struct radv_meta_saved_state saved_state;
+ uint32_t layer_count = fb->layers;
+
+ if (subpass->view_mask)
+ layer_count = util_last_bit(subpass->view_mask);
+
+ /* Resolves happen before the end-of-subpass barriers get executed, so
+ * we have to make the attachment shader-readable.
+ */
+ cmd_buffer->state.flush_bits |=
+ radv_src_access_flush(cmd_buffer, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, NULL) |
+ radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_READ_BIT, NULL) |
+ radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, NULL);
+
+ struct radv_subpass_attachment src_att = *subpass->depth_stencil_attachment;
+ struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview;
+ struct radv_image *src_image = src_iview->image;
+
+ VkImageResolve2KHR region = {0};
+ region.sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2_KHR;
+ region.srcSubresource.aspectMask = aspects;
+ region.srcSubresource.mipLevel = 0;
+ region.srcSubresource.baseArrayLayer = src_iview->base_layer;
+ region.srcSubresource.layerCount = layer_count;
+
+ radv_decompress_resolve_src(cmd_buffer, src_image, src_att.layout, &region);
+
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS);
+
+ struct radv_subpass_attachment dest_att = *subpass->ds_resolve_attachment;
+ struct radv_image_view *dst_iview = cmd_buffer->state.attachments[dest_att.attachment].iview;
+ struct radv_image *dst_image = dst_iview->image;
+
+ struct radv_image_view tsrc_iview;
+ radv_image_view_init(&tsrc_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(src_image),
+ .viewType = radv_meta_get_view_type(src_image),
+ .format = src_iview->vk_format,
+ .subresourceRange =
+ {
+ .aspectMask = aspects,
+ .baseMipLevel = src_iview->base_mip,
+ .levelCount = 1,
+ .baseArrayLayer = src_iview->base_layer,
+ .layerCount = layer_count,
+ },
+ },
+ NULL);
+
+ struct radv_image_view tdst_iview;
+ radv_image_view_init(&tdst_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(dst_image),
+ .viewType = radv_meta_get_view_type(dst_image),
+ .format = dst_iview->vk_format,
+ .subresourceRange =
+ {
+ .aspectMask = aspects,
+ .baseMipLevel = dst_iview->base_mip,
+ .levelCount = 1,
+ .baseArrayLayer = dst_iview->base_layer,
+ .layerCount = layer_count,
+ },
+ },
+ NULL);
+
+ emit_depth_stencil_resolve(cmd_buffer, &tsrc_iview, &tdst_iview,
+ &(VkExtent3D){fb->width, fb->height, layer_count}, aspects,
+ resolve_mode);
+
+ cmd_buffer->state.flush_bits |=
+ RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
+ radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, NULL);
+
+ VkImageLayout layout = cmd_buffer->state.attachments[dest_att.attachment].current_layout;
+ uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->queue_family_index,
+ cmd_buffer->queue_family_index);
+
+ if (radv_layout_is_htile_compressed(cmd_buffer->device, dst_image, layout, false, queue_mask)) {
+ VkImageSubresourceRange range = {0};
+ range.aspectMask = aspects;
+ range.baseMipLevel = dst_iview->base_mip;
+ range.levelCount = 1;
+ range.baseArrayLayer = dst_iview->base_layer;
+ range.layerCount = layer_count;
+
+ uint32_t htile_value = radv_get_htile_initial_value(cmd_buffer->device, dst_image);
+
+ cmd_buffer->state.flush_bits |= radv_clear_htile(cmd_buffer, dst_image, &range, htile_value);
+ }
+
+ radv_meta_restore(&saved_state, cmd_buffer);
}
diff --git a/src/amd/vulkan/radv_meta_resolve_fs.c b/src/amd/vulkan/radv_meta_resolve_fs.c
index eae098e1a6b..d926bf62775 100644
--- a/src/amd/vulkan/radv_meta_resolve_fs.c
+++ b/src/amd/vulkan/radv_meta_resolve_fs.c
@@ -21,1165 +21,1094 @@
* IN THE SOFTWARE.
*/
-
#include <assert.h>
#include <stdbool.h>
+#include "nir/nir_builder.h"
#include "radv_meta.h"
#include "radv_private.h"
-#include "nir/nir_builder.h"
#include "sid.h"
#include "vk_format.h"
static nir_shader *
build_nir_vertex_shader(void)
{
- const struct glsl_type *vec4 = glsl_vec4_type();
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, NULL, "meta_resolve_vs");
+ const struct glsl_type *vec4 = glsl_vec4_type();
+ nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, NULL, "meta_resolve_vs");
- nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out,
- vec4, "gl_Position");
- pos_out->data.location = VARYING_SLOT_POS;
+ nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position");
+ pos_out->data.location = VARYING_SLOT_POS;
- nir_ssa_def *outvec = radv_meta_gen_rect_vertices(&b);
+ nir_ssa_def *outvec = radv_meta_gen_rect_vertices(&b);
- nir_store_var(&b, pos_out, outvec, 0xf);
- return b.shader;
+ nir_store_var(&b, pos_out, outvec, 0xf);
+ return b.shader;
}
static nir_shader *
build_resolve_fragment_shader(struct radv_device *dev, bool is_integer, int samples)
{
- const struct glsl_type *vec4 = glsl_vec4_type();
- const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS,
- false,
- false,
- GLSL_TYPE_FLOAT);
+ const struct glsl_type *vec4 = glsl_vec4_type();
+ const struct glsl_type *sampler_type =
+ glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, GLSL_TYPE_FLOAT);
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, "meta_resolve_fs-%d-%s", samples, is_integer ? "int" : "float");
+ nir_builder b = nir_builder_init_simple_shader(
+ MESA_SHADER_FRAGMENT, NULL, "meta_resolve_fs-%d-%s", samples, is_integer ? "int" : "float");
- nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
- sampler_type, "s_tex");
- input_img->data.descriptor_set = 0;
- input_img->data.binding = 0;
+ nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
+ input_img->data.descriptor_set = 0;
+ input_img->data.binding = 0;
- nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
- vec4, "f_color");
- color_out->data.location = FRAG_RESULT_DATA0;
+ nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color");
+ color_out->data.location = FRAG_RESULT_DATA0;
- nir_ssa_def *pos_in = nir_channels(&b, nir_load_frag_coord(&b), 0x3);
- nir_ssa_def *src_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), 0, 8);
+ nir_ssa_def *pos_in = nir_channels(&b, nir_load_frag_coord(&b), 0x3);
+ nir_ssa_def *src_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), 0, 8);
- nir_ssa_def *pos_int = nir_f2i32(&b, pos_in);
+ nir_ssa_def *pos_int = nir_f2i32(&b, pos_in);
- nir_ssa_def *img_coord = nir_channels(&b, nir_iadd(&b, pos_int, src_offset), 0x3);
- nir_variable *color = nir_local_variable_create(b.impl, glsl_vec4_type(), "color");
+ nir_ssa_def *img_coord = nir_channels(&b, nir_iadd(&b, pos_int, src_offset), 0x3);
+ nir_variable *color = nir_local_variable_create(b.impl, glsl_vec4_type(), "color");
- radv_meta_build_resolve_shader_core(&b, is_integer, samples, input_img,
- color, img_coord);
+ radv_meta_build_resolve_shader_core(&b, is_integer, samples, input_img, color, img_coord);
- nir_ssa_def *outval = nir_load_var(&b, color);
- nir_store_var(&b, color_out, outval, 0xf);
- return b.shader;
+ nir_ssa_def *outval = nir_load_var(&b, color);
+ nir_store_var(&b, color_out, outval, 0xf);
+ return b.shader;
}
-
static VkResult
create_layout(struct radv_device *device)
{
- VkResult result;
- /*
- * one descriptors for the image being sampled
- */
- VkDescriptorSetLayoutCreateInfo ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 1,
- .pBindings = (VkDescriptorSetLayoutBinding[]) {
- {
- .binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
- .pImmutableSamplers = NULL
- },
- }
- };
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
- &ds_create_info,
- &device->meta_state.alloc,
- &device->meta_state.resolve_fragment.ds_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
-
- VkPipelineLayoutCreateInfo pl_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 1,
- .pSetLayouts = &device->meta_state.resolve_fragment.ds_layout,
- .pushConstantRangeCount = 1,
- .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_FRAGMENT_BIT, 0, 8},
- };
-
- result = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &pl_create_info,
- &device->meta_state.alloc,
- &device->meta_state.resolve_fragment.p_layout);
- if (result != VK_SUCCESS)
- goto fail;
- return VK_SUCCESS;
+ VkResult result;
+   /*
+    * One descriptor for the image being sampled.
+    */
+ VkDescriptorSetLayoutCreateInfo ds_create_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 1,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.resolve_fragment.ds_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineLayoutCreateInfo pl_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &device->meta_state.resolve_fragment.ds_layout,
+ .pushConstantRangeCount = 1,
+ .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_FRAGMENT_BIT, 0, 8},
+ };
+
+ result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.resolve_fragment.p_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+ return VK_SUCCESS;
fail:
- return result;
+ return result;
}
static const VkPipelineVertexInputStateCreateInfo normal_vi_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
- .vertexBindingDescriptionCount = 0,
- .vertexAttributeDescriptionCount = 0,
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+ .vertexBindingDescriptionCount = 0,
+ .vertexAttributeDescriptionCount = 0,
};
static VkResult
-create_resolve_pipeline(struct radv_device *device,
- int samples_log2,
- VkFormat format)
+create_resolve_pipeline(struct radv_device *device, int samples_log2, VkFormat format)
{
- mtx_lock(&device->meta_state.mtx);
-
- unsigned fs_key = radv_format_meta_fs_key(device, format);
- VkPipeline *pipeline = &device->meta_state.resolve_fragment.rc[samples_log2].pipeline[fs_key];
- if (*pipeline) {
- mtx_unlock(&device->meta_state.mtx);
- return VK_SUCCESS;
- }
-
- VkResult result;
- bool is_integer = false;
- uint32_t samples = 1 << samples_log2;
- const VkPipelineVertexInputStateCreateInfo *vi_create_info;
- vi_create_info = &normal_vi_create_info;
- if (vk_format_is_int(format))
- is_integer = true;
-
- nir_shader *fs = build_resolve_fragment_shader(device, is_integer, samples);
- nir_shader *vs = build_nir_vertex_shader();
-
- VkRenderPass *rp = &device->meta_state.resolve_fragment.rc[samples_log2].render_pass[fs_key][0];
-
- assert(!*rp);
-
- VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
- {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_VERTEX_BIT,
- .module = vk_shader_module_handle_from_nir(vs),
- .pName = "main",
- .pSpecializationInfo = NULL
- }, {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
- .module = vk_shader_module_handle_from_nir(fs),
- .pName = "main",
- .pSpecializationInfo = NULL
- },
- };
-
-
- for (unsigned dst_layout = 0; dst_layout < RADV_META_DST_LAYOUT_COUNT; ++dst_layout) {
- VkImageLayout layout = radv_meta_dst_layout_to_layout(dst_layout);
- result = radv_CreateRenderPass2(radv_device_to_handle(device),
- &(VkRenderPassCreateInfo2) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
- .attachmentCount = 1,
- .pAttachments = &(VkAttachmentDescription2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
- .format = format,
- .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
- .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
- .initialLayout = layout,
- .finalLayout = layout,
- },
- .subpassCount = 1,
- .pSubpasses = &(VkSubpassDescription2) {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
- .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
- .inputAttachmentCount = 0,
- .colorAttachmentCount = 1,
- .pColorAttachments = &(VkAttachmentReference2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
- .attachment = 0,
- .layout = layout,
- },
- .pResolveAttachments = NULL,
- .pDepthStencilAttachment = &(VkAttachmentReference2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
- .attachment = VK_ATTACHMENT_UNUSED,
- .layout = VK_IMAGE_LAYOUT_GENERAL,
- },
- .preserveAttachmentCount = 0,
- .pPreserveAttachments = NULL,
- },
- .dependencyCount = 2,
- .pDependencies = (VkSubpassDependency2[]) {
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = VK_SUBPASS_EXTERNAL,
- .dstSubpass = 0,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- },
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = 0,
- .dstSubpass = VK_SUBPASS_EXTERNAL,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- }
- },
- }, &device->meta_state.alloc, rp + dst_layout);
- }
-
-
- const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
- .stageCount = ARRAY_SIZE(pipeline_shader_stages),
- .pStages = pipeline_shader_stages,
- .pVertexInputState = vi_create_info,
- .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
- .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
- .primitiveRestartEnable = false,
- },
- .pViewportState = &(VkPipelineViewportStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
- .viewportCount = 1,
- .scissorCount = 1,
- },
- .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
- .rasterizerDiscardEnable = false,
- .polygonMode = VK_POLYGON_MODE_FILL,
- .cullMode = VK_CULL_MODE_NONE,
- .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE
- },
- .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
- .rasterizationSamples = 1,
- .sampleShadingEnable = false,
- .pSampleMask = (VkSampleMask[]) { UINT32_MAX },
- },
- .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
- .attachmentCount = 1,
- .pAttachments = (VkPipelineColorBlendAttachmentState []) {
- { .colorWriteMask =
- VK_COLOR_COMPONENT_A_BIT |
- VK_COLOR_COMPONENT_R_BIT |
- VK_COLOR_COMPONENT_G_BIT |
- VK_COLOR_COMPONENT_B_BIT },
- }
- },
- .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
- .dynamicStateCount = 9,
- .pDynamicStates = (VkDynamicState[]) {
- VK_DYNAMIC_STATE_VIEWPORT,
- VK_DYNAMIC_STATE_SCISSOR,
- VK_DYNAMIC_STATE_LINE_WIDTH,
- VK_DYNAMIC_STATE_DEPTH_BIAS,
- VK_DYNAMIC_STATE_BLEND_CONSTANTS,
- VK_DYNAMIC_STATE_DEPTH_BOUNDS,
- VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
- VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
- VK_DYNAMIC_STATE_STENCIL_REFERENCE,
- },
- },
- .flags = 0,
- .layout = device->meta_state.resolve_fragment.p_layout,
- .renderPass = *rp,
- .subpass = 0,
- };
-
- const struct radv_graphics_pipeline_create_info radv_pipeline_info = {
- .use_rectlist = true
- };
-
- result = radv_graphics_pipeline_create(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- &vk_pipeline_info, &radv_pipeline_info,
- &device->meta_state.alloc,
- pipeline);
- ralloc_free(vs);
- ralloc_free(fs);
-
- mtx_unlock(&device->meta_state.mtx);
- return result;
+ mtx_lock(&device->meta_state.mtx);
+
+ unsigned fs_key = radv_format_meta_fs_key(device, format);
+ VkPipeline *pipeline = &device->meta_state.resolve_fragment.rc[samples_log2].pipeline[fs_key];
+ if (*pipeline) {
+ mtx_unlock(&device->meta_state.mtx);
+ return VK_SUCCESS;
+ }
+
+ VkResult result;
+ bool is_integer = false;
+ uint32_t samples = 1 << samples_log2;
+ const VkPipelineVertexInputStateCreateInfo *vi_create_info;
+ vi_create_info = &normal_vi_create_info;
+ if (vk_format_is_int(format))
+ is_integer = true;
+
+ nir_shader *fs = build_resolve_fragment_shader(device, is_integer, samples);
+ nir_shader *vs = build_nir_vertex_shader();
+
+ VkRenderPass *rp = &device->meta_state.resolve_fragment.rc[samples_log2].render_pass[fs_key][0];
+
+ assert(!*rp);
+
+ VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
+ {.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_VERTEX_BIT,
+ .module = vk_shader_module_handle_from_nir(vs),
+ .pName = "main",
+ .pSpecializationInfo = NULL},
+ {.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .module = vk_shader_module_handle_from_nir(fs),
+ .pName = "main",
+ .pSpecializationInfo = NULL},
+ };
+
+ for (unsigned dst_layout = 0; dst_layout < RADV_META_DST_LAYOUT_COUNT; ++dst_layout) {
+ VkImageLayout layout = radv_meta_dst_layout_to_layout(dst_layout);
+ result = radv_CreateRenderPass2(
+ radv_device_to_handle(device),
+ &(VkRenderPassCreateInfo2){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
+ .attachmentCount = 1,
+ .pAttachments =
+ &(VkAttachmentDescription2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
+ .format = format,
+ .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = layout,
+ .finalLayout = layout,
+ },
+ .subpassCount = 1,
+ .pSubpasses =
+ &(VkSubpassDescription2){
+ .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .colorAttachmentCount = 1,
+ .pColorAttachments =
+ &(VkAttachmentReference2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
+ .attachment = 0,
+ .layout = layout,
+ },
+ .pResolveAttachments = NULL,
+ .pDepthStencilAttachment =
+ &(VkAttachmentReference2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
+ .attachment = VK_ATTACHMENT_UNUSED,
+ .layout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ .preserveAttachmentCount = 0,
+ .pPreserveAttachments = NULL,
+ },
+ .dependencyCount = 2,
+ .pDependencies =
+ (VkSubpassDependency2[]){{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = VK_SUBPASS_EXTERNAL,
+ .dstSubpass = 0,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0},
+ {.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = 0,
+ .dstSubpass = VK_SUBPASS_EXTERNAL,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0}},
+ },
+ &device->meta_state.alloc, rp + dst_layout);
+ }
+
+ const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .stageCount = ARRAY_SIZE(pipeline_shader_stages),
+ .pStages = pipeline_shader_stages,
+ .pVertexInputState = vi_create_info,
+ .pInputAssemblyState =
+ &(VkPipelineInputAssemblyStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
+ .primitiveRestartEnable = false,
+ },
+ .pViewportState =
+ &(VkPipelineViewportStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .viewportCount = 1,
+ .scissorCount = 1,
+ },
+ .pRasterizationState =
+ &(VkPipelineRasterizationStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .rasterizerDiscardEnable = false,
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .cullMode = VK_CULL_MODE_NONE,
+ .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE},
+ .pMultisampleState =
+ &(VkPipelineMultisampleStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .rasterizationSamples = 1,
+ .sampleShadingEnable = false,
+ .pSampleMask = (VkSampleMask[]){UINT32_MAX},
+ },
+ .pColorBlendState =
+ &(VkPipelineColorBlendStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments =
+ (VkPipelineColorBlendAttachmentState[]){
+ {.colorWriteMask = VK_COLOR_COMPONENT_A_BIT | VK_COLOR_COMPONENT_R_BIT |
+ VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT},
+ }},
+ .pDynamicState =
+ &(VkPipelineDynamicStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .dynamicStateCount = 9,
+ .pDynamicStates =
+ (VkDynamicState[]){
+ VK_DYNAMIC_STATE_VIEWPORT,
+ VK_DYNAMIC_STATE_SCISSOR,
+ VK_DYNAMIC_STATE_LINE_WIDTH,
+ VK_DYNAMIC_STATE_DEPTH_BIAS,
+ VK_DYNAMIC_STATE_BLEND_CONSTANTS,
+ VK_DYNAMIC_STATE_DEPTH_BOUNDS,
+ VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
+ VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
+ VK_DYNAMIC_STATE_STENCIL_REFERENCE,
+ },
+ },
+ .flags = 0,
+ .layout = device->meta_state.resolve_fragment.p_layout,
+ .renderPass = *rp,
+ .subpass = 0,
+ };
+
+ const struct radv_graphics_pipeline_create_info radv_pipeline_info = {.use_rectlist = true};
+
+ result = radv_graphics_pipeline_create(
+ radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache),
+ &vk_pipeline_info, &radv_pipeline_info, &device->meta_state.alloc, pipeline);
+ ralloc_free(vs);
+ ralloc_free(fs);
+
+ mtx_unlock(&device->meta_state.mtx);
+ return result;
}
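The function that ends here follows the lazy, mutex-guarded creation pattern used throughout the RADV meta code: take meta_state.mtx, return early if the pipeline slot keyed by (samples_log2, fs_key) is already populated, and otherwise build the shaders, render passes and pipeline before unlocking. A minimal sketch of that pattern in isolation, with a hypothetical lazy_cache struct and create_one() callback standing in for the real driver state:

#include <threads.h>

struct lazy_cache {
   mtx_t mtx;             /* assumed to be initialized elsewhere with mtx_init() */
   void *pipeline[5][16]; /* [samples_log2][fs_key]; dimensions are placeholders */
};

static int
get_or_create(struct lazy_cache *cache, unsigned samples_log2, unsigned fs_key,
              void *(*create_one)(unsigned samples, unsigned fs_key))
{
   mtx_lock(&cache->mtx);
   if (cache->pipeline[samples_log2][fs_key]) {
      /* Already built by an earlier call (possibly from another thread). */
      mtx_unlock(&cache->mtx);
      return 0;
   }
   cache->pipeline[samples_log2][fs_key] = create_one(1u << samples_log2, fs_key);
   int ret = cache->pipeline[samples_log2][fs_key] ? 0 : -1;
   mtx_unlock(&cache->mtx);
   return ret;
}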
-enum {
- DEPTH_RESOLVE,
- STENCIL_RESOLVE
-};
+enum { DEPTH_RESOLVE, STENCIL_RESOLVE };
static const char *
get_resolve_mode_str(VkResolveModeFlagBits resolve_mode)
{
- switch (resolve_mode) {
- case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR:
- return "zero";
- case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
- return "average";
- case VK_RESOLVE_MODE_MIN_BIT_KHR:
- return "min";
- case VK_RESOLVE_MODE_MAX_BIT_KHR:
- return "max";
- default:
- unreachable("invalid resolve mode");
- }
+ switch (resolve_mode) {
+ case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR:
+ return "zero";
+ case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
+ return "average";
+ case VK_RESOLVE_MODE_MIN_BIT_KHR:
+ return "min";
+ case VK_RESOLVE_MODE_MAX_BIT_KHR:
+ return "max";
+ default:
+ unreachable("invalid resolve mode");
+ }
}
static nir_shader *
-build_depth_stencil_resolve_fragment_shader(struct radv_device *dev, int samples,
- int index,
- VkResolveModeFlagBits resolve_mode)
+build_depth_stencil_resolve_fragment_shader(struct radv_device *dev, int samples, int index,
+ VkResolveModeFlagBits resolve_mode)
{
- const struct glsl_type *vec4 = glsl_vec4_type();
- const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
- false,
- false,
- GLSL_TYPE_FLOAT);
-
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL,
- "meta_resolve_fs_%s-%s-%d",
- index == DEPTH_RESOLVE ? "depth" : "stencil",
- get_resolve_mode_str(resolve_mode), samples);
-
- nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
- sampler_type, "s_tex");
- input_img->data.descriptor_set = 0;
- input_img->data.binding = 0;
-
- nir_variable *fs_out = nir_variable_create(b.shader,
- nir_var_shader_out, vec4,
- "f_out");
- fs_out->data.location =
- index == DEPTH_RESOLVE ? FRAG_RESULT_DEPTH : FRAG_RESULT_STENCIL;
-
- nir_ssa_def *pos_in = nir_channels(&b, nir_load_frag_coord(&b), 0x3);
-
- nir_ssa_def *pos_int = nir_f2i32(&b, pos_in);
-
- nir_ssa_def *img_coord = nir_channels(&b, pos_int, 0x3);
-
- nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
-
- nir_alu_type type = index == DEPTH_RESOLVE ? nir_type_float32 : nir_type_uint32;
-
- nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
- tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
- tex->op = nir_texop_txf_ms;
- tex->src[0].src_type = nir_tex_src_coord;
- tex->src[0].src = nir_src_for_ssa(img_coord);
- tex->src[1].src_type = nir_tex_src_ms_index;
- tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
- tex->src[2].src_type = nir_tex_src_texture_deref;
- tex->src[2].src = nir_src_for_ssa(input_img_deref);
- tex->dest_type = type;
- tex->is_array = false;
- tex->coord_components = 2;
-
- nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
- nir_builder_instr_insert(&b, &tex->instr);
-
- nir_ssa_def *outval = &tex->dest.ssa;
-
- if (resolve_mode != VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR) {
- for (int i = 1; i < samples; i++) {
- nir_tex_instr *tex_add = nir_tex_instr_create(b.shader, 3);
- tex_add->sampler_dim = GLSL_SAMPLER_DIM_MS;
- tex_add->op = nir_texop_txf_ms;
- tex_add->src[0].src_type = nir_tex_src_coord;
- tex_add->src[0].src = nir_src_for_ssa(img_coord);
- tex_add->src[1].src_type = nir_tex_src_ms_index;
- tex_add->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i));
- tex_add->src[2].src_type = nir_tex_src_texture_deref;
- tex_add->src[2].src = nir_src_for_ssa(input_img_deref);
- tex_add->dest_type = type;
- tex_add->is_array = false;
- tex_add->coord_components = 2;
-
- nir_ssa_dest_init(&tex_add->instr, &tex_add->dest, 4, 32, "tex");
- nir_builder_instr_insert(&b, &tex_add->instr);
-
- switch (resolve_mode) {
- case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
- assert(index == DEPTH_RESOLVE);
- outval = nir_fadd(&b, outval, &tex_add->dest.ssa);
- break;
- case VK_RESOLVE_MODE_MIN_BIT_KHR:
- if (index == DEPTH_RESOLVE)
- outval = nir_fmin(&b, outval, &tex_add->dest.ssa);
- else
- outval = nir_umin(&b, outval, &tex_add->dest.ssa);
- break;
- case VK_RESOLVE_MODE_MAX_BIT_KHR:
- if (index == DEPTH_RESOLVE)
- outval = nir_fmax(&b, outval, &tex_add->dest.ssa);
- else
- outval = nir_umax(&b, outval, &tex_add->dest.ssa);
- break;
- default:
- unreachable("invalid resolve mode");
- }
- }
-
- if (resolve_mode == VK_RESOLVE_MODE_AVERAGE_BIT_KHR)
- outval = nir_fdiv(&b, outval, nir_imm_float(&b, samples));
- }
-
- nir_store_var(&b, fs_out, outval, 0x1);
-
- return b.shader;
+ const struct glsl_type *vec4 = glsl_vec4_type();
+ const struct glsl_type *sampler_type =
+ glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false, GLSL_TYPE_FLOAT);
+
+ nir_builder b = nir_builder_init_simple_shader(
+ MESA_SHADER_FRAGMENT, NULL, "meta_resolve_fs_%s-%s-%d",
+ index == DEPTH_RESOLVE ? "depth" : "stencil", get_resolve_mode_str(resolve_mode), samples);
+
+ nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
+ input_img->data.descriptor_set = 0;
+ input_img->data.binding = 0;
+
+ nir_variable *fs_out = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_out");
+ fs_out->data.location = index == DEPTH_RESOLVE ? FRAG_RESULT_DEPTH : FRAG_RESULT_STENCIL;
+
+ nir_ssa_def *pos_in = nir_channels(&b, nir_load_frag_coord(&b), 0x3);
+
+ nir_ssa_def *pos_int = nir_f2i32(&b, pos_in);
+
+ nir_ssa_def *img_coord = nir_channels(&b, pos_int, 0x3);
+
+ nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
+
+ nir_alu_type type = index == DEPTH_RESOLVE ? nir_type_float32 : nir_type_uint32;
+
+ nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
+ tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
+ tex->op = nir_texop_txf_ms;
+ tex->src[0].src_type = nir_tex_src_coord;
+ tex->src[0].src = nir_src_for_ssa(img_coord);
+ tex->src[1].src_type = nir_tex_src_ms_index;
+ tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
+ tex->src[2].src_type = nir_tex_src_texture_deref;
+ tex->src[2].src = nir_src_for_ssa(input_img_deref);
+ tex->dest_type = type;
+ tex->is_array = false;
+ tex->coord_components = 2;
+
+ nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+ nir_builder_instr_insert(&b, &tex->instr);
+
+ nir_ssa_def *outval = &tex->dest.ssa;
+
+ if (resolve_mode != VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR) {
+ for (int i = 1; i < samples; i++) {
+ nir_tex_instr *tex_add = nir_tex_instr_create(b.shader, 3);
+ tex_add->sampler_dim = GLSL_SAMPLER_DIM_MS;
+ tex_add->op = nir_texop_txf_ms;
+ tex_add->src[0].src_type = nir_tex_src_coord;
+ tex_add->src[0].src = nir_src_for_ssa(img_coord);
+ tex_add->src[1].src_type = nir_tex_src_ms_index;
+ tex_add->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i));
+ tex_add->src[2].src_type = nir_tex_src_texture_deref;
+ tex_add->src[2].src = nir_src_for_ssa(input_img_deref);
+ tex_add->dest_type = type;
+ tex_add->is_array = false;
+ tex_add->coord_components = 2;
+
+ nir_ssa_dest_init(&tex_add->instr, &tex_add->dest, 4, 32, "tex");
+ nir_builder_instr_insert(&b, &tex_add->instr);
+
+ switch (resolve_mode) {
+ case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
+ assert(index == DEPTH_RESOLVE);
+ outval = nir_fadd(&b, outval, &tex_add->dest.ssa);
+ break;
+ case VK_RESOLVE_MODE_MIN_BIT_KHR:
+ if (index == DEPTH_RESOLVE)
+ outval = nir_fmin(&b, outval, &tex_add->dest.ssa);
+ else
+ outval = nir_umin(&b, outval, &tex_add->dest.ssa);
+ break;
+ case VK_RESOLVE_MODE_MAX_BIT_KHR:
+ if (index == DEPTH_RESOLVE)
+ outval = nir_fmax(&b, outval, &tex_add->dest.ssa);
+ else
+ outval = nir_umax(&b, outval, &tex_add->dest.ssa);
+ break;
+ default:
+ unreachable("invalid resolve mode");
+ }
+ }
+
+ if (resolve_mode == VK_RESOLVE_MODE_AVERAGE_BIT_KHR)
+ outval = nir_fdiv(&b, outval, nir_imm_float(&b, samples));
+ }
+
+ nir_store_var(&b, fs_out, outval, 0x1);
+
+ return b.shader;
}
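The shader built above fetches one txf_ms sample per iteration and folds it into the running value with nir_fadd/nir_fmin/nir_fmax (or umin/umax for stencil), dividing by the sample count only for the AVERAGE mode. A CPU-side illustration of the same per-pixel reduction for the float (depth) case; resolve_depth() is a hypothetical helper, not driver code:

#include <math.h>

enum resolve_op { RESOLVE_ZERO, RESOLVE_AVERAGE, RESOLVE_MIN, RESOLVE_MAX };

static float
resolve_depth(const float *samples, int count, enum resolve_op op)
{
   float out = samples[0];          /* SAMPLE_ZERO stops after sample 0 */
   if (op == RESOLVE_ZERO)
      return out;

   for (int i = 1; i < count; i++) {
      if (op == RESOLVE_AVERAGE)
         out += samples[i];
      else if (op == RESOLVE_MIN)
         out = fminf(out, samples[i]);
      else
         out = fmaxf(out, samples[i]);
   }
   if (op == RESOLVE_AVERAGE)
      out /= count;                 /* matches the final nir_fdiv */
   return out;
}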
static VkResult
-create_depth_stencil_resolve_pipeline(struct radv_device *device,
- int samples_log2,
- int index,
- VkResolveModeFlagBits resolve_mode)
+create_depth_stencil_resolve_pipeline(struct radv_device *device, int samples_log2, int index,
+ VkResolveModeFlagBits resolve_mode)
{
- VkRenderPass *render_pass;
- VkPipeline *pipeline;
- VkFormat src_format;
- VkResult result;
-
- mtx_lock(&device->meta_state.mtx);
-
- switch (resolve_mode) {
- case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR:
- if (index == DEPTH_RESOLVE)
- pipeline = &device->meta_state.resolve_fragment.depth_zero_pipeline;
- else
- pipeline = &device->meta_state.resolve_fragment.stencil_zero_pipeline;
- break;
- case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
- assert(index == DEPTH_RESOLVE);
- pipeline = &device->meta_state.resolve_fragment.depth[samples_log2].average_pipeline;
- break;
- case VK_RESOLVE_MODE_MIN_BIT_KHR:
- if (index == DEPTH_RESOLVE)
- pipeline = &device->meta_state.resolve_fragment.depth[samples_log2].min_pipeline;
- else
- pipeline = &device->meta_state.resolve_fragment.stencil[samples_log2].min_pipeline;
- break;
- case VK_RESOLVE_MODE_MAX_BIT_KHR:
- if (index == DEPTH_RESOLVE)
- pipeline = &device->meta_state.resolve_fragment.depth[samples_log2].max_pipeline;
- else
- pipeline = &device->meta_state.resolve_fragment.stencil[samples_log2].max_pipeline;
- break;
- default:
- unreachable("invalid resolve mode");
- }
-
- if (*pipeline) {
- mtx_unlock(&device->meta_state.mtx);
- return VK_SUCCESS;
- }
-
- uint32_t samples = 1 << samples_log2;
- nir_shader *fs = build_depth_stencil_resolve_fragment_shader(device, samples, index, resolve_mode);
- nir_shader *vs = build_nir_vertex_shader();
-
- VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
- {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_VERTEX_BIT,
- .module = vk_shader_module_handle_from_nir(vs),
- .pName = "main",
- .pSpecializationInfo = NULL
- }, {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
- .module = vk_shader_module_handle_from_nir(fs),
- .pName = "main",
- .pSpecializationInfo = NULL
- },
- };
-
- if (index == DEPTH_RESOLVE) {
- src_format = VK_FORMAT_D32_SFLOAT;
- render_pass = &device->meta_state.resolve_fragment.depth_render_pass;
- } else {
- render_pass = &device->meta_state.resolve_fragment.stencil_render_pass;
- src_format = VK_FORMAT_S8_UINT;
- }
-
- if (!*render_pass) {
- result = radv_CreateRenderPass2(radv_device_to_handle(device),
- &(VkRenderPassCreateInfo2) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
- .attachmentCount = 1,
- .pAttachments = &(VkAttachmentDescription2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
- .format = src_format,
- .loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
- .storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
- .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
- .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
- .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
- .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- .subpassCount = 1,
- .pSubpasses = &(VkSubpassDescription2) {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
- .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
- .inputAttachmentCount = 0,
- .colorAttachmentCount = 0,
- .pColorAttachments = NULL,
- .pResolveAttachments = NULL,
- .pDepthStencilAttachment = &(VkAttachmentReference2) {
- .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
- .attachment = 0,
- .layout = VK_IMAGE_LAYOUT_GENERAL,
- },
- .preserveAttachmentCount = 0,
- .pPreserveAttachments = NULL,
- },
- .dependencyCount = 2,
- .pDependencies = (VkSubpassDependency2[]) {
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = VK_SUBPASS_EXTERNAL,
- .dstSubpass = 0,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- },
- {
- .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
- .srcSubpass = 0,
- .dstSubpass = VK_SUBPASS_EXTERNAL,
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = 0,
- .dependencyFlags = 0
- }
- },
- }, &device->meta_state.alloc, render_pass);
- }
-
- VkStencilOp stencil_op =
- index == DEPTH_RESOLVE ? VK_STENCIL_OP_KEEP : VK_STENCIL_OP_REPLACE;
-
- VkPipelineDepthStencilStateCreateInfo depth_stencil_state = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
- .depthTestEnable = true,
- .depthWriteEnable = index == DEPTH_RESOLVE,
- .stencilTestEnable = index == STENCIL_RESOLVE,
- .depthCompareOp = VK_COMPARE_OP_ALWAYS,
- .front = {
- .failOp = stencil_op,
- .passOp = stencil_op,
- .depthFailOp = stencil_op,
- .compareOp = VK_COMPARE_OP_ALWAYS,
- },
- .back = {
- .failOp = stencil_op,
- .passOp = stencil_op,
- .depthFailOp = stencil_op,
- .compareOp = VK_COMPARE_OP_ALWAYS,
- }
- };
-
- const VkPipelineVertexInputStateCreateInfo *vi_create_info;
- vi_create_info = &normal_vi_create_info;
-
- const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
- .stageCount = ARRAY_SIZE(pipeline_shader_stages),
- .pStages = pipeline_shader_stages,
- .pVertexInputState = vi_create_info,
- .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
- .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
- .primitiveRestartEnable = false,
- },
- .pViewportState = &(VkPipelineViewportStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
- .viewportCount = 1,
- .scissorCount = 1,
- },
- .pDepthStencilState = &depth_stencil_state,
- .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
- .rasterizerDiscardEnable = false,
- .polygonMode = VK_POLYGON_MODE_FILL,
- .cullMode = VK_CULL_MODE_NONE,
- .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE
- },
- .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
- .rasterizationSamples = 1,
- .sampleShadingEnable = false,
- .pSampleMask = (VkSampleMask[]) { UINT32_MAX },
- },
- .pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
- .attachmentCount = 0,
- .pAttachments = (VkPipelineColorBlendAttachmentState []) {
- { .colorWriteMask =
- VK_COLOR_COMPONENT_A_BIT |
- VK_COLOR_COMPONENT_R_BIT |
- VK_COLOR_COMPONENT_G_BIT |
- VK_COLOR_COMPONENT_B_BIT },
- }
- },
- .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
- .dynamicStateCount = 9,
- .pDynamicStates = (VkDynamicState[]) {
- VK_DYNAMIC_STATE_VIEWPORT,
- VK_DYNAMIC_STATE_SCISSOR,
- VK_DYNAMIC_STATE_LINE_WIDTH,
- VK_DYNAMIC_STATE_DEPTH_BIAS,
- VK_DYNAMIC_STATE_BLEND_CONSTANTS,
- VK_DYNAMIC_STATE_DEPTH_BOUNDS,
- VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
- VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
- VK_DYNAMIC_STATE_STENCIL_REFERENCE,
- },
- },
- .flags = 0,
- .layout = device->meta_state.resolve_fragment.p_layout,
- .renderPass = *render_pass,
- .subpass = 0,
- };
-
- const struct radv_graphics_pipeline_create_info radv_pipeline_info = {
- .use_rectlist = true
- };
-
- result = radv_graphics_pipeline_create(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- &vk_pipeline_info, &radv_pipeline_info,
- &device->meta_state.alloc,
- pipeline);
-
- ralloc_free(vs);
- ralloc_free(fs);
-
- mtx_unlock(&device->meta_state.mtx);
- return result;
+ VkRenderPass *render_pass;
+ VkPipeline *pipeline;
+ VkFormat src_format;
+ VkResult result;
+
+ mtx_lock(&device->meta_state.mtx);
+
+ switch (resolve_mode) {
+ case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR:
+ if (index == DEPTH_RESOLVE)
+ pipeline = &device->meta_state.resolve_fragment.depth_zero_pipeline;
+ else
+ pipeline = &device->meta_state.resolve_fragment.stencil_zero_pipeline;
+ break;
+ case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
+ assert(index == DEPTH_RESOLVE);
+ pipeline = &device->meta_state.resolve_fragment.depth[samples_log2].average_pipeline;
+ break;
+ case VK_RESOLVE_MODE_MIN_BIT_KHR:
+ if (index == DEPTH_RESOLVE)
+ pipeline = &device->meta_state.resolve_fragment.depth[samples_log2].min_pipeline;
+ else
+ pipeline = &device->meta_state.resolve_fragment.stencil[samples_log2].min_pipeline;
+ break;
+ case VK_RESOLVE_MODE_MAX_BIT_KHR:
+ if (index == DEPTH_RESOLVE)
+ pipeline = &device->meta_state.resolve_fragment.depth[samples_log2].max_pipeline;
+ else
+ pipeline = &device->meta_state.resolve_fragment.stencil[samples_log2].max_pipeline;
+ break;
+ default:
+ unreachable("invalid resolve mode");
+ }
+
+ if (*pipeline) {
+ mtx_unlock(&device->meta_state.mtx);
+ return VK_SUCCESS;
+ }
+
+ uint32_t samples = 1 << samples_log2;
+ nir_shader *fs =
+ build_depth_stencil_resolve_fragment_shader(device, samples, index, resolve_mode);
+ nir_shader *vs = build_nir_vertex_shader();
+
+ VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
+ {.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_VERTEX_BIT,
+ .module = vk_shader_module_handle_from_nir(vs),
+ .pName = "main",
+ .pSpecializationInfo = NULL},
+ {.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+ .module = vk_shader_module_handle_from_nir(fs),
+ .pName = "main",
+ .pSpecializationInfo = NULL},
+ };
+
+ if (index == DEPTH_RESOLVE) {
+ src_format = VK_FORMAT_D32_SFLOAT;
+ render_pass = &device->meta_state.resolve_fragment.depth_render_pass;
+ } else {
+ render_pass = &device->meta_state.resolve_fragment.stencil_render_pass;
+ src_format = VK_FORMAT_S8_UINT;
+ }
+
+ if (!*render_pass) {
+ result = radv_CreateRenderPass2(
+ radv_device_to_handle(device),
+ &(VkRenderPassCreateInfo2){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
+ .attachmentCount = 1,
+ .pAttachments =
+ &(VkAttachmentDescription2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
+ .format = src_format,
+ .loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
+ .storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
+ .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+ .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
+ .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
+ .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ .subpassCount = 1,
+ .pSubpasses =
+ &(VkSubpassDescription2){
+ .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
+ .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+ .inputAttachmentCount = 0,
+ .colorAttachmentCount = 0,
+ .pColorAttachments = NULL,
+ .pResolveAttachments = NULL,
+ .pDepthStencilAttachment =
+ &(VkAttachmentReference2){
+ .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
+ .attachment = 0,
+ .layout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ .preserveAttachmentCount = 0,
+ .pPreserveAttachments = NULL,
+ },
+ .dependencyCount = 2,
+ .pDependencies =
+ (VkSubpassDependency2[]){{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = VK_SUBPASS_EXTERNAL,
+ .dstSubpass = 0,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0},
+ {.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
+ .srcSubpass = 0,
+ .dstSubpass = VK_SUBPASS_EXTERNAL,
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0}},
+ },
+ &device->meta_state.alloc, render_pass);
+ }
+
+ VkStencilOp stencil_op = index == DEPTH_RESOLVE ? VK_STENCIL_OP_KEEP : VK_STENCIL_OP_REPLACE;
+
+ VkPipelineDepthStencilStateCreateInfo depth_stencil_state = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
+ .depthTestEnable = true,
+ .depthWriteEnable = index == DEPTH_RESOLVE,
+ .stencilTestEnable = index == STENCIL_RESOLVE,
+ .depthCompareOp = VK_COMPARE_OP_ALWAYS,
+ .front =
+ {
+ .failOp = stencil_op,
+ .passOp = stencil_op,
+ .depthFailOp = stencil_op,
+ .compareOp = VK_COMPARE_OP_ALWAYS,
+ },
+ .back = {
+ .failOp = stencil_op,
+ .passOp = stencil_op,
+ .depthFailOp = stencil_op,
+ .compareOp = VK_COMPARE_OP_ALWAYS,
+ }};
+
+ const VkPipelineVertexInputStateCreateInfo *vi_create_info;
+ vi_create_info = &normal_vi_create_info;
+
+ const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+ .stageCount = ARRAY_SIZE(pipeline_shader_stages),
+ .pStages = pipeline_shader_stages,
+ .pVertexInputState = vi_create_info,
+ .pInputAssemblyState =
+ &(VkPipelineInputAssemblyStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+ .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
+ .primitiveRestartEnable = false,
+ },
+ .pViewportState =
+ &(VkPipelineViewportStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+ .viewportCount = 1,
+ .scissorCount = 1,
+ },
+ .pDepthStencilState = &depth_stencil_state,
+ .pRasterizationState =
+ &(VkPipelineRasterizationStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+ .rasterizerDiscardEnable = false,
+ .polygonMode = VK_POLYGON_MODE_FILL,
+ .cullMode = VK_CULL_MODE_NONE,
+ .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE},
+ .pMultisampleState =
+ &(VkPipelineMultisampleStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+ .rasterizationSamples = 1,
+ .sampleShadingEnable = false,
+ .pSampleMask = (VkSampleMask[]){UINT32_MAX},
+ },
+ .pColorBlendState =
+ &(VkPipelineColorBlendStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+ .attachmentCount = 0,
+ .pAttachments =
+ (VkPipelineColorBlendAttachmentState[]){
+ {.colorWriteMask = VK_COLOR_COMPONENT_A_BIT | VK_COLOR_COMPONENT_R_BIT |
+ VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT},
+ }},
+ .pDynamicState =
+ &(VkPipelineDynamicStateCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+ .dynamicStateCount = 9,
+ .pDynamicStates =
+ (VkDynamicState[]){
+ VK_DYNAMIC_STATE_VIEWPORT,
+ VK_DYNAMIC_STATE_SCISSOR,
+ VK_DYNAMIC_STATE_LINE_WIDTH,
+ VK_DYNAMIC_STATE_DEPTH_BIAS,
+ VK_DYNAMIC_STATE_BLEND_CONSTANTS,
+ VK_DYNAMIC_STATE_DEPTH_BOUNDS,
+ VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
+ VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
+ VK_DYNAMIC_STATE_STENCIL_REFERENCE,
+ },
+ },
+ .flags = 0,
+ .layout = device->meta_state.resolve_fragment.p_layout,
+ .renderPass = *render_pass,
+ .subpass = 0,
+ };
+
+ const struct radv_graphics_pipeline_create_info radv_pipeline_info = {.use_rectlist = true};
+
+ result = radv_graphics_pipeline_create(
+ radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache),
+ &vk_pipeline_info, &radv_pipeline_info, &device->meta_state.alloc, pipeline);
+
+ ralloc_free(vs);
+ ralloc_free(fs);
+
+ mtx_unlock(&device->meta_state.mtx);
+ return result;
}
VkResult
radv_device_init_meta_resolve_fragment_state(struct radv_device *device, bool on_demand)
{
- VkResult res;
-
- res = create_layout(device);
- if (res != VK_SUCCESS)
- goto fail;
-
- if (on_demand)
- return VK_SUCCESS;
-
- for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
- for (unsigned j = 0; j < NUM_META_FS_KEYS; ++j) {
- res = create_resolve_pipeline(device, i, radv_fs_key_format_exemplars[j]);
- if (res != VK_SUCCESS)
- goto fail;
- }
-
- res = create_depth_stencil_resolve_pipeline(device, i, DEPTH_RESOLVE,
- VK_RESOLVE_MODE_AVERAGE_BIT_KHR);
- if (res != VK_SUCCESS)
- goto fail;
-
- res = create_depth_stencil_resolve_pipeline(device, i, DEPTH_RESOLVE,
- VK_RESOLVE_MODE_MIN_BIT_KHR);
- if (res != VK_SUCCESS)
- goto fail;
-
- res = create_depth_stencil_resolve_pipeline(device, i, DEPTH_RESOLVE,
- VK_RESOLVE_MODE_MAX_BIT_KHR);
- if (res != VK_SUCCESS)
- goto fail;
-
- res = create_depth_stencil_resolve_pipeline(device, i, STENCIL_RESOLVE,
- VK_RESOLVE_MODE_MIN_BIT_KHR);
- if (res != VK_SUCCESS)
- goto fail;
-
- res = create_depth_stencil_resolve_pipeline(device, i, STENCIL_RESOLVE,
- VK_RESOLVE_MODE_MAX_BIT_KHR);
- if (res != VK_SUCCESS)
- goto fail;
- }
-
- res = create_depth_stencil_resolve_pipeline(device, 0, DEPTH_RESOLVE,
- VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR);
- if (res != VK_SUCCESS)
- goto fail;
-
- res = create_depth_stencil_resolve_pipeline(device, 0, STENCIL_RESOLVE,
- VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR);
- if (res != VK_SUCCESS)
- goto fail;
-
- return VK_SUCCESS;
+ VkResult res;
+
+ res = create_layout(device);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ if (on_demand)
+ return VK_SUCCESS;
+
+ for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
+ for (unsigned j = 0; j < NUM_META_FS_KEYS; ++j) {
+ res = create_resolve_pipeline(device, i, radv_fs_key_format_exemplars[j]);
+ if (res != VK_SUCCESS)
+ goto fail;
+ }
+
+ res = create_depth_stencil_resolve_pipeline(device, i, DEPTH_RESOLVE,
+ VK_RESOLVE_MODE_AVERAGE_BIT_KHR);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_depth_stencil_resolve_pipeline(device, i, DEPTH_RESOLVE,
+ VK_RESOLVE_MODE_MIN_BIT_KHR);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_depth_stencil_resolve_pipeline(device, i, DEPTH_RESOLVE,
+ VK_RESOLVE_MODE_MAX_BIT_KHR);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_depth_stencil_resolve_pipeline(device, i, STENCIL_RESOLVE,
+ VK_RESOLVE_MODE_MIN_BIT_KHR);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_depth_stencil_resolve_pipeline(device, i, STENCIL_RESOLVE,
+ VK_RESOLVE_MODE_MAX_BIT_KHR);
+ if (res != VK_SUCCESS)
+ goto fail;
+ }
+
+ res = create_depth_stencil_resolve_pipeline(device, 0, DEPTH_RESOLVE,
+ VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ res = create_depth_stencil_resolve_pipeline(device, 0, STENCIL_RESOLVE,
+ VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR);
+ if (res != VK_SUCCESS)
+ goto fail;
+
+ return VK_SUCCESS;
fail:
- radv_device_finish_meta_resolve_fragment_state(device);
- return res;
+ radv_device_finish_meta_resolve_fragment_state(device);
+ return res;
}
void
radv_device_finish_meta_resolve_fragment_state(struct radv_device *device)
{
- struct radv_meta_state *state = &device->meta_state;
- for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
- for (unsigned j = 0; j < NUM_META_FS_KEYS; ++j) {
- for(unsigned k =0; k < RADV_META_DST_LAYOUT_COUNT; ++k) {
- radv_DestroyRenderPass(radv_device_to_handle(device),
- state->resolve_fragment.rc[i].render_pass[j][k],
- &state->alloc);
- }
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_fragment.rc[i].pipeline[j],
- &state->alloc);
- }
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_fragment.depth[i].average_pipeline,
- &state->alloc);
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_fragment.depth[i].max_pipeline,
- &state->alloc);
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_fragment.depth[i].min_pipeline,
- &state->alloc);
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_fragment.stencil[i].max_pipeline,
- &state->alloc);
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_fragment.stencil[i].min_pipeline,
- &state->alloc);
- }
-
- radv_DestroyRenderPass(radv_device_to_handle(device),
- state->resolve_fragment.depth_render_pass,
- &state->alloc);
- radv_DestroyRenderPass(radv_device_to_handle(device),
- state->resolve_fragment.stencil_render_pass,
- &state->alloc);
-
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_fragment.depth_zero_pipeline,
- &state->alloc);
- radv_DestroyPipeline(radv_device_to_handle(device),
- state->resolve_fragment.stencil_zero_pipeline,
- &state->alloc);
-
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- state->resolve_fragment.ds_layout,
- &state->alloc);
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- state->resolve_fragment.p_layout,
- &state->alloc);
+ struct radv_meta_state *state = &device->meta_state;
+ for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
+ for (unsigned j = 0; j < NUM_META_FS_KEYS; ++j) {
+ for (unsigned k = 0; k < RADV_META_DST_LAYOUT_COUNT; ++k) {
+ radv_DestroyRenderPass(radv_device_to_handle(device),
+ state->resolve_fragment.rc[i].render_pass[j][k], &state->alloc);
+ }
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->resolve_fragment.rc[i].pipeline[j], &state->alloc);
+ }
+
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->resolve_fragment.depth[i].average_pipeline, &state->alloc);
+
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->resolve_fragment.depth[i].max_pipeline, &state->alloc);
+
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->resolve_fragment.depth[i].min_pipeline, &state->alloc);
+
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->resolve_fragment.stencil[i].max_pipeline, &state->alloc);
+
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->resolve_fragment.stencil[i].min_pipeline, &state->alloc);
+ }
+
+ radv_DestroyRenderPass(radv_device_to_handle(device), state->resolve_fragment.depth_render_pass,
+ &state->alloc);
+ radv_DestroyRenderPass(radv_device_to_handle(device),
+ state->resolve_fragment.stencil_render_pass, &state->alloc);
+
+ radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_fragment.depth_zero_pipeline,
+ &state->alloc);
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ state->resolve_fragment.stencil_zero_pipeline, &state->alloc);
+
+ radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->resolve_fragment.ds_layout,
+ &state->alloc);
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), state->resolve_fragment.p_layout,
+ &state->alloc);
}
static VkPipeline *
-radv_get_resolve_pipeline(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image_view *src_iview,
- struct radv_image_view *dst_iview)
+radv_get_resolve_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview,
+ struct radv_image_view *dst_iview)
{
- struct radv_device *device = cmd_buffer->device;
- unsigned fs_key = radv_format_meta_fs_key(cmd_buffer->device, dst_iview->vk_format);
- const uint32_t samples = src_iview->image->info.samples;
- const uint32_t samples_log2 = ffs(samples) - 1;
- VkPipeline *pipeline;
-
- pipeline = &device->meta_state.resolve_fragment.rc[samples_log2].pipeline[fs_key];
- if (!*pipeline ) {
- VkResult ret;
-
- ret = create_resolve_pipeline(device, samples_log2,
- radv_fs_key_format_exemplars[fs_key]);
- if (ret != VK_SUCCESS) {
- cmd_buffer->record_result = ret;
- return NULL;
- }
- }
-
- return pipeline;
+ struct radv_device *device = cmd_buffer->device;
+ unsigned fs_key = radv_format_meta_fs_key(cmd_buffer->device, dst_iview->vk_format);
+ const uint32_t samples = src_iview->image->info.samples;
+ const uint32_t samples_log2 = ffs(samples) - 1;
+ VkPipeline *pipeline;
+
+ pipeline = &device->meta_state.resolve_fragment.rc[samples_log2].pipeline[fs_key];
+ if (!*pipeline) {
+ VkResult ret;
+
+ ret = create_resolve_pipeline(device, samples_log2, radv_fs_key_format_exemplars[fs_key]);
+ if (ret != VK_SUCCESS) {
+ cmd_buffer->record_result = ret;
+ return NULL;
+ }
+ }
+
+ return pipeline;
}
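The samples_log2 index used above is derived as ffs(samples) - 1, which equals log2(samples) for the power-of-two sample counts Vulkan allows. A standalone check of that mapping:

#include <assert.h>
#include <strings.h> /* ffs() */

int
main(void)
{
   for (int log2 = 0; log2 <= 4; log2++) {
      int samples = 1 << log2; /* 1, 2, 4, 8, 16 */
      assert(ffs(samples) - 1 == log2);
   }
   return 0;
}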
static void
-emit_resolve(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image_view *src_iview,
- struct radv_image_view *dest_iview,
- const VkOffset2D *src_offset,
- const VkOffset2D *dest_offset,
- const VkExtent2D *resolve_extent)
+emit_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview,
+ struct radv_image_view *dest_iview, const VkOffset2D *src_offset,
+ const VkOffset2D *dest_offset, const VkExtent2D *resolve_extent)
{
- struct radv_device *device = cmd_buffer->device;
- VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
- VkPipeline *pipeline;
-
- radv_meta_push_descriptor_set(cmd_buffer,
- VK_PIPELINE_BIND_POINT_GRAPHICS,
- cmd_buffer->device->meta_state.resolve_fragment.p_layout,
- 0, /* set */
- 1, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]) {
- {
- .sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(src_iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- }
- },
- });
-
- cmd_buffer->state.flush_bits |=
- radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_READ_BIT, src_iview->image) |
- radv_dst_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, dest_iview->image);
-
- unsigned push_constants[2] = {
- src_offset->x - dest_offset->x,
- src_offset->y - dest_offset->y,
- };
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.resolve_fragment.p_layout,
- VK_SHADER_STAGE_FRAGMENT_BIT, 0, 8,
- push_constants);
-
- pipeline = radv_get_resolve_pipeline(cmd_buffer, src_iview, dest_iview);
-
- radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
- *pipeline);
-
- radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
- .x = dest_offset->x,
- .y = dest_offset->y,
- .width = resolve_extent->width,
- .height = resolve_extent->height,
- .minDepth = 0.0f,
- .maxDepth = 1.0f
- });
-
- radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkRect2D) {
- .offset = *dest_offset,
- .extent = *resolve_extent,
- });
-
- radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0);
- cmd_buffer->state.flush_bits |=
- radv_src_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, dest_iview->image);
+ struct radv_device *device = cmd_buffer->device;
+ VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
+ VkPipeline *pipeline;
+
+ radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
+ cmd_buffer->device->meta_state.resolve_fragment.p_layout,
+ 0, /* set */
+ 1, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){
+ {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .pImageInfo =
+ (VkDescriptorImageInfo[]){
+ {
+ .sampler = VK_NULL_HANDLE,
+ .imageView = radv_image_view_to_handle(src_iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }},
+ });
+
+ cmd_buffer->state.flush_bits |=
+ radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_READ_BIT, src_iview->image) |
+ radv_dst_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, dest_iview->image);
+
+ unsigned push_constants[2] = {
+ src_offset->x - dest_offset->x,
+ src_offset->y - dest_offset->y,
+ };
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+ device->meta_state.resolve_fragment.p_layout, VK_SHADER_STAGE_FRAGMENT_BIT,
+ 0, 8, push_constants);
+
+ pipeline = radv_get_resolve_pipeline(cmd_buffer, src_iview, dest_iview);
+
+ radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
+
+ radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
+ &(VkViewport){.x = dest_offset->x,
+ .y = dest_offset->y,
+ .width = resolve_extent->width,
+ .height = resolve_extent->height,
+ .minDepth = 0.0f,
+ .maxDepth = 1.0f});
+
+ radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
+ &(VkRect2D){
+ .offset = *dest_offset,
+ .extent = *resolve_extent,
+ });
+
+ radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0);
+ cmd_buffer->state.flush_bits |=
+ radv_src_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, dest_iview->image);
}
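emit_resolve() pushes two constants, the x/y delta between the source and destination offsets. The color resolve fragment shader is not part of this hunk, so the exact use is an assumption, but the intended coordinate math would look like the following sketch; map_dst_to_src() and ivec2 are hypothetical:

typedef struct {
   int x, y;
} ivec2;

static ivec2
map_dst_to_src(ivec2 frag_coord, ivec2 src_offset, ivec2 dst_offset)
{
   /* Assumed shader-side behavior: add the pushed delta to the fragment
    * position to locate the source texel. */
   ivec2 delta = {src_offset.x - dst_offset.x,   /* push_constants[0] */
                  src_offset.y - dst_offset.y};  /* push_constants[1] */
   return (ivec2){frag_coord.x + delta.x, frag_coord.y + delta.y};
}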
static void
-emit_depth_stencil_resolve(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image_view *src_iview,
- struct radv_image_view *dst_iview,
- const VkExtent2D *resolve_extent,
- VkImageAspectFlags aspects,
- VkResolveModeFlagBits resolve_mode)
+emit_depth_stencil_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview,
+ struct radv_image_view *dst_iview, const VkExtent2D *resolve_extent,
+ VkImageAspectFlags aspects, VkResolveModeFlagBits resolve_mode)
{
- struct radv_device *device = cmd_buffer->device;
- const uint32_t samples = src_iview->image->info.samples;
- const uint32_t samples_log2 = ffs(samples) - 1;
- VkPipeline *pipeline;
-
- radv_meta_push_descriptor_set(cmd_buffer,
- VK_PIPELINE_BIND_POINT_GRAPHICS,
- cmd_buffer->device->meta_state.resolve_fragment.p_layout,
- 0, /* set */
- 1, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
- .pImageInfo = (VkDescriptorImageInfo[]) {
- {
- .sampler = VK_NULL_HANDLE,
- .imageView = radv_image_view_to_handle(src_iview),
- .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
- },
- }
- },
- });
-
- switch (resolve_mode) {
- case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR:
- if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
- pipeline = &device->meta_state.resolve_fragment.depth_zero_pipeline;
- else
- pipeline = &device->meta_state.resolve_fragment.stencil_zero_pipeline;
- break;
- case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
- assert(aspects == VK_IMAGE_ASPECT_DEPTH_BIT);
- pipeline = &device->meta_state.resolve_fragment.depth[samples_log2].average_pipeline;
- break;
- case VK_RESOLVE_MODE_MIN_BIT_KHR:
- if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
- pipeline = &device->meta_state.resolve_fragment.depth[samples_log2].min_pipeline;
- else
- pipeline = &device->meta_state.resolve_fragment.stencil[samples_log2].min_pipeline;
- break;
- case VK_RESOLVE_MODE_MAX_BIT_KHR:
- if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
- pipeline = &device->meta_state.resolve_fragment.depth[samples_log2].max_pipeline;
- else
- pipeline = &device->meta_state.resolve_fragment.stencil[samples_log2].max_pipeline;
- break;
- default:
- unreachable("invalid resolve mode");
- }
-
- if (!*pipeline) {
- int index = aspects == VK_IMAGE_ASPECT_DEPTH_BIT ? DEPTH_RESOLVE : STENCIL_RESOLVE;
- VkResult ret;
-
- ret = create_depth_stencil_resolve_pipeline(device, samples_log2,
- index, resolve_mode);
- if (ret != VK_SUCCESS) {
- cmd_buffer->record_result = ret;
- return;
- }
- }
-
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
-
- radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
- .x = 0,
- .y = 0,
- .width = resolve_extent->width,
- .height = resolve_extent->height,
- .minDepth = 0.0f,
- .maxDepth = 1.0f
- });
-
- radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkRect2D) {
- .offset = (VkOffset2D) { 0, 0 },
- .extent = *resolve_extent,
- });
-
- radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
+ struct radv_device *device = cmd_buffer->device;
+ const uint32_t samples = src_iview->image->info.samples;
+ const uint32_t samples_log2 = ffs(samples) - 1;
+ VkPipeline *pipeline;
+
+ radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
+ cmd_buffer->device->meta_state.resolve_fragment.p_layout,
+ 0, /* set */
+ 1, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){
+ {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+ .pImageInfo =
+ (VkDescriptorImageInfo[]){
+ {
+ .sampler = VK_NULL_HANDLE,
+ .imageView = radv_image_view_to_handle(src_iview),
+ .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+ },
+ }},
+ });
+
+ switch (resolve_mode) {
+ case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR:
+ if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
+ pipeline = &device->meta_state.resolve_fragment.depth_zero_pipeline;
+ else
+ pipeline = &device->meta_state.resolve_fragment.stencil_zero_pipeline;
+ break;
+ case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
+ assert(aspects == VK_IMAGE_ASPECT_DEPTH_BIT);
+ pipeline = &device->meta_state.resolve_fragment.depth[samples_log2].average_pipeline;
+ break;
+ case VK_RESOLVE_MODE_MIN_BIT_KHR:
+ if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
+ pipeline = &device->meta_state.resolve_fragment.depth[samples_log2].min_pipeline;
+ else
+ pipeline = &device->meta_state.resolve_fragment.stencil[samples_log2].min_pipeline;
+ break;
+ case VK_RESOLVE_MODE_MAX_BIT_KHR:
+ if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
+ pipeline = &device->meta_state.resolve_fragment.depth[samples_log2].max_pipeline;
+ else
+ pipeline = &device->meta_state.resolve_fragment.stencil[samples_log2].max_pipeline;
+ break;
+ default:
+ unreachable("invalid resolve mode");
+ }
+
+ if (!*pipeline) {
+ int index = aspects == VK_IMAGE_ASPECT_DEPTH_BIT ? DEPTH_RESOLVE : STENCIL_RESOLVE;
+ VkResult ret;
+
+ ret = create_depth_stencil_resolve_pipeline(device, samples_log2, index, resolve_mode);
+ if (ret != VK_SUCCESS) {
+ cmd_buffer->record_result = ret;
+ return;
+ }
+ }
+
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS,
+ *pipeline);
+
+ radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
+ &(VkViewport){.x = 0,
+ .y = 0,
+ .width = resolve_extent->width,
+ .height = resolve_extent->height,
+ .minDepth = 0.0f,
+ .maxDepth = 1.0f});
+
+ radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
+ &(VkRect2D){
+ .offset = (VkOffset2D){0, 0},
+ .extent = *resolve_extent,
+ });
+
+ radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
}
-void radv_meta_resolve_fragment_image(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *src_image,
- VkImageLayout src_image_layout,
- struct radv_image *dest_image,
- VkImageLayout dest_image_layout,
- const VkImageResolve2KHR *region)
+void
+radv_meta_resolve_fragment_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,
+ VkImageLayout src_image_layout, struct radv_image *dest_image,
+ VkImageLayout dest_image_layout, const VkImageResolve2KHR *region)
{
- struct radv_device *device = cmd_buffer->device;
- struct radv_meta_saved_state saved_state;
- const uint32_t samples = src_image->info.samples;
- const uint32_t samples_log2 = ffs(samples) - 1;
- unsigned fs_key = radv_format_meta_fs_key(cmd_buffer->device, dest_image->vk_format);
- unsigned dst_layout = radv_meta_dst_layout_from_layout(dest_image_layout);
- VkRenderPass rp;
-
- radv_decompress_resolve_src(cmd_buffer, src_image, src_image_layout,
- region);
-
- if (!device->meta_state.resolve_fragment.rc[samples_log2].render_pass[fs_key][dst_layout]) {
- VkResult ret = create_resolve_pipeline(device, samples_log2, radv_fs_key_format_exemplars[fs_key]);
- if (ret != VK_SUCCESS) {
- cmd_buffer->record_result = ret;
- return;
- }
- }
-
- rp = device->meta_state.resolve_fragment.rc[samples_log2].render_pass[fs_key][dst_layout];
-
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_GRAPHICS_PIPELINE |
- RADV_META_SAVE_CONSTANTS |
- RADV_META_SAVE_DESCRIPTORS);
-
- assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
- assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
- assert(region->srcSubresource.layerCount == region->dstSubresource.layerCount);
-
- const uint32_t src_base_layer =
- radv_meta_get_iview_layer(src_image, &region->srcSubresource,
- &region->srcOffset);
-
- const uint32_t dest_base_layer =
- radv_meta_get_iview_layer(dest_image, &region->dstSubresource,
- &region->dstOffset);
-
- const struct VkExtent3D extent =
- radv_sanitize_image_extent(src_image->type, region->extent);
- const struct VkOffset3D srcOffset =
- radv_sanitize_image_offset(src_image->type, region->srcOffset);
- const struct VkOffset3D dstOffset =
- radv_sanitize_image_offset(dest_image->type, region->dstOffset);
-
- for (uint32_t layer = 0; layer < region->srcSubresource.layerCount;
- ++layer) {
-
- struct radv_image_view src_iview;
- radv_image_view_init(&src_iview, cmd_buffer->device,
- &(VkImageViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(src_image),
- .viewType = radv_meta_get_view_type(src_image),
- .format = src_image->vk_format,
- .subresourceRange = {
- .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .baseMipLevel = region->srcSubresource.mipLevel,
- .levelCount = 1,
- .baseArrayLayer = src_base_layer + layer,
- .layerCount = 1,
- },
- }, NULL);
-
- struct radv_image_view dest_iview;
- radv_image_view_init(&dest_iview, cmd_buffer->device,
- &(VkImageViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(dest_image),
- .viewType = radv_meta_get_view_type(dest_image),
- .format = dest_image->vk_format,
- .subresourceRange = {
- .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
- .baseMipLevel = region->dstSubresource.mipLevel,
- .levelCount = 1,
- .baseArrayLayer = dest_base_layer + layer,
- .layerCount = 1,
- },
- }, NULL);
-
-
- VkFramebuffer fb;
- radv_CreateFramebuffer(radv_device_to_handle(cmd_buffer->device),
- &(VkFramebufferCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
- .attachmentCount = 1,
- .pAttachments = (VkImageView[]) {
- radv_image_view_to_handle(&dest_iview),
- },
- .width = extent.width + dstOffset.x,
- .height = extent.height + dstOffset.y,
- .layers = 1
- }, &cmd_buffer->pool->alloc, &fb);
-
- radv_cmd_buffer_begin_render_pass(cmd_buffer,
- &(VkRenderPassBeginInfo) {
- .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
- .renderPass = rp,
- .framebuffer = fb,
- .renderArea = {
- .offset = { dstOffset.x, dstOffset.y, },
- .extent = { extent.width, extent.height },
- },
- .clearValueCount = 0,
- .pClearValues = NULL,
- }, NULL);
-
- radv_cmd_buffer_set_subpass(cmd_buffer,
- &cmd_buffer->state.pass->subpasses[0]);
-
- emit_resolve(cmd_buffer,
- &src_iview,
- &dest_iview,
- &(VkOffset2D) { srcOffset.x, srcOffset.y },
- &(VkOffset2D) { dstOffset.x, dstOffset.y },
- &(VkExtent2D) { extent.width, extent.height });
-
- radv_cmd_buffer_end_render_pass(cmd_buffer);
-
- radv_DestroyFramebuffer(radv_device_to_handle(cmd_buffer->device), fb, &cmd_buffer->pool->alloc);
- }
-
- radv_meta_restore(&saved_state, cmd_buffer);
+ struct radv_device *device = cmd_buffer->device;
+ struct radv_meta_saved_state saved_state;
+ const uint32_t samples = src_image->info.samples;
+ const uint32_t samples_log2 = ffs(samples) - 1;
+ unsigned fs_key = radv_format_meta_fs_key(cmd_buffer->device, dest_image->vk_format);
+ unsigned dst_layout = radv_meta_dst_layout_from_layout(dest_image_layout);
+ VkRenderPass rp;
+
+ radv_decompress_resolve_src(cmd_buffer, src_image, src_image_layout, region);
+
+ if (!device->meta_state.resolve_fragment.rc[samples_log2].render_pass[fs_key][dst_layout]) {
+ VkResult ret =
+ create_resolve_pipeline(device, samples_log2, radv_fs_key_format_exemplars[fs_key]);
+ if (ret != VK_SUCCESS) {
+ cmd_buffer->record_result = ret;
+ return;
+ }
+ }
+
+ rp = device->meta_state.resolve_fragment.rc[samples_log2].render_pass[fs_key][dst_layout];
+
+ radv_meta_save(
+ &saved_state, cmd_buffer,
+ RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
+
+ assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
+ assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
+ assert(region->srcSubresource.layerCount == region->dstSubresource.layerCount);
+
+ const uint32_t src_base_layer =
+ radv_meta_get_iview_layer(src_image, &region->srcSubresource, &region->srcOffset);
+
+ const uint32_t dest_base_layer =
+ radv_meta_get_iview_layer(dest_image, &region->dstSubresource, &region->dstOffset);
+
+ const struct VkExtent3D extent = radv_sanitize_image_extent(src_image->type, region->extent);
+ const struct VkOffset3D srcOffset =
+ radv_sanitize_image_offset(src_image->type, region->srcOffset);
+ const struct VkOffset3D dstOffset =
+ radv_sanitize_image_offset(dest_image->type, region->dstOffset);
+
+ for (uint32_t layer = 0; layer < region->srcSubresource.layerCount; ++layer) {
+
+ struct radv_image_view src_iview;
+ radv_image_view_init(&src_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(src_image),
+ .viewType = radv_meta_get_view_type(src_image),
+ .format = src_image->vk_format,
+ .subresourceRange =
+ {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = region->srcSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = src_base_layer + layer,
+ .layerCount = 1,
+ },
+ },
+ NULL);
+
+ struct radv_image_view dest_iview;
+ radv_image_view_init(&dest_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(dest_image),
+ .viewType = radv_meta_get_view_type(dest_image),
+ .format = dest_image->vk_format,
+ .subresourceRange =
+ {
+ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+ .baseMipLevel = region->dstSubresource.mipLevel,
+ .levelCount = 1,
+ .baseArrayLayer = dest_base_layer + layer,
+ .layerCount = 1,
+ },
+ },
+ NULL);
+
+ VkFramebuffer fb;
+ radv_CreateFramebuffer(
+ radv_device_to_handle(cmd_buffer->device),
+ &(VkFramebufferCreateInfo){.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+ .attachmentCount = 1,
+ .pAttachments =
+ (VkImageView[]){
+ radv_image_view_to_handle(&dest_iview),
+ },
+ .width = extent.width + dstOffset.x,
+ .height = extent.height + dstOffset.y,
+ .layers = 1},
+ &cmd_buffer->pool->alloc, &fb);
+
+ radv_cmd_buffer_begin_render_pass(cmd_buffer,
+ &(VkRenderPassBeginInfo){
+ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+ .renderPass = rp,
+ .framebuffer = fb,
+ .renderArea =
+ {
+ .offset =
+ {
+ dstOffset.x,
+ dstOffset.y,
+ },
+ .extent = {extent.width, extent.height},
+ },
+ .clearValueCount = 0,
+ .pClearValues = NULL,
+ },
+ NULL);
+
+ radv_cmd_buffer_set_subpass(cmd_buffer, &cmd_buffer->state.pass->subpasses[0]);
+
+ emit_resolve(cmd_buffer, &src_iview, &dest_iview, &(VkOffset2D){srcOffset.x, srcOffset.y},
+ &(VkOffset2D){dstOffset.x, dstOffset.y},
+ &(VkExtent2D){extent.width, extent.height});
+
+ radv_cmd_buffer_end_render_pass(cmd_buffer);
+
+ radv_DestroyFramebuffer(radv_device_to_handle(cmd_buffer->device), fb,
+ &cmd_buffer->pool->alloc);
+ }
+
+ radv_meta_restore(&saved_state, cmd_buffer);
}
-
/**
* Emit any needed resolves for the current subpass.
*/
void
radv_cmd_buffer_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer)
{
- struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
- const struct radv_subpass *subpass = cmd_buffer->state.subpass;
- struct radv_meta_saved_state saved_state;
- struct radv_subpass_barrier barrier;
-
- /* Resolves happen before the end-of-subpass barriers get executed,
- * so we have to make the attachment shader-readable */
- barrier.src_stage_mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
- barrier.src_access_mask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
- barrier.dst_access_mask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
- radv_subpass_barrier(cmd_buffer, &barrier);
-
- radv_decompress_resolve_subpass_src(cmd_buffer);
-
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_GRAPHICS_PIPELINE |
- RADV_META_SAVE_CONSTANTS |
- RADV_META_SAVE_DESCRIPTORS);
-
- for (uint32_t i = 0; i < subpass->color_count; ++i) {
- struct radv_subpass_attachment src_att = subpass->color_attachments[i];
- struct radv_subpass_attachment dest_att = subpass->resolve_attachments[i];
-
- if (dest_att.attachment == VK_ATTACHMENT_UNUSED)
- continue;
-
- struct radv_image_view *dest_iview = cmd_buffer->state.attachments[dest_att.attachment].iview;
- struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview;
-
- struct radv_subpass resolve_subpass = {
- .color_count = 1,
- .color_attachments = (struct radv_subpass_attachment[]) { dest_att },
- .depth_stencil_attachment = NULL,
- };
-
- radv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass);
-
- emit_resolve(cmd_buffer,
- src_iview,
- dest_iview,
- &(VkOffset2D) { 0, 0 },
- &(VkOffset2D) { 0, 0 },
- &(VkExtent2D) { fb->width, fb->height });
- }
-
- radv_cmd_buffer_set_subpass(cmd_buffer, subpass);
-
- radv_meta_restore(&saved_state, cmd_buffer);
+ struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
+ const struct radv_subpass *subpass = cmd_buffer->state.subpass;
+ struct radv_meta_saved_state saved_state;
+ struct radv_subpass_barrier barrier;
+
+ /* Resolves happen before the end-of-subpass barriers get executed,
+ * so we have to make the attachment shader-readable */
+ barrier.src_stage_mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+ barrier.src_access_mask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+ barrier.dst_access_mask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
+ radv_subpass_barrier(cmd_buffer, &barrier);
+
+ radv_decompress_resolve_subpass_src(cmd_buffer);
+
+ radv_meta_save(
+ &saved_state, cmd_buffer,
+ RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
+
+ for (uint32_t i = 0; i < subpass->color_count; ++i) {
+ struct radv_subpass_attachment src_att = subpass->color_attachments[i];
+ struct radv_subpass_attachment dest_att = subpass->resolve_attachments[i];
+
+ if (dest_att.attachment == VK_ATTACHMENT_UNUSED)
+ continue;
+
+ struct radv_image_view *dest_iview = cmd_buffer->state.attachments[dest_att.attachment].iview;
+ struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview;
+
+ struct radv_subpass resolve_subpass = {
+ .color_count = 1,
+ .color_attachments = (struct radv_subpass_attachment[]){dest_att},
+ .depth_stencil_attachment = NULL,
+ };
+
+ radv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass);
+
+ emit_resolve(cmd_buffer, src_iview, dest_iview, &(VkOffset2D){0, 0}, &(VkOffset2D){0, 0},
+ &(VkExtent2D){fb->width, fb->height});
+ }
+
+ radv_cmd_buffer_set_subpass(cmd_buffer, subpass);
+
+ radv_meta_restore(&saved_state, cmd_buffer);
}
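
The subpass barrier at the top of this function is the easy-to-miss part of the resolve path: color attachment writes must be made visible as input-attachment reads before the resolve fragment shader samples them. A rough application-level analogue, written against the plain Vulkan API rather than RADV internals (the destination stage is an assumption here, since only the access masks appear above):

#include <vulkan/vulkan.h>

/* Illustrative sketch only: make prior color-attachment writes visible to
 * fragment-shader input-attachment reads, mirroring the access masks set up
 * in radv_cmd_buffer_resolve_subpass_fs() above. */
static void
make_color_writes_input_readable(VkCommandBuffer cmd)
{
   const VkMemoryBarrier barrier = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
      .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
      .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT,
   };
   /* Destination stage assumed to be the fragment shader, where input
    * attachments are read. */
   vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
                        VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0,
                        1, &barrier, 0, NULL, 0, NULL);
}
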
/**
@@ -1187,73 +1116,70 @@ radv_cmd_buffer_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer)
*/
void
radv_depth_stencil_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer,
- VkImageAspectFlags aspects,
- VkResolveModeFlagBits resolve_mode)
+ VkImageAspectFlags aspects,
+ VkResolveModeFlagBits resolve_mode)
{
- struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
- const struct radv_subpass *subpass = cmd_buffer->state.subpass;
- struct radv_meta_saved_state saved_state;
- struct radv_subpass_barrier barrier;
-
- /* Resolves happen before the end-of-subpass barriers get executed,
- * so we have to make the attachment shader-readable */
- barrier.src_stage_mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
- barrier.src_access_mask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
- barrier.dst_access_mask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
- radv_subpass_barrier(cmd_buffer, &barrier);
-
- struct radv_subpass_attachment src_att = *subpass->depth_stencil_attachment;
- struct radv_image_view *src_iview =
- cmd_buffer->state.attachments[src_att.attachment].iview;
- struct radv_image *src_image = src_iview->image;
-
- VkImageResolve2KHR region = {0};
- region.sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2_KHR;
- region.srcSubresource.aspectMask = aspects;
- region.srcSubresource.mipLevel = 0;
- region.srcSubresource.baseArrayLayer = 0;
- region.srcSubresource.layerCount = 1;
-
- radv_decompress_resolve_src(cmd_buffer, src_image, src_att.layout, &region);
-
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_GRAPHICS_PIPELINE |
- RADV_META_SAVE_DESCRIPTORS);
-
- struct radv_subpass_attachment dst_att = *subpass->ds_resolve_attachment;
- struct radv_image_view *dst_iview =
- cmd_buffer->state.attachments[dst_att.attachment].iview;
-
- struct radv_subpass resolve_subpass = {
- .color_count = 0,
- .color_attachments = NULL,
- .depth_stencil_attachment = (struct radv_subpass_attachment *) { &dst_att },
- };
-
- radv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass);
-
- struct radv_image_view tsrc_iview;
- radv_image_view_init(&tsrc_iview, cmd_buffer->device,
- &(VkImageViewCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
- .image = radv_image_to_handle(src_image),
- .viewType = radv_meta_get_view_type(src_image),
- .format = src_iview->vk_format,
- .subresourceRange = {
- .aspectMask = aspects,
- .baseMipLevel = 0,
- .levelCount = 1,
- .baseArrayLayer = 0,
- .layerCount = 1,
- },
- }, NULL);
-
- emit_depth_stencil_resolve(cmd_buffer, &tsrc_iview, dst_iview,
- &(VkExtent2D) { fb->width, fb->height },
- aspects,
- resolve_mode);
-
- radv_cmd_buffer_set_subpass(cmd_buffer, subpass);
-
- radv_meta_restore(&saved_state, cmd_buffer);
+ struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
+ const struct radv_subpass *subpass = cmd_buffer->state.subpass;
+ struct radv_meta_saved_state saved_state;
+ struct radv_subpass_barrier barrier;
+
+ /* Resolves happen before the end-of-subpass barriers get executed,
+ * so we have to make the attachment shader-readable */
+ barrier.src_stage_mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+ barrier.src_access_mask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+ barrier.dst_access_mask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
+ radv_subpass_barrier(cmd_buffer, &barrier);
+
+ struct radv_subpass_attachment src_att = *subpass->depth_stencil_attachment;
+ struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview;
+ struct radv_image *src_image = src_iview->image;
+
+ VkImageResolve2KHR region = {0};
+ region.sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2_KHR;
+ region.srcSubresource.aspectMask = aspects;
+ region.srcSubresource.mipLevel = 0;
+ region.srcSubresource.baseArrayLayer = 0;
+ region.srcSubresource.layerCount = 1;
+
+ radv_decompress_resolve_src(cmd_buffer, src_image, src_att.layout, &region);
+
+ radv_meta_save(&saved_state, cmd_buffer,
+ RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_DESCRIPTORS);
+
+ struct radv_subpass_attachment dst_att = *subpass->ds_resolve_attachment;
+ struct radv_image_view *dst_iview = cmd_buffer->state.attachments[dst_att.attachment].iview;
+
+ struct radv_subpass resolve_subpass = {
+ .color_count = 0,
+ .color_attachments = NULL,
+ .depth_stencil_attachment = (struct radv_subpass_attachment *){&dst_att},
+ };
+
+ radv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass);
+
+ struct radv_image_view tsrc_iview;
+ radv_image_view_init(&tsrc_iview, cmd_buffer->device,
+ &(VkImageViewCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+ .image = radv_image_to_handle(src_image),
+ .viewType = radv_meta_get_view_type(src_image),
+ .format = src_iview->vk_format,
+ .subresourceRange =
+ {
+ .aspectMask = aspects,
+ .baseMipLevel = 0,
+ .levelCount = 1,
+ .baseArrayLayer = 0,
+ .layerCount = 1,
+ },
+ },
+ NULL);
+
+ emit_depth_stencil_resolve(cmd_buffer, &tsrc_iview, dst_iview,
+ &(VkExtent2D){fb->width, fb->height}, aspects, resolve_mode);
+
+ radv_cmd_buffer_set_subpass(cmd_buffer, subpass);
+
+ radv_meta_restore(&saved_state, cmd_buffer);
}
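
For readers unfamiliar with VkResolveModeFlagBits, the resolve_mode forwarded to emit_depth_stencil_resolve() selects how the per-sample depth values collapse into one. A scalar, CPU-side sketch of that selection (purely illustrative; RADV does this in a meta fragment shader, and the helper name is invented):

#include <math.h>
#include <vulkan/vulkan.h>

/* Hypothetical helper: combine per-sample depth values according to the
 * requested resolve mode. Stencil follows the same pattern, minus AVERAGE. */
static float
resolve_depth_samples(const float *samples, unsigned count, VkResolveModeFlagBits mode)
{
   float value = samples[0]; /* VK_RESOLVE_MODE_SAMPLE_ZERO_BIT and loop seed */

   for (unsigned s = 1; s < count; s++) {
      switch (mode) {
      case VK_RESOLVE_MODE_MIN_BIT:
         value = fminf(value, samples[s]);
         break;
      case VK_RESOLVE_MODE_MAX_BIT:
         value = fmaxf(value, samples[s]);
         break;
      case VK_RESOLVE_MODE_AVERAGE_BIT:
         value += samples[s];
         break;
      default: /* VK_RESOLVE_MODE_SAMPLE_ZERO_BIT: keep sample 0 */
         break;
      }
   }

   if (mode == VK_RESOLVE_MODE_AVERAGE_BIT)
      value /= (float)count;
   return value;
}
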
diff --git a/src/amd/vulkan/radv_nir_lower_ycbcr_textures.c b/src/amd/vulkan/radv_nir_lower_ycbcr_textures.c
index ced1a83c082..a42852faac8 100644
--- a/src/amd/vulkan/radv_nir_lower_ycbcr_textures.c
+++ b/src/amd/vulkan/radv_nir_lower_ycbcr_textures.c
@@ -21,321 +21,289 @@
* IN THE SOFTWARE.
*/
-#include "radv_private.h"
-#include "radv_shader.h"
-#include "vk_format.h"
#include "nir/nir.h"
#include "nir/nir_builder.h"
#include "nir/nir_vulkan.h"
+#include "radv_private.h"
+#include "radv_shader.h"
+#include "vk_format.h"
struct ycbcr_state {
- nir_builder *builder;
- nir_ssa_def *image_size;
- nir_tex_instr *origin_tex;
- nir_deref_instr *tex_deref;
- const struct radv_sampler_ycbcr_conversion *conversion;
+ nir_builder *builder;
+ nir_ssa_def *image_size;
+ nir_tex_instr *origin_tex;
+ nir_deref_instr *tex_deref;
+ const struct radv_sampler_ycbcr_conversion *conversion;
};
static nir_ssa_def *
get_texture_size(struct ycbcr_state *state, nir_deref_instr *texture)
{
- nir_builder *b = state->builder;
- const struct glsl_type *type = texture->type;
- nir_tex_instr *tex = nir_tex_instr_create(b->shader, 1);
+ nir_builder *b = state->builder;
+ const struct glsl_type *type = texture->type;
+ nir_tex_instr *tex = nir_tex_instr_create(b->shader, 1);
- tex->op = nir_texop_txs;
- tex->sampler_dim = glsl_get_sampler_dim(type);
- tex->is_array = glsl_sampler_type_is_array(type);
- tex->is_shadow = glsl_sampler_type_is_shadow(type);
- tex->dest_type = nir_type_int32;
+ tex->op = nir_texop_txs;
+ tex->sampler_dim = glsl_get_sampler_dim(type);
+ tex->is_array = glsl_sampler_type_is_array(type);
+ tex->is_shadow = glsl_sampler_type_is_shadow(type);
+ tex->dest_type = nir_type_int32;
- tex->src[0].src_type = nir_tex_src_texture_deref;
- tex->src[0].src = nir_src_for_ssa(&texture->dest.ssa);
+ tex->src[0].src_type = nir_tex_src_texture_deref;
+ tex->src[0].src = nir_src_for_ssa(&texture->dest.ssa);
- nir_ssa_dest_init(&tex->instr, &tex->dest,
- nir_tex_instr_dest_size(tex), 32, NULL);
- nir_builder_instr_insert(b, &tex->instr);
+ nir_ssa_dest_init(&tex->instr, &tex->dest, nir_tex_instr_dest_size(tex), 32, NULL);
+ nir_builder_instr_insert(b, &tex->instr);
- return nir_i2f32(b, &tex->dest.ssa);
+ return nir_i2f32(b, &tex->dest.ssa);
}
static nir_ssa_def *
-implicit_downsampled_coord(nir_builder *b,
- nir_ssa_def *value,
- nir_ssa_def *max_value,
+implicit_downsampled_coord(nir_builder *b, nir_ssa_def *value, nir_ssa_def *max_value,
int div_scale)
{
- return nir_fadd(b,
- value,
- nir_fdiv(b,
- nir_imm_float(b, 1.0f),
- nir_fmul(b,
- nir_imm_float(b, div_scale),
- max_value)));
+ return nir_fadd(
+ b, value,
+ nir_fdiv(b, nir_imm_float(b, 1.0f), nir_fmul(b, nir_imm_float(b, div_scale), max_value)));
}
static nir_ssa_def *
-implicit_downsampled_coords(struct ycbcr_state *state,
- nir_ssa_def *old_coords)
+implicit_downsampled_coords(struct ycbcr_state *state, nir_ssa_def *old_coords)
{
- nir_builder *b = state->builder;
- const struct radv_sampler_ycbcr_conversion *conversion = state->conversion;
- nir_ssa_def *image_size = NULL;
- nir_ssa_def *comp[4] = { NULL, };
- enum pipe_video_chroma_format chroma_format = pipe_format_to_chroma_format(vk_format_to_pipe_format(state->conversion->format));
- const unsigned divisors[2] = {
- chroma_format <= PIPE_VIDEO_CHROMA_FORMAT_422 ? 2 : 1,
- chroma_format <= PIPE_VIDEO_CHROMA_FORMAT_420 ? 2 : 1
- };
-
- for (int c = 0; c < old_coords->num_components; c++) {
- if (c < ARRAY_SIZE(divisors) && divisors[c] > 1 &&
- conversion->chroma_offsets[c] == VK_CHROMA_LOCATION_COSITED_EVEN) {
- if (!image_size)
- image_size = get_texture_size(state, state->tex_deref);
-
- comp[c] = implicit_downsampled_coord(b,
- nir_channel(b, old_coords, c),
- nir_channel(b, image_size, c),
- divisors[c]);
- } else {
- comp[c] = nir_channel(b, old_coords, c);
- }
- }
-
- return nir_vec(b, comp, old_coords->num_components);
+ nir_builder *b = state->builder;
+ const struct radv_sampler_ycbcr_conversion *conversion = state->conversion;
+ nir_ssa_def *image_size = NULL;
+ nir_ssa_def *comp[4] = {
+ NULL,
+ };
+ enum pipe_video_chroma_format chroma_format =
+ pipe_format_to_chroma_format(vk_format_to_pipe_format(state->conversion->format));
+ const unsigned divisors[2] = {chroma_format <= PIPE_VIDEO_CHROMA_FORMAT_422 ? 2 : 1,
+ chroma_format <= PIPE_VIDEO_CHROMA_FORMAT_420 ? 2 : 1};
+
+ for (int c = 0; c < old_coords->num_components; c++) {
+ if (c < ARRAY_SIZE(divisors) && divisors[c] > 1 &&
+ conversion->chroma_offsets[c] == VK_CHROMA_LOCATION_COSITED_EVEN) {
+ if (!image_size)
+ image_size = get_texture_size(state, state->tex_deref);
+
+ comp[c] = implicit_downsampled_coord(b, nir_channel(b, old_coords, c),
+ nir_channel(b, image_size, c), divisors[c]);
+ } else {
+ comp[c] = nir_channel(b, old_coords, c);
+ }
+ }
+
+ return nir_vec(b, comp, old_coords->num_components);
}
static nir_ssa_def *
-create_plane_tex_instr_implicit(struct ycbcr_state *state,
- uint32_t plane)
+create_plane_tex_instr_implicit(struct ycbcr_state *state, uint32_t plane)
{
- nir_builder *b = state->builder;
- nir_tex_instr *old_tex = state->origin_tex;
- nir_tex_instr *tex = nir_tex_instr_create(b->shader, old_tex->num_srcs+ 1);
- for (uint32_t i = 0; i < old_tex->num_srcs; i++) {
- tex->src[i].src_type = old_tex->src[i].src_type;
-
- switch (old_tex->src[i].src_type) {
- case nir_tex_src_coord:
- if (plane && true/*state->conversion->chroma_reconstruction*/) {
- assert(old_tex->src[i].src.is_ssa);
- tex->src[i].src =
- nir_src_for_ssa(implicit_downsampled_coords(state,
- old_tex->src[i].src.ssa));
- break;
- }
- /* fall through */
- default:
- nir_src_copy(&tex->src[i].src, &old_tex->src[i].src, tex);
- break;
- }
- }
-
- tex->src[tex->num_srcs - 1].src = nir_src_for_ssa(nir_imm_int(b, plane));
- tex->src[tex->num_srcs - 1].src_type = nir_tex_src_plane;
-
- tex->sampler_dim = old_tex->sampler_dim;
- tex->dest_type = old_tex->dest_type;
- tex->is_array = old_tex->is_array;
-
- tex->op = old_tex->op;
- tex->coord_components = old_tex->coord_components;
- tex->is_new_style_shadow = old_tex->is_new_style_shadow;
- tex->component = old_tex->component;
-
- tex->texture_index = old_tex->texture_index;
- tex->sampler_index = old_tex->sampler_index;
-
- nir_ssa_dest_init(&tex->instr, &tex->dest,
- old_tex->dest.ssa.num_components,
- nir_dest_bit_size(old_tex->dest), NULL);
- nir_builder_instr_insert(b, &tex->instr);
-
- return &tex->dest.ssa;
+ nir_builder *b = state->builder;
+ nir_tex_instr *old_tex = state->origin_tex;
+ nir_tex_instr *tex = nir_tex_instr_create(b->shader, old_tex->num_srcs + 1);
+ for (uint32_t i = 0; i < old_tex->num_srcs; i++) {
+ tex->src[i].src_type = old_tex->src[i].src_type;
+
+ switch (old_tex->src[i].src_type) {
+ case nir_tex_src_coord:
+ if (plane && true /*state->conversion->chroma_reconstruction*/) {
+ assert(old_tex->src[i].src.is_ssa);
+ tex->src[i].src =
+ nir_src_for_ssa(implicit_downsampled_coords(state, old_tex->src[i].src.ssa));
+ break;
+ }
+ /* fall through */
+ default:
+ nir_src_copy(&tex->src[i].src, &old_tex->src[i].src, tex);
+ break;
+ }
+ }
+
+ tex->src[tex->num_srcs - 1].src = nir_src_for_ssa(nir_imm_int(b, plane));
+ tex->src[tex->num_srcs - 1].src_type = nir_tex_src_plane;
+
+ tex->sampler_dim = old_tex->sampler_dim;
+ tex->dest_type = old_tex->dest_type;
+ tex->is_array = old_tex->is_array;
+
+ tex->op = old_tex->op;
+ tex->coord_components = old_tex->coord_components;
+ tex->is_new_style_shadow = old_tex->is_new_style_shadow;
+ tex->component = old_tex->component;
+
+ tex->texture_index = old_tex->texture_index;
+ tex->sampler_index = old_tex->sampler_index;
+
+ nir_ssa_dest_init(&tex->instr, &tex->dest, old_tex->dest.ssa.num_components,
+ nir_dest_bit_size(old_tex->dest), NULL);
+ nir_builder_instr_insert(b, &tex->instr);
+
+ return &tex->dest.ssa;
}
struct swizzle_info {
- unsigned plane[4];
- unsigned swizzle[4];
+ unsigned plane[4];
+ unsigned swizzle[4];
};
static struct swizzle_info
get_plane_swizzles(VkFormat format)
{
- int planes = vk_format_get_plane_count(format);
- switch (planes) {
- case 3:
- return (struct swizzle_info) {
- {2, 0, 1, 0},
- {0, 0, 0, 3}
- };
- case 2:
- return (struct swizzle_info) {
- {1, 0, 1, 0},
- {1, 0, 0, 3}
- };
- case 1:
- return (struct swizzle_info) {
- {0, 0, 0, 0},
- {0, 1, 2, 3}
- };
- default:
- unreachable("unhandled plane count for ycbcr swizzling");
- }
+ int planes = vk_format_get_plane_count(format);
+ switch (planes) {
+ case 3:
+ return (struct swizzle_info){{2, 0, 1, 0}, {0, 0, 0, 3}};
+ case 2:
+ return (struct swizzle_info){{1, 0, 1, 0}, {1, 0, 0, 3}};
+ case 1:
+ return (struct swizzle_info){{0, 0, 0, 0}, {0, 1, 2, 3}};
+ default:
+ unreachable("unhandled plane count for ycbcr swizzling");
+ }
}
-
static nir_ssa_def *
-build_swizzled_components(nir_builder *builder,
- VkFormat format,
- VkComponentMapping mapping,
+build_swizzled_components(nir_builder *builder, VkFormat format, VkComponentMapping mapping,
nir_ssa_def **plane_values)
{
- struct swizzle_info plane_swizzle = get_plane_swizzles(format);
- enum pipe_swizzle swizzles[4];
- nir_ssa_def *values[4];
-
- vk_format_compose_swizzles(&mapping, (const unsigned char[4]){0,1,2,3}, swizzles);
-
- nir_ssa_def *zero = nir_imm_float(builder, 0.0f);
- nir_ssa_def *one = nir_imm_float(builder, 1.0f);
-
- for (unsigned i = 0; i < 4; ++i) {
- switch(swizzles[i]) {
- case PIPE_SWIZZLE_X:
- case PIPE_SWIZZLE_Y:
- case PIPE_SWIZZLE_Z:
- case PIPE_SWIZZLE_W: {
- unsigned channel = swizzles[i] - PIPE_SWIZZLE_X;
- values[i] = nir_channel(builder,
- plane_values[plane_swizzle.plane[channel]],
- plane_swizzle.swizzle[channel]);
- break;
- }
- case PIPE_SWIZZLE_0:
- values[i] = zero;
- break;
- case PIPE_SWIZZLE_1:
- values[i] = one;
- break;
- default:
- unreachable("unhandled swizzle");
- }
- }
- return nir_vec(builder, values, 4);
+ struct swizzle_info plane_swizzle = get_plane_swizzles(format);
+ enum pipe_swizzle swizzles[4];
+ nir_ssa_def *values[4];
+
+ vk_format_compose_swizzles(&mapping, (const unsigned char[4]){0, 1, 2, 3}, swizzles);
+
+ nir_ssa_def *zero = nir_imm_float(builder, 0.0f);
+ nir_ssa_def *one = nir_imm_float(builder, 1.0f);
+
+ for (unsigned i = 0; i < 4; ++i) {
+ switch (swizzles[i]) {
+ case PIPE_SWIZZLE_X:
+ case PIPE_SWIZZLE_Y:
+ case PIPE_SWIZZLE_Z:
+ case PIPE_SWIZZLE_W: {
+ unsigned channel = swizzles[i] - PIPE_SWIZZLE_X;
+ values[i] = nir_channel(builder, plane_values[plane_swizzle.plane[channel]],
+ plane_swizzle.swizzle[channel]);
+ break;
+ }
+ case PIPE_SWIZZLE_0:
+ values[i] = zero;
+ break;
+ case PIPE_SWIZZLE_1:
+ values[i] = one;
+ break;
+ default:
+ unreachable("unhandled swizzle");
+ }
+ }
+ return nir_vec(builder, values, 4);
}
static bool
-try_lower_tex_ycbcr(const struct radv_pipeline_layout *layout,
- nir_builder *builder,
+try_lower_tex_ycbcr(const struct radv_pipeline_layout *layout, nir_builder *builder,
nir_tex_instr *tex)
{
- int deref_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
- assert(deref_src_idx >= 0);
- nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
-
- nir_variable *var = nir_deref_instr_get_variable(deref);
- const struct radv_descriptor_set_layout *set_layout =
- layout->set[var->data.descriptor_set].layout;
- const struct radv_descriptor_set_binding_layout *binding =
- &set_layout->binding[var->data.binding];
- const struct radv_sampler_ycbcr_conversion *ycbcr_samplers =
- radv_immutable_ycbcr_samplers(set_layout, var->data.binding);
-
- if (!ycbcr_samplers)
- return false;
-
- /* For the following instructions, we don't apply any change and let the
- * instruction apply to the first plane.
- */
- if (tex->op == nir_texop_txs ||
- tex->op == nir_texop_query_levels ||
- tex->op == nir_texop_lod)
- return false;
-
- assert(tex->texture_index == 0);
- unsigned array_index = 0;
- if (deref->deref_type != nir_deref_type_var) {
- assert(deref->deref_type == nir_deref_type_array);
- if (!nir_src_is_const(deref->arr.index))
- return false;
- array_index = nir_src_as_uint(deref->arr.index);
- array_index = MIN2(array_index, binding->array_size - 1);
- }
- const struct radv_sampler_ycbcr_conversion *ycbcr_sampler = ycbcr_samplers + array_index;
-
- if (ycbcr_sampler->format == VK_FORMAT_UNDEFINED)
- return false;
-
- struct ycbcr_state state = {
- .builder = builder,
- .origin_tex = tex,
- .tex_deref = deref,
- .conversion = ycbcr_sampler,
- };
-
- builder->cursor = nir_before_instr(&tex->instr);
-
- VkFormat format = state.conversion->format;
- const int plane_count = vk_format_get_plane_count(format);
- nir_ssa_def *plane_values[3];
-
- for (int p = 0; p < plane_count; ++p) {
- plane_values[p] = create_plane_tex_instr_implicit(&state, p);
- }
-
- nir_ssa_def *result = build_swizzled_components(builder, format, ycbcr_sampler->components, plane_values);
- if (state.conversion->ycbcr_model != VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY) {
- VkFormat first_format = vk_format_get_plane_format(format, 0);
- uint32_t bits = vk_format_get_component_bits(first_format, UTIL_FORMAT_COLORSPACE_RGB, PIPE_SWIZZLE_X);
- /* TODO: swizzle and bpcs */
- uint32_t bpcs[3] = {bits, bits, bits};
- result = nir_convert_ycbcr_to_rgb(builder,
- state.conversion->ycbcr_model,
- state.conversion->ycbcr_range,
- result,
- bpcs);
- }
-
- nir_ssa_def_rewrite_uses(&tex->dest.ssa, result);
- nir_instr_remove(&tex->instr);
-
- return true;
+ int deref_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
+ assert(deref_src_idx >= 0);
+ nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
+
+ nir_variable *var = nir_deref_instr_get_variable(deref);
+ const struct radv_descriptor_set_layout *set_layout =
+ layout->set[var->data.descriptor_set].layout;
+ const struct radv_descriptor_set_binding_layout *binding =
+ &set_layout->binding[var->data.binding];
+ const struct radv_sampler_ycbcr_conversion *ycbcr_samplers =
+ radv_immutable_ycbcr_samplers(set_layout, var->data.binding);
+
+ if (!ycbcr_samplers)
+ return false;
+
+ /* For the following instructions, we don't apply any change and let the
+ * instruction apply to the first plane.
+ */
+ if (tex->op == nir_texop_txs || tex->op == nir_texop_query_levels || tex->op == nir_texop_lod)
+ return false;
+
+ assert(tex->texture_index == 0);
+ unsigned array_index = 0;
+ if (deref->deref_type != nir_deref_type_var) {
+ assert(deref->deref_type == nir_deref_type_array);
+ if (!nir_src_is_const(deref->arr.index))
+ return false;
+ array_index = nir_src_as_uint(deref->arr.index);
+ array_index = MIN2(array_index, binding->array_size - 1);
+ }
+ const struct radv_sampler_ycbcr_conversion *ycbcr_sampler = ycbcr_samplers + array_index;
+
+ if (ycbcr_sampler->format == VK_FORMAT_UNDEFINED)
+ return false;
+
+ struct ycbcr_state state = {
+ .builder = builder,
+ .origin_tex = tex,
+ .tex_deref = deref,
+ .conversion = ycbcr_sampler,
+ };
+
+ builder->cursor = nir_before_instr(&tex->instr);
+
+ VkFormat format = state.conversion->format;
+ const int plane_count = vk_format_get_plane_count(format);
+ nir_ssa_def *plane_values[3];
+
+ for (int p = 0; p < plane_count; ++p) {
+ plane_values[p] = create_plane_tex_instr_implicit(&state, p);
+ }
+
+ nir_ssa_def *result =
+ build_swizzled_components(builder, format, ycbcr_sampler->components, plane_values);
+ if (state.conversion->ycbcr_model != VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY) {
+ VkFormat first_format = vk_format_get_plane_format(format, 0);
+ uint32_t bits =
+ vk_format_get_component_bits(first_format, UTIL_FORMAT_COLORSPACE_RGB, PIPE_SWIZZLE_X);
+ /* TODO: swizzle and bpcs */
+ uint32_t bpcs[3] = {bits, bits, bits};
+ result = nir_convert_ycbcr_to_rgb(builder, state.conversion->ycbcr_model,
+ state.conversion->ycbcr_range, result, bpcs);
+ }
+
+ nir_ssa_def_rewrite_uses(&tex->dest.ssa, result);
+ nir_instr_remove(&tex->instr);
+
+ return true;
}
bool
-radv_nir_lower_ycbcr_textures(nir_shader *shader,
- const struct radv_pipeline_layout *layout)
+radv_nir_lower_ycbcr_textures(nir_shader *shader, const struct radv_pipeline_layout *layout)
{
- bool progress = false;
+ bool progress = false;
- nir_foreach_function(function, shader) {
- if (!function->impl)
- continue;
+ nir_foreach_function (function, shader) {
+ if (!function->impl)
+ continue;
- bool function_progress = false;
- nir_builder builder;
- nir_builder_init(&builder, function->impl);
+ bool function_progress = false;
+ nir_builder builder;
+ nir_builder_init(&builder, function->impl);
- nir_foreach_block(block, function->impl) {
- nir_foreach_instr_safe(instr, block) {
- if (instr->type != nir_instr_type_tex)
- continue;
+ nir_foreach_block (block, function->impl) {
+ nir_foreach_instr_safe (instr, block) {
+ if (instr->type != nir_instr_type_tex)
+ continue;
- nir_tex_instr *tex = nir_instr_as_tex(instr);
- function_progress |= try_lower_tex_ycbcr(layout, &builder, tex);
- }
- }
+ nir_tex_instr *tex = nir_instr_as_tex(instr);
+ function_progress |= try_lower_tex_ycbcr(layout, &builder, tex);
+ }
+ }
- if (function_progress) {
- nir_metadata_preserve(function->impl,
- nir_metadata_block_index |
- nir_metadata_dominance);
- }
+ if (function_progress) {
+ nir_metadata_preserve(function->impl, nir_metadata_block_index | nir_metadata_dominance);
+ }
- progress |= function_progress;
- }
+ progress |= function_progress;
+ }
- return progress;
+ return progress;
}
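
The least obvious piece of this pass is implicit_downsampled_coord(): for cosited-even chroma, the sampling coordinate of a subsampled plane is offset by 1 / (div_scale * queried texture size), with div_scale taken from the divisors[] table (2 on the horizontal axis for 4:2:2 and 4:2:0, 2 on the vertical axis for 4:2:0). A standalone scalar sketch of the same arithmetic, with plain floats instead of nir_ssa_defs and an invented name:

/* Scalar model of the coordinate adjustment emitted by
 * implicit_downsampled_coord() above; purely illustrative, no NIR involved. */
static float
downsampled_coord(float coord, float tex_size, int div_scale)
{
   /* value + 1 / (div_scale * size), applied only when the chroma offset is
    * VK_CHROMA_LOCATION_COSITED_EVEN and the axis is actually subsampled. */
   return coord + 1.0f / ((float)div_scale * tex_size);
}
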
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index a91f8af6b4a..59e9fee3118 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -25,1597 +25,1468 @@
* IN THE SOFTWARE.
*/
+#include "nir/nir.h"
+#include "radv_debug.h"
#include "radv_private.h"
#include "radv_shader.h"
-#include "radv_shader_helper.h"
#include "radv_shader_args.h"
-#include "radv_debug.h"
-#include "nir/nir.h"
+#include "radv_shader_helper.h"
-#include "sid.h"
#include "ac_binary.h"
-#include "ac_llvm_util.h"
+#include "ac_exp_param.h"
#include "ac_llvm_build.h"
+#include "ac_llvm_util.h"
#include "ac_shader_abi.h"
#include "ac_shader_util.h"
-#include "ac_exp_param.h"
+#include "sid.h"
#define RADEON_LLVM_MAX_INPUTS (VARYING_SLOT_VAR31 + 1)
struct radv_shader_context {
- struct ac_llvm_context ac;
- const struct nir_shader *shader;
- struct ac_shader_abi abi;
- const struct radv_shader_args *args;
+ struct ac_llvm_context ac;
+ const struct nir_shader *shader;
+ struct ac_shader_abi abi;
+ const struct radv_shader_args *args;
- gl_shader_stage stage;
+ gl_shader_stage stage;
- unsigned max_workgroup_size;
- LLVMContextRef context;
- LLVMValueRef main_function;
+ unsigned max_workgroup_size;
+ LLVMContextRef context;
+ LLVMValueRef main_function;
- LLVMValueRef descriptor_sets[MAX_SETS];
+ LLVMValueRef descriptor_sets[MAX_SETS];
- LLVMValueRef ring_offsets;
+ LLVMValueRef ring_offsets;
- LLVMValueRef vs_rel_patch_id;
+ LLVMValueRef vs_rel_patch_id;
- LLVMValueRef gs_wave_id;
- LLVMValueRef gs_vtx_offset[6];
+ LLVMValueRef gs_wave_id;
+ LLVMValueRef gs_vtx_offset[6];
- LLVMValueRef esgs_ring;
- LLVMValueRef gsvs_ring[4];
- LLVMValueRef hs_ring_tess_offchip;
- LLVMValueRef hs_ring_tess_factor;
+ LLVMValueRef esgs_ring;
+ LLVMValueRef gsvs_ring[4];
+ LLVMValueRef hs_ring_tess_offchip;
+ LLVMValueRef hs_ring_tess_factor;
- LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4];
+ LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4];
- uint64_t output_mask;
+ uint64_t output_mask;
- LLVMValueRef gs_next_vertex[4];
- LLVMValueRef gs_curprim_verts[4];
- LLVMValueRef gs_generated_prims[4];
- LLVMValueRef gs_ngg_emit;
- LLVMValueRef gs_ngg_scratch;
+ LLVMValueRef gs_next_vertex[4];
+ LLVMValueRef gs_curprim_verts[4];
+ LLVMValueRef gs_generated_prims[4];
+ LLVMValueRef gs_ngg_emit;
+ LLVMValueRef gs_ngg_scratch;
- LLVMValueRef vertexptr; /* GFX10 only */
+ LLVMValueRef vertexptr; /* GFX10 only */
};
struct radv_shader_output_values {
- LLVMValueRef values[4];
- unsigned slot_name;
- unsigned slot_index;
- unsigned usage_mask;
+ LLVMValueRef values[4];
+ unsigned slot_name;
+ unsigned slot_index;
+ unsigned usage_mask;
};
static inline struct radv_shader_context *
radv_shader_context_from_abi(struct ac_shader_abi *abi)
{
- return container_of(abi, struct radv_shader_context, abi);
+ return container_of(abi, struct radv_shader_context, abi);
}
static LLVMValueRef
-create_llvm_function(struct ac_llvm_context *ctx, LLVMModuleRef module,
- LLVMBuilderRef builder,
- const struct ac_shader_args *args,
- enum ac_llvm_calling_convention convention,
- unsigned max_workgroup_size,
- const struct radv_nir_compiler_options *options)
+create_llvm_function(struct ac_llvm_context *ctx, LLVMModuleRef module, LLVMBuilderRef builder,
+ const struct ac_shader_args *args, enum ac_llvm_calling_convention convention,
+ unsigned max_workgroup_size, const struct radv_nir_compiler_options *options)
{
- LLVMValueRef main_function =
- ac_build_main(args, ctx, convention, "main", ctx->voidt, module);
+ LLVMValueRef main_function = ac_build_main(args, ctx, convention, "main", ctx->voidt, module);
- if (options->address32_hi) {
- ac_llvm_add_target_dep_function_attr(main_function,
- "amdgpu-32bit-address-high-bits",
- options->address32_hi);
- }
+ if (options->address32_hi) {
+ ac_llvm_add_target_dep_function_attr(main_function, "amdgpu-32bit-address-high-bits",
+ options->address32_hi);
+ }
- ac_llvm_set_workgroup_size(main_function, max_workgroup_size);
+ ac_llvm_set_workgroup_size(main_function, max_workgroup_size);
- return main_function;
+ return main_function;
}
static void
load_descriptor_sets(struct radv_shader_context *ctx)
{
- uint32_t mask = ctx->args->shader_info->desc_set_used_mask;
- if (ctx->args->shader_info->need_indirect_descriptor_sets) {
- LLVMValueRef desc_sets =
- ac_get_arg(&ctx->ac, ctx->args->descriptor_sets[0]);
- while (mask) {
- int i = u_bit_scan(&mask);
-
- ctx->descriptor_sets[i] =
- ac_build_load_to_sgpr(&ctx->ac, desc_sets,
- LLVMConstInt(ctx->ac.i32, i, false));
+ uint32_t mask = ctx->args->shader_info->desc_set_used_mask;
+ if (ctx->args->shader_info->need_indirect_descriptor_sets) {
+ LLVMValueRef desc_sets = ac_get_arg(&ctx->ac, ctx->args->descriptor_sets[0]);
+ while (mask) {
+ int i = u_bit_scan(&mask);
- }
- } else {
- while (mask) {
- int i = u_bit_scan(&mask);
+ ctx->descriptor_sets[i] =
+ ac_build_load_to_sgpr(&ctx->ac, desc_sets, LLVMConstInt(ctx->ac.i32, i, false));
+ }
+ } else {
+ while (mask) {
+ int i = u_bit_scan(&mask);
- ctx->descriptor_sets[i] =
- ac_get_arg(&ctx->ac, ctx->args->descriptor_sets[i]);
- }
- }
+ ctx->descriptor_sets[i] = ac_get_arg(&ctx->ac, ctx->args->descriptor_sets[i]);
+ }
+ }
}
static enum ac_llvm_calling_convention
get_llvm_calling_convention(LLVMValueRef func, gl_shader_stage stage)
{
- switch (stage) {
- case MESA_SHADER_VERTEX:
- case MESA_SHADER_TESS_EVAL:
- return AC_LLVM_AMDGPU_VS;
- break;
- case MESA_SHADER_GEOMETRY:
- return AC_LLVM_AMDGPU_GS;
- break;
- case MESA_SHADER_TESS_CTRL:
- return AC_LLVM_AMDGPU_HS;
- break;
- case MESA_SHADER_FRAGMENT:
- return AC_LLVM_AMDGPU_PS;
- break;
- case MESA_SHADER_COMPUTE:
- return AC_LLVM_AMDGPU_CS;
- break;
- default:
- unreachable("Unhandle shader type");
- }
+ switch (stage) {
+ case MESA_SHADER_VERTEX:
+ case MESA_SHADER_TESS_EVAL:
+ return AC_LLVM_AMDGPU_VS;
+ break;
+ case MESA_SHADER_GEOMETRY:
+ return AC_LLVM_AMDGPU_GS;
+ break;
+ case MESA_SHADER_TESS_CTRL:
+ return AC_LLVM_AMDGPU_HS;
+ break;
+ case MESA_SHADER_FRAGMENT:
+ return AC_LLVM_AMDGPU_PS;
+ break;
+ case MESA_SHADER_COMPUTE:
+ return AC_LLVM_AMDGPU_CS;
+ break;
+ default:
+ unreachable("Unhandle shader type");
+ }
}
/* Returns whether the stage is a stage that can be directly before the GS */
-static bool is_pre_gs_stage(gl_shader_stage stage)
+static bool
+is_pre_gs_stage(gl_shader_stage stage)
{
- return stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL;
+ return stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL;
}
-static void create_function(struct radv_shader_context *ctx,
- gl_shader_stage stage,
- bool has_previous_stage)
+static void
+create_function(struct radv_shader_context *ctx, gl_shader_stage stage, bool has_previous_stage)
{
- if (ctx->ac.chip_class >= GFX10) {
- if (is_pre_gs_stage(stage) && ctx->args->options->key.vs_common_out.as_ngg) {
- /* On GFX10, VS is merged into GS for NGG. */
- stage = MESA_SHADER_GEOMETRY;
- has_previous_stage = true;
- }
- }
+ if (ctx->ac.chip_class >= GFX10) {
+ if (is_pre_gs_stage(stage) && ctx->args->options->key.vs_common_out.as_ngg) {
+ /* On GFX10, VS is merged into GS for NGG. */
+ stage = MESA_SHADER_GEOMETRY;
+ has_previous_stage = true;
+ }
+ }
- ctx->main_function = create_llvm_function(
- &ctx->ac, ctx->ac.module, ctx->ac.builder, &ctx->args->ac,
- get_llvm_calling_convention(ctx->main_function, stage),
- ctx->max_workgroup_size,
- ctx->args->options);
+ ctx->main_function =
+ create_llvm_function(&ctx->ac, ctx->ac.module, ctx->ac.builder, &ctx->args->ac,
+ get_llvm_calling_convention(ctx->main_function, stage),
+ ctx->max_workgroup_size, ctx->args->options);
- ctx->ring_offsets = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.implicit.buffer.ptr",
- LLVMPointerType(ctx->ac.i8, AC_ADDR_SPACE_CONST),
- NULL, 0, AC_FUNC_ATTR_READNONE);
- ctx->ring_offsets = LLVMBuildBitCast(ctx->ac.builder, ctx->ring_offsets,
- ac_array_in_const_addr_space(ctx->ac.v4i32), "");
+ ctx->ring_offsets = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.implicit.buffer.ptr",
+ LLVMPointerType(ctx->ac.i8, AC_ADDR_SPACE_CONST), NULL, 0,
+ AC_FUNC_ATTR_READNONE);
+ ctx->ring_offsets = LLVMBuildBitCast(ctx->ac.builder, ctx->ring_offsets,
+ ac_array_in_const_addr_space(ctx->ac.v4i32), "");
- load_descriptor_sets(ctx);
-
- if (stage == MESA_SHADER_TESS_CTRL ||
- (stage == MESA_SHADER_VERTEX && ctx->args->options->key.vs_common_out.as_ls) ||
- /* GFX9 has the ESGS ring buffer in LDS. */
- (stage == MESA_SHADER_GEOMETRY && has_previous_stage)) {
- ac_declare_lds_as_pointer(&ctx->ac);
- }
+ load_descriptor_sets(ctx);
+ if (stage == MESA_SHADER_TESS_CTRL ||
+ (stage == MESA_SHADER_VERTEX && ctx->args->options->key.vs_common_out.as_ls) ||
+ /* GFX9 has the ESGS ring buffer in LDS. */
+ (stage == MESA_SHADER_GEOMETRY && has_previous_stage)) {
+ ac_declare_lds_as_pointer(&ctx->ac);
+ }
}
-
static LLVMValueRef
-radv_load_resource(struct ac_shader_abi *abi, LLVMValueRef index,
- unsigned desc_set, unsigned binding)
+radv_load_resource(struct ac_shader_abi *abi, LLVMValueRef index, unsigned desc_set,
+ unsigned binding)
{
- struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
- LLVMValueRef desc_ptr = ctx->descriptor_sets[desc_set];
- struct radv_pipeline_layout *pipeline_layout = ctx->args->options->layout;
- struct radv_descriptor_set_layout *layout = pipeline_layout->set[desc_set].layout;
- unsigned base_offset = layout->binding[binding].offset;
- LLVMValueRef offset, stride;
+ struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+ LLVMValueRef desc_ptr = ctx->descriptor_sets[desc_set];
+ struct radv_pipeline_layout *pipeline_layout = ctx->args->options->layout;
+ struct radv_descriptor_set_layout *layout = pipeline_layout->set[desc_set].layout;
+ unsigned base_offset = layout->binding[binding].offset;
+ LLVMValueRef offset, stride;
- if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
- layout->binding[binding].type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
- unsigned idx = pipeline_layout->set[desc_set].dynamic_offset_start +
- layout->binding[binding].dynamic_offset_offset;
- desc_ptr = ac_get_arg(&ctx->ac, ctx->args->ac.push_constants);
- base_offset = pipeline_layout->push_constant_size + 16 * idx;
- stride = LLVMConstInt(ctx->ac.i32, 16, false);
- } else
- stride = LLVMConstInt(ctx->ac.i32, layout->binding[binding].size, false);
+ if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
+ layout->binding[binding].type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
+ unsigned idx = pipeline_layout->set[desc_set].dynamic_offset_start +
+ layout->binding[binding].dynamic_offset_offset;
+ desc_ptr = ac_get_arg(&ctx->ac, ctx->args->ac.push_constants);
+ base_offset = pipeline_layout->push_constant_size + 16 * idx;
+ stride = LLVMConstInt(ctx->ac.i32, 16, false);
+ } else
+ stride = LLVMConstInt(ctx->ac.i32, layout->binding[binding].size, false);
- offset = LLVMConstInt(ctx->ac.i32, base_offset, false);
+ offset = LLVMConstInt(ctx->ac.i32, base_offset, false);
- if (layout->binding[binding].type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
- offset = ac_build_imad(&ctx->ac, index, stride, offset);
- }
+ if (layout->binding[binding].type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
+ offset = ac_build_imad(&ctx->ac, index, stride, offset);
+ }
- desc_ptr = LLVMBuildGEP(ctx->ac.builder, desc_ptr, &offset, 1, "");
- desc_ptr = ac_cast_ptr(&ctx->ac, desc_ptr, ctx->ac.v4i32);
+ desc_ptr = LLVMBuildGEP(ctx->ac.builder, desc_ptr, &offset, 1, "");
+ desc_ptr = ac_cast_ptr(&ctx->ac, desc_ptr, ctx->ac.v4i32);
- return desc_ptr;
+ return desc_ptr;
}
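
What radv_load_resource() builds with LLVM is just pointer arithmetic over the descriptor set: the binding's base offset plus index times the binding stride, or, for dynamic buffers, a 16-byte slot placed after the push constants. A scalar equivalent, with invented names and no RADV types:

#include <stdint.h>

/* Scalar sketch of the descriptor address math built in radv_load_resource();
 * all names here are illustrative. */
static uint64_t
descriptor_address(uint64_t set_base, uint32_t binding_offset,
                   uint32_t binding_stride, uint32_t index)
{
   return set_base + binding_offset + (uint64_t)index * binding_stride;
}

/* Dynamic UBO/SSBO descriptors instead start at
 * push_constant_size + 16 * dynamic_offset_index within the push constants. */
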
static uint32_t
radv_get_sample_pos_offset(uint32_t num_samples)
{
- uint32_t sample_pos_offset = 0;
+ uint32_t sample_pos_offset = 0;
- switch (num_samples) {
- case 2:
- sample_pos_offset = 1;
- break;
- case 4:
- sample_pos_offset = 3;
- break;
- case 8:
- sample_pos_offset = 7;
- break;
- default:
- break;
- }
- return sample_pos_offset;
+ switch (num_samples) {
+ case 2:
+ sample_pos_offset = 1;
+ break;
+ case 4:
+ sample_pos_offset = 3;
+ break;
+ case 8:
+ sample_pos_offset = 7;
+ break;
+ default:
+ break;
+ }
+ return sample_pos_offset;
}
-static LLVMValueRef load_sample_position(struct ac_shader_abi *abi,
- LLVMValueRef sample_id)
+static LLVMValueRef
+load_sample_position(struct ac_shader_abi *abi, LLVMValueRef sample_id)
{
- struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+ struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
- LLVMValueRef result;
- LLVMValueRef index = LLVMConstInt(ctx->ac.i32, RING_PS_SAMPLE_POSITIONS, false);
- LLVMValueRef ptr = LLVMBuildGEP(ctx->ac.builder, ctx->ring_offsets, &index, 1, "");
+ LLVMValueRef result;
+ LLVMValueRef index = LLVMConstInt(ctx->ac.i32, RING_PS_SAMPLE_POSITIONS, false);
+ LLVMValueRef ptr = LLVMBuildGEP(ctx->ac.builder, ctx->ring_offsets, &index, 1, "");
- ptr = LLVMBuildBitCast(ctx->ac.builder, ptr,
- ac_array_in_const_addr_space(ctx->ac.v2f32), "");
+ ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ac_array_in_const_addr_space(ctx->ac.v2f32), "");
- uint32_t sample_pos_offset =
- radv_get_sample_pos_offset(ctx->args->options->key.fs.num_samples);
+ uint32_t sample_pos_offset = radv_get_sample_pos_offset(ctx->args->options->key.fs.num_samples);
- sample_id =
- LLVMBuildAdd(ctx->ac.builder, sample_id,
- LLVMConstInt(ctx->ac.i32, sample_pos_offset, false), "");
- result = ac_build_load_invariant(&ctx->ac, ptr, sample_id);
+ sample_id = LLVMBuildAdd(ctx->ac.builder, sample_id,
+ LLVMConstInt(ctx->ac.i32, sample_pos_offset, false), "");
+ result = ac_build_load_invariant(&ctx->ac, ptr, sample_id);
- return result;
+ return result;
}
-
-static LLVMValueRef load_sample_mask_in(struct ac_shader_abi *abi)
+static LLVMValueRef
+load_sample_mask_in(struct ac_shader_abi *abi)
{
- struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
- uint8_t log2_ps_iter_samples;
+ struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+ uint8_t log2_ps_iter_samples;
- if (ctx->args->shader_info->ps.uses_sample_shading) {
- log2_ps_iter_samples =
- util_logbase2(ctx->args->options->key.fs.num_samples);
- } else {
- log2_ps_iter_samples = ctx->args->options->key.fs.log2_ps_iter_samples;
- }
+ if (ctx->args->shader_info->ps.uses_sample_shading) {
+ log2_ps_iter_samples = util_logbase2(ctx->args->options->key.fs.num_samples);
+ } else {
+ log2_ps_iter_samples = ctx->args->options->key.fs.log2_ps_iter_samples;
+ }
- LLVMValueRef result, sample_id;
- if (log2_ps_iter_samples) {
- /* gl_SampleMaskIn[0] = (SampleCoverage & (1 << gl_SampleID)). */
- sample_id = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.ancillary), 8, 4);
- sample_id = LLVMBuildShl(ctx->ac.builder, LLVMConstInt(ctx->ac.i32, 1, false), sample_id, "");
- result = LLVMBuildAnd(ctx->ac.builder, sample_id,
- ac_get_arg(&ctx->ac, ctx->args->ac.sample_coverage), "");
- } else {
- result = ac_get_arg(&ctx->ac, ctx->args->ac.sample_coverage);
- }
+ LLVMValueRef result, sample_id;
+ if (log2_ps_iter_samples) {
+ /* gl_SampleMaskIn[0] = (SampleCoverage & (1 << gl_SampleID)). */
+ sample_id = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.ancillary), 8, 4);
+ sample_id = LLVMBuildShl(ctx->ac.builder, LLVMConstInt(ctx->ac.i32, 1, false), sample_id, "");
+ result = LLVMBuildAnd(ctx->ac.builder, sample_id,
+ ac_get_arg(&ctx->ac, ctx->args->ac.sample_coverage), "");
+ } else {
+ result = ac_get_arg(&ctx->ac, ctx->args->ac.sample_coverage);
+ }
- return result;
+ return result;
}
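
Stripped of the argument plumbing, the sample-mask logic above reduces to one line of integer arithmetic; a scalar model of it (hedged, names invented):

#include <stdbool.h>
#include <stdint.h>

/* Scalar model of load_sample_mask_in(): with per-sample shading the coverage
 * mask is narrowed to the bit of the current sample, otherwise the full
 * coverage word is returned unchanged. */
static uint32_t
sample_mask_in(uint32_t sample_coverage, uint32_t sample_id, bool per_sample)
{
   return per_sample ? (sample_coverage & (1u << sample_id)) : sample_coverage;
}
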
-
-static void gfx10_ngg_gs_emit_vertex(struct radv_shader_context *ctx,
- unsigned stream,
- LLVMValueRef vertexidx,
- LLVMValueRef *addrs);
+static void gfx10_ngg_gs_emit_vertex(struct radv_shader_context *ctx, unsigned stream,
+ LLVMValueRef vertexidx, LLVMValueRef *addrs);
static void
-visit_emit_vertex_with_counter(struct ac_shader_abi *abi, unsigned stream,
- LLVMValueRef vertexidx, LLVMValueRef *addrs)
-{
- unsigned offset = 0;
- struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
-
- if (ctx->args->options->key.vs_common_out.as_ngg) {
- gfx10_ngg_gs_emit_vertex(ctx, stream, vertexidx, addrs);
- return;
- }
-
- for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
- unsigned output_usage_mask =
- ctx->args->shader_info->gs.output_usage_mask[i];
- uint8_t output_stream =
- ctx->args->shader_info->gs.output_streams[i];
- LLVMValueRef *out_ptr = &addrs[i * 4];
- int length = util_last_bit(output_usage_mask);
-
- if (!(ctx->output_mask & (1ull << i)) ||
- output_stream != stream)
- continue;
-
- for (unsigned j = 0; j < length; j++) {
- if (!(output_usage_mask & (1 << j)))
- continue;
-
- LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder,
- out_ptr[j], "");
- LLVMValueRef voffset =
- LLVMConstInt(ctx->ac.i32, offset *
- ctx->shader->info.gs.vertices_out, false);
-
- offset++;
-
- voffset = LLVMBuildAdd(ctx->ac.builder, voffset, vertexidx, "");
- voffset = LLVMBuildMul(ctx->ac.builder, voffset, LLVMConstInt(ctx->ac.i32, 4, false), "");
-
- out_val = ac_to_integer(&ctx->ac, out_val);
- out_val = LLVMBuildZExtOrBitCast(ctx->ac.builder, out_val, ctx->ac.i32, "");
-
- ac_build_buffer_store_dword(&ctx->ac,
- ctx->gsvs_ring[stream],
- out_val, 1,
- voffset,
- ac_get_arg(&ctx->ac,
- ctx->args->ac.gs2vs_offset),
- 0, ac_glc | ac_slc | ac_swizzled);
- }
- }
-
- ac_build_sendmsg(&ctx->ac,
- AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8),
- ctx->gs_wave_id);
+visit_emit_vertex_with_counter(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef vertexidx,
+ LLVMValueRef *addrs)
+{
+ unsigned offset = 0;
+ struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+
+ if (ctx->args->options->key.vs_common_out.as_ngg) {
+ gfx10_ngg_gs_emit_vertex(ctx, stream, vertexidx, addrs);
+ return;
+ }
+
+ for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
+ unsigned output_usage_mask = ctx->args->shader_info->gs.output_usage_mask[i];
+ uint8_t output_stream = ctx->args->shader_info->gs.output_streams[i];
+ LLVMValueRef *out_ptr = &addrs[i * 4];
+ int length = util_last_bit(output_usage_mask);
+
+ if (!(ctx->output_mask & (1ull << i)) || output_stream != stream)
+ continue;
+
+ for (unsigned j = 0; j < length; j++) {
+ if (!(output_usage_mask & (1 << j)))
+ continue;
+
+ LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, out_ptr[j], "");
+ LLVMValueRef voffset =
+ LLVMConstInt(ctx->ac.i32, offset * ctx->shader->info.gs.vertices_out, false);
+
+ offset++;
+
+ voffset = LLVMBuildAdd(ctx->ac.builder, voffset, vertexidx, "");
+ voffset = LLVMBuildMul(ctx->ac.builder, voffset, LLVMConstInt(ctx->ac.i32, 4, false), "");
+
+ out_val = ac_to_integer(&ctx->ac, out_val);
+ out_val = LLVMBuildZExtOrBitCast(ctx->ac.builder, out_val, ctx->ac.i32, "");
+
+ ac_build_buffer_store_dword(&ctx->ac, ctx->gsvs_ring[stream], out_val, 1, voffset,
+ ac_get_arg(&ctx->ac, ctx->args->ac.gs2vs_offset), 0,
+ ac_glc | ac_slc | ac_swizzled);
+ }
+ }
+
+ ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8),
+ ctx->gs_wave_id);
}
static void
visit_end_primitive(struct ac_shader_abi *abi, unsigned stream)
{
- struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+ struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
- if (ctx->args->options->key.vs_common_out.as_ngg) {
- LLVMBuildStore(ctx->ac.builder, ctx->ac.i32_0, ctx->gs_curprim_verts[stream]);
- return;
- }
+ if (ctx->args->options->key.vs_common_out.as_ngg) {
+ LLVMBuildStore(ctx->ac.builder, ctx->ac.i32_0, ctx->gs_curprim_verts[stream]);
+ return;
+ }
- ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_CUT | AC_SENDMSG_GS | (stream << 8), ctx->gs_wave_id);
+ ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_CUT | AC_SENDMSG_GS | (stream << 8),
+ ctx->gs_wave_id);
}
static LLVMValueRef
load_tess_coord(struct ac_shader_abi *abi)
{
- struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+ struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
- LLVMValueRef coord[4] = {
- ac_get_arg(&ctx->ac, ctx->args->ac.tes_u),
- ac_get_arg(&ctx->ac, ctx->args->ac.tes_v),
- ctx->ac.f32_0,
- ctx->ac.f32_0,
- };
+ LLVMValueRef coord[4] = {
+ ac_get_arg(&ctx->ac, ctx->args->ac.tes_u),
+ ac_get_arg(&ctx->ac, ctx->args->ac.tes_v),
+ ctx->ac.f32_0,
+ ctx->ac.f32_0,
+ };
- if (ctx->shader->info.tess.primitive_mode == GL_TRIANGLES)
- coord[2] = LLVMBuildFSub(ctx->ac.builder, ctx->ac.f32_1,
- LLVMBuildFAdd(ctx->ac.builder, coord[0], coord[1], ""), "");
+ if (ctx->shader->info.tess.primitive_mode == GL_TRIANGLES)
+ coord[2] = LLVMBuildFSub(ctx->ac.builder, ctx->ac.f32_1,
+ LLVMBuildFAdd(ctx->ac.builder, coord[0], coord[1], ""), "");
- return ac_build_gather_values(&ctx->ac, coord, 3);
+ return ac_build_gather_values(&ctx->ac, coord, 3);
}
static LLVMValueRef
load_ring_tess_factors(struct ac_shader_abi *abi)
{
- struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
- assert(ctx->stage == MESA_SHADER_TESS_CTRL);
+ struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+ assert(ctx->stage == MESA_SHADER_TESS_CTRL);
- return ctx->hs_ring_tess_factor;
+ return ctx->hs_ring_tess_factor;
}
static LLVMValueRef
load_ring_tess_offchip(struct ac_shader_abi *abi)
{
- struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
- assert(ctx->stage == MESA_SHADER_TESS_CTRL ||
- ctx->stage == MESA_SHADER_TESS_EVAL);
+ struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+ assert(ctx->stage == MESA_SHADER_TESS_CTRL || ctx->stage == MESA_SHADER_TESS_EVAL);
- return ctx->hs_ring_tess_offchip;
+ return ctx->hs_ring_tess_offchip;
}
static LLVMValueRef
load_ring_esgs(struct ac_shader_abi *abi)
{
- struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
- assert(ctx->stage == MESA_SHADER_VERTEX ||
- ctx->stage == MESA_SHADER_TESS_EVAL ||
- ctx->stage == MESA_SHADER_GEOMETRY);
+ struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+ assert(ctx->stage == MESA_SHADER_VERTEX || ctx->stage == MESA_SHADER_TESS_EVAL ||
+ ctx->stage == MESA_SHADER_GEOMETRY);
- return ctx->esgs_ring;
+ return ctx->esgs_ring;
}
-static LLVMValueRef radv_load_base_vertex(struct ac_shader_abi *abi, bool non_indexed_is_zero)
+static LLVMValueRef
+radv_load_base_vertex(struct ac_shader_abi *abi, bool non_indexed_is_zero)
{
- struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
- return ac_get_arg(&ctx->ac, ctx->args->ac.base_vertex);
+ struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+ return ac_get_arg(&ctx->ac, ctx->args->ac.base_vertex);
}
-static LLVMValueRef radv_load_ssbo(struct ac_shader_abi *abi,
- LLVMValueRef buffer_ptr, bool write, bool non_uniform)
-{
- struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
- LLVMValueRef result;
-
- if (!non_uniform)
- LLVMSetMetadata(buffer_ptr, ctx->ac.uniform_md_kind, ctx->ac.empty_md);
-
- if (non_uniform && LLVMGetPointerAddressSpace(LLVMTypeOf(buffer_ptr)) == AC_ADDR_SPACE_CONST_32BIT) {
- /* 32-bit seems to always use SMEM. addrspacecast from 32-bit -> 64-bit is broken. */
- buffer_ptr = LLVMBuildPtrToInt(ctx->ac.builder, buffer_ptr, ctx->ac.i32, ""),
- buffer_ptr = LLVMBuildZExt(ctx->ac.builder, buffer_ptr, ctx->ac.i64, "");
- uint64_t hi = (uint64_t)ctx->args->options->address32_hi << 32;
- buffer_ptr = LLVMBuildOr(ctx->ac.builder, buffer_ptr, LLVMConstInt(ctx->ac.i64, hi, false), "");
- buffer_ptr = LLVMBuildIntToPtr(ctx->ac.builder, buffer_ptr, LLVMPointerType(ctx->ac.v4i32, AC_ADDR_SPACE_CONST), "");
- }
-
- result = LLVMBuildLoad(ctx->ac.builder, buffer_ptr, "");
- LLVMSetMetadata(result, ctx->ac.invariant_load_md_kind, ctx->ac.empty_md);
-
- return result;
-}
-
-static LLVMValueRef radv_load_ubo(struct ac_shader_abi *abi,
- unsigned desc_set, unsigned binding,
- bool valid_binding, LLVMValueRef buffer_ptr)
-{
- struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
- LLVMValueRef result;
-
- if (valid_binding) {
- struct radv_pipeline_layout *pipeline_layout = ctx->args->options->layout;
- struct radv_descriptor_set_layout *layout = pipeline_layout->set[desc_set].layout;
-
- if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
- uint32_t desc_type = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
- S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
- S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
- S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
-
- if (ctx->ac.chip_class >= GFX10) {
- desc_type |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
- S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
- S_008F0C_RESOURCE_LEVEL(1);
- } else {
- desc_type |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
- S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
- }
-
- LLVMValueRef desc_components[4] = {
- LLVMBuildPtrToInt(ctx->ac.builder, buffer_ptr, ctx->ac.intptr, ""),
- LLVMConstInt(ctx->ac.i32, S_008F04_BASE_ADDRESS_HI(ctx->args->options->address32_hi), false),
- LLVMConstInt(ctx->ac.i32, 0xffffffff, false),
- LLVMConstInt(ctx->ac.i32, desc_type, false),
- };
-
- return ac_build_gather_values(&ctx->ac, desc_components, 4);
- }
- }
-
- LLVMSetMetadata(buffer_ptr, ctx->ac.uniform_md_kind, ctx->ac.empty_md);
-
- result = LLVMBuildLoad(ctx->ac.builder, buffer_ptr, "");
- LLVMSetMetadata(result, ctx->ac.invariant_load_md_kind, ctx->ac.empty_md);
-
- return result;
-}
-
-static LLVMValueRef radv_get_sampler_desc(struct ac_shader_abi *abi,
- unsigned descriptor_set,
- unsigned base_index,
- unsigned constant_index,
- LLVMValueRef index,
- enum ac_descriptor_type desc_type,
- bool image, bool write,
- bool bindless)
-{
- struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
- LLVMValueRef list = ctx->descriptor_sets[descriptor_set];
- struct radv_descriptor_set_layout *layout = ctx->args->options->layout->set[descriptor_set].layout;
- struct radv_descriptor_set_binding_layout *binding = layout->binding + base_index;
- unsigned offset = binding->offset;
- unsigned stride = binding->size;
- unsigned type_size;
- LLVMBuilderRef builder = ctx->ac.builder;
- LLVMTypeRef type;
-
- assert(base_index < layout->binding_count);
-
- switch (desc_type) {
- case AC_DESC_IMAGE:
- type = ctx->ac.v8i32;
- type_size = 32;
- break;
- case AC_DESC_FMASK:
- type = ctx->ac.v8i32;
- offset += 32;
- type_size = 32;
- break;
- case AC_DESC_SAMPLER:
- type = ctx->ac.v4i32;
- if (binding->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
- offset += radv_combined_image_descriptor_sampler_offset(binding);
- }
-
- type_size = 16;
- break;
- case AC_DESC_BUFFER:
- type = ctx->ac.v4i32;
- type_size = 16;
- break;
- case AC_DESC_PLANE_0:
- case AC_DESC_PLANE_1:
- case AC_DESC_PLANE_2:
- type = ctx->ac.v8i32;
- type_size = 32;
- offset += 32 * (desc_type - AC_DESC_PLANE_0);
- break;
- default:
- unreachable("invalid desc_type\n");
- }
-
- offset += constant_index * stride;
-
- if (desc_type == AC_DESC_SAMPLER && binding->immutable_samplers_offset &&
- (!index || binding->immutable_samplers_equal)) {
- if (binding->immutable_samplers_equal)
- constant_index = 0;
-
- const uint32_t *samplers = radv_immutable_samplers(layout, binding);
-
- LLVMValueRef constants[] = {
- LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 0], 0),
- LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 1], 0),
- LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 2], 0),
- LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 3], 0),
- };
- return ac_build_gather_values(&ctx->ac, constants, 4);
- }
-
- assert(stride % type_size == 0);
-
- LLVMValueRef adjusted_index = index;
- if (!adjusted_index)
- adjusted_index = ctx->ac.i32_0;
-
- adjusted_index = LLVMBuildMul(builder, adjusted_index, LLVMConstInt(ctx->ac.i32, stride / type_size, 0), "");
-
- LLVMValueRef val_offset = LLVMConstInt(ctx->ac.i32, offset, 0);
- list = LLVMBuildGEP(builder, list, &val_offset, 1, "");
- list = LLVMBuildPointerCast(builder, list,
- ac_array_in_const32_addr_space(type), "");
-
- LLVMValueRef descriptor = ac_build_load_to_sgpr(&ctx->ac, list, adjusted_index);
-
- /* 3 plane formats always have same size and format for plane 1 & 2, so
- * use the tail from plane 1 so that we can store only the first 16 bytes
- * of the last plane. */
- if (desc_type == AC_DESC_PLANE_2) {
- LLVMValueRef descriptor2 = radv_get_sampler_desc(abi, descriptor_set, base_index, constant_index, index, AC_DESC_PLANE_1,image, write, bindless);
-
- LLVMValueRef components[8];
- for (unsigned i = 0; i < 4; ++i)
- components[i] = ac_llvm_extract_elem(&ctx->ac, descriptor, i);
-
- for (unsigned i = 4; i < 8; ++i)
- components[i] = ac_llvm_extract_elem(&ctx->ac, descriptor2, i);
- descriptor = ac_build_gather_values(&ctx->ac, components, 8);
- }
-
- return descriptor;
+static LLVMValueRef
+radv_load_ssbo(struct ac_shader_abi *abi, LLVMValueRef buffer_ptr, bool write, bool non_uniform)
+{
+ struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+ LLVMValueRef result;
+
+ if (!non_uniform)
+ LLVMSetMetadata(buffer_ptr, ctx->ac.uniform_md_kind, ctx->ac.empty_md);
+
+ if (non_uniform &&
+ LLVMGetPointerAddressSpace(LLVMTypeOf(buffer_ptr)) == AC_ADDR_SPACE_CONST_32BIT) {
+ /* 32-bit seems to always use SMEM. addrspacecast from 32-bit -> 64-bit is broken. */
+ buffer_ptr = LLVMBuildPtrToInt(ctx->ac.builder, buffer_ptr, ctx->ac.i32, ""),
+ buffer_ptr = LLVMBuildZExt(ctx->ac.builder, buffer_ptr, ctx->ac.i64, "");
+ uint64_t hi = (uint64_t)ctx->args->options->address32_hi << 32;
+ buffer_ptr =
+ LLVMBuildOr(ctx->ac.builder, buffer_ptr, LLVMConstInt(ctx->ac.i64, hi, false), "");
+ buffer_ptr = LLVMBuildIntToPtr(ctx->ac.builder, buffer_ptr,
+ LLVMPointerType(ctx->ac.v4i32, AC_ADDR_SPACE_CONST), "");
+ }
+
+ result = LLVMBuildLoad(ctx->ac.builder, buffer_ptr, "");
+ LLVMSetMetadata(result, ctx->ac.invariant_load_md_kind, ctx->ac.empty_md);
+
+ return result;
+}
+
+static LLVMValueRef
+radv_load_ubo(struct ac_shader_abi *abi, unsigned desc_set, unsigned binding, bool valid_binding,
+ LLVMValueRef buffer_ptr)
+{
+ struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+ LLVMValueRef result;
+
+ if (valid_binding) {
+ struct radv_pipeline_layout *pipeline_layout = ctx->args->options->layout;
+ struct radv_descriptor_set_layout *layout = pipeline_layout->set[desc_set].layout;
+
+ if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
+ uint32_t desc_type =
+ S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+ S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+
+ if (ctx->ac.chip_class >= GFX10) {
+ desc_type |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
+ S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
+ } else {
+ desc_type |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+ }
+
+ LLVMValueRef desc_components[4] = {
+ LLVMBuildPtrToInt(ctx->ac.builder, buffer_ptr, ctx->ac.intptr, ""),
+ LLVMConstInt(ctx->ac.i32, S_008F04_BASE_ADDRESS_HI(ctx->args->options->address32_hi),
+ false),
+ LLVMConstInt(ctx->ac.i32, 0xffffffff, false),
+ LLVMConstInt(ctx->ac.i32, desc_type, false),
+ };
+
+ return ac_build_gather_values(&ctx->ac, desc_components, 4);
+ }
+ }
+
+ LLVMSetMetadata(buffer_ptr, ctx->ac.uniform_md_kind, ctx->ac.empty_md);
+
+ result = LLVMBuildLoad(ctx->ac.builder, buffer_ptr, "");
+ LLVMSetMetadata(result, ctx->ac.invariant_load_md_kind, ctx->ac.empty_md);
+
+ return result;
+}
+
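The inline uniform block branch above hand-builds a four-dword buffer descriptor around a plain pointer. A rough sketch of that layout in plain C; the struct and helper are illustrative only, and the exact bit packing lives in the generated S_008F04_*/S_008F0C_* register macros.

#include <stdint.h>

/* Illustrative four-dword buffer descriptor as assembled in
 * radv_load_ubo() for inline uniform blocks (not register-accurate). */
struct inline_ubo_desc {
   uint32_t dw[4];
};

static struct inline_ubo_desc
make_inline_ubo_desc(uint32_t addr_lo, uint32_t base_address_hi_bits,
                     uint32_t dst_sel_and_format_bits)
{
   struct inline_ubo_desc d;
   d.dw[0] = addr_lo;                 /* buffer_ptr cast to an integer       */
   d.dw[1] = base_address_hi_bits;    /* BASE_ADDRESS_HI(address32_hi)       */
   d.dw[2] = 0xffffffffu;             /* range: effectively unbounded        */
   d.dw[3] = dst_sel_and_format_bits; /* dst_sel XYZW + per-chip format bits */
   return d;
}

int
main(void)
{
   struct inline_ubo_desc d = make_inline_ubo_desc(0x1000u, 0u, 0u);
   return d.dw[2] == 0xffffffffu ? 0 : 1;
}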
+static LLVMValueRef
+radv_get_sampler_desc(struct ac_shader_abi *abi, unsigned descriptor_set, unsigned base_index,
+ unsigned constant_index, LLVMValueRef index,
+ enum ac_descriptor_type desc_type, bool image, bool write, bool bindless)
+{
+ struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+ LLVMValueRef list = ctx->descriptor_sets[descriptor_set];
+ struct radv_descriptor_set_layout *layout =
+ ctx->args->options->layout->set[descriptor_set].layout;
+ struct radv_descriptor_set_binding_layout *binding = layout->binding + base_index;
+ unsigned offset = binding->offset;
+ unsigned stride = binding->size;
+ unsigned type_size;
+ LLVMBuilderRef builder = ctx->ac.builder;
+ LLVMTypeRef type;
+
+ assert(base_index < layout->binding_count);
+
+ switch (desc_type) {
+ case AC_DESC_IMAGE:
+ type = ctx->ac.v8i32;
+ type_size = 32;
+ break;
+ case AC_DESC_FMASK:
+ type = ctx->ac.v8i32;
+ offset += 32;
+ type_size = 32;
+ break;
+ case AC_DESC_SAMPLER:
+ type = ctx->ac.v4i32;
+ if (binding->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
+ offset += radv_combined_image_descriptor_sampler_offset(binding);
+ }
+
+ type_size = 16;
+ break;
+ case AC_DESC_BUFFER:
+ type = ctx->ac.v4i32;
+ type_size = 16;
+ break;
+ case AC_DESC_PLANE_0:
+ case AC_DESC_PLANE_1:
+ case AC_DESC_PLANE_2:
+ type = ctx->ac.v8i32;
+ type_size = 32;
+ offset += 32 * (desc_type - AC_DESC_PLANE_0);
+ break;
+ default:
+ unreachable("invalid desc_type\n");
+ }
+
+ offset += constant_index * stride;
+
+ if (desc_type == AC_DESC_SAMPLER && binding->immutable_samplers_offset &&
+ (!index || binding->immutable_samplers_equal)) {
+ if (binding->immutable_samplers_equal)
+ constant_index = 0;
+
+ const uint32_t *samplers = radv_immutable_samplers(layout, binding);
+
+ LLVMValueRef constants[] = {
+ LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 0], 0),
+ LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 1], 0),
+ LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 2], 0),
+ LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 3], 0),
+ };
+ return ac_build_gather_values(&ctx->ac, constants, 4);
+ }
+
+ assert(stride % type_size == 0);
+
+ LLVMValueRef adjusted_index = index;
+ if (!adjusted_index)
+ adjusted_index = ctx->ac.i32_0;
+
+ adjusted_index =
+ LLVMBuildMul(builder, adjusted_index, LLVMConstInt(ctx->ac.i32, stride / type_size, 0), "");
+
+ LLVMValueRef val_offset = LLVMConstInt(ctx->ac.i32, offset, 0);
+ list = LLVMBuildGEP(builder, list, &val_offset, 1, "");
+ list = LLVMBuildPointerCast(builder, list, ac_array_in_const32_addr_space(type), "");
+
+ LLVMValueRef descriptor = ac_build_load_to_sgpr(&ctx->ac, list, adjusted_index);
+
+   /* 3-plane formats always have the same size and format for planes 1 & 2, so
+    * use the tail from plane 1 so that we only need to store the first 16 bytes
+    * of the last plane. */
+ if (desc_type == AC_DESC_PLANE_2) {
+ LLVMValueRef descriptor2 =
+ radv_get_sampler_desc(abi, descriptor_set, base_index, constant_index, index,
+ AC_DESC_PLANE_1, image, write, bindless);
+
+ LLVMValueRef components[8];
+ for (unsigned i = 0; i < 4; ++i)
+ components[i] = ac_llvm_extract_elem(&ctx->ac, descriptor, i);
+
+ for (unsigned i = 4; i < 8; ++i)
+ components[i] = ac_llvm_extract_elem(&ctx->ac, descriptor2, i);
+ descriptor = ac_build_gather_values(&ctx->ac, components, 8);
+ }
+
+ return descriptor;
}
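Boiled down, the function above addresses a descriptor as a byte offset into the set. A small sketch of that arithmetic; the struct is a stand-in for radv_descriptor_set_binding_layout, not the real type, and the LLVM code folds the constant part into a GEP while feeding the dynamic index, scaled to 16/32-byte elements, to the SGPR load.

#include <stdint.h>

/* Byte offset of the descriptor selected by radv_get_sampler_desc():
 * binding start + per-kind bias + (constant + dynamic index) * stride. */
struct binding_info {
   uint32_t offset; /* byte offset of the binding within the set */
   uint32_t size;   /* byte stride between array elements        */
};

static uint32_t
desc_byte_offset(const struct binding_info *b, uint32_t kind_bias,
                 uint32_t constant_index, uint32_t dynamic_index)
{
   /* kind_bias: +32 for FMASK, +32 * plane for planes 1/2, the combined
    * image+sampler offset for AC_DESC_SAMPLER, 0 otherwise. */
   return b->offset + kind_bias + (constant_index + dynamic_index) * b->size;
}

int
main(void)
{
   /* Example: binding at byte 64, 32-byte elements, plane 1, index 3. */
   struct binding_info b = {64, 32};
   return desc_byte_offset(&b, 32, 0, 3) == 192 ? 0 : 1;
}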
/* For 2_10_10_10 formats the alpha is handled as unsigned by pre-Vega HW,
 * so we may need to fix it up. */
static LLVMValueRef
-adjust_vertex_fetch_alpha(struct radv_shader_context *ctx,
- unsigned adjustment,
- LLVMValueRef alpha)
-{
- if (adjustment == AC_FETCH_FORMAT_NONE)
- return alpha;
-
- LLVMValueRef c30 = LLVMConstInt(ctx->ac.i32, 30, 0);
-
- alpha = LLVMBuildBitCast(ctx->ac.builder, alpha, ctx->ac.f32, "");
-
- if (adjustment == AC_FETCH_FORMAT_SSCALED)
- alpha = LLVMBuildFPToUI(ctx->ac.builder, alpha, ctx->ac.i32, "");
- else
- alpha = ac_to_integer(&ctx->ac, alpha);
-
- /* For the integer-like cases, do a natural sign extension.
- *
- * For the SNORM case, the values are 0.0, 0.333, 0.666, 1.0
- * and happen to contain 0, 1, 2, 3 as the two LSBs of the
- * exponent.
- */
- alpha = LLVMBuildShl(ctx->ac.builder, alpha,
- adjustment == AC_FETCH_FORMAT_SNORM ?
- LLVMConstInt(ctx->ac.i32, 7, 0) : c30, "");
- alpha = LLVMBuildAShr(ctx->ac.builder, alpha, c30, "");
-
- /* Convert back to the right type. */
- if (adjustment == AC_FETCH_FORMAT_SNORM) {
- LLVMValueRef clamp;
- LLVMValueRef neg_one = LLVMConstReal(ctx->ac.f32, -1.0);
- alpha = LLVMBuildSIToFP(ctx->ac.builder, alpha, ctx->ac.f32, "");
- clamp = LLVMBuildFCmp(ctx->ac.builder, LLVMRealULT, alpha, neg_one, "");
- alpha = LLVMBuildSelect(ctx->ac.builder, clamp, neg_one, alpha, "");
- } else if (adjustment == AC_FETCH_FORMAT_SSCALED) {
- alpha = LLVMBuildSIToFP(ctx->ac.builder, alpha, ctx->ac.f32, "");
- }
-
- return LLVMBuildBitCast(ctx->ac.builder, alpha, ctx->ac.i32, "");
+adjust_vertex_fetch_alpha(struct radv_shader_context *ctx, unsigned adjustment, LLVMValueRef alpha)
+{
+ if (adjustment == AC_FETCH_FORMAT_NONE)
+ return alpha;
+
+ LLVMValueRef c30 = LLVMConstInt(ctx->ac.i32, 30, 0);
+
+ alpha = LLVMBuildBitCast(ctx->ac.builder, alpha, ctx->ac.f32, "");
+
+ if (adjustment == AC_FETCH_FORMAT_SSCALED)
+ alpha = LLVMBuildFPToUI(ctx->ac.builder, alpha, ctx->ac.i32, "");
+ else
+ alpha = ac_to_integer(&ctx->ac, alpha);
+
+ /* For the integer-like cases, do a natural sign extension.
+ *
+ * For the SNORM case, the values are 0.0, 0.333, 0.666, 1.0
+ * and happen to contain 0, 1, 2, 3 as the two LSBs of the
+ * exponent.
+ */
+ alpha =
+ LLVMBuildShl(ctx->ac.builder, alpha,
+ adjustment == AC_FETCH_FORMAT_SNORM ? LLVMConstInt(ctx->ac.i32, 7, 0) : c30, "");
+ alpha = LLVMBuildAShr(ctx->ac.builder, alpha, c30, "");
+
+ /* Convert back to the right type. */
+ if (adjustment == AC_FETCH_FORMAT_SNORM) {
+ LLVMValueRef clamp;
+ LLVMValueRef neg_one = LLVMConstReal(ctx->ac.f32, -1.0);
+ alpha = LLVMBuildSIToFP(ctx->ac.builder, alpha, ctx->ac.f32, "");
+ clamp = LLVMBuildFCmp(ctx->ac.builder, LLVMRealULT, alpha, neg_one, "");
+ alpha = LLVMBuildSelect(ctx->ac.builder, clamp, neg_one, alpha, "");
+ } else if (adjustment == AC_FETCH_FORMAT_SSCALED) {
+ alpha = LLVMBuildSIToFP(ctx->ac.builder, alpha, ctx->ac.f32, "");
+ }
+
+ return LLVMBuildBitCast(ctx->ac.builder, alpha, ctx->ac.i32, "");
}
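The SNORM trick in the comment above is easy to check on the CPU: the four legal alpha values carry the source 2-bit field in the low two exponent bits, so a shift left by 7 followed by an arithmetic shift right by 30 recovers the sign-extended value. A standalone check in plain C, assuming arithmetic right shift of signed ints as on GCC/Clang; the hardware-returned floats may differ in mantissa but share these exponent bits.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* CPU-side replay of the SNORM branch of adjust_vertex_fetch_alpha():
 * recover the signed 2-bit alpha from its float encoding. */
static int
snorm_alpha_value(float alpha)
{
   uint32_t bits;
   memcpy(&bits, &alpha, sizeof(bits));
   return ((int32_t)(bits << 7)) >> 30; /* shl 7, ashr 30 */
}

int
main(void)
{
   const float in[4] = {0.0f, 1.0f / 3.0f, 2.0f / 3.0f, 1.0f};
   for (int i = 0; i < 4; i++)
      printf("%f -> %d\n", in[i], snorm_alpha_value(in[i]));
   /* prints 0, 1, -2, -1: the natural sign extension of 0, 1, 2, 3 */
   return 0;
}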
static LLVMValueRef
-radv_fixup_vertex_input_fetches(struct radv_shader_context *ctx,
- LLVMValueRef value,
- unsigned num_channels,
- bool is_float)
+radv_fixup_vertex_input_fetches(struct radv_shader_context *ctx, LLVMValueRef value,
+ unsigned num_channels, bool is_float)
{
- LLVMValueRef zero = is_float ? ctx->ac.f32_0 : ctx->ac.i32_0;
- LLVMValueRef one = is_float ? ctx->ac.f32_1 : ctx->ac.i32_1;
- LLVMValueRef chan[4];
+ LLVMValueRef zero = is_float ? ctx->ac.f32_0 : ctx->ac.i32_0;
+ LLVMValueRef one = is_float ? ctx->ac.f32_1 : ctx->ac.i32_1;
+ LLVMValueRef chan[4];
- if (LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMVectorTypeKind) {
- unsigned vec_size = LLVMGetVectorSize(LLVMTypeOf(value));
+ if (LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMVectorTypeKind) {
+ unsigned vec_size = LLVMGetVectorSize(LLVMTypeOf(value));
- if (num_channels == 4 && num_channels == vec_size)
- return value;
+ if (num_channels == 4 && num_channels == vec_size)
+ return value;
- num_channels = MIN2(num_channels, vec_size);
+ num_channels = MIN2(num_channels, vec_size);
- for (unsigned i = 0; i < num_channels; i++)
- chan[i] = ac_llvm_extract_elem(&ctx->ac, value, i);
- } else {
- assert(num_channels == 1);
- chan[0] = value;
- }
+ for (unsigned i = 0; i < num_channels; i++)
+ chan[i] = ac_llvm_extract_elem(&ctx->ac, value, i);
+ } else {
+ assert(num_channels == 1);
+ chan[0] = value;
+ }
- for (unsigned i = num_channels; i < 4; i++) {
- chan[i] = i == 3 ? one : zero;
- chan[i] = ac_to_integer(&ctx->ac, chan[i]);
- }
+ for (unsigned i = num_channels; i < 4; i++) {
+ chan[i] = i == 3 ? one : zero;
+ chan[i] = ac_to_integer(&ctx->ac, chan[i]);
+ }
- return ac_build_gather_values(&ctx->ac, chan, 4);
+ return ac_build_gather_values(&ctx->ac, chan, 4);
}
static void
-handle_vs_input_decl(struct radv_shader_context *ctx,
- struct nir_variable *variable)
-{
- LLVMValueRef t_list_ptr = ac_get_arg(&ctx->ac, ctx->args->ac.vertex_buffers);
- LLVMValueRef t_offset;
- LLVMValueRef t_list;
- LLVMValueRef input;
- LLVMValueRef buffer_index;
- unsigned attrib_count = glsl_count_attribute_slots(variable->type, true);
-
-
- enum glsl_base_type type = glsl_get_base_type(variable->type);
- for (unsigned i = 0; i < attrib_count; ++i) {
- LLVMValueRef output[4];
- unsigned attrib_index = variable->data.location + i - VERT_ATTRIB_GENERIC0;
- unsigned attrib_format = ctx->args->options->key.vs.vertex_attribute_formats[attrib_index];
- unsigned data_format = attrib_format & 0x0f;
- unsigned num_format = (attrib_format >> 4) & 0x07;
- bool is_float = num_format != V_008F0C_BUF_NUM_FORMAT_UINT &&
- num_format != V_008F0C_BUF_NUM_FORMAT_SINT;
- uint8_t input_usage_mask =
- ctx->args->shader_info->vs.input_usage_mask[variable->data.location + i];
- unsigned num_input_channels = util_last_bit(input_usage_mask);
-
- if (num_input_channels == 0)
- continue;
-
- if (ctx->args->options->key.vs.instance_rate_inputs & (1u << attrib_index)) {
- uint32_t divisor = ctx->args->options->key.vs.instance_rate_divisors[attrib_index];
-
- if (divisor) {
- buffer_index = ctx->abi.instance_id;
-
- if (divisor != 1) {
- buffer_index = LLVMBuildUDiv(ctx->ac.builder, buffer_index,
- LLVMConstInt(ctx->ac.i32, divisor, 0), "");
- }
- } else {
- buffer_index = ctx->ac.i32_0;
- }
-
- buffer_index = LLVMBuildAdd(ctx->ac.builder,
- ac_get_arg(&ctx->ac,
- ctx->args->ac.start_instance),\
- buffer_index, "");
- } else {
- buffer_index = LLVMBuildAdd(ctx->ac.builder,
- ctx->abi.vertex_id,
- ac_get_arg(&ctx->ac,
- ctx->args->ac.base_vertex), "");
- }
-
- const struct ac_data_format_info *vtx_info = ac_get_data_format_info(data_format);
-
- /* Adjust the number of channels to load based on the vertex
- * attribute format.
- */
- unsigned num_channels = MIN2(num_input_channels, vtx_info->num_channels);
- unsigned attrib_binding = ctx->args->options->key.vs.vertex_attribute_bindings[attrib_index];
- unsigned attrib_offset = ctx->args->options->key.vs.vertex_attribute_offsets[attrib_index];
- unsigned attrib_stride = ctx->args->options->key.vs.vertex_attribute_strides[attrib_index];
- unsigned alpha_adjust = ctx->args->options->key.vs.alpha_adjust[attrib_index];
-
- if (ctx->args->options->key.vs.post_shuffle & (1 << attrib_index)) {
- /* Always load, at least, 3 channels for formats that
- * need to be shuffled because X<->Z.
- */
- num_channels = MAX2(num_channels, 3);
- }
-
- t_offset = LLVMConstInt(ctx->ac.i32, attrib_binding, false);
- t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);
-
- /* Always split typed vertex buffer loads on GFX6 and GFX10+
- * to avoid any alignment issues that triggers memory
- * violations and eventually a GPU hang. This can happen if
- * the stride (static or dynamic) is unaligned and also if the
- * VBO offset is aligned to a scalar (eg. stride is 8 and VBO
- * offset is 2 for R16G16B16A16_SNORM).
- */
- if (ctx->ac.chip_class == GFX6 ||
- ctx->ac.chip_class >= GFX10) {
- unsigned chan_format = vtx_info->chan_format;
- LLVMValueRef values[4];
-
- assert(ctx->ac.chip_class == GFX6 ||
- ctx->ac.chip_class >= GFX10);
-
- for (unsigned chan = 0; chan < num_channels; chan++) {
- unsigned chan_offset = attrib_offset + chan * vtx_info->chan_byte_size;
- LLVMValueRef chan_index = buffer_index;
-
- if (attrib_stride != 0 && chan_offset > attrib_stride) {
- LLVMValueRef buffer_offset =
- LLVMConstInt(ctx->ac.i32,
- chan_offset / attrib_stride, false);
-
- chan_index = LLVMBuildAdd(ctx->ac.builder,
- buffer_index,
- buffer_offset, "");
-
- chan_offset = chan_offset % attrib_stride;
- }
-
- values[chan] = ac_build_struct_tbuffer_load(&ctx->ac, t_list,
- chan_index,
- LLVMConstInt(ctx->ac.i32, chan_offset, false),
- ctx->ac.i32_0, ctx->ac.i32_0, 1,
- chan_format, num_format, 0, true);
- }
-
- input = ac_build_gather_values(&ctx->ac, values, num_channels);
- } else {
- if (attrib_stride != 0 && attrib_offset > attrib_stride) {
- LLVMValueRef buffer_offset =
- LLVMConstInt(ctx->ac.i32,
- attrib_offset / attrib_stride, false);
-
- buffer_index = LLVMBuildAdd(ctx->ac.builder,
- buffer_index,
- buffer_offset, "");
-
- attrib_offset = attrib_offset % attrib_stride;
- }
-
- input = ac_build_struct_tbuffer_load(&ctx->ac, t_list,
- buffer_index,
- LLVMConstInt(ctx->ac.i32, attrib_offset, false),
- ctx->ac.i32_0, ctx->ac.i32_0,
- num_channels,
- data_format, num_format, 0, true);
- }
-
- if (ctx->args->options->key.vs.post_shuffle & (1 << attrib_index)) {
- LLVMValueRef c[4];
- c[0] = ac_llvm_extract_elem(&ctx->ac, input, 2);
- c[1] = ac_llvm_extract_elem(&ctx->ac, input, 1);
- c[2] = ac_llvm_extract_elem(&ctx->ac, input, 0);
- c[3] = ac_llvm_extract_elem(&ctx->ac, input, 3);
-
- input = ac_build_gather_values(&ctx->ac, c, 4);
- }
-
- input = radv_fixup_vertex_input_fetches(ctx, input, num_channels,
- is_float);
-
- for (unsigned chan = 0; chan < 4; chan++) {
- LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false);
- output[chan] = LLVMBuildExtractElement(ctx->ac.builder, input, llvm_chan, "");
- if (type == GLSL_TYPE_FLOAT16) {
- output[chan] = LLVMBuildBitCast(ctx->ac.builder, output[chan], ctx->ac.f32, "");
- output[chan] = LLVMBuildFPTrunc(ctx->ac.builder, output[chan], ctx->ac.f16, "");
- }
- }
-
- output[3] = adjust_vertex_fetch_alpha(ctx, alpha_adjust, output[3]);
-
- for (unsigned chan = 0; chan < 4; chan++) {
- output[chan] = ac_to_integer(&ctx->ac, output[chan]);
- if (type == GLSL_TYPE_UINT16 || type == GLSL_TYPE_INT16)
- output[chan] = LLVMBuildTrunc(ctx->ac.builder, output[chan], ctx->ac.i16, "");
-
- ctx->inputs[ac_llvm_reg_index_soa(variable->data.location + i, chan)] = output[chan];
- }
- }
+handle_vs_input_decl(struct radv_shader_context *ctx, struct nir_variable *variable)
+{
+ LLVMValueRef t_list_ptr = ac_get_arg(&ctx->ac, ctx->args->ac.vertex_buffers);
+ LLVMValueRef t_offset;
+ LLVMValueRef t_list;
+ LLVMValueRef input;
+ LLVMValueRef buffer_index;
+ unsigned attrib_count = glsl_count_attribute_slots(variable->type, true);
+
+ enum glsl_base_type type = glsl_get_base_type(variable->type);
+ for (unsigned i = 0; i < attrib_count; ++i) {
+ LLVMValueRef output[4];
+ unsigned attrib_index = variable->data.location + i - VERT_ATTRIB_GENERIC0;
+ unsigned attrib_format = ctx->args->options->key.vs.vertex_attribute_formats[attrib_index];
+ unsigned data_format = attrib_format & 0x0f;
+ unsigned num_format = (attrib_format >> 4) & 0x07;
+ bool is_float =
+ num_format != V_008F0C_BUF_NUM_FORMAT_UINT && num_format != V_008F0C_BUF_NUM_FORMAT_SINT;
+ uint8_t input_usage_mask =
+ ctx->args->shader_info->vs.input_usage_mask[variable->data.location + i];
+ unsigned num_input_channels = util_last_bit(input_usage_mask);
+
+ if (num_input_channels == 0)
+ continue;
+
+ if (ctx->args->options->key.vs.instance_rate_inputs & (1u << attrib_index)) {
+ uint32_t divisor = ctx->args->options->key.vs.instance_rate_divisors[attrib_index];
+
+ if (divisor) {
+ buffer_index = ctx->abi.instance_id;
+
+ if (divisor != 1) {
+ buffer_index = LLVMBuildUDiv(ctx->ac.builder, buffer_index,
+ LLVMConstInt(ctx->ac.i32, divisor, 0), "");
+ }
+ } else {
+ buffer_index = ctx->ac.i32_0;
+ }
+
+ buffer_index = LLVMBuildAdd(
+ ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->ac.start_instance), buffer_index, "");
+ } else {
+ buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.vertex_id,
+ ac_get_arg(&ctx->ac, ctx->args->ac.base_vertex), "");
+ }
+
+ const struct ac_data_format_info *vtx_info = ac_get_data_format_info(data_format);
+
+ /* Adjust the number of channels to load based on the vertex
+ * attribute format.
+ */
+ unsigned num_channels = MIN2(num_input_channels, vtx_info->num_channels);
+ unsigned attrib_binding = ctx->args->options->key.vs.vertex_attribute_bindings[attrib_index];
+ unsigned attrib_offset = ctx->args->options->key.vs.vertex_attribute_offsets[attrib_index];
+ unsigned attrib_stride = ctx->args->options->key.vs.vertex_attribute_strides[attrib_index];
+ unsigned alpha_adjust = ctx->args->options->key.vs.alpha_adjust[attrib_index];
+
+ if (ctx->args->options->key.vs.post_shuffle & (1 << attrib_index)) {
+         /* Always load at least 3 channels for formats that
+          * need to be shuffled because X<->Z.
+          */
+ num_channels = MAX2(num_channels, 3);
+ }
+
+ t_offset = LLVMConstInt(ctx->ac.i32, attrib_binding, false);
+ t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);
+
+      /* Always split typed vertex buffer loads on GFX6 and GFX10+
+       * to avoid any alignment issues that trigger memory
+       * violations and eventually a GPU hang. This can happen if
+       * the stride (static or dynamic) is unaligned and also if the
+       * VBO offset is aligned to a scalar (e.g. stride is 8 and VBO
+       * offset is 2 for R16G16B16A16_SNORM).
+       */
+ if (ctx->ac.chip_class == GFX6 || ctx->ac.chip_class >= GFX10) {
+ unsigned chan_format = vtx_info->chan_format;
+ LLVMValueRef values[4];
+
+ assert(ctx->ac.chip_class == GFX6 || ctx->ac.chip_class >= GFX10);
+
+ for (unsigned chan = 0; chan < num_channels; chan++) {
+ unsigned chan_offset = attrib_offset + chan * vtx_info->chan_byte_size;
+ LLVMValueRef chan_index = buffer_index;
+
+ if (attrib_stride != 0 && chan_offset > attrib_stride) {
+ LLVMValueRef buffer_offset =
+ LLVMConstInt(ctx->ac.i32, chan_offset / attrib_stride, false);
+
+ chan_index = LLVMBuildAdd(ctx->ac.builder, buffer_index, buffer_offset, "");
+
+ chan_offset = chan_offset % attrib_stride;
+ }
+
+ values[chan] = ac_build_struct_tbuffer_load(
+ &ctx->ac, t_list, chan_index, LLVMConstInt(ctx->ac.i32, chan_offset, false),
+ ctx->ac.i32_0, ctx->ac.i32_0, 1, chan_format, num_format, 0, true);
+ }
+
+ input = ac_build_gather_values(&ctx->ac, values, num_channels);
+ } else {
+ if (attrib_stride != 0 && attrib_offset > attrib_stride) {
+ LLVMValueRef buffer_offset =
+ LLVMConstInt(ctx->ac.i32, attrib_offset / attrib_stride, false);
+
+ buffer_index = LLVMBuildAdd(ctx->ac.builder, buffer_index, buffer_offset, "");
+
+ attrib_offset = attrib_offset % attrib_stride;
+ }
+
+ input = ac_build_struct_tbuffer_load(
+ &ctx->ac, t_list, buffer_index, LLVMConstInt(ctx->ac.i32, attrib_offset, false),
+ ctx->ac.i32_0, ctx->ac.i32_0, num_channels, data_format, num_format, 0, true);
+ }
+
+ if (ctx->args->options->key.vs.post_shuffle & (1 << attrib_index)) {
+ LLVMValueRef c[4];
+ c[0] = ac_llvm_extract_elem(&ctx->ac, input, 2);
+ c[1] = ac_llvm_extract_elem(&ctx->ac, input, 1);
+ c[2] = ac_llvm_extract_elem(&ctx->ac, input, 0);
+ c[3] = ac_llvm_extract_elem(&ctx->ac, input, 3);
+
+ input = ac_build_gather_values(&ctx->ac, c, 4);
+ }
+
+ input = radv_fixup_vertex_input_fetches(ctx, input, num_channels, is_float);
+
+ for (unsigned chan = 0; chan < 4; chan++) {
+ LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false);
+ output[chan] = LLVMBuildExtractElement(ctx->ac.builder, input, llvm_chan, "");
+ if (type == GLSL_TYPE_FLOAT16) {
+ output[chan] = LLVMBuildBitCast(ctx->ac.builder, output[chan], ctx->ac.f32, "");
+ output[chan] = LLVMBuildFPTrunc(ctx->ac.builder, output[chan], ctx->ac.f16, "");
+ }
+ }
+
+ output[3] = adjust_vertex_fetch_alpha(ctx, alpha_adjust, output[3]);
+
+ for (unsigned chan = 0; chan < 4; chan++) {
+ output[chan] = ac_to_integer(&ctx->ac, output[chan]);
+ if (type == GLSL_TYPE_UINT16 || type == GLSL_TYPE_INT16)
+ output[chan] = LLVMBuildTrunc(ctx->ac.builder, output[chan], ctx->ac.i16, "");
+
+ ctx->inputs[ac_llvm_reg_index_soa(variable->data.location + i, chan)] = output[chan];
+ }
+ }
}
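On the GFX6/GFX10+ path above, any constant channel offset that exceeds the binding stride is folded into the vertex index so the constant offset stays below the stride. A small sketch of that normalisation in plain C, with a hypothetical helper name and example numbers.

#include <stdio.h>

/* Mirror the per-channel offset fix-up in handle_vs_input_decl(): move
 * whole strides from the constant offset into the buffer index. */
static void
split_attrib_offset(unsigned attrib_offset, unsigned stride, unsigned chan,
                    unsigned chan_byte_size, unsigned *index_bias,
                    unsigned *const_offset)
{
   unsigned chan_offset = attrib_offset + chan * chan_byte_size;

   *index_bias = 0;
   if (stride != 0 && chan_offset > stride) {
      *index_bias = chan_offset / stride; /* added to the per-vertex index */
      chan_offset = chan_offset % stride; /* stays as the constant offset  */
   }
   *const_offset = chan_offset;
}

int
main(void)
{
   unsigned bias, off;
   /* e.g. attribute offset 20, stride 8, channel 1 of a 4-byte channel */
   split_attrib_offset(20, 8, 1, 4, &bias, &off);
   printf("index += %u, offset = %u\n", bias, off); /* index += 3, offset = 0 */
   return 0;
}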
static void
-handle_vs_inputs(struct radv_shader_context *ctx,
- struct nir_shader *nir) {
- nir_foreach_shader_in_variable(variable, nir)
- handle_vs_input_decl(ctx, variable);
+handle_vs_inputs(struct radv_shader_context *ctx, struct nir_shader *nir)
+{
+ nir_foreach_shader_in_variable (variable, nir)
+ handle_vs_input_decl(ctx, variable);
}
static void
-prepare_interp_optimize(struct radv_shader_context *ctx,
- struct nir_shader *nir)
-{
- bool uses_center = false;
- bool uses_centroid = false;
- nir_foreach_shader_in_variable(variable, nir) {
- if (glsl_get_base_type(glsl_without_array(variable->type)) != GLSL_TYPE_FLOAT ||
- variable->data.sample)
- continue;
-
- if (variable->data.centroid)
- uses_centroid = true;
- else
- uses_center = true;
- }
-
- ctx->abi.persp_centroid = ac_get_arg(&ctx->ac, ctx->args->ac.persp_centroid);
- ctx->abi.linear_centroid = ac_get_arg(&ctx->ac, ctx->args->ac.linear_centroid);
-
- if (uses_center && uses_centroid) {
- LLVMValueRef sel = LLVMBuildICmp(ctx->ac.builder, LLVMIntSLT,
- ac_get_arg(&ctx->ac, ctx->args->ac.prim_mask),
- ctx->ac.i32_0, "");
- ctx->abi.persp_centroid =
- LLVMBuildSelect(ctx->ac.builder, sel,
- ac_get_arg(&ctx->ac, ctx->args->ac.persp_center),
- ctx->abi.persp_centroid, "");
- ctx->abi.linear_centroid =
- LLVMBuildSelect(ctx->ac.builder, sel,
- ac_get_arg(&ctx->ac, ctx->args->ac.linear_center),
- ctx->abi.linear_centroid, "");
- }
+prepare_interp_optimize(struct radv_shader_context *ctx, struct nir_shader *nir)
+{
+ bool uses_center = false;
+ bool uses_centroid = false;
+ nir_foreach_shader_in_variable (variable, nir) {
+ if (glsl_get_base_type(glsl_without_array(variable->type)) != GLSL_TYPE_FLOAT ||
+ variable->data.sample)
+ continue;
+
+ if (variable->data.centroid)
+ uses_centroid = true;
+ else
+ uses_center = true;
+ }
+
+ ctx->abi.persp_centroid = ac_get_arg(&ctx->ac, ctx->args->ac.persp_centroid);
+ ctx->abi.linear_centroid = ac_get_arg(&ctx->ac, ctx->args->ac.linear_centroid);
+
+ if (uses_center && uses_centroid) {
+ LLVMValueRef sel =
+ LLVMBuildICmp(ctx->ac.builder, LLVMIntSLT, ac_get_arg(&ctx->ac, ctx->args->ac.prim_mask),
+ ctx->ac.i32_0, "");
+ ctx->abi.persp_centroid =
+ LLVMBuildSelect(ctx->ac.builder, sel, ac_get_arg(&ctx->ac, ctx->args->ac.persp_center),
+ ctx->abi.persp_centroid, "");
+ ctx->abi.linear_centroid =
+ LLVMBuildSelect(ctx->ac.builder, sel, ac_get_arg(&ctx->ac, ctx->args->ac.linear_center),
+ ctx->abi.linear_centroid, "");
+ }
}
static void
-scan_shader_output_decl(struct radv_shader_context *ctx,
- struct nir_variable *variable,
- struct nir_shader *shader,
- gl_shader_stage stage)
+scan_shader_output_decl(struct radv_shader_context *ctx, struct nir_variable *variable,
+ struct nir_shader *shader, gl_shader_stage stage)
{
- int idx = variable->data.driver_location;
- unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
- uint64_t mask_attribs;
+ int idx = variable->data.driver_location;
+ unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
+ uint64_t mask_attribs;
- if (variable->data.compact) {
- unsigned component_count = variable->data.location_frac +
- glsl_get_length(variable->type);
- attrib_count = (component_count + 3) / 4;
- }
+ if (variable->data.compact) {
+ unsigned component_count = variable->data.location_frac + glsl_get_length(variable->type);
+ attrib_count = (component_count + 3) / 4;
+ }
- mask_attribs = ((1ull << attrib_count) - 1) << idx;
+ mask_attribs = ((1ull << attrib_count) - 1) << idx;
- ctx->output_mask |= mask_attribs;
+ ctx->output_mask |= mask_attribs;
}
-
/* Initialize arguments for the shader export intrinsic */
static void
-si_llvm_init_export_args(struct radv_shader_context *ctx,
- LLVMValueRef *values,
- unsigned enabled_channels,
- unsigned target,
- struct ac_export_args *args)
-{
- /* Specify the channels that are enabled. */
- args->enabled_channels = enabled_channels;
-
- /* Specify whether the EXEC mask represents the valid mask */
- args->valid_mask = 0;
-
- /* Specify whether this is the last export */
- args->done = 0;
-
- /* Specify the target we are exporting */
- args->target = target;
-
- args->compr = false;
- args->out[0] = LLVMGetUndef(ctx->ac.f32);
- args->out[1] = LLVMGetUndef(ctx->ac.f32);
- args->out[2] = LLVMGetUndef(ctx->ac.f32);
- args->out[3] = LLVMGetUndef(ctx->ac.f32);
-
- if (!values)
- return;
-
- bool is_16bit = ac_get_type_size(LLVMTypeOf(values[0])) == 2;
- if (ctx->stage == MESA_SHADER_FRAGMENT) {
- unsigned index = target - V_008DFC_SQ_EXP_MRT;
- unsigned col_format = (ctx->args->options->key.fs.col_format >> (4 * index)) & 0xf;
- bool is_int8 = (ctx->args->options->key.fs.is_int8 >> index) & 1;
- bool is_int10 = (ctx->args->options->key.fs.is_int10 >> index) & 1;
-
- LLVMValueRef (*packf)(struct ac_llvm_context *ctx, LLVMValueRef args[2]) = NULL;
- LLVMValueRef (*packi)(struct ac_llvm_context *ctx, LLVMValueRef args[2],
- unsigned bits, bool hi) = NULL;
-
- switch(col_format) {
- case V_028714_SPI_SHADER_ZERO:
- args->enabled_channels = 0; /* writemask */
- args->target = V_008DFC_SQ_EXP_NULL;
- break;
-
- case V_028714_SPI_SHADER_32_R:
- args->enabled_channels = 1;
- args->out[0] = values[0];
- break;
-
- case V_028714_SPI_SHADER_32_GR:
- args->enabled_channels = 0x3;
- args->out[0] = values[0];
- args->out[1] = values[1];
- break;
-
- case V_028714_SPI_SHADER_32_AR:
- if (ctx->ac.chip_class >= GFX10) {
- args->enabled_channels = 0x3;
- args->out[0] = values[0];
- args->out[1] = values[3];
- } else {
- args->enabled_channels = 0x9;
- args->out[0] = values[0];
- args->out[3] = values[3];
- }
- break;
-
- case V_028714_SPI_SHADER_FP16_ABGR:
- args->enabled_channels = 0xf;
- packf = ac_build_cvt_pkrtz_f16;
- if (is_16bit) {
- for (unsigned chan = 0; chan < 4; chan++)
- values[chan] = LLVMBuildFPExt(ctx->ac.builder,
- values[chan],
- ctx->ac.f32, "");
- }
- break;
-
- case V_028714_SPI_SHADER_UNORM16_ABGR:
- args->enabled_channels = 0xf;
- packf = ac_build_cvt_pknorm_u16;
- break;
-
- case V_028714_SPI_SHADER_SNORM16_ABGR:
- args->enabled_channels = 0xf;
- packf = ac_build_cvt_pknorm_i16;
- break;
-
- case V_028714_SPI_SHADER_UINT16_ABGR:
- args->enabled_channels = 0xf;
- packi = ac_build_cvt_pk_u16;
- if (is_16bit) {
- for (unsigned chan = 0; chan < 4; chan++)
- values[chan] = LLVMBuildZExt(ctx->ac.builder,
- ac_to_integer(&ctx->ac, values[chan]),
- ctx->ac.i32, "");
- }
- break;
-
- case V_028714_SPI_SHADER_SINT16_ABGR:
- args->enabled_channels = 0xf;
- packi = ac_build_cvt_pk_i16;
- if (is_16bit) {
- for (unsigned chan = 0; chan < 4; chan++)
- values[chan] = LLVMBuildSExt(ctx->ac.builder,
- ac_to_integer(&ctx->ac, values[chan]),
- ctx->ac.i32, "");
- }
- break;
-
- default:
- case V_028714_SPI_SHADER_32_ABGR:
- memcpy(&args->out[0], values, sizeof(values[0]) * 4);
- break;
- }
-
- /* Replace NaN by zero (only 32-bit) to fix game bugs if
- * requested.
- */
- if (ctx->args->options->enable_mrt_output_nan_fixup &&
- !is_16bit &&
- (col_format == V_028714_SPI_SHADER_32_R ||
- col_format == V_028714_SPI_SHADER_32_GR ||
- col_format == V_028714_SPI_SHADER_32_AR ||
- col_format == V_028714_SPI_SHADER_32_ABGR ||
- col_format == V_028714_SPI_SHADER_FP16_ABGR)) {
- for (unsigned i = 0; i < 4; i++) {
- LLVMValueRef class_args[2] = {
- values[i],
- LLVMConstInt(ctx->ac.i32, S_NAN | Q_NAN, false)
- };
- LLVMValueRef isnan =
- ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.class.f32", ctx->ac.i1,
- class_args, 2, AC_FUNC_ATTR_READNONE);
- values[i] = LLVMBuildSelect(ctx->ac.builder, isnan,
- ctx->ac.f32_0,
- values[i], "");
- }
- }
-
- /* Pack f16 or norm_i16/u16. */
- if (packf) {
- for (unsigned chan = 0; chan < 2; chan++) {
- LLVMValueRef pack_args[2] = {
- values[2 * chan],
- values[2 * chan + 1]
- };
- LLVMValueRef packed;
-
- packed = packf(&ctx->ac, pack_args);
- args->out[chan] = ac_to_float(&ctx->ac, packed);
- }
- args->compr = 1; /* COMPR flag */
- }
-
- /* Pack i16/u16. */
- if (packi) {
- for (unsigned chan = 0; chan < 2; chan++) {
- LLVMValueRef pack_args[2] = {
- ac_to_integer(&ctx->ac, values[2 * chan]),
- ac_to_integer(&ctx->ac, values[2 * chan + 1])
- };
- LLVMValueRef packed;
-
- packed = packi(&ctx->ac, pack_args,
- is_int8 ? 8 : is_int10 ? 10 : 16,
- chan == 1);
- args->out[chan] = ac_to_float(&ctx->ac, packed);
- }
- args->compr = 1; /* COMPR flag */
- }
- return;
- }
-
- if (is_16bit) {
- for (unsigned chan = 0; chan < 4; chan++) {
- values[chan] = LLVMBuildBitCast(ctx->ac.builder, values[chan], ctx->ac.i16, "");
- args->out[chan] = LLVMBuildZExt(ctx->ac.builder, values[chan], ctx->ac.i32, "");
- }
- } else
- memcpy(&args->out[0], values, sizeof(values[0]) * 4);
-
- for (unsigned i = 0; i < 4; ++i)
- args->out[i] = ac_to_float(&ctx->ac, args->out[i]);
+si_llvm_init_export_args(struct radv_shader_context *ctx, LLVMValueRef *values,
+ unsigned enabled_channels, unsigned target, struct ac_export_args *args)
+{
+ /* Specify the channels that are enabled. */
+ args->enabled_channels = enabled_channels;
+
+ /* Specify whether the EXEC mask represents the valid mask */
+ args->valid_mask = 0;
+
+ /* Specify whether this is the last export */
+ args->done = 0;
+
+ /* Specify the target we are exporting */
+ args->target = target;
+
+ args->compr = false;
+ args->out[0] = LLVMGetUndef(ctx->ac.f32);
+ args->out[1] = LLVMGetUndef(ctx->ac.f32);
+ args->out[2] = LLVMGetUndef(ctx->ac.f32);
+ args->out[3] = LLVMGetUndef(ctx->ac.f32);
+
+ if (!values)
+ return;
+
+ bool is_16bit = ac_get_type_size(LLVMTypeOf(values[0])) == 2;
+ if (ctx->stage == MESA_SHADER_FRAGMENT) {
+ unsigned index = target - V_008DFC_SQ_EXP_MRT;
+ unsigned col_format = (ctx->args->options->key.fs.col_format >> (4 * index)) & 0xf;
+ bool is_int8 = (ctx->args->options->key.fs.is_int8 >> index) & 1;
+ bool is_int10 = (ctx->args->options->key.fs.is_int10 >> index) & 1;
+
+ LLVMValueRef (*packf)(struct ac_llvm_context * ctx, LLVMValueRef args[2]) = NULL;
+ LLVMValueRef (*packi)(struct ac_llvm_context * ctx, LLVMValueRef args[2], unsigned bits,
+ bool hi) = NULL;
+
+ switch (col_format) {
+ case V_028714_SPI_SHADER_ZERO:
+ args->enabled_channels = 0; /* writemask */
+ args->target = V_008DFC_SQ_EXP_NULL;
+ break;
+
+ case V_028714_SPI_SHADER_32_R:
+ args->enabled_channels = 1;
+ args->out[0] = values[0];
+ break;
+
+ case V_028714_SPI_SHADER_32_GR:
+ args->enabled_channels = 0x3;
+ args->out[0] = values[0];
+ args->out[1] = values[1];
+ break;
+
+ case V_028714_SPI_SHADER_32_AR:
+ if (ctx->ac.chip_class >= GFX10) {
+ args->enabled_channels = 0x3;
+ args->out[0] = values[0];
+ args->out[1] = values[3];
+ } else {
+ args->enabled_channels = 0x9;
+ args->out[0] = values[0];
+ args->out[3] = values[3];
+ }
+ break;
+
+ case V_028714_SPI_SHADER_FP16_ABGR:
+ args->enabled_channels = 0xf;
+ packf = ac_build_cvt_pkrtz_f16;
+ if (is_16bit) {
+ for (unsigned chan = 0; chan < 4; chan++)
+ values[chan] = LLVMBuildFPExt(ctx->ac.builder, values[chan], ctx->ac.f32, "");
+ }
+ break;
+
+ case V_028714_SPI_SHADER_UNORM16_ABGR:
+ args->enabled_channels = 0xf;
+ packf = ac_build_cvt_pknorm_u16;
+ break;
+
+ case V_028714_SPI_SHADER_SNORM16_ABGR:
+ args->enabled_channels = 0xf;
+ packf = ac_build_cvt_pknorm_i16;
+ break;
+
+ case V_028714_SPI_SHADER_UINT16_ABGR:
+ args->enabled_channels = 0xf;
+ packi = ac_build_cvt_pk_u16;
+ if (is_16bit) {
+ for (unsigned chan = 0; chan < 4; chan++)
+ values[chan] = LLVMBuildZExt(ctx->ac.builder, ac_to_integer(&ctx->ac, values[chan]),
+ ctx->ac.i32, "");
+ }
+ break;
+
+ case V_028714_SPI_SHADER_SINT16_ABGR:
+ args->enabled_channels = 0xf;
+ packi = ac_build_cvt_pk_i16;
+ if (is_16bit) {
+ for (unsigned chan = 0; chan < 4; chan++)
+ values[chan] = LLVMBuildSExt(ctx->ac.builder, ac_to_integer(&ctx->ac, values[chan]),
+ ctx->ac.i32, "");
+ }
+ break;
+
+ default:
+ case V_028714_SPI_SHADER_32_ABGR:
+ memcpy(&args->out[0], values, sizeof(values[0]) * 4);
+ break;
+ }
+
+ /* Replace NaN by zero (only 32-bit) to fix game bugs if
+ * requested.
+ */
+ if (ctx->args->options->enable_mrt_output_nan_fixup && !is_16bit &&
+ (col_format == V_028714_SPI_SHADER_32_R || col_format == V_028714_SPI_SHADER_32_GR ||
+ col_format == V_028714_SPI_SHADER_32_AR || col_format == V_028714_SPI_SHADER_32_ABGR ||
+ col_format == V_028714_SPI_SHADER_FP16_ABGR)) {
+ for (unsigned i = 0; i < 4; i++) {
+ LLVMValueRef class_args[2] = {values[i],
+ LLVMConstInt(ctx->ac.i32, S_NAN | Q_NAN, false)};
+ LLVMValueRef isnan = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.class.f32", ctx->ac.i1,
+ class_args, 2, AC_FUNC_ATTR_READNONE);
+ values[i] = LLVMBuildSelect(ctx->ac.builder, isnan, ctx->ac.f32_0, values[i], "");
+ }
+ }
+
+ /* Pack f16 or norm_i16/u16. */
+ if (packf) {
+ for (unsigned chan = 0; chan < 2; chan++) {
+ LLVMValueRef pack_args[2] = {values[2 * chan], values[2 * chan + 1]};
+ LLVMValueRef packed;
+
+ packed = packf(&ctx->ac, pack_args);
+ args->out[chan] = ac_to_float(&ctx->ac, packed);
+ }
+ args->compr = 1; /* COMPR flag */
+ }
+
+ /* Pack i16/u16. */
+ if (packi) {
+ for (unsigned chan = 0; chan < 2; chan++) {
+ LLVMValueRef pack_args[2] = {ac_to_integer(&ctx->ac, values[2 * chan]),
+ ac_to_integer(&ctx->ac, values[2 * chan + 1])};
+ LLVMValueRef packed;
+
+ packed = packi(&ctx->ac, pack_args, is_int8 ? 8 : is_int10 ? 10 : 16, chan == 1);
+ args->out[chan] = ac_to_float(&ctx->ac, packed);
+ }
+ args->compr = 1; /* COMPR flag */
+ }
+ return;
+ }
+
+ if (is_16bit) {
+ for (unsigned chan = 0; chan < 4; chan++) {
+ values[chan] = LLVMBuildBitCast(ctx->ac.builder, values[chan], ctx->ac.i16, "");
+ args->out[chan] = LLVMBuildZExt(ctx->ac.builder, values[chan], ctx->ac.i32, "");
+ }
+ } else
+ memcpy(&args->out[0], values, sizeof(values[0]) * 4);
+
+ for (unsigned i = 0; i < 4; ++i)
+ args->out[i] = ac_to_float(&ctx->ac, args->out[i]);
}
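The per-MRT state the fragment path above consumes is packed into the pipeline key: one 4-bit SPI_SHADER_* value per color target in col_format, plus one bit per target in is_int8/is_int10. A tiny sketch of that unpacking; the helpers and example key are illustrative, not driver API.

#include <stdbool.h>
#include <stdint.h>

/* How si_llvm_init_export_args() selects the export format for MRT i. */
static unsigned
mrt_col_format(uint32_t col_format_key, unsigned mrt_index)
{
   return (col_format_key >> (4 * mrt_index)) & 0xf;
}

static bool
mrt_is_int8(uint32_t is_int8_key, unsigned mrt_index)
{
   return (is_int8_key >> mrt_index) & 1;
}

int
main(void)
{
   /* Example key: MRT0 = 0x4, MRT1 = 0xc (placeholder values). */
   uint32_t key = 0x4u | (0xcu << 4);
   return mrt_col_format(key, 1) == 0xc && !mrt_is_int8(0, 1) ? 0 : 1;
}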
static void
-radv_export_param(struct radv_shader_context *ctx, unsigned index,
- LLVMValueRef *values, unsigned enabled_channels)
+radv_export_param(struct radv_shader_context *ctx, unsigned index, LLVMValueRef *values,
+ unsigned enabled_channels)
{
- struct ac_export_args args;
+ struct ac_export_args args;
- si_llvm_init_export_args(ctx, values, enabled_channels,
- V_008DFC_SQ_EXP_PARAM + index, &args);
- ac_build_export(&ctx->ac, &args);
+ si_llvm_init_export_args(ctx, values, enabled_channels, V_008DFC_SQ_EXP_PARAM + index, &args);
+ ac_build_export(&ctx->ac, &args);
}
static LLVMValueRef
radv_load_output(struct radv_shader_context *ctx, unsigned index, unsigned chan)
{
- LLVMValueRef output = ctx->abi.outputs[ac_llvm_reg_index_soa(index, chan)];
- return LLVMBuildLoad(ctx->ac.builder, output, "");
+ LLVMValueRef output = ctx->abi.outputs[ac_llvm_reg_index_soa(index, chan)];
+ return LLVMBuildLoad(ctx->ac.builder, output, "");
}
static void
-radv_emit_stream_output(struct radv_shader_context *ctx,
- LLVMValueRef const *so_buffers,
- LLVMValueRef const *so_write_offsets,
- const struct radv_stream_output *output,
- struct radv_shader_output_values *shader_out)
-{
- unsigned num_comps = util_bitcount(output->component_mask);
- unsigned buf = output->buffer;
- unsigned offset = output->offset;
- unsigned start;
- LLVMValueRef out[4];
-
- assert(num_comps && num_comps <= 4);
- if (!num_comps || num_comps > 4)
- return;
-
- /* Get the first component. */
- start = ffs(output->component_mask) - 1;
-
- /* Load the output as int. */
- for (int i = 0; i < num_comps; i++) {
- out[i] = ac_to_integer(&ctx->ac, shader_out->values[start + i]);
- }
-
- /* Pack the output. */
- LLVMValueRef vdata = NULL;
-
- switch (num_comps) {
- case 1: /* as i32 */
- vdata = out[0];
- break;
- case 2: /* as v2i32 */
- case 3: /* as v4i32 (aligned to 4) */
- out[3] = LLVMGetUndef(ctx->ac.i32);
- /* fall through */
- case 4: /* as v4i32 */
- vdata = ac_build_gather_values(&ctx->ac, out,
- !ac_has_vec3_support(ctx->ac.chip_class, false) ?
- util_next_power_of_two(num_comps) :
- num_comps);
- break;
- }
-
- ac_build_buffer_store_dword(&ctx->ac, so_buffers[buf],
- vdata, num_comps, so_write_offsets[buf],
- ctx->ac.i32_0, offset,
- ac_glc | ac_slc);
+radv_emit_stream_output(struct radv_shader_context *ctx, LLVMValueRef const *so_buffers,
+ LLVMValueRef const *so_write_offsets,
+ const struct radv_stream_output *output,
+ struct radv_shader_output_values *shader_out)
+{
+ unsigned num_comps = util_bitcount(output->component_mask);
+ unsigned buf = output->buffer;
+ unsigned offset = output->offset;
+ unsigned start;
+ LLVMValueRef out[4];
+
+ assert(num_comps && num_comps <= 4);
+ if (!num_comps || num_comps > 4)
+ return;
+
+ /* Get the first component. */
+ start = ffs(output->component_mask) - 1;
+
+ /* Load the output as int. */
+ for (int i = 0; i < num_comps; i++) {
+ out[i] = ac_to_integer(&ctx->ac, shader_out->values[start + i]);
+ }
+
+ /* Pack the output. */
+ LLVMValueRef vdata = NULL;
+
+ switch (num_comps) {
+ case 1: /* as i32 */
+ vdata = out[0];
+ break;
+ case 2: /* as v2i32 */
+ case 3: /* as v4i32 (aligned to 4) */
+ out[3] = LLVMGetUndef(ctx->ac.i32);
+ /* fall through */
+ case 4: /* as v4i32 */
+ vdata = ac_build_gather_values(&ctx->ac, out,
+ !ac_has_vec3_support(ctx->ac.chip_class, false)
+ ? util_next_power_of_two(num_comps)
+ : num_comps);
+ break;
+ }
+
+ ac_build_buffer_store_dword(&ctx->ac, so_buffers[buf], vdata, num_comps, so_write_offsets[buf],
+ ctx->ac.i32_0, offset, ac_glc | ac_slc);
}
static void
radv_emit_streamout(struct radv_shader_context *ctx, unsigned stream)
{
- int i;
-
- /* Get bits [22:16], i.e. (so_param >> 16) & 127; */
- assert(ctx->args->ac.streamout_config.used);
- LLVMValueRef so_vtx_count =
- ac_build_bfe(&ctx->ac,
- ac_get_arg(&ctx->ac, ctx->args->ac.streamout_config),
- LLVMConstInt(ctx->ac.i32, 16, false),
- LLVMConstInt(ctx->ac.i32, 7, false), false);
-
- LLVMValueRef tid = ac_get_thread_id(&ctx->ac);
-
- /* can_emit = tid < so_vtx_count; */
- LLVMValueRef can_emit = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT,
- tid, so_vtx_count, "");
-
- /* Emit the streamout code conditionally. This actually avoids
- * out-of-bounds buffer access. The hw tells us via the SGPR
- * (so_vtx_count) which threads are allowed to emit streamout data.
- */
- ac_build_ifcc(&ctx->ac, can_emit, 6501);
- {
- /* The buffer offset is computed as follows:
- * ByteOffset = streamout_offset[buffer_id]*4 +
- * (streamout_write_index + thread_id)*stride[buffer_id] +
- * attrib_offset
- */
- LLVMValueRef so_write_index =
- ac_get_arg(&ctx->ac, ctx->args->ac.streamout_write_index);
-
- /* Compute (streamout_write_index + thread_id). */
- so_write_index =
- LLVMBuildAdd(ctx->ac.builder, so_write_index, tid, "");
-
- /* Load the descriptor and compute the write offset for each
- * enabled buffer.
- */
- LLVMValueRef so_write_offset[4] = {0};
- LLVMValueRef so_buffers[4] = {0};
- LLVMValueRef buf_ptr = ac_get_arg(&ctx->ac, ctx->args->streamout_buffers);
-
- for (i = 0; i < 4; i++) {
- uint16_t stride = ctx->args->shader_info->so.strides[i];
-
- if (!stride)
- continue;
-
- LLVMValueRef offset =
- LLVMConstInt(ctx->ac.i32, i, false);
-
- so_buffers[i] = ac_build_load_to_sgpr(&ctx->ac,
- buf_ptr, offset);
-
- LLVMValueRef so_offset =
- ac_get_arg(&ctx->ac, ctx->args->ac.streamout_offset[i]);
-
- so_offset = LLVMBuildMul(ctx->ac.builder, so_offset,
- LLVMConstInt(ctx->ac.i32, 4, false), "");
-
- so_write_offset[i] =
- ac_build_imad(&ctx->ac, so_write_index,
- LLVMConstInt(ctx->ac.i32,
- stride * 4, false),
- so_offset);
- }
-
- /* Write streamout data. */
- for (i = 0; i < ctx->args->shader_info->so.num_outputs; i++) {
- struct radv_shader_output_values shader_out = {0};
- struct radv_stream_output *output =
- &ctx->args->shader_info->so.outputs[i];
-
- if (stream != output->stream)
- continue;
-
- for (int j = 0; j < 4; j++) {
- shader_out.values[j] =
- radv_load_output(ctx, output->location, j);
- }
-
- radv_emit_stream_output(ctx, so_buffers,so_write_offset,
- output, &shader_out);
- }
- }
- ac_build_endif(&ctx->ac, 6501);
-}
+ int i;
-static void
-radv_build_param_exports(struct radv_shader_context *ctx,
- struct radv_shader_output_values *outputs,
- unsigned noutput,
- struct radv_vs_output_info *outinfo,
- bool export_clip_dists)
-{
- unsigned param_count = 0;
+ /* Get bits [22:16], i.e. (so_param >> 16) & 127; */
+ assert(ctx->args->ac.streamout_config.used);
+ LLVMValueRef so_vtx_count = ac_build_bfe(
+ &ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.streamout_config),
+ LLVMConstInt(ctx->ac.i32, 16, false), LLVMConstInt(ctx->ac.i32, 7, false), false);
- for (unsigned i = 0; i < noutput; i++) {
- unsigned slot_name = outputs[i].slot_name;
- unsigned usage_mask = outputs[i].usage_mask;
+ LLVMValueRef tid = ac_get_thread_id(&ctx->ac);
- if (slot_name != VARYING_SLOT_LAYER &&
- slot_name != VARYING_SLOT_PRIMITIVE_ID &&
- slot_name != VARYING_SLOT_VIEWPORT &&
- slot_name != VARYING_SLOT_CLIP_DIST0 &&
- slot_name != VARYING_SLOT_CLIP_DIST1 &&
- slot_name < VARYING_SLOT_VAR0)
- continue;
+ /* can_emit = tid < so_vtx_count; */
+ LLVMValueRef can_emit = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, tid, so_vtx_count, "");
- if ((slot_name == VARYING_SLOT_CLIP_DIST0 ||
- slot_name == VARYING_SLOT_CLIP_DIST1) && !export_clip_dists)
- continue;
+ /* Emit the streamout code conditionally. This actually avoids
+ * out-of-bounds buffer access. The hw tells us via the SGPR
+ * (so_vtx_count) which threads are allowed to emit streamout data.
+ */
+ ac_build_ifcc(&ctx->ac, can_emit, 6501);
+ {
+ /* The buffer offset is computed as follows:
+ * ByteOffset = streamout_offset[buffer_id]*4 +
+ * (streamout_write_index + thread_id)*stride[buffer_id] +
+ * attrib_offset
+ */
+ LLVMValueRef so_write_index = ac_get_arg(&ctx->ac, ctx->args->ac.streamout_write_index);
- radv_export_param(ctx, param_count, outputs[i].values, usage_mask);
+ /* Compute (streamout_write_index + thread_id). */
+ so_write_index = LLVMBuildAdd(ctx->ac.builder, so_write_index, tid, "");
- assert(i < ARRAY_SIZE(outinfo->vs_output_param_offset));
- outinfo->vs_output_param_offset[slot_name] = param_count++;
- }
+ /* Load the descriptor and compute the write offset for each
+ * enabled buffer.
+ */
+ LLVMValueRef so_write_offset[4] = {0};
+ LLVMValueRef so_buffers[4] = {0};
+ LLVMValueRef buf_ptr = ac_get_arg(&ctx->ac, ctx->args->streamout_buffers);
- outinfo->param_exports = param_count;
-}
+ for (i = 0; i < 4; i++) {
+ uint16_t stride = ctx->args->shader_info->so.strides[i];
-/* Generate export instructions for hardware VS shader stage or NGG GS stage
- * (position and parameter data only).
- */
-static void
-radv_llvm_export_vs(struct radv_shader_context *ctx,
- struct radv_shader_output_values *outputs,
- unsigned noutput,
- struct radv_vs_output_info *outinfo,
- bool export_clip_dists)
-{
- LLVMValueRef psize_value = NULL, layer_value = NULL, viewport_value = NULL;
- LLVMValueRef primitive_shading_rate = NULL;
- struct ac_export_args pos_args[4] = {0};
- unsigned pos_idx, index;
- int i;
-
- /* Build position exports */
- for (i = 0; i < noutput; i++) {
- switch (outputs[i].slot_name) {
- case VARYING_SLOT_POS:
- si_llvm_init_export_args(ctx, outputs[i].values, 0xf,
- V_008DFC_SQ_EXP_POS, &pos_args[0]);
- break;
- case VARYING_SLOT_PSIZ:
- psize_value = outputs[i].values[0];
- break;
- case VARYING_SLOT_LAYER:
- layer_value = outputs[i].values[0];
- break;
- case VARYING_SLOT_VIEWPORT:
- viewport_value = outputs[i].values[0];
- break;
- case VARYING_SLOT_PRIMITIVE_SHADING_RATE:
- primitive_shading_rate = outputs[i].values[0];
- break;
- case VARYING_SLOT_CLIP_DIST0:
- case VARYING_SLOT_CLIP_DIST1:
- index = 2 + outputs[i].slot_index;
- si_llvm_init_export_args(ctx, outputs[i].values, 0xf,
- V_008DFC_SQ_EXP_POS + index,
- &pos_args[index]);
- break;
- default:
- break;
- }
- }
-
- /* We need to add the position output manually if it's missing. */
- if (!pos_args[0].out[0]) {
- pos_args[0].enabled_channels = 0xf; /* writemask */
- pos_args[0].valid_mask = 0; /* EXEC mask */
- pos_args[0].done = 0; /* last export? */
- pos_args[0].target = V_008DFC_SQ_EXP_POS;
- pos_args[0].compr = 0; /* COMPR flag */
- pos_args[0].out[0] = ctx->ac.f32_0; /* X */
- pos_args[0].out[1] = ctx->ac.f32_0; /* Y */
- pos_args[0].out[2] = ctx->ac.f32_0; /* Z */
- pos_args[0].out[3] = ctx->ac.f32_1; /* W */
- }
-
- if (outinfo->writes_pointsize ||
- outinfo->writes_layer ||
- outinfo->writes_layer ||
- outinfo->writes_viewport_index ||
- outinfo->writes_primitive_shading_rate) {
- pos_args[1].enabled_channels = ((outinfo->writes_pointsize == true ? 1 : 0) |
- (outinfo->writes_primitive_shading_rate == true ? 2 : 0) |
- (outinfo->writes_layer == true ? 4 : 0));
- pos_args[1].valid_mask = 0;
- pos_args[1].done = 0;
- pos_args[1].target = V_008DFC_SQ_EXP_POS + 1;
- pos_args[1].compr = 0;
- pos_args[1].out[0] = ctx->ac.f32_0; /* X */
- pos_args[1].out[1] = ctx->ac.f32_0; /* Y */
- pos_args[1].out[2] = ctx->ac.f32_0; /* Z */
- pos_args[1].out[3] = ctx->ac.f32_0; /* W */
-
- if (outinfo->writes_pointsize == true)
- pos_args[1].out[0] = psize_value;
- if (outinfo->writes_layer == true)
- pos_args[1].out[2] = layer_value;
- if (outinfo->writes_viewport_index == true) {
- if (ctx->args->options->chip_class >= GFX9) {
- /* GFX9 has the layer in out.z[10:0] and the viewport
- * index in out.z[19:16].
- */
- LLVMValueRef v = viewport_value;
- v = ac_to_integer(&ctx->ac, v);
- v = LLVMBuildShl(ctx->ac.builder, v,
- LLVMConstInt(ctx->ac.i32, 16, false),
- "");
- v = LLVMBuildOr(ctx->ac.builder, v,
- ac_to_integer(&ctx->ac, pos_args[1].out[2]), "");
-
- pos_args[1].out[2] = ac_to_float(&ctx->ac, v);
- pos_args[1].enabled_channels |= 1 << 2;
- } else {
- pos_args[1].out[3] = viewport_value;
- pos_args[1].enabled_channels |= 1 << 3;
- }
- }
-
- if (outinfo->writes_primitive_shading_rate) {
- LLVMValueRef v = ac_to_integer(&ctx->ac, primitive_shading_rate);
- LLVMValueRef cond;
-
- /* xRate = (shadingRate & (Horizontal2Pixels | Horizontal4Pixels)) ? 0x1 : 0x0; */
- LLVMValueRef x_rate =
- LLVMBuildAnd(ctx->ac.builder, v,
- LLVMConstInt(ctx->ac.i32, 4 | 8, false), "");
- cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, x_rate, ctx->ac.i32_0, "");
- x_rate = LLVMBuildSelect(ctx->ac.builder, cond,
- ctx->ac.i32_1, ctx->ac.i32_0, "");
-
- /* yRate = (shadingRate & (Vertical2Pixels | Vertical4Pixels)) ? 0x1 : 0x0; */
- LLVMValueRef y_rate =
- LLVMBuildAnd(ctx->ac.builder, v,
- LLVMConstInt(ctx->ac.i32, 1 | 2, false), "");
- cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, y_rate, ctx->ac.i32_0, "");
- y_rate = LLVMBuildSelect(ctx->ac.builder, cond,
- ctx->ac.i32_1, ctx->ac.i32_0, "");
-
- /* Bits [2:3] = VRS rate X
- * Bits [4:5] = VRS rate Y
- * HW shading rate = (xRate << 2) | (yRate << 4)
- */
- v = LLVMBuildOr(ctx->ac.builder,
- LLVMBuildShl(ctx->ac.builder, x_rate,
- LLVMConstInt(ctx->ac.i32, 2, false), ""),
- LLVMBuildShl(ctx->ac.builder, y_rate,
- LLVMConstInt(ctx->ac.i32, 4, false), ""), "");
- pos_args[1].out[1] = ac_to_float(&ctx->ac, v);
- }
- }
-
- for (i = 0; i < 4; i++) {
- if (pos_args[i].out[0])
- outinfo->pos_exports++;
- }
-
- /* GFX10 skip POS0 exports if EXEC=0 and DONE=0, causing a hang.
- * Setting valid_mask=1 prevents it and has no other effect.
- */
- if (ctx->ac.chip_class == GFX10)
- pos_args[0].valid_mask = 1;
-
- pos_idx = 0;
- for (i = 0; i < 4; i++) {
- if (!pos_args[i].out[0])
- continue;
-
- /* Specify the target we are exporting */
- pos_args[i].target = V_008DFC_SQ_EXP_POS + pos_idx++;
-
- if (pos_idx == outinfo->pos_exports)
- /* Specify that this is the last export */
- pos_args[i].done = 1;
-
- ac_build_export(&ctx->ac, &pos_args[i]);
- }
-
- /* Build parameter exports */
- radv_build_param_exports(ctx, outputs, noutput, outinfo, export_clip_dists);
-}
+ if (!stride)
+ continue;
-static void
-handle_vs_outputs_post(struct radv_shader_context *ctx,
- bool export_prim_id,
- bool export_clip_dists,
- struct radv_vs_output_info *outinfo)
-{
- struct radv_shader_output_values *outputs;
- unsigned noutput = 0;
-
- if (ctx->args->options->key.has_multiview_view_index) {
- LLVMValueRef* tmp_out = &ctx->abi.outputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)];
- if(!*tmp_out) {
- for(unsigned i = 0; i < 4; ++i)
- ctx->abi.outputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, i)] =
- ac_build_alloca_undef(&ctx->ac, ctx->ac.f32, "");
- }
+ LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, i, false);
+
+ so_buffers[i] = ac_build_load_to_sgpr(&ctx->ac, buf_ptr, offset);
+
+ LLVMValueRef so_offset = ac_get_arg(&ctx->ac, ctx->args->ac.streamout_offset[i]);
+
+ so_offset =
+ LLVMBuildMul(ctx->ac.builder, so_offset, LLVMConstInt(ctx->ac.i32, 4, false), "");
- LLVMValueRef view_index = ac_get_arg(&ctx->ac, ctx->args->ac.view_index);
- LLVMBuildStore(ctx->ac.builder, ac_to_float(&ctx->ac, view_index), *tmp_out);
- ctx->output_mask |= 1ull << VARYING_SLOT_LAYER;
- }
+ so_write_offset[i] = ac_build_imad(
+ &ctx->ac, so_write_index, LLVMConstInt(ctx->ac.i32, stride * 4, false), so_offset);
+ }
- memset(outinfo->vs_output_param_offset, AC_EXP_PARAM_UNDEFINED,
- sizeof(outinfo->vs_output_param_offset));
- outinfo->pos_exports = 0;
+ /* Write streamout data. */
+ for (i = 0; i < ctx->args->shader_info->so.num_outputs; i++) {
+ struct radv_shader_output_values shader_out = {0};
+ struct radv_stream_output *output = &ctx->args->shader_info->so.outputs[i];
- if (!ctx->args->options->use_ngg_streamout &&
- ctx->args->shader_info->so.num_outputs &&
- !ctx->args->is_gs_copy_shader) {
- /* The GS copy shader emission already emits streamout. */
- radv_emit_streamout(ctx, 0);
- }
+ if (stream != output->stream)
+ continue;
- /* Allocate a temporary array for the output values. */
- unsigned num_outputs = util_bitcount64(ctx->output_mask) + export_prim_id;
- outputs = malloc(num_outputs * sizeof(outputs[0]));
+ for (int j = 0; j < 4; j++) {
+ shader_out.values[j] = radv_load_output(ctx, output->location, j);
+ }
- for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
- if (!(ctx->output_mask & (1ull << i)))
- continue;
+ radv_emit_stream_output(ctx, so_buffers, so_write_offset, output, &shader_out);
+ }
+ }
+ ac_build_endif(&ctx->ac, 6501);
+}
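Putting numbers on the ByteOffset formula from the comment above: the per-buffer offset and stride are kept in dwords (hence the * 4), the write index already includes the thread id, and the attribute offset goes in as the store's constant offset. A worked example in plain C with hypothetical values, mirroring the arithmetic rather than the LLVM calls.

#include <stdint.h>
#include <stdio.h>

/* ByteOffset = streamout_offset[buf] * 4
 *            + (streamout_write_index + thread_id) * stride[buf] * 4
 *            + attrib_offset
 * as computed in radv_emit_streamout() (strides stored in dwords). */
static uint32_t
so_byte_offset(uint32_t so_offset_dw, uint32_t write_index, uint32_t tid,
               uint16_t stride_dw, uint32_t attrib_offset)
{
   return so_offset_dw * 4 + (write_index + tid) * stride_dw * 4 + attrib_offset;
}

int
main(void)
{
   /* buffer offset 16 dwords, write index 100, thread 5,
    * 4-dword stride, attribute at byte 8 within the vertex */
   printf("%u\n", so_byte_offset(16, 100, 5, 4, 8)); /* 64 + 1680 + 8 = 1752 */
   return 0;
}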
- outputs[noutput].slot_name = i;
- outputs[noutput].slot_index = i == VARYING_SLOT_CLIP_DIST1;
+static void
+radv_build_param_exports(struct radv_shader_context *ctx, struct radv_shader_output_values *outputs,
+ unsigned noutput, struct radv_vs_output_info *outinfo,
+ bool export_clip_dists)
+{
+ unsigned param_count = 0;
- if (ctx->stage == MESA_SHADER_VERTEX &&
- !ctx->args->is_gs_copy_shader) {
- outputs[noutput].usage_mask =
- ctx->args->shader_info->vs.output_usage_mask[i];
- } else if (ctx->stage == MESA_SHADER_TESS_EVAL) {
- outputs[noutput].usage_mask =
- ctx->args->shader_info->tes.output_usage_mask[i];
- } else {
- assert(ctx->args->is_gs_copy_shader);
- outputs[noutput].usage_mask =
- ctx->args->shader_info->gs.output_usage_mask[i];
- }
+ for (unsigned i = 0; i < noutput; i++) {
+ unsigned slot_name = outputs[i].slot_name;
+ unsigned usage_mask = outputs[i].usage_mask;
- for (unsigned j = 0; j < 4; j++) {
- outputs[noutput].values[j] =
- ac_to_float(&ctx->ac, radv_load_output(ctx, i, j));
- }
+ if (slot_name != VARYING_SLOT_LAYER && slot_name != VARYING_SLOT_PRIMITIVE_ID &&
+ slot_name != VARYING_SLOT_VIEWPORT && slot_name != VARYING_SLOT_CLIP_DIST0 &&
+ slot_name != VARYING_SLOT_CLIP_DIST1 && slot_name < VARYING_SLOT_VAR0)
+ continue;
- noutput++;
- }
+ if ((slot_name == VARYING_SLOT_CLIP_DIST0 || slot_name == VARYING_SLOT_CLIP_DIST1) &&
+ !export_clip_dists)
+ continue;
- /* Export PrimitiveID. */
- if (export_prim_id) {
- outputs[noutput].slot_name = VARYING_SLOT_PRIMITIVE_ID;
- outputs[noutput].slot_index = 0;
- outputs[noutput].usage_mask = 0x1;
- if (ctx->stage == MESA_SHADER_TESS_EVAL)
- outputs[noutput].values[0] =
- ac_get_arg(&ctx->ac, ctx->args->ac.tes_patch_id);
- else
- outputs[noutput].values[0] =
- ac_get_arg(&ctx->ac, ctx->args->ac.vs_prim_id);
- for (unsigned j = 1; j < 4; j++)
- outputs[noutput].values[j] = ctx->ac.f32_0;
- noutput++;
- }
+ radv_export_param(ctx, param_count, outputs[i].values, usage_mask);
- radv_llvm_export_vs(ctx, outputs, noutput, outinfo, export_clip_dists);
+ assert(i < ARRAY_SIZE(outinfo->vs_output_param_offset));
+ outinfo->vs_output_param_offset[slot_name] = param_count++;
+ }
- free(outputs);
+ outinfo->param_exports = param_count;
}
-static LLVMValueRef get_wave_id_in_tg(struct radv_shader_context *ctx)
+/* Generate export instructions for hardware VS shader stage or NGG GS stage
+ * (position and parameter data only).
+ */
+static void
+radv_llvm_export_vs(struct radv_shader_context *ctx, struct radv_shader_output_values *outputs,
+ unsigned noutput, struct radv_vs_output_info *outinfo, bool export_clip_dists)
+{
+ LLVMValueRef psize_value = NULL, layer_value = NULL, viewport_value = NULL;
+ LLVMValueRef primitive_shading_rate = NULL;
+ struct ac_export_args pos_args[4] = {0};
+ unsigned pos_idx, index;
+ int i;
+
+ /* Build position exports */
+ for (i = 0; i < noutput; i++) {
+ switch (outputs[i].slot_name) {
+ case VARYING_SLOT_POS:
+ si_llvm_init_export_args(ctx, outputs[i].values, 0xf, V_008DFC_SQ_EXP_POS, &pos_args[0]);
+ break;
+ case VARYING_SLOT_PSIZ:
+ psize_value = outputs[i].values[0];
+ break;
+ case VARYING_SLOT_LAYER:
+ layer_value = outputs[i].values[0];
+ break;
+ case VARYING_SLOT_VIEWPORT:
+ viewport_value = outputs[i].values[0];
+ break;
+ case VARYING_SLOT_PRIMITIVE_SHADING_RATE:
+ primitive_shading_rate = outputs[i].values[0];
+ break;
+ case VARYING_SLOT_CLIP_DIST0:
+ case VARYING_SLOT_CLIP_DIST1:
+ index = 2 + outputs[i].slot_index;
+ si_llvm_init_export_args(ctx, outputs[i].values, 0xf, V_008DFC_SQ_EXP_POS + index,
+ &pos_args[index]);
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* We need to add the position output manually if it's missing. */
+ if (!pos_args[0].out[0]) {
+ pos_args[0].enabled_channels = 0xf; /* writemask */
+ pos_args[0].valid_mask = 0; /* EXEC mask */
+ pos_args[0].done = 0; /* last export? */
+ pos_args[0].target = V_008DFC_SQ_EXP_POS;
+ pos_args[0].compr = 0; /* COMPR flag */
+ pos_args[0].out[0] = ctx->ac.f32_0; /* X */
+ pos_args[0].out[1] = ctx->ac.f32_0; /* Y */
+ pos_args[0].out[2] = ctx->ac.f32_0; /* Z */
+ pos_args[0].out[3] = ctx->ac.f32_1; /* W */
+ }
+
+ if (outinfo->writes_pointsize || outinfo->writes_layer || outinfo->writes_layer ||
+ outinfo->writes_viewport_index || outinfo->writes_primitive_shading_rate) {
+ pos_args[1].enabled_channels = ((outinfo->writes_pointsize == true ? 1 : 0) |
+ (outinfo->writes_primitive_shading_rate == true ? 2 : 0) |
+ (outinfo->writes_layer == true ? 4 : 0));
+ pos_args[1].valid_mask = 0;
+ pos_args[1].done = 0;
+ pos_args[1].target = V_008DFC_SQ_EXP_POS + 1;
+ pos_args[1].compr = 0;
+ pos_args[1].out[0] = ctx->ac.f32_0; /* X */
+ pos_args[1].out[1] = ctx->ac.f32_0; /* Y */
+ pos_args[1].out[2] = ctx->ac.f32_0; /* Z */
+ pos_args[1].out[3] = ctx->ac.f32_0; /* W */
+
+ if (outinfo->writes_pointsize == true)
+ pos_args[1].out[0] = psize_value;
+ if (outinfo->writes_layer == true)
+ pos_args[1].out[2] = layer_value;
+ if (outinfo->writes_viewport_index == true) {
+ if (ctx->args->options->chip_class >= GFX9) {
+ /* GFX9 has the layer in out.z[10:0] and the viewport
+ * index in out.z[19:16].
+ */
+ LLVMValueRef v = viewport_value;
+ v = ac_to_integer(&ctx->ac, v);
+ v = LLVMBuildShl(ctx->ac.builder, v, LLVMConstInt(ctx->ac.i32, 16, false), "");
+ v = LLVMBuildOr(ctx->ac.builder, v, ac_to_integer(&ctx->ac, pos_args[1].out[2]), "");
+
+ pos_args[1].out[2] = ac_to_float(&ctx->ac, v);
+ pos_args[1].enabled_channels |= 1 << 2;
+ } else {
+ pos_args[1].out[3] = viewport_value;
+ pos_args[1].enabled_channels |= 1 << 3;
+ }
+ }
+
+ if (outinfo->writes_primitive_shading_rate) {
+ LLVMValueRef v = ac_to_integer(&ctx->ac, primitive_shading_rate);
+ LLVMValueRef cond;
+
+ /* xRate = (shadingRate & (Horizontal2Pixels | Horizontal4Pixels)) ? 0x1 : 0x0; */
+ LLVMValueRef x_rate =
+ LLVMBuildAnd(ctx->ac.builder, v, LLVMConstInt(ctx->ac.i32, 4 | 8, false), "");
+ cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, x_rate, ctx->ac.i32_0, "");
+ x_rate = LLVMBuildSelect(ctx->ac.builder, cond, ctx->ac.i32_1, ctx->ac.i32_0, "");
+
+ /* yRate = (shadingRate & (Vertical2Pixels | Vertical4Pixels)) ? 0x1 : 0x0; */
+ LLVMValueRef y_rate =
+ LLVMBuildAnd(ctx->ac.builder, v, LLVMConstInt(ctx->ac.i32, 1 | 2, false), "");
+ cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, y_rate, ctx->ac.i32_0, "");
+ y_rate = LLVMBuildSelect(ctx->ac.builder, cond, ctx->ac.i32_1, ctx->ac.i32_0, "");
+
+ /* Bits [2:3] = VRS rate X
+ * Bits [4:5] = VRS rate Y
+ * HW shading rate = (xRate << 2) | (yRate << 4)
+ */
+ v = LLVMBuildOr(
+ ctx->ac.builder,
+ LLVMBuildShl(ctx->ac.builder, x_rate, LLVMConstInt(ctx->ac.i32, 2, false), ""),
+ LLVMBuildShl(ctx->ac.builder, y_rate, LLVMConstInt(ctx->ac.i32, 4, false), ""), "");
+ pos_args[1].out[1] = ac_to_float(&ctx->ac, v);
+ }
+ }
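For reference, a minimal scalar sketch of the "misc" position export (pos_args[1]) assembled above, written in plain C with hypothetical helper names (this is not driver code): point size goes in X, the packed VRS rate in Y, and layer/viewport share Z on GFX9+ (W on earlier chips).

#include <stdint.h>

/* Illustrative helpers mirroring the per-lane LLVM IR above. */
static uint32_t pack_hw_vrs_rate(uint32_t shading_rate)
{
   /* xRate/yRate collapse to 0 or 1 and land in bits [2:3] and [4:5]. */
   uint32_t x_rate = (shading_rate & (4u | 8u)) ? 1u : 0u;
   uint32_t y_rate = (shading_rate & (1u | 2u)) ? 1u : 0u;
   return (x_rate << 2) | (y_rate << 4);
}

static uint32_t pack_gfx9_layer_viewport(uint32_t layer, uint32_t viewport)
{
   /* GFX9+: layer in z[10:0], viewport index in z[19:16]. */
   return (viewport << 16) | layer;
}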
+
+ for (i = 0; i < 4; i++) {
+ if (pos_args[i].out[0])
+ outinfo->pos_exports++;
+ }
+
+ /* GFX10 skips POS0 exports if EXEC=0 and DONE=0, causing a hang.
+ * Setting valid_mask=1 prevents it and has no other effect.
+ */
+ if (ctx->ac.chip_class == GFX10)
+ pos_args[0].valid_mask = 1;
+
+ pos_idx = 0;
+ for (i = 0; i < 4; i++) {
+ if (!pos_args[i].out[0])
+ continue;
+
+ /* Specify the target we are exporting */
+ pos_args[i].target = V_008DFC_SQ_EXP_POS + pos_idx++;
+
+ if (pos_idx == outinfo->pos_exports)
+ /* Specify that this is the last export */
+ pos_args[i].done = 1;
+
+ ac_build_export(&ctx->ac, &pos_args[i]);
+ }
+
+ /* Build parameter exports */
+ radv_build_param_exports(ctx, outputs, noutput, outinfo, export_clip_dists);
+}
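As a side note, the target/done bookkeeping in the export loop above amounts to the following small sketch (illustrative names, not driver code): used position exports are packed into consecutive POS targets, and only the last one carries the "done" flag.

struct pos_export { int used; unsigned target; int done; };

static void assign_pos_targets(struct pos_export args[4], unsigned pos_exports,
                               unsigned exp_pos_base /* e.g. V_008DFC_SQ_EXP_POS */)
{
   unsigned pos_idx = 0;
   for (int i = 0; i < 4; i++) {
      if (!args[i].used)
         continue;
      args[i].target = exp_pos_base + pos_idx++;  /* consecutive POS targets */
      args[i].done = (pos_idx == pos_exports);    /* last export closes the group */
   }
}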
+
+static void
+handle_vs_outputs_post(struct radv_shader_context *ctx, bool export_prim_id, bool export_clip_dists,
+ struct radv_vs_output_info *outinfo)
+{
+ struct radv_shader_output_values *outputs;
+ unsigned noutput = 0;
+
+ if (ctx->args->options->key.has_multiview_view_index) {
+ LLVMValueRef *tmp_out = &ctx->abi.outputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)];
+ if (!*tmp_out) {
+ for (unsigned i = 0; i < 4; ++i)
+ ctx->abi.outputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, i)] =
+ ac_build_alloca_undef(&ctx->ac, ctx->ac.f32, "");
+ }
+
+ LLVMValueRef view_index = ac_get_arg(&ctx->ac, ctx->args->ac.view_index);
+ LLVMBuildStore(ctx->ac.builder, ac_to_float(&ctx->ac, view_index), *tmp_out);
+ ctx->output_mask |= 1ull << VARYING_SLOT_LAYER;
+ }
+
+ memset(outinfo->vs_output_param_offset, AC_EXP_PARAM_UNDEFINED,
+ sizeof(outinfo->vs_output_param_offset));
+ outinfo->pos_exports = 0;
+
+ if (!ctx->args->options->use_ngg_streamout && ctx->args->shader_info->so.num_outputs &&
+ !ctx->args->is_gs_copy_shader) {
+ /* The GS copy shader emission already emits streamout. */
+ radv_emit_streamout(ctx, 0);
+ }
+
+ /* Allocate a temporary array for the output values. */
+ unsigned num_outputs = util_bitcount64(ctx->output_mask) + export_prim_id;
+ outputs = malloc(num_outputs * sizeof(outputs[0]));
+
+ for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
+ if (!(ctx->output_mask & (1ull << i)))
+ continue;
+
+ outputs[noutput].slot_name = i;
+ outputs[noutput].slot_index = i == VARYING_SLOT_CLIP_DIST1;
+
+ if (ctx->stage == MESA_SHADER_VERTEX && !ctx->args->is_gs_copy_shader) {
+ outputs[noutput].usage_mask = ctx->args->shader_info->vs.output_usage_mask[i];
+ } else if (ctx->stage == MESA_SHADER_TESS_EVAL) {
+ outputs[noutput].usage_mask = ctx->args->shader_info->tes.output_usage_mask[i];
+ } else {
+ assert(ctx->args->is_gs_copy_shader);
+ outputs[noutput].usage_mask = ctx->args->shader_info->gs.output_usage_mask[i];
+ }
+
+ for (unsigned j = 0; j < 4; j++) {
+ outputs[noutput].values[j] = ac_to_float(&ctx->ac, radv_load_output(ctx, i, j));
+ }
+
+ noutput++;
+ }
+
+ /* Export PrimitiveID. */
+ if (export_prim_id) {
+ outputs[noutput].slot_name = VARYING_SLOT_PRIMITIVE_ID;
+ outputs[noutput].slot_index = 0;
+ outputs[noutput].usage_mask = 0x1;
+ if (ctx->stage == MESA_SHADER_TESS_EVAL)
+ outputs[noutput].values[0] = ac_get_arg(&ctx->ac, ctx->args->ac.tes_patch_id);
+ else
+ outputs[noutput].values[0] = ac_get_arg(&ctx->ac, ctx->args->ac.vs_prim_id);
+ for (unsigned j = 1; j < 4; j++)
+ outputs[noutput].values[j] = ctx->ac.f32_0;
+ noutput++;
+ }
+
+ radv_llvm_export_vs(ctx, outputs, noutput, outinfo, export_clip_dists);
+
+ free(outputs);
+}
+
+static LLVMValueRef
+get_wave_id_in_tg(struct radv_shader_context *ctx)
{
- return ac_unpack_param(&ctx->ac,
- ac_get_arg(&ctx->ac, ctx->args->ac.merged_wave_info), 24, 4);
+ return ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.merged_wave_info), 24, 4);
}
-static LLVMValueRef get_tgsize(struct radv_shader_context *ctx)
+static LLVMValueRef
+get_tgsize(struct radv_shader_context *ctx)
{
- return ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.merged_wave_info), 28, 4);
+ return ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.merged_wave_info), 28, 4);
}
-static LLVMValueRef get_thread_id_in_tg(struct radv_shader_context *ctx)
+static LLVMValueRef
+get_thread_id_in_tg(struct radv_shader_context *ctx)
{
- LLVMBuilderRef builder = ctx->ac.builder;
- LLVMValueRef tmp;
- tmp = LLVMBuildMul(builder, get_wave_id_in_tg(ctx),
- LLVMConstInt(ctx->ac.i32, ctx->ac.wave_size, false), "");
- return LLVMBuildAdd(builder, tmp, ac_get_thread_id(&ctx->ac), "");
+ LLVMBuilderRef builder = ctx->ac.builder;
+ LLVMValueRef tmp;
+ tmp = LLVMBuildMul(builder, get_wave_id_in_tg(ctx),
+ LLVMConstInt(ctx->ac.i32, ctx->ac.wave_size, false), "");
+ return LLVMBuildAdd(builder, tmp, ac_get_thread_id(&ctx->ac), "");
}
-static LLVMValueRef ngg_get_vtx_cnt(struct radv_shader_context *ctx)
+static LLVMValueRef
+ngg_get_vtx_cnt(struct radv_shader_context *ctx)
{
- return ac_build_bfe(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.gs_tg_info),
- LLVMConstInt(ctx->ac.i32, 12, false),
- LLVMConstInt(ctx->ac.i32, 9, false),
- false);
+ return ac_build_bfe(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.gs_tg_info),
+ LLVMConstInt(ctx->ac.i32, 12, false), LLVMConstInt(ctx->ac.i32, 9, false),
+ false);
}
-static LLVMValueRef ngg_get_prim_cnt(struct radv_shader_context *ctx)
+static LLVMValueRef
+ngg_get_prim_cnt(struct radv_shader_context *ctx)
{
- return ac_build_bfe(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.gs_tg_info),
- LLVMConstInt(ctx->ac.i32, 22, false),
- LLVMConstInt(ctx->ac.i32, 9, false),
- false);
+ return ac_build_bfe(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.gs_tg_info),
+ LLVMConstInt(ctx->ac.i32, 22, false), LLVMConstInt(ctx->ac.i32, 9, false),
+ false);
}
-static LLVMValueRef ngg_get_ordered_id(struct radv_shader_context *ctx)
+static LLVMValueRef
+ngg_get_ordered_id(struct radv_shader_context *ctx)
{
- return ac_build_bfe(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.gs_tg_info),
- ctx->ac.i32_0,
- LLVMConstInt(ctx->ac.i32, 12, false),
- false);
+ return ac_build_bfe(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.gs_tg_info), ctx->ac.i32_0,
+ LLVMConstInt(ctx->ac.i32, 12, false), false);
}
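A plain-C view of the bit fields these accessors unpack; offsets and widths are taken directly from the calls above, the helper names are illustrative.

#include <stdint.h>

static inline uint32_t bfe(uint32_t v, unsigned offset, unsigned width)
{
   return (v >> offset) & ((1u << width) - 1u);
}

/* merged_wave_info: wave id in bits [27:24], waves per threadgroup in [31:28]. */
static inline uint32_t sketch_wave_id_in_tg(uint32_t merged_wave_info)
{
   return bfe(merged_wave_info, 24, 4);
}
static inline uint32_t sketch_tgsize(uint32_t merged_wave_info)
{
   return bfe(merged_wave_info, 28, 4);
}

/* gs_tg_info: ordered id in bits [11:0], vertex count in [20:12], primitive count in [30:22]. */
static inline uint32_t sketch_vtx_cnt(uint32_t gs_tg_info)    { return bfe(gs_tg_info, 12, 9); }
static inline uint32_t sketch_prim_cnt(uint32_t gs_tg_info)   { return bfe(gs_tg_info, 22, 9); }
static inline uint32_t sketch_ordered_id(uint32_t gs_tg_info) { return bfe(gs_tg_info, 0, 12); }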
static LLVMValueRef
ngg_gs_get_vertex_storage(struct radv_shader_context *ctx)
{
- unsigned num_outputs = util_bitcount64(ctx->output_mask);
+ unsigned num_outputs = util_bitcount64(ctx->output_mask);
- if (ctx->args->options->key.has_multiview_view_index)
- num_outputs++;
+ if (ctx->args->options->key.has_multiview_view_index)
+ num_outputs++;
- LLVMTypeRef elements[2] = {
- LLVMArrayType(ctx->ac.i32, 4 * num_outputs),
- LLVMArrayType(ctx->ac.i8, 4),
- };
- LLVMTypeRef type = LLVMStructTypeInContext(ctx->ac.context, elements, 2, false);
- type = LLVMPointerType(LLVMArrayType(type, 0), AC_ADDR_SPACE_LDS);
- return LLVMBuildBitCast(ctx->ac.builder, ctx->gs_ngg_emit, type, "");
+ LLVMTypeRef elements[2] = {
+ LLVMArrayType(ctx->ac.i32, 4 * num_outputs),
+ LLVMArrayType(ctx->ac.i8, 4),
+ };
+ LLVMTypeRef type = LLVMStructTypeInContext(ctx->ac.context, elements, 2, false);
+ type = LLVMPointerType(LLVMArrayType(type, 0), AC_ADDR_SPACE_LDS);
+ return LLVMBuildBitCast(ctx->ac.builder, ctx->gs_ngg_emit, type, "");
}
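Conceptually, the LDS record built here has the following per-vertex layout (illustrative C only; the output count is shader-dependent, so a fixed example size is used):

#include <stdint.h>

#define EXAMPLE_NUM_OUTPUTS 8   /* per shader: util_bitcount64(output_mask), +1 with multiview */

struct ngg_gs_lds_vertex {
   uint32_t outputs[4 * EXAMPLE_NUM_OUTPUTS]; /* 4 dwords per output slot */
   uint8_t  prim_flag[4];                     /* one emit/primitive flag byte per GS stream */
};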
/**
@@ -1651,177 +1522,167 @@ ngg_gs_get_vertex_storage(struct radv_shader_context *ctx)
static LLVMValueRef
ngg_gs_vertex_ptr(struct radv_shader_context *ctx, LLVMValueRef vertexidx)
{
- LLVMBuilderRef builder = ctx->ac.builder;
- LLVMValueRef storage = ngg_gs_get_vertex_storage(ctx);
+ LLVMBuilderRef builder = ctx->ac.builder;
+ LLVMValueRef storage = ngg_gs_get_vertex_storage(ctx);
- /* gs_max_out_vertices = 2^(write_stride_2exp) * some odd number */
- unsigned write_stride_2exp = ffs(MAX2(ctx->shader->info.gs.vertices_out, 1)) - 1;
- if (write_stride_2exp) {
- LLVMValueRef row =
- LLVMBuildLShr(builder, vertexidx,
- LLVMConstInt(ctx->ac.i32, 5, false), "");
- LLVMValueRef swizzle =
- LLVMBuildAnd(builder, row,
- LLVMConstInt(ctx->ac.i32, (1u << write_stride_2exp) - 1,
- false), "");
- vertexidx = LLVMBuildXor(builder, vertexidx, swizzle, "");
- }
+ /* gs_max_out_vertices = 2^(write_stride_2exp) * some odd number */
+ unsigned write_stride_2exp = ffs(MAX2(ctx->shader->info.gs.vertices_out, 1)) - 1;
+ if (write_stride_2exp) {
+ LLVMValueRef row = LLVMBuildLShr(builder, vertexidx, LLVMConstInt(ctx->ac.i32, 5, false), "");
+ LLVMValueRef swizzle = LLVMBuildAnd(
+ builder, row, LLVMConstInt(ctx->ac.i32, (1u << write_stride_2exp) - 1, false), "");
+ vertexidx = LLVMBuildXor(builder, vertexidx, swizzle, "");
+ }
- return ac_build_gep0(&ctx->ac, storage, vertexidx);
+ return ac_build_gep0(&ctx->ac, storage, vertexidx);
}
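A scalar sketch of the index swizzle above, assuming plain C (and a GCC/Clang __builtin_ffs): when gs.vertices_out contains a power-of-two factor 2^k, low bits of the row (vertexidx >> 5) are XOR-ed into the index so consecutive vertices do not land in the same LDS banks.

static unsigned swizzle_vertexidx(unsigned vertexidx, unsigned vertices_out)
{
   /* gs_max_out_vertices = 2^(write_stride_2exp) * some odd number */
   unsigned write_stride_2exp = __builtin_ffs(vertices_out ? vertices_out : 1) - 1;
   if (write_stride_2exp) {
      unsigned row = vertexidx >> 5;
      unsigned swizzle = row & ((1u << write_stride_2exp) - 1u);
      vertexidx ^= swizzle;
   }
   return vertexidx;
}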
static LLVMValueRef
-ngg_gs_emit_vertex_ptr(struct radv_shader_context *ctx, LLVMValueRef gsthread,
- LLVMValueRef emitidx)
+ngg_gs_emit_vertex_ptr(struct radv_shader_context *ctx, LLVMValueRef gsthread, LLVMValueRef emitidx)
{
- LLVMBuilderRef builder = ctx->ac.builder;
- LLVMValueRef tmp;
+ LLVMBuilderRef builder = ctx->ac.builder;
+ LLVMValueRef tmp;
- tmp = LLVMConstInt(ctx->ac.i32, ctx->shader->info.gs.vertices_out, false);
- tmp = LLVMBuildMul(builder, tmp, gsthread, "");
- const LLVMValueRef vertexidx = LLVMBuildAdd(builder, tmp, emitidx, "");
- return ngg_gs_vertex_ptr(ctx, vertexidx);
+ tmp = LLVMConstInt(ctx->ac.i32, ctx->shader->info.gs.vertices_out, false);
+ tmp = LLVMBuildMul(builder, tmp, gsthread, "");
+ const LLVMValueRef vertexidx = LLVMBuildAdd(builder, tmp, emitidx, "");
+ return ngg_gs_vertex_ptr(ctx, vertexidx);
}
static LLVMValueRef
ngg_gs_get_emit_output_ptr(struct radv_shader_context *ctx, LLVMValueRef vertexptr,
- unsigned out_idx)
+ unsigned out_idx)
{
- LLVMValueRef gep_idx[3] = {
- ctx->ac.i32_0, /* implied C-style array */
- ctx->ac.i32_0, /* first struct entry */
- LLVMConstInt(ctx->ac.i32, out_idx, false),
- };
- return LLVMBuildGEP(ctx->ac.builder, vertexptr, gep_idx, 3, "");
+ LLVMValueRef gep_idx[3] = {
+ ctx->ac.i32_0, /* implied C-style array */
+ ctx->ac.i32_0, /* first struct entry */
+ LLVMConstInt(ctx->ac.i32, out_idx, false),
+ };
+ return LLVMBuildGEP(ctx->ac.builder, vertexptr, gep_idx, 3, "");
}
static LLVMValueRef
ngg_gs_get_emit_primflag_ptr(struct radv_shader_context *ctx, LLVMValueRef vertexptr,
- unsigned stream)
+ unsigned stream)
{
- LLVMValueRef gep_idx[3] = {
- ctx->ac.i32_0, /* implied C-style array */
- ctx->ac.i32_1, /* second struct entry */
- LLVMConstInt(ctx->ac.i32, stream, false),
- };
- return LLVMBuildGEP(ctx->ac.builder, vertexptr, gep_idx, 3, "");
+ LLVMValueRef gep_idx[3] = {
+ ctx->ac.i32_0, /* implied C-style array */
+ ctx->ac.i32_1, /* second struct entry */
+ LLVMConstInt(ctx->ac.i32, stream, false),
+ };
+ return LLVMBuildGEP(ctx->ac.builder, vertexptr, gep_idx, 3, "");
}
static struct radv_stream_output *
radv_get_stream_output_by_loc(struct radv_streamout_info *so, unsigned location)
{
- for (unsigned i = 0; i < so->num_outputs; ++i) {
- if (so->outputs[i].location == location)
- return &so->outputs[i];
- }
+ for (unsigned i = 0; i < so->num_outputs; ++i) {
+ if (so->outputs[i].location == location)
+ return &so->outputs[i];
+ }
- return NULL;
+ return NULL;
}
-static void build_streamout_vertex(struct radv_shader_context *ctx,
- LLVMValueRef *so_buffer, LLVMValueRef *wg_offset_dw,
- unsigned stream, LLVMValueRef offset_vtx,
- LLVMValueRef vertexptr)
+static void
+build_streamout_vertex(struct radv_shader_context *ctx, LLVMValueRef *so_buffer,
+ LLVMValueRef *wg_offset_dw, unsigned stream, LLVMValueRef offset_vtx,
+ LLVMValueRef vertexptr)
{
- struct radv_streamout_info *so = &ctx->args->shader_info->so;
- LLVMBuilderRef builder = ctx->ac.builder;
- LLVMValueRef offset[4] = {0};
- LLVMValueRef tmp;
+ struct radv_streamout_info *so = &ctx->args->shader_info->so;
+ LLVMBuilderRef builder = ctx->ac.builder;
+ LLVMValueRef offset[4] = {0};
+ LLVMValueRef tmp;
- for (unsigned buffer = 0; buffer < 4; ++buffer) {
- if (!wg_offset_dw[buffer])
- continue;
+ for (unsigned buffer = 0; buffer < 4; ++buffer) {
+ if (!wg_offset_dw[buffer])
+ continue;
- tmp = LLVMBuildMul(builder, offset_vtx,
- LLVMConstInt(ctx->ac.i32, so->strides[buffer], false), "");
- tmp = LLVMBuildAdd(builder, wg_offset_dw[buffer], tmp, "");
- offset[buffer] = LLVMBuildShl(builder, tmp, LLVMConstInt(ctx->ac.i32, 2, false), "");
- }
+ tmp = LLVMBuildMul(builder, offset_vtx, LLVMConstInt(ctx->ac.i32, so->strides[buffer], false),
+ "");
+ tmp = LLVMBuildAdd(builder, wg_offset_dw[buffer], tmp, "");
+ offset[buffer] = LLVMBuildShl(builder, tmp, LLVMConstInt(ctx->ac.i32, 2, false), "");
+ }
- if (ctx->stage == MESA_SHADER_GEOMETRY) {
- struct radv_shader_output_values outputs[AC_LLVM_MAX_OUTPUTS];
- unsigned noutput = 0;
- unsigned out_idx = 0;
+ if (ctx->stage == MESA_SHADER_GEOMETRY) {
+ struct radv_shader_output_values outputs[AC_LLVM_MAX_OUTPUTS];
+ unsigned noutput = 0;
+ unsigned out_idx = 0;
- for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
- unsigned output_usage_mask =
- ctx->args->shader_info->gs.output_usage_mask[i];
- uint8_t output_stream = ctx->args->shader_info->gs.output_streams[i];
+ for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
+ unsigned output_usage_mask = ctx->args->shader_info->gs.output_usage_mask[i];
+ uint8_t output_stream = ctx->args->shader_info->gs.output_streams[i];
- if (!(ctx->output_mask & (1ull << i)) ||
- output_stream != stream)
- continue;
+ if (!(ctx->output_mask & (1ull << i)) || output_stream != stream)
+ continue;
- outputs[noutput].slot_name = i;
- outputs[noutput].slot_index = i == VARYING_SLOT_CLIP_DIST1;
- outputs[noutput].usage_mask = output_usage_mask;
+ outputs[noutput].slot_name = i;
+ outputs[noutput].slot_index = i == VARYING_SLOT_CLIP_DIST1;
+ outputs[noutput].usage_mask = output_usage_mask;
- int length = util_last_bit(output_usage_mask);
+ int length = util_last_bit(output_usage_mask);
- for (unsigned j = 0; j < length; j++, out_idx++) {
- if (!(output_usage_mask & (1 << j)))
- continue;
+ for (unsigned j = 0; j < length; j++, out_idx++) {
+ if (!(output_usage_mask & (1 << j)))
+ continue;
- tmp = ac_build_gep0(&ctx->ac, vertexptr,
- LLVMConstInt(ctx->ac.i32, out_idx, false));
- outputs[noutput].values[j] = LLVMBuildLoad(builder, tmp, "");
- }
+ tmp = ac_build_gep0(&ctx->ac, vertexptr, LLVMConstInt(ctx->ac.i32, out_idx, false));
+ outputs[noutput].values[j] = LLVMBuildLoad(builder, tmp, "");
+ }
- for (unsigned j = length; j < 4; j++)
- outputs[noutput].values[j] = LLVMGetUndef(ctx->ac.f32);
+ for (unsigned j = length; j < 4; j++)
+ outputs[noutput].values[j] = LLVMGetUndef(ctx->ac.f32);
- noutput++;
- }
+ noutput++;
+ }
- for (unsigned i = 0; i < noutput; i++) {
- struct radv_stream_output *output =
- radv_get_stream_output_by_loc(so, outputs[i].slot_name);
+ for (unsigned i = 0; i < noutput; i++) {
+ struct radv_stream_output *output =
+ radv_get_stream_output_by_loc(so, outputs[i].slot_name);
- if (!output ||
- output->stream != stream)
- continue;
+ if (!output || output->stream != stream)
+ continue;
- struct radv_shader_output_values out = {0};
+ struct radv_shader_output_values out = {0};
- for (unsigned j = 0; j < 4; j++) {
- out.values[j] = outputs[i].values[j];
- }
+ for (unsigned j = 0; j < 4; j++) {
+ out.values[j] = outputs[i].values[j];
+ }
- radv_emit_stream_output(ctx, so_buffer, offset, output, &out);
- }
- } else {
- for (unsigned i = 0; i < so->num_outputs; ++i) {
- struct radv_stream_output *output =
- &ctx->args->shader_info->so.outputs[i];
+ radv_emit_stream_output(ctx, so_buffer, offset, output, &out);
+ }
+ } else {
+ for (unsigned i = 0; i < so->num_outputs; ++i) {
+ struct radv_stream_output *output = &ctx->args->shader_info->so.outputs[i];
- if (stream != output->stream)
- continue;
+ if (stream != output->stream)
+ continue;
- struct radv_shader_output_values out = {0};
+ struct radv_shader_output_values out = {0};
- for (unsigned comp = 0; comp < 4; comp++) {
- if (!(output->component_mask & (1 << comp)))
- continue;
+ for (unsigned comp = 0; comp < 4; comp++) {
+ if (!(output->component_mask & (1 << comp)))
+ continue;
- tmp = ac_build_gep0(&ctx->ac, vertexptr,
- LLVMConstInt(ctx->ac.i32, 4 * i + comp, false));
- out.values[comp] = LLVMBuildLoad(builder, tmp, "");
- }
+ tmp =
+ ac_build_gep0(&ctx->ac, vertexptr, LLVMConstInt(ctx->ac.i32, 4 * i + comp, false));
+ out.values[comp] = LLVMBuildLoad(builder, tmp, "");
+ }
- radv_emit_stream_output(ctx, so_buffer, offset, output, &out);
- }
- }
+ radv_emit_stream_output(ctx, so_buffer, offset, output, &out);
+ }
+ }
}
struct ngg_streamout {
- LLVMValueRef num_vertices;
+ LLVMValueRef num_vertices;
- /* per-thread data */
- LLVMValueRef prim_enable[4]; /* i1 per stream */
- LLVMValueRef vertices[3]; /* [N x i32] addrspace(LDS)* */
+ /* per-thread data */
+ LLVMValueRef prim_enable[4]; /* i1 per stream */
+ LLVMValueRef vertices[3]; /* [N x i32] addrspace(LDS)* */
- /* Output */
- LLVMValueRef emit[4]; /* per-stream emitted primitives (only valid for used streams) */
+ /* Output */
+ LLVMValueRef emit[4]; /* per-stream emitted primitives (only valid for used streams) */
};
/**
@@ -1833,1323 +1694,1228 @@ struct ngg_streamout {
*
* Clobbers gs_ngg_scratch[8:].
*/
-static void build_streamout(struct radv_shader_context *ctx,
- struct ngg_streamout *nggso)
-{
- struct radv_streamout_info *so = &ctx->args->shader_info->so;
- LLVMBuilderRef builder = ctx->ac.builder;
- LLVMValueRef buf_ptr = ac_get_arg(&ctx->ac, ctx->args->streamout_buffers);
- LLVMValueRef tid = get_thread_id_in_tg(ctx);
- LLVMValueRef cond, tmp, tmp2;
- LLVMValueRef i32_2 = LLVMConstInt(ctx->ac.i32, 2, false);
- LLVMValueRef i32_4 = LLVMConstInt(ctx->ac.i32, 4, false);
- LLVMValueRef i32_8 = LLVMConstInt(ctx->ac.i32, 8, false);
- LLVMValueRef so_buffer[4] = {0};
- unsigned max_num_vertices = 1 + (nggso->vertices[1] ? 1 : 0) +
- (nggso->vertices[2] ? 1 : 0);
- LLVMValueRef prim_stride_dw[4] = {0};
- LLVMValueRef prim_stride_dw_vgpr = LLVMGetUndef(ctx->ac.i32);
- int stream_for_buffer[4] = { -1, -1, -1, -1 };
- unsigned bufmask_for_stream[4] = {0};
- bool isgs = ctx->stage == MESA_SHADER_GEOMETRY;
- unsigned scratch_emit_base = isgs ? 4 : 0;
- LLVMValueRef scratch_emit_basev = isgs ? i32_4 : ctx->ac.i32_0;
- unsigned scratch_offset_base = isgs ? 8 : 4;
- LLVMValueRef scratch_offset_basev = isgs ? i32_8 : i32_4;
-
- ac_llvm_add_target_dep_function_attr(ctx->main_function,
- "amdgpu-gds-size", 256);
-
- /* Determine the mapping of streamout buffers to vertex streams. */
- for (unsigned i = 0; i < so->num_outputs; ++i) {
- unsigned buf = so->outputs[i].buffer;
- unsigned stream = so->outputs[i].stream;
- assert(stream_for_buffer[buf] < 0 || stream_for_buffer[buf] == stream);
- stream_for_buffer[buf] = stream;
- bufmask_for_stream[stream] |= 1 << buf;
- }
-
- for (unsigned buffer = 0; buffer < 4; ++buffer) {
- if (stream_for_buffer[buffer] == -1)
- continue;
-
- assert(so->strides[buffer]);
-
- LLVMValueRef stride_for_buffer =
- LLVMConstInt(ctx->ac.i32, so->strides[buffer], false);
- prim_stride_dw[buffer] =
- LLVMBuildMul(builder, stride_for_buffer,
- nggso->num_vertices, "");
- prim_stride_dw_vgpr = ac_build_writelane(
- &ctx->ac, prim_stride_dw_vgpr, prim_stride_dw[buffer],
- LLVMConstInt(ctx->ac.i32, buffer, false));
-
- LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, buffer, false);
- so_buffer[buffer] = ac_build_load_to_sgpr(&ctx->ac, buf_ptr,
- offset);
- }
-
- cond = LLVMBuildICmp(builder, LLVMIntEQ, get_wave_id_in_tg(ctx), ctx->ac.i32_0, "");
- ac_build_ifcc(&ctx->ac, cond, 5200);
- {
- LLVMTypeRef gdsptr = LLVMPointerType(ctx->ac.i32, AC_ADDR_SPACE_GDS);
- LLVMValueRef gdsbase = LLVMBuildIntToPtr(builder, ctx->ac.i32_0, gdsptr, "");
-
- /* Advance the streamout offsets in GDS. */
- LLVMValueRef offsets_vgpr = ac_build_alloca_undef(&ctx->ac, ctx->ac.i32, "");
- LLVMValueRef generated_by_stream_vgpr = ac_build_alloca_undef(&ctx->ac, ctx->ac.i32, "");
-
- cond = LLVMBuildICmp(builder, LLVMIntULT, ac_get_thread_id(&ctx->ac), i32_4, "");
- ac_build_ifcc(&ctx->ac, cond, 5210);
- {
- /* Fetch the number of generated primitives and store
- * it in GDS for later use.
- */
- if (isgs) {
- tmp = ac_build_gep0(&ctx->ac, ctx->gs_ngg_scratch, tid);
- tmp = LLVMBuildLoad(builder, tmp, "");
- } else {
- tmp = ac_build_writelane(&ctx->ac, ctx->ac.i32_0,
- ngg_get_prim_cnt(ctx), ctx->ac.i32_0);
- }
- LLVMBuildStore(builder, tmp, generated_by_stream_vgpr);
-
- unsigned swizzle[4];
- int unused_stream = -1;
- for (unsigned stream = 0; stream < 4; ++stream) {
- if (!ctx->args->shader_info->gs.num_stream_output_components[stream]) {
- unused_stream = stream;
- break;
- }
- }
- for (unsigned buffer = 0; buffer < 4; ++buffer) {
- if (stream_for_buffer[buffer] >= 0) {
- swizzle[buffer] = stream_for_buffer[buffer];
- } else {
- assert(unused_stream >= 0);
- swizzle[buffer] = unused_stream;
- }
- }
-
- tmp = ac_build_quad_swizzle(&ctx->ac, tmp,
- swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
- tmp = LLVMBuildMul(builder, tmp, prim_stride_dw_vgpr, "");
-
- LLVMValueRef args[] = {
- LLVMBuildIntToPtr(builder, ngg_get_ordered_id(ctx), gdsptr, ""),
- tmp,
- ctx->ac.i32_0, // ordering
- ctx->ac.i32_0, // scope
- ctx->ac.i1false, // isVolatile
- LLVMConstInt(ctx->ac.i32, 4 << 24, false), // OA index
- ctx->ac.i1true, // wave release
- ctx->ac.i1true, // wave done
- };
-
- tmp = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ds.ordered.add",
- ctx->ac.i32, args, ARRAY_SIZE(args), 0);
-
- /* Keep offsets in a VGPR for quick retrieval via readlane by
- * the first wave for bounds checking, and also store in LDS
- * for retrieval by all waves later. */
- LLVMBuildStore(builder, tmp, offsets_vgpr);
-
- tmp2 = LLVMBuildAdd(builder, ac_get_thread_id(&ctx->ac),
- scratch_offset_basev, "");
- tmp2 = ac_build_gep0(&ctx->ac, ctx->gs_ngg_scratch, tmp2);
- LLVMBuildStore(builder, tmp, tmp2);
- }
- ac_build_endif(&ctx->ac, 5210);
-
- /* Determine the max emit per buffer. This is done via the SALU, in part
- * because LLVM can't generate divide-by-multiply if we try to do this
- * via VALU with one lane per buffer.
- */
- LLVMValueRef max_emit[4] = {0};
- for (unsigned buffer = 0; buffer < 4; ++buffer) {
- if (stream_for_buffer[buffer] == -1)
- continue;
-
- /* Compute the streamout buffer size in DWORD. */
- LLVMValueRef bufsize_dw =
- LLVMBuildLShr(builder,
- LLVMBuildExtractElement(builder, so_buffer[buffer], i32_2, ""),
- i32_2, "");
-
- /* Load the streamout buffer offset from GDS. */
- tmp = LLVMBuildLoad(builder, offsets_vgpr, "");
- LLVMValueRef offset_dw =
- ac_build_readlane(&ctx->ac, tmp,
- LLVMConstInt(ctx->ac.i32, buffer, false));
-
- /* Compute the remaining size to emit. */
- LLVMValueRef remaining_dw =
- LLVMBuildSub(builder, bufsize_dw, offset_dw, "");
- tmp = LLVMBuildUDiv(builder, remaining_dw,
- prim_stride_dw[buffer], "");
-
- cond = LLVMBuildICmp(builder, LLVMIntULT,
- bufsize_dw, offset_dw, "");
- max_emit[buffer] = LLVMBuildSelect(builder, cond,
- ctx->ac.i32_0, tmp, "");
- }
-
- /* Determine the number of emitted primitives per stream and fixup the
- * GDS counter if necessary.
- *
- * This is complicated by the fact that a single stream can emit to
- * multiple buffers (but luckily not vice versa).
- */
- LLVMValueRef emit_vgpr = ctx->ac.i32_0;
-
- for (unsigned stream = 0; stream < 4; ++stream) {
- if (!ctx->args->shader_info->gs.num_stream_output_components[stream])
- continue;
-
- /* Load the number of generated primitives from GDS and
- * determine that number for the given stream.
- */
- tmp = LLVMBuildLoad(builder, generated_by_stream_vgpr, "");
- LLVMValueRef generated =
- ac_build_readlane(&ctx->ac, tmp,
- LLVMConstInt(ctx->ac.i32, stream, false));
-
-
- /* Compute the number of emitted primitives. */
- LLVMValueRef emit = generated;
- for (unsigned buffer = 0; buffer < 4; ++buffer) {
- if (stream_for_buffer[buffer] == stream)
- emit = ac_build_umin(&ctx->ac, emit, max_emit[buffer]);
- }
-
- /* Store the number of emitted primitives for that
- * stream.
- */
- emit_vgpr = ac_build_writelane(&ctx->ac, emit_vgpr, emit,
- LLVMConstInt(ctx->ac.i32, stream, false));
-
- /* Fixup the offset using a plain GDS atomic if we overflowed. */
- cond = LLVMBuildICmp(builder, LLVMIntULT, emit, generated, "");
- ac_build_ifcc(&ctx->ac, cond, 5221); /* scalar branch */
- tmp = LLVMBuildLShr(builder,
- LLVMConstInt(ctx->ac.i32, bufmask_for_stream[stream], false),
- ac_get_thread_id(&ctx->ac), "");
- tmp = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, "");
- ac_build_ifcc(&ctx->ac, tmp, 5222);
- {
- tmp = LLVMBuildSub(builder, generated, emit, "");
- tmp = LLVMBuildMul(builder, tmp, prim_stride_dw_vgpr, "");
- tmp2 = LLVMBuildGEP(builder, gdsbase, &tid, 1, "");
- LLVMBuildAtomicRMW(builder, LLVMAtomicRMWBinOpSub, tmp2, tmp,
- LLVMAtomicOrderingMonotonic, false);
- }
- ac_build_endif(&ctx->ac, 5222);
- ac_build_endif(&ctx->ac, 5221);
- }
-
- /* Store the number of emitted primitives to LDS for later use. */
- cond = LLVMBuildICmp(builder, LLVMIntULT, ac_get_thread_id(&ctx->ac), i32_4, "");
- ac_build_ifcc(&ctx->ac, cond, 5225);
- {
- tmp = LLVMBuildAdd(builder, ac_get_thread_id(&ctx->ac),
- scratch_emit_basev, "");
- tmp = ac_build_gep0(&ctx->ac, ctx->gs_ngg_scratch, tmp);
- LLVMBuildStore(builder, emit_vgpr, tmp);
- }
- ac_build_endif(&ctx->ac, 5225);
- }
- ac_build_endif(&ctx->ac, 5200);
-
- /* Determine the workgroup-relative per-thread / primitive offset into
- * the streamout buffers */
- struct ac_wg_scan primemit_scan[4] = {0};
-
- if (isgs) {
- for (unsigned stream = 0; stream < 4; ++stream) {
- if (!ctx->args->shader_info->gs.num_stream_output_components[stream])
- continue;
-
- primemit_scan[stream].enable_exclusive = true;
- primemit_scan[stream].op = nir_op_iadd;
- primemit_scan[stream].src = nggso->prim_enable[stream];
- primemit_scan[stream].scratch =
- ac_build_gep0(&ctx->ac, ctx->gs_ngg_scratch,
- LLVMConstInt(ctx->ac.i32, 12 + 8 * stream, false));
- primemit_scan[stream].waveidx = get_wave_id_in_tg(ctx);
- primemit_scan[stream].numwaves = get_tgsize(ctx);
- primemit_scan[stream].maxwaves = 8;
- ac_build_wg_scan_top(&ctx->ac, &primemit_scan[stream]);
- }
- }
-
- ac_build_s_barrier(&ctx->ac);
-
- /* Fetch the per-buffer offsets and per-stream emit counts in all waves. */
- LLVMValueRef wgoffset_dw[4] = {0};
-
- {
- LLVMValueRef scratch_vgpr;
-
- tmp = ac_build_gep0(&ctx->ac, ctx->gs_ngg_scratch, ac_get_thread_id(&ctx->ac));
- scratch_vgpr = LLVMBuildLoad(builder, tmp, "");
-
- for (unsigned buffer = 0; buffer < 4; ++buffer) {
- if (stream_for_buffer[buffer] >= 0) {
- wgoffset_dw[buffer] = ac_build_readlane(
- &ctx->ac, scratch_vgpr,
- LLVMConstInt(ctx->ac.i32, scratch_offset_base + buffer, false));
- }
- }
-
- for (unsigned stream = 0; stream < 4; ++stream) {
- if (ctx->args->shader_info->gs.num_stream_output_components[stream]) {
- nggso->emit[stream] = ac_build_readlane(
- &ctx->ac, scratch_vgpr,
- LLVMConstInt(ctx->ac.i32, scratch_emit_base + stream, false));
- }
- }
- }
-
- /* Write out primitive data */
- for (unsigned stream = 0; stream < 4; ++stream) {
- if (!ctx->args->shader_info->gs.num_stream_output_components[stream])
- continue;
-
- if (isgs) {
- ac_build_wg_scan_bottom(&ctx->ac, &primemit_scan[stream]);
- } else {
- primemit_scan[stream].result_exclusive = tid;
- }
-
- cond = LLVMBuildICmp(builder, LLVMIntULT,
- primemit_scan[stream].result_exclusive,
- nggso->emit[stream], "");
- cond = LLVMBuildAnd(builder, cond, nggso->prim_enable[stream], "");
- ac_build_ifcc(&ctx->ac, cond, 5240);
- {
- LLVMValueRef offset_vtx =
- LLVMBuildMul(builder, primemit_scan[stream].result_exclusive,
- nggso->num_vertices, "");
-
- for (unsigned i = 0; i < max_num_vertices; ++i) {
- cond = LLVMBuildICmp(builder, LLVMIntULT,
- LLVMConstInt(ctx->ac.i32, i, false),
- nggso->num_vertices, "");
- ac_build_ifcc(&ctx->ac, cond, 5241);
- build_streamout_vertex(ctx, so_buffer, wgoffset_dw,
- stream, offset_vtx, nggso->vertices[i]);
- ac_build_endif(&ctx->ac, 5241);
- offset_vtx = LLVMBuildAdd(builder, offset_vtx, ctx->ac.i32_1, "");
- }
- }
- ac_build_endif(&ctx->ac, 5240);
- }
-}
-
-static unsigned ngg_nogs_vertex_size(struct radv_shader_context *ctx)
-{
- unsigned lds_vertex_size = 0;
-
- if (ctx->args->shader_info->so.num_outputs)
- lds_vertex_size = 4 * ctx->args->shader_info->so.num_outputs + 1;
-
- return lds_vertex_size;
+static void
+build_streamout(struct radv_shader_context *ctx, struct ngg_streamout *nggso)
+{
+ struct radv_streamout_info *so = &ctx->args->shader_info->so;
+ LLVMBuilderRef builder = ctx->ac.builder;
+ LLVMValueRef buf_ptr = ac_get_arg(&ctx->ac, ctx->args->streamout_buffers);
+ LLVMValueRef tid = get_thread_id_in_tg(ctx);
+ LLVMValueRef cond, tmp, tmp2;
+ LLVMValueRef i32_2 = LLVMConstInt(ctx->ac.i32, 2, false);
+ LLVMValueRef i32_4 = LLVMConstInt(ctx->ac.i32, 4, false);
+ LLVMValueRef i32_8 = LLVMConstInt(ctx->ac.i32, 8, false);
+ LLVMValueRef so_buffer[4] = {0};
+ unsigned max_num_vertices = 1 + (nggso->vertices[1] ? 1 : 0) + (nggso->vertices[2] ? 1 : 0);
+ LLVMValueRef prim_stride_dw[4] = {0};
+ LLVMValueRef prim_stride_dw_vgpr = LLVMGetUndef(ctx->ac.i32);
+ int stream_for_buffer[4] = {-1, -1, -1, -1};
+ unsigned bufmask_for_stream[4] = {0};
+ bool isgs = ctx->stage == MESA_SHADER_GEOMETRY;
+ unsigned scratch_emit_base = isgs ? 4 : 0;
+ LLVMValueRef scratch_emit_basev = isgs ? i32_4 : ctx->ac.i32_0;
+ unsigned scratch_offset_base = isgs ? 8 : 4;
+ LLVMValueRef scratch_offset_basev = isgs ? i32_8 : i32_4;
+
+ ac_llvm_add_target_dep_function_attr(ctx->main_function, "amdgpu-gds-size", 256);
+
+ /* Determine the mapping of streamout buffers to vertex streams. */
+ for (unsigned i = 0; i < so->num_outputs; ++i) {
+ unsigned buf = so->outputs[i].buffer;
+ unsigned stream = so->outputs[i].stream;
+ assert(stream_for_buffer[buf] < 0 || stream_for_buffer[buf] == stream);
+ stream_for_buffer[buf] = stream;
+ bufmask_for_stream[stream] |= 1 << buf;
+ }
+
+ for (unsigned buffer = 0; buffer < 4; ++buffer) {
+ if (stream_for_buffer[buffer] == -1)
+ continue;
+
+ assert(so->strides[buffer]);
+
+ LLVMValueRef stride_for_buffer = LLVMConstInt(ctx->ac.i32, so->strides[buffer], false);
+ prim_stride_dw[buffer] = LLVMBuildMul(builder, stride_for_buffer, nggso->num_vertices, "");
+ prim_stride_dw_vgpr =
+ ac_build_writelane(&ctx->ac, prim_stride_dw_vgpr, prim_stride_dw[buffer],
+ LLVMConstInt(ctx->ac.i32, buffer, false));
+
+ LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, buffer, false);
+ so_buffer[buffer] = ac_build_load_to_sgpr(&ctx->ac, buf_ptr, offset);
+ }
+
+ cond = LLVMBuildICmp(builder, LLVMIntEQ, get_wave_id_in_tg(ctx), ctx->ac.i32_0, "");
+ ac_build_ifcc(&ctx->ac, cond, 5200);
+ {
+ LLVMTypeRef gdsptr = LLVMPointerType(ctx->ac.i32, AC_ADDR_SPACE_GDS);
+ LLVMValueRef gdsbase = LLVMBuildIntToPtr(builder, ctx->ac.i32_0, gdsptr, "");
+
+ /* Advance the streamout offsets in GDS. */
+ LLVMValueRef offsets_vgpr = ac_build_alloca_undef(&ctx->ac, ctx->ac.i32, "");
+ LLVMValueRef generated_by_stream_vgpr = ac_build_alloca_undef(&ctx->ac, ctx->ac.i32, "");
+
+ cond = LLVMBuildICmp(builder, LLVMIntULT, ac_get_thread_id(&ctx->ac), i32_4, "");
+ ac_build_ifcc(&ctx->ac, cond, 5210);
+ {
+ /* Fetch the number of generated primitives and store
+ * it in GDS for later use.
+ */
+ if (isgs) {
+ tmp = ac_build_gep0(&ctx->ac, ctx->gs_ngg_scratch, tid);
+ tmp = LLVMBuildLoad(builder, tmp, "");
+ } else {
+ tmp = ac_build_writelane(&ctx->ac, ctx->ac.i32_0, ngg_get_prim_cnt(ctx), ctx->ac.i32_0);
+ }
+ LLVMBuildStore(builder, tmp, generated_by_stream_vgpr);
+
+ unsigned swizzle[4];
+ int unused_stream = -1;
+ for (unsigned stream = 0; stream < 4; ++stream) {
+ if (!ctx->args->shader_info->gs.num_stream_output_components[stream]) {
+ unused_stream = stream;
+ break;
+ }
+ }
+ for (unsigned buffer = 0; buffer < 4; ++buffer) {
+ if (stream_for_buffer[buffer] >= 0) {
+ swizzle[buffer] = stream_for_buffer[buffer];
+ } else {
+ assert(unused_stream >= 0);
+ swizzle[buffer] = unused_stream;
+ }
+ }
+
+ tmp = ac_build_quad_swizzle(&ctx->ac, tmp, swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
+ tmp = LLVMBuildMul(builder, tmp, prim_stride_dw_vgpr, "");
+
+ LLVMValueRef args[] = {
+ LLVMBuildIntToPtr(builder, ngg_get_ordered_id(ctx), gdsptr, ""),
+ tmp,
+ ctx->ac.i32_0, // ordering
+ ctx->ac.i32_0, // scope
+ ctx->ac.i1false, // isVolatile
+ LLVMConstInt(ctx->ac.i32, 4 << 24, false), // OA index
+ ctx->ac.i1true, // wave release
+ ctx->ac.i1true, // wave done
+ };
+
+ tmp = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ds.ordered.add", ctx->ac.i32, args,
+ ARRAY_SIZE(args), 0);
+
+ /* Keep offsets in a VGPR for quick retrieval via readlane by
+ * the first wave for bounds checking, and also store in LDS
+ * for retrieval by all waves later. */
+ LLVMBuildStore(builder, tmp, offsets_vgpr);
+
+ tmp2 = LLVMBuildAdd(builder, ac_get_thread_id(&ctx->ac), scratch_offset_basev, "");
+ tmp2 = ac_build_gep0(&ctx->ac, ctx->gs_ngg_scratch, tmp2);
+ LLVMBuildStore(builder, tmp, tmp2);
+ }
+ ac_build_endif(&ctx->ac, 5210);
+
+ /* Determine the max emit per buffer. This is done via the SALU, in part
+ * because LLVM can't generate divide-by-multiply if we try to do this
+ * via VALU with one lane per buffer.
+ */
+ LLVMValueRef max_emit[4] = {0};
+ for (unsigned buffer = 0; buffer < 4; ++buffer) {
+ if (stream_for_buffer[buffer] == -1)
+ continue;
+
+ /* Compute the streamout buffer size in DWORD. */
+ LLVMValueRef bufsize_dw = LLVMBuildLShr(
+ builder, LLVMBuildExtractElement(builder, so_buffer[buffer], i32_2, ""), i32_2, "");
+
+ /* Load the streamout buffer offset from GDS. */
+ tmp = LLVMBuildLoad(builder, offsets_vgpr, "");
+ LLVMValueRef offset_dw =
+ ac_build_readlane(&ctx->ac, tmp, LLVMConstInt(ctx->ac.i32, buffer, false));
+
+ /* Compute the remaining size to emit. */
+ LLVMValueRef remaining_dw = LLVMBuildSub(builder, bufsize_dw, offset_dw, "");
+ tmp = LLVMBuildUDiv(builder, remaining_dw, prim_stride_dw[buffer], "");
+
+ cond = LLVMBuildICmp(builder, LLVMIntULT, bufsize_dw, offset_dw, "");
+ max_emit[buffer] = LLVMBuildSelect(builder, cond, ctx->ac.i32_0, tmp, "");
+ }
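Scalar sketch of the per-buffer bound computed in this loop (illustrative): the buffer size comes from the descriptor in bytes and is converted to dwords, and the result is how many whole primitives still fit after the current GDS offset.

#include <stdint.h>

static uint32_t max_emit_for_buffer(uint32_t bufsize_bytes, uint32_t offset_dw,
                                    uint32_t prim_stride_dw)
{
   uint32_t bufsize_dw = bufsize_bytes >> 2;  /* bytes -> dwords */
   if (bufsize_dw < offset_dw)                /* offset already past the end */
      return 0;
   return (bufsize_dw - offset_dw) / prim_stride_dw;
}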
+
+ /* Determine the number of emitted primitives per stream and fixup the
+ * GDS counter if necessary.
+ *
+ * This is complicated by the fact that a single stream can emit to
+ * multiple buffers (but luckily not vice versa).
+ */
+ LLVMValueRef emit_vgpr = ctx->ac.i32_0;
+
+ for (unsigned stream = 0; stream < 4; ++stream) {
+ if (!ctx->args->shader_info->gs.num_stream_output_components[stream])
+ continue;
+
+ /* Load the number of generated primitives from GDS and
+ * determine that number for the given stream.
+ */
+ tmp = LLVMBuildLoad(builder, generated_by_stream_vgpr, "");
+ LLVMValueRef generated =
+ ac_build_readlane(&ctx->ac, tmp, LLVMConstInt(ctx->ac.i32, stream, false));
+
+ /* Compute the number of emitted primitives. */
+ LLVMValueRef emit = generated;
+ for (unsigned buffer = 0; buffer < 4; ++buffer) {
+ if (stream_for_buffer[buffer] == stream)
+ emit = ac_build_umin(&ctx->ac, emit, max_emit[buffer]);
+ }
+
+ /* Store the number of emitted primitives for that
+ * stream.
+ */
+ emit_vgpr =
+ ac_build_writelane(&ctx->ac, emit_vgpr, emit, LLVMConstInt(ctx->ac.i32, stream, false));
+
+ /* Fixup the offset using a plain GDS atomic if we overflowed. */
+ cond = LLVMBuildICmp(builder, LLVMIntULT, emit, generated, "");
+ ac_build_ifcc(&ctx->ac, cond, 5221); /* scalar branch */
+ tmp = LLVMBuildLShr(builder, LLVMConstInt(ctx->ac.i32, bufmask_for_stream[stream], false),
+ ac_get_thread_id(&ctx->ac), "");
+ tmp = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, "");
+ ac_build_ifcc(&ctx->ac, tmp, 5222);
+ {
+ tmp = LLVMBuildSub(builder, generated, emit, "");
+ tmp = LLVMBuildMul(builder, tmp, prim_stride_dw_vgpr, "");
+ tmp2 = LLVMBuildGEP(builder, gdsbase, &tid, 1, "");
+ LLVMBuildAtomicRMW(builder, LLVMAtomicRMWBinOpSub, tmp2, tmp,
+ LLVMAtomicOrderingMonotonic, false);
+ }
+ ac_build_endif(&ctx->ac, 5222);
+ ac_build_endif(&ctx->ac, 5221);
+ }
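And a simplified scalar view of the clamp plus GDS rollback performed per stream above, shown for the single-buffer case (the real code does this with readlane/writelane, one lane per buffer; names here are illustrative):

static uint32_t clamp_emit_and_fixup(uint32_t generated, uint32_t max_emit,
                                     uint32_t prim_stride_dw, uint32_t *gds_offset_dw)
{
   uint32_t emit = generated < max_emit ? generated : max_emit;
   /* The ordered add already advanced the GDS offset by `generated` primitives,
    * so roll back the part that will not actually be written. */
   if (emit < generated)
      *gds_offset_dw -= (generated - emit) * prim_stride_dw;
   return emit;
}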
+
+ /* Store the number of emitted primitives to LDS for later use. */
+ cond = LLVMBuildICmp(builder, LLVMIntULT, ac_get_thread_id(&ctx->ac), i32_4, "");
+ ac_build_ifcc(&ctx->ac, cond, 5225);
+ {
+ tmp = LLVMBuildAdd(builder, ac_get_thread_id(&ctx->ac), scratch_emit_basev, "");
+ tmp = ac_build_gep0(&ctx->ac, ctx->gs_ngg_scratch, tmp);
+ LLVMBuildStore(builder, emit_vgpr, tmp);
+ }
+ ac_build_endif(&ctx->ac, 5225);
+ }
+ ac_build_endif(&ctx->ac, 5200);
+
+ /* Determine the workgroup-relative per-thread / primitive offset into
+ * the streamout buffers */
+ struct ac_wg_scan primemit_scan[4] = {0};
+
+ if (isgs) {
+ for (unsigned stream = 0; stream < 4; ++stream) {
+ if (!ctx->args->shader_info->gs.num_stream_output_components[stream])
+ continue;
+
+ primemit_scan[stream].enable_exclusive = true;
+ primemit_scan[stream].op = nir_op_iadd;
+ primemit_scan[stream].src = nggso->prim_enable[stream];
+ primemit_scan[stream].scratch = ac_build_gep0(
+ &ctx->ac, ctx->gs_ngg_scratch, LLVMConstInt(ctx->ac.i32, 12 + 8 * stream, false));
+ primemit_scan[stream].waveidx = get_wave_id_in_tg(ctx);
+ primemit_scan[stream].numwaves = get_tgsize(ctx);
+ primemit_scan[stream].maxwaves = 8;
+ ac_build_wg_scan_top(&ctx->ac, &primemit_scan[stream]);
+ }
+ }
+
+ ac_build_s_barrier(&ctx->ac);
+
+ /* Fetch the per-buffer offsets and per-stream emit counts in all waves. */
+ LLVMValueRef wgoffset_dw[4] = {0};
+
+ {
+ LLVMValueRef scratch_vgpr;
+
+ tmp = ac_build_gep0(&ctx->ac, ctx->gs_ngg_scratch, ac_get_thread_id(&ctx->ac));
+ scratch_vgpr = LLVMBuildLoad(builder, tmp, "");
+
+ for (unsigned buffer = 0; buffer < 4; ++buffer) {
+ if (stream_for_buffer[buffer] >= 0) {
+ wgoffset_dw[buffer] =
+ ac_build_readlane(&ctx->ac, scratch_vgpr,
+ LLVMConstInt(ctx->ac.i32, scratch_offset_base + buffer, false));
+ }
+ }
+
+ for (unsigned stream = 0; stream < 4; ++stream) {
+ if (ctx->args->shader_info->gs.num_stream_output_components[stream]) {
+ nggso->emit[stream] =
+ ac_build_readlane(&ctx->ac, scratch_vgpr,
+ LLVMConstInt(ctx->ac.i32, scratch_emit_base + stream, false));
+ }
+ }
+ }
+
+ /* Write out primitive data */
+ for (unsigned stream = 0; stream < 4; ++stream) {
+ if (!ctx->args->shader_info->gs.num_stream_output_components[stream])
+ continue;
+
+ if (isgs) {
+ ac_build_wg_scan_bottom(&ctx->ac, &primemit_scan[stream]);
+ } else {
+ primemit_scan[stream].result_exclusive = tid;
+ }
+
+ cond = LLVMBuildICmp(builder, LLVMIntULT, primemit_scan[stream].result_exclusive,
+ nggso->emit[stream], "");
+ cond = LLVMBuildAnd(builder, cond, nggso->prim_enable[stream], "");
+ ac_build_ifcc(&ctx->ac, cond, 5240);
+ {
+ LLVMValueRef offset_vtx =
+ LLVMBuildMul(builder, primemit_scan[stream].result_exclusive, nggso->num_vertices, "");
+
+ for (unsigned i = 0; i < max_num_vertices; ++i) {
+ cond = LLVMBuildICmp(builder, LLVMIntULT, LLVMConstInt(ctx->ac.i32, i, false),
+ nggso->num_vertices, "");
+ ac_build_ifcc(&ctx->ac, cond, 5241);
+ build_streamout_vertex(ctx, so_buffer, wgoffset_dw, stream, offset_vtx,
+ nggso->vertices[i]);
+ ac_build_endif(&ctx->ac, 5241);
+ offset_vtx = LLVMBuildAdd(builder, offset_vtx, ctx->ac.i32_1, "");
+ }
+ }
+ ac_build_endif(&ctx->ac, 5240);
+ }
+}
+
+static unsigned
+ngg_nogs_vertex_size(struct radv_shader_context *ctx)
+{
+ unsigned lds_vertex_size = 0;
+
+ if (ctx->args->shader_info->so.num_outputs)
+ lds_vertex_size = 4 * ctx->args->shader_info->so.num_outputs + 1;
+
+ return lds_vertex_size;
}
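Equivalently, as a plain-C sketch: a vertex captured for streamout occupies 4 dwords per captured output plus one padding dword, which is the bank-conflict avoidance mentioned in ngg_nogs_vertex_ptr below.

static unsigned nogs_lds_vertex_dwords(unsigned num_so_outputs)
{
   /* 4 dwords per captured output, plus one padding dword so consecutive
    * vertices are staggered across LDS banks. */
   return num_so_outputs ? 4 * num_so_outputs + 1 : 0;
}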
/**
* Returns an `[N x i32] addrspace(LDS)*` pointing at contiguous LDS storage
* for the vertex outputs.
*/
-static LLVMValueRef ngg_nogs_vertex_ptr(struct radv_shader_context *ctx,
- LLVMValueRef vtxid)
+static LLVMValueRef
+ngg_nogs_vertex_ptr(struct radv_shader_context *ctx, LLVMValueRef vtxid)
{
- /* The extra dword is used to avoid LDS bank conflicts. */
- unsigned vertex_size = ngg_nogs_vertex_size(ctx);
- LLVMTypeRef ai32 = LLVMArrayType(ctx->ac.i32, vertex_size);
- LLVMTypeRef pai32 = LLVMPointerType(ai32, AC_ADDR_SPACE_LDS);
- LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, ctx->esgs_ring, pai32, "");
- return LLVMBuildGEP(ctx->ac.builder, tmp, &vtxid, 1, "");
+ /* The extra dword is used to avoid LDS bank conflicts. */
+ unsigned vertex_size = ngg_nogs_vertex_size(ctx);
+ LLVMTypeRef ai32 = LLVMArrayType(ctx->ac.i32, vertex_size);
+ LLVMTypeRef pai32 = LLVMPointerType(ai32, AC_ADDR_SPACE_LDS);
+ LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, ctx->esgs_ring, pai32, "");
+ return LLVMBuildGEP(ctx->ac.builder, tmp, &vtxid, 1, "");
}
static void
handle_ngg_outputs_post_1(struct radv_shader_context *ctx)
{
- struct radv_streamout_info *so = &ctx->args->shader_info->so;
- LLVMBuilderRef builder = ctx->ac.builder;
- LLVMValueRef vertex_ptr = NULL;
- LLVMValueRef tmp, tmp2;
+ struct radv_streamout_info *so = &ctx->args->shader_info->so;
+ LLVMBuilderRef builder = ctx->ac.builder;
+ LLVMValueRef vertex_ptr = NULL;
+ LLVMValueRef tmp, tmp2;
- assert((ctx->stage == MESA_SHADER_VERTEX ||
- ctx->stage == MESA_SHADER_TESS_EVAL) && !ctx->args->is_gs_copy_shader);
+ assert((ctx->stage == MESA_SHADER_VERTEX || ctx->stage == MESA_SHADER_TESS_EVAL) &&
+ !ctx->args->is_gs_copy_shader);
- if (!ctx->args->shader_info->so.num_outputs)
- return;
+ if (!ctx->args->shader_info->so.num_outputs)
+ return;
- vertex_ptr = ngg_nogs_vertex_ptr(ctx, get_thread_id_in_tg(ctx));
+ vertex_ptr = ngg_nogs_vertex_ptr(ctx, get_thread_id_in_tg(ctx));
- for (unsigned i = 0; i < so->num_outputs; ++i) {
- struct radv_stream_output *output =
- &ctx->args->shader_info->so.outputs[i];
+ for (unsigned i = 0; i < so->num_outputs; ++i) {
+ struct radv_stream_output *output = &ctx->args->shader_info->so.outputs[i];
- unsigned loc = output->location;
+ unsigned loc = output->location;
- for (unsigned comp = 0; comp < 4; comp++) {
- if (!(output->component_mask & (1 << comp)))
- continue;
+ for (unsigned comp = 0; comp < 4; comp++) {
+ if (!(output->component_mask & (1 << comp)))
+ continue;
- tmp = ac_build_gep0(&ctx->ac, vertex_ptr,
- LLVMConstInt(ctx->ac.i32, 4 * i + comp, false));
- tmp2 = LLVMBuildLoad(builder,
- ctx->abi.outputs[4 * loc + comp], "");
- tmp2 = ac_to_integer(&ctx->ac, tmp2);
- LLVMBuildStore(builder, tmp2, tmp);
- }
- }
+ tmp = ac_build_gep0(&ctx->ac, vertex_ptr, LLVMConstInt(ctx->ac.i32, 4 * i + comp, false));
+ tmp2 = LLVMBuildLoad(builder, ctx->abi.outputs[4 * loc + comp], "");
+ tmp2 = ac_to_integer(&ctx->ac, tmp2);
+ LLVMBuildStore(builder, tmp2, tmp);
+ }
+ }
}
static void
handle_ngg_outputs_post_2(struct radv_shader_context *ctx)
{
- LLVMBuilderRef builder = ctx->ac.builder;
- LLVMValueRef tmp;
-
- assert((ctx->stage == MESA_SHADER_VERTEX ||
- ctx->stage == MESA_SHADER_TESS_EVAL) && !ctx->args->is_gs_copy_shader);
-
- LLVMValueRef prims_in_wave = ac_unpack_param(&ctx->ac,
- ac_get_arg(&ctx->ac, ctx->args->ac.merged_wave_info), 8, 8);
- LLVMValueRef vtx_in_wave = ac_unpack_param(&ctx->ac,
- ac_get_arg(&ctx->ac, ctx->args->ac.merged_wave_info), 0, 8);
- LLVMValueRef is_gs_thread = LLVMBuildICmp(builder, LLVMIntULT,
- ac_get_thread_id(&ctx->ac), prims_in_wave, "");
- LLVMValueRef is_es_thread = LLVMBuildICmp(builder, LLVMIntULT,
- ac_get_thread_id(&ctx->ac), vtx_in_wave, "");
- LLVMValueRef vtxindex[] = {
- ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.gs_vtx_offset[0]), 0, 16),
- ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.gs_vtx_offset[0]), 16, 16),
- ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.gs_vtx_offset[2]), 0, 16),
- };
-
- /* Determine the number of vertices per primitive. */
- unsigned num_vertices;
- LLVMValueRef num_vertices_val;
-
- if (ctx->stage == MESA_SHADER_VERTEX) {
- LLVMValueRef outprim_val =
- LLVMConstInt(ctx->ac.i32,
- ctx->args->options->key.vs.outprim, false);
- num_vertices_val = LLVMBuildAdd(builder, outprim_val,
- ctx->ac.i32_1, "");
- num_vertices = 3; /* TODO: optimize for points & lines */
- } else {
- assert(ctx->stage == MESA_SHADER_TESS_EVAL);
-
- if (ctx->shader->info.tess.point_mode)
- num_vertices = 1;
- else if (ctx->shader->info.tess.primitive_mode == GL_ISOLINES)
- num_vertices = 2;
- else
- num_vertices = 3;
-
- num_vertices_val = LLVMConstInt(ctx->ac.i32, num_vertices, false);
- }
-
- /* Streamout */
- if (ctx->args->shader_info->so.num_outputs) {
- struct ngg_streamout nggso = {0};
-
- nggso.num_vertices = num_vertices_val;
- nggso.prim_enable[0] = is_gs_thread;
-
- for (unsigned i = 0; i < num_vertices; ++i)
- nggso.vertices[i] = ngg_nogs_vertex_ptr(ctx, vtxindex[i]);
-
- build_streamout(ctx, &nggso);
- }
-
- /* Copy Primitive IDs from GS threads to the LDS address corresponding
- * to the ES thread of the provoking vertex.
- */
- if (ctx->stage == MESA_SHADER_VERTEX &&
- ctx->args->options->key.vs_common_out.export_prim_id) {
- if (ctx->args->shader_info->so.num_outputs)
- ac_build_s_barrier(&ctx->ac);
-
- ac_build_ifcc(&ctx->ac, is_gs_thread, 5400);
- /* Extract the PROVOKING_VTX_INDEX field. */
- LLVMValueRef provoking_vtx_in_prim =
- LLVMConstInt(ctx->ac.i32, 0, false);
-
- /* provoking_vtx_index = vtxindex[provoking_vtx_in_prim]; */
- LLVMValueRef indices = ac_build_gather_values(&ctx->ac, vtxindex, 3);
- LLVMValueRef provoking_vtx_index =
- LLVMBuildExtractElement(builder, indices, provoking_vtx_in_prim, "");
-
- LLVMBuildStore(builder, ac_get_arg(&ctx->ac, ctx->args->ac.gs_prim_id),
- ac_build_gep0(&ctx->ac, ctx->esgs_ring, provoking_vtx_index));
- ac_build_endif(&ctx->ac, 5400);
- }
-
- /* TODO: primitive culling */
-
- ac_build_sendmsg_gs_alloc_req(&ctx->ac, get_wave_id_in_tg(ctx),
- ngg_get_vtx_cnt(ctx), ngg_get_prim_cnt(ctx));
-
- /* TODO: streamout queries */
- /* Export primitive data to the index buffer.
- *
- * For the first version, we will always build up all three indices
- * independent of the primitive type. The additional garbage data
- * shouldn't hurt.
- *
- * TODO: culling depends on the primitive type, so can have some
- * interaction here.
- */
- ac_build_ifcc(&ctx->ac, is_gs_thread, 6001);
- {
- struct ac_ngg_prim prim = {0};
-
- if (ctx->args->options->key.vs_common_out.as_ngg_passthrough) {
- prim.passthrough = ac_get_arg(&ctx->ac, ctx->args->ac.gs_vtx_offset[0]);
- } else {
- prim.num_vertices = num_vertices;
- prim.isnull = ctx->ac.i1false;
- memcpy(prim.index, vtxindex, sizeof(vtxindex[0]) * 3);
-
- for (unsigned i = 0; i < num_vertices; ++i) {
- tmp = LLVMBuildLShr(builder,
- ac_get_arg(&ctx->ac, ctx->args->ac.gs_invocation_id),
- LLVMConstInt(ctx->ac.i32, 8 + i, false), "");
- prim.edgeflag[i] = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, "");
- }
- }
-
- ac_build_export_prim(&ctx->ac, &prim);
- }
- ac_build_endif(&ctx->ac, 6001);
-
- /* Export per-vertex data (positions and parameters). */
- ac_build_ifcc(&ctx->ac, is_es_thread, 6002);
- {
- struct radv_vs_output_info *outinfo =
- ctx->stage == MESA_SHADER_TESS_EVAL ?
- &ctx->args->shader_info->tes.outinfo : &ctx->args->shader_info->vs.outinfo;
-
- /* Exporting the primitive ID is handled below. */
- /* TODO: use the new VS export path */
- handle_vs_outputs_post(ctx, false,
- ctx->args->options->key.vs_common_out.export_clip_dists,
- outinfo);
-
- if (ctx->args->options->key.vs_common_out.export_prim_id) {
- unsigned param_count = outinfo->param_exports;
- LLVMValueRef values[4];
-
- if (ctx->stage == MESA_SHADER_VERTEX) {
- /* Wait for GS stores to finish. */
- ac_build_s_barrier(&ctx->ac);
-
- tmp = ac_build_gep0(&ctx->ac, ctx->esgs_ring,
- get_thread_id_in_tg(ctx));
- values[0] = LLVMBuildLoad(builder, tmp, "");
- } else {
- assert(ctx->stage == MESA_SHADER_TESS_EVAL);
- values[0] = ac_get_arg(&ctx->ac, ctx->args->ac.tes_patch_id);
- }
-
- values[0] = ac_to_float(&ctx->ac, values[0]);
- for (unsigned j = 1; j < 4; j++)
- values[j] = ctx->ac.f32_0;
-
- radv_export_param(ctx, param_count, values, 0x1);
-
- outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = param_count++;
- outinfo->param_exports = param_count;
- }
- }
- ac_build_endif(&ctx->ac, 6002);
-}
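For reference, the vertices-per-primitive selection near the top of this function boils down to the sketch below (illustrative; it assumes the VS `outprim` key encodes points/lines/triangles as 0/1/2, which is why adding one gives the vertex count).

static unsigned ngg_vertices_per_prim(int is_tess_eval, int point_mode, int isolines,
                                      unsigned vs_outprim_key)
{
   if (!is_tess_eval)
      return vs_outprim_key + 1; /* 0 -> points, 1 -> lines, 2 -> triangles */
   if (point_mode)
      return 1;
   if (isolines)
      return 2;
   return 3;
}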
-
-static void gfx10_ngg_gs_emit_prologue(struct radv_shader_context *ctx)
-{
- /* Zero out the part of LDS scratch that is used to accumulate the
- * per-stream generated primitive count.
- */
- LLVMBuilderRef builder = ctx->ac.builder;
- LLVMValueRef scratchptr = ctx->gs_ngg_scratch;
- LLVMValueRef tid = get_thread_id_in_tg(ctx);
- LLVMBasicBlockRef merge_block;
- LLVMValueRef cond;
-
- LLVMValueRef fn = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->ac.builder));
- LLVMBasicBlockRef then_block = LLVMAppendBasicBlockInContext(ctx->ac.context, fn, "");
- merge_block = LLVMAppendBasicBlockInContext(ctx->ac.context, fn, "");
-
- cond = LLVMBuildICmp(builder, LLVMIntULT, tid, LLVMConstInt(ctx->ac.i32, 4, false), "");
- LLVMBuildCondBr(ctx->ac.builder, cond, then_block, merge_block);
- LLVMPositionBuilderAtEnd(ctx->ac.builder, then_block);
-
- LLVMValueRef ptr = ac_build_gep0(&ctx->ac, scratchptr, tid);
- LLVMBuildStore(builder, ctx->ac.i32_0, ptr);
-
- LLVMBuildBr(ctx->ac.builder, merge_block);
- LLVMPositionBuilderAtEnd(ctx->ac.builder, merge_block);
-
- ac_build_s_barrier(&ctx->ac);
-}
-
-static void gfx10_ngg_gs_emit_epilogue_1(struct radv_shader_context *ctx)
-{
- LLVMBuilderRef builder = ctx->ac.builder;
- LLVMValueRef i8_0 = LLVMConstInt(ctx->ac.i8, 0, false);
- LLVMValueRef tmp;
-
- /* Zero out remaining (non-emitted) primitive flags.
- *
- * Note: Alternatively, we could pass the relevant gs_next_vertex to
- * the emit threads via LDS. This is likely worse in the expected
- * typical case where each GS thread emits the full set of
- * vertices.
- */
- for (unsigned stream = 0; stream < 4; ++stream) {
- unsigned num_components;
-
- num_components =
- ctx->args->shader_info->gs.num_stream_output_components[stream];
- if (!num_components)
- continue;
-
- const LLVMValueRef gsthread = get_thread_id_in_tg(ctx);
-
- ac_build_bgnloop(&ctx->ac, 5100);
-
- const LLVMValueRef vertexidx =
- LLVMBuildLoad(builder, ctx->gs_next_vertex[stream], "");
- tmp = LLVMBuildICmp(builder, LLVMIntUGE, vertexidx,
- LLVMConstInt(ctx->ac.i32, ctx->shader->info.gs.vertices_out, false), "");
- ac_build_ifcc(&ctx->ac, tmp, 5101);
- ac_build_break(&ctx->ac);
- ac_build_endif(&ctx->ac, 5101);
-
- tmp = LLVMBuildAdd(builder, vertexidx, ctx->ac.i32_1, "");
- LLVMBuildStore(builder, tmp, ctx->gs_next_vertex[stream]);
-
- tmp = ngg_gs_emit_vertex_ptr(ctx, gsthread, vertexidx);
- LLVMBuildStore(builder, i8_0,
- ngg_gs_get_emit_primflag_ptr(ctx, tmp, stream));
-
- ac_build_endloop(&ctx->ac, 5100);
- }
-
- /* Accumulate generated primitives counts across the entire threadgroup. */
- for (unsigned stream = 0; stream < 4; ++stream) {
- unsigned num_components;
-
- num_components =
- ctx->args->shader_info->gs.num_stream_output_components[stream];
- if (!num_components)
- continue;
-
- LLVMValueRef numprims =
- LLVMBuildLoad(builder, ctx->gs_generated_prims[stream], "");
- numprims = ac_build_reduce(&ctx->ac, numprims, nir_op_iadd, ctx->ac.wave_size);
-
- tmp = LLVMBuildICmp(builder, LLVMIntEQ, ac_get_thread_id(&ctx->ac), ctx->ac.i32_0, "");
- ac_build_ifcc(&ctx->ac, tmp, 5105);
- {
- LLVMBuildAtomicRMW(builder, LLVMAtomicRMWBinOpAdd,
- ac_build_gep0(&ctx->ac, ctx->gs_ngg_scratch,
- LLVMConstInt(ctx->ac.i32, stream, false)),
- numprims, LLVMAtomicOrderingMonotonic, false);
- }
- ac_build_endif(&ctx->ac, 5105);
- }
-}
-
-static void gfx10_ngg_gs_emit_epilogue_2(struct radv_shader_context *ctx)
-{
- const unsigned verts_per_prim = si_conv_gl_prim_to_vertices(ctx->shader->info.gs.output_primitive);
- LLVMBuilderRef builder = ctx->ac.builder;
- LLVMValueRef tmp, tmp2;
-
- ac_build_s_barrier(&ctx->ac);
-
- const LLVMValueRef tid = get_thread_id_in_tg(ctx);
- LLVMValueRef num_emit_threads = ngg_get_prim_cnt(ctx);
-
- /* Streamout */
- if (ctx->args->shader_info->so.num_outputs) {
- struct ngg_streamout nggso = {0};
-
- nggso.num_vertices = LLVMConstInt(ctx->ac.i32, verts_per_prim, false);
-
- LLVMValueRef vertexptr = ngg_gs_vertex_ptr(ctx, tid);
- for (unsigned stream = 0; stream < 4; ++stream) {
- if (!ctx->args->shader_info->gs.num_stream_output_components[stream])
- continue;
-
- tmp = LLVMBuildLoad(builder,
- ngg_gs_get_emit_primflag_ptr(ctx, vertexptr, stream), "");
- tmp = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, "");
- tmp2 = LLVMBuildICmp(builder, LLVMIntULT, tid, num_emit_threads, "");
- nggso.prim_enable[stream] = LLVMBuildAnd(builder, tmp, tmp2, "");
- }
-
- for (unsigned i = 0; i < verts_per_prim; ++i) {
- tmp = LLVMBuildSub(builder, tid,
- LLVMConstInt(ctx->ac.i32, verts_per_prim - i - 1, false), "");
- tmp = ngg_gs_vertex_ptr(ctx, tmp);
- nggso.vertices[i] = ac_build_gep0(&ctx->ac, tmp, ctx->ac.i32_0);
- }
-
- build_streamout(ctx, &nggso);
- }
-
- /* Write shader query data. */
- tmp = ac_get_arg(&ctx->ac, ctx->args->ngg_gs_state);
- tmp = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, "");
- ac_build_ifcc(&ctx->ac, tmp, 5109);
- tmp = LLVMBuildICmp(builder, LLVMIntULT, tid,
- LLVMConstInt(ctx->ac.i32, 4, false), "");
- ac_build_ifcc(&ctx->ac, tmp, 5110);
- {
- tmp = LLVMBuildLoad(builder, ac_build_gep0(&ctx->ac, ctx->gs_ngg_scratch, tid), "");
-
- ac_llvm_add_target_dep_function_attr(ctx->main_function,
- "amdgpu-gds-size", 256);
-
- LLVMTypeRef gdsptr = LLVMPointerType(ctx->ac.i32, AC_ADDR_SPACE_GDS);
- LLVMValueRef gdsbase = LLVMBuildIntToPtr(builder, ctx->ac.i32_0, gdsptr, "");
-
- const char *sync_scope = LLVM_VERSION_MAJOR >= 9 ? "workgroup-one-as" : "workgroup";
-
- /* Use a plain GDS atomic to accumulate the number of generated
- * primitives.
- */
- ac_build_atomic_rmw(&ctx->ac, LLVMAtomicRMWBinOpAdd, gdsbase,
- tmp, sync_scope);
- }
- ac_build_endif(&ctx->ac, 5110);
- ac_build_endif(&ctx->ac, 5109);
-
- /* TODO: culling */
-
- /* Determine vertex liveness. */
- LLVMValueRef vertliveptr = ac_build_alloca(&ctx->ac, ctx->ac.i1, "vertexlive");
-
- tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, num_emit_threads, "");
- ac_build_ifcc(&ctx->ac, tmp, 5120);
- {
- for (unsigned i = 0; i < verts_per_prim; ++i) {
- const LLVMValueRef primidx =
- LLVMBuildAdd(builder, tid,
- LLVMConstInt(ctx->ac.i32, i, false), "");
-
- if (i > 0) {
- tmp = LLVMBuildICmp(builder, LLVMIntULT, primidx, num_emit_threads, "");
- ac_build_ifcc(&ctx->ac, tmp, 5121 + i);
- }
-
- /* Load primitive liveness */
- tmp = ngg_gs_vertex_ptr(ctx, primidx);
- tmp = LLVMBuildLoad(builder,
- ngg_gs_get_emit_primflag_ptr(ctx, tmp, 0), "");
- const LLVMValueRef primlive =
- LLVMBuildTrunc(builder, tmp, ctx->ac.i1, "");
-
- tmp = LLVMBuildLoad(builder, vertliveptr, "");
- tmp = LLVMBuildOr(builder, tmp, primlive, ""),
- LLVMBuildStore(builder, tmp, vertliveptr);
-
- if (i > 0)
- ac_build_endif(&ctx->ac, 5121 + i);
- }
- }
- ac_build_endif(&ctx->ac, 5120);
-
- /* Inclusive scan addition across the current wave. */
- LLVMValueRef vertlive = LLVMBuildLoad(builder, vertliveptr, "");
- struct ac_wg_scan vertlive_scan = {0};
- vertlive_scan.op = nir_op_iadd;
- vertlive_scan.enable_reduce = true;
- vertlive_scan.enable_exclusive = true;
- vertlive_scan.src = vertlive;
- vertlive_scan.scratch = ac_build_gep0(&ctx->ac, ctx->gs_ngg_scratch, ctx->ac.i32_0);
- vertlive_scan.waveidx = get_wave_id_in_tg(ctx);
- vertlive_scan.numwaves = get_tgsize(ctx);
- vertlive_scan.maxwaves = 8;
-
- ac_build_wg_scan(&ctx->ac, &vertlive_scan);
-
- /* Skip all exports (including index exports) when possible. At least on
- * early gfx10 revisions this is also to avoid hangs.
- */
- LLVMValueRef have_exports =
- LLVMBuildICmp(builder, LLVMIntNE, vertlive_scan.result_reduce, ctx->ac.i32_0, "");
- num_emit_threads =
- LLVMBuildSelect(builder, have_exports, num_emit_threads, ctx->ac.i32_0, "");
-
- /* Allocate export space. Send this message as early as possible, to
- * hide the latency of the SQ <-> SPI roundtrip.
- *
- * Note: We could consider compacting primitives for export as well.
- * PA processes 1 non-null prim / clock, but it fetches 4 DW of
- * prim data per clock and skips null primitives at no additional
- * cost. So compacting primitives can only be beneficial when
- * there are 4 or more contiguous null primitives in the export
- * (in the common case of single-dword prim exports).
- */
- ac_build_sendmsg_gs_alloc_req(&ctx->ac, get_wave_id_in_tg(ctx),
- vertlive_scan.result_reduce, num_emit_threads);
-
- /* Setup the reverse vertex compaction permutation. We re-use stream 1
- * of the primitive liveness flags, relying on the fact that each
- * threadgroup can have at most 256 threads. */
- ac_build_ifcc(&ctx->ac, vertlive, 5130);
- {
- tmp = ngg_gs_vertex_ptr(ctx, vertlive_scan.result_exclusive);
- tmp2 = LLVMBuildTrunc(builder, tid, ctx->ac.i8, "");
- LLVMBuildStore(builder, tmp2,
- ngg_gs_get_emit_primflag_ptr(ctx, tmp, 1));
- }
- ac_build_endif(&ctx->ac, 5130);
-
- ac_build_s_barrier(&ctx->ac);
-
- /* Export primitive data */
- tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, num_emit_threads, "");
- ac_build_ifcc(&ctx->ac, tmp, 5140);
- {
- LLVMValueRef flags;
- struct ac_ngg_prim prim = {0};
- prim.num_vertices = verts_per_prim;
-
- tmp = ngg_gs_vertex_ptr(ctx, tid);
- flags = LLVMBuildLoad(builder,
- ngg_gs_get_emit_primflag_ptr(ctx, tmp, 0), "");
- prim.isnull = LLVMBuildNot(builder, LLVMBuildTrunc(builder, flags, ctx->ac.i1, ""), "");
-
- for (unsigned i = 0; i < verts_per_prim; ++i) {
- prim.index[i] = LLVMBuildSub(builder, vertlive_scan.result_exclusive,
- LLVMConstInt(ctx->ac.i32, verts_per_prim - i - 1, false), "");
- prim.edgeflag[i] = ctx->ac.i1false;
- }
-
- /* Geometry shaders output triangle strips, but NGG expects
- * triangles. We need to change the vertex order for odd
- * triangles to get correct front/back facing by swapping 2
- * vertex indices, but we also have to keep the provoking
- * vertex in the same place.
- */
- if (verts_per_prim == 3) {
- LLVMValueRef is_odd = LLVMBuildLShr(builder, flags, ctx->ac.i8_1, "");
- is_odd = LLVMBuildTrunc(builder, is_odd, ctx->ac.i1, "");
-
- struct ac_ngg_prim in = prim;
- prim.index[0] = in.index[0];
- prim.index[1] = LLVMBuildSelect(builder, is_odd,
- in.index[2], in.index[1], "");
- prim.index[2] = LLVMBuildSelect(builder, is_odd,
- in.index[1], in.index[2], "");
- }
-
- ac_build_export_prim(&ctx->ac, &prim);
- }
- ac_build_endif(&ctx->ac, 5140);
-
- /* Export position and parameter data */
- tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, vertlive_scan.result_reduce, "");
- ac_build_ifcc(&ctx->ac, tmp, 5145);
- {
- struct radv_vs_output_info *outinfo = &ctx->args->shader_info->vs.outinfo;
- bool export_view_index = ctx->args->options->key.has_multiview_view_index;
- struct radv_shader_output_values *outputs;
- unsigned noutput = 0;
-
- /* Allocate a temporary array for the output values. */
- unsigned num_outputs = util_bitcount64(ctx->output_mask) + export_view_index;
- outputs = calloc(num_outputs, sizeof(outputs[0]));
-
- memset(outinfo->vs_output_param_offset, AC_EXP_PARAM_UNDEFINED,
- sizeof(outinfo->vs_output_param_offset));
- outinfo->pos_exports = 0;
-
- tmp = ngg_gs_vertex_ptr(ctx, tid);
- tmp = LLVMBuildLoad(builder,
- ngg_gs_get_emit_primflag_ptr(ctx, tmp, 1), "");
- tmp = LLVMBuildZExt(builder, tmp, ctx->ac.i32, "");
- const LLVMValueRef vertexptr = ngg_gs_vertex_ptr(ctx, tmp);
-
- unsigned out_idx = 0;
- for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
- unsigned output_usage_mask =
- ctx->args->shader_info->gs.output_usage_mask[i];
- int length = util_last_bit(output_usage_mask);
-
- if (!(ctx->output_mask & (1ull << i)))
- continue;
-
- outputs[noutput].slot_name = i;
- outputs[noutput].slot_index = i == VARYING_SLOT_CLIP_DIST1;
- outputs[noutput].usage_mask = output_usage_mask;
-
- for (unsigned j = 0; j < length; j++, out_idx++) {
- if (!(output_usage_mask & (1 << j)))
- continue;
-
- tmp = ngg_gs_get_emit_output_ptr(ctx, vertexptr, out_idx);
- tmp = LLVMBuildLoad(builder, tmp, "");
-
- LLVMTypeRef type = LLVMGetAllocatedType(ctx->abi.outputs[ac_llvm_reg_index_soa(i, j)]);
- if (ac_get_type_size(type) == 2) {
- tmp = ac_to_integer(&ctx->ac, tmp);
- tmp = LLVMBuildTrunc(ctx->ac.builder, tmp, ctx->ac.i16, "");
- }
-
- outputs[noutput].values[j] = ac_to_float(&ctx->ac, tmp);
- }
-
- for (unsigned j = length; j < 4; j++)
- outputs[noutput].values[j] = LLVMGetUndef(ctx->ac.f32);
-
- noutput++;
- }
-
- /* Export ViewIndex. */
- if (export_view_index) {
- outputs[noutput].slot_name = VARYING_SLOT_LAYER;
- outputs[noutput].slot_index = 0;
- outputs[noutput].usage_mask = 0x1;
- outputs[noutput].values[0] =
- ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.view_index));
- for (unsigned j = 1; j < 4; j++)
- outputs[noutput].values[j] = ctx->ac.f32_0;
- noutput++;
- }
-
- radv_llvm_export_vs(ctx, outputs, noutput, outinfo,
- ctx->args->options->key.vs_common_out.export_clip_dists);
- FREE(outputs);
- }
- ac_build_endif(&ctx->ac, 5145);
-}
-
-static void gfx10_ngg_gs_emit_vertex(struct radv_shader_context *ctx,
- unsigned stream,
- LLVMValueRef vertexidx,
- LLVMValueRef *addrs)
-{
- LLVMBuilderRef builder = ctx->ac.builder;
- LLVMValueRef tmp;
-
- const LLVMValueRef vertexptr =
- ngg_gs_emit_vertex_ptr(ctx, get_thread_id_in_tg(ctx), vertexidx);
- unsigned out_idx = 0;
- for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
- unsigned output_usage_mask =
- ctx->args->shader_info->gs.output_usage_mask[i];
- uint8_t output_stream =
- ctx->args->shader_info->gs.output_streams[i];
- LLVMValueRef *out_ptr = &addrs[i * 4];
- int length = util_last_bit(output_usage_mask);
-
- if (!(ctx->output_mask & (1ull << i)) ||
- output_stream != stream)
- continue;
-
- for (unsigned j = 0; j < length; j++, out_idx++) {
- if (!(output_usage_mask & (1 << j)))
- continue;
-
- LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder,
- out_ptr[j], "");
- out_val = ac_to_integer(&ctx->ac, out_val);
- out_val = LLVMBuildZExtOrBitCast(ctx->ac.builder, out_val, ctx->ac.i32, "");
-
- LLVMBuildStore(builder, out_val,
- ngg_gs_get_emit_output_ptr(ctx, vertexptr, out_idx));
- }
- }
- assert(out_idx * 4 <= ctx->args->shader_info->gs.gsvs_vertex_size);
-
- /* Store the current number of emitted vertices to zero out remaining
- * primitive flags in case the geometry shader doesn't emit the maximum
- * number of vertices.
- */
- tmp = LLVMBuildAdd(builder, vertexidx, ctx->ac.i32_1, "");
- LLVMBuildStore(builder, tmp, ctx->gs_next_vertex[stream]);
-
- /* Determine and store whether this vertex completed a primitive. */
- const LLVMValueRef curverts = LLVMBuildLoad(builder, ctx->gs_curprim_verts[stream], "");
-
- tmp = LLVMConstInt(ctx->ac.i32, si_conv_gl_prim_to_vertices(ctx->shader->info.gs.output_primitive) - 1, false);
- const LLVMValueRef iscompleteprim =
- LLVMBuildICmp(builder, LLVMIntUGE, curverts, tmp, "");
-
- /* Since the geometry shader emits triangle strips, we need to
- * track which primitive is odd and swap vertex indices to get
- * the correct vertex order.
- */
- LLVMValueRef is_odd = ctx->ac.i1false;
- if (stream == 0 &&
- si_conv_gl_prim_to_vertices(ctx->shader->info.gs.output_primitive) == 3) {
- tmp = LLVMBuildAnd(builder, curverts, ctx->ac.i32_1, "");
- is_odd = LLVMBuildICmp(builder, LLVMIntEQ, tmp, ctx->ac.i32_1, "");
- }
-
- tmp = LLVMBuildAdd(builder, curverts, ctx->ac.i32_1, "");
- LLVMBuildStore(builder, tmp, ctx->gs_curprim_verts[stream]);
-
- /* The per-vertex primitive flag encoding:
- * bit 0: whether this vertex finishes a primitive
- * bit 1: whether the primitive is odd (if we are emitting triangle strips)
- */
- tmp = LLVMBuildZExt(builder, iscompleteprim, ctx->ac.i8, "");
- tmp = LLVMBuildOr(builder, tmp,
- LLVMBuildShl(builder,
- LLVMBuildZExt(builder, is_odd, ctx->ac.i8, ""),
- ctx->ac.i8_1, ""), "");
- LLVMBuildStore(builder, tmp,
- ngg_gs_get_emit_primflag_ptr(ctx, vertexptr, stream));
-
- tmp = LLVMBuildLoad(builder, ctx->gs_generated_prims[stream], "");
- tmp = LLVMBuildAdd(builder, tmp, LLVMBuildZExt(builder, iscompleteprim, ctx->ac.i32, ""), "");
- LLVMBuildStore(builder, tmp, ctx->gs_generated_prims[stream]);
+ LLVMBuilderRef builder = ctx->ac.builder;
+ LLVMValueRef tmp;
+
+ assert((ctx->stage == MESA_SHADER_VERTEX || ctx->stage == MESA_SHADER_TESS_EVAL) &&
+ !ctx->args->is_gs_copy_shader);
+
+ LLVMValueRef prims_in_wave =
+ ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.merged_wave_info), 8, 8);
+ LLVMValueRef vtx_in_wave =
+ ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.merged_wave_info), 0, 8);
+ LLVMValueRef is_gs_thread =
+ LLVMBuildICmp(builder, LLVMIntULT, ac_get_thread_id(&ctx->ac), prims_in_wave, "");
+ LLVMValueRef is_es_thread =
+ LLVMBuildICmp(builder, LLVMIntULT, ac_get_thread_id(&ctx->ac), vtx_in_wave, "");
+ LLVMValueRef vtxindex[] = {
+ ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.gs_vtx_offset[0]), 0, 16),
+ ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.gs_vtx_offset[0]), 16, 16),
+ ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.gs_vtx_offset[2]), 0, 16),
+ };
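/* Editorial sketch (not part of this patch): a scalar equivalent of the
 * ac_unpack_param() calls above, which read bitfields out of packed SGPR
 * arguments -- here the ES vertex count sits in bits [7:0] and the GS
 * primitive count in bits [15:8] of merged_wave_info, and each
 * gs_vtx_offset dword packs two 16-bit vertex indices. The helper name is
 * ours, not the driver's. */
#include <stdint.h>

static inline uint32_t
unpack_bitfield(uint32_t value, unsigned bit_offset, unsigned bit_width)
{
   /* Assumes bit_width < 32, which holds for the 8- and 16-bit fields above. */
   return (value >> bit_offset) & ((1u << bit_width) - 1);
}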
+
+ /* Determine the number of vertices per primitive. */
+ unsigned num_vertices;
+ LLVMValueRef num_vertices_val;
+
+ if (ctx->stage == MESA_SHADER_VERTEX) {
+ LLVMValueRef outprim_val =
+ LLVMConstInt(ctx->ac.i32, ctx->args->options->key.vs.outprim, false);
+ num_vertices_val = LLVMBuildAdd(builder, outprim_val, ctx->ac.i32_1, "");
+ num_vertices = 3; /* TODO: optimize for points & lines */
+ } else {
+ assert(ctx->stage == MESA_SHADER_TESS_EVAL);
+
+ if (ctx->shader->info.tess.point_mode)
+ num_vertices = 1;
+ else if (ctx->shader->info.tess.primitive_mode == GL_ISOLINES)
+ num_vertices = 2;
+ else
+ num_vertices = 3;
+
+ num_vertices_val = LLVMConstInt(ctx->ac.i32, num_vertices, false);
+ }
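/* Editorial sketch (not part of this patch): the vertex-count selection
 * above in plain C. The VS path assumes the common outprim encoding
 * (0 = points, 1 = lines, 2 = triangles), which is why it just adds 1; the
 * TES path derives the count from the tessellation mode instead. */
#include <stdbool.h>

static unsigned
ngg_nogs_vertices_per_prim(bool is_tes, bool point_mode, bool isolines, unsigned vs_outprim)
{
   if (!is_tes)
      return vs_outprim + 1; /* 0 -> 1, 1 -> 2, 2 -> 3 */
   if (point_mode)
      return 1;
   if (isolines)
      return 2;
   return 3;
}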
+
+ /* Streamout */
+ if (ctx->args->shader_info->so.num_outputs) {
+ struct ngg_streamout nggso = {0};
+
+ nggso.num_vertices = num_vertices_val;
+ nggso.prim_enable[0] = is_gs_thread;
+
+ for (unsigned i = 0; i < num_vertices; ++i)
+ nggso.vertices[i] = ngg_nogs_vertex_ptr(ctx, vtxindex[i]);
+
+ build_streamout(ctx, &nggso);
+ }
+
+ /* Copy Primitive IDs from GS threads to the LDS address corresponding
+ * to the ES thread of the provoking vertex.
+ */
+ if (ctx->stage == MESA_SHADER_VERTEX && ctx->args->options->key.vs_common_out.export_prim_id) {
+ if (ctx->args->shader_info->so.num_outputs)
+ ac_build_s_barrier(&ctx->ac);
+
+ ac_build_ifcc(&ctx->ac, is_gs_thread, 5400);
+ /* Extract the PROVOKING_VTX_INDEX field. */
+ LLVMValueRef provoking_vtx_in_prim = LLVMConstInt(ctx->ac.i32, 0, false);
+
+ /* provoking_vtx_index = vtxindex[provoking_vtx_in_prim]; */
+ LLVMValueRef indices = ac_build_gather_values(&ctx->ac, vtxindex, 3);
+ LLVMValueRef provoking_vtx_index =
+ LLVMBuildExtractElement(builder, indices, provoking_vtx_in_prim, "");
+
+ LLVMBuildStore(builder, ac_get_arg(&ctx->ac, ctx->args->ac.gs_prim_id),
+ ac_build_gep0(&ctx->ac, ctx->esgs_ring, provoking_vtx_index));
+ ac_build_endif(&ctx->ac, 5400);
+ }
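/* Editorial sketch (not part of this patch): the primitive-ID handoff above,
 * modelled with a plain array standing in for the ES/GS LDS ring. Each GS
 * thread stores its primitive ID at the slot of the provoking vertex
 * (vertex 0 in the code above), so the matching ES thread can read it back
 * when it exports. Function and parameter names here are hypothetical. */
#include <stdint.h>

static void
store_prim_id_at_provoking_vertex(uint32_t *lds_prim_id, const uint32_t vtxindex[3],
                                  unsigned provoking_vtx_in_prim, uint32_t gs_prim_id)
{
   lds_prim_id[vtxindex[provoking_vtx_in_prim]] = gs_prim_id;
}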
+
+ /* TODO: primitive culling */
+
+ ac_build_sendmsg_gs_alloc_req(&ctx->ac, get_wave_id_in_tg(ctx), ngg_get_vtx_cnt(ctx),
+ ngg_get_prim_cnt(ctx));
+
+ /* TODO: streamout queries */
+ /* Export primitive data to the index buffer.
+ *
+ * For the first version, we will always build up all three indices
+ * independent of the primitive type. The additional garbage data
+ * shouldn't hurt.
+ *
+ * TODO: culling depends on the primitive type, so can have some
+ * interaction here.
+ */
+ ac_build_ifcc(&ctx->ac, is_gs_thread, 6001);
+ {
+ struct ac_ngg_prim prim = {0};
+
+ if (ctx->args->options->key.vs_common_out.as_ngg_passthrough) {
+ prim.passthrough = ac_get_arg(&ctx->ac, ctx->args->ac.gs_vtx_offset[0]);
+ } else {
+ prim.num_vertices = num_vertices;
+ prim.isnull = ctx->ac.i1false;
+ memcpy(prim.index, vtxindex, sizeof(vtxindex[0]) * 3);
+
+ for (unsigned i = 0; i < num_vertices; ++i) {
+ tmp = LLVMBuildLShr(builder, ac_get_arg(&ctx->ac, ctx->args->ac.gs_invocation_id),
+ LLVMConstInt(ctx->ac.i32, 8 + i, false), "");
+ prim.edgeflag[i] = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, "");
+ }
+ }
+
+ ac_build_export_prim(&ctx->ac, &prim);
+ }
+ ac_build_endif(&ctx->ac, 6001);
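/* Editorial sketch (not part of this patch): the non-passthrough edge-flag
 * extraction above, restated on scalars. The shifts in the code read one
 * edge flag per primitive vertex out of bit 8 + i of the gs_invocation_id
 * argument. */
#include <stdbool.h>
#include <stdint.h>

static bool
ngg_edgeflag_for_vertex(uint32_t gs_invocation_id, unsigned vertex /* 0..2 */)
{
   return (gs_invocation_id >> (8 + vertex)) & 1u;
}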
+
+ /* Export per-vertex data (positions and parameters). */
+ ac_build_ifcc(&ctx->ac, is_es_thread, 6002);
+ {
+ struct radv_vs_output_info *outinfo = ctx->stage == MESA_SHADER_TESS_EVAL
+ ? &ctx->args->shader_info->tes.outinfo
+ : &ctx->args->shader_info->vs.outinfo;
+
+ /* Exporting the primitive ID is handled below. */
+ /* TODO: use the new VS export path */
+ handle_vs_outputs_post(ctx, false, ctx->args->options->key.vs_common_out.export_clip_dists,
+ outinfo);
+
+ if (ctx->args->options->key.vs_common_out.export_prim_id) {
+ unsigned param_count = outinfo->param_exports;
+ LLVMValueRef values[4];
+
+ if (ctx->stage == MESA_SHADER_VERTEX) {
+ /* Wait for GS stores to finish. */
+ ac_build_s_barrier(&ctx->ac);
+
+ tmp = ac_build_gep0(&ctx->ac, ctx->esgs_ring, get_thread_id_in_tg(ctx));
+ values[0] = LLVMBuildLoad(builder, tmp, "");
+ } else {
+ assert(ctx->stage == MESA_SHADER_TESS_EVAL);
+ values[0] = ac_get_arg(&ctx->ac, ctx->args->ac.tes_patch_id);
+ }
+
+ values[0] = ac_to_float(&ctx->ac, values[0]);
+ for (unsigned j = 1; j < 4; j++)
+ values[j] = ctx->ac.f32_0;
+
+ radv_export_param(ctx, param_count, values, 0x1);
+
+ outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = param_count++;
+ outinfo->param_exports = param_count;
+ }
+ }
+ ac_build_endif(&ctx->ac, 6002);
+}
+
+static void
+gfx10_ngg_gs_emit_prologue(struct radv_shader_context *ctx)
+{
+ /* Zero out the part of LDS scratch that is used to accumulate the
+ * per-stream generated primitive count.
+ */
+ LLVMBuilderRef builder = ctx->ac.builder;
+ LLVMValueRef scratchptr = ctx->gs_ngg_scratch;
+ LLVMValueRef tid = get_thread_id_in_tg(ctx);
+ LLVMBasicBlockRef merge_block;
+ LLVMValueRef cond;
+
+ LLVMValueRef fn = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->ac.builder));
+ LLVMBasicBlockRef then_block = LLVMAppendBasicBlockInContext(ctx->ac.context, fn, "");
+ merge_block = LLVMAppendBasicBlockInContext(ctx->ac.context, fn, "");
+
+ cond = LLVMBuildICmp(builder, LLVMIntULT, tid, LLVMConstInt(ctx->ac.i32, 4, false), "");
+ LLVMBuildCondBr(ctx->ac.builder, cond, then_block, merge_block);
+ LLVMPositionBuilderAtEnd(ctx->ac.builder, then_block);
+
+ LLVMValueRef ptr = ac_build_gep0(&ctx->ac, scratchptr, tid);
+ LLVMBuildStore(builder, ctx->ac.i32_0, ptr);
+
+ LLVMBuildBr(ctx->ac.builder, merge_block);
+ LLVMPositionBuilderAtEnd(ctx->ac.builder, merge_block);
+
+ ac_build_s_barrier(&ctx->ac);
+}
+
+static void
+gfx10_ngg_gs_emit_epilogue_1(struct radv_shader_context *ctx)
+{
+ LLVMBuilderRef builder = ctx->ac.builder;
+ LLVMValueRef i8_0 = LLVMConstInt(ctx->ac.i8, 0, false);
+ LLVMValueRef tmp;
+
+ /* Zero out remaining (non-emitted) primitive flags.
+ *
+ * Note: Alternatively, we could pass the relevant gs_next_vertex to
+ * the emit threads via LDS. This is likely worse in the expected
+ * typical case where each GS thread emits the full set of
+ * vertices.
+ */
+ for (unsigned stream = 0; stream < 4; ++stream) {
+ unsigned num_components;
+
+ num_components = ctx->args->shader_info->gs.num_stream_output_components[stream];
+ if (!num_components)
+ continue;
+
+ const LLVMValueRef gsthread = get_thread_id_in_tg(ctx);
+
+ ac_build_bgnloop(&ctx->ac, 5100);
+
+ const LLVMValueRef vertexidx = LLVMBuildLoad(builder, ctx->gs_next_vertex[stream], "");
+ tmp = LLVMBuildICmp(builder, LLVMIntUGE, vertexidx,
+ LLVMConstInt(ctx->ac.i32, ctx->shader->info.gs.vertices_out, false), "");
+ ac_build_ifcc(&ctx->ac, tmp, 5101);
+ ac_build_break(&ctx->ac);
+ ac_build_endif(&ctx->ac, 5101);
+
+ tmp = LLVMBuildAdd(builder, vertexidx, ctx->ac.i32_1, "");
+ LLVMBuildStore(builder, tmp, ctx->gs_next_vertex[stream]);
+
+ tmp = ngg_gs_emit_vertex_ptr(ctx, gsthread, vertexidx);
+ LLVMBuildStore(builder, i8_0, ngg_gs_get_emit_primflag_ptr(ctx, tmp, stream));
+
+ ac_build_endloop(&ctx->ac, 5100);
+ }
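/* Editorial sketch (not part of this patch): what the loop above does for
 * one GS thread and one stream, with a plain byte array standing in for the
 * per-vertex primitive-flag slots in LDS. */
#include <stdint.h>

static void
zero_remaining_primflags(uint8_t *primflags, unsigned gs_next_vertex, unsigned vertices_out)
{
   for (unsigned v = gs_next_vertex; v < vertices_out; ++v)
      primflags[v] = 0;
}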
+
+ /* Accumulate generated primitives counts across the entire threadgroup. */
+ for (unsigned stream = 0; stream < 4; ++stream) {
+ unsigned num_components;
+
+ num_components = ctx->args->shader_info->gs.num_stream_output_components[stream];
+ if (!num_components)
+ continue;
+
+ LLVMValueRef numprims = LLVMBuildLoad(builder, ctx->gs_generated_prims[stream], "");
+ numprims = ac_build_reduce(&ctx->ac, numprims, nir_op_iadd, ctx->ac.wave_size);
+
+ tmp = LLVMBuildICmp(builder, LLVMIntEQ, ac_get_thread_id(&ctx->ac), ctx->ac.i32_0, "");
+ ac_build_ifcc(&ctx->ac, tmp, 5105);
+ {
+ LLVMBuildAtomicRMW(
+ builder, LLVMAtomicRMWBinOpAdd,
+ ac_build_gep0(&ctx->ac, ctx->gs_ngg_scratch, LLVMConstInt(ctx->ac.i32, stream, false)),
+ numprims, LLVMAtomicOrderingMonotonic, false);
+ }
+ ac_build_endif(&ctx->ac, 5105);
+ }
+}
+
+static void
+gfx10_ngg_gs_emit_epilogue_2(struct radv_shader_context *ctx)
+{
+ const unsigned verts_per_prim =
+ si_conv_gl_prim_to_vertices(ctx->shader->info.gs.output_primitive);
+ LLVMBuilderRef builder = ctx->ac.builder;
+ LLVMValueRef tmp, tmp2;
+
+ ac_build_s_barrier(&ctx->ac);
+
+ const LLVMValueRef tid = get_thread_id_in_tg(ctx);
+ LLVMValueRef num_emit_threads = ngg_get_prim_cnt(ctx);
+
+ /* Streamout */
+ if (ctx->args->shader_info->so.num_outputs) {
+ struct ngg_streamout nggso = {0};
+
+ nggso.num_vertices = LLVMConstInt(ctx->ac.i32, verts_per_prim, false);
+
+ LLVMValueRef vertexptr = ngg_gs_vertex_ptr(ctx, tid);
+ for (unsigned stream = 0; stream < 4; ++stream) {
+ if (!ctx->args->shader_info->gs.num_stream_output_components[stream])
+ continue;
+
+ tmp = LLVMBuildLoad(builder, ngg_gs_get_emit_primflag_ptr(ctx, vertexptr, stream), "");
+ tmp = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, "");
+ tmp2 = LLVMBuildICmp(builder, LLVMIntULT, tid, num_emit_threads, "");
+ nggso.prim_enable[stream] = LLVMBuildAnd(builder, tmp, tmp2, "");
+ }
+
+ for (unsigned i = 0; i < verts_per_prim; ++i) {
+ tmp = LLVMBuildSub(builder, tid, LLVMConstInt(ctx->ac.i32, verts_per_prim - i - 1, false),
+ "");
+ tmp = ngg_gs_vertex_ptr(ctx, tmp);
+ nggso.vertices[i] = ac_build_gep0(&ctx->ac, tmp, ctx->ac.i32_0);
+ }
+
+ build_streamout(ctx, &nggso);
+ }
+
+ /* Write shader query data. */
+ tmp = ac_get_arg(&ctx->ac, ctx->args->ngg_gs_state);
+ tmp = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, "");
+ ac_build_ifcc(&ctx->ac, tmp, 5109);
+ tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, LLVMConstInt(ctx->ac.i32, 4, false), "");
+ ac_build_ifcc(&ctx->ac, tmp, 5110);
+ {
+ tmp = LLVMBuildLoad(builder, ac_build_gep0(&ctx->ac, ctx->gs_ngg_scratch, tid), "");
+
+ ac_llvm_add_target_dep_function_attr(ctx->main_function, "amdgpu-gds-size", 256);
+
+ LLVMTypeRef gdsptr = LLVMPointerType(ctx->ac.i32, AC_ADDR_SPACE_GDS);
+ LLVMValueRef gdsbase = LLVMBuildIntToPtr(builder, ctx->ac.i32_0, gdsptr, "");
+
+ const char *sync_scope = LLVM_VERSION_MAJOR >= 9 ? "workgroup-one-as" : "workgroup";
+
+ /* Use a plain GDS atomic to accumulate the number of generated
+ * primitives.
+ */
+ ac_build_atomic_rmw(&ctx->ac, LLVMAtomicRMWBinOpAdd, gdsbase, tmp, sync_scope);
+ }
+ ac_build_endif(&ctx->ac, 5110);
+ ac_build_endif(&ctx->ac, 5109);
+
+ /* TODO: culling */
+
+ /* Determine vertex liveness. */
+ LLVMValueRef vertliveptr = ac_build_alloca(&ctx->ac, ctx->ac.i1, "vertexlive");
+
+ tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, num_emit_threads, "");
+ ac_build_ifcc(&ctx->ac, tmp, 5120);
+ {
+ for (unsigned i = 0; i < verts_per_prim; ++i) {
+ const LLVMValueRef primidx =
+ LLVMBuildAdd(builder, tid, LLVMConstInt(ctx->ac.i32, i, false), "");
+
+ if (i > 0) {
+ tmp = LLVMBuildICmp(builder, LLVMIntULT, primidx, num_emit_threads, "");
+ ac_build_ifcc(&ctx->ac, tmp, 5121 + i);
+ }
+
+ /* Load primitive liveness */
+ tmp = ngg_gs_vertex_ptr(ctx, primidx);
+ tmp = LLVMBuildLoad(builder, ngg_gs_get_emit_primflag_ptr(ctx, tmp, 0), "");
+ const LLVMValueRef primlive = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, "");
+
+ tmp = LLVMBuildLoad(builder, vertliveptr, "");
+ tmp = LLVMBuildOr(builder, tmp, primlive, ""), LLVMBuildStore(builder, tmp, vertliveptr);
+
+ if (i > 0)
+ ac_build_endif(&ctx->ac, 5121 + i);
+ }
+ }
+ ac_build_endif(&ctx->ac, 5120);
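/* Editorial sketch (not part of this patch): the liveness rule above in
 * scalar form. A vertex owned by thread tid is live if any of the
 * verts_per_prim primitives that could end at tid .. tid + V - 1 has its
 * "completes a primitive" bit (bit 0 of the prim flags) set. */
#include <stdbool.h>
#include <stdint.h>

static bool
ngg_gs_vertex_is_live(const uint8_t *primflags, unsigned tid, unsigned num_emit_threads,
                      unsigned verts_per_prim)
{
   bool live = false;
   for (unsigned i = 0; i < verts_per_prim; ++i) {
      unsigned primidx = tid + i;
      if (primidx < num_emit_threads)
         live |= (primflags[primidx] & 1) != 0;
   }
   return live;
}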
+
+ /* Inclusive scan addition across the current wave. */
+ LLVMValueRef vertlive = LLVMBuildLoad(builder, vertliveptr, "");
+ struct ac_wg_scan vertlive_scan = {0};
+ vertlive_scan.op = nir_op_iadd;
+ vertlive_scan.enable_reduce = true;
+ vertlive_scan.enable_exclusive = true;
+ vertlive_scan.src = vertlive;
+ vertlive_scan.scratch = ac_build_gep0(&ctx->ac, ctx->gs_ngg_scratch, ctx->ac.i32_0);
+ vertlive_scan.waveidx = get_wave_id_in_tg(ctx);
+ vertlive_scan.numwaves = get_tgsize(ctx);
+ vertlive_scan.maxwaves = 8;
+
+ ac_build_wg_scan(&ctx->ac, &vertlive_scan);
+
+ /* Skip all exports (including index exports) when possible. At least on
+ * early gfx10 revisions this is also to avoid hangs.
+ */
+ LLVMValueRef have_exports =
+ LLVMBuildICmp(builder, LLVMIntNE, vertlive_scan.result_reduce, ctx->ac.i32_0, "");
+ num_emit_threads = LLVMBuildSelect(builder, have_exports, num_emit_threads, ctx->ac.i32_0, "");
+
+ /* Allocate export space. Send this message as early as possible, to
+ * hide the latency of the SQ <-> SPI roundtrip.
+ *
+ * Note: We could consider compacting primitives for export as well.
+ * PA processes 1 non-null prim / clock, but it fetches 4 DW of
+ * prim data per clock and skips null primitives at no additional
+ * cost. So compacting primitives can only be beneficial when
+ * there are 4 or more contiguous null primitives in the export
+ * (in the common case of single-dword prim exports).
+ */
+ ac_build_sendmsg_gs_alloc_req(&ctx->ac, get_wave_id_in_tg(ctx), vertlive_scan.result_reduce,
+ num_emit_threads);
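/* Editorial sketch (not part of this patch): our reading of the message
 * payload that ac_build_sendmsg_gs_alloc_req() builds for the allocation
 * request above -- vertex count in the low bits of m0, primitive count
 * shifted up by 12. Treat the exact packing as an assumption; it lives in
 * the shared ac_llvm code, not in this file. */
#include <stdint.h>

static uint32_t
gs_alloc_req_m0(uint32_t vtx_cnt, uint32_t prim_cnt)
{
   return vtx_cnt | (prim_cnt << 12);
}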
+
+ /* Setup the reverse vertex compaction permutation. We re-use stream 1
+ * of the primitive liveness flags, relying on the fact that each
+ * threadgroup can have at most 256 threads. */
+ ac_build_ifcc(&ctx->ac, vertlive, 5130);
+ {
+ tmp = ngg_gs_vertex_ptr(ctx, vertlive_scan.result_exclusive);
+ tmp2 = LLVMBuildTrunc(builder, tid, ctx->ac.i8, "");
+ LLVMBuildStore(builder, tmp2, ngg_gs_get_emit_primflag_ptr(ctx, tmp, 1));
+ }
+ ac_build_endif(&ctx->ac, 5130);
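/* Editorial sketch (not part of this patch): the compaction permutation set
 * up above. Each live vertex records its original thread id at its compacted
 * position (the exclusive scan result); the export pass below then reads
 * that byte to find which source vertex to fetch. A uint8_t slot suffices
 * because a threadgroup has at most 256 threads. */
#include <stdbool.h>
#include <stdint.h>

static void
record_compaction_source(uint8_t *compact_src, bool vertlive, unsigned tid,
                         unsigned exclusive_count)
{
   if (vertlive)
      compact_src[exclusive_count] = (uint8_t)tid; /* read back as compact_src[compacted_tid] */
}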
+
+ ac_build_s_barrier(&ctx->ac);
+
+ /* Export primitive data */
+ tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, num_emit_threads, "");
+ ac_build_ifcc(&ctx->ac, tmp, 5140);
+ {
+ LLVMValueRef flags;
+ struct ac_ngg_prim prim = {0};
+ prim.num_vertices = verts_per_prim;
+
+ tmp = ngg_gs_vertex_ptr(ctx, tid);
+ flags = LLVMBuildLoad(builder, ngg_gs_get_emit_primflag_ptr(ctx, tmp, 0), "");
+ prim.isnull = LLVMBuildNot(builder, LLVMBuildTrunc(builder, flags, ctx->ac.i1, ""), "");
+
+ for (unsigned i = 0; i < verts_per_prim; ++i) {
+ prim.index[i] = LLVMBuildSub(builder, vertlive_scan.result_exclusive,
+ LLVMConstInt(ctx->ac.i32, verts_per_prim - i - 1, false), "");
+ prim.edgeflag[i] = ctx->ac.i1false;
+ }
+
+ /* Geometry shaders output triangle strips, but NGG expects
+ * triangles. We need to change the vertex order for odd
+ * triangles to get correct front/back facing by swapping 2
+ * vertex indices, but we also have to keep the provoking
+ * vertex in the same place.
+ */
+ if (verts_per_prim == 3) {
+ LLVMValueRef is_odd = LLVMBuildLShr(builder, flags, ctx->ac.i8_1, "");
+ is_odd = LLVMBuildTrunc(builder, is_odd, ctx->ac.i1, "");
+
+ struct ac_ngg_prim in = prim;
+ prim.index[0] = in.index[0];
+ prim.index[1] = LLVMBuildSelect(builder, is_odd, in.index[2], in.index[1], "");
+ prim.index[2] = LLVMBuildSelect(builder, is_odd, in.index[1], in.index[2], "");
+ }
+
+ ac_build_export_prim(&ctx->ac, &prim);
+ }
+ ac_build_endif(&ctx->ac, 5140);
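/* Editorial sketch (not part of this patch): the strip-to-triangle winding
 * fix above, on plain indices. Vertex 0 (the provoking vertex) stays put and
 * vertices 1 and 2 swap for odd triangles, which restores consistent
 * front/back facing. */
#include <stdbool.h>
#include <stdint.h>

static void
fix_strip_triangle_winding(uint32_t index[3], bool is_odd)
{
   if (is_odd) {
      uint32_t tmp = index[1];
      index[1] = index[2];
      index[2] = tmp;
   }
}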
+
+ /* Export position and parameter data */
+ tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, vertlive_scan.result_reduce, "");
+ ac_build_ifcc(&ctx->ac, tmp, 5145);
+ {
+ struct radv_vs_output_info *outinfo = &ctx->args->shader_info->vs.outinfo;
+ bool export_view_index = ctx->args->options->key.has_multiview_view_index;
+ struct radv_shader_output_values *outputs;
+ unsigned noutput = 0;
+
+ /* Allocate a temporary array for the output values. */
+ unsigned num_outputs = util_bitcount64(ctx->output_mask) + export_view_index;
+ outputs = calloc(num_outputs, sizeof(outputs[0]));
+
+ memset(outinfo->vs_output_param_offset, AC_EXP_PARAM_UNDEFINED,
+ sizeof(outinfo->vs_output_param_offset));
+ outinfo->pos_exports = 0;
+
+ tmp = ngg_gs_vertex_ptr(ctx, tid);
+ tmp = LLVMBuildLoad(builder, ngg_gs_get_emit_primflag_ptr(ctx, tmp, 1), "");
+ tmp = LLVMBuildZExt(builder, tmp, ctx->ac.i32, "");
+ const LLVMValueRef vertexptr = ngg_gs_vertex_ptr(ctx, tmp);
+
+ unsigned out_idx = 0;
+ for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
+ unsigned output_usage_mask = ctx->args->shader_info->gs.output_usage_mask[i];
+ int length = util_last_bit(output_usage_mask);
+
+ if (!(ctx->output_mask & (1ull << i)))
+ continue;
+
+ outputs[noutput].slot_name = i;
+ outputs[noutput].slot_index = i == VARYING_SLOT_CLIP_DIST1;
+ outputs[noutput].usage_mask = output_usage_mask;
+
+ for (unsigned j = 0; j < length; j++, out_idx++) {
+ if (!(output_usage_mask & (1 << j)))
+ continue;
+
+ tmp = ngg_gs_get_emit_output_ptr(ctx, vertexptr, out_idx);
+ tmp = LLVMBuildLoad(builder, tmp, "");
+
+ LLVMTypeRef type = LLVMGetAllocatedType(ctx->abi.outputs[ac_llvm_reg_index_soa(i, j)]);
+ if (ac_get_type_size(type) == 2) {
+ tmp = ac_to_integer(&ctx->ac, tmp);
+ tmp = LLVMBuildTrunc(ctx->ac.builder, tmp, ctx->ac.i16, "");
+ }
+
+ outputs[noutput].values[j] = ac_to_float(&ctx->ac, tmp);
+ }
+
+ for (unsigned j = length; j < 4; j++)
+ outputs[noutput].values[j] = LLVMGetUndef(ctx->ac.f32);
+
+ noutput++;
+ }
+
+ /* Export ViewIndex. */
+ if (export_view_index) {
+ outputs[noutput].slot_name = VARYING_SLOT_LAYER;
+ outputs[noutput].slot_index = 0;
+ outputs[noutput].usage_mask = 0x1;
+ outputs[noutput].values[0] =
+ ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.view_index));
+ for (unsigned j = 1; j < 4; j++)
+ outputs[noutput].values[j] = ctx->ac.f32_0;
+ noutput++;
+ }
+
+ radv_llvm_export_vs(ctx, outputs, noutput, outinfo,
+ ctx->args->options->key.vs_common_out.export_clip_dists);
+ FREE(outputs);
+ }
+ ac_build_endif(&ctx->ac, 5145);
+}
+
+static void
+gfx10_ngg_gs_emit_vertex(struct radv_shader_context *ctx, unsigned stream, LLVMValueRef vertexidx,
+ LLVMValueRef *addrs)
+{
+ LLVMBuilderRef builder = ctx->ac.builder;
+ LLVMValueRef tmp;
+
+ const LLVMValueRef vertexptr = ngg_gs_emit_vertex_ptr(ctx, get_thread_id_in_tg(ctx), vertexidx);
+ unsigned out_idx = 0;
+ for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
+ unsigned output_usage_mask = ctx->args->shader_info->gs.output_usage_mask[i];
+ uint8_t output_stream = ctx->args->shader_info->gs.output_streams[i];
+ LLVMValueRef *out_ptr = &addrs[i * 4];
+ int length = util_last_bit(output_usage_mask);
+
+ if (!(ctx->output_mask & (1ull << i)) || output_stream != stream)
+ continue;
+
+ for (unsigned j = 0; j < length; j++, out_idx++) {
+ if (!(output_usage_mask & (1 << j)))
+ continue;
+
+ LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, out_ptr[j], "");
+ out_val = ac_to_integer(&ctx->ac, out_val);
+ out_val = LLVMBuildZExtOrBitCast(ctx->ac.builder, out_val, ctx->ac.i32, "");
+
+ LLVMBuildStore(builder, out_val, ngg_gs_get_emit_output_ptr(ctx, vertexptr, out_idx));
+ }
+ }
+ assert(out_idx * 4 <= ctx->args->shader_info->gs.gsvs_vertex_size);
+
+ /* Store the current number of emitted vertices to zero out remaining
+ * primitive flags in case the geometry shader doesn't emit the maximum
+ * number of vertices.
+ */
+ tmp = LLVMBuildAdd(builder, vertexidx, ctx->ac.i32_1, "");
+ LLVMBuildStore(builder, tmp, ctx->gs_next_vertex[stream]);
+
+ /* Determine and store whether this vertex completed a primitive. */
+ const LLVMValueRef curverts = LLVMBuildLoad(builder, ctx->gs_curprim_verts[stream], "");
+
+ tmp = LLVMConstInt(
+ ctx->ac.i32, si_conv_gl_prim_to_vertices(ctx->shader->info.gs.output_primitive) - 1, false);
+ const LLVMValueRef iscompleteprim = LLVMBuildICmp(builder, LLVMIntUGE, curverts, tmp, "");
+
+ /* Since the geometry shader emits triangle strips, we need to
+ * track which primitive is odd and swap vertex indices to get
+ * the correct vertex order.
+ */
+ LLVMValueRef is_odd = ctx->ac.i1false;
+ if (stream == 0 && si_conv_gl_prim_to_vertices(ctx->shader->info.gs.output_primitive) == 3) {
+ tmp = LLVMBuildAnd(builder, curverts, ctx->ac.i32_1, "");
+ is_odd = LLVMBuildICmp(builder, LLVMIntEQ, tmp, ctx->ac.i32_1, "");
+ }
+
+ tmp = LLVMBuildAdd(builder, curverts, ctx->ac.i32_1, "");
+ LLVMBuildStore(builder, tmp, ctx->gs_curprim_verts[stream]);
+
+ /* The per-vertex primitive flag encoding:
+ * bit 0: whether this vertex finishes a primitive
+ * bit 1: whether the primitive is odd (if we are emitting triangle strips)
+ */
+ tmp = LLVMBuildZExt(builder, iscompleteprim, ctx->ac.i8, "");
+ tmp = LLVMBuildOr(
+ builder, tmp,
+ LLVMBuildShl(builder, LLVMBuildZExt(builder, is_odd, ctx->ac.i8, ""), ctx->ac.i8_1, ""), "");
+ LLVMBuildStore(builder, tmp, ngg_gs_get_emit_primflag_ptr(ctx, vertexptr, stream));
+
+ tmp = LLVMBuildLoad(builder, ctx->gs_generated_prims[stream], "");
+ tmp = LLVMBuildAdd(builder, tmp, LLVMBuildZExt(builder, iscompleteprim, ctx->ac.i32, ""), "");
+ LLVMBuildStore(builder, tmp, ctx->gs_generated_prims[stream]);
}
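/* Editorial sketch (not part of this patch): the per-vertex primitive-flag
 * byte described in gfx10_ngg_gs_emit_vertex() above, packed on plain
 * scalars. Bit 0 marks a vertex that completes a primitive; bit 1 marks the
 * completed primitive as odd (relevant for triangle strips only). */
#include <stdbool.h>
#include <stdint.h>

static uint8_t
ngg_gs_pack_primflag(bool completes_prim, bool prim_is_odd)
{
   return (uint8_t)completes_prim | ((uint8_t)prim_is_odd << 1);
}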
static bool
-si_export_mrt_color(struct radv_shader_context *ctx,
- LLVMValueRef *color, unsigned index,
- struct ac_export_args *args)
+si_export_mrt_color(struct radv_shader_context *ctx, LLVMValueRef *color, unsigned index,
+ struct ac_export_args *args)
{
- /* Export */
- si_llvm_init_export_args(ctx, color, 0xf,
- V_008DFC_SQ_EXP_MRT + index, args);
- if (!args->enabled_channels)
- return false; /* unnecessary NULL export */
+ /* Export */
+ si_llvm_init_export_args(ctx, color, 0xf, V_008DFC_SQ_EXP_MRT + index, args);
+ if (!args->enabled_channels)
+ return false; /* unnecessary NULL export */
- return true;
+ return true;
}
static void
-radv_export_mrt_z(struct radv_shader_context *ctx,
- LLVMValueRef depth, LLVMValueRef stencil,
- LLVMValueRef samplemask)
+radv_export_mrt_z(struct radv_shader_context *ctx, LLVMValueRef depth, LLVMValueRef stencil,
+ LLVMValueRef samplemask)
{
- struct ac_export_args args;
+ struct ac_export_args args;
- ac_export_mrt_z(&ctx->ac, depth, stencil, samplemask, &args);
+ ac_export_mrt_z(&ctx->ac, depth, stencil, samplemask, &args);
- ac_build_export(&ctx->ac, &args);
+ ac_build_export(&ctx->ac, &args);
}
static void
handle_fs_outputs_post(struct radv_shader_context *ctx)
{
- unsigned index = 0;
- LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
- struct ac_export_args color_args[8];
-
- for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
- LLVMValueRef values[4];
-
- if (!(ctx->output_mask & (1ull << i)))
- continue;
-
- if (i < FRAG_RESULT_DATA0)
- continue;
-
- for (unsigned j = 0; j < 4; j++)
- values[j] = ac_to_float(&ctx->ac,
- radv_load_output(ctx, i, j));
-
- bool ret = si_export_mrt_color(ctx, values,
- i - FRAG_RESULT_DATA0,
- &color_args[index]);
- if (ret)
- index++;
- }
-
- /* Process depth, stencil, samplemask. */
- if (ctx->args->shader_info->ps.writes_z) {
- depth = ac_to_float(&ctx->ac,
- radv_load_output(ctx, FRAG_RESULT_DEPTH, 0));
- }
- if (ctx->args->shader_info->ps.writes_stencil) {
- stencil = ac_to_float(&ctx->ac,
- radv_load_output(ctx, FRAG_RESULT_STENCIL, 0));
- }
- if (ctx->args->shader_info->ps.writes_sample_mask) {
- samplemask = ac_to_float(&ctx->ac,
- radv_load_output(ctx, FRAG_RESULT_SAMPLE_MASK, 0));
- }
-
- /* Set the DONE bit on last non-null color export only if Z isn't
- * exported.
- */
- if (index > 0 &&
- !ctx->args->shader_info->ps.writes_z &&
- !ctx->args->shader_info->ps.writes_stencil &&
- !ctx->args->shader_info->ps.writes_sample_mask) {
- unsigned last = index - 1;
-
- color_args[last].valid_mask = 1; /* whether the EXEC mask is valid */
- color_args[last].done = 1; /* DONE bit */
- }
-
- /* Export PS outputs. */
- for (unsigned i = 0; i < index; i++)
- ac_build_export(&ctx->ac, &color_args[i]);
-
- if (depth || stencil || samplemask)
- radv_export_mrt_z(ctx, depth, stencil, samplemask);
- else if (!index)
- ac_build_export_null(&ctx->ac);
+ unsigned index = 0;
+ LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
+ struct ac_export_args color_args[8];
+
+ for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
+ LLVMValueRef values[4];
+
+ if (!(ctx->output_mask & (1ull << i)))
+ continue;
+
+ if (i < FRAG_RESULT_DATA0)
+ continue;
+
+ for (unsigned j = 0; j < 4; j++)
+ values[j] = ac_to_float(&ctx->ac, radv_load_output(ctx, i, j));
+
+ bool ret = si_export_mrt_color(ctx, values, i - FRAG_RESULT_DATA0, &color_args[index]);
+ if (ret)
+ index++;
+ }
+
+ /* Process depth, stencil, samplemask. */
+ if (ctx->args->shader_info->ps.writes_z) {
+ depth = ac_to_float(&ctx->ac, radv_load_output(ctx, FRAG_RESULT_DEPTH, 0));
+ }
+ if (ctx->args->shader_info->ps.writes_stencil) {
+ stencil = ac_to_float(&ctx->ac, radv_load_output(ctx, FRAG_RESULT_STENCIL, 0));
+ }
+ if (ctx->args->shader_info->ps.writes_sample_mask) {
+ samplemask = ac_to_float(&ctx->ac, radv_load_output(ctx, FRAG_RESULT_SAMPLE_MASK, 0));
+ }
+
+ /* Set the DONE bit on last non-null color export only if Z isn't
+ * exported.
+ */
+ if (index > 0 && !ctx->args->shader_info->ps.writes_z &&
+ !ctx->args->shader_info->ps.writes_stencil &&
+ !ctx->args->shader_info->ps.writes_sample_mask) {
+ unsigned last = index - 1;
+
+ color_args[last].valid_mask = 1; /* whether the EXEC mask is valid */
+ color_args[last].done = 1; /* DONE bit */
+ }
+
+ /* Export PS outputs. */
+ for (unsigned i = 0; i < index; i++)
+ ac_build_export(&ctx->ac, &color_args[i]);
+
+ if (depth || stencil || samplemask)
+ radv_export_mrt_z(ctx, depth, stencil, samplemask);
+ else if (!index)
+ ac_build_export_null(&ctx->ac);
}
static void
emit_gs_epilogue(struct radv_shader_context *ctx)
{
- if (ctx->args->options->key.vs_common_out.as_ngg) {
- gfx10_ngg_gs_emit_epilogue_1(ctx);
- return;
- }
+ if (ctx->args->options->key.vs_common_out.as_ngg) {
+ gfx10_ngg_gs_emit_epilogue_1(ctx);
+ return;
+ }
- if (ctx->ac.chip_class >= GFX10)
- LLVMBuildFence(ctx->ac.builder, LLVMAtomicOrderingRelease, false, "");
+ if (ctx->ac.chip_class >= GFX10)
+ LLVMBuildFence(ctx->ac.builder, LLVMAtomicOrderingRelease, false, "");
- ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_NOP | AC_SENDMSG_GS_DONE, ctx->gs_wave_id);
+ ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_NOP | AC_SENDMSG_GS_DONE, ctx->gs_wave_id);
}
static void
-handle_shader_outputs_post(struct ac_shader_abi *abi, unsigned max_outputs,
- LLVMValueRef *addrs)
-{
- struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
-
- switch (ctx->stage) {
- case MESA_SHADER_VERTEX:
- if (ctx->args->options->key.vs_common_out.as_ls)
- break; /* Lowered in NIR */
- else if (ctx->args->options->key.vs_common_out.as_es)
- break; /* Lowered in NIR */
- else if (ctx->args->options->key.vs_common_out.as_ngg)
- handle_ngg_outputs_post_1(ctx);
- else
- handle_vs_outputs_post(ctx, ctx->args->options->key.vs_common_out.export_prim_id,
- ctx->args->options->key.vs_common_out.export_clip_dists,
- &ctx->args->shader_info->vs.outinfo);
- break;
- case MESA_SHADER_FRAGMENT:
- handle_fs_outputs_post(ctx);
- break;
- case MESA_SHADER_GEOMETRY:
- emit_gs_epilogue(ctx);
- break;
- case MESA_SHADER_TESS_CTRL:
- break; /* Lowered in NIR */
- case MESA_SHADER_TESS_EVAL:
- if (ctx->args->options->key.vs_common_out.as_es)
- break; /* Lowered in NIR */
- else if (ctx->args->options->key.vs_common_out.as_ngg)
- handle_ngg_outputs_post_1(ctx);
- else
- handle_vs_outputs_post(ctx, ctx->args->options->key.vs_common_out.export_prim_id,
- ctx->args->options->key.vs_common_out.export_clip_dists,
- &ctx->args->shader_info->tes.outinfo);
- break;
- default:
- break;
- }
-}
-
-static void ac_llvm_finalize_module(struct radv_shader_context *ctx,
- LLVMPassManagerRef passmgr,
- const struct radv_nir_compiler_options *options)
-{
- LLVMRunPassManager(passmgr, ctx->ac.module);
- LLVMDisposeBuilder(ctx->ac.builder);
-
- ac_llvm_context_dispose(&ctx->ac);
+handle_shader_outputs_post(struct ac_shader_abi *abi, unsigned max_outputs, LLVMValueRef *addrs)
+{
+ struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
+
+ switch (ctx->stage) {
+ case MESA_SHADER_VERTEX:
+ if (ctx->args->options->key.vs_common_out.as_ls)
+ break; /* Lowered in NIR */
+ else if (ctx->args->options->key.vs_common_out.as_es)
+ break; /* Lowered in NIR */
+ else if (ctx->args->options->key.vs_common_out.as_ngg)
+ handle_ngg_outputs_post_1(ctx);
+ else
+ handle_vs_outputs_post(ctx, ctx->args->options->key.vs_common_out.export_prim_id,
+ ctx->args->options->key.vs_common_out.export_clip_dists,
+ &ctx->args->shader_info->vs.outinfo);
+ break;
+ case MESA_SHADER_FRAGMENT:
+ handle_fs_outputs_post(ctx);
+ break;
+ case MESA_SHADER_GEOMETRY:
+ emit_gs_epilogue(ctx);
+ break;
+ case MESA_SHADER_TESS_CTRL:
+ break; /* Lowered in NIR */
+ case MESA_SHADER_TESS_EVAL:
+ if (ctx->args->options->key.vs_common_out.as_es)
+ break; /* Lowered in NIR */
+ else if (ctx->args->options->key.vs_common_out.as_ngg)
+ handle_ngg_outputs_post_1(ctx);
+ else
+ handle_vs_outputs_post(ctx, ctx->args->options->key.vs_common_out.export_prim_id,
+ ctx->args->options->key.vs_common_out.export_clip_dists,
+ &ctx->args->shader_info->tes.outinfo);
+ break;
+ default:
+ break;
+ }
+}
+
+static void
+ac_llvm_finalize_module(struct radv_shader_context *ctx, LLVMPassManagerRef passmgr,
+ const struct radv_nir_compiler_options *options)
+{
+ LLVMRunPassManager(passmgr, ctx->ac.module);
+ LLVMDisposeBuilder(ctx->ac.builder);
+
+ ac_llvm_context_dispose(&ctx->ac);
}
static void
ac_nir_eliminate_const_vs_outputs(struct radv_shader_context *ctx)
{
- struct radv_vs_output_info *outinfo;
-
- switch (ctx->stage) {
- case MESA_SHADER_FRAGMENT:
- case MESA_SHADER_COMPUTE:
- case MESA_SHADER_TESS_CTRL:
- case MESA_SHADER_GEOMETRY:
- return;
- case MESA_SHADER_VERTEX:
- if (ctx->args->options->key.vs_common_out.as_ls ||
- ctx->args->options->key.vs_common_out.as_es)
- return;
- outinfo = &ctx->args->shader_info->vs.outinfo;
- break;
- case MESA_SHADER_TESS_EVAL:
- if (ctx->args->options->key.vs_common_out.as_es)
- return;
- outinfo = &ctx->args->shader_info->tes.outinfo;
- break;
- default:
- unreachable("Unhandled shader type");
- }
-
- ac_optimize_vs_outputs(&ctx->ac,
- ctx->main_function,
- outinfo->vs_output_param_offset,
- VARYING_SLOT_MAX, 0,
- &outinfo->param_exports);
+ struct radv_vs_output_info *outinfo;
+
+ switch (ctx->stage) {
+ case MESA_SHADER_FRAGMENT:
+ case MESA_SHADER_COMPUTE:
+ case MESA_SHADER_TESS_CTRL:
+ case MESA_SHADER_GEOMETRY:
+ return;
+ case MESA_SHADER_VERTEX:
+ if (ctx->args->options->key.vs_common_out.as_ls ||
+ ctx->args->options->key.vs_common_out.as_es)
+ return;
+ outinfo = &ctx->args->shader_info->vs.outinfo;
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ if (ctx->args->options->key.vs_common_out.as_es)
+ return;
+ outinfo = &ctx->args->shader_info->tes.outinfo;
+ break;
+ default:
+ unreachable("Unhandled shader type");
+ }
+
+ ac_optimize_vs_outputs(&ctx->ac, ctx->main_function, outinfo->vs_output_param_offset,
+ VARYING_SLOT_MAX, 0, &outinfo->param_exports);
}
static void
ac_setup_rings(struct radv_shader_context *ctx)
{
- if (ctx->args->options->chip_class <= GFX8 &&
- (ctx->stage == MESA_SHADER_GEOMETRY ||
- ctx->args->options->key.vs_common_out.as_es)) {
- unsigned ring = ctx->stage == MESA_SHADER_GEOMETRY ? RING_ESGS_GS
- : RING_ESGS_VS;
- LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, ring, false);
-
- ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac,
- ctx->ring_offsets,
- offset);
- }
-
- if (ctx->args->is_gs_copy_shader) {
- ctx->gsvs_ring[0] =
- ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets,
- LLVMConstInt(ctx->ac.i32,
- RING_GSVS_VS, false));
- }
-
- if (ctx->stage == MESA_SHADER_GEOMETRY) {
- /* The conceptual layout of the GSVS ring is
- * v0c0 .. vLv0 v0c1 .. vLc1 ..
- * but the real memory layout is swizzled across
- * threads:
- * t0v0c0 .. t15v0c0 t0v1c0 .. t15v1c0 ... t15vLcL
- * t16v0c0 ..
- * Override the buffer descriptor accordingly.
- */
- LLVMTypeRef v2i64 = LLVMVectorType(ctx->ac.i64, 2);
- uint64_t stream_offset = 0;
- unsigned num_records = ctx->ac.wave_size;
- LLVMValueRef base_ring;
-
- base_ring =
- ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets,
- LLVMConstInt(ctx->ac.i32,
- RING_GSVS_GS, false));
-
- for (unsigned stream = 0; stream < 4; stream++) {
- unsigned num_components, stride;
- LLVMValueRef ring, tmp;
-
- num_components =
- ctx->args->shader_info->gs.num_stream_output_components[stream];
-
- if (!num_components)
- continue;
-
- stride = 4 * num_components * ctx->shader->info.gs.vertices_out;
-
- /* Limit on the stride field for <= GFX7. */
- assert(stride < (1 << 14));
-
- ring = LLVMBuildBitCast(ctx->ac.builder,
- base_ring, v2i64, "");
- tmp = LLVMBuildExtractElement(ctx->ac.builder,
- ring, ctx->ac.i32_0, "");
- tmp = LLVMBuildAdd(ctx->ac.builder, tmp,
- LLVMConstInt(ctx->ac.i64,
- stream_offset, 0), "");
- ring = LLVMBuildInsertElement(ctx->ac.builder,
- ring, tmp, ctx->ac.i32_0, "");
-
- stream_offset += stride * ctx->ac.wave_size;
-
- ring = LLVMBuildBitCast(ctx->ac.builder, ring,
- ctx->ac.v4i32, "");
-
- tmp = LLVMBuildExtractElement(ctx->ac.builder, ring,
- ctx->ac.i32_1, "");
- tmp = LLVMBuildOr(ctx->ac.builder, tmp,
- LLVMConstInt(ctx->ac.i32,
- S_008F04_STRIDE(stride), false), "");
- ring = LLVMBuildInsertElement(ctx->ac.builder, ring, tmp,
- ctx->ac.i32_1, "");
-
- ring = LLVMBuildInsertElement(ctx->ac.builder, ring,
- LLVMConstInt(ctx->ac.i32,
- num_records, false),
- LLVMConstInt(ctx->ac.i32, 2, false), "");
-
- ctx->gsvs_ring[stream] = ring;
- }
- }
-
- if (ctx->stage == MESA_SHADER_TESS_CTRL ||
- ctx->stage == MESA_SHADER_TESS_EVAL) {
- ctx->hs_ring_tess_offchip = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_HS_TESS_OFFCHIP, false));
- ctx->hs_ring_tess_factor = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_HS_TESS_FACTOR, false));
- }
+ if (ctx->args->options->chip_class <= GFX8 &&
+ (ctx->stage == MESA_SHADER_GEOMETRY || ctx->args->options->key.vs_common_out.as_es)) {
+ unsigned ring = ctx->stage == MESA_SHADER_GEOMETRY ? RING_ESGS_GS : RING_ESGS_VS;
+ LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, ring, false);
+
+ ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, offset);
+ }
+
+ if (ctx->args->is_gs_copy_shader) {
+ ctx->gsvs_ring[0] = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets,
+ LLVMConstInt(ctx->ac.i32, RING_GSVS_VS, false));
+ }
+
+ if (ctx->stage == MESA_SHADER_GEOMETRY) {
+ /* The conceptual layout of the GSVS ring is
+ * v0c0 .. vLv0 v0c1 .. vLc1 ..
+ * but the real memory layout is swizzled across
+ * threads:
+ * t0v0c0 .. t15v0c0 t0v1c0 .. t15v1c0 ... t15vLcL
+ * t16v0c0 ..
+ * Override the buffer descriptor accordingly.
+ */
+ LLVMTypeRef v2i64 = LLVMVectorType(ctx->ac.i64, 2);
+ uint64_t stream_offset = 0;
+ unsigned num_records = ctx->ac.wave_size;
+ LLVMValueRef base_ring;
+
+ base_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets,
+ LLVMConstInt(ctx->ac.i32, RING_GSVS_GS, false));
+
+ for (unsigned stream = 0; stream < 4; stream++) {
+ unsigned num_components, stride;
+ LLVMValueRef ring, tmp;
+
+ num_components = ctx->args->shader_info->gs.num_stream_output_components[stream];
+
+ if (!num_components)
+ continue;
+
+ stride = 4 * num_components * ctx->shader->info.gs.vertices_out;
+
+ /* Limit on the stride field for <= GFX7. */
+ assert(stride < (1 << 14));
+
+ ring = LLVMBuildBitCast(ctx->ac.builder, base_ring, v2i64, "");
+ tmp = LLVMBuildExtractElement(ctx->ac.builder, ring, ctx->ac.i32_0, "");
+ tmp = LLVMBuildAdd(ctx->ac.builder, tmp, LLVMConstInt(ctx->ac.i64, stream_offset, 0), "");
+ ring = LLVMBuildInsertElement(ctx->ac.builder, ring, tmp, ctx->ac.i32_0, "");
+
+ stream_offset += stride * ctx->ac.wave_size;
+
+ ring = LLVMBuildBitCast(ctx->ac.builder, ring, ctx->ac.v4i32, "");
+
+ tmp = LLVMBuildExtractElement(ctx->ac.builder, ring, ctx->ac.i32_1, "");
+ tmp = LLVMBuildOr(ctx->ac.builder, tmp,
+ LLVMConstInt(ctx->ac.i32, S_008F04_STRIDE(stride), false), "");
+ ring = LLVMBuildInsertElement(ctx->ac.builder, ring, tmp, ctx->ac.i32_1, "");
+
+ ring = LLVMBuildInsertElement(ctx->ac.builder, ring,
+ LLVMConstInt(ctx->ac.i32, num_records, false),
+ LLVMConstInt(ctx->ac.i32, 2, false), "");
+
+ ctx->gsvs_ring[stream] = ring;
+ }
+ }
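/* Editorial sketch (not part of this patch): one way to read the swizzled
 * GSVS layout described in the comment above, limited to a single wave of
 * wave_size threads and dword-sized components. The real ring additionally
 * applies the per-stream base offset and the stride programmed into the
 * buffer descriptor, so treat this as an illustration of the thread
 * interleaving only. */
#include <stdint.h>

static uint64_t
gsvs_dword_index_in_wave(unsigned thread, unsigned vertex, unsigned component,
                         unsigned num_vertices, unsigned wave_size)
{
   /* Threads of a wave are interleaved innermost, then vertices, then
    * components: t0v0c0 .. t(W-1)v0c0, t0v1c0 .. t(W-1)v1c0, ... */
   return ((uint64_t)component * num_vertices + vertex) * wave_size + thread;
}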
+
+ if (ctx->stage == MESA_SHADER_TESS_CTRL || ctx->stage == MESA_SHADER_TESS_EVAL) {
+ ctx->hs_ring_tess_offchip = ac_build_load_to_sgpr(
+ &ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_HS_TESS_OFFCHIP, false));
+ ctx->hs_ring_tess_factor = ac_build_load_to_sgpr(
+ &ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_HS_TESS_FACTOR, false));
+ }
}
unsigned
-radv_nir_get_max_workgroup_size(enum chip_class chip_class,
- gl_shader_stage stage,
- const struct nir_shader *nir)
+radv_nir_get_max_workgroup_size(enum chip_class chip_class, gl_shader_stage stage,
+ const struct nir_shader *nir)
{
- const unsigned backup_sizes[] = {chip_class >= GFX9 ? 128 : 64, 1, 1};
- unsigned sizes[3];
- for (unsigned i = 0; i < 3; i++)
- sizes[i] = nir ? nir->info.cs.local_size[i] : backup_sizes[i];
- return radv_get_max_workgroup_size(chip_class, stage, sizes);
+ const unsigned backup_sizes[] = {chip_class >= GFX9 ? 128 : 64, 1, 1};
+ unsigned sizes[3];
+ for (unsigned i = 0; i < 3; i++)
+ sizes[i] = nir ? nir->info.cs.local_size[i] : backup_sizes[i];
+ return radv_get_max_workgroup_size(chip_class, stage, sizes);
}
/* Fixup the HW not emitting the TCS regs if there are no HS threads. */
-static void ac_nir_fixup_ls_hs_input_vgprs(struct radv_shader_context *ctx)
-{
- LLVMValueRef count =
- ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.merged_wave_info), 8, 8);
- LLVMValueRef hs_empty = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, count,
- ctx->ac.i32_0, "");
- ctx->abi.instance_id = LLVMBuildSelect(ctx->ac.builder, hs_empty,
- ac_get_arg(&ctx->ac, ctx->args->ac.vertex_id),
- ctx->abi.instance_id, "");
- ctx->vs_rel_patch_id = LLVMBuildSelect(ctx->ac.builder, hs_empty,
- ac_get_arg(&ctx->ac, ctx->args->ac.tcs_rel_ids),
- ctx->vs_rel_patch_id,
- "");
- ctx->abi.vertex_id = LLVMBuildSelect(ctx->ac.builder, hs_empty,
- ac_get_arg(&ctx->ac, ctx->args->ac.tcs_patch_id),
- ctx->abi.vertex_id, "");
-}
-
-static void prepare_gs_input_vgprs(struct radv_shader_context *ctx, bool merged)
-{
- if (merged) {
- for(int i = 5; i >= 0; --i) {
- ctx->gs_vtx_offset[i] =
- ac_unpack_param(&ctx->ac,
- ac_get_arg(&ctx->ac, ctx->args->ac.gs_vtx_offset[i & ~1]),
- (i & 1) * 16, 16);
- }
-
- ctx->gs_wave_id = ac_unpack_param(&ctx->ac,
- ac_get_arg(&ctx->ac, ctx->args->ac.merged_wave_info),
- 16, 8);
- } else {
- for (int i = 0; i < 6; i++)
- ctx->gs_vtx_offset[i] = ac_get_arg(&ctx->ac, ctx->args->ac.gs_vtx_offset[i]);
- ctx->gs_wave_id = ac_get_arg(&ctx->ac, ctx->args->ac.gs_wave_id);
- }
+static void
+ac_nir_fixup_ls_hs_input_vgprs(struct radv_shader_context *ctx)
+{
+ LLVMValueRef count =
+ ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.merged_wave_info), 8, 8);
+ LLVMValueRef hs_empty = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, count, ctx->ac.i32_0, "");
+ ctx->abi.instance_id =
+ LLVMBuildSelect(ctx->ac.builder, hs_empty, ac_get_arg(&ctx->ac, ctx->args->ac.vertex_id),
+ ctx->abi.instance_id, "");
+ ctx->vs_rel_patch_id =
+ LLVMBuildSelect(ctx->ac.builder, hs_empty, ac_get_arg(&ctx->ac, ctx->args->ac.tcs_rel_ids),
+ ctx->vs_rel_patch_id, "");
+ ctx->abi.vertex_id =
+ LLVMBuildSelect(ctx->ac.builder, hs_empty, ac_get_arg(&ctx->ac, ctx->args->ac.tcs_patch_id),
+ ctx->abi.vertex_id, "");
+}
+
+static void
+prepare_gs_input_vgprs(struct radv_shader_context *ctx, bool merged)
+{
+ if (merged) {
+ for (int i = 5; i >= 0; --i) {
+ ctx->gs_vtx_offset[i] = ac_unpack_param(
+ &ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.gs_vtx_offset[i & ~1]), (i & 1) * 16, 16);
+ }
+
+ ctx->gs_wave_id =
+ ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.merged_wave_info), 16, 8);
+ } else {
+ for (int i = 0; i < 6; i++)
+ ctx->gs_vtx_offset[i] = ac_get_arg(&ctx->ac, ctx->args->ac.gs_vtx_offset[i]);
+ ctx->gs_wave_id = ac_get_arg(&ctx->ac, ctx->args->ac.gs_wave_id);
+ }
}
/* Ensure that the esgs ring is declared.
@@ -3157,545 +2923,499 @@ static void prepare_gs_input_vgprs(struct radv_shader_context *ctx, bool merged)
* We declare it with 64KB alignment as a hint that the
* pointer value will always be 0.
*/
-static void declare_esgs_ring(struct radv_shader_context *ctx)
-{
- if (ctx->esgs_ring)
- return;
-
- assert(!LLVMGetNamedGlobal(ctx->ac.module, "esgs_ring"));
-
- ctx->esgs_ring = LLVMAddGlobalInAddressSpace(
- ctx->ac.module, LLVMArrayType(ctx->ac.i32, 0),
- "esgs_ring",
- AC_ADDR_SPACE_LDS);
- LLVMSetLinkage(ctx->esgs_ring, LLVMExternalLinkage);
- LLVMSetAlignment(ctx->esgs_ring, 64 * 1024);
-}
-
-static
-LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
- struct nir_shader *const *shaders,
- int shader_count,
- const struct radv_shader_args *args)
-{
- struct radv_shader_context ctx = {0};
- ctx.args = args;
-
- enum ac_float_mode float_mode = AC_FLOAT_MODE_DEFAULT;
-
- if (args->shader_info->float_controls_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32) {
- float_mode = AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO;
- }
-
- ac_llvm_context_init(&ctx.ac, ac_llvm, args->options->chip_class,
- args->options->family, args->options->info, float_mode,
- args->shader_info->wave_size,
- args->shader_info->ballot_bit_size);
- ctx.context = ctx.ac.context;
-
- ctx.max_workgroup_size = 0;
- for (int i = 0; i < shader_count; ++i) {
- ctx.max_workgroup_size = MAX2(ctx.max_workgroup_size,
- radv_nir_get_max_workgroup_size(args->options->chip_class,
- shaders[i]->info.stage,
- shaders[i]));
- }
-
- if (ctx.ac.chip_class >= GFX10) {
- if (is_pre_gs_stage(shaders[0]->info.stage) &&
- args->options->key.vs_common_out.as_ngg) {
- ctx.max_workgroup_size = 128;
- }
- }
-
- create_function(&ctx, shaders[shader_count - 1]->info.stage, shader_count >= 2);
-
- ctx.abi.inputs = &ctx.inputs[0];
- ctx.abi.emit_outputs = handle_shader_outputs_post;
- ctx.abi.emit_vertex_with_counter = visit_emit_vertex_with_counter;
- ctx.abi.load_ubo = radv_load_ubo;
- ctx.abi.load_ssbo = radv_load_ssbo;
- ctx.abi.load_sampler_desc = radv_get_sampler_desc;
- ctx.abi.load_resource = radv_load_resource;
- ctx.abi.load_ring_tess_factors = load_ring_tess_factors;
- ctx.abi.load_ring_tess_offchip = load_ring_tess_offchip;
- ctx.abi.load_ring_esgs = load_ring_esgs;
- ctx.abi.clamp_shadow_reference = false;
- ctx.abi.adjust_frag_coord_z = args->options->adjust_frag_coord_z;
- ctx.abi.robust_buffer_access = args->options->robust_buffer_access;
-
- bool is_ngg = is_pre_gs_stage(shaders[0]->info.stage) && args->options->key.vs_common_out.as_ngg;
- if (shader_count >= 2 || is_ngg)
- ac_init_exec_full_mask(&ctx.ac);
-
- if (args->ac.vertex_id.used)
- ctx.abi.vertex_id = ac_get_arg(&ctx.ac, args->ac.vertex_id);
- if (args->ac.vs_rel_patch_id.used)
- ctx.vs_rel_patch_id = ac_get_arg(&ctx.ac, args->ac.vs_rel_patch_id);
- if (args->ac.instance_id.used)
- ctx.abi.instance_id = ac_get_arg(&ctx.ac, args->ac.instance_id);
-
- if (args->options->has_ls_vgpr_init_bug &&
- shaders[shader_count - 1]->info.stage == MESA_SHADER_TESS_CTRL)
- ac_nir_fixup_ls_hs_input_vgprs(&ctx);
-
- if (is_ngg) {
- /* Declare scratch space base for streamout and vertex
- * compaction. Whether space is actually allocated is
- * determined during linking / PM4 creation.
- *
- * Add an extra dword per vertex to ensure an odd stride, which
- * avoids bank conflicts for SoA accesses.
- */
- if (!args->options->key.vs_common_out.as_ngg_passthrough)
- declare_esgs_ring(&ctx);
-
- /* This is really only needed when streamout and / or vertex
- * compaction is enabled.
- */
- if (args->shader_info->so.num_outputs) {
- LLVMTypeRef asi32 = LLVMArrayType(ctx.ac.i32, 8);
- ctx.gs_ngg_scratch = LLVMAddGlobalInAddressSpace(ctx.ac.module,
- asi32, "ngg_scratch", AC_ADDR_SPACE_LDS);
- LLVMSetInitializer(ctx.gs_ngg_scratch, LLVMGetUndef(asi32));
- LLVMSetAlignment(ctx.gs_ngg_scratch, 4);
- }
- }
-
- for(int shader_idx = 0; shader_idx < shader_count; ++shader_idx) {
- ctx.stage = shaders[shader_idx]->info.stage;
- ctx.shader = shaders[shader_idx];
- ctx.output_mask = 0;
-
- if (shaders[shader_idx]->info.stage == MESA_SHADER_GEOMETRY) {
- for (int i = 0; i < 4; i++) {
- ctx.gs_next_vertex[i] =
- ac_build_alloca(&ctx.ac, ctx.ac.i32, "");
- }
- if (args->options->key.vs_common_out.as_ngg) {
- for (unsigned i = 0; i < 4; ++i) {
- ctx.gs_curprim_verts[i] =
- ac_build_alloca(&ctx.ac, ctx.ac.i32, "");
- ctx.gs_generated_prims[i] =
- ac_build_alloca(&ctx.ac, ctx.ac.i32, "");
- }
-
- unsigned scratch_size = 8;
- if (args->shader_info->so.num_outputs)
- scratch_size = 44;
-
- LLVMTypeRef ai32 = LLVMArrayType(ctx.ac.i32, scratch_size);
- ctx.gs_ngg_scratch =
- LLVMAddGlobalInAddressSpace(ctx.ac.module,
- ai32, "ngg_scratch", AC_ADDR_SPACE_LDS);
- LLVMSetInitializer(ctx.gs_ngg_scratch, LLVMGetUndef(ai32));
- LLVMSetAlignment(ctx.gs_ngg_scratch, 4);
-
- ctx.gs_ngg_emit = LLVMAddGlobalInAddressSpace(ctx.ac.module,
- LLVMArrayType(ctx.ac.i32, 0), "ngg_emit", AC_ADDR_SPACE_LDS);
- LLVMSetLinkage(ctx.gs_ngg_emit, LLVMExternalLinkage);
- LLVMSetAlignment(ctx.gs_ngg_emit, 4);
- }
-
- ctx.abi.emit_primitive = visit_end_primitive;
- } else if (shaders[shader_idx]->info.stage == MESA_SHADER_TESS_EVAL) {
- ctx.abi.load_tess_coord = load_tess_coord;
- } else if (shaders[shader_idx]->info.stage == MESA_SHADER_VERTEX) {
- ctx.abi.load_base_vertex = radv_load_base_vertex;
- } else if (shaders[shader_idx]->info.stage == MESA_SHADER_FRAGMENT) {
- ctx.abi.load_sample_position = load_sample_position;
- ctx.abi.load_sample_mask_in = load_sample_mask_in;
- }
-
- if (shaders[shader_idx]->info.stage == MESA_SHADER_VERTEX &&
- args->options->key.vs_common_out.as_ngg &&
- args->options->key.vs_common_out.export_prim_id) {
- declare_esgs_ring(&ctx);
- }
-
- bool nested_barrier = false;
-
- if (shader_idx) {
- if (shaders[shader_idx]->info.stage == MESA_SHADER_GEOMETRY &&
- args->options->key.vs_common_out.as_ngg) {
- gfx10_ngg_gs_emit_prologue(&ctx);
- nested_barrier = false;
- } else {
- nested_barrier = true;
- }
- }
-
- if (nested_barrier) {
- /* Execute a barrier before the second shader in
- * a merged shader.
- *
- * Execute the barrier inside the conditional block,
- * so that empty waves can jump directly to s_endpgm,
- * which will also signal the barrier.
- *
- * This is possible in gfx9, because an empty wave
- * for the second shader does not participate in
- * the epilogue. With NGG, empty waves may still
- * be required to export data (e.g. GS output vertices),
- * so we cannot let them exit early.
- *
- * If the shader is TCS and the TCS epilog is present
- * and contains a barrier, it will wait there and then
- * reach s_endpgm.
- */
- ac_emit_barrier(&ctx.ac, ctx.stage);
- }
-
- nir_foreach_shader_out_variable(variable, shaders[shader_idx])
- scan_shader_output_decl(&ctx, variable, shaders[shader_idx], shaders[shader_idx]->info.stage);
-
- ac_setup_rings(&ctx);
-
- LLVMBasicBlockRef merge_block = NULL;
- if (shader_count >= 2 || is_ngg) {
- LLVMValueRef fn = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder));
- LLVMBasicBlockRef then_block = LLVMAppendBasicBlockInContext(ctx.ac.context, fn, "");
- merge_block = LLVMAppendBasicBlockInContext(ctx.ac.context, fn, "");
-
- LLVMValueRef count =
- ac_unpack_param(&ctx.ac,
- ac_get_arg(&ctx.ac, args->ac.merged_wave_info),
- 8 * shader_idx, 8);
- LLVMValueRef thread_id = ac_get_thread_id(&ctx.ac);
- LLVMValueRef cond = LLVMBuildICmp(ctx.ac.builder, LLVMIntULT,
- thread_id, count, "");
- LLVMBuildCondBr(ctx.ac.builder, cond, then_block, merge_block);
-
- LLVMPositionBuilderAtEnd(ctx.ac.builder, then_block);
- }
-
- if (shaders[shader_idx]->info.stage == MESA_SHADER_FRAGMENT)
- prepare_interp_optimize(&ctx, shaders[shader_idx]);
- else if(shaders[shader_idx]->info.stage == MESA_SHADER_VERTEX)
- handle_vs_inputs(&ctx, shaders[shader_idx]);
- else if(shaders[shader_idx]->info.stage == MESA_SHADER_GEOMETRY)
- prepare_gs_input_vgprs(&ctx, shader_count >= 2);
-
- ac_nir_translate(&ctx.ac, &ctx.abi, &args->ac, shaders[shader_idx]);
-
- if (shader_count >= 2 || is_ngg) {
- LLVMBuildBr(ctx.ac.builder, merge_block);
- LLVMPositionBuilderAtEnd(ctx.ac.builder, merge_block);
- }
-
- /* This needs to be outside the if wrapping the shader body, as sometimes
- * the HW generates waves with 0 es/vs threads. */
- if (is_pre_gs_stage(shaders[shader_idx]->info.stage) &&
- args->options->key.vs_common_out.as_ngg &&
- shader_idx == shader_count - 1) {
- handle_ngg_outputs_post_2(&ctx);
- } else if (shaders[shader_idx]->info.stage == MESA_SHADER_GEOMETRY &&
- args->options->key.vs_common_out.as_ngg) {
- gfx10_ngg_gs_emit_epilogue_2(&ctx);
- }
- }
-
- LLVMBuildRetVoid(ctx.ac.builder);
-
- if (args->options->dump_preoptir) {
- fprintf(stderr, "%s LLVM IR:\n\n",
- radv_get_shader_name(args->shader_info,
- shaders[shader_count - 1]->info.stage));
- ac_dump_module(ctx.ac.module);
- fprintf(stderr, "\n");
- }
-
- ac_llvm_finalize_module(&ctx, ac_llvm->passmgr, args->options);
-
- if (shader_count == 1)
- ac_nir_eliminate_const_vs_outputs(&ctx);
-
- if (args->options->dump_shader) {
- args->shader_info->private_mem_vgprs =
- ac_count_scratch_private_memory(ctx.main_function);
- }
-
- return ctx.ac.module;
-}
-
-static void ac_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
-{
- unsigned *retval = (unsigned *)context;
- LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
- char *description = LLVMGetDiagInfoDescription(di);
-
- if (severity == LLVMDSError) {
- *retval = 1;
- fprintf(stderr, "LLVM triggered Diagnostic Handler: %s\n",
- description);
- }
-
- LLVMDisposeMessage(description);
-}
-
-static unsigned radv_llvm_compile(LLVMModuleRef M,
- char **pelf_buffer, size_t *pelf_size,
- struct ac_llvm_compiler *ac_llvm)
-{
- unsigned retval = 0;
- LLVMContextRef llvm_ctx;
-
- /* Setup Diagnostic Handler*/
- llvm_ctx = LLVMGetModuleContext(M);
-
- LLVMContextSetDiagnosticHandler(llvm_ctx, ac_diagnostic_handler,
- &retval);
-
- /* Compile IR*/
- if (!radv_compile_to_elf(ac_llvm, M, pelf_buffer, pelf_size))
- retval = 1;
- return retval;
-}
-
-static void ac_compile_llvm_module(struct ac_llvm_compiler *ac_llvm,
- LLVMModuleRef llvm_module,
- struct radv_shader_binary **rbinary,
- gl_shader_stage stage,
- const char *name,
- const struct radv_nir_compiler_options *options)
-{
- char *elf_buffer = NULL;
- size_t elf_size = 0;
- char *llvm_ir_string = NULL;
-
- if (options->dump_shader) {
- fprintf(stderr, "%s LLVM IR:\n\n", name);
- ac_dump_module(llvm_module);
- fprintf(stderr, "\n");
- }
+static void
+declare_esgs_ring(struct radv_shader_context *ctx)
+{
+ if (ctx->esgs_ring)
+ return;
+
+ assert(!LLVMGetNamedGlobal(ctx->ac.module, "esgs_ring"));
+
+ ctx->esgs_ring = LLVMAddGlobalInAddressSpace(ctx->ac.module, LLVMArrayType(ctx->ac.i32, 0),
+ "esgs_ring", AC_ADDR_SPACE_LDS);
+ LLVMSetLinkage(ctx->esgs_ring, LLVMExternalLinkage);
+ LLVMSetAlignment(ctx->esgs_ring, 64 * 1024);
+}
+
+static LLVMModuleRef
+ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, struct nir_shader *const *shaders,
+ int shader_count, const struct radv_shader_args *args)
+{
+ struct radv_shader_context ctx = {0};
+ ctx.args = args;
+
+ enum ac_float_mode float_mode = AC_FLOAT_MODE_DEFAULT;
+
+ if (args->shader_info->float_controls_mode & FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32) {
+ float_mode = AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO;
+ }
+
+ ac_llvm_context_init(&ctx.ac, ac_llvm, args->options->chip_class, args->options->family,
+ args->options->info, float_mode, args->shader_info->wave_size,
+ args->shader_info->ballot_bit_size);
+ ctx.context = ctx.ac.context;
+
+ ctx.max_workgroup_size = 0;
+ for (int i = 0; i < shader_count; ++i) {
+ ctx.max_workgroup_size = MAX2(
+ ctx.max_workgroup_size, radv_nir_get_max_workgroup_size(
+ args->options->chip_class, shaders[i]->info.stage, shaders[i]));
+ }
+
+ if (ctx.ac.chip_class >= GFX10) {
+ if (is_pre_gs_stage(shaders[0]->info.stage) && args->options->key.vs_common_out.as_ngg) {
+ ctx.max_workgroup_size = 128;
+ }
+ }
+
+ create_function(&ctx, shaders[shader_count - 1]->info.stage, shader_count >= 2);
+
+ ctx.abi.inputs = &ctx.inputs[0];
+ ctx.abi.emit_outputs = handle_shader_outputs_post;
+ ctx.abi.emit_vertex_with_counter = visit_emit_vertex_with_counter;
+ ctx.abi.load_ubo = radv_load_ubo;
+ ctx.abi.load_ssbo = radv_load_ssbo;
+ ctx.abi.load_sampler_desc = radv_get_sampler_desc;
+ ctx.abi.load_resource = radv_load_resource;
+ ctx.abi.load_ring_tess_factors = load_ring_tess_factors;
+ ctx.abi.load_ring_tess_offchip = load_ring_tess_offchip;
+ ctx.abi.load_ring_esgs = load_ring_esgs;
+ ctx.abi.clamp_shadow_reference = false;
+ ctx.abi.adjust_frag_coord_z = args->options->adjust_frag_coord_z;
+ ctx.abi.robust_buffer_access = args->options->robust_buffer_access;
+
+ bool is_ngg = is_pre_gs_stage(shaders[0]->info.stage) && args->options->key.vs_common_out.as_ngg;
+ if (shader_count >= 2 || is_ngg)
+ ac_init_exec_full_mask(&ctx.ac);
+
+ if (args->ac.vertex_id.used)
+ ctx.abi.vertex_id = ac_get_arg(&ctx.ac, args->ac.vertex_id);
+ if (args->ac.vs_rel_patch_id.used)
+ ctx.vs_rel_patch_id = ac_get_arg(&ctx.ac, args->ac.vs_rel_patch_id);
+ if (args->ac.instance_id.used)
+ ctx.abi.instance_id = ac_get_arg(&ctx.ac, args->ac.instance_id);
+
+ if (args->options->has_ls_vgpr_init_bug &&
+ shaders[shader_count - 1]->info.stage == MESA_SHADER_TESS_CTRL)
+ ac_nir_fixup_ls_hs_input_vgprs(&ctx);
+
+ if (is_ngg) {
+ /* Declare scratch space base for streamout and vertex
+ * compaction. Whether space is actually allocated is
+ * determined during linking / PM4 creation.
+ *
+ * Add an extra dword per vertex to ensure an odd stride, which
+ * avoids bank conflicts for SoA accesses.
+ */
+ if (!args->options->key.vs_common_out.as_ngg_passthrough)
+ declare_esgs_ring(&ctx);
+
+ /* This is really only needed when streamout and / or vertex
+ * compaction is enabled.
+ */
+ if (args->shader_info->so.num_outputs) {
+ LLVMTypeRef asi32 = LLVMArrayType(ctx.ac.i32, 8);
+ ctx.gs_ngg_scratch =
+ LLVMAddGlobalInAddressSpace(ctx.ac.module, asi32, "ngg_scratch", AC_ADDR_SPACE_LDS);
+ LLVMSetInitializer(ctx.gs_ngg_scratch, LLVMGetUndef(asi32));
+ LLVMSetAlignment(ctx.gs_ngg_scratch, 4);
+ }
+ }
+
+ for (int shader_idx = 0; shader_idx < shader_count; ++shader_idx) {
+ ctx.stage = shaders[shader_idx]->info.stage;
+ ctx.shader = shaders[shader_idx];
+ ctx.output_mask = 0;
+
+ if (shaders[shader_idx]->info.stage == MESA_SHADER_GEOMETRY) {
+ for (int i = 0; i < 4; i++) {
+ ctx.gs_next_vertex[i] = ac_build_alloca(&ctx.ac, ctx.ac.i32, "");
+ }
+ if (args->options->key.vs_common_out.as_ngg) {
+ for (unsigned i = 0; i < 4; ++i) {
+ ctx.gs_curprim_verts[i] = ac_build_alloca(&ctx.ac, ctx.ac.i32, "");
+ ctx.gs_generated_prims[i] = ac_build_alloca(&ctx.ac, ctx.ac.i32, "");
+ }
+
+ unsigned scratch_size = 8;
+ if (args->shader_info->so.num_outputs)
+ scratch_size = 44;
+
+ LLVMTypeRef ai32 = LLVMArrayType(ctx.ac.i32, scratch_size);
+ ctx.gs_ngg_scratch =
+ LLVMAddGlobalInAddressSpace(ctx.ac.module, ai32, "ngg_scratch", AC_ADDR_SPACE_LDS);
+ LLVMSetInitializer(ctx.gs_ngg_scratch, LLVMGetUndef(ai32));
+ LLVMSetAlignment(ctx.gs_ngg_scratch, 4);
+
+ ctx.gs_ngg_emit = LLVMAddGlobalInAddressSpace(
+ ctx.ac.module, LLVMArrayType(ctx.ac.i32, 0), "ngg_emit", AC_ADDR_SPACE_LDS);
+ LLVMSetLinkage(ctx.gs_ngg_emit, LLVMExternalLinkage);
+ LLVMSetAlignment(ctx.gs_ngg_emit, 4);
+ }
+
+ ctx.abi.emit_primitive = visit_end_primitive;
+ } else if (shaders[shader_idx]->info.stage == MESA_SHADER_TESS_EVAL) {
+ ctx.abi.load_tess_coord = load_tess_coord;
+ } else if (shaders[shader_idx]->info.stage == MESA_SHADER_VERTEX) {
+ ctx.abi.load_base_vertex = radv_load_base_vertex;
+ } else if (shaders[shader_idx]->info.stage == MESA_SHADER_FRAGMENT) {
+ ctx.abi.load_sample_position = load_sample_position;
+ ctx.abi.load_sample_mask_in = load_sample_mask_in;
+ }
+
+ if (shaders[shader_idx]->info.stage == MESA_SHADER_VERTEX &&
+ args->options->key.vs_common_out.as_ngg &&
+ args->options->key.vs_common_out.export_prim_id) {
+ declare_esgs_ring(&ctx);
+ }
+
+ bool nested_barrier = false;
+
+ if (shader_idx) {
+ if (shaders[shader_idx]->info.stage == MESA_SHADER_GEOMETRY &&
+ args->options->key.vs_common_out.as_ngg) {
+ gfx10_ngg_gs_emit_prologue(&ctx);
+ nested_barrier = false;
+ } else {
+ nested_barrier = true;
+ }
+ }
+
+ if (nested_barrier) {
+ /* Execute a barrier before the second shader in
+ * a merged shader.
+ *
+ * Execute the barrier inside the conditional block,
+ * so that empty waves can jump directly to s_endpgm,
+ * which will also signal the barrier.
+ *
+ * This is possible in gfx9, because an empty wave
+ * for the second shader does not participate in
+ * the epilogue. With NGG, empty waves may still
+ * be required to export data (e.g. GS output vertices),
+ * so we cannot let them exit early.
+ *
+ * If the shader is TCS and the TCS epilog is present
+ * and contains a barrier, it will wait there and then
+ * reach s_endpgm.
+ */
+ ac_emit_barrier(&ctx.ac, ctx.stage);
+ }
+
+ nir_foreach_shader_out_variable(variable, shaders[shader_idx]) scan_shader_output_decl(
+ &ctx, variable, shaders[shader_idx], shaders[shader_idx]->info.stage);
+
+ ac_setup_rings(&ctx);
+
+ LLVMBasicBlockRef merge_block = NULL;
+ if (shader_count >= 2 || is_ngg) {
+ LLVMValueRef fn = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder));
+ LLVMBasicBlockRef then_block = LLVMAppendBasicBlockInContext(ctx.ac.context, fn, "");
+ merge_block = LLVMAppendBasicBlockInContext(ctx.ac.context, fn, "");
+
+ LLVMValueRef count = ac_unpack_param(
+ &ctx.ac, ac_get_arg(&ctx.ac, args->ac.merged_wave_info), 8 * shader_idx, 8);
+ LLVMValueRef thread_id = ac_get_thread_id(&ctx.ac);
+ LLVMValueRef cond = LLVMBuildICmp(ctx.ac.builder, LLVMIntULT, thread_id, count, "");
+ LLVMBuildCondBr(ctx.ac.builder, cond, then_block, merge_block);
+
+ LLVMPositionBuilderAtEnd(ctx.ac.builder, then_block);
+ }
+
+ if (shaders[shader_idx]->info.stage == MESA_SHADER_FRAGMENT)
+ prepare_interp_optimize(&ctx, shaders[shader_idx]);
+ else if (shaders[shader_idx]->info.stage == MESA_SHADER_VERTEX)
+ handle_vs_inputs(&ctx, shaders[shader_idx]);
+ else if (shaders[shader_idx]->info.stage == MESA_SHADER_GEOMETRY)
+ prepare_gs_input_vgprs(&ctx, shader_count >= 2);
+
+ ac_nir_translate(&ctx.ac, &ctx.abi, &args->ac, shaders[shader_idx]);
+
+ if (shader_count >= 2 || is_ngg) {
+ LLVMBuildBr(ctx.ac.builder, merge_block);
+ LLVMPositionBuilderAtEnd(ctx.ac.builder, merge_block);
+ }
+
+ /* This needs to be outside the if wrapping the shader body, as sometimes
+ * the HW generates waves with 0 es/vs threads. */
+ if (is_pre_gs_stage(shaders[shader_idx]->info.stage) &&
+ args->options->key.vs_common_out.as_ngg && shader_idx == shader_count - 1) {
+ handle_ngg_outputs_post_2(&ctx);
+ } else if (shaders[shader_idx]->info.stage == MESA_SHADER_GEOMETRY &&
+ args->options->key.vs_common_out.as_ngg) {
+ gfx10_ngg_gs_emit_epilogue_2(&ctx);
+ }
+ }
+
+ LLVMBuildRetVoid(ctx.ac.builder);
+
+ if (args->options->dump_preoptir) {
+ fprintf(stderr, "%s LLVM IR:\n\n",
+ radv_get_shader_name(args->shader_info, shaders[shader_count - 1]->info.stage));
+ ac_dump_module(ctx.ac.module);
+ fprintf(stderr, "\n");
+ }
+
+ ac_llvm_finalize_module(&ctx, ac_llvm->passmgr, args->options);
+
+ if (shader_count == 1)
+ ac_nir_eliminate_const_vs_outputs(&ctx);
+
+ if (args->options->dump_shader) {
+ args->shader_info->private_mem_vgprs = ac_count_scratch_private_memory(ctx.main_function);
+ }
+
+ return ctx.ac.module;
+}
+
+static void
+ac_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
+{
+ unsigned *retval = (unsigned *)context;
+ LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
+ char *description = LLVMGetDiagInfoDescription(di);
+
+ if (severity == LLVMDSError) {
+ *retval = 1;
+ fprintf(stderr, "LLVM triggered Diagnostic Handler: %s\n", description);
+ }
- if (options->record_ir) {
- char *llvm_ir = LLVMPrintModuleToString(llvm_module);
- llvm_ir_string = strdup(llvm_ir);
- LLVMDisposeMessage(llvm_ir);
- }
+ LLVMDisposeMessage(description);
+}
- int v = radv_llvm_compile(llvm_module, &elf_buffer, &elf_size, ac_llvm);
- if (v) {
- fprintf(stderr, "compile failed\n");
- }
+static unsigned
+radv_llvm_compile(LLVMModuleRef M, char **pelf_buffer, size_t *pelf_size,
+ struct ac_llvm_compiler *ac_llvm)
+{
+ unsigned retval = 0;
+ LLVMContextRef llvm_ctx;
- LLVMContextRef ctx = LLVMGetModuleContext(llvm_module);
- LLVMDisposeModule(llvm_module);
- LLVMContextDispose(ctx);
+   /* Set up the diagnostic handler. */
+ llvm_ctx = LLVMGetModuleContext(M);
- size_t llvm_ir_size = llvm_ir_string ? strlen(llvm_ir_string) : 0;
- size_t alloc_size = sizeof(struct radv_shader_binary_rtld) + elf_size + llvm_ir_size + 1;
- struct radv_shader_binary_rtld *rbin = calloc(1, alloc_size);
- memcpy(rbin->data, elf_buffer, elf_size);
- if (llvm_ir_string)
- memcpy(rbin->data + elf_size, llvm_ir_string, llvm_ir_size + 1);
+ LLVMContextSetDiagnosticHandler(llvm_ctx, ac_diagnostic_handler, &retval);
- rbin->base.type = RADV_BINARY_TYPE_RTLD;
- rbin->base.stage = stage;
- rbin->base.total_size = alloc_size;
- rbin->elf_size = elf_size;
- rbin->llvm_ir_size = llvm_ir_size;
- *rbinary = &rbin->base;
+   /* Compile the IR. */
+ if (!radv_compile_to_elf(ac_llvm, M, pelf_buffer, pelf_size))
+ retval = 1;
+ return retval;
+}
- free(llvm_ir_string);
- free(elf_buffer);
+static void
+ac_compile_llvm_module(struct ac_llvm_compiler *ac_llvm, LLVMModuleRef llvm_module,
+ struct radv_shader_binary **rbinary, gl_shader_stage stage, const char *name,
+ const struct radv_nir_compiler_options *options)
+{
+ char *elf_buffer = NULL;
+ size_t elf_size = 0;
+ char *llvm_ir_string = NULL;
+
+ if (options->dump_shader) {
+ fprintf(stderr, "%s LLVM IR:\n\n", name);
+ ac_dump_module(llvm_module);
+ fprintf(stderr, "\n");
+ }
+
+ if (options->record_ir) {
+ char *llvm_ir = LLVMPrintModuleToString(llvm_module);
+ llvm_ir_string = strdup(llvm_ir);
+ LLVMDisposeMessage(llvm_ir);
+ }
+
+ int v = radv_llvm_compile(llvm_module, &elf_buffer, &elf_size, ac_llvm);
+ if (v) {
+ fprintf(stderr, "compile failed\n");
+ }
+
+ LLVMContextRef ctx = LLVMGetModuleContext(llvm_module);
+ LLVMDisposeModule(llvm_module);
+ LLVMContextDispose(ctx);
+
+ size_t llvm_ir_size = llvm_ir_string ? strlen(llvm_ir_string) : 0;
+ size_t alloc_size = sizeof(struct radv_shader_binary_rtld) + elf_size + llvm_ir_size + 1;
+ struct radv_shader_binary_rtld *rbin = calloc(1, alloc_size);
+ memcpy(rbin->data, elf_buffer, elf_size);
+ if (llvm_ir_string)
+ memcpy(rbin->data + elf_size, llvm_ir_string, llvm_ir_size + 1);
+
+ rbin->base.type = RADV_BINARY_TYPE_RTLD;
+ rbin->base.stage = stage;
+ rbin->base.total_size = alloc_size;
+ rbin->elf_size = elf_size;
+ rbin->llvm_ir_size = llvm_ir_size;
+ *rbinary = &rbin->base;
+
+ free(llvm_ir_string);
+ free(elf_buffer);
}
static void
-radv_compile_nir_shader(struct ac_llvm_compiler *ac_llvm,
- struct radv_shader_binary **rbinary,
- const struct radv_shader_args *args,
- struct nir_shader *const *nir,
- int nir_count)
+radv_compile_nir_shader(struct ac_llvm_compiler *ac_llvm, struct radv_shader_binary **rbinary,
+ const struct radv_shader_args *args, struct nir_shader *const *nir,
+ int nir_count)
{
- LLVMModuleRef llvm_module;
+ LLVMModuleRef llvm_module;
- llvm_module = ac_translate_nir_to_llvm(ac_llvm, nir, nir_count, args);
+ llvm_module = ac_translate_nir_to_llvm(ac_llvm, nir, nir_count, args);
- ac_compile_llvm_module(ac_llvm, llvm_module, rbinary,
- nir[nir_count - 1]->info.stage,
- radv_get_shader_name(args->shader_info,
- nir[nir_count - 1]->info.stage),
- args->options);
+ ac_compile_llvm_module(ac_llvm, llvm_module, rbinary, nir[nir_count - 1]->info.stage,
+ radv_get_shader_name(args->shader_info, nir[nir_count - 1]->info.stage),
+ args->options);
- /* Determine the ES type (VS or TES) for the GS on GFX9. */
- if (args->options->chip_class >= GFX9) {
- if (nir_count == 2 &&
- nir[1]->info.stage == MESA_SHADER_GEOMETRY) {
- args->shader_info->gs.es_type = nir[0]->info.stage;
- }
- }
+ /* Determine the ES type (VS or TES) for the GS on GFX9. */
+ if (args->options->chip_class >= GFX9) {
+ if (nir_count == 2 && nir[1]->info.stage == MESA_SHADER_GEOMETRY) {
+ args->shader_info->gs.es_type = nir[0]->info.stage;
+ }
+ }
}
static void
ac_gs_copy_shader_emit(struct radv_shader_context *ctx)
{
- LLVMValueRef vtx_offset =
- LLVMBuildMul(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->ac.vertex_id),
- LLVMConstInt(ctx->ac.i32, 4, false), "");
- LLVMValueRef stream_id;
-
- /* Fetch the vertex stream ID. */
- if (!ctx->args->options->use_ngg_streamout &&
- ctx->args->shader_info->so.num_outputs) {
- stream_id =
- ac_unpack_param(&ctx->ac,
- ac_get_arg(&ctx->ac,
- ctx->args->ac.streamout_config),
- 24, 2);
- } else {
- stream_id = ctx->ac.i32_0;
- }
-
- LLVMBasicBlockRef end_bb;
- LLVMValueRef switch_inst;
-
- end_bb = LLVMAppendBasicBlockInContext(ctx->ac.context,
- ctx->main_function, "end");
- switch_inst = LLVMBuildSwitch(ctx->ac.builder, stream_id, end_bb, 4);
-
- for (unsigned stream = 0; stream < 4; stream++) {
- unsigned num_components =
- ctx->args->shader_info->gs.num_stream_output_components[stream];
- LLVMBasicBlockRef bb;
- unsigned offset;
-
- if (stream > 0 && !num_components)
- continue;
-
- if (stream > 0 && !ctx->args->shader_info->so.num_outputs)
- continue;
-
- bb = LLVMInsertBasicBlockInContext(ctx->ac.context, end_bb, "out");
- LLVMAddCase(switch_inst, LLVMConstInt(ctx->ac.i32, stream, 0), bb);
- LLVMPositionBuilderAtEnd(ctx->ac.builder, bb);
-
- offset = 0;
- for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
- unsigned output_usage_mask =
- ctx->args->shader_info->gs.output_usage_mask[i];
- unsigned output_stream =
- ctx->args->shader_info->gs.output_streams[i];
- int length = util_last_bit(output_usage_mask);
-
- if (!(ctx->output_mask & (1ull << i)) ||
- output_stream != stream)
- continue;
-
- for (unsigned j = 0; j < length; j++) {
- LLVMValueRef value, soffset;
-
- if (!(output_usage_mask & (1 << j)))
- continue;
-
- soffset = LLVMConstInt(ctx->ac.i32,
- offset *
- ctx->shader->info.gs.vertices_out * 16 * 4, false);
-
- offset++;
-
- value = ac_build_buffer_load(&ctx->ac,
- ctx->gsvs_ring[0],
- 1, ctx->ac.i32_0,
- vtx_offset, soffset,
- 0, ctx->ac.f32, ac_glc | ac_slc, true, false);
-
- LLVMTypeRef type = LLVMGetAllocatedType(ctx->abi.outputs[ac_llvm_reg_index_soa(i, j)]);
- if (ac_get_type_size(type) == 2) {
- value = LLVMBuildBitCast(ctx->ac.builder, value, ctx->ac.i32, "");
- value = LLVMBuildTrunc(ctx->ac.builder, value, ctx->ac.i16, "");
- }
-
- LLVMBuildStore(ctx->ac.builder,
- ac_to_float(&ctx->ac, value), ctx->abi.outputs[ac_llvm_reg_index_soa(i, j)]);
- }
- }
-
- if (!ctx->args->options->use_ngg_streamout &&
- ctx->args->shader_info->so.num_outputs)
- radv_emit_streamout(ctx, stream);
-
- if (stream == 0) {
- handle_vs_outputs_post(ctx, false, true,
- &ctx->args->shader_info->vs.outinfo);
- }
-
- LLVMBuildBr(ctx->ac.builder, end_bb);
- }
-
- LLVMPositionBuilderAtEnd(ctx->ac.builder, end_bb);
+ LLVMValueRef vtx_offset =
+ LLVMBuildMul(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->ac.vertex_id),
+ LLVMConstInt(ctx->ac.i32, 4, false), "");
+ LLVMValueRef stream_id;
+
+ /* Fetch the vertex stream ID. */
+ if (!ctx->args->options->use_ngg_streamout && ctx->args->shader_info->so.num_outputs) {
+ stream_id =
+ ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.streamout_config), 24, 2);
+ } else {
+ stream_id = ctx->ac.i32_0;
+ }
+
+ LLVMBasicBlockRef end_bb;
+ LLVMValueRef switch_inst;
+
+ end_bb = LLVMAppendBasicBlockInContext(ctx->ac.context, ctx->main_function, "end");
+ switch_inst = LLVMBuildSwitch(ctx->ac.builder, stream_id, end_bb, 4);
+
+ for (unsigned stream = 0; stream < 4; stream++) {
+ unsigned num_components = ctx->args->shader_info->gs.num_stream_output_components[stream];
+ LLVMBasicBlockRef bb;
+ unsigned offset;
+
+ if (stream > 0 && !num_components)
+ continue;
+
+ if (stream > 0 && !ctx->args->shader_info->so.num_outputs)
+ continue;
+
+ bb = LLVMInsertBasicBlockInContext(ctx->ac.context, end_bb, "out");
+ LLVMAddCase(switch_inst, LLVMConstInt(ctx->ac.i32, stream, 0), bb);
+ LLVMPositionBuilderAtEnd(ctx->ac.builder, bb);
+
+ offset = 0;
+ for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
+ unsigned output_usage_mask = ctx->args->shader_info->gs.output_usage_mask[i];
+ unsigned output_stream = ctx->args->shader_info->gs.output_streams[i];
+ int length = util_last_bit(output_usage_mask);
+
+ if (!(ctx->output_mask & (1ull << i)) || output_stream != stream)
+ continue;
+
+ for (unsigned j = 0; j < length; j++) {
+ LLVMValueRef value, soffset;
+
+ if (!(output_usage_mask & (1 << j)))
+ continue;
+
+ soffset = LLVMConstInt(ctx->ac.i32, offset * ctx->shader->info.gs.vertices_out * 16 * 4,
+ false);
+
+ offset++;
+
+ value = ac_build_buffer_load(&ctx->ac, ctx->gsvs_ring[0], 1, ctx->ac.i32_0, vtx_offset,
+ soffset, 0, ctx->ac.f32, ac_glc | ac_slc, true, false);
+
+ LLVMTypeRef type = LLVMGetAllocatedType(ctx->abi.outputs[ac_llvm_reg_index_soa(i, j)]);
+ if (ac_get_type_size(type) == 2) {
+ value = LLVMBuildBitCast(ctx->ac.builder, value, ctx->ac.i32, "");
+ value = LLVMBuildTrunc(ctx->ac.builder, value, ctx->ac.i16, "");
+ }
+
+ LLVMBuildStore(ctx->ac.builder, ac_to_float(&ctx->ac, value),
+ ctx->abi.outputs[ac_llvm_reg_index_soa(i, j)]);
+ }
+ }
+
+ if (!ctx->args->options->use_ngg_streamout && ctx->args->shader_info->so.num_outputs)
+ radv_emit_streamout(ctx, stream);
+
+ if (stream == 0) {
+ handle_vs_outputs_post(ctx, false, true, &ctx->args->shader_info->vs.outinfo);
+ }
+
+ LLVMBuildBr(ctx->ac.builder, end_bb);
+ }
+
+ LLVMPositionBuilderAtEnd(ctx->ac.builder, end_bb);
}
static void
-radv_compile_gs_copy_shader(struct ac_llvm_compiler *ac_llvm,
- struct nir_shader *geom_shader,
- struct radv_shader_binary **rbinary,
- const struct radv_shader_args *args)
+radv_compile_gs_copy_shader(struct ac_llvm_compiler *ac_llvm, struct nir_shader *geom_shader,
+ struct radv_shader_binary **rbinary,
+ const struct radv_shader_args *args)
{
- struct radv_shader_context ctx = {0};
- ctx.args = args;
-
- assert(args->is_gs_copy_shader);
+ struct radv_shader_context ctx = {0};
+ ctx.args = args;
- ac_llvm_context_init(&ctx.ac, ac_llvm, args->options->chip_class,
- args->options->family, args->options->info,
- AC_FLOAT_MODE_DEFAULT, 64, 64);
- ctx.context = ctx.ac.context;
+ assert(args->is_gs_copy_shader);
- ctx.stage = MESA_SHADER_VERTEX;
- ctx.shader = geom_shader;
+ ac_llvm_context_init(&ctx.ac, ac_llvm, args->options->chip_class, args->options->family,
+ args->options->info, AC_FLOAT_MODE_DEFAULT, 64, 64);
+ ctx.context = ctx.ac.context;
- create_function(&ctx, MESA_SHADER_VERTEX, false);
+ ctx.stage = MESA_SHADER_VERTEX;
+ ctx.shader = geom_shader;
- ac_setup_rings(&ctx);
+ create_function(&ctx, MESA_SHADER_VERTEX, false);
- nir_foreach_shader_out_variable(variable, geom_shader) {
- scan_shader_output_decl(&ctx, variable, geom_shader, MESA_SHADER_VERTEX);
- ac_handle_shader_output_decl(&ctx.ac, &ctx.abi, geom_shader,
- variable, MESA_SHADER_VERTEX);
- }
+ ac_setup_rings(&ctx);
- ac_gs_copy_shader_emit(&ctx);
+ nir_foreach_shader_out_variable(variable, geom_shader)
+ {
+ scan_shader_output_decl(&ctx, variable, geom_shader, MESA_SHADER_VERTEX);
+ ac_handle_shader_output_decl(&ctx.ac, &ctx.abi, geom_shader, variable, MESA_SHADER_VERTEX);
+ }
- LLVMBuildRetVoid(ctx.ac.builder);
+ ac_gs_copy_shader_emit(&ctx);
- ac_llvm_finalize_module(&ctx, ac_llvm->passmgr, args->options);
+ LLVMBuildRetVoid(ctx.ac.builder);
- ac_compile_llvm_module(ac_llvm, ctx.ac.module, rbinary,
- MESA_SHADER_VERTEX, "GS Copy Shader", args->options);
- (*rbinary)->is_gs_copy_shader = true;
+ ac_llvm_finalize_module(&ctx, ac_llvm->passmgr, args->options);
+ ac_compile_llvm_module(ac_llvm, ctx.ac.module, rbinary, MESA_SHADER_VERTEX, "GS Copy Shader",
+ args->options);
+ (*rbinary)->is_gs_copy_shader = true;
}
void
-llvm_compile_shader(struct radv_device *device,
- unsigned shader_count,
- struct nir_shader *const *shaders,
- struct radv_shader_binary **binary,
- struct radv_shader_args *args)
-{
- enum ac_target_machine_options tm_options = 0;
- struct ac_llvm_compiler ac_llvm;
- bool thread_compiler;
-
- tm_options |= AC_TM_SUPPORTS_SPILL;
- if (args->options->check_ir)
- tm_options |= AC_TM_CHECK_IR;
-
- thread_compiler = !(device->instance->debug_flags & RADV_DEBUG_NOTHREADLLVM);
-
- radv_init_llvm_compiler(&ac_llvm, thread_compiler,
- args->options->family, tm_options,
- args->shader_info->wave_size);
-
- if (args->is_gs_copy_shader) {
- radv_compile_gs_copy_shader(&ac_llvm, *shaders, binary, args);
- } else {
- radv_compile_nir_shader(&ac_llvm, binary, args,
- shaders, shader_count);
- }
-
- radv_destroy_llvm_compiler(&ac_llvm, thread_compiler);
+llvm_compile_shader(struct radv_device *device, unsigned shader_count,
+ struct nir_shader *const *shaders, struct radv_shader_binary **binary,
+ struct radv_shader_args *args)
+{
+ enum ac_target_machine_options tm_options = 0;
+ struct ac_llvm_compiler ac_llvm;
+ bool thread_compiler;
+
+ tm_options |= AC_TM_SUPPORTS_SPILL;
+ if (args->options->check_ir)
+ tm_options |= AC_TM_CHECK_IR;
+
+ thread_compiler = !(device->instance->debug_flags & RADV_DEBUG_NOTHREADLLVM);
+
+ radv_init_llvm_compiler(&ac_llvm, thread_compiler, args->options->family, tm_options,
+ args->shader_info->wave_size);
+
+ if (args->is_gs_copy_shader) {
+ radv_compile_gs_copy_shader(&ac_llvm, *shaders, binary, args);
+ } else {
+ radv_compile_nir_shader(&ac_llvm, binary, args, shaders, shader_count);
+ }
+
+ radv_destroy_llvm_compiler(&ac_llvm, thread_compiler);
}
diff --git a/src/amd/vulkan/radv_pass.c b/src/amd/vulkan/radv_pass.c
index 3e1db72df0b..117bb3569be 100644
--- a/src/amd/vulkan/radv_pass.c
+++ b/src/amd/vulkan/radv_pass.c
@@ -29,312 +29,291 @@
#include "vk_util.h"
static void
-radv_render_pass_add_subpass_dep(struct radv_render_pass *pass,
- const VkSubpassDependency2 *dep)
+radv_render_pass_add_subpass_dep(struct radv_render_pass *pass, const VkSubpassDependency2 *dep)
{
- uint32_t src = dep->srcSubpass;
- uint32_t dst = dep->dstSubpass;
-
- /* Ignore subpass self-dependencies as they allow the app to call
- * vkCmdPipelineBarrier() inside the render pass and the driver should
- * only do the barrier when called, not when starting the render pass.
- */
- if (src == dst)
- return;
-
- /* Accumulate all ingoing external dependencies to the first subpass. */
- if (src == VK_SUBPASS_EXTERNAL)
- dst = 0;
-
- if (dst == VK_SUBPASS_EXTERNAL) {
- if (dep->dstStageMask != VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)
- pass->end_barrier.src_stage_mask |= dep->srcStageMask;
- pass->end_barrier.src_access_mask |= dep->srcAccessMask;
- pass->end_barrier.dst_access_mask |= dep->dstAccessMask;
- } else {
- if (dep->dstStageMask != VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)
- pass->subpasses[dst].start_barrier.src_stage_mask |= dep->srcStageMask;
- pass->subpasses[dst].start_barrier.src_access_mask |= dep->srcAccessMask;
- pass->subpasses[dst].start_barrier.dst_access_mask |= dep->dstAccessMask;
- }
+ uint32_t src = dep->srcSubpass;
+ uint32_t dst = dep->dstSubpass;
+
+ /* Ignore subpass self-dependencies as they allow the app to call
+ * vkCmdPipelineBarrier() inside the render pass and the driver should
+ * only do the barrier when called, not when starting the render pass.
+ */
+ if (src == dst)
+ return;
+
+ /* Accumulate all ingoing external dependencies to the first subpass. */
+ if (src == VK_SUBPASS_EXTERNAL)
+ dst = 0;
+
+ if (dst == VK_SUBPASS_EXTERNAL) {
+ if (dep->dstStageMask != VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)
+ pass->end_barrier.src_stage_mask |= dep->srcStageMask;
+ pass->end_barrier.src_access_mask |= dep->srcAccessMask;
+ pass->end_barrier.dst_access_mask |= dep->dstAccessMask;
+ } else {
+ if (dep->dstStageMask != VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)
+ pass->subpasses[dst].start_barrier.src_stage_mask |= dep->srcStageMask;
+ pass->subpasses[dst].start_barrier.src_access_mask |= dep->srcAccessMask;
+ pass->subpasses[dst].start_barrier.dst_access_mask |= dep->dstAccessMask;
+ }
}
static void
radv_render_pass_add_implicit_deps(struct radv_render_pass *pass)
{
- /* From the Vulkan 1.0.39 spec:
- *
- * If there is no subpass dependency from VK_SUBPASS_EXTERNAL to the
- * first subpass that uses an attachment, then an implicit subpass
- * dependency exists from VK_SUBPASS_EXTERNAL to the first subpass it is
- * used in. The implicit subpass dependency only exists if there
- * exists an automatic layout transition away from initialLayout.
- * The subpass dependency operates as if defined with the
- * following parameters:
- *
- * VkSubpassDependency implicitDependency = {
- * .srcSubpass = VK_SUBPASS_EXTERNAL;
- * .dstSubpass = firstSubpass; // First subpass attachment is used in
- * .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
- * .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
- * .srcAccessMask = 0;
- * .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
- * VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
- * VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
- * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
- * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
- * .dependencyFlags = 0;
- * };
- *
- * Similarly, if there is no subpass dependency from the last subpass
- * that uses an attachment to VK_SUBPASS_EXTERNAL, then an implicit
- * subpass dependency exists from the last subpass it is used in to
- * VK_SUBPASS_EXTERNAL. The implicit subpass dependency only exists
- * if there exists an automatic layout transition into finalLayout.
- * The subpass dependency operates as if defined with the following
- * parameters:
- *
- * VkSubpassDependency implicitDependency = {
- * .srcSubpass = lastSubpass; // Last subpass attachment is used in
- * .dstSubpass = VK_SUBPASS_EXTERNAL;
- * .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
- * .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
- * .srcAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
- * VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
- * VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
- * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
- * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
- * .dstAccessMask = 0;
- * .dependencyFlags = 0;
- * };
- */
- for (uint32_t i = 0; i < pass->subpass_count; i++) {
- struct radv_subpass *subpass = &pass->subpasses[i];
- bool add_ingoing_dep = false, add_outgoing_dep = false;
-
- for (uint32_t j = 0; j < subpass->attachment_count; j++) {
- struct radv_subpass_attachment *subpass_att =
- &subpass->attachments[j];
- if (subpass_att->attachment == VK_ATTACHMENT_UNUSED)
- continue;
-
- struct radv_render_pass_attachment *pass_att =
- &pass->attachments[subpass_att->attachment];
- uint32_t initial_layout = pass_att->initial_layout;
- uint32_t stencil_initial_layout = pass_att->stencil_initial_layout;
- uint32_t final_layout = pass_att->final_layout;
- uint32_t stencil_final_layout = pass_att->stencil_final_layout;
-
- /* The implicit subpass dependency only exists if
- * there exists an automatic layout transition away
- * from initialLayout.
- */
- if (pass_att->first_subpass_idx == i &&
- !subpass->has_ingoing_dep &&
- ((subpass_att->layout != initial_layout) ||
- (subpass_att->layout != stencil_initial_layout))) {
- add_ingoing_dep = true;
- }
-
- /* The implicit subpass dependency only exists if
- * there exists an automatic layout transition into
- * finalLayout.
- */
- if (pass_att->last_subpass_idx == i &&
- !subpass->has_outgoing_dep &&
- ((subpass_att->layout != final_layout) ||
- (subpass_att->layout != stencil_final_layout))) {
- add_outgoing_dep = true;
- }
- }
-
- if (add_ingoing_dep) {
- const VkSubpassDependency2KHR implicit_ingoing_dep = {
- .srcSubpass = VK_SUBPASS_EXTERNAL,
- .dstSubpass = i, /* first subpass attachment is used in */
- .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
- .srcAccessMask = 0,
- .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
- VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
- VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
- VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
- VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
- .dependencyFlags = 0,
- };
-
- radv_render_pass_add_subpass_dep(pass,
- &implicit_ingoing_dep);
- }
-
- if (add_outgoing_dep) {
- const VkSubpassDependency2KHR implicit_outgoing_dep = {
- .srcSubpass = i, /* last subpass attachment is used in */
- .dstSubpass = VK_SUBPASS_EXTERNAL,
- .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
- .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
- .srcAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
- VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
- VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
- VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
- VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
- .dstAccessMask = 0,
- .dependencyFlags = 0,
- };
-
- radv_render_pass_add_subpass_dep(pass,
- &implicit_outgoing_dep);
- }
- }
+ /* From the Vulkan 1.0.39 spec:
+ *
+ * If there is no subpass dependency from VK_SUBPASS_EXTERNAL to the
+ * first subpass that uses an attachment, then an implicit subpass
+ * dependency exists from VK_SUBPASS_EXTERNAL to the first subpass it is
+ * used in. The implicit subpass dependency only exists if there
+ * exists an automatic layout transition away from initialLayout.
+ * The subpass dependency operates as if defined with the
+ * following parameters:
+ *
+ * VkSubpassDependency implicitDependency = {
+ * .srcSubpass = VK_SUBPASS_EXTERNAL;
+ * .dstSubpass = firstSubpass; // First subpass attachment is used in
+ * .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
+ * .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
+ * .srcAccessMask = 0;
+ * .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
+ * VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
+ * VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
+ * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
+ * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+ * .dependencyFlags = 0;
+ * };
+ *
+ * Similarly, if there is no subpass dependency from the last subpass
+ * that uses an attachment to VK_SUBPASS_EXTERNAL, then an implicit
+ * subpass dependency exists from the last subpass it is used in to
+ * VK_SUBPASS_EXTERNAL. The implicit subpass dependency only exists
+ * if there exists an automatic layout transition into finalLayout.
+ * The subpass dependency operates as if defined with the following
+ * parameters:
+ *
+ * VkSubpassDependency implicitDependency = {
+ * .srcSubpass = lastSubpass; // Last subpass attachment is used in
+ * .dstSubpass = VK_SUBPASS_EXTERNAL;
+ * .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
+ * .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
+ * .srcAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
+ * VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
+ * VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
+ * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
+ * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+ * .dstAccessMask = 0;
+ * .dependencyFlags = 0;
+ * };
+ */
+ for (uint32_t i = 0; i < pass->subpass_count; i++) {
+ struct radv_subpass *subpass = &pass->subpasses[i];
+ bool add_ingoing_dep = false, add_outgoing_dep = false;
+
+ for (uint32_t j = 0; j < subpass->attachment_count; j++) {
+ struct radv_subpass_attachment *subpass_att = &subpass->attachments[j];
+ if (subpass_att->attachment == VK_ATTACHMENT_UNUSED)
+ continue;
+
+ struct radv_render_pass_attachment *pass_att = &pass->attachments[subpass_att->attachment];
+ uint32_t initial_layout = pass_att->initial_layout;
+ uint32_t stencil_initial_layout = pass_att->stencil_initial_layout;
+ uint32_t final_layout = pass_att->final_layout;
+ uint32_t stencil_final_layout = pass_att->stencil_final_layout;
+
+ /* The implicit subpass dependency only exists if
+ * there exists an automatic layout transition away
+ * from initialLayout.
+ */
+ if (pass_att->first_subpass_idx == i && !subpass->has_ingoing_dep &&
+ ((subpass_att->layout != initial_layout) ||
+ (subpass_att->layout != stencil_initial_layout))) {
+ add_ingoing_dep = true;
+ }
+
+ /* The implicit subpass dependency only exists if
+ * there exists an automatic layout transition into
+ * finalLayout.
+ */
+ if (pass_att->last_subpass_idx == i && !subpass->has_outgoing_dep &&
+ ((subpass_att->layout != final_layout) ||
+ (subpass_att->layout != stencil_final_layout))) {
+ add_outgoing_dep = true;
+ }
+ }
+
+ if (add_ingoing_dep) {
+ const VkSubpassDependency2KHR implicit_ingoing_dep = {
+ .srcSubpass = VK_SUBPASS_EXTERNAL,
+ .dstSubpass = i, /* first subpass attachment is used in */
+ .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+ .srcAccessMask = 0,
+ .dstAccessMask =
+ VK_ACCESS_INPUT_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
+ VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
+ VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
+ .dependencyFlags = 0,
+ };
+
+ radv_render_pass_add_subpass_dep(pass, &implicit_ingoing_dep);
+ }
+
+ if (add_outgoing_dep) {
+ const VkSubpassDependency2KHR implicit_outgoing_dep = {
+ .srcSubpass = i, /* last subpass attachment is used in */
+ .dstSubpass = VK_SUBPASS_EXTERNAL,
+ .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+ .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+ .srcAccessMask =
+ VK_ACCESS_INPUT_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
+ VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
+ VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
+ .dstAccessMask = 0,
+ .dependencyFlags = 0,
+ };
+
+ radv_render_pass_add_subpass_dep(pass, &implicit_outgoing_dep);
+ }
+ }
}
static void
radv_render_pass_compile(struct radv_render_pass *pass)
{
- for (uint32_t i = 0; i < pass->subpass_count; i++) {
- struct radv_subpass *subpass = &pass->subpasses[i];
-
- for (uint32_t j = 0; j < subpass->attachment_count; j++) {
- struct radv_subpass_attachment *subpass_att =
- &subpass->attachments[j];
- if (subpass_att->attachment == VK_ATTACHMENT_UNUSED)
- continue;
-
- struct radv_render_pass_attachment *pass_att =
- &pass->attachments[subpass_att->attachment];
-
- pass_att->first_subpass_idx = VK_SUBPASS_EXTERNAL;
- pass_att->last_subpass_idx = VK_SUBPASS_EXTERNAL;
- }
- }
-
- for (uint32_t i = 0; i < pass->subpass_count; i++) {
- struct radv_subpass *subpass = &pass->subpasses[i];
- uint32_t color_sample_count = 1, depth_sample_count = 1;
-
- /* We don't allow depth_stencil_attachment to be non-NULL and
- * be VK_ATTACHMENT_UNUSED. This way something can just check
- * for NULL and be guaranteed that they have a valid
- * attachment.
- */
- if (subpass->depth_stencil_attachment &&
- subpass->depth_stencil_attachment->attachment == VK_ATTACHMENT_UNUSED)
- subpass->depth_stencil_attachment = NULL;
-
- if (subpass->ds_resolve_attachment &&
- subpass->ds_resolve_attachment->attachment == VK_ATTACHMENT_UNUSED)
- subpass->ds_resolve_attachment = NULL;
-
- for (uint32_t j = 0; j < subpass->attachment_count; j++) {
- struct radv_subpass_attachment *subpass_att =
- &subpass->attachments[j];
- if (subpass_att->attachment == VK_ATTACHMENT_UNUSED)
- continue;
-
- struct radv_render_pass_attachment *pass_att =
- &pass->attachments[subpass_att->attachment];
-
- if (i < pass_att->first_subpass_idx)
- pass_att->first_subpass_idx = i;
- pass_att->last_subpass_idx = i;
- }
-
- subpass->has_color_att = false;
- for (uint32_t j = 0; j < subpass->color_count; j++) {
- struct radv_subpass_attachment *subpass_att =
- &subpass->color_attachments[j];
- if (subpass_att->attachment == VK_ATTACHMENT_UNUSED)
- continue;
-
- subpass->has_color_att = true;
-
- struct radv_render_pass_attachment *pass_att =
- &pass->attachments[subpass_att->attachment];
-
- color_sample_count = pass_att->samples;
- }
-
- if (subpass->depth_stencil_attachment) {
- const uint32_t a =
- subpass->depth_stencil_attachment->attachment;
- struct radv_render_pass_attachment *pass_att =
- &pass->attachments[a];
- depth_sample_count = pass_att->samples;
- }
-
- subpass->max_sample_count = MAX2(color_sample_count,
- depth_sample_count);
- subpass->color_sample_count = color_sample_count;
- subpass->depth_sample_count = depth_sample_count;
-
- /* We have to handle resolve attachments specially */
- subpass->has_color_resolve = false;
- if (subpass->resolve_attachments) {
- for (uint32_t j = 0; j < subpass->color_count; j++) {
- struct radv_subpass_attachment *resolve_att =
- &subpass->resolve_attachments[j];
-
- if (resolve_att->attachment == VK_ATTACHMENT_UNUSED)
- continue;
-
- subpass->has_color_resolve = true;
- }
- }
-
- for (uint32_t j = 0; j < subpass->input_count; ++j) {
- if (subpass->input_attachments[j].attachment == VK_ATTACHMENT_UNUSED)
- continue;
-
- for (uint32_t k = 0; k < subpass->color_count; ++k) {
- if (subpass->color_attachments[k].attachment == subpass->input_attachments[j].attachment) {
- subpass->input_attachments[j].in_render_loop = true;
- subpass->color_attachments[k].in_render_loop = true;
- }
- }
-
- if (subpass->depth_stencil_attachment &&
- subpass->depth_stencil_attachment->attachment == subpass->input_attachments[j].attachment) {
- subpass->input_attachments[j].in_render_loop = true;
- subpass->depth_stencil_attachment->in_render_loop = true;
- }
- }
- }
+ for (uint32_t i = 0; i < pass->subpass_count; i++) {
+ struct radv_subpass *subpass = &pass->subpasses[i];
+
+ for (uint32_t j = 0; j < subpass->attachment_count; j++) {
+ struct radv_subpass_attachment *subpass_att = &subpass->attachments[j];
+ if (subpass_att->attachment == VK_ATTACHMENT_UNUSED)
+ continue;
+
+ struct radv_render_pass_attachment *pass_att = &pass->attachments[subpass_att->attachment];
+
+ pass_att->first_subpass_idx = VK_SUBPASS_EXTERNAL;
+ pass_att->last_subpass_idx = VK_SUBPASS_EXTERNAL;
+ }
+ }
+
+ for (uint32_t i = 0; i < pass->subpass_count; i++) {
+ struct radv_subpass *subpass = &pass->subpasses[i];
+ uint32_t color_sample_count = 1, depth_sample_count = 1;
+
+      /* We don't allow depth_stencil_attachment to be non-NULL while
+       * referring to VK_ATTACHMENT_UNUSED. This way code can simply
+       * check for NULL and be guaranteed that a non-NULL pointer is a
+       * valid attachment.
+       */
+ if (subpass->depth_stencil_attachment &&
+ subpass->depth_stencil_attachment->attachment == VK_ATTACHMENT_UNUSED)
+ subpass->depth_stencil_attachment = NULL;
+
+ if (subpass->ds_resolve_attachment &&
+ subpass->ds_resolve_attachment->attachment == VK_ATTACHMENT_UNUSED)
+ subpass->ds_resolve_attachment = NULL;
+
+ for (uint32_t j = 0; j < subpass->attachment_count; j++) {
+ struct radv_subpass_attachment *subpass_att = &subpass->attachments[j];
+ if (subpass_att->attachment == VK_ATTACHMENT_UNUSED)
+ continue;
+
+ struct radv_render_pass_attachment *pass_att = &pass->attachments[subpass_att->attachment];
+
+ if (i < pass_att->first_subpass_idx)
+ pass_att->first_subpass_idx = i;
+ pass_att->last_subpass_idx = i;
+ }
+
+ subpass->has_color_att = false;
+ for (uint32_t j = 0; j < subpass->color_count; j++) {
+ struct radv_subpass_attachment *subpass_att = &subpass->color_attachments[j];
+ if (subpass_att->attachment == VK_ATTACHMENT_UNUSED)
+ continue;
+
+ subpass->has_color_att = true;
+
+ struct radv_render_pass_attachment *pass_att = &pass->attachments[subpass_att->attachment];
+
+ color_sample_count = pass_att->samples;
+ }
+
+ if (subpass->depth_stencil_attachment) {
+ const uint32_t a = subpass->depth_stencil_attachment->attachment;
+ struct radv_render_pass_attachment *pass_att = &pass->attachments[a];
+ depth_sample_count = pass_att->samples;
+ }
+
+ subpass->max_sample_count = MAX2(color_sample_count, depth_sample_count);
+ subpass->color_sample_count = color_sample_count;
+ subpass->depth_sample_count = depth_sample_count;
+
+ /* We have to handle resolve attachments specially */
+ subpass->has_color_resolve = false;
+ if (subpass->resolve_attachments) {
+ for (uint32_t j = 0; j < subpass->color_count; j++) {
+ struct radv_subpass_attachment *resolve_att = &subpass->resolve_attachments[j];
+
+ if (resolve_att->attachment == VK_ATTACHMENT_UNUSED)
+ continue;
+
+ subpass->has_color_resolve = true;
+ }
+ }
+
+ for (uint32_t j = 0; j < subpass->input_count; ++j) {
+ if (subpass->input_attachments[j].attachment == VK_ATTACHMENT_UNUSED)
+ continue;
+
+ for (uint32_t k = 0; k < subpass->color_count; ++k) {
+ if (subpass->color_attachments[k].attachment ==
+ subpass->input_attachments[j].attachment) {
+ subpass->input_attachments[j].in_render_loop = true;
+ subpass->color_attachments[k].in_render_loop = true;
+ }
+ }
+
+ if (subpass->depth_stencil_attachment && subpass->depth_stencil_attachment->attachment ==
+ subpass->input_attachments[j].attachment) {
+ subpass->input_attachments[j].in_render_loop = true;
+ subpass->depth_stencil_attachment->in_render_loop = true;
+ }
+ }
+ }
}
static void
-radv_destroy_render_pass(struct radv_device *device,
- const VkAllocationCallbacks *pAllocator,
- struct radv_render_pass *pass)
+radv_destroy_render_pass(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
+ struct radv_render_pass *pass)
{
- vk_object_base_finish(&pass->base);
- vk_free2(&device->vk.alloc, pAllocator, pass->subpass_attachments);
- vk_free2(&device->vk.alloc, pAllocator, pass);
+ vk_object_base_finish(&pass->base);
+ vk_free2(&device->vk.alloc, pAllocator, pass->subpass_attachments);
+ vk_free2(&device->vk.alloc, pAllocator, pass);
}
static unsigned
radv_num_subpass_attachments2(const VkSubpassDescription2 *desc)
{
- const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
- vk_find_struct_const(desc->pNext,
- SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE);
-
- return desc->inputAttachmentCount +
- desc->colorAttachmentCount +
- (desc->pResolveAttachments ? desc->colorAttachmentCount : 0) +
- (desc->pDepthStencilAttachment != NULL) +
- (ds_resolve && ds_resolve->pDepthStencilResolveAttachment);
+ const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
+ vk_find_struct_const(desc->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE);
+
+ return desc->inputAttachmentCount + desc->colorAttachmentCount +
+ (desc->pResolveAttachments ? desc->colorAttachmentCount : 0) +
+ (desc->pDepthStencilAttachment != NULL) +
+ (ds_resolve && ds_resolve->pDepthStencilResolveAttachment);
}
static bool
vk_image_layout_depth_only(VkImageLayout layout)
{
- switch (layout) {
- case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL:
- case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL:
- return true;
- default:
- return false;
- }
+ switch (layout) {
+ case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL:
+ case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL:
+ return true;
+ default:
+ return false;
+ }
}
/* From the Vulkan Specification 1.2.166 - VkAttachmentReference2:
@@ -348,16 +327,15 @@ vk_image_layout_depth_only(VkImageLayout layout)
static VkImageLayout
stencil_ref_layout(const VkAttachmentReference2 *att_ref)
{
- if (!vk_image_layout_depth_only(att_ref->layout))
- return att_ref->layout;
+ if (!vk_image_layout_depth_only(att_ref->layout))
+ return att_ref->layout;
- const VkAttachmentReferenceStencilLayoutKHR *stencil_ref =
- vk_find_struct_const(att_ref->pNext,
- ATTACHMENT_REFERENCE_STENCIL_LAYOUT_KHR);
- if (!stencil_ref)
- return VK_IMAGE_LAYOUT_UNDEFINED;
+ const VkAttachmentReferenceStencilLayoutKHR *stencil_ref =
+ vk_find_struct_const(att_ref->pNext, ATTACHMENT_REFERENCE_STENCIL_LAYOUT_KHR);
+ if (!stencil_ref)
+ return VK_IMAGE_LAYOUT_UNDEFINED;
- return stencil_ref->stencilLayout;
+ return stencil_ref->stencilLayout;
}
/* From the Vulkan Specification 1.2.166 - VkAttachmentDescription2:
@@ -372,212 +350,197 @@ stencil_ref_layout(const VkAttachmentReference2 *att_ref)
static VkImageLayout
stencil_desc_layout(const VkAttachmentDescription2KHR *att_desc, bool final)
{
- const struct util_format_description *desc = vk_format_description(att_desc->format);
- if (!util_format_has_stencil(desc))
- return VK_IMAGE_LAYOUT_UNDEFINED;
-
- const VkImageLayout main_layout =
- final ? att_desc->finalLayout : att_desc->initialLayout;
- if (!vk_image_layout_depth_only(main_layout))
- return main_layout;
-
- const VkAttachmentDescriptionStencilLayoutKHR *stencil_desc =
- vk_find_struct_const(att_desc->pNext,
- ATTACHMENT_DESCRIPTION_STENCIL_LAYOUT_KHR);
- assert(stencil_desc);
- return final ? stencil_desc->stencilFinalLayout : stencil_desc->stencilInitialLayout;
+ const struct util_format_description *desc = vk_format_description(att_desc->format);
+ if (!util_format_has_stencil(desc))
+ return VK_IMAGE_LAYOUT_UNDEFINED;
+
+ const VkImageLayout main_layout = final ? att_desc->finalLayout : att_desc->initialLayout;
+ if (!vk_image_layout_depth_only(main_layout))
+ return main_layout;
+
+ const VkAttachmentDescriptionStencilLayoutKHR *stencil_desc =
+ vk_find_struct_const(att_desc->pNext, ATTACHMENT_DESCRIPTION_STENCIL_LAYOUT_KHR);
+ assert(stencil_desc);
+ return final ? stencil_desc->stencilFinalLayout : stencil_desc->stencilInitialLayout;
}
-VkResult radv_CreateRenderPass2(
- VkDevice _device,
- const VkRenderPassCreateInfo2* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkRenderPass* pRenderPass)
+VkResult
+radv_CreateRenderPass2(VkDevice _device, const VkRenderPassCreateInfo2 *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkRenderPass *pRenderPass)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- struct radv_render_pass *pass;
- size_t size;
- size_t attachments_offset;
-
- assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2);
-
- size = sizeof(*pass);
- size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);
- attachments_offset = size;
- size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]);
-
- pass = vk_alloc2(&device->vk.alloc, pAllocator, size, 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (pass == NULL)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
-
- memset(pass, 0, size);
-
- vk_object_base_init(&device->vk, &pass->base,
- VK_OBJECT_TYPE_RENDER_PASS);
-
- pass->attachment_count = pCreateInfo->attachmentCount;
- pass->subpass_count = pCreateInfo->subpassCount;
- pass->attachments = (struct radv_render_pass_attachment *)((uint8_t *) pass + attachments_offset);
-
- for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
- struct radv_render_pass_attachment *att = &pass->attachments[i];
-
- att->format = pCreateInfo->pAttachments[i].format;
- att->samples = pCreateInfo->pAttachments[i].samples;
- att->load_op = pCreateInfo->pAttachments[i].loadOp;
- att->stencil_load_op = pCreateInfo->pAttachments[i].stencilLoadOp;
- att->initial_layout = pCreateInfo->pAttachments[i].initialLayout;
- att->final_layout = pCreateInfo->pAttachments[i].finalLayout;
- att->stencil_initial_layout = stencil_desc_layout(&pCreateInfo->pAttachments[i], false);
- att->stencil_final_layout = stencil_desc_layout(&pCreateInfo->pAttachments[i], true);
- // att->store_op = pCreateInfo->pAttachments[i].storeOp;
- // att->stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp;
- }
- uint32_t subpass_attachment_count = 0;
- struct radv_subpass_attachment *p;
- for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
- subpass_attachment_count +=
- radv_num_subpass_attachments2(&pCreateInfo->pSubpasses[i]);
- }
-
- if (subpass_attachment_count) {
- pass->subpass_attachments =
- vk_alloc2(&device->vk.alloc, pAllocator,
- subpass_attachment_count * sizeof(struct radv_subpass_attachment), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (pass->subpass_attachments == NULL) {
- radv_destroy_render_pass(device, pAllocator, pass);
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- }
- } else
- pass->subpass_attachments = NULL;
-
- p = pass->subpass_attachments;
- for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
- const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
- struct radv_subpass *subpass = &pass->subpasses[i];
-
- subpass->input_count = desc->inputAttachmentCount;
- subpass->color_count = desc->colorAttachmentCount;
- subpass->attachment_count = radv_num_subpass_attachments2(desc);
- subpass->attachments = p;
- subpass->view_mask = desc->viewMask;
-
- if (desc->inputAttachmentCount > 0) {
- subpass->input_attachments = p;
- p += desc->inputAttachmentCount;
-
- for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
- subpass->input_attachments[j] = (struct radv_subpass_attachment) {
- .attachment = desc->pInputAttachments[j].attachment,
- .layout = desc->pInputAttachments[j].layout,
- .stencil_layout = stencil_ref_layout(&desc->pInputAttachments[j]),
- };
- }
- }
-
- if (desc->colorAttachmentCount > 0) {
- subpass->color_attachments = p;
- p += desc->colorAttachmentCount;
-
- for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
- subpass->color_attachments[j] = (struct radv_subpass_attachment) {
- .attachment = desc->pColorAttachments[j].attachment,
- .layout = desc->pColorAttachments[j].layout,
- };
- }
- }
-
- if (desc->pResolveAttachments) {
- subpass->resolve_attachments = p;
- p += desc->colorAttachmentCount;
-
- for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
- subpass->resolve_attachments[j] = (struct radv_subpass_attachment) {
- .attachment = desc->pResolveAttachments[j].attachment,
- .layout = desc->pResolveAttachments[j].layout,
- };
- }
- }
-
- if (desc->pDepthStencilAttachment) {
- subpass->depth_stencil_attachment = p++;
-
- *subpass->depth_stencil_attachment = (struct radv_subpass_attachment) {
- .attachment = desc->pDepthStencilAttachment->attachment,
- .layout = desc->pDepthStencilAttachment->layout,
- .stencil_layout = stencil_ref_layout(desc->pDepthStencilAttachment),
- };
- }
-
- const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
- vk_find_struct_const(desc->pNext,
- SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE);
-
- if (ds_resolve && ds_resolve->pDepthStencilResolveAttachment) {
- subpass->ds_resolve_attachment = p++;
-
- *subpass->ds_resolve_attachment = (struct radv_subpass_attachment) {
- .attachment = ds_resolve->pDepthStencilResolveAttachment->attachment,
- .layout = ds_resolve->pDepthStencilResolveAttachment->layout,
- .stencil_layout = stencil_ref_layout(ds_resolve->pDepthStencilResolveAttachment),
- };
-
- subpass->depth_resolve_mode = ds_resolve->depthResolveMode;
- subpass->stencil_resolve_mode = ds_resolve->stencilResolveMode;
- }
- }
-
- for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) {
- const VkSubpassDependency2 *dep = &pCreateInfo->pDependencies[i];
-
- radv_render_pass_add_subpass_dep(pass,
- &pCreateInfo->pDependencies[i]);
-
- /* Determine if the subpass has explicit dependencies from/to
- * VK_SUBPASS_EXTERNAL.
- */
- if (dep->srcSubpass == VK_SUBPASS_EXTERNAL &&
- dep->dstSubpass != VK_SUBPASS_EXTERNAL) {
- pass->subpasses[dep->dstSubpass].has_ingoing_dep = true;
- }
-
- if (dep->dstSubpass == VK_SUBPASS_EXTERNAL &&
- dep->srcSubpass != VK_SUBPASS_EXTERNAL) {
- pass->subpasses[dep->srcSubpass].has_outgoing_dep = true;
- }
- }
-
- radv_render_pass_compile(pass);
-
- radv_render_pass_add_implicit_deps(pass);
-
- *pRenderPass = radv_render_pass_to_handle(pass);
-
- return VK_SUCCESS;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ struct radv_render_pass *pass;
+ size_t size;
+ size_t attachments_offset;
+
+ assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2);
+
+ size = sizeof(*pass);
+ size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);
+ attachments_offset = size;
+ size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]);
+
+ pass = vk_alloc2(&device->vk.alloc, pAllocator, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (pass == NULL)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ memset(pass, 0, size);
+
+ vk_object_base_init(&device->vk, &pass->base, VK_OBJECT_TYPE_RENDER_PASS);
+
+ pass->attachment_count = pCreateInfo->attachmentCount;
+ pass->subpass_count = pCreateInfo->subpassCount;
+ pass->attachments = (struct radv_render_pass_attachment *)((uint8_t *)pass + attachments_offset);
+
+ for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
+ struct radv_render_pass_attachment *att = &pass->attachments[i];
+
+ att->format = pCreateInfo->pAttachments[i].format;
+ att->samples = pCreateInfo->pAttachments[i].samples;
+ att->load_op = pCreateInfo->pAttachments[i].loadOp;
+ att->stencil_load_op = pCreateInfo->pAttachments[i].stencilLoadOp;
+ att->initial_layout = pCreateInfo->pAttachments[i].initialLayout;
+ att->final_layout = pCreateInfo->pAttachments[i].finalLayout;
+ att->stencil_initial_layout = stencil_desc_layout(&pCreateInfo->pAttachments[i], false);
+ att->stencil_final_layout = stencil_desc_layout(&pCreateInfo->pAttachments[i], true);
+ // att->store_op = pCreateInfo->pAttachments[i].storeOp;
+ // att->stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp;
+ }
+ uint32_t subpass_attachment_count = 0;
+ struct radv_subpass_attachment *p;
+ for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
+ subpass_attachment_count += radv_num_subpass_attachments2(&pCreateInfo->pSubpasses[i]);
+ }
+
+ if (subpass_attachment_count) {
+ pass->subpass_attachments =
+ vk_alloc2(&device->vk.alloc, pAllocator,
+ subpass_attachment_count * sizeof(struct radv_subpass_attachment), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (pass->subpass_attachments == NULL) {
+ radv_destroy_render_pass(device, pAllocator, pass);
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+ } else
+ pass->subpass_attachments = NULL;
+
+ p = pass->subpass_attachments;
+ for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
+ const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
+ struct radv_subpass *subpass = &pass->subpasses[i];
+
+ subpass->input_count = desc->inputAttachmentCount;
+ subpass->color_count = desc->colorAttachmentCount;
+ subpass->attachment_count = radv_num_subpass_attachments2(desc);
+ subpass->attachments = p;
+ subpass->view_mask = desc->viewMask;
+
+ if (desc->inputAttachmentCount > 0) {
+ subpass->input_attachments = p;
+ p += desc->inputAttachmentCount;
+
+ for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
+ subpass->input_attachments[j] = (struct radv_subpass_attachment){
+ .attachment = desc->pInputAttachments[j].attachment,
+ .layout = desc->pInputAttachments[j].layout,
+ .stencil_layout = stencil_ref_layout(&desc->pInputAttachments[j]),
+ };
+ }
+ }
+
+ if (desc->colorAttachmentCount > 0) {
+ subpass->color_attachments = p;
+ p += desc->colorAttachmentCount;
+
+ for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
+ subpass->color_attachments[j] = (struct radv_subpass_attachment){
+ .attachment = desc->pColorAttachments[j].attachment,
+ .layout = desc->pColorAttachments[j].layout,
+ };
+ }
+ }
+
+ if (desc->pResolveAttachments) {
+ subpass->resolve_attachments = p;
+ p += desc->colorAttachmentCount;
+
+ for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
+ subpass->resolve_attachments[j] = (struct radv_subpass_attachment){
+ .attachment = desc->pResolveAttachments[j].attachment,
+ .layout = desc->pResolveAttachments[j].layout,
+ };
+ }
+ }
+
+ if (desc->pDepthStencilAttachment) {
+ subpass->depth_stencil_attachment = p++;
+
+ *subpass->depth_stencil_attachment = (struct radv_subpass_attachment){
+ .attachment = desc->pDepthStencilAttachment->attachment,
+ .layout = desc->pDepthStencilAttachment->layout,
+ .stencil_layout = stencil_ref_layout(desc->pDepthStencilAttachment),
+ };
+ }
+
+ const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
+ vk_find_struct_const(desc->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE);
+
+ if (ds_resolve && ds_resolve->pDepthStencilResolveAttachment) {
+ subpass->ds_resolve_attachment = p++;
+
+ *subpass->ds_resolve_attachment = (struct radv_subpass_attachment){
+ .attachment = ds_resolve->pDepthStencilResolveAttachment->attachment,
+ .layout = ds_resolve->pDepthStencilResolveAttachment->layout,
+ .stencil_layout = stencil_ref_layout(ds_resolve->pDepthStencilResolveAttachment),
+ };
+
+ subpass->depth_resolve_mode = ds_resolve->depthResolveMode;
+ subpass->stencil_resolve_mode = ds_resolve->stencilResolveMode;
+ }
+ }
+
+ for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) {
+ const VkSubpassDependency2 *dep = &pCreateInfo->pDependencies[i];
+
+ radv_render_pass_add_subpass_dep(pass, &pCreateInfo->pDependencies[i]);
+
+ /* Determine if the subpass has explicit dependencies from/to
+ * VK_SUBPASS_EXTERNAL.
+ */
+ if (dep->srcSubpass == VK_SUBPASS_EXTERNAL && dep->dstSubpass != VK_SUBPASS_EXTERNAL) {
+ pass->subpasses[dep->dstSubpass].has_ingoing_dep = true;
+ }
+
+ if (dep->dstSubpass == VK_SUBPASS_EXTERNAL && dep->srcSubpass != VK_SUBPASS_EXTERNAL) {
+ pass->subpasses[dep->srcSubpass].has_outgoing_dep = true;
+ }
+ }
+
+ radv_render_pass_compile(pass);
+
+ radv_render_pass_add_implicit_deps(pass);
+
+ *pRenderPass = radv_render_pass_to_handle(pass);
+
+ return VK_SUCCESS;
}
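
The render pass creation above uses a single allocation for the pass object: the struct is followed in the same block by the subpass array, the attachment array sits at a pre-computed byte offset, and a second allocation is then walked with the bump pointer p to hand out per-subpass attachment references. Below is a minimal standalone sketch of the same "one block plus computed offset" layout; the names and sizes are illustrative only and are not part of RADV.

#include <stdint.h>
#include <stdlib.h>

struct item {
   uint32_t value;
};

struct container {
   uint32_t item_count;
   struct item *items; /* points into the same allocation */
};

static struct container *
container_create(uint32_t item_count)
{
   size_t size = sizeof(struct container);
   size_t items_offset = size;
   size += item_count * sizeof(struct item);

   /* One allocation covers the header and the trailing array, so a single
    * free() releases everything and the array needs no separate lifetime
    * tracking. */
   struct container *c = calloc(1, size);
   if (!c)
      return NULL;

   c->item_count = item_count;
   c->items = (struct item *)((uint8_t *)c + items_offset);
   return c;
}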
-void radv_DestroyRenderPass(
- VkDevice _device,
- VkRenderPass _pass,
- const VkAllocationCallbacks* pAllocator)
+void
+radv_DestroyRenderPass(VkDevice _device, VkRenderPass _pass,
+ const VkAllocationCallbacks *pAllocator)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_render_pass, pass, _pass);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_render_pass, pass, _pass);
- if (!_pass)
- return;
+ if (!_pass)
+ return;
- radv_destroy_render_pass(device, pAllocator, pass);
+ radv_destroy_render_pass(device, pAllocator, pass);
}
-void radv_GetRenderAreaGranularity(
- VkDevice device,
- VkRenderPass renderPass,
- VkExtent2D* pGranularity)
+void
+radv_GetRenderAreaGranularity(VkDevice device, VkRenderPass renderPass, VkExtent2D *pGranularity)
{
- pGranularity->width = 1;
- pGranularity->height = 1;
+ pGranularity->width = 1;
+ pGranularity->height = 1;
}
-
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index b47dc5ae4fe..71b2aaca605 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -25,559 +25,558 @@
* IN THE SOFTWARE.
*/
+#include "nir/nir.h"
+#include "nir/nir_builder.h"
+#include "nir/nir_xfb_info.h"
+#include "spirv/nir_spirv.h"
#include "util/disk_cache.h"
#include "util/mesa-sha1.h"
#include "util/u_atomic.h"
+#include "radv_cs.h"
#include "radv_debug.h"
#include "radv_private.h"
-#include "radv_cs.h"
#include "radv_shader.h"
-#include "nir/nir.h"
-#include "nir/nir_builder.h"
-#include "nir/nir_xfb_info.h"
-#include "spirv/nir_spirv.h"
#include "vk_util.h"
-#include "sid.h"
+#include "util/debug.h"
#include "ac_binary.h"
+#include "ac_exp_param.h"
#include "ac_llvm_util.h"
#include "ac_nir_to_llvm.h"
-#include "vk_format.h"
-#include "util/debug.h"
-#include "ac_exp_param.h"
#include "ac_shader_util.h"
+#include "aco_interface.h"
+#include "sid.h"
+#include "vk_format.h"
struct radv_blend_state {
- uint32_t blend_enable_4bit;
- uint32_t need_src_alpha;
+ uint32_t blend_enable_4bit;
+ uint32_t need_src_alpha;
- uint32_t cb_color_control;
- uint32_t cb_target_mask;
- uint32_t cb_target_enabled_4bit;
- uint32_t sx_mrt_blend_opt[8];
- uint32_t cb_blend_control[8];
+ uint32_t cb_color_control;
+ uint32_t cb_target_mask;
+ uint32_t cb_target_enabled_4bit;
+ uint32_t sx_mrt_blend_opt[8];
+ uint32_t cb_blend_control[8];
- uint32_t spi_shader_col_format;
- uint32_t col_format_is_int8;
- uint32_t col_format_is_int10;
- uint32_t cb_shader_mask;
- uint32_t db_alpha_to_mask;
+ uint32_t spi_shader_col_format;
+ uint32_t col_format_is_int8;
+ uint32_t col_format_is_int10;
+ uint32_t cb_shader_mask;
+ uint32_t db_alpha_to_mask;
- uint32_t commutative_4bit;
+ uint32_t commutative_4bit;
- bool single_cb_enable;
- bool mrt0_is_dual_src;
+ bool single_cb_enable;
+ bool mrt0_is_dual_src;
};
struct radv_dsa_order_invariance {
- /* Whether the final result in Z/S buffers is guaranteed to be
- * invariant under changes to the order in which fragments arrive.
- */
- bool zs;
-
- /* Whether the set of fragments that pass the combined Z/S test is
- * guaranteed to be invariant under changes to the order in which
- * fragments arrive.
- */
- bool pass_set;
+ /* Whether the final result in Z/S buffers is guaranteed to be
+ * invariant under changes to the order in which fragments arrive.
+ */
+ bool zs;
+
+ /* Whether the set of fragments that pass the combined Z/S test is
+ * guaranteed to be invariant under changes to the order in which
+ * fragments arrive.
+ */
+ bool pass_set;
};
static const VkPipelineMultisampleStateCreateInfo *
radv_pipeline_get_multisample_state(const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
- if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable)
- return pCreateInfo->pMultisampleState;
- return NULL;
+ if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable)
+ return pCreateInfo->pMultisampleState;
+ return NULL;
}
static const VkPipelineTessellationStateCreateInfo *
radv_pipeline_get_tessellation_state(const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
- for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
- if (pCreateInfo->pStages[i].stage == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT ||
- pCreateInfo->pStages[i].stage == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) {
- return pCreateInfo->pTessellationState;
- }
- }
- return NULL;
+ for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
+ if (pCreateInfo->pStages[i].stage == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT ||
+ pCreateInfo->pStages[i].stage == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) {
+ return pCreateInfo->pTessellationState;
+ }
+ }
+ return NULL;
}
static const VkPipelineDepthStencilStateCreateInfo *
radv_pipeline_get_depth_stencil_state(const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
- RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
- struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
+ RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
+ struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
- if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
- subpass->depth_stencil_attachment)
- return pCreateInfo->pDepthStencilState;
- return NULL;
+ if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
+ subpass->depth_stencil_attachment)
+ return pCreateInfo->pDepthStencilState;
+ return NULL;
}
static const VkPipelineColorBlendStateCreateInfo *
radv_pipeline_get_color_blend_state(const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
- RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
- struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
+ RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
+ struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
- if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
- subpass->has_color_att)
- return pCreateInfo->pColorBlendState;
- return NULL;
+ if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable && subpass->has_color_att)
+ return pCreateInfo->pColorBlendState;
+ return NULL;
}
-bool radv_pipeline_has_ngg(const struct radv_pipeline *pipeline)
+bool
+radv_pipeline_has_ngg(const struct radv_pipeline *pipeline)
{
- struct radv_shader_variant *variant = NULL;
- if (pipeline->shaders[MESA_SHADER_GEOMETRY])
- variant = pipeline->shaders[MESA_SHADER_GEOMETRY];
- else if (pipeline->shaders[MESA_SHADER_TESS_EVAL])
- variant = pipeline->shaders[MESA_SHADER_TESS_EVAL];
- else if (pipeline->shaders[MESA_SHADER_VERTEX])
- variant = pipeline->shaders[MESA_SHADER_VERTEX];
- else
- return false;
- return variant->info.is_ngg;
+ struct radv_shader_variant *variant = NULL;
+ if (pipeline->shaders[MESA_SHADER_GEOMETRY])
+ variant = pipeline->shaders[MESA_SHADER_GEOMETRY];
+ else if (pipeline->shaders[MESA_SHADER_TESS_EVAL])
+ variant = pipeline->shaders[MESA_SHADER_TESS_EVAL];
+ else if (pipeline->shaders[MESA_SHADER_VERTEX])
+ variant = pipeline->shaders[MESA_SHADER_VERTEX];
+ else
+ return false;
+ return variant->info.is_ngg;
}
-bool radv_pipeline_has_ngg_passthrough(const struct radv_pipeline *pipeline)
+bool
+radv_pipeline_has_ngg_passthrough(const struct radv_pipeline *pipeline)
{
- assert(radv_pipeline_has_ngg(pipeline));
-
- struct radv_shader_variant *variant = NULL;
- if (pipeline->shaders[MESA_SHADER_GEOMETRY])
- variant = pipeline->shaders[MESA_SHADER_GEOMETRY];
- else if (pipeline->shaders[MESA_SHADER_TESS_EVAL])
- variant = pipeline->shaders[MESA_SHADER_TESS_EVAL];
- else if (pipeline->shaders[MESA_SHADER_VERTEX])
- variant = pipeline->shaders[MESA_SHADER_VERTEX];
- else
- return false;
- return variant->info.is_ngg_passthrough;
+ assert(radv_pipeline_has_ngg(pipeline));
+
+ struct radv_shader_variant *variant = NULL;
+ if (pipeline->shaders[MESA_SHADER_GEOMETRY])
+ variant = pipeline->shaders[MESA_SHADER_GEOMETRY];
+ else if (pipeline->shaders[MESA_SHADER_TESS_EVAL])
+ variant = pipeline->shaders[MESA_SHADER_TESS_EVAL];
+ else if (pipeline->shaders[MESA_SHADER_VERTEX])
+ variant = pipeline->shaders[MESA_SHADER_VERTEX];
+ else
+ return false;
+ return variant->info.is_ngg_passthrough;
}
-bool radv_pipeline_has_gs_copy_shader(const struct radv_pipeline *pipeline)
+bool
+radv_pipeline_has_gs_copy_shader(const struct radv_pipeline *pipeline)
{
- if (!radv_pipeline_has_gs(pipeline))
- return false;
-
- /* The GS copy shader is required if the pipeline has GS on GFX6-GFX9.
- * On GFX10, it might be required in rare cases if it's not possible to
- * enable NGG.
- */
- if (radv_pipeline_has_ngg(pipeline))
- return false;
-
- assert(pipeline->gs_copy_shader);
- return true;
+ if (!radv_pipeline_has_gs(pipeline))
+ return false;
+
+ /* The GS copy shader is required if the pipeline has GS on GFX6-GFX9.
+ * On GFX10, it might be required in rare cases if it's not possible to
+ * enable NGG.
+ */
+ if (radv_pipeline_has_ngg(pipeline))
+ return false;
+
+ assert(pipeline->gs_copy_shader);
+ return true;
}
static void
-radv_pipeline_destroy(struct radv_device *device,
- struct radv_pipeline *pipeline,
- const VkAllocationCallbacks* allocator)
+radv_pipeline_destroy(struct radv_device *device, struct radv_pipeline *pipeline,
+ const VkAllocationCallbacks *allocator)
{
- for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i)
- if (pipeline->shaders[i])
- radv_shader_variant_destroy(device, pipeline->shaders[i]);
+ for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i)
+ if (pipeline->shaders[i])
+ radv_shader_variant_destroy(device, pipeline->shaders[i]);
- if (pipeline->gs_copy_shader)
- radv_shader_variant_destroy(device, pipeline->gs_copy_shader);
+ if (pipeline->gs_copy_shader)
+ radv_shader_variant_destroy(device, pipeline->gs_copy_shader);
- if(pipeline->cs.buf)
- free(pipeline->cs.buf);
+ if (pipeline->cs.buf)
+ free(pipeline->cs.buf);
- vk_object_base_finish(&pipeline->base);
- vk_free2(&device->vk.alloc, allocator, pipeline);
+ vk_object_base_finish(&pipeline->base);
+ vk_free2(&device->vk.alloc, allocator, pipeline);
}
-void radv_DestroyPipeline(
- VkDevice _device,
- VkPipeline _pipeline,
- const VkAllocationCallbacks* pAllocator)
+void
+radv_DestroyPipeline(VkDevice _device, VkPipeline _pipeline,
+ const VkAllocationCallbacks *pAllocator)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
- if (!_pipeline)
- return;
+ if (!_pipeline)
+ return;
- radv_pipeline_destroy(device, pipeline, pAllocator);
+ radv_pipeline_destroy(device, pipeline, pAllocator);
}
-static uint32_t get_hash_flags(const struct radv_device *device, bool stats)
+static uint32_t
+get_hash_flags(const struct radv_device *device, bool stats)
{
- uint32_t hash_flags = 0;
-
- if (device->instance->debug_flags & RADV_DEBUG_NO_NGG)
- hash_flags |= RADV_HASH_SHADER_NO_NGG;
- if (device->physical_device->cs_wave_size == 32)
- hash_flags |= RADV_HASH_SHADER_CS_WAVE32;
- if (device->physical_device->ps_wave_size == 32)
- hash_flags |= RADV_HASH_SHADER_PS_WAVE32;
- if (device->physical_device->ge_wave_size == 32)
- hash_flags |= RADV_HASH_SHADER_GE_WAVE32;
- if (device->physical_device->use_llvm)
- hash_flags |= RADV_HASH_SHADER_LLVM;
- if (device->instance->debug_flags & RADV_DEBUG_DISCARD_TO_DEMOTE)
- hash_flags |= RADV_HASH_SHADER_DISCARD_TO_DEMOTE;
- if (device->instance->enable_mrt_output_nan_fixup)
- hash_flags |= RADV_HASH_SHADER_MRT_NAN_FIXUP;
- if (device->instance->debug_flags & RADV_DEBUG_INVARIANT_GEOM)
- hash_flags |= RADV_HASH_SHADER_INVARIANT_GEOM;
- if (stats)
- hash_flags |= RADV_HASH_SHADER_KEEP_STATISTICS;
- if (device->force_vrs != RADV_FORCE_VRS_2x2)
- hash_flags |= RADV_HASH_SHADER_FORCE_VRS_2x2;
- if (device->force_vrs != RADV_FORCE_VRS_2x1)
- hash_flags |= RADV_HASH_SHADER_FORCE_VRS_2x1;
- if (device->force_vrs != RADV_FORCE_VRS_1x2)
- hash_flags |= RADV_HASH_SHADER_FORCE_VRS_1x2;
- return hash_flags;
+ uint32_t hash_flags = 0;
+
+ if (device->instance->debug_flags & RADV_DEBUG_NO_NGG)
+ hash_flags |= RADV_HASH_SHADER_NO_NGG;
+ if (device->physical_device->cs_wave_size == 32)
+ hash_flags |= RADV_HASH_SHADER_CS_WAVE32;
+ if (device->physical_device->ps_wave_size == 32)
+ hash_flags |= RADV_HASH_SHADER_PS_WAVE32;
+ if (device->physical_device->ge_wave_size == 32)
+ hash_flags |= RADV_HASH_SHADER_GE_WAVE32;
+ if (device->physical_device->use_llvm)
+ hash_flags |= RADV_HASH_SHADER_LLVM;
+ if (device->instance->debug_flags & RADV_DEBUG_DISCARD_TO_DEMOTE)
+ hash_flags |= RADV_HASH_SHADER_DISCARD_TO_DEMOTE;
+ if (device->instance->enable_mrt_output_nan_fixup)
+ hash_flags |= RADV_HASH_SHADER_MRT_NAN_FIXUP;
+ if (device->instance->debug_flags & RADV_DEBUG_INVARIANT_GEOM)
+ hash_flags |= RADV_HASH_SHADER_INVARIANT_GEOM;
+ if (stats)
+ hash_flags |= RADV_HASH_SHADER_KEEP_STATISTICS;
+ if (device->force_vrs != RADV_FORCE_VRS_2x2)
+ hash_flags |= RADV_HASH_SHADER_FORCE_VRS_2x2;
+ if (device->force_vrs != RADV_FORCE_VRS_2x1)
+ hash_flags |= RADV_HASH_SHADER_FORCE_VRS_2x1;
+ if (device->force_vrs != RADV_FORCE_VRS_1x2)
+ hash_flags |= RADV_HASH_SHADER_FORCE_VRS_1x2;
+ return hash_flags;
}
static void
-radv_pipeline_init_scratch(const struct radv_device *device,
- struct radv_pipeline *pipeline)
+radv_pipeline_init_scratch(const struct radv_device *device, struct radv_pipeline *pipeline)
{
- unsigned scratch_bytes_per_wave = 0;
- unsigned max_waves = 0;
- unsigned min_waves = 1;
-
- for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
- if (pipeline->shaders[i] &&
- pipeline->shaders[i]->config.scratch_bytes_per_wave) {
- unsigned max_stage_waves = device->scratch_waves;
-
- scratch_bytes_per_wave = MAX2(scratch_bytes_per_wave,
- pipeline->shaders[i]->config.scratch_bytes_per_wave);
-
- max_stage_waves = MIN2(max_stage_waves,
- 4 * device->physical_device->rad_info.num_good_compute_units *
- (256 / pipeline->shaders[i]->config.num_vgprs));
- max_waves = MAX2(max_waves, max_stage_waves);
- }
- }
-
- if (pipeline->shaders[MESA_SHADER_COMPUTE]) {
- unsigned group_size = pipeline->shaders[MESA_SHADER_COMPUTE]->info.cs.block_size[0] *
- pipeline->shaders[MESA_SHADER_COMPUTE]->info.cs.block_size[1] *
- pipeline->shaders[MESA_SHADER_COMPUTE]->info.cs.block_size[2];
- min_waves = MAX2(min_waves, round_up_u32(group_size, 64));
- }
-
- pipeline->scratch_bytes_per_wave = scratch_bytes_per_wave;
- pipeline->max_waves = max_waves;
+ unsigned scratch_bytes_per_wave = 0;
+ unsigned max_waves = 0;
+ unsigned min_waves = 1;
+
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
+ if (pipeline->shaders[i] && pipeline->shaders[i]->config.scratch_bytes_per_wave) {
+ unsigned max_stage_waves = device->scratch_waves;
+
+ scratch_bytes_per_wave =
+ MAX2(scratch_bytes_per_wave, pipeline->shaders[i]->config.scratch_bytes_per_wave);
+
+ max_stage_waves =
+ MIN2(max_stage_waves, 4 * device->physical_device->rad_info.num_good_compute_units *
+ (256 / pipeline->shaders[i]->config.num_vgprs));
+ max_waves = MAX2(max_waves, max_stage_waves);
+ }
+ }
+
+ if (pipeline->shaders[MESA_SHADER_COMPUTE]) {
+ unsigned group_size = pipeline->shaders[MESA_SHADER_COMPUTE]->info.cs.block_size[0] *
+ pipeline->shaders[MESA_SHADER_COMPUTE]->info.cs.block_size[1] *
+ pipeline->shaders[MESA_SHADER_COMPUTE]->info.cs.block_size[2];
+ min_waves = MAX2(min_waves, round_up_u32(group_size, 64));
+ }
+
+ pipeline->scratch_bytes_per_wave = scratch_bytes_per_wave;
+ pipeline->max_waves = max_waves;
}
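
radv_pipeline_init_scratch caps the wave count of each scratch-using stage both by the device-wide scratch wave limit and by VGPR pressure: 4 * num_good_compute_units * (256 / num_vgprs), i.e. roughly four SIMDs per CU, each with a 256-entry VGPR budget per wave slot. A worked example with assumed numbers, not values from a real device:

#include <stdio.h>

int
main(void)
{
   unsigned scratch_waves = 1024; /* assumed device->scratch_waves */
   unsigned num_cu = 40;          /* assumed number of compute units */
   unsigned num_vgprs = 64;       /* assumed VGPRs used by one stage */

   unsigned by_vgprs = 4 * num_cu * (256 / num_vgprs); /* 4 * 40 * 4 = 640 */
   unsigned max_stage_waves = by_vgprs < scratch_waves ? by_vgprs : scratch_waves;

   printf("max_stage_waves = %u\n", max_stage_waves); /* 640 */
   return 0;
}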
-static uint32_t si_translate_blend_logic_op(VkLogicOp op)
+static uint32_t
+si_translate_blend_logic_op(VkLogicOp op)
{
- switch (op) {
- case VK_LOGIC_OP_CLEAR:
- return V_028808_ROP3_CLEAR;
- case VK_LOGIC_OP_AND:
- return V_028808_ROP3_AND;
- case VK_LOGIC_OP_AND_REVERSE:
- return V_028808_ROP3_AND_REVERSE;
- case VK_LOGIC_OP_COPY:
- return V_028808_ROP3_COPY;
- case VK_LOGIC_OP_AND_INVERTED:
- return V_028808_ROP3_AND_INVERTED;
- case VK_LOGIC_OP_NO_OP:
- return V_028808_ROP3_NO_OP;
- case VK_LOGIC_OP_XOR:
- return V_028808_ROP3_XOR;
- case VK_LOGIC_OP_OR:
- return V_028808_ROP3_OR;
- case VK_LOGIC_OP_NOR:
- return V_028808_ROP3_NOR;
- case VK_LOGIC_OP_EQUIVALENT:
- return V_028808_ROP3_EQUIVALENT;
- case VK_LOGIC_OP_INVERT:
- return V_028808_ROP3_INVERT;
- case VK_LOGIC_OP_OR_REVERSE:
- return V_028808_ROP3_OR_REVERSE;
- case VK_LOGIC_OP_COPY_INVERTED:
- return V_028808_ROP3_COPY_INVERTED;
- case VK_LOGIC_OP_OR_INVERTED:
- return V_028808_ROP3_OR_INVERTED;
- case VK_LOGIC_OP_NAND:
- return V_028808_ROP3_NAND;
- case VK_LOGIC_OP_SET:
- return V_028808_ROP3_SET;
- default:
- unreachable("Unhandled logic op");
- }
+ switch (op) {
+ case VK_LOGIC_OP_CLEAR:
+ return V_028808_ROP3_CLEAR;
+ case VK_LOGIC_OP_AND:
+ return V_028808_ROP3_AND;
+ case VK_LOGIC_OP_AND_REVERSE:
+ return V_028808_ROP3_AND_REVERSE;
+ case VK_LOGIC_OP_COPY:
+ return V_028808_ROP3_COPY;
+ case VK_LOGIC_OP_AND_INVERTED:
+ return V_028808_ROP3_AND_INVERTED;
+ case VK_LOGIC_OP_NO_OP:
+ return V_028808_ROP3_NO_OP;
+ case VK_LOGIC_OP_XOR:
+ return V_028808_ROP3_XOR;
+ case VK_LOGIC_OP_OR:
+ return V_028808_ROP3_OR;
+ case VK_LOGIC_OP_NOR:
+ return V_028808_ROP3_NOR;
+ case VK_LOGIC_OP_EQUIVALENT:
+ return V_028808_ROP3_EQUIVALENT;
+ case VK_LOGIC_OP_INVERT:
+ return V_028808_ROP3_INVERT;
+ case VK_LOGIC_OP_OR_REVERSE:
+ return V_028808_ROP3_OR_REVERSE;
+ case VK_LOGIC_OP_COPY_INVERTED:
+ return V_028808_ROP3_COPY_INVERTED;
+ case VK_LOGIC_OP_OR_INVERTED:
+ return V_028808_ROP3_OR_INVERTED;
+ case VK_LOGIC_OP_NAND:
+ return V_028808_ROP3_NAND;
+ case VK_LOGIC_OP_SET:
+ return V_028808_ROP3_SET;
+ default:
+ unreachable("Unhandled logic op");
+ }
}
-
-static uint32_t si_translate_blend_function(VkBlendOp op)
+static uint32_t
+si_translate_blend_function(VkBlendOp op)
{
- switch (op) {
- case VK_BLEND_OP_ADD:
- return V_028780_COMB_DST_PLUS_SRC;
- case VK_BLEND_OP_SUBTRACT:
- return V_028780_COMB_SRC_MINUS_DST;
- case VK_BLEND_OP_REVERSE_SUBTRACT:
- return V_028780_COMB_DST_MINUS_SRC;
- case VK_BLEND_OP_MIN:
- return V_028780_COMB_MIN_DST_SRC;
- case VK_BLEND_OP_MAX:
- return V_028780_COMB_MAX_DST_SRC;
- default:
- return 0;
- }
+ switch (op) {
+ case VK_BLEND_OP_ADD:
+ return V_028780_COMB_DST_PLUS_SRC;
+ case VK_BLEND_OP_SUBTRACT:
+ return V_028780_COMB_SRC_MINUS_DST;
+ case VK_BLEND_OP_REVERSE_SUBTRACT:
+ return V_028780_COMB_DST_MINUS_SRC;
+ case VK_BLEND_OP_MIN:
+ return V_028780_COMB_MIN_DST_SRC;
+ case VK_BLEND_OP_MAX:
+ return V_028780_COMB_MAX_DST_SRC;
+ default:
+ return 0;
+ }
}
-static uint32_t si_translate_blend_factor(VkBlendFactor factor)
+static uint32_t
+si_translate_blend_factor(VkBlendFactor factor)
{
- switch (factor) {
- case VK_BLEND_FACTOR_ZERO:
- return V_028780_BLEND_ZERO;
- case VK_BLEND_FACTOR_ONE:
- return V_028780_BLEND_ONE;
- case VK_BLEND_FACTOR_SRC_COLOR:
- return V_028780_BLEND_SRC_COLOR;
- case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
- return V_028780_BLEND_ONE_MINUS_SRC_COLOR;
- case VK_BLEND_FACTOR_DST_COLOR:
- return V_028780_BLEND_DST_COLOR;
- case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
- return V_028780_BLEND_ONE_MINUS_DST_COLOR;
- case VK_BLEND_FACTOR_SRC_ALPHA:
- return V_028780_BLEND_SRC_ALPHA;
- case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
- return V_028780_BLEND_ONE_MINUS_SRC_ALPHA;
- case VK_BLEND_FACTOR_DST_ALPHA:
- return V_028780_BLEND_DST_ALPHA;
- case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
- return V_028780_BLEND_ONE_MINUS_DST_ALPHA;
- case VK_BLEND_FACTOR_CONSTANT_COLOR:
- return V_028780_BLEND_CONSTANT_COLOR;
- case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
- return V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR;
- case VK_BLEND_FACTOR_CONSTANT_ALPHA:
- return V_028780_BLEND_CONSTANT_ALPHA;
- case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
- return V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA;
- case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
- return V_028780_BLEND_SRC_ALPHA_SATURATE;
- case VK_BLEND_FACTOR_SRC1_COLOR:
- return V_028780_BLEND_SRC1_COLOR;
- case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
- return V_028780_BLEND_INV_SRC1_COLOR;
- case VK_BLEND_FACTOR_SRC1_ALPHA:
- return V_028780_BLEND_SRC1_ALPHA;
- case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
- return V_028780_BLEND_INV_SRC1_ALPHA;
- default:
- return 0;
- }
+ switch (factor) {
+ case VK_BLEND_FACTOR_ZERO:
+ return V_028780_BLEND_ZERO;
+ case VK_BLEND_FACTOR_ONE:
+ return V_028780_BLEND_ONE;
+ case VK_BLEND_FACTOR_SRC_COLOR:
+ return V_028780_BLEND_SRC_COLOR;
+ case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
+ return V_028780_BLEND_ONE_MINUS_SRC_COLOR;
+ case VK_BLEND_FACTOR_DST_COLOR:
+ return V_028780_BLEND_DST_COLOR;
+ case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
+ return V_028780_BLEND_ONE_MINUS_DST_COLOR;
+ case VK_BLEND_FACTOR_SRC_ALPHA:
+ return V_028780_BLEND_SRC_ALPHA;
+ case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
+ return V_028780_BLEND_ONE_MINUS_SRC_ALPHA;
+ case VK_BLEND_FACTOR_DST_ALPHA:
+ return V_028780_BLEND_DST_ALPHA;
+ case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
+ return V_028780_BLEND_ONE_MINUS_DST_ALPHA;
+ case VK_BLEND_FACTOR_CONSTANT_COLOR:
+ return V_028780_BLEND_CONSTANT_COLOR;
+ case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
+ return V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR;
+ case VK_BLEND_FACTOR_CONSTANT_ALPHA:
+ return V_028780_BLEND_CONSTANT_ALPHA;
+ case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
+ return V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA;
+ case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
+ return V_028780_BLEND_SRC_ALPHA_SATURATE;
+ case VK_BLEND_FACTOR_SRC1_COLOR:
+ return V_028780_BLEND_SRC1_COLOR;
+ case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
+ return V_028780_BLEND_INV_SRC1_COLOR;
+ case VK_BLEND_FACTOR_SRC1_ALPHA:
+ return V_028780_BLEND_SRC1_ALPHA;
+ case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
+ return V_028780_BLEND_INV_SRC1_ALPHA;
+ default:
+ return 0;
+ }
}
-static uint32_t si_translate_blend_opt_function(VkBlendOp op)
+static uint32_t
+si_translate_blend_opt_function(VkBlendOp op)
{
- switch (op) {
- case VK_BLEND_OP_ADD:
- return V_028760_OPT_COMB_ADD;
- case VK_BLEND_OP_SUBTRACT:
- return V_028760_OPT_COMB_SUBTRACT;
- case VK_BLEND_OP_REVERSE_SUBTRACT:
- return V_028760_OPT_COMB_REVSUBTRACT;
- case VK_BLEND_OP_MIN:
- return V_028760_OPT_COMB_MIN;
- case VK_BLEND_OP_MAX:
- return V_028760_OPT_COMB_MAX;
- default:
- return V_028760_OPT_COMB_BLEND_DISABLED;
- }
+ switch (op) {
+ case VK_BLEND_OP_ADD:
+ return V_028760_OPT_COMB_ADD;
+ case VK_BLEND_OP_SUBTRACT:
+ return V_028760_OPT_COMB_SUBTRACT;
+ case VK_BLEND_OP_REVERSE_SUBTRACT:
+ return V_028760_OPT_COMB_REVSUBTRACT;
+ case VK_BLEND_OP_MIN:
+ return V_028760_OPT_COMB_MIN;
+ case VK_BLEND_OP_MAX:
+ return V_028760_OPT_COMB_MAX;
+ default:
+ return V_028760_OPT_COMB_BLEND_DISABLED;
+ }
}
-static uint32_t si_translate_blend_opt_factor(VkBlendFactor factor, bool is_alpha)
+static uint32_t
+si_translate_blend_opt_factor(VkBlendFactor factor, bool is_alpha)
{
- switch (factor) {
- case VK_BLEND_FACTOR_ZERO:
- return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_ALL;
- case VK_BLEND_FACTOR_ONE:
- return V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE;
- case VK_BLEND_FACTOR_SRC_COLOR:
- return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0
- : V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0;
- case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
- return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1
- : V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1;
- case VK_BLEND_FACTOR_SRC_ALPHA:
- return V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0;
- case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
- return V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1;
- case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
- return is_alpha ? V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE
- : V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
- default:
- return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
- }
+ switch (factor) {
+ case VK_BLEND_FACTOR_ZERO:
+ return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_ALL;
+ case VK_BLEND_FACTOR_ONE:
+ return V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE;
+ case VK_BLEND_FACTOR_SRC_COLOR:
+ return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0
+ : V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0;
+ case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
+ return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1
+ : V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1;
+ case VK_BLEND_FACTOR_SRC_ALPHA:
+ return V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0;
+ case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
+ return V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1;
+ case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
+ return is_alpha ? V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE
+ : V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
+ default:
+ return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
+ }
}
/**
* Get rid of DST in the blend factors by commuting the operands:
* func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
*/
-static void si_blend_remove_dst(VkBlendOp *func, VkBlendFactor *src_factor,
- VkBlendFactor *dst_factor, VkBlendFactor expected_dst,
- VkBlendFactor replacement_src)
+static void
+si_blend_remove_dst(VkBlendOp *func, VkBlendFactor *src_factor, VkBlendFactor *dst_factor,
+ VkBlendFactor expected_dst, VkBlendFactor replacement_src)
{
- if (*src_factor == expected_dst &&
- *dst_factor == VK_BLEND_FACTOR_ZERO) {
- *src_factor = VK_BLEND_FACTOR_ZERO;
- *dst_factor = replacement_src;
-
- /* Commuting the operands requires reversing subtractions. */
- if (*func == VK_BLEND_OP_SUBTRACT)
- *func = VK_BLEND_OP_REVERSE_SUBTRACT;
- else if (*func == VK_BLEND_OP_REVERSE_SUBTRACT)
- *func = VK_BLEND_OP_SUBTRACT;
- }
+ if (*src_factor == expected_dst && *dst_factor == VK_BLEND_FACTOR_ZERO) {
+ *src_factor = VK_BLEND_FACTOR_ZERO;
+ *dst_factor = replacement_src;
+
+ /* Commuting the operands requires reversing subtractions. */
+ if (*func == VK_BLEND_OP_SUBTRACT)
+ *func = VK_BLEND_OP_REVERSE_SUBTRACT;
+ else if (*func == VK_BLEND_OP_REVERSE_SUBTRACT)
+ *func = VK_BLEND_OP_SUBTRACT;
+ }
}
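
The comment above si_blend_remove_dst states the identity being exploited: when the destination factor is ZERO and the source factor equals the destination value, func(src * DST, dst * 0) can be rewritten as func(src * 0, dst * SRC), and SUBTRACT flips to REVERSE_SUBTRACT because the operands swap sides. A small scalar check of that identity, illustrative only and not RADV code:

#include <assert.h>

int
main(void)
{
   float src = 0.25f, dst = 0.5f;

   /* ADD: (src * DST) + (dst * 0)  ==  (src * 0) + (dst * SRC) */
   assert(src * dst + dst * 0.0f == src * 0.0f + dst * src);

   /* SUBTRACT becomes REVERSE_SUBTRACT once the operands swap:
    * (src * DST) - (dst * 0)  ==  (dst * SRC) - (src * 0) */
   assert(src * dst - dst * 0.0f == dst * src - src * 0.0f);
   return 0;
}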
-static bool si_blend_factor_uses_dst(VkBlendFactor factor)
+static bool
+si_blend_factor_uses_dst(VkBlendFactor factor)
{
- return factor == VK_BLEND_FACTOR_DST_COLOR ||
- factor == VK_BLEND_FACTOR_DST_ALPHA ||
- factor == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE ||
- factor == VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA ||
- factor == VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR;
+ return factor == VK_BLEND_FACTOR_DST_COLOR || factor == VK_BLEND_FACTOR_DST_ALPHA ||
+ factor == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE ||
+ factor == VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA ||
+ factor == VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR;
}
-static bool is_dual_src(VkBlendFactor factor)
+static bool
+is_dual_src(VkBlendFactor factor)
{
- switch (factor) {
- case VK_BLEND_FACTOR_SRC1_COLOR:
- case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
- case VK_BLEND_FACTOR_SRC1_ALPHA:
- case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
- return true;
- default:
- return false;
- }
+ switch (factor) {
+ case VK_BLEND_FACTOR_SRC1_COLOR:
+ case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
+ case VK_BLEND_FACTOR_SRC1_ALPHA:
+ case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
+ return true;
+ default:
+ return false;
+ }
}
-static unsigned radv_choose_spi_color_format(const struct radv_device *device,
- VkFormat vk_format,
- bool blend_enable,
- bool blend_need_alpha)
+static unsigned
+radv_choose_spi_color_format(const struct radv_device *device, VkFormat vk_format,
+ bool blend_enable, bool blend_need_alpha)
{
- const struct util_format_description *desc = vk_format_description(vk_format);
- bool use_rbplus = device->physical_device->rad_info.rbplus_allowed;
- struct ac_spi_color_formats formats = {0};
- unsigned format, ntype, swap;
-
- format = radv_translate_colorformat(vk_format);
- ntype = radv_translate_color_numformat(vk_format, desc,
- vk_format_get_first_non_void_channel(vk_format));
- swap = radv_translate_colorswap(vk_format, false);
-
- ac_choose_spi_color_formats(format, swap, ntype, false, use_rbplus,
- &formats);
-
- if (blend_enable && blend_need_alpha)
- return formats.blend_alpha;
- else if(blend_need_alpha)
- return formats.alpha;
- else if(blend_enable)
- return formats.blend;
- else
- return formats.normal;
+ const struct util_format_description *desc = vk_format_description(vk_format);
+ bool use_rbplus = device->physical_device->rad_info.rbplus_allowed;
+ struct ac_spi_color_formats formats = {0};
+ unsigned format, ntype, swap;
+
+ format = radv_translate_colorformat(vk_format);
+ ntype = radv_translate_color_numformat(vk_format, desc,
+ vk_format_get_first_non_void_channel(vk_format));
+ swap = radv_translate_colorswap(vk_format, false);
+
+ ac_choose_spi_color_formats(format, swap, ntype, false, use_rbplus, &formats);
+
+ if (blend_enable && blend_need_alpha)
+ return formats.blend_alpha;
+ else if (blend_need_alpha)
+ return formats.alpha;
+ else if (blend_enable)
+ return formats.blend;
+ else
+ return formats.normal;
}
static bool
format_is_int8(VkFormat format)
{
- const struct util_format_description *desc = vk_format_description(format);
- int channel = vk_format_get_first_non_void_channel(format);
+ const struct util_format_description *desc = vk_format_description(format);
+ int channel = vk_format_get_first_non_void_channel(format);
- return channel >= 0 && desc->channel[channel].pure_integer &&
- desc->channel[channel].size == 8;
+ return channel >= 0 && desc->channel[channel].pure_integer && desc->channel[channel].size == 8;
}
static bool
format_is_int10(VkFormat format)
{
- const struct util_format_description *desc = vk_format_description(format);
-
- if (desc->nr_channels != 4)
- return false;
- for (unsigned i = 0; i < 4; i++) {
- if (desc->channel[i].pure_integer && desc->channel[i].size == 10)
- return true;
- }
- return false;
+ const struct util_format_description *desc = vk_format_description(format);
+
+ if (desc->nr_channels != 4)
+ return false;
+ for (unsigned i = 0; i < 4; i++) {
+ if (desc->channel[i].pure_integer && desc->channel[i].size == 10)
+ return true;
+ }
+ return false;
}
static void
radv_pipeline_compute_spi_color_formats(const struct radv_pipeline *pipeline,
- const VkGraphicsPipelineCreateInfo *pCreateInfo,
- struct radv_blend_state *blend)
+ const VkGraphicsPipelineCreateInfo *pCreateInfo,
+ struct radv_blend_state *blend)
{
- RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
- struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
- unsigned col_format = 0, is_int8 = 0, is_int10 = 0;
- unsigned num_targets;
-
- for (unsigned i = 0; i < (blend->single_cb_enable ? 1 : subpass->color_count); ++i) {
- unsigned cf;
-
- if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED ||
- !(blend->cb_target_mask & (0xfu << (i * 4)))) {
- cf = V_028714_SPI_SHADER_ZERO;
- } else {
- struct radv_render_pass_attachment *attachment = pass->attachments + subpass->color_attachments[i].attachment;
- bool blend_enable =
- blend->blend_enable_4bit & (0xfu << (i * 4));
-
- cf = radv_choose_spi_color_format(pipeline->device,
- attachment->format,
- blend_enable,
- blend->need_src_alpha & (1 << i));
-
- if (format_is_int8(attachment->format))
- is_int8 |= 1 << i;
- if (format_is_int10(attachment->format))
- is_int10 |= 1 << i;
- }
-
- col_format |= cf << (4 * i);
- }
-
- if (!(col_format & 0xf) && blend->need_src_alpha & (1 << 0)) {
- /* When a subpass doesn't have any color attachments, write the
- * alpha channel of MRT0 when alpha coverage is enabled because
- * the depth attachment needs it.
- */
- col_format |= V_028714_SPI_SHADER_32_AR;
- }
-
- /* If the i-th target format is set, all previous target formats must
- * be non-zero to avoid hangs.
- */
- num_targets = (util_last_bit(col_format) + 3) / 4;
- for (unsigned i = 0; i < num_targets; i++) {
- if (!(col_format & (0xfu << (i * 4)))) {
- col_format |= V_028714_SPI_SHADER_32_R << (i * 4);
- }
- }
-
- /* The output for dual source blending should have the same format as
- * the first output.
- */
- if (blend->mrt0_is_dual_src) {
- assert(!(col_format >> 4));
- col_format |= (col_format & 0xf) << 4;
- }
-
- blend->cb_shader_mask = ac_get_cb_shader_mask(col_format);
- blend->spi_shader_col_format = col_format;
- blend->col_format_is_int8 = is_int8;
- blend->col_format_is_int10 = is_int10;
+ RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
+ struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
+ unsigned col_format = 0, is_int8 = 0, is_int10 = 0;
+ unsigned num_targets;
+
+ for (unsigned i = 0; i < (blend->single_cb_enable ? 1 : subpass->color_count); ++i) {
+ unsigned cf;
+
+ if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED ||
+ !(blend->cb_target_mask & (0xfu << (i * 4)))) {
+ cf = V_028714_SPI_SHADER_ZERO;
+ } else {
+ struct radv_render_pass_attachment *attachment =
+ pass->attachments + subpass->color_attachments[i].attachment;
+ bool blend_enable = blend->blend_enable_4bit & (0xfu << (i * 4));
+
+ cf = radv_choose_spi_color_format(pipeline->device, attachment->format, blend_enable,
+ blend->need_src_alpha & (1 << i));
+
+ if (format_is_int8(attachment->format))
+ is_int8 |= 1 << i;
+ if (format_is_int10(attachment->format))
+ is_int10 |= 1 << i;
+ }
+
+ col_format |= cf << (4 * i);
+ }
+
+ if (!(col_format & 0xf) && blend->need_src_alpha & (1 << 0)) {
+ /* When a subpass doesn't have any color attachments, write the
+ * alpha channel of MRT0 when alpha coverage is enabled because
+ * the depth attachment needs it.
+ */
+ col_format |= V_028714_SPI_SHADER_32_AR;
+ }
+
+ /* If the i-th target format is set, all previous target formats must
+ * be non-zero to avoid hangs.
+ */
+ num_targets = (util_last_bit(col_format) + 3) / 4;
+ for (unsigned i = 0; i < num_targets; i++) {
+ if (!(col_format & (0xfu << (i * 4)))) {
+ col_format |= V_028714_SPI_SHADER_32_R << (i * 4);
+ }
+ }
+
+ /* The output for dual source blending should have the same format as
+ * the first output.
+ */
+ if (blend->mrt0_is_dual_src) {
+ assert(!(col_format >> 4));
+ col_format |= (col_format & 0xf) << 4;
+ }
+
+ blend->cb_shader_mask = ac_get_cb_shader_mask(col_format);
+ blend->spi_shader_col_format = col_format;
+ blend->col_format_is_int8 = is_int8;
+ blend->col_format_is_int10 = is_int10;
}
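
radv_pipeline_compute_spi_color_formats packs one 4-bit export format per color target into col_format and then fills every hole below the highest used target with a dummy single-channel format, because the hardware can hang if a lower MRT slot exports nothing while a higher one does. The sketch below shows just that packing step; the format codes are made-up placeholders rather than the real V_028714_SPI_SHADER_* values, and __builtin_clz stands in for Mesa's util_last_bit.

#include <stdint.h>
#include <stdio.h>

#define FMT_32_R 0x4u /* placeholder, not the real SPI_SHADER_32_R value */

int
main(void)
{
   /* Suppose only MRT2 is used and was assigned format code 0x9. */
   uint32_t col_format = 0x9u << (4 * 2);

   /* Number of targets up to and including the highest used one. */
   unsigned num_targets = ((32 - __builtin_clz(col_format)) + 3) / 4;

   /* Fill the unused lower slots (MRT0, MRT1) with a dummy format. */
   for (unsigned i = 0; i < num_targets; i++) {
      if (!(col_format & (0xfu << (i * 4))))
         col_format |= FMT_32_R << (i * 4);
   }

   printf("col_format = 0x%x\n", col_format); /* prints 0x944 */
   return 0;
}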
/*
@@ -585,1965 +584,1874 @@ radv_pipeline_compute_spi_color_formats(const struct radv_pipeline *pipeline,
* radv_format_meta_fs_key(radv_fs_key_format_exemplars[i]) == i.
*/
const VkFormat radv_fs_key_format_exemplars[NUM_META_FS_KEYS] = {
- VK_FORMAT_R32_SFLOAT,
- VK_FORMAT_R32G32_SFLOAT,
- VK_FORMAT_R8G8B8A8_UNORM,
- VK_FORMAT_R16G16B16A16_UNORM,
- VK_FORMAT_R16G16B16A16_SNORM,
- VK_FORMAT_R16G16B16A16_UINT,
- VK_FORMAT_R16G16B16A16_SINT,
- VK_FORMAT_R32G32B32A32_SFLOAT,
- VK_FORMAT_R8G8B8A8_UINT,
- VK_FORMAT_R8G8B8A8_SINT,
- VK_FORMAT_A2R10G10B10_UINT_PACK32,
- VK_FORMAT_A2R10G10B10_SINT_PACK32,
+ VK_FORMAT_R32_SFLOAT,
+ VK_FORMAT_R32G32_SFLOAT,
+ VK_FORMAT_R8G8B8A8_UNORM,
+ VK_FORMAT_R16G16B16A16_UNORM,
+ VK_FORMAT_R16G16B16A16_SNORM,
+ VK_FORMAT_R16G16B16A16_UINT,
+ VK_FORMAT_R16G16B16A16_SINT,
+ VK_FORMAT_R32G32B32A32_SFLOAT,
+ VK_FORMAT_R8G8B8A8_UINT,
+ VK_FORMAT_R8G8B8A8_SINT,
+ VK_FORMAT_A2R10G10B10_UINT_PACK32,
+ VK_FORMAT_A2R10G10B10_SINT_PACK32,
};
-unsigned radv_format_meta_fs_key(struct radv_device *device, VkFormat format)
+unsigned
+radv_format_meta_fs_key(struct radv_device *device, VkFormat format)
{
- unsigned col_format = radv_choose_spi_color_format(device, format, false, false);
- assert(col_format != V_028714_SPI_SHADER_32_AR);
-
- bool is_int8 = format_is_int8(format);
- bool is_int10 = format_is_int10(format);
-
- if (col_format == V_028714_SPI_SHADER_UINT16_ABGR && is_int8)
- return 8;
- else if (col_format == V_028714_SPI_SHADER_SINT16_ABGR && is_int8)
- return 9;
- else if (col_format == V_028714_SPI_SHADER_UINT16_ABGR && is_int10)
- return 10;
- else if (col_format == V_028714_SPI_SHADER_SINT16_ABGR && is_int10)
- return 11;
- else {
- if (col_format >= V_028714_SPI_SHADER_32_AR)
- --col_format; /* Skip V_028714_SPI_SHADER_32_AR since there is no such VkFormat */
-
- --col_format; /* Skip V_028714_SPI_SHADER_ZERO */
- return col_format;
- }
+ unsigned col_format = radv_choose_spi_color_format(device, format, false, false);
+ assert(col_format != V_028714_SPI_SHADER_32_AR);
+
+ bool is_int8 = format_is_int8(format);
+ bool is_int10 = format_is_int10(format);
+
+ if (col_format == V_028714_SPI_SHADER_UINT16_ABGR && is_int8)
+ return 8;
+ else if (col_format == V_028714_SPI_SHADER_SINT16_ABGR && is_int8)
+ return 9;
+ else if (col_format == V_028714_SPI_SHADER_UINT16_ABGR && is_int10)
+ return 10;
+ else if (col_format == V_028714_SPI_SHADER_SINT16_ABGR && is_int10)
+ return 11;
+ else {
+ if (col_format >= V_028714_SPI_SHADER_32_AR)
+ --col_format; /* Skip V_028714_SPI_SHADER_32_AR since there is no such VkFormat */
+
+ --col_format; /* Skip V_028714_SPI_SHADER_ZERO */
+ return col_format;
+ }
}
static void
-radv_blend_check_commutativity(struct radv_blend_state *blend,
- VkBlendOp op, VkBlendFactor src,
- VkBlendFactor dst, unsigned chanmask)
+radv_blend_check_commutativity(struct radv_blend_state *blend, VkBlendOp op, VkBlendFactor src,
+ VkBlendFactor dst, unsigned chanmask)
{
- /* Src factor is allowed when it does not depend on Dst. */
- static const uint32_t src_allowed =
- (1u << VK_BLEND_FACTOR_ONE) |
- (1u << VK_BLEND_FACTOR_SRC_COLOR) |
- (1u << VK_BLEND_FACTOR_SRC_ALPHA) |
- (1u << VK_BLEND_FACTOR_SRC_ALPHA_SATURATE) |
- (1u << VK_BLEND_FACTOR_CONSTANT_COLOR) |
- (1u << VK_BLEND_FACTOR_CONSTANT_ALPHA) |
- (1u << VK_BLEND_FACTOR_SRC1_COLOR) |
- (1u << VK_BLEND_FACTOR_SRC1_ALPHA) |
- (1u << VK_BLEND_FACTOR_ZERO) |
- (1u << VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR) |
- (1u << VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA) |
- (1u << VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR) |
- (1u << VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA) |
- (1u << VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR) |
- (1u << VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA);
-
- if (dst == VK_BLEND_FACTOR_ONE &&
- (src_allowed & (1u << src))) {
- /* Addition is commutative, but floating point addition isn't
- * associative: subtle changes can be introduced via different
- * rounding. Be conservative, only enable for min and max.
- */
- if (op == VK_BLEND_OP_MAX || op == VK_BLEND_OP_MIN)
- blend->commutative_4bit |= chanmask;
- }
+ /* Src factor is allowed when it does not depend on Dst. */
+ static const uint32_t src_allowed =
+ (1u << VK_BLEND_FACTOR_ONE) | (1u << VK_BLEND_FACTOR_SRC_COLOR) |
+ (1u << VK_BLEND_FACTOR_SRC_ALPHA) | (1u << VK_BLEND_FACTOR_SRC_ALPHA_SATURATE) |
+ (1u << VK_BLEND_FACTOR_CONSTANT_COLOR) | (1u << VK_BLEND_FACTOR_CONSTANT_ALPHA) |
+ (1u << VK_BLEND_FACTOR_SRC1_COLOR) | (1u << VK_BLEND_FACTOR_SRC1_ALPHA) |
+ (1u << VK_BLEND_FACTOR_ZERO) | (1u << VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR) |
+ (1u << VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA) |
+ (1u << VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR) |
+ (1u << VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA) |
+ (1u << VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR) | (1u << VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA);
+
+ if (dst == VK_BLEND_FACTOR_ONE && (src_allowed & (1u << src))) {
+ /* Addition is commutative, but floating point addition isn't
+ * associative: subtle changes can be introduced via different
+ * rounding. Be conservative, only enable for min and max.
+ */
+ if (op == VK_BLEND_OP_MAX || op == VK_BLEND_OP_MIN)
+ blend->commutative_4bit |= chanmask;
+ }
}
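
radv_blend_check_commutativity only marks MIN and MAX blends as safely reorderable; as the comment explains, ADD is excluded even though addition commutes, because floating-point addition is not associative and accumulating fragments in a different order can round differently. A short demonstration of that effect:

#include <stdio.h>

int
main(void)
{
   float a = 1e8f, b = -1e8f, c = 1.0f;

   printf("(a + b) + c = %f\n", (a + b) + c); /* 1.000000 */
   printf("a + (b + c) = %f\n", a + (b + c)); /* 0.000000, c is absorbed in b + c */
   return 0;
}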
static struct radv_blend_state
radv_pipeline_init_blend_state(const struct radv_pipeline *pipeline,
- const VkGraphicsPipelineCreateInfo *pCreateInfo,
- const struct radv_graphics_pipeline_create_info *extra)
+ const VkGraphicsPipelineCreateInfo *pCreateInfo,
+ const struct radv_graphics_pipeline_create_info *extra)
{
- const VkPipelineColorBlendStateCreateInfo *vkblend = radv_pipeline_get_color_blend_state(pCreateInfo);
- const VkPipelineMultisampleStateCreateInfo *vkms = radv_pipeline_get_multisample_state(pCreateInfo);
- struct radv_blend_state blend = {0};
- unsigned mode = V_028808_CB_NORMAL;
- int i;
-
- if (extra && extra->custom_blend_mode) {
- blend.single_cb_enable = true;
- mode = extra->custom_blend_mode;
- }
-
- blend.cb_color_control = 0;
- if (vkblend) {
- if (vkblend->logicOpEnable)
- blend.cb_color_control |= S_028808_ROP3(si_translate_blend_logic_op(vkblend->logicOp));
- else
- blend.cb_color_control |= S_028808_ROP3(V_028808_ROP3_COPY);
- }
-
- blend.db_alpha_to_mask = S_028B70_ALPHA_TO_MASK_OFFSET0(3) |
- S_028B70_ALPHA_TO_MASK_OFFSET1(1) |
- S_028B70_ALPHA_TO_MASK_OFFSET2(0) |
- S_028B70_ALPHA_TO_MASK_OFFSET3(2) |
- S_028B70_OFFSET_ROUND(1);
-
- if (vkms && vkms->alphaToCoverageEnable) {
- blend.db_alpha_to_mask |= S_028B70_ALPHA_TO_MASK_ENABLE(1);
- blend.need_src_alpha |= 0x1;
- }
-
- blend.cb_target_mask = 0;
- if (vkblend) {
- for (i = 0; i < vkblend->attachmentCount; i++) {
- const VkPipelineColorBlendAttachmentState *att = &vkblend->pAttachments[i];
- unsigned blend_cntl = 0;
- unsigned srcRGB_opt, dstRGB_opt, srcA_opt, dstA_opt;
- VkBlendOp eqRGB = att->colorBlendOp;
- VkBlendFactor srcRGB = att->srcColorBlendFactor;
- VkBlendFactor dstRGB = att->dstColorBlendFactor;
- VkBlendOp eqA = att->alphaBlendOp;
- VkBlendFactor srcA = att->srcAlphaBlendFactor;
- VkBlendFactor dstA = att->dstAlphaBlendFactor;
-
- blend.sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) | S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED);
-
- if (!att->colorWriteMask)
- continue;
-
- /* Ignore other blend targets if dual-source blending
- * is enabled to prevent wrong behaviour.
- */
- if (blend.mrt0_is_dual_src)
- continue;
-
- blend.cb_target_mask |= (unsigned)att->colorWriteMask << (4 * i);
- blend.cb_target_enabled_4bit |= 0xfu << (4 * i);
- if (!att->blendEnable) {
- blend.cb_blend_control[i] = blend_cntl;
- continue;
- }
-
- if (is_dual_src(srcRGB) || is_dual_src(dstRGB) || is_dual_src(srcA) || is_dual_src(dstA))
- if (i == 0)
- blend.mrt0_is_dual_src = true;
-
- if (eqRGB == VK_BLEND_OP_MIN || eqRGB == VK_BLEND_OP_MAX) {
- srcRGB = VK_BLEND_FACTOR_ONE;
- dstRGB = VK_BLEND_FACTOR_ONE;
- }
- if (eqA == VK_BLEND_OP_MIN || eqA == VK_BLEND_OP_MAX) {
- srcA = VK_BLEND_FACTOR_ONE;
- dstA = VK_BLEND_FACTOR_ONE;
- }
-
- radv_blend_check_commutativity(&blend, eqRGB, srcRGB, dstRGB,
- 0x7u << (4 * i));
- radv_blend_check_commutativity(&blend, eqA, srcA, dstA,
- 0x8u << (4 * i));
-
- /* Blending optimizations for RB+.
- * These transformations don't change the behavior.
- *
- * First, get rid of DST in the blend factors:
- * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
- */
- si_blend_remove_dst(&eqRGB, &srcRGB, &dstRGB,
- VK_BLEND_FACTOR_DST_COLOR,
- VK_BLEND_FACTOR_SRC_COLOR);
-
- si_blend_remove_dst(&eqA, &srcA, &dstA,
- VK_BLEND_FACTOR_DST_COLOR,
- VK_BLEND_FACTOR_SRC_COLOR);
-
- si_blend_remove_dst(&eqA, &srcA, &dstA,
- VK_BLEND_FACTOR_DST_ALPHA,
- VK_BLEND_FACTOR_SRC_ALPHA);
-
- /* Look up the ideal settings from tables. */
- srcRGB_opt = si_translate_blend_opt_factor(srcRGB, false);
- dstRGB_opt = si_translate_blend_opt_factor(dstRGB, false);
- srcA_opt = si_translate_blend_opt_factor(srcA, true);
- dstA_opt = si_translate_blend_opt_factor(dstA, true);
-
- /* Handle interdependencies. */
- if (si_blend_factor_uses_dst(srcRGB))
- dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
- if (si_blend_factor_uses_dst(srcA))
- dstA_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
-
- if (srcRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE &&
- (dstRGB == VK_BLEND_FACTOR_ZERO ||
- dstRGB == VK_BLEND_FACTOR_SRC_ALPHA ||
- dstRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE))
- dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
-
- /* Set the final value. */
- blend.sx_mrt_blend_opt[i] =
- S_028760_COLOR_SRC_OPT(srcRGB_opt) |
- S_028760_COLOR_DST_OPT(dstRGB_opt) |
- S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(eqRGB)) |
- S_028760_ALPHA_SRC_OPT(srcA_opt) |
- S_028760_ALPHA_DST_OPT(dstA_opt) |
- S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(eqA));
- blend_cntl |= S_028780_ENABLE(1);
-
- blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB));
- blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(srcRGB));
- blend_cntl |= S_028780_COLOR_DESTBLEND(si_translate_blend_factor(dstRGB));
- if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) {
- blend_cntl |= S_028780_SEPARATE_ALPHA_BLEND(1);
- blend_cntl |= S_028780_ALPHA_COMB_FCN(si_translate_blend_function(eqA));
- blend_cntl |= S_028780_ALPHA_SRCBLEND(si_translate_blend_factor(srcA));
- blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA));
- }
- blend.cb_blend_control[i] = blend_cntl;
-
- blend.blend_enable_4bit |= 0xfu << (i * 4);
-
- if (srcRGB == VK_BLEND_FACTOR_SRC_ALPHA ||
- dstRGB == VK_BLEND_FACTOR_SRC_ALPHA ||
- srcRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE ||
- dstRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE ||
- srcRGB == VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA ||
- dstRGB == VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA)
- blend.need_src_alpha |= 1 << i;
- }
- for (i = vkblend->attachmentCount; i < 8; i++) {
- blend.cb_blend_control[i] = 0;
- blend.sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) | S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED);
- }
- }
-
- if (pipeline->device->physical_device->rad_info.has_rbplus) {
- /* Disable RB+ blend optimizations for dual source blending. */
- if (blend.mrt0_is_dual_src) {
- for (i = 0; i < 8; i++) {
- blend.sx_mrt_blend_opt[i] =
- S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) |
- S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE);
- }
- }
-
- /* RB+ doesn't work with dual source blending, logic op and
- * RESOLVE.
- */
- if (blend.mrt0_is_dual_src ||
- (vkblend && vkblend->logicOpEnable) ||
- mode == V_028808_CB_RESOLVE)
- blend.cb_color_control |= S_028808_DISABLE_DUAL_QUAD(1);
- }
-
- if (blend.cb_target_mask)
- blend.cb_color_control |= S_028808_MODE(mode);
- else
- blend.cb_color_control |= S_028808_MODE(V_028808_CB_DISABLE);
-
- radv_pipeline_compute_spi_color_formats(pipeline, pCreateInfo, &blend);
- return blend;
+ const VkPipelineColorBlendStateCreateInfo *vkblend =
+ radv_pipeline_get_color_blend_state(pCreateInfo);
+ const VkPipelineMultisampleStateCreateInfo *vkms =
+ radv_pipeline_get_multisample_state(pCreateInfo);
+ struct radv_blend_state blend = {0};
+ unsigned mode = V_028808_CB_NORMAL;
+ int i;
+
+ if (extra && extra->custom_blend_mode) {
+ blend.single_cb_enable = true;
+ mode = extra->custom_blend_mode;
+ }
+
+ blend.cb_color_control = 0;
+ if (vkblend) {
+ if (vkblend->logicOpEnable)
+ blend.cb_color_control |= S_028808_ROP3(si_translate_blend_logic_op(vkblend->logicOp));
+ else
+ blend.cb_color_control |= S_028808_ROP3(V_028808_ROP3_COPY);
+ }
+
+ blend.db_alpha_to_mask = S_028B70_ALPHA_TO_MASK_OFFSET0(3) | S_028B70_ALPHA_TO_MASK_OFFSET1(1) |
+ S_028B70_ALPHA_TO_MASK_OFFSET2(0) | S_028B70_ALPHA_TO_MASK_OFFSET3(2) |
+ S_028B70_OFFSET_ROUND(1);
+
+ if (vkms && vkms->alphaToCoverageEnable) {
+ blend.db_alpha_to_mask |= S_028B70_ALPHA_TO_MASK_ENABLE(1);
+ blend.need_src_alpha |= 0x1;
+ }
+
+ blend.cb_target_mask = 0;
+ if (vkblend) {
+ for (i = 0; i < vkblend->attachmentCount; i++) {
+ const VkPipelineColorBlendAttachmentState *att = &vkblend->pAttachments[i];
+ unsigned blend_cntl = 0;
+ unsigned srcRGB_opt, dstRGB_opt, srcA_opt, dstA_opt;
+ VkBlendOp eqRGB = att->colorBlendOp;
+ VkBlendFactor srcRGB = att->srcColorBlendFactor;
+ VkBlendFactor dstRGB = att->dstColorBlendFactor;
+ VkBlendOp eqA = att->alphaBlendOp;
+ VkBlendFactor srcA = att->srcAlphaBlendFactor;
+ VkBlendFactor dstA = att->dstAlphaBlendFactor;
+
+ blend.sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) |
+ S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED);
+
+ if (!att->colorWriteMask)
+ continue;
+
+ /* Ignore other blend targets if dual-source blending
+ * is enabled to prevent wrong behaviour.
+ */
+ if (blend.mrt0_is_dual_src)
+ continue;
+
+ blend.cb_target_mask |= (unsigned)att->colorWriteMask << (4 * i);
+ blend.cb_target_enabled_4bit |= 0xfu << (4 * i);
+ if (!att->blendEnable) {
+ blend.cb_blend_control[i] = blend_cntl;
+ continue;
+ }
+
+ if (is_dual_src(srcRGB) || is_dual_src(dstRGB) || is_dual_src(srcA) || is_dual_src(dstA))
+ if (i == 0)
+ blend.mrt0_is_dual_src = true;
+
+ if (eqRGB == VK_BLEND_OP_MIN || eqRGB == VK_BLEND_OP_MAX) {
+ srcRGB = VK_BLEND_FACTOR_ONE;
+ dstRGB = VK_BLEND_FACTOR_ONE;
+ }
+ if (eqA == VK_BLEND_OP_MIN || eqA == VK_BLEND_OP_MAX) {
+ srcA = VK_BLEND_FACTOR_ONE;
+ dstA = VK_BLEND_FACTOR_ONE;
+ }
+
+ radv_blend_check_commutativity(&blend, eqRGB, srcRGB, dstRGB, 0x7u << (4 * i));
+ radv_blend_check_commutativity(&blend, eqA, srcA, dstA, 0x8u << (4 * i));
+
+ /* Blending optimizations for RB+.
+ * These transformations don't change the behavior.
+ *
+ * First, get rid of DST in the blend factors:
+ * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC)
+ */
+ si_blend_remove_dst(&eqRGB, &srcRGB, &dstRGB, VK_BLEND_FACTOR_DST_COLOR,
+ VK_BLEND_FACTOR_SRC_COLOR);
+
+ si_blend_remove_dst(&eqA, &srcA, &dstA, VK_BLEND_FACTOR_DST_COLOR,
+ VK_BLEND_FACTOR_SRC_COLOR);
+
+ si_blend_remove_dst(&eqA, &srcA, &dstA, VK_BLEND_FACTOR_DST_ALPHA,
+ VK_BLEND_FACTOR_SRC_ALPHA);
+
+ /* Look up the ideal settings from tables. */
+ srcRGB_opt = si_translate_blend_opt_factor(srcRGB, false);
+ dstRGB_opt = si_translate_blend_opt_factor(dstRGB, false);
+ srcA_opt = si_translate_blend_opt_factor(srcA, true);
+ dstA_opt = si_translate_blend_opt_factor(dstA, true);
+
+ /* Handle interdependencies. */
+ if (si_blend_factor_uses_dst(srcRGB))
+ dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
+ if (si_blend_factor_uses_dst(srcA))
+ dstA_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
+
+ if (srcRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE &&
+ (dstRGB == VK_BLEND_FACTOR_ZERO || dstRGB == VK_BLEND_FACTOR_SRC_ALPHA ||
+ dstRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE))
+ dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0;
+
+ /* Set the final value. */
+ blend.sx_mrt_blend_opt[i] =
+ S_028760_COLOR_SRC_OPT(srcRGB_opt) | S_028760_COLOR_DST_OPT(dstRGB_opt) |
+ S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(eqRGB)) |
+ S_028760_ALPHA_SRC_OPT(srcA_opt) | S_028760_ALPHA_DST_OPT(dstA_opt) |
+ S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(eqA));
+ blend_cntl |= S_028780_ENABLE(1);
+
+ blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB));
+ blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(srcRGB));
+ blend_cntl |= S_028780_COLOR_DESTBLEND(si_translate_blend_factor(dstRGB));
+ if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) {
+ blend_cntl |= S_028780_SEPARATE_ALPHA_BLEND(1);
+ blend_cntl |= S_028780_ALPHA_COMB_FCN(si_translate_blend_function(eqA));
+ blend_cntl |= S_028780_ALPHA_SRCBLEND(si_translate_blend_factor(srcA));
+ blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(dstA));
+ }
+ blend.cb_blend_control[i] = blend_cntl;
+
+ blend.blend_enable_4bit |= 0xfu << (i * 4);
+
+ if (srcRGB == VK_BLEND_FACTOR_SRC_ALPHA || dstRGB == VK_BLEND_FACTOR_SRC_ALPHA ||
+ srcRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE ||
+ dstRGB == VK_BLEND_FACTOR_SRC_ALPHA_SATURATE ||
+ srcRGB == VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA ||
+ dstRGB == VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA)
+ blend.need_src_alpha |= 1 << i;
+ }
+ for (i = vkblend->attachmentCount; i < 8; i++) {
+ blend.cb_blend_control[i] = 0;
+ blend.sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) |
+ S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED);
+ }
+ }
+
+ if (pipeline->device->physical_device->rad_info.has_rbplus) {
+ /* Disable RB+ blend optimizations for dual source blending. */
+ if (blend.mrt0_is_dual_src) {
+ for (i = 0; i < 8; i++) {
+ blend.sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) |
+ S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE);
+ }
+ }
+
+ /* RB+ doesn't work with dual source blending, logic op and
+ * RESOLVE.
+ */
+ if (blend.mrt0_is_dual_src || (vkblend && vkblend->logicOpEnable) ||
+ mode == V_028808_CB_RESOLVE)
+ blend.cb_color_control |= S_028808_DISABLE_DUAL_QUAD(1);
+ }
+
+ if (blend.cb_target_mask)
+ blend.cb_color_control |= S_028808_MODE(mode);
+ else
+ blend.cb_color_control |= S_028808_MODE(V_028808_CB_DISABLE);
+
+ radv_pipeline_compute_spi_color_formats(pipeline, pCreateInfo, &blend);
+ return blend;
}
-static uint32_t si_translate_fill(VkPolygonMode func)
+static uint32_t
+si_translate_fill(VkPolygonMode func)
{
- switch(func) {
- case VK_POLYGON_MODE_FILL:
- return V_028814_X_DRAW_TRIANGLES;
- case VK_POLYGON_MODE_LINE:
- return V_028814_X_DRAW_LINES;
- case VK_POLYGON_MODE_POINT:
- return V_028814_X_DRAW_POINTS;
- default:
- assert(0);
- return V_028814_X_DRAW_POINTS;
- }
+ switch (func) {
+ case VK_POLYGON_MODE_FILL:
+ return V_028814_X_DRAW_TRIANGLES;
+ case VK_POLYGON_MODE_LINE:
+ return V_028814_X_DRAW_LINES;
+ case VK_POLYGON_MODE_POINT:
+ return V_028814_X_DRAW_POINTS;
+ default:
+ assert(0);
+ return V_028814_X_DRAW_POINTS;
+ }
}
-static uint8_t radv_pipeline_get_ps_iter_samples(const VkGraphicsPipelineCreateInfo *pCreateInfo)
+static uint8_t
+radv_pipeline_get_ps_iter_samples(const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
- const VkPipelineMultisampleStateCreateInfo *vkms = pCreateInfo->pMultisampleState;
- RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
- struct radv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass];
- uint32_t ps_iter_samples = 1;
- uint32_t num_samples;
-
- /* From the Vulkan 1.1.129 spec, 26.7. Sample Shading:
- *
- * "If the VK_AMD_mixed_attachment_samples extension is enabled and the
- * subpass uses color attachments, totalSamples is the number of
- * samples of the color attachments. Otherwise, totalSamples is the
- * value of VkPipelineMultisampleStateCreateInfo::rasterizationSamples
- * specified at pipeline creation time."
- */
- if (subpass->has_color_att) {
- num_samples = subpass->color_sample_count;
- } else {
- num_samples = vkms->rasterizationSamples;
- }
-
- if (vkms->sampleShadingEnable) {
- ps_iter_samples = ceilf(vkms->minSampleShading * num_samples);
- ps_iter_samples = util_next_power_of_two(ps_iter_samples);
- }
- return ps_iter_samples;
+ const VkPipelineMultisampleStateCreateInfo *vkms = pCreateInfo->pMultisampleState;
+ RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
+ struct radv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass];
+ uint32_t ps_iter_samples = 1;
+ uint32_t num_samples;
+
+ /* From the Vulkan 1.1.129 spec, 26.7. Sample Shading:
+ *
+ * "If the VK_AMD_mixed_attachment_samples extension is enabled and the
+ * subpass uses color attachments, totalSamples is the number of
+ * samples of the color attachments. Otherwise, totalSamples is the
+ * value of VkPipelineMultisampleStateCreateInfo::rasterizationSamples
+ * specified at pipeline creation time."
+ */
+ if (subpass->has_color_att) {
+ num_samples = subpass->color_sample_count;
+ } else {
+ num_samples = vkms->rasterizationSamples;
+ }
+
+ if (vkms->sampleShadingEnable) {
+ ps_iter_samples = ceilf(vkms->minSampleShading * num_samples);
+ ps_iter_samples = util_next_power_of_two(ps_iter_samples);
+ }
+ return ps_iter_samples;
}
static bool
radv_is_depth_write_enabled(const VkPipelineDepthStencilStateCreateInfo *pCreateInfo)
{
- return pCreateInfo->depthTestEnable &&
- pCreateInfo->depthWriteEnable &&
- pCreateInfo->depthCompareOp != VK_COMPARE_OP_NEVER;
+ return pCreateInfo->depthTestEnable && pCreateInfo->depthWriteEnable &&
+ pCreateInfo->depthCompareOp != VK_COMPARE_OP_NEVER;
}
static bool
radv_writes_stencil(const VkStencilOpState *state)
{
- return state->writeMask &&
- (state->failOp != VK_STENCIL_OP_KEEP ||
- state->passOp != VK_STENCIL_OP_KEEP ||
- state->depthFailOp != VK_STENCIL_OP_KEEP);
+ return state->writeMask &&
+ (state->failOp != VK_STENCIL_OP_KEEP || state->passOp != VK_STENCIL_OP_KEEP ||
+ state->depthFailOp != VK_STENCIL_OP_KEEP);
}
static bool
radv_is_stencil_write_enabled(const VkPipelineDepthStencilStateCreateInfo *pCreateInfo)
{
- return pCreateInfo->stencilTestEnable &&
- (radv_writes_stencil(&pCreateInfo->front) ||
- radv_writes_stencil(&pCreateInfo->back));
+ return pCreateInfo->stencilTestEnable &&
+ (radv_writes_stencil(&pCreateInfo->front) || radv_writes_stencil(&pCreateInfo->back));
}
static bool
radv_is_ds_write_enabled(const VkPipelineDepthStencilStateCreateInfo *pCreateInfo)
{
- return radv_is_depth_write_enabled(pCreateInfo) ||
- radv_is_stencil_write_enabled(pCreateInfo);
+ return radv_is_depth_write_enabled(pCreateInfo) || radv_is_stencil_write_enabled(pCreateInfo);
}
static bool
radv_order_invariant_stencil_op(VkStencilOp op)
{
- /* REPLACE is normally order invariant, except when the stencil
- * reference value is written by the fragment shader. Tracking this
- * interaction does not seem worth the effort, so be conservative.
- */
- return op != VK_STENCIL_OP_INCREMENT_AND_CLAMP &&
- op != VK_STENCIL_OP_DECREMENT_AND_CLAMP &&
- op != VK_STENCIL_OP_REPLACE;
+ /* REPLACE is normally order invariant, except when the stencil
+ * reference value is written by the fragment shader. Tracking this
+ * interaction does not seem worth the effort, so be conservative.
+ */
+ return op != VK_STENCIL_OP_INCREMENT_AND_CLAMP && op != VK_STENCIL_OP_DECREMENT_AND_CLAMP &&
+ op != VK_STENCIL_OP_REPLACE;
}
static bool
radv_order_invariant_stencil_state(const VkStencilOpState *state)
{
- /* Compute whether, assuming Z writes are disabled, this stencil state
- * is order invariant in the sense that the set of passing fragments as
- * well as the final stencil buffer result does not depend on the order
- * of fragments.
- */
- return !state->writeMask ||
- /* The following assumes that Z writes are disabled. */
- (state->compareOp == VK_COMPARE_OP_ALWAYS &&
- radv_order_invariant_stencil_op(state->passOp) &&
- radv_order_invariant_stencil_op(state->depthFailOp)) ||
- (state->compareOp == VK_COMPARE_OP_NEVER &&
- radv_order_invariant_stencil_op(state->failOp));
+ /* Compute whether, assuming Z writes are disabled, this stencil state
+ * is order invariant in the sense that the set of passing fragments as
+ * well as the final stencil buffer result does not depend on the order
+ * of fragments.
+ */
+ return !state->writeMask ||
+ /* The following assumes that Z writes are disabled. */
+ (state->compareOp == VK_COMPARE_OP_ALWAYS &&
+ radv_order_invariant_stencil_op(state->passOp) &&
+ radv_order_invariant_stencil_op(state->depthFailOp)) ||
+ (state->compareOp == VK_COMPARE_OP_NEVER &&
+ radv_order_invariant_stencil_op(state->failOp));
}
static bool
-radv_is_state_dynamic(const VkGraphicsPipelineCreateInfo *pCreateInfo,
- VkDynamicState state)
+radv_is_state_dynamic(const VkGraphicsPipelineCreateInfo *pCreateInfo, VkDynamicState state)
{
- if (pCreateInfo->pDynamicState) {
- uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount;
- for (uint32_t i = 0; i < count; i++) {
- if (pCreateInfo->pDynamicState->pDynamicStates[i] == state)
- return true;
- }
- }
-
- return false;
+ if (pCreateInfo->pDynamicState) {
+ uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount;
+ for (uint32_t i = 0; i < count; i++) {
+ if (pCreateInfo->pDynamicState->pDynamicStates[i] == state)
+ return true;
+ }
+ }
+
+ return false;
}
static bool
radv_pipeline_has_dynamic_ds_states(const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
- VkDynamicState ds_states[] = {
- VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT,
- VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT,
- VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT,
- VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT,
- VK_DYNAMIC_STATE_STENCIL_OP_EXT,
- };
-
- for (uint32_t i = 0; i < ARRAY_SIZE(ds_states); i++) {
- if (radv_is_state_dynamic(pCreateInfo, ds_states[i]))
- return true;
- }
-
- return false;
+ VkDynamicState ds_states[] = {
+ VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT, VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT,
+ VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT, VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT,
+ VK_DYNAMIC_STATE_STENCIL_OP_EXT,
+ };
+
+ for (uint32_t i = 0; i < ARRAY_SIZE(ds_states); i++) {
+ if (radv_is_state_dynamic(pCreateInfo, ds_states[i]))
+ return true;
+ }
+
+ return false;
}
static bool
radv_pipeline_out_of_order_rast(struct radv_pipeline *pipeline,
- const struct radv_blend_state *blend,
- const VkGraphicsPipelineCreateInfo *pCreateInfo)
+ const struct radv_blend_state *blend,
+ const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
- RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
- struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
- const VkPipelineDepthStencilStateCreateInfo *vkds = radv_pipeline_get_depth_stencil_state(pCreateInfo);
- const VkPipelineColorBlendStateCreateInfo *vkblend = radv_pipeline_get_color_blend_state(pCreateInfo);
- unsigned colormask = blend->cb_target_enabled_4bit;
-
- if (!pipeline->device->physical_device->out_of_order_rast_allowed)
- return false;
-
- /* Be conservative if a logic operation is enabled with color buffers. */
- if (colormask && vkblend && vkblend->logicOpEnable)
- return false;
-
- /* Be conservative if an extended dynamic depth/stencil state is
- * enabled because the driver can't update out-of-order rasterization
- * dynamically.
- */
- if (radv_pipeline_has_dynamic_ds_states(pCreateInfo))
- return false;
-
- /* Default depth/stencil invariance when no attachment is bound. */
- struct radv_dsa_order_invariance dsa_order_invariant = {
- .zs = true, .pass_set = true
- };
-
- if (vkds) {
- struct radv_render_pass_attachment *attachment =
- pass->attachments + subpass->depth_stencil_attachment->attachment;
- bool has_stencil = vk_format_has_stencil(attachment->format);
- struct radv_dsa_order_invariance order_invariance[2];
- struct radv_shader_variant *ps =
- pipeline->shaders[MESA_SHADER_FRAGMENT];
-
- /* Compute depth/stencil order invariance in order to know if
- * it's safe to enable out-of-order.
- */
- bool zfunc_is_ordered =
- vkds->depthCompareOp == VK_COMPARE_OP_NEVER ||
- vkds->depthCompareOp == VK_COMPARE_OP_LESS ||
- vkds->depthCompareOp == VK_COMPARE_OP_LESS_OR_EQUAL ||
- vkds->depthCompareOp == VK_COMPARE_OP_GREATER ||
- vkds->depthCompareOp == VK_COMPARE_OP_GREATER_OR_EQUAL;
-
- bool nozwrite_and_order_invariant_stencil =
- !radv_is_ds_write_enabled(vkds) ||
- (!radv_is_depth_write_enabled(vkds) &&
- radv_order_invariant_stencil_state(&vkds->front) &&
- radv_order_invariant_stencil_state(&vkds->back));
-
- order_invariance[1].zs =
- nozwrite_and_order_invariant_stencil ||
- (!radv_is_stencil_write_enabled(vkds) &&
- zfunc_is_ordered);
- order_invariance[0].zs =
- !radv_is_depth_write_enabled(vkds) || zfunc_is_ordered;
-
- order_invariance[1].pass_set =
- nozwrite_and_order_invariant_stencil ||
- (!radv_is_stencil_write_enabled(vkds) &&
- (vkds->depthCompareOp == VK_COMPARE_OP_ALWAYS ||
- vkds->depthCompareOp == VK_COMPARE_OP_NEVER));
- order_invariance[0].pass_set =
- !radv_is_depth_write_enabled(vkds) ||
- (vkds->depthCompareOp == VK_COMPARE_OP_ALWAYS ||
- vkds->depthCompareOp == VK_COMPARE_OP_NEVER);
-
- dsa_order_invariant = order_invariance[has_stencil];
- if (!dsa_order_invariant.zs)
- return false;
-
- /* The set of PS invocations is always order invariant,
- * except when early Z/S tests are requested.
- */
- if (ps &&
- ps->info.ps.writes_memory &&
- ps->info.ps.early_fragment_test &&
- !dsa_order_invariant.pass_set)
- return false;
-
- /* Determine if out-of-order rasterization should be disabled
- * when occlusion queries are used.
- */
- pipeline->graphics.disable_out_of_order_rast_for_occlusion =
- !dsa_order_invariant.pass_set;
- }
-
- /* No color buffers are enabled for writing. */
- if (!colormask)
- return true;
-
- unsigned blendmask = colormask & blend->blend_enable_4bit;
-
- if (blendmask) {
- /* Only commutative blending. */
- if (blendmask & ~blend->commutative_4bit)
- return false;
-
- if (!dsa_order_invariant.pass_set)
- return false;
- }
-
- if (colormask & ~blendmask)
- return false;
-
- return true;
+ RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
+ struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
+ const VkPipelineDepthStencilStateCreateInfo *vkds =
+ radv_pipeline_get_depth_stencil_state(pCreateInfo);
+ const VkPipelineColorBlendStateCreateInfo *vkblend =
+ radv_pipeline_get_color_blend_state(pCreateInfo);
+ unsigned colormask = blend->cb_target_enabled_4bit;
+
+ if (!pipeline->device->physical_device->out_of_order_rast_allowed)
+ return false;
+
+ /* Be conservative if a logic operation is enabled with color buffers. */
+ if (colormask && vkblend && vkblend->logicOpEnable)
+ return false;
+
+ /* Be conservative if an extended dynamic depth/stencil state is
+ * enabled because the driver can't update out-of-order rasterization
+ * dynamically.
+ */
+ if (radv_pipeline_has_dynamic_ds_states(pCreateInfo))
+ return false;
+
+ /* Default depth/stencil invariance when no attachment is bound. */
+ struct radv_dsa_order_invariance dsa_order_invariant = {.zs = true, .pass_set = true};
+
+ if (vkds) {
+ struct radv_render_pass_attachment *attachment =
+ pass->attachments + subpass->depth_stencil_attachment->attachment;
+ bool has_stencil = vk_format_has_stencil(attachment->format);
+ struct radv_dsa_order_invariance order_invariance[2];
+ struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
+
+ /* Compute depth/stencil order invariance in order to know if
+ * it's safe to enable out-of-order.
+ */
+ bool zfunc_is_ordered = vkds->depthCompareOp == VK_COMPARE_OP_NEVER ||
+ vkds->depthCompareOp == VK_COMPARE_OP_LESS ||
+ vkds->depthCompareOp == VK_COMPARE_OP_LESS_OR_EQUAL ||
+ vkds->depthCompareOp == VK_COMPARE_OP_GREATER ||
+ vkds->depthCompareOp == VK_COMPARE_OP_GREATER_OR_EQUAL;
+
+ bool nozwrite_and_order_invariant_stencil =
+ !radv_is_ds_write_enabled(vkds) ||
+ (!radv_is_depth_write_enabled(vkds) && radv_order_invariant_stencil_state(&vkds->front) &&
+ radv_order_invariant_stencil_state(&vkds->back));
+
+ order_invariance[1].zs = nozwrite_and_order_invariant_stencil ||
+ (!radv_is_stencil_write_enabled(vkds) && zfunc_is_ordered);
+ order_invariance[0].zs = !radv_is_depth_write_enabled(vkds) || zfunc_is_ordered;
+
+ order_invariance[1].pass_set =
+ nozwrite_and_order_invariant_stencil ||
+ (!radv_is_stencil_write_enabled(vkds) && (vkds->depthCompareOp == VK_COMPARE_OP_ALWAYS ||
+ vkds->depthCompareOp == VK_COMPARE_OP_NEVER));
+ order_invariance[0].pass_set =
+ !radv_is_depth_write_enabled(vkds) || (vkds->depthCompareOp == VK_COMPARE_OP_ALWAYS ||
+ vkds->depthCompareOp == VK_COMPARE_OP_NEVER);
+
+ dsa_order_invariant = order_invariance[has_stencil];
+ if (!dsa_order_invariant.zs)
+ return false;
+
+ /* The set of PS invocations is always order invariant,
+ * except when early Z/S tests are requested.
+ */
+ if (ps && ps->info.ps.writes_memory && ps->info.ps.early_fragment_test &&
+ !dsa_order_invariant.pass_set)
+ return false;
+
+ /* Determine if out-of-order rasterization should be disabled
+ * when occlusion queries are used.
+ */
+ pipeline->graphics.disable_out_of_order_rast_for_occlusion = !dsa_order_invariant.pass_set;
+ }
+
+ /* No color buffers are enabled for writing. */
+ if (!colormask)
+ return true;
+
+ unsigned blendmask = colormask & blend->blend_enable_4bit;
+
+ if (blendmask) {
+ /* Only commutative blending. */
+ if (blendmask & ~blend->commutative_4bit)
+ return false;
+
+ if (!dsa_order_invariant.pass_set)
+ return false;
+ }
+
+ if (colormask & ~blendmask)
+ return false;
+
+ return true;
}
static const VkConservativeRasterizationModeEXT
radv_get_conservative_raster_mode(const VkPipelineRasterizationStateCreateInfo *pCreateInfo)
{
- const VkPipelineRasterizationConservativeStateCreateInfoEXT *conservative_raster =
- vk_find_struct_const(pCreateInfo->pNext, PIPELINE_RASTERIZATION_CONSERVATIVE_STATE_CREATE_INFO_EXT);
+ const VkPipelineRasterizationConservativeStateCreateInfoEXT *conservative_raster =
+ vk_find_struct_const(pCreateInfo->pNext,
+ PIPELINE_RASTERIZATION_CONSERVATIVE_STATE_CREATE_INFO_EXT);
- if (!conservative_raster)
- return VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT;
- return conservative_raster->conservativeRasterizationMode;
+ if (!conservative_raster)
+ return VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT;
+ return conservative_raster->conservativeRasterizationMode;
}
static void
radv_pipeline_init_multisample_state(struct radv_pipeline *pipeline,
- const struct radv_blend_state *blend,
- const VkGraphicsPipelineCreateInfo *pCreateInfo)
+ const struct radv_blend_state *blend,
+ const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
- const VkPipelineMultisampleStateCreateInfo *vkms = radv_pipeline_get_multisample_state(pCreateInfo);
- struct radv_multisample_state *ms = &pipeline->graphics.ms;
- unsigned num_tile_pipes = pipeline->device->physical_device->rad_info.num_tile_pipes;
- const VkConservativeRasterizationModeEXT mode =
- radv_get_conservative_raster_mode(pCreateInfo->pRasterizationState);
- bool out_of_order_rast = false;
- int ps_iter_samples = 1;
- uint32_t mask = 0xffff;
-
- if (vkms) {
- ms->num_samples = vkms->rasterizationSamples;
-
- /* From the Vulkan 1.1.129 spec, 26.7. Sample Shading:
- *
- * "Sample shading is enabled for a graphics pipeline:
- *
- * - If the interface of the fragment shader entry point of the
- * graphics pipeline includes an input variable decorated
- * with SampleId or SamplePosition. In this case
- * minSampleShadingFactor takes the value 1.0.
- * - Else if the sampleShadingEnable member of the
- * VkPipelineMultisampleStateCreateInfo structure specified
- * when creating the graphics pipeline is set to VK_TRUE. In
- * this case minSampleShadingFactor takes the value of
- * VkPipelineMultisampleStateCreateInfo::minSampleShading.
- *
- * Otherwise, sample shading is considered disabled."
- */
- if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.ps.uses_sample_shading) {
- ps_iter_samples = ms->num_samples;
- } else {
- ps_iter_samples = radv_pipeline_get_ps_iter_samples(pCreateInfo);
- }
- } else {
- ms->num_samples = 1;
- }
-
- const struct VkPipelineRasterizationStateRasterizationOrderAMD *raster_order =
- vk_find_struct_const(pCreateInfo->pRasterizationState->pNext, PIPELINE_RASTERIZATION_STATE_RASTERIZATION_ORDER_AMD);
- if (raster_order && raster_order->rasterizationOrder == VK_RASTERIZATION_ORDER_RELAXED_AMD) {
- /* Out-of-order rasterization is explicitly enabled by the
- * application.
- */
- out_of_order_rast = true;
- } else {
- /* Determine if the driver can enable out-of-order
- * rasterization internally.
- */
- out_of_order_rast =
- radv_pipeline_out_of_order_rast(pipeline, blend, pCreateInfo);
- }
-
- ms->pa_sc_aa_config = 0;
- ms->db_eqaa = S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
- S_028804_INCOHERENT_EQAA_READS(1) |
- S_028804_INTERPOLATE_COMP_Z(1) |
- S_028804_STATIC_ANCHOR_ASSOCIATIONS(1);
-
- /* Adjust MSAA state if conservative rasterization is enabled. */
- if (mode != VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT) {
- ms->pa_sc_aa_config |= S_028BE0_AA_MASK_CENTROID_DTMN(1);
-
- ms->db_eqaa |= S_028804_ENABLE_POSTZ_OVERRASTERIZATION(1) |
- S_028804_OVERRASTERIZATION_AMOUNT(4);
- }
-
- ms->pa_sc_mode_cntl_1 =
- S_028A4C_WALK_FENCE_ENABLE(1) | //TODO linear dst fixes
- S_028A4C_WALK_FENCE_SIZE(num_tile_pipes == 2 ? 2 : 3) |
- S_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(out_of_order_rast) |
- S_028A4C_OUT_OF_ORDER_WATER_MARK(0x7) |
- /* always 1: */
- S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(1) |
- S_028A4C_SUPERTILE_WALK_ORDER_ENABLE(1) |
- S_028A4C_TILE_WALK_ORDER_ENABLE(1) |
- S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(1) |
- S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
- S_028A4C_FORCE_EOV_REZ_ENABLE(1);
- ms->pa_sc_mode_cntl_0 = S_028A48_ALTERNATE_RBS_PER_TILE(pipeline->device->physical_device->rad_info.chip_class >= GFX9) |
- S_028A48_VPORT_SCISSOR_ENABLE(1);
-
- const VkPipelineRasterizationLineStateCreateInfoEXT *rast_line =
- vk_find_struct_const(pCreateInfo->pRasterizationState->pNext,
- PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT);
- if (rast_line) {
- ms->pa_sc_mode_cntl_0 |= S_028A48_LINE_STIPPLE_ENABLE(rast_line->stippledLineEnable);
- if (rast_line->lineRasterizationMode == VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT) {
- /* From the Vulkan spec 1.1.129:
- *
- * "When VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT lines
- * are being rasterized, sample locations may all be
- * treated as being at the pixel center (this may
- * affect attribute and depth interpolation)."
- */
- ms->num_samples = 1;
- }
- }
-
- if (ms->num_samples > 1) {
- RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
- struct radv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass];
- uint32_t z_samples = subpass->depth_stencil_attachment ? subpass->depth_sample_count : ms->num_samples;
- unsigned log_samples = util_logbase2(ms->num_samples);
- unsigned log_z_samples = util_logbase2(z_samples);
- unsigned log_ps_iter_samples = util_logbase2(ps_iter_samples);
- ms->pa_sc_mode_cntl_0 |= S_028A48_MSAA_ENABLE(1);
- ms->db_eqaa |= S_028804_MAX_ANCHOR_SAMPLES(log_z_samples) |
- S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) |
- S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) |
- S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples);
- ms->pa_sc_aa_config |= S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
- S_028BE0_MAX_SAMPLE_DIST(radv_get_default_max_sample_dist(log_samples)) |
- S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples) | /* CM_R_028BE0_PA_SC_AA_CONFIG */
- S_028BE0_COVERED_CENTROID_IS_CENTER(pipeline->device->physical_device->rad_info.chip_class >= GFX10_3);
- ms->pa_sc_mode_cntl_1 |= S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1);
- if (ps_iter_samples > 1)
- pipeline->graphics.spi_baryc_cntl |= S_0286E0_POS_FLOAT_LOCATION(2);
- }
-
- if (vkms && vkms->pSampleMask) {
- mask = vkms->pSampleMask[0] & 0xffff;
- }
-
- ms->pa_sc_aa_mask[0] = mask | (mask << 16);
- ms->pa_sc_aa_mask[1] = mask | (mask << 16);
+ const VkPipelineMultisampleStateCreateInfo *vkms =
+ radv_pipeline_get_multisample_state(pCreateInfo);
+ struct radv_multisample_state *ms = &pipeline->graphics.ms;
+ unsigned num_tile_pipes = pipeline->device->physical_device->rad_info.num_tile_pipes;
+ const VkConservativeRasterizationModeEXT mode =
+ radv_get_conservative_raster_mode(pCreateInfo->pRasterizationState);
+ bool out_of_order_rast = false;
+ int ps_iter_samples = 1;
+ uint32_t mask = 0xffff;
+
+ if (vkms) {
+ ms->num_samples = vkms->rasterizationSamples;
+
+ /* From the Vulkan 1.1.129 spec, 26.7. Sample Shading:
+ *
+ * "Sample shading is enabled for a graphics pipeline:
+ *
+ * - If the interface of the fragment shader entry point of the
+ * graphics pipeline includes an input variable decorated
+ * with SampleId or SamplePosition. In this case
+ * minSampleShadingFactor takes the value 1.0.
+ * - Else if the sampleShadingEnable member of the
+ * VkPipelineMultisampleStateCreateInfo structure specified
+ * when creating the graphics pipeline is set to VK_TRUE. In
+ * this case minSampleShadingFactor takes the value of
+ * VkPipelineMultisampleStateCreateInfo::minSampleShading.
+ *
+ * Otherwise, sample shading is considered disabled."
+ */
+ if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.ps.uses_sample_shading) {
+ ps_iter_samples = ms->num_samples;
+ } else {
+ ps_iter_samples = radv_pipeline_get_ps_iter_samples(pCreateInfo);
+ }
+ } else {
+ ms->num_samples = 1;
+ }
+
+ const struct VkPipelineRasterizationStateRasterizationOrderAMD *raster_order =
+ vk_find_struct_const(pCreateInfo->pRasterizationState->pNext,
+ PIPELINE_RASTERIZATION_STATE_RASTERIZATION_ORDER_AMD);
+ if (raster_order && raster_order->rasterizationOrder == VK_RASTERIZATION_ORDER_RELAXED_AMD) {
+ /* Out-of-order rasterization is explicitly enabled by the
+ * application.
+ */
+ out_of_order_rast = true;
+ } else {
+ /* Determine if the driver can enable out-of-order
+ * rasterization internally.
+ */
+ out_of_order_rast = radv_pipeline_out_of_order_rast(pipeline, blend, pCreateInfo);
+ }
+
+ ms->pa_sc_aa_config = 0;
+ ms->db_eqaa = S_028804_HIGH_QUALITY_INTERSECTIONS(1) | S_028804_INCOHERENT_EQAA_READS(1) |
+ S_028804_INTERPOLATE_COMP_Z(1) | S_028804_STATIC_ANCHOR_ASSOCIATIONS(1);
+
+ /* Adjust MSAA state if conservative rasterization is enabled. */
+ if (mode != VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT) {
+ ms->pa_sc_aa_config |= S_028BE0_AA_MASK_CENTROID_DTMN(1);
+
+ ms->db_eqaa |=
+ S_028804_ENABLE_POSTZ_OVERRASTERIZATION(1) | S_028804_OVERRASTERIZATION_AMOUNT(4);
+ }
+
+ ms->pa_sc_mode_cntl_1 =
+ S_028A4C_WALK_FENCE_ENABLE(1) | // TODO linear dst fixes
+ S_028A4C_WALK_FENCE_SIZE(num_tile_pipes == 2 ? 2 : 3) |
+ S_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(out_of_order_rast) |
+ S_028A4C_OUT_OF_ORDER_WATER_MARK(0x7) |
+ /* always 1: */
+ S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(1) | S_028A4C_SUPERTILE_WALK_ORDER_ENABLE(1) |
+ S_028A4C_TILE_WALK_ORDER_ENABLE(1) | S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(1) |
+ S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) | S_028A4C_FORCE_EOV_REZ_ENABLE(1);
+ ms->pa_sc_mode_cntl_0 = S_028A48_ALTERNATE_RBS_PER_TILE(
+ pipeline->device->physical_device->rad_info.chip_class >= GFX9) |
+ S_028A48_VPORT_SCISSOR_ENABLE(1);
+
+ const VkPipelineRasterizationLineStateCreateInfoEXT *rast_line = vk_find_struct_const(
+ pCreateInfo->pRasterizationState->pNext, PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT);
+ if (rast_line) {
+ ms->pa_sc_mode_cntl_0 |= S_028A48_LINE_STIPPLE_ENABLE(rast_line->stippledLineEnable);
+ if (rast_line->lineRasterizationMode == VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT) {
+ /* From the Vulkan spec 1.1.129:
+ *
+ * "When VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT lines
+ * are being rasterized, sample locations may all be
+ * treated as being at the pixel center (this may
+ * affect attribute and depth interpolation)."
+ */
+ ms->num_samples = 1;
+ }
+ }
+
+ if (ms->num_samples > 1) {
+ RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
+ struct radv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass];
+ uint32_t z_samples =
+ subpass->depth_stencil_attachment ? subpass->depth_sample_count : ms->num_samples;
+ unsigned log_samples = util_logbase2(ms->num_samples);
+ unsigned log_z_samples = util_logbase2(z_samples);
+ unsigned log_ps_iter_samples = util_logbase2(ps_iter_samples);
+ ms->pa_sc_mode_cntl_0 |= S_028A48_MSAA_ENABLE(1);
+ ms->db_eqaa |= S_028804_MAX_ANCHOR_SAMPLES(log_z_samples) |
+ S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) |
+ S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) |
+ S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples);
+ ms->pa_sc_aa_config |=
+ S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
+ S_028BE0_MAX_SAMPLE_DIST(radv_get_default_max_sample_dist(log_samples)) |
+ S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples) | /* CM_R_028BE0_PA_SC_AA_CONFIG */
+ S_028BE0_COVERED_CENTROID_IS_CENTER(
+ pipeline->device->physical_device->rad_info.chip_class >= GFX10_3);
+ ms->pa_sc_mode_cntl_1 |= S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1);
+ if (ps_iter_samples > 1)
+ pipeline->graphics.spi_baryc_cntl |= S_0286E0_POS_FLOAT_LOCATION(2);
+ }
+
+ if (vkms && vkms->pSampleMask) {
+ mask = vkms->pSampleMask[0] & 0xffff;
+ }
+
+ ms->pa_sc_aa_mask[0] = mask | (mask << 16);
+ ms->pa_sc_aa_mask[1] = mask | (mask << 16);
}
static void
gfx103_pipeline_init_vrs_state(struct radv_pipeline *pipeline,
- const VkGraphicsPipelineCreateInfo *pCreateInfo)
+ const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
- const VkPipelineMultisampleStateCreateInfo *vkms = radv_pipeline_get_multisample_state(pCreateInfo);
- struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
- struct radv_multisample_state *ms = &pipeline->graphics.ms;
- struct radv_vrs_state *vrs = &pipeline->graphics.vrs;
-
- if (vkms &&
- (vkms->sampleShadingEnable ||
- ps->info.ps.uses_sample_shading || ps->info.ps.reads_sample_mask_in)) {
- /* Disable VRS and use the rates from PS_ITER_SAMPLES if:
- *
- * 1) sample shading is enabled or per-sample interpolation is
- * used by the fragment shader
- * 2) the fragment shader reads gl_SampleMaskIn because the
- * 16-bit sample coverage mask isn't enough for MSAA8x and
- * 2x2 coarse shading isn't enough.
- */
- vrs->pa_cl_vrs_cntl =
- S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE);
-
- /* Make sure sample shading is enabled even if only MSAA1x is
- * used because the SAMPLE_ITER combiner is in passthrough
- * mode if PS_ITER_SAMPLE is 0, and it uses the per-draw rate.
- * The default VRS rate when sample shading is enabled is 1x1.
- */
- if (!G_028A4C_PS_ITER_SAMPLE(ms->pa_sc_mode_cntl_1))
- ms->pa_sc_mode_cntl_1 |= S_028A4C_PS_ITER_SAMPLE(1);
- } else {
- vrs->pa_cl_vrs_cntl =
- S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_VRS_COMB_MODE_PASSTHRU);
- }
-
- /* Primitive and HTILE combiners are always passthrough. */
- vrs->pa_cl_vrs_cntl |= S_028848_PRIMITIVE_RATE_COMBINER_MODE(V_028848_VRS_COMB_MODE_PASSTHRU) |
- S_028848_HTILE_RATE_COMBINER_MODE(V_028848_VRS_COMB_MODE_PASSTHRU);
+ const VkPipelineMultisampleStateCreateInfo *vkms =
+ radv_pipeline_get_multisample_state(pCreateInfo);
+ struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
+ struct radv_multisample_state *ms = &pipeline->graphics.ms;
+ struct radv_vrs_state *vrs = &pipeline->graphics.vrs;
+
+ if (vkms && (vkms->sampleShadingEnable || ps->info.ps.uses_sample_shading ||
+ ps->info.ps.reads_sample_mask_in)) {
+ /* Disable VRS and use the rates from PS_ITER_SAMPLES if:
+ *
+ * 1) sample shading is enabled or per-sample interpolation is
+ * used by the fragment shader
+ * 2) the fragment shader reads gl_SampleMaskIn because the
+ * 16-bit sample coverage mask isn't enough for MSAA8x and
+ * 2x2 coarse shading isn't enough.
+ */
+ vrs->pa_cl_vrs_cntl = S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE);
+
+ /* Make sure sample shading is enabled even if only MSAA1x is
+ * used because the SAMPLE_ITER combiner is in passthrough
+ * mode if PS_ITER_SAMPLE is 0, and it uses the per-draw rate.
+ * The default VRS rate when sample shading is enabled is 1x1.
+ */
+ if (!G_028A4C_PS_ITER_SAMPLE(ms->pa_sc_mode_cntl_1))
+ ms->pa_sc_mode_cntl_1 |= S_028A4C_PS_ITER_SAMPLE(1);
+ } else {
+ vrs->pa_cl_vrs_cntl = S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_VRS_COMB_MODE_PASSTHRU);
+ }
+
+ /* Primitive and HTILE combiners are always passthrough. */
+ vrs->pa_cl_vrs_cntl |= S_028848_PRIMITIVE_RATE_COMBINER_MODE(V_028848_VRS_COMB_MODE_PASSTHRU) |
+ S_028848_HTILE_RATE_COMBINER_MODE(V_028848_VRS_COMB_MODE_PASSTHRU);
}
static bool
radv_prim_can_use_guardband(enum VkPrimitiveTopology topology)
{
- switch (topology) {
- case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
- case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
- case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
- case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
- case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
- return false;
- case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
- case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
- case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
- case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
- case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
- case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
- return true;
- default:
- unreachable("unhandled primitive type");
- }
+ switch (topology) {
+ case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
+ case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
+ case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
+ case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
+ case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
+ return false;
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
+ case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
+ return true;
+ default:
+ unreachable("unhandled primitive type");
+ }
}
static uint32_t
si_conv_gl_prim_to_gs_out(unsigned gl_prim)
{
- switch (gl_prim) {
- case 0: /* GL_POINTS */
- return V_028A6C_POINTLIST;
- case 1: /* GL_LINES */
- case 3: /* GL_LINE_STRIP */
- case 0xA: /* GL_LINE_STRIP_ADJACENCY_ARB */
- case 0x8E7A: /* GL_ISOLINES */
- return V_028A6C_LINESTRIP;
-
- case 4: /* GL_TRIANGLES */
- case 0xc: /* GL_TRIANGLES_ADJACENCY_ARB */
- case 5: /* GL_TRIANGLE_STRIP */
- case 7: /* GL_QUADS */
- return V_028A6C_TRISTRIP;
- default:
- assert(0);
- return 0;
- }
+ switch (gl_prim) {
+ case 0: /* GL_POINTS */
+ return V_028A6C_POINTLIST;
+ case 1: /* GL_LINES */
+ case 3: /* GL_LINE_STRIP */
+ case 0xA: /* GL_LINE_STRIP_ADJACENCY_ARB */
+ case 0x8E7A: /* GL_ISOLINES */
+ return V_028A6C_LINESTRIP;
+
+ case 4: /* GL_TRIANGLES */
+ case 0xc: /* GL_TRIANGLES_ADJACENCY_ARB */
+ case 5: /* GL_TRIANGLE_STRIP */
+ case 7: /* GL_QUADS */
+ return V_028A6C_TRISTRIP;
+ default:
+ assert(0);
+ return 0;
+ }
}
static uint32_t
si_conv_prim_to_gs_out(enum VkPrimitiveTopology topology)
{
- switch (topology) {
- case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
- case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
- return V_028A6C_POINTLIST;
- case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
- case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
- case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
- case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
- return V_028A6C_LINESTRIP;
- case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
- case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
- case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
- case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
- case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
- return V_028A6C_TRISTRIP;
- default:
- assert(0);
- return 0;
- }
+ switch (topology) {
+ case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
+ case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
+ return V_028A6C_POINTLIST;
+ case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
+ case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
+ case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
+ case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
+ return V_028A6C_LINESTRIP;
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
+ return V_028A6C_TRISTRIP;
+ default:
+ assert(0);
+ return 0;
+ }
}
-static uint64_t radv_dynamic_state_mask(VkDynamicState state)
+static uint64_t
+radv_dynamic_state_mask(VkDynamicState state)
{
- switch(state) {
- case VK_DYNAMIC_STATE_VIEWPORT:
- case VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT_EXT:
- return RADV_DYNAMIC_VIEWPORT;
- case VK_DYNAMIC_STATE_SCISSOR:
- case VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT_EXT:
- return RADV_DYNAMIC_SCISSOR;
- case VK_DYNAMIC_STATE_LINE_WIDTH:
- return RADV_DYNAMIC_LINE_WIDTH;
- case VK_DYNAMIC_STATE_DEPTH_BIAS:
- return RADV_DYNAMIC_DEPTH_BIAS;
- case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
- return RADV_DYNAMIC_BLEND_CONSTANTS;
- case VK_DYNAMIC_STATE_DEPTH_BOUNDS:
- return RADV_DYNAMIC_DEPTH_BOUNDS;
- case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
- return RADV_DYNAMIC_STENCIL_COMPARE_MASK;
- case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
- return RADV_DYNAMIC_STENCIL_WRITE_MASK;
- case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
- return RADV_DYNAMIC_STENCIL_REFERENCE;
- case VK_DYNAMIC_STATE_DISCARD_RECTANGLE_EXT:
- return RADV_DYNAMIC_DISCARD_RECTANGLE;
- case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT:
- return RADV_DYNAMIC_SAMPLE_LOCATIONS;
- case VK_DYNAMIC_STATE_LINE_STIPPLE_EXT:
- return RADV_DYNAMIC_LINE_STIPPLE;
- case VK_DYNAMIC_STATE_CULL_MODE_EXT:
- return RADV_DYNAMIC_CULL_MODE;
- case VK_DYNAMIC_STATE_FRONT_FACE_EXT:
- return RADV_DYNAMIC_FRONT_FACE;
- case VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT:
- return RADV_DYNAMIC_PRIMITIVE_TOPOLOGY;
- case VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT:
- return RADV_DYNAMIC_DEPTH_TEST_ENABLE;
- case VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT:
- return RADV_DYNAMIC_DEPTH_WRITE_ENABLE;
- case VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT:
- return RADV_DYNAMIC_DEPTH_COMPARE_OP;
- case VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT:
- return RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE;
- case VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT:
- return RADV_DYNAMIC_STENCIL_TEST_ENABLE;
- case VK_DYNAMIC_STATE_STENCIL_OP_EXT:
- return RADV_DYNAMIC_STENCIL_OP;
- case VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT:
- return RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE;
- case VK_DYNAMIC_STATE_FRAGMENT_SHADING_RATE_KHR:
- return RADV_DYNAMIC_FRAGMENT_SHADING_RATE;
- default:
- unreachable("Unhandled dynamic state");
- }
+ switch (state) {
+ case VK_DYNAMIC_STATE_VIEWPORT:
+ case VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT_EXT:
+ return RADV_DYNAMIC_VIEWPORT;
+ case VK_DYNAMIC_STATE_SCISSOR:
+ case VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT_EXT:
+ return RADV_DYNAMIC_SCISSOR;
+ case VK_DYNAMIC_STATE_LINE_WIDTH:
+ return RADV_DYNAMIC_LINE_WIDTH;
+ case VK_DYNAMIC_STATE_DEPTH_BIAS:
+ return RADV_DYNAMIC_DEPTH_BIAS;
+ case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
+ return RADV_DYNAMIC_BLEND_CONSTANTS;
+ case VK_DYNAMIC_STATE_DEPTH_BOUNDS:
+ return RADV_DYNAMIC_DEPTH_BOUNDS;
+ case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
+ return RADV_DYNAMIC_STENCIL_COMPARE_MASK;
+ case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
+ return RADV_DYNAMIC_STENCIL_WRITE_MASK;
+ case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
+ return RADV_DYNAMIC_STENCIL_REFERENCE;
+ case VK_DYNAMIC_STATE_DISCARD_RECTANGLE_EXT:
+ return RADV_DYNAMIC_DISCARD_RECTANGLE;
+ case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT:
+ return RADV_DYNAMIC_SAMPLE_LOCATIONS;
+ case VK_DYNAMIC_STATE_LINE_STIPPLE_EXT:
+ return RADV_DYNAMIC_LINE_STIPPLE;
+ case VK_DYNAMIC_STATE_CULL_MODE_EXT:
+ return RADV_DYNAMIC_CULL_MODE;
+ case VK_DYNAMIC_STATE_FRONT_FACE_EXT:
+ return RADV_DYNAMIC_FRONT_FACE;
+ case VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT:
+ return RADV_DYNAMIC_PRIMITIVE_TOPOLOGY;
+ case VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT:
+ return RADV_DYNAMIC_DEPTH_TEST_ENABLE;
+ case VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT:
+ return RADV_DYNAMIC_DEPTH_WRITE_ENABLE;
+ case VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT:
+ return RADV_DYNAMIC_DEPTH_COMPARE_OP;
+ case VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT:
+ return RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE;
+ case VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT:
+ return RADV_DYNAMIC_STENCIL_TEST_ENABLE;
+ case VK_DYNAMIC_STATE_STENCIL_OP_EXT:
+ return RADV_DYNAMIC_STENCIL_OP;
+ case VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT:
+ return RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE;
+ case VK_DYNAMIC_STATE_FRAGMENT_SHADING_RATE_KHR:
+ return RADV_DYNAMIC_FRAGMENT_SHADING_RATE;
+ default:
+ unreachable("Unhandled dynamic state");
+ }
}
-static uint64_t radv_pipeline_needed_dynamic_state(const VkGraphicsPipelineCreateInfo *pCreateInfo)
+static uint64_t
+radv_pipeline_needed_dynamic_state(const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
- uint64_t states = RADV_DYNAMIC_ALL;
-
- /* If rasterization is disabled we do not care about any of the
- * dynamic states, since they are all rasterization related only,
- * except primitive topology and vertex binding stride.
- */
- if (pCreateInfo->pRasterizationState->rasterizerDiscardEnable)
- return RADV_DYNAMIC_PRIMITIVE_TOPOLOGY |
- RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE;
-
- if (!pCreateInfo->pRasterizationState->depthBiasEnable)
- states &= ~RADV_DYNAMIC_DEPTH_BIAS;
-
- if (!pCreateInfo->pDepthStencilState ||
- (!pCreateInfo->pDepthStencilState->depthBoundsTestEnable &&
- !radv_is_state_dynamic(pCreateInfo, VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT)))
- states &= ~RADV_DYNAMIC_DEPTH_BOUNDS;
-
- if (!pCreateInfo->pDepthStencilState ||
- (!pCreateInfo->pDepthStencilState->stencilTestEnable &&
- !radv_is_state_dynamic(pCreateInfo, VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT)))
- states &= ~(RADV_DYNAMIC_STENCIL_COMPARE_MASK |
- RADV_DYNAMIC_STENCIL_WRITE_MASK |
- RADV_DYNAMIC_STENCIL_REFERENCE);
-
- if (!vk_find_struct_const(pCreateInfo->pNext, PIPELINE_DISCARD_RECTANGLE_STATE_CREATE_INFO_EXT))
- states &= ~RADV_DYNAMIC_DISCARD_RECTANGLE;
-
- if (!pCreateInfo->pMultisampleState ||
- !vk_find_struct_const(pCreateInfo->pMultisampleState->pNext,
- PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT))
- states &= ~RADV_DYNAMIC_SAMPLE_LOCATIONS;
-
- if (!pCreateInfo->pRasterizationState ||
- !vk_find_struct_const(pCreateInfo->pRasterizationState->pNext,
- PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT))
- states &= ~RADV_DYNAMIC_LINE_STIPPLE;
-
- if (!vk_find_struct_const(pCreateInfo->pNext,
- PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR))
- states &= ~RADV_DYNAMIC_FRAGMENT_SHADING_RATE;
-
- /* TODO: blend constants & line width. */
-
- return states;
+ uint64_t states = RADV_DYNAMIC_ALL;
+
+ /* If rasterization is disabled we do not care about any of the
+ * dynamic states, since they are all rasterization related only,
+ * except primitive topology and vertex binding stride.
+ */
+ if (pCreateInfo->pRasterizationState->rasterizerDiscardEnable)
+ return RADV_DYNAMIC_PRIMITIVE_TOPOLOGY | RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE;
+
+ if (!pCreateInfo->pRasterizationState->depthBiasEnable)
+ states &= ~RADV_DYNAMIC_DEPTH_BIAS;
+
+ if (!pCreateInfo->pDepthStencilState ||
+ (!pCreateInfo->pDepthStencilState->depthBoundsTestEnable &&
+ !radv_is_state_dynamic(pCreateInfo, VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT)))
+ states &= ~RADV_DYNAMIC_DEPTH_BOUNDS;
+
+ if (!pCreateInfo->pDepthStencilState ||
+ (!pCreateInfo->pDepthStencilState->stencilTestEnable &&
+ !radv_is_state_dynamic(pCreateInfo, VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT)))
+ states &= ~(RADV_DYNAMIC_STENCIL_COMPARE_MASK | RADV_DYNAMIC_STENCIL_WRITE_MASK |
+ RADV_DYNAMIC_STENCIL_REFERENCE);
+
+ if (!vk_find_struct_const(pCreateInfo->pNext, PIPELINE_DISCARD_RECTANGLE_STATE_CREATE_INFO_EXT))
+ states &= ~RADV_DYNAMIC_DISCARD_RECTANGLE;
+
+ if (!pCreateInfo->pMultisampleState ||
+ !vk_find_struct_const(pCreateInfo->pMultisampleState->pNext,
+ PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT))
+ states &= ~RADV_DYNAMIC_SAMPLE_LOCATIONS;
+
+ if (!pCreateInfo->pRasterizationState ||
+ !vk_find_struct_const(pCreateInfo->pRasterizationState->pNext,
+ PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT))
+ states &= ~RADV_DYNAMIC_LINE_STIPPLE;
+
+ if (!vk_find_struct_const(pCreateInfo->pNext,
+ PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR))
+ states &= ~RADV_DYNAMIC_FRAGMENT_SHADING_RATE;
+
+ /* TODO: blend constants & line width. */
+
+ return states;
}
static struct radv_ia_multi_vgt_param_helpers
radv_compute_ia_multi_vgt_param_helpers(struct radv_pipeline *pipeline)
{
- struct radv_ia_multi_vgt_param_helpers ia_multi_vgt_param = {0};
- const struct radv_device *device = pipeline->device;
-
- if (radv_pipeline_has_tess(pipeline))
- ia_multi_vgt_param.primgroup_size = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.num_tess_patches;
- else if (radv_pipeline_has_gs(pipeline))
- ia_multi_vgt_param.primgroup_size = 64;
- else
- ia_multi_vgt_param.primgroup_size = 128; /* recommended without a GS */
-
- /* GS requirement. */
- ia_multi_vgt_param.partial_es_wave = false;
- if (radv_pipeline_has_gs(pipeline) && device->physical_device->rad_info.chip_class <= GFX8)
- if (SI_GS_PER_ES / ia_multi_vgt_param.primgroup_size >= pipeline->device->gs_table_depth - 3)
- ia_multi_vgt_param.partial_es_wave = true;
-
- ia_multi_vgt_param.ia_switch_on_eoi = false;
- if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.ps.prim_id_input)
- ia_multi_vgt_param.ia_switch_on_eoi = true;
- if (radv_pipeline_has_gs(pipeline) &&
- pipeline->shaders[MESA_SHADER_GEOMETRY]->info.uses_prim_id)
- ia_multi_vgt_param.ia_switch_on_eoi = true;
- if (radv_pipeline_has_tess(pipeline)) {
- /* SWITCH_ON_EOI must be set if PrimID is used. */
- if (pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.uses_prim_id ||
- radv_get_shader(pipeline, MESA_SHADER_TESS_EVAL)->info.uses_prim_id)
- ia_multi_vgt_param.ia_switch_on_eoi = true;
- }
-
- ia_multi_vgt_param.partial_vs_wave = false;
- if (radv_pipeline_has_tess(pipeline)) {
- /* Bug with tessellation and GS on Bonaire and older 2 SE chips. */
- if ((device->physical_device->rad_info.family == CHIP_TAHITI ||
- device->physical_device->rad_info.family == CHIP_PITCAIRN ||
- device->physical_device->rad_info.family == CHIP_BONAIRE) &&
- radv_pipeline_has_gs(pipeline))
- ia_multi_vgt_param.partial_vs_wave = true;
- /* Needed for 028B6C_DISTRIBUTION_MODE != 0 */
- if (device->physical_device->rad_info.has_distributed_tess) {
- if (radv_pipeline_has_gs(pipeline)) {
- if (device->physical_device->rad_info.chip_class <= GFX8)
- ia_multi_vgt_param.partial_es_wave = true;
- } else {
- ia_multi_vgt_param.partial_vs_wave = true;
- }
- }
- }
-
- if (radv_pipeline_has_gs(pipeline)) {
- /* On these chips there is the possibility of a hang if the
- * pipeline uses a GS and partial_vs_wave is not set.
- *
- * This mostly does not hit 4-SE chips, as those typically set
- * ia_switch_on_eoi and then partial_vs_wave is set for pipelines
- * with GS due to another workaround.
- *
- * Reproducer: https://bugs.freedesktop.org/show_bug.cgi?id=109242
- */
- if (device->physical_device->rad_info.family == CHIP_TONGA ||
- device->physical_device->rad_info.family == CHIP_FIJI ||
- device->physical_device->rad_info.family == CHIP_POLARIS10 ||
- device->physical_device->rad_info.family == CHIP_POLARIS11 ||
- device->physical_device->rad_info.family == CHIP_POLARIS12 ||
- device->physical_device->rad_info.family == CHIP_VEGAM) {
- ia_multi_vgt_param.partial_vs_wave = true;
- }
- }
-
- ia_multi_vgt_param.base =
- S_028AA8_PRIMGROUP_SIZE(ia_multi_vgt_param.primgroup_size - 1) |
- /* The following field was moved to VGT_SHADER_STAGES_EN in GFX9. */
- S_028AA8_MAX_PRIMGRP_IN_WAVE(device->physical_device->rad_info.chip_class == GFX8 ? 2 : 0) |
- S_030960_EN_INST_OPT_BASIC(device->physical_device->rad_info.chip_class >= GFX9) |
- S_030960_EN_INST_OPT_ADV(device->physical_device->rad_info.chip_class >= GFX9);
-
- return ia_multi_vgt_param;
+ struct radv_ia_multi_vgt_param_helpers ia_multi_vgt_param = {0};
+ const struct radv_device *device = pipeline->device;
+
+ if (radv_pipeline_has_tess(pipeline))
+ ia_multi_vgt_param.primgroup_size =
+ pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.num_tess_patches;
+ else if (radv_pipeline_has_gs(pipeline))
+ ia_multi_vgt_param.primgroup_size = 64;
+ else
+ ia_multi_vgt_param.primgroup_size = 128; /* recommended without a GS */
+
+ /* GS requirement. */
+ ia_multi_vgt_param.partial_es_wave = false;
+ if (radv_pipeline_has_gs(pipeline) && device->physical_device->rad_info.chip_class <= GFX8)
+ if (SI_GS_PER_ES / ia_multi_vgt_param.primgroup_size >= pipeline->device->gs_table_depth - 3)
+ ia_multi_vgt_param.partial_es_wave = true;
+
+ ia_multi_vgt_param.ia_switch_on_eoi = false;
+ if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.ps.prim_id_input)
+ ia_multi_vgt_param.ia_switch_on_eoi = true;
+ if (radv_pipeline_has_gs(pipeline) && pipeline->shaders[MESA_SHADER_GEOMETRY]->info.uses_prim_id)
+ ia_multi_vgt_param.ia_switch_on_eoi = true;
+ if (radv_pipeline_has_tess(pipeline)) {
+ /* SWITCH_ON_EOI must be set if PrimID is used. */
+ if (pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.uses_prim_id ||
+ radv_get_shader(pipeline, MESA_SHADER_TESS_EVAL)->info.uses_prim_id)
+ ia_multi_vgt_param.ia_switch_on_eoi = true;
+ }
+
+ ia_multi_vgt_param.partial_vs_wave = false;
+ if (radv_pipeline_has_tess(pipeline)) {
+ /* Bug with tessellation and GS on Bonaire and older 2 SE chips. */
+ if ((device->physical_device->rad_info.family == CHIP_TAHITI ||
+ device->physical_device->rad_info.family == CHIP_PITCAIRN ||
+ device->physical_device->rad_info.family == CHIP_BONAIRE) &&
+ radv_pipeline_has_gs(pipeline))
+ ia_multi_vgt_param.partial_vs_wave = true;
+ /* Needed for 028B6C_DISTRIBUTION_MODE != 0 */
+ if (device->physical_device->rad_info.has_distributed_tess) {
+ if (radv_pipeline_has_gs(pipeline)) {
+ if (device->physical_device->rad_info.chip_class <= GFX8)
+ ia_multi_vgt_param.partial_es_wave = true;
+ } else {
+ ia_multi_vgt_param.partial_vs_wave = true;
+ }
+ }
+ }
+
+ if (radv_pipeline_has_gs(pipeline)) {
+ /* On these chips there is the possibility of a hang if the
+ * pipeline uses a GS and partial_vs_wave is not set.
+ *
+ * This mostly does not hit 4-SE chips, as those typically set
+ * ia_switch_on_eoi and then partial_vs_wave is set for pipelines
+ * with GS due to another workaround.
+ *
+ * Reproducer: https://bugs.freedesktop.org/show_bug.cgi?id=109242
+ */
+ if (device->physical_device->rad_info.family == CHIP_TONGA ||
+ device->physical_device->rad_info.family == CHIP_FIJI ||
+ device->physical_device->rad_info.family == CHIP_POLARIS10 ||
+ device->physical_device->rad_info.family == CHIP_POLARIS11 ||
+ device->physical_device->rad_info.family == CHIP_POLARIS12 ||
+ device->physical_device->rad_info.family == CHIP_VEGAM) {
+ ia_multi_vgt_param.partial_vs_wave = true;
+ }
+ }
+
+ ia_multi_vgt_param.base =
+ S_028AA8_PRIMGROUP_SIZE(ia_multi_vgt_param.primgroup_size - 1) |
+ /* The following field was moved to VGT_SHADER_STAGES_EN in GFX9. */
+ S_028AA8_MAX_PRIMGRP_IN_WAVE(device->physical_device->rad_info.chip_class == GFX8 ? 2 : 0) |
+ S_030960_EN_INST_OPT_BASIC(device->physical_device->rad_info.chip_class >= GFX9) |
+ S_030960_EN_INST_OPT_ADV(device->physical_device->rad_info.chip_class >= GFX9);
+
+ return ia_multi_vgt_param;
}
static void
radv_pipeline_init_input_assembly_state(struct radv_pipeline *pipeline,
- const VkGraphicsPipelineCreateInfo *pCreateInfo,
- const struct radv_graphics_pipeline_create_info *extra)
+ const VkGraphicsPipelineCreateInfo *pCreateInfo,
+ const struct radv_graphics_pipeline_create_info *extra)
{
- const VkPipelineInputAssemblyStateCreateInfo *ia_state = pCreateInfo->pInputAssemblyState;
- struct radv_shader_variant *tes = pipeline->shaders[MESA_SHADER_TESS_EVAL];
- struct radv_shader_variant *gs = pipeline->shaders[MESA_SHADER_GEOMETRY];
-
- pipeline->graphics.prim_restart_enable = !!ia_state->primitiveRestartEnable;
- pipeline->graphics.can_use_guardband = radv_prim_can_use_guardband(ia_state->topology);
-
- if (radv_pipeline_has_gs(pipeline)) {
- if (si_conv_gl_prim_to_gs_out(gs->info.gs.output_prim) == V_028A6C_TRISTRIP)
- pipeline->graphics.can_use_guardband = true;
- } else if (radv_pipeline_has_tess(pipeline)) {
- if (!tes->info.tes.point_mode &&
- si_conv_gl_prim_to_gs_out(tes->info.tes.primitive_mode) == V_028A6C_TRISTRIP)
- pipeline->graphics.can_use_guardband = true;
- }
-
- if (extra && extra->use_rectlist) {
- pipeline->graphics.can_use_guardband = true;
- }
-
- pipeline->graphics.ia_multi_vgt_param =
- radv_compute_ia_multi_vgt_param_helpers(pipeline);
+ const VkPipelineInputAssemblyStateCreateInfo *ia_state = pCreateInfo->pInputAssemblyState;
+ struct radv_shader_variant *tes = pipeline->shaders[MESA_SHADER_TESS_EVAL];
+ struct radv_shader_variant *gs = pipeline->shaders[MESA_SHADER_GEOMETRY];
+
+ pipeline->graphics.prim_restart_enable = !!ia_state->primitiveRestartEnable;
+ pipeline->graphics.can_use_guardband = radv_prim_can_use_guardband(ia_state->topology);
+
+ if (radv_pipeline_has_gs(pipeline)) {
+ if (si_conv_gl_prim_to_gs_out(gs->info.gs.output_prim) == V_028A6C_TRISTRIP)
+ pipeline->graphics.can_use_guardband = true;
+ } else if (radv_pipeline_has_tess(pipeline)) {
+ if (!tes->info.tes.point_mode &&
+ si_conv_gl_prim_to_gs_out(tes->info.tes.primitive_mode) == V_028A6C_TRISTRIP)
+ pipeline->graphics.can_use_guardband = true;
+ }
+
+ if (extra && extra->use_rectlist) {
+ pipeline->graphics.can_use_guardband = true;
+ }
+
+ pipeline->graphics.ia_multi_vgt_param = radv_compute_ia_multi_vgt_param_helpers(pipeline);
}
static void
radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
- const VkGraphicsPipelineCreateInfo *pCreateInfo,
- const struct radv_graphics_pipeline_create_info *extra)
+ const VkGraphicsPipelineCreateInfo *pCreateInfo,
+ const struct radv_graphics_pipeline_create_info *extra)
{
- uint64_t needed_states = radv_pipeline_needed_dynamic_state(pCreateInfo);
- uint64_t states = needed_states;
- RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
- struct radv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass];
-
- pipeline->dynamic_state = default_dynamic_state;
- pipeline->graphics.needed_dynamic_state = needed_states;
-
- if (pCreateInfo->pDynamicState) {
- /* Remove all of the states that are marked as dynamic */
- uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount;
- for (uint32_t s = 0; s < count; s++)
- states &= ~radv_dynamic_state_mask(pCreateInfo->pDynamicState->pDynamicStates[s]);
- }
-
- struct radv_dynamic_state *dynamic = &pipeline->dynamic_state;
-
- if (needed_states & RADV_DYNAMIC_VIEWPORT) {
- assert(pCreateInfo->pViewportState);
-
- dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount;
- if (states & RADV_DYNAMIC_VIEWPORT) {
- typed_memcpy(dynamic->viewport.viewports,
- pCreateInfo->pViewportState->pViewports,
- pCreateInfo->pViewportState->viewportCount);
- }
- }
-
- if (needed_states & RADV_DYNAMIC_SCISSOR) {
- dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount;
- if (states & RADV_DYNAMIC_SCISSOR) {
- typed_memcpy(dynamic->scissor.scissors,
- pCreateInfo->pViewportState->pScissors,
- pCreateInfo->pViewportState->scissorCount);
- }
- }
-
- if (states & RADV_DYNAMIC_LINE_WIDTH) {
- assert(pCreateInfo->pRasterizationState);
- dynamic->line_width = pCreateInfo->pRasterizationState->lineWidth;
- }
-
- if (states & RADV_DYNAMIC_DEPTH_BIAS) {
- assert(pCreateInfo->pRasterizationState);
- dynamic->depth_bias.bias =
- pCreateInfo->pRasterizationState->depthBiasConstantFactor;
- dynamic->depth_bias.clamp =
- pCreateInfo->pRasterizationState->depthBiasClamp;
- dynamic->depth_bias.slope =
- pCreateInfo->pRasterizationState->depthBiasSlopeFactor;
- }
-
- /* Section 9.2 of the Vulkan 1.0.15 spec says:
- *
- * pColorBlendState is [...] NULL if the pipeline has rasterization
- * disabled or if the subpass of the render pass the pipeline is
- * created against does not use any color attachments.
- */
- if (subpass->has_color_att && states & RADV_DYNAMIC_BLEND_CONSTANTS) {
- assert(pCreateInfo->pColorBlendState);
- typed_memcpy(dynamic->blend_constants,
- pCreateInfo->pColorBlendState->blendConstants, 4);
- }
-
- if (states & RADV_DYNAMIC_CULL_MODE) {
- dynamic->cull_mode =
- pCreateInfo->pRasterizationState->cullMode;
- }
-
- if (states & RADV_DYNAMIC_FRONT_FACE) {
- dynamic->front_face =
- pCreateInfo->pRasterizationState->frontFace;
- }
-
- if (states & RADV_DYNAMIC_PRIMITIVE_TOPOLOGY) {
- dynamic->primitive_topology =
- si_translate_prim(pCreateInfo->pInputAssemblyState->topology);
- if (extra && extra->use_rectlist) {
- dynamic->primitive_topology = V_008958_DI_PT_RECTLIST;
- }
- }
-
- /* If there is no depthstencil attachment, then don't read
- * pDepthStencilState. The Vulkan spec states that pDepthStencilState may
- * be NULL in this case. Even if pDepthStencilState is non-NULL, there is
- * no need to override the depthstencil defaults in
- * radv_pipeline::dynamic_state when there is no depthstencil attachment.
- *
- * Section 9.2 of the Vulkan 1.0.15 spec says:
- *
- * pDepthStencilState is [...] NULL if the pipeline has rasterization
- * disabled or if the subpass of the render pass the pipeline is created
- * against does not use a depth/stencil attachment.
- */
- if (needed_states && subpass->depth_stencil_attachment) {
- assert(pCreateInfo->pDepthStencilState);
-
- if (states & RADV_DYNAMIC_DEPTH_BOUNDS) {
- dynamic->depth_bounds.min =
- pCreateInfo->pDepthStencilState->minDepthBounds;
- dynamic->depth_bounds.max =
- pCreateInfo->pDepthStencilState->maxDepthBounds;
- }
-
- if (states & RADV_DYNAMIC_STENCIL_COMPARE_MASK) {
- dynamic->stencil_compare_mask.front =
- pCreateInfo->pDepthStencilState->front.compareMask;
- dynamic->stencil_compare_mask.back =
- pCreateInfo->pDepthStencilState->back.compareMask;
- }
-
- if (states & RADV_DYNAMIC_STENCIL_WRITE_MASK) {
- dynamic->stencil_write_mask.front =
- pCreateInfo->pDepthStencilState->front.writeMask;
- dynamic->stencil_write_mask.back =
- pCreateInfo->pDepthStencilState->back.writeMask;
- }
-
- if (states & RADV_DYNAMIC_STENCIL_REFERENCE) {
- dynamic->stencil_reference.front =
- pCreateInfo->pDepthStencilState->front.reference;
- dynamic->stencil_reference.back =
- pCreateInfo->pDepthStencilState->back.reference;
- }
-
- if (states & RADV_DYNAMIC_DEPTH_TEST_ENABLE) {
- dynamic->depth_test_enable =
- pCreateInfo->pDepthStencilState->depthTestEnable;
- }
-
- if (states & RADV_DYNAMIC_DEPTH_WRITE_ENABLE) {
- dynamic->depth_write_enable =
- pCreateInfo->pDepthStencilState->depthWriteEnable;
- }
-
- if (states & RADV_DYNAMIC_DEPTH_COMPARE_OP) {
- dynamic->depth_compare_op =
- pCreateInfo->pDepthStencilState->depthCompareOp;
- }
-
- if (states & RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE) {
- dynamic->depth_bounds_test_enable =
- pCreateInfo->pDepthStencilState->depthBoundsTestEnable;
- }
-
- if (states & RADV_DYNAMIC_STENCIL_TEST_ENABLE) {
- dynamic->stencil_test_enable =
- pCreateInfo->pDepthStencilState->stencilTestEnable;
- }
-
- if (states & RADV_DYNAMIC_STENCIL_OP) {
- dynamic->stencil_op.front.compare_op =
- pCreateInfo->pDepthStencilState->front.compareOp;
- dynamic->stencil_op.front.fail_op =
- pCreateInfo->pDepthStencilState->front.failOp;
- dynamic->stencil_op.front.pass_op =
- pCreateInfo->pDepthStencilState->front.passOp;
- dynamic->stencil_op.front.depth_fail_op =
- pCreateInfo->pDepthStencilState->front.depthFailOp;
-
- dynamic->stencil_op.back.compare_op =
- pCreateInfo->pDepthStencilState->back.compareOp;
- dynamic->stencil_op.back.fail_op =
- pCreateInfo->pDepthStencilState->back.failOp;
- dynamic->stencil_op.back.pass_op =
- pCreateInfo->pDepthStencilState->back.passOp;
- dynamic->stencil_op.back.depth_fail_op =
- pCreateInfo->pDepthStencilState->back.depthFailOp;
- }
- }
-
- const VkPipelineDiscardRectangleStateCreateInfoEXT *discard_rectangle_info =
- vk_find_struct_const(pCreateInfo->pNext, PIPELINE_DISCARD_RECTANGLE_STATE_CREATE_INFO_EXT);
- if (needed_states & RADV_DYNAMIC_DISCARD_RECTANGLE) {
- dynamic->discard_rectangle.count = discard_rectangle_info->discardRectangleCount;
- if (states & RADV_DYNAMIC_DISCARD_RECTANGLE) {
- typed_memcpy(dynamic->discard_rectangle.rectangles,
- discard_rectangle_info->pDiscardRectangles,
- discard_rectangle_info->discardRectangleCount);
- }
- }
-
- if (needed_states & RADV_DYNAMIC_SAMPLE_LOCATIONS) {
- const VkPipelineSampleLocationsStateCreateInfoEXT *sample_location_info =
- vk_find_struct_const(pCreateInfo->pMultisampleState->pNext,
- PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT);
- /* If sampleLocationsEnable is VK_FALSE, the default sample
- * locations are used and the values specified in
- * sampleLocationsInfo are ignored.
- */
- if (sample_location_info->sampleLocationsEnable) {
- const VkSampleLocationsInfoEXT *pSampleLocationsInfo =
- &sample_location_info->sampleLocationsInfo;
-
- assert(pSampleLocationsInfo->sampleLocationsCount <= MAX_SAMPLE_LOCATIONS);
-
- dynamic->sample_location.per_pixel = pSampleLocationsInfo->sampleLocationsPerPixel;
- dynamic->sample_location.grid_size = pSampleLocationsInfo->sampleLocationGridSize;
- dynamic->sample_location.count = pSampleLocationsInfo->sampleLocationsCount;
- typed_memcpy(&dynamic->sample_location.locations[0],
- pSampleLocationsInfo->pSampleLocations,
- pSampleLocationsInfo->sampleLocationsCount);
- }
- }
-
- const VkPipelineRasterizationLineStateCreateInfoEXT *rast_line_info =
- vk_find_struct_const(pCreateInfo->pRasterizationState->pNext,
- PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT);
- if (needed_states & RADV_DYNAMIC_LINE_STIPPLE) {
- dynamic->line_stipple.factor = rast_line_info->lineStippleFactor;
- dynamic->line_stipple.pattern = rast_line_info->lineStipplePattern;
- }
-
- if (!(states & RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE))
- pipeline->graphics.uses_dynamic_stride = true;
-
- const VkPipelineFragmentShadingRateStateCreateInfoKHR *shading_rate =
- vk_find_struct_const(pCreateInfo->pNext, PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR);
- if (needed_states & RADV_DYNAMIC_FRAGMENT_SHADING_RATE) {
- dynamic->fragment_shading_rate.size = shading_rate->fragmentSize;
- for (int i = 0; i < 2; i++)
- dynamic->fragment_shading_rate.combiner_ops[i] = shading_rate->combinerOps[i];
- }
-
- pipeline->dynamic_state.mask = states;
+ uint64_t needed_states = radv_pipeline_needed_dynamic_state(pCreateInfo);
+ uint64_t states = needed_states;
+ RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
+ struct radv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass];
+
+ pipeline->dynamic_state = default_dynamic_state;
+ pipeline->graphics.needed_dynamic_state = needed_states;
+
+ if (pCreateInfo->pDynamicState) {
+ /* Remove all of the states that are marked as dynamic */
+ uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount;
+ for (uint32_t s = 0; s < count; s++)
+ states &= ~radv_dynamic_state_mask(pCreateInfo->pDynamicState->pDynamicStates[s]);
+ }
+
+ struct radv_dynamic_state *dynamic = &pipeline->dynamic_state;
+
+ if (needed_states & RADV_DYNAMIC_VIEWPORT) {
+ assert(pCreateInfo->pViewportState);
+
+ dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount;
+ if (states & RADV_DYNAMIC_VIEWPORT) {
+ typed_memcpy(dynamic->viewport.viewports, pCreateInfo->pViewportState->pViewports,
+ pCreateInfo->pViewportState->viewportCount);
+ }
+ }
+
+ if (needed_states & RADV_DYNAMIC_SCISSOR) {
+ dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount;
+ if (states & RADV_DYNAMIC_SCISSOR) {
+ typed_memcpy(dynamic->scissor.scissors, pCreateInfo->pViewportState->pScissors,
+ pCreateInfo->pViewportState->scissorCount);
+ }
+ }
+
+ if (states & RADV_DYNAMIC_LINE_WIDTH) {
+ assert(pCreateInfo->pRasterizationState);
+ dynamic->line_width = pCreateInfo->pRasterizationState->lineWidth;
+ }
+
+ if (states & RADV_DYNAMIC_DEPTH_BIAS) {
+ assert(pCreateInfo->pRasterizationState);
+ dynamic->depth_bias.bias = pCreateInfo->pRasterizationState->depthBiasConstantFactor;
+ dynamic->depth_bias.clamp = pCreateInfo->pRasterizationState->depthBiasClamp;
+ dynamic->depth_bias.slope = pCreateInfo->pRasterizationState->depthBiasSlopeFactor;
+ }
+
+ /* Section 9.2 of the Vulkan 1.0.15 spec says:
+ *
+ * pColorBlendState is [...] NULL if the pipeline has rasterization
+ * disabled or if the subpass of the render pass the pipeline is
+ * created against does not use any color attachments.
+ */
+ if (subpass->has_color_att && states & RADV_DYNAMIC_BLEND_CONSTANTS) {
+ assert(pCreateInfo->pColorBlendState);
+ typed_memcpy(dynamic->blend_constants, pCreateInfo->pColorBlendState->blendConstants, 4);
+ }
+
+ if (states & RADV_DYNAMIC_CULL_MODE) {
+ dynamic->cull_mode = pCreateInfo->pRasterizationState->cullMode;
+ }
+
+ if (states & RADV_DYNAMIC_FRONT_FACE) {
+ dynamic->front_face = pCreateInfo->pRasterizationState->frontFace;
+ }
+
+ if (states & RADV_DYNAMIC_PRIMITIVE_TOPOLOGY) {
+ dynamic->primitive_topology = si_translate_prim(pCreateInfo->pInputAssemblyState->topology);
+ if (extra && extra->use_rectlist) {
+ dynamic->primitive_topology = V_008958_DI_PT_RECTLIST;
+ }
+ }
+
+ /* If there is no depthstencil attachment, then don't read
+ * pDepthStencilState. The Vulkan spec states that pDepthStencilState may
+ * be NULL in this case. Even if pDepthStencilState is non-NULL, there is
+ * no need to override the depthstencil defaults in
+ * radv_pipeline::dynamic_state when there is no depthstencil attachment.
+ *
+ * Section 9.2 of the Vulkan 1.0.15 spec says:
+ *
+ * pDepthStencilState is [...] NULL if the pipeline has rasterization
+ * disabled or if the subpass of the render pass the pipeline is created
+ * against does not use a depth/stencil attachment.
+ */
+ if (needed_states && subpass->depth_stencil_attachment) {
+ assert(pCreateInfo->pDepthStencilState);
+
+ if (states & RADV_DYNAMIC_DEPTH_BOUNDS) {
+ dynamic->depth_bounds.min = pCreateInfo->pDepthStencilState->minDepthBounds;
+ dynamic->depth_bounds.max = pCreateInfo->pDepthStencilState->maxDepthBounds;
+ }
+
+ if (states & RADV_DYNAMIC_STENCIL_COMPARE_MASK) {
+ dynamic->stencil_compare_mask.front = pCreateInfo->pDepthStencilState->front.compareMask;
+ dynamic->stencil_compare_mask.back = pCreateInfo->pDepthStencilState->back.compareMask;
+ }
+
+ if (states & RADV_DYNAMIC_STENCIL_WRITE_MASK) {
+ dynamic->stencil_write_mask.front = pCreateInfo->pDepthStencilState->front.writeMask;
+ dynamic->stencil_write_mask.back = pCreateInfo->pDepthStencilState->back.writeMask;
+ }
+
+ if (states & RADV_DYNAMIC_STENCIL_REFERENCE) {
+ dynamic->stencil_reference.front = pCreateInfo->pDepthStencilState->front.reference;
+ dynamic->stencil_reference.back = pCreateInfo->pDepthStencilState->back.reference;
+ }
+
+ if (states & RADV_DYNAMIC_DEPTH_TEST_ENABLE) {
+ dynamic->depth_test_enable = pCreateInfo->pDepthStencilState->depthTestEnable;
+ }
+
+ if (states & RADV_DYNAMIC_DEPTH_WRITE_ENABLE) {
+ dynamic->depth_write_enable = pCreateInfo->pDepthStencilState->depthWriteEnable;
+ }
+
+ if (states & RADV_DYNAMIC_DEPTH_COMPARE_OP) {
+ dynamic->depth_compare_op = pCreateInfo->pDepthStencilState->depthCompareOp;
+ }
+
+ if (states & RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE) {
+ dynamic->depth_bounds_test_enable = pCreateInfo->pDepthStencilState->depthBoundsTestEnable;
+ }
+
+ if (states & RADV_DYNAMIC_STENCIL_TEST_ENABLE) {
+ dynamic->stencil_test_enable = pCreateInfo->pDepthStencilState->stencilTestEnable;
+ }
+
+ if (states & RADV_DYNAMIC_STENCIL_OP) {
+ dynamic->stencil_op.front.compare_op = pCreateInfo->pDepthStencilState->front.compareOp;
+ dynamic->stencil_op.front.fail_op = pCreateInfo->pDepthStencilState->front.failOp;
+ dynamic->stencil_op.front.pass_op = pCreateInfo->pDepthStencilState->front.passOp;
+ dynamic->stencil_op.front.depth_fail_op =
+ pCreateInfo->pDepthStencilState->front.depthFailOp;
+
+ dynamic->stencil_op.back.compare_op = pCreateInfo->pDepthStencilState->back.compareOp;
+ dynamic->stencil_op.back.fail_op = pCreateInfo->pDepthStencilState->back.failOp;
+ dynamic->stencil_op.back.pass_op = pCreateInfo->pDepthStencilState->back.passOp;
+ dynamic->stencil_op.back.depth_fail_op = pCreateInfo->pDepthStencilState->back.depthFailOp;
+ }
+ }
+
+ const VkPipelineDiscardRectangleStateCreateInfoEXT *discard_rectangle_info =
+ vk_find_struct_const(pCreateInfo->pNext, PIPELINE_DISCARD_RECTANGLE_STATE_CREATE_INFO_EXT);
+ if (needed_states & RADV_DYNAMIC_DISCARD_RECTANGLE) {
+ dynamic->discard_rectangle.count = discard_rectangle_info->discardRectangleCount;
+ if (states & RADV_DYNAMIC_DISCARD_RECTANGLE) {
+ typed_memcpy(dynamic->discard_rectangle.rectangles,
+ discard_rectangle_info->pDiscardRectangles,
+ discard_rectangle_info->discardRectangleCount);
+ }
+ }
+
+ if (needed_states & RADV_DYNAMIC_SAMPLE_LOCATIONS) {
+ const VkPipelineSampleLocationsStateCreateInfoEXT *sample_location_info =
+ vk_find_struct_const(pCreateInfo->pMultisampleState->pNext,
+ PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT);
+ /* If sampleLocationsEnable is VK_FALSE, the default sample
+ * locations are used and the values specified in
+ * sampleLocationsInfo are ignored.
+ */
+ if (sample_location_info->sampleLocationsEnable) {
+ const VkSampleLocationsInfoEXT *pSampleLocationsInfo =
+ &sample_location_info->sampleLocationsInfo;
+
+ assert(pSampleLocationsInfo->sampleLocationsCount <= MAX_SAMPLE_LOCATIONS);
+
+ dynamic->sample_location.per_pixel = pSampleLocationsInfo->sampleLocationsPerPixel;
+ dynamic->sample_location.grid_size = pSampleLocationsInfo->sampleLocationGridSize;
+ dynamic->sample_location.count = pSampleLocationsInfo->sampleLocationsCount;
+ typed_memcpy(&dynamic->sample_location.locations[0],
+ pSampleLocationsInfo->pSampleLocations,
+ pSampleLocationsInfo->sampleLocationsCount);
+ }
+ }
+
+ const VkPipelineRasterizationLineStateCreateInfoEXT *rast_line_info = vk_find_struct_const(
+ pCreateInfo->pRasterizationState->pNext, PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT);
+ if (needed_states & RADV_DYNAMIC_LINE_STIPPLE) {
+ dynamic->line_stipple.factor = rast_line_info->lineStippleFactor;
+ dynamic->line_stipple.pattern = rast_line_info->lineStipplePattern;
+ }
+
+ if (!(states & RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE))
+ pipeline->graphics.uses_dynamic_stride = true;
+
+ const VkPipelineFragmentShadingRateStateCreateInfoKHR *shading_rate = vk_find_struct_const(
+ pCreateInfo->pNext, PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR);
+ if (needed_states & RADV_DYNAMIC_FRAGMENT_SHADING_RATE) {
+ dynamic->fragment_shading_rate.size = shading_rate->fragmentSize;
+ for (int i = 0; i < 2; i++)
+ dynamic->fragment_shading_rate.combiner_ops[i] = shading_rate->combinerOps[i];
+ }
+
+ pipeline->dynamic_state.mask = states;
}
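
The heart of the rewritten radv_pipeline_init_dynamic_state() is the mask arithmetic: needed_states lists every piece of state this pipeline could consume, and each VK_DYNAMIC_STATE_* the application declares clears the matching bit, so only statically known values are copied out of pCreateInfo and dynamic_state.mask records what stays baked in. A minimal standalone sketch of that filtering follows; the DYN_* bits and dynamic_state_mask() helper are simplified stand-ins, not the driver's real RADV_DYNAMIC_* flags or radv_dynamic_state_mask().

   #include <stdint.h>
   #include <stdio.h>

   /* Simplified stand-ins for the driver's per-state bits. */
   #define DYN_VIEWPORT   (1ull << 0)
   #define DYN_SCISSOR    (1ull << 1)
   #define DYN_LINE_WIDTH (1ull << 2)

   /* Hypothetical translation of a VkDynamicState value to a driver bit. */
   static uint64_t dynamic_state_mask(int vk_dynamic_state)
   {
      switch (vk_dynamic_state) {
      case 0: return DYN_VIEWPORT;   /* VK_DYNAMIC_STATE_VIEWPORT */
      case 1: return DYN_SCISSOR;    /* VK_DYNAMIC_STATE_SCISSOR */
      case 2: return DYN_LINE_WIDTH; /* VK_DYNAMIC_STATE_LINE_WIDTH */
      default: return 0;
      }
   }

   int main(void)
   {
      uint64_t needed = DYN_VIEWPORT | DYN_SCISSOR | DYN_LINE_WIDTH;
      uint64_t states = needed;
      int app_dynamic[] = {0, 2}; /* app makes viewport and line width dynamic */

      for (unsigned i = 0; i < 2; i++)
         states &= ~dynamic_state_mask(app_dynamic[i]);

      /* Only the scissor remains static and would be baked into the pipeline. */
      printf("static states: 0x%llx\n", (unsigned long long)states); /* 0x2 */
      return 0;
   }
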
static void
radv_pipeline_init_raster_state(struct radv_pipeline *pipeline,
- const VkGraphicsPipelineCreateInfo *pCreateInfo)
+ const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
- const VkPipelineRasterizationStateCreateInfo *raster_info =
- pCreateInfo->pRasterizationState;
-
- pipeline->graphics.pa_su_sc_mode_cntl =
- S_028814_FACE(raster_info->frontFace) |
- S_028814_CULL_FRONT(!!(raster_info->cullMode & VK_CULL_MODE_FRONT_BIT)) |
- S_028814_CULL_BACK(!!(raster_info->cullMode & VK_CULL_MODE_BACK_BIT)) |
- S_028814_POLY_MODE(raster_info->polygonMode != VK_POLYGON_MODE_FILL) |
- S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(raster_info->polygonMode)) |
- S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(raster_info->polygonMode)) |
- S_028814_POLY_OFFSET_FRONT_ENABLE(raster_info->depthBiasEnable ? 1 : 0) |
- S_028814_POLY_OFFSET_BACK_ENABLE(raster_info->depthBiasEnable ? 1 : 0) |
- S_028814_POLY_OFFSET_PARA_ENABLE(raster_info->depthBiasEnable ? 1 : 0);
-
- if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
- /* It should also be set if PERPENDICULAR_ENDCAP_ENA is set. */
- pipeline->graphics.pa_su_sc_mode_cntl |=
- S_028814_KEEP_TOGETHER_ENABLE(raster_info->polygonMode != VK_POLYGON_MODE_FILL);
- }
+ const VkPipelineRasterizationStateCreateInfo *raster_info = pCreateInfo->pRasterizationState;
+
+ pipeline->graphics.pa_su_sc_mode_cntl =
+ S_028814_FACE(raster_info->frontFace) |
+ S_028814_CULL_FRONT(!!(raster_info->cullMode & VK_CULL_MODE_FRONT_BIT)) |
+ S_028814_CULL_BACK(!!(raster_info->cullMode & VK_CULL_MODE_BACK_BIT)) |
+ S_028814_POLY_MODE(raster_info->polygonMode != VK_POLYGON_MODE_FILL) |
+ S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(raster_info->polygonMode)) |
+ S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(raster_info->polygonMode)) |
+ S_028814_POLY_OFFSET_FRONT_ENABLE(raster_info->depthBiasEnable ? 1 : 0) |
+ S_028814_POLY_OFFSET_BACK_ENABLE(raster_info->depthBiasEnable ? 1 : 0) |
+ S_028814_POLY_OFFSET_PARA_ENABLE(raster_info->depthBiasEnable ? 1 : 0);
+
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
+ /* It should also be set if PERPENDICULAR_ENDCAP_ENA is set. */
+ pipeline->graphics.pa_su_sc_mode_cntl |=
+ S_028814_KEEP_TOGETHER_ENABLE(raster_info->polygonMode != VK_POLYGON_MODE_FILL);
+ }
}
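
PA_SU_SC_MODE_CNTL is assembled by OR-ing independent bit-fields, each produced by a generated S_028814_*() packing helper. The sketch below shows the general shape of such helpers with made-up shifts and widths (the real register layout differs): with cullMode = VK_CULL_MODE_BACK_BIT and a non-FILL polygon mode, only the back-face cull field and the poly-mode field end up set.

   #include <stdio.h>

   /* Invented field layout in the style of the generated S_028814_*() macros. */
   #define S_DEMO_CULL_FRONT(x) (((unsigned)(x) & 0x1) << 0)
   #define S_DEMO_CULL_BACK(x)  (((unsigned)(x) & 0x1) << 1)
   #define S_DEMO_POLY_MODE(x)  (((unsigned)(x) & 0x1) << 2)

   int main(void)
   {
      unsigned cull_mode = 0x2; /* VK_CULL_MODE_BACK_BIT */
      int not_fill = 1;         /* polygonMode != VK_POLYGON_MODE_FILL */

      unsigned cntl = S_DEMO_CULL_FRONT(!!(cull_mode & 0x1)) |
                      S_DEMO_CULL_BACK(!!(cull_mode & 0x2)) |
                      S_DEMO_POLY_MODE(not_fill);

      printf("demo mode cntl = 0x%x\n", cntl); /* 0x6 */
      return 0;
   }
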
static void
radv_pipeline_init_depth_stencil_state(struct radv_pipeline *pipeline,
- const VkGraphicsPipelineCreateInfo *pCreateInfo)
+ const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
- const VkPipelineDepthStencilStateCreateInfo *ds_info
- = radv_pipeline_get_depth_stencil_state(pCreateInfo);
- RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
- struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
- struct radv_render_pass_attachment *attachment = NULL;
- uint32_t db_depth_control = 0;
-
- if (subpass->depth_stencil_attachment)
- attachment = pass->attachments + subpass->depth_stencil_attachment->attachment;
-
- bool has_depth_attachment = attachment && vk_format_has_depth(attachment->format);
- bool has_stencil_attachment = attachment && vk_format_has_stencil(attachment->format);
-
- if (ds_info) {
- if (has_depth_attachment) {
- db_depth_control = S_028800_Z_ENABLE(ds_info->depthTestEnable ? 1 : 0) |
- S_028800_Z_WRITE_ENABLE(ds_info->depthWriteEnable ? 1 : 0) |
- S_028800_ZFUNC(ds_info->depthCompareOp) |
- S_028800_DEPTH_BOUNDS_ENABLE(ds_info->depthBoundsTestEnable ? 1 : 0);
- }
-
- if (has_stencil_attachment && ds_info->stencilTestEnable) {
- db_depth_control |= S_028800_STENCIL_ENABLE(1) | S_028800_BACKFACE_ENABLE(1);
- db_depth_control |= S_028800_STENCILFUNC(ds_info->front.compareOp);
- db_depth_control |= S_028800_STENCILFUNC_BF(ds_info->back.compareOp);
- }
- }
-
- pipeline->graphics.db_depth_control = db_depth_control;
+ const VkPipelineDepthStencilStateCreateInfo *ds_info =
+ radv_pipeline_get_depth_stencil_state(pCreateInfo);
+ RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
+ struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
+ struct radv_render_pass_attachment *attachment = NULL;
+ uint32_t db_depth_control = 0;
+
+ if (subpass->depth_stencil_attachment)
+ attachment = pass->attachments + subpass->depth_stencil_attachment->attachment;
+
+ bool has_depth_attachment = attachment && vk_format_has_depth(attachment->format);
+ bool has_stencil_attachment = attachment && vk_format_has_stencil(attachment->format);
+
+ if (ds_info) {
+ if (has_depth_attachment) {
+ db_depth_control = S_028800_Z_ENABLE(ds_info->depthTestEnable ? 1 : 0) |
+ S_028800_Z_WRITE_ENABLE(ds_info->depthWriteEnable ? 1 : 0) |
+ S_028800_ZFUNC(ds_info->depthCompareOp) |
+ S_028800_DEPTH_BOUNDS_ENABLE(ds_info->depthBoundsTestEnable ? 1 : 0);
+ }
+
+ if (has_stencil_attachment && ds_info->stencilTestEnable) {
+ db_depth_control |= S_028800_STENCIL_ENABLE(1) | S_028800_BACKFACE_ENABLE(1);
+ db_depth_control |= S_028800_STENCILFUNC(ds_info->front.compareOp);
+ db_depth_control |= S_028800_STENCILFUNC_BF(ds_info->back.compareOp);
+ }
+ }
+
+ pipeline->graphics.db_depth_control = db_depth_control;
}
static void
-gfx9_get_gs_info(const struct radv_pipeline_key *key,
- const struct radv_pipeline *pipeline,
- nir_shader **nir,
- struct radv_shader_info *infos,
- struct gfx9_gs_info *out)
+gfx9_get_gs_info(const struct radv_pipeline_key *key, const struct radv_pipeline *pipeline,
+ nir_shader **nir, struct radv_shader_info *infos, struct gfx9_gs_info *out)
{
- struct radv_shader_info *gs_info = &infos[MESA_SHADER_GEOMETRY];
- struct radv_es_output_info *es_info;
- if (pipeline->device->physical_device->rad_info.chip_class >= GFX9)
- es_info = nir[MESA_SHADER_TESS_CTRL] ? &gs_info->tes.es_info : &gs_info->vs.es_info;
- else
- es_info = nir[MESA_SHADER_TESS_CTRL] ?
- &infos[MESA_SHADER_TESS_EVAL].tes.es_info :
- &infos[MESA_SHADER_VERTEX].vs.es_info;
-
- unsigned gs_num_invocations = MAX2(gs_info->gs.invocations, 1);
- bool uses_adjacency;
- switch(key->topology) {
- case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
- case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
- case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
- case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
- uses_adjacency = true;
- break;
- default:
- uses_adjacency = false;
- break;
- }
-
- /* All these are in dwords: */
- /* We can't allow using the whole LDS, because GS waves compete with
- * other shader stages for LDS space. */
- const unsigned max_lds_size = 8 * 1024;
- const unsigned esgs_itemsize = es_info->esgs_itemsize / 4;
- unsigned esgs_lds_size;
-
- /* All these are per subgroup: */
- const unsigned max_out_prims = 32 * 1024;
- const unsigned max_es_verts = 255;
- const unsigned ideal_gs_prims = 64;
- unsigned max_gs_prims, gs_prims;
- unsigned min_es_verts, es_verts, worst_case_es_verts;
-
- if (uses_adjacency || gs_num_invocations > 1)
- max_gs_prims = 127 / gs_num_invocations;
- else
- max_gs_prims = 255;
-
- /* MAX_PRIMS_PER_SUBGROUP = gs_prims * max_vert_out * gs_invocations.
- * Make sure we don't go over the maximum value.
- */
- if (gs_info->gs.vertices_out > 0) {
- max_gs_prims = MIN2(max_gs_prims,
- max_out_prims /
- (gs_info->gs.vertices_out * gs_num_invocations));
- }
- assert(max_gs_prims > 0);
-
- /* If the primitive has adjacency, halve the number of vertices
- * that will be reused in multiple primitives.
- */
- min_es_verts = gs_info->gs.vertices_in / (uses_adjacency ? 2 : 1);
-
- gs_prims = MIN2(ideal_gs_prims, max_gs_prims);
- worst_case_es_verts = MIN2(min_es_verts * gs_prims, max_es_verts);
-
- /* Compute ESGS LDS size based on the worst case number of ES vertices
- * needed to create the target number of GS prims per subgroup.
- */
- esgs_lds_size = esgs_itemsize * worst_case_es_verts;
-
- /* If total LDS usage is too big, refactor partitions based on ratio
- * of ESGS item sizes.
- */
- if (esgs_lds_size > max_lds_size) {
- /* Our target GS Prims Per Subgroup was too large. Calculate
- * the maximum number of GS Prims Per Subgroup that will fit
- * into LDS, capped by the maximum that the hardware can support.
- */
- gs_prims = MIN2((max_lds_size / (esgs_itemsize * min_es_verts)),
- max_gs_prims);
- assert(gs_prims > 0);
- worst_case_es_verts = MIN2(min_es_verts * gs_prims,
- max_es_verts);
-
- esgs_lds_size = esgs_itemsize * worst_case_es_verts;
- assert(esgs_lds_size <= max_lds_size);
- }
-
- /* Now calculate remaining ESGS information. */
- if (esgs_lds_size)
- es_verts = MIN2(esgs_lds_size / esgs_itemsize, max_es_verts);
- else
- es_verts = max_es_verts;
-
- /* Vertices for adjacency primitives are not always reused, so restore
- * it for ES_VERTS_PER_SUBGRP.
- */
- min_es_verts = gs_info->gs.vertices_in;
-
- /* For normal primitives, the VGT only checks if they are past the ES
- * verts per subgroup after allocating a full GS primitive and if they
- * are, kick off a new subgroup. But if those additional ES verts are
- * unique (e.g. not reused) we need to make sure there is enough LDS
- * space to account for those ES verts beyond ES_VERTS_PER_SUBGRP.
- */
- es_verts -= min_es_verts - 1;
-
- uint32_t es_verts_per_subgroup = es_verts;
- uint32_t gs_prims_per_subgroup = gs_prims;
- uint32_t gs_inst_prims_in_subgroup = gs_prims * gs_num_invocations;
- uint32_t max_prims_per_subgroup = gs_inst_prims_in_subgroup * gs_info->gs.vertices_out;
- out->lds_size = align(esgs_lds_size, 128) / 128;
- out->vgt_gs_onchip_cntl = S_028A44_ES_VERTS_PER_SUBGRP(es_verts_per_subgroup) |
- S_028A44_GS_PRIMS_PER_SUBGRP(gs_prims_per_subgroup) |
- S_028A44_GS_INST_PRIMS_IN_SUBGRP(gs_inst_prims_in_subgroup);
- out->vgt_gs_max_prims_per_subgroup = S_028A94_MAX_PRIMS_PER_SUBGROUP(max_prims_per_subgroup);
- out->vgt_esgs_ring_itemsize = esgs_itemsize;
- assert(max_prims_per_subgroup <= max_out_prims);
+ struct radv_shader_info *gs_info = &infos[MESA_SHADER_GEOMETRY];
+ struct radv_es_output_info *es_info;
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX9)
+ es_info = nir[MESA_SHADER_TESS_CTRL] ? &gs_info->tes.es_info : &gs_info->vs.es_info;
+ else
+ es_info = nir[MESA_SHADER_TESS_CTRL] ? &infos[MESA_SHADER_TESS_EVAL].tes.es_info
+ : &infos[MESA_SHADER_VERTEX].vs.es_info;
+
+ unsigned gs_num_invocations = MAX2(gs_info->gs.invocations, 1);
+ bool uses_adjacency;
+ switch (key->topology) {
+ case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
+ case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
+ uses_adjacency = true;
+ break;
+ default:
+ uses_adjacency = false;
+ break;
+ }
+
+ /* All these are in dwords: */
+ /* We can't allow using the whole LDS, because GS waves compete with
+ * other shader stages for LDS space. */
+ const unsigned max_lds_size = 8 * 1024;
+ const unsigned esgs_itemsize = es_info->esgs_itemsize / 4;
+ unsigned esgs_lds_size;
+
+ /* All these are per subgroup: */
+ const unsigned max_out_prims = 32 * 1024;
+ const unsigned max_es_verts = 255;
+ const unsigned ideal_gs_prims = 64;
+ unsigned max_gs_prims, gs_prims;
+ unsigned min_es_verts, es_verts, worst_case_es_verts;
+
+ if (uses_adjacency || gs_num_invocations > 1)
+ max_gs_prims = 127 / gs_num_invocations;
+ else
+ max_gs_prims = 255;
+
+ /* MAX_PRIMS_PER_SUBGROUP = gs_prims * max_vert_out * gs_invocations.
+ * Make sure we don't go over the maximum value.
+ */
+ if (gs_info->gs.vertices_out > 0) {
+ max_gs_prims =
+ MIN2(max_gs_prims, max_out_prims / (gs_info->gs.vertices_out * gs_num_invocations));
+ }
+ assert(max_gs_prims > 0);
+
+ /* If the primitive has adjacency, halve the number of vertices
+ * that will be reused in multiple primitives.
+ */
+ min_es_verts = gs_info->gs.vertices_in / (uses_adjacency ? 2 : 1);
+
+ gs_prims = MIN2(ideal_gs_prims, max_gs_prims);
+ worst_case_es_verts = MIN2(min_es_verts * gs_prims, max_es_verts);
+
+ /* Compute ESGS LDS size based on the worst case number of ES vertices
+ * needed to create the target number of GS prims per subgroup.
+ */
+ esgs_lds_size = esgs_itemsize * worst_case_es_verts;
+
+ /* If total LDS usage is too big, refactor partitions based on ratio
+ * of ESGS item sizes.
+ */
+ if (esgs_lds_size > max_lds_size) {
+ /* Our target GS Prims Per Subgroup was too large. Calculate
+ * the maximum number of GS Prims Per Subgroup that will fit
+ * into LDS, capped by the maximum that the hardware can support.
+ */
+ gs_prims = MIN2((max_lds_size / (esgs_itemsize * min_es_verts)), max_gs_prims);
+ assert(gs_prims > 0);
+ worst_case_es_verts = MIN2(min_es_verts * gs_prims, max_es_verts);
+
+ esgs_lds_size = esgs_itemsize * worst_case_es_verts;
+ assert(esgs_lds_size <= max_lds_size);
+ }
+
+ /* Now calculate remaining ESGS information. */
+ if (esgs_lds_size)
+ es_verts = MIN2(esgs_lds_size / esgs_itemsize, max_es_verts);
+ else
+ es_verts = max_es_verts;
+
+ /* Vertices for adjacency primitives are not always reused, so restore
+ * it for ES_VERTS_PER_SUBGRP.
+ */
+ min_es_verts = gs_info->gs.vertices_in;
+
+ /* For normal primitives, the VGT only checks if they are past the ES
+ * verts per subgroup after allocating a full GS primitive and if they
+ * are, kick off a new subgroup. But if those additional ES verts are
+    * unique (i.e. not reused) we need to make sure there is enough LDS
+ * space to account for those ES verts beyond ES_VERTS_PER_SUBGRP.
+ */
+ es_verts -= min_es_verts - 1;
+
+ uint32_t es_verts_per_subgroup = es_verts;
+ uint32_t gs_prims_per_subgroup = gs_prims;
+ uint32_t gs_inst_prims_in_subgroup = gs_prims * gs_num_invocations;
+ uint32_t max_prims_per_subgroup = gs_inst_prims_in_subgroup * gs_info->gs.vertices_out;
+ out->lds_size = align(esgs_lds_size, 128) / 128;
+ out->vgt_gs_onchip_cntl = S_028A44_ES_VERTS_PER_SUBGRP(es_verts_per_subgroup) |
+ S_028A44_GS_PRIMS_PER_SUBGRP(gs_prims_per_subgroup) |
+ S_028A44_GS_INST_PRIMS_IN_SUBGRP(gs_inst_prims_in_subgroup);
+ out->vgt_gs_max_prims_per_subgroup = S_028A94_MAX_PRIMS_PER_SUBGROUP(max_prims_per_subgroup);
+ out->vgt_esgs_ring_itemsize = esgs_itemsize;
+ assert(max_prims_per_subgroup <= max_out_prims);
}
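
A worked run of the sizing above may help. With invented shader parameters (triangle input without adjacency, one GS invocation, four output vertices per primitive, a 4-dword ESGS item), the 768-dword worst case fits well under the 8K-dword budget, so the re-partitioning branch never triggers and the result is 190 ES vertices and 64 GS primitives per subgroup in six 128-dword LDS granules:

   #include <stdio.h>

   #define MIN2(a, b)      ((a) < (b) ? (a) : (b))
   #define ALIGN_POT(x, a) (((x) + (a)-1) & ~((a)-1))

   int main(void)
   {
      const unsigned esgs_itemsize = 4, vertices_in = 3, vertices_out = 4;
      const unsigned max_es_verts = 255, ideal_gs_prims = 64;

      unsigned max_gs_prims = MIN2(255u, (32 * 1024) / vertices_out);        /* 255 */
      unsigned min_es_verts = vertices_in;                                   /* no adjacency */
      unsigned gs_prims = MIN2(ideal_gs_prims, max_gs_prims);                /* 64 */
      unsigned worst_es_verts = MIN2(min_es_verts * gs_prims, max_es_verts); /* 192 */
      unsigned esgs_lds = esgs_itemsize * worst_es_verts;                    /* 768 <= 8192 */
      unsigned es_verts = MIN2(esgs_lds / esgs_itemsize, max_es_verts) - (vertices_in - 1);

      printf("ES_VERTS_PER_SUBGRP=%u GS_PRIMS_PER_SUBGRP=%u lds_size=%u\n",
             es_verts, gs_prims, ALIGN_POT(esgs_lds, 128) / 128); /* 190 64 6 */
      return 0;
   }
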
-static void clamp_gsprims_to_esverts(unsigned *max_gsprims, unsigned max_esverts,
- unsigned min_verts_per_prim, bool use_adjacency)
+static void
+clamp_gsprims_to_esverts(unsigned *max_gsprims, unsigned max_esverts, unsigned min_verts_per_prim,
+ bool use_adjacency)
{
- unsigned max_reuse = max_esverts - min_verts_per_prim;
- if (use_adjacency)
- max_reuse /= 2;
- *max_gsprims = MIN2(*max_gsprims, 1 + max_reuse);
+ unsigned max_reuse = max_esverts - min_verts_per_prim;
+ if (use_adjacency)
+ max_reuse /= 2;
+ *max_gsprims = MIN2(*max_gsprims, 1 + max_reuse);
}
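
For concreteness, with 96 ES vertices available and triangles (three vertices per primitive; the numbers are invented), at most 93 vertices can come from reuse, so at most 94 primitives fit in the subgroup; with adjacency only half of that reuse is assumed and the cap drops to 47. The snippet below just replays the clamp with those fixed inputs:

   #include <stdio.h>

   int main(void)
   {
      unsigned max_esverts = 96, min_verts_per_prim = 3;
      unsigned max_reuse = max_esverts - min_verts_per_prim;    /* 93 */

      printf("cap without adjacency: %u\n", 1 + max_reuse);     /* 94 */
      printf("cap with adjacency:    %u\n", 1 + max_reuse / 2); /* 47 */
      return 0;
   }
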
static unsigned
radv_get_num_input_vertices(nir_shader **nir)
{
- if (nir[MESA_SHADER_GEOMETRY]) {
- nir_shader *gs = nir[MESA_SHADER_GEOMETRY];
+ if (nir[MESA_SHADER_GEOMETRY]) {
+ nir_shader *gs = nir[MESA_SHADER_GEOMETRY];
- return gs->info.gs.vertices_in;
- }
+ return gs->info.gs.vertices_in;
+ }
- if (nir[MESA_SHADER_TESS_CTRL]) {
- nir_shader *tes = nir[MESA_SHADER_TESS_EVAL];
+ if (nir[MESA_SHADER_TESS_CTRL]) {
+ nir_shader *tes = nir[MESA_SHADER_TESS_EVAL];
- if (tes->info.tess.point_mode)
- return 1;
- if (tes->info.tess.primitive_mode == GL_ISOLINES)
- return 2;
- return 3;
- }
+ if (tes->info.tess.point_mode)
+ return 1;
+ if (tes->info.tess.primitive_mode == GL_ISOLINES)
+ return 2;
+ return 3;
+ }
- return 3;
+ return 3;
}
static void
-gfx10_get_ngg_info(const struct radv_pipeline_key *key,
- struct radv_pipeline *pipeline,
- nir_shader **nir,
- struct radv_shader_info *infos,
- struct gfx10_ngg_info *ngg)
+gfx10_get_ngg_info(const struct radv_pipeline_key *key, struct radv_pipeline *pipeline,
+ nir_shader **nir, struct radv_shader_info *infos, struct gfx10_ngg_info *ngg)
{
- struct radv_shader_info *gs_info = &infos[MESA_SHADER_GEOMETRY];
- struct radv_es_output_info *es_info =
- nir[MESA_SHADER_TESS_CTRL] ? &gs_info->tes.es_info : &gs_info->vs.es_info;
- unsigned gs_type = nir[MESA_SHADER_GEOMETRY] ? MESA_SHADER_GEOMETRY : MESA_SHADER_VERTEX;
- unsigned max_verts_per_prim = radv_get_num_input_vertices(nir);
- unsigned min_verts_per_prim =
- gs_type == MESA_SHADER_GEOMETRY ? max_verts_per_prim : 1;
- unsigned gs_num_invocations = nir[MESA_SHADER_GEOMETRY] ? MAX2(gs_info->gs.invocations, 1) : 1;
- bool uses_adjacency;
- switch(key->topology) {
- case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
- case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
- case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
- case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
- uses_adjacency = true;
- break;
- default:
- uses_adjacency = false;
- break;
- }
-
- /* All these are in dwords: */
- /* We can't allow using the whole LDS, because GS waves compete with
- * other shader stages for LDS space.
- *
- * TODO: We should really take the shader's internal LDS use into
- * account. The linker will fail if the size is greater than
- * 8K dwords.
- */
- const unsigned max_lds_size = 8 * 1024 - 768;
- const unsigned target_lds_size = max_lds_size;
- unsigned esvert_lds_size = 0;
- unsigned gsprim_lds_size = 0;
-
- /* All these are per subgroup: */
- const unsigned min_esverts = pipeline->device->physical_device->rad_info.chip_class >= GFX10_3 ? 29 : 24;
- bool max_vert_out_per_gs_instance = false;
- unsigned max_esverts_base = 256;
- unsigned max_gsprims_base = 128; /* default prim group size clamp */
-
- /* Hardware has the following non-natural restrictions on the value
- * of GE_CNTL.VERT_GRP_SIZE based on based on the primitive type of
- * the draw:
- * - at most 252 for any line input primitive type
- * - at most 251 for any quad input primitive type
- * - at most 251 for triangle strips with adjacency (this happens to
- * be the natural limit for triangle *lists* with adjacency)
- */
- max_esverts_base = MIN2(max_esverts_base, 251 + max_verts_per_prim - 1);
-
- if (gs_type == MESA_SHADER_GEOMETRY) {
- unsigned max_out_verts_per_gsprim =
- gs_info->gs.vertices_out * gs_num_invocations;
-
- if (max_out_verts_per_gsprim <= 256) {
- if (max_out_verts_per_gsprim) {
- max_gsprims_base = MIN2(max_gsprims_base,
- 256 / max_out_verts_per_gsprim);
- }
- } else {
- /* Use special multi-cycling mode in which each GS
- * instance gets its own subgroup. Does not work with
- * tessellation. */
- max_vert_out_per_gs_instance = true;
- max_gsprims_base = 1;
- max_out_verts_per_gsprim = gs_info->gs.vertices_out;
- }
-
- esvert_lds_size = es_info->esgs_itemsize / 4;
- gsprim_lds_size = (gs_info->gs.gsvs_vertex_size / 4 + 1) * max_out_verts_per_gsprim;
- } else {
- /* VS and TES. */
- /* LDS size for passing data from GS to ES. */
- struct radv_streamout_info *so_info = nir[MESA_SHADER_TESS_CTRL]
- ? &infos[MESA_SHADER_TESS_EVAL].so
- : &infos[MESA_SHADER_VERTEX].so;
-
- if (so_info->num_outputs)
- esvert_lds_size = 4 * so_info->num_outputs + 1;
-
- /* GS stores Primitive IDs (one DWORD) into LDS at the address
- * corresponding to the ES thread of the provoking vertex. All
- * ES threads load and export PrimitiveID for their thread.
- */
- if (!nir[MESA_SHADER_TESS_CTRL] &&
- infos[MESA_SHADER_VERTEX].vs.outinfo.export_prim_id)
- esvert_lds_size = MAX2(esvert_lds_size, 1);
- }
-
- unsigned max_gsprims = max_gsprims_base;
- unsigned max_esverts = max_esverts_base;
-
- if (esvert_lds_size)
- max_esverts = MIN2(max_esverts, target_lds_size / esvert_lds_size);
- if (gsprim_lds_size)
- max_gsprims = MIN2(max_gsprims, target_lds_size / gsprim_lds_size);
-
- max_esverts = MIN2(max_esverts, max_gsprims * max_verts_per_prim);
- clamp_gsprims_to_esverts(&max_gsprims, max_esverts, min_verts_per_prim, uses_adjacency);
- assert(max_esverts >= max_verts_per_prim && max_gsprims >= 1);
-
- if (esvert_lds_size || gsprim_lds_size) {
- /* Now that we have a rough proportionality between esverts
- * and gsprims based on the primitive type, scale both of them
- * down simultaneously based on required LDS space.
- *
- * We could be smarter about this if we knew how much vertex
- * reuse to expect.
- */
- unsigned lds_total = max_esverts * esvert_lds_size +
- max_gsprims * gsprim_lds_size;
- if (lds_total > target_lds_size) {
- max_esverts = max_esverts * target_lds_size / lds_total;
- max_gsprims = max_gsprims * target_lds_size / lds_total;
-
- max_esverts = MIN2(max_esverts, max_gsprims * max_verts_per_prim);
- clamp_gsprims_to_esverts(&max_gsprims, max_esverts,
- min_verts_per_prim, uses_adjacency);
- assert(max_esverts >= max_verts_per_prim && max_gsprims >= 1);
- }
- }
-
- /* Round up towards full wave sizes for better ALU utilization. */
- if (!max_vert_out_per_gs_instance) {
- unsigned orig_max_esverts;
- unsigned orig_max_gsprims;
- unsigned wavesize;
-
- if (gs_type == MESA_SHADER_GEOMETRY) {
- wavesize = gs_info->wave_size;
- } else {
- wavesize = nir[MESA_SHADER_TESS_CTRL]
- ? infos[MESA_SHADER_TESS_EVAL].wave_size
- : infos[MESA_SHADER_VERTEX].wave_size;
- }
-
- do {
- orig_max_esverts = max_esverts;
- orig_max_gsprims = max_gsprims;
-
- max_esverts = align(max_esverts, wavesize);
- max_esverts = MIN2(max_esverts, max_esverts_base);
- if (esvert_lds_size)
- max_esverts = MIN2(max_esverts,
- (max_lds_size - max_gsprims * gsprim_lds_size) /
- esvert_lds_size);
- max_esverts = MIN2(max_esverts, max_gsprims * max_verts_per_prim);
-
- /* Hardware restriction: minimum value of max_esverts */
- if (pipeline->device->physical_device->rad_info.chip_class == GFX10)
- max_esverts = MAX2(max_esverts, min_esverts - 1 + max_verts_per_prim);
- else
- max_esverts = MAX2(max_esverts, min_esverts);
-
- max_gsprims = align(max_gsprims, wavesize);
- max_gsprims = MIN2(max_gsprims, max_gsprims_base);
- if (gsprim_lds_size) {
- /* Don't count unusable vertices to the LDS
- * size. Those are vertices above the maximum
- * number of vertices that can occur in the
- * workgroup, which is e.g. max_gsprims * 3
- * for triangles.
- */
- unsigned usable_esverts = MIN2(max_esverts, max_gsprims * max_verts_per_prim);
- max_gsprims =
- MIN2(max_gsprims, (max_lds_size - usable_esverts * esvert_lds_size) / gsprim_lds_size);
- }
- clamp_gsprims_to_esverts(&max_gsprims, max_esverts,
- min_verts_per_prim, uses_adjacency);
- assert(max_esverts >= max_verts_per_prim && max_gsprims >= 1);
- } while (orig_max_esverts != max_esverts || orig_max_gsprims != max_gsprims);
-
- /* Verify the restriction. */
- if (pipeline->device->physical_device->rad_info.chip_class == GFX10)
- assert(max_esverts >= min_esverts - 1 + max_verts_per_prim);
- else
- assert(max_esverts >= min_esverts);
- } else {
- /* Hardware restriction: minimum value of max_esverts */
- if (pipeline->device->physical_device->rad_info.chip_class == GFX10)
- max_esverts = MAX2(max_esverts, min_esverts - 1 + max_verts_per_prim);
- else
- max_esverts = MAX2(max_esverts, min_esverts);
- }
-
- unsigned max_out_vertices =
- max_vert_out_per_gs_instance ? gs_info->gs.vertices_out :
- gs_type == MESA_SHADER_GEOMETRY ?
- max_gsprims * gs_num_invocations * gs_info->gs.vertices_out :
- max_esverts;
- assert(max_out_vertices <= 256);
-
- unsigned prim_amp_factor = 1;
- if (gs_type == MESA_SHADER_GEOMETRY) {
- /* Number of output primitives per GS input primitive after
- * GS instancing. */
- prim_amp_factor = gs_info->gs.vertices_out;
- }
-
- /* On Gfx10, the GE only checks against the maximum number of ES verts
- * after allocating a full GS primitive. So we need to ensure that
- * whenever this check passes, there is enough space for a full
- * primitive without vertex reuse.
- */
- if (pipeline->device->physical_device->rad_info.chip_class == GFX10)
- ngg->hw_max_esverts = max_esverts - max_verts_per_prim + 1;
- else
- ngg->hw_max_esverts = max_esverts;
-
- ngg->max_gsprims = max_gsprims;
- ngg->max_out_verts = max_out_vertices;
- ngg->prim_amp_factor = prim_amp_factor;
- ngg->max_vert_out_per_gs_instance = max_vert_out_per_gs_instance;
- ngg->ngg_emit_size = max_gsprims * gsprim_lds_size;
-
- /* Don't count unusable vertices. */
- ngg->esgs_ring_size =
- MIN2(max_esverts, max_gsprims * max_verts_per_prim) * esvert_lds_size * 4;
-
- if (gs_type == MESA_SHADER_GEOMETRY) {
- ngg->vgt_esgs_ring_itemsize = es_info->esgs_itemsize / 4;
- } else {
- ngg->vgt_esgs_ring_itemsize = 1;
- }
-
- assert(ngg->hw_max_esverts >= min_esverts); /* HW limitation */
+ struct radv_shader_info *gs_info = &infos[MESA_SHADER_GEOMETRY];
+ struct radv_es_output_info *es_info =
+ nir[MESA_SHADER_TESS_CTRL] ? &gs_info->tes.es_info : &gs_info->vs.es_info;
+ unsigned gs_type = nir[MESA_SHADER_GEOMETRY] ? MESA_SHADER_GEOMETRY : MESA_SHADER_VERTEX;
+ unsigned max_verts_per_prim = radv_get_num_input_vertices(nir);
+ unsigned min_verts_per_prim = gs_type == MESA_SHADER_GEOMETRY ? max_verts_per_prim : 1;
+ unsigned gs_num_invocations = nir[MESA_SHADER_GEOMETRY] ? MAX2(gs_info->gs.invocations, 1) : 1;
+ bool uses_adjacency;
+ switch (key->topology) {
+ case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
+ case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
+ uses_adjacency = true;
+ break;
+ default:
+ uses_adjacency = false;
+ break;
+ }
+
+ /* All these are in dwords: */
+ /* We can't allow using the whole LDS, because GS waves compete with
+ * other shader stages for LDS space.
+ *
+ * TODO: We should really take the shader's internal LDS use into
+ * account. The linker will fail if the size is greater than
+ * 8K dwords.
+ */
+ const unsigned max_lds_size = 8 * 1024 - 768;
+ const unsigned target_lds_size = max_lds_size;
+ unsigned esvert_lds_size = 0;
+ unsigned gsprim_lds_size = 0;
+
+ /* All these are per subgroup: */
+ const unsigned min_esverts =
+ pipeline->device->physical_device->rad_info.chip_class >= GFX10_3 ? 29 : 24;
+ bool max_vert_out_per_gs_instance = false;
+ unsigned max_esverts_base = 256;
+ unsigned max_gsprims_base = 128; /* default prim group size clamp */
+
+ /* Hardware has the following non-natural restrictions on the value
+    * of GE_CNTL.VERT_GRP_SIZE based on the primitive type of
+ * the draw:
+ * - at most 252 for any line input primitive type
+ * - at most 251 for any quad input primitive type
+ * - at most 251 for triangle strips with adjacency (this happens to
+ * be the natural limit for triangle *lists* with adjacency)
+ */
+ max_esverts_base = MIN2(max_esverts_base, 251 + max_verts_per_prim - 1);
+
+ if (gs_type == MESA_SHADER_GEOMETRY) {
+ unsigned max_out_verts_per_gsprim = gs_info->gs.vertices_out * gs_num_invocations;
+
+ if (max_out_verts_per_gsprim <= 256) {
+ if (max_out_verts_per_gsprim) {
+ max_gsprims_base = MIN2(max_gsprims_base, 256 / max_out_verts_per_gsprim);
+ }
+ } else {
+ /* Use special multi-cycling mode in which each GS
+ * instance gets its own subgroup. Does not work with
+ * tessellation. */
+ max_vert_out_per_gs_instance = true;
+ max_gsprims_base = 1;
+ max_out_verts_per_gsprim = gs_info->gs.vertices_out;
+ }
+
+ esvert_lds_size = es_info->esgs_itemsize / 4;
+ gsprim_lds_size = (gs_info->gs.gsvs_vertex_size / 4 + 1) * max_out_verts_per_gsprim;
+ } else {
+ /* VS and TES. */
+ /* LDS size for passing data from GS to ES. */
+ struct radv_streamout_info *so_info = nir[MESA_SHADER_TESS_CTRL]
+ ? &infos[MESA_SHADER_TESS_EVAL].so
+ : &infos[MESA_SHADER_VERTEX].so;
+
+ if (so_info->num_outputs)
+ esvert_lds_size = 4 * so_info->num_outputs + 1;
+
+ /* GS stores Primitive IDs (one DWORD) into LDS at the address
+ * corresponding to the ES thread of the provoking vertex. All
+ * ES threads load and export PrimitiveID for their thread.
+ */
+ if (!nir[MESA_SHADER_TESS_CTRL] && infos[MESA_SHADER_VERTEX].vs.outinfo.export_prim_id)
+ esvert_lds_size = MAX2(esvert_lds_size, 1);
+ }
+
+ unsigned max_gsprims = max_gsprims_base;
+ unsigned max_esverts = max_esverts_base;
+
+ if (esvert_lds_size)
+ max_esverts = MIN2(max_esverts, target_lds_size / esvert_lds_size);
+ if (gsprim_lds_size)
+ max_gsprims = MIN2(max_gsprims, target_lds_size / gsprim_lds_size);
+
+ max_esverts = MIN2(max_esverts, max_gsprims * max_verts_per_prim);
+ clamp_gsprims_to_esverts(&max_gsprims, max_esverts, min_verts_per_prim, uses_adjacency);
+ assert(max_esverts >= max_verts_per_prim && max_gsprims >= 1);
+
+ if (esvert_lds_size || gsprim_lds_size) {
+ /* Now that we have a rough proportionality between esverts
+ * and gsprims based on the primitive type, scale both of them
+ * down simultaneously based on required LDS space.
+ *
+ * We could be smarter about this if we knew how much vertex
+ * reuse to expect.
+ */
+ unsigned lds_total = max_esverts * esvert_lds_size + max_gsprims * gsprim_lds_size;
+ if (lds_total > target_lds_size) {
+ max_esverts = max_esverts * target_lds_size / lds_total;
+ max_gsprims = max_gsprims * target_lds_size / lds_total;
+
+ max_esverts = MIN2(max_esverts, max_gsprims * max_verts_per_prim);
+ clamp_gsprims_to_esverts(&max_gsprims, max_esverts, min_verts_per_prim, uses_adjacency);
+ assert(max_esverts >= max_verts_per_prim && max_gsprims >= 1);
+ }
+ }
+
+ /* Round up towards full wave sizes for better ALU utilization. */
+ if (!max_vert_out_per_gs_instance) {
+ unsigned orig_max_esverts;
+ unsigned orig_max_gsprims;
+ unsigned wavesize;
+
+ if (gs_type == MESA_SHADER_GEOMETRY) {
+ wavesize = gs_info->wave_size;
+ } else {
+ wavesize = nir[MESA_SHADER_TESS_CTRL] ? infos[MESA_SHADER_TESS_EVAL].wave_size
+ : infos[MESA_SHADER_VERTEX].wave_size;
+ }
+
+ do {
+ orig_max_esverts = max_esverts;
+ orig_max_gsprims = max_gsprims;
+
+ max_esverts = align(max_esverts, wavesize);
+ max_esverts = MIN2(max_esverts, max_esverts_base);
+ if (esvert_lds_size)
+ max_esverts =
+ MIN2(max_esverts, (max_lds_size - max_gsprims * gsprim_lds_size) / esvert_lds_size);
+ max_esverts = MIN2(max_esverts, max_gsprims * max_verts_per_prim);
+
+ /* Hardware restriction: minimum value of max_esverts */
+ if (pipeline->device->physical_device->rad_info.chip_class == GFX10)
+ max_esverts = MAX2(max_esverts, min_esverts - 1 + max_verts_per_prim);
+ else
+ max_esverts = MAX2(max_esverts, min_esverts);
+
+ max_gsprims = align(max_gsprims, wavesize);
+ max_gsprims = MIN2(max_gsprims, max_gsprims_base);
+ if (gsprim_lds_size) {
+ /* Don't count unusable vertices to the LDS
+ * size. Those are vertices above the maximum
+ * number of vertices that can occur in the
+ * workgroup, which is e.g. max_gsprims * 3
+ * for triangles.
+ */
+ unsigned usable_esverts = MIN2(max_esverts, max_gsprims * max_verts_per_prim);
+ max_gsprims = MIN2(max_gsprims,
+ (max_lds_size - usable_esverts * esvert_lds_size) / gsprim_lds_size);
+ }
+ clamp_gsprims_to_esverts(&max_gsprims, max_esverts, min_verts_per_prim, uses_adjacency);
+ assert(max_esverts >= max_verts_per_prim && max_gsprims >= 1);
+ } while (orig_max_esverts != max_esverts || orig_max_gsprims != max_gsprims);
+
+ /* Verify the restriction. */
+ if (pipeline->device->physical_device->rad_info.chip_class == GFX10)
+ assert(max_esverts >= min_esverts - 1 + max_verts_per_prim);
+ else
+ assert(max_esverts >= min_esverts);
+ } else {
+ /* Hardware restriction: minimum value of max_esverts */
+ if (pipeline->device->physical_device->rad_info.chip_class == GFX10)
+ max_esverts = MAX2(max_esverts, min_esverts - 1 + max_verts_per_prim);
+ else
+ max_esverts = MAX2(max_esverts, min_esverts);
+ }
+
+ unsigned max_out_vertices = max_vert_out_per_gs_instance ? gs_info->gs.vertices_out
+ : gs_type == MESA_SHADER_GEOMETRY
+ ? max_gsprims * gs_num_invocations * gs_info->gs.vertices_out
+ : max_esverts;
+ assert(max_out_vertices <= 256);
+
+ unsigned prim_amp_factor = 1;
+ if (gs_type == MESA_SHADER_GEOMETRY) {
+ /* Number of output primitives per GS input primitive after
+ * GS instancing. */
+ prim_amp_factor = gs_info->gs.vertices_out;
+ }
+
+ /* On Gfx10, the GE only checks against the maximum number of ES verts
+ * after allocating a full GS primitive. So we need to ensure that
+ * whenever this check passes, there is enough space for a full
+ * primitive without vertex reuse.
+ */
+ if (pipeline->device->physical_device->rad_info.chip_class == GFX10)
+ ngg->hw_max_esverts = max_esverts - max_verts_per_prim + 1;
+ else
+ ngg->hw_max_esverts = max_esverts;
+
+ ngg->max_gsprims = max_gsprims;
+ ngg->max_out_verts = max_out_vertices;
+ ngg->prim_amp_factor = prim_amp_factor;
+ ngg->max_vert_out_per_gs_instance = max_vert_out_per_gs_instance;
+ ngg->ngg_emit_size = max_gsprims * gsprim_lds_size;
+
+ /* Don't count unusable vertices. */
+ ngg->esgs_ring_size = MIN2(max_esverts, max_gsprims * max_verts_per_prim) * esvert_lds_size * 4;
+
+ if (gs_type == MESA_SHADER_GEOMETRY) {
+ ngg->vgt_esgs_ring_itemsize = es_info->esgs_itemsize / 4;
+ } else {
+ ngg->vgt_esgs_ring_itemsize = 1;
+ }
+
+ assert(ngg->hw_max_esverts >= min_esverts); /* HW limitation */
}
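
The proportional scale-down in the middle of gfx10_get_ngg_info() is easiest to see with numbers. The sketch below uses invented LDS costs (16 dwords per ES vertex, 48 dwords per GS primitive) and skips the surrounding per-resource clamps and wave-size rounding, so it only illustrates the joint rescaling step, not the whole algorithm:

   #include <stdio.h>

   int main(void)
   {
      const unsigned target = 8 * 1024 - 768; /* 7424 dwords of usable LDS */
      unsigned esvert_lds = 16, gsprim_lds = 48;
      unsigned max_esverts = 253, max_gsprims = 128;

      unsigned total = max_esverts * esvert_lds + max_gsprims * gsprim_lds; /* 10192 */
      if (total > target) {
         max_esverts = max_esverts * target / total; /* 184 */
         max_gsprims = max_gsprims * target / total; /* 93 */
      }

      printf("scaled to %u ES verts and %u GS prims (%u dwords of LDS)\n",
             max_esverts, max_gsprims,
             max_esverts * esvert_lds + max_gsprims * gsprim_lds); /* 7408 <= 7424 */
      return 0;
   }
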
static void
-radv_pipeline_init_gs_ring_state(struct radv_pipeline *pipeline,
- const struct gfx9_gs_info *gs)
+radv_pipeline_init_gs_ring_state(struct radv_pipeline *pipeline, const struct gfx9_gs_info *gs)
{
- struct radv_device *device = pipeline->device;
- unsigned num_se = device->physical_device->rad_info.max_se;
- unsigned wave_size = 64;
- unsigned max_gs_waves = 32 * num_se; /* max 32 per SE on GCN */
- /* On GFX6-GFX7, the value comes from VGT_GS_VERTEX_REUSE = 16.
- * On GFX8+, the value comes from VGT_VERTEX_REUSE_BLOCK_CNTL = 30 (+2).
- */
- unsigned gs_vertex_reuse =
- (device->physical_device->rad_info.chip_class >= GFX8 ? 32 : 16) * num_se;
- unsigned alignment = 256 * num_se;
- /* The maximum size is 63.999 MB per SE. */
- unsigned max_size = ((unsigned)(63.999 * 1024 * 1024) & ~255) * num_se;
- struct radv_shader_info *gs_info = &pipeline->shaders[MESA_SHADER_GEOMETRY]->info;
-
- /* Calculate the minimum size. */
- unsigned min_esgs_ring_size = align(gs->vgt_esgs_ring_itemsize * 4 * gs_vertex_reuse *
- wave_size, alignment);
- /* These are recommended sizes, not minimum sizes. */
- unsigned esgs_ring_size = max_gs_waves * 2 * wave_size *
- gs->vgt_esgs_ring_itemsize * 4 * gs_info->gs.vertices_in;
- unsigned gsvs_ring_size = max_gs_waves * 2 * wave_size *
- gs_info->gs.max_gsvs_emit_size;
-
- min_esgs_ring_size = align(min_esgs_ring_size, alignment);
- esgs_ring_size = align(esgs_ring_size, alignment);
- gsvs_ring_size = align(gsvs_ring_size, alignment);
-
- if (pipeline->device->physical_device->rad_info.chip_class <= GFX8)
- pipeline->graphics.esgs_ring_size = CLAMP(esgs_ring_size, min_esgs_ring_size, max_size);
-
- pipeline->graphics.gsvs_ring_size = MIN2(gsvs_ring_size, max_size);
+ struct radv_device *device = pipeline->device;
+ unsigned num_se = device->physical_device->rad_info.max_se;
+ unsigned wave_size = 64;
+ unsigned max_gs_waves = 32 * num_se; /* max 32 per SE on GCN */
+ /* On GFX6-GFX7, the value comes from VGT_GS_VERTEX_REUSE = 16.
+ * On GFX8+, the value comes from VGT_VERTEX_REUSE_BLOCK_CNTL = 30 (+2).
+ */
+ unsigned gs_vertex_reuse =
+ (device->physical_device->rad_info.chip_class >= GFX8 ? 32 : 16) * num_se;
+ unsigned alignment = 256 * num_se;
+ /* The maximum size is 63.999 MB per SE. */
+ unsigned max_size = ((unsigned)(63.999 * 1024 * 1024) & ~255) * num_se;
+ struct radv_shader_info *gs_info = &pipeline->shaders[MESA_SHADER_GEOMETRY]->info;
+
+ /* Calculate the minimum size. */
+ unsigned min_esgs_ring_size =
+ align(gs->vgt_esgs_ring_itemsize * 4 * gs_vertex_reuse * wave_size, alignment);
+ /* These are recommended sizes, not minimum sizes. */
+ unsigned esgs_ring_size =
+ max_gs_waves * 2 * wave_size * gs->vgt_esgs_ring_itemsize * 4 * gs_info->gs.vertices_in;
+ unsigned gsvs_ring_size = max_gs_waves * 2 * wave_size * gs_info->gs.max_gsvs_emit_size;
+
+ min_esgs_ring_size = align(min_esgs_ring_size, alignment);
+ esgs_ring_size = align(esgs_ring_size, alignment);
+ gsvs_ring_size = align(gsvs_ring_size, alignment);
+
+ if (pipeline->device->physical_device->rad_info.chip_class <= GFX8)
+ pipeline->graphics.esgs_ring_size = CLAMP(esgs_ring_size, min_esgs_ring_size, max_size);
+
+ pipeline->graphics.gsvs_ring_size = MIN2(gsvs_ring_size, max_size);
}
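
A worked example of the ring sizing, with invented parameters: two shader engines on a GFX8-class part (so gs_vertex_reuse = 64), a 4-dword ESGS item, triangle input and a 256-byte worst-case GSVS emit size. All three results already meet the 512-byte alignment for this configuration, so the align() calls are no-ops here:

   #include <stdio.h>

   int main(void)
   {
      const unsigned num_se = 2, wave_size = 64, max_gs_waves = 32 * num_se;
      const unsigned gs_vertex_reuse = 32 * num_se, itemsize_bytes = 4 * 4;

      unsigned min_esgs = itemsize_bytes * gs_vertex_reuse * wave_size;  /* 65536   (64 KiB) */
      unsigned esgs = max_gs_waves * 2 * wave_size * itemsize_bytes * 3; /* 393216  (384 KiB) */
      unsigned gsvs = max_gs_waves * 2 * wave_size * 256;                /* 2097152 (2 MiB) */

      printf("min_esgs=%u esgs=%u gsvs=%u\n", min_esgs, esgs, gsvs);
      return 0;
   }
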
struct radv_shader_variant *
-radv_get_shader(const struct radv_pipeline *pipeline,
- gl_shader_stage stage)
+radv_get_shader(const struct radv_pipeline *pipeline, gl_shader_stage stage)
{
- if (stage == MESA_SHADER_VERTEX) {
- if (pipeline->shaders[MESA_SHADER_VERTEX])
- return pipeline->shaders[MESA_SHADER_VERTEX];
- if (pipeline->shaders[MESA_SHADER_TESS_CTRL])
- return pipeline->shaders[MESA_SHADER_TESS_CTRL];
- if (pipeline->shaders[MESA_SHADER_GEOMETRY])
- return pipeline->shaders[MESA_SHADER_GEOMETRY];
- } else if (stage == MESA_SHADER_TESS_EVAL) {
- if (!radv_pipeline_has_tess(pipeline))
- return NULL;
- if (pipeline->shaders[MESA_SHADER_TESS_EVAL])
- return pipeline->shaders[MESA_SHADER_TESS_EVAL];
- if (pipeline->shaders[MESA_SHADER_GEOMETRY])
- return pipeline->shaders[MESA_SHADER_GEOMETRY];
- }
- return pipeline->shaders[stage];
+ if (stage == MESA_SHADER_VERTEX) {
+ if (pipeline->shaders[MESA_SHADER_VERTEX])
+ return pipeline->shaders[MESA_SHADER_VERTEX];
+ if (pipeline->shaders[MESA_SHADER_TESS_CTRL])
+ return pipeline->shaders[MESA_SHADER_TESS_CTRL];
+ if (pipeline->shaders[MESA_SHADER_GEOMETRY])
+ return pipeline->shaders[MESA_SHADER_GEOMETRY];
+ } else if (stage == MESA_SHADER_TESS_EVAL) {
+ if (!radv_pipeline_has_tess(pipeline))
+ return NULL;
+ if (pipeline->shaders[MESA_SHADER_TESS_EVAL])
+ return pipeline->shaders[MESA_SHADER_TESS_EVAL];
+ if (pipeline->shaders[MESA_SHADER_GEOMETRY])
+ return pipeline->shaders[MESA_SHADER_GEOMETRY];
+ }
+ return pipeline->shaders[stage];
}
-static const struct radv_vs_output_info *get_vs_output_info(const struct radv_pipeline *pipeline)
+static const struct radv_vs_output_info *
+get_vs_output_info(const struct radv_pipeline *pipeline)
{
- if (radv_pipeline_has_gs(pipeline))
- if (radv_pipeline_has_ngg(pipeline))
- return &pipeline->shaders[MESA_SHADER_GEOMETRY]->info.vs.outinfo;
- else
- return &pipeline->gs_copy_shader->info.vs.outinfo;
- else if (radv_pipeline_has_tess(pipeline))
- return &pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.tes.outinfo;
- else
- return &pipeline->shaders[MESA_SHADER_VERTEX]->info.vs.outinfo;
+ if (radv_pipeline_has_gs(pipeline))
+ if (radv_pipeline_has_ngg(pipeline))
+ return &pipeline->shaders[MESA_SHADER_GEOMETRY]->info.vs.outinfo;
+ else
+ return &pipeline->gs_copy_shader->info.vs.outinfo;
+ else if (radv_pipeline_has_tess(pipeline))
+ return &pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.tes.outinfo;
+ else
+ return &pipeline->shaders[MESA_SHADER_VERTEX]->info.vs.outinfo;
}
static void
radv_link_shaders(struct radv_pipeline *pipeline, nir_shader **shaders,
- bool optimize_conservatively)
+ bool optimize_conservatively)
{
- nir_shader* ordered_shaders[MESA_SHADER_STAGES];
- int shader_count = 0;
-
- if(shaders[MESA_SHADER_FRAGMENT]) {
- ordered_shaders[shader_count++] = shaders[MESA_SHADER_FRAGMENT];
- }
- if(shaders[MESA_SHADER_GEOMETRY]) {
- ordered_shaders[shader_count++] = shaders[MESA_SHADER_GEOMETRY];
- }
- if(shaders[MESA_SHADER_TESS_EVAL]) {
- ordered_shaders[shader_count++] = shaders[MESA_SHADER_TESS_EVAL];
- }
- if(shaders[MESA_SHADER_TESS_CTRL]) {
- ordered_shaders[shader_count++] = shaders[MESA_SHADER_TESS_CTRL];
- }
- if(shaders[MESA_SHADER_VERTEX]) {
- ordered_shaders[shader_count++] = shaders[MESA_SHADER_VERTEX];
- }
- if(shaders[MESA_SHADER_COMPUTE]) {
- ordered_shaders[shader_count++] = shaders[MESA_SHADER_COMPUTE];
- }
-
- bool has_geom_tess = shaders[MESA_SHADER_GEOMETRY] || shaders[MESA_SHADER_TESS_CTRL];
- bool merged_gs = shaders[MESA_SHADER_GEOMETRY] &&
- pipeline->device->physical_device->rad_info.chip_class >= GFX9;
-
- if (!optimize_conservatively && shader_count > 1) {
- unsigned first = ordered_shaders[shader_count - 1]->info.stage;
- unsigned last = ordered_shaders[0]->info.stage;
-
- if (ordered_shaders[0]->info.stage == MESA_SHADER_FRAGMENT &&
- ordered_shaders[1]->info.has_transform_feedback_varyings)
- nir_link_xfb_varyings(ordered_shaders[1], ordered_shaders[0]);
-
- for (int i = 1; i < shader_count; ++i) {
- nir_lower_io_arrays_to_elements(ordered_shaders[i],
- ordered_shaders[i - 1]);
- }
-
- for (int i = 0; i < shader_count; ++i) {
- nir_variable_mode mask = 0;
-
- if (ordered_shaders[i]->info.stage != first)
- mask = mask | nir_var_shader_in;
-
- if (ordered_shaders[i]->info.stage != last)
- mask = mask | nir_var_shader_out;
-
- if (nir_lower_io_to_scalar_early(ordered_shaders[i], mask)) {
- /* Optimize the new vector code and then remove dead vars */
- nir_copy_prop(ordered_shaders[i]);
- nir_opt_shrink_vectors(ordered_shaders[i],
- !pipeline->device->instance->disable_shrink_image_store);
-
- if (ordered_shaders[i]->info.stage != last) {
- /* Optimize swizzled movs of load_const for
- * nir_link_opt_varyings's constant propagation
- */
- nir_opt_constant_folding(ordered_shaders[i]);
- /* For nir_link_opt_varyings's duplicate input opt */
- nir_opt_cse(ordered_shaders[i]);
- }
-
- /* Run copy-propagation to help remove dead
- * output variables (some shaders have useless
- * copies to/from an output), so compaction
- * later will be more effective.
- *
- * This will have been done earlier but it might
- * not have worked because the outputs were vector.
- */
- if (ordered_shaders[i]->info.stage == MESA_SHADER_TESS_CTRL)
- nir_opt_copy_prop_vars(ordered_shaders[i]);
-
- nir_opt_dce(ordered_shaders[i]);
- nir_remove_dead_variables(ordered_shaders[i],
- nir_var_function_temp | nir_var_shader_in | nir_var_shader_out, NULL);
- }
- }
- }
-
- for (int i = 1; !optimize_conservatively && (i < shader_count); ++i) {
- if (nir_link_opt_varyings(ordered_shaders[i], ordered_shaders[i - 1])) {
- nir_opt_constant_folding(ordered_shaders[i - 1]);
- nir_opt_algebraic(ordered_shaders[i - 1]);
- nir_opt_dce(ordered_shaders[i - 1]);
- }
-
- nir_remove_dead_variables(ordered_shaders[i],
- nir_var_shader_out, NULL);
- nir_remove_dead_variables(ordered_shaders[i - 1],
- nir_var_shader_in, NULL);
-
- bool progress = nir_remove_unused_varyings(ordered_shaders[i],
- ordered_shaders[i - 1]);
-
- nir_compact_varyings(ordered_shaders[i],
- ordered_shaders[i - 1], true);
-
- if (ordered_shaders[i]->info.stage == MESA_SHADER_TESS_CTRL ||
- (ordered_shaders[i]->info.stage == MESA_SHADER_VERTEX && has_geom_tess) ||
- (ordered_shaders[i]->info.stage == MESA_SHADER_TESS_EVAL && merged_gs)) {
- nir_lower_io_to_vector(ordered_shaders[i], nir_var_shader_out);
- if (ordered_shaders[i]->info.stage == MESA_SHADER_TESS_CTRL)
- nir_vectorize_tess_levels(ordered_shaders[i]);
- nir_opt_combine_stores(ordered_shaders[i], nir_var_shader_out);
- }
- if (ordered_shaders[i - 1]->info.stage == MESA_SHADER_GEOMETRY ||
- ordered_shaders[i - 1]->info.stage == MESA_SHADER_TESS_CTRL ||
- ordered_shaders[i - 1]->info.stage == MESA_SHADER_TESS_EVAL) {
- nir_lower_io_to_vector(ordered_shaders[i - 1], nir_var_shader_in);
- }
-
- if (progress) {
- if (nir_lower_global_vars_to_local(ordered_shaders[i])) {
- ac_lower_indirect_derefs(ordered_shaders[i],
- pipeline->device->physical_device->rad_info.chip_class);
- /* remove dead writes, which can remove input loads */
- nir_lower_vars_to_ssa(ordered_shaders[i]);
- nir_opt_dce(ordered_shaders[i]);
- }
-
- if (nir_lower_global_vars_to_local(ordered_shaders[i - 1])) {
- ac_lower_indirect_derefs(ordered_shaders[i - 1],
- pipeline->device->physical_device->rad_info.chip_class);
- }
- }
- }
+ nir_shader *ordered_shaders[MESA_SHADER_STAGES];
+ int shader_count = 0;
+
+ if (shaders[MESA_SHADER_FRAGMENT]) {
+ ordered_shaders[shader_count++] = shaders[MESA_SHADER_FRAGMENT];
+ }
+ if (shaders[MESA_SHADER_GEOMETRY]) {
+ ordered_shaders[shader_count++] = shaders[MESA_SHADER_GEOMETRY];
+ }
+ if (shaders[MESA_SHADER_TESS_EVAL]) {
+ ordered_shaders[shader_count++] = shaders[MESA_SHADER_TESS_EVAL];
+ }
+ if (shaders[MESA_SHADER_TESS_CTRL]) {
+ ordered_shaders[shader_count++] = shaders[MESA_SHADER_TESS_CTRL];
+ }
+ if (shaders[MESA_SHADER_VERTEX]) {
+ ordered_shaders[shader_count++] = shaders[MESA_SHADER_VERTEX];
+ }
+ if (shaders[MESA_SHADER_COMPUTE]) {
+ ordered_shaders[shader_count++] = shaders[MESA_SHADER_COMPUTE];
+ }
+
+ bool has_geom_tess = shaders[MESA_SHADER_GEOMETRY] || shaders[MESA_SHADER_TESS_CTRL];
+ bool merged_gs = shaders[MESA_SHADER_GEOMETRY] &&
+ pipeline->device->physical_device->rad_info.chip_class >= GFX9;
+
+ if (!optimize_conservatively && shader_count > 1) {
+ unsigned first = ordered_shaders[shader_count - 1]->info.stage;
+ unsigned last = ordered_shaders[0]->info.stage;
+
+ if (ordered_shaders[0]->info.stage == MESA_SHADER_FRAGMENT &&
+ ordered_shaders[1]->info.has_transform_feedback_varyings)
+ nir_link_xfb_varyings(ordered_shaders[1], ordered_shaders[0]);
+
+ for (int i = 1; i < shader_count; ++i) {
+ nir_lower_io_arrays_to_elements(ordered_shaders[i], ordered_shaders[i - 1]);
+ }
+
+ for (int i = 0; i < shader_count; ++i) {
+ nir_variable_mode mask = 0;
+
+ if (ordered_shaders[i]->info.stage != first)
+ mask = mask | nir_var_shader_in;
+
+ if (ordered_shaders[i]->info.stage != last)
+ mask = mask | nir_var_shader_out;
+
+ if (nir_lower_io_to_scalar_early(ordered_shaders[i], mask)) {
+ /* Optimize the new vector code and then remove dead vars */
+ nir_copy_prop(ordered_shaders[i]);
+ nir_opt_shrink_vectors(ordered_shaders[i],
+ !pipeline->device->instance->disable_shrink_image_store);
+
+ if (ordered_shaders[i]->info.stage != last) {
+ /* Optimize swizzled movs of load_const for
+ * nir_link_opt_varyings's constant propagation
+ */
+ nir_opt_constant_folding(ordered_shaders[i]);
+ /* For nir_link_opt_varyings's duplicate input opt */
+ nir_opt_cse(ordered_shaders[i]);
+ }
+
+ /* Run copy-propagation to help remove dead
+ * output variables (some shaders have useless
+ * copies to/from an output), so compaction
+ * later will be more effective.
+ *
+ * This will have been done earlier but it might
+ * not have worked because the outputs were vector.
+ */
+ if (ordered_shaders[i]->info.stage == MESA_SHADER_TESS_CTRL)
+ nir_opt_copy_prop_vars(ordered_shaders[i]);
+
+ nir_opt_dce(ordered_shaders[i]);
+ nir_remove_dead_variables(
+ ordered_shaders[i], nir_var_function_temp | nir_var_shader_in | nir_var_shader_out,
+ NULL);
+ }
+ }
+ }
+
+ for (int i = 1; !optimize_conservatively && (i < shader_count); ++i) {
+ if (nir_link_opt_varyings(ordered_shaders[i], ordered_shaders[i - 1])) {
+ nir_opt_constant_folding(ordered_shaders[i - 1]);
+ nir_opt_algebraic(ordered_shaders[i - 1]);
+ nir_opt_dce(ordered_shaders[i - 1]);
+ }
+
+ nir_remove_dead_variables(ordered_shaders[i], nir_var_shader_out, NULL);
+ nir_remove_dead_variables(ordered_shaders[i - 1], nir_var_shader_in, NULL);
+
+ bool progress = nir_remove_unused_varyings(ordered_shaders[i], ordered_shaders[i - 1]);
+
+ nir_compact_varyings(ordered_shaders[i], ordered_shaders[i - 1], true);
+
+ if (ordered_shaders[i]->info.stage == MESA_SHADER_TESS_CTRL ||
+ (ordered_shaders[i]->info.stage == MESA_SHADER_VERTEX && has_geom_tess) ||
+ (ordered_shaders[i]->info.stage == MESA_SHADER_TESS_EVAL && merged_gs)) {
+ nir_lower_io_to_vector(ordered_shaders[i], nir_var_shader_out);
+ if (ordered_shaders[i]->info.stage == MESA_SHADER_TESS_CTRL)
+ nir_vectorize_tess_levels(ordered_shaders[i]);
+ nir_opt_combine_stores(ordered_shaders[i], nir_var_shader_out);
+ }
+ if (ordered_shaders[i - 1]->info.stage == MESA_SHADER_GEOMETRY ||
+ ordered_shaders[i - 1]->info.stage == MESA_SHADER_TESS_CTRL ||
+ ordered_shaders[i - 1]->info.stage == MESA_SHADER_TESS_EVAL) {
+ nir_lower_io_to_vector(ordered_shaders[i - 1], nir_var_shader_in);
+ }
+
+ if (progress) {
+ if (nir_lower_global_vars_to_local(ordered_shaders[i])) {
+ ac_lower_indirect_derefs(ordered_shaders[i],
+ pipeline->device->physical_device->rad_info.chip_class);
+ /* remove dead writes, which can remove input loads */
+ nir_lower_vars_to_ssa(ordered_shaders[i]);
+ nir_opt_dce(ordered_shaders[i]);
+ }
+
+ if (nir_lower_global_vars_to_local(ordered_shaders[i - 1])) {
+ ac_lower_indirect_derefs(ordered_shaders[i - 1],
+ pipeline->device->physical_device->rad_info.chip_class);
+ }
+ }
+ }
}
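For orientation: ordered_shaders[] above is filled last-stage-first (the fragment shader sits at index 0, the first active stage at shader_count - 1), so the loops pair a producer at index i with its consumer at index i - 1. A standalone sketch of that iteration order, using made-up stage names rather than the real gl_shader_stage values:

#include <stdio.h>

/* Simplified stage list ordered last-to-first, mirroring how
 * ordered_shaders[] is populated (the consumer sits at the lower index). */
static const char *ordered_stages[] = {"fragment", "geometry", "tess_eval",
                                       "tess_ctrl", "vertex"};

int
main(void)
{
   const int shader_count = 5;

   /* Same direction as the varying-linking loops: producer i feeds consumer i - 1. */
   for (int i = 1; i < shader_count; ++i)
      printf("link outputs of %s to inputs of %s\n", ordered_stages[i],
             ordered_stages[i - 1]);
   return 0;
}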
static void
radv_set_driver_locations(struct radv_pipeline *pipeline, nir_shader **shaders,
- struct radv_shader_info infos[MESA_SHADER_STAGES])
+ struct radv_shader_info infos[MESA_SHADER_STAGES])
{
- if (shaders[MESA_SHADER_FRAGMENT]) {
- nir_foreach_shader_out_variable(var, shaders[MESA_SHADER_FRAGMENT])
- {
- var->data.driver_location = var->data.location + var->data.index;
- }
- }
-
- if (!shaders[MESA_SHADER_VERTEX])
- return;
-
- bool has_tess = shaders[MESA_SHADER_TESS_CTRL];
- bool has_gs = shaders[MESA_SHADER_GEOMETRY];
-
- /* Merged stage for VS and TES */
- unsigned vs_info_idx = MESA_SHADER_VERTEX;
- unsigned tes_info_idx = MESA_SHADER_TESS_EVAL;
-
- /* Which stage is the last in the vertex, tess, geometry pipeline */
- unsigned last_vtg_stage = MESA_SHADER_VERTEX;
-
- if (pipeline->device->physical_device->rad_info.chip_class >= GFX9) {
- /* These are merged into the next stage */
- vs_info_idx = has_tess ? MESA_SHADER_TESS_CTRL : MESA_SHADER_GEOMETRY;
- tes_info_idx = has_gs ? MESA_SHADER_GEOMETRY : MESA_SHADER_TESS_EVAL;
- }
-
- nir_foreach_shader_in_variable(var, shaders[MESA_SHADER_VERTEX]) {
- var->data.driver_location = var->data.location;
- }
-
- if (has_tess) {
- nir_linked_io_var_info vs2tcs =
- nir_assign_linked_io_var_locations(shaders[MESA_SHADER_VERTEX], shaders[MESA_SHADER_TESS_CTRL]);
- nir_linked_io_var_info tcs2tes =
- nir_assign_linked_io_var_locations(shaders[MESA_SHADER_TESS_CTRL], shaders[MESA_SHADER_TESS_EVAL]);
-
- infos[MESA_SHADER_VERTEX].vs.num_linked_outputs = vs2tcs.num_linked_io_vars;
- infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_inputs = vs2tcs.num_linked_io_vars;
- infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_outputs = tcs2tes.num_linked_io_vars;
- infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_patch_outputs = tcs2tes.num_linked_patch_io_vars;
- infos[MESA_SHADER_TESS_EVAL].tes.num_linked_inputs = tcs2tes.num_linked_io_vars;
- infos[MESA_SHADER_TESS_EVAL].tes.num_linked_patch_inputs = tcs2tes.num_linked_patch_io_vars;
-
- /* Copy data to merged stage */
- infos[vs_info_idx].vs.num_linked_outputs = vs2tcs.num_linked_io_vars;
- infos[tes_info_idx].tes.num_linked_inputs = tcs2tes.num_linked_io_vars;
- infos[tes_info_idx].tes.num_linked_patch_inputs = tcs2tes.num_linked_patch_io_vars;
-
- if (has_gs) {
- nir_linked_io_var_info tes2gs =
- nir_assign_linked_io_var_locations(shaders[MESA_SHADER_TESS_EVAL], shaders[MESA_SHADER_GEOMETRY]);
-
- infos[MESA_SHADER_TESS_EVAL].tes.num_linked_outputs = tes2gs.num_linked_io_vars;
- infos[MESA_SHADER_GEOMETRY].gs.num_linked_inputs = tes2gs.num_linked_io_vars;
-
- /* Copy data to merged stage */
- infos[tes_info_idx].tes.num_linked_outputs = tes2gs.num_linked_io_vars;
-
- last_vtg_stage = MESA_SHADER_GEOMETRY;
-
- } else {
- last_vtg_stage = MESA_SHADER_TESS_EVAL;
- }
- } else if (has_gs) {
- nir_linked_io_var_info vs2gs =
- nir_assign_linked_io_var_locations(shaders[MESA_SHADER_VERTEX], shaders[MESA_SHADER_GEOMETRY]);
-
- infos[MESA_SHADER_VERTEX].vs.num_linked_outputs = vs2gs.num_linked_io_vars;
- infos[MESA_SHADER_GEOMETRY].gs.num_linked_inputs = vs2gs.num_linked_io_vars;
- last_vtg_stage = MESA_SHADER_GEOMETRY;
-
- /* Copy data to merged stage */
- infos[vs_info_idx].vs.num_linked_outputs = vs2gs.num_linked_io_vars;
- }
-
- nir_foreach_shader_out_variable(var, shaders[last_vtg_stage]) {
- var->data.driver_location = var->data.location;
- }
+ if (shaders[MESA_SHADER_FRAGMENT]) {
+ nir_foreach_shader_out_variable(var, shaders[MESA_SHADER_FRAGMENT])
+ {
+ var->data.driver_location = var->data.location + var->data.index;
+ }
+ }
+
+ if (!shaders[MESA_SHADER_VERTEX])
+ return;
+
+ bool has_tess = shaders[MESA_SHADER_TESS_CTRL];
+ bool has_gs = shaders[MESA_SHADER_GEOMETRY];
+
+ /* Merged stage for VS and TES */
+ unsigned vs_info_idx = MESA_SHADER_VERTEX;
+ unsigned tes_info_idx = MESA_SHADER_TESS_EVAL;
+
+ /* Which stage is the last in the vertex, tess, geometry pipeline */
+ unsigned last_vtg_stage = MESA_SHADER_VERTEX;
+
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX9) {
+ /* These are merged into the next stage */
+ vs_info_idx = has_tess ? MESA_SHADER_TESS_CTRL : MESA_SHADER_GEOMETRY;
+ tes_info_idx = has_gs ? MESA_SHADER_GEOMETRY : MESA_SHADER_TESS_EVAL;
+ }
+
+ nir_foreach_shader_in_variable (var, shaders[MESA_SHADER_VERTEX]) {
+ var->data.driver_location = var->data.location;
+ }
+
+ if (has_tess) {
+ nir_linked_io_var_info vs2tcs = nir_assign_linked_io_var_locations(
+ shaders[MESA_SHADER_VERTEX], shaders[MESA_SHADER_TESS_CTRL]);
+ nir_linked_io_var_info tcs2tes = nir_assign_linked_io_var_locations(
+ shaders[MESA_SHADER_TESS_CTRL], shaders[MESA_SHADER_TESS_EVAL]);
+
+ infos[MESA_SHADER_VERTEX].vs.num_linked_outputs = vs2tcs.num_linked_io_vars;
+ infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_inputs = vs2tcs.num_linked_io_vars;
+ infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_outputs = tcs2tes.num_linked_io_vars;
+ infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_patch_outputs = tcs2tes.num_linked_patch_io_vars;
+ infos[MESA_SHADER_TESS_EVAL].tes.num_linked_inputs = tcs2tes.num_linked_io_vars;
+ infos[MESA_SHADER_TESS_EVAL].tes.num_linked_patch_inputs = tcs2tes.num_linked_patch_io_vars;
+
+ /* Copy data to merged stage */
+ infos[vs_info_idx].vs.num_linked_outputs = vs2tcs.num_linked_io_vars;
+ infos[tes_info_idx].tes.num_linked_inputs = tcs2tes.num_linked_io_vars;
+ infos[tes_info_idx].tes.num_linked_patch_inputs = tcs2tes.num_linked_patch_io_vars;
+
+ if (has_gs) {
+ nir_linked_io_var_info tes2gs = nir_assign_linked_io_var_locations(
+ shaders[MESA_SHADER_TESS_EVAL], shaders[MESA_SHADER_GEOMETRY]);
+
+ infos[MESA_SHADER_TESS_EVAL].tes.num_linked_outputs = tes2gs.num_linked_io_vars;
+ infos[MESA_SHADER_GEOMETRY].gs.num_linked_inputs = tes2gs.num_linked_io_vars;
+
+ /* Copy data to merged stage */
+ infos[tes_info_idx].tes.num_linked_outputs = tes2gs.num_linked_io_vars;
+
+ last_vtg_stage = MESA_SHADER_GEOMETRY;
+
+ } else {
+ last_vtg_stage = MESA_SHADER_TESS_EVAL;
+ }
+ } else if (has_gs) {
+ nir_linked_io_var_info vs2gs = nir_assign_linked_io_var_locations(
+ shaders[MESA_SHADER_VERTEX], shaders[MESA_SHADER_GEOMETRY]);
+
+ infos[MESA_SHADER_VERTEX].vs.num_linked_outputs = vs2gs.num_linked_io_vars;
+ infos[MESA_SHADER_GEOMETRY].gs.num_linked_inputs = vs2gs.num_linked_io_vars;
+ last_vtg_stage = MESA_SHADER_GEOMETRY;
+
+ /* Copy data to merged stage */
+ infos[vs_info_idx].vs.num_linked_outputs = vs2gs.num_linked_io_vars;
+ }
+
+ nir_foreach_shader_out_variable(var, shaders[last_vtg_stage])
+ {
+ var->data.driver_location = var->data.location;
+ }
}
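On GFX9+ the VS is merged into the TCS (or the GS) and the TES into the GS, which is why the linked-I/O counts above are also copied into the vs_info_idx / tes_info_idx slots. A standalone sketch of that slot mapping, with simplified stage names standing in for the MESA_SHADER_* enums:

enum stage { VS, TCS, TES, GS };

/* Which infos[] slot a stage's linked-I/O counts are mirrored into once
 * stages are merged (mirrors vs_info_idx / tes_info_idx above). */
static enum stage
merged_info_slot(enum stage s, int has_tess, int has_gs)
{
   if (s == VS)
      return has_tess ? TCS : GS; /* the GS slot is only consumed when a GS exists */
   if (s == TES)
      return has_gs ? GS : TES;
   return s;
}

int
main(void)
{
   /* VS+TCS+TES+GS pipeline: VS data is mirrored under TCS, TES data under GS. */
   return (merged_info_slot(VS, 1, 1) == TCS && merged_info_slot(TES, 1, 1) == GS) ? 0 : 1;
}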
static uint32_t
radv_get_attrib_stride(const VkPipelineVertexInputStateCreateInfo *input_state,
- uint32_t attrib_binding)
+ uint32_t attrib_binding)
{
- for (uint32_t i = 0; i < input_state->vertexBindingDescriptionCount; i++) {
- const VkVertexInputBindingDescription *input_binding =
- &input_state->pVertexBindingDescriptions[i];
+ for (uint32_t i = 0; i < input_state->vertexBindingDescriptionCount; i++) {
+ const VkVertexInputBindingDescription *input_binding =
+ &input_state->pVertexBindingDescriptions[i];
- if (input_binding->binding == attrib_binding)
- return input_binding->stride;
- }
+ if (input_binding->binding == attrib_binding)
+ return input_binding->stride;
+ }
- return 0;
+ return 0;
}
static struct radv_pipeline_key
@@ -2551,692 +2459,666 @@ radv_generate_graphics_pipeline_key(const struct radv_pipeline *pipeline,
const VkGraphicsPipelineCreateInfo *pCreateInfo,
const struct radv_blend_state *blend)
{
- RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
- struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
- const VkPipelineVertexInputStateCreateInfo *input_state =
- pCreateInfo->pVertexInputState;
- const VkPipelineVertexInputDivisorStateCreateInfoEXT *divisor_state =
- vk_find_struct_const(input_state->pNext, PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);
- bool uses_dynamic_stride = false;
-
- struct radv_pipeline_key key;
- memset(&key, 0, sizeof(key));
-
- if (pCreateInfo->flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT)
- key.optimisations_disabled = 1;
-
- key.has_multiview_view_index = !!subpass->view_mask;
-
- uint32_t binding_input_rate = 0;
- uint32_t instance_rate_divisors[MAX_VERTEX_ATTRIBS];
- for (unsigned i = 0; i < input_state->vertexBindingDescriptionCount; ++i) {
- if (input_state->pVertexBindingDescriptions[i].inputRate) {
- unsigned binding = input_state->pVertexBindingDescriptions[i].binding;
- binding_input_rate |= 1u << binding;
- instance_rate_divisors[binding] = 1;
- }
- }
- if (divisor_state) {
- for (unsigned i = 0; i < divisor_state->vertexBindingDivisorCount; ++i) {
- instance_rate_divisors[divisor_state->pVertexBindingDivisors[i].binding] =
- divisor_state->pVertexBindingDivisors[i].divisor;
- }
- }
-
- if (pCreateInfo->pDynamicState) {
- uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount;
- for (uint32_t i = 0; i < count; i++) {
- if (pCreateInfo->pDynamicState->pDynamicStates[i] == VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT) {
- uses_dynamic_stride = true;
- break;
- }
- }
- }
-
- for (unsigned i = 0; i < input_state->vertexAttributeDescriptionCount; ++i) {
- const VkVertexInputAttributeDescription *desc =
- &input_state->pVertexAttributeDescriptions[i];
- const struct util_format_description *format_desc;
- unsigned location = desc->location;
- unsigned binding = desc->binding;
- unsigned num_format, data_format;
- int first_non_void;
-
- if (binding_input_rate & (1u << binding)) {
- key.instance_rate_inputs |= 1u << location;
- key.instance_rate_divisors[location] = instance_rate_divisors[binding];
- }
-
- format_desc = vk_format_description(desc->format);
- first_non_void = vk_format_get_first_non_void_channel(desc->format);
-
- num_format = radv_translate_buffer_numformat(format_desc, first_non_void);
- data_format = radv_translate_buffer_dataformat(format_desc, first_non_void);
-
- key.vertex_attribute_formats[location] = data_format | (num_format << 4);
- key.vertex_attribute_bindings[location] = desc->binding;
- key.vertex_attribute_offsets[location] = desc->offset;
-
- if (!uses_dynamic_stride) {
- /* From the Vulkan spec 1.2.157:
- *
- * "If the bound pipeline state object was created
- * with the
- * VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT
- * dynamic state enabled then pStrides[i] specifies
- * the distance in bytes between two consecutive
- * elements within the corresponding buffer. In this
- * case the VkVertexInputBindingDescription::stride
- * state from the pipeline state object is ignored."
- *
- * Make sure the vertex attribute stride is zero to
- * avoid computing a wrong offset if it's initialized
- * to something other than zero.
- */
- key.vertex_attribute_strides[location] =
- radv_get_attrib_stride(input_state, desc->binding);
- }
-
- enum ac_fetch_format adjust = AC_FETCH_FORMAT_NONE;
- if (pipeline->device->physical_device->rad_info.chip_class <= GFX8 &&
- pipeline->device->physical_device->rad_info.family != CHIP_STONEY) {
- VkFormat format = input_state->pVertexAttributeDescriptions[i].format;
- switch(format) {
- case VK_FORMAT_A2R10G10B10_SNORM_PACK32:
- case VK_FORMAT_A2B10G10R10_SNORM_PACK32:
- adjust = AC_FETCH_FORMAT_SNORM;
- break;
- case VK_FORMAT_A2R10G10B10_SSCALED_PACK32:
- case VK_FORMAT_A2B10G10R10_SSCALED_PACK32:
- adjust = AC_FETCH_FORMAT_SSCALED;
- break;
- case VK_FORMAT_A2R10G10B10_SINT_PACK32:
- case VK_FORMAT_A2B10G10R10_SINT_PACK32:
- adjust = AC_FETCH_FORMAT_SINT;
- break;
- default:
- break;
- }
- }
- key.vertex_alpha_adjust[location] = adjust;
-
- switch (desc->format) {
- case VK_FORMAT_B8G8R8A8_UNORM:
- case VK_FORMAT_B8G8R8A8_SNORM:
- case VK_FORMAT_B8G8R8A8_USCALED:
- case VK_FORMAT_B8G8R8A8_SSCALED:
- case VK_FORMAT_B8G8R8A8_UINT:
- case VK_FORMAT_B8G8R8A8_SINT:
- case VK_FORMAT_B8G8R8A8_SRGB:
- case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
- case VK_FORMAT_A2R10G10B10_SNORM_PACK32:
- case VK_FORMAT_A2R10G10B10_USCALED_PACK32:
- case VK_FORMAT_A2R10G10B10_SSCALED_PACK32:
- case VK_FORMAT_A2R10G10B10_UINT_PACK32:
- case VK_FORMAT_A2R10G10B10_SINT_PACK32:
- key.vertex_post_shuffle |= 1 << location;
- break;
- default:
- break;
- }
- }
-
- const VkPipelineTessellationStateCreateInfo *tess =
- radv_pipeline_get_tessellation_state(pCreateInfo);
- if (tess)
- key.tess_input_vertices = tess->patchControlPoints;
-
- const VkPipelineMultisampleStateCreateInfo *vkms =
- radv_pipeline_get_multisample_state(pCreateInfo);
- if (vkms && vkms->rasterizationSamples > 1) {
- uint32_t num_samples = vkms->rasterizationSamples;
- uint32_t ps_iter_samples = radv_pipeline_get_ps_iter_samples(pCreateInfo);
- key.num_samples = num_samples;
- key.log2_ps_iter_samples = util_logbase2(ps_iter_samples);
- }
-
- key.col_format = blend->spi_shader_col_format;
- if (pipeline->device->physical_device->rad_info.chip_class < GFX8) {
- key.is_int8 = blend->col_format_is_int8;
- key.is_int10 = blend->col_format_is_int10;
- }
-
- if (pipeline->device->physical_device->rad_info.chip_class >= GFX10)
- key.topology = pCreateInfo->pInputAssemblyState->topology;
-
- return key;
+ RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
+ struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
+ const VkPipelineVertexInputStateCreateInfo *input_state = pCreateInfo->pVertexInputState;
+ const VkPipelineVertexInputDivisorStateCreateInfoEXT *divisor_state =
+ vk_find_struct_const(input_state->pNext, PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);
+ bool uses_dynamic_stride = false;
+
+ struct radv_pipeline_key key;
+ memset(&key, 0, sizeof(key));
+
+ if (pCreateInfo->flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT)
+ key.optimisations_disabled = 1;
+
+ key.has_multiview_view_index = !!subpass->view_mask;
+
+ uint32_t binding_input_rate = 0;
+ uint32_t instance_rate_divisors[MAX_VERTEX_ATTRIBS];
+ for (unsigned i = 0; i < input_state->vertexBindingDescriptionCount; ++i) {
+ if (input_state->pVertexBindingDescriptions[i].inputRate) {
+ unsigned binding = input_state->pVertexBindingDescriptions[i].binding;
+ binding_input_rate |= 1u << binding;
+ instance_rate_divisors[binding] = 1;
+ }
+ }
+ if (divisor_state) {
+ for (unsigned i = 0; i < divisor_state->vertexBindingDivisorCount; ++i) {
+ instance_rate_divisors[divisor_state->pVertexBindingDivisors[i].binding] =
+ divisor_state->pVertexBindingDivisors[i].divisor;
+ }
+ }
+
+ if (pCreateInfo->pDynamicState) {
+ uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount;
+ for (uint32_t i = 0; i < count; i++) {
+ if (pCreateInfo->pDynamicState->pDynamicStates[i] ==
+ VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT) {
+ uses_dynamic_stride = true;
+ break;
+ }
+ }
+ }
+
+ for (unsigned i = 0; i < input_state->vertexAttributeDescriptionCount; ++i) {
+ const VkVertexInputAttributeDescription *desc = &input_state->pVertexAttributeDescriptions[i];
+ const struct util_format_description *format_desc;
+ unsigned location = desc->location;
+ unsigned binding = desc->binding;
+ unsigned num_format, data_format;
+ int first_non_void;
+
+ if (binding_input_rate & (1u << binding)) {
+ key.instance_rate_inputs |= 1u << location;
+ key.instance_rate_divisors[location] = instance_rate_divisors[binding];
+ }
+
+ format_desc = vk_format_description(desc->format);
+ first_non_void = vk_format_get_first_non_void_channel(desc->format);
+
+ num_format = radv_translate_buffer_numformat(format_desc, first_non_void);
+ data_format = radv_translate_buffer_dataformat(format_desc, first_non_void);
+
+ key.vertex_attribute_formats[location] = data_format | (num_format << 4);
+ key.vertex_attribute_bindings[location] = desc->binding;
+ key.vertex_attribute_offsets[location] = desc->offset;
+
+ if (!uses_dynamic_stride) {
+ /* From the Vulkan spec 1.2.157:
+ *
+ * "If the bound pipeline state object was created
+ * with the
+ * VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT
+ * dynamic state enabled then pStrides[i] specifies
+ * the distance in bytes between two consecutive
+ * elements within the corresponding buffer. In this
+ * case the VkVertexInputBindingDescription::stride
+ * state from the pipeline state object is ignored."
+ *
+ * Make sure the vertex attribute stride is zero to
+ * avoid computing a wrong offset if it's initialized
+ * to something other than zero.
+ */
+ key.vertex_attribute_strides[location] =
+ radv_get_attrib_stride(input_state, desc->binding);
+ }
+
+ enum ac_fetch_format adjust = AC_FETCH_FORMAT_NONE;
+ if (pipeline->device->physical_device->rad_info.chip_class <= GFX8 &&
+ pipeline->device->physical_device->rad_info.family != CHIP_STONEY) {
+ VkFormat format = input_state->pVertexAttributeDescriptions[i].format;
+ switch (format) {
+ case VK_FORMAT_A2R10G10B10_SNORM_PACK32:
+ case VK_FORMAT_A2B10G10R10_SNORM_PACK32:
+ adjust = AC_FETCH_FORMAT_SNORM;
+ break;
+ case VK_FORMAT_A2R10G10B10_SSCALED_PACK32:
+ case VK_FORMAT_A2B10G10R10_SSCALED_PACK32:
+ adjust = AC_FETCH_FORMAT_SSCALED;
+ break;
+ case VK_FORMAT_A2R10G10B10_SINT_PACK32:
+ case VK_FORMAT_A2B10G10R10_SINT_PACK32:
+ adjust = AC_FETCH_FORMAT_SINT;
+ break;
+ default:
+ break;
+ }
+ }
+ key.vertex_alpha_adjust[location] = adjust;
+
+ switch (desc->format) {
+ case VK_FORMAT_B8G8R8A8_UNORM:
+ case VK_FORMAT_B8G8R8A8_SNORM:
+ case VK_FORMAT_B8G8R8A8_USCALED:
+ case VK_FORMAT_B8G8R8A8_SSCALED:
+ case VK_FORMAT_B8G8R8A8_UINT:
+ case VK_FORMAT_B8G8R8A8_SINT:
+ case VK_FORMAT_B8G8R8A8_SRGB:
+ case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
+ case VK_FORMAT_A2R10G10B10_SNORM_PACK32:
+ case VK_FORMAT_A2R10G10B10_USCALED_PACK32:
+ case VK_FORMAT_A2R10G10B10_SSCALED_PACK32:
+ case VK_FORMAT_A2R10G10B10_UINT_PACK32:
+ case VK_FORMAT_A2R10G10B10_SINT_PACK32:
+ key.vertex_post_shuffle |= 1 << location;
+ break;
+ default:
+ break;
+ }
+ }
+
+ const VkPipelineTessellationStateCreateInfo *tess =
+ radv_pipeline_get_tessellation_state(pCreateInfo);
+ if (tess)
+ key.tess_input_vertices = tess->patchControlPoints;
+
+ const VkPipelineMultisampleStateCreateInfo *vkms =
+ radv_pipeline_get_multisample_state(pCreateInfo);
+ if (vkms && vkms->rasterizationSamples > 1) {
+ uint32_t num_samples = vkms->rasterizationSamples;
+ uint32_t ps_iter_samples = radv_pipeline_get_ps_iter_samples(pCreateInfo);
+ key.num_samples = num_samples;
+ key.log2_ps_iter_samples = util_logbase2(ps_iter_samples);
+ }
+
+ key.col_format = blend->spi_shader_col_format;
+ if (pipeline->device->physical_device->rad_info.chip_class < GFX8) {
+ key.is_int8 = blend->col_format_is_int8;
+ key.is_int10 = blend->col_format_is_int10;
+ }
+
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX10)
+ key.topology = pCreateInfo->pInputAssemblyState->topology;
+
+ return key;
}
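Two encodings in the key above are easy to miss: the per-attribute format packs the data format in the low four bits with the numeric format above it, and the sample-shading rate is stored as a log2. A standalone sketch with invented numeric values (the real values come from radv_translate_buffer_dataformat/_numformat and the multisample state):

#include <assert.h>
#include <stdint.h>

static unsigned
ilog2(unsigned v)
{
   unsigned r = 0;
   while (v >>= 1)
      r++;
   return r;
}

int
main(void)
{
   unsigned data_format = 11, num_format = 4;        /* arbitrary example values */
   uint8_t packed = data_format | (num_format << 4); /* key.vertex_attribute_formats[] */
   assert((packed & 0xf) == data_format && (packed >> 4) == num_format);

   assert(ilog2(8) == 3); /* 8 shading samples -> log2_ps_iter_samples == 3 */
   return 0;
}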
static bool
radv_nir_stage_uses_xfb(const nir_shader *nir)
{
- nir_xfb_info *xfb = nir_gather_xfb_info(nir, NULL);
- bool uses_xfb = !!xfb;
+ nir_xfb_info *xfb = nir_gather_xfb_info(nir, NULL);
+ bool uses_xfb = !!xfb;
- ralloc_free(xfb);
- return uses_xfb;
+ ralloc_free(xfb);
+ return uses_xfb;
}
static void
-radv_fill_shader_keys(struct radv_device *device,
- struct radv_shader_variant_key *keys,
- const struct radv_pipeline_key *key,
- nir_shader **nir)
+radv_fill_shader_keys(struct radv_device *device, struct radv_shader_variant_key *keys,
+ const struct radv_pipeline_key *key, nir_shader **nir)
{
- keys[MESA_SHADER_VERTEX].vs.instance_rate_inputs = key->instance_rate_inputs;
- keys[MESA_SHADER_VERTEX].vs.post_shuffle = key->vertex_post_shuffle;
- for (unsigned i = 0; i < MAX_VERTEX_ATTRIBS; ++i) {
- keys[MESA_SHADER_VERTEX].vs.instance_rate_divisors[i] = key->instance_rate_divisors[i];
- keys[MESA_SHADER_VERTEX].vs.vertex_attribute_formats[i] = key->vertex_attribute_formats[i];
- keys[MESA_SHADER_VERTEX].vs.vertex_attribute_bindings[i] = key->vertex_attribute_bindings[i];
- keys[MESA_SHADER_VERTEX].vs.vertex_attribute_offsets[i] = key->vertex_attribute_offsets[i];
- keys[MESA_SHADER_VERTEX].vs.vertex_attribute_strides[i] = key->vertex_attribute_strides[i];
- keys[MESA_SHADER_VERTEX].vs.alpha_adjust[i] = key->vertex_alpha_adjust[i];
- }
- keys[MESA_SHADER_VERTEX].vs.outprim = si_conv_prim_to_gs_out(key->topology);
-
- if (nir[MESA_SHADER_TESS_CTRL]) {
- keys[MESA_SHADER_VERTEX].vs_common_out.as_ls = true;
- keys[MESA_SHADER_TESS_CTRL].tcs.input_vertices = key->tess_input_vertices;
- keys[MESA_SHADER_TESS_CTRL].tcs.primitive_mode = nir[MESA_SHADER_TESS_EVAL]->info.tess.primitive_mode;
- }
-
- if (nir[MESA_SHADER_GEOMETRY]) {
- if (nir[MESA_SHADER_TESS_CTRL])
- keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_es = true;
- else
- keys[MESA_SHADER_VERTEX].vs_common_out.as_es = true;
- }
-
- if (device->physical_device->use_ngg) {
- if (nir[MESA_SHADER_TESS_CTRL]) {
- keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg = true;
- } else {
- keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg = true;
- }
-
- if (nir[MESA_SHADER_TESS_CTRL] &&
- nir[MESA_SHADER_GEOMETRY] &&
- nir[MESA_SHADER_GEOMETRY]->info.gs.invocations *
- nir[MESA_SHADER_GEOMETRY]->info.gs.vertices_out > 256) {
- /* Fall back to the legacy path if tessellation is
- * enabled with extreme geometry because
- * EN_MAX_VERT_OUT_PER_GS_INSTANCE doesn't work and it
- * might hang.
- */
- keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg = false;
- }
-
- gl_shader_stage last_xfb_stage = MESA_SHADER_VERTEX;
-
- for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
- if (nir[i])
- last_xfb_stage = i;
- }
-
- bool uses_xfb = nir[last_xfb_stage] &&
- radv_nir_stage_uses_xfb(nir[last_xfb_stage]);
-
- if (!device->physical_device->use_ngg_streamout && uses_xfb) {
- if (nir[MESA_SHADER_TESS_CTRL])
- keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg = false;
- else
- keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg = false;
- }
-
- /* Determine if the pipeline is eligible for the NGG passthrough
- * mode. It can't be enabled for geometry shaders, for NGG
- * streamout or for vertex shaders that export the primitive ID
- * (this is checked later because we don't have the info here).
- */
- if (!nir[MESA_SHADER_GEOMETRY] && !uses_xfb) {
- if (nir[MESA_SHADER_TESS_CTRL] &&
- keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg) {
- keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg_passthrough = true;
- } else if (nir[MESA_SHADER_VERTEX] &&
- keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg) {
- keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg_passthrough = true;
- }
- }
- }
-
- for(int i = 0; i < MESA_SHADER_STAGES; ++i)
- keys[i].has_multiview_view_index = key->has_multiview_view_index;
-
- keys[MESA_SHADER_FRAGMENT].fs.col_format = key->col_format;
- keys[MESA_SHADER_FRAGMENT].fs.is_int8 = key->is_int8;
- keys[MESA_SHADER_FRAGMENT].fs.is_int10 = key->is_int10;
- keys[MESA_SHADER_FRAGMENT].fs.log2_ps_iter_samples = key->log2_ps_iter_samples;
- keys[MESA_SHADER_FRAGMENT].fs.num_samples = key->num_samples;
-
- if (nir[MESA_SHADER_COMPUTE]) {
- unsigned subgroup_size = key->compute_subgroup_size;
- unsigned req_subgroup_size = subgroup_size;
- bool require_full_subgroups = key->require_full_subgroups;
-
- if (!subgroup_size)
- subgroup_size = device->physical_device->cs_wave_size;
-
- unsigned local_size = nir[MESA_SHADER_COMPUTE]->info.cs.local_size[0] *
- nir[MESA_SHADER_COMPUTE]->info.cs.local_size[1] *
- nir[MESA_SHADER_COMPUTE]->info.cs.local_size[2];
-
- /* Games don't always request full subgroups when they should,
- * which can cause bugs if cswave32 is enabled.
- */
- if (device->physical_device->cs_wave_size == 32 &&
- nir[MESA_SHADER_COMPUTE]->info.cs.uses_wide_subgroup_intrinsics &&
- !req_subgroup_size && local_size % RADV_SUBGROUP_SIZE == 0)
- require_full_subgroups = true;
-
- if (require_full_subgroups && !req_subgroup_size) {
- /* don't use wave32 pretending to be wave64 */
- subgroup_size = RADV_SUBGROUP_SIZE;
- }
-
- keys[MESA_SHADER_COMPUTE].cs.subgroup_size = subgroup_size;
- }
+ keys[MESA_SHADER_VERTEX].vs.instance_rate_inputs = key->instance_rate_inputs;
+ keys[MESA_SHADER_VERTEX].vs.post_shuffle = key->vertex_post_shuffle;
+ for (unsigned i = 0; i < MAX_VERTEX_ATTRIBS; ++i) {
+ keys[MESA_SHADER_VERTEX].vs.instance_rate_divisors[i] = key->instance_rate_divisors[i];
+ keys[MESA_SHADER_VERTEX].vs.vertex_attribute_formats[i] = key->vertex_attribute_formats[i];
+ keys[MESA_SHADER_VERTEX].vs.vertex_attribute_bindings[i] = key->vertex_attribute_bindings[i];
+ keys[MESA_SHADER_VERTEX].vs.vertex_attribute_offsets[i] = key->vertex_attribute_offsets[i];
+ keys[MESA_SHADER_VERTEX].vs.vertex_attribute_strides[i] = key->vertex_attribute_strides[i];
+ keys[MESA_SHADER_VERTEX].vs.alpha_adjust[i] = key->vertex_alpha_adjust[i];
+ }
+ keys[MESA_SHADER_VERTEX].vs.outprim = si_conv_prim_to_gs_out(key->topology);
+
+ if (nir[MESA_SHADER_TESS_CTRL]) {
+ keys[MESA_SHADER_VERTEX].vs_common_out.as_ls = true;
+ keys[MESA_SHADER_TESS_CTRL].tcs.input_vertices = key->tess_input_vertices;
+ keys[MESA_SHADER_TESS_CTRL].tcs.primitive_mode =
+ nir[MESA_SHADER_TESS_EVAL]->info.tess.primitive_mode;
+ }
+
+ if (nir[MESA_SHADER_GEOMETRY]) {
+ if (nir[MESA_SHADER_TESS_CTRL])
+ keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_es = true;
+ else
+ keys[MESA_SHADER_VERTEX].vs_common_out.as_es = true;
+ }
+
+ if (device->physical_device->use_ngg) {
+ if (nir[MESA_SHADER_TESS_CTRL]) {
+ keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg = true;
+ } else {
+ keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg = true;
+ }
+
+ if (nir[MESA_SHADER_TESS_CTRL] && nir[MESA_SHADER_GEOMETRY] &&
+ nir[MESA_SHADER_GEOMETRY]->info.gs.invocations *
+ nir[MESA_SHADER_GEOMETRY]->info.gs.vertices_out >
+ 256) {
+ /* Fall back to the legacy path if tessellation is
+ * enabled with extreme geometry because
+ * EN_MAX_VERT_OUT_PER_GS_INSTANCE doesn't work and it
+ * might hang.
+ */
+ keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg = false;
+ }
+
+ gl_shader_stage last_xfb_stage = MESA_SHADER_VERTEX;
+
+ for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
+ if (nir[i])
+ last_xfb_stage = i;
+ }
+
+ bool uses_xfb = nir[last_xfb_stage] && radv_nir_stage_uses_xfb(nir[last_xfb_stage]);
+
+ if (!device->physical_device->use_ngg_streamout && uses_xfb) {
+ if (nir[MESA_SHADER_TESS_CTRL])
+ keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg = false;
+ else
+ keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg = false;
+ }
+
+ /* Determine if the pipeline is eligible for the NGG passthrough
+ * mode. It can't be enabled for geometry shaders, for NGG
+ * streamout or for vertex shaders that export the primitive ID
+ * (this is checked later because we don't have the info here).
+ */
+ if (!nir[MESA_SHADER_GEOMETRY] && !uses_xfb) {
+ if (nir[MESA_SHADER_TESS_CTRL] && keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg) {
+ keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg_passthrough = true;
+ } else if (nir[MESA_SHADER_VERTEX] && keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg) {
+ keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg_passthrough = true;
+ }
+ }
+ }
+
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i)
+ keys[i].has_multiview_view_index = key->has_multiview_view_index;
+
+ keys[MESA_SHADER_FRAGMENT].fs.col_format = key->col_format;
+ keys[MESA_SHADER_FRAGMENT].fs.is_int8 = key->is_int8;
+ keys[MESA_SHADER_FRAGMENT].fs.is_int10 = key->is_int10;
+ keys[MESA_SHADER_FRAGMENT].fs.log2_ps_iter_samples = key->log2_ps_iter_samples;
+ keys[MESA_SHADER_FRAGMENT].fs.num_samples = key->num_samples;
+
+ if (nir[MESA_SHADER_COMPUTE]) {
+ unsigned subgroup_size = key->compute_subgroup_size;
+ unsigned req_subgroup_size = subgroup_size;
+ bool require_full_subgroups = key->require_full_subgroups;
+
+ if (!subgroup_size)
+ subgroup_size = device->physical_device->cs_wave_size;
+
+ unsigned local_size = nir[MESA_SHADER_COMPUTE]->info.cs.local_size[0] *
+ nir[MESA_SHADER_COMPUTE]->info.cs.local_size[1] *
+ nir[MESA_SHADER_COMPUTE]->info.cs.local_size[2];
+
+ /* Games don't always request full subgroups when they should,
+ * which can cause bugs if cswave32 is enabled.
+ */
+ if (device->physical_device->cs_wave_size == 32 &&
+ nir[MESA_SHADER_COMPUTE]->info.cs.uses_wide_subgroup_intrinsics && !req_subgroup_size &&
+ local_size % RADV_SUBGROUP_SIZE == 0)
+ require_full_subgroups = true;
+
+ if (require_full_subgroups && !req_subgroup_size) {
+ /* don't use wave32 pretending to be wave64 */
+ subgroup_size = RADV_SUBGROUP_SIZE;
+ }
+
+ keys[MESA_SHADER_COMPUTE].cs.subgroup_size = subgroup_size;
+ }
}
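The NGG decisions above reduce to a few predicates: NGG is requested for the last VS/TES stage, dropped again for the "extreme geometry" tessellation case and for transform feedback without NGG streamout, and passthrough additionally requires no GS and no XFB (plus the primitive-ID check applied later). A condensed standalone sketch of that logic, with plain booleans standing in for the real per-stage state:

#include <assert.h>
#include <stdbool.h>

static bool
wants_ngg(bool use_ngg, bool extreme_tess_gs, bool uses_xfb, bool ngg_streamout)
{
   if (!use_ngg)
      return false;
   if (extreme_tess_gs) /* tess + GS with invocations * vertices_out > 256 */
      return false;
   if (uses_xfb && !ngg_streamout)
      return false;
   return true;
}

static bool
wants_ngg_passthrough(bool ngg, bool has_gs, bool uses_xfb)
{
   return ngg && !has_gs && !uses_xfb;
}

int
main(void)
{
   assert(!wants_ngg(true, false, true, false));       /* XFB without NGG streamout */
   assert(wants_ngg_passthrough(true, false, false));  /* e.g. a plain VS+FS pipeline */
   return 0;
}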
static uint8_t
-radv_get_wave_size(struct radv_device *device,
- const VkPipelineShaderStageCreateInfo *pStage,
- gl_shader_stage stage,
- const struct radv_shader_variant_key *key)
+radv_get_wave_size(struct radv_device *device, const VkPipelineShaderStageCreateInfo *pStage,
+ gl_shader_stage stage, const struct radv_shader_variant_key *key)
{
- if (stage == MESA_SHADER_GEOMETRY && !key->vs_common_out.as_ngg)
- return 64;
- else if (stage == MESA_SHADER_COMPUTE) {
- return key->cs.subgroup_size;
- }
- else if (stage == MESA_SHADER_FRAGMENT)
- return device->physical_device->ps_wave_size;
- else
- return device->physical_device->ge_wave_size;
+ if (stage == MESA_SHADER_GEOMETRY && !key->vs_common_out.as_ngg)
+ return 64;
+ else if (stage == MESA_SHADER_COMPUTE) {
+ return key->cs.subgroup_size;
+ } else if (stage == MESA_SHADER_FRAGMENT)
+ return device->physical_device->ps_wave_size;
+ else
+ return device->physical_device->ge_wave_size;
}
static uint8_t
-radv_get_ballot_bit_size(struct radv_device *device,
- const VkPipelineShaderStageCreateInfo *pStage,
- gl_shader_stage stage,
- const struct radv_shader_variant_key *key)
+radv_get_ballot_bit_size(struct radv_device *device, const VkPipelineShaderStageCreateInfo *pStage,
+ gl_shader_stage stage, const struct radv_shader_variant_key *key)
{
- if (stage == MESA_SHADER_COMPUTE && key->cs.subgroup_size)
- return key->cs.subgroup_size;
- return 64;
+ if (stage == MESA_SHADER_COMPUTE && key->cs.subgroup_size)
+ return key->cs.subgroup_size;
+ return 64;
}
static void
radv_fill_shader_info(struct radv_pipeline *pipeline,
- const VkPipelineShaderStageCreateInfo **pStages,
- struct radv_shader_variant_key *keys,
- struct radv_shader_info *infos,
+ const VkPipelineShaderStageCreateInfo **pStages,
+ struct radv_shader_variant_key *keys, struct radv_shader_info *infos,
nir_shader **nir)
{
- unsigned active_stages = 0;
- unsigned filled_stages = 0;
-
- for (int i = 0; i < MESA_SHADER_STAGES; i++) {
- if (nir[i])
- active_stages |= (1 << i);
- }
-
- if (nir[MESA_SHADER_FRAGMENT]) {
- radv_nir_shader_info_init(&infos[MESA_SHADER_FRAGMENT]);
- radv_nir_shader_info_pass(nir[MESA_SHADER_FRAGMENT],
- pipeline->layout,
- &keys[MESA_SHADER_FRAGMENT],
- &infos[MESA_SHADER_FRAGMENT]);
-
- /* TODO: These are no longer used as keys; we should refactor this. */
- keys[MESA_SHADER_VERTEX].vs_common_out.export_prim_id =
- infos[MESA_SHADER_FRAGMENT].ps.prim_id_input;
- keys[MESA_SHADER_VERTEX].vs_common_out.export_layer_id =
- infos[MESA_SHADER_FRAGMENT].ps.layer_input;
- keys[MESA_SHADER_VERTEX].vs_common_out.export_clip_dists =
- !!infos[MESA_SHADER_FRAGMENT].ps.num_input_clips_culls;
- keys[MESA_SHADER_VERTEX].vs_common_out.export_viewport_index =
- infos[MESA_SHADER_FRAGMENT].ps.viewport_index_input;
- keys[MESA_SHADER_TESS_EVAL].vs_common_out.export_prim_id =
- infos[MESA_SHADER_FRAGMENT].ps.prim_id_input;
- keys[MESA_SHADER_TESS_EVAL].vs_common_out.export_layer_id =
- infos[MESA_SHADER_FRAGMENT].ps.layer_input;
- keys[MESA_SHADER_TESS_EVAL].vs_common_out.export_clip_dists =
- !!infos[MESA_SHADER_FRAGMENT].ps.num_input_clips_culls;
- keys[MESA_SHADER_TESS_EVAL].vs_common_out.export_viewport_index =
- infos[MESA_SHADER_FRAGMENT].ps.viewport_index_input;
-
- /* NGG passthrough mode can't be enabled for vertex shaders
- * that export the primitive ID.
- *
- * TODO: I should really refactor the keys logic.
- */
- if (nir[MESA_SHADER_VERTEX] &&
- keys[MESA_SHADER_VERTEX].vs_common_out.export_prim_id) {
- keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg_passthrough = false;
- }
-
- filled_stages |= (1 << MESA_SHADER_FRAGMENT);
- }
-
- if (pipeline->device->physical_device->rad_info.chip_class >= GFX9 &&
- nir[MESA_SHADER_TESS_CTRL]) {
- struct nir_shader *combined_nir[] = {nir[MESA_SHADER_VERTEX], nir[MESA_SHADER_TESS_CTRL]};
- struct radv_shader_variant_key key = keys[MESA_SHADER_TESS_CTRL];
- key.tcs.vs_key = keys[MESA_SHADER_VERTEX].vs;
-
- radv_nir_shader_info_init(&infos[MESA_SHADER_TESS_CTRL]);
-
- for (int i = 0; i < 2; i++) {
- radv_nir_shader_info_pass(combined_nir[i],
- pipeline->layout, &key,
- &infos[MESA_SHADER_TESS_CTRL]);
- }
-
- filled_stages |= (1 << MESA_SHADER_VERTEX);
- filled_stages |= (1 << MESA_SHADER_TESS_CTRL);
- }
-
- if (pipeline->device->physical_device->rad_info.chip_class >= GFX9 &&
- nir[MESA_SHADER_GEOMETRY]) {
- gl_shader_stage pre_stage = nir[MESA_SHADER_TESS_EVAL] ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
- struct nir_shader *combined_nir[] = {nir[pre_stage], nir[MESA_SHADER_GEOMETRY]};
-
- radv_nir_shader_info_init(&infos[MESA_SHADER_GEOMETRY]);
-
- for (int i = 0; i < 2; i++) {
- radv_nir_shader_info_pass(combined_nir[i],
- pipeline->layout,
- &keys[pre_stage],
- &infos[MESA_SHADER_GEOMETRY]);
- }
-
- filled_stages |= (1 << pre_stage);
- filled_stages |= (1 << MESA_SHADER_GEOMETRY);
- }
-
- active_stages ^= filled_stages;
- while (active_stages) {
- int i = u_bit_scan(&active_stages);
- radv_nir_shader_info_init(&infos[i]);
- radv_nir_shader_info_pass(nir[i], pipeline->layout,
- &keys[i], &infos[i]);
- }
-
- for (int i = 0; i < MESA_SHADER_STAGES; i++) {
- if (nir[i]) {
- infos[i].wave_size =
- radv_get_wave_size(pipeline->device, pStages[i],
- i, &keys[i]);
- infos[i].ballot_bit_size =
- radv_get_ballot_bit_size(pipeline->device,
- pStages[i], i,
- &keys[i]);
- }
- }
+ unsigned active_stages = 0;
+ unsigned filled_stages = 0;
+
+ for (int i = 0; i < MESA_SHADER_STAGES; i++) {
+ if (nir[i])
+ active_stages |= (1 << i);
+ }
+
+ if (nir[MESA_SHADER_FRAGMENT]) {
+ radv_nir_shader_info_init(&infos[MESA_SHADER_FRAGMENT]);
+ radv_nir_shader_info_pass(nir[MESA_SHADER_FRAGMENT], pipeline->layout,
+ &keys[MESA_SHADER_FRAGMENT], &infos[MESA_SHADER_FRAGMENT]);
+
+ /* TODO: These are no longer used as keys; we should refactor this. */
+ keys[MESA_SHADER_VERTEX].vs_common_out.export_prim_id =
+ infos[MESA_SHADER_FRAGMENT].ps.prim_id_input;
+ keys[MESA_SHADER_VERTEX].vs_common_out.export_layer_id =
+ infos[MESA_SHADER_FRAGMENT].ps.layer_input;
+ keys[MESA_SHADER_VERTEX].vs_common_out.export_clip_dists =
+ !!infos[MESA_SHADER_FRAGMENT].ps.num_input_clips_culls;
+ keys[MESA_SHADER_VERTEX].vs_common_out.export_viewport_index =
+ infos[MESA_SHADER_FRAGMENT].ps.viewport_index_input;
+ keys[MESA_SHADER_TESS_EVAL].vs_common_out.export_prim_id =
+ infos[MESA_SHADER_FRAGMENT].ps.prim_id_input;
+ keys[MESA_SHADER_TESS_EVAL].vs_common_out.export_layer_id =
+ infos[MESA_SHADER_FRAGMENT].ps.layer_input;
+ keys[MESA_SHADER_TESS_EVAL].vs_common_out.export_clip_dists =
+ !!infos[MESA_SHADER_FRAGMENT].ps.num_input_clips_culls;
+ keys[MESA_SHADER_TESS_EVAL].vs_common_out.export_viewport_index =
+ infos[MESA_SHADER_FRAGMENT].ps.viewport_index_input;
+
+ /* NGG passthrough mode can't be enabled for vertex shaders
+ * that export the primitive ID.
+ *
+ * TODO: I should really refactor the keys logic.
+ */
+ if (nir[MESA_SHADER_VERTEX] && keys[MESA_SHADER_VERTEX].vs_common_out.export_prim_id) {
+ keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg_passthrough = false;
+ }
+
+ filled_stages |= (1 << MESA_SHADER_FRAGMENT);
+ }
+
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX9 &&
+ nir[MESA_SHADER_TESS_CTRL]) {
+ struct nir_shader *combined_nir[] = {nir[MESA_SHADER_VERTEX], nir[MESA_SHADER_TESS_CTRL]};
+ struct radv_shader_variant_key key = keys[MESA_SHADER_TESS_CTRL];
+ key.tcs.vs_key = keys[MESA_SHADER_VERTEX].vs;
+
+ radv_nir_shader_info_init(&infos[MESA_SHADER_TESS_CTRL]);
+
+ for (int i = 0; i < 2; i++) {
+ radv_nir_shader_info_pass(combined_nir[i], pipeline->layout, &key,
+ &infos[MESA_SHADER_TESS_CTRL]);
+ }
+
+ filled_stages |= (1 << MESA_SHADER_VERTEX);
+ filled_stages |= (1 << MESA_SHADER_TESS_CTRL);
+ }
+
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX9 &&
+ nir[MESA_SHADER_GEOMETRY]) {
+ gl_shader_stage pre_stage =
+ nir[MESA_SHADER_TESS_EVAL] ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
+ struct nir_shader *combined_nir[] = {nir[pre_stage], nir[MESA_SHADER_GEOMETRY]};
+
+ radv_nir_shader_info_init(&infos[MESA_SHADER_GEOMETRY]);
+
+ for (int i = 0; i < 2; i++) {
+ radv_nir_shader_info_pass(combined_nir[i], pipeline->layout, &keys[pre_stage],
+ &infos[MESA_SHADER_GEOMETRY]);
+ }
+
+ filled_stages |= (1 << pre_stage);
+ filled_stages |= (1 << MESA_SHADER_GEOMETRY);
+ }
+
+ active_stages ^= filled_stages;
+ while (active_stages) {
+ int i = u_bit_scan(&active_stages);
+ radv_nir_shader_info_init(&infos[i]);
+ radv_nir_shader_info_pass(nir[i], pipeline->layout, &keys[i], &infos[i]);
+ }
+
+ for (int i = 0; i < MESA_SHADER_STAGES; i++) {
+ if (nir[i]) {
+ infos[i].wave_size = radv_get_wave_size(pipeline->device, pStages[i], i, &keys[i]);
+ infos[i].ballot_bit_size =
+ radv_get_ballot_bit_size(pipeline->device, pStages[i], i, &keys[i]);
+ }
+ }
}
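The "remaining stages" walk above is a standard bitmask idiom: XOR out the stages already handled through the merged-stage paths, then pop set bits one at a time (u_bit_scan() returns the lowest set bit's index and clears it). A standalone equivalent using a compiler builtin instead of the Mesa helper:

#include <stdio.h>

static int
pop_lowest_bit(unsigned *mask)
{
   int i = __builtin_ctz(*mask); /* index of the lowest set bit */
   *mask &= *mask - 1;           /* clear it */
   return i;
}

int
main(void)
{
   unsigned active = 0x3f; /* say, six stages present */
   unsigned filled = 0x21; /* two already filled via the merged-stage paths */

   active ^= filled;
   while (active)
      printf("fill stage %d\n", pop_lowest_bit(&active)); /* prints 1, 2, 3, 4 */
   return 0;
}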
static void
-merge_tess_info(struct shader_info *tes_info,
- struct shader_info *tcs_info)
+merge_tess_info(struct shader_info *tes_info, struct shader_info *tcs_info)
{
- /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
- *
- * "PointMode. Controls generation of points rather than triangles
- * or lines. This functionality defaults to disabled, and is
- * enabled if either shader stage includes the execution mode.
- *
- * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
- * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
- * and OutputVertices, it says:
- *
- * "One mode must be set in at least one of the tessellation
- * shader stages."
- *
- * So, the fields can be set in either the TCS or TES, but they must
- * agree if set in both. Our backend looks at TES, so bitwise-or in
- * the values from the TCS.
- */
- assert(tcs_info->tess.tcs_vertices_out == 0 ||
- tes_info->tess.tcs_vertices_out == 0 ||
- tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
- tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;
-
- assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
- tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
- tcs_info->tess.spacing == tes_info->tess.spacing);
- tes_info->tess.spacing |= tcs_info->tess.spacing;
-
- assert(tcs_info->tess.primitive_mode == 0 ||
- tes_info->tess.primitive_mode == 0 ||
- tcs_info->tess.primitive_mode == tes_info->tess.primitive_mode);
- tes_info->tess.primitive_mode |= tcs_info->tess.primitive_mode;
- tes_info->tess.ccw |= tcs_info->tess.ccw;
- tes_info->tess.point_mode |= tcs_info->tess.point_mode;
-
- /* Copy the merged info back to the TCS */
- tcs_info->tess.tcs_vertices_out = tes_info->tess.tcs_vertices_out;
- tcs_info->tess.spacing = tes_info->tess.spacing;
- tcs_info->tess.primitive_mode = tes_info->tess.primitive_mode;
- tcs_info->tess.ccw = tes_info->tess.ccw;
- tcs_info->tess.point_mode = tes_info->tess.point_mode;
+ /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
+ *
+ * "PointMode. Controls generation of points rather than triangles
+ * or lines. This functionality defaults to disabled, and is
+ * enabled if either shader stage includes the execution mode.
+ *
+ * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
+ * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
+ * and OutputVertices, it says:
+ *
+ * "One mode must be set in at least one of the tessellation
+ * shader stages."
+ *
+ * So, the fields can be set in either the TCS or TES, but they must
+ * agree if set in both. Our backend looks at TES, so bitwise-or in
+ * the values from the TCS.
+ */
+ assert(tcs_info->tess.tcs_vertices_out == 0 || tes_info->tess.tcs_vertices_out == 0 ||
+ tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
+ tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;
+
+ assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
+ tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
+ tcs_info->tess.spacing == tes_info->tess.spacing);
+ tes_info->tess.spacing |= tcs_info->tess.spacing;
+
+ assert(tcs_info->tess.primitive_mode == 0 || tes_info->tess.primitive_mode == 0 ||
+ tcs_info->tess.primitive_mode == tes_info->tess.primitive_mode);
+ tes_info->tess.primitive_mode |= tcs_info->tess.primitive_mode;
+ tes_info->tess.ccw |= tcs_info->tess.ccw;
+ tes_info->tess.point_mode |= tcs_info->tess.point_mode;
+
+ /* Copy the merged info back to the TCS */
+ tcs_info->tess.tcs_vertices_out = tes_info->tess.tcs_vertices_out;
+ tcs_info->tess.spacing = tes_info->tess.spacing;
+ tcs_info->tess.primitive_mode = tes_info->tess.primitive_mode;
+ tcs_info->tess.ccw = tes_info->tess.ccw;
+ tcs_info->tess.point_mode = tes_info->tess.point_mode;
}
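Because each tessellation execution mode may be declared in either stage but must agree when declared in both, the merge above is just an OR over "zero means unset" fields, followed by copying the result back so TCS and TES end up identical. A toy standalone version with two fields:

#include <assert.h>

struct toy_tess_info {
   unsigned vertices_out; /* 0 = unspecified */
   unsigned spacing;      /* 0 = unspecified */
};

int
main(void)
{
   struct toy_tess_info tcs = {.vertices_out = 3, .spacing = 0};
   struct toy_tess_info tes = {.vertices_out = 0, .spacing = 2};

   assert(!tcs.vertices_out || !tes.vertices_out ||
          tcs.vertices_out == tes.vertices_out);
   tes.vertices_out |= tcs.vertices_out;
   tes.spacing |= tcs.spacing;
   tcs = tes; /* copy the merged info back to the TCS */

   assert(tcs.vertices_out == 3 && tcs.spacing == 2 && tes.vertices_out == 3);
   return 0;
}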
static void
-gather_tess_info(struct radv_device *device,
- nir_shader **nir, struct radv_shader_info *infos,
+gather_tess_info(struct radv_device *device, nir_shader **nir, struct radv_shader_info *infos,
const struct radv_pipeline_key *pipeline_key)
{
- merge_tess_info(&nir[MESA_SHADER_TESS_EVAL]->info, &nir[MESA_SHADER_TESS_CTRL]->info);
-
- /* Number of tessellation patches per workgroup processed by the current pipeline. */
- unsigned num_patches =
- get_tcs_num_patches(
- pipeline_key->tess_input_vertices,
- nir[MESA_SHADER_TESS_CTRL]->info.tess.tcs_vertices_out,
- infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_inputs,
- infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_outputs,
- infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_patch_outputs,
- device->tess_offchip_block_dw_size,
- device->physical_device->rad_info.chip_class,
- device->physical_device->rad_info.family);
-
- /* LDS size used by VS+TCS for storing TCS inputs and outputs. */
- unsigned tcs_lds_size =
- calculate_tess_lds_size(
- device->physical_device->rad_info.chip_class,
- pipeline_key->tess_input_vertices,
- nir[MESA_SHADER_TESS_CTRL]->info.tess.tcs_vertices_out,
- infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_inputs,
- num_patches,
- infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_outputs,
- infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_patch_outputs);
-
- infos[MESA_SHADER_TESS_CTRL].num_tess_patches = num_patches;
- infos[MESA_SHADER_TESS_CTRL].tcs.num_lds_blocks = tcs_lds_size;
- infos[MESA_SHADER_TESS_CTRL].tcs.tes_reads_tess_factors = !!(nir[MESA_SHADER_TESS_EVAL]->info.inputs_read & (VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER));
- infos[MESA_SHADER_TESS_CTRL].tcs.tes_inputs_read = nir[MESA_SHADER_TESS_EVAL]->info.inputs_read;
- infos[MESA_SHADER_TESS_CTRL].tcs.tes_patch_inputs_read = nir[MESA_SHADER_TESS_EVAL]->info.patch_inputs_read;
-
- infos[MESA_SHADER_TESS_EVAL].num_tess_patches = num_patches;
- infos[MESA_SHADER_GEOMETRY].num_tess_patches = num_patches;
-
- if (!radv_use_llvm_for_stage(device, MESA_SHADER_VERTEX)) {
- /* When the number of TCS input and output vertices are the same (typically 3):
- * - There is an equal amount of LS and HS invocations
- * - In case of merged LSHS shaders, the LS and HS halves of the shader
- * always process the exact same vertex. We can use this knowledge to optimize them.
- *
- * We don't set tcs_in_out_eq if the float controls differ because that might
- * involve different float modes for the same block and our optimizer
- * doesn't handle an instruction dominating another with a different mode.
- */
- infos[MESA_SHADER_VERTEX].vs.tcs_in_out_eq =
- device->physical_device->rad_info.chip_class >= GFX9 &&
- pipeline_key->tess_input_vertices == nir[MESA_SHADER_TESS_CTRL]->info.tess.tcs_vertices_out &&
- nir[MESA_SHADER_VERTEX]->info.float_controls_execution_mode == nir[MESA_SHADER_TESS_CTRL]->info.float_controls_execution_mode;
-
- if (infos[MESA_SHADER_VERTEX].vs.tcs_in_out_eq)
- infos[MESA_SHADER_VERTEX].vs.tcs_temp_only_input_mask =
- nir[MESA_SHADER_TESS_CTRL]->info.inputs_read &
- nir[MESA_SHADER_VERTEX]->info.outputs_written &
- ~nir[MESA_SHADER_TESS_CTRL]->info.tess.tcs_cross_invocation_inputs_read &
- ~nir[MESA_SHADER_TESS_CTRL]->info.inputs_read_indirectly &
- ~nir[MESA_SHADER_VERTEX]->info.outputs_accessed_indirectly;
-
- /* Copy data to TCS so it can be accessed by the backend if they are merged. */
- infos[MESA_SHADER_TESS_CTRL].vs.tcs_in_out_eq = infos[MESA_SHADER_VERTEX].vs.tcs_in_out_eq;
- infos[MESA_SHADER_TESS_CTRL].vs.tcs_temp_only_input_mask = infos[MESA_SHADER_VERTEX].vs.tcs_temp_only_input_mask;
- }
+ merge_tess_info(&nir[MESA_SHADER_TESS_EVAL]->info, &nir[MESA_SHADER_TESS_CTRL]->info);
+
+ /* Number of tessellation patches per workgroup processed by the current pipeline. */
+ unsigned num_patches = get_tcs_num_patches(
+ pipeline_key->tess_input_vertices, nir[MESA_SHADER_TESS_CTRL]->info.tess.tcs_vertices_out,
+ infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_inputs,
+ infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_outputs,
+ infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_patch_outputs, device->tess_offchip_block_dw_size,
+ device->physical_device->rad_info.chip_class, device->physical_device->rad_info.family);
+
+ /* LDS size used by VS+TCS for storing TCS inputs and outputs. */
+ unsigned tcs_lds_size = calculate_tess_lds_size(
+ device->physical_device->rad_info.chip_class, pipeline_key->tess_input_vertices,
+ nir[MESA_SHADER_TESS_CTRL]->info.tess.tcs_vertices_out,
+ infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_inputs, num_patches,
+ infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_outputs,
+ infos[MESA_SHADER_TESS_CTRL].tcs.num_linked_patch_outputs);
+
+ infos[MESA_SHADER_TESS_CTRL].num_tess_patches = num_patches;
+ infos[MESA_SHADER_TESS_CTRL].tcs.num_lds_blocks = tcs_lds_size;
+ infos[MESA_SHADER_TESS_CTRL].tcs.tes_reads_tess_factors =
+ !!(nir[MESA_SHADER_TESS_EVAL]->info.inputs_read &
+ (VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER));
+ infos[MESA_SHADER_TESS_CTRL].tcs.tes_inputs_read = nir[MESA_SHADER_TESS_EVAL]->info.inputs_read;
+ infos[MESA_SHADER_TESS_CTRL].tcs.tes_patch_inputs_read =
+ nir[MESA_SHADER_TESS_EVAL]->info.patch_inputs_read;
+
+ infos[MESA_SHADER_TESS_EVAL].num_tess_patches = num_patches;
+ infos[MESA_SHADER_GEOMETRY].num_tess_patches = num_patches;
+
+ if (!radv_use_llvm_for_stage(device, MESA_SHADER_VERTEX)) {
+ /* When the number of TCS input and output vertices are the same (typically 3):
+ * - There is an equal amount of LS and HS invocations
+ * - In case of merged LSHS shaders, the LS and HS halves of the shader
+ * always process the exact same vertex. We can use this knowledge to optimize them.
+ *
+ * We don't set tcs_in_out_eq if the float controls differ because that might
+ * involve different float modes for the same block and our optimizer
+ * doesn't handle an instruction dominating another with a different mode.
+ */
+ infos[MESA_SHADER_VERTEX].vs.tcs_in_out_eq =
+ device->physical_device->rad_info.chip_class >= GFX9 &&
+ pipeline_key->tess_input_vertices ==
+ nir[MESA_SHADER_TESS_CTRL]->info.tess.tcs_vertices_out &&
+ nir[MESA_SHADER_VERTEX]->info.float_controls_execution_mode ==
+ nir[MESA_SHADER_TESS_CTRL]->info.float_controls_execution_mode;
+
+ if (infos[MESA_SHADER_VERTEX].vs.tcs_in_out_eq)
+ infos[MESA_SHADER_VERTEX].vs.tcs_temp_only_input_mask =
+ nir[MESA_SHADER_TESS_CTRL]->info.inputs_read &
+ nir[MESA_SHADER_VERTEX]->info.outputs_written &
+ ~nir[MESA_SHADER_TESS_CTRL]->info.tess.tcs_cross_invocation_inputs_read &
+ ~nir[MESA_SHADER_TESS_CTRL]->info.inputs_read_indirectly &
+ ~nir[MESA_SHADER_VERTEX]->info.outputs_accessed_indirectly;
+
+ /* Copy data to TCS so it can be accessed by the backend if they are merged. */
+ infos[MESA_SHADER_TESS_CTRL].vs.tcs_in_out_eq = infos[MESA_SHADER_VERTEX].vs.tcs_in_out_eq;
+ infos[MESA_SHADER_TESS_CTRL].vs.tcs_temp_only_input_mask =
+ infos[MESA_SHADER_VERTEX].vs.tcs_temp_only_input_mask;
+ }
}
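Roughly, tcs_temp_only_input_mask above keeps only the slots that the VS writes and the TCS reads from its own invocation, directly; anything read cross-invocation or indirectly still goes through the usual LDS path. A standalone bitmask example with invented slot masks:

#include <assert.h>
#include <stdint.h>

int
main(void)
{
   uint64_t tcs_inputs_read    = 0x0f; /* slots 0-3 read by the TCS */
   uint64_t vs_outputs_written = 0x0e; /* slots 1-3 written by the VS */
   uint64_t cross_invocation   = 0x04; /* slot 2 read across invocations */
   uint64_t indirect           = 0x08; /* slot 3 accessed indirectly */

   uint64_t temp_only = tcs_inputs_read & vs_outputs_written &
                        ~cross_invocation & ~indirect;

   assert(temp_only == 0x02); /* only slot 1 can stay in temporaries */
   return 0;
}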
-static
-void radv_init_feedback(const VkPipelineCreationFeedbackCreateInfoEXT *ext)
+static void
+radv_init_feedback(const VkPipelineCreationFeedbackCreateInfoEXT *ext)
{
- if (!ext)
- return;
-
- if (ext->pPipelineCreationFeedback) {
- ext->pPipelineCreationFeedback->flags = 0;
- ext->pPipelineCreationFeedback->duration = 0;
- }
-
- for (unsigned i = 0; i < ext->pipelineStageCreationFeedbackCount; ++i) {
- ext->pPipelineStageCreationFeedbacks[i].flags = 0;
- ext->pPipelineStageCreationFeedbacks[i].duration = 0;
- }
+ if (!ext)
+ return;
+
+ if (ext->pPipelineCreationFeedback) {
+ ext->pPipelineCreationFeedback->flags = 0;
+ ext->pPipelineCreationFeedback->duration = 0;
+ }
+
+ for (unsigned i = 0; i < ext->pipelineStageCreationFeedbackCount; ++i) {
+ ext->pPipelineStageCreationFeedbacks[i].flags = 0;
+ ext->pPipelineStageCreationFeedbacks[i].duration = 0;
+ }
}
-static
-void radv_start_feedback(VkPipelineCreationFeedbackEXT *feedback)
+static void
+radv_start_feedback(VkPipelineCreationFeedbackEXT *feedback)
{
- if (!feedback)
- return;
+ if (!feedback)
+ return;
- feedback->duration -= radv_get_current_time();
- feedback ->flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
+ feedback->duration -= radv_get_current_time();
+ feedback->flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
}
-static
-void radv_stop_feedback(VkPipelineCreationFeedbackEXT *feedback, bool cache_hit)
+static void
+radv_stop_feedback(VkPipelineCreationFeedbackEXT *feedback, bool cache_hit)
{
- if (!feedback)
- return;
+ if (!feedback)
+ return;
- feedback->duration += radv_get_current_time();
- feedback ->flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT |
- (cache_hit ? VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT : 0);
+ feedback->duration += radv_get_current_time();
+ feedback->flags =
+ VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT |
+ (cache_hit ? VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT : 0);
}
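The start/stop helpers above record elapsed time without storing a separate start timestamp: subtracting the start time and later adding the end time leaves end - start accumulated in duration. A standalone example with toy timestamps:

#include <assert.h>
#include <stdint.h>

int
main(void)
{
   int64_t duration = 0;
   int64_t t_start = 1000, t_end = 1250; /* hypothetical nanosecond stamps */

   duration -= t_start; /* radv_start_feedback() */
   duration += t_end;   /* radv_stop_feedback() */

   assert(duration == 250);
   return 0;
}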
static bool
-mem_vectorize_callback(unsigned align_mul, unsigned align_offset,
- unsigned bit_size,
- unsigned num_components,
- nir_intrinsic_instr *low, nir_intrinsic_instr *high,
+mem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigned bit_size,
+ unsigned num_components, nir_intrinsic_instr *low, nir_intrinsic_instr *high,
void *data)
{
- if (num_components > 4)
- return false;
-
- /* >128 bit loads are split except with SMEM */
- if (bit_size * num_components > 128)
- return false;
-
- uint32_t align;
- if (align_offset)
- align = 1 << (ffs(align_offset) - 1);
- else
- align = align_mul;
-
- switch (low->intrinsic) {
- case nir_intrinsic_load_global:
- case nir_intrinsic_store_global:
- case nir_intrinsic_store_ssbo:
- case nir_intrinsic_load_ssbo:
- case nir_intrinsic_load_ubo:
- case nir_intrinsic_load_push_constant:
- return align % (bit_size == 8 ? 2 : 4) == 0;
- case nir_intrinsic_load_deref:
- case nir_intrinsic_store_deref:
- assert(nir_deref_mode_is(nir_src_as_deref(low->src[0]),
- nir_var_mem_shared));
- FALLTHROUGH;
- case nir_intrinsic_load_shared:
- case nir_intrinsic_store_shared:
- if (bit_size * num_components == 96) /* 96 bit loads require 128 bit alignment and are split otherwise */
- return align % 16 == 0;
- else if (bit_size * num_components == 128) /* 128 bit loads require 64 bit alignment and are split otherwise */
- return align % 8 == 0;
- else
- return align % (bit_size == 8 ? 2 : 4) == 0;
- default:
- return false;
- }
- return false;
+ if (num_components > 4)
+ return false;
+
+ /* >128 bit loads are split except with SMEM */
+ if (bit_size * num_components > 128)
+ return false;
+
+ uint32_t align;
+ if (align_offset)
+ align = 1 << (ffs(align_offset) - 1);
+ else
+ align = align_mul;
+
+ switch (low->intrinsic) {
+ case nir_intrinsic_load_global:
+ case nir_intrinsic_store_global:
+ case nir_intrinsic_store_ssbo:
+ case nir_intrinsic_load_ssbo:
+ case nir_intrinsic_load_ubo:
+ case nir_intrinsic_load_push_constant:
+ return align % (bit_size == 8 ? 2 : 4) == 0;
+ case nir_intrinsic_load_deref:
+ case nir_intrinsic_store_deref:
+ assert(nir_deref_mode_is(nir_src_as_deref(low->src[0]), nir_var_mem_shared));
+ FALLTHROUGH;
+ case nir_intrinsic_load_shared:
+ case nir_intrinsic_store_shared:
+ if (bit_size * num_components ==
+ 96) /* 96 bit loads require 128 bit alignment and are split otherwise */
+ return align % 16 == 0;
+ else if (bit_size * num_components ==
+ 128) /* 128 bit loads require 64 bit alignment and are split otherwise */
+ return align % 8 == 0;
+ else
+ return align % (bit_size == 8 ? 2 : 4) == 0;
+ default:
+ return false;
+ }
+ return false;
}
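The alignment derivation above is the only non-obvious arithmetic in the callback: when the combined access sits at a non-zero offset from its aligned base, only the lowest set bit of that offset still counts as guaranteed alignment. A standalone check with example numbers:

#include <assert.h>
#include <strings.h> /* ffs() */

int
main(void)
{
   unsigned align_mul = 16, align_offset = 4; /* base is 16-aligned, access sits at +4 */
   unsigned align = align_offset ? 1u << (ffs(align_offset) - 1) : align_mul;

   assert(align == 4);     /* effective alignment of the access */
   assert(align % 4 == 0); /* enough for a 32-bit-element vectorized load/store */
   return 0;
}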
static unsigned
lower_bit_size_callback(const nir_instr *instr, void *_)
{
- struct radv_device *device = _;
- enum chip_class chip = device->physical_device->rad_info.chip_class;
-
- if (instr->type != nir_instr_type_alu)
- return 0;
- nir_alu_instr *alu = nir_instr_as_alu(instr);
-
- if (alu->dest.dest.ssa.bit_size & (8 | 16)) {
- unsigned bit_size = alu->dest.dest.ssa.bit_size;
- switch (alu->op) {
- case nir_op_iabs:
- case nir_op_bitfield_select:
- case nir_op_udiv:
- case nir_op_idiv:
- case nir_op_umod:
- case nir_op_imod:
- case nir_op_imul_high:
- case nir_op_umul_high:
- case nir_op_ineg:
- case nir_op_irem:
- case nir_op_isign:
- return 32;
- case nir_op_imax:
- case nir_op_umax:
- case nir_op_imin:
- case nir_op_umin:
- case nir_op_ishr:
- case nir_op_ushr:
- case nir_op_ishl:
- case nir_op_uadd_sat:
- return (bit_size == 8 ||
- !(chip >= GFX8 && nir_dest_is_divergent(alu->dest.dest))) ? 32 : 0;
- default:
- return 0;
- }
- }
-
- if (nir_src_bit_size(alu->src[0].src) & (8 | 16)) {
- unsigned bit_size = nir_src_bit_size(alu->src[0].src);
- switch (alu->op) {
- case nir_op_bit_count:
- case nir_op_find_lsb:
- case nir_op_ufind_msb:
- case nir_op_i2b1:
- return 32;
- case nir_op_ilt:
- case nir_op_ige:
- case nir_op_ieq:
- case nir_op_ine:
- case nir_op_ult:
- case nir_op_uge:
- return (bit_size == 8 ||
- !(chip >= GFX8 && nir_dest_is_divergent(alu->dest.dest))) ? 32 : 0;
- default:
- return 0;
- }
- }
-
- return 0;
+ struct radv_device *device = _;
+ enum chip_class chip = device->physical_device->rad_info.chip_class;
+
+ if (instr->type != nir_instr_type_alu)
+ return 0;
+ nir_alu_instr *alu = nir_instr_as_alu(instr);
+
+ if (alu->dest.dest.ssa.bit_size & (8 | 16)) {
+ unsigned bit_size = alu->dest.dest.ssa.bit_size;
+ switch (alu->op) {
+ case nir_op_iabs:
+ case nir_op_bitfield_select:
+ case nir_op_udiv:
+ case nir_op_idiv:
+ case nir_op_umod:
+ case nir_op_imod:
+ case nir_op_imul_high:
+ case nir_op_umul_high:
+ case nir_op_ineg:
+ case nir_op_irem:
+ case nir_op_isign:
+ return 32;
+ case nir_op_imax:
+ case nir_op_umax:
+ case nir_op_imin:
+ case nir_op_umin:
+ case nir_op_ishr:
+ case nir_op_ushr:
+ case nir_op_ishl:
+ case nir_op_uadd_sat:
+ return (bit_size == 8 || !(chip >= GFX8 && nir_dest_is_divergent(alu->dest.dest))) ? 32
+ : 0;
+ default:
+ return 0;
+ }
+ }
+
+ if (nir_src_bit_size(alu->src[0].src) & (8 | 16)) {
+ unsigned bit_size = nir_src_bit_size(alu->src[0].src);
+ switch (alu->op) {
+ case nir_op_bit_count:
+ case nir_op_find_lsb:
+ case nir_op_ufind_msb:
+ case nir_op_i2b1:
+ return 32;
+ case nir_op_ilt:
+ case nir_op_ige:
+ case nir_op_ieq:
+ case nir_op_ine:
+ case nir_op_ult:
+ case nir_op_uge:
+ return (bit_size == 8 || !(chip >= GFX8 && nir_dest_is_divergent(alu->dest.dest))) ? 32
+ : 0;
+ default:
+ return 0;
+ }
+ }
+
+ return 0;
}
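Returning 32 here tells the bit-size lowering pass to perform the flagged 8/16-bit ALU op at 32 bits and narrow the result back. On plain integers the equivalence looks like this (exact for ops such as udiv):

#include <assert.h>
#include <stdint.h>

int
main(void)
{
   uint8_t a = 200, b = 7;

   /* The "widened" form: operate at 32 bits, then truncate the result. */
   uint8_t widened = (uint8_t)((uint32_t)a / (uint32_t)b);

   assert(widened == (uint8_t)(a / b)); /* matches the native 8-bit division */
   return 0;
}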
static bool
@@ -3272,1017 +3154,991 @@ opt_vectorize_callback(const nir_instr *instr, void *_)
}
}
-VkResult radv_create_shaders(struct radv_pipeline *pipeline,
- struct radv_device *device,
- struct radv_pipeline_cache *cache,
- const struct radv_pipeline_key *pipeline_key,
- const VkPipelineShaderStageCreateInfo **pStages,
- const VkPipelineCreateFlags flags,
- VkPipelineCreationFeedbackEXT *pipeline_feedback,
- VkPipelineCreationFeedbackEXT **stage_feedbacks)
+VkResult
+radv_create_shaders(struct radv_pipeline *pipeline, struct radv_device *device,
+ struct radv_pipeline_cache *cache, const struct radv_pipeline_key *pipeline_key,
+ const VkPipelineShaderStageCreateInfo **pStages,
+ const VkPipelineCreateFlags flags,
+ VkPipelineCreationFeedbackEXT *pipeline_feedback,
+ VkPipelineCreationFeedbackEXT **stage_feedbacks)
{
- struct vk_shader_module fs_m = {0};
- struct vk_shader_module *modules[MESA_SHADER_STAGES] = { 0, };
- nir_shader *nir[MESA_SHADER_STAGES] = {0};
- struct radv_shader_binary *binaries[MESA_SHADER_STAGES] = {NULL};
- struct radv_shader_variant_key keys[MESA_SHADER_STAGES] = {{{{{0}}}}};
- struct radv_shader_info infos[MESA_SHADER_STAGES] = {0};
- unsigned char hash[20], gs_copy_hash[20];
- bool keep_executable_info = (flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR) || device->keep_shader_info;
- bool keep_statistic_info = (flags & VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR) ||
- (device->instance->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS) ||
- device->keep_shader_info;
- bool disable_optimizations = flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT;
-
- radv_start_feedback(pipeline_feedback);
-
- for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) {
- if (pStages[i]) {
- modules[i] = vk_shader_module_from_handle(pStages[i]->module);
- if (modules[i]->nir)
- _mesa_sha1_compute(modules[i]->nir->info.name,
- strlen(modules[i]->nir->info.name),
- modules[i]->sha1);
-
- pipeline->active_stages |= mesa_to_vk_shader_stage(i);
- }
- }
-
- radv_hash_shaders(hash, pStages, pipeline->layout, pipeline_key,
- get_hash_flags(device, keep_statistic_info));
- memcpy(gs_copy_hash, hash, 20);
- gs_copy_hash[0] ^= 1;
-
- pipeline->pipeline_hash = *(uint64_t *)hash;
-
- bool found_in_application_cache = true;
- if (modules[MESA_SHADER_GEOMETRY] && !keep_executable_info) {
- struct radv_shader_variant *variants[MESA_SHADER_STAGES] = {0};
- radv_create_shader_variants_from_pipeline_cache(device, cache, gs_copy_hash, variants,
- &found_in_application_cache);
- pipeline->gs_copy_shader = variants[MESA_SHADER_GEOMETRY];
- }
-
- if (!keep_executable_info &&
- radv_create_shader_variants_from_pipeline_cache(device, cache, hash, pipeline->shaders,
- &found_in_application_cache) &&
- (!modules[MESA_SHADER_GEOMETRY] || pipeline->gs_copy_shader)) {
- radv_stop_feedback(pipeline_feedback, found_in_application_cache);
- return VK_SUCCESS;
- }
-
- if (flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT) {
- radv_stop_feedback(pipeline_feedback, found_in_application_cache);
- return VK_PIPELINE_COMPILE_REQUIRED_EXT;
- }
-
- if (!modules[MESA_SHADER_FRAGMENT] && !modules[MESA_SHADER_COMPUTE]) {
- nir_builder fs_b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, "noop_fs");
- fs_m = vk_shader_module_from_nir(fs_b.shader);
- modules[MESA_SHADER_FRAGMENT] = &fs_m;
- }
-
- for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) {
- const VkPipelineShaderStageCreateInfo *stage = pStages[i];
-
- if (!modules[i])
- continue;
-
- radv_start_feedback(stage_feedbacks[i]);
-
- nir[i] = radv_shader_compile_to_nir(device, modules[i],
- stage ? stage->pName : "main", i,
- stage ? stage->pSpecializationInfo : NULL,
- flags, pipeline->layout,
- pipeline_key);
-
- /* We don't want to alter meta shaders IR directly so clone it
- * first.
- */
- if (nir[i]->info.name) {
- nir[i] = nir_shader_clone(NULL, nir[i]);
- }
-
- radv_stop_feedback(stage_feedbacks[i], false);
- }
-
- bool optimize_conservatively = flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT;
-
- radv_link_shaders(pipeline, nir, optimize_conservatively);
- radv_set_driver_locations(pipeline, nir, infos);
-
- for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
- if (nir[i]) {
- radv_start_feedback(stage_feedbacks[i]);
- radv_optimize_nir(device, nir[i], optimize_conservatively, false);
-
- /* Gather info again, information such as outputs_read can be out-of-date. */
- nir_shader_gather_info(nir[i], nir_shader_get_entrypoint(nir[i]));
- radv_lower_io(device, nir[i]);
-
- radv_stop_feedback(stage_feedbacks[i], false);
- }
- }
-
- infos[MESA_SHADER_VERTEX].vs.as_ls = !!nir[MESA_SHADER_TESS_CTRL];
- infos[MESA_SHADER_VERTEX].vs.as_es = !!nir[MESA_SHADER_GEOMETRY] && !nir[MESA_SHADER_TESS_CTRL];
- infos[MESA_SHADER_TESS_EVAL].tes.as_es = !!nir[MESA_SHADER_GEOMETRY] && !!nir[MESA_SHADER_TESS_CTRL];
-
- if (nir[MESA_SHADER_TESS_CTRL]) {
- nir_lower_patch_vertices(nir[MESA_SHADER_TESS_EVAL], nir[MESA_SHADER_TESS_CTRL]->info.tess.tcs_vertices_out, NULL);
- gather_tess_info(device, nir, infos, pipeline_key);
- }
-
- for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
- if (nir[i]) {
- radv_start_feedback(stage_feedbacks[i]);
-
- if (!radv_use_llvm_for_stage(device, i)) {
- NIR_PASS_V(nir[i], nir_lower_non_uniform_access,
- nir_lower_non_uniform_ubo_access |
- nir_lower_non_uniform_ssbo_access |
- nir_lower_non_uniform_texture_access |
- nir_lower_non_uniform_image_access);
- }
- NIR_PASS_V(nir[i], nir_lower_memory_model);
-
- bool lower_to_scalar = false;
-
- nir_load_store_vectorize_options vectorize_opts = {
- .modes = nir_var_mem_ssbo | nir_var_mem_ubo |
- nir_var_mem_push_const | nir_var_mem_shared |
- nir_var_mem_global,
- .callback = mem_vectorize_callback,
- .robust_modes = 0,
- };
-
- if (device->robust_buffer_access) {
- vectorize_opts.robust_modes = nir_var_mem_ubo |
- nir_var_mem_ssbo |
- nir_var_mem_global |
- nir_var_mem_push_const;
- }
-
- if (nir_opt_load_store_vectorize(nir[i], &vectorize_opts)) {
- lower_to_scalar = true;
-
- /* Gather info again, to update whether 8/16-bit are used. */
- nir_shader_gather_info(nir[i], nir_shader_get_entrypoint(nir[i]));
- }
-
- lower_to_scalar |= nir_opt_shrink_vectors(nir[i],
- !device->instance->disable_shrink_image_store);
-
- if (lower_to_scalar)
- nir_lower_alu_to_scalar(nir[i], NULL, NULL);
-
- /* lower ALU operations */
- /* TODO: Some 64-bit tests crash inside LLVM. */
- if (!radv_use_llvm_for_stage(device, i))
- nir_lower_int64(nir[i]);
-
- /* TODO: Implement nir_op_uadd_sat with LLVM. */
- if (!radv_use_llvm_for_stage(device, i))
- nir_opt_idiv_const(nir[i], 8);
- nir_lower_idiv(nir[i], nir_lower_idiv_precise);
-
- nir_opt_sink(nir[i], nir_move_load_input | nir_move_const_undef | nir_move_copies);
- nir_opt_move(nir[i], nir_move_load_input | nir_move_const_undef | nir_move_copies);
-
- /* Lower I/O intrinsics to memory instructions. */
- bool io_to_mem = radv_lower_io_to_mem(device, nir[i], &infos[i], pipeline_key);
-
- /* optimize the lowered ALU operations */
- bool more_algebraic = true;
- while (more_algebraic) {
- more_algebraic = false;
- NIR_PASS_V(nir[i], nir_copy_prop);
- NIR_PASS_V(nir[i], nir_opt_dce);
- NIR_PASS_V(nir[i], nir_opt_constant_folding);
- NIR_PASS_V(nir[i], nir_opt_cse);
- NIR_PASS(more_algebraic, nir[i], nir_opt_algebraic);
- }
-
- if (io_to_mem || i == MESA_SHADER_COMPUTE)
- NIR_PASS_V(nir[i], nir_opt_offsets);
-
- /* Do late algebraic optimization to turn add(a,
- * neg(b)) back into subs, then the mandatory cleanup
- * after algebraic. Note that it may produce fnegs,
- * and if so then we need to keep running to squash
- * fneg(fneg(a)).
- */
- bool more_late_algebraic = true;
- while (more_late_algebraic) {
- more_late_algebraic = false;
- NIR_PASS(more_late_algebraic, nir[i], nir_opt_algebraic_late);
- NIR_PASS_V(nir[i], nir_opt_constant_folding);
- NIR_PASS_V(nir[i], nir_copy_prop);
- NIR_PASS_V(nir[i], nir_opt_dce);
- NIR_PASS_V(nir[i], nir_opt_cse);
- }
-
- if (nir[i]->info.bit_sizes_int & (8 | 16)) {
- if (device->physical_device->rad_info.chip_class >= GFX8) {
- nir_convert_to_lcssa(nir[i], true, true);
- nir_divergence_analysis(nir[i]);
- }
-
- if (nir_lower_bit_size(nir[i], lower_bit_size_callback, device)) {
- // TODO: lower idiv beforehand
- if (nir_lower_idiv(nir[i], nir_lower_idiv_precise))
- NIR_PASS_V(nir[i], nir_opt_algebraic_late); /* needed for removing ineg again */
- NIR_PASS_V(nir[i], nir_opt_constant_folding);
- NIR_PASS_V(nir[i], nir_opt_dce);
- }
-
- if (device->physical_device->rad_info.chip_class >= GFX8)
- nir_opt_remove_phis(nir[i]); /* cleanup LCSSA phis */
- if (device->physical_device->rad_info.chip_class >= GFX9)
- NIR_PASS_V(nir[i], nir_opt_vectorize, opt_vectorize_callback, NULL);
- }
-
- /* cleanup passes */
- nir_lower_load_const_to_scalar(nir[i]);
- nir_move_options move_opts =
- nir_move_const_undef | nir_move_load_ubo | nir_move_load_input |
- nir_move_comparisons | nir_move_copies;
- nir_opt_sink(nir[i], move_opts | nir_move_load_ssbo);
- nir_opt_move(nir[i], move_opts);
-
- radv_stop_feedback(stage_feedbacks[i], false);
- }
- }
-
- for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
- if (radv_can_dump_shader(device, modules[i], false))
- nir_print_shader(nir[i], stderr);
- }
-
- radv_fill_shader_keys(device, keys, pipeline_key, nir);
-
- radv_fill_shader_info(pipeline, pStages, keys, infos, nir);
-
- if ((nir[MESA_SHADER_VERTEX] &&
- keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg) ||
- (nir[MESA_SHADER_TESS_EVAL] &&
- keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg)) {
- struct gfx10_ngg_info *ngg_info;
-
- if (nir[MESA_SHADER_GEOMETRY])
- ngg_info = &infos[MESA_SHADER_GEOMETRY].ngg_info;
- else if (nir[MESA_SHADER_TESS_CTRL])
- ngg_info = &infos[MESA_SHADER_TESS_EVAL].ngg_info;
- else
- ngg_info = &infos[MESA_SHADER_VERTEX].ngg_info;
-
- gfx10_get_ngg_info(pipeline_key, pipeline, nir, infos, ngg_info);
- } else if (nir[MESA_SHADER_GEOMETRY]) {
- struct gfx9_gs_info *gs_info =
- &infos[MESA_SHADER_GEOMETRY].gs_ring_info;
-
- gfx9_get_gs_info(pipeline_key, pipeline, nir, infos, gs_info);
- }
-
- if(modules[MESA_SHADER_GEOMETRY]) {
- struct radv_shader_binary *gs_copy_binary = NULL;
- if (!pipeline->gs_copy_shader &&
- !radv_pipeline_has_ngg(pipeline)) {
- struct radv_shader_info info = {0};
- struct radv_shader_variant_key key = {0};
-
- key.has_multiview_view_index =
- keys[MESA_SHADER_GEOMETRY].has_multiview_view_index;
-
- radv_nir_shader_info_pass(nir[MESA_SHADER_GEOMETRY],
- pipeline->layout, &key,
- &info);
- info.wave_size = 64; /* Wave32 not supported. */
- info.ballot_bit_size = 64;
-
- pipeline->gs_copy_shader = radv_create_gs_copy_shader(
- device, nir[MESA_SHADER_GEOMETRY], &info,
- &gs_copy_binary, keep_executable_info, keep_statistic_info,
- keys[MESA_SHADER_GEOMETRY].has_multiview_view_index,
- disable_optimizations);
- }
-
- if (!keep_executable_info && pipeline->gs_copy_shader) {
- struct radv_shader_binary *gs_binaries[MESA_SHADER_STAGES] = {NULL};
- struct radv_shader_variant *gs_variants[MESA_SHADER_STAGES] = {0};
-
- gs_binaries[MESA_SHADER_GEOMETRY] = gs_copy_binary;
- gs_variants[MESA_SHADER_GEOMETRY] = pipeline->gs_copy_shader;
-
- radv_pipeline_cache_insert_shaders(device, cache,
- gs_copy_hash,
- gs_variants,
- gs_binaries);
- }
- free(gs_copy_binary);
- }
-
- if (nir[MESA_SHADER_FRAGMENT]) {
- if (!pipeline->shaders[MESA_SHADER_FRAGMENT]) {
- radv_start_feedback(stage_feedbacks[MESA_SHADER_FRAGMENT]);
-
- pipeline->shaders[MESA_SHADER_FRAGMENT] =
- radv_shader_variant_compile(device, modules[MESA_SHADER_FRAGMENT], &nir[MESA_SHADER_FRAGMENT], 1,
- pipeline->layout, keys + MESA_SHADER_FRAGMENT,
- infos + MESA_SHADER_FRAGMENT,
- keep_executable_info, keep_statistic_info,
- disable_optimizations,
- &binaries[MESA_SHADER_FRAGMENT]);
-
- radv_stop_feedback(stage_feedbacks[MESA_SHADER_FRAGMENT], false);
- }
- }
-
- if (device->physical_device->rad_info.chip_class >= GFX9 && modules[MESA_SHADER_TESS_CTRL]) {
- if (!pipeline->shaders[MESA_SHADER_TESS_CTRL]) {
- struct nir_shader *combined_nir[] = {nir[MESA_SHADER_VERTEX], nir[MESA_SHADER_TESS_CTRL]};
- struct radv_shader_variant_key key = keys[MESA_SHADER_TESS_CTRL];
- key.tcs.vs_key = keys[MESA_SHADER_VERTEX].vs;
-
- radv_start_feedback(stage_feedbacks[MESA_SHADER_TESS_CTRL]);
-
- pipeline->shaders[MESA_SHADER_TESS_CTRL] = radv_shader_variant_compile(device, modules[MESA_SHADER_TESS_CTRL], combined_nir, 2,
- pipeline->layout,
- &key, &infos[MESA_SHADER_TESS_CTRL], keep_executable_info,
- keep_statistic_info,
- disable_optimizations,
- &binaries[MESA_SHADER_TESS_CTRL]);
-
- radv_stop_feedback(stage_feedbacks[MESA_SHADER_TESS_CTRL], false);
- }
- modules[MESA_SHADER_VERTEX] = NULL;
- }
-
- if (device->physical_device->rad_info.chip_class >= GFX9 && modules[MESA_SHADER_GEOMETRY]) {
- gl_shader_stage pre_stage = modules[MESA_SHADER_TESS_EVAL] ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
- if (!pipeline->shaders[MESA_SHADER_GEOMETRY]) {
- struct nir_shader *combined_nir[] = {nir[pre_stage], nir[MESA_SHADER_GEOMETRY]};
-
- radv_start_feedback(stage_feedbacks[MESA_SHADER_GEOMETRY]);
-
- pipeline->shaders[MESA_SHADER_GEOMETRY] = radv_shader_variant_compile(device, modules[MESA_SHADER_GEOMETRY], combined_nir, 2,
- pipeline->layout,
- &keys[pre_stage], &infos[MESA_SHADER_GEOMETRY], keep_executable_info,
- keep_statistic_info,
- disable_optimizations,
- &binaries[MESA_SHADER_GEOMETRY]);
-
- radv_stop_feedback(stage_feedbacks[MESA_SHADER_GEOMETRY], false);
- }
- modules[pre_stage] = NULL;
- }
-
- for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
- if(modules[i] && !pipeline->shaders[i]) {
- radv_start_feedback(stage_feedbacks[i]);
-
- pipeline->shaders[i] = radv_shader_variant_compile(device, modules[i], &nir[i], 1,
- pipeline->layout,
- keys + i, infos + i, keep_executable_info,
- keep_statistic_info,
- disable_optimizations,
- &binaries[i]);
-
- radv_stop_feedback(stage_feedbacks[i], false);
- }
- }
-
- if (!keep_executable_info) {
- radv_pipeline_cache_insert_shaders(device, cache, hash, pipeline->shaders,
- binaries);
- }
-
- for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
- free(binaries[i]);
- if (nir[i]) {
- ralloc_free(nir[i]);
-
- if (radv_can_dump_shader_stats(device, modules[i])) {
- radv_dump_shader_stats(device, pipeline, i, stderr);
- }
- }
- }
-
- if (fs_m.nir)
- ralloc_free(fs_m.nir);
-
- radv_stop_feedback(pipeline_feedback, false);
- return VK_SUCCESS;
+ struct vk_shader_module fs_m = {0};
+ struct vk_shader_module *modules[MESA_SHADER_STAGES] = {
+ 0,
+ };
+ nir_shader *nir[MESA_SHADER_STAGES] = {0};
+ struct radv_shader_binary *binaries[MESA_SHADER_STAGES] = {NULL};
+ struct radv_shader_variant_key keys[MESA_SHADER_STAGES] = {{{{{0}}}}};
+ struct radv_shader_info infos[MESA_SHADER_STAGES] = {0};
+ unsigned char hash[20], gs_copy_hash[20];
+ bool keep_executable_info =
+ (flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR) ||
+ device->keep_shader_info;
+ bool keep_statistic_info = (flags & VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR) ||
+ (device->instance->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS) ||
+ device->keep_shader_info;
+ bool disable_optimizations = flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT;
+
+ radv_start_feedback(pipeline_feedback);
+
+ for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) {
+ if (pStages[i]) {
+ modules[i] = vk_shader_module_from_handle(pStages[i]->module);
+ if (modules[i]->nir)
+ _mesa_sha1_compute(modules[i]->nir->info.name, strlen(modules[i]->nir->info.name),
+ modules[i]->sha1);
+
+ pipeline->active_stages |= mesa_to_vk_shader_stage(i);
+ }
+ }
+
+ radv_hash_shaders(hash, pStages, pipeline->layout, pipeline_key,
+ get_hash_flags(device, keep_statistic_info));
+ memcpy(gs_copy_hash, hash, 20);
+ gs_copy_hash[0] ^= 1;
+
+ pipeline->pipeline_hash = *(uint64_t *)hash;
+
+ bool found_in_application_cache = true;
+ if (modules[MESA_SHADER_GEOMETRY] && !keep_executable_info) {
+ struct radv_shader_variant *variants[MESA_SHADER_STAGES] = {0};
+ radv_create_shader_variants_from_pipeline_cache(device, cache, gs_copy_hash, variants,
+ &found_in_application_cache);
+ pipeline->gs_copy_shader = variants[MESA_SHADER_GEOMETRY];
+ }
+
+ if (!keep_executable_info &&
+ radv_create_shader_variants_from_pipeline_cache(device, cache, hash, pipeline->shaders,
+ &found_in_application_cache) &&
+ (!modules[MESA_SHADER_GEOMETRY] || pipeline->gs_copy_shader)) {
+ radv_stop_feedback(pipeline_feedback, found_in_application_cache);
+ return VK_SUCCESS;
+ }
+
+ if (flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT) {
+ radv_stop_feedback(pipeline_feedback, found_in_application_cache);
+ return VK_PIPELINE_COMPILE_REQUIRED_EXT;
+ }
+
+ if (!modules[MESA_SHADER_FRAGMENT] && !modules[MESA_SHADER_COMPUTE]) {
+ nir_builder fs_b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, "noop_fs");
+ fs_m = vk_shader_module_from_nir(fs_b.shader);
+ modules[MESA_SHADER_FRAGMENT] = &fs_m;
+ }
+
+ for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) {
+ const VkPipelineShaderStageCreateInfo *stage = pStages[i];
+
+ if (!modules[i])
+ continue;
+
+ radv_start_feedback(stage_feedbacks[i]);
+
+ nir[i] = radv_shader_compile_to_nir(device, modules[i], stage ? stage->pName : "main", i,
+ stage ? stage->pSpecializationInfo : NULL, flags,
+ pipeline->layout, pipeline_key);
+
+ /* We don't want to alter meta shaders IR directly so clone it
+ * first.
+ */
+ if (nir[i]->info.name) {
+ nir[i] = nir_shader_clone(NULL, nir[i]);
+ }
+
+ radv_stop_feedback(stage_feedbacks[i], false);
+ }
+
+ bool optimize_conservatively = flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT;
+
+ radv_link_shaders(pipeline, nir, optimize_conservatively);
+ radv_set_driver_locations(pipeline, nir, infos);
+
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
+ if (nir[i]) {
+ radv_start_feedback(stage_feedbacks[i]);
+ radv_optimize_nir(device, nir[i], optimize_conservatively, false);
+
+ /* Gather info again, information such as outputs_read can be out-of-date. */
+ nir_shader_gather_info(nir[i], nir_shader_get_entrypoint(nir[i]));
+ radv_lower_io(device, nir[i]);
+
+ radv_stop_feedback(stage_feedbacks[i], false);
+ }
+ }
+
+ infos[MESA_SHADER_VERTEX].vs.as_ls = !!nir[MESA_SHADER_TESS_CTRL];
+ infos[MESA_SHADER_VERTEX].vs.as_es = !!nir[MESA_SHADER_GEOMETRY] && !nir[MESA_SHADER_TESS_CTRL];
+ infos[MESA_SHADER_TESS_EVAL].tes.as_es =
+ !!nir[MESA_SHADER_GEOMETRY] && !!nir[MESA_SHADER_TESS_CTRL];
+
+ if (nir[MESA_SHADER_TESS_CTRL]) {
+ nir_lower_patch_vertices(nir[MESA_SHADER_TESS_EVAL],
+ nir[MESA_SHADER_TESS_CTRL]->info.tess.tcs_vertices_out, NULL);
+ gather_tess_info(device, nir, infos, pipeline_key);
+ }
+
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
+ if (nir[i]) {
+ radv_start_feedback(stage_feedbacks[i]);
+
+ if (!radv_use_llvm_for_stage(device, i)) {
+ NIR_PASS_V(nir[i], nir_lower_non_uniform_access,
+ nir_lower_non_uniform_ubo_access | nir_lower_non_uniform_ssbo_access |
+ nir_lower_non_uniform_texture_access |
+ nir_lower_non_uniform_image_access);
+ }
+ NIR_PASS_V(nir[i], nir_lower_memory_model);
+
+ bool lower_to_scalar = false;
+
+ nir_load_store_vectorize_options vectorize_opts = {
+ .modes = nir_var_mem_ssbo | nir_var_mem_ubo | nir_var_mem_push_const |
+ nir_var_mem_shared | nir_var_mem_global,
+ .callback = mem_vectorize_callback,
+ .robust_modes = 0,
+ };
+
+ if (device->robust_buffer_access) {
+ vectorize_opts.robust_modes =
+ nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_global | nir_var_mem_push_const;
+ }
+
+ if (nir_opt_load_store_vectorize(nir[i], &vectorize_opts)) {
+ lower_to_scalar = true;
+
+ /* Gather info again, to update whether 8/16-bit are used. */
+ nir_shader_gather_info(nir[i], nir_shader_get_entrypoint(nir[i]));
+ }
+
+ lower_to_scalar |=
+ nir_opt_shrink_vectors(nir[i], !device->instance->disable_shrink_image_store);
+
+ if (lower_to_scalar)
+ nir_lower_alu_to_scalar(nir[i], NULL, NULL);
+
+ /* lower ALU operations */
+ /* TODO: Some 64-bit tests crash inside LLVM. */
+ if (!radv_use_llvm_for_stage(device, i))
+ nir_lower_int64(nir[i]);
+
+ /* TODO: Implement nir_op_uadd_sat with LLVM. */
+ if (!radv_use_llvm_for_stage(device, i))
+ nir_opt_idiv_const(nir[i], 8);
+ nir_lower_idiv(nir[i], nir_lower_idiv_precise);
+
+ nir_opt_sink(nir[i], nir_move_load_input | nir_move_const_undef | nir_move_copies);
+ nir_opt_move(nir[i], nir_move_load_input | nir_move_const_undef | nir_move_copies);
+
+ /* Lower I/O intrinsics to memory instructions. */
+ bool io_to_mem = radv_lower_io_to_mem(device, nir[i], &infos[i], pipeline_key);
+
+ /* optimize the lowered ALU operations */
+ bool more_algebraic = true;
+ while (more_algebraic) {
+ more_algebraic = false;
+ NIR_PASS_V(nir[i], nir_copy_prop);
+ NIR_PASS_V(nir[i], nir_opt_dce);
+ NIR_PASS_V(nir[i], nir_opt_constant_folding);
+ NIR_PASS_V(nir[i], nir_opt_cse);
+ NIR_PASS(more_algebraic, nir[i], nir_opt_algebraic);
+ }
+
+ if (io_to_mem || i == MESA_SHADER_COMPUTE)
+ NIR_PASS_V(nir[i], nir_opt_offsets);
+
+ /* Do late algebraic optimization to turn add(a,
+ * neg(b)) back into subs, then the mandatory cleanup
+ * after algebraic. Note that it may produce fnegs,
+ * and if so then we need to keep running to squash
+ * fneg(fneg(a)).
+ */
+ bool more_late_algebraic = true;
+ while (more_late_algebraic) {
+ more_late_algebraic = false;
+ NIR_PASS(more_late_algebraic, nir[i], nir_opt_algebraic_late);
+ NIR_PASS_V(nir[i], nir_opt_constant_folding);
+ NIR_PASS_V(nir[i], nir_copy_prop);
+ NIR_PASS_V(nir[i], nir_opt_dce);
+ NIR_PASS_V(nir[i], nir_opt_cse);
+ }
+
+ if (nir[i]->info.bit_sizes_int & (8 | 16)) {
+ if (device->physical_device->rad_info.chip_class >= GFX8) {
+ nir_convert_to_lcssa(nir[i], true, true);
+ nir_divergence_analysis(nir[i]);
+ }
+
+ if (nir_lower_bit_size(nir[i], lower_bit_size_callback, device)) {
+ // TODO: lower idiv beforehand
+ if (nir_lower_idiv(nir[i], nir_lower_idiv_precise))
+ NIR_PASS_V(nir[i], nir_opt_algebraic_late); /* needed for removing ineg again */
+ NIR_PASS_V(nir[i], nir_opt_constant_folding);
+ NIR_PASS_V(nir[i], nir_opt_dce);
+ }
+
+ if (device->physical_device->rad_info.chip_class >= GFX8)
+ nir_opt_remove_phis(nir[i]); /* cleanup LCSSA phis */
+ if (device->physical_device->rad_info.chip_class >= GFX9)
+ NIR_PASS_V(nir[i], nir_opt_vectorize, opt_vectorize_callback, NULL);
+ }
+
+ /* cleanup passes */
+ nir_lower_load_const_to_scalar(nir[i]);
+ nir_move_options move_opts = nir_move_const_undef | nir_move_load_ubo |
+ nir_move_load_input | nir_move_comparisons | nir_move_copies;
+ nir_opt_sink(nir[i], move_opts | nir_move_load_ssbo);
+ nir_opt_move(nir[i], move_opts);
+
+ radv_stop_feedback(stage_feedbacks[i], false);
+ }
+ }
+
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
+ if (radv_can_dump_shader(device, modules[i], false))
+ nir_print_shader(nir[i], stderr);
+ }
+
+ radv_fill_shader_keys(device, keys, pipeline_key, nir);
+
+ radv_fill_shader_info(pipeline, pStages, keys, infos, nir);
+
+ if ((nir[MESA_SHADER_VERTEX] && keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg) ||
+ (nir[MESA_SHADER_TESS_EVAL] && keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg)) {
+ struct gfx10_ngg_info *ngg_info;
+
+ if (nir[MESA_SHADER_GEOMETRY])
+ ngg_info = &infos[MESA_SHADER_GEOMETRY].ngg_info;
+ else if (nir[MESA_SHADER_TESS_CTRL])
+ ngg_info = &infos[MESA_SHADER_TESS_EVAL].ngg_info;
+ else
+ ngg_info = &infos[MESA_SHADER_VERTEX].ngg_info;
+
+ gfx10_get_ngg_info(pipeline_key, pipeline, nir, infos, ngg_info);
+ } else if (nir[MESA_SHADER_GEOMETRY]) {
+ struct gfx9_gs_info *gs_info = &infos[MESA_SHADER_GEOMETRY].gs_ring_info;
+
+ gfx9_get_gs_info(pipeline_key, pipeline, nir, infos, gs_info);
+ }
+
+ if (modules[MESA_SHADER_GEOMETRY]) {
+ struct radv_shader_binary *gs_copy_binary = NULL;
+ if (!pipeline->gs_copy_shader && !radv_pipeline_has_ngg(pipeline)) {
+ struct radv_shader_info info = {0};
+ struct radv_shader_variant_key key = {0};
+
+ key.has_multiview_view_index = keys[MESA_SHADER_GEOMETRY].has_multiview_view_index;
+
+ radv_nir_shader_info_pass(nir[MESA_SHADER_GEOMETRY], pipeline->layout, &key, &info);
+ info.wave_size = 64; /* Wave32 not supported. */
+ info.ballot_bit_size = 64;
+
+ pipeline->gs_copy_shader = radv_create_gs_copy_shader(
+ device, nir[MESA_SHADER_GEOMETRY], &info, &gs_copy_binary, keep_executable_info,
+ keep_statistic_info, keys[MESA_SHADER_GEOMETRY].has_multiview_view_index,
+ disable_optimizations);
+ }
+
+ if (!keep_executable_info && pipeline->gs_copy_shader) {
+ struct radv_shader_binary *gs_binaries[MESA_SHADER_STAGES] = {NULL};
+ struct radv_shader_variant *gs_variants[MESA_SHADER_STAGES] = {0};
+
+ gs_binaries[MESA_SHADER_GEOMETRY] = gs_copy_binary;
+ gs_variants[MESA_SHADER_GEOMETRY] = pipeline->gs_copy_shader;
+
+ radv_pipeline_cache_insert_shaders(device, cache, gs_copy_hash, gs_variants, gs_binaries);
+ }
+ free(gs_copy_binary);
+ }
+
+ if (nir[MESA_SHADER_FRAGMENT]) {
+ if (!pipeline->shaders[MESA_SHADER_FRAGMENT]) {
+ radv_start_feedback(stage_feedbacks[MESA_SHADER_FRAGMENT]);
+
+ pipeline->shaders[MESA_SHADER_FRAGMENT] = radv_shader_variant_compile(
+ device, modules[MESA_SHADER_FRAGMENT], &nir[MESA_SHADER_FRAGMENT], 1, pipeline->layout,
+ keys + MESA_SHADER_FRAGMENT, infos + MESA_SHADER_FRAGMENT, keep_executable_info,
+ keep_statistic_info, disable_optimizations, &binaries[MESA_SHADER_FRAGMENT]);
+
+ radv_stop_feedback(stage_feedbacks[MESA_SHADER_FRAGMENT], false);
+ }
+ }
+
+ if (device->physical_device->rad_info.chip_class >= GFX9 && modules[MESA_SHADER_TESS_CTRL]) {
+ if (!pipeline->shaders[MESA_SHADER_TESS_CTRL]) {
+ struct nir_shader *combined_nir[] = {nir[MESA_SHADER_VERTEX], nir[MESA_SHADER_TESS_CTRL]};
+ struct radv_shader_variant_key key = keys[MESA_SHADER_TESS_CTRL];
+ key.tcs.vs_key = keys[MESA_SHADER_VERTEX].vs;
+
+ radv_start_feedback(stage_feedbacks[MESA_SHADER_TESS_CTRL]);
+
+ pipeline->shaders[MESA_SHADER_TESS_CTRL] = radv_shader_variant_compile(
+ device, modules[MESA_SHADER_TESS_CTRL], combined_nir, 2, pipeline->layout, &key,
+ &infos[MESA_SHADER_TESS_CTRL], keep_executable_info, keep_statistic_info,
+ disable_optimizations, &binaries[MESA_SHADER_TESS_CTRL]);
+
+ radv_stop_feedback(stage_feedbacks[MESA_SHADER_TESS_CTRL], false);
+ }
+ modules[MESA_SHADER_VERTEX] = NULL;
+ }
+
+ if (device->physical_device->rad_info.chip_class >= GFX9 && modules[MESA_SHADER_GEOMETRY]) {
+ gl_shader_stage pre_stage =
+ modules[MESA_SHADER_TESS_EVAL] ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
+ if (!pipeline->shaders[MESA_SHADER_GEOMETRY]) {
+ struct nir_shader *combined_nir[] = {nir[pre_stage], nir[MESA_SHADER_GEOMETRY]};
+
+ radv_start_feedback(stage_feedbacks[MESA_SHADER_GEOMETRY]);
+
+ pipeline->shaders[MESA_SHADER_GEOMETRY] = radv_shader_variant_compile(
+ device, modules[MESA_SHADER_GEOMETRY], combined_nir, 2, pipeline->layout,
+ &keys[pre_stage], &infos[MESA_SHADER_GEOMETRY], keep_executable_info,
+ keep_statistic_info, disable_optimizations, &binaries[MESA_SHADER_GEOMETRY]);
+
+ radv_stop_feedback(stage_feedbacks[MESA_SHADER_GEOMETRY], false);
+ }
+ modules[pre_stage] = NULL;
+ }
+
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
+ if (modules[i] && !pipeline->shaders[i]) {
+ radv_start_feedback(stage_feedbacks[i]);
+
+ pipeline->shaders[i] = radv_shader_variant_compile(
+ device, modules[i], &nir[i], 1, pipeline->layout, keys + i, infos + i,
+ keep_executable_info, keep_statistic_info, disable_optimizations, &binaries[i]);
+
+ radv_stop_feedback(stage_feedbacks[i], false);
+ }
+ }
+
+ if (!keep_executable_info) {
+ radv_pipeline_cache_insert_shaders(device, cache, hash, pipeline->shaders, binaries);
+ }
+
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
+ free(binaries[i]);
+ if (nir[i]) {
+ ralloc_free(nir[i]);
+
+ if (radv_can_dump_shader_stats(device, modules[i])) {
+ radv_dump_shader_stats(device, pipeline, i, stderr);
+ }
+ }
+ }
+
+ if (fs_m.nir)
+ ralloc_free(fs_m.nir);
+
+ radv_stop_feedback(pipeline_feedback, false);
+ return VK_SUCCESS;
}

static uint32_t
-radv_pipeline_stage_to_user_data_0(struct radv_pipeline *pipeline,
- gl_shader_stage stage, enum chip_class chip_class)
+radv_pipeline_stage_to_user_data_0(struct radv_pipeline *pipeline, gl_shader_stage stage,
+ enum chip_class chip_class)
{
- bool has_gs = radv_pipeline_has_gs(pipeline);
- bool has_tess = radv_pipeline_has_tess(pipeline);
- bool has_ngg = radv_pipeline_has_ngg(pipeline);
-
- switch (stage) {
- case MESA_SHADER_FRAGMENT:
- return R_00B030_SPI_SHADER_USER_DATA_PS_0;
- case MESA_SHADER_VERTEX:
- if (has_tess) {
- if (chip_class >= GFX10) {
- return R_00B430_SPI_SHADER_USER_DATA_HS_0;
- } else if (chip_class == GFX9) {
- return R_00B430_SPI_SHADER_USER_DATA_LS_0;
- } else {
- return R_00B530_SPI_SHADER_USER_DATA_LS_0;
- }
-
- }
-
- if (has_gs) {
- if (chip_class >= GFX10) {
- return R_00B230_SPI_SHADER_USER_DATA_GS_0;
- } else {
- return R_00B330_SPI_SHADER_USER_DATA_ES_0;
- }
- }
-
- if (has_ngg)
- return R_00B230_SPI_SHADER_USER_DATA_GS_0;
-
- return R_00B130_SPI_SHADER_USER_DATA_VS_0;
- case MESA_SHADER_GEOMETRY:
- return chip_class == GFX9 ? R_00B330_SPI_SHADER_USER_DATA_ES_0 :
- R_00B230_SPI_SHADER_USER_DATA_GS_0;
- case MESA_SHADER_COMPUTE:
- return R_00B900_COMPUTE_USER_DATA_0;
- case MESA_SHADER_TESS_CTRL:
- return chip_class == GFX9 ? R_00B430_SPI_SHADER_USER_DATA_LS_0 :
- R_00B430_SPI_SHADER_USER_DATA_HS_0;
- case MESA_SHADER_TESS_EVAL:
- if (has_gs) {
- return chip_class >= GFX10 ? R_00B230_SPI_SHADER_USER_DATA_GS_0 :
- R_00B330_SPI_SHADER_USER_DATA_ES_0;
- } else if (has_ngg) {
- return R_00B230_SPI_SHADER_USER_DATA_GS_0;
- } else {
- return R_00B130_SPI_SHADER_USER_DATA_VS_0;
- }
- default:
- unreachable("unknown shader");
- }
+ bool has_gs = radv_pipeline_has_gs(pipeline);
+ bool has_tess = radv_pipeline_has_tess(pipeline);
+ bool has_ngg = radv_pipeline_has_ngg(pipeline);
+
+ switch (stage) {
+ case MESA_SHADER_FRAGMENT:
+ return R_00B030_SPI_SHADER_USER_DATA_PS_0;
+ case MESA_SHADER_VERTEX:
+ if (has_tess) {
+ if (chip_class >= GFX10) {
+ return R_00B430_SPI_SHADER_USER_DATA_HS_0;
+ } else if (chip_class == GFX9) {
+ return R_00B430_SPI_SHADER_USER_DATA_LS_0;
+ } else {
+ return R_00B530_SPI_SHADER_USER_DATA_LS_0;
+ }
+ }
+
+ if (has_gs) {
+ if (chip_class >= GFX10) {
+ return R_00B230_SPI_SHADER_USER_DATA_GS_0;
+ } else {
+ return R_00B330_SPI_SHADER_USER_DATA_ES_0;
+ }
+ }
+
+ if (has_ngg)
+ return R_00B230_SPI_SHADER_USER_DATA_GS_0;
+
+ return R_00B130_SPI_SHADER_USER_DATA_VS_0;
+ case MESA_SHADER_GEOMETRY:
+ return chip_class == GFX9 ? R_00B330_SPI_SHADER_USER_DATA_ES_0
+ : R_00B230_SPI_SHADER_USER_DATA_GS_0;
+ case MESA_SHADER_COMPUTE:
+ return R_00B900_COMPUTE_USER_DATA_0;
+ case MESA_SHADER_TESS_CTRL:
+ return chip_class == GFX9 ? R_00B430_SPI_SHADER_USER_DATA_LS_0
+ : R_00B430_SPI_SHADER_USER_DATA_HS_0;
+ case MESA_SHADER_TESS_EVAL:
+ if (has_gs) {
+ return chip_class >= GFX10 ? R_00B230_SPI_SHADER_USER_DATA_GS_0
+ : R_00B330_SPI_SHADER_USER_DATA_ES_0;
+ } else if (has_ngg) {
+ return R_00B230_SPI_SHADER_USER_DATA_GS_0;
+ } else {
+ return R_00B130_SPI_SHADER_USER_DATA_VS_0;
+ }
+ default:
+ unreachable("unknown shader");
+ }
}

struct radv_bin_size_entry {
- unsigned bpp;
- VkExtent2D extent;
+ unsigned bpp;
+ VkExtent2D extent;
};

static VkExtent2D
-radv_gfx9_compute_bin_size(const struct radv_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *pCreateInfo)
-{
- static const struct radv_bin_size_entry color_size_table[][3][9] = {
- {
- /* One RB / SE */
- {
- /* One shader engine */
- { 0, {128, 128}},
- { 1, { 64, 128}},
- { 2, { 32, 128}},
- { 3, { 16, 128}},
- { 17, { 0, 0}},
- { UINT_MAX, { 0, 0}},
- },
- {
- /* Two shader engines */
- { 0, {128, 128}},
- { 2, { 64, 128}},
- { 3, { 32, 128}},
- { 5, { 16, 128}},
- { 17, { 0, 0}},
- { UINT_MAX, { 0, 0}},
- },
- {
- /* Four shader engines */
- { 0, {128, 128}},
- { 3, { 64, 128}},
- { 5, { 16, 128}},
- { 17, { 0, 0}},
- { UINT_MAX, { 0, 0}},
- },
- },
- {
- /* Two RB / SE */
- {
- /* One shader engine */
- { 0, {128, 128}},
- { 2, { 64, 128}},
- { 3, { 32, 128}},
- { 5, { 16, 128}},
- { 33, { 0, 0}},
- { UINT_MAX, { 0, 0}},
- },
- {
- /* Two shader engines */
- { 0, {128, 128}},
- { 3, { 64, 128}},
- { 5, { 32, 128}},
- { 9, { 16, 128}},
- { 33, { 0, 0}},
- { UINT_MAX, { 0, 0}},
- },
- {
- /* Four shader engines */
- { 0, {256, 256}},
- { 2, {128, 256}},
- { 3, {128, 128}},
- { 5, { 64, 128}},
- { 9, { 16, 128}},
- { 33, { 0, 0}},
- { UINT_MAX, { 0, 0}},
- },
- },
- {
- /* Four RB / SE */
- {
- /* One shader engine */
- { 0, {128, 256}},
- { 2, {128, 128}},
- { 3, { 64, 128}},
- { 5, { 32, 128}},
- { 9, { 16, 128}},
- { 33, { 0, 0}},
- { UINT_MAX, { 0, 0}},
- },
- {
- /* Two shader engines */
- { 0, {256, 256}},
- { 2, {128, 256}},
- { 3, {128, 128}},
- { 5, { 64, 128}},
- { 9, { 32, 128}},
- { 17, { 16, 128}},
- { 33, { 0, 0}},
- { UINT_MAX, { 0, 0}},
- },
- {
- /* Four shader engines */
- { 0, {256, 512}},
- { 2, {256, 256}},
- { 3, {128, 256}},
- { 5, {128, 128}},
- { 9, { 64, 128}},
- { 17, { 16, 128}},
- { 33, { 0, 0}},
- { UINT_MAX, { 0, 0}},
- },
- },
- };
- static const struct radv_bin_size_entry ds_size_table[][3][9] = {
- {
- // One RB / SE
- {
- // One shader engine
- { 0, {128, 256}},
- { 2, {128, 128}},
- { 4, { 64, 128}},
- { 7, { 32, 128}},
- { 13, { 16, 128}},
- { 49, { 0, 0}},
- { UINT_MAX, { 0, 0}},
- },
- {
- // Two shader engines
- { 0, {256, 256}},
- { 2, {128, 256}},
- { 4, {128, 128}},
- { 7, { 64, 128}},
- { 13, { 32, 128}},
- { 25, { 16, 128}},
- { 49, { 0, 0}},
- { UINT_MAX, { 0, 0}},
- },
- {
- // Four shader engines
- { 0, {256, 512}},
- { 2, {256, 256}},
- { 4, {128, 256}},
- { 7, {128, 128}},
- { 13, { 64, 128}},
- { 25, { 16, 128}},
- { 49, { 0, 0}},
- { UINT_MAX, { 0, 0}},
- },
- },
- {
- // Two RB / SE
- {
- // One shader engine
- { 0, {256, 256}},
- { 2, {128, 256}},
- { 4, {128, 128}},
- { 7, { 64, 128}},
- { 13, { 32, 128}},
- { 25, { 16, 128}},
- { 97, { 0, 0}},
- { UINT_MAX, { 0, 0}},
- },
- {
- // Two shader engines
- { 0, {256, 512}},
- { 2, {256, 256}},
- { 4, {128, 256}},
- { 7, {128, 128}},
- { 13, { 64, 128}},
- { 25, { 32, 128}},
- { 49, { 16, 128}},
- { 97, { 0, 0}},
- { UINT_MAX, { 0, 0}},
- },
- {
- // Four shader engines
- { 0, {512, 512}},
- { 2, {256, 512}},
- { 4, {256, 256}},
- { 7, {128, 256}},
- { 13, {128, 128}},
- { 25, { 64, 128}},
- { 49, { 16, 128}},
- { 97, { 0, 0}},
- { UINT_MAX, { 0, 0}},
- },
- },
- {
- // Four RB / SE
- {
- // One shader engine
- { 0, {256, 512}},
- { 2, {256, 256}},
- { 4, {128, 256}},
- { 7, {128, 128}},
- { 13, { 64, 128}},
- { 25, { 32, 128}},
- { 49, { 16, 128}},
- { UINT_MAX, { 0, 0}},
- },
- {
- // Two shader engines
- { 0, {512, 512}},
- { 2, {256, 512}},
- { 4, {256, 256}},
- { 7, {128, 256}},
- { 13, {128, 128}},
- { 25, { 64, 128}},
- { 49, { 32, 128}},
- { 97, { 16, 128}},
- { UINT_MAX, { 0, 0}},
- },
- {
- // Four shader engines
- { 0, {512, 512}},
- { 4, {256, 512}},
- { 7, {256, 256}},
- { 13, {128, 256}},
- { 25, {128, 128}},
- { 49, { 64, 128}},
- { 97, { 16, 128}},
- { UINT_MAX, { 0, 0}},
- },
- },
- };
-
- RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
- struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
- VkExtent2D extent = {512, 512};
-
- unsigned log_num_rb_per_se =
- util_logbase2_ceil(pipeline->device->physical_device->rad_info.max_render_backends /
- pipeline->device->physical_device->rad_info.max_se);
- unsigned log_num_se = util_logbase2_ceil(pipeline->device->physical_device->rad_info.max_se);
-
- unsigned total_samples = 1u << G_028BE0_MSAA_NUM_SAMPLES(pipeline->graphics.ms.pa_sc_aa_config);
- unsigned ps_iter_samples = 1u << G_028804_PS_ITER_SAMPLES(pipeline->graphics.ms.db_eqaa);
- unsigned effective_samples = total_samples;
- unsigned color_bytes_per_pixel = 0;
-
- const VkPipelineColorBlendStateCreateInfo *vkblend =
- radv_pipeline_get_color_blend_state(pCreateInfo);
- if (vkblend) {
- for (unsigned i = 0; i < subpass->color_count; i++) {
- if (!vkblend->pAttachments[i].colorWriteMask)
- continue;
-
- if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED)
- continue;
-
- VkFormat format = pass->attachments[subpass->color_attachments[i].attachment].format;
- color_bytes_per_pixel += vk_format_get_blocksize(format);
- }
-
- /* MSAA images typically don't use all samples all the time. */
- if (effective_samples >= 2 && ps_iter_samples <= 1)
- effective_samples = 2;
- color_bytes_per_pixel *= effective_samples;
- }
-
- const struct radv_bin_size_entry *color_entry = color_size_table[log_num_rb_per_se][log_num_se];
- while(color_entry[1].bpp <= color_bytes_per_pixel)
- ++color_entry;
-
- extent = color_entry->extent;
-
- if (subpass->depth_stencil_attachment) {
- struct radv_render_pass_attachment *attachment = pass->attachments + subpass->depth_stencil_attachment->attachment;
-
- /* Coefficients taken from AMDVLK */
- unsigned depth_coeff = vk_format_has_depth(attachment->format) ? 5 : 0;
- unsigned stencil_coeff = vk_format_has_stencil(attachment->format) ? 1 : 0;
- unsigned ds_bytes_per_pixel = 4 * (depth_coeff + stencil_coeff) * total_samples;
-
- const struct radv_bin_size_entry *ds_entry = ds_size_table[log_num_rb_per_se][log_num_se];
- while(ds_entry[1].bpp <= ds_bytes_per_pixel)
- ++ds_entry;
-
- if (ds_entry->extent.width * ds_entry->extent.height < extent.width * extent.height)
- extent = ds_entry->extent;
- }
-
- return extent;
-}
-
-static VkExtent2D
-radv_gfx10_compute_bin_size(const struct radv_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *pCreateInfo)
+radv_gfx9_compute_bin_size(const struct radv_pipeline *pipeline,
+ const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
- RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
- struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
- VkExtent2D extent = {512, 512};
-
- const unsigned db_tag_size = 64;
- const unsigned db_tag_count = 312;
- const unsigned color_tag_size = 1024;
- const unsigned color_tag_count = 31;
- const unsigned fmask_tag_size = 256;
- const unsigned fmask_tag_count = 44;
-
- const unsigned rb_count = pipeline->device->physical_device->rad_info.max_render_backends;
- const unsigned pipe_count = MAX2(rb_count, pipeline->device->physical_device->rad_info.num_tcc_blocks);
+ static const struct radv_bin_size_entry color_size_table[][3][9] = {
+ {
+ /* One RB / SE */
+ {
+ /* One shader engine */
+ {0, {128, 128}},
+ {1, {64, 128}},
+ {2, {32, 128}},
+ {3, {16, 128}},
+ {17, {0, 0}},
+ {UINT_MAX, {0, 0}},
+ },
+ {
+ /* Two shader engines */
+ {0, {128, 128}},
+ {2, {64, 128}},
+ {3, {32, 128}},
+ {5, {16, 128}},
+ {17, {0, 0}},
+ {UINT_MAX, {0, 0}},
+ },
+ {
+ /* Four shader engines */
+ {0, {128, 128}},
+ {3, {64, 128}},
+ {5, {16, 128}},
+ {17, {0, 0}},
+ {UINT_MAX, {0, 0}},
+ },
+ },
+ {
+ /* Two RB / SE */
+ {
+ /* One shader engine */
+ {0, {128, 128}},
+ {2, {64, 128}},
+ {3, {32, 128}},
+ {5, {16, 128}},
+ {33, {0, 0}},
+ {UINT_MAX, {0, 0}},
+ },
+ {
+ /* Two shader engines */
+ {0, {128, 128}},
+ {3, {64, 128}},
+ {5, {32, 128}},
+ {9, {16, 128}},
+ {33, {0, 0}},
+ {UINT_MAX, {0, 0}},
+ },
+ {
+ /* Four shader engines */
+ {0, {256, 256}},
+ {2, {128, 256}},
+ {3, {128, 128}},
+ {5, {64, 128}},
+ {9, {16, 128}},
+ {33, {0, 0}},
+ {UINT_MAX, {0, 0}},
+ },
+ },
+ {
+ /* Four RB / SE */
+ {
+ /* One shader engine */
+ {0, {128, 256}},
+ {2, {128, 128}},
+ {3, {64, 128}},
+ {5, {32, 128}},
+ {9, {16, 128}},
+ {33, {0, 0}},
+ {UINT_MAX, {0, 0}},
+ },
+ {
+ /* Two shader engines */
+ {0, {256, 256}},
+ {2, {128, 256}},
+ {3, {128, 128}},
+ {5, {64, 128}},
+ {9, {32, 128}},
+ {17, {16, 128}},
+ {33, {0, 0}},
+ {UINT_MAX, {0, 0}},
+ },
+ {
+ /* Four shader engines */
+ {0, {256, 512}},
+ {2, {256, 256}},
+ {3, {128, 256}},
+ {5, {128, 128}},
+ {9, {64, 128}},
+ {17, {16, 128}},
+ {33, {0, 0}},
+ {UINT_MAX, {0, 0}},
+ },
+ },
+ };
+ static const struct radv_bin_size_entry ds_size_table[][3][9] = {
+ {
+ // One RB / SE
+ {
+ // One shader engine
+ {0, {128, 256}},
+ {2, {128, 128}},
+ {4, {64, 128}},
+ {7, {32, 128}},
+ {13, {16, 128}},
+ {49, {0, 0}},
+ {UINT_MAX, {0, 0}},
+ },
+ {
+ // Two shader engines
+ {0, {256, 256}},
+ {2, {128, 256}},
+ {4, {128, 128}},
+ {7, {64, 128}},
+ {13, {32, 128}},
+ {25, {16, 128}},
+ {49, {0, 0}},
+ {UINT_MAX, {0, 0}},
+ },
+ {
+ // Four shader engines
+ {0, {256, 512}},
+ {2, {256, 256}},
+ {4, {128, 256}},
+ {7, {128, 128}},
+ {13, {64, 128}},
+ {25, {16, 128}},
+ {49, {0, 0}},
+ {UINT_MAX, {0, 0}},
+ },
+ },
+ {
+ // Two RB / SE
+ {
+ // One shader engine
+ {0, {256, 256}},
+ {2, {128, 256}},
+ {4, {128, 128}},
+ {7, {64, 128}},
+ {13, {32, 128}},
+ {25, {16, 128}},
+ {97, {0, 0}},
+ {UINT_MAX, {0, 0}},
+ },
+ {
+ // Two shader engines
+ {0, {256, 512}},
+ {2, {256, 256}},
+ {4, {128, 256}},
+ {7, {128, 128}},
+ {13, {64, 128}},
+ {25, {32, 128}},
+ {49, {16, 128}},
+ {97, {0, 0}},
+ {UINT_MAX, {0, 0}},
+ },
+ {
+ // Four shader engines
+ {0, {512, 512}},
+ {2, {256, 512}},
+ {4, {256, 256}},
+ {7, {128, 256}},
+ {13, {128, 128}},
+ {25, {64, 128}},
+ {49, {16, 128}},
+ {97, {0, 0}},
+ {UINT_MAX, {0, 0}},
+ },
+ },
+ {
+ // Four RB / SE
+ {
+ // One shader engine
+ {0, {256, 512}},
+ {2, {256, 256}},
+ {4, {128, 256}},
+ {7, {128, 128}},
+ {13, {64, 128}},
+ {25, {32, 128}},
+ {49, {16, 128}},
+ {UINT_MAX, {0, 0}},
+ },
+ {
+ // Two shader engines
+ {0, {512, 512}},
+ {2, {256, 512}},
+ {4, {256, 256}},
+ {7, {128, 256}},
+ {13, {128, 128}},
+ {25, {64, 128}},
+ {49, {32, 128}},
+ {97, {16, 128}},
+ {UINT_MAX, {0, 0}},
+ },
+ {
+ // Four shader engines
+ {0, {512, 512}},
+ {4, {256, 512}},
+ {7, {256, 256}},
+ {13, {128, 256}},
+ {25, {128, 128}},
+ {49, {64, 128}},
+ {97, {16, 128}},
+ {UINT_MAX, {0, 0}},
+ },
+ },
+ };
+
+ RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
+ struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
+ VkExtent2D extent = {512, 512};
+
+ unsigned log_num_rb_per_se =
+ util_logbase2_ceil(pipeline->device->physical_device->rad_info.max_render_backends /
+ pipeline->device->physical_device->rad_info.max_se);
+ unsigned log_num_se = util_logbase2_ceil(pipeline->device->physical_device->rad_info.max_se);
+
+ unsigned total_samples = 1u << G_028BE0_MSAA_NUM_SAMPLES(pipeline->graphics.ms.pa_sc_aa_config);
+ unsigned ps_iter_samples = 1u << G_028804_PS_ITER_SAMPLES(pipeline->graphics.ms.db_eqaa);
+ unsigned effective_samples = total_samples;
+ unsigned color_bytes_per_pixel = 0;
+
+ const VkPipelineColorBlendStateCreateInfo *vkblend =
+ radv_pipeline_get_color_blend_state(pCreateInfo);
+ if (vkblend) {
+ for (unsigned i = 0; i < subpass->color_count; i++) {
+ if (!vkblend->pAttachments[i].colorWriteMask)
+ continue;
+
+ if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED)
+ continue;
+
+ VkFormat format = pass->attachments[subpass->color_attachments[i].attachment].format;
+ color_bytes_per_pixel += vk_format_get_blocksize(format);
+ }
+
+ /* MSAA images typically don't use all samples all the time. */
+ if (effective_samples >= 2 && ps_iter_samples <= 1)
+ effective_samples = 2;
+ color_bytes_per_pixel *= effective_samples;
+ }
- const unsigned db_tag_part = (db_tag_count * rb_count / pipe_count) * db_tag_size * pipe_count;
- const unsigned color_tag_part = (color_tag_count * rb_count / pipe_count) * color_tag_size * pipe_count;
- const unsigned fmask_tag_part = (fmask_tag_count * rb_count / pipe_count) * fmask_tag_size * pipe_count;
+ const struct radv_bin_size_entry *color_entry = color_size_table[log_num_rb_per_se][log_num_se];
+ while (color_entry[1].bpp <= color_bytes_per_pixel)
+ ++color_entry;
- const unsigned total_samples = 1u << G_028BE0_MSAA_NUM_SAMPLES(pipeline->graphics.ms.pa_sc_aa_config);
- const unsigned samples_log = util_logbase2_ceil(total_samples);
+ extent = color_entry->extent;
- unsigned color_bytes_per_pixel = 0;
- unsigned fmask_bytes_per_pixel = 0;
+ if (subpass->depth_stencil_attachment) {
+ struct radv_render_pass_attachment *attachment =
+ pass->attachments + subpass->depth_stencil_attachment->attachment;
- const VkPipelineColorBlendStateCreateInfo *vkblend =
- radv_pipeline_get_color_blend_state(pCreateInfo);
- if (vkblend) {
- for (unsigned i = 0; i < subpass->color_count; i++) {
- if (!vkblend->pAttachments[i].colorWriteMask)
- continue;
+ /* Coefficients taken from AMDVLK */
+ unsigned depth_coeff = vk_format_has_depth(attachment->format) ? 5 : 0;
+ unsigned stencil_coeff = vk_format_has_stencil(attachment->format) ? 1 : 0;
+ unsigned ds_bytes_per_pixel = 4 * (depth_coeff + stencil_coeff) * total_samples;
- if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED)
- continue;
+ const struct radv_bin_size_entry *ds_entry = ds_size_table[log_num_rb_per_se][log_num_se];
+ while (ds_entry[1].bpp <= ds_bytes_per_pixel)
+ ++ds_entry;
- VkFormat format = pass->attachments[subpass->color_attachments[i].attachment].format;
- color_bytes_per_pixel += vk_format_get_blocksize(format);
+ if (ds_entry->extent.width * ds_entry->extent.height < extent.width * extent.height)
+ extent = ds_entry->extent;
+ }
- if (total_samples > 1) {
- assert(samples_log <= 3);
- const unsigned fmask_array[] = {0, 1, 1, 4};
- fmask_bytes_per_pixel += fmask_array[samples_log];
- }
- }
+ return extent;
+}
- color_bytes_per_pixel *= total_samples;
- }
- color_bytes_per_pixel = MAX2(color_bytes_per_pixel, 1);
+static VkExtent2D
+radv_gfx10_compute_bin_size(const struct radv_pipeline *pipeline,
+ const VkGraphicsPipelineCreateInfo *pCreateInfo)
+{
+ RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
+ struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
+ VkExtent2D extent = {512, 512};
+
+ const unsigned db_tag_size = 64;
+ const unsigned db_tag_count = 312;
+ const unsigned color_tag_size = 1024;
+ const unsigned color_tag_count = 31;
+ const unsigned fmask_tag_size = 256;
+ const unsigned fmask_tag_count = 44;
+
+ const unsigned rb_count = pipeline->device->physical_device->rad_info.max_render_backends;
+ const unsigned pipe_count =
+ MAX2(rb_count, pipeline->device->physical_device->rad_info.num_tcc_blocks);
+
+ const unsigned db_tag_part = (db_tag_count * rb_count / pipe_count) * db_tag_size * pipe_count;
+ const unsigned color_tag_part =
+ (color_tag_count * rb_count / pipe_count) * color_tag_size * pipe_count;
+ const unsigned fmask_tag_part =
+ (fmask_tag_count * rb_count / pipe_count) * fmask_tag_size * pipe_count;
+
+ const unsigned total_samples =
+ 1u << G_028BE0_MSAA_NUM_SAMPLES(pipeline->graphics.ms.pa_sc_aa_config);
+ const unsigned samples_log = util_logbase2_ceil(total_samples);
+
+ unsigned color_bytes_per_pixel = 0;
+ unsigned fmask_bytes_per_pixel = 0;
+
+ const VkPipelineColorBlendStateCreateInfo *vkblend =
+ radv_pipeline_get_color_blend_state(pCreateInfo);
+ if (vkblend) {
+ for (unsigned i = 0; i < subpass->color_count; i++) {
+ if (!vkblend->pAttachments[i].colorWriteMask)
+ continue;
+
+ if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED)
+ continue;
+
+ VkFormat format = pass->attachments[subpass->color_attachments[i].attachment].format;
+ color_bytes_per_pixel += vk_format_get_blocksize(format);
+
+ if (total_samples > 1) {
+ assert(samples_log <= 3);
+ const unsigned fmask_array[] = {0, 1, 1, 4};
+ fmask_bytes_per_pixel += fmask_array[samples_log];
+ }
+ }
+
+ color_bytes_per_pixel *= total_samples;
+ }
+ color_bytes_per_pixel = MAX2(color_bytes_per_pixel, 1);
- const unsigned color_pixel_count_log = util_logbase2(color_tag_part / color_bytes_per_pixel);
- extent.width = 1ull << ((color_pixel_count_log + 1) / 2);
- extent.height = 1ull << (color_pixel_count_log / 2);
+ const unsigned color_pixel_count_log = util_logbase2(color_tag_part / color_bytes_per_pixel);
+ extent.width = 1ull << ((color_pixel_count_log + 1) / 2);
+ extent.height = 1ull << (color_pixel_count_log / 2);
- if (fmask_bytes_per_pixel) {
- const unsigned fmask_pixel_count_log = util_logbase2(fmask_tag_part / fmask_bytes_per_pixel);
+ if (fmask_bytes_per_pixel) {
+ const unsigned fmask_pixel_count_log = util_logbase2(fmask_tag_part / fmask_bytes_per_pixel);
- const VkExtent2D fmask_extent = (VkExtent2D){
- .width = 1ull << ((fmask_pixel_count_log + 1) / 2),
- .height = 1ull << (color_pixel_count_log / 2)
- };
+ const VkExtent2D fmask_extent =
+ (VkExtent2D){.width = 1ull << ((fmask_pixel_count_log + 1) / 2),
+ .height = 1ull << (color_pixel_count_log / 2)};
- if (fmask_extent.width * fmask_extent.height < extent.width * extent.height)
- extent = fmask_extent;
- }
+ if (fmask_extent.width * fmask_extent.height < extent.width * extent.height)
+ extent = fmask_extent;
+ }
- if (subpass->depth_stencil_attachment) {
- struct radv_render_pass_attachment *attachment = pass->attachments + subpass->depth_stencil_attachment->attachment;
+ if (subpass->depth_stencil_attachment) {
+ struct radv_render_pass_attachment *attachment =
+ pass->attachments + subpass->depth_stencil_attachment->attachment;
- /* Coefficients taken from AMDVLK */
- unsigned depth_coeff = vk_format_has_depth(attachment->format) ? 5 : 0;
- unsigned stencil_coeff = vk_format_has_stencil(attachment->format) ? 1 : 0;
- unsigned db_bytes_per_pixel = (depth_coeff + stencil_coeff) * total_samples;
+ /* Coefficients taken from AMDVLK */
+ unsigned depth_coeff = vk_format_has_depth(attachment->format) ? 5 : 0;
+ unsigned stencil_coeff = vk_format_has_stencil(attachment->format) ? 1 : 0;
+ unsigned db_bytes_per_pixel = (depth_coeff + stencil_coeff) * total_samples;
- const unsigned db_pixel_count_log = util_logbase2(db_tag_part / db_bytes_per_pixel);
+ const unsigned db_pixel_count_log = util_logbase2(db_tag_part / db_bytes_per_pixel);
- const VkExtent2D db_extent = (VkExtent2D){
- .width = 1ull << ((db_pixel_count_log + 1) / 2),
- .height = 1ull << (color_pixel_count_log / 2)
- };
+ const VkExtent2D db_extent = (VkExtent2D){.width = 1ull << ((db_pixel_count_log + 1) / 2),
+ .height = 1ull << (color_pixel_count_log / 2)};
- if (db_extent.width * db_extent.height < extent.width * extent.height)
- extent = db_extent;
- }
+ if (db_extent.width * db_extent.height < extent.width * extent.height)
+ extent = db_extent;
+ }
- extent.width = MAX2(extent.width, 128);
- extent.height = MAX2(extent.width, 64);
+ extent.width = MAX2(extent.width, 128);
+ extent.height = MAX2(extent.width, 64);
- return extent;
+ return extent;
}

static void
radv_pipeline_init_disabled_binning_state(struct radv_pipeline *pipeline,
- const VkGraphicsPipelineCreateInfo *pCreateInfo)
+ const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
- uint32_t pa_sc_binner_cntl_0 =
- S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC) |
- S_028C44_DISABLE_START_OF_PRIM(1);
- uint32_t db_dfsm_control = S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF);
-
- if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
- RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
- struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
- const VkPipelineColorBlendStateCreateInfo *vkblend =
- radv_pipeline_get_color_blend_state(pCreateInfo);
- unsigned min_bytes_per_pixel = 0;
-
- if (vkblend) {
- for (unsigned i = 0; i < subpass->color_count; i++) {
- if (!vkblend->pAttachments[i].colorWriteMask)
- continue;
-
- if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED)
- continue;
-
- VkFormat format = pass->attachments[subpass->color_attachments[i].attachment].format;
- unsigned bytes = vk_format_get_blocksize(format);
- if (!min_bytes_per_pixel || bytes < min_bytes_per_pixel)
- min_bytes_per_pixel = bytes;
- }
- }
-
- pa_sc_binner_cntl_0 =
- S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_NEW_SC) |
- S_028C44_BIN_SIZE_X(0) |
- S_028C44_BIN_SIZE_Y(0) |
- S_028C44_BIN_SIZE_X_EXTEND(2) | /* 128 */
- S_028C44_BIN_SIZE_Y_EXTEND(min_bytes_per_pixel <= 4 ? 2 : 1) | /* 128 or 64 */
- S_028C44_DISABLE_START_OF_PRIM(1);
- }
-
- pipeline->graphics.binning.pa_sc_binner_cntl_0 = pa_sc_binner_cntl_0;
- pipeline->graphics.binning.db_dfsm_control = db_dfsm_control;
+ uint32_t pa_sc_binner_cntl_0 = S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC) |
+ S_028C44_DISABLE_START_OF_PRIM(1);
+ uint32_t db_dfsm_control = S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF);
+
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
+ RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
+ struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
+ const VkPipelineColorBlendStateCreateInfo *vkblend =
+ radv_pipeline_get_color_blend_state(pCreateInfo);
+ unsigned min_bytes_per_pixel = 0;
+
+ if (vkblend) {
+ for (unsigned i = 0; i < subpass->color_count; i++) {
+ if (!vkblend->pAttachments[i].colorWriteMask)
+ continue;
+
+ if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED)
+ continue;
+
+ VkFormat format = pass->attachments[subpass->color_attachments[i].attachment].format;
+ unsigned bytes = vk_format_get_blocksize(format);
+ if (!min_bytes_per_pixel || bytes < min_bytes_per_pixel)
+ min_bytes_per_pixel = bytes;
+ }
+ }
+
+ pa_sc_binner_cntl_0 =
+ S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_NEW_SC) | S_028C44_BIN_SIZE_X(0) |
+ S_028C44_BIN_SIZE_Y(0) | S_028C44_BIN_SIZE_X_EXTEND(2) | /* 128 */
+ S_028C44_BIN_SIZE_Y_EXTEND(min_bytes_per_pixel <= 4 ? 2 : 1) | /* 128 or 64 */
+ S_028C44_DISABLE_START_OF_PRIM(1);
+ }
+
+ pipeline->graphics.binning.pa_sc_binner_cntl_0 = pa_sc_binner_cntl_0;
+ pipeline->graphics.binning.db_dfsm_control = db_dfsm_control;
}

struct radv_binning_settings
radv_get_binning_settings(const struct radv_physical_device *pdev)
{
- struct radv_binning_settings settings;
- if (pdev->rad_info.has_dedicated_vram) {
- if (pdev->rad_info.max_render_backends > 4) {
- settings.context_states_per_bin = 1;
- settings.persistent_states_per_bin = 1;
- } else {
- settings.context_states_per_bin = 3;
- settings.persistent_states_per_bin = 8;
- }
- settings.fpovs_per_batch = 63;
- } else {
- /* The context states are affected by the scissor bug. */
- settings.context_states_per_bin = 6;
- /* 32 causes hangs for RAVEN. */
- settings.persistent_states_per_bin = 16;
- settings.fpovs_per_batch = 63;
- }
-
- if (pdev->rad_info.has_gfx9_scissor_bug)
- settings.context_states_per_bin = 1;
-
- return settings;
+ struct radv_binning_settings settings;
+ if (pdev->rad_info.has_dedicated_vram) {
+ if (pdev->rad_info.max_render_backends > 4) {
+ settings.context_states_per_bin = 1;
+ settings.persistent_states_per_bin = 1;
+ } else {
+ settings.context_states_per_bin = 3;
+ settings.persistent_states_per_bin = 8;
+ }
+ settings.fpovs_per_batch = 63;
+ } else {
+ /* The context states are affected by the scissor bug. */
+ settings.context_states_per_bin = 6;
+ /* 32 causes hangs for RAVEN. */
+ settings.persistent_states_per_bin = 16;
+ settings.fpovs_per_batch = 63;
+ }
+
+ if (pdev->rad_info.has_gfx9_scissor_bug)
+ settings.context_states_per_bin = 1;
+
+ return settings;
}

static void
radv_pipeline_init_binning_state(struct radv_pipeline *pipeline,
- const VkGraphicsPipelineCreateInfo *pCreateInfo,
- const struct radv_blend_state *blend)
+ const VkGraphicsPipelineCreateInfo *pCreateInfo,
+ const struct radv_blend_state *blend)
{
- if (pipeline->device->physical_device->rad_info.chip_class < GFX9)
- return;
-
- VkExtent2D bin_size;
- if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
- bin_size = radv_gfx10_compute_bin_size(pipeline, pCreateInfo);
- } else if (pipeline->device->physical_device->rad_info.chip_class == GFX9) {
- bin_size = radv_gfx9_compute_bin_size(pipeline, pCreateInfo);
- } else
- unreachable("Unhandled generation for binning bin size calculation");
-
- if (pipeline->device->pbb_allowed && bin_size.width && bin_size.height) {
- struct radv_binning_settings settings =
- radv_get_binning_settings(pipeline->device->physical_device);
-
- bool disable_start_of_prim = true;
- uint32_t db_dfsm_control = S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF);
-
- const struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
-
- if (pipeline->device->dfsm_allowed && ps &&
- !ps->info.ps.can_discard &&
- !ps->info.ps.writes_memory &&
- blend->cb_target_enabled_4bit) {
- db_dfsm_control = S_028060_PUNCHOUT_MODE(V_028060_AUTO);
- disable_start_of_prim = (blend->blend_enable_4bit & blend->cb_target_enabled_4bit) != 0;
- }
-
- const uint32_t pa_sc_binner_cntl_0 =
- S_028C44_BINNING_MODE(V_028C44_BINNING_ALLOWED) |
- S_028C44_BIN_SIZE_X(bin_size.width == 16) |
- S_028C44_BIN_SIZE_Y(bin_size.height == 16) |
- S_028C44_BIN_SIZE_X_EXTEND(util_logbase2(MAX2(bin_size.width, 32)) - 5) |
- S_028C44_BIN_SIZE_Y_EXTEND(util_logbase2(MAX2(bin_size.height, 32)) - 5) |
- S_028C44_CONTEXT_STATES_PER_BIN(settings.context_states_per_bin - 1) |
- S_028C44_PERSISTENT_STATES_PER_BIN(settings.persistent_states_per_bin - 1) |
- S_028C44_DISABLE_START_OF_PRIM(disable_start_of_prim) |
- S_028C44_FPOVS_PER_BATCH(settings.fpovs_per_batch) |
- S_028C44_OPTIMAL_BIN_SELECTION(1);
-
- pipeline->graphics.binning.pa_sc_binner_cntl_0 = pa_sc_binner_cntl_0;
- pipeline->graphics.binning.db_dfsm_control = db_dfsm_control;
- } else
- radv_pipeline_init_disabled_binning_state(pipeline, pCreateInfo);
+ if (pipeline->device->physical_device->rad_info.chip_class < GFX9)
+ return;
+
+ VkExtent2D bin_size;
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
+ bin_size = radv_gfx10_compute_bin_size(pipeline, pCreateInfo);
+ } else if (pipeline->device->physical_device->rad_info.chip_class == GFX9) {
+ bin_size = radv_gfx9_compute_bin_size(pipeline, pCreateInfo);
+ } else
+ unreachable("Unhandled generation for binning bin size calculation");
+
+ if (pipeline->device->pbb_allowed && bin_size.width && bin_size.height) {
+ struct radv_binning_settings settings =
+ radv_get_binning_settings(pipeline->device->physical_device);
+
+ bool disable_start_of_prim = true;
+ uint32_t db_dfsm_control = S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF);
+
+ const struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
+
+ if (pipeline->device->dfsm_allowed && ps && !ps->info.ps.can_discard &&
+ !ps->info.ps.writes_memory && blend->cb_target_enabled_4bit) {
+ db_dfsm_control = S_028060_PUNCHOUT_MODE(V_028060_AUTO);
+ disable_start_of_prim = (blend->blend_enable_4bit & blend->cb_target_enabled_4bit) != 0;
+ }
+
+ const uint32_t pa_sc_binner_cntl_0 =
+ S_028C44_BINNING_MODE(V_028C44_BINNING_ALLOWED) |
+ S_028C44_BIN_SIZE_X(bin_size.width == 16) | S_028C44_BIN_SIZE_Y(bin_size.height == 16) |
+ S_028C44_BIN_SIZE_X_EXTEND(util_logbase2(MAX2(bin_size.width, 32)) - 5) |
+ S_028C44_BIN_SIZE_Y_EXTEND(util_logbase2(MAX2(bin_size.height, 32)) - 5) |
+ S_028C44_CONTEXT_STATES_PER_BIN(settings.context_states_per_bin - 1) |
+ S_028C44_PERSISTENT_STATES_PER_BIN(settings.persistent_states_per_bin - 1) |
+ S_028C44_DISABLE_START_OF_PRIM(disable_start_of_prim) |
+ S_028C44_FPOVS_PER_BATCH(settings.fpovs_per_batch) | S_028C44_OPTIMAL_BIN_SELECTION(1);
+
+ pipeline->graphics.binning.pa_sc_binner_cntl_0 = pa_sc_binner_cntl_0;
+ pipeline->graphics.binning.db_dfsm_control = db_dfsm_control;
+ } else
+ radv_pipeline_init_disabled_binning_state(pipeline, pCreateInfo);
}
-
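For readers tracking the register math in radv_pipeline_init_binning_state above: PA_SC_BINNER_CNTL_0 splits each bin dimension across two fields, a one-bit BIN_SIZE_{X,Y} flag that is set only for 16-pixel bins, and a BIN_SIZE_{X,Y}_EXTEND field that encodes 32..512-pixel bins as log2(size) - 5. A minimal sketch of that encoding, using a hypothetical helper name that does not exist in radv (the real code inlines the expression):

   /* Illustration only: how the BIN_SIZE fields are derived from a bin
    * dimension in pixels. util_logbase2() and MAX2() are the same Mesa
    * util helpers used in the code above.
    */
   static unsigned
   radv_bin_size_extend(unsigned size_px)
   {
      /* 16 px is signalled through BIN_SIZE_{X,Y} == 1 instead;
       * 32, 64, 128, 256, 512 px map to 0, 1, 2, 3, 4. */
      return util_logbase2(MAX2(size_px, 32)) - 5;
   }

   /* e.g. S_028C44_BIN_SIZE_X(bin_size.width == 16) |
    *      S_028C44_BIN_SIZE_X_EXTEND(radv_bin_size_extend(bin_size.width))
    */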
static void
radv_pipeline_generate_depth_stencil_state(struct radeon_cmdbuf *ctx_cs,
const struct radv_pipeline *pipeline,
const VkGraphicsPipelineCreateInfo *pCreateInfo,
const struct radv_graphics_pipeline_create_info *extra)
{
- const VkPipelineDepthStencilStateCreateInfo *vkds = radv_pipeline_get_depth_stencil_state(pCreateInfo);
- RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
- struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
- struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
- struct radv_render_pass_attachment *attachment = NULL;
- uint32_t db_render_control = 0, db_render_override2 = 0;
- uint32_t db_render_override = 0;
-
- if (subpass->depth_stencil_attachment)
- attachment = pass->attachments + subpass->depth_stencil_attachment->attachment;
-
- bool has_depth_attachment = attachment && vk_format_has_depth(attachment->format);
-
- if (vkds && has_depth_attachment) {
- /* from amdvlk: For 4xAA and 8xAA need to decompress on flush for better performance */
- db_render_override2 |= S_028010_DECOMPRESS_Z_ON_FLUSH(attachment->samples > 2);
-
- if (pipeline->device->physical_device->rad_info.chip_class >= GFX10_3)
- db_render_override2 |= S_028010_CENTROID_COMPUTATION_MODE(1);
- }
-
- if (attachment && extra) {
- db_render_control |= S_028000_DEPTH_CLEAR_ENABLE(extra->db_depth_clear);
- db_render_control |= S_028000_STENCIL_CLEAR_ENABLE(extra->db_stencil_clear);
-
- db_render_control |= S_028000_RESUMMARIZE_ENABLE(extra->resummarize_enable);
- db_render_control |= S_028000_DEPTH_COMPRESS_DISABLE(extra->depth_compress_disable);
- db_render_control |= S_028000_STENCIL_COMPRESS_DISABLE(extra->stencil_compress_disable);
- db_render_override2 |= S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(extra->db_depth_disable_expclear);
- db_render_override2 |= S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(extra->db_stencil_disable_expclear);
- }
-
- db_render_override |= S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
- S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE);
-
- if (!pCreateInfo->pRasterizationState->depthClampEnable &&
- ps->info.ps.writes_z) {
- /* From VK_EXT_depth_range_unrestricted spec:
- *
- * "The behavior described in Primitive Clipping still applies.
- * If depth clamping is disabled the depth values are still
- * clipped to 0 ≤ zc ≤ wc before the viewport transform. If
- * depth clamping is enabled the above equation is ignored and
- * the depth values are instead clamped to the VkViewport
- * minDepth and maxDepth values, which in the case of this
- * extension can be outside of the 0.0 to 1.0 range."
- */
- db_render_override |= S_02800C_DISABLE_VIEWPORT_CLAMP(1);
- }
-
- radeon_set_context_reg(ctx_cs, R_028000_DB_RENDER_CONTROL, db_render_control);
- radeon_set_context_reg(ctx_cs, R_02800C_DB_RENDER_OVERRIDE, db_render_override);
- radeon_set_context_reg(ctx_cs, R_028010_DB_RENDER_OVERRIDE2, db_render_override2);
+ const VkPipelineDepthStencilStateCreateInfo *vkds =
+ radv_pipeline_get_depth_stencil_state(pCreateInfo);
+ RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
+ struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
+ struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
+ struct radv_render_pass_attachment *attachment = NULL;
+ uint32_t db_render_control = 0, db_render_override2 = 0;
+ uint32_t db_render_override = 0;
+
+ if (subpass->depth_stencil_attachment)
+ attachment = pass->attachments + subpass->depth_stencil_attachment->attachment;
+
+ bool has_depth_attachment = attachment && vk_format_has_depth(attachment->format);
+
+ if (vkds && has_depth_attachment) {
+ /* from amdvlk: For 4xAA and 8xAA need to decompress on flush for better performance */
+ db_render_override2 |= S_028010_DECOMPRESS_Z_ON_FLUSH(attachment->samples > 2);
+
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX10_3)
+ db_render_override2 |= S_028010_CENTROID_COMPUTATION_MODE(1);
+ }
+
+ if (attachment && extra) {
+ db_render_control |= S_028000_DEPTH_CLEAR_ENABLE(extra->db_depth_clear);
+ db_render_control |= S_028000_STENCIL_CLEAR_ENABLE(extra->db_stencil_clear);
+
+ db_render_control |= S_028000_RESUMMARIZE_ENABLE(extra->resummarize_enable);
+ db_render_control |= S_028000_DEPTH_COMPRESS_DISABLE(extra->depth_compress_disable);
+ db_render_control |= S_028000_STENCIL_COMPRESS_DISABLE(extra->stencil_compress_disable);
+ db_render_override2 |=
+ S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(extra->db_depth_disable_expclear);
+ db_render_override2 |=
+ S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(extra->db_stencil_disable_expclear);
+ }
+
+ db_render_override |= S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
+ S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE);
+
+ if (!pCreateInfo->pRasterizationState->depthClampEnable && ps->info.ps.writes_z) {
+ /* From VK_EXT_depth_range_unrestricted spec:
+ *
+ * "The behavior described in Primitive Clipping still applies.
+ * If depth clamping is disabled the depth values are still
+ * clipped to 0 ≤ zc ≤ wc before the viewport transform. If
+ * depth clamping is enabled the above equation is ignored and
+ * the depth values are instead clamped to the VkViewport
+ * minDepth and maxDepth values, which in the case of this
+ * extension can be outside of the 0.0 to 1.0 range."
+ */
+ db_render_override |= S_02800C_DISABLE_VIEWPORT_CLAMP(1);
+ }
+
+ radeon_set_context_reg(ctx_cs, R_028000_DB_RENDER_CONTROL, db_render_control);
+ radeon_set_context_reg(ctx_cs, R_02800C_DB_RENDER_OVERRIDE, db_render_override);
+ radeon_set_context_reg(ctx_cs, R_028010_DB_RENDER_OVERRIDE2, db_render_override2);
}
static void
@@ -4290,1083 +4146,1044 @@ radv_pipeline_generate_blend_state(struct radeon_cmdbuf *ctx_cs,
const struct radv_pipeline *pipeline,
const struct radv_blend_state *blend)
{
- radeon_set_context_reg_seq(ctx_cs, R_028780_CB_BLEND0_CONTROL, 8);
- radeon_emit_array(ctx_cs, blend->cb_blend_control,
- 8);
- radeon_set_context_reg(ctx_cs, R_028808_CB_COLOR_CONTROL, blend->cb_color_control);
- radeon_set_context_reg(ctx_cs, R_028B70_DB_ALPHA_TO_MASK, blend->db_alpha_to_mask);
+ radeon_set_context_reg_seq(ctx_cs, R_028780_CB_BLEND0_CONTROL, 8);
+ radeon_emit_array(ctx_cs, blend->cb_blend_control, 8);
+ radeon_set_context_reg(ctx_cs, R_028808_CB_COLOR_CONTROL, blend->cb_color_control);
+ radeon_set_context_reg(ctx_cs, R_028B70_DB_ALPHA_TO_MASK, blend->db_alpha_to_mask);
- if (pipeline->device->physical_device->rad_info.has_rbplus) {
+ if (pipeline->device->physical_device->rad_info.has_rbplus) {
- radeon_set_context_reg_seq(ctx_cs, R_028760_SX_MRT0_BLEND_OPT, 8);
- radeon_emit_array(ctx_cs, blend->sx_mrt_blend_opt, 8);
- }
+ radeon_set_context_reg_seq(ctx_cs, R_028760_SX_MRT0_BLEND_OPT, 8);
+ radeon_emit_array(ctx_cs, blend->sx_mrt_blend_opt, 8);
+ }
- radeon_set_context_reg(ctx_cs, R_028714_SPI_SHADER_COL_FORMAT, blend->spi_shader_col_format);
+ radeon_set_context_reg(ctx_cs, R_028714_SPI_SHADER_COL_FORMAT, blend->spi_shader_col_format);
- radeon_set_context_reg(ctx_cs, R_028238_CB_TARGET_MASK, blend->cb_target_mask);
- radeon_set_context_reg(ctx_cs, R_02823C_CB_SHADER_MASK, blend->cb_shader_mask);
+ radeon_set_context_reg(ctx_cs, R_028238_CB_TARGET_MASK, blend->cb_target_mask);
+ radeon_set_context_reg(ctx_cs, R_02823C_CB_SHADER_MASK, blend->cb_shader_mask);
}
static void
radv_pipeline_generate_raster_state(struct radeon_cmdbuf *ctx_cs,
- const struct radv_pipeline *pipeline,
+ const struct radv_pipeline *pipeline,
const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
- const VkPipelineRasterizationStateCreateInfo *vkraster = pCreateInfo->pRasterizationState;
- const VkConservativeRasterizationModeEXT mode =
- radv_get_conservative_raster_mode(vkraster);
- uint32_t pa_sc_conservative_rast = S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1);
- bool depth_clip_disable = vkraster->depthClampEnable;
-
- const VkPipelineRasterizationDepthClipStateCreateInfoEXT *depth_clip_state =
- vk_find_struct_const(vkraster->pNext, PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT);
- if (depth_clip_state) {
- depth_clip_disable = !depth_clip_state->depthClipEnable;
- }
-
- radeon_set_context_reg(ctx_cs, R_028810_PA_CL_CLIP_CNTL,
- S_028810_DX_CLIP_SPACE_DEF(1) | // vulkan uses DX conventions.
- S_028810_ZCLIP_NEAR_DISABLE(depth_clip_disable ? 1 : 0) |
- S_028810_ZCLIP_FAR_DISABLE(depth_clip_disable ? 1 : 0) |
- S_028810_DX_RASTERIZATION_KILL(vkraster->rasterizerDiscardEnable ? 1 : 0) |
- S_028810_DX_LINEAR_ATTR_CLIP_ENA(1));
-
- radeon_set_context_reg(ctx_cs, R_028BDC_PA_SC_LINE_CNTL,
- S_028BDC_DX10_DIAMOND_TEST_ENA(1));
-
- /* Conservative rasterization. */
- if (mode != VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT) {
- pa_sc_conservative_rast = S_028C4C_PREZ_AA_MASK_ENABLE(1) |
- S_028C4C_POSTZ_AA_MASK_ENABLE(1) |
- S_028C4C_CENTROID_SAMPLE_OVERRIDE(1);
-
- if (mode == VK_CONSERVATIVE_RASTERIZATION_MODE_OVERESTIMATE_EXT) {
- pa_sc_conservative_rast |=
- S_028C4C_OVER_RAST_ENABLE(1) |
- S_028C4C_OVER_RAST_SAMPLE_SELECT(0) |
- S_028C4C_UNDER_RAST_ENABLE(0) |
- S_028C4C_UNDER_RAST_SAMPLE_SELECT(1) |
- S_028C4C_PBB_UNCERTAINTY_REGION_ENABLE(1);
- } else {
- assert(mode == VK_CONSERVATIVE_RASTERIZATION_MODE_UNDERESTIMATE_EXT);
- pa_sc_conservative_rast |=
- S_028C4C_OVER_RAST_ENABLE(0) |
- S_028C4C_OVER_RAST_SAMPLE_SELECT(1) |
- S_028C4C_UNDER_RAST_ENABLE(1) |
- S_028C4C_UNDER_RAST_SAMPLE_SELECT(0) |
- S_028C4C_PBB_UNCERTAINTY_REGION_ENABLE(0);
- }
- }
-
- radeon_set_context_reg(ctx_cs, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL,
- pa_sc_conservative_rast);
-}
+ const VkPipelineRasterizationStateCreateInfo *vkraster = pCreateInfo->pRasterizationState;
+ const VkConservativeRasterizationModeEXT mode = radv_get_conservative_raster_mode(vkraster);
+ uint32_t pa_sc_conservative_rast = S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1);
+ bool depth_clip_disable = vkraster->depthClampEnable;
+
+ const VkPipelineRasterizationDepthClipStateCreateInfoEXT *depth_clip_state =
+ vk_find_struct_const(vkraster->pNext,
+ PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT);
+ if (depth_clip_state) {
+ depth_clip_disable = !depth_clip_state->depthClipEnable;
+ }
+
+ radeon_set_context_reg(
+ ctx_cs, R_028810_PA_CL_CLIP_CNTL,
+ S_028810_DX_CLIP_SPACE_DEF(1) | // vulkan uses DX conventions.
+ S_028810_ZCLIP_NEAR_DISABLE(depth_clip_disable ? 1 : 0) |
+ S_028810_ZCLIP_FAR_DISABLE(depth_clip_disable ? 1 : 0) |
+ S_028810_DX_RASTERIZATION_KILL(vkraster->rasterizerDiscardEnable ? 1 : 0) |
+ S_028810_DX_LINEAR_ATTR_CLIP_ENA(1));
+
+ radeon_set_context_reg(ctx_cs, R_028BDC_PA_SC_LINE_CNTL, S_028BDC_DX10_DIAMOND_TEST_ENA(1));
+
+ /* Conservative rasterization. */
+ if (mode != VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT) {
+ pa_sc_conservative_rast = S_028C4C_PREZ_AA_MASK_ENABLE(1) | S_028C4C_POSTZ_AA_MASK_ENABLE(1) |
+ S_028C4C_CENTROID_SAMPLE_OVERRIDE(1);
+
+ if (mode == VK_CONSERVATIVE_RASTERIZATION_MODE_OVERESTIMATE_EXT) {
+ pa_sc_conservative_rast |=
+ S_028C4C_OVER_RAST_ENABLE(1) | S_028C4C_OVER_RAST_SAMPLE_SELECT(0) |
+ S_028C4C_UNDER_RAST_ENABLE(0) | S_028C4C_UNDER_RAST_SAMPLE_SELECT(1) |
+ S_028C4C_PBB_UNCERTAINTY_REGION_ENABLE(1);
+ } else {
+ assert(mode == VK_CONSERVATIVE_RASTERIZATION_MODE_UNDERESTIMATE_EXT);
+ pa_sc_conservative_rast |=
+ S_028C4C_OVER_RAST_ENABLE(0) | S_028C4C_OVER_RAST_SAMPLE_SELECT(1) |
+ S_028C4C_UNDER_RAST_ENABLE(1) | S_028C4C_UNDER_RAST_SAMPLE_SELECT(0) |
+ S_028C4C_PBB_UNCERTAINTY_REGION_ENABLE(0);
+ }
+ }
+ radeon_set_context_reg(ctx_cs, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL,
+ pa_sc_conservative_rast);
+}
static void
radv_pipeline_generate_multisample_state(struct radeon_cmdbuf *ctx_cs,
const struct radv_pipeline *pipeline)
{
- const struct radv_multisample_state *ms = &pipeline->graphics.ms;
-
- radeon_set_context_reg_seq(ctx_cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
- radeon_emit(ctx_cs, ms->pa_sc_aa_mask[0]);
- radeon_emit(ctx_cs, ms->pa_sc_aa_mask[1]);
-
- radeon_set_context_reg(ctx_cs, R_028804_DB_EQAA, ms->db_eqaa);
- radeon_set_context_reg(ctx_cs, R_028A48_PA_SC_MODE_CNTL_0, ms->pa_sc_mode_cntl_0);
- radeon_set_context_reg(ctx_cs, R_028A4C_PA_SC_MODE_CNTL_1, ms->pa_sc_mode_cntl_1);
- radeon_set_context_reg(ctx_cs, R_028BE0_PA_SC_AA_CONFIG, ms->pa_sc_aa_config);
-
- /* The exclusion bits can be set to improve rasterization efficiency
- * if no sample lies on the pixel boundary (-8 sample offset). It's
- * currently always TRUE because the driver doesn't support 16 samples.
- */
- bool exclusion = pipeline->device->physical_device->rad_info.chip_class >= GFX7;
- radeon_set_context_reg(ctx_cs, R_02882C_PA_SU_PRIM_FILTER_CNTL,
- S_02882C_XMAX_RIGHT_EXCLUSION(exclusion) |
- S_02882C_YMAX_BOTTOM_EXCLUSION(exclusion));
-
- /* GFX9: Flush DFSM when the AA mode changes. */
- if (pipeline->device->dfsm_allowed) {
- radeon_emit(ctx_cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(ctx_cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
- }
+ const struct radv_multisample_state *ms = &pipeline->graphics.ms;
+
+ radeon_set_context_reg_seq(ctx_cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
+ radeon_emit(ctx_cs, ms->pa_sc_aa_mask[0]);
+ radeon_emit(ctx_cs, ms->pa_sc_aa_mask[1]);
+
+ radeon_set_context_reg(ctx_cs, R_028804_DB_EQAA, ms->db_eqaa);
+ radeon_set_context_reg(ctx_cs, R_028A48_PA_SC_MODE_CNTL_0, ms->pa_sc_mode_cntl_0);
+ radeon_set_context_reg(ctx_cs, R_028A4C_PA_SC_MODE_CNTL_1, ms->pa_sc_mode_cntl_1);
+ radeon_set_context_reg(ctx_cs, R_028BE0_PA_SC_AA_CONFIG, ms->pa_sc_aa_config);
+
+ /* The exclusion bits can be set to improve rasterization efficiency
+ * if no sample lies on the pixel boundary (-8 sample offset). It's
+ * currently always TRUE because the driver doesn't support 16 samples.
+ */
+ bool exclusion = pipeline->device->physical_device->rad_info.chip_class >= GFX7;
+ radeon_set_context_reg(
+ ctx_cs, R_02882C_PA_SU_PRIM_FILTER_CNTL,
+ S_02882C_XMAX_RIGHT_EXCLUSION(exclusion) | S_02882C_YMAX_BOTTOM_EXCLUSION(exclusion));
+
+ /* GFX9: Flush DFSM when the AA mode changes. */
+ if (pipeline->device->dfsm_allowed) {
+ radeon_emit(ctx_cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(ctx_cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
+ }
}
static void
radv_pipeline_generate_vgt_gs_mode(struct radeon_cmdbuf *ctx_cs,
const struct radv_pipeline *pipeline)
{
- const struct radv_vs_output_info *outinfo = get_vs_output_info(pipeline);
- const struct radv_shader_variant *vs =
- pipeline->shaders[MESA_SHADER_TESS_EVAL] ?
- pipeline->shaders[MESA_SHADER_TESS_EVAL] :
- pipeline->shaders[MESA_SHADER_VERTEX];
- unsigned vgt_primitiveid_en = 0;
- uint32_t vgt_gs_mode = 0;
-
- if (radv_pipeline_has_ngg(pipeline))
- return;
-
- if (radv_pipeline_has_gs(pipeline)) {
- const struct radv_shader_variant *gs =
- pipeline->shaders[MESA_SHADER_GEOMETRY];
-
- vgt_gs_mode = ac_vgt_gs_mode(gs->info.gs.vertices_out,
- pipeline->device->physical_device->rad_info.chip_class);
- } else if (outinfo->export_prim_id || vs->info.uses_prim_id) {
- vgt_gs_mode = S_028A40_MODE(V_028A40_GS_SCENARIO_A);
- vgt_primitiveid_en |= S_028A84_PRIMITIVEID_EN(1);
- }
-
- radeon_set_context_reg(ctx_cs, R_028A84_VGT_PRIMITIVEID_EN, vgt_primitiveid_en);
- radeon_set_context_reg(ctx_cs, R_028A40_VGT_GS_MODE, vgt_gs_mode);
+ const struct radv_vs_output_info *outinfo = get_vs_output_info(pipeline);
+ const struct radv_shader_variant *vs = pipeline->shaders[MESA_SHADER_TESS_EVAL]
+ ? pipeline->shaders[MESA_SHADER_TESS_EVAL]
+ : pipeline->shaders[MESA_SHADER_VERTEX];
+ unsigned vgt_primitiveid_en = 0;
+ uint32_t vgt_gs_mode = 0;
+
+ if (radv_pipeline_has_ngg(pipeline))
+ return;
+
+ if (radv_pipeline_has_gs(pipeline)) {
+ const struct radv_shader_variant *gs = pipeline->shaders[MESA_SHADER_GEOMETRY];
+
+ vgt_gs_mode = ac_vgt_gs_mode(gs->info.gs.vertices_out,
+ pipeline->device->physical_device->rad_info.chip_class);
+ } else if (outinfo->export_prim_id || vs->info.uses_prim_id) {
+ vgt_gs_mode = S_028A40_MODE(V_028A40_GS_SCENARIO_A);
+ vgt_primitiveid_en |= S_028A84_PRIMITIVEID_EN(1);
+ }
+
+ radeon_set_context_reg(ctx_cs, R_028A84_VGT_PRIMITIVEID_EN, vgt_primitiveid_en);
+ radeon_set_context_reg(ctx_cs, R_028A40_VGT_GS_MODE, vgt_gs_mode);
}
static void
-radv_pipeline_generate_hw_vs(struct radeon_cmdbuf *ctx_cs,
- struct radeon_cmdbuf *cs,
- const struct radv_pipeline *pipeline,
- const struct radv_shader_variant *shader)
+radv_pipeline_generate_hw_vs(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
+ const struct radv_pipeline *pipeline,
+ const struct radv_shader_variant *shader)
{
- uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
-
- radeon_set_sh_reg_seq(cs, R_00B120_SPI_SHADER_PGM_LO_VS, 4);
- radeon_emit(cs, va >> 8);
- radeon_emit(cs, S_00B124_MEM_BASE(va >> 40));
- radeon_emit(cs, shader->config.rsrc1);
- radeon_emit(cs, shader->config.rsrc2);
-
- const struct radv_vs_output_info *outinfo = get_vs_output_info(pipeline);
- unsigned clip_dist_mask, cull_dist_mask, total_mask;
- clip_dist_mask = outinfo->clip_dist_mask;
- cull_dist_mask = outinfo->cull_dist_mask;
- total_mask = clip_dist_mask | cull_dist_mask;
-
- bool writes_primitive_shading_rate = outinfo->writes_primitive_shading_rate ||
- pipeline->device->force_vrs != RADV_FORCE_VRS_NONE;
- bool misc_vec_ena = outinfo->writes_pointsize ||
- outinfo->writes_layer ||
- outinfo->writes_viewport_index ||
- writes_primitive_shading_rate;
- unsigned spi_vs_out_config, nparams;
-
- /* VS is required to export at least one param. */
- nparams = MAX2(outinfo->param_exports, 1);
- spi_vs_out_config = S_0286C4_VS_EXPORT_COUNT(nparams - 1);
-
- if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
- spi_vs_out_config |= S_0286C4_NO_PC_EXPORT(outinfo->param_exports == 0);
- }
-
- radeon_set_context_reg(ctx_cs, R_0286C4_SPI_VS_OUT_CONFIG, spi_vs_out_config);
-
- radeon_set_context_reg(ctx_cs, R_02870C_SPI_SHADER_POS_FORMAT,
- S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) |
- S_02870C_POS1_EXPORT_FORMAT(outinfo->pos_exports > 1 ?
- V_02870C_SPI_SHADER_4COMP :
- V_02870C_SPI_SHADER_NONE) |
- S_02870C_POS2_EXPORT_FORMAT(outinfo->pos_exports > 2 ?
- V_02870C_SPI_SHADER_4COMP :
- V_02870C_SPI_SHADER_NONE) |
- S_02870C_POS3_EXPORT_FORMAT(outinfo->pos_exports > 3 ?
- V_02870C_SPI_SHADER_4COMP :
- V_02870C_SPI_SHADER_NONE));
-
- radeon_set_context_reg(ctx_cs, R_02881C_PA_CL_VS_OUT_CNTL,
- S_02881C_USE_VTX_POINT_SIZE(outinfo->writes_pointsize) |
- S_02881C_USE_VTX_RENDER_TARGET_INDX(outinfo->writes_layer) |
- S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) |
- S_02881C_USE_VTX_VRS_RATE(writes_primitive_shading_rate) |
- S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) |
- S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) |
- S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) |
- S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) |
- cull_dist_mask << 8 |
- clip_dist_mask);
-
- if (pipeline->device->physical_device->rad_info.chip_class <= GFX8)
- radeon_set_context_reg(ctx_cs, R_028AB4_VGT_REUSE_OFF,
- outinfo->writes_viewport_index);
+ uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
+
+ radeon_set_sh_reg_seq(cs, R_00B120_SPI_SHADER_PGM_LO_VS, 4);
+ radeon_emit(cs, va >> 8);
+ radeon_emit(cs, S_00B124_MEM_BASE(va >> 40));
+ radeon_emit(cs, shader->config.rsrc1);
+ radeon_emit(cs, shader->config.rsrc2);
+
+ const struct radv_vs_output_info *outinfo = get_vs_output_info(pipeline);
+ unsigned clip_dist_mask, cull_dist_mask, total_mask;
+ clip_dist_mask = outinfo->clip_dist_mask;
+ cull_dist_mask = outinfo->cull_dist_mask;
+ total_mask = clip_dist_mask | cull_dist_mask;
+
+ bool writes_primitive_shading_rate =
+ outinfo->writes_primitive_shading_rate || pipeline->device->force_vrs != RADV_FORCE_VRS_NONE;
+ bool misc_vec_ena = outinfo->writes_pointsize || outinfo->writes_layer ||
+ outinfo->writes_viewport_index || writes_primitive_shading_rate;
+ unsigned spi_vs_out_config, nparams;
+
+ /* VS is required to export at least one param. */
+ nparams = MAX2(outinfo->param_exports, 1);
+ spi_vs_out_config = S_0286C4_VS_EXPORT_COUNT(nparams - 1);
+
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
+ spi_vs_out_config |= S_0286C4_NO_PC_EXPORT(outinfo->param_exports == 0);
+ }
+
+ radeon_set_context_reg(ctx_cs, R_0286C4_SPI_VS_OUT_CONFIG, spi_vs_out_config);
+
+ radeon_set_context_reg(
+ ctx_cs, R_02870C_SPI_SHADER_POS_FORMAT,
+ S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) |
+ S_02870C_POS1_EXPORT_FORMAT(outinfo->pos_exports > 1 ? V_02870C_SPI_SHADER_4COMP
+ : V_02870C_SPI_SHADER_NONE) |
+ S_02870C_POS2_EXPORT_FORMAT(outinfo->pos_exports > 2 ? V_02870C_SPI_SHADER_4COMP
+ : V_02870C_SPI_SHADER_NONE) |
+ S_02870C_POS3_EXPORT_FORMAT(outinfo->pos_exports > 3 ? V_02870C_SPI_SHADER_4COMP
+ : V_02870C_SPI_SHADER_NONE));
+
+ radeon_set_context_reg(ctx_cs, R_02881C_PA_CL_VS_OUT_CNTL,
+ S_02881C_USE_VTX_POINT_SIZE(outinfo->writes_pointsize) |
+ S_02881C_USE_VTX_RENDER_TARGET_INDX(outinfo->writes_layer) |
+ S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) |
+ S_02881C_USE_VTX_VRS_RATE(writes_primitive_shading_rate) |
+ S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) |
+ S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) |
+ S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) |
+ S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) |
+ cull_dist_mask << 8 | clip_dist_mask);
+
+ if (pipeline->device->physical_device->rad_info.chip_class <= GFX8)
+ radeon_set_context_reg(ctx_cs, R_028AB4_VGT_REUSE_OFF, outinfo->writes_viewport_index);
}
static void
-radv_pipeline_generate_hw_es(struct radeon_cmdbuf *cs,
- const struct radv_pipeline *pipeline,
- const struct radv_shader_variant *shader)
+radv_pipeline_generate_hw_es(struct radeon_cmdbuf *cs, const struct radv_pipeline *pipeline,
+ const struct radv_shader_variant *shader)
{
- uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
+ uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
- radeon_set_sh_reg_seq(cs, R_00B320_SPI_SHADER_PGM_LO_ES, 4);
- radeon_emit(cs, va >> 8);
- radeon_emit(cs, S_00B324_MEM_BASE(va >> 40));
- radeon_emit(cs, shader->config.rsrc1);
- radeon_emit(cs, shader->config.rsrc2);
+ radeon_set_sh_reg_seq(cs, R_00B320_SPI_SHADER_PGM_LO_ES, 4);
+ radeon_emit(cs, va >> 8);
+ radeon_emit(cs, S_00B324_MEM_BASE(va >> 40));
+ radeon_emit(cs, shader->config.rsrc1);
+ radeon_emit(cs, shader->config.rsrc2);
}
static void
-radv_pipeline_generate_hw_ls(struct radeon_cmdbuf *cs,
- const struct radv_pipeline *pipeline,
- const struct radv_shader_variant *shader)
+radv_pipeline_generate_hw_ls(struct radeon_cmdbuf *cs, const struct radv_pipeline *pipeline,
+ const struct radv_shader_variant *shader)
{
- unsigned num_lds_blocks = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.num_lds_blocks;
- uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
- uint32_t rsrc2 = shader->config.rsrc2;
-
- radeon_set_sh_reg_seq(cs, R_00B520_SPI_SHADER_PGM_LO_LS, 2);
- radeon_emit(cs, va >> 8);
- radeon_emit(cs, S_00B524_MEM_BASE(va >> 40));
-
- rsrc2 |= S_00B52C_LDS_SIZE(num_lds_blocks);
- if (pipeline->device->physical_device->rad_info.chip_class == GFX7 &&
- pipeline->device->physical_device->rad_info.family != CHIP_HAWAII)
- radeon_set_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, rsrc2);
-
- radeon_set_sh_reg_seq(cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2);
- radeon_emit(cs, shader->config.rsrc1);
- radeon_emit(cs, rsrc2);
+ unsigned num_lds_blocks = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.num_lds_blocks;
+ uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
+ uint32_t rsrc2 = shader->config.rsrc2;
+
+ radeon_set_sh_reg_seq(cs, R_00B520_SPI_SHADER_PGM_LO_LS, 2);
+ radeon_emit(cs, va >> 8);
+ radeon_emit(cs, S_00B524_MEM_BASE(va >> 40));
+
+ rsrc2 |= S_00B52C_LDS_SIZE(num_lds_blocks);
+ if (pipeline->device->physical_device->rad_info.chip_class == GFX7 &&
+ pipeline->device->physical_device->rad_info.family != CHIP_HAWAII)
+ radeon_set_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, rsrc2);
+
+ radeon_set_sh_reg_seq(cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2);
+ radeon_emit(cs, shader->config.rsrc1);
+ radeon_emit(cs, rsrc2);
}
static void
-radv_pipeline_generate_hw_ngg(struct radeon_cmdbuf *ctx_cs,
- struct radeon_cmdbuf *cs,
- const struct radv_pipeline *pipeline,
- const struct radv_shader_variant *shader)
+radv_pipeline_generate_hw_ngg(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
+ const struct radv_pipeline *pipeline,
+ const struct radv_shader_variant *shader)
{
- uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
- gl_shader_stage es_type =
- radv_pipeline_has_tess(pipeline) ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
- struct radv_shader_variant *es =
- es_type == MESA_SHADER_TESS_EVAL ? pipeline->shaders[MESA_SHADER_TESS_EVAL] : pipeline->shaders[MESA_SHADER_VERTEX];
- const struct gfx10_ngg_info *ngg_state = &shader->info.ngg_info;
-
- radeon_set_sh_reg_seq(cs, R_00B320_SPI_SHADER_PGM_LO_ES, 2);
- radeon_emit(cs, va >> 8);
- radeon_emit(cs, S_00B324_MEM_BASE(va >> 40));
- radeon_set_sh_reg_seq(cs, R_00B228_SPI_SHADER_PGM_RSRC1_GS, 2);
- radeon_emit(cs, shader->config.rsrc1);
- radeon_emit(cs, shader->config.rsrc2);
-
- const struct radv_vs_output_info *outinfo = get_vs_output_info(pipeline);
- unsigned clip_dist_mask, cull_dist_mask, total_mask;
- clip_dist_mask = outinfo->clip_dist_mask;
- cull_dist_mask = outinfo->cull_dist_mask;
- total_mask = clip_dist_mask | cull_dist_mask;
-
- bool writes_primitive_shading_rate = outinfo->writes_primitive_shading_rate ||
- pipeline->device->force_vrs != RADV_FORCE_VRS_NONE;
- bool misc_vec_ena = outinfo->writes_pointsize ||
- outinfo->writes_layer ||
- outinfo->writes_viewport_index ||
- writes_primitive_shading_rate;
- bool es_enable_prim_id = outinfo->export_prim_id ||
- (es && es->info.uses_prim_id);
- bool break_wave_at_eoi = false;
- unsigned ge_cntl;
- unsigned nparams;
-
- if (es_type == MESA_SHADER_TESS_EVAL) {
- struct radv_shader_variant *gs =
- pipeline->shaders[MESA_SHADER_GEOMETRY];
-
- if (es_enable_prim_id || (gs && gs->info.uses_prim_id))
- break_wave_at_eoi = true;
- }
-
- nparams = MAX2(outinfo->param_exports, 1);
- radeon_set_context_reg(ctx_cs, R_0286C4_SPI_VS_OUT_CONFIG,
- S_0286C4_VS_EXPORT_COUNT(nparams - 1) |
- S_0286C4_NO_PC_EXPORT(outinfo->param_exports == 0));
-
- radeon_set_context_reg(ctx_cs, R_028708_SPI_SHADER_IDX_FORMAT,
- S_028708_IDX0_EXPORT_FORMAT(V_028708_SPI_SHADER_1COMP));
- radeon_set_context_reg(ctx_cs, R_02870C_SPI_SHADER_POS_FORMAT,
- S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) |
- S_02870C_POS1_EXPORT_FORMAT(outinfo->pos_exports > 1 ?
- V_02870C_SPI_SHADER_4COMP :
- V_02870C_SPI_SHADER_NONE) |
- S_02870C_POS2_EXPORT_FORMAT(outinfo->pos_exports > 2 ?
- V_02870C_SPI_SHADER_4COMP :
- V_02870C_SPI_SHADER_NONE) |
- S_02870C_POS3_EXPORT_FORMAT(outinfo->pos_exports > 3 ?
- V_02870C_SPI_SHADER_4COMP :
- V_02870C_SPI_SHADER_NONE));
-
- radeon_set_context_reg(ctx_cs, R_02881C_PA_CL_VS_OUT_CNTL,
- S_02881C_USE_VTX_POINT_SIZE(outinfo->writes_pointsize) |
- S_02881C_USE_VTX_RENDER_TARGET_INDX(outinfo->writes_layer) |
- S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) |
- S_02881C_USE_VTX_VRS_RATE(writes_primitive_shading_rate) |
- S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) |
- S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) |
- S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) |
- S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) |
- cull_dist_mask << 8 |
- clip_dist_mask);
-
- radeon_set_context_reg(ctx_cs, R_028A84_VGT_PRIMITIVEID_EN,
- S_028A84_PRIMITIVEID_EN(es_enable_prim_id) |
- S_028A84_NGG_DISABLE_PROVOK_REUSE(outinfo->export_prim_id));
-
- radeon_set_context_reg(ctx_cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
- ngg_state->vgt_esgs_ring_itemsize);
-
- /* NGG specific registers. */
- struct radv_shader_variant *gs = pipeline->shaders[MESA_SHADER_GEOMETRY];
- uint32_t gs_num_invocations = gs ? gs->info.gs.invocations : 1;
-
- radeon_set_context_reg(ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL,
- S_028A44_ES_VERTS_PER_SUBGRP(ngg_state->hw_max_esverts) |
- S_028A44_GS_PRIMS_PER_SUBGRP(ngg_state->max_gsprims) |
- S_028A44_GS_INST_PRIMS_IN_SUBGRP(ngg_state->max_gsprims * gs_num_invocations));
- radeon_set_context_reg(ctx_cs, R_0287FC_GE_MAX_OUTPUT_PER_SUBGROUP,
- S_0287FC_MAX_VERTS_PER_SUBGROUP(ngg_state->max_out_verts));
- radeon_set_context_reg(ctx_cs, R_028B4C_GE_NGG_SUBGRP_CNTL,
- S_028B4C_PRIM_AMP_FACTOR(ngg_state->prim_amp_factor) |
- S_028B4C_THDS_PER_SUBGRP(0)); /* for fast launch */
- radeon_set_context_reg(ctx_cs, R_028B90_VGT_GS_INSTANCE_CNT,
- S_028B90_CNT(gs_num_invocations) |
- S_028B90_ENABLE(gs_num_invocations > 1) |
- S_028B90_EN_MAX_VERT_OUT_PER_GS_INSTANCE(ngg_state->max_vert_out_per_gs_instance));
-
- /* User edge flags are set by the pos exports. If user edge flags are
- * not used, we must use hw-generated edge flags and pass them via
- * the prim export to prevent drawing lines on internal edges of
- * decomposed primitives (such as quads) with polygon mode = lines.
- *
- * TODO: We should combine hw-generated edge flags with user edge
- * flags in the shader.
- */
- radeon_set_context_reg(ctx_cs, R_028838_PA_CL_NGG_CNTL,
- S_028838_INDEX_BUF_EDGE_FLAG_ENA(!radv_pipeline_has_tess(pipeline) &&
- !radv_pipeline_has_gs(pipeline)) |
- /* Reuse for NGG. */
- S_028838_VERTEX_REUSE_DEPTH(pipeline->device->physical_device->rad_info.chip_class >= GFX10_3 ? 30 : 0));
-
- ge_cntl = S_03096C_PRIM_GRP_SIZE(ngg_state->max_gsprims) |
- S_03096C_VERT_GRP_SIZE(256) | /* 256 = disable vertex grouping */
- S_03096C_BREAK_WAVE_AT_EOI(break_wave_at_eoi);
-
- /* Bug workaround for a possible hang with non-tessellation cases.
- * Tessellation always sets GE_CNTL.VERT_GRP_SIZE = 0
- *
- * Requirement: GE_CNTL.VERT_GRP_SIZE = VGT_GS_ONCHIP_CNTL.ES_VERTS_PER_SUBGRP - 5
- */
- if (pipeline->device->physical_device->rad_info.chip_class == GFX10 &&
- !radv_pipeline_has_tess(pipeline) &&
- ngg_state->hw_max_esverts != 256) {
- ge_cntl &= C_03096C_VERT_GRP_SIZE;
-
- if (ngg_state->hw_max_esverts > 5) {
- ge_cntl |= S_03096C_VERT_GRP_SIZE(ngg_state->hw_max_esverts - 5);
- }
- }
-
- radeon_set_uconfig_reg(ctx_cs, R_03096C_GE_CNTL, ge_cntl);
+ uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
+ gl_shader_stage es_type =
+ radv_pipeline_has_tess(pipeline) ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
+ struct radv_shader_variant *es = es_type == MESA_SHADER_TESS_EVAL
+ ? pipeline->shaders[MESA_SHADER_TESS_EVAL]
+ : pipeline->shaders[MESA_SHADER_VERTEX];
+ const struct gfx10_ngg_info *ngg_state = &shader->info.ngg_info;
+
+ radeon_set_sh_reg_seq(cs, R_00B320_SPI_SHADER_PGM_LO_ES, 2);
+ radeon_emit(cs, va >> 8);
+ radeon_emit(cs, S_00B324_MEM_BASE(va >> 40));
+ radeon_set_sh_reg_seq(cs, R_00B228_SPI_SHADER_PGM_RSRC1_GS, 2);
+ radeon_emit(cs, shader->config.rsrc1);
+ radeon_emit(cs, shader->config.rsrc2);
+
+ const struct radv_vs_output_info *outinfo = get_vs_output_info(pipeline);
+ unsigned clip_dist_mask, cull_dist_mask, total_mask;
+ clip_dist_mask = outinfo->clip_dist_mask;
+ cull_dist_mask = outinfo->cull_dist_mask;
+ total_mask = clip_dist_mask | cull_dist_mask;
+
+ bool writes_primitive_shading_rate =
+ outinfo->writes_primitive_shading_rate || pipeline->device->force_vrs != RADV_FORCE_VRS_NONE;
+ bool misc_vec_ena = outinfo->writes_pointsize || outinfo->writes_layer ||
+ outinfo->writes_viewport_index || writes_primitive_shading_rate;
+ bool es_enable_prim_id = outinfo->export_prim_id || (es && es->info.uses_prim_id);
+ bool break_wave_at_eoi = false;
+ unsigned ge_cntl;
+ unsigned nparams;
+
+ if (es_type == MESA_SHADER_TESS_EVAL) {
+ struct radv_shader_variant *gs = pipeline->shaders[MESA_SHADER_GEOMETRY];
+
+ if (es_enable_prim_id || (gs && gs->info.uses_prim_id))
+ break_wave_at_eoi = true;
+ }
+
+ nparams = MAX2(outinfo->param_exports, 1);
+ radeon_set_context_reg(
+ ctx_cs, R_0286C4_SPI_VS_OUT_CONFIG,
+ S_0286C4_VS_EXPORT_COUNT(nparams - 1) | S_0286C4_NO_PC_EXPORT(outinfo->param_exports == 0));
+
+ radeon_set_context_reg(ctx_cs, R_028708_SPI_SHADER_IDX_FORMAT,
+ S_028708_IDX0_EXPORT_FORMAT(V_028708_SPI_SHADER_1COMP));
+ radeon_set_context_reg(
+ ctx_cs, R_02870C_SPI_SHADER_POS_FORMAT,
+ S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) |
+ S_02870C_POS1_EXPORT_FORMAT(outinfo->pos_exports > 1 ? V_02870C_SPI_SHADER_4COMP
+ : V_02870C_SPI_SHADER_NONE) |
+ S_02870C_POS2_EXPORT_FORMAT(outinfo->pos_exports > 2 ? V_02870C_SPI_SHADER_4COMP
+ : V_02870C_SPI_SHADER_NONE) |
+ S_02870C_POS3_EXPORT_FORMAT(outinfo->pos_exports > 3 ? V_02870C_SPI_SHADER_4COMP
+ : V_02870C_SPI_SHADER_NONE));
+
+ radeon_set_context_reg(ctx_cs, R_02881C_PA_CL_VS_OUT_CNTL,
+ S_02881C_USE_VTX_POINT_SIZE(outinfo->writes_pointsize) |
+ S_02881C_USE_VTX_RENDER_TARGET_INDX(outinfo->writes_layer) |
+ S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) |
+ S_02881C_USE_VTX_VRS_RATE(writes_primitive_shading_rate) |
+ S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) |
+ S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) |
+ S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) |
+ S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) |
+ cull_dist_mask << 8 | clip_dist_mask);
+
+ radeon_set_context_reg(ctx_cs, R_028A84_VGT_PRIMITIVEID_EN,
+ S_028A84_PRIMITIVEID_EN(es_enable_prim_id) |
+ S_028A84_NGG_DISABLE_PROVOK_REUSE(outinfo->export_prim_id));
+
+ radeon_set_context_reg(ctx_cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
+ ngg_state->vgt_esgs_ring_itemsize);
+
+ /* NGG specific registers. */
+ struct radv_shader_variant *gs = pipeline->shaders[MESA_SHADER_GEOMETRY];
+ uint32_t gs_num_invocations = gs ? gs->info.gs.invocations : 1;
+
+ radeon_set_context_reg(
+ ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL,
+ S_028A44_ES_VERTS_PER_SUBGRP(ngg_state->hw_max_esverts) |
+ S_028A44_GS_PRIMS_PER_SUBGRP(ngg_state->max_gsprims) |
+ S_028A44_GS_INST_PRIMS_IN_SUBGRP(ngg_state->max_gsprims * gs_num_invocations));
+ radeon_set_context_reg(ctx_cs, R_0287FC_GE_MAX_OUTPUT_PER_SUBGROUP,
+ S_0287FC_MAX_VERTS_PER_SUBGROUP(ngg_state->max_out_verts));
+ radeon_set_context_reg(ctx_cs, R_028B4C_GE_NGG_SUBGRP_CNTL,
+ S_028B4C_PRIM_AMP_FACTOR(ngg_state->prim_amp_factor) |
+ S_028B4C_THDS_PER_SUBGRP(0)); /* for fast launch */
+ radeon_set_context_reg(
+ ctx_cs, R_028B90_VGT_GS_INSTANCE_CNT,
+ S_028B90_CNT(gs_num_invocations) | S_028B90_ENABLE(gs_num_invocations > 1) |
+ S_028B90_EN_MAX_VERT_OUT_PER_GS_INSTANCE(ngg_state->max_vert_out_per_gs_instance));
+
+ /* User edge flags are set by the pos exports. If user edge flags are
+ * not used, we must use hw-generated edge flags and pass them via
+ * the prim export to prevent drawing lines on internal edges of
+ * decomposed primitives (such as quads) with polygon mode = lines.
+ *
+ * TODO: We should combine hw-generated edge flags with user edge
+ * flags in the shader.
+ */
+ radeon_set_context_reg(
+ ctx_cs, R_028838_PA_CL_NGG_CNTL,
+ S_028838_INDEX_BUF_EDGE_FLAG_ENA(!radv_pipeline_has_tess(pipeline) &&
+ !radv_pipeline_has_gs(pipeline)) |
+ /* Reuse for NGG. */
+ S_028838_VERTEX_REUSE_DEPTH(
+ pipeline->device->physical_device->rad_info.chip_class >= GFX10_3 ? 30 : 0));
+
+ ge_cntl = S_03096C_PRIM_GRP_SIZE(ngg_state->max_gsprims) |
+ S_03096C_VERT_GRP_SIZE(256) | /* 256 = disable vertex grouping */
+ S_03096C_BREAK_WAVE_AT_EOI(break_wave_at_eoi);
+
+ /* Bug workaround for a possible hang with non-tessellation cases.
+ * Tessellation always sets GE_CNTL.VERT_GRP_SIZE = 0
+ *
+ * Requirement: GE_CNTL.VERT_GRP_SIZE = VGT_GS_ONCHIP_CNTL.ES_VERTS_PER_SUBGRP - 5
+ */
+ if (pipeline->device->physical_device->rad_info.chip_class == GFX10 &&
+ !radv_pipeline_has_tess(pipeline) && ngg_state->hw_max_esverts != 256) {
+ ge_cntl &= C_03096C_VERT_GRP_SIZE;
+
+ if (ngg_state->hw_max_esverts > 5) {
+ ge_cntl |= S_03096C_VERT_GRP_SIZE(ngg_state->hw_max_esverts - 5);
+ }
+ }
+
+ radeon_set_uconfig_reg(ctx_cs, R_03096C_GE_CNTL, ge_cntl);
}
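To put a number on the GE_CNTL workaround above: with an illustrative hw_max_esverts of 192 on GFX10 without tessellation (an example value, the code does not fix one), the fixup replaces the "vertex grouping disabled" value of 256 as follows:

   /* Illustrative instantiation of the VERT_GRP_SIZE workaround above;
    * 192 stands in for ngg_state->hw_max_esverts. */
   ge_cntl &= C_03096C_VERT_GRP_SIZE;          /* clear VERT_GRP_SIZE(256)            */
   ge_cntl |= S_03096C_VERT_GRP_SIZE(192 - 5); /* ES_VERTS_PER_SUBGRP - 5, i.e. 187   */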
static void
-radv_pipeline_generate_hw_hs(struct radeon_cmdbuf *cs,
- const struct radv_pipeline *pipeline,
- const struct radv_shader_variant *shader)
+radv_pipeline_generate_hw_hs(struct radeon_cmdbuf *cs, const struct radv_pipeline *pipeline,
+ const struct radv_shader_variant *shader)
{
- uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
-
- if (pipeline->device->physical_device->rad_info.chip_class >= GFX9) {
- if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
- radeon_set_sh_reg_seq(cs, R_00B520_SPI_SHADER_PGM_LO_LS, 2);
- radeon_emit(cs, va >> 8);
- radeon_emit(cs, S_00B524_MEM_BASE(va >> 40));
- } else {
- radeon_set_sh_reg_seq(cs, R_00B410_SPI_SHADER_PGM_LO_LS, 2);
- radeon_emit(cs, va >> 8);
- radeon_emit(cs, S_00B414_MEM_BASE(va >> 40));
- }
-
- radeon_set_sh_reg_seq(cs, R_00B428_SPI_SHADER_PGM_RSRC1_HS, 2);
- radeon_emit(cs, shader->config.rsrc1);
- radeon_emit(cs, shader->config.rsrc2);
- } else {
- radeon_set_sh_reg_seq(cs, R_00B420_SPI_SHADER_PGM_LO_HS, 4);
- radeon_emit(cs, va >> 8);
- radeon_emit(cs, S_00B424_MEM_BASE(va >> 40));
- radeon_emit(cs, shader->config.rsrc1);
- radeon_emit(cs, shader->config.rsrc2);
- }
+ uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
+
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX9) {
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
+ radeon_set_sh_reg_seq(cs, R_00B520_SPI_SHADER_PGM_LO_LS, 2);
+ radeon_emit(cs, va >> 8);
+ radeon_emit(cs, S_00B524_MEM_BASE(va >> 40));
+ } else {
+ radeon_set_sh_reg_seq(cs, R_00B410_SPI_SHADER_PGM_LO_LS, 2);
+ radeon_emit(cs, va >> 8);
+ radeon_emit(cs, S_00B414_MEM_BASE(va >> 40));
+ }
+
+ radeon_set_sh_reg_seq(cs, R_00B428_SPI_SHADER_PGM_RSRC1_HS, 2);
+ radeon_emit(cs, shader->config.rsrc1);
+ radeon_emit(cs, shader->config.rsrc2);
+ } else {
+ radeon_set_sh_reg_seq(cs, R_00B420_SPI_SHADER_PGM_LO_HS, 4);
+ radeon_emit(cs, va >> 8);
+ radeon_emit(cs, S_00B424_MEM_BASE(va >> 40));
+ radeon_emit(cs, shader->config.rsrc1);
+ radeon_emit(cs, shader->config.rsrc2);
+ }
}
static void
-radv_pipeline_generate_vertex_shader(struct radeon_cmdbuf *ctx_cs,
- struct radeon_cmdbuf *cs,
- const struct radv_pipeline *pipeline)
+radv_pipeline_generate_vertex_shader(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
+ const struct radv_pipeline *pipeline)
{
- struct radv_shader_variant *vs;
-
- /* Skip shaders merged into HS/GS */
- vs = pipeline->shaders[MESA_SHADER_VERTEX];
- if (!vs)
- return;
-
- if (vs->info.vs.as_ls)
- radv_pipeline_generate_hw_ls(cs, pipeline, vs);
- else if (vs->info.vs.as_es)
- radv_pipeline_generate_hw_es(cs, pipeline, vs);
- else if (vs->info.is_ngg)
- radv_pipeline_generate_hw_ngg(ctx_cs, cs, pipeline, vs);
- else
- radv_pipeline_generate_hw_vs(ctx_cs, cs, pipeline, vs);
+ struct radv_shader_variant *vs;
+
+ /* Skip shaders merged into HS/GS */
+ vs = pipeline->shaders[MESA_SHADER_VERTEX];
+ if (!vs)
+ return;
+
+ if (vs->info.vs.as_ls)
+ radv_pipeline_generate_hw_ls(cs, pipeline, vs);
+ else if (vs->info.vs.as_es)
+ radv_pipeline_generate_hw_es(cs, pipeline, vs);
+ else if (vs->info.is_ngg)
+ radv_pipeline_generate_hw_ngg(ctx_cs, cs, pipeline, vs);
+ else
+ radv_pipeline_generate_hw_vs(ctx_cs, cs, pipeline, vs);
}
static void
-radv_pipeline_generate_tess_shaders(struct radeon_cmdbuf *ctx_cs,
- struct radeon_cmdbuf *cs,
- const struct radv_pipeline *pipeline)
+radv_pipeline_generate_tess_shaders(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
+ const struct radv_pipeline *pipeline)
{
- struct radv_shader_variant *tes, *tcs;
-
- tcs = pipeline->shaders[MESA_SHADER_TESS_CTRL];
- tes = pipeline->shaders[MESA_SHADER_TESS_EVAL];
-
- if (tes) {
- if (tes->info.is_ngg) {
- radv_pipeline_generate_hw_ngg(ctx_cs, cs, pipeline, tes);
- } else if (tes->info.tes.as_es)
- radv_pipeline_generate_hw_es(cs, pipeline, tes);
- else
- radv_pipeline_generate_hw_vs(ctx_cs, cs, pipeline, tes);
- }
-
- radv_pipeline_generate_hw_hs(cs, pipeline, tcs);
-
- if (pipeline->device->physical_device->rad_info.chip_class >= GFX10 &&
- !radv_pipeline_has_gs(pipeline) && !radv_pipeline_has_ngg(pipeline)) {
- radeon_set_context_reg(ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL,
- S_028A44_ES_VERTS_PER_SUBGRP(250) |
- S_028A44_GS_PRIMS_PER_SUBGRP(126) |
- S_028A44_GS_INST_PRIMS_IN_SUBGRP(126));
- }
+ struct radv_shader_variant *tes, *tcs;
+
+ tcs = pipeline->shaders[MESA_SHADER_TESS_CTRL];
+ tes = pipeline->shaders[MESA_SHADER_TESS_EVAL];
+
+ if (tes) {
+ if (tes->info.is_ngg) {
+ radv_pipeline_generate_hw_ngg(ctx_cs, cs, pipeline, tes);
+ } else if (tes->info.tes.as_es)
+ radv_pipeline_generate_hw_es(cs, pipeline, tes);
+ else
+ radv_pipeline_generate_hw_vs(ctx_cs, cs, pipeline, tes);
+ }
+
+ radv_pipeline_generate_hw_hs(cs, pipeline, tcs);
+
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX10 &&
+ !radv_pipeline_has_gs(pipeline) && !radv_pipeline_has_ngg(pipeline)) {
+ radeon_set_context_reg(ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL,
+ S_028A44_ES_VERTS_PER_SUBGRP(250) | S_028A44_GS_PRIMS_PER_SUBGRP(126) |
+ S_028A44_GS_INST_PRIMS_IN_SUBGRP(126));
+ }
}
static void
radv_pipeline_generate_tess_state(struct radeon_cmdbuf *ctx_cs,
- const struct radv_pipeline *pipeline,
- const VkGraphicsPipelineCreateInfo *pCreateInfo)
+ const struct radv_pipeline *pipeline,
+ const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
- struct radv_shader_variant *tes = radv_get_shader(pipeline, MESA_SHADER_TESS_EVAL);
- unsigned type = 0, partitioning = 0, topology = 0, distribution_mode = 0;
- unsigned num_tcs_input_cp, num_tcs_output_cp, num_patches;
- unsigned ls_hs_config;
-
- num_tcs_input_cp = pCreateInfo->pTessellationState->patchControlPoints;
- num_tcs_output_cp = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.tcs_vertices_out; //TCS VERTICES OUT
- num_patches = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.num_tess_patches;
-
- ls_hs_config = S_028B58_NUM_PATCHES(num_patches) |
- S_028B58_HS_NUM_INPUT_CP(num_tcs_input_cp) |
- S_028B58_HS_NUM_OUTPUT_CP(num_tcs_output_cp);
-
- if (pipeline->device->physical_device->rad_info.chip_class >= GFX7) {
- radeon_set_context_reg_idx(ctx_cs, R_028B58_VGT_LS_HS_CONFIG,
- 2, ls_hs_config);
- } else {
- radeon_set_context_reg(ctx_cs, R_028B58_VGT_LS_HS_CONFIG,
- ls_hs_config);
- }
-
- switch (tes->info.tes.primitive_mode) {
- case GL_TRIANGLES:
- type = V_028B6C_TESS_TRIANGLE;
- break;
- case GL_QUADS:
- type = V_028B6C_TESS_QUAD;
- break;
- case GL_ISOLINES:
- type = V_028B6C_TESS_ISOLINE;
- break;
- }
-
- switch (tes->info.tes.spacing) {
- case TESS_SPACING_EQUAL:
- partitioning = V_028B6C_PART_INTEGER;
- break;
- case TESS_SPACING_FRACTIONAL_ODD:
- partitioning = V_028B6C_PART_FRAC_ODD;
- break;
- case TESS_SPACING_FRACTIONAL_EVEN:
- partitioning = V_028B6C_PART_FRAC_EVEN;
- break;
- default:
- break;
- }
-
- bool ccw = tes->info.tes.ccw;
- const VkPipelineTessellationDomainOriginStateCreateInfo *domain_origin_state =
- vk_find_struct_const(pCreateInfo->pTessellationState,
- PIPELINE_TESSELLATION_DOMAIN_ORIGIN_STATE_CREATE_INFO);
-
- if (domain_origin_state && domain_origin_state->domainOrigin != VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT)
- ccw = !ccw;
-
- if (tes->info.tes.point_mode)
- topology = V_028B6C_OUTPUT_POINT;
- else if (tes->info.tes.primitive_mode == GL_ISOLINES)
- topology = V_028B6C_OUTPUT_LINE;
- else if (ccw)
- topology = V_028B6C_OUTPUT_TRIANGLE_CCW;
- else
- topology = V_028B6C_OUTPUT_TRIANGLE_CW;
-
- if (pipeline->device->physical_device->rad_info.has_distributed_tess) {
- if (pipeline->device->physical_device->rad_info.family == CHIP_FIJI ||
- pipeline->device->physical_device->rad_info.family >= CHIP_POLARIS10)
- distribution_mode = V_028B6C_TRAPEZOIDS;
- else
- distribution_mode = V_028B6C_DONUTS;
- } else
- distribution_mode = V_028B6C_NO_DIST;
-
- radeon_set_context_reg(ctx_cs, R_028B6C_VGT_TF_PARAM,
- S_028B6C_TYPE(type) |
- S_028B6C_PARTITIONING(partitioning) |
- S_028B6C_TOPOLOGY(topology) |
- S_028B6C_DISTRIBUTION_MODE(distribution_mode));
+ struct radv_shader_variant *tes = radv_get_shader(pipeline, MESA_SHADER_TESS_EVAL);
+ unsigned type = 0, partitioning = 0, topology = 0, distribution_mode = 0;
+ unsigned num_tcs_input_cp, num_tcs_output_cp, num_patches;
+ unsigned ls_hs_config;
+
+ num_tcs_input_cp = pCreateInfo->pTessellationState->patchControlPoints;
+ num_tcs_output_cp =
+ pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.tcs_vertices_out; // TCS VERTICES OUT
+ num_patches = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.num_tess_patches;
+
+ ls_hs_config = S_028B58_NUM_PATCHES(num_patches) | S_028B58_HS_NUM_INPUT_CP(num_tcs_input_cp) |
+ S_028B58_HS_NUM_OUTPUT_CP(num_tcs_output_cp);
+
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX7) {
+ radeon_set_context_reg_idx(ctx_cs, R_028B58_VGT_LS_HS_CONFIG, 2, ls_hs_config);
+ } else {
+ radeon_set_context_reg(ctx_cs, R_028B58_VGT_LS_HS_CONFIG, ls_hs_config);
+ }
+
+ switch (tes->info.tes.primitive_mode) {
+ case GL_TRIANGLES:
+ type = V_028B6C_TESS_TRIANGLE;
+ break;
+ case GL_QUADS:
+ type = V_028B6C_TESS_QUAD;
+ break;
+ case GL_ISOLINES:
+ type = V_028B6C_TESS_ISOLINE;
+ break;
+ }
+
+ switch (tes->info.tes.spacing) {
+ case TESS_SPACING_EQUAL:
+ partitioning = V_028B6C_PART_INTEGER;
+ break;
+ case TESS_SPACING_FRACTIONAL_ODD:
+ partitioning = V_028B6C_PART_FRAC_ODD;
+ break;
+ case TESS_SPACING_FRACTIONAL_EVEN:
+ partitioning = V_028B6C_PART_FRAC_EVEN;
+ break;
+ default:
+ break;
+ }
+
+ bool ccw = tes->info.tes.ccw;
+ const VkPipelineTessellationDomainOriginStateCreateInfo *domain_origin_state =
+ vk_find_struct_const(pCreateInfo->pTessellationState,
+ PIPELINE_TESSELLATION_DOMAIN_ORIGIN_STATE_CREATE_INFO);
+
+ if (domain_origin_state &&
+ domain_origin_state->domainOrigin != VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT)
+ ccw = !ccw;
+
+ if (tes->info.tes.point_mode)
+ topology = V_028B6C_OUTPUT_POINT;
+ else if (tes->info.tes.primitive_mode == GL_ISOLINES)
+ topology = V_028B6C_OUTPUT_LINE;
+ else if (ccw)
+ topology = V_028B6C_OUTPUT_TRIANGLE_CCW;
+ else
+ topology = V_028B6C_OUTPUT_TRIANGLE_CW;
+
+ if (pipeline->device->physical_device->rad_info.has_distributed_tess) {
+ if (pipeline->device->physical_device->rad_info.family == CHIP_FIJI ||
+ pipeline->device->physical_device->rad_info.family >= CHIP_POLARIS10)
+ distribution_mode = V_028B6C_TRAPEZOIDS;
+ else
+ distribution_mode = V_028B6C_DONUTS;
+ } else
+ distribution_mode = V_028B6C_NO_DIST;
+
+ radeon_set_context_reg(ctx_cs, R_028B6C_VGT_TF_PARAM,
+ S_028B6C_TYPE(type) | S_028B6C_PARTITIONING(partitioning) |
+ S_028B6C_TOPOLOGY(topology) |
+ S_028B6C_DISTRIBUTION_MODE(distribution_mode));
}
static void
-radv_pipeline_generate_hw_gs(struct radeon_cmdbuf *ctx_cs,
- struct radeon_cmdbuf *cs,
- const struct radv_pipeline *pipeline,
- const struct radv_shader_variant *gs)
+radv_pipeline_generate_hw_gs(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
+ const struct radv_pipeline *pipeline,
+ const struct radv_shader_variant *gs)
{
- const struct gfx9_gs_info *gs_state = &gs->info.gs_ring_info;
- unsigned gs_max_out_vertices;
- const uint8_t *num_components;
- uint8_t max_stream;
- unsigned offset;
- uint64_t va;
-
- gs_max_out_vertices = gs->info.gs.vertices_out;
- max_stream = gs->info.gs.max_stream;
- num_components = gs->info.gs.num_stream_output_components;
-
- offset = num_components[0] * gs_max_out_vertices;
-
- radeon_set_context_reg_seq(ctx_cs, R_028A60_VGT_GSVS_RING_OFFSET_1, 3);
- radeon_emit(ctx_cs, offset);
- if (max_stream >= 1)
- offset += num_components[1] * gs_max_out_vertices;
- radeon_emit(ctx_cs, offset);
- if (max_stream >= 2)
- offset += num_components[2] * gs_max_out_vertices;
- radeon_emit(ctx_cs, offset);
- if (max_stream >= 3)
- offset += num_components[3] * gs_max_out_vertices;
- radeon_set_context_reg(ctx_cs, R_028AB0_VGT_GSVS_RING_ITEMSIZE, offset);
-
- radeon_set_context_reg_seq(ctx_cs, R_028B5C_VGT_GS_VERT_ITEMSIZE, 4);
- radeon_emit(ctx_cs, num_components[0]);
- radeon_emit(ctx_cs, (max_stream >= 1) ? num_components[1] : 0);
- radeon_emit(ctx_cs, (max_stream >= 2) ? num_components[2] : 0);
- radeon_emit(ctx_cs, (max_stream >= 3) ? num_components[3] : 0);
-
- uint32_t gs_num_invocations = gs->info.gs.invocations;
- radeon_set_context_reg(ctx_cs, R_028B90_VGT_GS_INSTANCE_CNT,
- S_028B90_CNT(MIN2(gs_num_invocations, 127)) |
- S_028B90_ENABLE(gs_num_invocations > 0));
-
- radeon_set_context_reg(ctx_cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
- gs_state->vgt_esgs_ring_itemsize);
-
- va = radv_buffer_get_va(gs->bo) + gs->bo_offset;
-
- if (pipeline->device->physical_device->rad_info.chip_class >= GFX9) {
- if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
- radeon_set_sh_reg_seq(cs, R_00B320_SPI_SHADER_PGM_LO_ES, 2);
- radeon_emit(cs, va >> 8);
- radeon_emit(cs, S_00B324_MEM_BASE(va >> 40));
- } else {
- radeon_set_sh_reg_seq(cs, R_00B210_SPI_SHADER_PGM_LO_ES, 2);
- radeon_emit(cs, va >> 8);
- radeon_emit(cs, S_00B214_MEM_BASE(va >> 40));
- }
-
- radeon_set_sh_reg_seq(cs, R_00B228_SPI_SHADER_PGM_RSRC1_GS, 2);
- radeon_emit(cs, gs->config.rsrc1);
- radeon_emit(cs, gs->config.rsrc2 | S_00B22C_LDS_SIZE(gs_state->lds_size));
-
- radeon_set_context_reg(ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL, gs_state->vgt_gs_onchip_cntl);
- radeon_set_context_reg(ctx_cs, R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP, gs_state->vgt_gs_max_prims_per_subgroup);
- } else {
- radeon_set_sh_reg_seq(cs, R_00B220_SPI_SHADER_PGM_LO_GS, 4);
- radeon_emit(cs, va >> 8);
- radeon_emit(cs, S_00B224_MEM_BASE(va >> 40));
- radeon_emit(cs, gs->config.rsrc1);
- radeon_emit(cs, gs->config.rsrc2);
- }
-
- radv_pipeline_generate_hw_vs(ctx_cs, cs, pipeline, pipeline->gs_copy_shader);
+ const struct gfx9_gs_info *gs_state = &gs->info.gs_ring_info;
+ unsigned gs_max_out_vertices;
+ const uint8_t *num_components;
+ uint8_t max_stream;
+ unsigned offset;
+ uint64_t va;
+
+ gs_max_out_vertices = gs->info.gs.vertices_out;
+ max_stream = gs->info.gs.max_stream;
+ num_components = gs->info.gs.num_stream_output_components;
+
+ offset = num_components[0] * gs_max_out_vertices;
+
+ radeon_set_context_reg_seq(ctx_cs, R_028A60_VGT_GSVS_RING_OFFSET_1, 3);
+ radeon_emit(ctx_cs, offset);
+ if (max_stream >= 1)
+ offset += num_components[1] * gs_max_out_vertices;
+ radeon_emit(ctx_cs, offset);
+ if (max_stream >= 2)
+ offset += num_components[2] * gs_max_out_vertices;
+ radeon_emit(ctx_cs, offset);
+ if (max_stream >= 3)
+ offset += num_components[3] * gs_max_out_vertices;
+ radeon_set_context_reg(ctx_cs, R_028AB0_VGT_GSVS_RING_ITEMSIZE, offset);
+
+ radeon_set_context_reg_seq(ctx_cs, R_028B5C_VGT_GS_VERT_ITEMSIZE, 4);
+ radeon_emit(ctx_cs, num_components[0]);
+ radeon_emit(ctx_cs, (max_stream >= 1) ? num_components[1] : 0);
+ radeon_emit(ctx_cs, (max_stream >= 2) ? num_components[2] : 0);
+ radeon_emit(ctx_cs, (max_stream >= 3) ? num_components[3] : 0);
+
+ uint32_t gs_num_invocations = gs->info.gs.invocations;
+ radeon_set_context_reg(
+ ctx_cs, R_028B90_VGT_GS_INSTANCE_CNT,
+ S_028B90_CNT(MIN2(gs_num_invocations, 127)) | S_028B90_ENABLE(gs_num_invocations > 0));
+
+ radeon_set_context_reg(ctx_cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
+ gs_state->vgt_esgs_ring_itemsize);
+
+ va = radv_buffer_get_va(gs->bo) + gs->bo_offset;
+
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX9) {
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
+ radeon_set_sh_reg_seq(cs, R_00B320_SPI_SHADER_PGM_LO_ES, 2);
+ radeon_emit(cs, va >> 8);
+ radeon_emit(cs, S_00B324_MEM_BASE(va >> 40));
+ } else {
+ radeon_set_sh_reg_seq(cs, R_00B210_SPI_SHADER_PGM_LO_ES, 2);
+ radeon_emit(cs, va >> 8);
+ radeon_emit(cs, S_00B214_MEM_BASE(va >> 40));
+ }
+
+ radeon_set_sh_reg_seq(cs, R_00B228_SPI_SHADER_PGM_RSRC1_GS, 2);
+ radeon_emit(cs, gs->config.rsrc1);
+ radeon_emit(cs, gs->config.rsrc2 | S_00B22C_LDS_SIZE(gs_state->lds_size));
+
+ radeon_set_context_reg(ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL, gs_state->vgt_gs_onchip_cntl);
+ radeon_set_context_reg(ctx_cs, R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP,
+ gs_state->vgt_gs_max_prims_per_subgroup);
+ } else {
+ radeon_set_sh_reg_seq(cs, R_00B220_SPI_SHADER_PGM_LO_GS, 4);
+ radeon_emit(cs, va >> 8);
+ radeon_emit(cs, S_00B224_MEM_BASE(va >> 40));
+ radeon_emit(cs, gs->config.rsrc1);
+ radeon_emit(cs, gs->config.rsrc2);
+ }
+
+ radv_pipeline_generate_hw_vs(ctx_cs, cs, pipeline, pipeline->gs_copy_shader);
}
static void
-radv_pipeline_generate_geometry_shader(struct radeon_cmdbuf *ctx_cs,
- struct radeon_cmdbuf *cs,
- const struct radv_pipeline *pipeline)
+radv_pipeline_generate_geometry_shader(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
+ const struct radv_pipeline *pipeline)
{
- struct radv_shader_variant *gs;
+ struct radv_shader_variant *gs;
- gs = pipeline->shaders[MESA_SHADER_GEOMETRY];
- if (!gs)
- return;
+ gs = pipeline->shaders[MESA_SHADER_GEOMETRY];
+ if (!gs)
+ return;
- if (gs->info.is_ngg)
- radv_pipeline_generate_hw_ngg(ctx_cs, cs, pipeline, gs);
- else
- radv_pipeline_generate_hw_gs(ctx_cs, cs, pipeline, gs);
+ if (gs->info.is_ngg)
+ radv_pipeline_generate_hw_ngg(ctx_cs, cs, pipeline, gs);
+ else
+ radv_pipeline_generate_hw_gs(ctx_cs, cs, pipeline, gs);
- radeon_set_context_reg(ctx_cs, R_028B38_VGT_GS_MAX_VERT_OUT,
- gs->info.gs.vertices_out);
+ radeon_set_context_reg(ctx_cs, R_028B38_VGT_GS_MAX_VERT_OUT, gs->info.gs.vertices_out);
}
-static uint32_t offset_to_ps_input(uint32_t offset, bool flat_shade,
- bool explicit, bool float16)
+static uint32_t
+offset_to_ps_input(uint32_t offset, bool flat_shade, bool explicit, bool float16)
{
- uint32_t ps_input_cntl;
- if (offset <= AC_EXP_PARAM_OFFSET_31) {
- ps_input_cntl = S_028644_OFFSET(offset);
- if (flat_shade || explicit)
- ps_input_cntl |= S_028644_FLAT_SHADE(1);
- if (explicit) {
- /* Force parameter cache to be read in passthrough
- * mode.
- */
- ps_input_cntl |= S_028644_OFFSET(1 << 5);
- }
- if (float16) {
- ps_input_cntl |= S_028644_FP16_INTERP_MODE(1) |
- S_028644_ATTR0_VALID(1);
- }
- } else {
- /* The input is a DEFAULT_VAL constant. */
- assert(offset >= AC_EXP_PARAM_DEFAULT_VAL_0000 &&
- offset <= AC_EXP_PARAM_DEFAULT_VAL_1111);
- offset -= AC_EXP_PARAM_DEFAULT_VAL_0000;
- ps_input_cntl = S_028644_OFFSET(0x20) |
- S_028644_DEFAULT_VAL(offset);
- }
- return ps_input_cntl;
+ uint32_t ps_input_cntl;
+ if (offset <= AC_EXP_PARAM_OFFSET_31) {
+ ps_input_cntl = S_028644_OFFSET(offset);
+ if (flat_shade || explicit)
+ ps_input_cntl |= S_028644_FLAT_SHADE(1);
+ if (explicit) {
+ /* Force parameter cache to be read in passthrough
+ * mode.
+ */
+ ps_input_cntl |= S_028644_OFFSET(1 << 5);
+ }
+ if (float16) {
+ ps_input_cntl |= S_028644_FP16_INTERP_MODE(1) | S_028644_ATTR0_VALID(1);
+ }
+ } else {
+ /* The input is a DEFAULT_VAL constant. */
+ assert(offset >= AC_EXP_PARAM_DEFAULT_VAL_0000 && offset <= AC_EXP_PARAM_DEFAULT_VAL_1111);
+ offset -= AC_EXP_PARAM_DEFAULT_VAL_0000;
+ ps_input_cntl = S_028644_OFFSET(0x20) | S_028644_DEFAULT_VAL(offset);
+ }
+ return ps_input_cntl;
}
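As a usage sketch for the helper above (values spelled out for illustration, not lines from this commit):

   /* What offset_to_ps_input() produces for two common cases. */
   uint32_t flat = offset_to_ps_input(5, true, false, false);
   /* == S_028644_OFFSET(5) | S_028644_FLAT_SHADE(1) */

   uint32_t zero = offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, false, false, false);
   /* == S_028644_OFFSET(0x20) | S_028644_DEFAULT_VAL(0): the input reads the
    * hardware constant (0, 0, 0, 0) instead of a parameter-cache slot. */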
static void
-radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs,
- const struct radv_pipeline *pipeline)
+radv_pipeline_generate_ps_inputs(struct radeon_cmdbuf *ctx_cs, const struct radv_pipeline *pipeline)
{
- struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
- const struct radv_vs_output_info *outinfo = get_vs_output_info(pipeline);
- uint32_t ps_input_cntl[32];
-
- unsigned ps_offset = 0;
-
- if (ps->info.ps.prim_id_input) {
- unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID];
- if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
- ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false, false);
- ++ps_offset;
- }
- }
-
- if (ps->info.ps.layer_input ||
- ps->info.needs_multiview_view_index) {
- unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_LAYER];
- if (vs_offset != AC_EXP_PARAM_UNDEFINED)
- ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false, false);
- else
- ps_input_cntl[ps_offset] = offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, true, false, false);
- ++ps_offset;
- }
-
- if (ps->info.ps.viewport_index_input) {
- unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_VIEWPORT];
- if (vs_offset != AC_EXP_PARAM_UNDEFINED)
- ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false, false);
- else
- ps_input_cntl[ps_offset] = offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, true, false, false);
- ++ps_offset;
- }
-
- if (ps->info.ps.has_pcoord) {
- unsigned val;
- val = S_028644_PT_SPRITE_TEX(1) | S_028644_OFFSET(0x20);
- ps_input_cntl[ps_offset] = val;
- ps_offset++;
- }
-
- if (ps->info.ps.num_input_clips_culls) {
- unsigned vs_offset;
-
- vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0];
- if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
- ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false, false, false);
- ++ps_offset;
- }
-
- vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1];
- if (vs_offset != AC_EXP_PARAM_UNDEFINED &&
- ps->info.ps.num_input_clips_culls > 4) {
- ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false, false, false);
- ++ps_offset;
- }
- }
-
- for (unsigned i = 0; i < 32 && (1u << i) <= ps->info.ps.input_mask; ++i) {
- unsigned vs_offset;
- bool flat_shade;
- bool explicit;
- bool float16;
- if (!(ps->info.ps.input_mask & (1u << i)))
- continue;
-
- vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_VAR0 + i];
- if (vs_offset == AC_EXP_PARAM_UNDEFINED) {
- ps_input_cntl[ps_offset] = S_028644_OFFSET(0x20);
- ++ps_offset;
- continue;
- }
-
- flat_shade = !!(ps->info.ps.flat_shaded_mask & (1u << ps_offset));
- explicit = !!(ps->info.ps.explicit_shaded_mask & (1u << ps_offset));
- float16 = !!(ps->info.ps.float16_shaded_mask & (1u << ps_offset));
-
- ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, flat_shade, explicit, float16);
- ++ps_offset;
- }
-
- if (ps_offset) {
- radeon_set_context_reg_seq(ctx_cs, R_028644_SPI_PS_INPUT_CNTL_0, ps_offset);
- for (unsigned i = 0; i < ps_offset; i++) {
- radeon_emit(ctx_cs, ps_input_cntl[i]);
- }
- }
+ struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
+ const struct radv_vs_output_info *outinfo = get_vs_output_info(pipeline);
+ uint32_t ps_input_cntl[32];
+
+ unsigned ps_offset = 0;
+
+ if (ps->info.ps.prim_id_input) {
+ unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID];
+ if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
+ ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false, false);
+ ++ps_offset;
+ }
+ }
+
+ if (ps->info.ps.layer_input || ps->info.needs_multiview_view_index) {
+ unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_LAYER];
+ if (vs_offset != AC_EXP_PARAM_UNDEFINED)
+ ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false, false);
+ else
+ ps_input_cntl[ps_offset] =
+ offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, true, false, false);
+ ++ps_offset;
+ }
+
+ if (ps->info.ps.viewport_index_input) {
+ unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_VIEWPORT];
+ if (vs_offset != AC_EXP_PARAM_UNDEFINED)
+ ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true, false, false);
+ else
+ ps_input_cntl[ps_offset] =
+ offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, true, false, false);
+ ++ps_offset;
+ }
+
+ if (ps->info.ps.has_pcoord) {
+ unsigned val;
+ val = S_028644_PT_SPRITE_TEX(1) | S_028644_OFFSET(0x20);
+ ps_input_cntl[ps_offset] = val;
+ ps_offset++;
+ }
+
+ if (ps->info.ps.num_input_clips_culls) {
+ unsigned vs_offset;
+
+ vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST0];
+ if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
+ ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false, false, false);
+ ++ps_offset;
+ }
+
+ vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_CLIP_DIST1];
+ if (vs_offset != AC_EXP_PARAM_UNDEFINED && ps->info.ps.num_input_clips_culls > 4) {
+ ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, false, false, false);
+ ++ps_offset;
+ }
+ }
+
+ for (unsigned i = 0; i < 32 && (1u << i) <= ps->info.ps.input_mask; ++i) {
+ unsigned vs_offset;
+ bool flat_shade;
+ bool explicit;
+ bool float16;
+ if (!(ps->info.ps.input_mask & (1u << i)))
+ continue;
+
+ vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_VAR0 + i];
+ if (vs_offset == AC_EXP_PARAM_UNDEFINED) {
+ ps_input_cntl[ps_offset] = S_028644_OFFSET(0x20);
+ ++ps_offset;
+ continue;
+ }
+
+ flat_shade = !!(ps->info.ps.flat_shaded_mask & (1u << ps_offset));
+ explicit = !!(ps->info.ps.explicit_shaded_mask & (1u << ps_offset));
+ float16 = !!(ps->info.ps.float16_shaded_mask & (1u << ps_offset));
+
+ ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, flat_shade, explicit, float16);
+ ++ps_offset;
+ }
+
+ if (ps_offset) {
+ radeon_set_context_reg_seq(ctx_cs, R_028644_SPI_PS_INPUT_CNTL_0, ps_offset);
+ for (unsigned i = 0; i < ps_offset; i++) {
+ radeon_emit(ctx_cs, ps_input_cntl[i]);
+ }
+ }
}
static uint32_t
radv_compute_db_shader_control(const struct radv_device *device,
- const struct radv_pipeline *pipeline,
+ const struct radv_pipeline *pipeline,
const struct radv_shader_variant *ps)
{
- unsigned conservative_z_export = V_02880C_EXPORT_ANY_Z;
- unsigned z_order;
- if (ps->info.ps.early_fragment_test || !ps->info.ps.writes_memory)
- z_order = V_02880C_EARLY_Z_THEN_LATE_Z;
- else
- z_order = V_02880C_LATE_Z;
-
- if (ps->info.ps.depth_layout == FRAG_DEPTH_LAYOUT_GREATER)
- conservative_z_export = V_02880C_EXPORT_GREATER_THAN_Z;
- else if (ps->info.ps.depth_layout == FRAG_DEPTH_LAYOUT_LESS)
- conservative_z_export = V_02880C_EXPORT_LESS_THAN_Z;
-
- bool disable_rbplus = device->physical_device->rad_info.has_rbplus &&
- !device->physical_device->rad_info.rbplus_allowed;
-
- /* It shouldn't be needed to export gl_SampleMask when MSAA is disabled
- * but this appears to break Project Cars (DXVK). See
- * https://bugs.freedesktop.org/show_bug.cgi?id=109401
- */
- bool mask_export_enable = ps->info.ps.writes_sample_mask;
-
- return S_02880C_Z_EXPORT_ENABLE(ps->info.ps.writes_z) |
- S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(ps->info.ps.writes_stencil) |
- S_02880C_KILL_ENABLE(!!ps->info.ps.can_discard) |
- S_02880C_MASK_EXPORT_ENABLE(mask_export_enable) |
- S_02880C_CONSERVATIVE_Z_EXPORT(conservative_z_export) |
- S_02880C_Z_ORDER(z_order) |
- S_02880C_DEPTH_BEFORE_SHADER(ps->info.ps.early_fragment_test) |
- S_02880C_PRE_SHADER_DEPTH_COVERAGE_ENABLE(ps->info.ps.post_depth_coverage) |
- S_02880C_EXEC_ON_HIER_FAIL(ps->info.ps.writes_memory) |
- S_02880C_EXEC_ON_NOOP(ps->info.ps.writes_memory) |
- S_02880C_DUAL_QUAD_DISABLE(disable_rbplus);
+ unsigned conservative_z_export = V_02880C_EXPORT_ANY_Z;
+ unsigned z_order;
+ if (ps->info.ps.early_fragment_test || !ps->info.ps.writes_memory)
+ z_order = V_02880C_EARLY_Z_THEN_LATE_Z;
+ else
+ z_order = V_02880C_LATE_Z;
+
+ if (ps->info.ps.depth_layout == FRAG_DEPTH_LAYOUT_GREATER)
+ conservative_z_export = V_02880C_EXPORT_GREATER_THAN_Z;
+ else if (ps->info.ps.depth_layout == FRAG_DEPTH_LAYOUT_LESS)
+ conservative_z_export = V_02880C_EXPORT_LESS_THAN_Z;
+
+ bool disable_rbplus = device->physical_device->rad_info.has_rbplus &&
+ !device->physical_device->rad_info.rbplus_allowed;
+
+ /* It shouldn't be needed to export gl_SampleMask when MSAA is disabled
+ * but this appears to break Project Cars (DXVK). See
+ * https://bugs.freedesktop.org/show_bug.cgi?id=109401
+ */
+ bool mask_export_enable = ps->info.ps.writes_sample_mask;
+
+ return S_02880C_Z_EXPORT_ENABLE(ps->info.ps.writes_z) |
+ S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(ps->info.ps.writes_stencil) |
+ S_02880C_KILL_ENABLE(!!ps->info.ps.can_discard) |
+ S_02880C_MASK_EXPORT_ENABLE(mask_export_enable) |
+ S_02880C_CONSERVATIVE_Z_EXPORT(conservative_z_export) | S_02880C_Z_ORDER(z_order) |
+ S_02880C_DEPTH_BEFORE_SHADER(ps->info.ps.early_fragment_test) |
+ S_02880C_PRE_SHADER_DEPTH_COVERAGE_ENABLE(ps->info.ps.post_depth_coverage) |
+ S_02880C_EXEC_ON_HIER_FAIL(ps->info.ps.writes_memory) |
+ S_02880C_EXEC_ON_NOOP(ps->info.ps.writes_memory) |
+ S_02880C_DUAL_QUAD_DISABLE(disable_rbplus);
}
static void
-radv_pipeline_generate_fragment_shader(struct radeon_cmdbuf *ctx_cs,
- struct radeon_cmdbuf *cs,
- struct radv_pipeline *pipeline)
+radv_pipeline_generate_fragment_shader(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs,
+ struct radv_pipeline *pipeline)
{
- struct radv_shader_variant *ps;
- uint64_t va;
- assert (pipeline->shaders[MESA_SHADER_FRAGMENT]);
+ struct radv_shader_variant *ps;
+ uint64_t va;
+ assert(pipeline->shaders[MESA_SHADER_FRAGMENT]);
- ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
- va = radv_buffer_get_va(ps->bo) + ps->bo_offset;
+ ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
+ va = radv_buffer_get_va(ps->bo) + ps->bo_offset;
- radeon_set_sh_reg_seq(cs, R_00B020_SPI_SHADER_PGM_LO_PS, 4);
- radeon_emit(cs, va >> 8);
- radeon_emit(cs, S_00B024_MEM_BASE(va >> 40));
- radeon_emit(cs, ps->config.rsrc1);
- radeon_emit(cs, ps->config.rsrc2);
+ radeon_set_sh_reg_seq(cs, R_00B020_SPI_SHADER_PGM_LO_PS, 4);
+ radeon_emit(cs, va >> 8);
+ radeon_emit(cs, S_00B024_MEM_BASE(va >> 40));
+ radeon_emit(cs, ps->config.rsrc1);
+ radeon_emit(cs, ps->config.rsrc2);
- radeon_set_context_reg(ctx_cs, R_02880C_DB_SHADER_CONTROL,
- radv_compute_db_shader_control(pipeline->device,
- pipeline, ps));
+ radeon_set_context_reg(ctx_cs, R_02880C_DB_SHADER_CONTROL,
+ radv_compute_db_shader_control(pipeline->device, pipeline, ps));
- radeon_set_context_reg(ctx_cs, R_0286CC_SPI_PS_INPUT_ENA,
- ps->config.spi_ps_input_ena);
+ radeon_set_context_reg(ctx_cs, R_0286CC_SPI_PS_INPUT_ENA, ps->config.spi_ps_input_ena);
- radeon_set_context_reg(ctx_cs, R_0286D0_SPI_PS_INPUT_ADDR,
- ps->config.spi_ps_input_addr);
+ radeon_set_context_reg(ctx_cs, R_0286D0_SPI_PS_INPUT_ADDR, ps->config.spi_ps_input_addr);
- radeon_set_context_reg(ctx_cs, R_0286D8_SPI_PS_IN_CONTROL,
- S_0286D8_NUM_INTERP(ps->info.ps.num_interp) |
- S_0286D8_PS_W32_EN(ps->info.wave_size == 32));
+ radeon_set_context_reg(
+ ctx_cs, R_0286D8_SPI_PS_IN_CONTROL,
+ S_0286D8_NUM_INTERP(ps->info.ps.num_interp) | S_0286D8_PS_W32_EN(ps->info.wave_size == 32));
- radeon_set_context_reg(ctx_cs, R_0286E0_SPI_BARYC_CNTL, pipeline->graphics.spi_baryc_cntl);
+ radeon_set_context_reg(ctx_cs, R_0286E0_SPI_BARYC_CNTL, pipeline->graphics.spi_baryc_cntl);
- radeon_set_context_reg(ctx_cs, R_028710_SPI_SHADER_Z_FORMAT,
- ac_get_spi_shader_z_format(ps->info.ps.writes_z,
- ps->info.ps.writes_stencil,
- ps->info.ps.writes_sample_mask));
+ radeon_set_context_reg(
+ ctx_cs, R_028710_SPI_SHADER_Z_FORMAT,
+ ac_get_spi_shader_z_format(ps->info.ps.writes_z, ps->info.ps.writes_stencil,
+ ps->info.ps.writes_sample_mask));
- if (pipeline->device->dfsm_allowed) {
- /* optimise this? */
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
- }
+ if (pipeline->device->dfsm_allowed) {
+ /* optimise this? */
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_DFSM) | EVENT_INDEX(0));
+ }
}
static void
radv_pipeline_generate_vgt_vertex_reuse(struct radeon_cmdbuf *ctx_cs,
- const struct radv_pipeline *pipeline)
+ const struct radv_pipeline *pipeline)
{
- if (pipeline->device->physical_device->rad_info.family < CHIP_POLARIS10 ||
- pipeline->device->physical_device->rad_info.chip_class >= GFX10)
- return;
-
- unsigned vtx_reuse_depth = 30;
- if (radv_pipeline_has_tess(pipeline) &&
- radv_get_shader(pipeline, MESA_SHADER_TESS_EVAL)->info.tes.spacing == TESS_SPACING_FRACTIONAL_ODD) {
- vtx_reuse_depth = 14;
- }
- radeon_set_context_reg(ctx_cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL,
- S_028C58_VTX_REUSE_DEPTH(vtx_reuse_depth));
+ if (pipeline->device->physical_device->rad_info.family < CHIP_POLARIS10 ||
+ pipeline->device->physical_device->rad_info.chip_class >= GFX10)
+ return;
+
+ unsigned vtx_reuse_depth = 30;
+ if (radv_pipeline_has_tess(pipeline) &&
+ radv_get_shader(pipeline, MESA_SHADER_TESS_EVAL)->info.tes.spacing ==
+ TESS_SPACING_FRACTIONAL_ODD) {
+ vtx_reuse_depth = 14;
+ }
+ radeon_set_context_reg(ctx_cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL,
+ S_028C58_VTX_REUSE_DEPTH(vtx_reuse_depth));
}
static void
radv_pipeline_generate_vgt_shader_config(struct radeon_cmdbuf *ctx_cs,
- const struct radv_pipeline *pipeline)
+ const struct radv_pipeline *pipeline)
{
- uint32_t stages = 0;
- if (radv_pipeline_has_tess(pipeline)) {
- stages |= S_028B54_LS_EN(V_028B54_LS_STAGE_ON) |
- S_028B54_HS_EN(1) | S_028B54_DYNAMIC_HS(1);
-
- if (radv_pipeline_has_gs(pipeline))
- stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_DS) |
- S_028B54_GS_EN(1);
- else if (radv_pipeline_has_ngg(pipeline))
- stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_DS);
- else
- stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_DS);
- } else if (radv_pipeline_has_gs(pipeline)) {
- stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) |
- S_028B54_GS_EN(1);
- } else if (radv_pipeline_has_ngg(pipeline)) {
- stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL);
- }
-
- if (radv_pipeline_has_ngg(pipeline)) {
- stages |= S_028B54_PRIMGEN_EN(1);
- if (pipeline->streamout_shader)
- stages |= S_028B54_NGG_WAVE_ID_EN(1);
- if (radv_pipeline_has_ngg_passthrough(pipeline))
- stages |= S_028B54_PRIMGEN_PASSTHRU_EN(1);
- } else if (radv_pipeline_has_gs(pipeline)) {
- stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
- }
-
- if (pipeline->device->physical_device->rad_info.chip_class >= GFX9)
- stages |= S_028B54_MAX_PRIMGRP_IN_WAVE(2);
-
- if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
- uint8_t hs_size = 64, gs_size = 64, vs_size = 64;
-
- if (radv_pipeline_has_tess(pipeline))
- hs_size = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.wave_size;
-
- if (pipeline->shaders[MESA_SHADER_GEOMETRY]) {
- vs_size = gs_size = pipeline->shaders[MESA_SHADER_GEOMETRY]->info.wave_size;
- if (pipeline->gs_copy_shader)
- vs_size = pipeline->gs_copy_shader->info.wave_size;
- } else if (pipeline->shaders[MESA_SHADER_TESS_EVAL])
- vs_size = pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.wave_size;
- else if (pipeline->shaders[MESA_SHADER_VERTEX])
- vs_size = pipeline->shaders[MESA_SHADER_VERTEX]->info.wave_size;
-
- if (radv_pipeline_has_ngg(pipeline))
- gs_size = vs_size;
-
- /* legacy GS only supports Wave64 */
- stages |= S_028B54_HS_W32_EN(hs_size == 32 ? 1 : 0) |
- S_028B54_GS_W32_EN(gs_size == 32 ? 1 : 0) |
- S_028B54_VS_W32_EN(vs_size == 32 ? 1 : 0);
- }
-
- radeon_set_context_reg(ctx_cs, R_028B54_VGT_SHADER_STAGES_EN, stages);
+ uint32_t stages = 0;
+ if (radv_pipeline_has_tess(pipeline)) {
+ stages |= S_028B54_LS_EN(V_028B54_LS_STAGE_ON) | S_028B54_HS_EN(1) | S_028B54_DYNAMIC_HS(1);
+
+ if (radv_pipeline_has_gs(pipeline))
+ stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_DS) | S_028B54_GS_EN(1);
+ else if (radv_pipeline_has_ngg(pipeline))
+ stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_DS);
+ else
+ stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_DS);
+ } else if (radv_pipeline_has_gs(pipeline)) {
+ stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) | S_028B54_GS_EN(1);
+ } else if (radv_pipeline_has_ngg(pipeline)) {
+ stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL);
+ }
+
+ if (radv_pipeline_has_ngg(pipeline)) {
+ stages |= S_028B54_PRIMGEN_EN(1);
+ if (pipeline->streamout_shader)
+ stages |= S_028B54_NGG_WAVE_ID_EN(1);
+ if (radv_pipeline_has_ngg_passthrough(pipeline))
+ stages |= S_028B54_PRIMGEN_PASSTHRU_EN(1);
+ } else if (radv_pipeline_has_gs(pipeline)) {
+ stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
+ }
+
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX9)
+ stages |= S_028B54_MAX_PRIMGRP_IN_WAVE(2);
+
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX10) {
+ uint8_t hs_size = 64, gs_size = 64, vs_size = 64;
+
+ if (radv_pipeline_has_tess(pipeline))
+ hs_size = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.wave_size;
+
+ if (pipeline->shaders[MESA_SHADER_GEOMETRY]) {
+ vs_size = gs_size = pipeline->shaders[MESA_SHADER_GEOMETRY]->info.wave_size;
+ if (pipeline->gs_copy_shader)
+ vs_size = pipeline->gs_copy_shader->info.wave_size;
+ } else if (pipeline->shaders[MESA_SHADER_TESS_EVAL])
+ vs_size = pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.wave_size;
+ else if (pipeline->shaders[MESA_SHADER_VERTEX])
+ vs_size = pipeline->shaders[MESA_SHADER_VERTEX]->info.wave_size;
+
+ if (radv_pipeline_has_ngg(pipeline))
+ gs_size = vs_size;
+
+ /* legacy GS only supports Wave64 */
+ stages |= S_028B54_HS_W32_EN(hs_size == 32 ? 1 : 0) |
+ S_028B54_GS_W32_EN(gs_size == 32 ? 1 : 0) |
+ S_028B54_VS_W32_EN(vs_size == 32 ? 1 : 0);
+ }
+
+ radeon_set_context_reg(ctx_cs, R_028B54_VGT_SHADER_STAGES_EN, stages);
}
static void
radv_pipeline_generate_cliprect_rule(struct radeon_cmdbuf *ctx_cs,
- const VkGraphicsPipelineCreateInfo *pCreateInfo)
+ const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
- const VkPipelineDiscardRectangleStateCreateInfoEXT *discard_rectangle_info =
- vk_find_struct_const(pCreateInfo->pNext, PIPELINE_DISCARD_RECTANGLE_STATE_CREATE_INFO_EXT);
- uint32_t cliprect_rule = 0;
-
- if (!discard_rectangle_info) {
- cliprect_rule = 0xffff;
- } else {
- for (unsigned i = 0; i < (1u << MAX_DISCARD_RECTANGLES); ++i) {
- /* Interpret i as a bitmask, and then set the bit in
- * the mask if that combination of rectangles in which
- * the pixel is contained should pass the cliprect
- * test.
- */
- unsigned relevant_subset = i & ((1u << discard_rectangle_info->discardRectangleCount) - 1);
-
- if (discard_rectangle_info->discardRectangleMode == VK_DISCARD_RECTANGLE_MODE_INCLUSIVE_EXT &&
- !relevant_subset)
- continue;
-
- if (discard_rectangle_info->discardRectangleMode == VK_DISCARD_RECTANGLE_MODE_EXCLUSIVE_EXT &&
- relevant_subset)
- continue;
-
- cliprect_rule |= 1u << i;
- }
- }
-
- radeon_set_context_reg(ctx_cs, R_02820C_PA_SC_CLIPRECT_RULE, cliprect_rule);
+ const VkPipelineDiscardRectangleStateCreateInfoEXT *discard_rectangle_info =
+ vk_find_struct_const(pCreateInfo->pNext, PIPELINE_DISCARD_RECTANGLE_STATE_CREATE_INFO_EXT);
+ uint32_t cliprect_rule = 0;
+
+ if (!discard_rectangle_info) {
+ cliprect_rule = 0xffff;
+ } else {
+ for (unsigned i = 0; i < (1u << MAX_DISCARD_RECTANGLES); ++i) {
+ /* Interpret i as a bitmask, and then set the bit in
+ * the mask if that combination of rectangles in which
+ * the pixel is contained should pass the cliprect
+ * test.
+ */
+ unsigned relevant_subset = i & ((1u << discard_rectangle_info->discardRectangleCount) - 1);
+
+ if (discard_rectangle_info->discardRectangleMode ==
+ VK_DISCARD_RECTANGLE_MODE_INCLUSIVE_EXT &&
+ !relevant_subset)
+ continue;
+
+ if (discard_rectangle_info->discardRectangleMode ==
+ VK_DISCARD_RECTANGLE_MODE_EXCLUSIVE_EXT &&
+ relevant_subset)
+ continue;
+
+ cliprect_rule |= 1u << i;
+ }
+ }
+
+ radeon_set_context_reg(ctx_cs, R_02820C_PA_SC_CLIPRECT_RULE, cliprect_rule);
}
static void
-gfx10_pipeline_generate_ge_cntl(struct radeon_cmdbuf *ctx_cs,
- struct radv_pipeline *pipeline)
+gfx10_pipeline_generate_ge_cntl(struct radeon_cmdbuf *ctx_cs, struct radv_pipeline *pipeline)
{
- bool break_wave_at_eoi = false;
- unsigned primgroup_size;
- unsigned vertgroup_size = 256; /* 256 = disable vertex grouping */
-
- if (radv_pipeline_has_tess(pipeline)) {
- primgroup_size = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.num_tess_patches;
- } else if (radv_pipeline_has_gs(pipeline)) {
- const struct gfx9_gs_info *gs_state =
- &pipeline->shaders[MESA_SHADER_GEOMETRY]->info.gs_ring_info;
- unsigned vgt_gs_onchip_cntl = gs_state->vgt_gs_onchip_cntl;
- primgroup_size = G_028A44_GS_PRIMS_PER_SUBGRP(vgt_gs_onchip_cntl);
- } else {
- primgroup_size = 128; /* recommended without a GS and tess */
- }
-
- if (radv_pipeline_has_tess(pipeline)) {
- if (pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.uses_prim_id ||
- radv_get_shader(pipeline, MESA_SHADER_TESS_EVAL)->info.uses_prim_id)
- break_wave_at_eoi = true;
- }
-
- radeon_set_uconfig_reg(ctx_cs, R_03096C_GE_CNTL,
- S_03096C_PRIM_GRP_SIZE(primgroup_size) |
- S_03096C_VERT_GRP_SIZE(vertgroup_size) |
- S_03096C_PACKET_TO_ONE_PA(0) /* line stipple */ |
- S_03096C_BREAK_WAVE_AT_EOI(break_wave_at_eoi));
+ bool break_wave_at_eoi = false;
+ unsigned primgroup_size;
+ unsigned vertgroup_size = 256; /* 256 = disable vertex grouping */
+
+ if (radv_pipeline_has_tess(pipeline)) {
+ primgroup_size = pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.num_tess_patches;
+ } else if (radv_pipeline_has_gs(pipeline)) {
+ const struct gfx9_gs_info *gs_state =
+ &pipeline->shaders[MESA_SHADER_GEOMETRY]->info.gs_ring_info;
+ unsigned vgt_gs_onchip_cntl = gs_state->vgt_gs_onchip_cntl;
+ primgroup_size = G_028A44_GS_PRIMS_PER_SUBGRP(vgt_gs_onchip_cntl);
+ } else {
+ primgroup_size = 128; /* recommended without a GS and tess */
+ }
+
+ if (radv_pipeline_has_tess(pipeline)) {
+ if (pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.uses_prim_id ||
+ radv_get_shader(pipeline, MESA_SHADER_TESS_EVAL)->info.uses_prim_id)
+ break_wave_at_eoi = true;
+ }
+
+ radeon_set_uconfig_reg(ctx_cs, R_03096C_GE_CNTL,
+ S_03096C_PRIM_GRP_SIZE(primgroup_size) |
+ S_03096C_VERT_GRP_SIZE(vertgroup_size) |
+ S_03096C_PACKET_TO_ONE_PA(0) /* line stipple */ |
+ S_03096C_BREAK_WAVE_AT_EOI(break_wave_at_eoi));
}
static void
radv_pipeline_generate_vgt_gs_out(struct radeon_cmdbuf *ctx_cs,
- const struct radv_pipeline *pipeline,
- const VkGraphicsPipelineCreateInfo *pCreateInfo,
- const struct radv_graphics_pipeline_create_info *extra)
+ const struct radv_pipeline *pipeline,
+ const VkGraphicsPipelineCreateInfo *pCreateInfo,
+ const struct radv_graphics_pipeline_create_info *extra)
{
- uint32_t gs_out;
-
- if (radv_pipeline_has_gs(pipeline)) {
- gs_out = si_conv_gl_prim_to_gs_out(pipeline->shaders[MESA_SHADER_GEOMETRY]->info.gs.output_prim);
- } else if (radv_pipeline_has_tess(pipeline)) {
- if (pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.tes.point_mode) {
- gs_out = V_028A6C_POINTLIST;
- } else {
- gs_out = si_conv_gl_prim_to_gs_out(pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.tes.primitive_mode);
- }
- } else {
- gs_out = si_conv_prim_to_gs_out(pCreateInfo->pInputAssemblyState->topology);
- }
-
- if (extra && extra->use_rectlist) {
- gs_out = V_028A6C_TRISTRIP;
- if (radv_pipeline_has_ngg(pipeline))
- gs_out = V_028A6C_RECTLIST;
- }
-
- radeon_set_context_reg(ctx_cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, gs_out);
+ uint32_t gs_out;
+
+ if (radv_pipeline_has_gs(pipeline)) {
+ gs_out =
+ si_conv_gl_prim_to_gs_out(pipeline->shaders[MESA_SHADER_GEOMETRY]->info.gs.output_prim);
+ } else if (radv_pipeline_has_tess(pipeline)) {
+ if (pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.tes.point_mode) {
+ gs_out = V_028A6C_POINTLIST;
+ } else {
+ gs_out = si_conv_gl_prim_to_gs_out(
+ pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.tes.primitive_mode);
+ }
+ } else {
+ gs_out = si_conv_prim_to_gs_out(pCreateInfo->pInputAssemblyState->topology);
+ }
+
+ if (extra && extra->use_rectlist) {
+ gs_out = V_028A6C_TRISTRIP;
+ if (radv_pipeline_has_ngg(pipeline))
+ gs_out = V_028A6C_RECTLIST;
+ }
+
+ radeon_set_context_reg(ctx_cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, gs_out);
}
static bool
gfx103_pipeline_vrs_coarse_shading(const struct radv_pipeline *pipeline)
{
- struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
- struct radv_device *device = pipeline->device;
+ struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
+ struct radv_device *device = pipeline->device;
- if (device->instance->debug_flags & RADV_DEBUG_NO_VRS_FLAT_SHADING)
- return false;
+ if (device->instance->debug_flags & RADV_DEBUG_NO_VRS_FLAT_SHADING)
+ return false;
- if (!ps->info.ps.allow_flat_shading)
- return false;
+ if (!ps->info.ps.allow_flat_shading)
+ return false;
- return true;
+ return true;
}
static void
gfx103_pipeline_generate_vrs_state(struct radeon_cmdbuf *ctx_cs,
- const struct radv_pipeline *pipeline,
- const VkGraphicsPipelineCreateInfo *pCreateInfo)
+ const struct radv_pipeline *pipeline,
+ const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
- uint32_t mode = V_028064_VRS_COMB_MODE_PASSTHRU;
- uint8_t rate_x = 0, rate_y = 0;
- bool enable_vrs = false;
-
- if (vk_find_struct_const(pCreateInfo->pNext, PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR) ||
- radv_is_state_dynamic(pCreateInfo, VK_DYNAMIC_STATE_FRAGMENT_SHADING_RATE_KHR)) {
- /* Enable draw call VRS because it's explicitly requested. */
- enable_vrs = true;
- } else if (gfx103_pipeline_vrs_coarse_shading(pipeline)) {
- /* Enable VRS coarse shading 2x2 if the driver determined that
- * it's safe to enable.
- */
- mode = V_028064_VRS_COMB_MODE_OVERRIDE;
- rate_x = rate_y = 1;
- } else if (pipeline->device->force_vrs != RADV_FORCE_VRS_NONE) {
- /* Force enable vertex VRS if requested by the user. */
- radeon_set_context_reg(ctx_cs, R_028848_PA_CL_VRS_CNTL,
- S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE) |
- S_028848_VERTEX_RATE_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE));
-
- /* If the shader is using discard, turn off coarse shading
- * because discard at 2x2 pixel granularity degrades quality
- * too much. MIN allows sample shading but not coarse shading.
- */
- struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
-
- mode = ps->info.ps.can_discard ? V_028064_VRS_COMB_MODE_MIN
- : V_028064_VRS_COMB_MODE_PASSTHRU;
- }
-
- radeon_set_context_reg(ctx_cs, R_028A98_VGT_DRAW_PAYLOAD_CNTL,
- S_028A98_EN_VRS_RATE(enable_vrs));
-
- radeon_set_context_reg(ctx_cs, R_028064_DB_VRS_OVERRIDE_CNTL,
- S_028064_VRS_OVERRIDE_RATE_COMBINER_MODE(mode) |
- S_028064_VRS_OVERRIDE_RATE_X(rate_x) |
- S_028064_VRS_OVERRIDE_RATE_Y(rate_y));
+ uint32_t mode = V_028064_VRS_COMB_MODE_PASSTHRU;
+ uint8_t rate_x = 0, rate_y = 0;
+ bool enable_vrs = false;
+
+ if (vk_find_struct_const(pCreateInfo->pNext,
+ PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR) ||
+ radv_is_state_dynamic(pCreateInfo, VK_DYNAMIC_STATE_FRAGMENT_SHADING_RATE_KHR)) {
+ /* Enable draw call VRS because it's explicitly requested. */
+ enable_vrs = true;
+ } else if (gfx103_pipeline_vrs_coarse_shading(pipeline)) {
+ /* Enable VRS coarse shading 2x2 if the driver determined that
+ * it's safe to enable.
+ */
+ mode = V_028064_VRS_COMB_MODE_OVERRIDE;
+ rate_x = rate_y = 1;
+ } else if (pipeline->device->force_vrs != RADV_FORCE_VRS_NONE) {
+ /* Force enable vertex VRS if requested by the user. */
+ radeon_set_context_reg(
+ ctx_cs, R_028848_PA_CL_VRS_CNTL,
+ S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE) |
+ S_028848_VERTEX_RATE_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE));
+
+ /* If the shader is using discard, turn off coarse shading
+ * because discard at 2x2 pixel granularity degrades quality
+ * too much. MIN allows sample shading but not coarse shading.
+ */
+ struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
+
+ mode = ps->info.ps.can_discard ? V_028064_VRS_COMB_MODE_MIN : V_028064_VRS_COMB_MODE_PASSTHRU;
+ }
+
+ radeon_set_context_reg(ctx_cs, R_028A98_VGT_DRAW_PAYLOAD_CNTL, S_028A98_EN_VRS_RATE(enable_vrs));
+
+ radeon_set_context_reg(ctx_cs, R_028064_DB_VRS_OVERRIDE_CNTL,
+ S_028064_VRS_OVERRIDE_RATE_COMBINER_MODE(mode) |
+ S_028064_VRS_OVERRIDE_RATE_X(rate_x) |
+ S_028064_VRS_OVERRIDE_RATE_Y(rate_y));
}
static void
@@ -5375,802 +5192,780 @@ radv_pipeline_generate_pm4(struct radv_pipeline *pipeline,
const struct radv_graphics_pipeline_create_info *extra,
const struct radv_blend_state *blend)
{
- struct radeon_cmdbuf *ctx_cs = &pipeline->ctx_cs;
- struct radeon_cmdbuf *cs = &pipeline->cs;
-
- cs->max_dw = 64;
- ctx_cs->max_dw = 256;
- cs->buf = malloc(4 * (cs->max_dw + ctx_cs->max_dw));
- ctx_cs->buf = cs->buf + cs->max_dw;
-
- radv_pipeline_generate_depth_stencil_state(ctx_cs, pipeline, pCreateInfo, extra);
- radv_pipeline_generate_blend_state(ctx_cs, pipeline, blend);
- radv_pipeline_generate_raster_state(ctx_cs, pipeline, pCreateInfo);
- radv_pipeline_generate_multisample_state(ctx_cs, pipeline);
- radv_pipeline_generate_vgt_gs_mode(ctx_cs, pipeline);
- radv_pipeline_generate_vertex_shader(ctx_cs, cs, pipeline);
-
- if (radv_pipeline_has_tess(pipeline)) {
- radv_pipeline_generate_tess_shaders(ctx_cs, cs, pipeline);
- radv_pipeline_generate_tess_state(ctx_cs, pipeline, pCreateInfo);
- }
-
- radv_pipeline_generate_geometry_shader(ctx_cs, cs, pipeline);
- radv_pipeline_generate_fragment_shader(ctx_cs, cs, pipeline);
- radv_pipeline_generate_ps_inputs(ctx_cs, pipeline);
- radv_pipeline_generate_vgt_vertex_reuse(ctx_cs, pipeline);
- radv_pipeline_generate_vgt_shader_config(ctx_cs, pipeline);
- radv_pipeline_generate_cliprect_rule(ctx_cs, pCreateInfo);
- radv_pipeline_generate_vgt_gs_out(ctx_cs, pipeline, pCreateInfo, extra);
-
- if (pipeline->device->physical_device->rad_info.chip_class >= GFX10 && !radv_pipeline_has_ngg(pipeline))
- gfx10_pipeline_generate_ge_cntl(ctx_cs, pipeline);
-
- if (pipeline->device->physical_device->rad_info.chip_class >= GFX10_3)
- gfx103_pipeline_generate_vrs_state(ctx_cs, pipeline, pCreateInfo);
-
- pipeline->ctx_cs_hash = _mesa_hash_data(ctx_cs->buf, ctx_cs->cdw * 4);
-
- assert(ctx_cs->cdw <= ctx_cs->max_dw);
- assert(cs->cdw <= cs->max_dw);
+ struct radeon_cmdbuf *ctx_cs = &pipeline->ctx_cs;
+ struct radeon_cmdbuf *cs = &pipeline->cs;
+
+ cs->max_dw = 64;
+ ctx_cs->max_dw = 256;
+ cs->buf = malloc(4 * (cs->max_dw + ctx_cs->max_dw));
+ ctx_cs->buf = cs->buf + cs->max_dw;
+
+ radv_pipeline_generate_depth_stencil_state(ctx_cs, pipeline, pCreateInfo, extra);
+ radv_pipeline_generate_blend_state(ctx_cs, pipeline, blend);
+ radv_pipeline_generate_raster_state(ctx_cs, pipeline, pCreateInfo);
+ radv_pipeline_generate_multisample_state(ctx_cs, pipeline);
+ radv_pipeline_generate_vgt_gs_mode(ctx_cs, pipeline);
+ radv_pipeline_generate_vertex_shader(ctx_cs, cs, pipeline);
+
+ if (radv_pipeline_has_tess(pipeline)) {
+ radv_pipeline_generate_tess_shaders(ctx_cs, cs, pipeline);
+ radv_pipeline_generate_tess_state(ctx_cs, pipeline, pCreateInfo);
+ }
+
+ radv_pipeline_generate_geometry_shader(ctx_cs, cs, pipeline);
+ radv_pipeline_generate_fragment_shader(ctx_cs, cs, pipeline);
+ radv_pipeline_generate_ps_inputs(ctx_cs, pipeline);
+ radv_pipeline_generate_vgt_vertex_reuse(ctx_cs, pipeline);
+ radv_pipeline_generate_vgt_shader_config(ctx_cs, pipeline);
+ radv_pipeline_generate_cliprect_rule(ctx_cs, pCreateInfo);
+ radv_pipeline_generate_vgt_gs_out(ctx_cs, pipeline, pCreateInfo, extra);
+
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX10 &&
+ !radv_pipeline_has_ngg(pipeline))
+ gfx10_pipeline_generate_ge_cntl(ctx_cs, pipeline);
+
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX10_3)
+ gfx103_pipeline_generate_vrs_state(ctx_cs, pipeline, pCreateInfo);
+
+ pipeline->ctx_cs_hash = _mesa_hash_data(ctx_cs->buf, ctx_cs->cdw * 4);
+
+ assert(ctx_cs->cdw <= ctx_cs->max_dw);
+ assert(cs->cdw <= cs->max_dw);
}
static void
radv_pipeline_init_vertex_input_state(struct radv_pipeline *pipeline,
- const VkGraphicsPipelineCreateInfo *pCreateInfo)
+ const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
- const VkPipelineVertexInputStateCreateInfo *vi_info =
- pCreateInfo->pVertexInputState;
+ const VkPipelineVertexInputStateCreateInfo *vi_info = pCreateInfo->pVertexInputState;
- for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
- const VkVertexInputBindingDescription *desc =
- &vi_info->pVertexBindingDescriptions[i];
+ for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
+ const VkVertexInputBindingDescription *desc = &vi_info->pVertexBindingDescriptions[i];
- pipeline->binding_stride[desc->binding] = desc->stride;
- pipeline->num_vertex_bindings =
- MAX2(pipeline->num_vertex_bindings, desc->binding + 1);
- }
+ pipeline->binding_stride[desc->binding] = desc->stride;
+ pipeline->num_vertex_bindings = MAX2(pipeline->num_vertex_bindings, desc->binding + 1);
+ }
}
static struct radv_shader_variant *
radv_pipeline_get_streamout_shader(struct radv_pipeline *pipeline)
{
- int i;
+ int i;
- for (i = MESA_SHADER_GEOMETRY; i >= MESA_SHADER_VERTEX; i--) {
- struct radv_shader_variant *shader =
- radv_get_shader(pipeline, i);
+ for (i = MESA_SHADER_GEOMETRY; i >= MESA_SHADER_VERTEX; i--) {
+ struct radv_shader_variant *shader = radv_get_shader(pipeline, i);
- if (shader && shader->info.so.num_outputs > 0)
- return shader;
- }
+ if (shader && shader->info.so.num_outputs > 0)
+ return shader;
+ }
- return NULL;
+ return NULL;
}
static void
radv_pipeline_init_shader_stages_state(struct radv_pipeline *pipeline)
{
- struct radv_device *device = pipeline->device;
-
- for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
- pipeline->user_data_0[i] =
- radv_pipeline_stage_to_user_data_0(pipeline, i,
- device->physical_device->rad_info.chip_class);
-
- if (pipeline->shaders[i]) {
- pipeline->need_indirect_descriptor_sets |= pipeline->shaders[i]->info.need_indirect_descriptor_sets;
- }
- }
-
- struct radv_userdata_info *loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_VERTEX,
- AC_UD_VS_BASE_VERTEX_START_INSTANCE);
- if (loc->sgpr_idx != -1) {
- pipeline->graphics.vtx_base_sgpr = pipeline->user_data_0[MESA_SHADER_VERTEX];
- pipeline->graphics.vtx_base_sgpr += loc->sgpr_idx * 4;
- pipeline->graphics.vtx_emit_num = loc->num_sgprs;
- pipeline->graphics.uses_drawid = radv_get_shader(pipeline, MESA_SHADER_VERTEX)->info.vs.needs_draw_id;
- pipeline->graphics.uses_baseinstance = radv_get_shader(pipeline, MESA_SHADER_VERTEX)->info.vs.needs_base_instance;
- }
+ struct radv_device *device = pipeline->device;
+
+ for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+ pipeline->user_data_0[i] = radv_pipeline_stage_to_user_data_0(
+ pipeline, i, device->physical_device->rad_info.chip_class);
+
+ if (pipeline->shaders[i]) {
+ pipeline->need_indirect_descriptor_sets |=
+ pipeline->shaders[i]->info.need_indirect_descriptor_sets;
+ }
+ }
+
+ struct radv_userdata_info *loc =
+ radv_lookup_user_sgpr(pipeline, MESA_SHADER_VERTEX, AC_UD_VS_BASE_VERTEX_START_INSTANCE);
+ if (loc->sgpr_idx != -1) {
+ pipeline->graphics.vtx_base_sgpr = pipeline->user_data_0[MESA_SHADER_VERTEX];
+ pipeline->graphics.vtx_base_sgpr += loc->sgpr_idx * 4;
+ pipeline->graphics.vtx_emit_num = loc->num_sgprs;
+ pipeline->graphics.uses_drawid =
+ radv_get_shader(pipeline, MESA_SHADER_VERTEX)->info.vs.needs_draw_id;
+ pipeline->graphics.uses_baseinstance =
+ radv_get_shader(pipeline, MESA_SHADER_VERTEX)->info.vs.needs_base_instance;
+ }
}
static VkResult
-radv_pipeline_init(struct radv_pipeline *pipeline,
- struct radv_device *device,
- struct radv_pipeline_cache *cache,
- const VkGraphicsPipelineCreateInfo *pCreateInfo,
- const struct radv_graphics_pipeline_create_info *extra)
+radv_pipeline_init(struct radv_pipeline *pipeline, struct radv_device *device,
+ struct radv_pipeline_cache *cache,
+ const VkGraphicsPipelineCreateInfo *pCreateInfo,
+ const struct radv_graphics_pipeline_create_info *extra)
{
- VkResult result;
-
- pipeline->device = device;
- pipeline->layout = radv_pipeline_layout_from_handle(pCreateInfo->layout);
- assert(pipeline->layout);
-
- struct radv_blend_state blend = radv_pipeline_init_blend_state(pipeline, pCreateInfo, extra);
-
- const VkPipelineCreationFeedbackCreateInfoEXT *creation_feedback =
- vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
- radv_init_feedback(creation_feedback);
-
- VkPipelineCreationFeedbackEXT *pipeline_feedback = creation_feedback ? creation_feedback->pPipelineCreationFeedback : NULL;
-
- const VkPipelineShaderStageCreateInfo *pStages[MESA_SHADER_STAGES] = { 0, };
- VkPipelineCreationFeedbackEXT *stage_feedbacks[MESA_SHADER_STAGES] = { 0 };
- for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
- gl_shader_stage stage = ffs(pCreateInfo->pStages[i].stage) - 1;
- pStages[stage] = &pCreateInfo->pStages[i];
- if(creation_feedback)
- stage_feedbacks[stage] = &creation_feedback->pPipelineStageCreationFeedbacks[i];
- }
-
- struct radv_pipeline_key key = radv_generate_graphics_pipeline_key(pipeline, pCreateInfo, &blend);
-
- result = radv_create_shaders(pipeline, device, cache, &key, pStages,
- pCreateInfo->flags, pipeline_feedback,
- stage_feedbacks);
- if (result != VK_SUCCESS)
- return result;
-
- pipeline->graphics.spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1);
- radv_pipeline_init_multisample_state(pipeline, &blend, pCreateInfo);
- radv_pipeline_init_input_assembly_state(pipeline, pCreateInfo, extra);
- radv_pipeline_init_dynamic_state(pipeline, pCreateInfo, extra);
- radv_pipeline_init_raster_state(pipeline, pCreateInfo);
- radv_pipeline_init_depth_stencil_state(pipeline, pCreateInfo);
-
- if (pipeline->device->physical_device->rad_info.chip_class >= GFX10_3)
- gfx103_pipeline_init_vrs_state(pipeline, pCreateInfo);
-
- /* Ensure that some export memory is always allocated, for two reasons:
- *
- * 1) Correctness: The hardware ignores the EXEC mask if no export
- * memory is allocated, so KILL and alpha test do not work correctly
- * without this.
- * 2) Performance: Every shader needs at least a NULL export, even when
- * it writes no color/depth output. The NULL export instruction
- * stalls without this setting.
- *
- * Don't add this to CB_SHADER_MASK.
- *
- * GFX10 supports pixel shaders without exports by setting both the
- * color and Z formats to SPI_SHADER_ZERO. The hw will skip export
- * instructions if any are present.
- */
- struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
- if ((pipeline->device->physical_device->rad_info.chip_class <= GFX9 ||
- ps->info.ps.can_discard) &&
- !blend.spi_shader_col_format) {
- if (!ps->info.ps.writes_z &&
- !ps->info.ps.writes_stencil &&
- !ps->info.ps.writes_sample_mask)
- blend.spi_shader_col_format = V_028714_SPI_SHADER_32_R;
- }
-
- if (extra &&
- (extra->custom_blend_mode == V_028808_CB_ELIMINATE_FAST_CLEAR ||
- extra->custom_blend_mode == V_028808_CB_FMASK_DECOMPRESS ||
- extra->custom_blend_mode == V_028808_CB_DCC_DECOMPRESS ||
- extra->custom_blend_mode == V_028808_CB_RESOLVE)) {
- /* According to the CB spec states, CB_SHADER_MASK should be
- * set to enable writes to all four channels of MRT0.
- */
- blend.cb_shader_mask = 0xf;
- }
-
- pipeline->graphics.col_format = blend.spi_shader_col_format;
- pipeline->graphics.cb_target_mask = blend.cb_target_mask;
-
- if (radv_pipeline_has_gs(pipeline) && !radv_pipeline_has_ngg(pipeline)) {
- struct radv_shader_variant *gs =
- pipeline->shaders[MESA_SHADER_GEOMETRY];
-
- radv_pipeline_init_gs_ring_state(pipeline, &gs->info.gs_ring_info);
- }
-
- if (radv_pipeline_has_tess(pipeline)) {
- pipeline->graphics.tess_patch_control_points =
- pCreateInfo->pTessellationState->patchControlPoints;
- }
-
- radv_pipeline_init_vertex_input_state(pipeline, pCreateInfo);
- radv_pipeline_init_binning_state(pipeline, pCreateInfo, &blend);
- radv_pipeline_init_shader_stages_state(pipeline);
- radv_pipeline_init_scratch(device, pipeline);
-
- /* Find the last vertex shader stage that eventually uses streamout. */
- pipeline->streamout_shader = radv_pipeline_get_streamout_shader(pipeline);
-
- radv_pipeline_generate_pm4(pipeline, pCreateInfo, extra, &blend);
-
- return result;
-}
+ VkResult result;
+
+ pipeline->device = device;
+ pipeline->layout = radv_pipeline_layout_from_handle(pCreateInfo->layout);
+ assert(pipeline->layout);
+
+ struct radv_blend_state blend = radv_pipeline_init_blend_state(pipeline, pCreateInfo, extra);
+
+ const VkPipelineCreationFeedbackCreateInfoEXT *creation_feedback =
+ vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
+ radv_init_feedback(creation_feedback);
+
+ VkPipelineCreationFeedbackEXT *pipeline_feedback =
+ creation_feedback ? creation_feedback->pPipelineCreationFeedback : NULL;
+
+ const VkPipelineShaderStageCreateInfo *pStages[MESA_SHADER_STAGES] = {
+ 0,
+ };
+ VkPipelineCreationFeedbackEXT *stage_feedbacks[MESA_SHADER_STAGES] = {0};
+ for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
+ gl_shader_stage stage = ffs(pCreateInfo->pStages[i].stage) - 1;
+ pStages[stage] = &pCreateInfo->pStages[i];
+ if (creation_feedback)
+ stage_feedbacks[stage] = &creation_feedback->pPipelineStageCreationFeedbacks[i];
+ }
-VkResult
-radv_graphics_pipeline_create(
- VkDevice _device,
- VkPipelineCache _cache,
- const VkGraphicsPipelineCreateInfo *pCreateInfo,
- const struct radv_graphics_pipeline_create_info *extra,
- const VkAllocationCallbacks *pAllocator,
- VkPipeline *pPipeline)
-{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
- struct radv_pipeline *pipeline;
- VkResult result;
+ struct radv_pipeline_key key =
+ radv_generate_graphics_pipeline_key(pipeline, pCreateInfo, &blend);
+
+ result = radv_create_shaders(pipeline, device, cache, &key, pStages, pCreateInfo->flags,
+ pipeline_feedback, stage_feedbacks);
+ if (result != VK_SUCCESS)
+ return result;
+
+ pipeline->graphics.spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1);
+ radv_pipeline_init_multisample_state(pipeline, &blend, pCreateInfo);
+ radv_pipeline_init_input_assembly_state(pipeline, pCreateInfo, extra);
+ radv_pipeline_init_dynamic_state(pipeline, pCreateInfo, extra);
+ radv_pipeline_init_raster_state(pipeline, pCreateInfo);
+ radv_pipeline_init_depth_stencil_state(pipeline, pCreateInfo);
+
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX10_3)
+ gfx103_pipeline_init_vrs_state(pipeline, pCreateInfo);
+
+ /* Ensure that some export memory is always allocated, for two reasons:
+ *
+ * 1) Correctness: The hardware ignores the EXEC mask if no export
+ * memory is allocated, so KILL and alpha test do not work correctly
+ * without this.
+ * 2) Performance: Every shader needs at least a NULL export, even when
+ * it writes no color/depth output. The NULL export instruction
+ * stalls without this setting.
+ *
+ * Don't add this to CB_SHADER_MASK.
+ *
+ * GFX10 supports pixel shaders without exports by setting both the
+ * color and Z formats to SPI_SHADER_ZERO. The hw will skip export
+ * instructions if any are present.
+ */
+ struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
+ if ((pipeline->device->physical_device->rad_info.chip_class <= GFX9 ||
+ ps->info.ps.can_discard) &&
+ !blend.spi_shader_col_format) {
+ if (!ps->info.ps.writes_z && !ps->info.ps.writes_stencil && !ps->info.ps.writes_sample_mask)
+ blend.spi_shader_col_format = V_028714_SPI_SHADER_32_R;
+ }
+
+ if (extra && (extra->custom_blend_mode == V_028808_CB_ELIMINATE_FAST_CLEAR ||
+ extra->custom_blend_mode == V_028808_CB_FMASK_DECOMPRESS ||
+ extra->custom_blend_mode == V_028808_CB_DCC_DECOMPRESS ||
+ extra->custom_blend_mode == V_028808_CB_RESOLVE)) {
+ /* According to the CB spec states, CB_SHADER_MASK should be
+ * set to enable writes to all four channels of MRT0.
+ */
+ blend.cb_shader_mask = 0xf;
+ }
+
+ pipeline->graphics.col_format = blend.spi_shader_col_format;
+ pipeline->graphics.cb_target_mask = blend.cb_target_mask;
+
+ if (radv_pipeline_has_gs(pipeline) && !radv_pipeline_has_ngg(pipeline)) {
+ struct radv_shader_variant *gs = pipeline->shaders[MESA_SHADER_GEOMETRY];
+
+ radv_pipeline_init_gs_ring_state(pipeline, &gs->info.gs_ring_info);
+ }
+
+ if (radv_pipeline_has_tess(pipeline)) {
+ pipeline->graphics.tess_patch_control_points =
+ pCreateInfo->pTessellationState->patchControlPoints;
+ }
- pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (pipeline == NULL)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ radv_pipeline_init_vertex_input_state(pipeline, pCreateInfo);
+ radv_pipeline_init_binning_state(pipeline, pCreateInfo, &blend);
+ radv_pipeline_init_shader_stages_state(pipeline);
+ radv_pipeline_init_scratch(device, pipeline);
- vk_object_base_init(&device->vk, &pipeline->base,
- VK_OBJECT_TYPE_PIPELINE);
+ /* Find the last vertex shader stage that eventually uses streamout. */
+ pipeline->streamout_shader = radv_pipeline_get_streamout_shader(pipeline);
- result = radv_pipeline_init(pipeline, device, cache,
- pCreateInfo, extra);
- if (result != VK_SUCCESS) {
- radv_pipeline_destroy(device, pipeline, pAllocator);
- return result;
- }
+ radv_pipeline_generate_pm4(pipeline, pCreateInfo, extra, &blend);
- *pPipeline = radv_pipeline_to_handle(pipeline);
+ return result;
+}
- return VK_SUCCESS;
+VkResult
+radv_graphics_pipeline_create(VkDevice _device, VkPipelineCache _cache,
+ const VkGraphicsPipelineCreateInfo *pCreateInfo,
+ const struct radv_graphics_pipeline_create_info *extra,
+ const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline)
+{
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
+ struct radv_pipeline *pipeline;
+ VkResult result;
+
+ pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (pipeline == NULL)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ vk_object_base_init(&device->vk, &pipeline->base, VK_OBJECT_TYPE_PIPELINE);
+
+ result = radv_pipeline_init(pipeline, device, cache, pCreateInfo, extra);
+ if (result != VK_SUCCESS) {
+ radv_pipeline_destroy(device, pipeline, pAllocator);
+ return result;
+ }
+
+ *pPipeline = radv_pipeline_to_handle(pipeline);
+
+ return VK_SUCCESS;
}
-VkResult radv_CreateGraphicsPipelines(
- VkDevice _device,
- VkPipelineCache pipelineCache,
- uint32_t count,
- const VkGraphicsPipelineCreateInfo* pCreateInfos,
- const VkAllocationCallbacks* pAllocator,
- VkPipeline* pPipelines)
+VkResult
+radv_CreateGraphicsPipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count,
+ const VkGraphicsPipelineCreateInfo *pCreateInfos,
+ const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines)
{
- VkResult result = VK_SUCCESS;
- unsigned i = 0;
-
- for (; i < count; i++) {
- VkResult r;
- r = radv_graphics_pipeline_create(_device,
- pipelineCache,
- &pCreateInfos[i],
- NULL, pAllocator, &pPipelines[i]);
- if (r != VK_SUCCESS) {
- result = r;
- pPipelines[i] = VK_NULL_HANDLE;
-
- if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT)
- break;
- }
- }
-
- for (; i < count; ++i)
- pPipelines[i] = VK_NULL_HANDLE;
-
- return result;
+ VkResult result = VK_SUCCESS;
+ unsigned i = 0;
+
+ for (; i < count; i++) {
+ VkResult r;
+ r = radv_graphics_pipeline_create(_device, pipelineCache, &pCreateInfos[i], NULL, pAllocator,
+ &pPipelines[i]);
+ if (r != VK_SUCCESS) {
+ result = r;
+ pPipelines[i] = VK_NULL_HANDLE;
+
+ if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT)
+ break;
+ }
+ }
+
+ for (; i < count; ++i)
+ pPipelines[i] = VK_NULL_HANDLE;
+
+ return result;
}
static void
-radv_pipeline_generate_hw_cs(struct radeon_cmdbuf *cs,
- const struct radv_pipeline *pipeline)
+radv_pipeline_generate_hw_cs(struct radeon_cmdbuf *cs, const struct radv_pipeline *pipeline)
{
- struct radv_shader_variant *shader = pipeline->shaders[MESA_SHADER_COMPUTE];
- uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
- struct radv_device *device = pipeline->device;
-
- radeon_set_sh_reg_seq(cs, R_00B830_COMPUTE_PGM_LO, 2);
- radeon_emit(cs, va >> 8);
- radeon_emit(cs, S_00B834_DATA(va >> 40));
-
- radeon_set_sh_reg_seq(cs, R_00B848_COMPUTE_PGM_RSRC1, 2);
- radeon_emit(cs, shader->config.rsrc1);
- radeon_emit(cs, shader->config.rsrc2);
- if (device->physical_device->rad_info.chip_class >= GFX10) {
- radeon_set_sh_reg(cs, R_00B8A0_COMPUTE_PGM_RSRC3, shader->config.rsrc3);
- }
+ struct radv_shader_variant *shader = pipeline->shaders[MESA_SHADER_COMPUTE];
+ uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
+ struct radv_device *device = pipeline->device;
+
+ radeon_set_sh_reg_seq(cs, R_00B830_COMPUTE_PGM_LO, 2);
+ radeon_emit(cs, va >> 8);
+ radeon_emit(cs, S_00B834_DATA(va >> 40));
+
+ radeon_set_sh_reg_seq(cs, R_00B848_COMPUTE_PGM_RSRC1, 2);
+ radeon_emit(cs, shader->config.rsrc1);
+ radeon_emit(cs, shader->config.rsrc2);
+ if (device->physical_device->rad_info.chip_class >= GFX10) {
+ radeon_set_sh_reg(cs, R_00B8A0_COMPUTE_PGM_RSRC3, shader->config.rsrc3);
+ }
}
static void
-radv_pipeline_generate_compute_state(struct radeon_cmdbuf *cs,
- const struct radv_pipeline *pipeline)
+radv_pipeline_generate_compute_state(struct radeon_cmdbuf *cs, const struct radv_pipeline *pipeline)
{
- struct radv_shader_variant *shader = pipeline->shaders[MESA_SHADER_COMPUTE];
- struct radv_device *device = pipeline->device;
- unsigned threads_per_threadgroup;
- unsigned threadgroups_per_cu = 1;
- unsigned waves_per_threadgroup;
- unsigned max_waves_per_sh = 0;
-
- /* Calculate best compute resource limits. */
- threads_per_threadgroup = shader->info.cs.block_size[0] *
- shader->info.cs.block_size[1] *
- shader->info.cs.block_size[2];
- waves_per_threadgroup = DIV_ROUND_UP(threads_per_threadgroup,
- shader->info.wave_size);
-
- if (device->physical_device->rad_info.chip_class >= GFX10 &&
- waves_per_threadgroup == 1)
- threadgroups_per_cu = 2;
-
- radeon_set_sh_reg(cs, R_00B854_COMPUTE_RESOURCE_LIMITS,
- ac_get_compute_resource_limits(&device->physical_device->rad_info,
- waves_per_threadgroup,
- max_waves_per_sh,
- threadgroups_per_cu));
-
- radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
- radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[0]));
- radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[1]));
- radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[2]));
+ struct radv_shader_variant *shader = pipeline->shaders[MESA_SHADER_COMPUTE];
+ struct radv_device *device = pipeline->device;
+ unsigned threads_per_threadgroup;
+ unsigned threadgroups_per_cu = 1;
+ unsigned waves_per_threadgroup;
+ unsigned max_waves_per_sh = 0;
+
+ /* Calculate best compute resource limits. */
+ threads_per_threadgroup =
+ shader->info.cs.block_size[0] * shader->info.cs.block_size[1] * shader->info.cs.block_size[2];
+ waves_per_threadgroup = DIV_ROUND_UP(threads_per_threadgroup, shader->info.wave_size);
+
+ if (device->physical_device->rad_info.chip_class >= GFX10 && waves_per_threadgroup == 1)
+ threadgroups_per_cu = 2;
+
+ radeon_set_sh_reg(
+ cs, R_00B854_COMPUTE_RESOURCE_LIMITS,
+ ac_get_compute_resource_limits(&device->physical_device->rad_info, waves_per_threadgroup,
+ max_waves_per_sh, threadgroups_per_cu));
+
+ radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
+ radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[0]));
+ radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[1]));
+ radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[2]));
}
static void
radv_compute_generate_pm4(struct radv_pipeline *pipeline)
{
- struct radv_device *device = pipeline->device;
- struct radeon_cmdbuf *cs = &pipeline->cs;
+ struct radv_device *device = pipeline->device;
+ struct radeon_cmdbuf *cs = &pipeline->cs;
- cs->max_dw = device->physical_device->rad_info.chip_class >= GFX10 ? 19 : 16;
- cs->buf = malloc(cs->max_dw * 4);
+ cs->max_dw = device->physical_device->rad_info.chip_class >= GFX10 ? 19 : 16;
+ cs->buf = malloc(cs->max_dw * 4);
- radv_pipeline_generate_hw_cs(cs, pipeline);
- radv_pipeline_generate_compute_state(cs, pipeline);
+ radv_pipeline_generate_hw_cs(cs, pipeline);
+ radv_pipeline_generate_compute_state(cs, pipeline);
- assert(pipeline->cs.cdw <= pipeline->cs.max_dw);
+ assert(pipeline->cs.cdw <= pipeline->cs.max_dw);
}
static struct radv_pipeline_key
radv_generate_compute_pipeline_key(struct radv_pipeline *pipeline,
- const VkComputePipelineCreateInfo *pCreateInfo)
+ const VkComputePipelineCreateInfo *pCreateInfo)
{
- const VkPipelineShaderStageCreateInfo *stage = &pCreateInfo->stage;
- struct radv_pipeline_key key;
- memset(&key, 0, sizeof(key));
-
- if (pCreateInfo->flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT)
- key.optimisations_disabled = 1;
-
- const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *subgroup_size =
- vk_find_struct_const(stage->pNext,
- PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT);
-
- if (subgroup_size) {
- assert(subgroup_size->requiredSubgroupSize == 32 ||
- subgroup_size->requiredSubgroupSize == 64);
- key.compute_subgroup_size = subgroup_size->requiredSubgroupSize;
- } else if (stage->flags & VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT) {
- key.require_full_subgroups = true;
- }
-
- return key;
+ const VkPipelineShaderStageCreateInfo *stage = &pCreateInfo->stage;
+ struct radv_pipeline_key key;
+ memset(&key, 0, sizeof(key));
+
+ if (pCreateInfo->flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT)
+ key.optimisations_disabled = 1;
+
+ const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *subgroup_size =
+ vk_find_struct_const(stage->pNext,
+ PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT);
+
+ if (subgroup_size) {
+ assert(subgroup_size->requiredSubgroupSize == 32 ||
+ subgroup_size->requiredSubgroupSize == 64);
+ key.compute_subgroup_size = subgroup_size->requiredSubgroupSize;
+ } else if (stage->flags & VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT) {
+ key.require_full_subgroups = true;
+ }
+
+ return key;
}
-static VkResult radv_compute_pipeline_create(
- VkDevice _device,
- VkPipelineCache _cache,
- const VkComputePipelineCreateInfo* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkPipeline* pPipeline)
+static VkResult
+radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache,
+ const VkComputePipelineCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
- const VkPipelineShaderStageCreateInfo *pStages[MESA_SHADER_STAGES] = { 0, };
- VkPipelineCreationFeedbackEXT *stage_feedbacks[MESA_SHADER_STAGES] = { 0 };
- struct radv_pipeline *pipeline;
- VkResult result;
-
- pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (pipeline == NULL)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
-
- vk_object_base_init(&device->vk, &pipeline->base,
- VK_OBJECT_TYPE_PIPELINE);
-
- pipeline->device = device;
- pipeline->layout = radv_pipeline_layout_from_handle(pCreateInfo->layout);
- assert(pipeline->layout);
-
- const VkPipelineCreationFeedbackCreateInfoEXT *creation_feedback =
- vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
- radv_init_feedback(creation_feedback);
-
- VkPipelineCreationFeedbackEXT *pipeline_feedback = creation_feedback ? creation_feedback->pPipelineCreationFeedback : NULL;
- if (creation_feedback)
- stage_feedbacks[MESA_SHADER_COMPUTE] = &creation_feedback->pPipelineStageCreationFeedbacks[0];
-
- pStages[MESA_SHADER_COMPUTE] = &pCreateInfo->stage;
-
- struct radv_pipeline_key key =
- radv_generate_compute_pipeline_key(pipeline, pCreateInfo);
-
- result = radv_create_shaders(pipeline, device, cache, &key, pStages,
- pCreateInfo->flags, pipeline_feedback,
- stage_feedbacks);
- if (result != VK_SUCCESS) {
- radv_pipeline_destroy(device, pipeline, pAllocator);
- return result;
- }
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
+ const VkPipelineShaderStageCreateInfo *pStages[MESA_SHADER_STAGES] = {
+ 0,
+ };
+ VkPipelineCreationFeedbackEXT *stage_feedbacks[MESA_SHADER_STAGES] = {0};
+ struct radv_pipeline *pipeline;
+ VkResult result;
+
+ pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (pipeline == NULL)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ vk_object_base_init(&device->vk, &pipeline->base, VK_OBJECT_TYPE_PIPELINE);
+
+ pipeline->device = device;
+ pipeline->layout = radv_pipeline_layout_from_handle(pCreateInfo->layout);
+ assert(pipeline->layout);
+
+ const VkPipelineCreationFeedbackCreateInfoEXT *creation_feedback =
+ vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
+ radv_init_feedback(creation_feedback);
+
+ VkPipelineCreationFeedbackEXT *pipeline_feedback =
+ creation_feedback ? creation_feedback->pPipelineCreationFeedback : NULL;
+ if (creation_feedback)
+ stage_feedbacks[MESA_SHADER_COMPUTE] = &creation_feedback->pPipelineStageCreationFeedbacks[0];
+
+ pStages[MESA_SHADER_COMPUTE] = &pCreateInfo->stage;
+
+ struct radv_pipeline_key key = radv_generate_compute_pipeline_key(pipeline, pCreateInfo);
+
+ result = radv_create_shaders(pipeline, device, cache, &key, pStages, pCreateInfo->flags,
+ pipeline_feedback, stage_feedbacks);
+ if (result != VK_SUCCESS) {
+ radv_pipeline_destroy(device, pipeline, pAllocator);
+ return result;
+ }
- pipeline->user_data_0[MESA_SHADER_COMPUTE] = radv_pipeline_stage_to_user_data_0(pipeline, MESA_SHADER_COMPUTE, device->physical_device->rad_info.chip_class);
- pipeline->need_indirect_descriptor_sets |= pipeline->shaders[MESA_SHADER_COMPUTE]->info.need_indirect_descriptor_sets;
- radv_pipeline_init_scratch(device, pipeline);
+ pipeline->user_data_0[MESA_SHADER_COMPUTE] = radv_pipeline_stage_to_user_data_0(
+ pipeline, MESA_SHADER_COMPUTE, device->physical_device->rad_info.chip_class);
+ pipeline->need_indirect_descriptor_sets |=
+ pipeline->shaders[MESA_SHADER_COMPUTE]->info.need_indirect_descriptor_sets;
+ radv_pipeline_init_scratch(device, pipeline);
- radv_compute_generate_pm4(pipeline);
+ radv_compute_generate_pm4(pipeline);
- *pPipeline = radv_pipeline_to_handle(pipeline);
+ *pPipeline = radv_pipeline_to_handle(pipeline);
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
-VkResult radv_CreateComputePipelines(
- VkDevice _device,
- VkPipelineCache pipelineCache,
- uint32_t count,
- const VkComputePipelineCreateInfo* pCreateInfos,
- const VkAllocationCallbacks* pAllocator,
- VkPipeline* pPipelines)
+VkResult
+radv_CreateComputePipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count,
+ const VkComputePipelineCreateInfo *pCreateInfos,
+ const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines)
{
- VkResult result = VK_SUCCESS;
-
- unsigned i = 0;
- for (; i < count; i++) {
- VkResult r;
- r = radv_compute_pipeline_create(_device, pipelineCache,
- &pCreateInfos[i],
- pAllocator, &pPipelines[i]);
- if (r != VK_SUCCESS) {
- result = r;
- pPipelines[i] = VK_NULL_HANDLE;
-
- if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT)
- break;
- }
- }
-
- for (; i < count; ++i)
- pPipelines[i] = VK_NULL_HANDLE;
-
- return result;
-}
+ VkResult result = VK_SUCCESS;
+
+ unsigned i = 0;
+ for (; i < count; i++) {
+ VkResult r;
+ r = radv_compute_pipeline_create(_device, pipelineCache, &pCreateInfos[i], pAllocator,
+ &pPipelines[i]);
+ if (r != VK_SUCCESS) {
+ result = r;
+ pPipelines[i] = VK_NULL_HANDLE;
+
+ if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT_EXT)
+ break;
+ }
+ }
+ for (; i < count; ++i)
+ pPipelines[i] = VK_NULL_HANDLE;
-static uint32_t radv_get_executable_count(const struct radv_pipeline *pipeline)
+ return result;
+}
+
+static uint32_t
+radv_get_executable_count(const struct radv_pipeline *pipeline)
{
- uint32_t ret = 0;
- for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
- if (!pipeline->shaders[i])
- continue;
-
- if (i == MESA_SHADER_GEOMETRY &&
- !radv_pipeline_has_ngg(pipeline)) {
- ret += 2u;
- } else {
- ret += 1u;
- }
-
- }
- return ret;
+ uint32_t ret = 0;
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
+ if (!pipeline->shaders[i])
+ continue;
+
+ if (i == MESA_SHADER_GEOMETRY && !radv_pipeline_has_ngg(pipeline)) {
+ ret += 2u;
+ } else {
+ ret += 1u;
+ }
+ }
+ return ret;
}
static struct radv_shader_variant *
-radv_get_shader_from_executable_index(const struct radv_pipeline *pipeline, int index, gl_shader_stage *stage)
+radv_get_shader_from_executable_index(const struct radv_pipeline *pipeline, int index,
+ gl_shader_stage *stage)
{
- for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
- if (!pipeline->shaders[i])
- continue;
- if (!index) {
- *stage = i;
- return pipeline->shaders[i];
- }
-
- --index;
-
- if (i == MESA_SHADER_GEOMETRY &&
- !radv_pipeline_has_ngg(pipeline)) {
- if (!index) {
- *stage = i;
- return pipeline->gs_copy_shader;
- }
- --index;
- }
- }
-
- *stage = -1;
- return NULL;
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
+ if (!pipeline->shaders[i])
+ continue;
+ if (!index) {
+ *stage = i;
+ return pipeline->shaders[i];
+ }
+
+ --index;
+
+ if (i == MESA_SHADER_GEOMETRY && !radv_pipeline_has_ngg(pipeline)) {
+ if (!index) {
+ *stage = i;
+ return pipeline->gs_copy_shader;
+ }
+ --index;
+ }
+ }
+
+ *stage = -1;
+ return NULL;
}
/* Basically strlcpy (which does not exist on linux) specialized for
* descriptions. */
-static void desc_copy(char *desc, const char *src) {
- int len = strlen(src);
- assert(len < VK_MAX_DESCRIPTION_SIZE);
- memcpy(desc, src, len);
- memset(desc + len, 0, VK_MAX_DESCRIPTION_SIZE - len);
+static void
+desc_copy(char *desc, const char *src)
+{
+ int len = strlen(src);
+ assert(len < VK_MAX_DESCRIPTION_SIZE);
+ memcpy(desc, src, len);
+ memset(desc + len, 0, VK_MAX_DESCRIPTION_SIZE - len);
}
-VkResult radv_GetPipelineExecutablePropertiesKHR(
- VkDevice _device,
- const VkPipelineInfoKHR* pPipelineInfo,
- uint32_t* pExecutableCount,
- VkPipelineExecutablePropertiesKHR* pProperties)
+VkResult
+radv_GetPipelineExecutablePropertiesKHR(VkDevice _device, const VkPipelineInfoKHR *pPipelineInfo,
+ uint32_t *pExecutableCount,
+ VkPipelineExecutablePropertiesKHR *pProperties)
{
- RADV_FROM_HANDLE(radv_pipeline, pipeline, pPipelineInfo->pipeline);
- const uint32_t total_count = radv_get_executable_count(pipeline);
-
- if (!pProperties) {
- *pExecutableCount = total_count;
- return VK_SUCCESS;
- }
-
- const uint32_t count = MIN2(total_count, *pExecutableCount);
- for (unsigned i = 0, executable_idx = 0;
- i < MESA_SHADER_STAGES && executable_idx < count; ++i) {
- if (!pipeline->shaders[i])
- continue;
- pProperties[executable_idx].stages = mesa_to_vk_shader_stage(i);
- const char *name = NULL;
- const char *description = NULL;
- switch(i) {
- case MESA_SHADER_VERTEX:
- name = "Vertex Shader";
- description = "Vulkan Vertex Shader";
- break;
- case MESA_SHADER_TESS_CTRL:
- if (!pipeline->shaders[MESA_SHADER_VERTEX]) {
- pProperties[executable_idx].stages |= VK_SHADER_STAGE_VERTEX_BIT;
- name = "Vertex + Tessellation Control Shaders";
- description = "Combined Vulkan Vertex and Tessellation Control Shaders";
- } else {
- name = "Tessellation Control Shader";
- description = "Vulkan Tessellation Control Shader";
- }
- break;
- case MESA_SHADER_TESS_EVAL:
- name = "Tessellation Evaluation Shader";
- description = "Vulkan Tessellation Evaluation Shader";
- break;
- case MESA_SHADER_GEOMETRY:
- if (radv_pipeline_has_tess(pipeline) && !pipeline->shaders[MESA_SHADER_TESS_EVAL]) {
- pProperties[executable_idx].stages |= VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
- name = "Tessellation Evaluation + Geometry Shaders";
- description = "Combined Vulkan Tessellation Evaluation and Geometry Shaders";
- } else if (!radv_pipeline_has_tess(pipeline) && !pipeline->shaders[MESA_SHADER_VERTEX]) {
- pProperties[executable_idx].stages |= VK_SHADER_STAGE_VERTEX_BIT;
- name = "Vertex + Geometry Shader";
- description = "Combined Vulkan Vertex and Geometry Shaders";
- } else {
- name = "Geometry Shader";
- description = "Vulkan Geometry Shader";
- }
- break;
- case MESA_SHADER_FRAGMENT:
- name = "Fragment Shader";
- description = "Vulkan Fragment Shader";
- break;
- case MESA_SHADER_COMPUTE:
- name = "Compute Shader";
- description = "Vulkan Compute Shader";
- break;
- }
-
- pProperties[executable_idx].subgroupSize = pipeline->shaders[i]->info.wave_size;
- desc_copy(pProperties[executable_idx].name, name);
- desc_copy(pProperties[executable_idx].description, description);
-
- ++executable_idx;
- if (i == MESA_SHADER_GEOMETRY &&
- !radv_pipeline_has_ngg(pipeline)) {
- assert(pipeline->gs_copy_shader);
- if (executable_idx >= count)
- break;
-
- pProperties[executable_idx].stages = VK_SHADER_STAGE_GEOMETRY_BIT;
- pProperties[executable_idx].subgroupSize = 64;
- desc_copy(pProperties[executable_idx].name, "GS Copy Shader");
- desc_copy(pProperties[executable_idx].description,
- "Extra shader stage that loads the GS output ringbuffer into the rasterizer");
-
- ++executable_idx;
- }
- }
-
- VkResult result = *pExecutableCount < total_count ? VK_INCOMPLETE : VK_SUCCESS;
- *pExecutableCount = count;
- return result;
+ RADV_FROM_HANDLE(radv_pipeline, pipeline, pPipelineInfo->pipeline);
+ const uint32_t total_count = radv_get_executable_count(pipeline);
+
+ if (!pProperties) {
+ *pExecutableCount = total_count;
+ return VK_SUCCESS;
+ }
+
+ const uint32_t count = MIN2(total_count, *pExecutableCount);
+ for (unsigned i = 0, executable_idx = 0; i < MESA_SHADER_STAGES && executable_idx < count; ++i) {
+ if (!pipeline->shaders[i])
+ continue;
+ pProperties[executable_idx].stages = mesa_to_vk_shader_stage(i);
+ const char *name = NULL;
+ const char *description = NULL;
+ switch (i) {
+ case MESA_SHADER_VERTEX:
+ name = "Vertex Shader";
+ description = "Vulkan Vertex Shader";
+ break;
+ case MESA_SHADER_TESS_CTRL:
+ if (!pipeline->shaders[MESA_SHADER_VERTEX]) {
+ pProperties[executable_idx].stages |= VK_SHADER_STAGE_VERTEX_BIT;
+ name = "Vertex + Tessellation Control Shaders";
+ description = "Combined Vulkan Vertex and Tessellation Control Shaders";
+ } else {
+ name = "Tessellation Control Shader";
+ description = "Vulkan Tessellation Control Shader";
+ }
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ name = "Tessellation Evaluation Shader";
+ description = "Vulkan Tessellation Evaluation Shader";
+ break;
+ case MESA_SHADER_GEOMETRY:
+ if (radv_pipeline_has_tess(pipeline) && !pipeline->shaders[MESA_SHADER_TESS_EVAL]) {
+ pProperties[executable_idx].stages |= VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
+ name = "Tessellation Evaluation + Geometry Shaders";
+ description = "Combined Vulkan Tessellation Evaluation and Geometry Shaders";
+ } else if (!radv_pipeline_has_tess(pipeline) && !pipeline->shaders[MESA_SHADER_VERTEX]) {
+ pProperties[executable_idx].stages |= VK_SHADER_STAGE_VERTEX_BIT;
+ name = "Vertex + Geometry Shader";
+ description = "Combined Vulkan Vertex and Geometry Shaders";
+ } else {
+ name = "Geometry Shader";
+ description = "Vulkan Geometry Shader";
+ }
+ break;
+ case MESA_SHADER_FRAGMENT:
+ name = "Fragment Shader";
+ description = "Vulkan Fragment Shader";
+ break;
+ case MESA_SHADER_COMPUTE:
+ name = "Compute Shader";
+ description = "Vulkan Compute Shader";
+ break;
+ }
+
+ pProperties[executable_idx].subgroupSize = pipeline->shaders[i]->info.wave_size;
+ desc_copy(pProperties[executable_idx].name, name);
+ desc_copy(pProperties[executable_idx].description, description);
+
+ ++executable_idx;
+ if (i == MESA_SHADER_GEOMETRY && !radv_pipeline_has_ngg(pipeline)) {
+ assert(pipeline->gs_copy_shader);
+ if (executable_idx >= count)
+ break;
+
+ pProperties[executable_idx].stages = VK_SHADER_STAGE_GEOMETRY_BIT;
+ pProperties[executable_idx].subgroupSize = 64;
+ desc_copy(pProperties[executable_idx].name, "GS Copy Shader");
+ desc_copy(pProperties[executable_idx].description,
+ "Extra shader stage that loads the GS output ringbuffer into the rasterizer");
+
+ ++executable_idx;
+ }
+ }
+
+ VkResult result = *pExecutableCount < total_count ? VK_INCOMPLETE : VK_SUCCESS;
+ *pExecutableCount = count;
+ return result;
}
-VkResult radv_GetPipelineExecutableStatisticsKHR(
- VkDevice _device,
- const VkPipelineExecutableInfoKHR* pExecutableInfo,
- uint32_t* pStatisticCount,
- VkPipelineExecutableStatisticKHR* pStatistics)
+VkResult
+radv_GetPipelineExecutableStatisticsKHR(VkDevice _device,
+ const VkPipelineExecutableInfoKHR *pExecutableInfo,
+ uint32_t *pStatisticCount,
+ VkPipelineExecutableStatisticKHR *pStatistics)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_pipeline, pipeline, pExecutableInfo->pipeline);
- gl_shader_stage stage;
- struct radv_shader_variant *shader = radv_get_shader_from_executable_index(pipeline, pExecutableInfo->executableIndex, &stage);
-
- enum chip_class chip_class = device->physical_device->rad_info.chip_class;
- unsigned lds_increment = chip_class >= GFX7 ? 512 : 256;
- unsigned max_waves = radv_get_max_waves(device, shader, stage);
-
- VkPipelineExecutableStatisticKHR *s = pStatistics;
- VkPipelineExecutableStatisticKHR *end = s + (pStatistics ? *pStatisticCount : 0);
- VkResult result = VK_SUCCESS;
-
- if (s < end) {
- desc_copy(s->name, "SGPRs");
- desc_copy(s->description, "Number of SGPR registers allocated per subgroup");
- s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
- s->value.u64 = shader->config.num_sgprs;
- }
- ++s;
-
- if (s < end) {
- desc_copy(s->name, "VGPRs");
- desc_copy(s->description, "Number of VGPR registers allocated per subgroup");
- s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
- s->value.u64 = shader->config.num_vgprs;
- }
- ++s;
-
- if (s < end) {
- desc_copy(s->name, "Spilled SGPRs");
- desc_copy(s->description, "Number of SGPR registers spilled per subgroup");
- s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
- s->value.u64 = shader->config.spilled_sgprs;
- }
- ++s;
-
- if (s < end) {
- desc_copy(s->name, "Spilled VGPRs");
- desc_copy(s->description, "Number of VGPR registers spilled per subgroup");
- s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
- s->value.u64 = shader->config.spilled_vgprs;
- }
- ++s;
-
- if (s < end) {
- desc_copy(s->name, "PrivMem VGPRs");
- desc_copy(s->description, "Number of VGPRs stored in private memory per subgroup");
- s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
- s->value.u64 = shader->info.private_mem_vgprs;
- }
- ++s;
-
- if (s < end) {
- desc_copy(s->name, "Code size");
- desc_copy(s->description, "Code size in bytes");
- s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
- s->value.u64 = shader->exec_size;
- }
- ++s;
-
- if (s < end) {
- desc_copy(s->name, "LDS size");
- desc_copy(s->description, "LDS size in bytes per workgroup");
- s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
- s->value.u64 = shader->config.lds_size * lds_increment;
- }
- ++s;
-
- if (s < end) {
- desc_copy(s->name, "Scratch size");
- desc_copy(s->description, "Private memory in bytes per subgroup");
- s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
- s->value.u64 = shader->config.scratch_bytes_per_wave;
- }
- ++s;
-
- if (s < end) {
- desc_copy(s->name, "Subgroups per SIMD");
- desc_copy(s->description, "The maximum number of subgroups in flight on a SIMD unit");
- s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
- s->value.u64 = max_waves;
- }
- ++s;
-
- if (shader->statistics) {
- for (unsigned i = 0; i < aco_num_statistics; i++) {
- const struct aco_compiler_statistic_info *info = &aco_statistic_infos[i];
- if (s < end) {
- desc_copy(s->name, info->name);
- desc_copy(s->description, info->desc);
- s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
- s->value.u64 = shader->statistics[i];
- }
- ++s;
- }
- }
-
- if (!pStatistics)
- *pStatisticCount = s - pStatistics;
- else if (s > end) {
- *pStatisticCount = end - pStatistics;
- result = VK_INCOMPLETE;
- } else {
- *pStatisticCount = s - pStatistics;
- }
-
- return result;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_pipeline, pipeline, pExecutableInfo->pipeline);
+ gl_shader_stage stage;
+ struct radv_shader_variant *shader =
+ radv_get_shader_from_executable_index(pipeline, pExecutableInfo->executableIndex, &stage);
+
+ enum chip_class chip_class = device->physical_device->rad_info.chip_class;
+ unsigned lds_increment = chip_class >= GFX7 ? 512 : 256;
+ unsigned max_waves = radv_get_max_waves(device, shader, stage);
+
+ VkPipelineExecutableStatisticKHR *s = pStatistics;
+ VkPipelineExecutableStatisticKHR *end = s + (pStatistics ? *pStatisticCount : 0);
+ VkResult result = VK_SUCCESS;
+
+ if (s < end) {
+ desc_copy(s->name, "SGPRs");
+ desc_copy(s->description, "Number of SGPR registers allocated per subgroup");
+ s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+ s->value.u64 = shader->config.num_sgprs;
+ }
+ ++s;
+
+ if (s < end) {
+ desc_copy(s->name, "VGPRs");
+ desc_copy(s->description, "Number of VGPR registers allocated per subgroup");
+ s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+ s->value.u64 = shader->config.num_vgprs;
+ }
+ ++s;
+
+ if (s < end) {
+ desc_copy(s->name, "Spilled SGPRs");
+ desc_copy(s->description, "Number of SGPR registers spilled per subgroup");
+ s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+ s->value.u64 = shader->config.spilled_sgprs;
+ }
+ ++s;
+
+ if (s < end) {
+ desc_copy(s->name, "Spilled VGPRs");
+ desc_copy(s->description, "Number of VGPR registers spilled per subgroup");
+ s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+ s->value.u64 = shader->config.spilled_vgprs;
+ }
+ ++s;
+
+ if (s < end) {
+ desc_copy(s->name, "PrivMem VGPRs");
+ desc_copy(s->description, "Number of VGPRs stored in private memory per subgroup");
+ s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+ s->value.u64 = shader->info.private_mem_vgprs;
+ }
+ ++s;
+
+ if (s < end) {
+ desc_copy(s->name, "Code size");
+ desc_copy(s->description, "Code size in bytes");
+ s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+ s->value.u64 = shader->exec_size;
+ }
+ ++s;
+
+ if (s < end) {
+ desc_copy(s->name, "LDS size");
+ desc_copy(s->description, "LDS size in bytes per workgroup");
+ s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+ s->value.u64 = shader->config.lds_size * lds_increment;
+ }
+ ++s;
+
+ if (s < end) {
+ desc_copy(s->name, "Scratch size");
+ desc_copy(s->description, "Private memory in bytes per subgroup");
+ s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+ s->value.u64 = shader->config.scratch_bytes_per_wave;
+ }
+ ++s;
+
+ if (s < end) {
+ desc_copy(s->name, "Subgroups per SIMD");
+ desc_copy(s->description, "The maximum number of subgroups in flight on a SIMD unit");
+ s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+ s->value.u64 = max_waves;
+ }
+ ++s;
+
+ if (shader->statistics) {
+ for (unsigned i = 0; i < aco_num_statistics; i++) {
+ const struct aco_compiler_statistic_info *info = &aco_statistic_infos[i];
+ if (s < end) {
+ desc_copy(s->name, info->name);
+ desc_copy(s->description, info->desc);
+ s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
+ s->value.u64 = shader->statistics[i];
+ }
+ ++s;
+ }
+ }
+
+ if (!pStatistics)
+ *pStatisticCount = s - pStatistics;
+ else if (s > end) {
+ *pStatisticCount = end - pStatistics;
+ result = VK_INCOMPLETE;
+ } else {
+ *pStatisticCount = s - pStatistics;
+ }
+
+ return result;
}
-static VkResult radv_copy_representation(void *data, size_t *data_size, const char *src)
+static VkResult
+radv_copy_representation(void *data, size_t *data_size, const char *src)
{
- size_t total_size = strlen(src) + 1;
+ size_t total_size = strlen(src) + 1;
- if (!data) {
- *data_size = total_size;
- return VK_SUCCESS;
- }
+ if (!data) {
+ *data_size = total_size;
+ return VK_SUCCESS;
+ }
- size_t size = MIN2(total_size, *data_size);
+ size_t size = MIN2(total_size, *data_size);
- memcpy(data, src, size);
- if (size)
- *((char*)data + size - 1) = 0;
- return size < total_size ? VK_INCOMPLETE : VK_SUCCESS;
+ memcpy(data, src, size);
+ if (size)
+ *((char *)data + size - 1) = 0;
+ return size < total_size ? VK_INCOMPLETE : VK_SUCCESS;
}
-VkResult radv_GetPipelineExecutableInternalRepresentationsKHR(
- VkDevice device,
- const VkPipelineExecutableInfoKHR* pExecutableInfo,
- uint32_t* pInternalRepresentationCount,
- VkPipelineExecutableInternalRepresentationKHR* pInternalRepresentations)
+VkResult
+radv_GetPipelineExecutableInternalRepresentationsKHR(
+ VkDevice device, const VkPipelineExecutableInfoKHR *pExecutableInfo,
+ uint32_t *pInternalRepresentationCount,
+ VkPipelineExecutableInternalRepresentationKHR *pInternalRepresentations)
{
- RADV_FROM_HANDLE(radv_pipeline, pipeline, pExecutableInfo->pipeline);
- gl_shader_stage stage;
- struct radv_shader_variant *shader = radv_get_shader_from_executable_index(pipeline, pExecutableInfo->executableIndex, &stage);
-
- VkPipelineExecutableInternalRepresentationKHR *p = pInternalRepresentations;
- VkPipelineExecutableInternalRepresentationKHR *end = p + (pInternalRepresentations ? *pInternalRepresentationCount : 0);
- VkResult result = VK_SUCCESS;
- /* optimized NIR */
- if (p < end) {
- p->isText = true;
- desc_copy(p->name, "NIR Shader(s)");
- desc_copy(p->description, "The optimized NIR shader(s)");
- if (radv_copy_representation(p->pData, &p->dataSize, shader->nir_string) != VK_SUCCESS)
- result = VK_INCOMPLETE;
- }
- ++p;
-
- /* backend IR */
- if (p < end) {
- p->isText = true;
- if (radv_use_llvm_for_stage(pipeline->device, stage)) {
- desc_copy(p->name, "LLVM IR");
- desc_copy(p->description, "The LLVM IR after some optimizations");
- } else {
- desc_copy(p->name, "ACO IR");
- desc_copy(p->description, "The ACO IR after some optimizations");
- }
- if (radv_copy_representation(p->pData, &p->dataSize, shader->ir_string) != VK_SUCCESS)
- result = VK_INCOMPLETE;
- }
- ++p;
-
- /* Disassembler */
- if (p < end) {
- p->isText = true;
- desc_copy(p->name, "Assembly");
- desc_copy(p->description, "Final Assembly");
- if (radv_copy_representation(p->pData, &p->dataSize, shader->disasm_string) != VK_SUCCESS)
- result = VK_INCOMPLETE;
- }
- ++p;
-
- if (!pInternalRepresentations)
- *pInternalRepresentationCount = p - pInternalRepresentations;
- else if(p > end) {
- result = VK_INCOMPLETE;
- *pInternalRepresentationCount = end - pInternalRepresentations;
- } else {
- *pInternalRepresentationCount = p - pInternalRepresentations;
- }
-
- return result;
+ RADV_FROM_HANDLE(radv_pipeline, pipeline, pExecutableInfo->pipeline);
+ gl_shader_stage stage;
+ struct radv_shader_variant *shader =
+ radv_get_shader_from_executable_index(pipeline, pExecutableInfo->executableIndex, &stage);
+
+ VkPipelineExecutableInternalRepresentationKHR *p = pInternalRepresentations;
+ VkPipelineExecutableInternalRepresentationKHR *end =
+ p + (pInternalRepresentations ? *pInternalRepresentationCount : 0);
+ VkResult result = VK_SUCCESS;
+ /* optimized NIR */
+ if (p < end) {
+ p->isText = true;
+ desc_copy(p->name, "NIR Shader(s)");
+ desc_copy(p->description, "The optimized NIR shader(s)");
+ if (radv_copy_representation(p->pData, &p->dataSize, shader->nir_string) != VK_SUCCESS)
+ result = VK_INCOMPLETE;
+ }
+ ++p;
+
+ /* backend IR */
+ if (p < end) {
+ p->isText = true;
+ if (radv_use_llvm_for_stage(pipeline->device, stage)) {
+ desc_copy(p->name, "LLVM IR");
+ desc_copy(p->description, "The LLVM IR after some optimizations");
+ } else {
+ desc_copy(p->name, "ACO IR");
+ desc_copy(p->description, "The ACO IR after some optimizations");
+ }
+ if (radv_copy_representation(p->pData, &p->dataSize, shader->ir_string) != VK_SUCCESS)
+ result = VK_INCOMPLETE;
+ }
+ ++p;
+
+ /* Disassembler */
+ if (p < end) {
+ p->isText = true;
+ desc_copy(p->name, "Assembly");
+ desc_copy(p->description, "Final Assembly");
+ if (radv_copy_representation(p->pData, &p->dataSize, shader->disasm_string) != VK_SUCCESS)
+ result = VK_INCOMPLETE;
+ }
+ ++p;
+
+ if (!pInternalRepresentations)
+ *pInternalRepresentationCount = p - pInternalRepresentations;
+ else if (p > end) {
+ result = VK_INCOMPLETE;
+ *pInternalRepresentationCount = end - pInternalRepresentations;
+ } else {
+ *pInternalRepresentationCount = p - pInternalRepresentations;
+ }
+
+ return result;
}
diff --git a/src/amd/vulkan/radv_pipeline_cache.c b/src/amd/vulkan/radv_pipeline_cache.c
index d25a12f5151..dafe9b43b74 100644
--- a/src/amd/vulkan/radv_pipeline_cache.c
+++ b/src/amd/vulkan/radv_pipeline_cache.c
@@ -21,629 +21,594 @@
* IN THE SOFTWARE.
*/
-#include "util/macros.h"
-#include "util/mesa-sha1.h"
#include "util/debug.h"
#include "util/disk_cache.h"
+#include "util/macros.h"
+#include "util/mesa-sha1.h"
#include "util/u_atomic.h"
+#include "vulkan/util/vk_util.h"
#include "radv_debug.h"
#include "radv_private.h"
#include "radv_shader.h"
-#include "vulkan/util/vk_util.h"
#include "ac_nir_to_llvm.h"
struct cache_entry {
- union {
- unsigned char sha1[20];
- uint32_t sha1_dw[5];
- };
- uint32_t binary_sizes[MESA_SHADER_STAGES];
- struct radv_shader_variant *variants[MESA_SHADER_STAGES];
- char code[0];
+ union {
+ unsigned char sha1[20];
+ uint32_t sha1_dw[5];
+ };
+ uint32_t binary_sizes[MESA_SHADER_STAGES];
+ struct radv_shader_variant *variants[MESA_SHADER_STAGES];
+ char code[0];
};
static void
radv_pipeline_cache_lock(struct radv_pipeline_cache *cache)
{
- if (cache->flags & VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT_EXT)
- return;
+ if (cache->flags & VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT_EXT)
+ return;
- mtx_lock(&cache->mutex);
+ mtx_lock(&cache->mutex);
}
static void
radv_pipeline_cache_unlock(struct radv_pipeline_cache *cache)
{
- if (cache->flags & VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT_EXT)
- return;
+ if (cache->flags & VK_PIPELINE_CACHE_CREATE_EXTERNALLY_SYNCHRONIZED_BIT_EXT)
+ return;
- mtx_unlock(&cache->mutex);
+ mtx_unlock(&cache->mutex);
}
void
-radv_pipeline_cache_init(struct radv_pipeline_cache *cache,
- struct radv_device *device)
+radv_pipeline_cache_init(struct radv_pipeline_cache *cache, struct radv_device *device)
{
- cache->device = device;
- mtx_init(&cache->mutex, mtx_plain);
- cache->flags = 0;
-
- cache->modified = false;
- cache->kernel_count = 0;
- cache->total_size = 0;
- cache->table_size = 1024;
- const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);
- cache->hash_table = malloc(byte_size);
-
- /* We don't consider allocation failure fatal, we just start with a 0-sized
- * cache. Disable caching when we want to keep shader debug info, since
- * we don't get the debug info on cached shaders. */
- if (cache->hash_table == NULL ||
- (device->instance->debug_flags & RADV_DEBUG_NO_CACHE))
- cache->table_size = 0;
- else
- memset(cache->hash_table, 0, byte_size);
+ cache->device = device;
+ mtx_init(&cache->mutex, mtx_plain);
+ cache->flags = 0;
+
+ cache->modified = false;
+ cache->kernel_count = 0;
+ cache->total_size = 0;
+ cache->table_size = 1024;
+ const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);
+ cache->hash_table = malloc(byte_size);
+
+ /* We don't consider allocation failure fatal, we just start with a 0-sized
+ * cache. Disable caching when we want to keep shader debug info, since
+ * we don't get the debug info on cached shaders. */
+ if (cache->hash_table == NULL || (device->instance->debug_flags & RADV_DEBUG_NO_CACHE))
+ cache->table_size = 0;
+ else
+ memset(cache->hash_table, 0, byte_size);
}
void
radv_pipeline_cache_finish(struct radv_pipeline_cache *cache)
{
- for (unsigned i = 0; i < cache->table_size; ++i)
- if (cache->hash_table[i]) {
- for(int j = 0; j < MESA_SHADER_STAGES; ++j) {
- if (cache->hash_table[i]->variants[j])
- radv_shader_variant_destroy(cache->device,
- cache->hash_table[i]->variants[j]);
- }
- vk_free(&cache->alloc, cache->hash_table[i]);
- }
- mtx_destroy(&cache->mutex);
- free(cache->hash_table);
+ for (unsigned i = 0; i < cache->table_size; ++i)
+ if (cache->hash_table[i]) {
+ for (int j = 0; j < MESA_SHADER_STAGES; ++j) {
+ if (cache->hash_table[i]->variants[j])
+ radv_shader_variant_destroy(cache->device, cache->hash_table[i]->variants[j]);
+ }
+ vk_free(&cache->alloc, cache->hash_table[i]);
+ }
+ mtx_destroy(&cache->mutex);
+ free(cache->hash_table);
}
static uint32_t
entry_size(struct cache_entry *entry)
{
- size_t ret = sizeof(*entry);
- for (int i = 0; i < MESA_SHADER_STAGES; ++i)
- if (entry->binary_sizes[i])
- ret += entry->binary_sizes[i];
- ret = align(ret, alignof(struct cache_entry));
- return ret;
+ size_t ret = sizeof(*entry);
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i)
+ if (entry->binary_sizes[i])
+ ret += entry->binary_sizes[i];
+ ret = align(ret, alignof(struct cache_entry));
+ return ret;
}
void
-radv_hash_shaders(unsigned char *hash,
- const VkPipelineShaderStageCreateInfo **stages,
- const struct radv_pipeline_layout *layout,
- const struct radv_pipeline_key *key,
- uint32_t flags)
+radv_hash_shaders(unsigned char *hash, const VkPipelineShaderStageCreateInfo **stages,
+ const struct radv_pipeline_layout *layout, const struct radv_pipeline_key *key,
+ uint32_t flags)
{
- struct mesa_sha1 ctx;
-
- _mesa_sha1_init(&ctx);
- if (key)
- _mesa_sha1_update(&ctx, key, sizeof(*key));
- if (layout)
- _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
-
- for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
- if (stages[i]) {
- RADV_FROM_HANDLE(vk_shader_module, module, stages[i]->module);
- const VkSpecializationInfo *spec_info = stages[i]->pSpecializationInfo;
-
- _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
- _mesa_sha1_update(&ctx, stages[i]->pName, strlen(stages[i]->pName));
- if (spec_info && spec_info->mapEntryCount) {
- _mesa_sha1_update(&ctx, spec_info->pMapEntries,
- spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]);
- _mesa_sha1_update(&ctx, spec_info->pData, spec_info->dataSize);
- }
- }
- }
- _mesa_sha1_update(&ctx, &flags, 4);
- _mesa_sha1_final(&ctx, hash);
+ struct mesa_sha1 ctx;
+
+ _mesa_sha1_init(&ctx);
+ if (key)
+ _mesa_sha1_update(&ctx, key, sizeof(*key));
+ if (layout)
+ _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
+
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
+ if (stages[i]) {
+ RADV_FROM_HANDLE(vk_shader_module, module, stages[i]->module);
+ const VkSpecializationInfo *spec_info = stages[i]->pSpecializationInfo;
+
+ _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
+ _mesa_sha1_update(&ctx, stages[i]->pName, strlen(stages[i]->pName));
+ if (spec_info && spec_info->mapEntryCount) {
+ _mesa_sha1_update(&ctx, spec_info->pMapEntries,
+ spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]);
+ _mesa_sha1_update(&ctx, spec_info->pData, spec_info->dataSize);
+ }
+ }
+ }
+ _mesa_sha1_update(&ctx, &flags, 4);
+ _mesa_sha1_final(&ctx, hash);
}
-
static struct cache_entry *
-radv_pipeline_cache_search_unlocked(struct radv_pipeline_cache *cache,
- const unsigned char *sha1)
+radv_pipeline_cache_search_unlocked(struct radv_pipeline_cache *cache, const unsigned char *sha1)
{
- const uint32_t mask = cache->table_size - 1;
- const uint32_t start = (*(uint32_t *) sha1);
+ const uint32_t mask = cache->table_size - 1;
+ const uint32_t start = (*(uint32_t *)sha1);
- if (cache->table_size == 0)
- return NULL;
+ if (cache->table_size == 0)
+ return NULL;
- for (uint32_t i = 0; i < cache->table_size; i++) {
- const uint32_t index = (start + i) & mask;
- struct cache_entry *entry = cache->hash_table[index];
+ for (uint32_t i = 0; i < cache->table_size; i++) {
+ const uint32_t index = (start + i) & mask;
+ struct cache_entry *entry = cache->hash_table[index];
- if (!entry)
- return NULL;
+ if (!entry)
+ return NULL;
- if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
- return entry;
- }
- }
+ if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
+ return entry;
+ }
+ }
- unreachable("hash table should never be full");
+ unreachable("hash table should never be full");
}
static struct cache_entry *
-radv_pipeline_cache_search(struct radv_pipeline_cache *cache,
- const unsigned char *sha1)
+radv_pipeline_cache_search(struct radv_pipeline_cache *cache, const unsigned char *sha1)
{
- struct cache_entry *entry;
+ struct cache_entry *entry;
- radv_pipeline_cache_lock(cache);
+ radv_pipeline_cache_lock(cache);
- entry = radv_pipeline_cache_search_unlocked(cache, sha1);
+ entry = radv_pipeline_cache_search_unlocked(cache, sha1);
- radv_pipeline_cache_unlock(cache);
+ radv_pipeline_cache_unlock(cache);
- return entry;
+ return entry;
}
static void
-radv_pipeline_cache_set_entry(struct radv_pipeline_cache *cache,
- struct cache_entry *entry)
+radv_pipeline_cache_set_entry(struct radv_pipeline_cache *cache, struct cache_entry *entry)
{
- const uint32_t mask = cache->table_size - 1;
- const uint32_t start = entry->sha1_dw[0];
-
- /* We'll always be able to insert when we get here. */
- assert(cache->kernel_count < cache->table_size / 2);
-
- for (uint32_t i = 0; i < cache->table_size; i++) {
- const uint32_t index = (start + i) & mask;
- if (!cache->hash_table[index]) {
- cache->hash_table[index] = entry;
- break;
- }
- }
-
- cache->total_size += entry_size(entry);
- cache->kernel_count++;
+ const uint32_t mask = cache->table_size - 1;
+ const uint32_t start = entry->sha1_dw[0];
+
+ /* We'll always be able to insert when we get here. */
+ assert(cache->kernel_count < cache->table_size / 2);
+
+ for (uint32_t i = 0; i < cache->table_size; i++) {
+ const uint32_t index = (start + i) & mask;
+ if (!cache->hash_table[index]) {
+ cache->hash_table[index] = entry;
+ break;
+ }
+ }
+
+ cache->total_size += entry_size(entry);
+ cache->kernel_count++;
}
-
static VkResult
radv_pipeline_cache_grow(struct radv_pipeline_cache *cache)
{
- const uint32_t table_size = cache->table_size * 2;
- const uint32_t old_table_size = cache->table_size;
- const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
- struct cache_entry **table;
- struct cache_entry **old_table = cache->hash_table;
+ const uint32_t table_size = cache->table_size * 2;
+ const uint32_t old_table_size = cache->table_size;
+ const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
+ struct cache_entry **table;
+ struct cache_entry **old_table = cache->hash_table;
- table = malloc(byte_size);
- if (table == NULL)
- return vk_error(cache->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ table = malloc(byte_size);
+ if (table == NULL)
+ return vk_error(cache->device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- cache->hash_table = table;
- cache->table_size = table_size;
- cache->kernel_count = 0;
- cache->total_size = 0;
+ cache->hash_table = table;
+ cache->table_size = table_size;
+ cache->kernel_count = 0;
+ cache->total_size = 0;
- memset(cache->hash_table, 0, byte_size);
- for (uint32_t i = 0; i < old_table_size; i++) {
- struct cache_entry *entry = old_table[i];
- if (!entry)
- continue;
+ memset(cache->hash_table, 0, byte_size);
+ for (uint32_t i = 0; i < old_table_size; i++) {
+ struct cache_entry *entry = old_table[i];
+ if (!entry)
+ continue;
- radv_pipeline_cache_set_entry(cache, entry);
- }
+ radv_pipeline_cache_set_entry(cache, entry);
+ }
- free(old_table);
+ free(old_table);
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
static void
-radv_pipeline_cache_add_entry(struct radv_pipeline_cache *cache,
- struct cache_entry *entry)
+radv_pipeline_cache_add_entry(struct radv_pipeline_cache *cache, struct cache_entry *entry)
{
- if (cache->kernel_count == cache->table_size / 2)
- radv_pipeline_cache_grow(cache);
-
- /* Failing to grow that hash table isn't fatal, but may mean we don't
- * have enough space to add this new kernel. Only add it if there's room.
- */
- if (cache->kernel_count < cache->table_size / 2)
- radv_pipeline_cache_set_entry(cache, entry);
+ if (cache->kernel_count == cache->table_size / 2)
+ radv_pipeline_cache_grow(cache);
+
+ /* Failing to grow that hash table isn't fatal, but may mean we don't
+ * have enough space to add this new kernel. Only add it if there's room.
+ */
+ if (cache->kernel_count < cache->table_size / 2)
+ radv_pipeline_cache_set_entry(cache, entry);
}
static bool
radv_is_cache_disabled(struct radv_device *device)
{
- /* Pipeline caches can be disabled with RADV_DEBUG=nocache, with
- * MESA_GLSL_CACHE_DISABLE=1, and when VK_AMD_shader_info is requested.
- */
- return (device->instance->debug_flags & RADV_DEBUG_NO_CACHE);
+ /* Pipeline caches can be disabled with RADV_DEBUG=nocache, with
+ * MESA_GLSL_CACHE_DISABLE=1, and when VK_AMD_shader_info is requested.
+ */
+ return (device->instance->debug_flags & RADV_DEBUG_NO_CACHE);
}
bool
radv_create_shader_variants_from_pipeline_cache(struct radv_device *device,
- struct radv_pipeline_cache *cache,
- const unsigned char *sha1,
- struct radv_shader_variant **variants,
- bool *found_in_application_cache)
+ struct radv_pipeline_cache *cache,
+ const unsigned char *sha1,
+ struct radv_shader_variant **variants,
+ bool *found_in_application_cache)
{
- struct cache_entry *entry;
-
- if (!cache) {
- cache = device->mem_cache;
- *found_in_application_cache = false;
- }
-
- radv_pipeline_cache_lock(cache);
-
- entry = radv_pipeline_cache_search_unlocked(cache, sha1);
-
- if (!entry) {
- *found_in_application_cache = false;
-
- /* Don't cache when we want debug info, since this isn't
- * present in the cache.
- */
- if (radv_is_cache_disabled(device) || !device->physical_device->disk_cache) {
- radv_pipeline_cache_unlock(cache);
- return false;
- }
-
- uint8_t disk_sha1[20];
- disk_cache_compute_key(device->physical_device->disk_cache,
- sha1, 20, disk_sha1);
-
- entry = (struct cache_entry *)
- disk_cache_get(device->physical_device->disk_cache,
- disk_sha1, NULL);
- if (!entry) {
- radv_pipeline_cache_unlock(cache);
- return false;
- } else {
- size_t size = entry_size(entry);
- struct cache_entry *new_entry = vk_alloc(&cache->alloc, size, 8,
- VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
- if (!new_entry) {
- free(entry);
- radv_pipeline_cache_unlock(cache);
- return false;
- }
-
- memcpy(new_entry, entry, entry_size(entry));
- free(entry);
- entry = new_entry;
-
- if (!(device->instance->debug_flags & RADV_DEBUG_NO_MEMORY_CACHE) ||
- cache != device->mem_cache)
- radv_pipeline_cache_add_entry(cache, new_entry);
- }
- }
-
- char *p = entry->code;
- for(int i = 0; i < MESA_SHADER_STAGES; ++i) {
- if (!entry->variants[i] && entry->binary_sizes[i]) {
- struct radv_shader_binary *binary = calloc(1, entry->binary_sizes[i]);
- memcpy(binary, p, entry->binary_sizes[i]);
- p += entry->binary_sizes[i];
-
- entry->variants[i] = radv_shader_variant_create(device, binary, false);
- free(binary);
- } else if (entry->binary_sizes[i]) {
- p += entry->binary_sizes[i];
- }
-
- }
-
- memcpy(variants, entry->variants, sizeof(entry->variants));
-
- if (device->instance->debug_flags & RADV_DEBUG_NO_MEMORY_CACHE &&
- cache == device->mem_cache)
- vk_free(&cache->alloc, entry);
- else {
- for (int i = 0; i < MESA_SHADER_STAGES; ++i)
- if (entry->variants[i])
- p_atomic_inc(&entry->variants[i]->ref_count);
- }
-
- radv_pipeline_cache_unlock(cache);
- return true;
+ struct cache_entry *entry;
+
+ if (!cache) {
+ cache = device->mem_cache;
+ *found_in_application_cache = false;
+ }
+
+ radv_pipeline_cache_lock(cache);
+
+ entry = radv_pipeline_cache_search_unlocked(cache, sha1);
+
+ if (!entry) {
+ *found_in_application_cache = false;
+
+ /* Don't cache when we want debug info, since this isn't
+ * present in the cache.
+ */
+ if (radv_is_cache_disabled(device) || !device->physical_device->disk_cache) {
+ radv_pipeline_cache_unlock(cache);
+ return false;
+ }
+
+ uint8_t disk_sha1[20];
+ disk_cache_compute_key(device->physical_device->disk_cache, sha1, 20, disk_sha1);
+
+ entry =
+ (struct cache_entry *)disk_cache_get(device->physical_device->disk_cache, disk_sha1, NULL);
+ if (!entry) {
+ radv_pipeline_cache_unlock(cache);
+ return false;
+ } else {
+ size_t size = entry_size(entry);
+ struct cache_entry *new_entry =
+ vk_alloc(&cache->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
+ if (!new_entry) {
+ free(entry);
+ radv_pipeline_cache_unlock(cache);
+ return false;
+ }
+
+ memcpy(new_entry, entry, entry_size(entry));
+ free(entry);
+ entry = new_entry;
+
+ if (!(device->instance->debug_flags & RADV_DEBUG_NO_MEMORY_CACHE) ||
+ cache != device->mem_cache)
+ radv_pipeline_cache_add_entry(cache, new_entry);
+ }
+ }
+
+ char *p = entry->code;
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
+ if (!entry->variants[i] && entry->binary_sizes[i]) {
+ struct radv_shader_binary *binary = calloc(1, entry->binary_sizes[i]);
+ memcpy(binary, p, entry->binary_sizes[i]);
+ p += entry->binary_sizes[i];
+
+ entry->variants[i] = radv_shader_variant_create(device, binary, false);
+ free(binary);
+ } else if (entry->binary_sizes[i]) {
+ p += entry->binary_sizes[i];
+ }
+ }
+
+ memcpy(variants, entry->variants, sizeof(entry->variants));
+
+ if (device->instance->debug_flags & RADV_DEBUG_NO_MEMORY_CACHE && cache == device->mem_cache)
+ vk_free(&cache->alloc, entry);
+ else {
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i)
+ if (entry->variants[i])
+ p_atomic_inc(&entry->variants[i]->ref_count);
+ }
+
+ radv_pipeline_cache_unlock(cache);
+ return true;
}
void
-radv_pipeline_cache_insert_shaders(struct radv_device *device,
- struct radv_pipeline_cache *cache,
- const unsigned char *sha1,
- struct radv_shader_variant **variants,
- struct radv_shader_binary *const *binaries)
+radv_pipeline_cache_insert_shaders(struct radv_device *device, struct radv_pipeline_cache *cache,
+ const unsigned char *sha1, struct radv_shader_variant **variants,
+ struct radv_shader_binary *const *binaries)
{
- if (!cache)
- cache = device->mem_cache;
-
- radv_pipeline_cache_lock(cache);
- struct cache_entry *entry = radv_pipeline_cache_search_unlocked(cache, sha1);
- if (entry) {
- for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
- if (entry->variants[i]) {
- radv_shader_variant_destroy(cache->device, variants[i]);
- variants[i] = entry->variants[i];
- } else {
- entry->variants[i] = variants[i];
- }
- if (variants[i])
- p_atomic_inc(&variants[i]->ref_count);
- }
- radv_pipeline_cache_unlock(cache);
- return;
- }
-
- /* Don't cache when we want debug info, since this isn't
- * present in the cache.
- */
- if (radv_is_cache_disabled(device)) {
- radv_pipeline_cache_unlock(cache);
- return;
- }
-
- size_t size = sizeof(*entry);
- for (int i = 0; i < MESA_SHADER_STAGES; ++i)
- if (variants[i])
- size += binaries[i]->total_size;
- const size_t size_without_align = size;
- size = align(size_without_align, alignof(struct cache_entry));
-
- entry = vk_alloc(&cache->alloc, size, 8,
- VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
- if (!entry) {
- radv_pipeline_cache_unlock(cache);
- return;
- }
-
- memset(entry, 0, sizeof(*entry));
- memcpy(entry->sha1, sha1, 20);
-
- char* p = entry->code;
-
- for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
- if (!variants[i])
- continue;
-
- entry->binary_sizes[i] = binaries[i]->total_size;
-
- memcpy(p, binaries[i], binaries[i]->total_size);
- p += binaries[i]->total_size;
- }
-
- // Make valgrind happy by filling the alignment hole at the end.
- assert(p == (char*)entry + size_without_align);
- assert(sizeof(*entry) + (p - entry->code) == size_without_align);
- memset((char*)entry + size_without_align, 0, size - size_without_align);
-
- /* Always add cache items to disk. This will allow collection of
- * compiled shaders by third parties such as steam, even if the app
- * implements its own pipeline cache.
- */
- if (device->physical_device->disk_cache) {
- uint8_t disk_sha1[20];
- disk_cache_compute_key(device->physical_device->disk_cache, sha1, 20,
- disk_sha1);
-
- disk_cache_put(device->physical_device->disk_cache, disk_sha1,
- entry, entry_size(entry), NULL);
- }
-
- if (device->instance->debug_flags & RADV_DEBUG_NO_MEMORY_CACHE &&
- cache == device->mem_cache) {
- vk_free2(&cache->alloc, NULL, entry);
- radv_pipeline_cache_unlock(cache);
- return;
- }
-
- /* We delay setting the variant so we have reproducible disk cache
- * items.
- */
- for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
- if (!variants[i])
- continue;
-
- entry->variants[i] = variants[i];
- p_atomic_inc(&variants[i]->ref_count);
- }
-
- radv_pipeline_cache_add_entry(cache, entry);
-
- cache->modified = true;
- radv_pipeline_cache_unlock(cache);
- return;
+ if (!cache)
+ cache = device->mem_cache;
+
+ radv_pipeline_cache_lock(cache);
+ struct cache_entry *entry = radv_pipeline_cache_search_unlocked(cache, sha1);
+ if (entry) {
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
+ if (entry->variants[i]) {
+ radv_shader_variant_destroy(cache->device, variants[i]);
+ variants[i] = entry->variants[i];
+ } else {
+ entry->variants[i] = variants[i];
+ }
+ if (variants[i])
+ p_atomic_inc(&variants[i]->ref_count);
+ }
+ radv_pipeline_cache_unlock(cache);
+ return;
+ }
+
+ /* Don't cache when we want debug info, since this isn't
+ * present in the cache.
+ */
+ if (radv_is_cache_disabled(device)) {
+ radv_pipeline_cache_unlock(cache);
+ return;
+ }
+
+ size_t size = sizeof(*entry);
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i)
+ if (variants[i])
+ size += binaries[i]->total_size;
+ const size_t size_without_align = size;
+ size = align(size_without_align, alignof(struct cache_entry));
+
+ entry = vk_alloc(&cache->alloc, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
+ if (!entry) {
+ radv_pipeline_cache_unlock(cache);
+ return;
+ }
+
+ memset(entry, 0, sizeof(*entry));
+ memcpy(entry->sha1, sha1, 20);
+
+ char *p = entry->code;
+
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
+ if (!variants[i])
+ continue;
+
+ entry->binary_sizes[i] = binaries[i]->total_size;
+
+ memcpy(p, binaries[i], binaries[i]->total_size);
+ p += binaries[i]->total_size;
+ }
+
+ // Make valgrind happy by filling the alignment hole at the end.
+ assert(p == (char *)entry + size_without_align);
+ assert(sizeof(*entry) + (p - entry->code) == size_without_align);
+ memset((char *)entry + size_without_align, 0, size - size_without_align);
+
+ /* Always add cache items to disk. This will allow collection of
+ * compiled shaders by third parties such as steam, even if the app
+ * implements its own pipeline cache.
+ */
+ if (device->physical_device->disk_cache) {
+ uint8_t disk_sha1[20];
+ disk_cache_compute_key(device->physical_device->disk_cache, sha1, 20, disk_sha1);
+
+ disk_cache_put(device->physical_device->disk_cache, disk_sha1, entry, entry_size(entry),
+ NULL);
+ }
+
+ if (device->instance->debug_flags & RADV_DEBUG_NO_MEMORY_CACHE && cache == device->mem_cache) {
+ vk_free2(&cache->alloc, NULL, entry);
+ radv_pipeline_cache_unlock(cache);
+ return;
+ }
+
+ /* We delay setting the variant so we have reproducible disk cache
+ * items.
+ */
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
+ if (!variants[i])
+ continue;
+
+ entry->variants[i] = variants[i];
+ p_atomic_inc(&variants[i]->ref_count);
+ }
+
+ radv_pipeline_cache_add_entry(cache, entry);
+
+ cache->modified = true;
+ radv_pipeline_cache_unlock(cache);
+ return;
}
bool
-radv_pipeline_cache_load(struct radv_pipeline_cache *cache,
- const void *data, size_t size)
+radv_pipeline_cache_load(struct radv_pipeline_cache *cache, const void *data, size_t size)
{
- struct radv_device *device = cache->device;
- struct vk_pipeline_cache_header header;
-
- if (size < sizeof(header))
- return false;
- memcpy(&header, data, sizeof(header));
- if (header.header_size < sizeof(header))
- return false;
- if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
- return false;
- if (header.vendor_id != ATI_VENDOR_ID)
- return false;
- if (header.device_id != device->physical_device->rad_info.pci_id)
- return false;
- if (memcmp(header.uuid, device->physical_device->cache_uuid, VK_UUID_SIZE) != 0)
- return false;
-
- char *end = (char *) data + size;
- char *p = (char *) data + header.header_size;
-
- while (end - p >= sizeof(struct cache_entry)) {
- struct cache_entry *entry = (struct cache_entry*)p;
- struct cache_entry *dest_entry;
- size_t size_of_entry = entry_size(entry);
- if(end - p < size_of_entry)
- break;
-
- dest_entry = vk_alloc(&cache->alloc, size_of_entry,
- 8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
- if (dest_entry) {
- memcpy(dest_entry, entry, size_of_entry);
- for (int i = 0; i < MESA_SHADER_STAGES; ++i)
- dest_entry->variants[i] = NULL;
- radv_pipeline_cache_add_entry(cache, dest_entry);
- }
- p += size_of_entry;
- }
-
- return true;
+ struct radv_device *device = cache->device;
+ struct vk_pipeline_cache_header header;
+
+ if (size < sizeof(header))
+ return false;
+ memcpy(&header, data, sizeof(header));
+ if (header.header_size < sizeof(header))
+ return false;
+ if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
+ return false;
+ if (header.vendor_id != ATI_VENDOR_ID)
+ return false;
+ if (header.device_id != device->physical_device->rad_info.pci_id)
+ return false;
+ if (memcmp(header.uuid, device->physical_device->cache_uuid, VK_UUID_SIZE) != 0)
+ return false;
+
+ char *end = (char *)data + size;
+ char *p = (char *)data + header.header_size;
+
+ while (end - p >= sizeof(struct cache_entry)) {
+ struct cache_entry *entry = (struct cache_entry *)p;
+ struct cache_entry *dest_entry;
+ size_t size_of_entry = entry_size(entry);
+ if (end - p < size_of_entry)
+ break;
+
+ dest_entry = vk_alloc(&cache->alloc, size_of_entry, 8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
+ if (dest_entry) {
+ memcpy(dest_entry, entry, size_of_entry);
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i)
+ dest_entry->variants[i] = NULL;
+ radv_pipeline_cache_add_entry(cache, dest_entry);
+ }
+ p += size_of_entry;
+ }
+
+ return true;
}
-VkResult radv_CreatePipelineCache(
- VkDevice _device,
- const VkPipelineCacheCreateInfo* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkPipelineCache* pPipelineCache)
+VkResult
+radv_CreatePipelineCache(VkDevice _device, const VkPipelineCacheCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkPipelineCache *pPipelineCache)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- struct radv_pipeline_cache *cache;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ struct radv_pipeline_cache *cache;
- assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
+ assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
- cache = vk_alloc2(&device->vk.alloc, pAllocator,
- sizeof(*cache), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
- if (cache == NULL)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ cache = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*cache), 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (cache == NULL)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
- vk_object_base_init(&device->vk, &cache->base,
- VK_OBJECT_TYPE_PIPELINE_CACHE);
+ vk_object_base_init(&device->vk, &cache->base, VK_OBJECT_TYPE_PIPELINE_CACHE);
- if (pAllocator)
- cache->alloc = *pAllocator;
- else
- cache->alloc = device->vk.alloc;
+ if (pAllocator)
+ cache->alloc = *pAllocator;
+ else
+ cache->alloc = device->vk.alloc;
- radv_pipeline_cache_init(cache, device);
- cache->flags = pCreateInfo->flags;
+ radv_pipeline_cache_init(cache, device);
+ cache->flags = pCreateInfo->flags;
- if (pCreateInfo->initialDataSize > 0) {
- radv_pipeline_cache_load(cache,
- pCreateInfo->pInitialData,
- pCreateInfo->initialDataSize);
- }
+ if (pCreateInfo->initialDataSize > 0) {
+ radv_pipeline_cache_load(cache, pCreateInfo->pInitialData, pCreateInfo->initialDataSize);
+ }
- *pPipelineCache = radv_pipeline_cache_to_handle(cache);
+ *pPipelineCache = radv_pipeline_cache_to_handle(cache);
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
-void radv_DestroyPipelineCache(
- VkDevice _device,
- VkPipelineCache _cache,
- const VkAllocationCallbacks* pAllocator)
+void
+radv_DestroyPipelineCache(VkDevice _device, VkPipelineCache _cache,
+ const VkAllocationCallbacks *pAllocator)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
- if (!cache)
- return;
- radv_pipeline_cache_finish(cache);
+ if (!cache)
+ return;
+ radv_pipeline_cache_finish(cache);
- vk_object_base_finish(&cache->base);
- vk_free2(&device->vk.alloc, pAllocator, cache);
+ vk_object_base_finish(&cache->base);
+ vk_free2(&device->vk.alloc, pAllocator, cache);
}
-VkResult radv_GetPipelineCacheData(
- VkDevice _device,
- VkPipelineCache _cache,
- size_t* pDataSize,
- void* pData)
+VkResult
+radv_GetPipelineCacheData(VkDevice _device, VkPipelineCache _cache, size_t *pDataSize, void *pData)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
- struct vk_pipeline_cache_header *header;
- VkResult result = VK_SUCCESS;
-
- radv_pipeline_cache_lock(cache);
-
- const size_t size = sizeof(*header) + cache->total_size;
- if (pData == NULL) {
- radv_pipeline_cache_unlock(cache);
- *pDataSize = size;
- return VK_SUCCESS;
- }
- if (*pDataSize < sizeof(*header)) {
- radv_pipeline_cache_unlock(cache);
- *pDataSize = 0;
- return VK_INCOMPLETE;
- }
- void *p = pData, *end = (char *) pData + *pDataSize;
- header = p;
- header->header_size = align(sizeof(*header), alignof(struct cache_entry));
- header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
- header->vendor_id = ATI_VENDOR_ID;
- header->device_id = device->physical_device->rad_info.pci_id;
- memcpy(header->uuid, device->physical_device->cache_uuid, VK_UUID_SIZE);
- p = (char *)p + header->header_size;
-
- struct cache_entry *entry;
- for (uint32_t i = 0; i < cache->table_size; i++) {
- if (!cache->hash_table[i])
- continue;
- entry = cache->hash_table[i];
- const uint32_t size_of_entry = entry_size(entry);
- if ((char *)end < (char *)p + size_of_entry) {
- result = VK_INCOMPLETE;
- break;
- }
-
- memcpy(p, entry, size_of_entry);
- for(int j = 0; j < MESA_SHADER_STAGES; ++j)
- ((struct cache_entry*)p)->variants[j] = NULL;
- p = (char *)p + size_of_entry;
- }
- *pDataSize = (char *)p - (char *)pData;
-
- radv_pipeline_cache_unlock(cache);
- return result;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
+ struct vk_pipeline_cache_header *header;
+ VkResult result = VK_SUCCESS;
+
+ radv_pipeline_cache_lock(cache);
+
+ const size_t size = sizeof(*header) + cache->total_size;
+ if (pData == NULL) {
+ radv_pipeline_cache_unlock(cache);
+ *pDataSize = size;
+ return VK_SUCCESS;
+ }
+ if (*pDataSize < sizeof(*header)) {
+ radv_pipeline_cache_unlock(cache);
+ *pDataSize = 0;
+ return VK_INCOMPLETE;
+ }
+ void *p = pData, *end = (char *)pData + *pDataSize;
+ header = p;
+ header->header_size = align(sizeof(*header), alignof(struct cache_entry));
+ header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
+ header->vendor_id = ATI_VENDOR_ID;
+ header->device_id = device->physical_device->rad_info.pci_id;
+ memcpy(header->uuid, device->physical_device->cache_uuid, VK_UUID_SIZE);
+ p = (char *)p + header->header_size;
+
+ struct cache_entry *entry;
+ for (uint32_t i = 0; i < cache->table_size; i++) {
+ if (!cache->hash_table[i])
+ continue;
+ entry = cache->hash_table[i];
+ const uint32_t size_of_entry = entry_size(entry);
+ if ((char *)end < (char *)p + size_of_entry) {
+ result = VK_INCOMPLETE;
+ break;
+ }
+
+ memcpy(p, entry, size_of_entry);
+ for (int j = 0; j < MESA_SHADER_STAGES; ++j)
+ ((struct cache_entry *)p)->variants[j] = NULL;
+ p = (char *)p + size_of_entry;
+ }
+ *pDataSize = (char *)p - (char *)pData;
+
+ radv_pipeline_cache_unlock(cache);
+ return result;
}
static void
-radv_pipeline_cache_merge(struct radv_pipeline_cache *dst,
- struct radv_pipeline_cache *src)
+radv_pipeline_cache_merge(struct radv_pipeline_cache *dst, struct radv_pipeline_cache *src)
{
- for (uint32_t i = 0; i < src->table_size; i++) {
- struct cache_entry *entry = src->hash_table[i];
- if (!entry || radv_pipeline_cache_search(dst, entry->sha1))
- continue;
+ for (uint32_t i = 0; i < src->table_size; i++) {
+ struct cache_entry *entry = src->hash_table[i];
+ if (!entry || radv_pipeline_cache_search(dst, entry->sha1))
+ continue;
- radv_pipeline_cache_add_entry(dst, entry);
+ radv_pipeline_cache_add_entry(dst, entry);
- src->hash_table[i] = NULL;
- }
+ src->hash_table[i] = NULL;
+ }
}
-VkResult radv_MergePipelineCaches(
- VkDevice _device,
- VkPipelineCache destCache,
- uint32_t srcCacheCount,
- const VkPipelineCache* pSrcCaches)
+VkResult
+radv_MergePipelineCaches(VkDevice _device, VkPipelineCache destCache, uint32_t srcCacheCount,
+ const VkPipelineCache *pSrcCaches)
{
- RADV_FROM_HANDLE(radv_pipeline_cache, dst, destCache);
+ RADV_FROM_HANDLE(radv_pipeline_cache, dst, destCache);
- for (uint32_t i = 0; i < srcCacheCount; i++) {
- RADV_FROM_HANDLE(radv_pipeline_cache, src, pSrcCaches[i]);
+ for (uint32_t i = 0; i < srcCacheCount; i++) {
+ RADV_FROM_HANDLE(radv_pipeline_cache, src, pSrcCaches[i]);
- radv_pipeline_cache_merge(dst, src);
- }
+ radv_pipeline_cache_merge(dst, src);
+ }
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 02368fa1e07..15ae7ebb36c 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -28,15 +28,15 @@
#ifndef RADV_PRIVATE_H
#define RADV_PRIVATE_H
-#include <stdlib.h>
-#include <stdio.h>
-#include <stdbool.h>
#include <assert.h>
+#include <stdbool.h>
#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
#include <string.h>
#ifdef HAVE_VALGRIND
-#include <valgrind.h>
#include <memcheck.h>
+#include <valgrind.h>
#define VG(x) x
#else
#define VG(x) ((void)0)
@@ -50,30 +50,30 @@
#include "compiler/shader_enums.h"
#include "util/bitscan.h"
#include "util/cnd_monotonic.h"
-#include "util/macros.h"
#include "util/list.h"
+#include "util/macros.h"
#include "util/rwlock.h"
#include "util/xmlconfig.h"
#include "vk_alloc.h"
#include "vk_debug_report.h"
#include "vk_device.h"
-#include "vk_instance.h"
#include "vk_format.h"
+#include "vk_instance.h"
#include "vk_physical_device.h"
#include "vk_shader_module.h"
#include "vk_util.h"
-#include "radv_radeon_winsys.h"
#include "ac_binary.h"
-#include "ac_nir_to_llvm.h"
#include "ac_gpu_info.h"
-#include "ac_surface.h"
#include "ac_llvm_build.h"
#include "ac_llvm_util.h"
+#include "ac_nir_to_llvm.h"
+#include "ac_sqtt.h"
+#include "ac_surface.h"
#include "radv_constants.h"
#include "radv_descriptor_set.h"
+#include "radv_radeon_winsys.h"
#include "sid.h"
-#include "ac_sqtt.h"
/* Pre-declarations needed for WSI entrypoints */
struct wl_surface;
@@ -82,10 +82,10 @@ typedef struct xcb_connection_t xcb_connection_t;
typedef uint32_t xcb_visualid_t;
typedef uint32_t xcb_window_t;
+#include <vulkan/vk_android_native_buffer.h>
+#include <vulkan/vk_icd.h>
#include <vulkan/vulkan.h>
#include <vulkan/vulkan_android.h>
-#include <vulkan/vk_icd.h>
-#include <vulkan/vk_android_native_buffer.h>
#include "radv_entrypoints.h"
@@ -118,80 +118,80 @@ typedef uint32_t xcb_window_t;
static inline uint32_t
align_u32(uint32_t v, uint32_t a)
{
- assert(a != 0 && a == (a & -a));
- return (v + a - 1) & ~(a - 1);
+ assert(a != 0 && a == (a & -a));
+ return (v + a - 1) & ~(a - 1);
}
static inline uint32_t
align_u32_npot(uint32_t v, uint32_t a)
{
- return (v + a - 1) / a * a;
+ return (v + a - 1) / a * a;
}
static inline uint64_t
align_u64(uint64_t v, uint64_t a)
{
- assert(a != 0 && a == (a & -a));
- return (v + a - 1) & ~(a - 1);
+ assert(a != 0 && a == (a & -a));
+ return (v + a - 1) & ~(a - 1);
}
static inline int32_t
align_i32(int32_t v, int32_t a)
{
- assert(a != 0 && a == (a & -a));
- return (v + a - 1) & ~(a - 1);
+ assert(a != 0 && a == (a & -a));
+ return (v + a - 1) & ~(a - 1);
}
/** Alignment must be a power of 2. */
static inline bool
radv_is_aligned(uintmax_t n, uintmax_t a)
{
- assert(a == (a & -a));
- return (n & (a - 1)) == 0;
+ assert(a == (a & -a));
+ return (n & (a - 1)) == 0;
}
static inline uint32_t
round_up_u32(uint32_t v, uint32_t a)
{
- return (v + a - 1) / a;
+ return (v + a - 1) / a;
}
static inline uint64_t
round_up_u64(uint64_t v, uint64_t a)
{
- return (v + a - 1) / a;
+ return (v + a - 1) / a;
}
static inline uint32_t
radv_minify(uint32_t n, uint32_t levels)
{
- if (unlikely(n == 0))
- return 0;
- else
- return MAX2(n >> levels, 1);
+ if (unlikely(n == 0))
+ return 0;
+ else
+ return MAX2(n >> levels, 1);
}
static inline float
radv_clamp_f(float f, float min, float max)
{
- assert(min < max);
-
- if (f > max)
- return max;
- else if (f < min)
- return min;
- else
- return f;
+ assert(min < max);
+
+ if (f > max)
+ return max;
+ else if (f < min)
+ return min;
+ else
+ return f;
}
static inline bool
radv_clear_mask(uint32_t *inout_mask, uint32_t clear_mask)
{
- if (*inout_mask & clear_mask) {
- *inout_mask &= ~clear_mask;
- return true;
- } else {
- return false;
- }
+ if (*inout_mask & clear_mask) {
+ *inout_mask &= ~clear_mask;
+ return true;
+ } else {
+ return false;
+ }
}
/* Whenever we generate an error, pass it through this function. Useful for
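The inline alignment and mask helpers reformatted in the hunk above are small enough that a few concrete values pin down their behaviour. The spot checks below are illustrative only (they assume radv_private.h is in scope) and are not part of this patch.

/* Illustrative spot checks for the helpers above; assumes radv_private.h is
 * included. Not part of the formatting change. */
#include <assert.h>
#include <stdint.h>

static void
radv_align_helper_examples(void)
{
   /* align_u32() rounds up to a power-of-two multiple. */
   assert(align_u32(13, 8) == 16);
   assert(align_u32(16, 8) == 16);

   /* align_u32_npot() accepts any alignment, not just powers of two. */
   assert(align_u32_npot(13, 6) == 18);

   /* radv_minify() yields mip-level extents and never drops below 1. */
   assert(radv_minify(1024, 3) == 128);
   assert(radv_minify(4, 10) == 1);

   /* radv_clear_mask() clears the requested bits and reports whether any
    * of them were set beforehand. */
   uint32_t mask = 0x5;
   assert(radv_clear_mask(&mask, 0x1) && mask == 0x4);
   assert(!radv_clear_mask(&mask, 0x2));
}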
@@ -203,26 +203,21 @@ struct radv_image_view;
struct radv_instance;
VkResult __vk_errorv(struct radv_instance *instance, const void *object,
- VkDebugReportObjectTypeEXT type, VkResult error,
- const char *file, int line, const char *format,
- va_list args);
+ VkDebugReportObjectTypeEXT type, VkResult error, const char *file, int line,
+ const char *format, va_list args);
VkResult __vk_errorf(struct radv_instance *instance, const void *object,
- VkDebugReportObjectTypeEXT type, VkResult error,
- const char *file, int line, const char *format, ...)
- radv_printflike(7, 8);
-
-#define vk_error(instance, error) \
- __vk_errorf(instance, NULL, \
- VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT, \
- error, __FILE__, __LINE__, NULL);
-#define vk_errorf(instance, error, format, ...) \
- __vk_errorf(instance, NULL, \
- VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT, \
- error, __FILE__, __LINE__, format, ## __VA_ARGS__);
-
-void __radv_finishme(const char *file, int line, const char *format, ...)
- radv_printflike(3, 4);
+ VkDebugReportObjectTypeEXT type, VkResult error, const char *file, int line,
+ const char *format, ...) radv_printflike(7, 8);
+
+#define vk_error(instance, error) \
+ __vk_errorf(instance, NULL, VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT, error, __FILE__, __LINE__, \
+ NULL);
+#define vk_errorf(instance, error, format, ...) \
+ __vk_errorf(instance, NULL, VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT, error, __FILE__, __LINE__, \
+ format, ##__VA_ARGS__);
+
+void __radv_finishme(const char *file, int line, const char *format, ...) radv_printflike(3, 4);
void radv_loge(const char *format, ...) radv_printflike(1, 2);
void radv_loge_v(const char *format, va_list va);
void radv_logi(const char *format, ...) radv_printflike(1, 2);
@@ -231,23 +226,26 @@ void radv_logi_v(const char *format, va_list va);
/**
* Print a FINISHME message, including its source location.
*/
-#define radv_finishme(format, ...) \
- do { \
- static bool reported = false; \
- if (!reported) { \
- __radv_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__); \
- reported = true; \
- } \
- } while (0)
+#define radv_finishme(format, ...) \
+ do { \
+ static bool reported = false; \
+ if (!reported) { \
+ __radv_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__); \
+ reported = true; \
+ } \
+ } while (0)
/* A non-fatal assert. Useful for debugging. */
#ifdef NDEBUG
-#define radv_assert(x) do {} while(0)
+#define radv_assert(x) \
+ do { \
+ } while (0)
#else
-#define radv_assert(x) do { \
- if (unlikely(!(x))) \
- fprintf(stderr, "%s:%d ASSERT: %s\n", __FILE__, __LINE__, #x); \
-} while (0)
+#define radv_assert(x) \
+ do { \
+ if (unlikely(!(x))) \
+ fprintf(stderr, "%s:%d ASSERT: %s\n", __FILE__, __LINE__, #x); \
+ } while (0)
#endif
int radv_get_instance_entrypoint_index(const char *name);
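The macro hunk above only changes line wrapping; the semantics are unchanged: radv_finishme() reports a given call site at most once per process, and radv_assert() is a non-fatal check that compiles to nothing under NDEBUG. A small illustrative use follows; the function and values are placeholders, not code from this patch.

/* Illustrative placeholder showing how the macros above are meant to be used.
 * In a debug build the assert prints a non-fatal message to stderr; under
 * NDEBUG it is a no-op. The finishme message is printed only once. */
static void
radv_example_sample_count_check(int samples)
{
   radv_assert(samples >= 1);

   if (samples > 8)
      radv_finishme("unsupported sample count %d", samples);
}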
@@ -259,78 +257,78 @@ const char *radv_get_physical_device_entry_name(int index);
const char *radv_get_device_entry_name(int index);
struct radv_physical_device {
- struct vk_physical_device vk;
+ struct vk_physical_device vk;
- /* Link in radv_instance::physical_devices */
- struct list_head link;
+ /* Link in radv_instance::physical_devices */
+ struct list_head link;
- struct radv_instance * instance;
+ struct radv_instance *instance;
- struct radeon_winsys *ws;
- struct radeon_info rad_info;
- char name[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE];
- uint8_t driver_uuid[VK_UUID_SIZE];
- uint8_t device_uuid[VK_UUID_SIZE];
- uint8_t cache_uuid[VK_UUID_SIZE];
+ struct radeon_winsys *ws;
+ struct radeon_info rad_info;
+ char name[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE];
+ uint8_t driver_uuid[VK_UUID_SIZE];
+ uint8_t device_uuid[VK_UUID_SIZE];
+ uint8_t cache_uuid[VK_UUID_SIZE];
- int local_fd;
- int master_fd;
- struct wsi_device wsi_device;
+ int local_fd;
+ int master_fd;
+ struct wsi_device wsi_device;
- bool out_of_order_rast_allowed;
+ bool out_of_order_rast_allowed;
- /* Whether DCC should be enabled for MSAA textures. */
- bool dcc_msaa_allowed;
+ /* Whether DCC should be enabled for MSAA textures. */
+ bool dcc_msaa_allowed;
- /* Whether to enable NGG. */
- bool use_ngg;
+ /* Whether to enable NGG. */
+ bool use_ngg;
- /* Whether to enable NGG streamout. */
- bool use_ngg_streamout;
+ /* Whether to enable NGG streamout. */
+ bool use_ngg_streamout;
- /* Number of threads per wave. */
- uint8_t ps_wave_size;
- uint8_t cs_wave_size;
- uint8_t ge_wave_size;
+ /* Number of threads per wave. */
+ uint8_t ps_wave_size;
+ uint8_t cs_wave_size;
+ uint8_t ge_wave_size;
- /* Whether to use the LLVM compiler backend */
- bool use_llvm;
+ /* Whether to use the LLVM compiler backend */
+ bool use_llvm;
-   /* This is the driver's on-disk cache used as a fallback as opposed to
- * the pipeline cache defined by apps.
- */
- struct disk_cache * disk_cache;
+   /* This is the driver's on-disk cache used as a fallback as opposed to
+ * the pipeline cache defined by apps.
+ */
+ struct disk_cache *disk_cache;
- VkPhysicalDeviceMemoryProperties memory_properties;
- enum radeon_bo_domain memory_domains[VK_MAX_MEMORY_TYPES];
- enum radeon_bo_flag memory_flags[VK_MAX_MEMORY_TYPES];
- unsigned heaps;
+ VkPhysicalDeviceMemoryProperties memory_properties;
+ enum radeon_bo_domain memory_domains[VK_MAX_MEMORY_TYPES];
+ enum radeon_bo_flag memory_flags[VK_MAX_MEMORY_TYPES];
+ unsigned heaps;
#ifndef _WIN32
- drmPciBusInfo bus_info;
+ drmPciBusInfo bus_info;
#endif
};
struct radv_instance {
- struct vk_instance vk;
+ struct vk_instance vk;
- VkAllocationCallbacks alloc;
+ VkAllocationCallbacks alloc;
- uint64_t debug_flags;
- uint64_t perftest_flags;
+ uint64_t debug_flags;
+ uint64_t perftest_flags;
- bool physical_devices_enumerated;
- struct list_head physical_devices;
+ bool physical_devices_enumerated;
+ struct list_head physical_devices;
- struct driOptionCache dri_options;
- struct driOptionCache available_dri_options;
+ struct driOptionCache dri_options;
+ struct driOptionCache available_dri_options;
- /**
- * Workarounds for game bugs.
- */
- bool enable_mrt_output_nan_fixup;
- bool disable_tc_compat_htile_in_general;
- bool disable_shrink_image_store;
+ /**
+ * Workarounds for game bugs.
+ */
+ bool enable_mrt_output_nan_fixup;
+ bool disable_tc_compat_htile_in_general;
+ bool disable_shrink_image_store;
};
VkResult radv_init_wsi(struct radv_physical_device *physical_device);
@@ -339,327 +337,328 @@ void radv_finish_wsi(struct radv_physical_device *physical_device);
struct cache_entry;
struct radv_pipeline_cache {
- struct vk_object_base base;
- struct radv_device * device;
- mtx_t mutex;
- VkPipelineCacheCreateFlags flags;
+ struct vk_object_base base;
+ struct radv_device *device;
+ mtx_t mutex;
+ VkPipelineCacheCreateFlags flags;
- uint32_t total_size;
- uint32_t table_size;
- uint32_t kernel_count;
- struct cache_entry ** hash_table;
- bool modified;
+ uint32_t total_size;
+ uint32_t table_size;
+ uint32_t kernel_count;
+ struct cache_entry **hash_table;
+ bool modified;
- VkAllocationCallbacks alloc;
+ VkAllocationCallbacks alloc;
};
struct radv_pipeline_key {
- uint32_t instance_rate_inputs;
- uint32_t instance_rate_divisors[MAX_VERTEX_ATTRIBS];
- uint8_t vertex_attribute_formats[MAX_VERTEX_ATTRIBS];
- uint32_t vertex_attribute_bindings[MAX_VERTEX_ATTRIBS];
- uint32_t vertex_attribute_offsets[MAX_VERTEX_ATTRIBS];
- uint32_t vertex_attribute_strides[MAX_VERTEX_ATTRIBS];
- enum ac_fetch_format vertex_alpha_adjust[MAX_VERTEX_ATTRIBS];
- uint32_t vertex_post_shuffle;
- unsigned tess_input_vertices;
- uint32_t col_format;
- uint32_t is_int8;
- uint32_t is_int10;
- uint8_t log2_ps_iter_samples;
- uint8_t num_samples;
- uint32_t has_multiview_view_index : 1;
- uint32_t optimisations_disabled : 1;
- uint8_t topology;
-
- /* Non-zero if a required subgroup size is specified via
- * VK_EXT_subgroup_size_control.
- */
- uint8_t compute_subgroup_size;
- bool require_full_subgroups;
+ uint32_t instance_rate_inputs;
+ uint32_t instance_rate_divisors[MAX_VERTEX_ATTRIBS];
+ uint8_t vertex_attribute_formats[MAX_VERTEX_ATTRIBS];
+ uint32_t vertex_attribute_bindings[MAX_VERTEX_ATTRIBS];
+ uint32_t vertex_attribute_offsets[MAX_VERTEX_ATTRIBS];
+ uint32_t vertex_attribute_strides[MAX_VERTEX_ATTRIBS];
+ enum ac_fetch_format vertex_alpha_adjust[MAX_VERTEX_ATTRIBS];
+ uint32_t vertex_post_shuffle;
+ unsigned tess_input_vertices;
+ uint32_t col_format;
+ uint32_t is_int8;
+ uint32_t is_int10;
+ uint8_t log2_ps_iter_samples;
+ uint8_t num_samples;
+ uint32_t has_multiview_view_index : 1;
+ uint32_t optimisations_disabled : 1;
+ uint8_t topology;
+
+ /* Non-zero if a required subgroup size is specified via
+ * VK_EXT_subgroup_size_control.
+ */
+ uint8_t compute_subgroup_size;
+ bool require_full_subgroups;
};
struct radv_shader_binary;
struct radv_shader_variant;
-void
-radv_pipeline_cache_init(struct radv_pipeline_cache *cache,
- struct radv_device *device);
-void
-radv_pipeline_cache_finish(struct radv_pipeline_cache *cache);
-bool
-radv_pipeline_cache_load(struct radv_pipeline_cache *cache,
- const void *data, size_t size);
-
-bool
-radv_create_shader_variants_from_pipeline_cache(struct radv_device *device,
- struct radv_pipeline_cache *cache,
- const unsigned char *sha1,
- struct radv_shader_variant **variants,
- bool *found_in_application_cache);
-
-void
-radv_pipeline_cache_insert_shaders(struct radv_device *device,
- struct radv_pipeline_cache *cache,
- const unsigned char *sha1,
- struct radv_shader_variant **variants,
- struct radv_shader_binary *const *binaries);
+void radv_pipeline_cache_init(struct radv_pipeline_cache *cache, struct radv_device *device);
+void radv_pipeline_cache_finish(struct radv_pipeline_cache *cache);
+bool radv_pipeline_cache_load(struct radv_pipeline_cache *cache, const void *data, size_t size);
+
+bool radv_create_shader_variants_from_pipeline_cache(struct radv_device *device,
+ struct radv_pipeline_cache *cache,
+ const unsigned char *sha1,
+ struct radv_shader_variant **variants,
+ bool *found_in_application_cache);
+
+void radv_pipeline_cache_insert_shaders(struct radv_device *device,
+ struct radv_pipeline_cache *cache,
+ const unsigned char *sha1,
+ struct radv_shader_variant **variants,
+ struct radv_shader_binary *const *binaries);
enum radv_blit_ds_layout {
- RADV_BLIT_DS_LAYOUT_TILE_ENABLE,
- RADV_BLIT_DS_LAYOUT_TILE_DISABLE,
- RADV_BLIT_DS_LAYOUT_COUNT,
+ RADV_BLIT_DS_LAYOUT_TILE_ENABLE,
+ RADV_BLIT_DS_LAYOUT_TILE_DISABLE,
+ RADV_BLIT_DS_LAYOUT_COUNT,
};
-static inline enum radv_blit_ds_layout radv_meta_blit_ds_to_type(VkImageLayout layout)
+static inline enum radv_blit_ds_layout
+radv_meta_blit_ds_to_type(VkImageLayout layout)
{
- return (layout == VK_IMAGE_LAYOUT_GENERAL) ? RADV_BLIT_DS_LAYOUT_TILE_DISABLE : RADV_BLIT_DS_LAYOUT_TILE_ENABLE;
+ return (layout == VK_IMAGE_LAYOUT_GENERAL) ? RADV_BLIT_DS_LAYOUT_TILE_DISABLE
+ : RADV_BLIT_DS_LAYOUT_TILE_ENABLE;
}
-static inline VkImageLayout radv_meta_blit_ds_to_layout(enum radv_blit_ds_layout ds_layout)
+static inline VkImageLayout
+radv_meta_blit_ds_to_layout(enum radv_blit_ds_layout ds_layout)
{
- return ds_layout == RADV_BLIT_DS_LAYOUT_TILE_ENABLE ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL : VK_IMAGE_LAYOUT_GENERAL;
+ return ds_layout == RADV_BLIT_DS_LAYOUT_TILE_ENABLE ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
+ : VK_IMAGE_LAYOUT_GENERAL;
}
enum radv_meta_dst_layout {
- RADV_META_DST_LAYOUT_GENERAL,
- RADV_META_DST_LAYOUT_OPTIMAL,
- RADV_META_DST_LAYOUT_COUNT,
+ RADV_META_DST_LAYOUT_GENERAL,
+ RADV_META_DST_LAYOUT_OPTIMAL,
+ RADV_META_DST_LAYOUT_COUNT,
};
-static inline enum radv_meta_dst_layout radv_meta_dst_layout_from_layout(VkImageLayout layout)
+static inline enum radv_meta_dst_layout
+radv_meta_dst_layout_from_layout(VkImageLayout layout)
{
- return (layout == VK_IMAGE_LAYOUT_GENERAL) ? RADV_META_DST_LAYOUT_GENERAL : RADV_META_DST_LAYOUT_OPTIMAL;
+ return (layout == VK_IMAGE_LAYOUT_GENERAL) ? RADV_META_DST_LAYOUT_GENERAL
+ : RADV_META_DST_LAYOUT_OPTIMAL;
}
-static inline VkImageLayout radv_meta_dst_layout_to_layout(enum radv_meta_dst_layout layout)
+static inline VkImageLayout
+radv_meta_dst_layout_to_layout(enum radv_meta_dst_layout layout)
{
- return layout == RADV_META_DST_LAYOUT_OPTIMAL ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL : VK_IMAGE_LAYOUT_GENERAL;
+ return layout == RADV_META_DST_LAYOUT_OPTIMAL ? VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
+ : VK_IMAGE_LAYOUT_GENERAL;
}
struct radv_meta_state {
- VkAllocationCallbacks alloc;
-
- struct radv_pipeline_cache cache;
-
- /*
- * For on-demand pipeline creation, makes sure that
- * only one thread tries to build a pipeline at the same time.
- */
- mtx_t mtx;
-
- /**
- * Use array element `i` for images with `2^i` samples.
- */
- struct {
- VkRenderPass render_pass[NUM_META_FS_KEYS];
- VkPipeline color_pipelines[NUM_META_FS_KEYS];
-
- VkRenderPass depthstencil_rp;
- VkPipeline depth_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
- VkPipeline stencil_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
- VkPipeline depthstencil_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
-
- VkPipeline depth_only_unrestricted_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
- VkPipeline stencil_only_unrestricted_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
- VkPipeline depthstencil_unrestricted_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
- } clear[MAX_SAMPLES_LOG2];
-
- VkPipelineLayout clear_color_p_layout;
- VkPipelineLayout clear_depth_p_layout;
- VkPipelineLayout clear_depth_unrestricted_p_layout;
-
- /* Optimized compute fast HTILE clear for stencil or depth only. */
- VkPipeline clear_htile_mask_pipeline;
- VkPipelineLayout clear_htile_mask_p_layout;
- VkDescriptorSetLayout clear_htile_mask_ds_layout;
-
- struct {
- VkRenderPass render_pass[NUM_META_FS_KEYS][RADV_META_DST_LAYOUT_COUNT];
-
- /** Pipeline that blits from a 1D image. */
- VkPipeline pipeline_1d_src[NUM_META_FS_KEYS];
-
- /** Pipeline that blits from a 2D image. */
- VkPipeline pipeline_2d_src[NUM_META_FS_KEYS];
-
- /** Pipeline that blits from a 3D image. */
- VkPipeline pipeline_3d_src[NUM_META_FS_KEYS];
-
- VkRenderPass depth_only_rp[RADV_BLIT_DS_LAYOUT_COUNT];
- VkPipeline depth_only_1d_pipeline;
- VkPipeline depth_only_2d_pipeline;
- VkPipeline depth_only_3d_pipeline;
-
- VkRenderPass stencil_only_rp[RADV_BLIT_DS_LAYOUT_COUNT];
- VkPipeline stencil_only_1d_pipeline;
- VkPipeline stencil_only_2d_pipeline;
- VkPipeline stencil_only_3d_pipeline;
- VkPipelineLayout pipeline_layout;
- VkDescriptorSetLayout ds_layout;
- } blit;
-
- struct {
- VkPipelineLayout p_layouts[5];
- VkDescriptorSetLayout ds_layouts[5];
- VkPipeline pipelines[5][NUM_META_FS_KEYS];
-
- VkPipeline depth_only_pipeline[5];
-
- VkPipeline stencil_only_pipeline[5];
- } blit2d[MAX_SAMPLES_LOG2];
-
- VkRenderPass blit2d_render_passes[NUM_META_FS_KEYS][RADV_META_DST_LAYOUT_COUNT];
- VkRenderPass blit2d_depth_only_rp[RADV_BLIT_DS_LAYOUT_COUNT];
- VkRenderPass blit2d_stencil_only_rp[RADV_BLIT_DS_LAYOUT_COUNT];
-
- struct {
- VkPipelineLayout img_p_layout;
- VkDescriptorSetLayout img_ds_layout;
- VkPipeline pipeline;
- VkPipeline pipeline_3d;
- } itob;
- struct {
- VkPipelineLayout img_p_layout;
- VkDescriptorSetLayout img_ds_layout;
- VkPipeline pipeline;
- VkPipeline pipeline_3d;
- } btoi;
- struct {
- VkPipelineLayout img_p_layout;
- VkDescriptorSetLayout img_ds_layout;
- VkPipeline pipeline;
- } btoi_r32g32b32;
- struct {
- VkPipelineLayout img_p_layout;
- VkDescriptorSetLayout img_ds_layout;
- VkPipeline pipeline[MAX_SAMPLES_LOG2];
- VkPipeline pipeline_3d;
- } itoi;
- struct {
- VkPipelineLayout img_p_layout;
- VkDescriptorSetLayout img_ds_layout;
- VkPipeline pipeline;
- } itoi_r32g32b32;
- struct {
- VkPipelineLayout img_p_layout;
- VkDescriptorSetLayout img_ds_layout;
- VkPipeline pipeline[MAX_SAMPLES_LOG2];
- VkPipeline pipeline_3d;
- } cleari;
- struct {
- VkPipelineLayout img_p_layout;
- VkDescriptorSetLayout img_ds_layout;
- VkPipeline pipeline;
- } cleari_r32g32b32;
-
- struct {
- VkPipelineLayout p_layout;
- VkPipeline pipeline[NUM_META_FS_KEYS];
- VkRenderPass pass[NUM_META_FS_KEYS];
- } resolve;
-
- struct {
- VkDescriptorSetLayout ds_layout;
- VkPipelineLayout p_layout;
- struct {
- VkPipeline pipeline;
- VkPipeline i_pipeline;
- VkPipeline srgb_pipeline;
- } rc[MAX_SAMPLES_LOG2];
-
- VkPipeline depth_zero_pipeline;
- struct {
- VkPipeline average_pipeline;
- VkPipeline max_pipeline;
- VkPipeline min_pipeline;
- } depth[MAX_SAMPLES_LOG2];
-
- VkPipeline stencil_zero_pipeline;
- struct {
- VkPipeline max_pipeline;
- VkPipeline min_pipeline;
- } stencil[MAX_SAMPLES_LOG2];
- } resolve_compute;
-
- struct {
- VkDescriptorSetLayout ds_layout;
- VkPipelineLayout p_layout;
-
- struct {
- VkRenderPass render_pass[NUM_META_FS_KEYS][RADV_META_DST_LAYOUT_COUNT];
- VkPipeline pipeline[NUM_META_FS_KEYS];
- } rc[MAX_SAMPLES_LOG2];
-
- VkRenderPass depth_render_pass;
- VkPipeline depth_zero_pipeline;
- struct {
- VkPipeline average_pipeline;
- VkPipeline max_pipeline;
- VkPipeline min_pipeline;
- } depth[MAX_SAMPLES_LOG2];
-
- VkRenderPass stencil_render_pass;
- VkPipeline stencil_zero_pipeline;
- struct {
- VkPipeline max_pipeline;
- VkPipeline min_pipeline;
- } stencil[MAX_SAMPLES_LOG2];
- } resolve_fragment;
-
- struct {
- VkPipelineLayout p_layout;
- VkPipeline decompress_pipeline[NUM_DEPTH_DECOMPRESS_PIPELINES];
- VkPipeline resummarize_pipeline;
- VkRenderPass pass;
- } depth_decomp[MAX_SAMPLES_LOG2];
-
- struct {
- VkPipelineLayout p_layout;
- VkPipeline cmask_eliminate_pipeline;
- VkPipeline fmask_decompress_pipeline;
- VkPipeline dcc_decompress_pipeline;
- VkRenderPass pass;
-
- VkDescriptorSetLayout dcc_decompress_compute_ds_layout;
- VkPipelineLayout dcc_decompress_compute_p_layout;
- VkPipeline dcc_decompress_compute_pipeline;
- } fast_clear_flush;
-
- struct {
- VkPipelineLayout fill_p_layout;
- VkPipelineLayout copy_p_layout;
- VkDescriptorSetLayout fill_ds_layout;
- VkDescriptorSetLayout copy_ds_layout;
- VkPipeline fill_pipeline;
- VkPipeline copy_pipeline;
- } buffer;
-
- struct {
- VkDescriptorSetLayout ds_layout;
- VkPipelineLayout p_layout;
- VkPipeline occlusion_query_pipeline;
- VkPipeline pipeline_statistics_query_pipeline;
- VkPipeline tfb_query_pipeline;
- VkPipeline timestamp_query_pipeline;
- } query;
-
- struct {
- VkDescriptorSetLayout ds_layout;
- VkPipelineLayout p_layout;
- VkPipeline pipeline[MAX_SAMPLES_LOG2];
- } fmask_expand;
-
- struct {
- VkDescriptorSetLayout ds_layout;
- VkPipelineLayout p_layout;
- VkPipeline pipeline;
- } dcc_retile;
+ VkAllocationCallbacks alloc;
+
+ struct radv_pipeline_cache cache;
+
+ /*
+ * For on-demand pipeline creation, makes sure that
+ * only one thread tries to build a pipeline at the same time.
+ */
+ mtx_t mtx;
+
+ /**
+ * Use array element `i` for images with `2^i` samples.
+ */
+ struct {
+ VkRenderPass render_pass[NUM_META_FS_KEYS];
+ VkPipeline color_pipelines[NUM_META_FS_KEYS];
+
+ VkRenderPass depthstencil_rp;
+ VkPipeline depth_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
+ VkPipeline stencil_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
+ VkPipeline depthstencil_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
+
+ VkPipeline depth_only_unrestricted_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
+ VkPipeline stencil_only_unrestricted_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
+ VkPipeline depthstencil_unrestricted_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
+ } clear[MAX_SAMPLES_LOG2];
+
+ VkPipelineLayout clear_color_p_layout;
+ VkPipelineLayout clear_depth_p_layout;
+ VkPipelineLayout clear_depth_unrestricted_p_layout;
+
+ /* Optimized compute fast HTILE clear for stencil or depth only. */
+ VkPipeline clear_htile_mask_pipeline;
+ VkPipelineLayout clear_htile_mask_p_layout;
+ VkDescriptorSetLayout clear_htile_mask_ds_layout;
+
+ struct {
+ VkRenderPass render_pass[NUM_META_FS_KEYS][RADV_META_DST_LAYOUT_COUNT];
+
+ /** Pipeline that blits from a 1D image. */
+ VkPipeline pipeline_1d_src[NUM_META_FS_KEYS];
+
+ /** Pipeline that blits from a 2D image. */
+ VkPipeline pipeline_2d_src[NUM_META_FS_KEYS];
+
+ /** Pipeline that blits from a 3D image. */
+ VkPipeline pipeline_3d_src[NUM_META_FS_KEYS];
+
+ VkRenderPass depth_only_rp[RADV_BLIT_DS_LAYOUT_COUNT];
+ VkPipeline depth_only_1d_pipeline;
+ VkPipeline depth_only_2d_pipeline;
+ VkPipeline depth_only_3d_pipeline;
+
+ VkRenderPass stencil_only_rp[RADV_BLIT_DS_LAYOUT_COUNT];
+ VkPipeline stencil_only_1d_pipeline;
+ VkPipeline stencil_only_2d_pipeline;
+ VkPipeline stencil_only_3d_pipeline;
+ VkPipelineLayout pipeline_layout;
+ VkDescriptorSetLayout ds_layout;
+ } blit;
+
+ struct {
+ VkPipelineLayout p_layouts[5];
+ VkDescriptorSetLayout ds_layouts[5];
+ VkPipeline pipelines[5][NUM_META_FS_KEYS];
+
+ VkPipeline depth_only_pipeline[5];
+
+ VkPipeline stencil_only_pipeline[5];
+ } blit2d[MAX_SAMPLES_LOG2];
+
+ VkRenderPass blit2d_render_passes[NUM_META_FS_KEYS][RADV_META_DST_LAYOUT_COUNT];
+ VkRenderPass blit2d_depth_only_rp[RADV_BLIT_DS_LAYOUT_COUNT];
+ VkRenderPass blit2d_stencil_only_rp[RADV_BLIT_DS_LAYOUT_COUNT];
+
+ struct {
+ VkPipelineLayout img_p_layout;
+ VkDescriptorSetLayout img_ds_layout;
+ VkPipeline pipeline;
+ VkPipeline pipeline_3d;
+ } itob;
+ struct {
+ VkPipelineLayout img_p_layout;
+ VkDescriptorSetLayout img_ds_layout;
+ VkPipeline pipeline;
+ VkPipeline pipeline_3d;
+ } btoi;
+ struct {
+ VkPipelineLayout img_p_layout;
+ VkDescriptorSetLayout img_ds_layout;
+ VkPipeline pipeline;
+ } btoi_r32g32b32;
+ struct {
+ VkPipelineLayout img_p_layout;
+ VkDescriptorSetLayout img_ds_layout;
+ VkPipeline pipeline[MAX_SAMPLES_LOG2];
+ VkPipeline pipeline_3d;
+ } itoi;
+ struct {
+ VkPipelineLayout img_p_layout;
+ VkDescriptorSetLayout img_ds_layout;
+ VkPipeline pipeline;
+ } itoi_r32g32b32;
+ struct {
+ VkPipelineLayout img_p_layout;
+ VkDescriptorSetLayout img_ds_layout;
+ VkPipeline pipeline[MAX_SAMPLES_LOG2];
+ VkPipeline pipeline_3d;
+ } cleari;
+ struct {
+ VkPipelineLayout img_p_layout;
+ VkDescriptorSetLayout img_ds_layout;
+ VkPipeline pipeline;
+ } cleari_r32g32b32;
+
+ struct {
+ VkPipelineLayout p_layout;
+ VkPipeline pipeline[NUM_META_FS_KEYS];
+ VkRenderPass pass[NUM_META_FS_KEYS];
+ } resolve;
+
+ struct {
+ VkDescriptorSetLayout ds_layout;
+ VkPipelineLayout p_layout;
+ struct {
+ VkPipeline pipeline;
+ VkPipeline i_pipeline;
+ VkPipeline srgb_pipeline;
+ } rc[MAX_SAMPLES_LOG2];
+
+ VkPipeline depth_zero_pipeline;
+ struct {
+ VkPipeline average_pipeline;
+ VkPipeline max_pipeline;
+ VkPipeline min_pipeline;
+ } depth[MAX_SAMPLES_LOG2];
+
+ VkPipeline stencil_zero_pipeline;
+ struct {
+ VkPipeline max_pipeline;
+ VkPipeline min_pipeline;
+ } stencil[MAX_SAMPLES_LOG2];
+ } resolve_compute;
+
+ struct {
+ VkDescriptorSetLayout ds_layout;
+ VkPipelineLayout p_layout;
+
+ struct {
+ VkRenderPass render_pass[NUM_META_FS_KEYS][RADV_META_DST_LAYOUT_COUNT];
+ VkPipeline pipeline[NUM_META_FS_KEYS];
+ } rc[MAX_SAMPLES_LOG2];
+
+ VkRenderPass depth_render_pass;
+ VkPipeline depth_zero_pipeline;
+ struct {
+ VkPipeline average_pipeline;
+ VkPipeline max_pipeline;
+ VkPipeline min_pipeline;
+ } depth[MAX_SAMPLES_LOG2];
+
+ VkRenderPass stencil_render_pass;
+ VkPipeline stencil_zero_pipeline;
+ struct {
+ VkPipeline max_pipeline;
+ VkPipeline min_pipeline;
+ } stencil[MAX_SAMPLES_LOG2];
+ } resolve_fragment;
+
+ struct {
+ VkPipelineLayout p_layout;
+ VkPipeline decompress_pipeline[NUM_DEPTH_DECOMPRESS_PIPELINES];
+ VkPipeline resummarize_pipeline;
+ VkRenderPass pass;
+ } depth_decomp[MAX_SAMPLES_LOG2];
+
+ struct {
+ VkPipelineLayout p_layout;
+ VkPipeline cmask_eliminate_pipeline;
+ VkPipeline fmask_decompress_pipeline;
+ VkPipeline dcc_decompress_pipeline;
+ VkRenderPass pass;
+
+ VkDescriptorSetLayout dcc_decompress_compute_ds_layout;
+ VkPipelineLayout dcc_decompress_compute_p_layout;
+ VkPipeline dcc_decompress_compute_pipeline;
+ } fast_clear_flush;
+
+ struct {
+ VkPipelineLayout fill_p_layout;
+ VkPipelineLayout copy_p_layout;
+ VkDescriptorSetLayout fill_ds_layout;
+ VkDescriptorSetLayout copy_ds_layout;
+ VkPipeline fill_pipeline;
+ VkPipeline copy_pipeline;
+ } buffer;
+
+ struct {
+ VkDescriptorSetLayout ds_layout;
+ VkPipelineLayout p_layout;
+ VkPipeline occlusion_query_pipeline;
+ VkPipeline pipeline_statistics_query_pipeline;
+ VkPipeline tfb_query_pipeline;
+ VkPipeline timestamp_query_pipeline;
+ } query;
+
+ struct {
+ VkDescriptorSetLayout ds_layout;
+ VkPipelineLayout p_layout;
+ VkPipeline pipeline[MAX_SAMPLES_LOG2];
+ } fmask_expand;
+
+ struct {
+ VkDescriptorSetLayout ds_layout;
+ VkPipelineLayout p_layout;
+ VkPipeline pipeline;
+ } dcc_retile;
};
/* queue types */
-#define RADV_QUEUE_GENERAL 0
-#define RADV_QUEUE_COMPUTE 1
+#define RADV_QUEUE_GENERAL 0
+#define RADV_QUEUE_COMPUTE 1
#define RADV_QUEUE_TRANSFER 2
/* Not a real queue family */
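The hunk above also reformats radv_meta_dst_layout_from_layout() and radv_meta_blit_ds_to_type(), which collapse a VkImageLayout into the small enums that index the per-layout arrays in radv_meta_state. A sketch of the intended lookup pattern follows; the function name and fs_key index are placeholders, not code from this patch.

/* Illustrative lookup pattern (not part of the patch): collapse the layout to
 * the small enum, then use it to index the matching meta render pass. */
static VkRenderPass
radv_example_pick_blit2d_pass(const struct radv_meta_state *state,
                              VkImageLayout dst_layout, unsigned fs_key)
{
   enum radv_meta_dst_layout dst = radv_meta_dst_layout_from_layout(dst_layout);
   return state->blit2d_render_passes[fs_key][dst];
}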
@@ -674,588 +673,574 @@ struct radv_deferred_queue_submission;
enum ring_type radv_queue_family_to_ring(int f);
struct radv_queue {
- struct vk_object_base base;
- struct radv_device * device;
- struct radeon_winsys_ctx *hw_ctx;
- enum radeon_ctx_priority priority;
- uint32_t queue_family_index;
- int queue_idx;
- VkDeviceQueueCreateFlags flags;
-
- uint32_t scratch_size_per_wave;
- uint32_t scratch_waves;
- uint32_t compute_scratch_size_per_wave;
- uint32_t compute_scratch_waves;
- uint32_t esgs_ring_size;
- uint32_t gsvs_ring_size;
- bool has_tess_rings;
- bool has_gds;
- bool has_gds_oa;
- bool has_sample_positions;
-
- struct radeon_winsys_bo *scratch_bo;
- struct radeon_winsys_bo *descriptor_bo;
- struct radeon_winsys_bo *compute_scratch_bo;
- struct radeon_winsys_bo *esgs_ring_bo;
- struct radeon_winsys_bo *gsvs_ring_bo;
- struct radeon_winsys_bo *tess_rings_bo;
- struct radeon_winsys_bo *gds_bo;
- struct radeon_winsys_bo *gds_oa_bo;
- struct radeon_cmdbuf *initial_preamble_cs;
- struct radeon_cmdbuf *initial_full_flush_preamble_cs;
- struct radeon_cmdbuf *continue_preamble_cs;
-
- struct list_head pending_submissions;
- mtx_t pending_mutex;
-
- mtx_t thread_mutex;
- struct u_cnd_monotonic thread_cond;
- struct radv_deferred_queue_submission *thread_submission;
- thrd_t submission_thread;
- bool thread_exit;
- bool thread_running;
- bool cond_created;
+ struct vk_object_base base;
+ struct radv_device *device;
+ struct radeon_winsys_ctx *hw_ctx;
+ enum radeon_ctx_priority priority;
+ uint32_t queue_family_index;
+ int queue_idx;
+ VkDeviceQueueCreateFlags flags;
+
+ uint32_t scratch_size_per_wave;
+ uint32_t scratch_waves;
+ uint32_t compute_scratch_size_per_wave;
+ uint32_t compute_scratch_waves;
+ uint32_t esgs_ring_size;
+ uint32_t gsvs_ring_size;
+ bool has_tess_rings;
+ bool has_gds;
+ bool has_gds_oa;
+ bool has_sample_positions;
+
+ struct radeon_winsys_bo *scratch_bo;
+ struct radeon_winsys_bo *descriptor_bo;
+ struct radeon_winsys_bo *compute_scratch_bo;
+ struct radeon_winsys_bo *esgs_ring_bo;
+ struct radeon_winsys_bo *gsvs_ring_bo;
+ struct radeon_winsys_bo *tess_rings_bo;
+ struct radeon_winsys_bo *gds_bo;
+ struct radeon_winsys_bo *gds_oa_bo;
+ struct radeon_cmdbuf *initial_preamble_cs;
+ struct radeon_cmdbuf *initial_full_flush_preamble_cs;
+ struct radeon_cmdbuf *continue_preamble_cs;
+
+ struct list_head pending_submissions;
+ mtx_t pending_mutex;
+
+ mtx_t thread_mutex;
+ struct u_cnd_monotonic thread_cond;
+ struct radv_deferred_queue_submission *thread_submission;
+ thrd_t submission_thread;
+ bool thread_exit;
+ bool thread_running;
+ bool cond_created;
};
#define RADV_BORDER_COLOR_COUNT 4096
#define RADV_BORDER_COLOR_BUFFER_SIZE (sizeof(VkClearColorValue) * RADV_BORDER_COLOR_COUNT)
struct radv_device_border_color_data {
- bool used[RADV_BORDER_COLOR_COUNT];
+ bool used[RADV_BORDER_COLOR_COUNT];
- struct radeon_winsys_bo *bo;
- VkClearColorValue *colors_gpu_ptr;
+ struct radeon_winsys_bo *bo;
+ VkClearColorValue *colors_gpu_ptr;
- /* Mutex is required to guarantee vkCreateSampler thread safety
- * given that we are writing to a buffer and checking color occupation */
- mtx_t mutex;
+ /* Mutex is required to guarantee vkCreateSampler thread safety
+ * given that we are writing to a buffer and checking color occupation */
+ mtx_t mutex;
};
-enum radv_force_vrs
-{
- RADV_FORCE_VRS_NONE = 0,
- RADV_FORCE_VRS_2x2,
- RADV_FORCE_VRS_2x1,
- RADV_FORCE_VRS_1x2,
+enum radv_force_vrs {
+ RADV_FORCE_VRS_NONE = 0,
+ RADV_FORCE_VRS_2x2,
+ RADV_FORCE_VRS_2x1,
+ RADV_FORCE_VRS_1x2,
};
struct radv_device {
- struct vk_device vk;
+ struct vk_device vk;
- struct radv_instance * instance;
- struct radeon_winsys *ws;
+ struct radv_instance *instance;
+ struct radeon_winsys *ws;
- struct radeon_winsys_ctx *hw_ctx[RADV_NUM_HW_CTX];
- struct radv_meta_state meta_state;
+ struct radeon_winsys_ctx *hw_ctx[RADV_NUM_HW_CTX];
+ struct radv_meta_state meta_state;
- struct radv_queue *queues[RADV_MAX_QUEUE_FAMILIES];
- int queue_count[RADV_MAX_QUEUE_FAMILIES];
- struct radeon_cmdbuf *empty_cs[RADV_MAX_QUEUE_FAMILIES];
+ struct radv_queue *queues[RADV_MAX_QUEUE_FAMILIES];
+ int queue_count[RADV_MAX_QUEUE_FAMILIES];
+ struct radeon_cmdbuf *empty_cs[RADV_MAX_QUEUE_FAMILIES];
- bool pbb_allowed;
- bool dfsm_allowed;
- uint32_t tess_offchip_block_dw_size;
- uint32_t scratch_waves;
- uint32_t dispatch_initiator;
+ bool pbb_allowed;
+ bool dfsm_allowed;
+ uint32_t tess_offchip_block_dw_size;
+ uint32_t scratch_waves;
+ uint32_t dispatch_initiator;
- uint32_t gs_table_depth;
+ uint32_t gs_table_depth;
- /* MSAA sample locations.
- * The first index is the sample index.
- * The second index is the coordinate: X, Y. */
- float sample_locations_1x[1][2];
- float sample_locations_2x[2][2];
- float sample_locations_4x[4][2];
- float sample_locations_8x[8][2];
+ /* MSAA sample locations.
+ * The first index is the sample index.
+ * The second index is the coordinate: X, Y. */
+ float sample_locations_1x[1][2];
+ float sample_locations_2x[2][2];
+ float sample_locations_4x[4][2];
+ float sample_locations_8x[8][2];
- /* GFX7 and later */
- uint32_t gfx_init_size_dw;
- struct radeon_winsys_bo *gfx_init;
+ /* GFX7 and later */
+ uint32_t gfx_init_size_dw;
+ struct radeon_winsys_bo *gfx_init;
- struct radeon_winsys_bo *trace_bo;
- uint32_t *trace_id_ptr;
+ struct radeon_winsys_bo *trace_bo;
+ uint32_t *trace_id_ptr;
- /* Whether to keep shader debug info, for tracing or VK_AMD_shader_info */
- bool keep_shader_info;
+ /* Whether to keep shader debug info, for tracing or VK_AMD_shader_info */
+ bool keep_shader_info;
- struct radv_physical_device *physical_device;
+ struct radv_physical_device *physical_device;
- /* Backup in-memory cache to be used if the app doesn't provide one */
- struct radv_pipeline_cache * mem_cache;
+ /* Backup in-memory cache to be used if the app doesn't provide one */
+ struct radv_pipeline_cache *mem_cache;
- /*
- * use different counters so MSAA MRTs get consecutive surface indices,
- * even if MASK is allocated in between.
- */
- uint32_t image_mrt_offset_counter;
- uint32_t fmask_mrt_offset_counter;
- struct list_head shader_slabs;
- mtx_t shader_slab_mutex;
+ /*
+ * use different counters so MSAA MRTs get consecutive surface indices,
+ * even if MASK is allocated in between.
+ */
+ uint32_t image_mrt_offset_counter;
+ uint32_t fmask_mrt_offset_counter;
+ struct list_head shader_slabs;
+ mtx_t shader_slab_mutex;
- /* For detecting VM faults reported by dmesg. */
- uint64_t dmesg_timestamp;
+ /* For detecting VM faults reported by dmesg. */
+ uint64_t dmesg_timestamp;
- /* Whether the app has enabled the robustBufferAccess/robustBufferAccess2 features. */
- bool robust_buffer_access;
- bool robust_buffer_access2;
+ /* Whether the app has enabled the robustBufferAccess/robustBufferAccess2 features. */
+ bool robust_buffer_access;
+ bool robust_buffer_access2;
- /* Whether gl_FragCoord.z should be adjusted for VRS due to a hw bug
- * on some GFX10.3 chips.
- */
- bool adjust_frag_coord_z;
+ /* Whether gl_FragCoord.z should be adjusted for VRS due to a hw bug
+ * on some GFX10.3 chips.
+ */
+ bool adjust_frag_coord_z;
- /* Whether the driver uses a global BO list. */
- bool use_global_bo_list;
+ /* Whether the driver uses a global BO list. */
+ bool use_global_bo_list;
- /* Whether anisotropy is forced with RADV_TEX_ANISO (-1 is disabled). */
- int force_aniso;
+ /* Whether anisotropy is forced with RADV_TEX_ANISO (-1 is disabled). */
+ int force_aniso;
- struct radv_device_border_color_data border_color_data;
+ struct radv_device_border_color_data border_color_data;
- /* Condition variable for legacy timelines, to notify waiters when a
- * new point gets submitted. */
- struct u_cnd_monotonic timeline_cond;
+ /* Condition variable for legacy timelines, to notify waiters when a
+ * new point gets submitted. */
+ struct u_cnd_monotonic timeline_cond;
- /* Thread trace. */
- struct ac_thread_trace_data thread_trace;
+ /* Thread trace. */
+ struct ac_thread_trace_data thread_trace;
- /* Trap handler. */
- struct radv_shader_variant *trap_handler_shader;
- struct radeon_winsys_bo *tma_bo; /* Trap Memory Address */
- uint32_t *tma_ptr;
+ /* Trap handler. */
+ struct radv_shader_variant *trap_handler_shader;
+ struct radeon_winsys_bo *tma_bo; /* Trap Memory Address */
+ uint32_t *tma_ptr;
- /* Overallocation. */
- bool overallocation_disallowed;
- uint64_t allocated_memory_size[VK_MAX_MEMORY_HEAPS];
- mtx_t overallocation_mutex;
+ /* Overallocation. */
+ bool overallocation_disallowed;
+ uint64_t allocated_memory_size[VK_MAX_MEMORY_HEAPS];
+ mtx_t overallocation_mutex;
-   /* Track the number of device losses. */
- int lost;
+   /* Track the number of device losses. */
+ int lost;
- /* Whether the user forced VRS rates on GFX10.3+. */
- enum radv_force_vrs force_vrs;
+ /* Whether the user forced VRS rates on GFX10.3+. */
+ enum radv_force_vrs force_vrs;
};
-VkResult _radv_device_set_lost(struct radv_device *device,
- const char *file, int line,
- const char *msg, ...)
- radv_printflike(4, 5);
+VkResult _radv_device_set_lost(struct radv_device *device, const char *file, int line,
+ const char *msg, ...) radv_printflike(4, 5);
-#define radv_device_set_lost(dev, ...) \
- _radv_device_set_lost(dev, __FILE__, __LINE__, __VA_ARGS__)
+#define radv_device_set_lost(dev, ...) _radv_device_set_lost(dev, __FILE__, __LINE__, __VA_ARGS__)
static inline bool
radv_device_is_lost(const struct radv_device *device)
{
- return unlikely(p_atomic_read(&device->lost));
+ return unlikely(p_atomic_read(&device->lost));
}
struct radv_device_memory {
- struct vk_object_base base;
- struct radeon_winsys_bo *bo;
- /* for dedicated allocations */
- struct radv_image *image;
- struct radv_buffer *buffer;
- uint32_t heap_index;
- uint64_t alloc_size;
- void * map;
- void * user_ptr;
+ struct vk_object_base base;
+ struct radeon_winsys_bo *bo;
+ /* for dedicated allocations */
+ struct radv_image *image;
+ struct radv_buffer *buffer;
+ uint32_t heap_index;
+ uint64_t alloc_size;
+ void *map;
+ void *user_ptr;
#if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER
- struct AHardwareBuffer * android_hardware_buffer;
+ struct AHardwareBuffer *android_hardware_buffer;
#endif
};
-
struct radv_descriptor_range {
- uint64_t va;
- uint32_t size;
+ uint64_t va;
+ uint32_t size;
};
struct radv_descriptor_set_header {
- struct vk_object_base base;
- const struct radv_descriptor_set_layout *layout;
- uint32_t size;
- uint32_t buffer_count;
+ struct vk_object_base base;
+ const struct radv_descriptor_set_layout *layout;
+ uint32_t size;
+ uint32_t buffer_count;
- struct radeon_winsys_bo *bo;
- uint64_t va;
- uint32_t *mapped_ptr;
- struct radv_descriptor_range *dynamic_descriptors;
+ struct radeon_winsys_bo *bo;
+ uint64_t va;
+ uint32_t *mapped_ptr;
+ struct radv_descriptor_range *dynamic_descriptors;
};
struct radv_descriptor_set {
- struct radv_descriptor_set_header header;
+ struct radv_descriptor_set_header header;
- struct radeon_winsys_bo *descriptors[];
+ struct radeon_winsys_bo *descriptors[];
};
-struct radv_push_descriptor_set
-{
- struct radv_descriptor_set_header set;
- uint32_t capacity;
+struct radv_push_descriptor_set {
+ struct radv_descriptor_set_header set;
+ uint32_t capacity;
};
struct radv_descriptor_pool_entry {
- uint32_t offset;
- uint32_t size;
- struct radv_descriptor_set *set;
+ uint32_t offset;
+ uint32_t size;
+ struct radv_descriptor_set *set;
};
struct radv_descriptor_pool {
- struct vk_object_base base;
- struct radeon_winsys_bo *bo;
- uint8_t *host_bo;
- uint8_t *mapped_ptr;
- uint64_t current_offset;
- uint64_t size;
+ struct vk_object_base base;
+ struct radeon_winsys_bo *bo;
+ uint8_t *host_bo;
+ uint8_t *mapped_ptr;
+ uint64_t current_offset;
+ uint64_t size;
- uint8_t *host_memory_base;
- uint8_t *host_memory_ptr;
- uint8_t *host_memory_end;
+ uint8_t *host_memory_base;
+ uint8_t *host_memory_ptr;
+ uint8_t *host_memory_end;
- uint32_t entry_count;
- uint32_t max_entry_count;
- struct radv_descriptor_pool_entry entries[0];
+ uint32_t entry_count;
+ uint32_t max_entry_count;
+ struct radv_descriptor_pool_entry entries[0];
};
struct radv_descriptor_update_template_entry {
- VkDescriptorType descriptor_type;
+ VkDescriptorType descriptor_type;
- /* The number of descriptors to update */
- uint32_t descriptor_count;
+ /* The number of descriptors to update */
+ uint32_t descriptor_count;
- /* Into mapped_ptr or dynamic_descriptors, in units of the respective array */
- uint32_t dst_offset;
+ /* Into mapped_ptr or dynamic_descriptors, in units of the respective array */
+ uint32_t dst_offset;
- /* In dwords. Not valid/used for dynamic descriptors */
- uint32_t dst_stride;
+ /* In dwords. Not valid/used for dynamic descriptors */
+ uint32_t dst_stride;
- uint32_t buffer_offset;
+ uint32_t buffer_offset;
- /* Only valid for combined image samplers and samplers */
- uint8_t has_sampler;
- uint8_t sampler_offset;
+ /* Only valid for combined image samplers and samplers */
+ uint8_t has_sampler;
+ uint8_t sampler_offset;
- /* In bytes */
- size_t src_offset;
- size_t src_stride;
+ /* In bytes */
+ size_t src_offset;
+ size_t src_stride;
- /* For push descriptors */
- const uint32_t *immutable_samplers;
+ /* For push descriptors */
+ const uint32_t *immutable_samplers;
};
struct radv_descriptor_update_template {
- struct vk_object_base base;
- uint32_t entry_count;
- VkPipelineBindPoint bind_point;
- struct radv_descriptor_update_template_entry entry[0];
+ struct vk_object_base base;
+ uint32_t entry_count;
+ VkPipelineBindPoint bind_point;
+ struct radv_descriptor_update_template_entry entry[0];
};
struct radv_buffer {
- struct vk_object_base base;
- VkDeviceSize size;
+ struct vk_object_base base;
+ VkDeviceSize size;
- VkBufferUsageFlags usage;
- VkBufferCreateFlags flags;
+ VkBufferUsageFlags usage;
+ VkBufferCreateFlags flags;
- /* Set when bound */
- struct radeon_winsys_bo * bo;
- VkDeviceSize offset;
+ /* Set when bound */
+ struct radeon_winsys_bo *bo;
+ VkDeviceSize offset;
- bool shareable;
+ bool shareable;
};
enum radv_dynamic_state_bits {
- RADV_DYNAMIC_VIEWPORT = 1ull << 0,
- RADV_DYNAMIC_SCISSOR = 1ull << 1,
- RADV_DYNAMIC_LINE_WIDTH = 1ull << 2,
- RADV_DYNAMIC_DEPTH_BIAS = 1ull << 3,
- RADV_DYNAMIC_BLEND_CONSTANTS = 1ull << 4,
- RADV_DYNAMIC_DEPTH_BOUNDS = 1ull << 5,
- RADV_DYNAMIC_STENCIL_COMPARE_MASK = 1ull << 6,
- RADV_DYNAMIC_STENCIL_WRITE_MASK = 1ull << 7,
- RADV_DYNAMIC_STENCIL_REFERENCE = 1ull << 8,
- RADV_DYNAMIC_DISCARD_RECTANGLE = 1ull << 9,
- RADV_DYNAMIC_SAMPLE_LOCATIONS = 1ull << 10,
- RADV_DYNAMIC_LINE_STIPPLE = 1ull << 11,
- RADV_DYNAMIC_CULL_MODE = 1ull << 12,
- RADV_DYNAMIC_FRONT_FACE = 1ull << 13,
- RADV_DYNAMIC_PRIMITIVE_TOPOLOGY = 1ull << 14,
- RADV_DYNAMIC_DEPTH_TEST_ENABLE = 1ull << 15,
- RADV_DYNAMIC_DEPTH_WRITE_ENABLE = 1ull << 16,
- RADV_DYNAMIC_DEPTH_COMPARE_OP = 1ull << 17,
- RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE = 1ull << 18,
- RADV_DYNAMIC_STENCIL_TEST_ENABLE = 1ull << 19,
- RADV_DYNAMIC_STENCIL_OP = 1ull << 20,
- RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE = 1ull << 21,
- RADV_DYNAMIC_FRAGMENT_SHADING_RATE = 1ull << 22,
- RADV_DYNAMIC_ALL = (1ull << 23) - 1,
+ RADV_DYNAMIC_VIEWPORT = 1ull << 0,
+ RADV_DYNAMIC_SCISSOR = 1ull << 1,
+ RADV_DYNAMIC_LINE_WIDTH = 1ull << 2,
+ RADV_DYNAMIC_DEPTH_BIAS = 1ull << 3,
+ RADV_DYNAMIC_BLEND_CONSTANTS = 1ull << 4,
+ RADV_DYNAMIC_DEPTH_BOUNDS = 1ull << 5,
+ RADV_DYNAMIC_STENCIL_COMPARE_MASK = 1ull << 6,
+ RADV_DYNAMIC_STENCIL_WRITE_MASK = 1ull << 7,
+ RADV_DYNAMIC_STENCIL_REFERENCE = 1ull << 8,
+ RADV_DYNAMIC_DISCARD_RECTANGLE = 1ull << 9,
+ RADV_DYNAMIC_SAMPLE_LOCATIONS = 1ull << 10,
+ RADV_DYNAMIC_LINE_STIPPLE = 1ull << 11,
+ RADV_DYNAMIC_CULL_MODE = 1ull << 12,
+ RADV_DYNAMIC_FRONT_FACE = 1ull << 13,
+ RADV_DYNAMIC_PRIMITIVE_TOPOLOGY = 1ull << 14,
+ RADV_DYNAMIC_DEPTH_TEST_ENABLE = 1ull << 15,
+ RADV_DYNAMIC_DEPTH_WRITE_ENABLE = 1ull << 16,
+ RADV_DYNAMIC_DEPTH_COMPARE_OP = 1ull << 17,
+ RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE = 1ull << 18,
+ RADV_DYNAMIC_STENCIL_TEST_ENABLE = 1ull << 19,
+ RADV_DYNAMIC_STENCIL_OP = 1ull << 20,
+ RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE = 1ull << 21,
+ RADV_DYNAMIC_FRAGMENT_SHADING_RATE = 1ull << 22,
+ RADV_DYNAMIC_ALL = (1ull << 23) - 1,
};
enum radv_cmd_dirty_bits {
- /* Keep the dynamic state dirty bits in sync with
- * enum radv_dynamic_state_bits */
- RADV_CMD_DIRTY_DYNAMIC_VIEWPORT = 1ull << 0,
- RADV_CMD_DIRTY_DYNAMIC_SCISSOR = 1ull << 1,
- RADV_CMD_DIRTY_DYNAMIC_LINE_WIDTH = 1ull << 2,
- RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS = 1ull << 3,
- RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS = 1ull << 4,
- RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS = 1ull << 5,
- RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK = 1ull << 6,
- RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK = 1ull << 7,
- RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE = 1ull << 8,
- RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE = 1ull << 9,
- RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS = 1ull << 10,
- RADV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE = 1ull << 11,
- RADV_CMD_DIRTY_DYNAMIC_CULL_MODE = 1ull << 12,
- RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE = 1ull << 13,
- RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY = 1ull << 14,
- RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE = 1ull << 15,
- RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE = 1ull << 16,
- RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP = 1ull << 17,
- RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE = 1ull << 18,
- RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE = 1ull << 19,
- RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP = 1ull << 20,
- RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE = 1ull << 21,
- RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE = 1ull << 22,
- RADV_CMD_DIRTY_DYNAMIC_ALL = (1ull << 23) - 1,
- RADV_CMD_DIRTY_PIPELINE = 1ull << 23,
- RADV_CMD_DIRTY_INDEX_BUFFER = 1ull << 24,
- RADV_CMD_DIRTY_FRAMEBUFFER = 1ull << 25,
- RADV_CMD_DIRTY_VERTEX_BUFFER = 1ull << 26,
- RADV_CMD_DIRTY_STREAMOUT_BUFFER = 1ull << 27,
+ /* Keep the dynamic state dirty bits in sync with
+ * enum radv_dynamic_state_bits */
+ RADV_CMD_DIRTY_DYNAMIC_VIEWPORT = 1ull << 0,
+ RADV_CMD_DIRTY_DYNAMIC_SCISSOR = 1ull << 1,
+ RADV_CMD_DIRTY_DYNAMIC_LINE_WIDTH = 1ull << 2,
+ RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS = 1ull << 3,
+ RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS = 1ull << 4,
+ RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS = 1ull << 5,
+ RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK = 1ull << 6,
+ RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK = 1ull << 7,
+ RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE = 1ull << 8,
+ RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE = 1ull << 9,
+ RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS = 1ull << 10,
+ RADV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE = 1ull << 11,
+ RADV_CMD_DIRTY_DYNAMIC_CULL_MODE = 1ull << 12,
+ RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE = 1ull << 13,
+ RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY = 1ull << 14,
+ RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE = 1ull << 15,
+ RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE = 1ull << 16,
+ RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP = 1ull << 17,
+ RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE = 1ull << 18,
+ RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE = 1ull << 19,
+ RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP = 1ull << 20,
+ RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE = 1ull << 21,
+ RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE = 1ull << 22,
+ RADV_CMD_DIRTY_DYNAMIC_ALL = (1ull << 23) - 1,
+ RADV_CMD_DIRTY_PIPELINE = 1ull << 23,
+ RADV_CMD_DIRTY_INDEX_BUFFER = 1ull << 24,
+ RADV_CMD_DIRTY_FRAMEBUFFER = 1ull << 25,
+ RADV_CMD_DIRTY_VERTEX_BUFFER = 1ull << 26,
+ RADV_CMD_DIRTY_STREAMOUT_BUFFER = 1ull << 27,
};
enum radv_cmd_flush_bits {
- /* Instruction cache. */
- RADV_CMD_FLAG_INV_ICACHE = 1 << 0,
- /* Scalar L1 cache. */
- RADV_CMD_FLAG_INV_SCACHE = 1 << 1,
- /* Vector L1 cache. */
- RADV_CMD_FLAG_INV_VCACHE = 1 << 2,
- /* L2 cache + L2 metadata cache writeback & invalidate.
- * GFX6-8: Used by shaders only. GFX9-10: Used by everything. */
- RADV_CMD_FLAG_INV_L2 = 1 << 3,
- /* L2 writeback (write dirty L2 lines to memory for non-L2 clients).
- * Only used for coherency with non-L2 clients like CB, DB, CP on GFX6-8.
- * GFX6-7 will do complete invalidation, because the writeback is unsupported. */
- RADV_CMD_FLAG_WB_L2 = 1 << 4,
- /* Invalidate the metadata cache. To be used when the DCC/HTILE metadata
- * changed and we want to read an image from shaders. */
- RADV_CMD_FLAG_INV_L2_METADATA = 1 << 5,
- /* Framebuffer caches */
- RADV_CMD_FLAG_FLUSH_AND_INV_CB_META = 1 << 6,
- RADV_CMD_FLAG_FLUSH_AND_INV_DB_META = 1 << 7,
- RADV_CMD_FLAG_FLUSH_AND_INV_DB = 1 << 8,
- RADV_CMD_FLAG_FLUSH_AND_INV_CB = 1 << 9,
- /* Engine synchronization. */
- RADV_CMD_FLAG_VS_PARTIAL_FLUSH = 1 << 10,
- RADV_CMD_FLAG_PS_PARTIAL_FLUSH = 1 << 11,
- RADV_CMD_FLAG_CS_PARTIAL_FLUSH = 1 << 12,
- RADV_CMD_FLAG_VGT_FLUSH = 1 << 13,
- /* Pipeline query controls. */
- RADV_CMD_FLAG_START_PIPELINE_STATS = 1 << 14,
- RADV_CMD_FLAG_STOP_PIPELINE_STATS = 1 << 15,
- RADV_CMD_FLAG_VGT_STREAMOUT_SYNC = 1 << 16,
-
- RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER = (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
- RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
- RADV_CMD_FLAG_FLUSH_AND_INV_DB |
- RADV_CMD_FLAG_FLUSH_AND_INV_DB_META)
+ /* Instruction cache. */
+ RADV_CMD_FLAG_INV_ICACHE = 1 << 0,
+ /* Scalar L1 cache. */
+ RADV_CMD_FLAG_INV_SCACHE = 1 << 1,
+ /* Vector L1 cache. */
+ RADV_CMD_FLAG_INV_VCACHE = 1 << 2,
+ /* L2 cache + L2 metadata cache writeback & invalidate.
+ * GFX6-8: Used by shaders only. GFX9-10: Used by everything. */
+ RADV_CMD_FLAG_INV_L2 = 1 << 3,
+ /* L2 writeback (write dirty L2 lines to memory for non-L2 clients).
+ * Only used for coherency with non-L2 clients like CB, DB, CP on GFX6-8.
+ * GFX6-7 will do complete invalidation, because the writeback is unsupported. */
+ RADV_CMD_FLAG_WB_L2 = 1 << 4,
+ /* Invalidate the metadata cache. To be used when the DCC/HTILE metadata
+ * changed and we want to read an image from shaders. */
+ RADV_CMD_FLAG_INV_L2_METADATA = 1 << 5,
+ /* Framebuffer caches */
+ RADV_CMD_FLAG_FLUSH_AND_INV_CB_META = 1 << 6,
+ RADV_CMD_FLAG_FLUSH_AND_INV_DB_META = 1 << 7,
+ RADV_CMD_FLAG_FLUSH_AND_INV_DB = 1 << 8,
+ RADV_CMD_FLAG_FLUSH_AND_INV_CB = 1 << 9,
+ /* Engine synchronization. */
+ RADV_CMD_FLAG_VS_PARTIAL_FLUSH = 1 << 10,
+ RADV_CMD_FLAG_PS_PARTIAL_FLUSH = 1 << 11,
+ RADV_CMD_FLAG_CS_PARTIAL_FLUSH = 1 << 12,
+ RADV_CMD_FLAG_VGT_FLUSH = 1 << 13,
+ /* Pipeline query controls. */
+ RADV_CMD_FLAG_START_PIPELINE_STATS = 1 << 14,
+ RADV_CMD_FLAG_STOP_PIPELINE_STATS = 1 << 15,
+ RADV_CMD_FLAG_VGT_STREAMOUT_SYNC = 1 << 16,
+
+ RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER =
+ (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
+ RADV_CMD_FLAG_FLUSH_AND_INV_DB | RADV_CMD_FLAG_FLUSH_AND_INV_DB_META)
};
struct radv_vertex_binding {
- struct radv_buffer * buffer;
- VkDeviceSize offset;
- VkDeviceSize size;
- VkDeviceSize stride;
+ struct radv_buffer *buffer;
+ VkDeviceSize offset;
+ VkDeviceSize size;
+ VkDeviceSize stride;
};
struct radv_streamout_binding {
- struct radv_buffer *buffer;
- VkDeviceSize offset;
- VkDeviceSize size;
+ struct radv_buffer *buffer;
+ VkDeviceSize offset;
+ VkDeviceSize size;
};
struct radv_streamout_state {
- /* Mask of bound streamout buffers. */
- uint8_t enabled_mask;
+ /* Mask of bound streamout buffers. */
+ uint8_t enabled_mask;
- /* External state that comes from the last vertex stage, it must be
-    * set explicitly when binding a new graphics pipeline.
- */
- uint16_t stride_in_dw[MAX_SO_BUFFERS];
- uint32_t enabled_stream_buffers_mask; /* stream0 buffers0-3 in 4 LSB */
+ /* External state that comes from the last vertex stage, it must be
+    * set explicitly when binding a new graphics pipeline.
+ */
+ uint16_t stride_in_dw[MAX_SO_BUFFERS];
+ uint32_t enabled_stream_buffers_mask; /* stream0 buffers0-3 in 4 LSB */
- /* State of VGT_STRMOUT_BUFFER_(CONFIG|END) */
- uint32_t hw_enabled_mask;
+ /* State of VGT_STRMOUT_BUFFER_(CONFIG|END) */
+ uint32_t hw_enabled_mask;
- /* State of VGT_STRMOUT_(CONFIG|EN) */
- bool streamout_enabled;
+ /* State of VGT_STRMOUT_(CONFIG|EN) */
+ bool streamout_enabled;
};
struct radv_viewport_state {
- uint32_t count;
- VkViewport viewports[MAX_VIEWPORTS];
+ uint32_t count;
+ VkViewport viewports[MAX_VIEWPORTS];
};
struct radv_scissor_state {
- uint32_t count;
- VkRect2D scissors[MAX_SCISSORS];
+ uint32_t count;
+ VkRect2D scissors[MAX_SCISSORS];
};
struct radv_discard_rectangle_state {
- uint32_t count;
- VkRect2D rectangles[MAX_DISCARD_RECTANGLES];
+ uint32_t count;
+ VkRect2D rectangles[MAX_DISCARD_RECTANGLES];
};
struct radv_sample_locations_state {
- VkSampleCountFlagBits per_pixel;
- VkExtent2D grid_size;
- uint32_t count;
- VkSampleLocationEXT locations[MAX_SAMPLE_LOCATIONS];
+ VkSampleCountFlagBits per_pixel;
+ VkExtent2D grid_size;
+ uint32_t count;
+ VkSampleLocationEXT locations[MAX_SAMPLE_LOCATIONS];
};
struct radv_dynamic_state {
- /**
- * Bitmask of (1ull << VK_DYNAMIC_STATE_*).
- * Defines the set of saved dynamic state.
- */
- uint64_t mask;
-
- struct radv_viewport_state viewport;
-
- struct radv_scissor_state scissor;
-
- float line_width;
-
- struct {
- float bias;
- float clamp;
- float slope;
- } depth_bias;
-
- float blend_constants[4];
-
- struct {
- float min;
- float max;
- } depth_bounds;
-
- struct {
- uint32_t front;
- uint32_t back;
- } stencil_compare_mask;
-
- struct {
- uint32_t front;
- uint32_t back;
- } stencil_write_mask;
-
- struct {
- struct {
- VkStencilOp fail_op;
- VkStencilOp pass_op;
- VkStencilOp depth_fail_op;
- VkCompareOp compare_op;
- } front;
-
- struct {
- VkStencilOp fail_op;
- VkStencilOp pass_op;
- VkStencilOp depth_fail_op;
- VkCompareOp compare_op;
- } back;
- } stencil_op;
-
- struct {
- uint32_t front;
- uint32_t back;
- } stencil_reference;
-
- struct radv_discard_rectangle_state discard_rectangle;
-
- struct radv_sample_locations_state sample_location;
-
- struct {
- uint32_t factor;
- uint16_t pattern;
- } line_stipple;
-
- VkCullModeFlags cull_mode;
- VkFrontFace front_face;
- unsigned primitive_topology;
-
- bool depth_test_enable;
- bool depth_write_enable;
- VkCompareOp depth_compare_op;
- bool depth_bounds_test_enable;
- bool stencil_test_enable;
-
- struct {
- VkExtent2D size;
- VkFragmentShadingRateCombinerOpKHR combiner_ops[2];
- } fragment_shading_rate;
+ /**
+ * Bitmask of (1ull << VK_DYNAMIC_STATE_*).
+ * Defines the set of saved dynamic state.
+ */
+ uint64_t mask;
+
+ struct radv_viewport_state viewport;
+
+ struct radv_scissor_state scissor;
+
+ float line_width;
+
+ struct {
+ float bias;
+ float clamp;
+ float slope;
+ } depth_bias;
+
+ float blend_constants[4];
+
+ struct {
+ float min;
+ float max;
+ } depth_bounds;
+
+ struct {
+ uint32_t front;
+ uint32_t back;
+ } stencil_compare_mask;
+
+ struct {
+ uint32_t front;
+ uint32_t back;
+ } stencil_write_mask;
+
+ struct {
+ struct {
+ VkStencilOp fail_op;
+ VkStencilOp pass_op;
+ VkStencilOp depth_fail_op;
+ VkCompareOp compare_op;
+ } front;
+
+ struct {
+ VkStencilOp fail_op;
+ VkStencilOp pass_op;
+ VkStencilOp depth_fail_op;
+ VkCompareOp compare_op;
+ } back;
+ } stencil_op;
+
+ struct {
+ uint32_t front;
+ uint32_t back;
+ } stencil_reference;
+
+ struct radv_discard_rectangle_state discard_rectangle;
+
+ struct radv_sample_locations_state sample_location;
+
+ struct {
+ uint32_t factor;
+ uint16_t pattern;
+ } line_stipple;
+
+ VkCullModeFlags cull_mode;
+ VkFrontFace front_face;
+ unsigned primitive_topology;
+
+ bool depth_test_enable;
+ bool depth_write_enable;
+ VkCompareOp depth_compare_op;
+ bool depth_bounds_test_enable;
+ bool stencil_test_enable;
+
+ struct {
+ VkExtent2D size;
+ VkFragmentShadingRateCombinerOpKHR combiner_ops[2];
+ } fragment_shading_rate;
};
extern const struct radv_dynamic_state default_dynamic_state;
-const char *
-radv_get_debug_option_name(int id);
+const char *radv_get_debug_option_name(int id);
-const char *
-radv_get_perftest_option_name(int id);
+const char *radv_get_perftest_option_name(int id);
-int
-radv_get_int_debug_option(const char *name, int default_value);
+int radv_get_int_debug_option(const char *name, int default_value);
struct radv_color_buffer_info {
- uint64_t cb_color_base;
- uint64_t cb_color_cmask;
- uint64_t cb_color_fmask;
- uint64_t cb_dcc_base;
- uint32_t cb_color_slice;
- uint32_t cb_color_view;
- uint32_t cb_color_info;
- uint32_t cb_color_attrib;
- uint32_t cb_color_attrib2; /* GFX9 and later */
- uint32_t cb_color_attrib3; /* GFX10 and later */
- uint32_t cb_dcc_control;
- uint32_t cb_color_cmask_slice;
- uint32_t cb_color_fmask_slice;
- union {
- uint32_t cb_color_pitch; // GFX6-GFX8
- uint32_t cb_mrt_epitch; // GFX9+
- };
+ uint64_t cb_color_base;
+ uint64_t cb_color_cmask;
+ uint64_t cb_color_fmask;
+ uint64_t cb_dcc_base;
+ uint32_t cb_color_slice;
+ uint32_t cb_color_view;
+ uint32_t cb_color_info;
+ uint32_t cb_color_attrib;
+ uint32_t cb_color_attrib2; /* GFX9 and later */
+ uint32_t cb_color_attrib3; /* GFX10 and later */
+ uint32_t cb_dcc_control;
+ uint32_t cb_color_cmask_slice;
+ uint32_t cb_color_fmask_slice;
+ union {
+ uint32_t cb_color_pitch; // GFX6-GFX8
+ uint32_t cb_mrt_epitch; // GFX9+
+ };
};
struct radv_ds_buffer_info {
- uint64_t db_z_read_base;
- uint64_t db_stencil_read_base;
- uint64_t db_z_write_base;
- uint64_t db_stencil_write_base;
- uint64_t db_htile_data_base;
- uint32_t db_depth_info;
- uint32_t db_z_info;
- uint32_t db_stencil_info;
- uint32_t db_depth_view;
- uint32_t db_depth_size;
- uint32_t db_depth_slice;
- uint32_t db_htile_surface;
- uint32_t pa_su_poly_offset_db_fmt_cntl;
- uint32_t db_z_info2; /* GFX9 only */
- uint32_t db_stencil_info2; /* GFX9 only */
-};
-
-void
-radv_initialise_color_surface(struct radv_device *device,
- struct radv_color_buffer_info *cb,
- struct radv_image_view *iview);
-void
-radv_initialise_ds_surface(struct radv_device *device,
- struct radv_ds_buffer_info *ds,
- struct radv_image_view *iview);
+ uint64_t db_z_read_base;
+ uint64_t db_stencil_read_base;
+ uint64_t db_z_write_base;
+ uint64_t db_stencil_write_base;
+ uint64_t db_htile_data_base;
+ uint32_t db_depth_info;
+ uint32_t db_z_info;
+ uint32_t db_stencil_info;
+ uint32_t db_depth_view;
+ uint32_t db_depth_size;
+ uint32_t db_depth_slice;
+ uint32_t db_htile_surface;
+ uint32_t pa_su_poly_offset_db_fmt_cntl;
+ uint32_t db_z_info2; /* GFX9 only */
+ uint32_t db_stencil_info2; /* GFX9 only */
+};
+
+void radv_initialise_color_surface(struct radv_device *device, struct radv_color_buffer_info *cb,
+ struct radv_image_view *iview);
+void radv_initialise_ds_surface(struct radv_device *device, struct radv_ds_buffer_info *ds,
+ struct radv_image_view *iview);
/**
* Attachment state when recording a renderpass instance.
@@ -1263,205 +1248,205 @@ radv_initialise_ds_surface(struct radv_device *device,
* The clear value is valid only if there exists a pending clear.
*/
struct radv_attachment_state {
- VkImageAspectFlags pending_clear_aspects;
- uint32_t cleared_views;
- VkClearValue clear_value;
- VkImageLayout current_layout;
- VkImageLayout current_stencil_layout;
- bool current_in_render_loop;
- bool disable_dcc;
- struct radv_sample_locations_state sample_location;
-
- union {
- struct radv_color_buffer_info cb;
- struct radv_ds_buffer_info ds;
- };
- struct radv_image_view *iview;
+ VkImageAspectFlags pending_clear_aspects;
+ uint32_t cleared_views;
+ VkClearValue clear_value;
+ VkImageLayout current_layout;
+ VkImageLayout current_stencil_layout;
+ bool current_in_render_loop;
+ bool disable_dcc;
+ struct radv_sample_locations_state sample_location;
+
+ union {
+ struct radv_color_buffer_info cb;
+ struct radv_ds_buffer_info ds;
+ };
+ struct radv_image_view *iview;
};
struct radv_descriptor_state {
- struct radv_descriptor_set *sets[MAX_SETS];
- uint32_t dirty;
- uint32_t valid;
- struct radv_push_descriptor_set push_set;
- bool push_dirty;
- uint32_t dynamic_buffers[4 * MAX_DYNAMIC_BUFFERS];
+ struct radv_descriptor_set *sets[MAX_SETS];
+ uint32_t dirty;
+ uint32_t valid;
+ struct radv_push_descriptor_set push_set;
+ bool push_dirty;
+ uint32_t dynamic_buffers[4 * MAX_DYNAMIC_BUFFERS];
};
struct radv_subpass_sample_locs_state {
- uint32_t subpass_idx;
- struct radv_sample_locations_state sample_location;
+ uint32_t subpass_idx;
+ struct radv_sample_locations_state sample_location;
};
enum rgp_flush_bits {
- RGP_FLUSH_WAIT_ON_EOP_TS = 0x1,
- RGP_FLUSH_VS_PARTIAL_FLUSH = 0x2,
- RGP_FLUSH_PS_PARTIAL_FLUSH = 0x4,
- RGP_FLUSH_CS_PARTIAL_FLUSH = 0x8,
- RGP_FLUSH_PFP_SYNC_ME = 0x10,
- RGP_FLUSH_SYNC_CP_DMA = 0x20,
- RGP_FLUSH_INVAL_VMEM_L0 = 0x40,
- RGP_FLUSH_INVAL_ICACHE = 0x80,
- RGP_FLUSH_INVAL_SMEM_L0 = 0x100,
- RGP_FLUSH_FLUSH_L2 = 0x200,
- RGP_FLUSH_INVAL_L2 = 0x400,
- RGP_FLUSH_FLUSH_CB = 0x800,
- RGP_FLUSH_INVAL_CB = 0x1000,
- RGP_FLUSH_FLUSH_DB = 0x2000,
- RGP_FLUSH_INVAL_DB = 0x4000,
- RGP_FLUSH_INVAL_L1 = 0x8000,
+ RGP_FLUSH_WAIT_ON_EOP_TS = 0x1,
+ RGP_FLUSH_VS_PARTIAL_FLUSH = 0x2,
+ RGP_FLUSH_PS_PARTIAL_FLUSH = 0x4,
+ RGP_FLUSH_CS_PARTIAL_FLUSH = 0x8,
+ RGP_FLUSH_PFP_SYNC_ME = 0x10,
+ RGP_FLUSH_SYNC_CP_DMA = 0x20,
+ RGP_FLUSH_INVAL_VMEM_L0 = 0x40,
+ RGP_FLUSH_INVAL_ICACHE = 0x80,
+ RGP_FLUSH_INVAL_SMEM_L0 = 0x100,
+ RGP_FLUSH_FLUSH_L2 = 0x200,
+ RGP_FLUSH_INVAL_L2 = 0x400,
+ RGP_FLUSH_FLUSH_CB = 0x800,
+ RGP_FLUSH_INVAL_CB = 0x1000,
+ RGP_FLUSH_FLUSH_DB = 0x2000,
+ RGP_FLUSH_INVAL_DB = 0x4000,
+ RGP_FLUSH_INVAL_L1 = 0x8000,
};
struct radv_cmd_state {
- /* Vertex descriptors */
- uint64_t vb_va;
- unsigned vb_size;
-
- bool predicating;
- uint64_t dirty;
-
- uint32_t prefetch_L2_mask;
-
- struct radv_pipeline * pipeline;
- struct radv_pipeline * emitted_pipeline;
- struct radv_pipeline * compute_pipeline;
- struct radv_pipeline * emitted_compute_pipeline;
- struct radv_framebuffer * framebuffer;
- struct radv_render_pass * pass;
- const struct radv_subpass * subpass;
- struct radv_dynamic_state dynamic;
- struct radv_attachment_state * attachments;
- struct radv_streamout_state streamout;
- VkRect2D render_area;
-
- uint32_t num_subpass_sample_locs;
- struct radv_subpass_sample_locs_state * subpass_sample_locs;
-
- /* Index buffer */
- struct radv_buffer *index_buffer;
- uint64_t index_offset;
- uint32_t index_type;
- uint32_t max_index_count;
- uint64_t index_va;
- int32_t last_index_type;
-
- int32_t last_primitive_reset_en;
- uint32_t last_primitive_reset_index;
- enum radv_cmd_flush_bits flush_bits;
- unsigned active_occlusion_queries;
- bool perfect_occlusion_queries_enabled;
- unsigned active_pipeline_queries;
- unsigned active_pipeline_gds_queries;
- uint32_t trace_id;
- uint32_t last_ia_multi_vgt_param;
-
- uint32_t last_num_instances;
- uint32_t last_first_instance;
- uint32_t last_vertex_offset;
- uint32_t last_drawid;
-
- uint32_t last_sx_ps_downconvert;
- uint32_t last_sx_blend_opt_epsilon;
- uint32_t last_sx_blend_opt_control;
-
- /* Whether CP DMA is busy/idle. */
- bool dma_is_busy;
-
- /* Conditional rendering info. */
- uint8_t predication_op; /* 32-bit or 64-bit predicate value */
- int predication_type; /* -1: disabled, 0: normal, 1: inverted */
- uint64_t predication_va;
-
- /* Inheritance info. */
- VkQueryPipelineStatisticFlags inherited_pipeline_statistics;
-
- bool context_roll_without_scissor_emitted;
-
- /* SQTT related state. */
- uint32_t current_event_type;
- uint32_t num_events;
- uint32_t num_layout_transitions;
- bool pending_sqtt_barrier_end;
- enum rgp_flush_bits sqtt_flush_bits;
-
- uint8_t cb_mip[MAX_RTS];
+ /* Vertex descriptors */
+ uint64_t vb_va;
+ unsigned vb_size;
+
+ bool predicating;
+ uint64_t dirty;
+
+ uint32_t prefetch_L2_mask;
+
+ struct radv_pipeline *pipeline;
+ struct radv_pipeline *emitted_pipeline;
+ struct radv_pipeline *compute_pipeline;
+ struct radv_pipeline *emitted_compute_pipeline;
+ struct radv_framebuffer *framebuffer;
+ struct radv_render_pass *pass;
+ const struct radv_subpass *subpass;
+ struct radv_dynamic_state dynamic;
+ struct radv_attachment_state *attachments;
+ struct radv_streamout_state streamout;
+ VkRect2D render_area;
+
+ uint32_t num_subpass_sample_locs;
+ struct radv_subpass_sample_locs_state *subpass_sample_locs;
+
+ /* Index buffer */
+ struct radv_buffer *index_buffer;
+ uint64_t index_offset;
+ uint32_t index_type;
+ uint32_t max_index_count;
+ uint64_t index_va;
+ int32_t last_index_type;
+
+ int32_t last_primitive_reset_en;
+ uint32_t last_primitive_reset_index;
+ enum radv_cmd_flush_bits flush_bits;
+ unsigned active_occlusion_queries;
+ bool perfect_occlusion_queries_enabled;
+ unsigned active_pipeline_queries;
+ unsigned active_pipeline_gds_queries;
+ uint32_t trace_id;
+ uint32_t last_ia_multi_vgt_param;
+
+ uint32_t last_num_instances;
+ uint32_t last_first_instance;
+ uint32_t last_vertex_offset;
+ uint32_t last_drawid;
+
+ uint32_t last_sx_ps_downconvert;
+ uint32_t last_sx_blend_opt_epsilon;
+ uint32_t last_sx_blend_opt_control;
+
+ /* Whether CP DMA is busy/idle. */
+ bool dma_is_busy;
+
+ /* Conditional rendering info. */
+ uint8_t predication_op; /* 32-bit or 64-bit predicate value */
+ int predication_type; /* -1: disabled, 0: normal, 1: inverted */
+ uint64_t predication_va;
+
+ /* Inheritance info. */
+ VkQueryPipelineStatisticFlags inherited_pipeline_statistics;
+
+ bool context_roll_without_scissor_emitted;
+
+ /* SQTT related state. */
+ uint32_t current_event_type;
+ uint32_t num_events;
+ uint32_t num_layout_transitions;
+ bool pending_sqtt_barrier_end;
+ enum rgp_flush_bits sqtt_flush_bits;
+
+ uint8_t cb_mip[MAX_RTS];
};
struct radv_cmd_pool {
- struct vk_object_base base;
- VkAllocationCallbacks alloc;
- struct list_head cmd_buffers;
- struct list_head free_cmd_buffers;
- uint32_t queue_family_index;
+ struct vk_object_base base;
+ VkAllocationCallbacks alloc;
+ struct list_head cmd_buffers;
+ struct list_head free_cmd_buffers;
+ uint32_t queue_family_index;
};
struct radv_cmd_buffer_upload {
- uint8_t *map;
- unsigned offset;
- uint64_t size;
- struct radeon_winsys_bo *upload_bo;
- struct list_head list;
+ uint8_t *map;
+ unsigned offset;
+ uint64_t size;
+ struct radeon_winsys_bo *upload_bo;
+ struct list_head list;
};
enum radv_cmd_buffer_status {
- RADV_CMD_BUFFER_STATUS_INVALID,
- RADV_CMD_BUFFER_STATUS_INITIAL,
- RADV_CMD_BUFFER_STATUS_RECORDING,
- RADV_CMD_BUFFER_STATUS_EXECUTABLE,
- RADV_CMD_BUFFER_STATUS_PENDING,
+ RADV_CMD_BUFFER_STATUS_INVALID,
+ RADV_CMD_BUFFER_STATUS_INITIAL,
+ RADV_CMD_BUFFER_STATUS_RECORDING,
+ RADV_CMD_BUFFER_STATUS_EXECUTABLE,
+ RADV_CMD_BUFFER_STATUS_PENDING,
};
struct radv_cmd_buffer {
- struct vk_object_base base;
+ struct vk_object_base base;
- struct radv_device * device;
+ struct radv_device *device;
- struct radv_cmd_pool * pool;
- struct list_head pool_link;
+ struct radv_cmd_pool *pool;
+ struct list_head pool_link;
- VkCommandBufferUsageFlags usage_flags;
- VkCommandBufferLevel level;
- enum radv_cmd_buffer_status status;
- struct radeon_cmdbuf *cs;
- struct radv_cmd_state state;
- struct radv_vertex_binding vertex_bindings[MAX_VBS];
- struct radv_streamout_binding streamout_bindings[MAX_SO_BUFFERS];
- uint32_t queue_family_index;
+ VkCommandBufferUsageFlags usage_flags;
+ VkCommandBufferLevel level;
+ enum radv_cmd_buffer_status status;
+ struct radeon_cmdbuf *cs;
+ struct radv_cmd_state state;
+ struct radv_vertex_binding vertex_bindings[MAX_VBS];
+ struct radv_streamout_binding streamout_bindings[MAX_SO_BUFFERS];
+ uint32_t queue_family_index;
- uint8_t push_constants[MAX_PUSH_CONSTANTS_SIZE];
- VkShaderStageFlags push_constant_stages;
- struct radv_descriptor_set_header meta_push_descriptors;
+ uint8_t push_constants[MAX_PUSH_CONSTANTS_SIZE];
+ VkShaderStageFlags push_constant_stages;
+ struct radv_descriptor_set_header meta_push_descriptors;
- struct radv_descriptor_state descriptors[MAX_BIND_POINTS];
+ struct radv_descriptor_state descriptors[MAX_BIND_POINTS];
- struct radv_cmd_buffer_upload upload;
+ struct radv_cmd_buffer_upload upload;
- uint32_t scratch_size_per_wave_needed;
- uint32_t scratch_waves_wanted;
- uint32_t compute_scratch_size_per_wave_needed;
- uint32_t compute_scratch_waves_wanted;
- uint32_t esgs_ring_size_needed;
- uint32_t gsvs_ring_size_needed;
- bool tess_rings_needed;
- bool gds_needed; /* for GFX10 streamout and NGG GS queries */
- bool gds_oa_needed; /* for GFX10 streamout */
- bool sample_positions_needed;
+ uint32_t scratch_size_per_wave_needed;
+ uint32_t scratch_waves_wanted;
+ uint32_t compute_scratch_size_per_wave_needed;
+ uint32_t compute_scratch_waves_wanted;
+ uint32_t esgs_ring_size_needed;
+ uint32_t gsvs_ring_size_needed;
+ bool tess_rings_needed;
+ bool gds_needed; /* for GFX10 streamout and NGG GS queries */
+ bool gds_oa_needed; /* for GFX10 streamout */
+ bool sample_positions_needed;
- VkResult record_result;
+ VkResult record_result;
- uint64_t gfx9_fence_va;
- uint32_t gfx9_fence_idx;
- uint64_t gfx9_eop_bug_va;
+ uint64_t gfx9_fence_va;
+ uint32_t gfx9_fence_idx;
+ uint64_t gfx9_eop_bug_va;
- /**
- * Whether a query pool has been resetted and we have to flush caches.
- */
- bool pending_reset_query;
+ /**
+    * Whether a query pool has been reset and we have to flush caches.
+ */
+ bool pending_reset_query;
- /**
- * Bitmask of pending active query flushes.
- */
- enum radv_cmd_flush_bits active_query_flush_bits;
+ /**
+ * Bitmask of pending active query flushes.
+ */
+ enum radv_cmd_flush_bits active_query_flush_bits;
};
struct radv_image;
@@ -1469,162 +1454,130 @@ struct radv_image_view;
bool radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer);
-void si_emit_graphics(struct radv_device *device,
- struct radeon_cmdbuf *cs);
-void si_emit_compute(struct radv_device *device,
- struct radeon_cmdbuf *cs);
+void si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs);
+void si_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs);
void cik_create_gfx_config(struct radv_device *device);
-void si_write_viewport(struct radeon_cmdbuf *cs, int first_vp,
- int count, const VkViewport *viewports);
-void si_write_scissors(struct radeon_cmdbuf *cs, int first,
- int count, const VkRect2D *scissors,
- const VkViewport *viewports, bool can_use_guardband);
-uint32_t si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer,
- bool instanced_draw, bool indirect_draw,
- bool count_from_stream_output,
- uint32_t draw_vertex_count,
- unsigned topology);
-void si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs,
- enum chip_class chip_class,
- bool is_mec,
- unsigned event, unsigned event_flags,
- unsigned dst_sel, unsigned data_sel,
- uint64_t va,
- uint32_t new_fence,
- uint64_t gfx9_eop_bug_va);
-
-void radv_cp_wait_mem(struct radeon_cmdbuf *cs, uint32_t op, uint64_t va,
- uint32_t ref, uint32_t mask);
-void si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
- enum chip_class chip_class,
- uint32_t *fence_ptr, uint64_t va,
- bool is_mec,
- enum radv_cmd_flush_bits flush_bits,
- enum rgp_flush_bits *sqtt_flush_bits,
- uint64_t gfx9_eop_bug_va);
+void si_write_viewport(struct radeon_cmdbuf *cs, int first_vp, int count,
+ const VkViewport *viewports);
+void si_write_scissors(struct radeon_cmdbuf *cs, int first, int count, const VkRect2D *scissors,
+ const VkViewport *viewports, bool can_use_guardband);
+uint32_t si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_draw,
+ bool indirect_draw, bool count_from_stream_output,
+ uint32_t draw_vertex_count, unsigned topology);
+void si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum chip_class chip_class, bool is_mec,
+ unsigned event, unsigned event_flags, unsigned dst_sel,
+ unsigned data_sel, uint64_t va, uint32_t new_fence,
+ uint64_t gfx9_eop_bug_va);
+
+void radv_cp_wait_mem(struct radeon_cmdbuf *cs, uint32_t op, uint64_t va, uint32_t ref,
+ uint32_t mask);
+void si_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum chip_class chip_class,
+ uint32_t *fence_ptr, uint64_t va, bool is_mec,
+ enum radv_cmd_flush_bits flush_bits,
+ enum rgp_flush_bits *sqtt_flush_bits, uint64_t gfx9_eop_bug_va);
void si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer);
-void si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer,
- bool draw_visible, unsigned pred_op,
- uint64_t va);
-void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer,
- uint64_t src_va, uint64_t dest_va,
- uint64_t size);
-void si_cp_dma_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
- unsigned size);
-void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
- uint64_t size, unsigned value);
+void si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, bool draw_visible,
+ unsigned pred_op, uint64_t va);
+void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uint64_t dest_va,
+ uint64_t size);
+void si_cp_dma_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va, unsigned size);
+void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t size,
+ unsigned value);
void si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer);
void radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer);
-bool
-radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer,
- unsigned size, unsigned *out_offset, void **ptr);
-void
-radv_cmd_buffer_set_subpass(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_subpass *subpass);
-bool
-radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer,
- unsigned size, const void *data, unsigned *out_offset);
+bool radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, unsigned size,
+ unsigned *out_offset, void **ptr);
+void radv_cmd_buffer_set_subpass(struct radv_cmd_buffer *cmd_buffer,
+ const struct radv_subpass *subpass);
+bool radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer, unsigned size,
+ const void *data, unsigned *out_offset);
void radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer);
void radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer);
void radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer);
void radv_depth_stencil_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer,
- VkImageAspectFlags aspects,
- VkResolveModeFlagBits resolve_mode);
+ VkImageAspectFlags aspects,
+ VkResolveModeFlagBits resolve_mode);
void radv_cmd_buffer_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer);
void radv_depth_stencil_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer,
- VkImageAspectFlags aspects,
- VkResolveModeFlagBits resolve_mode);
+ VkImageAspectFlags aspects,
+ VkResolveModeFlagBits resolve_mode);
void radv_emit_default_sample_locations(struct radeon_cmdbuf *cs, int nr_samples);
unsigned radv_get_default_max_sample_dist(int log_samples);
void radv_device_init_msaa(struct radv_device *device);
void radv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_image_view *iview,
- VkClearDepthStencilValue ds_clear_value,
- VkImageAspectFlags aspects);
+ const struct radv_image_view *iview,
+ VkClearDepthStencilValue ds_clear_value,
+ VkImageAspectFlags aspects);
void radv_update_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_image_view *iview,
- int cb_idx,
- uint32_t color_values[2]);
+ const struct radv_image_view *iview, int cb_idx,
+ uint32_t color_values[2]);
bool radv_image_use_dcc_image_stores(const struct radv_device *device,
- const struct radv_image *image);
+ const struct radv_image *image);
bool radv_image_use_dcc_predication(const struct radv_device *device,
- const struct radv_image *image);
-void radv_update_fce_metadata(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *range, bool value);
-
-void radv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *range, bool value);
-enum radv_cmd_flush_bits
-radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer,
- VkAccessFlags src_flags,
- const struct radv_image *image);
-enum radv_cmd_flush_bits
-radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer,
- VkAccessFlags dst_flags,
- const struct radv_image *image);
-uint32_t radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_image *image,
- struct radeon_winsys_bo *bo,
- uint64_t offset, uint64_t size, uint32_t value);
+ const struct radv_image *image);
+void radv_update_fce_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *range, bool value);
+
+void radv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *range, bool value);
+enum radv_cmd_flush_bits radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer,
+ VkAccessFlags src_flags,
+ const struct radv_image *image);
+enum radv_cmd_flush_bits radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer,
+ VkAccessFlags dst_flags,
+ const struct radv_image *image);
+uint32_t radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image,
+ struct radeon_winsys_bo *bo, uint64_t offset, uint64_t size,
+ uint32_t value);
void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer);
-bool radv_get_memory_fd(struct radv_device *device,
- struct radv_device_memory *memory,
- int *pFD);
-void radv_free_memory(struct radv_device *device,
- const VkAllocationCallbacks* pAllocator,
- struct radv_device_memory *mem);
+bool radv_get_memory_fd(struct radv_device *device, struct radv_device_memory *memory, int *pFD);
+void radv_free_memory(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
+ struct radv_device_memory *mem);
static inline void
-radv_emit_shader_pointer_head(struct radeon_cmdbuf *cs,
- unsigned sh_offset, unsigned pointer_count,
- bool use_32bit_pointers)
+radv_emit_shader_pointer_head(struct radeon_cmdbuf *cs, unsigned sh_offset, unsigned pointer_count,
+ bool use_32bit_pointers)
{
- radeon_emit(cs, PKT3(PKT3_SET_SH_REG, pointer_count * (use_32bit_pointers ? 1 : 2), 0));
- radeon_emit(cs, (sh_offset - SI_SH_REG_OFFSET) >> 2);
+ radeon_emit(cs, PKT3(PKT3_SET_SH_REG, pointer_count * (use_32bit_pointers ? 1 : 2), 0));
+ radeon_emit(cs, (sh_offset - SI_SH_REG_OFFSET) >> 2);
}
static inline void
-radv_emit_shader_pointer_body(struct radv_device *device,
- struct radeon_cmdbuf *cs,
- uint64_t va, bool use_32bit_pointers)
+radv_emit_shader_pointer_body(struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t va,
+ bool use_32bit_pointers)
{
- radeon_emit(cs, va);
-
- if (use_32bit_pointers) {
- assert(va == 0 ||
- (va >> 32) == device->physical_device->rad_info.address32_hi);
- } else {
- radeon_emit(cs, va >> 32);
- }
+ radeon_emit(cs, va);
+
+ if (use_32bit_pointers) {
+ assert(va == 0 || (va >> 32) == device->physical_device->rad_info.address32_hi);
+ } else {
+ radeon_emit(cs, va >> 32);
+ }
}
static inline void
-radv_emit_shader_pointer(struct radv_device *device,
- struct radeon_cmdbuf *cs,
- uint32_t sh_offset, uint64_t va, bool global)
+radv_emit_shader_pointer(struct radv_device *device, struct radeon_cmdbuf *cs, uint32_t sh_offset,
+ uint64_t va, bool global)
{
- bool use_32bit_pointers = !global;
+ bool use_32bit_pointers = !global;
- radv_emit_shader_pointer_head(cs, sh_offset, 1, use_32bit_pointers);
- radv_emit_shader_pointer_body(device, cs, va, use_32bit_pointers);
+ radv_emit_shader_pointer_head(cs, sh_offset, 1, use_32bit_pointers);
+ radv_emit_shader_pointer_body(device, cs, va, use_32bit_pointers);
}
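For readers skimming the reformatted helpers above: radv_emit_shader_pointer() wraps the head/body pair to load one descriptor address into user SGPRs. A minimal usage sketch follows; the SGPR offset and address are illustrative assumptions, not taken from this patch.

/* Illustrative sketch only: emit a 32-bit descriptor-set pointer for the
 * vertex stage. R_00B130_SPI_SHADER_USER_DATA_VS_0 comes from sid.h, and
 * set_va is an assumed GPU address whose upper 32 bits match
 * rad_info.address32_hi, as the assert in the body helper requires. */
static inline void
emit_vs_set_pointer_example(struct radv_device *device, struct radeon_cmdbuf *cs, uint64_t set_va)
{
   radv_emit_shader_pointer(device, cs, R_00B130_SPI_SHADER_USER_DATA_VS_0, set_va,
                            false /* !global => 32-bit pointer */);
}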
static inline struct radv_descriptor_state *
-radv_get_descriptors_state(struct radv_cmd_buffer *cmd_buffer,
- VkPipelineBindPoint bind_point)
+radv_get_descriptors_state(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point)
{
- assert(bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS ||
- bind_point == VK_PIPELINE_BIND_POINT_COMPUTE);
- return &cmd_buffer->descriptors[bind_point];
+ assert(bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS ||
+ bind_point == VK_PIPELINE_BIND_POINT_COMPUTE);
+ return &cmd_buffer->descriptors[bind_point];
}
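radv_get_descriptors_state() is the accessor the descriptor-binding paths funnel through; a hedged sketch of its use (the dirty-bit convention shown is an assumption made for this example):

/* Illustrative sketch only: fetch the graphics-bind-point bookkeeping and
 * flag descriptor set 0 for re-emission. Treating `dirty` as one bit per
 * set index is an assumption for illustration. */
static inline void
mark_graphics_set0_dirty_example(struct radv_cmd_buffer *cmd_buffer)
{
   struct radv_descriptor_state *state =
      radv_get_descriptors_state(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS);
   state->dirty |= 1u << 0;
}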
/*
@@ -1633,148 +1586,142 @@ radv_get_descriptors_state(struct radv_cmd_buffer *cmd_buffer,
* Limitations: Can't call normal dispatch functions without binding or rebinding
* the compute pipeline.
*/
-void radv_unaligned_dispatch(
- struct radv_cmd_buffer *cmd_buffer,
- uint32_t x,
- uint32_t y,
- uint32_t z);
+void radv_unaligned_dispatch(struct radv_cmd_buffer *cmd_buffer, uint32_t x, uint32_t y,
+ uint32_t z);
struct radv_event {
- struct vk_object_base base;
- struct radeon_winsys_bo *bo;
- uint64_t *map;
-};
-
-#define RADV_HASH_SHADER_NO_NGG (1 << 0)
-#define RADV_HASH_SHADER_CS_WAVE32 (1 << 1)
-#define RADV_HASH_SHADER_PS_WAVE32 (1 << 2)
-#define RADV_HASH_SHADER_GE_WAVE32 (1 << 3)
-#define RADV_HASH_SHADER_LLVM (1 << 4)
-#define RADV_HASH_SHADER_DISCARD_TO_DEMOTE (1 << 5)
-#define RADV_HASH_SHADER_MRT_NAN_FIXUP (1 << 6)
-#define RADV_HASH_SHADER_INVARIANT_GEOM (1 << 7)
-#define RADV_HASH_SHADER_KEEP_STATISTICS (1 << 8)
-#define RADV_HASH_SHADER_FORCE_VRS_2x2 (1 << 9)
-#define RADV_HASH_SHADER_FORCE_VRS_2x1 (1 << 10)
-#define RADV_HASH_SHADER_FORCE_VRS_1x2 (1 << 11)
-
-void
-radv_hash_shaders(unsigned char *hash,
- const VkPipelineShaderStageCreateInfo **stages,
- const struct radv_pipeline_layout *layout,
- const struct radv_pipeline_key *key,
- uint32_t flags);
+ struct vk_object_base base;
+ struct radeon_winsys_bo *bo;
+ uint64_t *map;
+};
+
+#define RADV_HASH_SHADER_NO_NGG (1 << 0)
+#define RADV_HASH_SHADER_CS_WAVE32 (1 << 1)
+#define RADV_HASH_SHADER_PS_WAVE32 (1 << 2)
+#define RADV_HASH_SHADER_GE_WAVE32 (1 << 3)
+#define RADV_HASH_SHADER_LLVM (1 << 4)
+#define RADV_HASH_SHADER_DISCARD_TO_DEMOTE (1 << 5)
+#define RADV_HASH_SHADER_MRT_NAN_FIXUP (1 << 6)
+#define RADV_HASH_SHADER_INVARIANT_GEOM (1 << 7)
+#define RADV_HASH_SHADER_KEEP_STATISTICS (1 << 8)
+#define RADV_HASH_SHADER_FORCE_VRS_2x2 (1 << 9)
+#define RADV_HASH_SHADER_FORCE_VRS_2x1 (1 << 10)
+#define RADV_HASH_SHADER_FORCE_VRS_1x2 (1 << 11)
+
+void radv_hash_shaders(unsigned char *hash, const VkPipelineShaderStageCreateInfo **stages,
+ const struct radv_pipeline_layout *layout,
+ const struct radv_pipeline_key *key, uint32_t flags);
#define RADV_STAGE_MASK ((1 << MESA_SHADER_STAGES) - 1)
-#define radv_foreach_stage(stage, stage_bits) \
- for (gl_shader_stage stage, \
- __tmp = (gl_shader_stage)((stage_bits) & RADV_STAGE_MASK); \
- stage = ffs(__tmp) - 1, __tmp; \
- __tmp &= ~(1 << (stage)))
+#define radv_foreach_stage(stage, stage_bits) \
+ for (gl_shader_stage stage, __tmp = (gl_shader_stage)((stage_bits)&RADV_STAGE_MASK); \
+ stage = ffs(__tmp) - 1, __tmp; __tmp &= ~(1 << (stage)))
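radv_foreach_stage iterates a MESA_SHADER_* bitmask from the lowest set bit upward, clearing each bit as it goes. A small, purely illustrative fragment (process_stage is hypothetical):

/* Illustrative fragment only, assumed to sit inside a function: visits
 * MESA_SHADER_VERTEX, then MESA_SHADER_FRAGMENT. */
unsigned stage_mask = (1u << MESA_SHADER_VERTEX) | (1u << MESA_SHADER_FRAGMENT);
radv_foreach_stage(stage, stage_mask)
{
   process_stage(stage); /* hypothetical per-stage callback */
}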
extern const VkFormat radv_fs_key_format_exemplars[NUM_META_FS_KEYS];
unsigned radv_format_meta_fs_key(struct radv_device *device, VkFormat format);
struct radv_multisample_state {
- uint32_t db_eqaa;
- uint32_t pa_sc_mode_cntl_0;
- uint32_t pa_sc_mode_cntl_1;
- uint32_t pa_sc_aa_config;
- uint32_t pa_sc_aa_mask[2];
- unsigned num_samples;
+ uint32_t db_eqaa;
+ uint32_t pa_sc_mode_cntl_0;
+ uint32_t pa_sc_mode_cntl_1;
+ uint32_t pa_sc_aa_config;
+ uint32_t pa_sc_aa_mask[2];
+ unsigned num_samples;
};
struct radv_vrs_state {
- uint32_t pa_cl_vrs_cntl;
+ uint32_t pa_cl_vrs_cntl;
};
struct radv_prim_vertex_count {
- uint8_t min;
- uint8_t incr;
+ uint8_t min;
+ uint8_t incr;
};
struct radv_ia_multi_vgt_param_helpers {
- uint32_t base;
- bool partial_es_wave;
- uint8_t primgroup_size;
- bool ia_switch_on_eoi;
- bool partial_vs_wave;
+ uint32_t base;
+ bool partial_es_wave;
+ uint8_t primgroup_size;
+ bool ia_switch_on_eoi;
+ bool partial_vs_wave;
};
struct radv_binning_state {
- uint32_t pa_sc_binner_cntl_0;
- uint32_t db_dfsm_control;
+ uint32_t pa_sc_binner_cntl_0;
+ uint32_t db_dfsm_control;
};
#define SI_GS_PER_ES 128
struct radv_pipeline {
- struct vk_object_base base;
- struct radv_device * device;
- struct radv_dynamic_state dynamic_state;
-
- struct radv_pipeline_layout * layout;
-
- bool need_indirect_descriptor_sets;
- struct radv_shader_variant * shaders[MESA_SHADER_STAGES];
- struct radv_shader_variant *gs_copy_shader;
- VkShaderStageFlags active_stages;
-
- struct radeon_cmdbuf cs;
- uint32_t ctx_cs_hash;
- struct radeon_cmdbuf ctx_cs;
-
- uint32_t binding_stride[MAX_VBS];
- uint8_t num_vertex_bindings;
-
- uint32_t user_data_0[MESA_SHADER_STAGES];
- union {
- struct {
- struct radv_multisample_state ms;
- struct radv_binning_state binning;
- struct radv_vrs_state vrs;
- uint32_t spi_baryc_cntl;
- bool prim_restart_enable;
- unsigned esgs_ring_size;
- unsigned gsvs_ring_size;
- uint32_t vtx_base_sgpr;
- struct radv_ia_multi_vgt_param_helpers ia_multi_vgt_param;
- uint8_t vtx_emit_num;
- bool uses_drawid;
- bool uses_baseinstance;
- bool can_use_guardband;
- uint32_t needed_dynamic_state;
- bool disable_out_of_order_rast_for_occlusion;
- unsigned tess_patch_control_points;
- unsigned pa_su_sc_mode_cntl;
- unsigned db_depth_control;
- bool uses_dynamic_stride;
-
- /* Used for rbplus */
- uint32_t col_format;
- uint32_t cb_target_mask;
- } graphics;
- };
-
- unsigned max_waves;
- unsigned scratch_bytes_per_wave;
-
- /* Not NULL if graphics pipeline uses streamout. */
- struct radv_shader_variant *streamout_shader;
-
- /* Unique pipeline hash identifier. */
- uint64_t pipeline_hash;
-};
-
-static inline bool radv_pipeline_has_gs(const struct radv_pipeline *pipeline)
+ struct vk_object_base base;
+ struct radv_device *device;
+ struct radv_dynamic_state dynamic_state;
+
+ struct radv_pipeline_layout *layout;
+
+ bool need_indirect_descriptor_sets;
+ struct radv_shader_variant *shaders[MESA_SHADER_STAGES];
+ struct radv_shader_variant *gs_copy_shader;
+ VkShaderStageFlags active_stages;
+
+ struct radeon_cmdbuf cs;
+ uint32_t ctx_cs_hash;
+ struct radeon_cmdbuf ctx_cs;
+
+ uint32_t binding_stride[MAX_VBS];
+ uint8_t num_vertex_bindings;
+
+ uint32_t user_data_0[MESA_SHADER_STAGES];
+ union {
+ struct {
+ struct radv_multisample_state ms;
+ struct radv_binning_state binning;
+ struct radv_vrs_state vrs;
+ uint32_t spi_baryc_cntl;
+ bool prim_restart_enable;
+ unsigned esgs_ring_size;
+ unsigned gsvs_ring_size;
+ uint32_t vtx_base_sgpr;
+ struct radv_ia_multi_vgt_param_helpers ia_multi_vgt_param;
+ uint8_t vtx_emit_num;
+ bool uses_drawid;
+ bool uses_baseinstance;
+ bool can_use_guardband;
+ uint32_t needed_dynamic_state;
+ bool disable_out_of_order_rast_for_occlusion;
+ unsigned tess_patch_control_points;
+ unsigned pa_su_sc_mode_cntl;
+ unsigned db_depth_control;
+ bool uses_dynamic_stride;
+
+ /* Used for rbplus */
+ uint32_t col_format;
+ uint32_t cb_target_mask;
+ } graphics;
+ };
+
+ unsigned max_waves;
+ unsigned scratch_bytes_per_wave;
+
+ /* Not NULL if graphics pipeline uses streamout. */
+ struct radv_shader_variant *streamout_shader;
+
+ /* Unique pipeline hash identifier. */
+ uint64_t pipeline_hash;
+};
+
+static inline bool
+radv_pipeline_has_gs(const struct radv_pipeline *pipeline)
{
- return pipeline->shaders[MESA_SHADER_GEOMETRY] ? true : false;
+ return pipeline->shaders[MESA_SHADER_GEOMETRY] ? true : false;
}
-static inline bool radv_pipeline_has_tess(const struct radv_pipeline *pipeline)
+static inline bool
+radv_pipeline_has_tess(const struct radv_pipeline *pipeline)
{
- return pipeline->shaders[MESA_SHADER_TESS_CTRL] ? true : false;
+ return pipeline->shaders[MESA_SHADER_TESS_CTRL] ? true : false;
}
bool radv_pipeline_has_ngg(const struct radv_pipeline *pipeline);
@@ -1784,125 +1731,115 @@ bool radv_pipeline_has_ngg_passthrough(const struct radv_pipeline *pipeline);
bool radv_pipeline_has_gs_copy_shader(const struct radv_pipeline *pipeline);
struct radv_userdata_info *radv_lookup_user_sgpr(struct radv_pipeline *pipeline,
- gl_shader_stage stage,
- int idx);
+ gl_shader_stage stage, int idx);
struct radv_shader_variant *radv_get_shader(const struct radv_pipeline *pipeline,
- gl_shader_stage stage);
+ gl_shader_stage stage);
struct radv_graphics_pipeline_create_info {
- bool use_rectlist;
- bool db_depth_clear;
- bool db_stencil_clear;
- bool db_depth_disable_expclear;
- bool db_stencil_disable_expclear;
- bool depth_compress_disable;
- bool stencil_compress_disable;
- bool resummarize_enable;
- uint32_t custom_blend_mode;
-};
-
-VkResult
-radv_graphics_pipeline_create(VkDevice device,
- VkPipelineCache cache,
- const VkGraphicsPipelineCreateInfo *pCreateInfo,
- const struct radv_graphics_pipeline_create_info *extra,
- const VkAllocationCallbacks *alloc,
- VkPipeline *pPipeline);
+ bool use_rectlist;
+ bool db_depth_clear;
+ bool db_stencil_clear;
+ bool db_depth_disable_expclear;
+ bool db_stencil_disable_expclear;
+ bool depth_compress_disable;
+ bool stencil_compress_disable;
+ bool resummarize_enable;
+ uint32_t custom_blend_mode;
+};
+
+VkResult radv_graphics_pipeline_create(VkDevice device, VkPipelineCache cache,
+ const VkGraphicsPipelineCreateInfo *pCreateInfo,
+ const struct radv_graphics_pipeline_create_info *extra,
+ const VkAllocationCallbacks *alloc, VkPipeline *pPipeline);
struct radv_binning_settings {
- unsigned context_states_per_bin; /* allowed range: [1, 6] */
- unsigned persistent_states_per_bin; /* allowed range: [1, 32] */
- unsigned fpovs_per_batch; /* allowed range: [0, 255], 0 = unlimited */
+ unsigned context_states_per_bin; /* allowed range: [1, 6] */
+ unsigned persistent_states_per_bin; /* allowed range: [1, 32] */
+ unsigned fpovs_per_batch; /* allowed range: [0, 255], 0 = unlimited */
};
-struct radv_binning_settings
-radv_get_binning_settings(const struct radv_physical_device *pdev);
+struct radv_binning_settings radv_get_binning_settings(const struct radv_physical_device *pdev);
struct vk_format_description;
uint32_t radv_translate_buffer_dataformat(const struct util_format_description *desc,
- int first_non_void);
+ int first_non_void);
uint32_t radv_translate_buffer_numformat(const struct util_format_description *desc,
- int first_non_void);
+ int first_non_void);
bool radv_is_buffer_format_supported(VkFormat format, bool *scaled);
uint32_t radv_translate_colorformat(VkFormat format);
-uint32_t radv_translate_color_numformat(VkFormat format,
- const struct util_format_description *desc,
- int first_non_void);
+uint32_t radv_translate_color_numformat(VkFormat format, const struct util_format_description *desc,
+ int first_non_void);
uint32_t radv_colorformat_endian_swap(uint32_t colorformat);
unsigned radv_translate_colorswap(VkFormat format, bool do_endian_swap);
uint32_t radv_translate_dbformat(VkFormat format);
-uint32_t radv_translate_tex_dataformat(VkFormat format,
- const struct util_format_description *desc,
- int first_non_void);
-uint32_t radv_translate_tex_numformat(VkFormat format,
- const struct util_format_description *desc,
- int first_non_void);
-bool radv_format_pack_clear_color(VkFormat format,
- uint32_t clear_vals[2],
- VkClearColorValue *value);
+uint32_t radv_translate_tex_dataformat(VkFormat format, const struct util_format_description *desc,
+ int first_non_void);
+uint32_t radv_translate_tex_numformat(VkFormat format, const struct util_format_description *desc,
+ int first_non_void);
+bool radv_format_pack_clear_color(VkFormat format, uint32_t clear_vals[2],
+ VkClearColorValue *value);
bool radv_is_colorbuffer_format_supported(const struct radv_physical_device *pdevice,
VkFormat format, bool *blendable);
-bool radv_dcc_formats_compatible(VkFormat format1,
- VkFormat format2);
+bool radv_dcc_formats_compatible(VkFormat format1, VkFormat format2);
bool radv_is_atomic_format_supported(VkFormat format);
bool radv_device_supports_etc(struct radv_physical_device *physical_device);
struct radv_image_plane {
- VkFormat format;
- struct radeon_surf surface;
+ VkFormat format;
+ struct radeon_surf surface;
};
struct radv_image {
- struct vk_object_base base;
- VkImageType type;
- /* The original VkFormat provided by the client. This may not match any
- * of the actual surface formats.
- */
- VkFormat vk_format;
- VkImageUsageFlags usage; /**< Superset of VkImageCreateInfo::usage. */
- struct ac_surf_info info;
- VkImageTiling tiling; /** VkImageCreateInfo::tiling */
- VkImageCreateFlags flags; /** VkImageCreateInfo::flags */
-
- VkDeviceSize size;
- uint32_t alignment;
-
- unsigned queue_family_mask;
- bool exclusive;
- bool shareable;
-
- /* Set when bound */
- struct radeon_winsys_bo *bo;
- VkDeviceSize offset;
- bool tc_compatible_cmask;
-
- uint64_t clear_value_offset;
- uint64_t fce_pred_offset;
- uint64_t dcc_pred_offset;
-
- /* On some GPUs DCC needs different tiling of the metadata for
- * rendering and for display, so we're stuck with having the metadata
- * two times and then occasionally copying one into the other.
- *
- * The retile map is an array of (src index, dst index) pairs to
- * determine how it should be copied between the two.
- */
- struct radeon_winsys_bo *retile_map;
-
- /*
- * Metadata for the TC-compat zrange workaround. If the 32-bit value
- * stored at this offset is UINT_MAX, the driver will emit
- * DB_Z_INFO.ZRANGE_PRECISION=0, otherwise it will skip the
- * SET_CONTEXT_REG packet.
- */
- uint64_t tc_compat_zrange_offset;
-
- /* For VK_ANDROID_native_buffer, the WSI image owns the memory, */
- VkDeviceMemory owned_memory;
-
- unsigned plane_count;
- struct radv_image_plane planes[0];
+ struct vk_object_base base;
+ VkImageType type;
+ /* The original VkFormat provided by the client. This may not match any
+ * of the actual surface formats.
+ */
+ VkFormat vk_format;
+ VkImageUsageFlags usage; /**< Superset of VkImageCreateInfo::usage. */
+ struct ac_surf_info info;
+ VkImageTiling tiling; /** VkImageCreateInfo::tiling */
+ VkImageCreateFlags flags; /** VkImageCreateInfo::flags */
+
+ VkDeviceSize size;
+ uint32_t alignment;
+
+ unsigned queue_family_mask;
+ bool exclusive;
+ bool shareable;
+
+ /* Set when bound */
+ struct radeon_winsys_bo *bo;
+ VkDeviceSize offset;
+ bool tc_compatible_cmask;
+
+ uint64_t clear_value_offset;
+ uint64_t fce_pred_offset;
+ uint64_t dcc_pred_offset;
+
+ /* On some GPUs DCC needs different tiling of the metadata for
+ * rendering and for display, so we're stuck with having the metadata
+ * two times and then occasionally copying one into the other.
+ *
+ * The retile map is an array of (src index, dst index) pairs to
+ * determine how it should be copied between the two.
+ */
+ struct radeon_winsys_bo *retile_map;
+
+ /*
+ * Metadata for the TC-compat zrange workaround. If the 32-bit value
+ * stored at this offset is UINT_MAX, the driver will emit
+ * DB_Z_INFO.ZRANGE_PRECISION=0, otherwise it will skip the
+ * SET_CONTEXT_REG packet.
+ */
+ uint64_t tc_compat_zrange_offset;
+
+   /* For VK_ANDROID_native_buffer, the WSI image owns the memory. */
+ VkDeviceMemory owned_memory;
+
+ unsigned plane_count;
+ struct radv_image_plane planes[0];
};
/* Whether the image has a htile that is known consistent with the contents of
@@ -1912,27 +1849,17 @@ struct radv_image {
* correct results.
*/
bool radv_layout_is_htile_compressed(const struct radv_device *device,
- const struct radv_image *image,
- VkImageLayout layout,
- bool in_render_loop,
- unsigned queue_mask);
-
-bool radv_layout_can_fast_clear(const struct radv_device *device,
- const struct radv_image *image,
- VkImageLayout layout,
- bool in_render_loop,
- unsigned queue_mask);
-
-bool radv_layout_dcc_compressed(const struct radv_device *device,
- const struct radv_image *image,
- VkImageLayout layout,
- bool in_render_loop,
- unsigned queue_mask);
-
-bool radv_layout_fmask_compressed(const struct radv_device *device,
- const struct radv_image *image,
- VkImageLayout layout,
- unsigned queue_mask);
+ const struct radv_image *image, VkImageLayout layout,
+ bool in_render_loop, unsigned queue_mask);
+
+bool radv_layout_can_fast_clear(const struct radv_device *device, const struct radv_image *image,
+ VkImageLayout layout, bool in_render_loop, unsigned queue_mask);
+
+bool radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_image *image,
+ VkImageLayout layout, bool in_render_loop, unsigned queue_mask);
+
+bool radv_layout_fmask_compressed(const struct radv_device *device, const struct radv_image *image,
+ VkImageLayout layout, unsigned queue_mask);
/**
* Return whether the image has CMASK metadata for color surfaces.
@@ -1940,7 +1867,7 @@ bool radv_layout_fmask_compressed(const struct radv_device *device,
static inline bool
radv_image_has_cmask(const struct radv_image *image)
{
- return image->planes[0].surface.cmask_offset;
+ return image->planes[0].surface.cmask_offset;
}
/**
@@ -1949,7 +1876,7 @@ radv_image_has_cmask(const struct radv_image *image)
static inline bool
radv_image_has_fmask(const struct radv_image *image)
{
- return image->planes[0].surface.fmask_offset;
+ return image->planes[0].surface.fmask_offset;
}
/**
@@ -1958,7 +1885,7 @@ radv_image_has_fmask(const struct radv_image *image)
static inline bool
radv_image_has_dcc(const struct radv_image *image)
{
- return image->planes[0].surface.dcc_offset;
+ return image->planes[0].surface.dcc_offset;
}
/**
@@ -1967,7 +1894,7 @@ radv_image_has_dcc(const struct radv_image *image)
static inline bool
radv_image_is_tc_compat_cmask(const struct radv_image *image)
{
- return radv_image_has_fmask(image) && image->tc_compatible_cmask;
+ return radv_image_has_fmask(image) && image->tc_compatible_cmask;
}
/**
@@ -1976,8 +1903,7 @@ radv_image_is_tc_compat_cmask(const struct radv_image *image)
static inline bool
radv_dcc_enabled(const struct radv_image *image, unsigned level)
{
- return radv_image_has_dcc(image) &&
- level < image->planes[0].surface.num_dcc_levels;
+ return radv_image_has_dcc(image) && level < image->planes[0].surface.num_dcc_levels;
}
/**
@@ -1986,9 +1912,7 @@ radv_dcc_enabled(const struct radv_image *image, unsigned level)
static inline bool
radv_image_has_CB_metadata(const struct radv_image *image)
{
- return radv_image_has_cmask(image) ||
- radv_image_has_fmask(image) ||
- radv_image_has_dcc(image);
+ return radv_image_has_cmask(image) || radv_image_has_fmask(image) || radv_image_has_dcc(image);
}
/**
@@ -1997,7 +1921,7 @@ radv_image_has_CB_metadata(const struct radv_image *image)
static inline bool
radv_image_has_htile(const struct radv_image *image)
{
- return image->planes[0].surface.htile_size;
+ return image->planes[0].surface.htile_size;
}
/**
@@ -2006,8 +1930,7 @@ radv_image_has_htile(const struct radv_image *image)
static inline bool
radv_htile_enabled(const struct radv_image *image, unsigned level)
{
- return radv_image_has_htile(image) &&
- level < image->planes[0].surface.num_htile_levels;
+ return radv_image_has_htile(image) && level < image->planes[0].surface.num_htile_levels;
}
/**
@@ -2016,8 +1939,8 @@ radv_htile_enabled(const struct radv_image *image, unsigned level)
static inline bool
radv_image_is_tc_compat_htile(const struct radv_image *image)
{
- return radv_image_has_htile(image) &&
- (image->planes[0].surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE);
+ return radv_image_has_htile(image) &&
+ (image->planes[0].surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE);
}
/**
@@ -2025,559 +1948,509 @@ radv_image_is_tc_compat_htile(const struct radv_image *image)
* improve HiZ Z-Range precision.
*/
static inline bool
-radv_image_tile_stencil_disabled(const struct radv_device *device,
- const struct radv_image *image)
+radv_image_tile_stencil_disabled(const struct radv_device *device, const struct radv_image *image)
{
- if (device->physical_device->rad_info.chip_class >= GFX9) {
- return !vk_format_has_stencil(image->vk_format);
- } else {
- /* Due to a hw bug, TILE_STENCIL_DISABLE must be set to 0 for
- * the TC-compat ZRANGE issue even if no stencil is used.
- */
- return !vk_format_has_stencil(image->vk_format) &&
- !radv_image_is_tc_compat_htile(image);
- }
+ if (device->physical_device->rad_info.chip_class >= GFX9) {
+ return !vk_format_has_stencil(image->vk_format);
+ } else {
+ /* Due to a hw bug, TILE_STENCIL_DISABLE must be set to 0 for
+ * the TC-compat ZRANGE issue even if no stencil is used.
+ */
+ return !vk_format_has_stencil(image->vk_format) && !radv_image_is_tc_compat_htile(image);
+ }
}
static inline bool
radv_image_has_clear_value(const struct radv_image *image)
{
- return image->clear_value_offset != 0;
+ return image->clear_value_offset != 0;
}
static inline uint64_t
-radv_image_get_fast_clear_va(const struct radv_image *image,
- uint32_t base_level)
+radv_image_get_fast_clear_va(const struct radv_image *image, uint32_t base_level)
{
- assert(radv_image_has_clear_value(image));
+ assert(radv_image_has_clear_value(image));
- uint64_t va = radv_buffer_get_va(image->bo);
- va += image->offset + image->clear_value_offset + base_level * 8;
- return va;
+ uint64_t va = radv_buffer_get_va(image->bo);
+ va += image->offset + image->clear_value_offset + base_level * 8;
+ return va;
}
static inline uint64_t
-radv_image_get_fce_pred_va(const struct radv_image *image,
- uint32_t base_level)
+radv_image_get_fce_pred_va(const struct radv_image *image, uint32_t base_level)
{
- assert(image->fce_pred_offset != 0);
+ assert(image->fce_pred_offset != 0);
- uint64_t va = radv_buffer_get_va(image->bo);
- va += image->offset + image->fce_pred_offset + base_level * 8;
- return va;
+ uint64_t va = radv_buffer_get_va(image->bo);
+ va += image->offset + image->fce_pred_offset + base_level * 8;
+ return va;
}
static inline uint64_t
-radv_image_get_dcc_pred_va(const struct radv_image *image,
- uint32_t base_level)
+radv_image_get_dcc_pred_va(const struct radv_image *image, uint32_t base_level)
{
- assert(image->dcc_pred_offset != 0);
+ assert(image->dcc_pred_offset != 0);
- uint64_t va = radv_buffer_get_va(image->bo);
- va += image->offset + image->dcc_pred_offset + base_level * 8;
- return va;
+ uint64_t va = radv_buffer_get_va(image->bo);
+ va += image->offset + image->dcc_pred_offset + base_level * 8;
+ return va;
}
static inline uint64_t
-radv_get_tc_compat_zrange_va(const struct radv_image *image,
- uint32_t base_level)
+radv_get_tc_compat_zrange_va(const struct radv_image *image, uint32_t base_level)
{
- assert(image->tc_compat_zrange_offset != 0);
+ assert(image->tc_compat_zrange_offset != 0);
- uint64_t va = radv_buffer_get_va(image->bo);
- va += image->offset + image->tc_compat_zrange_offset + base_level * 4;
- return va;
+ uint64_t va = radv_buffer_get_va(image->bo);
+ va += image->offset + image->tc_compat_zrange_offset + base_level * 4;
+ return va;
}
static inline uint64_t
-radv_get_ds_clear_value_va(const struct radv_image *image,
- uint32_t base_level)
+radv_get_ds_clear_value_va(const struct radv_image *image, uint32_t base_level)
{
- assert(radv_image_has_clear_value(image));
+ assert(radv_image_has_clear_value(image));
- uint64_t va = radv_buffer_get_va(image->bo);
- va += image->offset + image->clear_value_offset + base_level * 8;
- return va;
+ uint64_t va = radv_buffer_get_va(image->bo);
+ va += image->offset + image->clear_value_offset + base_level * 8;
+ return va;
}
static inline uint32_t
-radv_get_htile_initial_value(const struct radv_device *device,
- const struct radv_image *image)
+radv_get_htile_initial_value(const struct radv_device *device, const struct radv_image *image)
{
- uint32_t initial_value;
-
- if (radv_image_tile_stencil_disabled(device, image)) {
- /* Z only (no stencil):
- *
- * |31 18|17 4|3 0|
- * +---------+---------+-------+
- * | Max Z | Min Z | ZMask |
- */
- initial_value = 0xfffc000f;
- } else {
- /* Z and stencil:
- *
- * |31 12|11 10|9 8|7 6|5 4|3 0|
- * +-----------+-----+------+-----+-----+-------+
- * | Z Range | | SMem | SR1 | SR0 | ZMask |
- *
- * SR0/SR1 contains the stencil test results. Initializing
- * SR0/SR1 to 0x3 means the stencil test result is unknown.
- */
- initial_value = 0xfffff3ff;
- }
-
- return initial_value;
+ uint32_t initial_value;
+
+ if (radv_image_tile_stencil_disabled(device, image)) {
+ /* Z only (no stencil):
+ *
+ * |31 18|17 4|3 0|
+ * +---------+---------+-------+
+ * | Max Z | Min Z | ZMask |
+ */
+ initial_value = 0xfffc000f;
+ } else {
+ /* Z and stencil:
+ *
+ * |31 12|11 10|9 8|7 6|5 4|3 0|
+ * +-----------+-----+------+-----+-----+-------+
+ * | Z Range | | SMem | SR1 | SR0 | ZMask |
+ *
+       * SR0/SR1 contain the stencil test results. Initializing
+ * SR0/SR1 to 0x3 means the stencil test result is unknown.
+ */
+ initial_value = 0xfffff3ff;
+ }
+
+ return initial_value;
}
-unsigned radv_image_queue_family_mask(const struct radv_image *image, uint32_t family, uint32_t queue_family);
+unsigned radv_image_queue_family_mask(const struct radv_image *image, uint32_t family,
+ uint32_t queue_family);
static inline uint32_t
-radv_get_layerCount(const struct radv_image *image,
- const VkImageSubresourceRange *range)
+radv_get_layerCount(const struct radv_image *image, const VkImageSubresourceRange *range)
{
- return range->layerCount == VK_REMAINING_ARRAY_LAYERS ?
- image->info.array_size - range->baseArrayLayer : range->layerCount;
+ return range->layerCount == VK_REMAINING_ARRAY_LAYERS
+ ? image->info.array_size - range->baseArrayLayer
+ : range->layerCount;
}
static inline uint32_t
-radv_get_levelCount(const struct radv_image *image,
- const VkImageSubresourceRange *range)
+radv_get_levelCount(const struct radv_image *image, const VkImageSubresourceRange *range)
{
- return range->levelCount == VK_REMAINING_MIP_LEVELS ?
- image->info.levels - range->baseMipLevel : range->levelCount;
+ return range->levelCount == VK_REMAINING_MIP_LEVELS ? image->info.levels - range->baseMipLevel
+ : range->levelCount;
}
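The two helpers above resolve the VK_REMAINING_ARRAY_LAYERS / VK_REMAINING_MIP_LEVELS sentinels against the image's actual dimensions. A hedged sketch of the intended usage pattern:

/* Illustrative sketch only: expand a subresource range into concrete
 * mip/layer loop bounds using the helpers above. */
static inline void
walk_subresource_range_example(const struct radv_image *image,
                               const VkImageSubresourceRange *range)
{
   uint32_t level_count = radv_get_levelCount(image, range);
   uint32_t layer_count = radv_get_layerCount(image, range);

   for (uint32_t l = 0; l < level_count; l++) {
      for (uint32_t s = 0; s < layer_count; s++) {
         uint32_t level = range->baseMipLevel + l;   /* absolute mip level */
         uint32_t layer = range->baseArrayLayer + s; /* absolute array layer */
         (void)level;
         (void)layer;
      }
   }
}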
-bool
-radv_image_is_renderable(struct radv_device *device, struct radv_image *image);
+bool radv_image_is_renderable(struct radv_device *device, struct radv_image *image);
struct radeon_bo_metadata;
-void
-radv_init_metadata(struct radv_device *device,
- struct radv_image *image,
- struct radeon_bo_metadata *metadata);
+void radv_init_metadata(struct radv_device *device, struct radv_image *image,
+ struct radeon_bo_metadata *metadata);
-void
-radv_image_override_offset_stride(struct radv_device *device,
- struct radv_image *image,
- uint64_t offset, uint32_t stride);
+void radv_image_override_offset_stride(struct radv_device *device, struct radv_image *image,
+ uint64_t offset, uint32_t stride);
union radv_descriptor {
- struct {
- uint32_t plane0_descriptor[8];
- uint32_t fmask_descriptor[8];
- };
- struct {
- uint32_t plane_descriptors[3][8];
- };
+ struct {
+ uint32_t plane0_descriptor[8];
+ uint32_t fmask_descriptor[8];
+ };
+ struct {
+ uint32_t plane_descriptors[3][8];
+ };
};
struct radv_image_view {
- struct vk_object_base base;
- struct radv_image *image; /**< VkImageViewCreateInfo::image */
- struct radeon_winsys_bo *bo;
+ struct vk_object_base base;
+ struct radv_image *image; /**< VkImageViewCreateInfo::image */
+ struct radeon_winsys_bo *bo;
- VkImageViewType type;
- VkImageAspectFlags aspect_mask;
- VkFormat vk_format;
- unsigned plane_id;
- bool multiple_planes;
- uint32_t base_layer;
- uint32_t layer_count;
- uint32_t base_mip;
- uint32_t level_count;
- VkExtent3D extent; /**< Extent of VkImageViewCreateInfo::baseMipLevel. */
+ VkImageViewType type;
+ VkImageAspectFlags aspect_mask;
+ VkFormat vk_format;
+ unsigned plane_id;
+ bool multiple_planes;
+ uint32_t base_layer;
+ uint32_t layer_count;
+ uint32_t base_mip;
+ uint32_t level_count;
+ VkExtent3D extent; /**< Extent of VkImageViewCreateInfo::baseMipLevel. */
- /* Whether the image iview supports fast clear. */
- bool support_fast_clear;
+ /* Whether the image iview supports fast clear. */
+ bool support_fast_clear;
- union radv_descriptor descriptor;
+ union radv_descriptor descriptor;
- /* Descriptor for use as a storage image as opposed to a sampled image.
- * This has a few differences for cube maps (e.g. type).
- */
- union radv_descriptor storage_descriptor;
+ /* Descriptor for use as a storage image as opposed to a sampled image.
+ * This has a few differences for cube maps (e.g. type).
+ */
+ union radv_descriptor storage_descriptor;
};
struct radv_image_create_info {
- const VkImageCreateInfo *vk_info;
- bool scanout;
- bool no_metadata_planes;
- const struct radeon_bo_metadata *bo_metadata;
+ const VkImageCreateInfo *vk_info;
+ bool scanout;
+ bool no_metadata_planes;
+ const struct radeon_bo_metadata *bo_metadata;
};
VkResult
-radv_image_create_layout(struct radv_device *device,
- struct radv_image_create_info create_info,
+radv_image_create_layout(struct radv_device *device, struct radv_image_create_info create_info,
const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *mod_info,
struct radv_image *image);
-VkResult radv_image_create(VkDevice _device,
- const struct radv_image_create_info *info,
- const VkAllocationCallbacks* alloc,
- VkImage *pImage);
+VkResult radv_image_create(VkDevice _device, const struct radv_image_create_info *info,
+ const VkAllocationCallbacks *alloc, VkImage *pImage);
-bool
-radv_are_formats_dcc_compatible(const struct radv_physical_device *pdev,
- const void *pNext, VkFormat format,
- VkImageCreateFlags flags);
+bool radv_are_formats_dcc_compatible(const struct radv_physical_device *pdev, const void *pNext,
+ VkFormat format, VkImageCreateFlags flags);
bool vi_alpha_is_on_msb(struct radv_device *device, VkFormat format);
-VkResult
-radv_image_from_gralloc(VkDevice device_h,
- const VkImageCreateInfo *base_info,
- const VkNativeBufferANDROID *gralloc_info,
- const VkAllocationCallbacks *alloc,
- VkImage *out_image_h);
-uint64_t
-radv_ahb_usage_from_vk_usage(const VkImageCreateFlags vk_create,
- const VkImageUsageFlags vk_usage);
-VkResult
-radv_import_ahb_memory(struct radv_device *device,
- struct radv_device_memory *mem,
- unsigned priority,
- const VkImportAndroidHardwareBufferInfoANDROID *info);
-VkResult
-radv_create_ahb_memory(struct radv_device *device,
- struct radv_device_memory *mem,
- unsigned priority,
- const VkMemoryAllocateInfo *pAllocateInfo);
+VkResult radv_image_from_gralloc(VkDevice device_h, const VkImageCreateInfo *base_info,
+ const VkNativeBufferANDROID *gralloc_info,
+ const VkAllocationCallbacks *alloc, VkImage *out_image_h);
+uint64_t radv_ahb_usage_from_vk_usage(const VkImageCreateFlags vk_create,
+ const VkImageUsageFlags vk_usage);
+VkResult radv_import_ahb_memory(struct radv_device *device, struct radv_device_memory *mem,
+ unsigned priority,
+ const VkImportAndroidHardwareBufferInfoANDROID *info);
+VkResult radv_create_ahb_memory(struct radv_device *device, struct radv_device_memory *mem,
+ unsigned priority, const VkMemoryAllocateInfo *pAllocateInfo);
-VkFormat
-radv_select_android_external_format(const void *next, VkFormat default_format);
+VkFormat radv_select_android_external_format(const void *next, VkFormat default_format);
bool radv_android_gralloc_supports_format(VkFormat format, VkImageUsageFlagBits usage);
struct radv_image_view_extra_create_info {
- bool disable_compression;
+ bool disable_compression;
};
-void radv_image_view_init(struct radv_image_view *view,
- struct radv_device *device,
- const VkImageViewCreateInfo *pCreateInfo,
- const struct radv_image_view_extra_create_info* extra_create_info);
+void radv_image_view_init(struct radv_image_view *view, struct radv_device *device,
+ const VkImageViewCreateInfo *pCreateInfo,
+ const struct radv_image_view_extra_create_info *extra_create_info);
VkFormat radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask);
struct radv_sampler_ycbcr_conversion {
- struct vk_object_base base;
- VkFormat format;
- VkSamplerYcbcrModelConversion ycbcr_model;
- VkSamplerYcbcrRange ycbcr_range;
- VkComponentMapping components;
- VkChromaLocation chroma_offsets[2];
- VkFilter chroma_filter;
+ struct vk_object_base base;
+ VkFormat format;
+ VkSamplerYcbcrModelConversion ycbcr_model;
+ VkSamplerYcbcrRange ycbcr_range;
+ VkComponentMapping components;
+ VkChromaLocation chroma_offsets[2];
+ VkFilter chroma_filter;
};
struct radv_buffer_view {
- struct vk_object_base base;
- struct radeon_winsys_bo *bo;
- VkFormat vk_format;
- uint64_t range; /**< VkBufferViewCreateInfo::range */
- uint32_t state[4];
+ struct vk_object_base base;
+ struct radeon_winsys_bo *bo;
+ VkFormat vk_format;
+ uint64_t range; /**< VkBufferViewCreateInfo::range */
+ uint32_t state[4];
};
-void radv_buffer_view_init(struct radv_buffer_view *view,
- struct radv_device *device,
- const VkBufferViewCreateInfo* pCreateInfo);
+void radv_buffer_view_init(struct radv_buffer_view *view, struct radv_device *device,
+ const VkBufferViewCreateInfo *pCreateInfo);
static inline struct VkExtent3D
-radv_sanitize_image_extent(const VkImageType imageType,
- const struct VkExtent3D imageExtent)
+radv_sanitize_image_extent(const VkImageType imageType, const struct VkExtent3D imageExtent)
{
- switch (imageType) {
- case VK_IMAGE_TYPE_1D:
- return (VkExtent3D) { imageExtent.width, 1, 1 };
- case VK_IMAGE_TYPE_2D:
- return (VkExtent3D) { imageExtent.width, imageExtent.height, 1 };
- case VK_IMAGE_TYPE_3D:
- return imageExtent;
- default:
- unreachable("invalid image type");
- }
+ switch (imageType) {
+ case VK_IMAGE_TYPE_1D:
+ return (VkExtent3D){imageExtent.width, 1, 1};
+ case VK_IMAGE_TYPE_2D:
+ return (VkExtent3D){imageExtent.width, imageExtent.height, 1};
+ case VK_IMAGE_TYPE_3D:
+ return imageExtent;
+ default:
+ unreachable("invalid image type");
+ }
}
static inline struct VkOffset3D
-radv_sanitize_image_offset(const VkImageType imageType,
- const struct VkOffset3D imageOffset)
+radv_sanitize_image_offset(const VkImageType imageType, const struct VkOffset3D imageOffset)
{
- switch (imageType) {
- case VK_IMAGE_TYPE_1D:
- return (VkOffset3D) { imageOffset.x, 0, 0 };
- case VK_IMAGE_TYPE_2D:
- return (VkOffset3D) { imageOffset.x, imageOffset.y, 0 };
- case VK_IMAGE_TYPE_3D:
- return imageOffset;
- default:
- unreachable("invalid image type");
- }
+ switch (imageType) {
+ case VK_IMAGE_TYPE_1D:
+ return (VkOffset3D){imageOffset.x, 0, 0};
+ case VK_IMAGE_TYPE_2D:
+ return (VkOffset3D){imageOffset.x, imageOffset.y, 0};
+ case VK_IMAGE_TYPE_3D:
+ return imageOffset;
+ default:
+ unreachable("invalid image type");
+ }
}
static inline bool
-radv_image_extent_compare(const struct radv_image *image,
- const VkExtent3D *extent)
+radv_image_extent_compare(const struct radv_image *image, const VkExtent3D *extent)
{
- if (extent->width != image->info.width ||
- extent->height != image->info.height ||
- extent->depth != image->info.depth)
- return false;
- return true;
+ if (extent->width != image->info.width || extent->height != image->info.height ||
+ extent->depth != image->info.depth)
+ return false;
+ return true;
}
struct radv_sampler {
- struct vk_object_base base;
- uint32_t state[4];
- struct radv_sampler_ycbcr_conversion *ycbcr_sampler;
- uint32_t border_color_slot;
+ struct vk_object_base base;
+ uint32_t state[4];
+ struct radv_sampler_ycbcr_conversion *ycbcr_sampler;
+ uint32_t border_color_slot;
};
struct radv_framebuffer {
- struct vk_object_base base;
- uint32_t width;
- uint32_t height;
- uint32_t layers;
+ struct vk_object_base base;
+ uint32_t width;
+ uint32_t height;
+ uint32_t layers;
- bool imageless;
+ bool imageless;
- uint32_t attachment_count;
- struct radv_image_view *attachments[0];
+ uint32_t attachment_count;
+ struct radv_image_view *attachments[0];
};
struct radv_subpass_barrier {
- VkPipelineStageFlags src_stage_mask;
- VkAccessFlags src_access_mask;
- VkAccessFlags dst_access_mask;
+ VkPipelineStageFlags src_stage_mask;
+ VkAccessFlags src_access_mask;
+ VkAccessFlags dst_access_mask;
};
void radv_subpass_barrier(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_subpass_barrier *barrier);
+ const struct radv_subpass_barrier *barrier);
struct radv_subpass_attachment {
- uint32_t attachment;
- VkImageLayout layout;
- VkImageLayout stencil_layout;
- bool in_render_loop;
+ uint32_t attachment;
+ VkImageLayout layout;
+ VkImageLayout stencil_layout;
+ bool in_render_loop;
};
struct radv_subpass {
- uint32_t attachment_count;
- struct radv_subpass_attachment * attachments;
+ uint32_t attachment_count;
+ struct radv_subpass_attachment *attachments;
- uint32_t input_count;
- uint32_t color_count;
- struct radv_subpass_attachment * input_attachments;
- struct radv_subpass_attachment * color_attachments;
- struct radv_subpass_attachment * resolve_attachments;
- struct radv_subpass_attachment * depth_stencil_attachment;
- struct radv_subpass_attachment * ds_resolve_attachment;
- VkResolveModeFlagBits depth_resolve_mode;
- VkResolveModeFlagBits stencil_resolve_mode;
+ uint32_t input_count;
+ uint32_t color_count;
+ struct radv_subpass_attachment *input_attachments;
+ struct radv_subpass_attachment *color_attachments;
+ struct radv_subpass_attachment *resolve_attachments;
+ struct radv_subpass_attachment *depth_stencil_attachment;
+ struct radv_subpass_attachment *ds_resolve_attachment;
+ VkResolveModeFlagBits depth_resolve_mode;
+ VkResolveModeFlagBits stencil_resolve_mode;
- /** Subpass has at least one color resolve attachment */
- bool has_color_resolve;
+ /** Subpass has at least one color resolve attachment */
+ bool has_color_resolve;
- /** Subpass has at least one color attachment */
- bool has_color_att;
+ /** Subpass has at least one color attachment */
+ bool has_color_att;
- struct radv_subpass_barrier start_barrier;
+ struct radv_subpass_barrier start_barrier;
- uint32_t view_mask;
+ uint32_t view_mask;
- VkSampleCountFlagBits color_sample_count;
- VkSampleCountFlagBits depth_sample_count;
- VkSampleCountFlagBits max_sample_count;
+ VkSampleCountFlagBits color_sample_count;
+ VkSampleCountFlagBits depth_sample_count;
+ VkSampleCountFlagBits max_sample_count;
- /* Whether the subpass has ingoing/outgoing external dependencies. */
- bool has_ingoing_dep;
- bool has_outgoing_dep;
+ /* Whether the subpass has ingoing/outgoing external dependencies. */
+ bool has_ingoing_dep;
+ bool has_outgoing_dep;
};
-uint32_t
-radv_get_subpass_id(struct radv_cmd_buffer *cmd_buffer);
+uint32_t radv_get_subpass_id(struct radv_cmd_buffer *cmd_buffer);
struct radv_render_pass_attachment {
- VkFormat format;
- uint32_t samples;
- VkAttachmentLoadOp load_op;
- VkAttachmentLoadOp stencil_load_op;
- VkImageLayout initial_layout;
- VkImageLayout final_layout;
- VkImageLayout stencil_initial_layout;
- VkImageLayout stencil_final_layout;
+ VkFormat format;
+ uint32_t samples;
+ VkAttachmentLoadOp load_op;
+ VkAttachmentLoadOp stencil_load_op;
+ VkImageLayout initial_layout;
+ VkImageLayout final_layout;
+ VkImageLayout stencil_initial_layout;
+ VkImageLayout stencil_final_layout;
- /* The subpass id in which the attachment will be used first/last. */
- uint32_t first_subpass_idx;
- uint32_t last_subpass_idx;
+ /* The subpass id in which the attachment will be used first/last. */
+ uint32_t first_subpass_idx;
+ uint32_t last_subpass_idx;
};
struct radv_render_pass {
- struct vk_object_base base;
- uint32_t attachment_count;
- uint32_t subpass_count;
- struct radv_subpass_attachment * subpass_attachments;
- struct radv_render_pass_attachment * attachments;
- struct radv_subpass_barrier end_barrier;
- struct radv_subpass subpasses[0];
+ struct vk_object_base base;
+ uint32_t attachment_count;
+ uint32_t subpass_count;
+ struct radv_subpass_attachment *subpass_attachments;
+ struct radv_render_pass_attachment *attachments;
+ struct radv_subpass_barrier end_barrier;
+ struct radv_subpass subpasses[0];
};
VkResult radv_device_init_meta(struct radv_device *device);
void radv_device_finish_meta(struct radv_device *device);
struct radv_query_pool {
- struct vk_object_base base;
- struct radeon_winsys_bo *bo;
- uint32_t stride;
- uint32_t availability_offset;
- uint64_t size;
- char *ptr;
- VkQueryType type;
- uint32_t pipeline_stats_mask;
+ struct vk_object_base base;
+ struct radeon_winsys_bo *bo;
+ uint32_t stride;
+ uint32_t availability_offset;
+ uint64_t size;
+ char *ptr;
+ VkQueryType type;
+ uint32_t pipeline_stats_mask;
};
typedef enum {
- RADV_SEMAPHORE_NONE,
- RADV_SEMAPHORE_SYNCOBJ,
- RADV_SEMAPHORE_TIMELINE_SYNCOBJ,
- RADV_SEMAPHORE_TIMELINE,
+ RADV_SEMAPHORE_NONE,
+ RADV_SEMAPHORE_SYNCOBJ,
+ RADV_SEMAPHORE_TIMELINE_SYNCOBJ,
+ RADV_SEMAPHORE_TIMELINE,
} radv_semaphore_kind;
struct radv_deferred_queue_submission;
struct radv_timeline_waiter {
- struct list_head list;
- struct radv_deferred_queue_submission *submission;
- uint64_t value;
+ struct list_head list;
+ struct radv_deferred_queue_submission *submission;
+ uint64_t value;
};
struct radv_timeline_point {
- struct list_head list;
+ struct list_head list;
- uint64_t value;
- uint32_t syncobj;
+ uint64_t value;
+ uint32_t syncobj;
- /* Separate from the list to accommodate CPU wait being async, as well
- * as prevent point deletion during submission. */
- unsigned wait_count;
+ /* Separate from the list to accommodate CPU wait being async, as well
+ * as prevent point deletion during submission. */
+ unsigned wait_count;
};
struct radv_timeline {
- mtx_t mutex;
+ mtx_t mutex;
- uint64_t highest_signaled;
- uint64_t highest_submitted;
+ uint64_t highest_signaled;
+ uint64_t highest_submitted;
- struct list_head points;
+ struct list_head points;
- /* Keep free points on hand so we do not have to recreate syncobjs all
- * the time. */
- struct list_head free_points;
+ /* Keep free points on hand so we do not have to recreate syncobjs all
+ * the time. */
+ struct list_head free_points;
- /* Submissions that are deferred waiting for a specific value to be
- * submitted. */
- struct list_head waiters;
+ /* Submissions that are deferred waiting for a specific value to be
+ * submitted. */
+ struct list_head waiters;
};
struct radv_timeline_syncobj {
- /* Keep syncobj first, so common-code can just handle this as
- * non-timeline syncobj. */
- uint32_t syncobj;
- uint64_t max_point; /* max submitted point. */
+ /* Keep syncobj first, so common-code can just handle this as
+ * non-timeline syncobj. */
+ uint32_t syncobj;
+ uint64_t max_point; /* max submitted point. */
};
struct radv_semaphore_part {
- radv_semaphore_kind kind;
- union {
- uint32_t syncobj;
- struct radv_timeline timeline;
- struct radv_timeline_syncobj timeline_syncobj;
- };
+ radv_semaphore_kind kind;
+ union {
+ uint32_t syncobj;
+ struct radv_timeline timeline;
+ struct radv_timeline_syncobj timeline_syncobj;
+ };
};
struct radv_semaphore {
- struct vk_object_base base;
- struct radv_semaphore_part permanent;
- struct radv_semaphore_part temporary;
-};
-
-bool radv_queue_internal_submit(struct radv_queue *queue,
- struct radeon_cmdbuf *cs);
-
-void radv_set_descriptor_set(struct radv_cmd_buffer *cmd_buffer,
- VkPipelineBindPoint bind_point,
- struct radv_descriptor_set *set,
- unsigned idx);
-
-void
-radv_update_descriptor_sets(struct radv_device *device,
- struct radv_cmd_buffer *cmd_buffer,
- VkDescriptorSet overrideSet,
- uint32_t descriptorWriteCount,
- const VkWriteDescriptorSet *pDescriptorWrites,
- uint32_t descriptorCopyCount,
- const VkCopyDescriptorSet *pDescriptorCopies);
-
-void
-radv_update_descriptor_set_with_template(struct radv_device *device,
- struct radv_cmd_buffer *cmd_buffer,
- struct radv_descriptor_set *set,
- VkDescriptorUpdateTemplate descriptorUpdateTemplate,
- const void *pData);
+ struct vk_object_base base;
+ struct radv_semaphore_part permanent;
+ struct radv_semaphore_part temporary;
+};
+
+bool radv_queue_internal_submit(struct radv_queue *queue, struct radeon_cmdbuf *cs);
+
+void radv_set_descriptor_set(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point,
+ struct radv_descriptor_set *set, unsigned idx);
+
+void radv_update_descriptor_sets(struct radv_device *device, struct radv_cmd_buffer *cmd_buffer,
+ VkDescriptorSet overrideSet, uint32_t descriptorWriteCount,
+ const VkWriteDescriptorSet *pDescriptorWrites,
+ uint32_t descriptorCopyCount,
+ const VkCopyDescriptorSet *pDescriptorCopies);
+
+void radv_update_descriptor_set_with_template(struct radv_device *device,
+ struct radv_cmd_buffer *cmd_buffer,
+ struct radv_descriptor_set *set,
+ VkDescriptorUpdateTemplate descriptorUpdateTemplate,
+ const void *pData);
void radv_meta_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer,
- VkPipelineBindPoint pipelineBindPoint,
- VkPipelineLayout _layout,
- uint32_t set,
- uint32_t descriptorWriteCount,
+ VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout _layout,
+ uint32_t set, uint32_t descriptorWriteCount,
const VkWriteDescriptorSet *pDescriptorWrites);
-uint32_t radv_init_dcc(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *range,
- uint32_t value);
+uint32_t radv_init_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *range, uint32_t value);
-uint32_t radv_init_fmask(struct radv_cmd_buffer *cmd_buffer,
- struct radv_image *image,
- const VkImageSubresourceRange *range);
+uint32_t radv_init_fmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
+ const VkImageSubresourceRange *range);
typedef enum {
- RADV_FENCE_NONE,
- RADV_FENCE_SYNCOBJ,
+ RADV_FENCE_NONE,
+ RADV_FENCE_SYNCOBJ,
} radv_fence_kind;
struct radv_fence_part {
- radv_fence_kind kind;
+ radv_fence_kind kind;
- /* DRM syncobj handle for syncobj-based fences. */
- uint32_t syncobj;
+ /* DRM syncobj handle for syncobj-based fences. */
+ uint32_t syncobj;
};
struct radv_fence {
- struct vk_object_base base;
- struct radv_fence_part permanent;
- struct radv_fence_part temporary;
+ struct vk_object_base base;
+ struct radv_fence_part permanent;
+ struct radv_fence_part temporary;
};
/* radv_nir_to_llvm.c */
struct radv_shader_args;
-void llvm_compile_shader(struct radv_device *device,
- unsigned shader_count,
- struct nir_shader *const *shaders,
- struct radv_shader_binary **binary,
- struct radv_shader_args *args);
+void llvm_compile_shader(struct radv_device *device, unsigned shader_count,
+ struct nir_shader *const *shaders, struct radv_shader_binary **binary,
+ struct radv_shader_args *args);
-unsigned radv_nir_get_max_workgroup_size(enum chip_class chip_class,
- gl_shader_stage stage,
- const struct nir_shader *nir);
+unsigned radv_nir_get_max_workgroup_size(enum chip_class chip_class, gl_shader_stage stage,
+ const struct nir_shader *nir);
/* radv_shader_info.h */
struct radv_shader_info;
struct radv_shader_variant_key;
void radv_nir_shader_info_pass(const struct nir_shader *nir,
- const struct radv_pipeline_layout *layout,
- const struct radv_shader_variant_key *key,
- struct radv_shader_info *info);
+ const struct radv_pipeline_layout *layout,
+ const struct radv_shader_variant_key *key,
+ struct radv_shader_info *info);
void radv_nir_shader_info_init(struct radv_shader_info *info);
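An aside for readers skimming the reformatted header: radv_sanitize_image_extent and radv_sanitize_image_offset above only force the unused components to 1 (extents) or 0 (offsets) for 1D/2D images and pass 3D values through unchanged. A minimal standalone sketch of the same behaviour, using just the public VkExtent3D type; the main()/printf scaffolding is illustrative and not RADV code:

   #include <stdio.h>
   #include <vulkan/vulkan.h>

   int main(void)
   {
      VkExtent3D in = {.width = 256, .height = 128, .depth = 8};

      /* Same normalization as the switch in radv_sanitize_image_extent. */
      VkExtent3D e1d = {in.width, 1, 1};         /* VK_IMAGE_TYPE_1D */
      VkExtent3D e2d = {in.width, in.height, 1}; /* VK_IMAGE_TYPE_2D */
      VkExtent3D e3d = in;                       /* VK_IMAGE_TYPE_3D */

      printf("1D: %ux%ux%u\n", e1d.width, e1d.height, e1d.depth); /* 256x1x1 */
      printf("2D: %ux%ux%u\n", e2d.width, e2d.height, e2d.depth); /* 256x128x1 */
      printf("3D: %ux%ux%u\n", e3d.width, e3d.height, e3d.depth); /* 256x128x8 */
      return 0;
   }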
@@ -2585,27 +2458,25 @@ bool radv_thread_trace_init(struct radv_device *device);
void radv_thread_trace_finish(struct radv_device *device);
bool radv_begin_thread_trace(struct radv_queue *queue);
bool radv_end_thread_trace(struct radv_queue *queue);
-bool radv_get_thread_trace(struct radv_queue *queue,
- struct ac_thread_trace *thread_trace);
-void radv_emit_thread_trace_userdata(const struct radv_device *device,
- struct radeon_cmdbuf *cs,
- const void *data, uint32_t num_dwords);
+bool radv_get_thread_trace(struct radv_queue *queue, struct ac_thread_trace *thread_trace);
+void radv_emit_thread_trace_userdata(const struct radv_device *device, struct radeon_cmdbuf *cs,
+ const void *data, uint32_t num_dwords);
/* radv_sqtt_layer_.c */
struct radv_barrier_data {
- union {
- struct {
- uint16_t depth_stencil_expand : 1;
- uint16_t htile_hiz_range_expand : 1;
- uint16_t depth_stencil_resummarize : 1;
- uint16_t dcc_decompress : 1;
- uint16_t fmask_decompress : 1;
- uint16_t fast_clear_eliminate : 1;
- uint16_t fmask_color_expand : 1;
- uint16_t init_mask_ram : 1;
- uint16_t reserved : 8;
- };
- uint16_t all;
- } layout_transitions;
+ union {
+ struct {
+ uint16_t depth_stencil_expand : 1;
+ uint16_t htile_hiz_range_expand : 1;
+ uint16_t depth_stencil_resummarize : 1;
+ uint16_t dcc_decompress : 1;
+ uint16_t fmask_decompress : 1;
+ uint16_t fast_clear_eliminate : 1;
+ uint16_t fmask_color_expand : 1;
+ uint16_t init_mask_ram : 1;
+ uint16_t reserved : 8;
+ };
+ uint16_t all;
+ } layout_transitions;
};
/**
@@ -2613,23 +2484,23 @@ struct radv_barrier_data {
* the Vulkan client (does not include PAL-defined values). (Table 15)
*/
enum rgp_barrier_reason {
- RGP_BARRIER_UNKNOWN_REASON = 0xFFFFFFFF,
+ RGP_BARRIER_UNKNOWN_REASON = 0xFFFFFFFF,
- /* External app-generated barrier reasons, i.e. API synchronization
- * commands. Range of valid values: [0x00000001 ... 0x7FFFFFFF].
- */
- RGP_BARRIER_EXTERNAL_CMD_PIPELINE_BARRIER = 0x00000001,
- RGP_BARRIER_EXTERNAL_RENDER_PASS_SYNC = 0x00000002,
- RGP_BARRIER_EXTERNAL_CMD_WAIT_EVENTS = 0x00000003,
+ /* External app-generated barrier reasons, i.e. API synchronization
+ * commands. Range of valid values: [0x00000001 ... 0x7FFFFFFF].
+ */
+ RGP_BARRIER_EXTERNAL_CMD_PIPELINE_BARRIER = 0x00000001,
+ RGP_BARRIER_EXTERNAL_RENDER_PASS_SYNC = 0x00000002,
+ RGP_BARRIER_EXTERNAL_CMD_WAIT_EVENTS = 0x00000003,
- /* Internal barrier reasons, i.e. implicit synchronization inserted by
- * the Vulkan driver. Range of valid values: [0xC0000000 ... 0xFFFFFFFE].
- */
- RGP_BARRIER_INTERNAL_BASE = 0xC0000000,
- RGP_BARRIER_INTERNAL_PRE_RESET_QUERY_POOL_SYNC = RGP_BARRIER_INTERNAL_BASE + 0,
- RGP_BARRIER_INTERNAL_POST_RESET_QUERY_POOL_SYNC = RGP_BARRIER_INTERNAL_BASE + 1,
- RGP_BARRIER_INTERNAL_GPU_EVENT_RECYCLE_STALL = RGP_BARRIER_INTERNAL_BASE + 2,
- RGP_BARRIER_INTERNAL_PRE_COPY_QUERY_POOL_RESULTS_SYNC = RGP_BARRIER_INTERNAL_BASE + 3
+ /* Internal barrier reasons, i.e. implicit synchronization inserted by
+ * the Vulkan driver. Range of valid values: [0xC0000000 ... 0xFFFFFFFE].
+ */
+ RGP_BARRIER_INTERNAL_BASE = 0xC0000000,
+ RGP_BARRIER_INTERNAL_PRE_RESET_QUERY_POOL_SYNC = RGP_BARRIER_INTERNAL_BASE + 0,
+ RGP_BARRIER_INTERNAL_POST_RESET_QUERY_POOL_SYNC = RGP_BARRIER_INTERNAL_BASE + 1,
+ RGP_BARRIER_INTERNAL_GPU_EVENT_RECYCLE_STALL = RGP_BARRIER_INTERNAL_BASE + 2,
+ RGP_BARRIER_INTERNAL_PRE_COPY_QUERY_POOL_RESULTS_SYNC = RGP_BARRIER_INTERNAL_BASE + 3
};
void radv_describe_begin_cmd_buffer(struct radv_cmd_buffer *cmd_buffer);
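One note on the RGP barrier-reason ranges documented in the enum above: app-generated reasons occupy [0x00000001, 0x7FFFFFFF], driver-internal ones occupy [0xC0000000, 0xFFFFFFFE], and 0xFFFFFFFF means unknown, so the two classes can be told apart with a simple range check. A hypothetical helper (not a RADV function) sketching that check:

   #include <stdbool.h>
   #include <stdint.h>

   /* Classify a reason value using the ranges from the enum comments above. */
   static bool
   rgp_barrier_reason_is_internal(uint32_t reason)
   {
      /* 0xFFFFFFFF (RGP_BARRIER_UNKNOWN_REASON) is deliberately excluded. */
      return reason >= 0xC0000000u && reason <= 0xFFFFFFFEu;
   }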
@@ -2637,106 +2508,107 @@ void radv_describe_end_cmd_buffer(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_draw(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_dispatch(struct radv_cmd_buffer *cmd_buffer, int x, int y, int z);
void radv_describe_begin_render_pass_clear(struct radv_cmd_buffer *cmd_buffer,
- VkImageAspectFlagBits aspects);
+ VkImageAspectFlagBits aspects);
void radv_describe_end_render_pass_clear(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_begin_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_end_render_pass_resolve(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_barrier_start(struct radv_cmd_buffer *cmd_buffer,
- enum rgp_barrier_reason reason);
+ enum rgp_barrier_reason reason);
void radv_describe_barrier_end(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_barrier_end_delayed(struct radv_cmd_buffer *cmd_buffer);
void radv_describe_layout_transition(struct radv_cmd_buffer *cmd_buffer,
- const struct radv_barrier_data *barrier);
+ const struct radv_barrier_data *barrier);
uint64_t radv_get_current_time(void);
static inline uint32_t
si_conv_gl_prim_to_vertices(unsigned gl_prim)
{
- switch (gl_prim) {
- case 0: /* GL_POINTS */
- return 1;
- case 1: /* GL_LINES */
- case 3: /* GL_LINE_STRIP */
- return 2;
- case 4: /* GL_TRIANGLES */
- case 5: /* GL_TRIANGLE_STRIP */
- return 3;
- case 0xA: /* GL_LINE_STRIP_ADJACENCY_ARB */
- return 4;
- case 0xc: /* GL_TRIANGLES_ADJACENCY_ARB */
- return 6;
- case 7: /* GL_QUADS */
- return V_028A6C_TRISTRIP;
- default:
- assert(0);
- return 0;
- }
+ switch (gl_prim) {
+ case 0: /* GL_POINTS */
+ return 1;
+ case 1: /* GL_LINES */
+ case 3: /* GL_LINE_STRIP */
+ return 2;
+ case 4: /* GL_TRIANGLES */
+ case 5: /* GL_TRIANGLE_STRIP */
+ return 3;
+ case 0xA: /* GL_LINE_STRIP_ADJACENCY_ARB */
+ return 4;
+ case 0xc: /* GL_TRIANGLES_ADJACENCY_ARB */
+ return 6;
+ case 7: /* GL_QUADS */
+ return V_028A6C_TRISTRIP;
+ default:
+ assert(0);
+ return 0;
+ }
}
-
struct radv_extra_render_pass_begin_info {
- bool disable_dcc;
+ bool disable_dcc;
};
void radv_cmd_buffer_begin_render_pass(struct radv_cmd_buffer *cmd_buffer,
- const VkRenderPassBeginInfo *pRenderPassBegin,
- const struct radv_extra_render_pass_begin_info *extra_info);
+ const VkRenderPassBeginInfo *pRenderPassBegin,
+ const struct radv_extra_render_pass_begin_info *extra_info);
void radv_cmd_buffer_end_render_pass(struct radv_cmd_buffer *cmd_buffer);
-static inline uint32_t si_translate_prim(unsigned topology)
+static inline uint32_t
+si_translate_prim(unsigned topology)
{
- switch (topology) {
- case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
- return V_008958_DI_PT_POINTLIST;
- case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
- return V_008958_DI_PT_LINELIST;
- case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
- return V_008958_DI_PT_LINESTRIP;
- case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
- return V_008958_DI_PT_TRILIST;
- case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
- return V_008958_DI_PT_TRISTRIP;
- case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
- return V_008958_DI_PT_TRIFAN;
- case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
- return V_008958_DI_PT_LINELIST_ADJ;
- case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
- return V_008958_DI_PT_LINESTRIP_ADJ;
- case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
- return V_008958_DI_PT_TRILIST_ADJ;
- case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
- return V_008958_DI_PT_TRISTRIP_ADJ;
- case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
- return V_008958_DI_PT_PATCH;
- default:
- assert(0);
- return 0;
- }
+ switch (topology) {
+ case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
+ return V_008958_DI_PT_POINTLIST;
+ case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
+ return V_008958_DI_PT_LINELIST;
+ case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
+ return V_008958_DI_PT_LINESTRIP;
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
+ return V_008958_DI_PT_TRILIST;
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
+ return V_008958_DI_PT_TRISTRIP;
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
+ return V_008958_DI_PT_TRIFAN;
+ case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
+ return V_008958_DI_PT_LINELIST_ADJ;
+ case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
+ return V_008958_DI_PT_LINESTRIP_ADJ;
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
+ return V_008958_DI_PT_TRILIST_ADJ;
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
+ return V_008958_DI_PT_TRISTRIP_ADJ;
+ case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
+ return V_008958_DI_PT_PATCH;
+ default:
+ assert(0);
+ return 0;
+ }
}
-static inline uint32_t si_translate_stencil_op(enum VkStencilOp op)
+static inline uint32_t
+si_translate_stencil_op(enum VkStencilOp op)
{
- switch (op) {
- case VK_STENCIL_OP_KEEP:
- return V_02842C_STENCIL_KEEP;
- case VK_STENCIL_OP_ZERO:
- return V_02842C_STENCIL_ZERO;
- case VK_STENCIL_OP_REPLACE:
- return V_02842C_STENCIL_REPLACE_TEST;
- case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
- return V_02842C_STENCIL_ADD_CLAMP;
- case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
- return V_02842C_STENCIL_SUB_CLAMP;
- case VK_STENCIL_OP_INVERT:
- return V_02842C_STENCIL_INVERT;
- case VK_STENCIL_OP_INCREMENT_AND_WRAP:
- return V_02842C_STENCIL_ADD_WRAP;
- case VK_STENCIL_OP_DECREMENT_AND_WRAP:
- return V_02842C_STENCIL_SUB_WRAP;
- default:
- return 0;
- }
+ switch (op) {
+ case VK_STENCIL_OP_KEEP:
+ return V_02842C_STENCIL_KEEP;
+ case VK_STENCIL_OP_ZERO:
+ return V_02842C_STENCIL_ZERO;
+ case VK_STENCIL_OP_REPLACE:
+ return V_02842C_STENCIL_REPLACE_TEST;
+ case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
+ return V_02842C_STENCIL_ADD_CLAMP;
+ case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
+ return V_02842C_STENCIL_SUB_CLAMP;
+ case VK_STENCIL_OP_INVERT:
+ return V_02842C_STENCIL_INVERT;
+ case VK_STENCIL_OP_INCREMENT_AND_WRAP:
+ return V_02842C_STENCIL_ADD_WRAP;
+ case VK_STENCIL_OP_DECREMENT_AND_WRAP:
+ return V_02842C_STENCIL_SUB_WRAP;
+ default:
+ return 0;
+ }
}
/**
@@ -2746,39 +2618,35 @@ static inline uint32_t si_translate_stencil_op(enum VkStencilOp op)
static inline bool
radv_use_llvm_for_stage(struct radv_device *device, UNUSED gl_shader_stage stage)
{
- return device->physical_device->use_llvm;
+ return device->physical_device->use_llvm;
}
-#define RADV_DEFINE_HANDLE_CASTS(__radv_type, __VkType) \
- \
- static inline struct __radv_type * \
- __radv_type ## _from_handle(__VkType _handle) \
- { \
- return (struct __radv_type *) _handle; \
- } \
- \
- static inline __VkType \
- __radv_type ## _to_handle(struct __radv_type *_obj) \
- { \
- return (__VkType) _obj; \
- }
-
-#define RADV_DEFINE_NONDISP_HANDLE_CASTS(__radv_type, __VkType) \
- \
- static inline struct __radv_type * \
- __radv_type ## _from_handle(__VkType _handle) \
- { \
- return (struct __radv_type *)(uintptr_t) _handle; \
- } \
- \
- static inline __VkType \
- __radv_type ## _to_handle(struct __radv_type *_obj) \
- { \
- return (__VkType)(uintptr_t) _obj; \
- }
-
-#define RADV_FROM_HANDLE(__radv_type, __name, __handle) \
- struct __radv_type *__name = __radv_type ## _from_handle(__handle)
+#define RADV_DEFINE_HANDLE_CASTS(__radv_type, __VkType) \
+ \
+ static inline struct __radv_type *__radv_type##_from_handle(__VkType _handle) \
+ { \
+ return (struct __radv_type *)_handle; \
+ } \
+ \
+ static inline __VkType __radv_type##_to_handle(struct __radv_type *_obj) \
+ { \
+ return (__VkType)_obj; \
+ }
+
+#define RADV_DEFINE_NONDISP_HANDLE_CASTS(__radv_type, __VkType) \
+ \
+ static inline struct __radv_type *__radv_type##_from_handle(__VkType _handle) \
+ { \
+ return (struct __radv_type *)(uintptr_t)_handle; \
+ } \
+ \
+ static inline __VkType __radv_type##_to_handle(struct __radv_type *_obj) \
+ { \
+ return (__VkType)(uintptr_t)_obj; \
+ }
+
+#define RADV_FROM_HANDLE(__radv_type, __name, __handle) \
+ struct __radv_type *__name = __radv_type##_from_handle(__handle)
RADV_DEFINE_HANDLE_CASTS(radv_cmd_buffer, VkCommandBuffer)
RADV_DEFINE_HANDLE_CASTS(radv_device, VkDevice)
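The handle-cast macros are easier to read once expanded. RADV_DEFINE_HANDLE_CASTS(radv_device, VkDevice), instantiated on the line above, boils down to the two inline casts below, and RADV_FROM_HANDLE is the one-liner used at the top of entry points. This is a hand expansion for illustration only, not generated output:

   #include <vulkan/vulkan.h>

   struct radv_device; /* an incomplete type is enough for the pointer casts */

   static inline struct radv_device *
   radv_device_from_handle(VkDevice _handle)
   {
      return (struct radv_device *)_handle;
   }

   static inline VkDevice
   radv_device_to_handle(struct radv_device *_obj)
   {
      return (VkDevice)_obj;
   }

   /* RADV_FROM_HANDLE(radv_device, device, _device) then expands to:
    *
    *    struct radv_device *device = radv_device_from_handle(_device);
    */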
diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c
index b4814d24f45..3038cfde6a4 100644
--- a/src/amd/vulkan/radv_query.c
+++ b/src/amd/vulkan/radv_query.c
@@ -24,16 +24,16 @@
*/
#include <assert.h>
+#include <fcntl.h>
#include <stdbool.h>
#include <string.h>
-#include <fcntl.h>
#include "nir/nir_builder.h"
+#include "util/u_atomic.h"
+#include "radv_cs.h"
#include "radv_meta.h"
#include "radv_private.h"
-#include "radv_cs.h"
#include "sid.h"
-#include "util/u_atomic.h"
#define TIMESTAMP_NOT_READY UINT64_MAX
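TIMESTAMP_NOT_READY is the sentinel radv uses for not-yet-written timestamp slots, so availability amounts to "the slot no longer holds the sentinel" (the timestamp query shader further down checks exactly this). A trivial host-side sketch of that test; timestamp_available is a hypothetical name, not a RADV symbol:

   #include <stdbool.h>
   #include <stdint.h>

   #define TIMESTAMP_NOT_READY UINT64_MAX

   /* A slot still holding the sentinel has not been written by the GPU yet. */
   static bool
   timestamp_available(uint64_t slot)
   {
      return slot != TIMESTAMP_NOT_READY;
   }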
@@ -43,1763 +43,1631 @@ static const unsigned pipeline_statistics_indices[] = {7, 6, 3, 4, 5, 2, 1, 0, 8
static unsigned
radv_get_pipeline_statistics_index(const VkQueryPipelineStatisticFlagBits flag)
{
- int offset = ffs(flag) - 1;
- assert(offset < ARRAY_SIZE(pipeline_statistics_indices));
- return pipeline_statistics_indices[offset];
+ int offset = ffs(flag) - 1;
+ assert(offset < ARRAY_SIZE(pipeline_statistics_indices));
+ return pipeline_statistics_indices[offset];
}
-static nir_ssa_def *nir_test_flag(nir_builder *b, nir_ssa_def *flags, uint32_t flag)
+static nir_ssa_def *
+nir_test_flag(nir_builder *b, nir_ssa_def *flags, uint32_t flag)
{
- return nir_i2b(b, nir_iand(b, flags, nir_imm_int(b, flag)));
+ return nir_i2b(b, nir_iand(b, flags, nir_imm_int(b, flag)));
}
-static void radv_break_on_count(nir_builder *b, nir_variable *var, nir_ssa_def *count)
+static void
+radv_break_on_count(nir_builder *b, nir_variable *var, nir_ssa_def *count)
{
- nir_ssa_def *counter = nir_load_var(b, var);
+ nir_ssa_def *counter = nir_load_var(b, var);
- nir_push_if(b, nir_uge(b, counter, count));
- nir_jump(b, nir_jump_break);
- nir_pop_if(b, NULL);
+ nir_push_if(b, nir_uge(b, counter, count));
+ nir_jump(b, nir_jump_break);
+ nir_pop_if(b, NULL);
- counter = nir_iadd(b, counter, nir_imm_int(b, 1));
- nir_store_var(b, var, counter, 0x1);
+ counter = nir_iadd(b, counter, nir_imm_int(b, 1));
+ nir_store_var(b, var, counter, 0x1);
}
static void
radv_store_availability(nir_builder *b, nir_ssa_def *flags, nir_ssa_def *dst_buf,
nir_ssa_def *offset, nir_ssa_def *value32)
{
- nir_push_if(b, nir_test_flag(b, flags, VK_QUERY_RESULT_WITH_AVAILABILITY_BIT));
+ nir_push_if(b, nir_test_flag(b, flags, VK_QUERY_RESULT_WITH_AVAILABILITY_BIT));
- nir_push_if(b, nir_test_flag(b, flags, VK_QUERY_RESULT_64_BIT));
+ nir_push_if(b, nir_test_flag(b, flags, VK_QUERY_RESULT_64_BIT));
- nir_store_ssbo(b, nir_vec2(b, value32, nir_imm_int(b, 0)),
- dst_buf, offset, .write_mask=0x3, .align_mul=8);
+ nir_store_ssbo(b, nir_vec2(b, value32, nir_imm_int(b, 0)), dst_buf, offset, .write_mask = 0x3,
+ .align_mul = 8);
- nir_push_else(b, NULL);
+ nir_push_else(b, NULL);
- nir_store_ssbo(b, value32, dst_buf, offset, .write_mask=0x1, .align_mul=4);
+ nir_store_ssbo(b, value32, dst_buf, offset, .write_mask = 0x1, .align_mul = 4);
- nir_pop_if(b, NULL);
+ nir_pop_if(b, NULL);
- nir_pop_if(b, NULL);
+ nir_pop_if(b, NULL);
}
static nir_shader *
-build_occlusion_query_shader(struct radv_device *device) {
- /* the shader this builds is roughly
- *
- * push constants {
- * uint32_t flags;
- * uint32_t dst_stride;
- * };
- *
- * uint32_t src_stride = 16 * db_count;
- *
- * location(binding = 0) buffer dst_buf;
- * location(binding = 1) buffer src_buf;
- *
- * void main() {
- * uint64_t result = 0;
- * uint64_t src_offset = src_stride * global_id.x;
- * uint64_t dst_offset = dst_stride * global_id.x;
- * bool available = true;
- * for (int i = 0; i < db_count; ++i) {
- * if (enabled_rb_mask & (1 << i)) {
- * uint64_t start = src_buf[src_offset + 16 * i];
- * uint64_t end = src_buf[src_offset + 16 * i + 8];
- * if ((start & (1ull << 63)) && (end & (1ull << 63)))
- * result += end - start;
- * else
- * available = false;
- * }
- * }
- * uint32_t elem_size = flags & VK_QUERY_RESULT_64_BIT ? 8 : 4;
- * if ((flags & VK_QUERY_RESULT_PARTIAL_BIT) || available) {
- * if (flags & VK_QUERY_RESULT_64_BIT)
- * dst_buf[dst_offset] = result;
- * else
- * dst_buf[dst_offset] = (uint32_t)result.
- * }
- * if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
- * dst_buf[dst_offset + elem_size] = available;
- * }
- * }
- */
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "occlusion_query");
- b.shader->info.cs.local_size[0] = 64;
- b.shader->info.cs.local_size[1] = 1;
- b.shader->info.cs.local_size[2] = 1;
-
- nir_variable *result = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "result");
- nir_variable *outer_counter = nir_local_variable_create(b.impl, glsl_int_type(), "outer_counter");
- nir_variable *start = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "start");
- nir_variable *end = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "end");
- nir_variable *available = nir_local_variable_create(b.impl, glsl_bool_type(), "available");
- unsigned enabled_rb_mask = device->physical_device->rad_info.enabled_rb_mask;
- unsigned db_count = device->physical_device->rad_info.max_render_backends;
-
- nir_ssa_def *flags = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range=16);
-
- nir_ssa_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
- nir_ssa_def *src_buf = radv_meta_load_descriptor(&b, 0, 1);
-
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
- nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
- nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info.cs.local_size[0],
- b.shader->info.cs.local_size[1],
- b.shader->info.cs.local_size[2], 0);
- nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
- global_id = nir_channel(&b, global_id, 0); // We only care about x here.
-
- nir_ssa_def *input_stride = nir_imm_int(&b, db_count * 16);
- nir_ssa_def *input_base = nir_imul(&b, input_stride, global_id);
- nir_ssa_def *output_stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 4), .range=16);
- nir_ssa_def *output_base = nir_imul(&b, output_stride, global_id);
-
-
- nir_store_var(&b, result, nir_imm_int64(&b, 0), 0x1);
- nir_store_var(&b, outer_counter, nir_imm_int(&b, 0), 0x1);
- nir_store_var(&b, available, nir_imm_true(&b), 0x1);
-
- nir_push_loop(&b);
-
- nir_ssa_def *current_outer_count = nir_load_var(&b, outer_counter);
- radv_break_on_count(&b, outer_counter, nir_imm_int(&b, db_count));
-
- nir_ssa_def *enabled_cond =
- nir_iand(&b, nir_imm_int(&b, enabled_rb_mask),
- nir_ishl(&b, nir_imm_int(&b, 1), current_outer_count));
-
- nir_push_if(&b, nir_i2b(&b, enabled_cond));
-
- nir_ssa_def *load_offset = nir_imul(&b, current_outer_count, nir_imm_int(&b, 16));
- load_offset = nir_iadd(&b, input_base, load_offset);
-
- nir_ssa_def *load = nir_load_ssbo(&b, 2, 64, src_buf, load_offset, .align_mul=16);
-
- nir_store_var(&b, start, nir_channel(&b, load, 0), 0x1);
- nir_store_var(&b, end, nir_channel(&b, load, 1), 0x1);
-
- nir_ssa_def *start_done = nir_ilt(&b, nir_load_var(&b, start), nir_imm_int64(&b, 0));
- nir_ssa_def *end_done = nir_ilt(&b, nir_load_var(&b, end), nir_imm_int64(&b, 0));
-
- nir_push_if(&b, nir_iand(&b, start_done, end_done));
-
- nir_store_var(&b, result,
- nir_iadd(&b, nir_load_var(&b, result),
- nir_isub(&b, nir_load_var(&b, end),
- nir_load_var(&b, start))), 0x1);
-
- nir_push_else(&b, NULL);
-
- nir_store_var(&b, available, nir_imm_false(&b), 0x1);
-
- nir_pop_if(&b, NULL);
- nir_pop_if(&b, NULL);
- nir_pop_loop(&b, NULL);
-
- /* Store the result if complete or if partial results have been requested. */
-
- nir_ssa_def *result_is_64bit = nir_test_flag(&b, flags, VK_QUERY_RESULT_64_BIT);
- nir_ssa_def *result_size = nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 8), nir_imm_int(&b, 4));
- nir_push_if(&b,
- nir_ior(&b,
- nir_test_flag(&b, flags, VK_QUERY_RESULT_PARTIAL_BIT),
- nir_load_var(&b, available)));
-
- nir_push_if(&b, result_is_64bit);
-
- nir_store_ssbo(&b, nir_load_var(&b, result), dst_buf, output_base,
- .write_mask=0x1, .align_mul=8);
+build_occlusion_query_shader(struct radv_device *device)
+{
+ /* the shader this builds is roughly
+ *
+ * push constants {
+ * uint32_t flags;
+ * uint32_t dst_stride;
+ * };
+ *
+ * uint32_t src_stride = 16 * db_count;
+ *
+ * location(binding = 0) buffer dst_buf;
+ * location(binding = 1) buffer src_buf;
+ *
+ * void main() {
+ * uint64_t result = 0;
+ * uint64_t src_offset = src_stride * global_id.x;
+ * uint64_t dst_offset = dst_stride * global_id.x;
+ * bool available = true;
+ * for (int i = 0; i < db_count; ++i) {
+ * if (enabled_rb_mask & (1 << i)) {
+ * uint64_t start = src_buf[src_offset + 16 * i];
+ * uint64_t end = src_buf[src_offset + 16 * i + 8];
+ * if ((start & (1ull << 63)) && (end & (1ull << 63)))
+ * result += end - start;
+ * else
+ * available = false;
+ * }
+ * }
+ * uint32_t elem_size = flags & VK_QUERY_RESULT_64_BIT ? 8 : 4;
+ * if ((flags & VK_QUERY_RESULT_PARTIAL_BIT) || available) {
+ * if (flags & VK_QUERY_RESULT_64_BIT)
+ * dst_buf[dst_offset] = result;
+ * else
+ * dst_buf[dst_offset] = (uint32_t)result.
+ * }
+ * if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
+ * dst_buf[dst_offset + elem_size] = available;
+ * }
+ * }
+ */
+ nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "occlusion_query");
+ b.shader->info.cs.local_size[0] = 64;
+ b.shader->info.cs.local_size[1] = 1;
+ b.shader->info.cs.local_size[2] = 1;
+
+ nir_variable *result = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "result");
+ nir_variable *outer_counter =
+ nir_local_variable_create(b.impl, glsl_int_type(), "outer_counter");
+ nir_variable *start = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "start");
+ nir_variable *end = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "end");
+ nir_variable *available = nir_local_variable_create(b.impl, glsl_bool_type(), "available");
+ unsigned enabled_rb_mask = device->physical_device->rad_info.enabled_rb_mask;
+ unsigned db_count = device->physical_device->rad_info.max_render_backends;
+
+ nir_ssa_def *flags = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range = 16);
+
+ nir_ssa_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
+ nir_ssa_def *src_buf = radv_meta_load_descriptor(&b, 0, 1);
+
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
+ nir_ssa_def *block_size =
+ nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1],
+ b.shader->info.cs.local_size[2], 0);
+ nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+ global_id = nir_channel(&b, global_id, 0); // We only care about x here.
+
+ nir_ssa_def *input_stride = nir_imm_int(&b, db_count * 16);
+ nir_ssa_def *input_base = nir_imul(&b, input_stride, global_id);
+ nir_ssa_def *output_stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 4), .range = 16);
+ nir_ssa_def *output_base = nir_imul(&b, output_stride, global_id);
+
+ nir_store_var(&b, result, nir_imm_int64(&b, 0), 0x1);
+ nir_store_var(&b, outer_counter, nir_imm_int(&b, 0), 0x1);
+ nir_store_var(&b, available, nir_imm_true(&b), 0x1);
+
+ nir_push_loop(&b);
+
+ nir_ssa_def *current_outer_count = nir_load_var(&b, outer_counter);
+ radv_break_on_count(&b, outer_counter, nir_imm_int(&b, db_count));
+
+ nir_ssa_def *enabled_cond = nir_iand(&b, nir_imm_int(&b, enabled_rb_mask),
+ nir_ishl(&b, nir_imm_int(&b, 1), current_outer_count));
+
+ nir_push_if(&b, nir_i2b(&b, enabled_cond));
+
+ nir_ssa_def *load_offset = nir_imul(&b, current_outer_count, nir_imm_int(&b, 16));
+ load_offset = nir_iadd(&b, input_base, load_offset);
+
+ nir_ssa_def *load = nir_load_ssbo(&b, 2, 64, src_buf, load_offset, .align_mul = 16);
+
+ nir_store_var(&b, start, nir_channel(&b, load, 0), 0x1);
+ nir_store_var(&b, end, nir_channel(&b, load, 1), 0x1);
+
+ nir_ssa_def *start_done = nir_ilt(&b, nir_load_var(&b, start), nir_imm_int64(&b, 0));
+ nir_ssa_def *end_done = nir_ilt(&b, nir_load_var(&b, end), nir_imm_int64(&b, 0));
+
+ nir_push_if(&b, nir_iand(&b, start_done, end_done));
+
+ nir_store_var(&b, result,
+ nir_iadd(&b, nir_load_var(&b, result),
+ nir_isub(&b, nir_load_var(&b, end), nir_load_var(&b, start))),
+ 0x1);
+
+ nir_push_else(&b, NULL);
+
+ nir_store_var(&b, available, nir_imm_false(&b), 0x1);
+
+ nir_pop_if(&b, NULL);
+ nir_pop_if(&b, NULL);
+ nir_pop_loop(&b, NULL);
+
+ /* Store the result if complete or if partial results have been requested. */
+
+ nir_ssa_def *result_is_64bit = nir_test_flag(&b, flags, VK_QUERY_RESULT_64_BIT);
+ nir_ssa_def *result_size =
+ nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 8), nir_imm_int(&b, 4));
+ nir_push_if(&b, nir_ior(&b, nir_test_flag(&b, flags, VK_QUERY_RESULT_PARTIAL_BIT),
+ nir_load_var(&b, available)));
+
+ nir_push_if(&b, result_is_64bit);
+
+ nir_store_ssbo(&b, nir_load_var(&b, result), dst_buf, output_base, .write_mask = 0x1,
+ .align_mul = 8);
- nir_push_else(&b, NULL);
+ nir_push_else(&b, NULL);
- nir_store_ssbo(&b, nir_u2u32(&b, nir_load_var(&b, result)), dst_buf,
- output_base, .write_mask=0x1, .align_mul=8);
+ nir_store_ssbo(&b, nir_u2u32(&b, nir_load_var(&b, result)), dst_buf, output_base,
+ .write_mask = 0x1, .align_mul = 8);
- nir_pop_if(&b, NULL);
- nir_pop_if(&b, NULL);
+ nir_pop_if(&b, NULL);
+ nir_pop_if(&b, NULL);
- radv_store_availability(&b, flags, dst_buf,
- nir_iadd(&b, result_size, output_base),
- nir_b2i32(&b, nir_load_var(&b, available)));
+ radv_store_availability(&b, flags, dst_buf, nir_iadd(&b, result_size, output_base),
+ nir_b2i32(&b, nir_load_var(&b, available)));
- return b.shader;
+ return b.shader;
}
static nir_shader *
-build_pipeline_statistics_query_shader(struct radv_device *device) {
- /* the shader this builds is roughly
- *
- * push constants {
- * uint32_t flags;
- * uint32_t dst_stride;
- * uint32_t stats_mask;
- * uint32_t avail_offset;
- * };
- *
- * uint32_t src_stride = pipelinestat_block_size * 2;
- *
- * location(binding = 0) buffer dst_buf;
- * location(binding = 1) buffer src_buf;
- *
- * void main() {
- * uint64_t src_offset = src_stride * global_id.x;
- * uint64_t dst_base = dst_stride * global_id.x;
- * uint64_t dst_offset = dst_base;
- * uint32_t elem_size = flags & VK_QUERY_RESULT_64_BIT ? 8 : 4;
- * uint32_t elem_count = stats_mask >> 16;
- * uint32_t available32 = src_buf[avail_offset + 4 * global_id.x];
- * if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
- * dst_buf[dst_offset + elem_count * elem_size] = available32;
- * }
- * if ((bool)available32) {
- * // repeat 11 times:
- * if (stats_mask & (1 << 0)) {
- * uint64_t start = src_buf[src_offset + 8 * indices[0]];
- * uint64_t end = src_buf[src_offset + 8 * indices[0] + pipelinestat_block_size];
- * uint64_t result = end - start;
- * if (flags & VK_QUERY_RESULT_64_BIT)
- * dst_buf[dst_offset] = result;
- * else
- * dst_buf[dst_offset] = (uint32_t)result.
- * dst_offset += elem_size;
- * }
- * } else if (flags & VK_QUERY_RESULT_PARTIAL_BIT) {
- * // Set everything to 0 as we don't know what is valid.
- * for (int i = 0; i < elem_count; ++i)
- * dst_buf[dst_base + elem_size * i] = 0;
- * }
- * }
- */
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "pipeline_statistics_query");
- b.shader->info.cs.local_size[0] = 64;
- b.shader->info.cs.local_size[1] = 1;
- b.shader->info.cs.local_size[2] = 1;
-
- nir_variable *output_offset = nir_local_variable_create(b.impl, glsl_int_type(), "output_offset");
-
- nir_ssa_def *flags = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range=16);
- nir_ssa_def *stats_mask = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 8), .range=16);
- nir_ssa_def *avail_offset = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range=16);
-
- nir_ssa_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
- nir_ssa_def *src_buf = radv_meta_load_descriptor(&b, 0, 1);
-
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
- nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
- nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info.cs.local_size[0],
- b.shader->info.cs.local_size[1],
- b.shader->info.cs.local_size[2], 0);
- nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
- global_id = nir_channel(&b, global_id, 0); // We only care about x here.
-
- nir_ssa_def *input_stride = nir_imm_int(&b, pipelinestat_block_size * 2);
- nir_ssa_def *input_base = nir_imul(&b, input_stride, global_id);
- nir_ssa_def *output_stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 4), .range=16);
- nir_ssa_def *output_base = nir_imul(&b, output_stride, global_id);
-
-
- avail_offset = nir_iadd(&b, avail_offset,
- nir_imul(&b, global_id, nir_imm_int(&b, 4)));
-
- nir_ssa_def *available32 = nir_load_ssbo(&b, 1, 32, src_buf, avail_offset, .align_mul=4);
-
- nir_ssa_def *result_is_64bit = nir_test_flag(&b, flags, VK_QUERY_RESULT_64_BIT);
- nir_ssa_def *elem_size = nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 8), nir_imm_int(&b, 4));
- nir_ssa_def *elem_count = nir_ushr(&b, stats_mask, nir_imm_int(&b, 16));
-
- radv_store_availability(&b, flags, dst_buf,
- nir_iadd(&b, output_base, nir_imul(&b, elem_count, elem_size)),
- available32);
-
- nir_push_if(&b, nir_i2b(&b, available32));
-
- nir_store_var(&b, output_offset, output_base, 0x1);
- for (int i = 0; i < ARRAY_SIZE(pipeline_statistics_indices); ++i) {
- nir_push_if(&b, nir_test_flag(&b, stats_mask, 1u << i));
-
- nir_ssa_def *start_offset = nir_iadd(&b, input_base,
- nir_imm_int(&b, pipeline_statistics_indices[i] * 8));
- nir_ssa_def *start = nir_load_ssbo(&b, 1, 64, src_buf, start_offset, .align_mul=8);
-
- nir_ssa_def *end_offset = nir_iadd(&b, input_base,
- nir_imm_int(&b, pipeline_statistics_indices[i] * 8 + pipelinestat_block_size));
- nir_ssa_def *end = nir_load_ssbo(&b, 1, 64, src_buf, end_offset, .align_mul=8);
-
- nir_ssa_def *result = nir_isub(&b, end, start);
-
- /* Store result */
- nir_push_if(&b, result_is_64bit);
-
- nir_store_ssbo(&b, result, dst_buf, nir_load_var(&b, output_offset),
- .write_mask=0x1, .align_mul=8);
-
- nir_push_else(&b, NULL);
-
- nir_store_ssbo(&b, nir_u2u32(&b, result), dst_buf, nir_load_var(&b, output_offset),
- .write_mask=0x1, .align_mul=4);
-
- nir_pop_if(&b, NULL);
-
- nir_store_var(&b, output_offset,
- nir_iadd(&b, nir_load_var(&b, output_offset),
- elem_size), 0x1);
+build_pipeline_statistics_query_shader(struct radv_device *device)
+{
+ /* the shader this builds is roughly
+ *
+ * push constants {
+ * uint32_t flags;
+ * uint32_t dst_stride;
+ * uint32_t stats_mask;
+ * uint32_t avail_offset;
+ * };
+ *
+ * uint32_t src_stride = pipelinestat_block_size * 2;
+ *
+ * location(binding = 0) buffer dst_buf;
+ * location(binding = 1) buffer src_buf;
+ *
+ * void main() {
+ * uint64_t src_offset = src_stride * global_id.x;
+ * uint64_t dst_base = dst_stride * global_id.x;
+ * uint64_t dst_offset = dst_base;
+ * uint32_t elem_size = flags & VK_QUERY_RESULT_64_BIT ? 8 : 4;
+ * uint32_t elem_count = stats_mask >> 16;
+ * uint32_t available32 = src_buf[avail_offset + 4 * global_id.x];
+ * if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
+ * dst_buf[dst_offset + elem_count * elem_size] = available32;
+ * }
+ * if ((bool)available32) {
+ * // repeat 11 times:
+ * if (stats_mask & (1 << 0)) {
+ * uint64_t start = src_buf[src_offset + 8 * indices[0]];
+ *        uint64_t end = src_buf[src_offset + 8 * indices[0] + pipelinestat_block_size];
+ *        uint64_t result = end - start;
+ *        if (flags & VK_QUERY_RESULT_64_BIT)
+ * dst_buf[dst_offset] = result;
+ * else
+ * dst_buf[dst_offset] = (uint32_t)result.
+ * dst_offset += elem_size;
+ * }
+ * } else if (flags & VK_QUERY_RESULT_PARTIAL_BIT) {
+ * // Set everything to 0 as we don't know what is valid.
+ * for (int i = 0; i < elem_count; ++i)
+ * dst_buf[dst_base + elem_size * i] = 0;
+ * }
+ * }
+ */
+ nir_builder b =
+ nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "pipeline_statistics_query");
+ b.shader->info.cs.local_size[0] = 64;
+ b.shader->info.cs.local_size[1] = 1;
+ b.shader->info.cs.local_size[2] = 1;
+
+ nir_variable *output_offset =
+ nir_local_variable_create(b.impl, glsl_int_type(), "output_offset");
+
+ nir_ssa_def *flags = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range = 16);
+ nir_ssa_def *stats_mask = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 8), .range = 16);
+ nir_ssa_def *avail_offset = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);
+
+ nir_ssa_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
+ nir_ssa_def *src_buf = radv_meta_load_descriptor(&b, 0, 1);
+
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
+ nir_ssa_def *block_size =
+ nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1],
+ b.shader->info.cs.local_size[2], 0);
+ nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+ global_id = nir_channel(&b, global_id, 0); // We only care about x here.
+
+ nir_ssa_def *input_stride = nir_imm_int(&b, pipelinestat_block_size * 2);
+ nir_ssa_def *input_base = nir_imul(&b, input_stride, global_id);
+ nir_ssa_def *output_stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 4), .range = 16);
+ nir_ssa_def *output_base = nir_imul(&b, output_stride, global_id);
+
+ avail_offset = nir_iadd(&b, avail_offset, nir_imul(&b, global_id, nir_imm_int(&b, 4)));
+
+ nir_ssa_def *available32 = nir_load_ssbo(&b, 1, 32, src_buf, avail_offset, .align_mul = 4);
+
+ nir_ssa_def *result_is_64bit = nir_test_flag(&b, flags, VK_QUERY_RESULT_64_BIT);
+ nir_ssa_def *elem_size = nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 8), nir_imm_int(&b, 4));
+ nir_ssa_def *elem_count = nir_ushr(&b, stats_mask, nir_imm_int(&b, 16));
+
+ radv_store_availability(&b, flags, dst_buf,
+ nir_iadd(&b, output_base, nir_imul(&b, elem_count, elem_size)),
+ available32);
+
+ nir_push_if(&b, nir_i2b(&b, available32));
+
+ nir_store_var(&b, output_offset, output_base, 0x1);
+ for (int i = 0; i < ARRAY_SIZE(pipeline_statistics_indices); ++i) {
+ nir_push_if(&b, nir_test_flag(&b, stats_mask, 1u << i));
+
+ nir_ssa_def *start_offset =
+ nir_iadd(&b, input_base, nir_imm_int(&b, pipeline_statistics_indices[i] * 8));
+ nir_ssa_def *start = nir_load_ssbo(&b, 1, 64, src_buf, start_offset, .align_mul = 8);
+
+ nir_ssa_def *end_offset =
+ nir_iadd(&b, input_base,
+ nir_imm_int(&b, pipeline_statistics_indices[i] * 8 + pipelinestat_block_size));
+ nir_ssa_def *end = nir_load_ssbo(&b, 1, 64, src_buf, end_offset, .align_mul = 8);
+
+ nir_ssa_def *result = nir_isub(&b, end, start);
+
+ /* Store result */
+ nir_push_if(&b, result_is_64bit);
+
+ nir_store_ssbo(&b, result, dst_buf, nir_load_var(&b, output_offset), .write_mask = 0x1,
+ .align_mul = 8);
+
+ nir_push_else(&b, NULL);
+
+ nir_store_ssbo(&b, nir_u2u32(&b, result), dst_buf, nir_load_var(&b, output_offset),
+ .write_mask = 0x1, .align_mul = 4);
+
+ nir_pop_if(&b, NULL);
+
+ nir_store_var(&b, output_offset, nir_iadd(&b, nir_load_var(&b, output_offset), elem_size),
+ 0x1);
- nir_pop_if(&b, NULL);
- }
+ nir_pop_if(&b, NULL);
+ }
- nir_push_else(&b, NULL); /* nir_i2b(&b, available32) */
+ nir_push_else(&b, NULL); /* nir_i2b(&b, available32) */
- nir_push_if(&b, nir_test_flag(&b, flags, VK_QUERY_RESULT_PARTIAL_BIT));
+ nir_push_if(&b, nir_test_flag(&b, flags, VK_QUERY_RESULT_PARTIAL_BIT));
- /* Stores zeros in all outputs. */
+ /* Stores zeros in all outputs. */
- nir_variable *counter = nir_local_variable_create(b.impl, glsl_int_type(), "counter");
- nir_store_var(&b, counter, nir_imm_int(&b, 0), 0x1);
+ nir_variable *counter = nir_local_variable_create(b.impl, glsl_int_type(), "counter");
+ nir_store_var(&b, counter, nir_imm_int(&b, 0), 0x1);
- nir_loop *loop = nir_push_loop(&b);
+ nir_loop *loop = nir_push_loop(&b);
- nir_ssa_def *current_counter = nir_load_var(&b, counter);
- radv_break_on_count(&b, counter, elem_count);
+ nir_ssa_def *current_counter = nir_load_var(&b, counter);
+ radv_break_on_count(&b, counter, elem_count);
- nir_ssa_def *output_elem = nir_iadd(&b, output_base,
- nir_imul(&b, elem_size, current_counter));
- nir_push_if(&b, result_is_64bit);
+ nir_ssa_def *output_elem = nir_iadd(&b, output_base, nir_imul(&b, elem_size, current_counter));
+ nir_push_if(&b, result_is_64bit);
- nir_store_ssbo(&b, nir_imm_int64(&b, 0), dst_buf, output_elem,
- .write_mask=0x1, .align_mul=8);
+ nir_store_ssbo(&b, nir_imm_int64(&b, 0), dst_buf, output_elem, .write_mask = 0x1,
+ .align_mul = 8);
- nir_push_else(&b, NULL);
+ nir_push_else(&b, NULL);
- nir_store_ssbo(&b, nir_imm_int(&b, 0), dst_buf, output_elem,
- .write_mask=0x1, .align_mul=4);
+ nir_store_ssbo(&b, nir_imm_int(&b, 0), dst_buf, output_elem, .write_mask = 0x1, .align_mul = 4);
- nir_pop_if(&b, NULL);
+ nir_pop_if(&b, NULL);
- nir_pop_loop(&b, loop);
- nir_pop_if(&b, NULL); /* VK_QUERY_RESULT_PARTIAL_BIT */
- nir_pop_if(&b, NULL); /* nir_i2b(&b, available32) */
- return b.shader;
+ nir_pop_loop(&b, loop);
+ nir_pop_if(&b, NULL); /* VK_QUERY_RESULT_PARTIAL_BIT */
+ nir_pop_if(&b, NULL); /* nir_i2b(&b, available32) */
+ return b.shader;
}
static nir_shader *
build_tfb_query_shader(struct radv_device *device)
{
- /* the shader this builds is roughly
- *
- * uint32_t src_stride = 32;
- *
- * location(binding = 0) buffer dst_buf;
- * location(binding = 1) buffer src_buf;
- *
- * void main() {
- * uint64_t result[2] = {};
- * bool available = false;
- * uint64_t src_offset = src_stride * global_id.x;
- * uint64_t dst_offset = dst_stride * global_id.x;
- * uint64_t *src_data = src_buf[src_offset];
- * uint32_t avail = (src_data[0] >> 32) &
- * (src_data[1] >> 32) &
- * (src_data[2] >> 32) &
- * (src_data[3] >> 32);
- * if (avail & 0x80000000) {
- * result[0] = src_data[3] - src_data[1];
- * result[1] = src_data[2] - src_data[0];
- * available = true;
- * }
- * uint32_t result_size = flags & VK_QUERY_RESULT_64_BIT ? 16 : 8;
- * if ((flags & VK_QUERY_RESULT_PARTIAL_BIT) || available) {
- * if (flags & VK_QUERY_RESULT_64_BIT) {
- * dst_buf[dst_offset] = result;
- * } else {
- * dst_buf[dst_offset] = (uint32_t)result;
- * }
- * }
- * if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
- * dst_buf[dst_offset + result_size] = available;
- * }
- * }
- */
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "tfb_query");
- b.shader->info.cs.local_size[0] = 64;
- b.shader->info.cs.local_size[1] = 1;
- b.shader->info.cs.local_size[2] = 1;
-
- /* Create and initialize local variables. */
- nir_variable *result =
- nir_local_variable_create(b.impl,
- glsl_vector_type(GLSL_TYPE_UINT64, 2),
- "result");
- nir_variable *available =
- nir_local_variable_create(b.impl, glsl_bool_type(), "available");
-
- nir_store_var(&b, result,
- nir_vec2(&b, nir_imm_int64(&b, 0),
- nir_imm_int64(&b, 0)), 0x3);
- nir_store_var(&b, available, nir_imm_false(&b), 0x1);
-
- nir_ssa_def *flags = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range=16);
-
- /* Load resources. */
- nir_ssa_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
- nir_ssa_def *src_buf = radv_meta_load_descriptor(&b, 0, 1);
-
- /* Compute global ID. */
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
- nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
- nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info.cs.local_size[0],
- b.shader->info.cs.local_size[1],
- b.shader->info.cs.local_size[2], 0);
- nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
- global_id = nir_channel(&b, global_id, 0); // We only care about x here.
-
- /* Compute src/dst strides. */
- nir_ssa_def *input_stride = nir_imm_int(&b, 32);
- nir_ssa_def *input_base = nir_imul(&b, input_stride, global_id);
- nir_ssa_def *output_stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 4), .range=16);
- nir_ssa_def *output_base = nir_imul(&b, output_stride, global_id);
-
- /* Load data from the query pool. */
- nir_ssa_def *load1 = nir_load_ssbo(&b, 4, 32, src_buf, input_base, .align_mul=32);
- nir_ssa_def *load2 = nir_load_ssbo(&b, 4, 32, src_buf,
- nir_iadd(&b, input_base, nir_imm_int(&b, 16)),
- .align_mul=16);
-
- /* Check if result is available. */
- nir_ssa_def *avails[2];
- avails[0] = nir_iand(&b, nir_channel(&b, load1, 1),
- nir_channel(&b, load1, 3));
- avails[1] = nir_iand(&b, nir_channel(&b, load2, 1),
- nir_channel(&b, load2, 3));
- nir_ssa_def *result_is_available =
- nir_i2b(&b, nir_iand(&b, nir_iand(&b, avails[0], avails[1]),
- nir_imm_int(&b, 0x80000000)));
-
- /* Only compute result if available. */
- nir_push_if(&b, result_is_available);
-
- /* Pack values. */
- nir_ssa_def *packed64[4];
- packed64[0] = nir_pack_64_2x32(&b, nir_vec2(&b,
- nir_channel(&b, load1, 0),
- nir_channel(&b, load1, 1)));
- packed64[1] = nir_pack_64_2x32(&b, nir_vec2(&b,
- nir_channel(&b, load1, 2),
- nir_channel(&b, load1, 3)));
- packed64[2] = nir_pack_64_2x32(&b, nir_vec2(&b,
- nir_channel(&b, load2, 0),
- nir_channel(&b, load2, 1)));
- packed64[3] = nir_pack_64_2x32(&b, nir_vec2(&b,
- nir_channel(&b, load2, 2),
- nir_channel(&b, load2, 3)));
-
- /* Compute result. */
- nir_ssa_def *num_primitive_written =
- nir_isub(&b, packed64[3], packed64[1]);
- nir_ssa_def *primitive_storage_needed =
- nir_isub(&b, packed64[2], packed64[0]);
-
- nir_store_var(&b, result,
- nir_vec2(&b, num_primitive_written,
- primitive_storage_needed), 0x3);
- nir_store_var(&b, available, nir_imm_true(&b), 0x1);
-
- nir_pop_if(&b, NULL);
-
- /* Determine if result is 64 or 32 bit. */
- nir_ssa_def *result_is_64bit =
- nir_test_flag(&b, flags, VK_QUERY_RESULT_64_BIT);
- nir_ssa_def *result_size =
- nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 16),
- nir_imm_int(&b, 8));
-
- /* Store the result if complete or partial results have been requested. */
- nir_push_if(&b,
- nir_ior(&b,
- nir_test_flag(&b, flags, VK_QUERY_RESULT_PARTIAL_BIT),
- nir_load_var(&b, available)));
-
- /* Store result. */
- nir_push_if(&b, result_is_64bit);
-
- nir_store_ssbo(&b, nir_load_var(&b, result), dst_buf, output_base,
- .write_mask=0x3, .align_mul=8);
-
- nir_push_else(&b, NULL);
-
- nir_store_ssbo(&b, nir_u2u32(&b, nir_load_var(&b, result)), dst_buf,
- output_base, .write_mask=0x3, .align_mul=4);
-
- nir_pop_if(&b, NULL);
- nir_pop_if(&b, NULL);
-
- radv_store_availability(&b, flags, dst_buf,
- nir_iadd(&b, result_size, output_base),
- nir_b2i32(&b, nir_load_var(&b, available)));
-
- return b.shader;
+ /* the shader this builds is roughly
+ *
+ * uint32_t src_stride = 32;
+ *
+ * location(binding = 0) buffer dst_buf;
+ * location(binding = 1) buffer src_buf;
+ *
+ * void main() {
+ * uint64_t result[2] = {};
+ * bool available = false;
+ * uint64_t src_offset = src_stride * global_id.x;
+ * uint64_t dst_offset = dst_stride * global_id.x;
+ * uint64_t *src_data = src_buf[src_offset];
+ * uint32_t avail = (src_data[0] >> 32) &
+ * (src_data[1] >> 32) &
+ * (src_data[2] >> 32) &
+ * (src_data[3] >> 32);
+ * if (avail & 0x80000000) {
+ * result[0] = src_data[3] - src_data[1];
+ * result[1] = src_data[2] - src_data[0];
+ * available = true;
+ * }
+ * uint32_t result_size = flags & VK_QUERY_RESULT_64_BIT ? 16 : 8;
+ * if ((flags & VK_QUERY_RESULT_PARTIAL_BIT) || available) {
+ * if (flags & VK_QUERY_RESULT_64_BIT) {
+ * dst_buf[dst_offset] = result;
+ * } else {
+ * dst_buf[dst_offset] = (uint32_t)result;
+ * }
+ * }
+ * if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
+ * dst_buf[dst_offset + result_size] = available;
+ * }
+ * }
+ */
+ nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "tfb_query");
+ b.shader->info.cs.local_size[0] = 64;
+ b.shader->info.cs.local_size[1] = 1;
+ b.shader->info.cs.local_size[2] = 1;
+
+ /* Create and initialize local variables. */
+ nir_variable *result =
+ nir_local_variable_create(b.impl, glsl_vector_type(GLSL_TYPE_UINT64, 2), "result");
+ nir_variable *available = nir_local_variable_create(b.impl, glsl_bool_type(), "available");
+
+ nir_store_var(&b, result, nir_vec2(&b, nir_imm_int64(&b, 0), nir_imm_int64(&b, 0)), 0x3);
+ nir_store_var(&b, available, nir_imm_false(&b), 0x1);
+
+ nir_ssa_def *flags = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range = 16);
+
+ /* Load resources. */
+ nir_ssa_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
+ nir_ssa_def *src_buf = radv_meta_load_descriptor(&b, 0, 1);
+
+ /* Compute global ID. */
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
+ nir_ssa_def *block_size =
+ nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1],
+ b.shader->info.cs.local_size[2], 0);
+ nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+ global_id = nir_channel(&b, global_id, 0); // We only care about x here.
+
+ /* Compute src/dst strides. */
+ nir_ssa_def *input_stride = nir_imm_int(&b, 32);
+ nir_ssa_def *input_base = nir_imul(&b, input_stride, global_id);
+ nir_ssa_def *output_stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 4), .range = 16);
+ nir_ssa_def *output_base = nir_imul(&b, output_stride, global_id);
+
+ /* Load data from the query pool. */
+ nir_ssa_def *load1 = nir_load_ssbo(&b, 4, 32, src_buf, input_base, .align_mul = 32);
+ nir_ssa_def *load2 = nir_load_ssbo(
+ &b, 4, 32, src_buf, nir_iadd(&b, input_base, nir_imm_int(&b, 16)), .align_mul = 16);
+
+ /* Check if result is available. */
+ nir_ssa_def *avails[2];
+ avails[0] = nir_iand(&b, nir_channel(&b, load1, 1), nir_channel(&b, load1, 3));
+ avails[1] = nir_iand(&b, nir_channel(&b, load2, 1), nir_channel(&b, load2, 3));
+ nir_ssa_def *result_is_available =
+ nir_i2b(&b, nir_iand(&b, nir_iand(&b, avails[0], avails[1]), nir_imm_int(&b, 0x80000000)));
+
+ /* Only compute result if available. */
+ nir_push_if(&b, result_is_available);
+
+ /* Pack values. */
+ nir_ssa_def *packed64[4];
+ packed64[0] =
+ nir_pack_64_2x32(&b, nir_vec2(&b, nir_channel(&b, load1, 0), nir_channel(&b, load1, 1)));
+ packed64[1] =
+ nir_pack_64_2x32(&b, nir_vec2(&b, nir_channel(&b, load1, 2), nir_channel(&b, load1, 3)));
+ packed64[2] =
+ nir_pack_64_2x32(&b, nir_vec2(&b, nir_channel(&b, load2, 0), nir_channel(&b, load2, 1)));
+ packed64[3] =
+ nir_pack_64_2x32(&b, nir_vec2(&b, nir_channel(&b, load2, 2), nir_channel(&b, load2, 3)));
+
+ /* Compute result. */
+ nir_ssa_def *num_primitive_written = nir_isub(&b, packed64[3], packed64[1]);
+ nir_ssa_def *primitive_storage_needed = nir_isub(&b, packed64[2], packed64[0]);
+
+ nir_store_var(&b, result, nir_vec2(&b, num_primitive_written, primitive_storage_needed), 0x3);
+ nir_store_var(&b, available, nir_imm_true(&b), 0x1);
+
+ nir_pop_if(&b, NULL);
+
+ /* Determine if result is 64 or 32 bit. */
+ nir_ssa_def *result_is_64bit = nir_test_flag(&b, flags, VK_QUERY_RESULT_64_BIT);
+ nir_ssa_def *result_size =
+ nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 16), nir_imm_int(&b, 8));
+
+ /* Store the result if complete or partial results have been requested. */
+ nir_push_if(&b, nir_ior(&b, nir_test_flag(&b, flags, VK_QUERY_RESULT_PARTIAL_BIT),
+ nir_load_var(&b, available)));
+
+ /* Store result. */
+ nir_push_if(&b, result_is_64bit);
+
+ nir_store_ssbo(&b, nir_load_var(&b, result), dst_buf, output_base, .write_mask = 0x3,
+ .align_mul = 8);
+
+ nir_push_else(&b, NULL);
+
+ nir_store_ssbo(&b, nir_u2u32(&b, nir_load_var(&b, result)), dst_buf, output_base,
+ .write_mask = 0x3, .align_mul = 4);
+
+ nir_pop_if(&b, NULL);
+ nir_pop_if(&b, NULL);
+
+ radv_store_availability(&b, flags, dst_buf, nir_iadd(&b, result_size, output_base),
+ nir_b2i32(&b, nir_load_var(&b, available)));
+
+ return b.shader;
}
static nir_shader *
build_timestamp_query_shader(struct radv_device *device)
{
- /* the shader this builds is roughly
- *
- * uint32_t src_stride = 8;
- *
- * location(binding = 0) buffer dst_buf;
- * location(binding = 1) buffer src_buf;
- *
- * void main() {
- * uint64_t result = 0;
- * bool available = false;
- * uint64_t src_offset = src_stride * global_id.x;
- * uint64_t dst_offset = dst_stride * global_id.x;
- * uint64_t timestamp = src_buf[src_offset];
- * if (timestamp != TIMESTAMP_NOT_READY) {
- * result = timestamp;
- * available = true;
- * }
- * uint32_t result_size = flags & VK_QUERY_RESULT_64_BIT ? 8 : 4;
- * if ((flags & VK_QUERY_RESULT_PARTIAL_BIT) || available) {
- * if (flags & VK_QUERY_RESULT_64_BIT) {
- * dst_buf[dst_offset] = result;
- * } else {
- * dst_buf[dst_offset] = (uint32_t)result;
- * }
- * }
- * if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
- * dst_buf[dst_offset + result_size] = available;
- * }
- * }
- */
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "timestamp_query");
- b.shader->info.cs.local_size[0] = 64;
- b.shader->info.cs.local_size[1] = 1;
- b.shader->info.cs.local_size[2] = 1;
-
- /* Create and initialize local variables. */
- nir_variable *result =
- nir_local_variable_create(b.impl, glsl_uint64_t_type(), "result");
- nir_variable *available =
- nir_local_variable_create(b.impl, glsl_bool_type(), "available");
-
- nir_store_var(&b, result, nir_imm_int64(&b, 0), 0x1);
- nir_store_var(&b, available, nir_imm_false(&b), 0x1);
-
- nir_ssa_def *flags = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range=16);
-
- /* Load resources. */
- nir_ssa_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
- nir_ssa_def *src_buf = radv_meta_load_descriptor(&b, 0, 1);
-
- /* Compute global ID. */
- nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
- nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
- nir_ssa_def *block_size = nir_imm_ivec4(&b,
- b.shader->info.cs.local_size[0],
- b.shader->info.cs.local_size[1],
- b.shader->info.cs.local_size[2], 0);
- nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
- global_id = nir_channel(&b, global_id, 0); // We only care about x here.
-
- /* Compute src/dst strides. */
- nir_ssa_def *input_stride = nir_imm_int(&b, 8);
- nir_ssa_def *input_base = nir_imul(&b, input_stride, global_id);
- nir_ssa_def *output_stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 4), .range=16);
- nir_ssa_def *output_base = nir_imul(&b, output_stride, global_id);
-
- /* Load data from the query pool. */
- nir_ssa_def *load = nir_load_ssbo(&b, 2, 32, src_buf, input_base, .align_mul=8);
-
- /* Pack the timestamp. */
- nir_ssa_def *timestamp;
- timestamp = nir_pack_64_2x32(&b, nir_vec2(&b,
- nir_channel(&b, load, 0),
- nir_channel(&b, load, 1)));
-
- /* Check if result is available. */
- nir_ssa_def *result_is_available =
- nir_i2b(&b, nir_ine(&b, timestamp,
- nir_imm_int64(&b, TIMESTAMP_NOT_READY)));
-
- /* Only store result if available. */
- nir_push_if(&b, result_is_available);
-
- nir_store_var(&b, result, timestamp, 0x1);
- nir_store_var(&b, available, nir_imm_true(&b), 0x1);
-
- nir_pop_if(&b, NULL);
-
- /* Determine if result is 64 or 32 bit. */
- nir_ssa_def *result_is_64bit =
- nir_test_flag(&b, flags, VK_QUERY_RESULT_64_BIT);
- nir_ssa_def *result_size =
- nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 8),
- nir_imm_int(&b, 4));
-
- /* Store the result if complete or partial results have been requested. */
- nir_push_if(&b, nir_ior(&b, nir_test_flag(&b, flags, VK_QUERY_RESULT_PARTIAL_BIT),
- nir_load_var(&b, available)));
-
- /* Store result. */
- nir_push_if(&b, result_is_64bit);
-
- nir_store_ssbo(&b, nir_load_var(&b, result), dst_buf, output_base, .write_mask=0x1, .align_mul=8);
-
- nir_push_else(&b, NULL);
-
- nir_store_ssbo(&b, nir_u2u32(&b, nir_load_var(&b, result)), dst_buf,
- output_base, .write_mask=0x1, .align_mul=4);
-
- nir_pop_if(&b, NULL);
-
- nir_pop_if(&b, NULL);
-
- radv_store_availability(&b, flags, dst_buf,
- nir_iadd(&b, result_size, output_base),
- nir_b2i32(&b, nir_load_var(&b, available)));
-
- return b.shader;
+ /* the shader this builds is roughly
+ *
+ * uint32_t src_stride = 8;
+ *
+ * location(binding = 0) buffer dst_buf;
+ * location(binding = 1) buffer src_buf;
+ *
+ * void main() {
+ * uint64_t result = 0;
+ * bool available = false;
+ * uint64_t src_offset = src_stride * global_id.x;
+ * uint64_t dst_offset = dst_stride * global_id.x;
+ * uint64_t timestamp = src_buf[src_offset];
+ * if (timestamp != TIMESTAMP_NOT_READY) {
+ * result = timestamp;
+ * available = true;
+ * }
+ * uint32_t result_size = flags & VK_QUERY_RESULT_64_BIT ? 8 : 4;
+ * if ((flags & VK_QUERY_RESULT_PARTIAL_BIT) || available) {
+ * if (flags & VK_QUERY_RESULT_64_BIT) {
+ * dst_buf[dst_offset] = result;
+ * } else {
+ * dst_buf[dst_offset] = (uint32_t)result;
+ * }
+ * }
+ * if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
+ * dst_buf[dst_offset + result_size] = available;
+ * }
+ * }
+ */
+ nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "timestamp_query");
+ b.shader->info.cs.local_size[0] = 64;
+ b.shader->info.cs.local_size[1] = 1;
+ b.shader->info.cs.local_size[2] = 1;
+
+ /* Create and initialize local variables. */
+ nir_variable *result = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "result");
+ nir_variable *available = nir_local_variable_create(b.impl, glsl_bool_type(), "available");
+
+ nir_store_var(&b, result, nir_imm_int64(&b, 0), 0x1);
+ nir_store_var(&b, available, nir_imm_false(&b), 0x1);
+
+ nir_ssa_def *flags = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .range = 16);
+
+ /* Load resources. */
+ nir_ssa_def *dst_buf = radv_meta_load_descriptor(&b, 0, 0);
+ nir_ssa_def *src_buf = radv_meta_load_descriptor(&b, 0, 1);
+
+ /* Compute global ID. */
+ nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
+ nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
+ nir_ssa_def *block_size =
+ nir_imm_ivec4(&b, b.shader->info.cs.local_size[0], b.shader->info.cs.local_size[1],
+ b.shader->info.cs.local_size[2], 0);
+ nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+ global_id = nir_channel(&b, global_id, 0); // We only care about x here.
+
+ /* Compute src/dst strides. */
+ nir_ssa_def *input_stride = nir_imm_int(&b, 8);
+ nir_ssa_def *input_base = nir_imul(&b, input_stride, global_id);
+ nir_ssa_def *output_stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 4), .range = 16);
+ nir_ssa_def *output_base = nir_imul(&b, output_stride, global_id);
+
+ /* Load data from the query pool. */
+ nir_ssa_def *load = nir_load_ssbo(&b, 2, 32, src_buf, input_base, .align_mul = 8);
+
+ /* Pack the timestamp. */
+ nir_ssa_def *timestamp;
+ timestamp =
+ nir_pack_64_2x32(&b, nir_vec2(&b, nir_channel(&b, load, 0), nir_channel(&b, load, 1)));
+
+ /* Check if result is available. */
+ nir_ssa_def *result_is_available =
+ nir_i2b(&b, nir_ine(&b, timestamp, nir_imm_int64(&b, TIMESTAMP_NOT_READY)));
+
+ /* Only store result if available. */
+ nir_push_if(&b, result_is_available);
+
+ nir_store_var(&b, result, timestamp, 0x1);
+ nir_store_var(&b, available, nir_imm_true(&b), 0x1);
+
+ nir_pop_if(&b, NULL);
+
+ /* Determine if result is 64 or 32 bit. */
+ nir_ssa_def *result_is_64bit = nir_test_flag(&b, flags, VK_QUERY_RESULT_64_BIT);
+ nir_ssa_def *result_size =
+ nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 8), nir_imm_int(&b, 4));
+
+ /* Store the result if complete or partial results have been requested. */
+ nir_push_if(&b, nir_ior(&b, nir_test_flag(&b, flags, VK_QUERY_RESULT_PARTIAL_BIT),
+ nir_load_var(&b, available)));
+
+ /* Store result. */
+ nir_push_if(&b, result_is_64bit);
+
+ nir_store_ssbo(&b, nir_load_var(&b, result), dst_buf, output_base, .write_mask = 0x1,
+ .align_mul = 8);
+
+ nir_push_else(&b, NULL);
+
+ nir_store_ssbo(&b, nir_u2u32(&b, nir_load_var(&b, result)), dst_buf, output_base,
+ .write_mask = 0x1, .align_mul = 4);
+
+ nir_pop_if(&b, NULL);
+
+ nir_pop_if(&b, NULL);
+
+ radv_store_availability(&b, flags, dst_buf, nir_iadd(&b, result_size, output_base),
+ nir_b2i32(&b, nir_load_var(&b, available)));
+
+ return b.shader;
}
-static VkResult radv_device_init_meta_query_state_internal(struct radv_device *device)
+static VkResult
+radv_device_init_meta_query_state_internal(struct radv_device *device)
{
- VkResult result;
- nir_shader *occlusion_cs = NULL;
- nir_shader *pipeline_statistics_cs = NULL;
- nir_shader *tfb_cs = NULL;
- nir_shader *timestamp_cs = NULL;
-
- mtx_lock(&device->meta_state.mtx);
- if (device->meta_state.query.pipeline_statistics_query_pipeline) {
- mtx_unlock(&device->meta_state.mtx);
- return VK_SUCCESS;
- }
- occlusion_cs = build_occlusion_query_shader(device);
- pipeline_statistics_cs = build_pipeline_statistics_query_shader(device);
- tfb_cs = build_tfb_query_shader(device);
- timestamp_cs = build_timestamp_query_shader(device);
-
- VkDescriptorSetLayoutCreateInfo occlusion_ds_create_info = {
- .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
- .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
- .bindingCount = 2,
- .pBindings = (VkDescriptorSetLayoutBinding[]) {
- {
- .binding = 0,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- {
- .binding = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .descriptorCount = 1,
- .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
- .pImmutableSamplers = NULL
- },
- }
- };
-
- result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
- &occlusion_ds_create_info,
- &device->meta_state.alloc,
- &device->meta_state.query.ds_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
- VkPipelineLayoutCreateInfo occlusion_pl_create_info = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
- .setLayoutCount = 1,
- .pSetLayouts = &device->meta_state.query.ds_layout,
- .pushConstantRangeCount = 1,
- .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
- };
-
- result = radv_CreatePipelineLayout(radv_device_to_handle(device),
- &occlusion_pl_create_info,
- &device->meta_state.alloc,
- &device->meta_state.query.p_layout);
- if (result != VK_SUCCESS)
- goto fail;
-
- VkPipelineShaderStageCreateInfo occlusion_pipeline_shader_stage = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = vk_shader_module_handle_from_nir(occlusion_cs),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo occlusion_vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = occlusion_pipeline_shader_stage,
- .flags = 0,
- .layout = device->meta_state.query.p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- 1, &occlusion_vk_pipeline_info, NULL,
- &device->meta_state.query.occlusion_query_pipeline);
- if (result != VK_SUCCESS)
- goto fail;
-
- VkPipelineShaderStageCreateInfo pipeline_statistics_pipeline_shader_stage = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = vk_shader_module_handle_from_nir(pipeline_statistics_cs),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo pipeline_statistics_vk_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = pipeline_statistics_pipeline_shader_stage,
- .flags = 0,
- .layout = device->meta_state.query.p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- 1, &pipeline_statistics_vk_pipeline_info, NULL,
- &device->meta_state.query.pipeline_statistics_query_pipeline);
- if (result != VK_SUCCESS)
- goto fail;
-
- VkPipelineShaderStageCreateInfo tfb_pipeline_shader_stage = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = vk_shader_module_handle_from_nir(tfb_cs),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo tfb_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = tfb_pipeline_shader_stage,
- .flags = 0,
- .layout = device->meta_state.query.p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- 1, &tfb_pipeline_info, NULL,
- &device->meta_state.query.tfb_query_pipeline);
- if (result != VK_SUCCESS)
- goto fail;
-
- VkPipelineShaderStageCreateInfo timestamp_pipeline_shader_stage = {
- .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
- .stage = VK_SHADER_STAGE_COMPUTE_BIT,
- .module = vk_shader_module_handle_from_nir(timestamp_cs),
- .pName = "main",
- .pSpecializationInfo = NULL,
- };
-
- VkComputePipelineCreateInfo timestamp_pipeline_info = {
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
- .stage = timestamp_pipeline_shader_stage,
- .flags = 0,
- .layout = device->meta_state.query.p_layout,
- };
-
- result = radv_CreateComputePipelines(radv_device_to_handle(device),
- radv_pipeline_cache_to_handle(&device->meta_state.cache),
- 1, &timestamp_pipeline_info, NULL,
- &device->meta_state.query.timestamp_query_pipeline);
+ VkResult result;
+ nir_shader *occlusion_cs = NULL;
+ nir_shader *pipeline_statistics_cs = NULL;
+ nir_shader *tfb_cs = NULL;
+ nir_shader *timestamp_cs = NULL;
+
+ mtx_lock(&device->meta_state.mtx);
+ if (device->meta_state.query.pipeline_statistics_query_pipeline) {
+ mtx_unlock(&device->meta_state.mtx);
+ return VK_SUCCESS;
+ }
+ occlusion_cs = build_occlusion_query_shader(device);
+ pipeline_statistics_cs = build_pipeline_statistics_query_shader(device);
+ tfb_cs = build_tfb_query_shader(device);
+ timestamp_cs = build_timestamp_query_shader(device);
+
+ VkDescriptorSetLayoutCreateInfo occlusion_ds_create_info = {
+ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+ .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+ .bindingCount = 2,
+ .pBindings = (VkDescriptorSetLayoutBinding[]){
+ {.binding = 0,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ {.binding = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .descriptorCount = 1,
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
+ .pImmutableSamplers = NULL},
+ }};
+
+ result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &occlusion_ds_create_info,
+ &device->meta_state.alloc,
+ &device->meta_state.query.ds_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineLayoutCreateInfo occlusion_pl_create_info = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+ .setLayoutCount = 1,
+ .pSetLayouts = &device->meta_state.query.ds_layout,
+ .pushConstantRangeCount = 1,
+ .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
+ };
+
+ result =
+ radv_CreatePipelineLayout(radv_device_to_handle(device), &occlusion_pl_create_info,
+ &device->meta_state.alloc, &device->meta_state.query.p_layout);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineShaderStageCreateInfo occlusion_pipeline_shader_stage = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(occlusion_cs),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo occlusion_vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = occlusion_pipeline_shader_stage,
+ .flags = 0,
+ .layout = device->meta_state.query.p_layout,
+ };
+
+ result = radv_CreateComputePipelines(
+ radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
+ &occlusion_vk_pipeline_info, NULL, &device->meta_state.query.occlusion_query_pipeline);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineShaderStageCreateInfo pipeline_statistics_pipeline_shader_stage = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(pipeline_statistics_cs),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo pipeline_statistics_vk_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = pipeline_statistics_pipeline_shader_stage,
+ .flags = 0,
+ .layout = device->meta_state.query.p_layout,
+ };
+
+ result = radv_CreateComputePipelines(
+ radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
+ &pipeline_statistics_vk_pipeline_info, NULL,
+ &device->meta_state.query.pipeline_statistics_query_pipeline);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineShaderStageCreateInfo tfb_pipeline_shader_stage = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(tfb_cs),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo tfb_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = tfb_pipeline_shader_stage,
+ .flags = 0,
+ .layout = device->meta_state.query.p_layout,
+ };
+
+ result = radv_CreateComputePipelines(
+ radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
+ &tfb_pipeline_info, NULL, &device->meta_state.query.tfb_query_pipeline);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ VkPipelineShaderStageCreateInfo timestamp_pipeline_shader_stage = {
+ .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+ .stage = VK_SHADER_STAGE_COMPUTE_BIT,
+ .module = vk_shader_module_handle_from_nir(timestamp_cs),
+ .pName = "main",
+ .pSpecializationInfo = NULL,
+ };
+
+ VkComputePipelineCreateInfo timestamp_pipeline_info = {
+ .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+ .stage = timestamp_pipeline_shader_stage,
+ .flags = 0,
+ .layout = device->meta_state.query.p_layout,
+ };
+
+ result = radv_CreateComputePipelines(
+ radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
+ &timestamp_pipeline_info, NULL, &device->meta_state.query.timestamp_query_pipeline);
fail:
- if (result != VK_SUCCESS)
- radv_device_finish_meta_query_state(device);
- ralloc_free(occlusion_cs);
- ralloc_free(pipeline_statistics_cs);
- ralloc_free(tfb_cs);
- ralloc_free(timestamp_cs);
- mtx_unlock(&device->meta_state.mtx);
- return result;
+ if (result != VK_SUCCESS)
+ radv_device_finish_meta_query_state(device);
+ ralloc_free(occlusion_cs);
+ ralloc_free(pipeline_statistics_cs);
+ ralloc_free(tfb_cs);
+ ralloc_free(timestamp_cs);
+ mtx_unlock(&device->meta_state.mtx);
+ return result;
}
-VkResult radv_device_init_meta_query_state(struct radv_device *device, bool on_demand)
+VkResult
+radv_device_init_meta_query_state(struct radv_device *device, bool on_demand)
{
- if (on_demand)
- return VK_SUCCESS;
+ if (on_demand)
+ return VK_SUCCESS;
- return radv_device_init_meta_query_state_internal(device);
+ return radv_device_init_meta_query_state_internal(device);
}
-void radv_device_finish_meta_query_state(struct radv_device *device)
+void
+radv_device_finish_meta_query_state(struct radv_device *device)
{
- if (device->meta_state.query.tfb_query_pipeline)
- radv_DestroyPipeline(radv_device_to_handle(device),
- device->meta_state.query.tfb_query_pipeline,
- &device->meta_state.alloc);
-
- if (device->meta_state.query.pipeline_statistics_query_pipeline)
- radv_DestroyPipeline(radv_device_to_handle(device),
- device->meta_state.query.pipeline_statistics_query_pipeline,
- &device->meta_state.alloc);
-
- if (device->meta_state.query.occlusion_query_pipeline)
- radv_DestroyPipeline(radv_device_to_handle(device),
- device->meta_state.query.occlusion_query_pipeline,
- &device->meta_state.alloc);
-
- if (device->meta_state.query.timestamp_query_pipeline)
- radv_DestroyPipeline(radv_device_to_handle(device),
- device->meta_state.query.timestamp_query_pipeline,
- &device->meta_state.alloc);
-
- if (device->meta_state.query.p_layout)
- radv_DestroyPipelineLayout(radv_device_to_handle(device),
- device->meta_state.query.p_layout,
- &device->meta_state.alloc);
-
- if (device->meta_state.query.ds_layout)
- radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
- device->meta_state.query.ds_layout,
- &device->meta_state.alloc);
+ if (device->meta_state.query.tfb_query_pipeline)
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ device->meta_state.query.tfb_query_pipeline, &device->meta_state.alloc);
+
+ if (device->meta_state.query.pipeline_statistics_query_pipeline)
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ device->meta_state.query.pipeline_statistics_query_pipeline,
+ &device->meta_state.alloc);
+
+ if (device->meta_state.query.occlusion_query_pipeline)
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ device->meta_state.query.occlusion_query_pipeline,
+ &device->meta_state.alloc);
+
+ if (device->meta_state.query.timestamp_query_pipeline)
+ radv_DestroyPipeline(radv_device_to_handle(device),
+ device->meta_state.query.timestamp_query_pipeline,
+ &device->meta_state.alloc);
+
+ if (device->meta_state.query.p_layout)
+ radv_DestroyPipelineLayout(radv_device_to_handle(device), device->meta_state.query.p_layout,
+ &device->meta_state.alloc);
+
+ if (device->meta_state.query.ds_layout)
+ radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
+ device->meta_state.query.ds_layout,
+ &device->meta_state.alloc);
}
-static void radv_query_shader(struct radv_cmd_buffer *cmd_buffer,
- VkPipeline *pipeline,
- struct radeon_winsys_bo *src_bo,
- struct radeon_winsys_bo *dst_bo,
- uint64_t src_offset, uint64_t dst_offset,
- uint32_t src_stride, uint32_t dst_stride,
- uint32_t count, uint32_t flags,
- uint32_t pipeline_stats_mask, uint32_t avail_offset)
+static void
+radv_query_shader(struct radv_cmd_buffer *cmd_buffer, VkPipeline *pipeline,
+ struct radeon_winsys_bo *src_bo, struct radeon_winsys_bo *dst_bo,
+ uint64_t src_offset, uint64_t dst_offset, uint32_t src_stride,
+ uint32_t dst_stride, uint32_t count, uint32_t flags, uint32_t pipeline_stats_mask,
+ uint32_t avail_offset)
{
- struct radv_device *device = cmd_buffer->device;
- struct radv_meta_saved_state saved_state;
- bool old_predicating;
-
- if (!*pipeline) {
- VkResult ret = radv_device_init_meta_query_state_internal(device);
- if (ret != VK_SUCCESS) {
- cmd_buffer->record_result = ret;
- return;
- }
- }
-
- radv_meta_save(&saved_state, cmd_buffer,
- RADV_META_SAVE_COMPUTE_PIPELINE |
- RADV_META_SAVE_CONSTANTS |
- RADV_META_SAVE_DESCRIPTORS);
-
- /* VK_EXT_conditional_rendering says that copy commands should not be
- * affected by conditional rendering.
- */
- old_predicating = cmd_buffer->state.predicating;
- cmd_buffer->state.predicating = false;
-
- struct radv_buffer dst_buffer = {
- .bo = dst_bo,
- .offset = dst_offset,
- .size = dst_stride * count
- };
-
- struct radv_buffer src_buffer = {
- .bo = src_bo,
- .offset = src_offset,
- .size = MAX2(src_stride * count, avail_offset + 4 * count - src_offset)
- };
-
- radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
- VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
-
- radv_meta_push_descriptor_set(cmd_buffer,
- VK_PIPELINE_BIND_POINT_COMPUTE,
- device->meta_state.query.p_layout,
- 0, /* set */
- 2, /* descriptorWriteCount */
- (VkWriteDescriptorSet[]) {
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 0,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .pBufferInfo = &(VkDescriptorBufferInfo) {
- .buffer = radv_buffer_to_handle(&dst_buffer),
- .offset = 0,
- .range = VK_WHOLE_SIZE
- }
- },
- {
- .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
- .dstBinding = 1,
- .dstArrayElement = 0,
- .descriptorCount = 1,
- .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
- .pBufferInfo = &(VkDescriptorBufferInfo) {
- .buffer = radv_buffer_to_handle(&src_buffer),
- .offset = 0,
- .range = VK_WHOLE_SIZE
- }
- }
- });
-
- /* Encode the number of elements for easy access by the shader. */
- pipeline_stats_mask &= 0x7ff;
- pipeline_stats_mask |= util_bitcount(pipeline_stats_mask) << 16;
-
- avail_offset -= src_offset;
-
- struct {
- uint32_t flags;
- uint32_t dst_stride;
- uint32_t pipeline_stats_mask;
- uint32_t avail_offset;
- } push_constants = {
- flags,
- dst_stride,
- pipeline_stats_mask,
- avail_offset
- };
-
- radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
- device->meta_state.query.p_layout,
- VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants),
- &push_constants);
-
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_L2 |
- RADV_CMD_FLAG_INV_VCACHE;
-
- if (flags & VK_QUERY_RESULT_WAIT_BIT)
- cmd_buffer->state.flush_bits |= RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER;
-
- radv_unaligned_dispatch(cmd_buffer, count, 1, 1);
-
- /* Restore conditional rendering. */
- cmd_buffer->state.predicating = old_predicating;
-
- radv_meta_restore(&saved_state, cmd_buffer);
+ struct radv_device *device = cmd_buffer->device;
+ struct radv_meta_saved_state saved_state;
+ bool old_predicating;
+
+ if (!*pipeline) {
+ VkResult ret = radv_device_init_meta_query_state_internal(device);
+ if (ret != VK_SUCCESS) {
+ cmd_buffer->record_result = ret;
+ return;
+ }
+ }
+
+ radv_meta_save(
+ &saved_state, cmd_buffer,
+ RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
+
+ /* VK_EXT_conditional_rendering says that copy commands should not be
+ * affected by conditional rendering.
+ */
+ old_predicating = cmd_buffer->state.predicating;
+ cmd_buffer->state.predicating = false;
+
+ struct radv_buffer dst_buffer = {.bo = dst_bo, .offset = dst_offset, .size = dst_stride * count};
+
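+   /* The source range must also cover the per-query availability dwords
+    * that the pipeline statistics path reads via avail_offset. */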
+ struct radv_buffer src_buffer = {
+ .bo = src_bo,
+ .offset = src_offset,
+ .size = MAX2(src_stride * count, avail_offset + 4 * count - src_offset)};
+
+ radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
+ *pipeline);
+
+ radv_meta_push_descriptor_set(
+ cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.query.p_layout, 0, /* set */
+ 2, /* descriptorWriteCount */
+ (VkWriteDescriptorSet[]){
+ {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 0,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .pBufferInfo = &(VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(&dst_buffer),
+ .offset = 0,
+ .range = VK_WHOLE_SIZE}},
+ {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+ .dstBinding = 1,
+ .dstArrayElement = 0,
+ .descriptorCount = 1,
+ .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+ .pBufferInfo = &(VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(&src_buffer),
+ .offset = 0,
+ .range = VK_WHOLE_SIZE}}});
+
+ /* Encode the number of elements for easy access by the shader. */
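+   /* Bits 0-10 select the individual statistics (0x7ff covers the eleven
+    * Vulkan pipeline statistics); bits 16+ hold how many are enabled. */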
+ pipeline_stats_mask &= 0x7ff;
+ pipeline_stats_mask |= util_bitcount(pipeline_stats_mask) << 16;
+
+ avail_offset -= src_offset;
+
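+   /* This layout must match the 16-byte push constant range declared in
+    * the pipeline layout; the query shaders above load flags at offset 0
+    * and dst_stride at offset 4. */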
+ struct {
+ uint32_t flags;
+ uint32_t dst_stride;
+ uint32_t pipeline_stats_mask;
+ uint32_t avail_offset;
+ } push_constants = {flags, dst_stride, pipeline_stats_mask, avail_offset};
+
+ radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.query.p_layout,
+ VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), &push_constants);
+
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_INV_VCACHE;
+
+ if (flags & VK_QUERY_RESULT_WAIT_BIT)
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER;
+
+ radv_unaligned_dispatch(cmd_buffer, count, 1, 1);
+
+ /* Restore conditional rendering. */
+ cmd_buffer->state.predicating = old_predicating;
+
+ radv_meta_restore(&saved_state, cmd_buffer);
}
static bool
-radv_query_pool_needs_gds(struct radv_device *device,
- struct radv_query_pool *pool)
+radv_query_pool_needs_gds(struct radv_device *device, struct radv_query_pool *pool)
{
- /* The number of primitives generated by geometry shader invocations is
- * only counted by the hardware if GS uses the legacy path. When NGG GS
- * is used, the hardware can't know the number of generated primitives
- * and we have to it manually inside the shader. To achieve that, the
- * driver does a plain GDS atomic to accumulate that value.
- * TODO: fix use of NGG GS and non-NGG GS inside the same begin/end
- * query.
- */
- return device->physical_device->use_ngg &&
- (pool->pipeline_stats_mask & VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT);
+ /* The number of primitives generated by geometry shader invocations is
+ * only counted by the hardware if GS uses the legacy path. When NGG GS
+ * is used, the hardware can't know the number of generated primitives
+    * and we have to do it manually inside the shader. To achieve that, the
+ * driver does a plain GDS atomic to accumulate that value.
+ * TODO: fix use of NGG GS and non-NGG GS inside the same begin/end
+ * query.
+ */
+ return device->physical_device->use_ngg &&
+ (pool->pipeline_stats_mask & VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT);
}
static void
-radv_destroy_query_pool(struct radv_device *device,
- const VkAllocationCallbacks *pAllocator,
- struct radv_query_pool *pool)
+radv_destroy_query_pool(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
+ struct radv_query_pool *pool)
{
- if (pool->bo)
- device->ws->buffer_destroy(device->ws, pool->bo);
- vk_object_base_finish(&pool->base);
- vk_free2(&device->vk.alloc, pAllocator, pool);
+ if (pool->bo)
+ device->ws->buffer_destroy(device->ws, pool->bo);
+ vk_object_base_finish(&pool->base);
+ vk_free2(&device->vk.alloc, pAllocator, pool);
}
-VkResult radv_CreateQueryPool(
- VkDevice _device,
- const VkQueryPoolCreateInfo* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkQueryPool* pQueryPool)
+VkResult
+radv_CreateQueryPool(VkDevice _device, const VkQueryPoolCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkQueryPool *pQueryPool)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- struct radv_query_pool *pool = vk_alloc2(&device->vk.alloc, pAllocator,
- sizeof(*pool), 8,
- VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
-
- if (!pool)
- return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
-
- vk_object_base_init(&device->vk, &pool->base,
- VK_OBJECT_TYPE_QUERY_POOL);
-
- switch(pCreateInfo->queryType) {
- case VK_QUERY_TYPE_OCCLUSION:
- pool->stride = 16 * device->physical_device->rad_info.max_render_backends;
- break;
- case VK_QUERY_TYPE_PIPELINE_STATISTICS:
- pool->stride = pipelinestat_block_size * 2;
- break;
- case VK_QUERY_TYPE_TIMESTAMP:
- pool->stride = 8;
- break;
- case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
- pool->stride = 32;
- break;
- default:
- unreachable("creating unhandled query type");
- }
-
- pool->type = pCreateInfo->queryType;
- pool->pipeline_stats_mask = pCreateInfo->pipelineStatistics;
- pool->availability_offset = pool->stride * pCreateInfo->queryCount;
- pool->size = pool->availability_offset;
- if (pCreateInfo->queryType == VK_QUERY_TYPE_PIPELINE_STATISTICS)
- pool->size += 4 * pCreateInfo->queryCount;
-
- pool->bo = device->ws->buffer_create(device->ws, pool->size,
- 64, RADEON_DOMAIN_GTT, RADEON_FLAG_NO_INTERPROCESS_SHARING,
- RADV_BO_PRIORITY_QUERY_POOL);
- if (!pool->bo) {
- radv_destroy_query_pool(device, pAllocator, pool);
- return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
- }
-
- pool->ptr = device->ws->buffer_map(pool->bo);
- if (!pool->ptr) {
- radv_destroy_query_pool(device, pAllocator, pool);
- return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
- }
-
- *pQueryPool = radv_query_pool_to_handle(pool);
- return VK_SUCCESS;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ struct radv_query_pool *pool =
+ vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*pool), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+
+ if (!pool)
+ return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ vk_object_base_init(&device->vk, &pool->base, VK_OBJECT_TYPE_QUERY_POOL);
+
+ switch (pCreateInfo->queryType) {
+ case VK_QUERY_TYPE_OCCLUSION:
+ pool->stride = 16 * device->physical_device->rad_info.max_render_backends;
+ break;
+ case VK_QUERY_TYPE_PIPELINE_STATISTICS:
+ pool->stride = pipelinestat_block_size * 2;
+ break;
+ case VK_QUERY_TYPE_TIMESTAMP:
+ pool->stride = 8;
+ break;
+ case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
+ pool->stride = 32;
+ break;
+ default:
+ unreachable("creating unhandled query type");
+ }
+
+ pool->type = pCreateInfo->queryType;
+ pool->pipeline_stats_mask = pCreateInfo->pipelineStatistics;
+ pool->availability_offset = pool->stride * pCreateInfo->queryCount;
+ pool->size = pool->availability_offset;
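+   /* Pipeline statistics queries additionally keep one 32-bit availability
+    * word per query after the per-query result data. */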
+ if (pCreateInfo->queryType == VK_QUERY_TYPE_PIPELINE_STATISTICS)
+ pool->size += 4 * pCreateInfo->queryCount;
+
+ pool->bo =
+ device->ws->buffer_create(device->ws, pool->size, 64, RADEON_DOMAIN_GTT,
+ RADEON_FLAG_NO_INTERPROCESS_SHARING, RADV_BO_PRIORITY_QUERY_POOL);
+ if (!pool->bo) {
+ radv_destroy_query_pool(device, pAllocator, pool);
+ return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ }
+
+ pool->ptr = device->ws->buffer_map(pool->bo);
+ if (!pool->ptr) {
+ radv_destroy_query_pool(device, pAllocator, pool);
+ return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+ }
+
+ *pQueryPool = radv_query_pool_to_handle(pool);
+ return VK_SUCCESS;
}
-void radv_DestroyQueryPool(
- VkDevice _device,
- VkQueryPool _pool,
- const VkAllocationCallbacks* pAllocator)
+void
+radv_DestroyQueryPool(VkDevice _device, VkQueryPool _pool, const VkAllocationCallbacks *pAllocator)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_query_pool, pool, _pool);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_query_pool, pool, _pool);
- if (!pool)
- return;
+ if (!pool)
+ return;
- radv_destroy_query_pool(device, pAllocator, pool);
+ radv_destroy_query_pool(device, pAllocator, pool);
}
-VkResult radv_GetQueryPoolResults(
- VkDevice _device,
- VkQueryPool queryPool,
- uint32_t firstQuery,
- uint32_t queryCount,
- size_t dataSize,
- void* pData,
- VkDeviceSize stride,
- VkQueryResultFlags flags)
+VkResult
+radv_GetQueryPoolResults(VkDevice _device, VkQueryPool queryPool, uint32_t firstQuery,
+ uint32_t queryCount, size_t dataSize, void *pData, VkDeviceSize stride,
+ VkQueryResultFlags flags)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
- char *data = pData;
- VkResult result = VK_SUCCESS;
-
- if (radv_device_is_lost(device))
- return VK_ERROR_DEVICE_LOST;
-
- for(unsigned query_idx = 0; query_idx < queryCount; ++query_idx, data += stride) {
- char *dest = data;
- unsigned query = firstQuery + query_idx;
- char *src = pool->ptr + query * pool->stride;
- uint32_t available;
-
- switch (pool->type) {
- case VK_QUERY_TYPE_TIMESTAMP: {
- uint64_t const *src64 = (uint64_t const *)src;
- uint64_t value;
-
- do {
- value = p_atomic_read(src64);
- } while (value == TIMESTAMP_NOT_READY &&
- (flags & VK_QUERY_RESULT_WAIT_BIT));
-
- available = value != TIMESTAMP_NOT_READY;
-
- if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT))
- result = VK_NOT_READY;
-
- if (flags & VK_QUERY_RESULT_64_BIT) {
- if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
- *(uint64_t*)dest = value;
- dest += 8;
- } else {
- if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
- *(uint32_t*)dest = (uint32_t)value;
- dest += 4;
- }
- break;
- }
- case VK_QUERY_TYPE_OCCLUSION: {
- uint64_t const *src64 = (uint64_t const *)src;
- uint32_t db_count = device->physical_device->rad_info.max_render_backends;
- uint32_t enabled_rb_mask = device->physical_device->rad_info.enabled_rb_mask;
- uint64_t sample_count = 0;
- available = 1;
-
- for (int i = 0; i < db_count; ++i) {
- uint64_t start, end;
-
- if (!(enabled_rb_mask & (1 << i)))
- continue;
-
- do {
- start = p_atomic_read(src64 + 2 * i);
- end = p_atomic_read(src64 + 2 * i + 1);
- } while ((!(start & (1ull << 63)) || !(end & (1ull << 63))) && (flags & VK_QUERY_RESULT_WAIT_BIT));
-
- if (!(start & (1ull << 63)) || !(end & (1ull << 63)))
- available = 0;
- else {
- sample_count += end - start;
- }
- }
-
- if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT))
- result = VK_NOT_READY;
-
- if (flags & VK_QUERY_RESULT_64_BIT) {
- if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
- *(uint64_t*)dest = sample_count;
- dest += 8;
- } else {
- if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
- *(uint32_t*)dest = sample_count;
- dest += 4;
- }
- break;
- }
- case VK_QUERY_TYPE_PIPELINE_STATISTICS: {
- const uint32_t *avail_ptr = (const uint32_t*)(pool->ptr + pool->availability_offset + 4 * query);
-
- do {
- available = p_atomic_read(avail_ptr);
- } while (!available && (flags & VK_QUERY_RESULT_WAIT_BIT));
-
- if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT))
- result = VK_NOT_READY;
-
- const uint64_t *start = (uint64_t*)src;
- const uint64_t *stop = (uint64_t*)(src + pipelinestat_block_size);
- if (flags & VK_QUERY_RESULT_64_BIT) {
- uint64_t *dst = (uint64_t*)dest;
- dest += util_bitcount(pool->pipeline_stats_mask) * 8;
- for(int i = 0; i < ARRAY_SIZE(pipeline_statistics_indices); ++i) {
- if(pool->pipeline_stats_mask & (1u << i)) {
- if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
- *dst = stop[pipeline_statistics_indices[i]] -
- start[pipeline_statistics_indices[i]];
- dst++;
- }
- }
-
- } else {
- uint32_t *dst = (uint32_t*)dest;
- dest += util_bitcount(pool->pipeline_stats_mask) * 4;
- for(int i = 0; i < ARRAY_SIZE(pipeline_statistics_indices); ++i) {
- if(pool->pipeline_stats_mask & (1u << i)) {
- if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
- *dst = stop[pipeline_statistics_indices[i]] -
- start[pipeline_statistics_indices[i]];
- dst++;
- }
- }
- }
- break;
- }
- case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: {
- uint64_t const *src64 = (uint64_t const *)src;
- uint64_t num_primitives_written;
- uint64_t primitive_storage_needed;
-
- /* SAMPLE_STREAMOUTSTATS stores this structure:
- * {
- * u64 NumPrimitivesWritten;
- * u64 PrimitiveStorageNeeded;
- * }
- */
- available = 1;
- for (int j = 0; j < 4; j++) {
- if (!(p_atomic_read(src64 + j) & 0x8000000000000000UL))
- available = 0;
- }
-
- if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT))
- result = VK_NOT_READY;
-
- num_primitives_written = src64[3] - src64[1];
- primitive_storage_needed = src64[2] - src64[0];
-
- if (flags & VK_QUERY_RESULT_64_BIT) {
- if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
- *(uint64_t *)dest = num_primitives_written;
- dest += 8;
- if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
- *(uint64_t *)dest = primitive_storage_needed;
- dest += 8;
- } else {
- if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
- *(uint32_t *)dest = num_primitives_written;
- dest += 4;
- if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
- *(uint32_t *)dest = primitive_storage_needed;
- dest += 4;
- }
- break;
- }
- default:
- unreachable("trying to get results of unhandled query type");
- }
-
- if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
- if (flags & VK_QUERY_RESULT_64_BIT) {
- *(uint64_t*)dest = available;
- } else {
- *(uint32_t*)dest = available;
- }
- }
- }
-
- return result;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
+ char *data = pData;
+ VkResult result = VK_SUCCESS;
+
+ if (radv_device_is_lost(device))
+ return VK_ERROR_DEVICE_LOST;
+
+ for (unsigned query_idx = 0; query_idx < queryCount; ++query_idx, data += stride) {
+ char *dest = data;
+ unsigned query = firstQuery + query_idx;
+ char *src = pool->ptr + query * pool->stride;
+ uint32_t available;
+
+ switch (pool->type) {
+ case VK_QUERY_TYPE_TIMESTAMP: {
+ uint64_t const *src64 = (uint64_t const *)src;
+ uint64_t value;
+
+ do {
+ value = p_atomic_read(src64);
+ } while (value == TIMESTAMP_NOT_READY && (flags & VK_QUERY_RESULT_WAIT_BIT));
+
+ available = value != TIMESTAMP_NOT_READY;
+
+ if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT))
+ result = VK_NOT_READY;
+
+ if (flags & VK_QUERY_RESULT_64_BIT) {
+ if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
+ *(uint64_t *)dest = value;
+ dest += 8;
+ } else {
+ if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
+ *(uint32_t *)dest = (uint32_t)value;
+ dest += 4;
+ }
+ break;
+ }
+ case VK_QUERY_TYPE_OCCLUSION: {
+ uint64_t const *src64 = (uint64_t const *)src;
+ uint32_t db_count = device->physical_device->rad_info.max_render_backends;
+ uint32_t enabled_rb_mask = device->physical_device->rad_info.enabled_rb_mask;
+ uint64_t sample_count = 0;
+ available = 1;
+
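+      /* Bit 63 of each start/end counter acts as the written flag; the
+       * query is only available once every enabled RB has set it on both. */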
+ for (int i = 0; i < db_count; ++i) {
+ uint64_t start, end;
+
+ if (!(enabled_rb_mask & (1 << i)))
+ continue;
+
+ do {
+ start = p_atomic_read(src64 + 2 * i);
+ end = p_atomic_read(src64 + 2 * i + 1);
+ } while ((!(start & (1ull << 63)) || !(end & (1ull << 63))) &&
+ (flags & VK_QUERY_RESULT_WAIT_BIT));
+
+ if (!(start & (1ull << 63)) || !(end & (1ull << 63)))
+ available = 0;
+ else {
+ sample_count += end - start;
+ }
+ }
+
+ if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT))
+ result = VK_NOT_READY;
+
+ if (flags & VK_QUERY_RESULT_64_BIT) {
+ if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
+ *(uint64_t *)dest = sample_count;
+ dest += 8;
+ } else {
+ if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
+ *(uint32_t *)dest = sample_count;
+ dest += 4;
+ }
+ break;
+ }
+ case VK_QUERY_TYPE_PIPELINE_STATISTICS: {
+ const uint32_t *avail_ptr =
+ (const uint32_t *)(pool->ptr + pool->availability_offset + 4 * query);
+
+ do {
+ available = p_atomic_read(avail_ptr);
+ } while (!available && (flags & VK_QUERY_RESULT_WAIT_BIT));
+
+ if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT))
+ result = VK_NOT_READY;
+
+ const uint64_t *start = (uint64_t *)src;
+ const uint64_t *stop = (uint64_t *)(src + pipelinestat_block_size);
+ if (flags & VK_QUERY_RESULT_64_BIT) {
+ uint64_t *dst = (uint64_t *)dest;
+ dest += util_bitcount(pool->pipeline_stats_mask) * 8;
+ for (int i = 0; i < ARRAY_SIZE(pipeline_statistics_indices); ++i) {
+ if (pool->pipeline_stats_mask & (1u << i)) {
+ if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
+ *dst = stop[pipeline_statistics_indices[i]] -
+ start[pipeline_statistics_indices[i]];
+ dst++;
+ }
+ }
+
+ } else {
+ uint32_t *dst = (uint32_t *)dest;
+ dest += util_bitcount(pool->pipeline_stats_mask) * 4;
+ for (int i = 0; i < ARRAY_SIZE(pipeline_statistics_indices); ++i) {
+ if (pool->pipeline_stats_mask & (1u << i)) {
+ if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
+ *dst = stop[pipeline_statistics_indices[i]] -
+ start[pipeline_statistics_indices[i]];
+ dst++;
+ }
+ }
+ }
+ break;
+ }
+ case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: {
+ uint64_t const *src64 = (uint64_t const *)src;
+ uint64_t num_primitives_written;
+ uint64_t primitive_storage_needed;
+
+ /* SAMPLE_STREAMOUTSTATS stores this structure:
+ * {
+ * u64 NumPrimitivesWritten;
+ * u64 PrimitiveStorageNeeded;
+ * }
+ */
+ available = 1;
+ for (int j = 0; j < 4; j++) {
+ if (!(p_atomic_read(src64 + j) & 0x8000000000000000UL))
+ available = 0;
+ }
+
+ if (!available && !(flags & VK_QUERY_RESULT_PARTIAL_BIT))
+ result = VK_NOT_READY;
+
+ num_primitives_written = src64[3] - src64[1];
+ primitive_storage_needed = src64[2] - src64[0];
+
+ if (flags & VK_QUERY_RESULT_64_BIT) {
+ if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
+ *(uint64_t *)dest = num_primitives_written;
+ dest += 8;
+ if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
+ *(uint64_t *)dest = primitive_storage_needed;
+ dest += 8;
+ } else {
+ if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
+ *(uint32_t *)dest = num_primitives_written;
+ dest += 4;
+ if (available || (flags & VK_QUERY_RESULT_PARTIAL_BIT))
+ *(uint32_t *)dest = primitive_storage_needed;
+ dest += 4;
+ }
+ break;
+ }
+ default:
+ unreachable("trying to get results of unhandled query type");
+ }
+
+ if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
+ if (flags & VK_QUERY_RESULT_64_BIT) {
+ *(uint64_t *)dest = available;
+ } else {
+ *(uint32_t *)dest = available;
+ }
+ }
+ }
+
+ return result;
}
-static void emit_query_flush(struct radv_cmd_buffer *cmd_buffer,
- struct radv_query_pool *pool)
+static void
+emit_query_flush(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool)
{
- if (cmd_buffer->pending_reset_query) {
- if (pool->size >= RADV_BUFFER_OPS_CS_THRESHOLD) {
- /* Only need to flush caches if the query pool size is
- * large enough to be resetted using the compute shader
- * path. Small pools don't need any cache flushes
- * because we use a CP dma clear.
- */
- si_emit_cache_flush(cmd_buffer);
- }
- }
+ if (cmd_buffer->pending_reset_query) {
+ if (pool->size >= RADV_BUFFER_OPS_CS_THRESHOLD) {
+ /* Only need to flush caches if the query pool size is
+       * large enough to be reset using the compute shader
+ * path. Small pools don't need any cache flushes
+ * because we use a CP dma clear.
+ */
+ si_emit_cache_flush(cmd_buffer);
+ }
+ }
}
-void radv_CmdCopyQueryPoolResults(
- VkCommandBuffer commandBuffer,
- VkQueryPool queryPool,
- uint32_t firstQuery,
- uint32_t queryCount,
- VkBuffer dstBuffer,
- VkDeviceSize dstOffset,
- VkDeviceSize stride,
- VkQueryResultFlags flags)
+void
+radv_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer, VkQueryPool queryPool,
+ uint32_t firstQuery, uint32_t queryCount, VkBuffer dstBuffer,
+ VkDeviceSize dstOffset, VkDeviceSize stride, VkQueryResultFlags flags)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
- RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- uint64_t va = radv_buffer_get_va(pool->bo);
- uint64_t dest_va = radv_buffer_get_va(dst_buffer->bo);
- dest_va += dst_buffer->offset + dstOffset;
-
- radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, pool->bo);
- radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dst_buffer->bo);
-
- /* From the Vulkan spec 1.1.108:
- *
- * "vkCmdCopyQueryPoolResults is guaranteed to see the effect of
- * previous uses of vkCmdResetQueryPool in the same queue, without any
- * additional synchronization."
- *
- * So, we have to flush the caches if the compute shader path was used.
- */
- emit_query_flush(cmd_buffer, pool);
-
- switch (pool->type) {
- case VK_QUERY_TYPE_OCCLUSION:
- if (flags & VK_QUERY_RESULT_WAIT_BIT) {
- unsigned enabled_rb_mask = cmd_buffer->device->physical_device->rad_info.enabled_rb_mask;
- uint32_t rb_avail_offset = 16 * util_last_bit(enabled_rb_mask) - 4;
- for(unsigned i = 0; i < queryCount; ++i, dest_va += stride) {
- unsigned query = firstQuery + i;
- uint64_t src_va = va + query * pool->stride + rb_avail_offset;
-
- radeon_check_space(cmd_buffer->device->ws, cs, 7);
-
- /* Waits on the upper word of the last DB entry */
- radv_cp_wait_mem(cs, WAIT_REG_MEM_GREATER_OR_EQUAL,
- src_va, 0x80000000, 0xffffffff);
- }
- }
- radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.occlusion_query_pipeline,
- pool->bo, dst_buffer->bo, firstQuery * pool->stride,
- dst_buffer->offset + dstOffset,
- pool->stride, stride,
- queryCount, flags, 0, 0);
- break;
- case VK_QUERY_TYPE_PIPELINE_STATISTICS:
- if (flags & VK_QUERY_RESULT_WAIT_BIT) {
- for(unsigned i = 0; i < queryCount; ++i, dest_va += stride) {
- unsigned query = firstQuery + i;
-
- radeon_check_space(cmd_buffer->device->ws, cs, 7);
-
- uint64_t avail_va = va + pool->availability_offset + 4 * query;
-
- /* This waits on the ME. All copies below are done on the ME */
- radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL,
- avail_va, 1, 0xffffffff);
- }
- }
- radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.pipeline_statistics_query_pipeline,
- pool->bo, dst_buffer->bo, firstQuery * pool->stride,
- dst_buffer->offset + dstOffset,
- pool->stride, stride, queryCount, flags,
- pool->pipeline_stats_mask,
- pool->availability_offset + 4 * firstQuery);
- break;
- case VK_QUERY_TYPE_TIMESTAMP:
- if (flags & VK_QUERY_RESULT_WAIT_BIT) {
- for(unsigned i = 0; i < queryCount; ++i, dest_va += stride) {
- unsigned query = firstQuery + i;
- uint64_t local_src_va = va + query * pool->stride;
-
- radeon_check_space(cmd_buffer->device->ws, cs, 7);
-
- /* Wait on the high 32 bits of the timestamp in
- * case the low part is 0xffffffff.
- */
- radv_cp_wait_mem(cs, WAIT_REG_MEM_NOT_EQUAL,
- local_src_va + 4,
- TIMESTAMP_NOT_READY >> 32,
- 0xffffffff);
- }
- }
-
- radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.timestamp_query_pipeline,
- pool->bo, dst_buffer->bo,
- firstQuery * pool->stride,
- dst_buffer->offset + dstOffset,
- pool->stride, stride,
- queryCount, flags, 0, 0);
- break;
- case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
- if (flags & VK_QUERY_RESULT_WAIT_BIT) {
- for(unsigned i = 0; i < queryCount; i++) {
- unsigned query = firstQuery + i;
- uint64_t src_va = va + query * pool->stride;
-
- radeon_check_space(cmd_buffer->device->ws, cs, 7 * 4);
-
- /* Wait on the upper word of all results. */
- for (unsigned j = 0; j < 4; j++, src_va += 8) {
- radv_cp_wait_mem(cs, WAIT_REG_MEM_GREATER_OR_EQUAL,
- src_va + 4, 0x80000000,
- 0xffffffff);
- }
- }
- }
-
- radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.tfb_query_pipeline,
- pool->bo, dst_buffer->bo,
- firstQuery * pool->stride,
- dst_buffer->offset + dstOffset,
- pool->stride, stride,
- queryCount, flags, 0, 0);
- break;
- default:
- unreachable("trying to get results of unhandled query type");
- }
-
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
+ RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ uint64_t va = radv_buffer_get_va(pool->bo);
+ uint64_t dest_va = radv_buffer_get_va(dst_buffer->bo);
+ dest_va += dst_buffer->offset + dstOffset;
+
+ radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, pool->bo);
+ radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dst_buffer->bo);
+
+ /* From the Vulkan spec 1.1.108:
+ *
+ * "vkCmdCopyQueryPoolResults is guaranteed to see the effect of
+ * previous uses of vkCmdResetQueryPool in the same queue, without any
+ * additional synchronization."
+ *
+ * So, we have to flush the caches if the compute shader path was used.
+ */
+ emit_query_flush(cmd_buffer, pool);
+
+ switch (pool->type) {
+ case VK_QUERY_TYPE_OCCLUSION:
+ if (flags & VK_QUERY_RESULT_WAIT_BIT) {
+ unsigned enabled_rb_mask = cmd_buffer->device->physical_device->rad_info.enabled_rb_mask;
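+         /* Each DB stores a 16-byte start/end counter pair; point at the
+          * upper dword of the end counter of the last enabled DB. */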
+ uint32_t rb_avail_offset = 16 * util_last_bit(enabled_rb_mask) - 4;
+ for (unsigned i = 0; i < queryCount; ++i, dest_va += stride) {
+ unsigned query = firstQuery + i;
+ uint64_t src_va = va + query * pool->stride + rb_avail_offset;
+
+ radeon_check_space(cmd_buffer->device->ws, cs, 7);
+
+ /* Waits on the upper word of the last DB entry */
+ radv_cp_wait_mem(cs, WAIT_REG_MEM_GREATER_OR_EQUAL, src_va, 0x80000000, 0xffffffff);
+ }
+ }
+ radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.occlusion_query_pipeline,
+ pool->bo, dst_buffer->bo, firstQuery * pool->stride,
+ dst_buffer->offset + dstOffset, pool->stride, stride, queryCount, flags, 0,
+ 0);
+ break;
+ case VK_QUERY_TYPE_PIPELINE_STATISTICS:
+ if (flags & VK_QUERY_RESULT_WAIT_BIT) {
+ for (unsigned i = 0; i < queryCount; ++i, dest_va += stride) {
+ unsigned query = firstQuery + i;
+
+ radeon_check_space(cmd_buffer->device->ws, cs, 7);
+
+ uint64_t avail_va = va + pool->availability_offset + 4 * query;
+
+ /* This waits on the ME. All copies below are done on the ME */
+ radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, avail_va, 1, 0xffffffff);
+ }
+ }
+ radv_query_shader(cmd_buffer,
+ &cmd_buffer->device->meta_state.query.pipeline_statistics_query_pipeline,
+ pool->bo, dst_buffer->bo, firstQuery * pool->stride,
+ dst_buffer->offset + dstOffset, pool->stride, stride, queryCount, flags,
+ pool->pipeline_stats_mask, pool->availability_offset + 4 * firstQuery);
+ break;
+ case VK_QUERY_TYPE_TIMESTAMP:
+ if (flags & VK_QUERY_RESULT_WAIT_BIT) {
+ for (unsigned i = 0; i < queryCount; ++i, dest_va += stride) {
+ unsigned query = firstQuery + i;
+ uint64_t local_src_va = va + query * pool->stride;
+
+ radeon_check_space(cmd_buffer->device->ws, cs, 7);
+
+ /* Wait on the high 32 bits of the timestamp in
+ * case the low part is 0xffffffff.
+ */
+ radv_cp_wait_mem(cs, WAIT_REG_MEM_NOT_EQUAL, local_src_va + 4,
+ TIMESTAMP_NOT_READY >> 32, 0xffffffff);
+ }
+ }
+
+ radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.timestamp_query_pipeline,
+ pool->bo, dst_buffer->bo, firstQuery * pool->stride,
+ dst_buffer->offset + dstOffset, pool->stride, stride, queryCount, flags, 0,
+ 0);
+ break;
+ case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
+ if (flags & VK_QUERY_RESULT_WAIT_BIT) {
+ for (unsigned i = 0; i < queryCount; i++) {
+ unsigned query = firstQuery + i;
+ uint64_t src_va = va + query * pool->stride;
+
+ radeon_check_space(cmd_buffer->device->ws, cs, 7 * 4);
+
+ /* Wait on the upper word of all results. */
+ for (unsigned j = 0; j < 4; j++, src_va += 8) {
+ radv_cp_wait_mem(cs, WAIT_REG_MEM_GREATER_OR_EQUAL, src_va + 4, 0x80000000,
+ 0xffffffff);
+ }
+ }
+ }
+
+ radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.tfb_query_pipeline,
+ pool->bo, dst_buffer->bo, firstQuery * pool->stride,
+ dst_buffer->offset + dstOffset, pool->stride, stride, queryCount, flags, 0,
+ 0);
+ break;
+ default:
+ unreachable("trying to get results of unhandled query type");
+ }
}
-void radv_CmdResetQueryPool(
- VkCommandBuffer commandBuffer,
- VkQueryPool queryPool,
- uint32_t firstQuery,
- uint32_t queryCount)
+void
+radv_CmdResetQueryPool(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t firstQuery,
+ uint32_t queryCount)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
- uint32_t value = pool->type == VK_QUERY_TYPE_TIMESTAMP
- ? (uint32_t)TIMESTAMP_NOT_READY : 0;
- uint32_t flush_bits = 0;
-
- /* Make sure to sync all previous work if the given command buffer has
- * pending active queries. Otherwise the GPU might write queries data
- * after the reset operation.
- */
- cmd_buffer->state.flush_bits |= cmd_buffer->active_query_flush_bits;
-
- flush_bits |= radv_fill_buffer(cmd_buffer, NULL, pool->bo,
- firstQuery * pool->stride,
- queryCount * pool->stride, value);
-
- if (pool->type == VK_QUERY_TYPE_PIPELINE_STATISTICS) {
- flush_bits |= radv_fill_buffer(cmd_buffer, NULL, pool->bo,
- pool->availability_offset + firstQuery * 4,
- queryCount * 4, 0);
- }
-
- if (flush_bits) {
- /* Only need to flush caches for the compute shader path. */
- cmd_buffer->pending_reset_query = true;
- cmd_buffer->state.flush_bits |= flush_bits;
- }
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
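+   /* Timestamp slots are filled with the TIMESTAMP_NOT_READY sentinel so that
+    * availability can be inferred from the value itself; other pool types are
+    * simply zeroed.
+    */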
+ uint32_t value = pool->type == VK_QUERY_TYPE_TIMESTAMP ? (uint32_t)TIMESTAMP_NOT_READY : 0;
+ uint32_t flush_bits = 0;
+
+ /* Make sure to sync all previous work if the given command buffer has
+    * pending active queries. Otherwise the GPU might write query data
+ * after the reset operation.
+ */
+ cmd_buffer->state.flush_bits |= cmd_buffer->active_query_flush_bits;
+
+ flush_bits |= radv_fill_buffer(cmd_buffer, NULL, pool->bo, firstQuery * pool->stride,
+ queryCount * pool->stride, value);
+
+ if (pool->type == VK_QUERY_TYPE_PIPELINE_STATISTICS) {
+ flush_bits |= radv_fill_buffer(cmd_buffer, NULL, pool->bo,
+ pool->availability_offset + firstQuery * 4, queryCount * 4, 0);
+ }
+
+ if (flush_bits) {
+ /* Only need to flush caches for the compute shader path. */
+ cmd_buffer->pending_reset_query = true;
+ cmd_buffer->state.flush_bits |= flush_bits;
+ }
}
-void radv_ResetQueryPool(
- VkDevice _device,
- VkQueryPool queryPool,
- uint32_t firstQuery,
- uint32_t queryCount)
+void
+radv_ResetQueryPool(VkDevice _device, VkQueryPool queryPool, uint32_t firstQuery,
+ uint32_t queryCount)
{
- RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
+ RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
- uint32_t value = pool->type == VK_QUERY_TYPE_TIMESTAMP
- ? (uint32_t)TIMESTAMP_NOT_READY : 0;
- uint32_t *data = (uint32_t*)(pool->ptr + firstQuery * pool->stride);
- uint32_t *data_end = (uint32_t*)(pool->ptr + (firstQuery + queryCount) * pool->stride);
+ uint32_t value = pool->type == VK_QUERY_TYPE_TIMESTAMP ? (uint32_t)TIMESTAMP_NOT_READY : 0;
+ uint32_t *data = (uint32_t *)(pool->ptr + firstQuery * pool->stride);
+ uint32_t *data_end = (uint32_t *)(pool->ptr + (firstQuery + queryCount) * pool->stride);
- for(uint32_t *p = data; p != data_end; ++p)
- *p = value;
+ for (uint32_t *p = data; p != data_end; ++p)
+ *p = value;
- if (pool->type == VK_QUERY_TYPE_PIPELINE_STATISTICS) {
- memset(pool->ptr + pool->availability_offset + firstQuery * 4,
- 0, queryCount * 4);
- }
+ if (pool->type == VK_QUERY_TYPE_PIPELINE_STATISTICS) {
+ memset(pool->ptr + pool->availability_offset + firstQuery * 4, 0, queryCount * 4);
+ }
}
-static unsigned event_type_for_stream(unsigned stream)
+static unsigned
+event_type_for_stream(unsigned stream)
{
- switch (stream) {
- default:
- case 0: return V_028A90_SAMPLE_STREAMOUTSTATS;
- case 1: return V_028A90_SAMPLE_STREAMOUTSTATS1;
- case 2: return V_028A90_SAMPLE_STREAMOUTSTATS2;
- case 3: return V_028A90_SAMPLE_STREAMOUTSTATS3;
- }
+ switch (stream) {
+ default:
+ case 0:
+ return V_028A90_SAMPLE_STREAMOUTSTATS;
+ case 1:
+ return V_028A90_SAMPLE_STREAMOUTSTATS1;
+ case 2:
+ return V_028A90_SAMPLE_STREAMOUTSTATS2;
+ case 3:
+ return V_028A90_SAMPLE_STREAMOUTSTATS3;
+ }
}
-static void emit_begin_query(struct radv_cmd_buffer *cmd_buffer,
- struct radv_query_pool *pool,
- uint64_t va,
- VkQueryType query_type,
- VkQueryControlFlags flags,
- uint32_t index)
+static void
+emit_begin_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool, uint64_t va,
+ VkQueryType query_type, VkQueryControlFlags flags, uint32_t index)
{
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- switch (query_type) {
- case VK_QUERY_TYPE_OCCLUSION:
- radeon_check_space(cmd_buffer->device->ws, cs, 7);
-
- ++cmd_buffer->state.active_occlusion_queries;
- if (cmd_buffer->state.active_occlusion_queries == 1) {
- if (flags & VK_QUERY_CONTROL_PRECISE_BIT) {
- /* This is the first occlusion query, enable
- * the hint if the precision bit is set.
- */
- cmd_buffer->state.perfect_occlusion_queries_enabled = true;
- }
-
- radv_set_db_count_control(cmd_buffer);
- } else {
- if ((flags & VK_QUERY_CONTROL_PRECISE_BIT) &&
- !cmd_buffer->state.perfect_occlusion_queries_enabled) {
- /* This is not the first query, but this one
- * needs to enable precision, DB_COUNT_CONTROL
- * has to be updated accordingly.
- */
- cmd_buffer->state.perfect_occlusion_queries_enabled = true;
-
- radv_set_db_count_control(cmd_buffer);
- }
- }
-
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1));
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- break;
- case VK_QUERY_TYPE_PIPELINE_STATISTICS:
- radeon_check_space(cmd_buffer->device->ws, cs, 4);
-
- ++cmd_buffer->state.active_pipeline_queries;
- if (cmd_buffer->state.active_pipeline_queries == 1) {
- cmd_buffer->state.flush_bits &= ~RADV_CMD_FLAG_STOP_PIPELINE_STATS;
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_START_PIPELINE_STATS;
- }
-
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
-
- if (radv_query_pool_needs_gds(cmd_buffer->device, pool)) {
- int idx = radv_get_pipeline_statistics_index(VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT);
-
- /* Make sure GDS is idle before copying the value. */
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_INV_L2;
- si_emit_cache_flush(cmd_buffer);
-
- va += 8 * idx;
-
- radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_GDS) |
- COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
- COPY_DATA_WR_CONFIRM);
- radeon_emit(cs, 0);
- radeon_emit(cs, 0);
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
-
- /* Record that the command buffer needs GDS. */
- cmd_buffer->gds_needed = true;
-
- cmd_buffer->state.active_pipeline_gds_queries++;
- }
- break;
- case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
- radeon_check_space(cmd_buffer->device->ws, cs, 4);
-
- assert(index < MAX_SO_STREAMS);
-
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
- radeon_emit(cs, EVENT_TYPE(event_type_for_stream(index)) | EVENT_INDEX(3));
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- break;
- default:
- unreachable("beginning unhandled query type");
- }
-
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ switch (query_type) {
+ case VK_QUERY_TYPE_OCCLUSION:
+ radeon_check_space(cmd_buffer->device->ws, cs, 7);
+
+ ++cmd_buffer->state.active_occlusion_queries;
+ if (cmd_buffer->state.active_occlusion_queries == 1) {
+ if (flags & VK_QUERY_CONTROL_PRECISE_BIT) {
+ /* This is the first occlusion query, enable
+ * the hint if the precision bit is set.
+ */
+ cmd_buffer->state.perfect_occlusion_queries_enabled = true;
+ }
+
+ radv_set_db_count_control(cmd_buffer);
+ } else {
+ if ((flags & VK_QUERY_CONTROL_PRECISE_BIT) &&
+ !cmd_buffer->state.perfect_occlusion_queries_enabled) {
+ /* This is not the first query, but this one
+          * needs to enable precision, so DB_COUNT_CONTROL
+ * has to be updated accordingly.
+ */
+ cmd_buffer->state.perfect_occlusion_queries_enabled = true;
+
+ radv_set_db_count_control(cmd_buffer);
+ }
+ }
+
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ break;
+ case VK_QUERY_TYPE_PIPELINE_STATISTICS:
+ radeon_check_space(cmd_buffer->device->ws, cs, 4);
+
+ ++cmd_buffer->state.active_pipeline_queries;
+ if (cmd_buffer->state.active_pipeline_queries == 1) {
+ cmd_buffer->state.flush_bits &= ~RADV_CMD_FLAG_STOP_PIPELINE_STATS;
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_START_PIPELINE_STATS;
+ }
+
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+
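+      /* With NGG, the GS primitive count is accumulated in GDS rather than by
+       * the pipeline-statistics counters, so snapshot it into the
+       * GS_PRIMITIVES slot of this query.
+       */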
+ if (radv_query_pool_needs_gds(cmd_buffer->device, pool)) {
+ int idx = radv_get_pipeline_statistics_index(
+ VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT);
+
+ /* Make sure GDS is idle before copying the value. */
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_L2;
+ si_emit_cache_flush(cmd_buffer);
+
+ va += 8 * idx;
+
+ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_GDS) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
+ COPY_DATA_WR_CONFIRM);
+ radeon_emit(cs, 0);
+ radeon_emit(cs, 0);
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+
+ /* Record that the command buffer needs GDS. */
+ cmd_buffer->gds_needed = true;
+
+ cmd_buffer->state.active_pipeline_gds_queries++;
+ }
+ break;
+ case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
+ radeon_check_space(cmd_buffer->device->ws, cs, 4);
+
+ assert(index < MAX_SO_STREAMS);
+
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
+ radeon_emit(cs, EVENT_TYPE(event_type_for_stream(index)) | EVENT_INDEX(3));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ break;
+ default:
+ unreachable("beginning unhandled query type");
+ }
}
-static void emit_end_query(struct radv_cmd_buffer *cmd_buffer,
- struct radv_query_pool *pool,
- uint64_t va, uint64_t avail_va,
- VkQueryType query_type, uint32_t index)
+static void
+emit_end_query(struct radv_cmd_buffer *cmd_buffer, struct radv_query_pool *pool, uint64_t va,
+ uint64_t avail_va, VkQueryType query_type, uint32_t index)
{
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- switch (query_type) {
- case VK_QUERY_TYPE_OCCLUSION:
- radeon_check_space(cmd_buffer->device->ws, cs, 14);
-
- cmd_buffer->state.active_occlusion_queries--;
- if (cmd_buffer->state.active_occlusion_queries == 0) {
- radv_set_db_count_control(cmd_buffer);
-
- /* Reset the perfect occlusion queries hint now that no
- * queries are active.
- */
- cmd_buffer->state.perfect_occlusion_queries_enabled = false;
- }
-
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1));
- radeon_emit(cs, va + 8);
- radeon_emit(cs, (va + 8) >> 32);
-
- break;
- case VK_QUERY_TYPE_PIPELINE_STATISTICS:
- radeon_check_space(cmd_buffer->device->ws, cs, 16);
-
- cmd_buffer->state.active_pipeline_queries--;
- if (cmd_buffer->state.active_pipeline_queries == 0) {
- cmd_buffer->state.flush_bits &= ~RADV_CMD_FLAG_START_PIPELINE_STATS;
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_STOP_PIPELINE_STATS;
- }
- va += pipelinestat_block_size;
-
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
-
- si_cs_emit_write_event_eop(cs,
- cmd_buffer->device->physical_device->rad_info.chip_class,
- radv_cmd_buffer_uses_mec(cmd_buffer),
- V_028A90_BOTTOM_OF_PIPE_TS, 0,
- EOP_DST_SEL_MEM,
- EOP_DATA_SEL_VALUE_32BIT,
- avail_va, 1,
- cmd_buffer->gfx9_eop_bug_va);
-
- if (radv_query_pool_needs_gds(cmd_buffer->device, pool)) {
- int idx = radv_get_pipeline_statistics_index(VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT);
-
- /* Make sure GDS is idle before copying the value. */
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_INV_L2;
- si_emit_cache_flush(cmd_buffer);
-
- va += 8 * idx;
-
- radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_GDS) |
- COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
- COPY_DATA_WR_CONFIRM);
- radeon_emit(cs, 0);
- radeon_emit(cs, 0);
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
-
- cmd_buffer->state.active_pipeline_gds_queries--;
- }
- break;
- case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
- radeon_check_space(cmd_buffer->device->ws, cs, 4);
-
- assert(index < MAX_SO_STREAMS);
-
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
- radeon_emit(cs, EVENT_TYPE(event_type_for_stream(index)) | EVENT_INDEX(3));
- radeon_emit(cs, (va + 16));
- radeon_emit(cs, (va + 16) >> 32);
- break;
- default:
- unreachable("ending unhandled query type");
- }
-
- cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_INV_L2 |
- RADV_CMD_FLAG_INV_VCACHE;
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
- cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
- RADV_CMD_FLAG_FLUSH_AND_INV_DB;
- }
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ switch (query_type) {
+ case VK_QUERY_TYPE_OCCLUSION:
+ radeon_check_space(cmd_buffer->device->ws, cs, 14);
+
+ cmd_buffer->state.active_occlusion_queries--;
+ if (cmd_buffer->state.active_occlusion_queries == 0) {
+ radv_set_db_count_control(cmd_buffer);
+
+ /* Reset the perfect occlusion queries hint now that no
+ * queries are active.
+ */
+ cmd_buffer->state.perfect_occlusion_queries_enabled = false;
+ }
+
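+      /* Occlusion results are stored as begin/end pairs per RB; this
+       * ZPASS_DONE writes the end counters, 8 bytes into each pair.
+       */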
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_ZPASS_DONE) | EVENT_INDEX(1));
+ radeon_emit(cs, va + 8);
+ radeon_emit(cs, (va + 8) >> 32);
+
+ break;
+ case VK_QUERY_TYPE_PIPELINE_STATISTICS:
+ radeon_check_space(cmd_buffer->device->ws, cs, 16);
+
+ cmd_buffer->state.active_pipeline_queries--;
+ if (cmd_buffer->state.active_pipeline_queries == 0) {
+ cmd_buffer->state.flush_bits &= ~RADV_CMD_FLAG_START_PIPELINE_STATS;
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_STOP_PIPELINE_STATS;
+ }
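+      /* The end snapshot of the statistics is written right after the begin
+       * block.
+       */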
+ va += pipelinestat_block_size;
+
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+
+ si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.chip_class,
+ radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_BOTTOM_OF_PIPE_TS,
+ 0, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, avail_va, 1,
+ cmd_buffer->gfx9_eop_bug_va);
+
+ if (radv_query_pool_needs_gds(cmd_buffer->device, pool)) {
+ int idx = radv_get_pipeline_statistics_index(
+ VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT);
+
+ /* Make sure GDS is idle before copying the value. */
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_L2;
+ si_emit_cache_flush(cmd_buffer);
+
+ va += 8 * idx;
+
+ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_GDS) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
+ COPY_DATA_WR_CONFIRM);
+ radeon_emit(cs, 0);
+ radeon_emit(cs, 0);
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+
+ cmd_buffer->state.active_pipeline_gds_queries--;
+ }
+ break;
+ case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
+ radeon_check_space(cmd_buffer->device->ws, cs, 4);
+
+ assert(index < MAX_SO_STREAMS);
+
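+      /* The end streamout statistics follow the two 64-bit begin counters,
+       * hence the 16-byte offset.
+       */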
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
+ radeon_emit(cs, EVENT_TYPE(event_type_for_stream(index)) | EVENT_INDEX(3));
+ radeon_emit(cs, (va + 16));
+ radeon_emit(cs, (va + 16) >> 32);
+ break;
+ default:
+ unreachable("ending unhandled query type");
+ }
+
+ cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
+ RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_L2 |
+ RADV_CMD_FLAG_INV_VCACHE;
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
+ cmd_buffer->active_query_flush_bits |=
+ RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB;
+ }
}
-void radv_CmdBeginQueryIndexedEXT(
- VkCommandBuffer commandBuffer,
- VkQueryPool queryPool,
- uint32_t query,
- VkQueryControlFlags flags,
- uint32_t index)
+void
+radv_CmdBeginQueryIndexedEXT(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query,
+ VkQueryControlFlags flags, uint32_t index)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- uint64_t va = radv_buffer_get_va(pool->bo);
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ uint64_t va = radv_buffer_get_va(pool->bo);
- radv_cs_add_buffer(cmd_buffer->device->ws, cs, pool->bo);
+ radv_cs_add_buffer(cmd_buffer->device->ws, cs, pool->bo);
- emit_query_flush(cmd_buffer, pool);
+ emit_query_flush(cmd_buffer, pool);
- va += pool->stride * query;
+ va += pool->stride * query;
- emit_begin_query(cmd_buffer, pool, va, pool->type, flags, index);
+ emit_begin_query(cmd_buffer, pool, va, pool->type, flags, index);
}
-void radv_CmdBeginQuery(
- VkCommandBuffer commandBuffer,
- VkQueryPool queryPool,
- uint32_t query,
- VkQueryControlFlags flags)
+void
+radv_CmdBeginQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query,
+ VkQueryControlFlags flags)
{
- radv_CmdBeginQueryIndexedEXT(commandBuffer, queryPool, query, flags, 0);
+ radv_CmdBeginQueryIndexedEXT(commandBuffer, queryPool, query, flags, 0);
}
-void radv_CmdEndQueryIndexedEXT(
- VkCommandBuffer commandBuffer,
- VkQueryPool queryPool,
- uint32_t query,
- uint32_t index)
+void
+radv_CmdEndQueryIndexedEXT(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query,
+ uint32_t index)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
- uint64_t va = radv_buffer_get_va(pool->bo);
- uint64_t avail_va = va + pool->availability_offset + 4 * query;
- va += pool->stride * query;
-
- /* Do not need to add the pool BO to the list because the query must
- * currently be active, which means the BO is already in the list.
- */
- emit_end_query(cmd_buffer, pool, va, avail_va, pool->type, index);
-
- /*
- * For multiview we have to emit a query for each bit in the mask,
- * however the first query we emit will get the totals for all the
- * operations, so we don't want to get a real value in the other
- * queries. This emits a fake begin/end sequence so the waiting
- * code gets a completed query value and doesn't hang, but the
- * query returns 0.
- */
- if (cmd_buffer->state.subpass && cmd_buffer->state.subpass->view_mask) {
- for (unsigned i = 1; i < util_bitcount(cmd_buffer->state.subpass->view_mask); i++) {
- va += pool->stride;
- avail_va += 4;
- emit_begin_query(cmd_buffer, pool, va, pool->type, 0, 0);
- emit_end_query(cmd_buffer, pool, va, avail_va, pool->type, 0);
- }
- }
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
+ uint64_t va = radv_buffer_get_va(pool->bo);
+ uint64_t avail_va = va + pool->availability_offset + 4 * query;
+ va += pool->stride * query;
+
+   /* We do not need to add the pool BO to the list because the query must
+ * currently be active, which means the BO is already in the list.
+ */
+ emit_end_query(cmd_buffer, pool, va, avail_va, pool->type, index);
+
+ /*
+    * For multiview we have to emit a query for each bit in the mask;
+    * however, the first query we emit will get the totals for all the
+ * operations, so we don't want to get a real value in the other
+ * queries. This emits a fake begin/end sequence so the waiting
+ * code gets a completed query value and doesn't hang, but the
+ * query returns 0.
+ */
+ if (cmd_buffer->state.subpass && cmd_buffer->state.subpass->view_mask) {
+ for (unsigned i = 1; i < util_bitcount(cmd_buffer->state.subpass->view_mask); i++) {
+ va += pool->stride;
+ avail_va += 4;
+ emit_begin_query(cmd_buffer, pool, va, pool->type, 0, 0);
+ emit_end_query(cmd_buffer, pool, va, avail_va, pool->type, 0);
+ }
+ }
}
-void radv_CmdEndQuery(
- VkCommandBuffer commandBuffer,
- VkQueryPool queryPool,
- uint32_t query)
+void
+radv_CmdEndQuery(VkCommandBuffer commandBuffer, VkQueryPool queryPool, uint32_t query)
{
- radv_CmdEndQueryIndexedEXT(commandBuffer, queryPool, query, 0);
+ radv_CmdEndQueryIndexedEXT(commandBuffer, queryPool, query, 0);
}
-void radv_CmdWriteTimestamp(
- VkCommandBuffer commandBuffer,
- VkPipelineStageFlagBits pipelineStage,
- VkQueryPool queryPool,
- uint32_t query)
+void
+radv_CmdWriteTimestamp(VkCommandBuffer commandBuffer, VkPipelineStageFlagBits pipelineStage,
+ VkQueryPool queryPool, uint32_t query)
{
- RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
- RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
- bool mec = radv_cmd_buffer_uses_mec(cmd_buffer);
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- uint64_t va = radv_buffer_get_va(pool->bo);
- uint64_t query_va = va + pool->stride * query;
-
- radv_cs_add_buffer(cmd_buffer->device->ws, cs, pool->bo);
-
- emit_query_flush(cmd_buffer, pool);
-
- int num_queries = 1;
- if (cmd_buffer->state.subpass && cmd_buffer->state.subpass->view_mask)
- num_queries = util_bitcount(cmd_buffer->state.subpass->view_mask);
-
- ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 28 * num_queries);
-
- for (unsigned i = 0; i < num_queries; i++) {
- switch(pipelineStage) {
- case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT:
- radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_COUNT_SEL | COPY_DATA_WR_CONFIRM |
- COPY_DATA_SRC_SEL(COPY_DATA_TIMESTAMP) |
- COPY_DATA_DST_SEL(V_370_MEM));
- radeon_emit(cs, 0);
- radeon_emit(cs, 0);
- radeon_emit(cs, query_va);
- radeon_emit(cs, query_va >> 32);
- break;
- default:
- si_cs_emit_write_event_eop(cs,
- cmd_buffer->device->physical_device->rad_info.chip_class,
- mec,
- V_028A90_BOTTOM_OF_PIPE_TS, 0,
- EOP_DST_SEL_MEM,
- EOP_DATA_SEL_TIMESTAMP,
- query_va, 0,
- cmd_buffer->gfx9_eop_bug_va);
- break;
- }
- query_va += pool->stride;
- }
-
- cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_INV_L2 |
- RADV_CMD_FLAG_INV_VCACHE;
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
- cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
- RADV_CMD_FLAG_FLUSH_AND_INV_DB;
- }
-
- assert(cmd_buffer->cs->cdw <= cdw_max);
+ RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+ RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
+ bool mec = radv_cmd_buffer_uses_mec(cmd_buffer);
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ uint64_t va = radv_buffer_get_va(pool->bo);
+ uint64_t query_va = va + pool->stride * query;
+
+ radv_cs_add_buffer(cmd_buffer->device->ws, cs, pool->bo);
+
+ emit_query_flush(cmd_buffer, pool);
+
+ int num_queries = 1;
+ if (cmd_buffer->state.subpass && cmd_buffer->state.subpass->view_mask)
+ num_queries = util_bitcount(cmd_buffer->state.subpass->view_mask);
+
+ ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 28 * num_queries);
+
+ for (unsigned i = 0; i < num_queries; i++) {
+ switch (pipelineStage) {
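+      /* TOP_OF_PIPE copies the current GPU clock to memory immediately on the
+       * ME; all other stages use a bottom-of-pipe EOP event so the timestamp
+       * is only written after prior work has finished.
+       */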
+ case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT:
+ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+ radeon_emit(cs, COPY_DATA_COUNT_SEL | COPY_DATA_WR_CONFIRM |
+ COPY_DATA_SRC_SEL(COPY_DATA_TIMESTAMP) | COPY_DATA_DST_SEL(V_370_MEM));
+ radeon_emit(cs, 0);
+ radeon_emit(cs, 0);
+ radeon_emit(cs, query_va);
+ radeon_emit(cs, query_va >> 32);
+ break;
+ default:
+ si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.chip_class,
+ mec, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM,
+ EOP_DATA_SEL_TIMESTAMP, query_va, 0,
+ cmd_buffer->gfx9_eop_bug_va);
+ break;
+ }
+ query_va += pool->stride;
+ }
+
+ cmd_buffer->active_query_flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
+ RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_L2 |
+ RADV_CMD_FLAG_INV_VCACHE;
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
+ cmd_buffer->active_query_flush_bits |=
+ RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB;
+ }
+
+ assert(cmd_buffer->cs->cdw <= cdw_max);
}
diff --git a/src/amd/vulkan/radv_radeon_winsys.h b/src/amd/vulkan/radv_radeon_winsys.h
index 4189e3b3dba..5a05c3ee440 100644
--- a/src/amd/vulkan/radv_radeon_winsys.h
+++ b/src/amd/vulkan/radv_radeon_winsys.h
@@ -29,337 +29,313 @@
#ifndef RADV_RADEON_WINSYS_H
#define RADV_RADEON_WINSYS_H
-#include <stdio.h>
-#include <stdint.h>
#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include "util/u_math.h"
+#include "util/u_memory.h"
#include <vulkan/vulkan.h>
#include "amd_family.h"
-#include "util/u_memory.h"
-#include "util/u_math.h"
struct radeon_info;
struct ac_surf_info;
struct radeon_surf;
enum radeon_bo_domain { /* bitfield */
- RADEON_DOMAIN_GTT = 2,
- RADEON_DOMAIN_VRAM = 4,
- RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT,
- RADEON_DOMAIN_GDS = 8,
- RADEON_DOMAIN_OA = 16,
+ RADEON_DOMAIN_GTT = 2,
+ RADEON_DOMAIN_VRAM = 4,
+ RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT,
+ RADEON_DOMAIN_GDS = 8,
+ RADEON_DOMAIN_OA = 16,
};
enum radeon_bo_flag { /* bitfield */
- RADEON_FLAG_GTT_WC = (1 << 0),
- RADEON_FLAG_CPU_ACCESS = (1 << 1),
- RADEON_FLAG_NO_CPU_ACCESS = (1 << 2),
- RADEON_FLAG_VIRTUAL = (1 << 3),
- RADEON_FLAG_VA_UNCACHED = (1 << 4),
- RADEON_FLAG_IMPLICIT_SYNC = (1 << 5),
- RADEON_FLAG_NO_INTERPROCESS_SHARING = (1 << 6),
- RADEON_FLAG_READ_ONLY = (1 << 7),
- RADEON_FLAG_32BIT = (1 << 8),
- RADEON_FLAG_PREFER_LOCAL_BO = (1 << 9),
- RADEON_FLAG_ZERO_VRAM = (1 << 10),
+ RADEON_FLAG_GTT_WC = (1 << 0),
+ RADEON_FLAG_CPU_ACCESS = (1 << 1),
+ RADEON_FLAG_NO_CPU_ACCESS = (1 << 2),
+ RADEON_FLAG_VIRTUAL = (1 << 3),
+ RADEON_FLAG_VA_UNCACHED = (1 << 4),
+ RADEON_FLAG_IMPLICIT_SYNC = (1 << 5),
+ RADEON_FLAG_NO_INTERPROCESS_SHARING = (1 << 6),
+ RADEON_FLAG_READ_ONLY = (1 << 7),
+ RADEON_FLAG_32BIT = (1 << 8),
+ RADEON_FLAG_PREFER_LOCAL_BO = (1 << 9),
+ RADEON_FLAG_ZERO_VRAM = (1 << 10),
};
enum radeon_ctx_priority {
- RADEON_CTX_PRIORITY_INVALID = -1,
- RADEON_CTX_PRIORITY_LOW = 0,
- RADEON_CTX_PRIORITY_MEDIUM,
- RADEON_CTX_PRIORITY_HIGH,
- RADEON_CTX_PRIORITY_REALTIME,
+ RADEON_CTX_PRIORITY_INVALID = -1,
+ RADEON_CTX_PRIORITY_LOW = 0,
+ RADEON_CTX_PRIORITY_MEDIUM,
+ RADEON_CTX_PRIORITY_HIGH,
+ RADEON_CTX_PRIORITY_REALTIME,
};
enum radeon_value_id {
- RADEON_ALLOCATED_VRAM,
- RADEON_ALLOCATED_VRAM_VIS,
- RADEON_ALLOCATED_GTT,
- RADEON_TIMESTAMP,
- RADEON_NUM_BYTES_MOVED,
- RADEON_NUM_EVICTIONS,
- RADEON_NUM_VRAM_CPU_PAGE_FAULTS,
- RADEON_VRAM_USAGE,
- RADEON_VRAM_VIS_USAGE,
- RADEON_GTT_USAGE,
- RADEON_GPU_TEMPERATURE,
- RADEON_CURRENT_SCLK,
- RADEON_CURRENT_MCLK,
+ RADEON_ALLOCATED_VRAM,
+ RADEON_ALLOCATED_VRAM_VIS,
+ RADEON_ALLOCATED_GTT,
+ RADEON_TIMESTAMP,
+ RADEON_NUM_BYTES_MOVED,
+ RADEON_NUM_EVICTIONS,
+ RADEON_NUM_VRAM_CPU_PAGE_FAULTS,
+ RADEON_VRAM_USAGE,
+ RADEON_VRAM_VIS_USAGE,
+ RADEON_GTT_USAGE,
+ RADEON_GPU_TEMPERATURE,
+ RADEON_CURRENT_SCLK,
+ RADEON_CURRENT_MCLK,
};
struct radeon_cmdbuf {
- unsigned cdw; /* Number of used dwords. */
- unsigned max_dw; /* Maximum number of dwords. */
- uint32_t *buf; /* The base pointer of the chunk. */
+ unsigned cdw; /* Number of used dwords. */
+ unsigned max_dw; /* Maximum number of dwords. */
+ uint32_t *buf; /* The base pointer of the chunk. */
};
-#define RADEON_SURF_TYPE_MASK 0xFF
-#define RADEON_SURF_TYPE_SHIFT 0
-#define RADEON_SURF_TYPE_1D 0
-#define RADEON_SURF_TYPE_2D 1
-#define RADEON_SURF_TYPE_3D 2
-#define RADEON_SURF_TYPE_CUBEMAP 3
-#define RADEON_SURF_TYPE_1D_ARRAY 4
-#define RADEON_SURF_TYPE_2D_ARRAY 5
-#define RADEON_SURF_MODE_MASK 0xFF
-#define RADEON_SURF_MODE_SHIFT 8
-
-#define RADEON_SURF_GET(v, field) (((v) >> RADEON_SURF_ ## field ## _SHIFT) & RADEON_SURF_ ## field ## _MASK)
-#define RADEON_SURF_SET(v, field) (((v) & RADEON_SURF_ ## field ## _MASK) << RADEON_SURF_ ## field ## _SHIFT)
-#define RADEON_SURF_CLR(v, field) ((v) & ~(RADEON_SURF_ ## field ## _MASK << RADEON_SURF_ ## field ## _SHIFT))
+#define RADEON_SURF_TYPE_MASK 0xFF
+#define RADEON_SURF_TYPE_SHIFT 0
+#define RADEON_SURF_TYPE_1D 0
+#define RADEON_SURF_TYPE_2D 1
+#define RADEON_SURF_TYPE_3D 2
+#define RADEON_SURF_TYPE_CUBEMAP 3
+#define RADEON_SURF_TYPE_1D_ARRAY 4
+#define RADEON_SURF_TYPE_2D_ARRAY 5
+#define RADEON_SURF_MODE_MASK 0xFF
+#define RADEON_SURF_MODE_SHIFT 8
+
+#define RADEON_SURF_GET(v, field) \
+ (((v) >> RADEON_SURF_##field##_SHIFT) & RADEON_SURF_##field##_MASK)
+#define RADEON_SURF_SET(v, field) (((v)&RADEON_SURF_##field##_MASK) << RADEON_SURF_##field##_SHIFT)
+#define RADEON_SURF_CLR(v, field) \
+ ((v) & ~(RADEON_SURF_##field##_MASK << RADEON_SURF_##field##_SHIFT))
enum radeon_bo_layout {
- RADEON_LAYOUT_LINEAR = 0,
- RADEON_LAYOUT_TILED,
- RADEON_LAYOUT_SQUARETILED,
+ RADEON_LAYOUT_LINEAR = 0,
+ RADEON_LAYOUT_TILED,
+ RADEON_LAYOUT_SQUARETILED,
- RADEON_LAYOUT_UNKNOWN
+ RADEON_LAYOUT_UNKNOWN
};
/* Tiling info for display code, DRI sharing, and other data. */
struct radeon_bo_metadata {
- /* Tiling flags describing the texture layout for display code
- * and DRI sharing.
- */
- union {
- struct {
- enum radeon_bo_layout microtile;
- enum radeon_bo_layout macrotile;
- unsigned pipe_config;
- unsigned bankw;
- unsigned bankh;
- unsigned tile_split;
- unsigned mtilea;
- unsigned num_banks;
- unsigned stride;
- bool scanout;
- } legacy;
-
- struct {
- /* surface flags */
- unsigned swizzle_mode:5;
- bool scanout;
- uint32_t dcc_offset_256b;
- uint32_t dcc_pitch_max;
- bool dcc_independent_64b_blocks;
- bool dcc_independent_128b_blocks;
- unsigned dcc_max_compressed_block_size;
- } gfx9;
- } u;
-
- /* Additional metadata associated with the buffer, in bytes.
- * The maximum size is 64 * 4. This is opaque for the winsys & kernel.
- * Supported by amdgpu only.
- */
- uint32_t size_metadata;
- uint32_t metadata[64];
+ /* Tiling flags describing the texture layout for display code
+ * and DRI sharing.
+ */
+ union {
+ struct {
+ enum radeon_bo_layout microtile;
+ enum radeon_bo_layout macrotile;
+ unsigned pipe_config;
+ unsigned bankw;
+ unsigned bankh;
+ unsigned tile_split;
+ unsigned mtilea;
+ unsigned num_banks;
+ unsigned stride;
+ bool scanout;
+ } legacy;
+
+ struct {
+ /* surface flags */
+ unsigned swizzle_mode : 5;
+ bool scanout;
+ uint32_t dcc_offset_256b;
+ uint32_t dcc_pitch_max;
+ bool dcc_independent_64b_blocks;
+ bool dcc_independent_128b_blocks;
+ unsigned dcc_max_compressed_block_size;
+ } gfx9;
+ } u;
+
+ /* Additional metadata associated with the buffer, in bytes.
+ * The maximum size is 64 * 4. This is opaque for the winsys & kernel.
+ * Supported by amdgpu only.
+ */
+ uint32_t size_metadata;
+ uint32_t metadata[64];
};
struct radeon_winsys_ctx;
struct radeon_winsys_bo {
- uint64_t va;
- bool is_local;
- bool vram_no_cpu_access;
- bool use_global_list;
- enum radeon_bo_domain initial_domain;
+ uint64_t va;
+ bool is_local;
+ bool vram_no_cpu_access;
+ bool use_global_list;
+ enum radeon_bo_domain initial_domain;
};
struct radv_winsys_sem_counts {
- uint32_t syncobj_count;
- uint32_t syncobj_reset_count; /* for wait only, whether to reset the syncobj */
- uint32_t timeline_syncobj_count;
- uint32_t *syncobj;
- uint64_t *points;
+ uint32_t syncobj_count;
+ uint32_t syncobj_reset_count; /* for wait only, whether to reset the syncobj */
+ uint32_t timeline_syncobj_count;
+ uint32_t *syncobj;
+ uint64_t *points;
};
struct radv_winsys_sem_info {
- bool cs_emit_signal;
- bool cs_emit_wait;
- struct radv_winsys_sem_counts wait;
- struct radv_winsys_sem_counts signal;
+ bool cs_emit_signal;
+ bool cs_emit_wait;
+ struct radv_winsys_sem_counts wait;
+ struct radv_winsys_sem_counts signal;
};
struct radv_winsys_bo_list {
- struct radeon_winsys_bo **bos;
- unsigned count;
+ struct radeon_winsys_bo **bos;
+ unsigned count;
};
/* Kernel effectively allows 0-31. This sets some priorities for fixed
* functionality buffers */
enum {
- RADV_BO_PRIORITY_APPLICATION_MAX = 28,
-
- /* virtual buffers have 0 priority since the priority is not used. */
- RADV_BO_PRIORITY_VIRTUAL = 0,
-
- RADV_BO_PRIORITY_METADATA = 10,
- /* This should be considerably lower than most of the stuff below,
- * but how much lower is hard to say since we don't know application
- * assignments. Put it pretty high since it is GTT anyway. */
- RADV_BO_PRIORITY_QUERY_POOL = 29,
-
- RADV_BO_PRIORITY_DESCRIPTOR = 30,
- RADV_BO_PRIORITY_UPLOAD_BUFFER = 30,
- RADV_BO_PRIORITY_FENCE = 30,
- RADV_BO_PRIORITY_SHADER = 31,
- RADV_BO_PRIORITY_SCRATCH = 31,
- RADV_BO_PRIORITY_CS = 31,
+ RADV_BO_PRIORITY_APPLICATION_MAX = 28,
+
+ /* virtual buffers have 0 priority since the priority is not used. */
+ RADV_BO_PRIORITY_VIRTUAL = 0,
+
+ RADV_BO_PRIORITY_METADATA = 10,
+ /* This should be considerably lower than most of the stuff below,
+ * but how much lower is hard to say since we don't know application
+ * assignments. Put it pretty high since it is GTT anyway. */
+ RADV_BO_PRIORITY_QUERY_POOL = 29,
+
+ RADV_BO_PRIORITY_DESCRIPTOR = 30,
+ RADV_BO_PRIORITY_UPLOAD_BUFFER = 30,
+ RADV_BO_PRIORITY_FENCE = 30,
+ RADV_BO_PRIORITY_SHADER = 31,
+ RADV_BO_PRIORITY_SCRATCH = 31,
+ RADV_BO_PRIORITY_CS = 31,
};
struct radeon_winsys {
- void (*destroy)(struct radeon_winsys *ws);
-
- void (*query_info)(struct radeon_winsys *ws,
- struct radeon_info *info);
+ void (*destroy)(struct radeon_winsys *ws);
- uint64_t (*query_value)(struct radeon_winsys *ws,
- enum radeon_value_id value);
+ void (*query_info)(struct radeon_winsys *ws, struct radeon_info *info);
- bool (*read_registers)(struct radeon_winsys *ws, unsigned reg_offset,
- unsigned num_registers, uint32_t *out);
+ uint64_t (*query_value)(struct radeon_winsys *ws, enum radeon_value_id value);
- const char *(*get_chip_name)(struct radeon_winsys *ws);
+ bool (*read_registers)(struct radeon_winsys *ws, unsigned reg_offset, unsigned num_registers,
+ uint32_t *out);
- struct radeon_winsys_bo *(*buffer_create)(struct radeon_winsys *ws,
- uint64_t size,
- unsigned alignment,
- enum radeon_bo_domain domain,
- enum radeon_bo_flag flags,
- unsigned priority);
+ const char *(*get_chip_name)(struct radeon_winsys *ws);
- void (*buffer_destroy)(struct radeon_winsys *ws,
- struct radeon_winsys_bo *bo);
- void *(*buffer_map)(struct radeon_winsys_bo *bo);
+ struct radeon_winsys_bo *(*buffer_create)(struct radeon_winsys *ws, uint64_t size,
+ unsigned alignment, enum radeon_bo_domain domain,
+ enum radeon_bo_flag flags, unsigned priority);
- struct radeon_winsys_bo *(*buffer_from_ptr)(struct radeon_winsys *ws,
- void *pointer,
- uint64_t size,
- unsigned priority);
+ void (*buffer_destroy)(struct radeon_winsys *ws, struct radeon_winsys_bo *bo);
+ void *(*buffer_map)(struct radeon_winsys_bo *bo);
- struct radeon_winsys_bo *(*buffer_from_fd)(struct radeon_winsys *ws,
- int fd,
- unsigned priority,
- uint64_t *alloc_size);
+ struct radeon_winsys_bo *(*buffer_from_ptr)(struct radeon_winsys *ws, void *pointer,
+ uint64_t size, unsigned priority);
- bool (*buffer_get_fd)(struct radeon_winsys *ws,
- struct radeon_winsys_bo *bo,
- int *fd);
+ struct radeon_winsys_bo *(*buffer_from_fd)(struct radeon_winsys *ws, int fd, unsigned priority,
+ uint64_t *alloc_size);
- bool (*buffer_get_flags_from_fd)(struct radeon_winsys *ws, int fd,
- enum radeon_bo_domain *domains,
- enum radeon_bo_flag *flags);
+ bool (*buffer_get_fd)(struct radeon_winsys *ws, struct radeon_winsys_bo *bo, int *fd);
- void (*buffer_unmap)(struct radeon_winsys_bo *bo);
+ bool (*buffer_get_flags_from_fd)(struct radeon_winsys *ws, int fd,
+ enum radeon_bo_domain *domains, enum radeon_bo_flag *flags);
- void (*buffer_set_metadata)(struct radeon_winsys *ws,
- struct radeon_winsys_bo *bo,
- struct radeon_bo_metadata *md);
- void (*buffer_get_metadata)(struct radeon_winsys *ws,
- struct radeon_winsys_bo *bo,
- struct radeon_bo_metadata *md);
+ void (*buffer_unmap)(struct radeon_winsys_bo *bo);
- VkResult (*buffer_virtual_bind)(struct radeon_winsys *ws,
- struct radeon_winsys_bo *parent,
- uint64_t offset, uint64_t size,
- struct radeon_winsys_bo *bo, uint64_t bo_offset);
+ void (*buffer_set_metadata)(struct radeon_winsys *ws, struct radeon_winsys_bo *bo,
+ struct radeon_bo_metadata *md);
+ void (*buffer_get_metadata)(struct radeon_winsys *ws, struct radeon_winsys_bo *bo,
+ struct radeon_bo_metadata *md);
- VkResult (*buffer_make_resident)(struct radeon_winsys *ws,
- struct radeon_winsys_bo *bo,
- bool resident);
+ VkResult (*buffer_virtual_bind)(struct radeon_winsys *ws, struct radeon_winsys_bo *parent,
+ uint64_t offset, uint64_t size, struct radeon_winsys_bo *bo,
+ uint64_t bo_offset);
- VkResult (*ctx_create)(struct radeon_winsys *ws,
- enum radeon_ctx_priority priority,
- struct radeon_winsys_ctx **ctx);
- void (*ctx_destroy)(struct radeon_winsys_ctx *ctx);
+ VkResult (*buffer_make_resident)(struct radeon_winsys *ws, struct radeon_winsys_bo *bo,
+ bool resident);
- bool (*ctx_wait_idle)(struct radeon_winsys_ctx *ctx,
- enum ring_type ring_type, int ring_index);
+ VkResult (*ctx_create)(struct radeon_winsys *ws, enum radeon_ctx_priority priority,
+ struct radeon_winsys_ctx **ctx);
+ void (*ctx_destroy)(struct radeon_winsys_ctx *ctx);
- struct radeon_cmdbuf *(*cs_create)(struct radeon_winsys *ws,
- enum ring_type ring_type);
+ bool (*ctx_wait_idle)(struct radeon_winsys_ctx *ctx, enum ring_type ring_type, int ring_index);
- void (*cs_destroy)(struct radeon_cmdbuf *cs);
+ struct radeon_cmdbuf *(*cs_create)(struct radeon_winsys *ws, enum ring_type ring_type);
- void (*cs_reset)(struct radeon_cmdbuf *cs);
+ void (*cs_destroy)(struct radeon_cmdbuf *cs);
- VkResult (*cs_finalize)(struct radeon_cmdbuf *cs);
+ void (*cs_reset)(struct radeon_cmdbuf *cs);
- void (*cs_grow)(struct radeon_cmdbuf * cs, size_t min_size);
+ VkResult (*cs_finalize)(struct radeon_cmdbuf *cs);
- VkResult (*cs_submit)(struct radeon_winsys_ctx *ctx,
- int queue_index,
- struct radeon_cmdbuf **cs_array,
- unsigned cs_count,
- struct radeon_cmdbuf *initial_preamble_cs,
- struct radeon_cmdbuf *continue_preamble_cs,
- struct radv_winsys_sem_info *sem_info,
- bool can_patch);
+ void (*cs_grow)(struct radeon_cmdbuf *cs, size_t min_size);
- void (*cs_add_buffer)(struct radeon_cmdbuf *cs,
- struct radeon_winsys_bo *bo);
+ VkResult (*cs_submit)(struct radeon_winsys_ctx *ctx, int queue_index,
+ struct radeon_cmdbuf **cs_array, unsigned cs_count,
+ struct radeon_cmdbuf *initial_preamble_cs,
+ struct radeon_cmdbuf *continue_preamble_cs,
+ struct radv_winsys_sem_info *sem_info, bool can_patch);
- void (*cs_execute_secondary)(struct radeon_cmdbuf *parent,
- struct radeon_cmdbuf *child);
+ void (*cs_add_buffer)(struct radeon_cmdbuf *cs, struct radeon_winsys_bo *bo);
- void (*cs_dump)(struct radeon_cmdbuf *cs, FILE* file, const int *trace_ids, int trace_id_count);
+ void (*cs_execute_secondary)(struct radeon_cmdbuf *parent, struct radeon_cmdbuf *child);
- void (*dump_bo_ranges)(struct radeon_winsys *ws, FILE *file);
+ void (*cs_dump)(struct radeon_cmdbuf *cs, FILE *file, const int *trace_ids, int trace_id_count);
- void (*dump_bo_log)(struct radeon_winsys *ws, FILE *file);
+ void (*dump_bo_ranges)(struct radeon_winsys *ws, FILE *file);
- int (*surface_init)(struct radeon_winsys *ws,
- const struct ac_surf_info *surf_info,
- struct radeon_surf *surf);
+ void (*dump_bo_log)(struct radeon_winsys *ws, FILE *file);
- int (*create_syncobj)(struct radeon_winsys *ws, bool create_signaled,
- uint32_t *handle);
- void (*destroy_syncobj)(struct radeon_winsys *ws, uint32_t handle);
+ int (*surface_init)(struct radeon_winsys *ws, const struct ac_surf_info *surf_info,
+ struct radeon_surf *surf);
- void (*reset_syncobj)(struct radeon_winsys *ws, uint32_t handle);
- void (*signal_syncobj)(struct radeon_winsys *ws, uint32_t handle, uint64_t point);
- VkResult (*query_syncobj)(struct radeon_winsys *ws, uint32_t handle, uint64_t *point);
- bool (*wait_syncobj)(struct radeon_winsys *ws, const uint32_t *handles, uint32_t handle_count,
- bool wait_all, uint64_t timeout);
- bool (*wait_timeline_syncobj)(struct radeon_winsys *ws, const uint32_t *handles, const uint64_t *points,
- uint32_t handle_count, bool wait_all, bool available, uint64_t timeout);
+ int (*create_syncobj)(struct radeon_winsys *ws, bool create_signaled, uint32_t *handle);
+ void (*destroy_syncobj)(struct radeon_winsys *ws, uint32_t handle);
- int (*export_syncobj)(struct radeon_winsys *ws, uint32_t syncobj, int *fd);
- int (*import_syncobj)(struct radeon_winsys *ws, int fd, uint32_t *syncobj);
+ void (*reset_syncobj)(struct radeon_winsys *ws, uint32_t handle);
+ void (*signal_syncobj)(struct radeon_winsys *ws, uint32_t handle, uint64_t point);
+ VkResult (*query_syncobj)(struct radeon_winsys *ws, uint32_t handle, uint64_t *point);
+ bool (*wait_syncobj)(struct radeon_winsys *ws, const uint32_t *handles, uint32_t handle_count,
+ bool wait_all, uint64_t timeout);
+ bool (*wait_timeline_syncobj)(struct radeon_winsys *ws, const uint32_t *handles,
+ const uint64_t *points, uint32_t handle_count, bool wait_all,
+ bool available, uint64_t timeout);
- int (*export_syncobj_to_sync_file)(struct radeon_winsys *ws, uint32_t syncobj, int *fd);
+ int (*export_syncobj)(struct radeon_winsys *ws, uint32_t syncobj, int *fd);
+ int (*import_syncobj)(struct radeon_winsys *ws, int fd, uint32_t *syncobj);
- /* Note that this, unlike the normal import, uses an existing syncobj. */
- int (*import_syncobj_from_sync_file)(struct radeon_winsys *ws, uint32_t syncobj, int fd);
+ int (*export_syncobj_to_sync_file)(struct radeon_winsys *ws, uint32_t syncobj, int *fd);
+ /* Note that this, unlike the normal import, uses an existing syncobj. */
+ int (*import_syncobj_from_sync_file)(struct radeon_winsys *ws, uint32_t syncobj, int fd);
};
-static inline void radeon_emit(struct radeon_cmdbuf *cs, uint32_t value)
+static inline void
+radeon_emit(struct radeon_cmdbuf *cs, uint32_t value)
{
- cs->buf[cs->cdw++] = value;
+ cs->buf[cs->cdw++] = value;
}
-static inline void radeon_emit_array(struct radeon_cmdbuf *cs,
- const uint32_t *values, unsigned count)
+static inline void
+radeon_emit_array(struct radeon_cmdbuf *cs, const uint32_t *values, unsigned count)
{
- memcpy(cs->buf + cs->cdw, values, count * 4);
- cs->cdw += count;
+ memcpy(cs->buf + cs->cdw, values, count * 4);
+ cs->cdw += count;
}
-static inline uint64_t radv_buffer_get_va(struct radeon_winsys_bo *bo)
+static inline uint64_t
+radv_buffer_get_va(struct radeon_winsys_bo *bo)
{
- return bo->va;
+ return bo->va;
}
-static inline void radv_cs_add_buffer(struct radeon_winsys *ws,
- struct radeon_cmdbuf *cs,
- struct radeon_winsys_bo *bo)
+static inline void
+radv_cs_add_buffer(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, struct radeon_winsys_bo *bo)
{
- if (bo->use_global_list)
- return;
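+   /* Buffers on the global BO list are resident for every submission, so they
+    * do not need to be tracked in the per-CS buffer list.
+    */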
+ if (bo->use_global_list)
+ return;
- ws->cs_add_buffer(cs, bo);
+ ws->cs_add_buffer(cs, bo);
}
enum radeon_bo_domain radv_cmdbuffer_domain(const struct radeon_info *info, uint32_t perftest);
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index e23bf5f6ad9..eb29a794099 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -25,689 +25,652 @@
* IN THE SOFTWARE.
*/
+#include "radv_shader.h"
+#include "nir/nir.h"
+#include "nir/nir_builder.h"
+#include "spirv/nir_spirv.h"
#include "util/memstream.h"
#include "util/mesa-sha1.h"
#include "util/u_atomic.h"
#include "radv_debug.h"
#include "radv_private.h"
-#include "radv_shader.h"
-#include "radv_shader_helper.h"
#include "radv_shader_args.h"
-#include "nir/nir.h"
-#include "nir/nir_builder.h"
-#include "spirv/nir_spirv.h"
+#include "radv_shader_helper.h"
-#include "sid.h"
+#include "util/debug.h"
#include "ac_binary.h"
+#include "ac_exp_param.h"
#include "ac_llvm_util.h"
#include "ac_nir.h"
#include "ac_nir_to_llvm.h"
#include "ac_rtld.h"
+#include "aco_interface.h"
+#include "sid.h"
#include "vk_format.h"
-#include "util/debug.h"
-#include "ac_exp_param.h"
static const struct nir_shader_compiler_options nir_options = {
- .vertex_id_zero_based = true,
- .lower_scmp = true,
- .lower_flrp16 = true,
- .lower_flrp32 = true,
- .lower_flrp64 = true,
- .lower_device_index_to_zero = true,
- .lower_fdiv = true,
- .lower_fmod = true,
- .lower_ineg = true,
- .lower_bitfield_insert_to_bitfield_select = true,
- .lower_bitfield_extract = true,
- .lower_pack_snorm_2x16 = true,
- .lower_pack_snorm_4x8 = true,
- .lower_pack_unorm_2x16 = true,
- .lower_pack_unorm_4x8 = true,
- .lower_pack_half_2x16 = true,
- .lower_pack_64_2x32 = true,
- .lower_pack_64_4x16 = true,
- .lower_pack_32_2x16 = true,
- .lower_unpack_snorm_2x16 = true,
- .lower_unpack_snorm_4x8 = true,
- .lower_unpack_unorm_2x16 = true,
- .lower_unpack_unorm_4x8 = true,
- .lower_unpack_half_2x16 = true,
- .lower_extract_byte = true,
- .lower_extract_word = true,
- .lower_ffma16 = true,
- .lower_ffma32 = true,
- .lower_ffma64 = true,
- .lower_fpow = true,
- .lower_mul_2x32_64 = true,
- .lower_rotate = true,
- .has_fsub = true,
- .has_isub = true,
- .use_scoped_barrier = true,
- .max_unroll_iterations = 32,
- .max_unroll_iterations_aggressive = 128,
- .use_interpolated_input_intrinsics = true,
- .vectorize_vec2_16bit = true,
- /* nir_lower_int64() isn't actually called for the LLVM backend, but
- * this helps the loop unrolling heuristics. */
- .lower_int64_options = nir_lower_imul64 |
- nir_lower_imul_high64 |
- nir_lower_imul_2x32_64 |
- nir_lower_divmod64 |
- nir_lower_minmax64 |
- nir_lower_iabs64,
- .lower_doubles_options = nir_lower_drcp |
- nir_lower_dsqrt |
- nir_lower_drsq |
- nir_lower_ddiv,
+ .vertex_id_zero_based = true,
+ .lower_scmp = true,
+ .lower_flrp16 = true,
+ .lower_flrp32 = true,
+ .lower_flrp64 = true,
+ .lower_device_index_to_zero = true,
+ .lower_fdiv = true,
+ .lower_fmod = true,
+ .lower_ineg = true,
+ .lower_bitfield_insert_to_bitfield_select = true,
+ .lower_bitfield_extract = true,
+ .lower_pack_snorm_2x16 = true,
+ .lower_pack_snorm_4x8 = true,
+ .lower_pack_unorm_2x16 = true,
+ .lower_pack_unorm_4x8 = true,
+ .lower_pack_half_2x16 = true,
+ .lower_pack_64_2x32 = true,
+ .lower_pack_64_4x16 = true,
+ .lower_pack_32_2x16 = true,
+ .lower_unpack_snorm_2x16 = true,
+ .lower_unpack_snorm_4x8 = true,
+ .lower_unpack_unorm_2x16 = true,
+ .lower_unpack_unorm_4x8 = true,
+ .lower_unpack_half_2x16 = true,
+ .lower_extract_byte = true,
+ .lower_extract_word = true,
+ .lower_ffma16 = true,
+ .lower_ffma32 = true,
+ .lower_ffma64 = true,
+ .lower_fpow = true,
+ .lower_mul_2x32_64 = true,
+ .lower_rotate = true,
+ .has_fsub = true,
+ .has_isub = true,
+ .use_scoped_barrier = true,
+ .max_unroll_iterations = 32,
+ .max_unroll_iterations_aggressive = 128,
+ .use_interpolated_input_intrinsics = true,
+ .vectorize_vec2_16bit = true,
+ /* nir_lower_int64() isn't actually called for the LLVM backend, but
+ * this helps the loop unrolling heuristics. */
+ .lower_int64_options = nir_lower_imul64 | nir_lower_imul_high64 | nir_lower_imul_2x32_64 |
+ nir_lower_divmod64 | nir_lower_minmax64 | nir_lower_iabs64,
+ .lower_doubles_options = nir_lower_drcp | nir_lower_dsqrt | nir_lower_drsq | nir_lower_ddiv,
.divergence_analysis_options = nir_divergence_view_index_uniform,
};
bool
-radv_can_dump_shader(struct radv_device *device,
- struct vk_shader_module *module,
- bool is_gs_copy_shader)
+radv_can_dump_shader(struct radv_device *device, struct vk_shader_module *module,
+ bool is_gs_copy_shader)
{
- if (!(device->instance->debug_flags & RADV_DEBUG_DUMP_SHADERS))
- return false;
- if (module)
- return !module->nir ||
- (device->instance->debug_flags & RADV_DEBUG_DUMP_META_SHADERS);
+ if (!(device->instance->debug_flags & RADV_DEBUG_DUMP_SHADERS))
+ return false;
+ if (module)
+ return !module->nir || (device->instance->debug_flags & RADV_DEBUG_DUMP_META_SHADERS);
- return is_gs_copy_shader;
+ return is_gs_copy_shader;
}
bool
-radv_can_dump_shader_stats(struct radv_device *device,
- struct vk_shader_module *module)
+radv_can_dump_shader_stats(struct radv_device *device, struct vk_shader_module *module)
{
- /* Only dump non-meta shader stats. */
- return device->instance->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS &&
- module && !module->nir;
+ /* Only dump non-meta shader stats. */
+ return device->instance->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS && module && !module->nir;
}
void
radv_optimize_nir(const struct radv_device *device, struct nir_shader *shader,
- bool optimize_conservatively, bool allow_copies)
+ bool optimize_conservatively, bool allow_copies)
{
- bool progress;
- unsigned lower_flrp =
- (shader->options->lower_flrp16 ? 16 : 0) |
- (shader->options->lower_flrp32 ? 32 : 0) |
- (shader->options->lower_flrp64 ? 64 : 0);
-
- do {
- progress = false;
-
- NIR_PASS(progress, shader, nir_split_array_vars, nir_var_function_temp);
- NIR_PASS(progress, shader, nir_shrink_vec_array_vars, nir_var_function_temp);
-
- NIR_PASS_V(shader, nir_lower_vars_to_ssa);
-
- if (allow_copies) {
- /* Only run this pass in the first call to
- * radv_optimize_nir. Later calls assume that we've
- * lowered away any copy_deref instructions and we
- * don't want to introduce any more.
- */
- NIR_PASS(progress, shader, nir_opt_find_array_copies);
- }
-
- NIR_PASS(progress, shader, nir_opt_copy_prop_vars);
- NIR_PASS(progress, shader, nir_opt_dead_write_vars);
- NIR_PASS(progress, shader, nir_remove_dead_variables,
- nir_var_function_temp | nir_var_shader_in | nir_var_shader_out,
- NULL);
-
- NIR_PASS_V(shader, nir_lower_alu_to_scalar, NULL, NULL);
- NIR_PASS_V(shader, nir_lower_phis_to_scalar);
-
- NIR_PASS(progress, shader, nir_copy_prop);
- NIR_PASS(progress, shader, nir_opt_remove_phis);
- NIR_PASS(progress, shader, nir_opt_dce);
- if (nir_opt_trivial_continues(shader)) {
- progress = true;
- NIR_PASS(progress, shader, nir_copy_prop);
- NIR_PASS(progress, shader, nir_opt_remove_phis);
- NIR_PASS(progress, shader, nir_opt_dce);
- }
- NIR_PASS(progress, shader, nir_opt_if, true);
- NIR_PASS(progress, shader, nir_opt_dead_cf);
- NIR_PASS(progress, shader, nir_opt_cse);
- NIR_PASS(progress, shader, nir_opt_peephole_select, 8, true, true);
- NIR_PASS(progress, shader, nir_opt_constant_folding);
- NIR_PASS(progress, shader, nir_opt_algebraic);
-
- if (lower_flrp != 0) {
- bool lower_flrp_progress = false;
- NIR_PASS(lower_flrp_progress,
- shader,
- nir_lower_flrp,
- lower_flrp,
- false /* always_precise */);
- if (lower_flrp_progress) {
- NIR_PASS(progress, shader,
- nir_opt_constant_folding);
- progress = true;
- }
-
- /* Nothing should rematerialize any flrps, so we only
- * need to do this lowering once.
- */
- lower_flrp = 0;
- }
-
- NIR_PASS(progress, shader, nir_opt_undef);
- NIR_PASS(progress, shader, nir_opt_shrink_vectors,
- !device->instance->disable_shrink_image_store);
- if (shader->options->max_unroll_iterations) {
- NIR_PASS(progress, shader, nir_opt_loop_unroll, 0);
- }
- } while (progress && !optimize_conservatively);
-
- NIR_PASS(progress, shader, nir_opt_conditional_discard);
- NIR_PASS(progress, shader, nir_opt_move, nir_move_load_ubo);
+ bool progress;
+ unsigned lower_flrp = (shader->options->lower_flrp16 ? 16 : 0) |
+ (shader->options->lower_flrp32 ? 32 : 0) |
+ (shader->options->lower_flrp64 ? 64 : 0);
+
+ do {
+ progress = false;
+
+ NIR_PASS(progress, shader, nir_split_array_vars, nir_var_function_temp);
+ NIR_PASS(progress, shader, nir_shrink_vec_array_vars, nir_var_function_temp);
+
+ NIR_PASS_V(shader, nir_lower_vars_to_ssa);
+
+ if (allow_copies) {
+ /* Only run this pass in the first call to
+ * radv_optimize_nir. Later calls assume that we've
+ * lowered away any copy_deref instructions and we
+ * don't want to introduce any more.
+ */
+ NIR_PASS(progress, shader, nir_opt_find_array_copies);
+ }
+
+ NIR_PASS(progress, shader, nir_opt_copy_prop_vars);
+ NIR_PASS(progress, shader, nir_opt_dead_write_vars);
+ NIR_PASS(progress, shader, nir_remove_dead_variables,
+ nir_var_function_temp | nir_var_shader_in | nir_var_shader_out, NULL);
+
+ NIR_PASS_V(shader, nir_lower_alu_to_scalar, NULL, NULL);
+ NIR_PASS_V(shader, nir_lower_phis_to_scalar);
+
+ NIR_PASS(progress, shader, nir_copy_prop);
+ NIR_PASS(progress, shader, nir_opt_remove_phis);
+ NIR_PASS(progress, shader, nir_opt_dce);
+ if (nir_opt_trivial_continues(shader)) {
+ progress = true;
+ NIR_PASS(progress, shader, nir_copy_prop);
+ NIR_PASS(progress, shader, nir_opt_remove_phis);
+ NIR_PASS(progress, shader, nir_opt_dce);
+ }
+ NIR_PASS(progress, shader, nir_opt_if, true);
+ NIR_PASS(progress, shader, nir_opt_dead_cf);
+ NIR_PASS(progress, shader, nir_opt_cse);
+ NIR_PASS(progress, shader, nir_opt_peephole_select, 8, true, true);
+ NIR_PASS(progress, shader, nir_opt_constant_folding);
+ NIR_PASS(progress, shader, nir_opt_algebraic);
+
+ if (lower_flrp != 0) {
+ bool lower_flrp_progress = false;
+ NIR_PASS(lower_flrp_progress, shader, nir_lower_flrp, lower_flrp,
+ false /* always_precise */);
+ if (lower_flrp_progress) {
+ NIR_PASS(progress, shader, nir_opt_constant_folding);
+ progress = true;
+ }
+
+ /* Nothing should rematerialize any flrps, so we only
+ * need to do this lowering once.
+ */
+ lower_flrp = 0;
+ }
+
+ NIR_PASS(progress, shader, nir_opt_undef);
+ NIR_PASS(progress, shader, nir_opt_shrink_vectors,
+ !device->instance->disable_shrink_image_store);
+ if (shader->options->max_unroll_iterations) {
+ NIR_PASS(progress, shader, nir_opt_loop_unroll, 0);
+ }
+ } while (progress && !optimize_conservatively);
+
+ NIR_PASS(progress, shader, nir_opt_conditional_discard);
+ NIR_PASS(progress, shader, nir_opt_move, nir_move_load_ubo);
}
static void
shared_var_info(const struct glsl_type *type, unsigned *size, unsigned *align)
{
- assert(glsl_type_is_vector_or_scalar(type));
+ assert(glsl_type_is_vector_or_scalar(type));
- uint32_t comp_size = glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
- unsigned length = glsl_get_vector_elements(type);
- *size = comp_size * length,
- *align = comp_size;
+ uint32_t comp_size = glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
+ unsigned length = glsl_get_vector_elements(type);
+ *size = comp_size * length, *align = comp_size;
}
struct radv_shader_debug_data {
- struct radv_device *device;
- const struct vk_shader_module *module;
+ struct radv_device *device;
+ const struct vk_shader_module *module;
};
-static void radv_spirv_nir_debug(void *private_data,
- enum nir_spirv_debug_level level,
- size_t spirv_offset,
- const char *message)
+static void
+radv_spirv_nir_debug(void *private_data, enum nir_spirv_debug_level level, size_t spirv_offset,
+ const char *message)
{
- struct radv_shader_debug_data *debug_data = private_data;
- struct radv_instance *instance = debug_data->device->instance;
+ struct radv_shader_debug_data *debug_data = private_data;
+ struct radv_instance *instance = debug_data->device->instance;
- static const VkDebugReportFlagsEXT vk_flags[] = {
- [NIR_SPIRV_DEBUG_LEVEL_INFO] = VK_DEBUG_REPORT_INFORMATION_BIT_EXT,
- [NIR_SPIRV_DEBUG_LEVEL_WARNING] = VK_DEBUG_REPORT_WARNING_BIT_EXT,
- [NIR_SPIRV_DEBUG_LEVEL_ERROR] = VK_DEBUG_REPORT_ERROR_BIT_EXT,
- };
- char buffer[256];
+ static const VkDebugReportFlagsEXT vk_flags[] = {
+ [NIR_SPIRV_DEBUG_LEVEL_INFO] = VK_DEBUG_REPORT_INFORMATION_BIT_EXT,
+ [NIR_SPIRV_DEBUG_LEVEL_WARNING] = VK_DEBUG_REPORT_WARNING_BIT_EXT,
+ [NIR_SPIRV_DEBUG_LEVEL_ERROR] = VK_DEBUG_REPORT_ERROR_BIT_EXT,
+ };
+ char buffer[256];
- snprintf(buffer, sizeof(buffer), "SPIR-V offset %lu: %s",
- (unsigned long)spirv_offset, message);
+ snprintf(buffer, sizeof(buffer), "SPIR-V offset %lu: %s", (unsigned long)spirv_offset, message);
- vk_debug_report(&instance->vk, vk_flags[level],
- &debug_data->module->base, 0, 0, "radv", buffer);
+ vk_debug_report(&instance->vk, vk_flags[level], &debug_data->module->base, 0, 0, "radv", buffer);
}
-static void radv_compiler_debug(void *private_data,
- enum radv_compiler_debug_level level,
- const char *message)
+static void
+radv_compiler_debug(void *private_data, enum radv_compiler_debug_level level, const char *message)
{
- struct radv_shader_debug_data *debug_data = private_data;
- struct radv_instance *instance = debug_data->device->instance;
-
- static const VkDebugReportFlagsEXT vk_flags[] = {
- [RADV_COMPILER_DEBUG_LEVEL_PERFWARN] = VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT,
- [RADV_COMPILER_DEBUG_LEVEL_ERROR] = VK_DEBUG_REPORT_ERROR_BIT_EXT,
- };
-
- /* VK_DEBUG_REPORT_DEBUG_BIT_EXT specifies diagnostic information
- * from the implementation and layers.
- */
- vk_debug_report(&instance->vk,
- vk_flags[level] | VK_DEBUG_REPORT_DEBUG_BIT_EXT,
- &debug_data->module->base, 0, 0, "radv", message);
+ struct radv_shader_debug_data *debug_data = private_data;
+ struct radv_instance *instance = debug_data->device->instance;
+
+ static const VkDebugReportFlagsEXT vk_flags[] = {
+ [RADV_COMPILER_DEBUG_LEVEL_PERFWARN] = VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT,
+ [RADV_COMPILER_DEBUG_LEVEL_ERROR] = VK_DEBUG_REPORT_ERROR_BIT_EXT,
+ };
+
+ /* VK_DEBUG_REPORT_DEBUG_BIT_EXT specifies diagnostic information
+ * from the implementation and layers.
+ */
+ vk_debug_report(&instance->vk, vk_flags[level] | VK_DEBUG_REPORT_DEBUG_BIT_EXT,
+ &debug_data->module->base, 0, 0, "radv", message);
}
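+/* Mark position/clip/cull/tess-level outputs invariant so that matching
+ * computations in different shaders produce identical results.
+ */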
static void
mark_geom_invariant(nir_shader *nir)
{
- nir_foreach_shader_out_variable(var, nir) {
- switch (var->data.location) {
- case VARYING_SLOT_POS:
- case VARYING_SLOT_PSIZ:
- case VARYING_SLOT_CLIP_DIST0:
- case VARYING_SLOT_CLIP_DIST1:
- case VARYING_SLOT_CULL_DIST0:
- case VARYING_SLOT_CULL_DIST1:
- case VARYING_SLOT_TESS_LEVEL_OUTER:
- case VARYING_SLOT_TESS_LEVEL_INNER:
- var->data.invariant = true;
- break;
- default:
- break;
- }
- }
+ nir_foreach_shader_out_variable(var, nir)
+ {
+ switch (var->data.location) {
+ case VARYING_SLOT_POS:
+ case VARYING_SLOT_PSIZ:
+ case VARYING_SLOT_CLIP_DIST0:
+ case VARYING_SLOT_CLIP_DIST1:
+ case VARYING_SLOT_CULL_DIST0:
+ case VARYING_SLOT_CULL_DIST1:
+ case VARYING_SLOT_TESS_LEVEL_OUTER:
+ case VARYING_SLOT_TESS_LEVEL_INNER:
+ var->data.invariant = true;
+ break;
+ default:
+ break;
+ }
+ }
}
static bool
lower_intrinsics(nir_shader *nir, const struct radv_pipeline_key *key)
{
- nir_function_impl *entry = nir_shader_get_entrypoint(nir);
- bool progress = false;
- nir_builder b;
-
- nir_builder_init(&b, entry);
-
- nir_foreach_block(block, entry) {
- nir_foreach_instr_safe(instr, block) {
- if (instr->type != nir_instr_type_intrinsic)
- continue;
-
- nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
- b.cursor = nir_before_instr(&intrin->instr);
-
- nir_ssa_def *def = NULL;
- if (intrin->intrinsic == nir_intrinsic_load_vulkan_descriptor) {
- def = nir_vec2(&b, nir_channel(&b, intrin->src[0].ssa, 0),
- nir_imm_int(&b, 0));
- } else if (intrin->intrinsic == nir_intrinsic_is_sparse_texels_resident) {
- def = nir_ieq_imm(&b, intrin->src[0].ssa, 0);
- } else if (intrin->intrinsic == nir_intrinsic_sparse_residency_code_and) {
- def = nir_ior(&b, intrin->src[0].ssa, intrin->src[1].ssa);
- } else if (intrin->intrinsic == nir_intrinsic_load_view_index &&
- !key->has_multiview_view_index) {
- def = nir_imm_zero(&b, 1, 32);
- } else {
- continue;
- }
-
- nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
- def);
-
- nir_instr_remove(instr);
- progress = true;
- }
- }
-
- return progress;
+ nir_function_impl *entry = nir_shader_get_entrypoint(nir);
+ bool progress = false;
+ nir_builder b;
+
+ nir_builder_init(&b, entry);
+
+ nir_foreach_block (block, entry) {
+ nir_foreach_instr_safe (instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+ b.cursor = nir_before_instr(&intrin->instr);
+
+ nir_ssa_def *def = NULL;
+ if (intrin->intrinsic == nir_intrinsic_load_vulkan_descriptor) {
+ def = nir_vec2(&b, nir_channel(&b, intrin->src[0].ssa, 0), nir_imm_int(&b, 0));
+ } else if (intrin->intrinsic == nir_intrinsic_is_sparse_texels_resident) {
+ def = nir_ieq_imm(&b, intrin->src[0].ssa, 0);
+ } else if (intrin->intrinsic == nir_intrinsic_sparse_residency_code_and) {
+ def = nir_ior(&b, intrin->src[0].ssa, intrin->src[1].ssa);
+ } else if (intrin->intrinsic == nir_intrinsic_load_view_index &&
+ !key->has_multiview_view_index) {
+ def = nir_imm_zero(&b, 1, 32);
+ } else {
+ continue;
+ }
+
+ nir_ssa_def_rewrite_uses(&intrin->dest.ssa, def);
+
+ nir_instr_remove(instr);
+ progress = true;
+ }
+ }
+
+ return progress;
}
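/* A minimal sketch of how a small rewrite pass like lower_intrinsics() above
 * is typically driven, assuming the usual NIR progress/cleanup idiom; the
 * helper name is illustrative and not part of this driver.
 */
static void
run_lower_intrinsics_sketch(nir_shader *nir, const struct radv_pipeline_key *key)
{
   if (lower_intrinsics(nir, key)) {
      /* The rewrites can leave dead instructions behind (e.g. an unused
       * descriptor channel), so run dead-code elimination afterwards.
       */
      nir_opt_dce(nir);
   }
}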
nir_shader *
-radv_shader_compile_to_nir(struct radv_device *device,
- struct vk_shader_module *module,
- const char *entrypoint_name,
- gl_shader_stage stage,
- const VkSpecializationInfo *spec_info,
- const VkPipelineCreateFlags flags,
- const struct radv_pipeline_layout *layout,
- const struct radv_pipeline_key *key)
+radv_shader_compile_to_nir(struct radv_device *device, struct vk_shader_module *module,
+ const char *entrypoint_name, gl_shader_stage stage,
+ const VkSpecializationInfo *spec_info, const VkPipelineCreateFlags flags,
+ const struct radv_pipeline_layout *layout,
+ const struct radv_pipeline_key *key)
{
- unsigned subgroup_size = 64, ballot_bit_size = 64;
- if (key->compute_subgroup_size) {
- /* Only compute shaders currently support requiring a
- * specific subgroup size.
- */
- assert(stage == MESA_SHADER_COMPUTE);
- subgroup_size = key->compute_subgroup_size;
- ballot_bit_size = key->compute_subgroup_size;
- }
-
- nir_shader *nir;
-
- if (module->nir) {
- /* Some things such as our meta clear/blit code will give us a NIR
-    * shader directly. In that case, we ignore the SPIR-V entirely
-    * and just use the NIR shader. */
- nir = module->nir;
- nir->options = &nir_options;
- nir_validate_shader(nir, "in internal shader");
-
- assert(exec_list_length(&nir->functions) == 1);
- } else {
- uint32_t *spirv = (uint32_t *) module->data;
- assert(module->size % 4 == 0);
-
- if (device->instance->debug_flags & RADV_DEBUG_DUMP_SPIRV)
- radv_print_spirv(module->data, module->size, stderr);
-
- uint32_t num_spec_entries = 0;
- struct nir_spirv_specialization *spec_entries = NULL;
- if (spec_info && spec_info->mapEntryCount > 0) {
- num_spec_entries = spec_info->mapEntryCount;
- spec_entries = calloc(num_spec_entries, sizeof(*spec_entries));
- for (uint32_t i = 0; i < num_spec_entries; i++) {
- VkSpecializationMapEntry entry = spec_info->pMapEntries[i];
- const void *data = (uint8_t *)spec_info->pData + entry.offset;
- assert((uint8_t *)data + entry.size <= (uint8_t *)spec_info->pData + spec_info->dataSize);
-
- spec_entries[i].id = spec_info->pMapEntries[i].constantID;
- switch (entry.size) {
- case 8:
- memcpy(&spec_entries[i].value.u64, data, sizeof(uint64_t));
- break;
- case 4:
- memcpy(&spec_entries[i].value.u32, data, sizeof(uint32_t));
- break;
- case 2:
- memcpy(&spec_entries[i].value.u16, data, sizeof(uint16_t));
- break;
- case 1:
- memcpy(&spec_entries[i].value.u8, data, sizeof(uint8_t));
- break;
- default:
- assert(!"Invalid spec constant size");
- break;
- }
- }
- }
-
- struct radv_shader_debug_data spirv_debug_data = {
- .device = device,
- .module = module,
- };
- const struct spirv_to_nir_options spirv_options = {
- .caps = {
- .amd_fragment_mask = true,
- .amd_gcn_shader = true,
- .amd_image_gather_bias_lod = true,
- .amd_image_read_write_lod = true,
- .amd_shader_ballot = true,
- .amd_shader_explicit_vertex_parameter = true,
- .amd_trinary_minmax = true,
- .demote_to_helper_invocation = true,
- .derivative_group = true,
- .descriptor_array_dynamic_indexing = true,
- .descriptor_array_non_uniform_indexing = true,
- .descriptor_indexing = true,
- .device_group = true,
- .draw_parameters = true,
- .float_controls = true,
- .float16 = device->physical_device->rad_info.has_packed_math_16bit,
- .float32_atomic_add = true,
- .float64 = true,
- .geometry_streams = true,
- .image_atomic_int64 = true,
- .image_ms_array = true,
- .image_read_without_format = true,
- .image_write_without_format = true,
- .int8 = true,
- .int16 = true,
- .int64 = true,
- .int64_atomics = true,
- .min_lod = true,
- .multiview = true,
- .physical_storage_buffer_address = true,
- .post_depth_coverage = true,
- .runtime_descriptor_array = true,
- .shader_clock = true,
- .shader_viewport_index_layer = true,
- .sparse_residency = true,
- .stencil_export = true,
- .storage_8bit = true,
- .storage_16bit = true,
- .storage_image_ms = true,
- .subgroup_arithmetic = true,
- .subgroup_ballot = true,
- .subgroup_basic = true,
- .subgroup_quad = true,
- .subgroup_shuffle = true,
- .subgroup_vote = true,
- .tessellation = true,
- .transform_feedback = true,
- .variable_pointers = true,
- .vk_memory_model = true,
- .vk_memory_model_device_scope = true,
- .fragment_shading_rate = device->physical_device->rad_info.chip_class >= GFX10_3,
- .workgroup_memory_explicit_layout = true,
- },
- .ubo_addr_format = nir_address_format_32bit_index_offset,
- .ssbo_addr_format = nir_address_format_32bit_index_offset,
- .phys_ssbo_addr_format = nir_address_format_64bit_global,
- .push_const_addr_format = nir_address_format_logical,
- .shared_addr_format = nir_address_format_32bit_offset,
- .frag_coord_is_sysval = true,
- .use_deref_buffer_array_length = true,
- .debug = {
- .func = radv_spirv_nir_debug,
- .private_data = &spirv_debug_data,
- },
- };
- nir = spirv_to_nir(spirv, module->size / 4,
- spec_entries, num_spec_entries,
- stage, entrypoint_name,
- &spirv_options, &nir_options);
- assert(nir->info.stage == stage);
- nir_validate_shader(nir, "after spirv_to_nir");
-
- free(spec_entries);
-
- /* We have to lower away local constant initializers right before we
- * inline functions. That way they get properly initialized at the top
- * of the function and not at the top of its caller.
- */
- NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
- NIR_PASS_V(nir, nir_lower_returns);
- NIR_PASS_V(nir, nir_inline_functions);
- NIR_PASS_V(nir, nir_copy_prop);
- NIR_PASS_V(nir, nir_opt_deref);
-
- /* Pick off the single entrypoint that we want */
- foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
- if (func->is_entrypoint)
- func->name = ralloc_strdup(func, "main");
- else
- exec_node_remove(&func->node);
- }
- assert(exec_list_length(&nir->functions) == 1);
-
- /* Make sure we lower constant initializers on output variables so that
- * nir_remove_dead_variables below sees the corresponding stores
- */
- NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_shader_out);
-
- /* Now that we've deleted all but the main function, we can go ahead and
- * lower the rest of the constant initializers.
- */
- NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);
-
- /* Split member structs. We do this before lower_io_to_temporaries so that
- * it doesn't lower system values to temporaries by accident.
- */
- NIR_PASS_V(nir, nir_split_var_copies);
- NIR_PASS_V(nir, nir_split_per_member_structs);
-
- if (nir->info.stage == MESA_SHADER_FRAGMENT)
- NIR_PASS_V(nir, nir_lower_io_to_vector, nir_var_shader_out);
- if (nir->info.stage == MESA_SHADER_FRAGMENT)
- NIR_PASS_V(nir, nir_lower_input_attachments,
- &(nir_input_attachment_options) {
- .use_fragcoord_sysval = true,
- .use_layer_id_sysval = false,
- });
-
- NIR_PASS_V(nir, nir_remove_dead_variables,
- nir_var_shader_in | nir_var_shader_out | nir_var_system_value | nir_var_mem_shared,
- NULL);
-
- /* Variables can make nir_propagate_invariant more conservative
- * than it needs to be.
- */
- NIR_PASS_V(nir, nir_lower_global_vars_to_local);
- NIR_PASS_V(nir, nir_lower_vars_to_ssa);
-
- if (device->instance->debug_flags & RADV_DEBUG_INVARIANT_GEOM &&
- stage != MESA_SHADER_FRAGMENT) {
- mark_geom_invariant(nir);
- }
-
- NIR_PASS_V(nir, nir_propagate_invariant);
-
- NIR_PASS_V(nir, nir_lower_system_values);
- NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);
-
- NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
-
- NIR_PASS_V(nir, nir_lower_discard_or_demote,
- device->instance->debug_flags & RADV_DEBUG_DISCARD_TO_DEMOTE);
-
- nir_lower_doubles_options lower_doubles =
- nir->options->lower_doubles_options;
-
- if (device->physical_device->rad_info.chip_class == GFX6) {
- /* GFX6 doesn't support v_floor_f64 and the precision
- * of v_fract_f64 which is used to implement 64-bit
- * floor is less than what Vulkan requires.
- */
- lower_doubles |= nir_lower_dfloor;
- }
-
- NIR_PASS_V(nir, nir_lower_doubles, NULL, lower_doubles);
- }
-
- /* Vulkan uses the separate-shader linking model */
- nir->info.separate_shader = true;
-
- nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
-
- if (nir->info.stage == MESA_SHADER_GEOMETRY) {
- unsigned nir_gs_flags = nir_lower_gs_intrinsics_per_stream;
-
- if (device->physical_device->use_ngg && !radv_use_llvm_for_stage(device, stage)) {
-         /* ACO needs NIR to do some of the heavy lifting */
- nir_gs_flags |= nir_lower_gs_intrinsics_count_primitives |
- nir_lower_gs_intrinsics_count_vertices_per_primitive |
- nir_lower_gs_intrinsics_overwrite_incomplete;
- }
-
- nir_lower_gs_intrinsics(nir, nir_gs_flags);
- }
-
- static const nir_lower_tex_options tex_options = {
- .lower_txp = ~0,
- .lower_tg4_offsets = true,
- };
-
- nir_lower_tex(nir, &tex_options);
-
- nir_lower_vars_to_ssa(nir);
-
- if (nir->info.stage == MESA_SHADER_VERTEX ||
- nir->info.stage == MESA_SHADER_GEOMETRY ||
- nir->info.stage == MESA_SHADER_FRAGMENT) {
- NIR_PASS_V(nir, nir_lower_io_to_temporaries,
- nir_shader_get_entrypoint(nir), true, true);
- } else if (nir->info.stage == MESA_SHADER_TESS_EVAL) {
- NIR_PASS_V(nir, nir_lower_io_to_temporaries,
- nir_shader_get_entrypoint(nir), true, false);
- }
-
- nir_split_var_copies(nir);
-
- nir_lower_global_vars_to_local(nir);
- nir_remove_dead_variables(nir, nir_var_function_temp, NULL);
- bool gfx7minus = device->physical_device->rad_info.chip_class <= GFX7;
- nir_lower_subgroups(nir, &(struct nir_lower_subgroups_options) {
- .subgroup_size = subgroup_size,
- .ballot_bit_size = ballot_bit_size,
- .lower_to_scalar = 1,
- .lower_subgroup_masks = 1,
- .lower_shuffle = 1,
- .lower_shuffle_to_32bit = 1,
- .lower_vote_eq_to_ballot = 1,
- .lower_quad_broadcast_dynamic = 1,
- .lower_quad_broadcast_dynamic_to_const = gfx7minus,
- .lower_shuffle_to_swizzle_amd = 1,
- .lower_elect = radv_use_llvm_for_stage(device, stage),
- });
-
- nir_lower_load_const_to_scalar(nir);
-
- if (!(flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT))
- radv_optimize_nir(device, nir, false, true);
-
-   /* Call radv_nir_lower_ycbcr_textures() late, as there might still be
-    * tex instructions with an undefined texture/sampler before the first optimization. */
- NIR_PASS_V(nir, radv_nir_lower_ycbcr_textures, layout);
-
- /* We call nir_lower_var_copies() after the first radv_optimize_nir()
- * to remove any copies introduced by nir_opt_find_array_copies().
- */
- nir_lower_var_copies(nir);
-
- const nir_opt_access_options opt_access_options = {
- .is_vulkan = true,
- .infer_non_readable = true,
- };
- NIR_PASS_V(nir, nir_opt_access, &opt_access_options);
-
- NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_push_const,
- nir_address_format_32bit_offset);
-
- NIR_PASS_V(nir, nir_lower_explicit_io,
- nir_var_mem_ubo | nir_var_mem_ssbo,
- nir_address_format_32bit_index_offset);
-
- NIR_PASS_V(nir, lower_intrinsics, key);
-
- /* Lower deref operations for compute shared memory. */
- if (nir->info.stage == MESA_SHADER_COMPUTE) {
- if (!nir->info.cs.shared_memory_explicit_layout) {
- NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
- nir_var_mem_shared, shared_var_info);
- }
- NIR_PASS_V(nir, nir_lower_explicit_io,
- nir_var_mem_shared, nir_address_format_32bit_offset);
-
- if (nir->info.cs.zero_initialize_shared_memory &&
- nir->info.shared_size > 0) {
- const unsigned chunk_size = 16; /* max single store size */
- const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size);
- NIR_PASS_V(nir, nir_zero_initialize_shared_memory,
- shared_size, chunk_size);
- }
- }
-
- nir_lower_explicit_io(nir, nir_var_mem_global,
- nir_address_format_64bit_global);
-
- /* Lower large variables that are always constant with load_constant
- * intrinsics, which get turned into PC-relative loads from a data
- * section next to the shader.
- */
- NIR_PASS_V(nir, nir_opt_large_constants,
- glsl_get_natural_size_align_bytes, 16);
-
- /* Indirect lowering must be called after the radv_optimize_nir() loop
- * has been called at least once. Otherwise indirect lowering can
- * bloat the instruction count of the loop and cause it to be
- * considered too large for unrolling.
- */
- if (ac_lower_indirect_derefs(nir, device->physical_device->rad_info.chip_class) &&
- !(flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT) &&
- nir->info.stage != MESA_SHADER_COMPUTE) {
- /* Optimize the lowered code before the linking optimizations. */
- radv_optimize_nir(device, nir, false, false);
- }
-
- return nir;
+ unsigned subgroup_size = 64, ballot_bit_size = 64;
+ if (key->compute_subgroup_size) {
+ /* Only compute shaders currently support requiring a
+ * specific subgroup size.
+ */
+ assert(stage == MESA_SHADER_COMPUTE);
+ subgroup_size = key->compute_subgroup_size;
+ ballot_bit_size = key->compute_subgroup_size;
+ }
+
+ nir_shader *nir;
+
+ if (module->nir) {
+ /* Some things such as our meta clear/blit code will give us a NIR
+       * shader directly. In that case, we ignore the SPIR-V entirely
+       * and just use the NIR shader. */
+ nir = module->nir;
+ nir->options = &nir_options;
+ nir_validate_shader(nir, "in internal shader");
+
+ assert(exec_list_length(&nir->functions) == 1);
+ } else {
+ uint32_t *spirv = (uint32_t *)module->data;
+ assert(module->size % 4 == 0);
+
+ if (device->instance->debug_flags & RADV_DEBUG_DUMP_SPIRV)
+ radv_print_spirv(module->data, module->size, stderr);
+
+ uint32_t num_spec_entries = 0;
+ struct nir_spirv_specialization *spec_entries = NULL;
+ if (spec_info && spec_info->mapEntryCount > 0) {
+ num_spec_entries = spec_info->mapEntryCount;
+ spec_entries = calloc(num_spec_entries, sizeof(*spec_entries));
+ for (uint32_t i = 0; i < num_spec_entries; i++) {
+ VkSpecializationMapEntry entry = spec_info->pMapEntries[i];
+ const void *data = (uint8_t *)spec_info->pData + entry.offset;
+ assert((uint8_t *)data + entry.size <=
+ (uint8_t *)spec_info->pData + spec_info->dataSize);
+
+ spec_entries[i].id = spec_info->pMapEntries[i].constantID;
+ switch (entry.size) {
+ case 8:
+ memcpy(&spec_entries[i].value.u64, data, sizeof(uint64_t));
+ break;
+ case 4:
+ memcpy(&spec_entries[i].value.u32, data, sizeof(uint32_t));
+ break;
+ case 2:
+ memcpy(&spec_entries[i].value.u16, data, sizeof(uint16_t));
+ break;
+ case 1:
+ memcpy(&spec_entries[i].value.u8, data, sizeof(uint8_t));
+ break;
+ default:
+ assert(!"Invalid spec constant size");
+ break;
+ }
+ }
+ }
+
+ struct radv_shader_debug_data spirv_debug_data = {
+ .device = device,
+ .module = module,
+ };
+ const struct spirv_to_nir_options spirv_options = {
+ .caps =
+ {
+ .amd_fragment_mask = true,
+ .amd_gcn_shader = true,
+ .amd_image_gather_bias_lod = true,
+ .amd_image_read_write_lod = true,
+ .amd_shader_ballot = true,
+ .amd_shader_explicit_vertex_parameter = true,
+ .amd_trinary_minmax = true,
+ .demote_to_helper_invocation = true,
+ .derivative_group = true,
+ .descriptor_array_dynamic_indexing = true,
+ .descriptor_array_non_uniform_indexing = true,
+ .descriptor_indexing = true,
+ .device_group = true,
+ .draw_parameters = true,
+ .float_controls = true,
+ .float16 = device->physical_device->rad_info.has_packed_math_16bit,
+ .float32_atomic_add = true,
+ .float64 = true,
+ .geometry_streams = true,
+ .image_atomic_int64 = true,
+ .image_ms_array = true,
+ .image_read_without_format = true,
+ .image_write_without_format = true,
+ .int8 = true,
+ .int16 = true,
+ .int64 = true,
+ .int64_atomics = true,
+ .min_lod = true,
+ .multiview = true,
+ .physical_storage_buffer_address = true,
+ .post_depth_coverage = true,
+ .runtime_descriptor_array = true,
+ .shader_clock = true,
+ .shader_viewport_index_layer = true,
+ .sparse_residency = true,
+ .stencil_export = true,
+ .storage_8bit = true,
+ .storage_16bit = true,
+ .storage_image_ms = true,
+ .subgroup_arithmetic = true,
+ .subgroup_ballot = true,
+ .subgroup_basic = true,
+ .subgroup_quad = true,
+ .subgroup_shuffle = true,
+ .subgroup_vote = true,
+ .tessellation = true,
+ .transform_feedback = true,
+ .variable_pointers = true,
+ .vk_memory_model = true,
+ .vk_memory_model_device_scope = true,
+ .fragment_shading_rate = device->physical_device->rad_info.chip_class >= GFX10_3,
+ .workgroup_memory_explicit_layout = true,
+ },
+ .ubo_addr_format = nir_address_format_32bit_index_offset,
+ .ssbo_addr_format = nir_address_format_32bit_index_offset,
+ .phys_ssbo_addr_format = nir_address_format_64bit_global,
+ .push_const_addr_format = nir_address_format_logical,
+ .shared_addr_format = nir_address_format_32bit_offset,
+ .frag_coord_is_sysval = true,
+ .use_deref_buffer_array_length = true,
+ .debug =
+ {
+ .func = radv_spirv_nir_debug,
+ .private_data = &spirv_debug_data,
+ },
+ };
+ nir = spirv_to_nir(spirv, module->size / 4, spec_entries, num_spec_entries, stage,
+ entrypoint_name, &spirv_options, &nir_options);
+ assert(nir->info.stage == stage);
+ nir_validate_shader(nir, "after spirv_to_nir");
+
+ free(spec_entries);
+
+ /* We have to lower away local constant initializers right before we
+ * inline functions. That way they get properly initialized at the top
+ * of the function and not at the top of its caller.
+ */
+ NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
+ NIR_PASS_V(nir, nir_lower_returns);
+ NIR_PASS_V(nir, nir_inline_functions);
+ NIR_PASS_V(nir, nir_copy_prop);
+ NIR_PASS_V(nir, nir_opt_deref);
+
+ /* Pick off the single entrypoint that we want */
+ foreach_list_typed_safe(nir_function, func, node, &nir->functions)
+ {
+ if (func->is_entrypoint)
+ func->name = ralloc_strdup(func, "main");
+ else
+ exec_node_remove(&func->node);
+ }
+ assert(exec_list_length(&nir->functions) == 1);
+
+ /* Make sure we lower constant initializers on output variables so that
+ * nir_remove_dead_variables below sees the corresponding stores
+ */
+ NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_shader_out);
+
+ /* Now that we've deleted all but the main function, we can go ahead and
+ * lower the rest of the constant initializers.
+ */
+ NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);
+
+ /* Split member structs. We do this before lower_io_to_temporaries so that
+ * it doesn't lower system values to temporaries by accident.
+ */
+ NIR_PASS_V(nir, nir_split_var_copies);
+ NIR_PASS_V(nir, nir_split_per_member_structs);
+
+ if (nir->info.stage == MESA_SHADER_FRAGMENT)
+ NIR_PASS_V(nir, nir_lower_io_to_vector, nir_var_shader_out);
+ if (nir->info.stage == MESA_SHADER_FRAGMENT)
+ NIR_PASS_V(nir, nir_lower_input_attachments,
+ &(nir_input_attachment_options){
+ .use_fragcoord_sysval = true,
+ .use_layer_id_sysval = false,
+ });
+
+ NIR_PASS_V(nir, nir_remove_dead_variables,
+ nir_var_shader_in | nir_var_shader_out | nir_var_system_value | nir_var_mem_shared,
+ NULL);
+
+ /* Variables can make nir_propagate_invariant more conservative
+ * than it needs to be.
+ */
+ NIR_PASS_V(nir, nir_lower_global_vars_to_local);
+ NIR_PASS_V(nir, nir_lower_vars_to_ssa);
+
+ if (device->instance->debug_flags & RADV_DEBUG_INVARIANT_GEOM &&
+ stage != MESA_SHADER_FRAGMENT) {
+ mark_geom_invariant(nir);
+ }
+
+ NIR_PASS_V(nir, nir_propagate_invariant);
+
+ NIR_PASS_V(nir, nir_lower_system_values);
+ NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);
+
+ NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
+
+ NIR_PASS_V(nir, nir_lower_discard_or_demote,
+ device->instance->debug_flags & RADV_DEBUG_DISCARD_TO_DEMOTE);
+
+ nir_lower_doubles_options lower_doubles = nir->options->lower_doubles_options;
+
+ if (device->physical_device->rad_info.chip_class == GFX6) {
+ /* GFX6 doesn't support v_floor_f64 and the precision
+ * of v_fract_f64 which is used to implement 64-bit
+ * floor is less than what Vulkan requires.
+ */
+ lower_doubles |= nir_lower_dfloor;
+ }
+
+ NIR_PASS_V(nir, nir_lower_doubles, NULL, lower_doubles);
+ }
+
+ /* Vulkan uses the separate-shader linking model */
+ nir->info.separate_shader = true;
+
+ nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
+
+ if (nir->info.stage == MESA_SHADER_GEOMETRY) {
+ unsigned nir_gs_flags = nir_lower_gs_intrinsics_per_stream;
+
+ if (device->physical_device->use_ngg && !radv_use_llvm_for_stage(device, stage)) {
+         /* ACO needs NIR to do some of the heavy lifting */
+ nir_gs_flags |= nir_lower_gs_intrinsics_count_primitives |
+ nir_lower_gs_intrinsics_count_vertices_per_primitive |
+ nir_lower_gs_intrinsics_overwrite_incomplete;
+ }
+
+ nir_lower_gs_intrinsics(nir, nir_gs_flags);
+ }
+
+ static const nir_lower_tex_options tex_options = {
+ .lower_txp = ~0,
+ .lower_tg4_offsets = true,
+ };
+
+ nir_lower_tex(nir, &tex_options);
+
+ nir_lower_vars_to_ssa(nir);
+
+ if (nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_GEOMETRY ||
+ nir->info.stage == MESA_SHADER_FRAGMENT) {
+ NIR_PASS_V(nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(nir), true, true);
+ } else if (nir->info.stage == MESA_SHADER_TESS_EVAL) {
+ NIR_PASS_V(nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(nir), true, false);
+ }
+
+ nir_split_var_copies(nir);
+
+ nir_lower_global_vars_to_local(nir);
+ nir_remove_dead_variables(nir, nir_var_function_temp, NULL);
+ bool gfx7minus = device->physical_device->rad_info.chip_class <= GFX7;
+ nir_lower_subgroups(nir, &(struct nir_lower_subgroups_options){
+ .subgroup_size = subgroup_size,
+ .ballot_bit_size = ballot_bit_size,
+ .lower_to_scalar = 1,
+ .lower_subgroup_masks = 1,
+ .lower_shuffle = 1,
+ .lower_shuffle_to_32bit = 1,
+ .lower_vote_eq_to_ballot = 1,
+ .lower_quad_broadcast_dynamic = 1,
+ .lower_quad_broadcast_dynamic_to_const = gfx7minus,
+ .lower_shuffle_to_swizzle_amd = 1,
+ .lower_elect = radv_use_llvm_for_stage(device, stage),
+ });
+
+ nir_lower_load_const_to_scalar(nir);
+
+ if (!(flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT))
+ radv_optimize_nir(device, nir, false, true);
+
+   /* Call radv_nir_lower_ycbcr_textures() late, as there might still be
+    * tex instructions with an undefined texture/sampler before the first optimization. */
+ NIR_PASS_V(nir, radv_nir_lower_ycbcr_textures, layout);
+
+ /* We call nir_lower_var_copies() after the first radv_optimize_nir()
+ * to remove any copies introduced by nir_opt_find_array_copies().
+ */
+ nir_lower_var_copies(nir);
+
+ const nir_opt_access_options opt_access_options = {
+ .is_vulkan = true,
+ .infer_non_readable = true,
+ };
+ NIR_PASS_V(nir, nir_opt_access, &opt_access_options);
+
+ NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_push_const, nir_address_format_32bit_offset);
+
+ NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo | nir_var_mem_ssbo,
+ nir_address_format_32bit_index_offset);
+
+ NIR_PASS_V(nir, lower_intrinsics, key);
+
+ /* Lower deref operations for compute shared memory. */
+ if (nir->info.stage == MESA_SHADER_COMPUTE) {
+ if (!nir->info.cs.shared_memory_explicit_layout) {
+ NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_mem_shared, shared_var_info);
+ }
+ NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_shared, nir_address_format_32bit_offset);
+
+ if (nir->info.cs.zero_initialize_shared_memory && nir->info.shared_size > 0) {
+ const unsigned chunk_size = 16; /* max single store size */
+ const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size);
+ NIR_PASS_V(nir, nir_zero_initialize_shared_memory, shared_size, chunk_size);
+ }
+ }
+
+ nir_lower_explicit_io(nir, nir_var_mem_global, nir_address_format_64bit_global);
+
+ /* Lower large variables that are always constant with load_constant
+ * intrinsics, which get turned into PC-relative loads from a data
+ * section next to the shader.
+ */
+ NIR_PASS_V(nir, nir_opt_large_constants, glsl_get_natural_size_align_bytes, 16);
+
+ /* Indirect lowering must be called after the radv_optimize_nir() loop
+ * has been called at least once. Otherwise indirect lowering can
+ * bloat the instruction count of the loop and cause it to be
+ * considered too large for unrolling.
+ */
+ if (ac_lower_indirect_derefs(nir, device->physical_device->rad_info.chip_class) &&
+ !(flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT) &&
+ nir->info.stage != MESA_SHADER_COMPUTE) {
+ /* Optimize the lowered code before the linking optimizations. */
+ radv_optimize_nir(device, nir, false, false);
+ }
+
+ return nir;
}
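/* A minimal sketch of the caller-side VkSpecializationInfo that the
 * spec-constant loop in radv_shader_compile_to_nir() above consumes, assuming
 * a single 32-bit constant with constantID = 0; the helper name and values
 * are illustrative only.
 */
static VkSpecializationInfo
make_spec_info_sketch(const uint32_t *value)
{
   static const VkSpecializationMapEntry entry = {
      .constantID = 0,          /* layout(constant_id = 0) in the shader */
      .offset = 0,              /* byte offset into pData */
      .size = sizeof(uint32_t), /* copied into spec_entries[i].value.u32 */
   };
   return (VkSpecializationInfo){
      .mapEntryCount = 1,
      .pMapEntries = &entry,
      .dataSize = sizeof(uint32_t),
      .pData = value,
   };
}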
static int
type_size_vec4(const struct glsl_type *type, bool bindless)
{
- return glsl_count_attribute_slots(type, false);
+ return glsl_count_attribute_slots(type, false);
}
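/* Worked example for type_size_vec4() above, assuming GLSL attribute-slot
 * counting where one location holds at most a vec4:
 *
 *    float     -> 1 slot        vec4      -> 1 slot
 *    mat4      -> 4 slots       vec4 v[3] -> 3 slots
 *
 * radv_lower_io() below passes this callback to nir_lower_io() so that
 * driver locations are assigned in units of vec4 slots.
 */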
static nir_variable *
find_layer_in_var(nir_shader *nir)
{
- nir_variable *var =
- nir_find_variable_with_location(nir, nir_var_shader_in, VARYING_SLOT_LAYER);
- if (var != NULL)
- return var;
-
- var = nir_variable_create(nir, nir_var_shader_in, glsl_int_type(), "layer id");
- var->data.location = VARYING_SLOT_LAYER;
- var->data.interpolation = INTERP_MODE_FLAT;
- return var;
+ nir_variable *var = nir_find_variable_with_location(nir, nir_var_shader_in, VARYING_SLOT_LAYER);
+ if (var != NULL)
+ return var;
+
+ var = nir_variable_create(nir, nir_var_shader_in, glsl_int_type(), "layer id");
+ var->data.location = VARYING_SLOT_LAYER;
+ var->data.interpolation = INTERP_MODE_FLAT;
+ return var;
}
/* We use layered rendering to implement multiview, which means we need to map
@@ -722,1174 +685,1108 @@ find_layer_in_var(nir_shader *nir)
static bool
lower_view_index(nir_shader *nir)
{
- bool progress = false;
- nir_function_impl *entry = nir_shader_get_entrypoint(nir);
- nir_builder b;
- nir_builder_init(&b, entry);
-
- nir_variable *layer = NULL;
- nir_foreach_block(block, entry) {
- nir_foreach_instr_safe(instr, block) {
- if (instr->type != nir_instr_type_intrinsic)
- continue;
-
- nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);
- if (load->intrinsic != nir_intrinsic_load_view_index)
- continue;
-
- if (!layer)
- layer = find_layer_in_var(nir);
-
- b.cursor = nir_before_instr(instr);
- nir_ssa_def *def = nir_load_var(&b, layer);
- nir_ssa_def_rewrite_uses(&load->dest.ssa,
- def);
-
- nir_instr_remove(instr);
- progress = true;
- }
- }
-
- return progress;
+ bool progress = false;
+ nir_function_impl *entry = nir_shader_get_entrypoint(nir);
+ nir_builder b;
+ nir_builder_init(&b, entry);
+
+ nir_variable *layer = NULL;
+ nir_foreach_block (block, entry) {
+ nir_foreach_instr_safe (instr, block) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);
+ if (load->intrinsic != nir_intrinsic_load_view_index)
+ continue;
+
+ if (!layer)
+ layer = find_layer_in_var(nir);
+
+ b.cursor = nir_before_instr(instr);
+ nir_ssa_def *def = nir_load_var(&b, layer);
+ nir_ssa_def_rewrite_uses(&load->dest.ssa, def);
+
+ nir_instr_remove(instr);
+ progress = true;
+ }
+ }
+
+ return progress;
}
void
radv_lower_io(struct radv_device *device, nir_shader *nir)
{
- if (nir->info.stage == MESA_SHADER_COMPUTE)
- return;
+ if (nir->info.stage == MESA_SHADER_COMPUTE)
+ return;
- if (nir->info.stage == MESA_SHADER_FRAGMENT) {
- NIR_PASS_V(nir, lower_view_index);
- nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,
- MESA_SHADER_FRAGMENT);
- }
+ if (nir->info.stage == MESA_SHADER_FRAGMENT) {
+ NIR_PASS_V(nir, lower_view_index);
+ nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, MESA_SHADER_FRAGMENT);
+ }
- /* The RADV/LLVM backend expects 64-bit IO to be lowered. */
- nir_lower_io_options options =
- radv_use_llvm_for_stage(device, nir->info.stage) ? nir_lower_io_lower_64bit_to_32 : 0;
+ /* The RADV/LLVM backend expects 64-bit IO to be lowered. */
+ nir_lower_io_options options =
+ radv_use_llvm_for_stage(device, nir->info.stage) ? nir_lower_io_lower_64bit_to_32 : 0;
- NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
- type_size_vec4, options);
+ NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out, type_size_vec4, options);
- /* This pass needs actual constants */
- nir_opt_constant_folding(nir);
+ /* This pass needs actual constants */
+ nir_opt_constant_folding(nir);
- NIR_PASS_V(nir, nir_io_add_const_offset_to_base,
- nir_var_shader_in | nir_var_shader_out);
+ NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in | nir_var_shader_out);
}
bool
radv_lower_io_to_mem(struct radv_device *device, struct nir_shader *nir,
struct radv_shader_info *info, const struct radv_pipeline_key *pl_key)
{
- if (nir->info.stage == MESA_SHADER_VERTEX) {
- if (info->vs.as_ls) {
- ac_nir_lower_ls_outputs_to_mem(
- nir,
- info->vs.tcs_in_out_eq,
- info->vs.tcs_temp_only_input_mask,
- info->vs.num_linked_outputs);
- return true;
- } else if (info->vs.as_es) {
- ac_nir_lower_es_outputs_to_mem(
- nir,
- device->physical_device->rad_info.chip_class,
- info->vs.num_linked_outputs);
- return true;
- }
- } else if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
- ac_nir_lower_hs_inputs_to_mem(
- nir,
- info->vs.tcs_in_out_eq,
- info->tcs.num_linked_inputs);
- ac_nir_lower_hs_outputs_to_mem(
- nir, device->physical_device->rad_info.chip_class,
- info->tcs.tes_reads_tess_factors,
- info->tcs.tes_inputs_read,
- info->tcs.tes_patch_inputs_read,
- info->tcs.num_linked_inputs,
- info->tcs.num_linked_outputs,
- info->tcs.num_linked_patch_outputs,
- true);
- ac_nir_lower_tess_to_const(
- nir,
- pl_key->tess_input_vertices,
- info->num_tess_patches,
- ac_nir_lower_patch_vtx_in | ac_nir_lower_num_patches);
-
- return true;
- } else if (nir->info.stage == MESA_SHADER_TESS_EVAL) {
- ac_nir_lower_tes_inputs_to_mem(
- nir,
- info->tes.num_linked_inputs,
- info->tes.num_linked_patch_inputs);
- ac_nir_lower_tess_to_const(
- nir,
- nir->info.tess.tcs_vertices_out,
- info->num_tess_patches,
- ac_nir_lower_patch_vtx_in | ac_nir_lower_num_patches);
-
- if (info->tes.as_es) {
- ac_nir_lower_es_outputs_to_mem(
- nir,
- device->physical_device->rad_info.chip_class,
- info->tes.num_linked_outputs);
- }
-
- return true;
- } else if (nir->info.stage == MESA_SHADER_GEOMETRY) {
- ac_nir_lower_gs_inputs_to_mem(
- nir,
- device->physical_device->rad_info.chip_class,
- info->gs.num_linked_inputs);
- return true;
- }
-
- return false;
+ if (nir->info.stage == MESA_SHADER_VERTEX) {
+ if (info->vs.as_ls) {
+ ac_nir_lower_ls_outputs_to_mem(nir, info->vs.tcs_in_out_eq,
+ info->vs.tcs_temp_only_input_mask,
+ info->vs.num_linked_outputs);
+ return true;
+ } else if (info->vs.as_es) {
+ ac_nir_lower_es_outputs_to_mem(nir, device->physical_device->rad_info.chip_class,
+ info->vs.num_linked_outputs);
+ return true;
+ }
+ } else if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
+ ac_nir_lower_hs_inputs_to_mem(nir, info->vs.tcs_in_out_eq, info->tcs.num_linked_inputs);
+ ac_nir_lower_hs_outputs_to_mem(
+ nir, device->physical_device->rad_info.chip_class, info->tcs.tes_reads_tess_factors,
+ info->tcs.tes_inputs_read, info->tcs.tes_patch_inputs_read, info->tcs.num_linked_inputs,
+ info->tcs.num_linked_outputs, info->tcs.num_linked_patch_outputs, true);
+ ac_nir_lower_tess_to_const(nir, pl_key->tess_input_vertices, info->num_tess_patches,
+ ac_nir_lower_patch_vtx_in | ac_nir_lower_num_patches);
+
+ return true;
+ } else if (nir->info.stage == MESA_SHADER_TESS_EVAL) {
+ ac_nir_lower_tes_inputs_to_mem(nir, info->tes.num_linked_inputs,
+ info->tes.num_linked_patch_inputs);
+ ac_nir_lower_tess_to_const(nir, nir->info.tess.tcs_vertices_out, info->num_tess_patches,
+ ac_nir_lower_patch_vtx_in | ac_nir_lower_num_patches);
+
+ if (info->tes.as_es) {
+ ac_nir_lower_es_outputs_to_mem(nir, device->physical_device->rad_info.chip_class,
+ info->tes.num_linked_outputs);
+ }
+
+ return true;
+ } else if (nir->info.stage == MESA_SHADER_GEOMETRY) {
+ ac_nir_lower_gs_inputs_to_mem(nir, device->physical_device->rad_info.chip_class,
+ info->gs.num_linked_inputs);
+ return true;
+ }
+
+ return false;
}
static void *
-radv_alloc_shader_memory(struct radv_device *device,
- struct radv_shader_variant *shader)
+radv_alloc_shader_memory(struct radv_device *device, struct radv_shader_variant *shader)
{
- mtx_lock(&device->shader_slab_mutex);
- list_for_each_entry(struct radv_shader_slab, slab, &device->shader_slabs, slabs) {
- uint64_t offset = 0;
+ mtx_lock(&device->shader_slab_mutex);
+ list_for_each_entry(struct radv_shader_slab, slab, &device->shader_slabs, slabs)
+ {
+ uint64_t offset = 0;
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
#endif
- list_for_each_entry(struct radv_shader_variant, s, &slab->shaders, slab_list) {
+ list_for_each_entry(struct radv_shader_variant, s, &slab->shaders, slab_list)
+ {
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
- if (s->bo_offset - offset >= shader->code_size) {
- shader->bo = slab->bo;
- shader->bo_offset = offset;
- list_addtail(&shader->slab_list, &s->slab_list);
- mtx_unlock(&device->shader_slab_mutex);
- return slab->ptr + offset;
- }
- offset = align_u64(s->bo_offset + s->code_size, 256);
- }
- if (offset <= slab->size && slab->size - offset >= shader->code_size) {
- shader->bo = slab->bo;
- shader->bo_offset = offset;
- list_addtail(&shader->slab_list, &slab->shaders);
- mtx_unlock(&device->shader_slab_mutex);
- return slab->ptr + offset;
- }
- }
-
- mtx_unlock(&device->shader_slab_mutex);
- struct radv_shader_slab *slab = calloc(1, sizeof(struct radv_shader_slab));
-
- slab->size = MAX2(256 * 1024, shader->code_size);
- slab->bo = device->ws->buffer_create(device->ws, slab->size, 256,
- RADEON_DOMAIN_VRAM,
- RADEON_FLAG_NO_INTERPROCESS_SHARING |
- (device->physical_device->rad_info.cpdma_prefetch_writes_memory ?
- 0 : RADEON_FLAG_READ_ONLY),
- RADV_BO_PRIORITY_SHADER);
- if (!slab->bo) {
- free(slab);
- return NULL;
- }
-
- slab->ptr = (char*)device->ws->buffer_map(slab->bo);
- if (!slab->ptr) {
- device->ws->buffer_destroy(device->ws, slab->bo);
- free(slab);
- return NULL;
- }
-
- list_inithead(&slab->shaders);
-
- mtx_lock(&device->shader_slab_mutex);
- list_add(&slab->slabs, &device->shader_slabs);
-
- shader->bo = slab->bo;
- shader->bo_offset = 0;
- list_add(&shader->slab_list, &slab->shaders);
- mtx_unlock(&device->shader_slab_mutex);
- return slab->ptr;
+ if (s->bo_offset - offset >= shader->code_size) {
+ shader->bo = slab->bo;
+ shader->bo_offset = offset;
+ list_addtail(&shader->slab_list, &s->slab_list);
+ mtx_unlock(&device->shader_slab_mutex);
+ return slab->ptr + offset;
+ }
+ offset = align_u64(s->bo_offset + s->code_size, 256);
+ }
+ if (offset <= slab->size && slab->size - offset >= shader->code_size) {
+ shader->bo = slab->bo;
+ shader->bo_offset = offset;
+ list_addtail(&shader->slab_list, &slab->shaders);
+ mtx_unlock(&device->shader_slab_mutex);
+ return slab->ptr + offset;
+ }
+ }
+
+ mtx_unlock(&device->shader_slab_mutex);
+ struct radv_shader_slab *slab = calloc(1, sizeof(struct radv_shader_slab));
+
+ slab->size = MAX2(256 * 1024, shader->code_size);
+ slab->bo = device->ws->buffer_create(
+ device->ws, slab->size, 256, RADEON_DOMAIN_VRAM,
+ RADEON_FLAG_NO_INTERPROCESS_SHARING |
+ (device->physical_device->rad_info.cpdma_prefetch_writes_memory ? 0
+ : RADEON_FLAG_READ_ONLY),
+ RADV_BO_PRIORITY_SHADER);
+ if (!slab->bo) {
+ free(slab);
+ return NULL;
+ }
+
+ slab->ptr = (char *)device->ws->buffer_map(slab->bo);
+ if (!slab->ptr) {
+ device->ws->buffer_destroy(device->ws, slab->bo);
+ free(slab);
+ return NULL;
+ }
+
+ list_inithead(&slab->shaders);
+
+ mtx_lock(&device->shader_slab_mutex);
+ list_add(&slab->slabs, &device->shader_slabs);
+
+ shader->bo = slab->bo;
+ shader->bo_offset = 0;
+ list_add(&shader->slab_list, &slab->shaders);
+ mtx_unlock(&device->shader_slab_mutex);
+ return slab->ptr;
}
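/* The slab allocator above does first-fit within 256 KiB (or larger) slabs and
 * keeps every shader 256-byte aligned via align_u64(). A minimal sketch of that
 * rounding helper, assuming the usual power-of-two round-up definition; the
 * name carries a _sketch suffix because the real helper lives elsewhere in the
 * tree.
 */
static inline uint64_t
align_u64_sketch(uint64_t value, uint64_t alignment)
{
   /* e.g. align_u64_sketch(1000, 256) == 1024, align_u64_sketch(1024, 256) == 1024 */
   return (value + alignment - 1) & ~(alignment - 1);
}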
void
radv_destroy_shader_slabs(struct radv_device *device)
{
- list_for_each_entry_safe(struct radv_shader_slab, slab, &device->shader_slabs, slabs) {
- device->ws->buffer_destroy(device->ws, slab->bo);
- free(slab);
- }
- mtx_destroy(&device->shader_slab_mutex);
+ list_for_each_entry_safe(struct radv_shader_slab, slab, &device->shader_slabs, slabs)
+ {
+ device->ws->buffer_destroy(device->ws, slab->bo);
+ free(slab);
+ }
+ mtx_destroy(&device->shader_slab_mutex);
}
/* For the UMR disassembler. */
-#define DEBUGGER_END_OF_CODE_MARKER 0xbf9f0000 /* invalid instruction */
-#define DEBUGGER_NUM_MARKERS 5
+#define DEBUGGER_END_OF_CODE_MARKER 0xbf9f0000 /* invalid instruction */
+#define DEBUGGER_NUM_MARKERS 5
static unsigned
radv_get_shader_binary_size(size_t code_size)
{
- return code_size + DEBUGGER_NUM_MARKERS * 4;
+ return code_size + DEBUGGER_NUM_MARKERS * 4;
}
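/* Sketch of what those DEBUGGER_NUM_MARKERS * 4 extra bytes are used for,
 * assuming the upload path pads the code with end-of-code markers so UMR's
 * disassembler can tell where the shader stops; the helper name is
 * illustrative.
 */
static void
append_debugger_markers_sketch(uint32_t *dest, size_t code_size)
{
   uint32_t *end = dest + code_size / 4;
   for (unsigned i = 0; i < DEBUGGER_NUM_MARKERS; i++)
      end[i] = DEBUGGER_END_OF_CODE_MARKER; /* 0xbf9f0000, an invalid instruction */
}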
-static bool radv_should_use_wgp_mode(const struct radv_device *device, gl_shader_stage stage,
- const struct radv_shader_info *info)
+static bool
+radv_should_use_wgp_mode(const struct radv_device *device, gl_shader_stage stage,
+ const struct radv_shader_info *info)
{
- enum chip_class chip = device->physical_device->rad_info.chip_class;
- switch (stage) {
- case MESA_SHADER_COMPUTE:
- case MESA_SHADER_TESS_CTRL:
- return chip >= GFX10;
- case MESA_SHADER_GEOMETRY:
- return chip == GFX10 || (chip >= GFX10_3 && !info->is_ngg);
- case MESA_SHADER_VERTEX:
- case MESA_SHADER_TESS_EVAL:
- return chip == GFX10 && info->is_ngg;
- default:
- return false;
- }
+ enum chip_class chip = device->physical_device->rad_info.chip_class;
+ switch (stage) {
+ case MESA_SHADER_COMPUTE:
+ case MESA_SHADER_TESS_CTRL:
+ return chip >= GFX10;
+ case MESA_SHADER_GEOMETRY:
+ return chip == GFX10 || (chip >= GFX10_3 && !info->is_ngg);
+ case MESA_SHADER_VERTEX:
+ case MESA_SHADER_TESS_EVAL:
+ return chip == GFX10 && info->is_ngg;
+ default:
+ return false;
+ }
}
-static void radv_postprocess_config(const struct radv_device *device,
- const struct ac_shader_config *config_in,
- const struct radv_shader_info *info,
- gl_shader_stage stage,
- struct ac_shader_config *config_out)
+static void
+radv_postprocess_config(const struct radv_device *device, const struct ac_shader_config *config_in,
+ const struct radv_shader_info *info, gl_shader_stage stage,
+ struct ac_shader_config *config_out)
{
- const struct radv_physical_device *pdevice = device->physical_device;
- bool scratch_enabled = config_in->scratch_bytes_per_wave > 0;
- bool trap_enabled = !!device->trap_handler_shader;
- unsigned vgpr_comp_cnt = 0;
- unsigned num_input_vgprs = info->num_input_vgprs;
-
- if (stage == MESA_SHADER_FRAGMENT) {
- num_input_vgprs = ac_get_fs_input_vgpr_cnt(config_in, NULL, NULL);
- }
-
- unsigned num_vgprs = MAX2(config_in->num_vgprs, num_input_vgprs);
- /* +3 for scratch wave offset and VCC */
- unsigned num_sgprs = MAX2(config_in->num_sgprs, info->num_input_sgprs + 3);
- unsigned num_shared_vgprs = config_in->num_shared_vgprs;
- /* shared VGPRs are introduced in Navi and are allocated in blocks of 8 (RDNA ref 3.6.5) */
- assert((pdevice->rad_info.chip_class >= GFX10 && num_shared_vgprs % 8 == 0)
- || (pdevice->rad_info.chip_class < GFX10 && num_shared_vgprs == 0));
- unsigned num_shared_vgpr_blocks = num_shared_vgprs / 8;
- unsigned excp_en = 0;
-
- *config_out = *config_in;
- config_out->num_vgprs = num_vgprs;
- config_out->num_sgprs = num_sgprs;
- config_out->num_shared_vgprs = num_shared_vgprs;
-
- config_out->rsrc2 = S_00B12C_USER_SGPR(info->num_user_sgprs) |
- S_00B12C_SCRATCH_EN(scratch_enabled) |
- S_00B12C_TRAP_PRESENT(trap_enabled);
-
- if (trap_enabled) {
- /* Configure the shader exceptions like memory violation, etc.
- * TODO: Enable (and validate) more exceptions.
- */
- excp_en = 1 << 8; /* mem_viol */
- }
-
- if (!pdevice->use_ngg_streamout) {
- config_out->rsrc2 |= S_00B12C_SO_BASE0_EN(!!info->so.strides[0]) |
- S_00B12C_SO_BASE1_EN(!!info->so.strides[1]) |
- S_00B12C_SO_BASE2_EN(!!info->so.strides[2]) |
- S_00B12C_SO_BASE3_EN(!!info->so.strides[3]) |
- S_00B12C_SO_EN(!!info->so.num_outputs);
- }
-
- config_out->rsrc1 = S_00B848_VGPRS((num_vgprs - 1) /
- (info->wave_size == 32 ? 8 : 4)) |
- S_00B848_DX10_CLAMP(1) |
- S_00B848_FLOAT_MODE(config_out->float_mode);
-
- if (pdevice->rad_info.chip_class >= GFX10) {
- config_out->rsrc2 |= S_00B22C_USER_SGPR_MSB_GFX10(info->num_user_sgprs >> 5);
- } else {
- config_out->rsrc1 |= S_00B228_SGPRS((num_sgprs - 1) / 8);
- config_out->rsrc2 |= S_00B22C_USER_SGPR_MSB_GFX9(info->num_user_sgprs >> 5);
- }
-
- bool wgp_mode = radv_should_use_wgp_mode(device, stage, info);
-
- switch (stage) {
- case MESA_SHADER_TESS_EVAL:
- if (info->is_ngg) {
- config_out->rsrc1 |= S_00B228_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10);
- config_out->rsrc2 |= S_00B22C_OC_LDS_EN(1) |
- S_00B22C_EXCP_EN(excp_en);
- } else if (info->tes.as_es) {
- assert(pdevice->rad_info.chip_class <= GFX8);
- vgpr_comp_cnt = info->uses_prim_id ? 3 : 2;
-
- config_out->rsrc2 |= S_00B12C_OC_LDS_EN(1) |
- S_00B12C_EXCP_EN(excp_en);
- } else {
- bool enable_prim_id = info->tes.export_prim_id || info->uses_prim_id;
- vgpr_comp_cnt = enable_prim_id ? 3 : 2;
-
- config_out->rsrc1 |= S_00B128_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10);
- config_out->rsrc2 |= S_00B12C_OC_LDS_EN(1) |
- S_00B12C_EXCP_EN(excp_en);
- }
- config_out->rsrc2 |= S_00B22C_SHARED_VGPR_CNT(num_shared_vgpr_blocks);
- break;
- case MESA_SHADER_TESS_CTRL:
- if (pdevice->rad_info.chip_class >= GFX9) {
- /* We need at least 2 components for LS.
- * VGPR0-3: (VertexID, RelAutoindex, InstanceID / StepRate0, InstanceID).
-          * StepRate0 is set to 1 so that VGPR3 doesn't have to be loaded.
- */
- if (pdevice->rad_info.chip_class >= GFX10) {
- vgpr_comp_cnt = info->vs.needs_instance_id ? 3 : 1;
- config_out->rsrc2 |= S_00B42C_LDS_SIZE_GFX10(info->tcs.num_lds_blocks) |
- S_00B42C_EXCP_EN_GFX6(excp_en);
- } else {
- vgpr_comp_cnt = info->vs.needs_instance_id ? 2 : 1;
- config_out->rsrc2 |= S_00B42C_LDS_SIZE_GFX9(info->tcs.num_lds_blocks) |
- S_00B42C_EXCP_EN_GFX9(excp_en);
- }
- } else {
- config_out->rsrc2 |= S_00B12C_OC_LDS_EN(1) |
- S_00B12C_EXCP_EN(excp_en);
- }
- config_out->rsrc1 |= S_00B428_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10) |
- S_00B428_WGP_MODE(wgp_mode);
- config_out->rsrc2 |= S_00B42C_SHARED_VGPR_CNT(num_shared_vgpr_blocks);
- break;
- case MESA_SHADER_VERTEX:
- if (info->is_ngg) {
- config_out->rsrc1 |= S_00B228_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10);
- } else if (info->vs.as_ls) {
- assert(pdevice->rad_info.chip_class <= GFX8);
- /* We need at least 2 components for LS.
- * VGPR0-3: (VertexID, RelAutoindex, InstanceID / StepRate0, InstanceID).
-          * StepRate0 is set to 1 so that VGPR3 doesn't have to be loaded.
- */
- vgpr_comp_cnt = info->vs.needs_instance_id ? 2 : 1;
- } else if (info->vs.as_es) {
- assert(pdevice->rad_info.chip_class <= GFX8);
- /* VGPR0-3: (VertexID, InstanceID / StepRate0, ...) */
- vgpr_comp_cnt = info->vs.needs_instance_id ? 1 : 0;
- } else {
- /* VGPR0-3: (VertexID, InstanceID / StepRate0, PrimID, InstanceID)
-          * If PrimID is disabled, InstanceID / StepRate1 is loaded instead.
-          * StepRate0 is set to 1 so that VGPR3 doesn't have to be loaded.
- */
- if (info->vs.needs_instance_id && pdevice->rad_info.chip_class >= GFX10) {
- vgpr_comp_cnt = 3;
- } else if (info->vs.export_prim_id) {
- vgpr_comp_cnt = 2;
- } else if (info->vs.needs_instance_id) {
- vgpr_comp_cnt = 1;
- } else {
- vgpr_comp_cnt = 0;
- }
-
- config_out->rsrc1 |= S_00B128_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10);
- }
- config_out->rsrc2 |= S_00B12C_SHARED_VGPR_CNT(num_shared_vgpr_blocks) |
- S_00B12C_EXCP_EN(excp_en);
- break;
- case MESA_SHADER_FRAGMENT:
- config_out->rsrc1 |= S_00B028_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10);
- config_out->rsrc2 |= S_00B02C_SHARED_VGPR_CNT(num_shared_vgpr_blocks) |
- S_00B02C_TRAP_PRESENT(1) |
- S_00B02C_EXCP_EN(excp_en);
- break;
- case MESA_SHADER_GEOMETRY:
- config_out->rsrc1 |= S_00B228_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10);
- config_out->rsrc2 |= S_00B22C_SHARED_VGPR_CNT(num_shared_vgpr_blocks) |
- S_00B22C_EXCP_EN(excp_en);
- break;
- case MESA_SHADER_COMPUTE:
- config_out->rsrc1 |= S_00B848_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10) |
- S_00B848_WGP_MODE(wgp_mode);
- config_out->rsrc2 |=
- S_00B84C_TGID_X_EN(info->cs.uses_block_id[0]) |
- S_00B84C_TGID_Y_EN(info->cs.uses_block_id[1]) |
- S_00B84C_TGID_Z_EN(info->cs.uses_block_id[2]) |
- S_00B84C_TIDIG_COMP_CNT(info->cs.uses_thread_id[2] ? 2 :
- info->cs.uses_thread_id[1] ? 1 : 0) |
- S_00B84C_TG_SIZE_EN(info->cs.uses_local_invocation_idx) |
- S_00B84C_LDS_SIZE(config_in->lds_size) |
- S_00B84C_EXCP_EN(excp_en);
- config_out->rsrc3 |= S_00B8A0_SHARED_VGPR_CNT(num_shared_vgpr_blocks);
-
- break;
- default:
- unreachable("unsupported shader type");
- break;
- }
-
- if (pdevice->rad_info.chip_class >= GFX10 && info->is_ngg &&
- (stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL || stage == MESA_SHADER_GEOMETRY)) {
- unsigned gs_vgpr_comp_cnt, es_vgpr_comp_cnt;
- gl_shader_stage es_stage = stage;
- if (stage == MESA_SHADER_GEOMETRY)
- es_stage = info->gs.es_type;
-
- /* VGPR5-8: (VertexID, UserVGPR0, UserVGPR1, UserVGPR2 / InstanceID) */
- if (es_stage == MESA_SHADER_VERTEX) {
- es_vgpr_comp_cnt = info->vs.needs_instance_id ? 3 : 0;
- } else if (es_stage == MESA_SHADER_TESS_EVAL) {
- bool enable_prim_id = info->tes.export_prim_id || info->uses_prim_id;
- es_vgpr_comp_cnt = enable_prim_id ? 3 : 2;
- } else
- unreachable("Unexpected ES shader stage");
-
- bool tes_triangles = stage == MESA_SHADER_TESS_EVAL &&
- info->tes.primitive_mode >= 4; /* GL_TRIANGLES */
- if (info->uses_invocation_id || stage == MESA_SHADER_VERTEX) {
- gs_vgpr_comp_cnt = 3; /* VGPR3 contains InvocationID. */
- } else if (info->uses_prim_id) {
- gs_vgpr_comp_cnt = 2; /* VGPR2 contains PrimitiveID. */
- } else if (info->gs.vertices_in >= 3 || tes_triangles) {
- gs_vgpr_comp_cnt = 1; /* VGPR1 contains offsets 2, 3 */
- } else {
- gs_vgpr_comp_cnt = 0; /* VGPR0 contains offsets 0, 1 */
- }
-
-       /* Disable the WGP mode on gfx10.3 because it can hang (it
-        * happened on VanGogh). Let's disable it on all chips that
-        * disable exactly 1 CU per SA for GS.
- */
- config_out->rsrc1 |= S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt) |
- S_00B228_WGP_MODE(wgp_mode);
- config_out->rsrc2 |= S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) |
- S_00B22C_LDS_SIZE(config_in->lds_size) |
- S_00B22C_OC_LDS_EN(es_stage == MESA_SHADER_TESS_EVAL);
- } else if (pdevice->rad_info.chip_class >= GFX9 &&
- stage == MESA_SHADER_GEOMETRY) {
- unsigned es_type = info->gs.es_type;
- unsigned gs_vgpr_comp_cnt, es_vgpr_comp_cnt;
-
- if (es_type == MESA_SHADER_VERTEX) {
- /* VGPR0-3: (VertexID, InstanceID / StepRate0, ...) */
- if (info->vs.needs_instance_id) {
- es_vgpr_comp_cnt = pdevice->rad_info.chip_class >= GFX10 ? 3 : 1;
- } else {
- es_vgpr_comp_cnt = 0;
- }
- } else if (es_type == MESA_SHADER_TESS_EVAL) {
- es_vgpr_comp_cnt = info->uses_prim_id ? 3 : 2;
- } else {
- unreachable("invalid shader ES type");
- }
-
- /* If offsets 4, 5 are used, GS_VGPR_COMP_CNT is ignored and
- * VGPR[0:4] are always loaded.
- */
- if (info->uses_invocation_id) {
- gs_vgpr_comp_cnt = 3; /* VGPR3 contains InvocationID. */
- } else if (info->uses_prim_id) {
- gs_vgpr_comp_cnt = 2; /* VGPR2 contains PrimitiveID. */
- } else if (info->gs.vertices_in >= 3) {
- gs_vgpr_comp_cnt = 1; /* VGPR1 contains offsets 2, 3 */
- } else {
- gs_vgpr_comp_cnt = 0; /* VGPR0 contains offsets 0, 1 */
- }
-
- config_out->rsrc1 |= S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt) |
- S_00B228_WGP_MODE(wgp_mode);
- config_out->rsrc2 |= S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) |
- S_00B22C_OC_LDS_EN(es_type == MESA_SHADER_TESS_EVAL);
- } else if (pdevice->rad_info.chip_class >= GFX9 &&
- stage == MESA_SHADER_TESS_CTRL) {
- config_out->rsrc1 |= S_00B428_LS_VGPR_COMP_CNT(vgpr_comp_cnt);
- } else {
- config_out->rsrc1 |= S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt);
- }
+ const struct radv_physical_device *pdevice = device->physical_device;
+ bool scratch_enabled = config_in->scratch_bytes_per_wave > 0;
+ bool trap_enabled = !!device->trap_handler_shader;
+ unsigned vgpr_comp_cnt = 0;
+ unsigned num_input_vgprs = info->num_input_vgprs;
+
+ if (stage == MESA_SHADER_FRAGMENT) {
+ num_input_vgprs = ac_get_fs_input_vgpr_cnt(config_in, NULL, NULL);
+ }
+
+ unsigned num_vgprs = MAX2(config_in->num_vgprs, num_input_vgprs);
+ /* +3 for scratch wave offset and VCC */
+ unsigned num_sgprs = MAX2(config_in->num_sgprs, info->num_input_sgprs + 3);
+ unsigned num_shared_vgprs = config_in->num_shared_vgprs;
+ /* shared VGPRs are introduced in Navi and are allocated in blocks of 8 (RDNA ref 3.6.5) */
+ assert((pdevice->rad_info.chip_class >= GFX10 && num_shared_vgprs % 8 == 0) ||
+ (pdevice->rad_info.chip_class < GFX10 && num_shared_vgprs == 0));
+ unsigned num_shared_vgpr_blocks = num_shared_vgprs / 8;
+ unsigned excp_en = 0;
+
+ *config_out = *config_in;
+ config_out->num_vgprs = num_vgprs;
+ config_out->num_sgprs = num_sgprs;
+ config_out->num_shared_vgprs = num_shared_vgprs;
+
+ config_out->rsrc2 = S_00B12C_USER_SGPR(info->num_user_sgprs) |
+ S_00B12C_SCRATCH_EN(scratch_enabled) | S_00B12C_TRAP_PRESENT(trap_enabled);
+
+ if (trap_enabled) {
+ /* Configure the shader exceptions like memory violation, etc.
+ * TODO: Enable (and validate) more exceptions.
+ */
+ excp_en = 1 << 8; /* mem_viol */
+ }
+
+ if (!pdevice->use_ngg_streamout) {
+ config_out->rsrc2 |=
+ S_00B12C_SO_BASE0_EN(!!info->so.strides[0]) | S_00B12C_SO_BASE1_EN(!!info->so.strides[1]) |
+ S_00B12C_SO_BASE2_EN(!!info->so.strides[2]) | S_00B12C_SO_BASE3_EN(!!info->so.strides[3]) |
+ S_00B12C_SO_EN(!!info->so.num_outputs);
+ }
+
+ config_out->rsrc1 = S_00B848_VGPRS((num_vgprs - 1) / (info->wave_size == 32 ? 8 : 4)) |
+ S_00B848_DX10_CLAMP(1) | S_00B848_FLOAT_MODE(config_out->float_mode);
+
+ if (pdevice->rad_info.chip_class >= GFX10) {
+ config_out->rsrc2 |= S_00B22C_USER_SGPR_MSB_GFX10(info->num_user_sgprs >> 5);
+ } else {
+ config_out->rsrc1 |= S_00B228_SGPRS((num_sgprs - 1) / 8);
+ config_out->rsrc2 |= S_00B22C_USER_SGPR_MSB_GFX9(info->num_user_sgprs >> 5);
+ }
+
+ bool wgp_mode = radv_should_use_wgp_mode(device, stage, info);
+
+ switch (stage) {
+ case MESA_SHADER_TESS_EVAL:
+ if (info->is_ngg) {
+ config_out->rsrc1 |= S_00B228_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10);
+ config_out->rsrc2 |= S_00B22C_OC_LDS_EN(1) | S_00B22C_EXCP_EN(excp_en);
+ } else if (info->tes.as_es) {
+ assert(pdevice->rad_info.chip_class <= GFX8);
+ vgpr_comp_cnt = info->uses_prim_id ? 3 : 2;
+
+ config_out->rsrc2 |= S_00B12C_OC_LDS_EN(1) | S_00B12C_EXCP_EN(excp_en);
+ } else {
+ bool enable_prim_id = info->tes.export_prim_id || info->uses_prim_id;
+ vgpr_comp_cnt = enable_prim_id ? 3 : 2;
+
+ config_out->rsrc1 |= S_00B128_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10);
+ config_out->rsrc2 |= S_00B12C_OC_LDS_EN(1) | S_00B12C_EXCP_EN(excp_en);
+ }
+ config_out->rsrc2 |= S_00B22C_SHARED_VGPR_CNT(num_shared_vgpr_blocks);
+ break;
+ case MESA_SHADER_TESS_CTRL:
+ if (pdevice->rad_info.chip_class >= GFX9) {
+ /* We need at least 2 components for LS.
+ * VGPR0-3: (VertexID, RelAutoindex, InstanceID / StepRate0, InstanceID).
+          * StepRate0 is set to 1 so that VGPR3 doesn't have to be loaded.
+ */
+ if (pdevice->rad_info.chip_class >= GFX10) {
+ vgpr_comp_cnt = info->vs.needs_instance_id ? 3 : 1;
+ config_out->rsrc2 |=
+ S_00B42C_LDS_SIZE_GFX10(info->tcs.num_lds_blocks) | S_00B42C_EXCP_EN_GFX6(excp_en);
+ } else {
+ vgpr_comp_cnt = info->vs.needs_instance_id ? 2 : 1;
+ config_out->rsrc2 |=
+ S_00B42C_LDS_SIZE_GFX9(info->tcs.num_lds_blocks) | S_00B42C_EXCP_EN_GFX9(excp_en);
+ }
+ } else {
+ config_out->rsrc2 |= S_00B12C_OC_LDS_EN(1) | S_00B12C_EXCP_EN(excp_en);
+ }
+ config_out->rsrc1 |=
+ S_00B428_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10) | S_00B428_WGP_MODE(wgp_mode);
+ config_out->rsrc2 |= S_00B42C_SHARED_VGPR_CNT(num_shared_vgpr_blocks);
+ break;
+ case MESA_SHADER_VERTEX:
+ if (info->is_ngg) {
+ config_out->rsrc1 |= S_00B228_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10);
+ } else if (info->vs.as_ls) {
+ assert(pdevice->rad_info.chip_class <= GFX8);
+ /* We need at least 2 components for LS.
+ * VGPR0-3: (VertexID, RelAutoindex, InstanceID / StepRate0, InstanceID).
+          * StepRate0 is set to 1 so that VGPR3 doesn't have to be loaded.
+ */
+ vgpr_comp_cnt = info->vs.needs_instance_id ? 2 : 1;
+ } else if (info->vs.as_es) {
+ assert(pdevice->rad_info.chip_class <= GFX8);
+ /* VGPR0-3: (VertexID, InstanceID / StepRate0, ...) */
+ vgpr_comp_cnt = info->vs.needs_instance_id ? 1 : 0;
+ } else {
+ /* VGPR0-3: (VertexID, InstanceID / StepRate0, PrimID, InstanceID)
+          * If PrimID is disabled, InstanceID / StepRate1 is loaded instead.
+          * StepRate0 is set to 1 so that VGPR3 doesn't have to be loaded.
+ */
+ if (info->vs.needs_instance_id && pdevice->rad_info.chip_class >= GFX10) {
+ vgpr_comp_cnt = 3;
+ } else if (info->vs.export_prim_id) {
+ vgpr_comp_cnt = 2;
+ } else if (info->vs.needs_instance_id) {
+ vgpr_comp_cnt = 1;
+ } else {
+ vgpr_comp_cnt = 0;
+ }
+
+ config_out->rsrc1 |= S_00B128_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10);
+ }
+ config_out->rsrc2 |=
+ S_00B12C_SHARED_VGPR_CNT(num_shared_vgpr_blocks) | S_00B12C_EXCP_EN(excp_en);
+ break;
+ case MESA_SHADER_FRAGMENT:
+ config_out->rsrc1 |= S_00B028_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10);
+ config_out->rsrc2 |= S_00B02C_SHARED_VGPR_CNT(num_shared_vgpr_blocks) |
+ S_00B02C_TRAP_PRESENT(1) | S_00B02C_EXCP_EN(excp_en);
+ break;
+ case MESA_SHADER_GEOMETRY:
+ config_out->rsrc1 |= S_00B228_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10);
+ config_out->rsrc2 |=
+ S_00B22C_SHARED_VGPR_CNT(num_shared_vgpr_blocks) | S_00B22C_EXCP_EN(excp_en);
+ break;
+ case MESA_SHADER_COMPUTE:
+ config_out->rsrc1 |=
+ S_00B848_MEM_ORDERED(pdevice->rad_info.chip_class >= GFX10) | S_00B848_WGP_MODE(wgp_mode);
+ config_out->rsrc2 |= S_00B84C_TGID_X_EN(info->cs.uses_block_id[0]) |
+ S_00B84C_TGID_Y_EN(info->cs.uses_block_id[1]) |
+ S_00B84C_TGID_Z_EN(info->cs.uses_block_id[2]) |
+ S_00B84C_TIDIG_COMP_CNT(info->cs.uses_thread_id[2] ? 2
+ : info->cs.uses_thread_id[1] ? 1
+ : 0) |
+ S_00B84C_TG_SIZE_EN(info->cs.uses_local_invocation_idx) |
+ S_00B84C_LDS_SIZE(config_in->lds_size) | S_00B84C_EXCP_EN(excp_en);
+ config_out->rsrc3 |= S_00B8A0_SHARED_VGPR_CNT(num_shared_vgpr_blocks);
+
+ break;
+ default:
+ unreachable("unsupported shader type");
+ break;
+ }
+
+ if (pdevice->rad_info.chip_class >= GFX10 && info->is_ngg &&
+ (stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL ||
+ stage == MESA_SHADER_GEOMETRY)) {
+ unsigned gs_vgpr_comp_cnt, es_vgpr_comp_cnt;
+ gl_shader_stage es_stage = stage;
+ if (stage == MESA_SHADER_GEOMETRY)
+ es_stage = info->gs.es_type;
+
+ /* VGPR5-8: (VertexID, UserVGPR0, UserVGPR1, UserVGPR2 / InstanceID) */
+ if (es_stage == MESA_SHADER_VERTEX) {
+ es_vgpr_comp_cnt = info->vs.needs_instance_id ? 3 : 0;
+ } else if (es_stage == MESA_SHADER_TESS_EVAL) {
+ bool enable_prim_id = info->tes.export_prim_id || info->uses_prim_id;
+ es_vgpr_comp_cnt = enable_prim_id ? 3 : 2;
+ } else
+ unreachable("Unexpected ES shader stage");
+
+ bool tes_triangles =
+ stage == MESA_SHADER_TESS_EVAL && info->tes.primitive_mode >= 4; /* GL_TRIANGLES */
+ if (info->uses_invocation_id || stage == MESA_SHADER_VERTEX) {
+ gs_vgpr_comp_cnt = 3; /* VGPR3 contains InvocationID. */
+ } else if (info->uses_prim_id) {
+ gs_vgpr_comp_cnt = 2; /* VGPR2 contains PrimitiveID. */
+ } else if (info->gs.vertices_in >= 3 || tes_triangles) {
+ gs_vgpr_comp_cnt = 1; /* VGPR1 contains offsets 2, 3 */
+ } else {
+ gs_vgpr_comp_cnt = 0; /* VGPR0 contains offsets 0, 1 */
+ }
+
+      /* Disable the WGP mode on gfx10.3 because it can hang (this
+       * happened on VanGogh). Let's disable it on all chips that
+       * disable exactly 1 CU per SA for GS.
+ */
+ config_out->rsrc1 |=
+ S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt) | S_00B228_WGP_MODE(wgp_mode);
+ config_out->rsrc2 |= S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) |
+ S_00B22C_LDS_SIZE(config_in->lds_size) |
+ S_00B22C_OC_LDS_EN(es_stage == MESA_SHADER_TESS_EVAL);
+ } else if (pdevice->rad_info.chip_class >= GFX9 && stage == MESA_SHADER_GEOMETRY) {
+ unsigned es_type = info->gs.es_type;
+ unsigned gs_vgpr_comp_cnt, es_vgpr_comp_cnt;
+
+ if (es_type == MESA_SHADER_VERTEX) {
+ /* VGPR0-3: (VertexID, InstanceID / StepRate0, ...) */
+ if (info->vs.needs_instance_id) {
+ es_vgpr_comp_cnt = pdevice->rad_info.chip_class >= GFX10 ? 3 : 1;
+ } else {
+ es_vgpr_comp_cnt = 0;
+ }
+ } else if (es_type == MESA_SHADER_TESS_EVAL) {
+ es_vgpr_comp_cnt = info->uses_prim_id ? 3 : 2;
+ } else {
+ unreachable("invalid shader ES type");
+ }
+
+ /* If offsets 4, 5 are used, GS_VGPR_COMP_CNT is ignored and
+ * VGPR[0:4] are always loaded.
+ */
+ if (info->uses_invocation_id) {
+ gs_vgpr_comp_cnt = 3; /* VGPR3 contains InvocationID. */
+ } else if (info->uses_prim_id) {
+ gs_vgpr_comp_cnt = 2; /* VGPR2 contains PrimitiveID. */
+ } else if (info->gs.vertices_in >= 3) {
+ gs_vgpr_comp_cnt = 1; /* VGPR1 contains offsets 2, 3 */
+ } else {
+ gs_vgpr_comp_cnt = 0; /* VGPR0 contains offsets 0, 1 */
+ }
+
+ config_out->rsrc1 |=
+ S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt) | S_00B228_WGP_MODE(wgp_mode);
+ config_out->rsrc2 |= S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) |
+ S_00B22C_OC_LDS_EN(es_type == MESA_SHADER_TESS_EVAL);
+ } else if (pdevice->rad_info.chip_class >= GFX9 && stage == MESA_SHADER_TESS_CTRL) {
+ config_out->rsrc1 |= S_00B428_LS_VGPR_COMP_CNT(vgpr_comp_cnt);
+ } else {
+ config_out->rsrc1 |= S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt);
+ }
}
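
The rsrc1/rsrc2/rsrc3 words assembled above are plain bitfields: each S_00B*_* helper shifts a value into its register field, and the VGPR_COMP_CNT fields select how many extra system VGPRs (InstanceID, PrimitiveID, ...) the hardware preloads for the stage. A minimal sketch of that packing pattern, assuming made-up field offsets rather than the real SPI_SHADER_PGM_RSRC layout from the generated register headers:

   /* Sketch of the register-field packing used above; the shift/width values
    * are placeholders, not the real SPI_SHADER_PGM_RSRC1 layout. */
   #include <stdbool.h>
   #include <stdint.h>
   #include <stdio.h>

   #define FIELD(val, shift, width) (((uint32_t)(val) & ((1u << (width)) - 1)) << (shift))

   /* Hypothetical field positions, for illustration only. */
   #define S_EXAMPLE_VGPR_COMP_CNT(x) FIELD(x, 24, 2)
   #define S_EXAMPLE_MEM_ORDERED(x)   FIELD(x, 26, 1)

   int
   main(void)
   {
      bool needs_instance_id = true, export_prim_id = false, gfx10_plus = true;

      /* Same priority order as the plain-VS case above: InstanceID in VGPR3 on
       * GFX10+, then PrimID in VGPR2, then InstanceID in VGPR1, else nothing. */
      unsigned vgpr_comp_cnt;
      if (needs_instance_id && gfx10_plus)
         vgpr_comp_cnt = 3;
      else if (export_prim_id)
         vgpr_comp_cnt = 2;
      else if (needs_instance_id)
         vgpr_comp_cnt = 1;
      else
         vgpr_comp_cnt = 0;

      uint32_t rsrc1 = S_EXAMPLE_VGPR_COMP_CNT(vgpr_comp_cnt) | S_EXAMPLE_MEM_ORDERED(gfx10_plus);
      printf("vgpr_comp_cnt=%u rsrc1=0x%08x\n", vgpr_comp_cnt, (unsigned)rsrc1);
      return 0;
   }
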
struct radv_shader_variant *
-radv_shader_variant_create(struct radv_device *device,
- const struct radv_shader_binary *binary,
- bool keep_shader_info)
+radv_shader_variant_create(struct radv_device *device, const struct radv_shader_binary *binary,
+ bool keep_shader_info)
{
- struct ac_shader_config config = {0};
- struct ac_rtld_binary rtld_binary = {0};
- struct radv_shader_variant *variant = calloc(1, sizeof(struct radv_shader_variant));
- if (!variant)
- return NULL;
-
- variant->ref_count = 1;
-
- if (binary->type == RADV_BINARY_TYPE_RTLD) {
- struct ac_rtld_symbol lds_symbols[2];
- unsigned num_lds_symbols = 0;
- const char *elf_data = (const char *)((struct radv_shader_binary_rtld *)binary)->data;
- size_t elf_size = ((struct radv_shader_binary_rtld *)binary)->elf_size;
-
- if (device->physical_device->rad_info.chip_class >= GFX9 &&
- (binary->stage == MESA_SHADER_GEOMETRY || binary->info.is_ngg) &&
- !binary->is_gs_copy_shader) {
- /* We add this symbol even on LLVM <= 8 to ensure that
- * shader->config.lds_size is set correctly below.
- */
- struct ac_rtld_symbol *sym = &lds_symbols[num_lds_symbols++];
- sym->name = "esgs_ring";
- sym->size = binary->info.ngg_info.esgs_ring_size;
- sym->align = 64 * 1024;
- }
-
- if (binary->info.is_ngg &&
- binary->stage == MESA_SHADER_GEOMETRY) {
- struct ac_rtld_symbol *sym = &lds_symbols[num_lds_symbols++];
- sym->name = "ngg_emit";
- sym->size = binary->info.ngg_info.ngg_emit_size * 4;
- sym->align = 4;
- }
-
- struct ac_rtld_open_info open_info = {
- .info = &device->physical_device->rad_info,
- .shader_type = binary->stage,
- .wave_size = binary->info.wave_size,
- .num_parts = 1,
- .elf_ptrs = &elf_data,
- .elf_sizes = &elf_size,
- .num_shared_lds_symbols = num_lds_symbols,
- .shared_lds_symbols = lds_symbols,
- };
-
- if (!ac_rtld_open(&rtld_binary, open_info)) {
- free(variant);
- return NULL;
- }
-
- if (!ac_rtld_read_config(&device->physical_device->rad_info,
- &rtld_binary, &config)) {
- ac_rtld_close(&rtld_binary);
- free(variant);
- return NULL;
- }
-
- if (rtld_binary.lds_size > 0) {
- unsigned encode_granularity = device->physical_device->rad_info.lds_encode_granularity;
- config.lds_size = align(rtld_binary.lds_size, encode_granularity) / encode_granularity;
- }
- if (!config.lds_size && binary->stage == MESA_SHADER_TESS_CTRL) {
- /* This is used for reporting LDS statistics */
- config.lds_size = binary->info.tcs.num_lds_blocks;
- }
-
- variant->code_size = rtld_binary.rx_size;
- variant->exec_size = rtld_binary.exec_size;
- } else {
- assert(binary->type == RADV_BINARY_TYPE_LEGACY);
- config = ((struct radv_shader_binary_legacy *)binary)->config;
- variant->code_size = radv_get_shader_binary_size(((struct radv_shader_binary_legacy *)binary)->code_size);
- variant->exec_size = ((struct radv_shader_binary_legacy *)binary)->exec_size;
- }
-
- variant->info = binary->info;
- radv_postprocess_config(device, &config, &binary->info,
- binary->stage, &variant->config);
-
- void *dest_ptr = radv_alloc_shader_memory(device, variant);
- if (!dest_ptr) {
- if (binary->type == RADV_BINARY_TYPE_RTLD)
- ac_rtld_close(&rtld_binary);
- free(variant);
- return NULL;
- }
-
- if (binary->type == RADV_BINARY_TYPE_RTLD) {
- struct radv_shader_binary_rtld* bin = (struct radv_shader_binary_rtld *)binary;
- struct ac_rtld_upload_info info = {
- .binary = &rtld_binary,
- .rx_va = radv_buffer_get_va(variant->bo) + variant->bo_offset,
- .rx_ptr = dest_ptr,
- };
-
- if (!ac_rtld_upload(&info)) {
- radv_shader_variant_destroy(device, variant);
- ac_rtld_close(&rtld_binary);
- return NULL;
- }
-
- if (keep_shader_info ||
- (device->instance->debug_flags & RADV_DEBUG_DUMP_SHADERS)) {
- const char *disasm_data;
- size_t disasm_size;
- if (!ac_rtld_get_section_by_name(&rtld_binary, ".AMDGPU.disasm", &disasm_data, &disasm_size)) {
- radv_shader_variant_destroy(device, variant);
- ac_rtld_close(&rtld_binary);
- return NULL;
- }
-
- variant->ir_string = bin->llvm_ir_size ? strdup((const char*)(bin->data + bin->elf_size)) : NULL;
- variant->disasm_string = malloc(disasm_size + 1);
- memcpy(variant->disasm_string, disasm_data, disasm_size);
- variant->disasm_string[disasm_size] = 0;
- }
-
- variant->code_ptr = dest_ptr;
- ac_rtld_close(&rtld_binary);
- } else {
- struct radv_shader_binary_legacy* bin = (struct radv_shader_binary_legacy *)binary;
- memcpy(dest_ptr, bin->data + bin->stats_size, bin->code_size);
-
- /* Add end-of-code markers for the UMR disassembler. */
- uint32_t *ptr32 = (uint32_t *)dest_ptr + bin->code_size / 4;
- for (unsigned i = 0; i < DEBUGGER_NUM_MARKERS; i++)
- ptr32[i] = DEBUGGER_END_OF_CODE_MARKER;
-
- variant->code_ptr = dest_ptr;
- variant->ir_string = bin->ir_size ? strdup((const char*)(bin->data + bin->stats_size + bin->code_size)) : NULL;
- variant->disasm_string = bin->disasm_size ? strdup((const char*)(bin->data + bin->stats_size + bin->code_size + bin->ir_size)) : NULL;
-
- if (bin->stats_size) {
- variant->statistics = calloc(bin->stats_size, 1);
- memcpy(variant->statistics, bin->data, bin->stats_size);
- }
- }
- return variant;
+ struct ac_shader_config config = {0};
+ struct ac_rtld_binary rtld_binary = {0};
+ struct radv_shader_variant *variant = calloc(1, sizeof(struct radv_shader_variant));
+ if (!variant)
+ return NULL;
+
+ variant->ref_count = 1;
+
+ if (binary->type == RADV_BINARY_TYPE_RTLD) {
+ struct ac_rtld_symbol lds_symbols[2];
+ unsigned num_lds_symbols = 0;
+ const char *elf_data = (const char *)((struct radv_shader_binary_rtld *)binary)->data;
+ size_t elf_size = ((struct radv_shader_binary_rtld *)binary)->elf_size;
+
+ if (device->physical_device->rad_info.chip_class >= GFX9 &&
+ (binary->stage == MESA_SHADER_GEOMETRY || binary->info.is_ngg) &&
+ !binary->is_gs_copy_shader) {
+ /* We add this symbol even on LLVM <= 8 to ensure that
+ * shader->config.lds_size is set correctly below.
+ */
+ struct ac_rtld_symbol *sym = &lds_symbols[num_lds_symbols++];
+ sym->name = "esgs_ring";
+ sym->size = binary->info.ngg_info.esgs_ring_size;
+ sym->align = 64 * 1024;
+ }
+
+ if (binary->info.is_ngg && binary->stage == MESA_SHADER_GEOMETRY) {
+ struct ac_rtld_symbol *sym = &lds_symbols[num_lds_symbols++];
+ sym->name = "ngg_emit";
+ sym->size = binary->info.ngg_info.ngg_emit_size * 4;
+ sym->align = 4;
+ }
+
+ struct ac_rtld_open_info open_info = {
+ .info = &device->physical_device->rad_info,
+ .shader_type = binary->stage,
+ .wave_size = binary->info.wave_size,
+ .num_parts = 1,
+ .elf_ptrs = &elf_data,
+ .elf_sizes = &elf_size,
+ .num_shared_lds_symbols = num_lds_symbols,
+ .shared_lds_symbols = lds_symbols,
+ };
+
+ if (!ac_rtld_open(&rtld_binary, open_info)) {
+ free(variant);
+ return NULL;
+ }
+
+ if (!ac_rtld_read_config(&device->physical_device->rad_info, &rtld_binary, &config)) {
+ ac_rtld_close(&rtld_binary);
+ free(variant);
+ return NULL;
+ }
+
+ if (rtld_binary.lds_size > 0) {
+ unsigned encode_granularity = device->physical_device->rad_info.lds_encode_granularity;
+ config.lds_size = align(rtld_binary.lds_size, encode_granularity) / encode_granularity;
+ }
+ if (!config.lds_size && binary->stage == MESA_SHADER_TESS_CTRL) {
+ /* This is used for reporting LDS statistics */
+ config.lds_size = binary->info.tcs.num_lds_blocks;
+ }
+
+ variant->code_size = rtld_binary.rx_size;
+ variant->exec_size = rtld_binary.exec_size;
+ } else {
+ assert(binary->type == RADV_BINARY_TYPE_LEGACY);
+ config = ((struct radv_shader_binary_legacy *)binary)->config;
+ variant->code_size =
+ radv_get_shader_binary_size(((struct radv_shader_binary_legacy *)binary)->code_size);
+ variant->exec_size = ((struct radv_shader_binary_legacy *)binary)->exec_size;
+ }
+
+ variant->info = binary->info;
+ radv_postprocess_config(device, &config, &binary->info, binary->stage, &variant->config);
+
+ void *dest_ptr = radv_alloc_shader_memory(device, variant);
+ if (!dest_ptr) {
+ if (binary->type == RADV_BINARY_TYPE_RTLD)
+ ac_rtld_close(&rtld_binary);
+ free(variant);
+ return NULL;
+ }
+
+ if (binary->type == RADV_BINARY_TYPE_RTLD) {
+ struct radv_shader_binary_rtld *bin = (struct radv_shader_binary_rtld *)binary;
+ struct ac_rtld_upload_info info = {
+ .binary = &rtld_binary,
+ .rx_va = radv_buffer_get_va(variant->bo) + variant->bo_offset,
+ .rx_ptr = dest_ptr,
+ };
+
+ if (!ac_rtld_upload(&info)) {
+ radv_shader_variant_destroy(device, variant);
+ ac_rtld_close(&rtld_binary);
+ return NULL;
+ }
+
+ if (keep_shader_info || (device->instance->debug_flags & RADV_DEBUG_DUMP_SHADERS)) {
+ const char *disasm_data;
+ size_t disasm_size;
+ if (!ac_rtld_get_section_by_name(&rtld_binary, ".AMDGPU.disasm", &disasm_data,
+ &disasm_size)) {
+ radv_shader_variant_destroy(device, variant);
+ ac_rtld_close(&rtld_binary);
+ return NULL;
+ }
+
+ variant->ir_string =
+ bin->llvm_ir_size ? strdup((const char *)(bin->data + bin->elf_size)) : NULL;
+ variant->disasm_string = malloc(disasm_size + 1);
+ memcpy(variant->disasm_string, disasm_data, disasm_size);
+ variant->disasm_string[disasm_size] = 0;
+ }
+
+ variant->code_ptr = dest_ptr;
+ ac_rtld_close(&rtld_binary);
+ } else {
+ struct radv_shader_binary_legacy *bin = (struct radv_shader_binary_legacy *)binary;
+ memcpy(dest_ptr, bin->data + bin->stats_size, bin->code_size);
+
+ /* Add end-of-code markers for the UMR disassembler. */
+ uint32_t *ptr32 = (uint32_t *)dest_ptr + bin->code_size / 4;
+ for (unsigned i = 0; i < DEBUGGER_NUM_MARKERS; i++)
+ ptr32[i] = DEBUGGER_END_OF_CODE_MARKER;
+
+ variant->code_ptr = dest_ptr;
+ variant->ir_string =
+ bin->ir_size ? strdup((const char *)(bin->data + bin->stats_size + bin->code_size)) : NULL;
+ variant->disasm_string =
+ bin->disasm_size
+ ? strdup((const char *)(bin->data + bin->stats_size + bin->code_size + bin->ir_size))
+ : NULL;
+
+ if (bin->stats_size) {
+ variant->statistics = calloc(bin->stats_size, 1);
+ memcpy(variant->statistics, bin->data, bin->stats_size);
+ }
+ }
+ return variant;
}
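
One detail of the RTLD path above worth spelling out: rtld_binary.lds_size is in bytes, while the config.lds_size field counts hardware allocation blocks, so the driver aligns up to the encode granularity and then divides. A sketch of just that conversion, with an illustrative 512-byte granularity standing in for rad_info.lds_encode_granularity:

   /* Sketch of the lds_size conversion above: bytes -> hardware encode blocks.
    * The 512-byte granularity is an example value, not a real chip's. */
   #include <stdio.h>

   static unsigned
   align_u(unsigned v, unsigned a)
   {
      return (v + a - 1) / a * a; /* round v up to a multiple of 'a' */
   }

   int
   main(void)
   {
      unsigned lds_bytes = 9000;  /* example LDS usage reported by the linker */
      unsigned granularity = 512; /* example encode granularity */

      unsigned lds_blocks = align_u(lds_bytes, granularity) / granularity;
      printf("%u bytes of LDS -> lds_size field = %u blocks\n", lds_bytes, lds_blocks);
      return 0;
   }
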
static char *
-radv_dump_nir_shaders(struct nir_shader * const *shaders,
- int shader_count)
+radv_dump_nir_shaders(struct nir_shader *const *shaders, int shader_count)
{
- char *data = NULL;
- char *ret = NULL;
- size_t size = 0;
- struct u_memstream mem;
- if (u_memstream_open(&mem, &data, &size)) {
- FILE *const memf = u_memstream_get(&mem);
- for (int i = 0; i < shader_count; ++i)
- nir_print_shader(shaders[i], memf);
- u_memstream_close(&mem);
- }
-
- ret = malloc(size + 1);
- if (ret) {
- memcpy(ret, data, size);
- ret[size] = 0;
- }
- free(data);
- return ret;
+ char *data = NULL;
+ char *ret = NULL;
+ size_t size = 0;
+ struct u_memstream mem;
+ if (u_memstream_open(&mem, &data, &size)) {
+ FILE *const memf = u_memstream_get(&mem);
+ for (int i = 0; i < shader_count; ++i)
+ nir_print_shader(shaders[i], memf);
+ u_memstream_close(&mem);
+ }
+
+ ret = malloc(size + 1);
+ if (ret) {
+ memcpy(ret, data, size);
+ ret[size] = 0;
+ }
+ free(data);
+ return ret;
}
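
radv_dump_nir_shaders() prints every NIR shader into an in-memory FILE and then copies the result into a heap string. A sketch of the same capture-to-string pattern using POSIX open_memstream() directly; u_memstream is Mesa's portable wrapper around this idea, so treat the snippet as an approximation of the idiom rather than the wrapper's exact behaviour:

   /* Capture-to-string via POSIX open_memstream(); not available on all
    * platforms, which is why Mesa wraps it in u_memstream. */
   #define _POSIX_C_SOURCE 200809L
   #include <stdio.h>
   #include <stdlib.h>

   int
   main(void)
   {
      char *buf = NULL;
      size_t size = 0;
      FILE *memf = open_memstream(&buf, &size);
      if (!memf)
         return 1;

      /* Anything printed to memf accumulates in buf instead of a real file. */
      fprintf(memf, "shader %d\n", 0);
      fprintf(memf, "shader %d\n", 1);
      fclose(memf); /* finalizes buf and size */

      printf("captured %zu bytes:\n%s", size, buf);
      free(buf);
      return 0;
   }
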
static struct radv_shader_variant *
-shader_variant_compile(struct radv_device *device,
- struct vk_shader_module *module,
- struct nir_shader * const *shaders,
- int shader_count,
- gl_shader_stage stage,
- struct radv_shader_info *info,
- struct radv_nir_compiler_options *options,
- bool gs_copy_shader,
- bool trap_handler_shader,
- bool keep_shader_info,
- bool keep_statistic_info,
- struct radv_shader_binary **binary_out)
+shader_variant_compile(struct radv_device *device, struct vk_shader_module *module,
+ struct nir_shader *const *shaders, int shader_count, gl_shader_stage stage,
+ struct radv_shader_info *info, struct radv_nir_compiler_options *options,
+ bool gs_copy_shader, bool trap_handler_shader, bool keep_shader_info,
+ bool keep_statistic_info, struct radv_shader_binary **binary_out)
{
- enum radeon_family chip_family = device->physical_device->rad_info.family;
- struct radv_shader_binary *binary = NULL;
-
- struct radv_shader_debug_data debug_data = {
- .device = device,
- .module = module,
- };
-
- options->family = chip_family;
- options->chip_class = device->physical_device->rad_info.chip_class;
- options->info = &device->physical_device->rad_info;
- options->dump_shader = radv_can_dump_shader(device, module, gs_copy_shader);
- options->dump_preoptir = options->dump_shader &&
- device->instance->debug_flags & RADV_DEBUG_PREOPTIR;
- options->record_ir = keep_shader_info;
- options->record_stats = keep_statistic_info;
- options->check_ir = device->instance->debug_flags & RADV_DEBUG_CHECKIR;
- options->tess_offchip_block_dw_size = device->tess_offchip_block_dw_size;
- options->address32_hi = device->physical_device->rad_info.address32_hi;
- options->has_ls_vgpr_init_bug = device->physical_device->rad_info.has_ls_vgpr_init_bug;
- options->use_ngg_streamout = device->physical_device->use_ngg_streamout;
- options->enable_mrt_output_nan_fixup = module && !module->nir &&
- device->instance->enable_mrt_output_nan_fixup;
- options->adjust_frag_coord_z = device->adjust_frag_coord_z;
- options->has_image_load_dcc_bug = device->physical_device->rad_info.has_image_load_dcc_bug;
- options->debug.func = radv_compiler_debug;
- options->debug.private_data = &debug_data;
-
- switch (device->force_vrs) {
- case RADV_FORCE_VRS_2x2:
- options->force_vrs_rates = (1u << 2) | (1u << 4);
- break;
- case RADV_FORCE_VRS_2x1:
- options->force_vrs_rates = (0u << 2) | (1u << 4);
- break;
- case RADV_FORCE_VRS_1x2:
- options->force_vrs_rates = (1u << 2) | (0u << 4);
- break;
- default:
- break;
- }
-
- struct radv_shader_args args = {0};
- args.options = options;
- args.shader_info = info;
- args.is_gs_copy_shader = gs_copy_shader;
- args.is_trap_handler_shader = trap_handler_shader;
-
- radv_declare_shader_args(&args,
- gs_copy_shader ? MESA_SHADER_VERTEX
- : shaders[shader_count - 1]->info.stage,
- shader_count >= 2,
- shader_count >= 2 ? shaders[shader_count - 2]->info.stage
- : MESA_SHADER_VERTEX);
-
- if (radv_use_llvm_for_stage(device, stage) ||
- options->dump_shader || options->record_ir)
- ac_init_llvm_once();
-
- if (radv_use_llvm_for_stage(device, stage)) {
- llvm_compile_shader(device, shader_count, shaders, &binary, &args);
- } else {
- aco_compile_shader(shader_count, shaders, &binary, &args);
- }
-
- binary->info = *info;
-
- struct radv_shader_variant *variant = radv_shader_variant_create(device, binary,
- keep_shader_info);
- if (!variant) {
- free(binary);
- return NULL;
- }
-
- if (options->dump_shader) {
- fprintf(stderr, "%s", radv_get_shader_name(info, shaders[0]->info.stage));
- for (int i = 1; i < shader_count; ++i)
- fprintf(stderr, " + %s", radv_get_shader_name(info, shaders[i]->info.stage));
-
- fprintf(stderr, "\ndisasm:\n%s\n", variant->disasm_string);
- }
-
-
- if (keep_shader_info) {
- variant->nir_string = radv_dump_nir_shaders(shaders, shader_count);
- if (!gs_copy_shader && !trap_handler_shader && !module->nir) {
- variant->spirv = malloc(module->size);
- if (!variant->spirv) {
- free(variant);
- free(binary);
- return NULL;
- }
-
- memcpy(variant->spirv, module->data, module->size);
- variant->spirv_size = module->size;
- }
- }
-
- if (binary_out)
- *binary_out = binary;
- else
- free(binary);
-
- return variant;
+ enum radeon_family chip_family = device->physical_device->rad_info.family;
+ struct radv_shader_binary *binary = NULL;
+
+ struct radv_shader_debug_data debug_data = {
+ .device = device,
+ .module = module,
+ };
+
+ options->family = chip_family;
+ options->chip_class = device->physical_device->rad_info.chip_class;
+ options->info = &device->physical_device->rad_info;
+ options->dump_shader = radv_can_dump_shader(device, module, gs_copy_shader);
+ options->dump_preoptir =
+ options->dump_shader && device->instance->debug_flags & RADV_DEBUG_PREOPTIR;
+ options->record_ir = keep_shader_info;
+ options->record_stats = keep_statistic_info;
+ options->check_ir = device->instance->debug_flags & RADV_DEBUG_CHECKIR;
+ options->tess_offchip_block_dw_size = device->tess_offchip_block_dw_size;
+ options->address32_hi = device->physical_device->rad_info.address32_hi;
+ options->has_ls_vgpr_init_bug = device->physical_device->rad_info.has_ls_vgpr_init_bug;
+ options->use_ngg_streamout = device->physical_device->use_ngg_streamout;
+ options->enable_mrt_output_nan_fixup =
+ module && !module->nir && device->instance->enable_mrt_output_nan_fixup;
+ options->adjust_frag_coord_z = device->adjust_frag_coord_z;
+ options->has_image_load_dcc_bug = device->physical_device->rad_info.has_image_load_dcc_bug;
+ options->debug.func = radv_compiler_debug;
+ options->debug.private_data = &debug_data;
+
+ switch (device->force_vrs) {
+ case RADV_FORCE_VRS_2x2:
+ options->force_vrs_rates = (1u << 2) | (1u << 4);
+ break;
+ case RADV_FORCE_VRS_2x1:
+ options->force_vrs_rates = (0u << 2) | (1u << 4);
+ break;
+ case RADV_FORCE_VRS_1x2:
+ options->force_vrs_rates = (1u << 2) | (0u << 4);
+ break;
+ default:
+ break;
+ }
+
+ struct radv_shader_args args = {0};
+ args.options = options;
+ args.shader_info = info;
+ args.is_gs_copy_shader = gs_copy_shader;
+ args.is_trap_handler_shader = trap_handler_shader;
+
+ radv_declare_shader_args(
+ &args, gs_copy_shader ? MESA_SHADER_VERTEX : shaders[shader_count - 1]->info.stage,
+ shader_count >= 2,
+ shader_count >= 2 ? shaders[shader_count - 2]->info.stage : MESA_SHADER_VERTEX);
+
+ if (radv_use_llvm_for_stage(device, stage) || options->dump_shader || options->record_ir)
+ ac_init_llvm_once();
+
+ if (radv_use_llvm_for_stage(device, stage)) {
+ llvm_compile_shader(device, shader_count, shaders, &binary, &args);
+ } else {
+ aco_compile_shader(shader_count, shaders, &binary, &args);
+ }
+
+ binary->info = *info;
+
+ struct radv_shader_variant *variant =
+ radv_shader_variant_create(device, binary, keep_shader_info);
+ if (!variant) {
+ free(binary);
+ return NULL;
+ }
+
+ if (options->dump_shader) {
+ fprintf(stderr, "%s", radv_get_shader_name(info, shaders[0]->info.stage));
+ for (int i = 1; i < shader_count; ++i)
+ fprintf(stderr, " + %s", radv_get_shader_name(info, shaders[i]->info.stage));
+
+ fprintf(stderr, "\ndisasm:\n%s\n", variant->disasm_string);
+ }
+
+ if (keep_shader_info) {
+ variant->nir_string = radv_dump_nir_shaders(shaders, shader_count);
+ if (!gs_copy_shader && !trap_handler_shader && !module->nir) {
+ variant->spirv = malloc(module->size);
+ if (!variant->spirv) {
+ free(variant);
+ free(binary);
+ return NULL;
+ }
+
+ memcpy(variant->spirv, module->data, module->size);
+ variant->spirv_size = module->size;
+ }
+ }
+
+ if (binary_out)
+ *binary_out = binary;
+ else
+ free(binary);
+
+ return variant;
}
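
The force_vrs switch above encodes the forced shading rate as two 2-bit log2 factors inside a single byte: reading the constants, 2x1 sets bit 4 and 1x2 sets bit 2, so the X (width) factor appears to live at bits 4..5 and the Y (height) factor at bits 2..3. A small sketch that reproduces those constants; the helper name is invented for the example:

   /* Reproduces the force_vrs_rates constants above from log2 rate factors. */
   #include <stdint.h>
   #include <stdio.h>

   static uint8_t
   encode_force_vrs(unsigned log2_rate_x, unsigned log2_rate_y)
   {
      /* Matches the switch above: 2x1 -> (0u << 2) | (1u << 4), 1x2 -> (1u << 2). */
      return (uint8_t)((log2_rate_y << 2) | (log2_rate_x << 4));
   }

   int
   main(void)
   {
      uint8_t r2x2 = encode_force_vrs(1, 1); /* == (1u << 2) | (1u << 4) == 0x14 */
      uint8_t r2x1 = encode_force_vrs(1, 0); /* == 0x10 */
      uint8_t r1x2 = encode_force_vrs(0, 1); /* == 0x04 */

      printf("2x2=0x%02x 2x1=0x%02x 1x2=0x%02x\n", (unsigned)r2x2, (unsigned)r2x1, (unsigned)r1x2);
      printf("decoded 2x2 constant: %ux%u\n", 1u << ((r2x2 >> 4) & 0x3), 1u << ((r2x2 >> 2) & 0x3));
      return 0;
   }
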
struct radv_shader_variant *
-radv_shader_variant_compile(struct radv_device *device,
- struct vk_shader_module *module,
- struct nir_shader *const *shaders,
- int shader_count,
- struct radv_pipeline_layout *layout,
- const struct radv_shader_variant_key *key,
- struct radv_shader_info *info,
- bool keep_shader_info, bool keep_statistic_info,
- bool disable_optimizations,
- struct radv_shader_binary **binary_out)
+radv_shader_variant_compile(struct radv_device *device, struct vk_shader_module *module,
+ struct nir_shader *const *shaders, int shader_count,
+ struct radv_pipeline_layout *layout,
+ const struct radv_shader_variant_key *key,
+ struct radv_shader_info *info, bool keep_shader_info,
+ bool keep_statistic_info, bool disable_optimizations,
+ struct radv_shader_binary **binary_out)
{
- gl_shader_stage stage = shaders[shader_count - 1]->info.stage;
- struct radv_nir_compiler_options options = {0};
-
- options.layout = layout;
- if (key)
- options.key = *key;
-
- options.explicit_scratch_args = !radv_use_llvm_for_stage(device, stage);
- options.robust_buffer_access = device->robust_buffer_access;
- options.robust_buffer_access2 = device->robust_buffer_access2;
- options.disable_optimizations = disable_optimizations;
- options.wgp_mode = radv_should_use_wgp_mode(device, stage, info);
-
- return shader_variant_compile(device, module, shaders, shader_count, stage, info,
- &options, false, false,
- keep_shader_info, keep_statistic_info, binary_out);
+ gl_shader_stage stage = shaders[shader_count - 1]->info.stage;
+ struct radv_nir_compiler_options options = {0};
+
+ options.layout = layout;
+ if (key)
+ options.key = *key;
+
+ options.explicit_scratch_args = !radv_use_llvm_for_stage(device, stage);
+ options.robust_buffer_access = device->robust_buffer_access;
+ options.robust_buffer_access2 = device->robust_buffer_access2;
+ options.disable_optimizations = disable_optimizations;
+ options.wgp_mode = radv_should_use_wgp_mode(device, stage, info);
+
+ return shader_variant_compile(device, module, shaders, shader_count, stage, info, &options,
+ false, false, keep_shader_info, keep_statistic_info, binary_out);
}
struct radv_shader_variant *
-radv_create_gs_copy_shader(struct radv_device *device,
- struct nir_shader *shader,
- struct radv_shader_info *info,
- struct radv_shader_binary **binary_out,
- bool keep_shader_info, bool keep_statistic_info,
- bool multiview, bool disable_optimizations)
+radv_create_gs_copy_shader(struct radv_device *device, struct nir_shader *shader,
+ struct radv_shader_info *info, struct radv_shader_binary **binary_out,
+ bool keep_shader_info, bool keep_statistic_info, bool multiview,
+ bool disable_optimizations)
{
- struct radv_nir_compiler_options options = {0};
- gl_shader_stage stage = MESA_SHADER_VERTEX;
+ struct radv_nir_compiler_options options = {0};
+ gl_shader_stage stage = MESA_SHADER_VERTEX;
- options.explicit_scratch_args = !radv_use_llvm_for_stage(device, stage);
- options.key.has_multiview_view_index = multiview;
- options.disable_optimizations = disable_optimizations;
+ options.explicit_scratch_args = !radv_use_llvm_for_stage(device, stage);
+ options.key.has_multiview_view_index = multiview;
+ options.disable_optimizations = disable_optimizations;
- return shader_variant_compile(device, NULL, &shader, 1, stage,
- info, &options, true, false,
- keep_shader_info, keep_statistic_info, binary_out);
+ return shader_variant_compile(device, NULL, &shader, 1, stage, info, &options, true, false,
+ keep_shader_info, keep_statistic_info, binary_out);
}
struct radv_shader_variant *
radv_create_trap_handler_shader(struct radv_device *device)
{
- struct radv_nir_compiler_options options = {0};
- struct radv_shader_variant *shader = NULL;
- struct radv_shader_binary *binary = NULL;
- struct radv_shader_info info = {0};
+ struct radv_nir_compiler_options options = {0};
+ struct radv_shader_variant *shader = NULL;
+ struct radv_shader_binary *binary = NULL;
+ struct radv_shader_info info = {0};
- nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_trap_handler");
+ nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_trap_handler");
- options.explicit_scratch_args = true;
- options.wgp_mode = radv_should_use_wgp_mode(device, MESA_SHADER_COMPUTE, &info);
- info.wave_size = 64;
+ options.explicit_scratch_args = true;
+ options.wgp_mode = radv_should_use_wgp_mode(device, MESA_SHADER_COMPUTE, &info);
+ info.wave_size = 64;
- shader = shader_variant_compile(device, NULL, &b.shader, 1,
- MESA_SHADER_COMPUTE, &info, &options,
- false, true, true, false, &binary);
+ shader = shader_variant_compile(device, NULL, &b.shader, 1, MESA_SHADER_COMPUTE, &info, &options,
+ false, true, true, false, &binary);
- ralloc_free(b.shader);
- free(binary);
+ ralloc_free(b.shader);
+ free(binary);
- return shader;
+ return shader;
}
void
-radv_shader_variant_destroy(struct radv_device *device,
- struct radv_shader_variant *variant)
+radv_shader_variant_destroy(struct radv_device *device, struct radv_shader_variant *variant)
{
- if (!p_atomic_dec_zero(&variant->ref_count))
- return;
-
- mtx_lock(&device->shader_slab_mutex);
- list_del(&variant->slab_list);
- mtx_unlock(&device->shader_slab_mutex);
-
- free(variant->spirv);
- free(variant->nir_string);
- free(variant->disasm_string);
- free(variant->ir_string);
- free(variant->statistics);
- free(variant);
+ if (!p_atomic_dec_zero(&variant->ref_count))
+ return;
+
+ mtx_lock(&device->shader_slab_mutex);
+ list_del(&variant->slab_list);
+ mtx_unlock(&device->shader_slab_mutex);
+
+ free(variant->spirv);
+ free(variant->nir_string);
+ free(variant->disasm_string);
+ free(variant->ir_string);
+ free(variant->statistics);
+ free(variant);
}
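
radv_shader_variant_destroy() only tears the variant down when the last reference goes away; every other caller just decrements the count and returns. A sketch of that p_atomic_dec_zero() idiom with C11 atomics, using an invented object type:

   /* The decrement-and-test-for-zero release pattern, C11 atomics version. */
   #include <stdatomic.h>
   #include <stdbool.h>
   #include <stdio.h>
   #include <stdlib.h>

   struct object {
      atomic_int ref_count;
   };

   static bool
   dec_zero(atomic_int *count)
   {
      /* True only for the caller that takes the count from 1 to 0. */
      return atomic_fetch_sub(count, 1) == 1;
   }

   static void
   object_destroy(struct object *obj)
   {
      if (!dec_zero(&obj->ref_count))
         return; /* someone else still holds a reference */
      free(obj);
      printf("freed\n");
   }

   int
   main(void)
   {
      struct object *obj = calloc(1, sizeof(*obj));
      atomic_init(&obj->ref_count, 2); /* two owners */
      object_destroy(obj);             /* first release: object stays alive */
      object_destroy(obj);             /* second release: actually frees it */
      return 0;
   }
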
const char *
-radv_get_shader_name(struct radv_shader_info *info,
- gl_shader_stage stage)
+radv_get_shader_name(struct radv_shader_info *info, gl_shader_stage stage)
{
- switch (stage) {
- case MESA_SHADER_VERTEX:
- if (info->vs.as_ls)
- return "Vertex Shader as LS";
- else if (info->vs.as_es)
- return "Vertex Shader as ES";
- else if (info->is_ngg)
- return "Vertex Shader as ESGS";
- else
- return "Vertex Shader as VS";
- case MESA_SHADER_TESS_CTRL:
- return "Tessellation Control Shader";
- case MESA_SHADER_TESS_EVAL:
- if (info->tes.as_es)
- return "Tessellation Evaluation Shader as ES";
- else if (info->is_ngg)
- return "Tessellation Evaluation Shader as ESGS";
- else
- return "Tessellation Evaluation Shader as VS";
- case MESA_SHADER_GEOMETRY:
- return "Geometry Shader";
- case MESA_SHADER_FRAGMENT:
- return "Pixel Shader";
- case MESA_SHADER_COMPUTE:
- return "Compute Shader";
- default:
- return "Unknown shader";
- };
+ switch (stage) {
+ case MESA_SHADER_VERTEX:
+ if (info->vs.as_ls)
+ return "Vertex Shader as LS";
+ else if (info->vs.as_es)
+ return "Vertex Shader as ES";
+ else if (info->is_ngg)
+ return "Vertex Shader as ESGS";
+ else
+ return "Vertex Shader as VS";
+ case MESA_SHADER_TESS_CTRL:
+ return "Tessellation Control Shader";
+ case MESA_SHADER_TESS_EVAL:
+ if (info->tes.as_es)
+ return "Tessellation Evaluation Shader as ES";
+ else if (info->is_ngg)
+ return "Tessellation Evaluation Shader as ESGS";
+ else
+ return "Tessellation Evaluation Shader as VS";
+ case MESA_SHADER_GEOMETRY:
+ return "Geometry Shader";
+ case MESA_SHADER_FRAGMENT:
+ return "Pixel Shader";
+ case MESA_SHADER_COMPUTE:
+ return "Compute Shader";
+ default:
+ return "Unknown shader";
+ };
}
unsigned
-radv_get_max_workgroup_size(enum chip_class chip_class,
- gl_shader_stage stage,
+radv_get_max_workgroup_size(enum chip_class chip_class, gl_shader_stage stage,
const unsigned *sizes)
{
- switch (stage) {
- case MESA_SHADER_TESS_CTRL:
- return chip_class >= GFX7 ? 128 : 64;
- case MESA_SHADER_GEOMETRY:
- return chip_class >= GFX9 ? 128 : 64;
- case MESA_SHADER_COMPUTE:
- break;
- default:
- return 0;
- }
-
- unsigned max_workgroup_size = sizes[0] * sizes[1] * sizes[2];
- return max_workgroup_size;
+ switch (stage) {
+ case MESA_SHADER_TESS_CTRL:
+ return chip_class >= GFX7 ? 128 : 64;
+ case MESA_SHADER_GEOMETRY:
+ return chip_class >= GFX9 ? 128 : 64;
+ case MESA_SHADER_COMPUTE:
+ break;
+ default:
+ return 0;
+ }
+
+ unsigned max_workgroup_size = sizes[0] * sizes[1] * sizes[2];
+ return max_workgroup_size;
}
unsigned
-radv_get_max_waves(struct radv_device *device,
- struct radv_shader_variant *variant,
+radv_get_max_waves(struct radv_device *device, struct radv_shader_variant *variant,
gl_shader_stage stage)
{
- struct radeon_info *info = &device->physical_device->rad_info;
- enum chip_class chip_class = info->chip_class;
- uint8_t wave_size = variant->info.wave_size;
- struct ac_shader_config *conf = &variant->config;
- unsigned max_simd_waves;
- unsigned lds_per_wave = 0;
-
- max_simd_waves = info->max_wave64_per_simd * (64 / wave_size);
-
- if (stage == MESA_SHADER_FRAGMENT) {
- lds_per_wave = conf->lds_size * info->lds_encode_granularity +
- variant->info.ps.num_interp * 48;
- lds_per_wave = align(lds_per_wave, info->lds_alloc_granularity);
- } else if (stage == MESA_SHADER_COMPUTE) {
- unsigned max_workgroup_size =
- radv_get_max_workgroup_size(chip_class, stage, variant->info.cs.block_size);
- lds_per_wave = align(conf->lds_size * info->lds_encode_granularity,
- info->lds_alloc_granularity);
- lds_per_wave /= DIV_ROUND_UP(max_workgroup_size, wave_size);
- }
-
- if (conf->num_sgprs && chip_class < GFX10) {
- unsigned sgprs = align(conf->num_sgprs, chip_class >= GFX8 ? 16 : 8);
- max_simd_waves = MIN2(max_simd_waves, info->num_physical_sgprs_per_simd / sgprs);
- }
-
- if (conf->num_vgprs) {
- unsigned physical_vgprs = info->num_physical_wave64_vgprs_per_simd * (64 / wave_size);
- unsigned vgprs = align(conf->num_vgprs, wave_size == 32 ? 8 : 4);
- if (chip_class >= GFX10_3)
- vgprs = align(vgprs, wave_size == 32 ? 16 : 8);
- max_simd_waves = MIN2(max_simd_waves, physical_vgprs / vgprs);
- }
-
- unsigned simd_per_workgroup = info->num_simd_per_compute_unit;
- if (chip_class >= GFX10)
- simd_per_workgroup *= 2; /* like lds_size_per_workgroup, assume WGP on GFX10+ */
-
- unsigned max_lds_per_simd = info->lds_size_per_workgroup / simd_per_workgroup;
- if (lds_per_wave)
- max_simd_waves = MIN2(max_simd_waves, DIV_ROUND_UP(max_lds_per_simd, lds_per_wave));
-
- return chip_class >= GFX10 ? max_simd_waves * (wave_size / 32) : max_simd_waves;
+ struct radeon_info *info = &device->physical_device->rad_info;
+ enum chip_class chip_class = info->chip_class;
+ uint8_t wave_size = variant->info.wave_size;
+ struct ac_shader_config *conf = &variant->config;
+ unsigned max_simd_waves;
+ unsigned lds_per_wave = 0;
+
+ max_simd_waves = info->max_wave64_per_simd * (64 / wave_size);
+
+ if (stage == MESA_SHADER_FRAGMENT) {
+ lds_per_wave =
+ conf->lds_size * info->lds_encode_granularity + variant->info.ps.num_interp * 48;
+ lds_per_wave = align(lds_per_wave, info->lds_alloc_granularity);
+ } else if (stage == MESA_SHADER_COMPUTE) {
+ unsigned max_workgroup_size =
+ radv_get_max_workgroup_size(chip_class, stage, variant->info.cs.block_size);
+ lds_per_wave =
+ align(conf->lds_size * info->lds_encode_granularity, info->lds_alloc_granularity);
+ lds_per_wave /= DIV_ROUND_UP(max_workgroup_size, wave_size);
+ }
+
+ if (conf->num_sgprs && chip_class < GFX10) {
+ unsigned sgprs = align(conf->num_sgprs, chip_class >= GFX8 ? 16 : 8);
+ max_simd_waves = MIN2(max_simd_waves, info->num_physical_sgprs_per_simd / sgprs);
+ }
+
+ if (conf->num_vgprs) {
+ unsigned physical_vgprs = info->num_physical_wave64_vgprs_per_simd * (64 / wave_size);
+ unsigned vgprs = align(conf->num_vgprs, wave_size == 32 ? 8 : 4);
+ if (chip_class >= GFX10_3)
+ vgprs = align(vgprs, wave_size == 32 ? 16 : 8);
+ max_simd_waves = MIN2(max_simd_waves, physical_vgprs / vgprs);
+ }
+
+ unsigned simd_per_workgroup = info->num_simd_per_compute_unit;
+ if (chip_class >= GFX10)
+ simd_per_workgroup *= 2; /* like lds_size_per_workgroup, assume WGP on GFX10+ */
+
+ unsigned max_lds_per_simd = info->lds_size_per_workgroup / simd_per_workgroup;
+ if (lds_per_wave)
+ max_simd_waves = MIN2(max_simd_waves, DIV_ROUND_UP(max_lds_per_simd, lds_per_wave));
+
+ return chip_class >= GFX10 ? max_simd_waves * (wave_size / 32) : max_simd_waves;
}
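
radv_get_max_waves() clamps the theoretical per-SIMD wave count by each resource the shader consumes: SGPRs (pre-GFX10 only), VGPRs, and LDS. A worked example of just the VGPR clamp, with illustrative numbers rather than values queried from a real chip:

   /* VGPR half of the occupancy computation above; 16 wave slots, a
    * 1024-register wave64 budget and a granule of 4 are example values. */
   #include <stdio.h>

   #define MIN2(a, b) ((a) < (b) ? (a) : (b))

   static unsigned
   align_u(unsigned v, unsigned a)
   {
      return (v + a - 1) / a * a;
   }

   int
   main(void)
   {
      unsigned max_waves = 16;        /* example per-SIMD wave slots */
      unsigned physical_vgprs = 1024; /* example per-SIMD VGPR budget (wave64) */
      unsigned num_vgprs = 70;        /* example shader VGPR usage */
      unsigned granule = 4;           /* example wave64 allocation granule */

      unsigned vgprs = align_u(num_vgprs, granule);        /* 70 -> 72 */
      max_waves = MIN2(max_waves, physical_vgprs / vgprs); /* 1024 / 72 = 14 */

      printf("VGPR-limited occupancy: %u waves per SIMD\n", max_waves);
      return 0;
   }
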
VkResult
-radv_GetShaderInfoAMD(VkDevice _device,
- VkPipeline _pipeline,
- VkShaderStageFlagBits shaderStage,
- VkShaderInfoTypeAMD infoType,
- size_t* pInfoSize,
- void* pInfo)
+radv_GetShaderInfoAMD(VkDevice _device, VkPipeline _pipeline, VkShaderStageFlagBits shaderStage,
+ VkShaderInfoTypeAMD infoType, size_t *pInfoSize, void *pInfo)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
- gl_shader_stage stage = vk_to_mesa_shader_stage(shaderStage);
- struct radv_shader_variant *variant = pipeline->shaders[stage];
- VkResult result = VK_SUCCESS;
-
- /* Spec doesn't indicate what to do if the stage is invalid, so just
- * return no info for this. */
- if (!variant)
- return vk_error(device->instance, VK_ERROR_FEATURE_NOT_PRESENT);
-
- switch (infoType) {
- case VK_SHADER_INFO_TYPE_STATISTICS_AMD:
- if (!pInfo) {
- *pInfoSize = sizeof(VkShaderStatisticsInfoAMD);
- } else {
- unsigned lds_multiplier = device->physical_device->rad_info.lds_encode_granularity;
- struct ac_shader_config *conf = &variant->config;
-
- VkShaderStatisticsInfoAMD statistics = {0};
- statistics.shaderStageMask = shaderStage;
- statistics.numPhysicalVgprs = device->physical_device->rad_info.num_physical_wave64_vgprs_per_simd;
- statistics.numPhysicalSgprs = device->physical_device->rad_info.num_physical_sgprs_per_simd;
- statistics.numAvailableSgprs = statistics.numPhysicalSgprs;
-
- if (stage == MESA_SHADER_COMPUTE) {
- unsigned *local_size = variant->info.cs.block_size;
- unsigned workgroup_size = local_size[0] * local_size[1] * local_size[2];
-
- statistics.numAvailableVgprs = statistics.numPhysicalVgprs /
- ceil((double)workgroup_size / statistics.numPhysicalVgprs);
-
- statistics.computeWorkGroupSize[0] = local_size[0];
- statistics.computeWorkGroupSize[1] = local_size[1];
- statistics.computeWorkGroupSize[2] = local_size[2];
- } else {
- statistics.numAvailableVgprs = statistics.numPhysicalVgprs;
- }
-
- statistics.resourceUsage.numUsedVgprs = conf->num_vgprs;
- statistics.resourceUsage.numUsedSgprs = conf->num_sgprs;
- statistics.resourceUsage.ldsSizePerLocalWorkGroup = 32768;
- statistics.resourceUsage.ldsUsageSizeInBytes = conf->lds_size * lds_multiplier;
- statistics.resourceUsage.scratchMemUsageInBytes = conf->scratch_bytes_per_wave;
-
- size_t size = *pInfoSize;
- *pInfoSize = sizeof(statistics);
-
- memcpy(pInfo, &statistics, MIN2(size, *pInfoSize));
-
- if (size < *pInfoSize)
- result = VK_INCOMPLETE;
- }
-
- break;
- case VK_SHADER_INFO_TYPE_DISASSEMBLY_AMD: {
- char *out;
- size_t outsize;
- struct u_memstream mem;
- u_memstream_open(&mem, &out, &outsize);
- FILE *const memf = u_memstream_get(&mem);
-
- fprintf(memf, "%s:\n", radv_get_shader_name(&variant->info, stage));
- fprintf(memf, "%s\n\n", variant->ir_string);
- fprintf(memf, "%s\n\n", variant->disasm_string);
- radv_dump_shader_stats(device, pipeline, stage, memf);
- u_memstream_close(&mem);
-
- /* Need to include the null terminator. */
- size_t length = outsize + 1;
-
- if (!pInfo) {
- *pInfoSize = length;
- } else {
- size_t size = *pInfoSize;
- *pInfoSize = length;
-
- memcpy(pInfo, out, MIN2(size, length));
-
- if (size < length)
- result = VK_INCOMPLETE;
- }
-
- free(out);
- break;
- }
- default:
- /* VK_SHADER_INFO_TYPE_BINARY_AMD unimplemented for now. */
- result = VK_ERROR_FEATURE_NOT_PRESENT;
- break;
- }
-
- return result;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
+ gl_shader_stage stage = vk_to_mesa_shader_stage(shaderStage);
+ struct radv_shader_variant *variant = pipeline->shaders[stage];
+ VkResult result = VK_SUCCESS;
+
+ /* Spec doesn't indicate what to do if the stage is invalid, so just
+ * return no info for this. */
+ if (!variant)
+ return vk_error(device->instance, VK_ERROR_FEATURE_NOT_PRESENT);
+
+ switch (infoType) {
+ case VK_SHADER_INFO_TYPE_STATISTICS_AMD:
+ if (!pInfo) {
+ *pInfoSize = sizeof(VkShaderStatisticsInfoAMD);
+ } else {
+ unsigned lds_multiplier = device->physical_device->rad_info.lds_encode_granularity;
+ struct ac_shader_config *conf = &variant->config;
+
+ VkShaderStatisticsInfoAMD statistics = {0};
+ statistics.shaderStageMask = shaderStage;
+ statistics.numPhysicalVgprs =
+ device->physical_device->rad_info.num_physical_wave64_vgprs_per_simd;
+ statistics.numPhysicalSgprs =
+ device->physical_device->rad_info.num_physical_sgprs_per_simd;
+ statistics.numAvailableSgprs = statistics.numPhysicalSgprs;
+
+ if (stage == MESA_SHADER_COMPUTE) {
+ unsigned *local_size = variant->info.cs.block_size;
+ unsigned workgroup_size = local_size[0] * local_size[1] * local_size[2];
+
+ statistics.numAvailableVgprs =
+ statistics.numPhysicalVgprs /
+ ceil((double)workgroup_size / statistics.numPhysicalVgprs);
+
+ statistics.computeWorkGroupSize[0] = local_size[0];
+ statistics.computeWorkGroupSize[1] = local_size[1];
+ statistics.computeWorkGroupSize[2] = local_size[2];
+ } else {
+ statistics.numAvailableVgprs = statistics.numPhysicalVgprs;
+ }
+
+ statistics.resourceUsage.numUsedVgprs = conf->num_vgprs;
+ statistics.resourceUsage.numUsedSgprs = conf->num_sgprs;
+ statistics.resourceUsage.ldsSizePerLocalWorkGroup = 32768;
+ statistics.resourceUsage.ldsUsageSizeInBytes = conf->lds_size * lds_multiplier;
+ statistics.resourceUsage.scratchMemUsageInBytes = conf->scratch_bytes_per_wave;
+
+ size_t size = *pInfoSize;
+ *pInfoSize = sizeof(statistics);
+
+ memcpy(pInfo, &statistics, MIN2(size, *pInfoSize));
+
+ if (size < *pInfoSize)
+ result = VK_INCOMPLETE;
+ }
+
+ break;
+ case VK_SHADER_INFO_TYPE_DISASSEMBLY_AMD: {
+ char *out;
+ size_t outsize;
+ struct u_memstream mem;
+ u_memstream_open(&mem, &out, &outsize);
+ FILE *const memf = u_memstream_get(&mem);
+
+ fprintf(memf, "%s:\n", radv_get_shader_name(&variant->info, stage));
+ fprintf(memf, "%s\n\n", variant->ir_string);
+ fprintf(memf, "%s\n\n", variant->disasm_string);
+ radv_dump_shader_stats(device, pipeline, stage, memf);
+ u_memstream_close(&mem);
+
+ /* Need to include the null terminator. */
+ size_t length = outsize + 1;
+
+ if (!pInfo) {
+ *pInfoSize = length;
+ } else {
+ size_t size = *pInfoSize;
+ *pInfoSize = length;
+
+ memcpy(pInfo, out, MIN2(size, length));
+
+ if (size < length)
+ result = VK_INCOMPLETE;
+ }
+
+ free(out);
+ break;
+ }
+ default:
+ /* VK_SHADER_INFO_TYPE_BINARY_AMD unimplemented for now. */
+ result = VK_ERROR_FEATURE_NOT_PRESENT;
+ break;
+ }
+
+ return result;
}
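
From the application side, radv_GetShaderInfoAMD() follows the usual Vulkan size-then-data contract: call once with pInfo == NULL to learn the size, then again with a buffer. A sketch of querying the disassembly through VK_AMD_shader_info; it assumes 'device' and 'pipeline' were created elsewhere with the extension enabled:

   /* Two-call query of the fragment-shader disassembly via VK_AMD_shader_info. */
   #include <stdio.h>
   #include <stdlib.h>
   #include <vulkan/vulkan.h>

   static char *
   get_fs_disassembly(VkDevice device, VkPipeline pipeline)
   {
      PFN_vkGetShaderInfoAMD get_info =
         (PFN_vkGetShaderInfoAMD)vkGetDeviceProcAddr(device, "vkGetShaderInfoAMD");
      if (!get_info)
         return NULL;

      /* First call: pInfo == NULL, only the required size is written. */
      size_t size = 0;
      if (get_info(device, pipeline, VK_SHADER_STAGE_FRAGMENT_BIT,
                   VK_SHADER_INFO_TYPE_DISASSEMBLY_AMD, &size, NULL) != VK_SUCCESS)
         return NULL;

      /* Second call: pass the buffer of the reported size. */
      char *text = malloc(size);
      if (!text)
         return NULL;
      if (get_info(device, pipeline, VK_SHADER_STAGE_FRAGMENT_BIT,
                   VK_SHADER_INFO_TYPE_DISASSEMBLY_AMD, &size, text) != VK_SUCCESS) {
         free(text);
         return NULL;
      }
      return text; /* already NUL-terminated, as the length above includes it */
   }

Passing a buffer that is too small still copies what fits and yields VK_INCOMPLETE, matching the MIN2() copies in the implementation above.
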
VkResult
-radv_dump_shader_stats(struct radv_device *device,
- struct radv_pipeline *pipeline,
- gl_shader_stage stage, FILE *output)
+radv_dump_shader_stats(struct radv_device *device, struct radv_pipeline *pipeline,
+ gl_shader_stage stage, FILE *output)
{
- struct radv_shader_variant *shader = pipeline->shaders[stage];
- VkPipelineExecutablePropertiesKHR *props = NULL;
- uint32_t prop_count = 0;
- VkResult result;
-
- VkPipelineInfoKHR pipeline_info = {0};
- pipeline_info.sType = VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR;
- pipeline_info.pipeline = radv_pipeline_to_handle(pipeline);
-
- result = radv_GetPipelineExecutablePropertiesKHR(radv_device_to_handle(device),
- &pipeline_info,
- &prop_count, NULL);
- if (result != VK_SUCCESS)
- return result;
-
- props = calloc(prop_count, sizeof(*props));
- if (!props)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
-
- result = radv_GetPipelineExecutablePropertiesKHR(radv_device_to_handle(device),
- &pipeline_info,
- &prop_count, props);
- if (result != VK_SUCCESS)
- goto fail;
-
- for (unsigned exec_idx = 0; exec_idx < prop_count; exec_idx++) {
- if (!(props[exec_idx].stages & mesa_to_vk_shader_stage(stage)))
- continue;
-
- VkPipelineExecutableStatisticKHR *stats = NULL;
- uint32_t stat_count = 0;
-
- VkPipelineExecutableInfoKHR exec_info = {0};
- exec_info.pipeline = radv_pipeline_to_handle(pipeline);
- exec_info.executableIndex = exec_idx;
-
- result = radv_GetPipelineExecutableStatisticsKHR(radv_device_to_handle(device),
- &exec_info,
- &stat_count, NULL);
- if (result != VK_SUCCESS)
- goto fail;
-
- stats = calloc(stat_count, sizeof(*stats));
- if (!stats) {
- result = VK_ERROR_OUT_OF_HOST_MEMORY;
- goto fail;
- }
-
- result = radv_GetPipelineExecutableStatisticsKHR(radv_device_to_handle(device),
- &exec_info,
- &stat_count, stats);
- if (result != VK_SUCCESS) {
- free(stats);
- goto fail;
- }
-
- fprintf(output, "\n%s:\n",
- radv_get_shader_name(&shader->info, stage));
- fprintf(output, "*** SHADER STATS ***\n");
-
- for (unsigned i = 0; i < stat_count; i++) {
- fprintf(output, "%s: ", stats[i].name);
- switch (stats[i].format) {
- case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_BOOL32_KHR:
- fprintf(output, "%s", stats[i].value.b32 == VK_TRUE ? "true" : "false");
- break;
- case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_INT64_KHR:
- fprintf(output, "%"PRIi64, stats[i].value.i64);
- break;
- case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR:
- fprintf(output, "%"PRIu64, stats[i].value.u64);
- break;
- case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_FLOAT64_KHR:
- fprintf(output, "%f", stats[i].value.f64);
- break;
- default:
- unreachable("Invalid pipeline statistic format");
- }
- fprintf(output, "\n");
- }
-
- fprintf(output, "********************\n\n\n");
-
- free(stats);
- }
+ struct radv_shader_variant *shader = pipeline->shaders[stage];
+ VkPipelineExecutablePropertiesKHR *props = NULL;
+ uint32_t prop_count = 0;
+ VkResult result;
+
+ VkPipelineInfoKHR pipeline_info = {0};
+ pipeline_info.sType = VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR;
+ pipeline_info.pipeline = radv_pipeline_to_handle(pipeline);
+
+ result = radv_GetPipelineExecutablePropertiesKHR(radv_device_to_handle(device), &pipeline_info,
+ &prop_count, NULL);
+ if (result != VK_SUCCESS)
+ return result;
+
+ props = calloc(prop_count, sizeof(*props));
+ if (!props)
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+ result = radv_GetPipelineExecutablePropertiesKHR(radv_device_to_handle(device), &pipeline_info,
+ &prop_count, props);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ for (unsigned exec_idx = 0; exec_idx < prop_count; exec_idx++) {
+ if (!(props[exec_idx].stages & mesa_to_vk_shader_stage(stage)))
+ continue;
+
+ VkPipelineExecutableStatisticKHR *stats = NULL;
+ uint32_t stat_count = 0;
+
+ VkPipelineExecutableInfoKHR exec_info = {0};
+ exec_info.pipeline = radv_pipeline_to_handle(pipeline);
+ exec_info.executableIndex = exec_idx;
+
+ result = radv_GetPipelineExecutableStatisticsKHR(radv_device_to_handle(device), &exec_info,
+ &stat_count, NULL);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ stats = calloc(stat_count, sizeof(*stats));
+ if (!stats) {
+ result = VK_ERROR_OUT_OF_HOST_MEMORY;
+ goto fail;
+ }
+
+ result = radv_GetPipelineExecutableStatisticsKHR(radv_device_to_handle(device), &exec_info,
+ &stat_count, stats);
+ if (result != VK_SUCCESS) {
+ free(stats);
+ goto fail;
+ }
+
+ fprintf(output, "\n%s:\n", radv_get_shader_name(&shader->info, stage));
+ fprintf(output, "*** SHADER STATS ***\n");
+
+ for (unsigned i = 0; i < stat_count; i++) {
+ fprintf(output, "%s: ", stats[i].name);
+ switch (stats[i].format) {
+ case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_BOOL32_KHR:
+ fprintf(output, "%s", stats[i].value.b32 == VK_TRUE ? "true" : "false");
+ break;
+ case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_INT64_KHR:
+ fprintf(output, "%" PRIi64, stats[i].value.i64);
+ break;
+ case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR:
+ fprintf(output, "%" PRIu64, stats[i].value.u64);
+ break;
+ case VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_FLOAT64_KHR:
+ fprintf(output, "%f", stats[i].value.f64);
+ break;
+ default:
+ unreachable("Invalid pipeline statistic format");
+ }
+ fprintf(output, "\n");
+ }
+
+ fprintf(output, "********************\n\n\n");
+
+ free(stats);
+ }
fail:
- free(props);
- return result;
+ free(props);
+ return result;
}
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index d4878cc14a1..7bfcf854dbe 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -35,570 +35,522 @@
#include "radv_constants.h"
#include "nir/nir.h"
-#include "vulkan/vulkan.h"
#include "vulkan/util/vk_object.h"
#include "vulkan/util/vk_shader_module.h"
-
-#include "aco_interface.h"
+#include "vulkan/vulkan.h"
#define RADV_VERT_ATTRIB_MAX MAX2(VERT_ATTRIB_MAX, VERT_ATTRIB_GENERIC0 + MAX_VERTEX_ATTRIBS)
struct radv_device;
+struct radv_pipeline;
+struct radv_pipeline_cache;
+struct radv_pipeline_key;
struct radv_vs_out_key {
- uint32_t as_es:1;
- uint32_t as_ls:1;
- uint32_t as_ngg:1;
- uint32_t as_ngg_passthrough:1;
- uint32_t export_prim_id:1;
- uint32_t export_layer_id:1;
- uint32_t export_clip_dists:1;
- uint32_t export_viewport_index:1;
+ uint32_t as_es : 1;
+ uint32_t as_ls : 1;
+ uint32_t as_ngg : 1;
+ uint32_t as_ngg_passthrough : 1;
+ uint32_t export_prim_id : 1;
+ uint32_t export_layer_id : 1;
+ uint32_t export_clip_dists : 1;
+ uint32_t export_viewport_index : 1;
};
struct radv_vs_variant_key {
- struct radv_vs_out_key out;
+ struct radv_vs_out_key out;
- uint32_t instance_rate_inputs;
- uint32_t instance_rate_divisors[MAX_VERTEX_ATTRIBS];
- uint8_t vertex_attribute_formats[MAX_VERTEX_ATTRIBS];
- uint32_t vertex_attribute_bindings[MAX_VERTEX_ATTRIBS];
- uint32_t vertex_attribute_offsets[MAX_VERTEX_ATTRIBS];
- uint32_t vertex_attribute_strides[MAX_VERTEX_ATTRIBS];
+ uint32_t instance_rate_inputs;
+ uint32_t instance_rate_divisors[MAX_VERTEX_ATTRIBS];
+ uint8_t vertex_attribute_formats[MAX_VERTEX_ATTRIBS];
+ uint32_t vertex_attribute_bindings[MAX_VERTEX_ATTRIBS];
+ uint32_t vertex_attribute_offsets[MAX_VERTEX_ATTRIBS];
+ uint32_t vertex_attribute_strides[MAX_VERTEX_ATTRIBS];
- /* For 2_10_10_10 formats the alpha is handled as unsigned by pre-vega HW.
- * so we may need to fix it up. */
- enum ac_fetch_format alpha_adjust[MAX_VERTEX_ATTRIBS];
+   /* For 2_10_10_10 formats the alpha is handled as unsigned by pre-Vega HW,
+    * so we may need to fix it up. */
+ enum ac_fetch_format alpha_adjust[MAX_VERTEX_ATTRIBS];
- /* For some formats the channels have to be shuffled. */
- uint32_t post_shuffle;
+ /* For some formats the channels have to be shuffled. */
+ uint32_t post_shuffle;
- /* Output primitive type. */
- uint8_t outprim;
+ /* Output primitive type. */
+ uint8_t outprim;
};
struct radv_tes_variant_key {
- struct radv_vs_out_key out;
+ struct radv_vs_out_key out;
};
struct radv_tcs_variant_key {
- struct radv_vs_variant_key vs_key;
- unsigned primitive_mode;
- unsigned input_vertices;
+ struct radv_vs_variant_key vs_key;
+ unsigned primitive_mode;
+ unsigned input_vertices;
};
struct radv_fs_variant_key {
- uint32_t col_format;
- uint8_t log2_ps_iter_samples;
- uint8_t num_samples;
- uint32_t is_int8;
- uint32_t is_int10;
+ uint32_t col_format;
+ uint8_t log2_ps_iter_samples;
+ uint8_t num_samples;
+ uint32_t is_int8;
+ uint32_t is_int10;
};
struct radv_cs_variant_key {
- uint8_t subgroup_size;
+ uint8_t subgroup_size;
};
struct radv_shader_variant_key {
- union {
- struct radv_vs_variant_key vs;
- struct radv_fs_variant_key fs;
- struct radv_tes_variant_key tes;
- struct radv_tcs_variant_key tcs;
- struct radv_cs_variant_key cs;
-
- /* A common prefix of the vs and tes keys. */
- struct radv_vs_out_key vs_common_out;
- };
- bool has_multiview_view_index;
+ union {
+ struct radv_vs_variant_key vs;
+ struct radv_fs_variant_key fs;
+ struct radv_tes_variant_key tes;
+ struct radv_tcs_variant_key tcs;
+ struct radv_cs_variant_key cs;
+
+ /* A common prefix of the vs and tes keys. */
+ struct radv_vs_out_key vs_common_out;
+ };
+ bool has_multiview_view_index;
};
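
The anonymous union above relies on every per-stage key starting with the same struct: vs_common_out aliases the shared leading radv_vs_out_key, so code that only cares about those bits can read them without knowing which stage wrote the key. A toy version of that common-prefix layout, with invented names:

   /* Common-prefix union: both stage keys begin with the same struct, so the
    * 'common' member gives a stage-independent view of the shared bits. */
   #include <stdint.h>
   #include <stdio.h>

   struct out_key {
      uint32_t as_es : 1;
      uint32_t export_prim_id : 1;
   };

   struct vs_key {
      struct out_key out;
      uint32_t instance_rate_inputs;
   };

   struct tes_key {
      struct out_key out;
   };

   union stage_key {
      struct vs_key vs;
      struct tes_key tes;
      struct out_key common; /* valid prefix of both members above */
   };

   int
   main(void)
   {
      union stage_key key = {0};
      key.vs.out.export_prim_id = 1; /* written through the VS view... */
      printf("export_prim_id = %u\n", (unsigned)key.common.export_prim_id); /* ...read via the prefix */
      return 0;
   }
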
enum radv_compiler_debug_level {
- RADV_COMPILER_DEBUG_LEVEL_PERFWARN,
- RADV_COMPILER_DEBUG_LEVEL_ERROR,
+ RADV_COMPILER_DEBUG_LEVEL_PERFWARN,
+ RADV_COMPILER_DEBUG_LEVEL_ERROR,
};
struct radv_nir_compiler_options {
- struct radv_pipeline_layout *layout;
- struct radv_shader_variant_key key;
- bool explicit_scratch_args;
- bool clamp_shadow_reference;
- bool robust_buffer_access;
- bool robust_buffer_access2;
- bool adjust_frag_coord_z;
- bool dump_shader;
- bool dump_preoptir;
- bool record_ir;
- bool record_stats;
- bool check_ir;
- bool has_ls_vgpr_init_bug;
- bool has_image_load_dcc_bug;
- bool use_ngg_streamout;
- bool enable_mrt_output_nan_fixup;
- bool disable_optimizations; /* only used by ACO */
- bool wgp_mode;
- enum radeon_family family;
- enum chip_class chip_class;
- const struct radeon_info *info;
- uint32_t tess_offchip_block_dw_size;
- uint32_t address32_hi;
- uint8_t force_vrs_rates;
-
- struct {
- void (*func)(void *private_data,
- enum radv_compiler_debug_level level,
- const char *message);
- void *private_data;
- } debug;
+ struct radv_pipeline_layout *layout;
+ struct radv_shader_variant_key key;
+ bool explicit_scratch_args;
+ bool clamp_shadow_reference;
+ bool robust_buffer_access;
+ bool robust_buffer_access2;
+ bool adjust_frag_coord_z;
+ bool dump_shader;
+ bool dump_preoptir;
+ bool record_ir;
+ bool record_stats;
+ bool check_ir;
+ bool has_ls_vgpr_init_bug;
+ bool has_image_load_dcc_bug;
+ bool use_ngg_streamout;
+ bool enable_mrt_output_nan_fixup;
+ bool disable_optimizations; /* only used by ACO */
+ bool wgp_mode;
+ enum radeon_family family;
+ enum chip_class chip_class;
+ const struct radeon_info *info;
+ uint32_t tess_offchip_block_dw_size;
+ uint32_t address32_hi;
+ uint8_t force_vrs_rates;
+
+ struct {
+ void (*func)(void *private_data, enum radv_compiler_debug_level level, const char *message);
+ void *private_data;
+ } debug;
};
enum radv_ud_index {
- AC_UD_SCRATCH_RING_OFFSETS = 0,
- AC_UD_PUSH_CONSTANTS = 1,
- AC_UD_INLINE_PUSH_CONSTANTS = 2,
- AC_UD_INDIRECT_DESCRIPTOR_SETS = 3,
- AC_UD_VIEW_INDEX = 4,
- AC_UD_STREAMOUT_BUFFERS = 5,
- AC_UD_NGG_GS_STATE = 6,
- AC_UD_SHADER_START = 7,
- AC_UD_VS_VERTEX_BUFFERS = AC_UD_SHADER_START,
- AC_UD_VS_BASE_VERTEX_START_INSTANCE,
- AC_UD_VS_MAX_UD,
- AC_UD_PS_MAX_UD,
- AC_UD_CS_GRID_SIZE = AC_UD_SHADER_START,
- AC_UD_CS_MAX_UD,
- AC_UD_GS_MAX_UD,
- AC_UD_TCS_MAX_UD,
- AC_UD_TES_MAX_UD,
- AC_UD_MAX_UD = AC_UD_TCS_MAX_UD,
+ AC_UD_SCRATCH_RING_OFFSETS = 0,
+ AC_UD_PUSH_CONSTANTS = 1,
+ AC_UD_INLINE_PUSH_CONSTANTS = 2,
+ AC_UD_INDIRECT_DESCRIPTOR_SETS = 3,
+ AC_UD_VIEW_INDEX = 4,
+ AC_UD_STREAMOUT_BUFFERS = 5,
+ AC_UD_NGG_GS_STATE = 6,
+ AC_UD_SHADER_START = 7,
+ AC_UD_VS_VERTEX_BUFFERS = AC_UD_SHADER_START,
+ AC_UD_VS_BASE_VERTEX_START_INSTANCE,
+ AC_UD_VS_MAX_UD,
+ AC_UD_PS_MAX_UD,
+ AC_UD_CS_GRID_SIZE = AC_UD_SHADER_START,
+ AC_UD_CS_MAX_UD,
+ AC_UD_GS_MAX_UD,
+ AC_UD_TCS_MAX_UD,
+ AC_UD_TES_MAX_UD,
+ AC_UD_MAX_UD = AC_UD_TCS_MAX_UD,
};
struct radv_stream_output {
- uint8_t location;
- uint8_t buffer;
- uint16_t offset;
- uint8_t component_mask;
- uint8_t stream;
+ uint8_t location;
+ uint8_t buffer;
+ uint16_t offset;
+ uint8_t component_mask;
+ uint8_t stream;
};
struct radv_streamout_info {
- uint16_t num_outputs;
- struct radv_stream_output outputs[MAX_SO_OUTPUTS];
- uint16_t strides[MAX_SO_BUFFERS];
- uint32_t enabled_stream_buffers_mask;
+ uint16_t num_outputs;
+ struct radv_stream_output outputs[MAX_SO_OUTPUTS];
+ uint16_t strides[MAX_SO_BUFFERS];
+ uint32_t enabled_stream_buffers_mask;
};
struct radv_userdata_info {
- int8_t sgpr_idx;
- uint8_t num_sgprs;
+ int8_t sgpr_idx;
+ uint8_t num_sgprs;
};
struct radv_userdata_locations {
- struct radv_userdata_info descriptor_sets[MAX_SETS];
- struct radv_userdata_info shader_data[AC_UD_MAX_UD];
- uint32_t descriptor_sets_enabled;
+ struct radv_userdata_info descriptor_sets[MAX_SETS];
+ struct radv_userdata_info shader_data[AC_UD_MAX_UD];
+ uint32_t descriptor_sets_enabled;
};
struct radv_vs_output_info {
- uint8_t vs_output_param_offset[VARYING_SLOT_MAX];
- uint8_t clip_dist_mask;
- uint8_t cull_dist_mask;
- uint8_t param_exports;
- bool writes_pointsize;
- bool writes_layer;
- bool writes_viewport_index;
- bool writes_primitive_shading_rate;
- bool export_prim_id;
- unsigned pos_exports;
+ uint8_t vs_output_param_offset[VARYING_SLOT_MAX];
+ uint8_t clip_dist_mask;
+ uint8_t cull_dist_mask;
+ uint8_t param_exports;
+ bool writes_pointsize;
+ bool writes_layer;
+ bool writes_viewport_index;
+ bool writes_primitive_shading_rate;
+ bool export_prim_id;
+ unsigned pos_exports;
};
struct radv_es_output_info {
- uint32_t esgs_itemsize;
+ uint32_t esgs_itemsize;
};
struct gfx9_gs_info {
- uint32_t vgt_gs_onchip_cntl;
- uint32_t vgt_gs_max_prims_per_subgroup;
- uint32_t vgt_esgs_ring_itemsize;
- uint32_t lds_size;
+ uint32_t vgt_gs_onchip_cntl;
+ uint32_t vgt_gs_max_prims_per_subgroup;
+ uint32_t vgt_esgs_ring_itemsize;
+ uint32_t lds_size;
};
struct gfx10_ngg_info {
- uint16_t ngg_emit_size; /* in dwords */
- uint32_t hw_max_esverts;
- uint32_t max_gsprims;
- uint32_t max_out_verts;
- uint32_t prim_amp_factor;
- uint32_t vgt_esgs_ring_itemsize;
- uint32_t esgs_ring_size;
- bool max_vert_out_per_gs_instance;
+ uint16_t ngg_emit_size; /* in dwords */
+ uint32_t hw_max_esverts;
+ uint32_t max_gsprims;
+ uint32_t max_out_verts;
+ uint32_t prim_amp_factor;
+ uint32_t vgt_esgs_ring_itemsize;
+ uint32_t esgs_ring_size;
+ bool max_vert_out_per_gs_instance;
};
struct radv_shader_info {
- bool loads_push_constants;
- bool loads_dynamic_offsets;
- uint8_t min_push_constant_used;
- uint8_t max_push_constant_used;
- bool has_only_32bit_push_constants;
- bool has_indirect_push_constants;
- uint8_t num_inline_push_consts;
- uint8_t base_inline_push_consts;
- uint32_t desc_set_used_mask;
- bool needs_multiview_view_index;
- bool uses_invocation_id;
- bool uses_prim_id;
- uint8_t wave_size;
- uint8_t ballot_bit_size;
- struct radv_userdata_locations user_sgprs_locs;
- unsigned num_user_sgprs;
- unsigned num_input_sgprs;
- unsigned num_input_vgprs;
- unsigned private_mem_vgprs;
- bool need_indirect_descriptor_sets;
- bool is_ngg;
- bool is_ngg_passthrough;
- uint32_t num_tess_patches;
- struct {
- uint8_t input_usage_mask[RADV_VERT_ATTRIB_MAX];
- uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1];
- bool has_vertex_buffers; /* needs vertex buffers and base/start */
- bool needs_draw_id;
- bool needs_instance_id;
- struct radv_vs_output_info outinfo;
- struct radv_es_output_info es_info;
- bool as_es;
- bool as_ls;
- bool export_prim_id;
- bool tcs_in_out_eq;
- uint64_t tcs_temp_only_input_mask;
- uint8_t num_linked_outputs;
- bool needs_base_instance;
- } vs;
- struct {
- uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1];
- uint8_t num_stream_output_components[4];
- uint8_t output_streams[VARYING_SLOT_VAR31 + 1];
- uint8_t max_stream;
- unsigned gsvs_vertex_size;
- unsigned max_gsvs_emit_size;
- unsigned vertices_in;
- unsigned vertices_out;
- unsigned output_prim;
- unsigned invocations;
- unsigned es_type; /* GFX9: VS or TES */
- uint8_t num_linked_inputs;
- } gs;
- struct {
- uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1];
- struct radv_vs_output_info outinfo;
- struct radv_es_output_info es_info;
- bool as_es;
- unsigned primitive_mode;
- enum gl_tess_spacing spacing;
- bool ccw;
- bool point_mode;
- bool export_prim_id;
- uint8_t num_linked_inputs;
- uint8_t num_linked_patch_inputs;
- uint8_t num_linked_outputs;
- } tes;
- struct {
- bool uses_sample_shading;
- bool needs_sample_positions;
- bool writes_memory;
- bool writes_z;
- bool writes_stencil;
- bool writes_sample_mask;
- bool has_pcoord;
- bool prim_id_input;
- bool layer_input;
- bool viewport_index_input;
- uint8_t num_input_clips_culls;
- uint32_t input_mask;
- uint32_t flat_shaded_mask;
- uint32_t explicit_shaded_mask;
- uint32_t float16_shaded_mask;
- uint32_t num_interp;
- bool can_discard;
- bool early_fragment_test;
- bool post_depth_coverage;
- bool reads_sample_mask_in;
- uint8_t depth_layout;
- bool uses_persp_or_linear_interp;
- bool allow_flat_shading;
- } ps;
- struct {
- bool uses_grid_size;
- bool uses_block_id[3];
- bool uses_thread_id[3];
- bool uses_local_invocation_idx;
- unsigned block_size[3];
- } cs;
- struct {
- uint64_t tes_inputs_read;
- uint64_t tes_patch_inputs_read;
- unsigned tcs_vertices_out;
- uint32_t num_lds_blocks;
- uint8_t num_linked_inputs;
- uint8_t num_linked_outputs;
- uint8_t num_linked_patch_outputs;
- bool tes_reads_tess_factors:1;
- } tcs;
-
- struct radv_streamout_info so;
-
- struct gfx9_gs_info gs_ring_info;
- struct gfx10_ngg_info ngg_info;
-
- unsigned float_controls_mode;
+ bool loads_push_constants;
+ bool loads_dynamic_offsets;
+ uint8_t min_push_constant_used;
+ uint8_t max_push_constant_used;
+ bool has_only_32bit_push_constants;
+ bool has_indirect_push_constants;
+ uint8_t num_inline_push_consts;
+ uint8_t base_inline_push_consts;
+ uint32_t desc_set_used_mask;
+ bool needs_multiview_view_index;
+ bool uses_invocation_id;
+ bool uses_prim_id;
+ uint8_t wave_size;
+ uint8_t ballot_bit_size;
+ struct radv_userdata_locations user_sgprs_locs;
+ unsigned num_user_sgprs;
+ unsigned num_input_sgprs;
+ unsigned num_input_vgprs;
+ unsigned private_mem_vgprs;
+ bool need_indirect_descriptor_sets;
+ bool is_ngg;
+ bool is_ngg_passthrough;
+ uint32_t num_tess_patches;
+ struct {
+ uint8_t input_usage_mask[RADV_VERT_ATTRIB_MAX];
+ uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1];
+ bool has_vertex_buffers; /* needs vertex buffers and base/start */
+ bool needs_draw_id;
+ bool needs_instance_id;
+ struct radv_vs_output_info outinfo;
+ struct radv_es_output_info es_info;
+ bool as_es;
+ bool as_ls;
+ bool export_prim_id;
+ bool tcs_in_out_eq;
+ uint64_t tcs_temp_only_input_mask;
+ uint8_t num_linked_outputs;
+ bool needs_base_instance;
+ } vs;
+ struct {
+ uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1];
+ uint8_t num_stream_output_components[4];
+ uint8_t output_streams[VARYING_SLOT_VAR31 + 1];
+ uint8_t max_stream;
+ unsigned gsvs_vertex_size;
+ unsigned max_gsvs_emit_size;
+ unsigned vertices_in;
+ unsigned vertices_out;
+ unsigned output_prim;
+ unsigned invocations;
+ unsigned es_type; /* GFX9: VS or TES */
+ uint8_t num_linked_inputs;
+ } gs;
+ struct {
+ uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1];
+ struct radv_vs_output_info outinfo;
+ struct radv_es_output_info es_info;
+ bool as_es;
+ unsigned primitive_mode;
+ enum gl_tess_spacing spacing;
+ bool ccw;
+ bool point_mode;
+ bool export_prim_id;
+ uint8_t num_linked_inputs;
+ uint8_t num_linked_patch_inputs;
+ uint8_t num_linked_outputs;
+ } tes;
+ struct {
+ bool uses_sample_shading;
+ bool needs_sample_positions;
+ bool writes_memory;
+ bool writes_z;
+ bool writes_stencil;
+ bool writes_sample_mask;
+ bool has_pcoord;
+ bool prim_id_input;
+ bool layer_input;
+ bool viewport_index_input;
+ uint8_t num_input_clips_culls;
+ uint32_t input_mask;
+ uint32_t flat_shaded_mask;
+ uint32_t explicit_shaded_mask;
+ uint32_t float16_shaded_mask;
+ uint32_t num_interp;
+ bool can_discard;
+ bool early_fragment_test;
+ bool post_depth_coverage;
+ bool reads_sample_mask_in;
+ uint8_t depth_layout;
+ bool uses_persp_or_linear_interp;
+ bool allow_flat_shading;
+ } ps;
+ struct {
+ bool uses_grid_size;
+ bool uses_block_id[3];
+ bool uses_thread_id[3];
+ bool uses_local_invocation_idx;
+ unsigned block_size[3];
+ } cs;
+ struct {
+ uint64_t tes_inputs_read;
+ uint64_t tes_patch_inputs_read;
+ unsigned tcs_vertices_out;
+ uint32_t num_lds_blocks;
+ uint8_t num_linked_inputs;
+ uint8_t num_linked_outputs;
+ uint8_t num_linked_patch_outputs;
+ bool tes_reads_tess_factors : 1;
+ } tcs;
+
+ struct radv_streamout_info so;
+
+ struct gfx9_gs_info gs_ring_info;
+ struct gfx10_ngg_info ngg_info;
+
+ unsigned float_controls_mode;
};
-enum radv_shader_binary_type {
- RADV_BINARY_TYPE_LEGACY,
- RADV_BINARY_TYPE_RTLD
-};
+enum radv_shader_binary_type { RADV_BINARY_TYPE_LEGACY, RADV_BINARY_TYPE_RTLD };
struct radv_shader_binary {
- enum radv_shader_binary_type type;
- gl_shader_stage stage;
- bool is_gs_copy_shader;
+ enum radv_shader_binary_type type;
+ gl_shader_stage stage;
+ bool is_gs_copy_shader;
- struct radv_shader_info info;
+ struct radv_shader_info info;
- /* Self-referential size so we avoid consistency issues. */
- uint32_t total_size;
+ /* Self-referential size so we avoid consistency issues. */
+ uint32_t total_size;
};
struct radv_shader_binary_legacy {
- struct radv_shader_binary base;
- struct ac_shader_config config;
- unsigned code_size;
- unsigned exec_size;
- unsigned ir_size;
- unsigned disasm_size;
- unsigned stats_size;
-
- /* data has size of stats_size + code_size + ir_size + disasm_size + 2,
- * where the +2 is for 0 of the ir strings. */
- uint8_t data[0];
+ struct radv_shader_binary base;
+ struct ac_shader_config config;
+ unsigned code_size;
+ unsigned exec_size;
+ unsigned ir_size;
+ unsigned disasm_size;
+ unsigned stats_size;
+
+ /* data has size of stats_size + code_size + ir_size + disasm_size + 2,
+ * where the +2 is for 0 of the ir strings. */
+ uint8_t data[0];
};
struct radv_shader_binary_rtld {
- struct radv_shader_binary base;
- unsigned elf_size;
- unsigned llvm_ir_size;
- uint8_t data[0];
+ struct radv_shader_binary base;
+ unsigned elf_size;
+ unsigned llvm_ir_size;
+ uint8_t data[0];
};
struct radv_shader_variant {
- uint32_t ref_count;
-
- struct radeon_winsys_bo *bo;
- uint64_t bo_offset;
- struct ac_shader_config config;
- uint8_t *code_ptr;
- uint32_t code_size;
- uint32_t exec_size;
- struct radv_shader_info info;
-
- /* debug only */
- char *spirv;
- uint32_t spirv_size;
- char *nir_string;
- char *disasm_string;
- char *ir_string;
- uint32_t *statistics;
-
- struct list_head slab_list;
+ uint32_t ref_count;
+
+ struct radeon_winsys_bo *bo;
+ uint64_t bo_offset;
+ struct ac_shader_config config;
+ uint8_t *code_ptr;
+ uint32_t code_size;
+ uint32_t exec_size;
+ struct radv_shader_info info;
+
+ /* debug only */
+ char *spirv;
+ uint32_t spirv_size;
+ char *nir_string;
+ char *disasm_string;
+ char *ir_string;
+ uint32_t *statistics;
+
+ struct list_head slab_list;
};
struct radv_shader_slab {
- struct list_head slabs;
- struct list_head shaders;
- struct radeon_winsys_bo *bo;
- uint64_t size;
- char *ptr;
+ struct list_head slabs;
+ struct list_head shaders;
+ struct radeon_winsys_bo *bo;
+ uint64_t size;
+ char *ptr;
};
-void
-radv_optimize_nir(const struct radv_device *device, struct nir_shader *shader,
- bool optimize_conservatively, bool allow_copies);
-bool
-radv_nir_lower_ycbcr_textures(nir_shader *shader,
- const struct radv_pipeline_layout *layout);
-
-nir_shader *
-radv_shader_compile_to_nir(struct radv_device *device,
- struct vk_shader_module *module,
- const char *entrypoint_name,
- gl_shader_stage stage,
- const VkSpecializationInfo *spec_info,
- const VkPipelineCreateFlags flags,
- const struct radv_pipeline_layout *layout,
- const struct radv_pipeline_key *key);
-
-void
-radv_destroy_shader_slabs(struct radv_device *device);
-
-VkResult
-radv_create_shaders(struct radv_pipeline *pipeline,
- struct radv_device *device,
- struct radv_pipeline_cache *cache,
- const struct radv_pipeline_key *key,
- const VkPipelineShaderStageCreateInfo **pStages,
- const VkPipelineCreateFlags flags,
- VkPipelineCreationFeedbackEXT *pipeline_feedback,
- VkPipelineCreationFeedbackEXT **stage_feedbacks);
-
-struct radv_shader_variant *
-radv_shader_variant_create(struct radv_device *device,
- const struct radv_shader_binary *binary,
- bool keep_shader_info);
-struct radv_shader_variant *
-radv_shader_variant_compile(struct radv_device *device,
- struct vk_shader_module *module,
- struct nir_shader *const *shaders,
- int shader_count,
- struct radv_pipeline_layout *layout,
- const struct radv_shader_variant_key *key,
- struct radv_shader_info *info,
- bool keep_shader_info, bool keep_statistic_info,
- bool disable_optimizations,
- struct radv_shader_binary **binary_out);
+void radv_optimize_nir(const struct radv_device *device, struct nir_shader *shader,
+ bool optimize_conservatively, bool allow_copies);
+bool radv_nir_lower_ycbcr_textures(nir_shader *shader, const struct radv_pipeline_layout *layout);
+
+nir_shader *radv_shader_compile_to_nir(struct radv_device *device, struct vk_shader_module *module,
+ const char *entrypoint_name, gl_shader_stage stage,
+ const VkSpecializationInfo *spec_info,
+ const VkPipelineCreateFlags flags,
+ const struct radv_pipeline_layout *layout,
+ const struct radv_pipeline_key *key);
+
+void radv_destroy_shader_slabs(struct radv_device *device);
+
+VkResult radv_create_shaders(struct radv_pipeline *pipeline, struct radv_device *device,
+ struct radv_pipeline_cache *cache, const struct radv_pipeline_key *key,
+ const VkPipelineShaderStageCreateInfo **pStages,
+ const VkPipelineCreateFlags flags,
+ VkPipelineCreationFeedbackEXT *pipeline_feedback,
+ VkPipelineCreationFeedbackEXT **stage_feedbacks);
+
+struct radv_shader_variant *radv_shader_variant_create(struct radv_device *device,
+ const struct radv_shader_binary *binary,
+ bool keep_shader_info);
+struct radv_shader_variant *radv_shader_variant_compile(
+ struct radv_device *device, struct vk_shader_module *module, struct nir_shader *const *shaders,
+ int shader_count, struct radv_pipeline_layout *layout, const struct radv_shader_variant_key *key,
+ struct radv_shader_info *info, bool keep_shader_info, bool keep_statistic_info,
+ bool disable_optimizations, struct radv_shader_binary **binary_out);
struct radv_shader_variant *
radv_create_gs_copy_shader(struct radv_device *device, struct nir_shader *nir,
- struct radv_shader_info *info,
- struct radv_shader_binary **binary_out,
- bool multiview, bool keep_shader_info,
- bool keep_statistic_info,
- bool disable_optimizations);
-
-struct radv_shader_variant *
-radv_create_trap_handler_shader(struct radv_device *device);
+ struct radv_shader_info *info, struct radv_shader_binary **binary_out,
+ bool multiview, bool keep_shader_info, bool keep_statistic_info,
+ bool disable_optimizations);
-void
-radv_shader_variant_destroy(struct radv_device *device,
- struct radv_shader_variant *variant);
+struct radv_shader_variant *radv_create_trap_handler_shader(struct radv_device *device);
+void radv_shader_variant_destroy(struct radv_device *device, struct radv_shader_variant *variant);
-unsigned
-radv_get_max_waves(struct radv_device *device,
- struct radv_shader_variant *variant,
- gl_shader_stage stage);
+unsigned radv_get_max_waves(struct radv_device *device, struct radv_shader_variant *variant,
+ gl_shader_stage stage);
-unsigned
-radv_get_max_workgroup_size(enum chip_class chip_class,
- gl_shader_stage stage,
- const unsigned *sizes);
+unsigned radv_get_max_workgroup_size(enum chip_class chip_class, gl_shader_stage stage,
+ const unsigned *sizes);
-const char *
-radv_get_shader_name(struct radv_shader_info *info,
- gl_shader_stage stage);
+const char *radv_get_shader_name(struct radv_shader_info *info, gl_shader_stage stage);
-bool
-radv_can_dump_shader(struct radv_device *device,
- struct vk_shader_module *module,
- bool is_gs_copy_shader);
+bool radv_can_dump_shader(struct radv_device *device, struct vk_shader_module *module,
+ bool is_gs_copy_shader);
-bool
-radv_can_dump_shader_stats(struct radv_device *device,
- struct vk_shader_module *module);
+bool radv_can_dump_shader_stats(struct radv_device *device, struct vk_shader_module *module);
-VkResult
-radv_dump_shader_stats(struct radv_device *device,
- struct radv_pipeline *pipeline,
- gl_shader_stage stage, FILE *output);
+VkResult radv_dump_shader_stats(struct radv_device *device, struct radv_pipeline *pipeline,
+ gl_shader_stage stage, FILE *output);
static inline unsigned
-calculate_tess_lds_size(enum chip_class chip_class,
- unsigned tcs_num_input_vertices,
- unsigned tcs_num_output_vertices,
- unsigned tcs_num_inputs,
- unsigned tcs_num_patches,
- unsigned tcs_num_outputs,
- unsigned tcs_num_patch_outputs)
+calculate_tess_lds_size(enum chip_class chip_class, unsigned tcs_num_input_vertices,
+ unsigned tcs_num_output_vertices, unsigned tcs_num_inputs,
+ unsigned tcs_num_patches, unsigned tcs_num_outputs,
+ unsigned tcs_num_patch_outputs)
{
- unsigned input_vertex_size = tcs_num_inputs * 16;
- unsigned output_vertex_size = tcs_num_outputs * 16;
+ unsigned input_vertex_size = tcs_num_inputs * 16;
+ unsigned output_vertex_size = tcs_num_outputs * 16;
- unsigned input_patch_size = tcs_num_input_vertices * input_vertex_size;
+ unsigned input_patch_size = tcs_num_input_vertices * input_vertex_size;
- unsigned pervertex_output_patch_size = tcs_num_output_vertices * output_vertex_size;
- unsigned output_patch_size = pervertex_output_patch_size + tcs_num_patch_outputs * 16;
+ unsigned pervertex_output_patch_size = tcs_num_output_vertices * output_vertex_size;
+ unsigned output_patch_size = pervertex_output_patch_size + tcs_num_patch_outputs * 16;
- unsigned output_patch0_offset = input_patch_size * tcs_num_patches;
+ unsigned output_patch0_offset = input_patch_size * tcs_num_patches;
- unsigned lds_size = output_patch0_offset + output_patch_size * tcs_num_patches;
+ unsigned lds_size = output_patch0_offset + output_patch_size * tcs_num_patches;
- if (chip_class >= GFX7) {
- assert(lds_size <= 65536);
- lds_size = align(lds_size, 512) / 512;
- } else {
- assert(lds_size <= 32768);
- lds_size = align(lds_size, 256) / 256;
- }
+ if (chip_class >= GFX7) {
+ assert(lds_size <= 65536);
+ lds_size = align(lds_size, 512) / 512;
+ } else {
+ assert(lds_size <= 32768);
+ lds_size = align(lds_size, 256) / 256;
+ }
- return lds_size;
+ return lds_size;
}
static inline unsigned
-get_tcs_num_patches(unsigned tcs_num_input_vertices,
- unsigned tcs_num_output_vertices,
- unsigned tcs_num_inputs,
- unsigned tcs_num_outputs,
- unsigned tcs_num_patch_outputs,
- unsigned tess_offchip_block_dw_size,
- enum chip_class chip_class,
- enum radeon_family family)
+get_tcs_num_patches(unsigned tcs_num_input_vertices, unsigned tcs_num_output_vertices,
+ unsigned tcs_num_inputs, unsigned tcs_num_outputs,
+ unsigned tcs_num_patch_outputs, unsigned tess_offchip_block_dw_size,
+ enum chip_class chip_class, enum radeon_family family)
{
- uint32_t input_vertex_size = tcs_num_inputs * 16;
- uint32_t input_patch_size = tcs_num_input_vertices * input_vertex_size;
- uint32_t output_vertex_size = tcs_num_outputs * 16;
- uint32_t pervertex_output_patch_size = tcs_num_output_vertices * output_vertex_size;
- uint32_t output_patch_size = pervertex_output_patch_size + tcs_num_patch_outputs * 16;
-
- /* Ensure that we only need one wave per SIMD so we don't need to check
- * resource usage. Also ensures that the number of tcs in and out
- * vertices per threadgroup are at most 256.
- */
- unsigned num_patches = 64 / MAX2(tcs_num_input_vertices, tcs_num_output_vertices) * 4;
- /* Make sure that the data fits in LDS. This assumes the shaders only
- * use LDS for the inputs and outputs.
- */
- unsigned hardware_lds_size = 32768;
-
- /* Looks like STONEY hangs if we use more than 32 KiB LDS in a single
- * threadgroup, even though there is more than 32 KiB LDS.
- *
- * Test: dEQP-VK.tessellation.shader_input_output.barrier
- */
- if (chip_class >= GFX7 && family != CHIP_STONEY)
- hardware_lds_size = 65536;
-
- if (input_patch_size + output_patch_size)
- num_patches = MIN2(num_patches, hardware_lds_size / (input_patch_size + output_patch_size));
- /* Make sure the output data fits in the offchip buffer */
- if (output_patch_size)
- num_patches = MIN2(num_patches, (tess_offchip_block_dw_size * 4) / output_patch_size);
- /* Not necessary for correctness, but improves performance. The
- * specific value is taken from the proprietary driver.
- */
- num_patches = MIN2(num_patches, 40);
-
- /* GFX6 bug workaround - limit LS-HS threadgroups to only one wave. */
- if (chip_class == GFX6) {
- unsigned one_wave = 64 / MAX2(tcs_num_input_vertices, tcs_num_output_vertices);
- num_patches = MIN2(num_patches, one_wave);
- }
- return num_patches;
+ uint32_t input_vertex_size = tcs_num_inputs * 16;
+ uint32_t input_patch_size = tcs_num_input_vertices * input_vertex_size;
+ uint32_t output_vertex_size = tcs_num_outputs * 16;
+ uint32_t pervertex_output_patch_size = tcs_num_output_vertices * output_vertex_size;
+ uint32_t output_patch_size = pervertex_output_patch_size + tcs_num_patch_outputs * 16;
+
+ /* Ensure that we only need one wave per SIMD so we don't need to check
+ * resource usage. Also ensures that the number of tcs in and out
+ * vertices per threadgroup are at most 256.
+ */
+ unsigned num_patches = 64 / MAX2(tcs_num_input_vertices, tcs_num_output_vertices) * 4;
+ /* Make sure that the data fits in LDS. This assumes the shaders only
+ * use LDS for the inputs and outputs.
+ */
+ unsigned hardware_lds_size = 32768;
+
+ /* Looks like STONEY hangs if we use more than 32 KiB LDS in a single
+ * threadgroup, even though there is more than 32 KiB LDS.
+ *
+ * Test: dEQP-VK.tessellation.shader_input_output.barrier
+ */
+ if (chip_class >= GFX7 && family != CHIP_STONEY)
+ hardware_lds_size = 65536;
+
+ if (input_patch_size + output_patch_size)
+ num_patches = MIN2(num_patches, hardware_lds_size / (input_patch_size + output_patch_size));
+ /* Make sure the output data fits in the offchip buffer */
+ if (output_patch_size)
+ num_patches = MIN2(num_patches, (tess_offchip_block_dw_size * 4) / output_patch_size);
+ /* Not necessary for correctness, but improves performance. The
+ * specific value is taken from the proprietary driver.
+ */
+ num_patches = MIN2(num_patches, 40);
+
+ /* GFX6 bug workaround - limit LS-HS threadgroups to only one wave. */
+ if (chip_class == GFX6) {
+ unsigned one_wave = 64 / MAX2(tcs_num_input_vertices, tcs_num_output_vertices);
+ num_patches = MIN2(num_patches, one_wave);
+ }
+ return num_patches;
}
-void
-radv_lower_io(struct radv_device *device, nir_shader *nir);
+void radv_lower_io(struct radv_device *device, nir_shader *nir);
-bool
-radv_lower_io_to_mem(struct radv_device *device, struct nir_shader *nir,
- struct radv_shader_info *info, const struct radv_pipeline_key *pl_key);
+bool radv_lower_io_to_mem(struct radv_device *device, struct nir_shader *nir,
+ struct radv_shader_info *info, const struct radv_pipeline_key *pl_key);
#endif
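A hedged aside, not part of the patch: the sketch below replays the sizing math from the get_tcs_num_patches() and calculate_tess_lds_size() helpers above with illustrative values (triangle patches, 8 vec4 inputs/outputs per vertex, 2 per-patch outputs, an assumed 8192-dword off-chip block, a GFX9-class chip). The chosen constants and the standalone MAX2/MIN2/ALIGN helpers are assumptions for the example only, not values taken from the driver.

/* Standalone illustration of the tess sizing helpers above; values are made up. */
#include <stdio.h>

#define MAX2(a, b) ((a) > (b) ? (a) : (b))
#define MIN2(a, b) ((a) < (b) ? (a) : (b))
#define ALIGN(x, a) (((x) + (a)-1) / (a) * (a))

int
main(void)
{
   unsigned in_verts = 3, out_verts = 3;     /* triangle domain */
   unsigned num_inputs = 8, num_outputs = 8; /* vec4 slots, 16 bytes each */
   unsigned num_patch_outputs = 2;           /* includes tess factors */
   unsigned offchip_block_dw = 8192;         /* assumed off-chip block size */

   unsigned input_patch = in_verts * num_inputs * 16;   /* 384 bytes */
   unsigned output_patch = out_verts * num_outputs * 16 /* 384 bytes */
                           + num_patch_outputs * 16;    /* + 32 = 416 bytes */

   /* One wave per SIMD, then clamp by LDS, by the off-chip buffer, and by 40. */
   unsigned num_patches = 64 / MAX2(in_verts, out_verts) * 4;              /* 84 */
   num_patches = MIN2(num_patches, 65536 / (input_patch + output_patch));  /* 81 */
   num_patches = MIN2(num_patches, (offchip_block_dw * 4) / output_patch); /* 78 */
   num_patches = MIN2(num_patches, 40);                                    /* 40 */

   /* LDS layout: all input patches first, then all output patches. */
   unsigned lds_bytes = (input_patch + output_patch) * num_patches; /* 32000 */
   unsigned lds_granules = ALIGN(lds_bytes, 512) / 512;             /* 63 on GFX7+ */

   printf("num_patches=%u, LDS=%u bytes (%u x 512-byte granules)\n", num_patches, lds_bytes,
          lds_granules);
   return 0;
}

With these inputs the clamp chain ends at the 40-patch performance cap and the LS-HS threadgroup uses 32000 bytes of LDS, i.e. 63 granules of 512 bytes on GFX7 and newer.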
diff --git a/src/amd/vulkan/radv_shader_args.c b/src/amd/vulkan/radv_shader_args.c
index c3a8167571b..75f70d6c475 100644
--- a/src/amd/vulkan/radv_shader_args.c
+++ b/src/amd/vulkan/radv_shader_args.c
@@ -26,744 +26,663 @@
* IN THE SOFTWARE.
*/
+#include "radv_shader_args.h"
#include "radv_private.h"
#include "radv_shader.h"
-#include "radv_shader_args.h"
static void
-set_loc(struct radv_userdata_info *ud_info, uint8_t *sgpr_idx,
- uint8_t num_sgprs)
+set_loc(struct radv_userdata_info *ud_info, uint8_t *sgpr_idx, uint8_t num_sgprs)
{
- ud_info->sgpr_idx = *sgpr_idx;
- ud_info->num_sgprs = num_sgprs;
- *sgpr_idx += num_sgprs;
+ ud_info->sgpr_idx = *sgpr_idx;
+ ud_info->num_sgprs = num_sgprs;
+ *sgpr_idx += num_sgprs;
}
static void
-set_loc_shader(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx,
- uint8_t num_sgprs)
+set_loc_shader(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx, uint8_t num_sgprs)
{
- struct radv_userdata_info *ud_info =
- &args->shader_info->user_sgprs_locs.shader_data[idx];
- assert(ud_info);
+ struct radv_userdata_info *ud_info = &args->shader_info->user_sgprs_locs.shader_data[idx];
+ assert(ud_info);
- set_loc(ud_info, sgpr_idx, num_sgprs);
+ set_loc(ud_info, sgpr_idx, num_sgprs);
}
static void
set_loc_shader_ptr(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx)
{
- bool use_32bit_pointers = idx != AC_UD_SCRATCH_RING_OFFSETS;
+ bool use_32bit_pointers = idx != AC_UD_SCRATCH_RING_OFFSETS;
- set_loc_shader(args, idx, sgpr_idx, use_32bit_pointers ? 1 : 2);
+ set_loc_shader(args, idx, sgpr_idx, use_32bit_pointers ? 1 : 2);
}
static void
set_loc_desc(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx)
{
- struct radv_userdata_locations *locs =
- &args->shader_info->user_sgprs_locs;
- struct radv_userdata_info *ud_info = &locs->descriptor_sets[idx];
- assert(ud_info);
+ struct radv_userdata_locations *locs = &args->shader_info->user_sgprs_locs;
+ struct radv_userdata_info *ud_info = &locs->descriptor_sets[idx];
+ assert(ud_info);
- set_loc(ud_info, sgpr_idx, 1);
+ set_loc(ud_info, sgpr_idx, 1);
- locs->descriptor_sets_enabled |= 1u << idx;
+ locs->descriptor_sets_enabled |= 1u << idx;
}
struct user_sgpr_info {
- bool indirect_all_descriptor_sets;
- uint8_t remaining_sgprs;
+ bool indirect_all_descriptor_sets;
+ uint8_t remaining_sgprs;
};
-static bool needs_view_index_sgpr(struct radv_shader_args *args,
- gl_shader_stage stage)
+static bool
+needs_view_index_sgpr(struct radv_shader_args *args, gl_shader_stage stage)
{
- switch (stage) {
- case MESA_SHADER_VERTEX:
- if (args->shader_info->needs_multiview_view_index ||
- (!args->options->key.vs_common_out.as_es && !args->options->key.vs_common_out.as_ls && args->options->key.has_multiview_view_index))
- return true;
- break;
- case MESA_SHADER_TESS_EVAL:
- if (args->shader_info->needs_multiview_view_index || (!args->options->key.vs_common_out.as_es && args->options->key.has_multiview_view_index))
- return true;
- break;
- case MESA_SHADER_TESS_CTRL:
- if (args->shader_info->needs_multiview_view_index)
- return true;
- break;
- case MESA_SHADER_GEOMETRY:
- if (args->shader_info->needs_multiview_view_index ||
- (args->options->key.vs_common_out.as_ngg &&
- args->options->key.has_multiview_view_index))
- return true;
- break;
- default:
- break;
- }
- return false;
+ switch (stage) {
+ case MESA_SHADER_VERTEX:
+ if (args->shader_info->needs_multiview_view_index ||
+ (!args->options->key.vs_common_out.as_es && !args->options->key.vs_common_out.as_ls &&
+ args->options->key.has_multiview_view_index))
+ return true;
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ if (args->shader_info->needs_multiview_view_index ||
+ (!args->options->key.vs_common_out.as_es && args->options->key.has_multiview_view_index))
+ return true;
+ break;
+ case MESA_SHADER_TESS_CTRL:
+ if (args->shader_info->needs_multiview_view_index)
+ return true;
+ break;
+ case MESA_SHADER_GEOMETRY:
+ if (args->shader_info->needs_multiview_view_index ||
+ (args->options->key.vs_common_out.as_ngg && args->options->key.has_multiview_view_index))
+ return true;
+ break;
+ default:
+ break;
+ }
+ return false;
}
static uint8_t
count_vs_user_sgprs(struct radv_shader_args *args)
{
- uint8_t count = 1; /* vertex offset */
+ uint8_t count = 1; /* vertex offset */
- if (args->shader_info->vs.has_vertex_buffers)
- count++;
- if (args->shader_info->vs.needs_draw_id)
- count++;
- if (args->shader_info->vs.needs_base_instance)
- count++;
+ if (args->shader_info->vs.has_vertex_buffers)
+ count++;
+ if (args->shader_info->vs.needs_draw_id)
+ count++;
+ if (args->shader_info->vs.needs_base_instance)
+ count++;
- return count;
+ return count;
}
-static void allocate_inline_push_consts(struct radv_shader_args *args,
- struct user_sgpr_info *user_sgpr_info)
+static void
+allocate_inline_push_consts(struct radv_shader_args *args, struct user_sgpr_info *user_sgpr_info)
{
- uint8_t remaining_sgprs = user_sgpr_info->remaining_sgprs;
-
- /* Only supported if shaders use push constants. */
- if (args->shader_info->min_push_constant_used == UINT8_MAX)
- return;
-
- /* Only supported if shaders don't have indirect push constants. */
- if (args->shader_info->has_indirect_push_constants)
- return;
-
- /* Only supported for 32-bit push constants. */
- if (!args->shader_info->has_only_32bit_push_constants)
- return;
-
- uint8_t num_push_consts =
- (args->shader_info->max_push_constant_used -
- args->shader_info->min_push_constant_used) / 4;
-
- /* Check if the number of user SGPRs is large enough. */
- if (num_push_consts < remaining_sgprs) {
- args->shader_info->num_inline_push_consts = num_push_consts;
- } else {
- args->shader_info->num_inline_push_consts = remaining_sgprs;
- }
-
- /* Clamp to the maximum number of allowed inlined push constants. */
- if (args->shader_info->num_inline_push_consts > AC_MAX_INLINE_PUSH_CONSTS)
- args->shader_info->num_inline_push_consts = AC_MAX_INLINE_PUSH_CONSTS;
-
- if (args->shader_info->num_inline_push_consts == num_push_consts &&
- !args->shader_info->loads_dynamic_offsets) {
- /* Disable the default push constants path if all constants are
- * inlined and if shaders don't use dynamic descriptors.
- */
- args->shader_info->loads_push_constants = false;
- }
-
- args->shader_info->base_inline_push_consts =
- args->shader_info->min_push_constant_used / 4;
+ uint8_t remaining_sgprs = user_sgpr_info->remaining_sgprs;
+
+ /* Only supported if shaders use push constants. */
+ if (args->shader_info->min_push_constant_used == UINT8_MAX)
+ return;
+
+ /* Only supported if shaders don't have indirect push constants. */
+ if (args->shader_info->has_indirect_push_constants)
+ return;
+
+ /* Only supported for 32-bit push constants. */
+ if (!args->shader_info->has_only_32bit_push_constants)
+ return;
+
+ uint8_t num_push_consts =
+ (args->shader_info->max_push_constant_used - args->shader_info->min_push_constant_used) / 4;
+
+ /* Check if the number of user SGPRs is large enough. */
+ if (num_push_consts < remaining_sgprs) {
+ args->shader_info->num_inline_push_consts = num_push_consts;
+ } else {
+ args->shader_info->num_inline_push_consts = remaining_sgprs;
+ }
+
+ /* Clamp to the maximum number of allowed inlined push constants. */
+ if (args->shader_info->num_inline_push_consts > AC_MAX_INLINE_PUSH_CONSTS)
+ args->shader_info->num_inline_push_consts = AC_MAX_INLINE_PUSH_CONSTS;
+
+ if (args->shader_info->num_inline_push_consts == num_push_consts &&
+ !args->shader_info->loads_dynamic_offsets) {
+ /* Disable the default push constants path if all constants are
+ * inlined and if shaders don't use dynamic descriptors.
+ */
+ args->shader_info->loads_push_constants = false;
+ }
+
+ args->shader_info->base_inline_push_consts = args->shader_info->min_push_constant_used / 4;
}
-static void allocate_user_sgprs(struct radv_shader_args *args,
- gl_shader_stage stage,
- bool has_previous_stage,
- gl_shader_stage previous_stage,
- bool needs_view_index,
- struct user_sgpr_info *user_sgpr_info)
+static void
+allocate_user_sgprs(struct radv_shader_args *args, gl_shader_stage stage, bool has_previous_stage,
+ gl_shader_stage previous_stage, bool needs_view_index,
+ struct user_sgpr_info *user_sgpr_info)
{
- uint8_t user_sgpr_count = 0;
-
- memset(user_sgpr_info, 0, sizeof(struct user_sgpr_info));
-
- /* 2 user sgprs will always be allocated for scratch/rings */
- user_sgpr_count += 2;
-
- switch (stage) {
- case MESA_SHADER_COMPUTE:
- if (args->shader_info->cs.uses_grid_size)
- user_sgpr_count += 3;
- break;
- case MESA_SHADER_FRAGMENT:
- user_sgpr_count += args->shader_info->ps.needs_sample_positions;
- break;
- case MESA_SHADER_VERTEX:
- if (!args->is_gs_copy_shader)
- user_sgpr_count += count_vs_user_sgprs(args);
- break;
- case MESA_SHADER_TESS_CTRL:
- if (has_previous_stage) {
- if (previous_stage == MESA_SHADER_VERTEX)
- user_sgpr_count += count_vs_user_sgprs(args);
- }
- break;
- case MESA_SHADER_TESS_EVAL:
- break;
- case MESA_SHADER_GEOMETRY:
- if (has_previous_stage) {
- if (previous_stage == MESA_SHADER_VERTEX) {
- user_sgpr_count += count_vs_user_sgprs(args);
- }
- }
- break;
- default:
- break;
- }
-
- if (needs_view_index)
- user_sgpr_count++;
-
- if (args->shader_info->loads_push_constants)
- user_sgpr_count++;
-
- if (args->shader_info->so.num_outputs)
- user_sgpr_count++;
-
- uint32_t available_sgprs = args->options->chip_class >= GFX9 && stage != MESA_SHADER_COMPUTE ? 32 : 16;
- uint32_t remaining_sgprs = available_sgprs - user_sgpr_count;
- uint32_t num_desc_set =
- util_bitcount(args->shader_info->desc_set_used_mask);
-
- if (remaining_sgprs < num_desc_set) {
- user_sgpr_info->indirect_all_descriptor_sets = true;
- user_sgpr_info->remaining_sgprs = remaining_sgprs - 1;
- } else {
- user_sgpr_info->remaining_sgprs = remaining_sgprs - num_desc_set;
- }
-
- allocate_inline_push_consts(args, user_sgpr_info);
+ uint8_t user_sgpr_count = 0;
+
+ memset(user_sgpr_info, 0, sizeof(struct user_sgpr_info));
+
+ /* 2 user sgprs will always be allocated for scratch/rings */
+ user_sgpr_count += 2;
+
+ switch (stage) {
+ case MESA_SHADER_COMPUTE:
+ if (args->shader_info->cs.uses_grid_size)
+ user_sgpr_count += 3;
+ break;
+ case MESA_SHADER_FRAGMENT:
+ user_sgpr_count += args->shader_info->ps.needs_sample_positions;
+ break;
+ case MESA_SHADER_VERTEX:
+ if (!args->is_gs_copy_shader)
+ user_sgpr_count += count_vs_user_sgprs(args);
+ break;
+ case MESA_SHADER_TESS_CTRL:
+ if (has_previous_stage) {
+ if (previous_stage == MESA_SHADER_VERTEX)
+ user_sgpr_count += count_vs_user_sgprs(args);
+ }
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ break;
+ case MESA_SHADER_GEOMETRY:
+ if (has_previous_stage) {
+ if (previous_stage == MESA_SHADER_VERTEX) {
+ user_sgpr_count += count_vs_user_sgprs(args);
+ }
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (needs_view_index)
+ user_sgpr_count++;
+
+ if (args->shader_info->loads_push_constants)
+ user_sgpr_count++;
+
+ if (args->shader_info->so.num_outputs)
+ user_sgpr_count++;
+
+ uint32_t available_sgprs =
+ args->options->chip_class >= GFX9 && stage != MESA_SHADER_COMPUTE ? 32 : 16;
+ uint32_t remaining_sgprs = available_sgprs - user_sgpr_count;
+ uint32_t num_desc_set = util_bitcount(args->shader_info->desc_set_used_mask);
+
+ if (remaining_sgprs < num_desc_set) {
+ user_sgpr_info->indirect_all_descriptor_sets = true;
+ user_sgpr_info->remaining_sgprs = remaining_sgprs - 1;
+ } else {
+ user_sgpr_info->remaining_sgprs = remaining_sgprs - num_desc_set;
+ }
+
+ allocate_inline_push_consts(args, user_sgpr_info);
}
static void
declare_global_input_sgprs(struct radv_shader_args *args,
- const struct user_sgpr_info *user_sgpr_info)
+ const struct user_sgpr_info *user_sgpr_info)
{
- /* 1 for each descriptor set */
- if (!user_sgpr_info->indirect_all_descriptor_sets) {
- uint32_t mask = args->shader_info->desc_set_used_mask;
-
- while (mask) {
- int i = u_bit_scan(&mask);
-
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR,
- &args->descriptor_sets[i]);
- }
- } else {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR_PTR,
- &args->descriptor_sets[0]);
- }
-
- if (args->shader_info->loads_push_constants) {
- /* 1 for push constants and dynamic descriptors */
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR,
- &args->ac.push_constants);
- }
-
- for (unsigned i = 0; i < args->shader_info->num_inline_push_consts; i++) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ac.inline_push_consts[i]);
- }
- args->ac.num_inline_push_consts = args->shader_info->num_inline_push_consts;
- args->ac.base_inline_push_consts = args->shader_info->base_inline_push_consts;
-
- if (args->shader_info->so.num_outputs) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR,
- &args->streamout_buffers);
- }
+ /* 1 for each descriptor set */
+ if (!user_sgpr_info->indirect_all_descriptor_sets) {
+ uint32_t mask = args->shader_info->desc_set_used_mask;
+
+ while (mask) {
+ int i = u_bit_scan(&mask);
+
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR, &args->descriptor_sets[i]);
+ }
+ } else {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR_PTR, &args->descriptor_sets[0]);
+ }
+
+ if (args->shader_info->loads_push_constants) {
+ /* 1 for push constants and dynamic descriptors */
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR, &args->ac.push_constants);
+ }
+
+ for (unsigned i = 0; i < args->shader_info->num_inline_push_consts; i++) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.inline_push_consts[i]);
+ }
+ args->ac.num_inline_push_consts = args->shader_info->num_inline_push_consts;
+ args->ac.base_inline_push_consts = args->shader_info->base_inline_push_consts;
+
+ if (args->shader_info->so.num_outputs) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &args->streamout_buffers);
+ }
}
static void
-declare_vs_specific_input_sgprs(struct radv_shader_args *args,
- gl_shader_stage stage,
- bool has_previous_stage,
- gl_shader_stage previous_stage)
+declare_vs_specific_input_sgprs(struct radv_shader_args *args, gl_shader_stage stage,
+ bool has_previous_stage, gl_shader_stage previous_stage)
{
- if (!args->is_gs_copy_shader &&
- (stage == MESA_SHADER_VERTEX ||
- (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
- if (args->shader_info->vs.has_vertex_buffers) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR,
- &args->ac.vertex_buffers);
- }
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.base_vertex);
- if (args->shader_info->vs.needs_draw_id) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.draw_id);
- }
- if (args->shader_info->vs.needs_base_instance) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.start_instance);
- }
- }
+ if (!args->is_gs_copy_shader && (stage == MESA_SHADER_VERTEX ||
+ (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
+ if (args->shader_info->vs.has_vertex_buffers) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &args->ac.vertex_buffers);
+ }
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.base_vertex);
+ if (args->shader_info->vs.needs_draw_id) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.draw_id);
+ }
+ if (args->shader_info->vs.needs_base_instance) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.start_instance);
+ }
+ }
}
static void
declare_vs_input_vgprs(struct radv_shader_args *args)
{
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vertex_id);
- if (!args->is_gs_copy_shader) {
- if (args->options->key.vs_common_out.as_ls) {
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_rel_patch_id);
- if (args->options->chip_class >= GFX10) {
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
- } else {
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
- }
- } else {
- if (args->options->chip_class >= GFX10) {
- if (args->options->key.vs_common_out.as_ngg) {
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
- } else {
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_prim_id);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
- }
- } else {
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_prim_id);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
- }
- }
- }
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vertex_id);
+ if (!args->is_gs_copy_shader) {
+ if (args->options->key.vs_common_out.as_ls) {
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_rel_patch_id);
+ if (args->options->chip_class >= GFX10) {
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
+ } else {
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
+ }
+ } else {
+ if (args->options->chip_class >= GFX10) {
+ if (args->options->key.vs_common_out.as_ngg) {
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
+ } else {
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_prim_id);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
+ }
+ } else {
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_prim_id);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
+ }
+ }
+ }
}
static void
declare_streamout_sgprs(struct radv_shader_args *args, gl_shader_stage stage)
{
- int i;
-
- if (args->options->use_ngg_streamout) {
- if (stage == MESA_SHADER_TESS_EVAL)
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
- return;
- }
-
- /* Streamout SGPRs. */
- if (args->shader_info->so.num_outputs) {
- assert(stage == MESA_SHADER_VERTEX ||
- stage == MESA_SHADER_TESS_EVAL);
-
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_config);
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_write_index);
- } else if (stage == MESA_SHADER_TESS_EVAL) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
- }
-
- /* A streamout buffer offset is loaded if the stride is non-zero. */
- for (i = 0; i < 4; i++) {
- if (!args->shader_info->so.strides[i])
- continue;
-
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_offset[i]);
- }
+ int i;
+
+ if (args->options->use_ngg_streamout) {
+ if (stage == MESA_SHADER_TESS_EVAL)
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
+ return;
+ }
+
+ /* Streamout SGPRs. */
+ if (args->shader_info->so.num_outputs) {
+ assert(stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL);
+
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_config);
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_write_index);
+ } else if (stage == MESA_SHADER_TESS_EVAL) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
+ }
+
+ /* A streamout buffer offset is loaded if the stride is non-zero. */
+ for (i = 0; i < 4; i++) {
+ if (!args->shader_info->so.strides[i])
+ continue;
+
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_offset[i]);
+ }
}
static void
declare_tes_input_vgprs(struct radv_shader_args *args)
{
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.tes_u);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.tes_v);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_rel_patch_id);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_patch_id);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.tes_u);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.tes_v);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_rel_patch_id);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tes_patch_id);
}
static void
-set_global_input_locs(struct radv_shader_args *args,
- const struct user_sgpr_info *user_sgpr_info,
- uint8_t *user_sgpr_idx)
+set_global_input_locs(struct radv_shader_args *args, const struct user_sgpr_info *user_sgpr_info,
+ uint8_t *user_sgpr_idx)
{
- uint32_t mask = args->shader_info->desc_set_used_mask;
-
- if (!user_sgpr_info->indirect_all_descriptor_sets) {
- while (mask) {
- int i = u_bit_scan(&mask);
-
- set_loc_desc(args, i, user_sgpr_idx);
- }
- } else {
- set_loc_shader_ptr(args, AC_UD_INDIRECT_DESCRIPTOR_SETS,
- user_sgpr_idx);
-
- args->shader_info->need_indirect_descriptor_sets = true;
- }
-
- if (args->shader_info->loads_push_constants) {
- set_loc_shader_ptr(args, AC_UD_PUSH_CONSTANTS, user_sgpr_idx);
- }
-
- if (args->shader_info->num_inline_push_consts) {
- set_loc_shader(args, AC_UD_INLINE_PUSH_CONSTANTS, user_sgpr_idx,
- args->shader_info->num_inline_push_consts);
- }
-
- if (args->streamout_buffers.used) {
- set_loc_shader_ptr(args, AC_UD_STREAMOUT_BUFFERS,
- user_sgpr_idx);
- }
+ uint32_t mask = args->shader_info->desc_set_used_mask;
+
+ if (!user_sgpr_info->indirect_all_descriptor_sets) {
+ while (mask) {
+ int i = u_bit_scan(&mask);
+
+ set_loc_desc(args, i, user_sgpr_idx);
+ }
+ } else {
+ set_loc_shader_ptr(args, AC_UD_INDIRECT_DESCRIPTOR_SETS, user_sgpr_idx);
+
+ args->shader_info->need_indirect_descriptor_sets = true;
+ }
+
+ if (args->shader_info->loads_push_constants) {
+ set_loc_shader_ptr(args, AC_UD_PUSH_CONSTANTS, user_sgpr_idx);
+ }
+
+ if (args->shader_info->num_inline_push_consts) {
+ set_loc_shader(args, AC_UD_INLINE_PUSH_CONSTANTS, user_sgpr_idx,
+ args->shader_info->num_inline_push_consts);
+ }
+
+ if (args->streamout_buffers.used) {
+ set_loc_shader_ptr(args, AC_UD_STREAMOUT_BUFFERS, user_sgpr_idx);
+ }
}
static void
-set_vs_specific_input_locs(struct radv_shader_args *args,
- gl_shader_stage stage, bool has_previous_stage,
- gl_shader_stage previous_stage,
- uint8_t *user_sgpr_idx)
+set_vs_specific_input_locs(struct radv_shader_args *args, gl_shader_stage stage,
+ bool has_previous_stage, gl_shader_stage previous_stage,
+ uint8_t *user_sgpr_idx)
{
- if (!args->is_gs_copy_shader &&
- (stage == MESA_SHADER_VERTEX ||
- (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
- if (args->shader_info->vs.has_vertex_buffers) {
- set_loc_shader_ptr(args, AC_UD_VS_VERTEX_BUFFERS,
- user_sgpr_idx);
- }
-
- unsigned vs_num = count_vs_user_sgprs(args) - args->shader_info->vs.has_vertex_buffers;
- set_loc_shader(args, AC_UD_VS_BASE_VERTEX_START_INSTANCE,
- user_sgpr_idx, vs_num);
- }
+ if (!args->is_gs_copy_shader && (stage == MESA_SHADER_VERTEX ||
+ (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
+ if (args->shader_info->vs.has_vertex_buffers) {
+ set_loc_shader_ptr(args, AC_UD_VS_VERTEX_BUFFERS, user_sgpr_idx);
+ }
+
+ unsigned vs_num = count_vs_user_sgprs(args) - args->shader_info->vs.has_vertex_buffers;
+ set_loc_shader(args, AC_UD_VS_BASE_VERTEX_START_INSTANCE, user_sgpr_idx, vs_num);
+ }
}
/* Returns whether the stage is a stage that can be directly before the GS */
-static bool is_pre_gs_stage(gl_shader_stage stage)
+static bool
+is_pre_gs_stage(gl_shader_stage stage)
{
- return stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL;
+ return stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL;
}
void
-radv_declare_shader_args(struct radv_shader_args *args,
- gl_shader_stage stage,
- bool has_previous_stage,
- gl_shader_stage previous_stage)
+radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage,
+ bool has_previous_stage, gl_shader_stage previous_stage)
{
- struct user_sgpr_info user_sgpr_info;
- bool needs_view_index = needs_view_index_sgpr(args, stage);
-
- if (args->options->chip_class >= GFX10) {
- if (is_pre_gs_stage(stage) && args->options->key.vs_common_out.as_ngg) {
- /* On GFX10, VS is merged into GS for NGG. */
- previous_stage = stage;
- stage = MESA_SHADER_GEOMETRY;
- has_previous_stage = true;
- }
- }
-
- for (int i = 0; i < MAX_SETS; i++)
- args->shader_info->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1;
- for (int i = 0; i < AC_UD_MAX_UD; i++)
- args->shader_info->user_sgprs_locs.shader_data[i].sgpr_idx = -1;
-
-
- allocate_user_sgprs(args, stage, has_previous_stage,
- previous_stage, needs_view_index, &user_sgpr_info);
-
- if (args->options->explicit_scratch_args) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR,
- &args->ring_offsets);
- }
-
- switch (stage) {
- case MESA_SHADER_COMPUTE:
- declare_global_input_sgprs(args, &user_sgpr_info);
-
- if (args->shader_info->cs.uses_grid_size) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_INT,
- &args->ac.num_work_groups);
- }
-
- for (int i = 0; i < 3; i++) {
- if (args->shader_info->cs.uses_block_id[i]) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ac.workgroup_ids[i]);
- }
- }
-
- if (args->shader_info->cs.uses_local_invocation_idx) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ac.tg_size);
- }
-
- if (args->options->explicit_scratch_args) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ac.scratch_offset);
- }
-
- ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT,
- &args->ac.local_invocation_ids);
- break;
- case MESA_SHADER_VERTEX:
- declare_global_input_sgprs(args, &user_sgpr_info);
-
- declare_vs_specific_input_sgprs(args, stage, has_previous_stage,
- previous_stage);
-
- if (needs_view_index) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ac.view_index);
- }
-
- if (args->options->key.vs_common_out.as_es) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ac.es2gs_offset);
- } else if (args->options->key.vs_common_out.as_ls) {
- /* no extra parameters */
- } else {
- declare_streamout_sgprs(args, stage);
- }
-
- if (args->options->explicit_scratch_args) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ac.scratch_offset);
- }
-
- declare_vs_input_vgprs(args);
- break;
- case MESA_SHADER_TESS_CTRL:
- if (has_previous_stage) {
- // First 6 system regs
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ac.merged_wave_info);
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ac.tcs_factor_offset);
-
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
-
- declare_global_input_sgprs(args, &user_sgpr_info);
-
- declare_vs_specific_input_sgprs(args, stage,
- has_previous_stage,
- previous_stage);
-
- if (needs_view_index) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ac.view_index);
- }
-
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
- &args->ac.tcs_patch_id);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
- &args->ac.tcs_rel_ids);
-
- declare_vs_input_vgprs(args);
- } else {
- declare_global_input_sgprs(args, &user_sgpr_info);
-
- if (needs_view_index) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ac.view_index);
- }
-
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ac.tcs_factor_offset);
- if (args->options->explicit_scratch_args) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ac.scratch_offset);
- }
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
- &args->ac.tcs_patch_id);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
- &args->ac.tcs_rel_ids);
- }
- break;
- case MESA_SHADER_TESS_EVAL:
- declare_global_input_sgprs(args, &user_sgpr_info);
-
- if (needs_view_index)
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ac.view_index);
-
- if (args->options->key.vs_common_out.as_es) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ac.es2gs_offset);
- } else {
- declare_streamout_sgprs(args, stage);
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
- }
- if (args->options->explicit_scratch_args) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ac.scratch_offset);
- }
- declare_tes_input_vgprs(args);
- break;
- case MESA_SHADER_GEOMETRY:
- if (has_previous_stage) {
- // First 6 system regs
- if (args->options->key.vs_common_out.as_ngg) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ac.gs_tg_info);
- } else {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ac.gs2vs_offset);
- }
-
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ac.merged_wave_info);
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
-
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
-
- declare_global_input_sgprs(args, &user_sgpr_info);
-
- if (previous_stage != MESA_SHADER_TESS_EVAL) {
- declare_vs_specific_input_sgprs(args, stage,
- has_previous_stage,
- previous_stage);
- }
-
- if (needs_view_index) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ac.view_index);
- }
-
- if (args->options->key.vs_common_out.as_ngg) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ngg_gs_state);
- }
-
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
- &args->ac.gs_vtx_offset[0]);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
- &args->ac.gs_vtx_offset[2]);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
- &args->ac.gs_prim_id);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
- &args->ac.gs_invocation_id);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
- &args->ac.gs_vtx_offset[4]);
-
- if (previous_stage == MESA_SHADER_VERTEX) {
- declare_vs_input_vgprs(args);
- } else {
- declare_tes_input_vgprs(args);
- }
- } else {
- declare_global_input_sgprs(args, &user_sgpr_info);
-
- if (needs_view_index) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ac.view_index);
- }
-
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs2vs_offset);
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs_wave_id);
- if (args->options->explicit_scratch_args) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ac.scratch_offset);
- }
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
- &args->ac.gs_vtx_offset[0]);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
- &args->ac.gs_vtx_offset[1]);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
- &args->ac.gs_prim_id);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
- &args->ac.gs_vtx_offset[2]);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
- &args->ac.gs_vtx_offset[3]);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
- &args->ac.gs_vtx_offset[4]);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
- &args->ac.gs_vtx_offset[5]);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
- &args->ac.gs_invocation_id);
- }
- break;
- case MESA_SHADER_FRAGMENT:
- declare_global_input_sgprs(args, &user_sgpr_info);
-
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.prim_mask);
- if (args->options->explicit_scratch_args) {
- ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
- &args->ac.scratch_offset);
- }
- ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_sample);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_center);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_centroid);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, &args->ac.pull_model);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_sample);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_center);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_centroid);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL); /* line stipple tex */
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[0]);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[1]);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[2]);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[3]);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.front_face);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.ancillary);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.sample_coverage);
- ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* fixed pt */
- break;
- default:
- unreachable("Shader stage not implemented");
- }
-
- args->shader_info->num_input_vgprs = 0;
- args->shader_info->num_input_sgprs = 2;
- args->shader_info->num_input_sgprs += args->ac.num_sgprs_used;
- args->shader_info->num_input_vgprs = args->ac.num_vgprs_used;
-
- uint8_t user_sgpr_idx = 0;
-
- set_loc_shader_ptr(args, AC_UD_SCRATCH_RING_OFFSETS,
- &user_sgpr_idx);
-
- /* For merged shaders the user SGPRs start at 8, with 8 system SGPRs in front (including
- * the rw_buffers at s0/s1. With user SGPR0 = s8, lets restart the count from 0 */
- if (has_previous_stage)
- user_sgpr_idx = 0;
-
- set_global_input_locs(args, &user_sgpr_info, &user_sgpr_idx);
-
- switch (stage) {
- case MESA_SHADER_COMPUTE:
- if (args->shader_info->cs.uses_grid_size) {
- set_loc_shader(args, AC_UD_CS_GRID_SIZE,
- &user_sgpr_idx, 3);
- }
- break;
- case MESA_SHADER_VERTEX:
- set_vs_specific_input_locs(args, stage, has_previous_stage,
- previous_stage, &user_sgpr_idx);
- if (args->ac.view_index.used)
- set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
- break;
- case MESA_SHADER_TESS_CTRL:
- set_vs_specific_input_locs(args, stage, has_previous_stage,
- previous_stage, &user_sgpr_idx);
- if (args->ac.view_index.used)
- set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
- break;
- case MESA_SHADER_TESS_EVAL:
- if (args->ac.view_index.used)
- set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
- break;
- case MESA_SHADER_GEOMETRY:
- if (has_previous_stage) {
- if (previous_stage == MESA_SHADER_VERTEX)
- set_vs_specific_input_locs(args, stage,
- has_previous_stage,
- previous_stage,
- &user_sgpr_idx);
- }
- if (args->ac.view_index.used)
- set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
-
- if (args->ngg_gs_state.used)
- set_loc_shader(args, AC_UD_NGG_GS_STATE, &user_sgpr_idx, 1);
- break;
- case MESA_SHADER_FRAGMENT:
- break;
- default:
- unreachable("Shader stage not implemented");
- }
-
- args->shader_info->num_user_sgprs = user_sgpr_idx;
+ struct user_sgpr_info user_sgpr_info;
+ bool needs_view_index = needs_view_index_sgpr(args, stage);
+
+ if (args->options->chip_class >= GFX10) {
+ if (is_pre_gs_stage(stage) && args->options->key.vs_common_out.as_ngg) {
+ /* On GFX10, VS is merged into GS for NGG. */
+ previous_stage = stage;
+ stage = MESA_SHADER_GEOMETRY;
+ has_previous_stage = true;
+ }
+ }
+
+ for (int i = 0; i < MAX_SETS; i++)
+ args->shader_info->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1;
+ for (int i = 0; i < AC_UD_MAX_UD; i++)
+ args->shader_info->user_sgprs_locs.shader_data[i].sgpr_idx = -1;
+
+ allocate_user_sgprs(args, stage, has_previous_stage, previous_stage, needs_view_index,
+ &user_sgpr_info);
+
+ if (args->options->explicit_scratch_args) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->ring_offsets);
+ }
+
+ switch (stage) {
+ case MESA_SHADER_COMPUTE:
+ declare_global_input_sgprs(args, &user_sgpr_info);
+
+ if (args->shader_info->cs.uses_grid_size) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_INT, &args->ac.num_work_groups);
+ }
+
+ for (int i = 0; i < 3; i++) {
+ if (args->shader_info->cs.uses_block_id[i]) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.workgroup_ids[i]);
+ }
+ }
+
+ if (args->shader_info->cs.uses_local_invocation_idx) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tg_size);
+ }
+
+ if (args->options->explicit_scratch_args) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
+ }
+
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, &args->ac.local_invocation_ids);
+ break;
+ case MESA_SHADER_VERTEX:
+ declare_global_input_sgprs(args, &user_sgpr_info);
+
+ declare_vs_specific_input_sgprs(args, stage, has_previous_stage, previous_stage);
+
+ if (needs_view_index) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
+ }
+
+ if (args->options->key.vs_common_out.as_es) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.es2gs_offset);
+ } else if (args->options->key.vs_common_out.as_ls) {
+ /* no extra parameters */
+ } else {
+ declare_streamout_sgprs(args, stage);
+ }
+
+ if (args->options->explicit_scratch_args) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
+ }
+
+ declare_vs_input_vgprs(args);
+ break;
+ case MESA_SHADER_TESS_CTRL:
+ if (has_previous_stage) {
+ // First 6 system regs
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.merged_wave_info);
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_factor_offset);
+
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
+
+ declare_global_input_sgprs(args, &user_sgpr_info);
+
+ declare_vs_specific_input_sgprs(args, stage, has_previous_stage, previous_stage);
+
+ if (needs_view_index) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
+ }
+
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_patch_id);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_rel_ids);
+
+ declare_vs_input_vgprs(args);
+ } else {
+ declare_global_input_sgprs(args, &user_sgpr_info);
+
+ if (needs_view_index) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
+ }
+
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_factor_offset);
+ if (args->options->explicit_scratch_args) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
+ }
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_patch_id);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_rel_ids);
+ }
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ declare_global_input_sgprs(args, &user_sgpr_info);
+
+ if (needs_view_index)
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
+
+ if (args->options->key.vs_common_out.as_es) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.es2gs_offset);
+ } else {
+ declare_streamout_sgprs(args, stage);
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
+ }
+ if (args->options->explicit_scratch_args) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
+ }
+ declare_tes_input_vgprs(args);
+ break;
+ case MESA_SHADER_GEOMETRY:
+ if (has_previous_stage) {
+ // First 6 system regs
+ if (args->options->key.vs_common_out.as_ngg) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs_tg_info);
+ } else {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs2vs_offset);
+ }
+
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.merged_wave_info);
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
+
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
+
+ declare_global_input_sgprs(args, &user_sgpr_info);
+
+ if (previous_stage != MESA_SHADER_TESS_EVAL) {
+ declare_vs_specific_input_sgprs(args, stage, has_previous_stage, previous_stage);
+ }
+
+ if (needs_view_index) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
+ }
+
+ if (args->options->key.vs_common_out.as_ngg) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_gs_state);
+ }
+
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[2]);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_prim_id);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_invocation_id);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[4]);
+
+ if (previous_stage == MESA_SHADER_VERTEX) {
+ declare_vs_input_vgprs(args);
+ } else {
+ declare_tes_input_vgprs(args);
+ }
+ } else {
+ declare_global_input_sgprs(args, &user_sgpr_info);
+
+ if (needs_view_index) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
+ }
+
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs2vs_offset);
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs_wave_id);
+ if (args->options->explicit_scratch_args) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
+ }
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[1]);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_prim_id);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[2]);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[3]);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[4]);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[5]);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_invocation_id);
+ }
+ break;
+ case MESA_SHADER_FRAGMENT:
+ declare_global_input_sgprs(args, &user_sgpr_info);
+
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.prim_mask);
+ if (args->options->explicit_scratch_args) {
+ ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
+ }
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_sample);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_center);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_centroid);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT, &args->ac.pull_model);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_sample);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_center);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.linear_centroid);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL); /* line stipple tex */
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[0]);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[1]);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[2]);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_FLOAT, &args->ac.frag_pos[3]);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.front_face);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.ancillary);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.sample_coverage);
+ ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* fixed pt */
+ break;
+ default:
+ unreachable("Shader stage not implemented");
+ }
+
+ args->shader_info->num_input_vgprs = 0;
+ args->shader_info->num_input_sgprs = 2;
+ args->shader_info->num_input_sgprs += args->ac.num_sgprs_used;
+ args->shader_info->num_input_vgprs = args->ac.num_vgprs_used;
+
+ uint8_t user_sgpr_idx = 0;
+
+ set_loc_shader_ptr(args, AC_UD_SCRATCH_RING_OFFSETS, &user_sgpr_idx);
+
+ /* For merged shaders the user SGPRs start at 8, with 8 system SGPRs in front (including
+ * the rw_buffers at s0/s1). With user SGPR0 = s8, let's restart the count from 0. */
+ if (has_previous_stage)
+ user_sgpr_idx = 0;
+
+ set_global_input_locs(args, &user_sgpr_info, &user_sgpr_idx);
+
+ switch (stage) {
+ case MESA_SHADER_COMPUTE:
+ if (args->shader_info->cs.uses_grid_size) {
+ set_loc_shader(args, AC_UD_CS_GRID_SIZE, &user_sgpr_idx, 3);
+ }
+ break;
+ case MESA_SHADER_VERTEX:
+ set_vs_specific_input_locs(args, stage, has_previous_stage, previous_stage, &user_sgpr_idx);
+ if (args->ac.view_index.used)
+ set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
+ break;
+ case MESA_SHADER_TESS_CTRL:
+ set_vs_specific_input_locs(args, stage, has_previous_stage, previous_stage, &user_sgpr_idx);
+ if (args->ac.view_index.used)
+ set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ if (args->ac.view_index.used)
+ set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
+ break;
+ case MESA_SHADER_GEOMETRY:
+ if (has_previous_stage) {
+ if (previous_stage == MESA_SHADER_VERTEX)
+ set_vs_specific_input_locs(args, stage, has_previous_stage, previous_stage,
+ &user_sgpr_idx);
+ }
+ if (args->ac.view_index.used)
+ set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
+
+ if (args->ngg_gs_state.used)
+ set_loc_shader(args, AC_UD_NGG_GS_STATE, &user_sgpr_idx, 1);
+ break;
+ case MESA_SHADER_FRAGMENT:
+ break;
+ default:
+ unreachable("Shader stage not implemented");
+ }
+
+ args->shader_info->num_user_sgprs = user_sgpr_idx;
}
-
diff --git a/src/amd/vulkan/radv_shader_args.h b/src/amd/vulkan/radv_shader_args.h
index c4c57f38d44..a6828cdf309 100644
--- a/src/amd/vulkan/radv_shader_args.h
+++ b/src/amd/vulkan/radv_shader_args.h
@@ -21,38 +21,36 @@
* IN THE SOFTWARE.
*/
-#include "ac_shader_args.h"
-#include "radv_constants.h"
-#include "util/list.h"
#include "compiler/shader_enums.h"
+#include "util/list.h"
+#include "util/macros.h"
+#include "ac_shader_args.h"
#include "amd_family.h"
+#include "radv_constants.h"
struct radv_shader_args {
- struct ac_shader_args ac;
- struct radv_shader_info *shader_info;
- const struct radv_nir_compiler_options *options;
+ struct ac_shader_args ac;
+ struct radv_shader_info *shader_info;
+ const struct radv_nir_compiler_options *options;
- struct ac_arg descriptor_sets[MAX_SETS];
- struct ac_arg ring_offsets;
+ struct ac_arg descriptor_sets[MAX_SETS];
+ struct ac_arg ring_offsets;
- /* Streamout */
- struct ac_arg streamout_buffers;
+ /* Streamout */
+ struct ac_arg streamout_buffers;
- /* NGG GS */
- struct ac_arg ngg_gs_state;
+ /* NGG GS */
+ struct ac_arg ngg_gs_state;
- bool is_gs_copy_shader;
- bool is_trap_handler_shader;
+ bool is_gs_copy_shader;
+ bool is_trap_handler_shader;
};
static inline struct radv_shader_args *
radv_shader_args_from_ac(struct ac_shader_args *args)
{
- return container_of(args, struct radv_shader_args, ac);
+ return container_of(args, struct radv_shader_args, ac);
}
-void radv_declare_shader_args(struct radv_shader_args *args,
- gl_shader_stage stage,
- bool has_previous_stage,
- gl_shader_stage previous_stage);
-
+void radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage,
+ bool has_previous_stage, gl_shader_stage previous_stage);
diff --git a/src/amd/vulkan/radv_shader_helper.h b/src/amd/vulkan/radv_shader_helper.h
index c64d2df676b..adf1f27dff9 100644
--- a/src/amd/vulkan/radv_shader_helper.h
+++ b/src/amd/vulkan/radv_shader_helper.h
@@ -26,17 +26,13 @@
extern "C" {
#endif
-bool radv_init_llvm_compiler(struct ac_llvm_compiler *info,
- bool thread_compiler,
- enum radeon_family family,
- enum ac_target_machine_options tm_options,
- unsigned wave_size);
-void radv_destroy_llvm_compiler(struct ac_llvm_compiler *info,
- bool thread_compiler);
+bool radv_init_llvm_compiler(struct ac_llvm_compiler *info, bool thread_compiler,
+ enum radeon_family family, enum ac_target_machine_options tm_options,
+ unsigned wave_size);
+void radv_destroy_llvm_compiler(struct ac_llvm_compiler *info, bool thread_compiler);
-bool radv_compile_to_elf(struct ac_llvm_compiler *info,
- LLVMModuleRef module,
- char **pelf_buffer, size_t *pelf_size);
+bool radv_compile_to_elf(struct ac_llvm_compiler *info, LLVMModuleRef module, char **pelf_buffer,
+ size_t *pelf_size);
#ifdef __cplusplus
}
diff --git a/src/amd/vulkan/radv_shader_info.c b/src/amd/vulkan/radv_shader_info.c
index 75e9fd12339..84978192502 100644
--- a/src/amd/vulkan/radv_shader_info.c
+++ b/src/amd/vulkan/radv_shader_info.c
@@ -20,718 +20,691 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
-#include "radv_private.h"
-#include "radv_shader.h"
#include "nir/nir.h"
#include "nir/nir_xfb_info.h"
+#include "radv_private.h"
+#include "radv_shader.h"
-static void mark_sampler_desc(const nir_variable *var,
- struct radv_shader_info *info)
+static void
+mark_sampler_desc(const nir_variable *var, struct radv_shader_info *info)
{
- info->desc_set_used_mask |= (1u << var->data.descriptor_set);
+ info->desc_set_used_mask |= (1u << var->data.descriptor_set);
}
static void
-gather_intrinsic_load_input_info(const nir_shader *nir,
- const nir_intrinsic_instr *instr,
- struct radv_shader_info *info)
+gather_intrinsic_load_input_info(const nir_shader *nir, const nir_intrinsic_instr *instr,
+ struct radv_shader_info *info)
{
- switch (nir->info.stage) {
- case MESA_SHADER_VERTEX: {
- unsigned idx = nir_intrinsic_io_semantics(instr).location;
- unsigned component = nir_intrinsic_component(instr);
- unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
-
- info->vs.input_usage_mask[idx] |= mask << component;
- break;
- }
- default:
- break;
- }
+ switch (nir->info.stage) {
+ case MESA_SHADER_VERTEX: {
+ unsigned idx = nir_intrinsic_io_semantics(instr).location;
+ unsigned component = nir_intrinsic_component(instr);
+ unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
+
+ info->vs.input_usage_mask[idx] |= mask << component;
+ break;
+ }
+ default:
+ break;
+ }
}
static uint32_t
widen_writemask(uint32_t wrmask)
{
- uint32_t new_wrmask = 0;
- for(unsigned i = 0; i < 4; i++)
- new_wrmask |= (wrmask & (1 << i) ? 0x3 : 0x0) << (i * 2);
- return new_wrmask;
+ uint32_t new_wrmask = 0;
+ for (unsigned i = 0; i < 4; i++)
+ new_wrmask |= (wrmask & (1 << i) ? 0x3 : 0x0) << (i * 2);
+ return new_wrmask;
}
static void
set_writes_memory(const nir_shader *nir, struct radv_shader_info *info)
{
- if (nir->info.stage == MESA_SHADER_FRAGMENT)
- info->ps.writes_memory = true;
+ if (nir->info.stage == MESA_SHADER_FRAGMENT)
+ info->ps.writes_memory = true;
}
static void
-gather_intrinsic_store_output_info(const nir_shader *nir,
- const nir_intrinsic_instr *instr,
- struct radv_shader_info *info)
+gather_intrinsic_store_output_info(const nir_shader *nir, const nir_intrinsic_instr *instr,
+ struct radv_shader_info *info)
{
- unsigned idx = nir_intrinsic_base(instr);
- unsigned num_slots = nir_intrinsic_io_semantics(instr).num_slots;
- unsigned component = nir_intrinsic_component(instr);
- unsigned write_mask = nir_intrinsic_write_mask(instr);
- uint8_t *output_usage_mask = NULL;
-
- if (instr->src[0].ssa->bit_size == 64)
- write_mask = widen_writemask(write_mask);
-
- switch (nir->info.stage) {
- case MESA_SHADER_VERTEX:
- output_usage_mask = info->vs.output_usage_mask;
- break;
- case MESA_SHADER_TESS_EVAL:
- output_usage_mask = info->tes.output_usage_mask;
- break;
- case MESA_SHADER_GEOMETRY:
- output_usage_mask = info->gs.output_usage_mask;
- break;
- default:
- break;
- }
-
- if (output_usage_mask) {
- for (unsigned i = 0; i < num_slots; i++) {
- output_usage_mask[idx + i] |=
- ((write_mask >> (i * 4)) & 0xf) << component;
- }
- }
+ unsigned idx = nir_intrinsic_base(instr);
+ unsigned num_slots = nir_intrinsic_io_semantics(instr).num_slots;
+ unsigned component = nir_intrinsic_component(instr);
+ unsigned write_mask = nir_intrinsic_write_mask(instr);
+ uint8_t *output_usage_mask = NULL;
+
+ if (instr->src[0].ssa->bit_size == 64)
+ write_mask = widen_writemask(write_mask);
+
+ switch (nir->info.stage) {
+ case MESA_SHADER_VERTEX:
+ output_usage_mask = info->vs.output_usage_mask;
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ output_usage_mask = info->tes.output_usage_mask;
+ break;
+ case MESA_SHADER_GEOMETRY:
+ output_usage_mask = info->gs.output_usage_mask;
+ break;
+ default:
+ break;
+ }
+
+ if (output_usage_mask) {
+ for (unsigned i = 0; i < num_slots; i++) {
+ output_usage_mask[idx + i] |= ((write_mask >> (i * 4)) & 0xf) << component;
+ }
+ }
}
static void
-gather_push_constant_info(const nir_shader *nir,
- const nir_intrinsic_instr *instr,
- struct radv_shader_info *info)
+gather_push_constant_info(const nir_shader *nir, const nir_intrinsic_instr *instr,
+ struct radv_shader_info *info)
{
- int base = nir_intrinsic_base(instr);
+ int base = nir_intrinsic_base(instr);
- if (!nir_src_is_const(instr->src[0])) {
- info->has_indirect_push_constants = true;
- } else {
- uint32_t min = base + nir_src_as_uint(instr->src[0]);
- uint32_t max = min + instr->num_components * 4;
+ if (!nir_src_is_const(instr->src[0])) {
+ info->has_indirect_push_constants = true;
+ } else {
+ uint32_t min = base + nir_src_as_uint(instr->src[0]);
+ uint32_t max = min + instr->num_components * 4;
- info->max_push_constant_used =
- MAX2(max, info->max_push_constant_used);
- info->min_push_constant_used =
- MIN2(min, info->min_push_constant_used);
- }
+ info->max_push_constant_used = MAX2(max, info->max_push_constant_used);
+ info->min_push_constant_used = MIN2(min, info->min_push_constant_used);
+ }
- if (instr->dest.ssa.bit_size != 32)
- info->has_only_32bit_push_constants = false;
+ if (instr->dest.ssa.bit_size != 32)
+ info->has_only_32bit_push_constants = false;
- info->loads_push_constants = true;
+ info->loads_push_constants = true;
}
static void
gather_intrinsic_info(const nir_shader *nir, const nir_intrinsic_instr *instr,
- struct radv_shader_info *info)
+ struct radv_shader_info *info)
{
- switch (instr->intrinsic) {
- case nir_intrinsic_load_barycentric_sample:
- case nir_intrinsic_load_barycentric_pixel:
- case nir_intrinsic_load_barycentric_centroid: {
- enum glsl_interp_mode mode = nir_intrinsic_interp_mode(instr);
- switch (mode) {
- case INTERP_MODE_NONE:
- case INTERP_MODE_SMOOTH:
- case INTERP_MODE_NOPERSPECTIVE:
- info->ps.uses_persp_or_linear_interp = true;
- break;
- default:
- break;
- }
- break;
- }
- case nir_intrinsic_load_barycentric_at_offset:
- case nir_intrinsic_load_barycentric_at_sample:
- if (nir_intrinsic_interp_mode(instr) != INTERP_MODE_FLAT)
- info->ps.uses_persp_or_linear_interp = true;
-
- if (instr->intrinsic == nir_intrinsic_load_barycentric_at_sample)
- info->ps.needs_sample_positions = true;
- break;
- case nir_intrinsic_load_draw_id:
- info->vs.needs_draw_id = true;
- break;
- case nir_intrinsic_load_base_instance:
- info->vs.needs_base_instance = true;
- break;
- case nir_intrinsic_load_instance_id:
- info->vs.needs_instance_id = true;
- break;
- case nir_intrinsic_load_num_work_groups:
- info->cs.uses_grid_size = true;
- break;
- case nir_intrinsic_load_local_invocation_id:
- case nir_intrinsic_load_work_group_id: {
- unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
- while (mask) {
- unsigned i = u_bit_scan(&mask);
-
- if (instr->intrinsic == nir_intrinsic_load_work_group_id)
- info->cs.uses_block_id[i] = true;
- else
- info->cs.uses_thread_id[i] = true;
- }
- break;
- }
- case nir_intrinsic_load_local_invocation_index:
- case nir_intrinsic_load_subgroup_id:
- case nir_intrinsic_load_num_subgroups:
- info->cs.uses_local_invocation_idx = true;
- break;
- case nir_intrinsic_load_sample_mask_in:
- info->ps.reads_sample_mask_in = true;
- break;
- case nir_intrinsic_load_view_index:
- info->needs_multiview_view_index = true;
- if (nir->info.stage == MESA_SHADER_FRAGMENT)
- info->ps.layer_input = true;
- break;
- case nir_intrinsic_load_layer_id:
- if (nir->info.stage == MESA_SHADER_FRAGMENT)
- info->ps.layer_input = true;
- break;
- case nir_intrinsic_load_invocation_id:
- info->uses_invocation_id = true;
- break;
- case nir_intrinsic_load_primitive_id:
- info->uses_prim_id = true;
- break;
- case nir_intrinsic_load_push_constant:
- gather_push_constant_info(nir, instr, info);
- break;
- case nir_intrinsic_vulkan_resource_index:
- info->desc_set_used_mask |= (1u << nir_intrinsic_desc_set(instr));
- break;
- case nir_intrinsic_image_deref_load:
- case nir_intrinsic_image_deref_sparse_load:
- case nir_intrinsic_image_deref_store:
- case nir_intrinsic_image_deref_atomic_add:
- case nir_intrinsic_image_deref_atomic_imin:
- case nir_intrinsic_image_deref_atomic_umin:
- case nir_intrinsic_image_deref_atomic_imax:
- case nir_intrinsic_image_deref_atomic_umax:
- case nir_intrinsic_image_deref_atomic_and:
- case nir_intrinsic_image_deref_atomic_or:
- case nir_intrinsic_image_deref_atomic_xor:
- case nir_intrinsic_image_deref_atomic_exchange:
- case nir_intrinsic_image_deref_atomic_comp_swap:
- case nir_intrinsic_image_deref_size: {
- nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr));
- mark_sampler_desc(var, info);
-
- if (instr->intrinsic == nir_intrinsic_image_deref_store ||
- instr->intrinsic == nir_intrinsic_image_deref_atomic_add ||
- instr->intrinsic == nir_intrinsic_image_deref_atomic_imin ||
- instr->intrinsic == nir_intrinsic_image_deref_atomic_umin ||
- instr->intrinsic == nir_intrinsic_image_deref_atomic_imax ||
- instr->intrinsic == nir_intrinsic_image_deref_atomic_umax ||
- instr->intrinsic == nir_intrinsic_image_deref_atomic_and ||
- instr->intrinsic == nir_intrinsic_image_deref_atomic_or ||
- instr->intrinsic == nir_intrinsic_image_deref_atomic_xor ||
- instr->intrinsic == nir_intrinsic_image_deref_atomic_exchange ||
- instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap) {
- set_writes_memory(nir, info);
- }
- break;
- }
- case nir_intrinsic_store_ssbo:
- case nir_intrinsic_ssbo_atomic_add:
- case nir_intrinsic_ssbo_atomic_imin:
- case nir_intrinsic_ssbo_atomic_umin:
- case nir_intrinsic_ssbo_atomic_imax:
- case nir_intrinsic_ssbo_atomic_umax:
- case nir_intrinsic_ssbo_atomic_and:
- case nir_intrinsic_ssbo_atomic_or:
- case nir_intrinsic_ssbo_atomic_xor:
- case nir_intrinsic_ssbo_atomic_exchange:
- case nir_intrinsic_ssbo_atomic_comp_swap:
- case nir_intrinsic_store_global:
- case nir_intrinsic_global_atomic_add:
- case nir_intrinsic_global_atomic_imin:
- case nir_intrinsic_global_atomic_umin:
- case nir_intrinsic_global_atomic_imax:
- case nir_intrinsic_global_atomic_umax:
- case nir_intrinsic_global_atomic_and:
- case nir_intrinsic_global_atomic_or:
- case nir_intrinsic_global_atomic_xor:
- case nir_intrinsic_global_atomic_exchange:
- case nir_intrinsic_global_atomic_comp_swap:
- set_writes_memory(nir, info);
- break;
- case nir_intrinsic_load_input:
- gather_intrinsic_load_input_info(nir, instr, info);
- break;
- case nir_intrinsic_store_output:
- gather_intrinsic_store_output_info(nir, instr, info);
- break;
- default:
- break;
- }
+ switch (instr->intrinsic) {
+ case nir_intrinsic_load_barycentric_sample:
+ case nir_intrinsic_load_barycentric_pixel:
+ case nir_intrinsic_load_barycentric_centroid: {
+ enum glsl_interp_mode mode = nir_intrinsic_interp_mode(instr);
+ switch (mode) {
+ case INTERP_MODE_NONE:
+ case INTERP_MODE_SMOOTH:
+ case INTERP_MODE_NOPERSPECTIVE:
+ info->ps.uses_persp_or_linear_interp = true;
+ break;
+ default:
+ break;
+ }
+ break;
+ }
+ case nir_intrinsic_load_barycentric_at_offset:
+ case nir_intrinsic_load_barycentric_at_sample:
+ if (nir_intrinsic_interp_mode(instr) != INTERP_MODE_FLAT)
+ info->ps.uses_persp_or_linear_interp = true;
+
+ if (instr->intrinsic == nir_intrinsic_load_barycentric_at_sample)
+ info->ps.needs_sample_positions = true;
+ break;
+ case nir_intrinsic_load_draw_id:
+ info->vs.needs_draw_id = true;
+ break;
+ case nir_intrinsic_load_base_instance:
+ info->vs.needs_base_instance = true;
+ break;
+ case nir_intrinsic_load_instance_id:
+ info->vs.needs_instance_id = true;
+ break;
+ case nir_intrinsic_load_num_work_groups:
+ info->cs.uses_grid_size = true;
+ break;
+ case nir_intrinsic_load_local_invocation_id:
+ case nir_intrinsic_load_work_group_id: {
+ unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
+ while (mask) {
+ unsigned i = u_bit_scan(&mask);
+
+ if (instr->intrinsic == nir_intrinsic_load_work_group_id)
+ info->cs.uses_block_id[i] = true;
+ else
+ info->cs.uses_thread_id[i] = true;
+ }
+ break;
+ }
+ case nir_intrinsic_load_local_invocation_index:
+ case nir_intrinsic_load_subgroup_id:
+ case nir_intrinsic_load_num_subgroups:
+ info->cs.uses_local_invocation_idx = true;
+ break;
+ case nir_intrinsic_load_sample_mask_in:
+ info->ps.reads_sample_mask_in = true;
+ break;
+ case nir_intrinsic_load_view_index:
+ info->needs_multiview_view_index = true;
+ if (nir->info.stage == MESA_SHADER_FRAGMENT)
+ info->ps.layer_input = true;
+ break;
+ case nir_intrinsic_load_layer_id:
+ if (nir->info.stage == MESA_SHADER_FRAGMENT)
+ info->ps.layer_input = true;
+ break;
+ case nir_intrinsic_load_invocation_id:
+ info->uses_invocation_id = true;
+ break;
+ case nir_intrinsic_load_primitive_id:
+ info->uses_prim_id = true;
+ break;
+ case nir_intrinsic_load_push_constant:
+ gather_push_constant_info(nir, instr, info);
+ break;
+ case nir_intrinsic_vulkan_resource_index:
+ info->desc_set_used_mask |= (1u << nir_intrinsic_desc_set(instr));
+ break;
+ case nir_intrinsic_image_deref_load:
+ case nir_intrinsic_image_deref_sparse_load:
+ case nir_intrinsic_image_deref_store:
+ case nir_intrinsic_image_deref_atomic_add:
+ case nir_intrinsic_image_deref_atomic_imin:
+ case nir_intrinsic_image_deref_atomic_umin:
+ case nir_intrinsic_image_deref_atomic_imax:
+ case nir_intrinsic_image_deref_atomic_umax:
+ case nir_intrinsic_image_deref_atomic_and:
+ case nir_intrinsic_image_deref_atomic_or:
+ case nir_intrinsic_image_deref_atomic_xor:
+ case nir_intrinsic_image_deref_atomic_exchange:
+ case nir_intrinsic_image_deref_atomic_comp_swap:
+ case nir_intrinsic_image_deref_size: {
+ nir_variable *var =
+ nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr));
+ mark_sampler_desc(var, info);
+
+ if (instr->intrinsic == nir_intrinsic_image_deref_store ||
+ instr->intrinsic == nir_intrinsic_image_deref_atomic_add ||
+ instr->intrinsic == nir_intrinsic_image_deref_atomic_imin ||
+ instr->intrinsic == nir_intrinsic_image_deref_atomic_umin ||
+ instr->intrinsic == nir_intrinsic_image_deref_atomic_imax ||
+ instr->intrinsic == nir_intrinsic_image_deref_atomic_umax ||
+ instr->intrinsic == nir_intrinsic_image_deref_atomic_and ||
+ instr->intrinsic == nir_intrinsic_image_deref_atomic_or ||
+ instr->intrinsic == nir_intrinsic_image_deref_atomic_xor ||
+ instr->intrinsic == nir_intrinsic_image_deref_atomic_exchange ||
+ instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap) {
+ set_writes_memory(nir, info);
+ }
+ break;
+ }
+ case nir_intrinsic_store_ssbo:
+ case nir_intrinsic_ssbo_atomic_add:
+ case nir_intrinsic_ssbo_atomic_imin:
+ case nir_intrinsic_ssbo_atomic_umin:
+ case nir_intrinsic_ssbo_atomic_imax:
+ case nir_intrinsic_ssbo_atomic_umax:
+ case nir_intrinsic_ssbo_atomic_and:
+ case nir_intrinsic_ssbo_atomic_or:
+ case nir_intrinsic_ssbo_atomic_xor:
+ case nir_intrinsic_ssbo_atomic_exchange:
+ case nir_intrinsic_ssbo_atomic_comp_swap:
+ case nir_intrinsic_store_global:
+ case nir_intrinsic_global_atomic_add:
+ case nir_intrinsic_global_atomic_imin:
+ case nir_intrinsic_global_atomic_umin:
+ case nir_intrinsic_global_atomic_imax:
+ case nir_intrinsic_global_atomic_umax:
+ case nir_intrinsic_global_atomic_and:
+ case nir_intrinsic_global_atomic_or:
+ case nir_intrinsic_global_atomic_xor:
+ case nir_intrinsic_global_atomic_exchange:
+ case nir_intrinsic_global_atomic_comp_swap:
+ set_writes_memory(nir, info);
+ break;
+ case nir_intrinsic_load_input:
+ gather_intrinsic_load_input_info(nir, instr, info);
+ break;
+ case nir_intrinsic_store_output:
+ gather_intrinsic_store_output_info(nir, instr, info);
+ break;
+ default:
+ break;
+ }
}
static void
-gather_tex_info(const nir_shader *nir, const nir_tex_instr *instr,
- struct radv_shader_info *info)
+gather_tex_info(const nir_shader *nir, const nir_tex_instr *instr, struct radv_shader_info *info)
{
- for (unsigned i = 0; i < instr->num_srcs; i++) {
- switch (instr->src[i].src_type) {
- case nir_tex_src_texture_deref:
- mark_sampler_desc(nir_deref_instr_get_variable(nir_src_as_deref(instr->src[i].src)), info);
- break;
- case nir_tex_src_sampler_deref:
- mark_sampler_desc(nir_deref_instr_get_variable(nir_src_as_deref(instr->src[i].src)), info);
- break;
- default:
- break;
- }
- }
+ for (unsigned i = 0; i < instr->num_srcs; i++) {
+ switch (instr->src[i].src_type) {
+ case nir_tex_src_texture_deref:
+ mark_sampler_desc(nir_deref_instr_get_variable(nir_src_as_deref(instr->src[i].src)), info);
+ break;
+ case nir_tex_src_sampler_deref:
+ mark_sampler_desc(nir_deref_instr_get_variable(nir_src_as_deref(instr->src[i].src)), info);
+ break;
+ default:
+ break;
+ }
+ }
}
static void
-gather_info_block(const nir_shader *nir, const nir_block *block,
- struct radv_shader_info *info)
+gather_info_block(const nir_shader *nir, const nir_block *block, struct radv_shader_info *info)
{
- nir_foreach_instr(instr, block) {
- switch (instr->type) {
- case nir_instr_type_intrinsic:
- gather_intrinsic_info(nir, nir_instr_as_intrinsic(instr), info);
- break;
- case nir_instr_type_tex:
- gather_tex_info(nir, nir_instr_as_tex(instr), info);
- break;
- default:
- break;
- }
- }
+ nir_foreach_instr (instr, block) {
+ switch (instr->type) {
+ case nir_instr_type_intrinsic:
+ gather_intrinsic_info(nir, nir_instr_as_intrinsic(instr), info);
+ break;
+ case nir_instr_type_tex:
+ gather_tex_info(nir, nir_instr_as_tex(instr), info);
+ break;
+ default:
+ break;
+ }
+ }
}
static void
gather_info_input_decl_vs(const nir_shader *nir, const nir_variable *var,
- struct radv_shader_info *info,
- const struct radv_shader_variant_key *key)
+ struct radv_shader_info *info, const struct radv_shader_variant_key *key)
{
- unsigned attrib_count = glsl_count_attribute_slots(var->type, true);
- int idx = var->data.location;
+ unsigned attrib_count = glsl_count_attribute_slots(var->type, true);
+ int idx = var->data.location;
- if (idx >= VERT_ATTRIB_GENERIC0 && idx < VERT_ATTRIB_GENERIC0 + MAX_VERTEX_ATTRIBS)
- info->vs.has_vertex_buffers = true;
+ if (idx >= VERT_ATTRIB_GENERIC0 && idx < VERT_ATTRIB_GENERIC0 + MAX_VERTEX_ATTRIBS)
+ info->vs.has_vertex_buffers = true;
- for (unsigned i = 0; i < attrib_count; ++i) {
- unsigned attrib_index = var->data.location + i - VERT_ATTRIB_GENERIC0;
+ for (unsigned i = 0; i < attrib_count; ++i) {
+ unsigned attrib_index = var->data.location + i - VERT_ATTRIB_GENERIC0;
- if (key->vs.instance_rate_inputs & (1u << attrib_index)) {
- info->vs.needs_instance_id = true;
- info->vs.needs_base_instance = true;
- }
- }
+ if (key->vs.instance_rate_inputs & (1u << attrib_index)) {
+ info->vs.needs_instance_id = true;
+ info->vs.needs_base_instance = true;
+ }
+ }
}
static void
-mark_16bit_ps_input(struct radv_shader_info *info, const struct glsl_type *type,
- int location)
+mark_16bit_ps_input(struct radv_shader_info *info, const struct glsl_type *type, int location)
{
- if (glsl_type_is_scalar(type) || glsl_type_is_vector(type) || glsl_type_is_matrix(type)) {
- unsigned attrib_count = glsl_count_attribute_slots(type, false);
- if (glsl_type_is_16bit(type)) {
- info->ps.float16_shaded_mask |= ((1ull << attrib_count) - 1) << location;
- }
- } else if (glsl_type_is_array(type)) {
- unsigned stride = glsl_count_attribute_slots(glsl_get_array_element(type), false);
- for (unsigned i = 0; i < glsl_get_length(type); ++i) {
- mark_16bit_ps_input(info, glsl_get_array_element(type), location + i * stride);
- }
- } else {
- assert(glsl_type_is_struct_or_ifc(type));
- for (unsigned i = 0; i < glsl_get_length(type); i++) {
- mark_16bit_ps_input(info, glsl_get_struct_field(type, i), location);
- location += glsl_count_attribute_slots(glsl_get_struct_field(type, i), false);
- }
- }
+ if (glsl_type_is_scalar(type) || glsl_type_is_vector(type) || glsl_type_is_matrix(type)) {
+ unsigned attrib_count = glsl_count_attribute_slots(type, false);
+ if (glsl_type_is_16bit(type)) {
+ info->ps.float16_shaded_mask |= ((1ull << attrib_count) - 1) << location;
+ }
+ } else if (glsl_type_is_array(type)) {
+ unsigned stride = glsl_count_attribute_slots(glsl_get_array_element(type), false);
+ for (unsigned i = 0; i < glsl_get_length(type); ++i) {
+ mark_16bit_ps_input(info, glsl_get_array_element(type), location + i * stride);
+ }
+ } else {
+ assert(glsl_type_is_struct_or_ifc(type));
+ for (unsigned i = 0; i < glsl_get_length(type); i++) {
+ mark_16bit_ps_input(info, glsl_get_struct_field(type, i), location);
+ location += glsl_count_attribute_slots(glsl_get_struct_field(type, i), false);
+ }
+ }
}
static void
gather_info_input_decl_ps(const nir_shader *nir, const nir_variable *var,
- struct radv_shader_info *info)
+ struct radv_shader_info *info)
{
- unsigned attrib_count = glsl_count_attribute_slots(var->type, false);
- int idx = var->data.location;
-
- switch (idx) {
- case VARYING_SLOT_PNTC:
- info->ps.has_pcoord = true;
- break;
- case VARYING_SLOT_PRIMITIVE_ID:
- info->ps.prim_id_input = true;
- break;
- case VARYING_SLOT_LAYER:
- info->ps.layer_input = true;
- break;
- case VARYING_SLOT_CLIP_DIST0:
- case VARYING_SLOT_CLIP_DIST1:
- info->ps.num_input_clips_culls += attrib_count;
- break;
- case VARYING_SLOT_VIEWPORT:
- info->ps.viewport_index_input = true;
- break;
- default:
- break;
- }
-
- if (var->data.compact) {
- unsigned component_count = var->data.location_frac +
- glsl_get_length(var->type);
- attrib_count = (component_count + 3) / 4;
- } else {
- mark_16bit_ps_input(info, var->type, var->data.driver_location);
- }
-
- uint64_t mask = ((1ull << attrib_count) - 1);
-
- if (var->data.interpolation == INTERP_MODE_FLAT)
- info->ps.flat_shaded_mask |= mask << var->data.driver_location;
- if (var->data.interpolation == INTERP_MODE_EXPLICIT)
- info->ps.explicit_shaded_mask |= mask << var->data.driver_location;
-
- if (var->data.location >= VARYING_SLOT_VAR0)
- info->ps.input_mask |= mask << (var->data.location - VARYING_SLOT_VAR0);
+ unsigned attrib_count = glsl_count_attribute_slots(var->type, false);
+ int idx = var->data.location;
+
+ switch (idx) {
+ case VARYING_SLOT_PNTC:
+ info->ps.has_pcoord = true;
+ break;
+ case VARYING_SLOT_PRIMITIVE_ID:
+ info->ps.prim_id_input = true;
+ break;
+ case VARYING_SLOT_LAYER:
+ info->ps.layer_input = true;
+ break;
+ case VARYING_SLOT_CLIP_DIST0:
+ case VARYING_SLOT_CLIP_DIST1:
+ info->ps.num_input_clips_culls += attrib_count;
+ break;
+ case VARYING_SLOT_VIEWPORT:
+ info->ps.viewport_index_input = true;
+ break;
+ default:
+ break;
+ }
+
+ if (var->data.compact) {
+ unsigned component_count = var->data.location_frac + glsl_get_length(var->type);
+ attrib_count = (component_count + 3) / 4;
+ } else {
+ mark_16bit_ps_input(info, var->type, var->data.driver_location);
+ }
+
+ uint64_t mask = ((1ull << attrib_count) - 1);
+
+ if (var->data.interpolation == INTERP_MODE_FLAT)
+ info->ps.flat_shaded_mask |= mask << var->data.driver_location;
+ if (var->data.interpolation == INTERP_MODE_EXPLICIT)
+ info->ps.explicit_shaded_mask |= mask << var->data.driver_location;
+
+ if (var->data.location >= VARYING_SLOT_VAR0)
+ info->ps.input_mask |= mask << (var->data.location - VARYING_SLOT_VAR0);
}
static void
gather_info_input_decl(const nir_shader *nir, const nir_variable *var,
- struct radv_shader_info *info,
- const struct radv_shader_variant_key *key)
+ struct radv_shader_info *info, const struct radv_shader_variant_key *key)
{
- switch (nir->info.stage) {
- case MESA_SHADER_VERTEX:
- gather_info_input_decl_vs(nir, var, info, key);
- break;
- case MESA_SHADER_FRAGMENT:
- gather_info_input_decl_ps(nir, var, info);
- break;
- default:
- break;
- }
+ switch (nir->info.stage) {
+ case MESA_SHADER_VERTEX:
+ gather_info_input_decl_vs(nir, var, info, key);
+ break;
+ case MESA_SHADER_FRAGMENT:
+ gather_info_input_decl_ps(nir, var, info);
+ break;
+ default:
+ break;
+ }
}
static void
gather_info_output_decl_ps(const nir_shader *nir, const nir_variable *var,
- struct radv_shader_info *info)
+ struct radv_shader_info *info)
{
- int idx = var->data.location;
-
- switch (idx) {
- case FRAG_RESULT_DEPTH:
- info->ps.writes_z = true;
- break;
- case FRAG_RESULT_STENCIL:
- info->ps.writes_stencil = true;
- break;
- case FRAG_RESULT_SAMPLE_MASK:
- info->ps.writes_sample_mask = true;
- break;
- default:
- break;
- }
+ int idx = var->data.location;
+
+ switch (idx) {
+ case FRAG_RESULT_DEPTH:
+ info->ps.writes_z = true;
+ break;
+ case FRAG_RESULT_STENCIL:
+ info->ps.writes_stencil = true;
+ break;
+ case FRAG_RESULT_SAMPLE_MASK:
+ info->ps.writes_sample_mask = true;
+ break;
+ default:
+ break;
+ }
}
static void
gather_info_output_decl_gs(const nir_shader *nir, const nir_variable *var,
- struct radv_shader_info *info)
+ struct radv_shader_info *info)
{
- unsigned num_components = glsl_get_component_slots(var->type);
- unsigned stream = var->data.stream;
- unsigned idx = var->data.location;
+ unsigned num_components = glsl_get_component_slots(var->type);
+ unsigned stream = var->data.stream;
+ unsigned idx = var->data.location;
- assert(stream < 4);
+ assert(stream < 4);
- info->gs.max_stream = MAX2(info->gs.max_stream, stream);
- info->gs.num_stream_output_components[stream] += num_components;
- info->gs.output_streams[idx] = stream;
+ info->gs.max_stream = MAX2(info->gs.max_stream, stream);
+ info->gs.num_stream_output_components[stream] += num_components;
+ info->gs.output_streams[idx] = stream;
}
static void
gather_info_output_decl(const nir_shader *nir, const nir_variable *var,
- struct radv_shader_info *info,
- const struct radv_shader_variant_key *key)
+ struct radv_shader_info *info, const struct radv_shader_variant_key *key)
{
- struct radv_vs_output_info *vs_info = NULL;
-
- switch (nir->info.stage) {
- case MESA_SHADER_FRAGMENT:
- gather_info_output_decl_ps(nir, var, info);
- break;
- case MESA_SHADER_VERTEX:
- if (!key->vs_common_out.as_ls &&
- !key->vs_common_out.as_es)
- vs_info = &info->vs.outinfo;
-
- /* TODO: Adjust as_ls/as_nng. */
- if (!key->vs_common_out.as_ls && key->vs_common_out.as_ngg)
- gather_info_output_decl_gs(nir, var, info);
- break;
- case MESA_SHADER_GEOMETRY:
- vs_info = &info->vs.outinfo;
- gather_info_output_decl_gs(nir, var, info);
- break;
- case MESA_SHADER_TESS_EVAL:
- if (!key->vs_common_out.as_es)
- vs_info = &info->tes.outinfo;
- break;
- default:
- break;
- }
-
- if (vs_info) {
- switch (var->data.location) {
- case VARYING_SLOT_CLIP_DIST0:
- vs_info->clip_dist_mask =
- (1 << nir->info.clip_distance_array_size) - 1;
- vs_info->cull_dist_mask =
- (1 << nir->info.cull_distance_array_size) - 1;
- vs_info->cull_dist_mask <<= nir->info.clip_distance_array_size;
- break;
- case VARYING_SLOT_PSIZ:
- vs_info->writes_pointsize = true;
- break;
- case VARYING_SLOT_VIEWPORT:
- vs_info->writes_viewport_index = true;
- break;
- case VARYING_SLOT_LAYER:
- vs_info->writes_layer = true;
- break;
- case VARYING_SLOT_PRIMITIVE_SHADING_RATE:
- vs_info->writes_primitive_shading_rate = true;
- break;
- default:
- break;
- }
- }
+ struct radv_vs_output_info *vs_info = NULL;
+
+ switch (nir->info.stage) {
+ case MESA_SHADER_FRAGMENT:
+ gather_info_output_decl_ps(nir, var, info);
+ break;
+ case MESA_SHADER_VERTEX:
+ if (!key->vs_common_out.as_ls && !key->vs_common_out.as_es)
+ vs_info = &info->vs.outinfo;
+
+ /* TODO: Adjust as_ls/as_ngg. */
+ if (!key->vs_common_out.as_ls && key->vs_common_out.as_ngg)
+ gather_info_output_decl_gs(nir, var, info);
+ break;
+ case MESA_SHADER_GEOMETRY:
+ vs_info = &info->vs.outinfo;
+ gather_info_output_decl_gs(nir, var, info);
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ if (!key->vs_common_out.as_es)
+ vs_info = &info->tes.outinfo;
+ break;
+ default:
+ break;
+ }
+
+ if (vs_info) {
+ switch (var->data.location) {
+ case VARYING_SLOT_CLIP_DIST0:
+ vs_info->clip_dist_mask = (1 << nir->info.clip_distance_array_size) - 1;
+ vs_info->cull_dist_mask = (1 << nir->info.cull_distance_array_size) - 1;
+ vs_info->cull_dist_mask <<= nir->info.clip_distance_array_size;
+ break;
+ case VARYING_SLOT_PSIZ:
+ vs_info->writes_pointsize = true;
+ break;
+ case VARYING_SLOT_VIEWPORT:
+ vs_info->writes_viewport_index = true;
+ break;
+ case VARYING_SLOT_LAYER:
+ vs_info->writes_layer = true;
+ break;
+ case VARYING_SLOT_PRIMITIVE_SHADING_RATE:
+ vs_info->writes_primitive_shading_rate = true;
+ break;
+ default:
+ break;
+ }
+ }
}
static void
gather_xfb_info(const nir_shader *nir, struct radv_shader_info *info)
{
- nir_xfb_info *xfb = nir_gather_xfb_info(nir, NULL);
- struct radv_streamout_info *so = &info->so;
-
- if (!xfb)
- return;
+ nir_xfb_info *xfb = nir_gather_xfb_info(nir, NULL);
+ struct radv_streamout_info *so = &info->so;
- assert(xfb->output_count < MAX_SO_OUTPUTS);
- so->num_outputs = xfb->output_count;
+ if (!xfb)
+ return;
- for (unsigned i = 0; i < xfb->output_count; i++) {
- struct radv_stream_output *output = &so->outputs[i];
+ assert(xfb->output_count < MAX_SO_OUTPUTS);
+ so->num_outputs = xfb->output_count;
- output->buffer = xfb->outputs[i].buffer;
- output->stream = xfb->buffer_to_stream[xfb->outputs[i].buffer];
- output->offset = xfb->outputs[i].offset;
- output->location = xfb->outputs[i].location;
- output->component_mask = xfb->outputs[i].component_mask;
+ for (unsigned i = 0; i < xfb->output_count; i++) {
+ struct radv_stream_output *output = &so->outputs[i];
- so->enabled_stream_buffers_mask |=
- (1 << output->buffer) << (output->stream * 4);
+ output->buffer = xfb->outputs[i].buffer;
+ output->stream = xfb->buffer_to_stream[xfb->outputs[i].buffer];
+ output->offset = xfb->outputs[i].offset;
+ output->location = xfb->outputs[i].location;
+ output->component_mask = xfb->outputs[i].component_mask;
- }
+ so->enabled_stream_buffers_mask |= (1 << output->buffer) << (output->stream * 4);
+ }
- for (unsigned i = 0; i < NIR_MAX_XFB_BUFFERS; i++) {
- so->strides[i] = xfb->buffers[i].stride / 4;
- }
+ for (unsigned i = 0; i < NIR_MAX_XFB_BUFFERS; i++) {
+ so->strides[i] = xfb->buffers[i].stride / 4;
+ }
- ralloc_free(xfb);
+ ralloc_free(xfb);
}
void
radv_nir_shader_info_init(struct radv_shader_info *info)
{
- /* Assume that shaders only have 32-bit push constants by default. */
- info->min_push_constant_used = UINT8_MAX;
- info->has_only_32bit_push_constants = true;
+ /* Assume that shaders only have 32-bit push constants by default. */
+ info->min_push_constant_used = UINT8_MAX;
+ info->has_only_32bit_push_constants = true;
}
void
-radv_nir_shader_info_pass(const struct nir_shader *nir,
- const struct radv_pipeline_layout *layout,
- const struct radv_shader_variant_key *key,
- struct radv_shader_info *info)
+radv_nir_shader_info_pass(const struct nir_shader *nir, const struct radv_pipeline_layout *layout,
+ const struct radv_shader_variant_key *key, struct radv_shader_info *info)
{
- struct nir_function *func =
- (struct nir_function *)exec_list_get_head_const(&nir->functions);
-
- if (layout && layout->dynamic_offset_count &&
- (layout->dynamic_shader_stages & mesa_to_vk_shader_stage(nir->info.stage))) {
- info->loads_push_constants = true;
- info->loads_dynamic_offsets = true;
- }
-
- nir_foreach_shader_in_variable(variable, nir)
- gather_info_input_decl(nir, variable, info, key);
-
- nir_foreach_block(block, func->impl) {
- gather_info_block(nir, block, info);
- }
-
- nir_foreach_shader_out_variable(variable, nir)
- gather_info_output_decl(nir, variable, info, key);
-
- if (nir->info.stage == MESA_SHADER_VERTEX ||
- nir->info.stage == MESA_SHADER_TESS_EVAL ||
- nir->info.stage == MESA_SHADER_GEOMETRY)
- gather_xfb_info(nir, info);
-
- /* Make sure to export the LayerID if the fragment shader needs it. */
- if (key->vs_common_out.export_layer_id) {
- switch (nir->info.stage) {
- case MESA_SHADER_VERTEX:
- info->vs.output_usage_mask[VARYING_SLOT_LAYER] |= 0x1;
- break;
- case MESA_SHADER_TESS_EVAL:
- info->tes.output_usage_mask[VARYING_SLOT_LAYER] |= 0x1;
- break;
- case MESA_SHADER_GEOMETRY:
- info->gs.output_usage_mask[VARYING_SLOT_LAYER] |= 0x1;
- break;
- default:
- break;
- }
- }
-
- /* Make sure to export the LayerID if the subpass has multiviews. */
- if (key->has_multiview_view_index) {
- switch (nir->info.stage) {
- case MESA_SHADER_VERTEX:
- info->vs.outinfo.writes_layer = true;
- break;
- case MESA_SHADER_TESS_EVAL:
- info->tes.outinfo.writes_layer = true;
- break;
- case MESA_SHADER_GEOMETRY:
- info->vs.outinfo.writes_layer = true;
- break;
- default:
- break;
- }
- }
-
- /* Make sure to export the PrimitiveID if the fragment shader needs it. */
- if (key->vs_common_out.export_prim_id) {
- switch (nir->info.stage) {
- case MESA_SHADER_VERTEX:
- info->vs.outinfo.export_prim_id = true;
- break;
- case MESA_SHADER_TESS_EVAL:
- info->tes.outinfo.export_prim_id = true;
- break;
- case MESA_SHADER_GEOMETRY:
- info->vs.outinfo.export_prim_id = true;
- break;
- default:
- break;
- }
- }
-
- /* Make sure to export the ViewportIndex if the fragment shader needs it. */
- if (key->vs_common_out.export_viewport_index) {
- switch (nir->info.stage) {
- case MESA_SHADER_VERTEX:
- info->vs.output_usage_mask[VARYING_SLOT_VIEWPORT] |= 0x1;
- break;
- case MESA_SHADER_TESS_EVAL:
- info->tes.output_usage_mask[VARYING_SLOT_VIEWPORT] |= 0x1;
- break;
- case MESA_SHADER_GEOMETRY:
- info->gs.output_usage_mask[VARYING_SLOT_VIEWPORT] |= 0x1;
- break;
- default:
- break;
- }
- }
-
- if (nir->info.stage == MESA_SHADER_FRAGMENT)
- info->ps.num_interp = nir->num_inputs;
-
- switch (nir->info.stage) {
- case MESA_SHADER_COMPUTE:
- for (int i = 0; i < 3; ++i)
- info->cs.block_size[i] = nir->info.cs.local_size[i];
- break;
- case MESA_SHADER_FRAGMENT:
- info->ps.can_discard = nir->info.fs.uses_discard;
- info->ps.early_fragment_test = nir->info.fs.early_fragment_tests;
- info->ps.post_depth_coverage = nir->info.fs.post_depth_coverage;
- info->ps.depth_layout = nir->info.fs.depth_layout;
- info->ps.uses_sample_shading = nir->info.fs.uses_sample_shading;
- break;
- case MESA_SHADER_GEOMETRY:
- info->gs.vertices_in = nir->info.gs.vertices_in;
- info->gs.vertices_out = nir->info.gs.vertices_out;
- info->gs.output_prim = nir->info.gs.output_primitive;
- info->gs.invocations = nir->info.gs.invocations;
- break;
- case MESA_SHADER_TESS_EVAL:
- info->tes.primitive_mode = nir->info.tess.primitive_mode;
- info->tes.spacing = nir->info.tess.spacing;
- info->tes.ccw = nir->info.tess.ccw;
- info->tes.point_mode = nir->info.tess.point_mode;
- info->tes.as_es = key->vs_common_out.as_es;
- info->tes.export_prim_id = key->vs_common_out.export_prim_id;
- info->is_ngg = key->vs_common_out.as_ngg;
- info->is_ngg_passthrough = key->vs_common_out.as_ngg_passthrough;
- break;
- case MESA_SHADER_TESS_CTRL:
- info->tcs.tcs_vertices_out = nir->info.tess.tcs_vertices_out;
- break;
- case MESA_SHADER_VERTEX:
- info->vs.as_es = key->vs_common_out.as_es;
- info->vs.as_ls = key->vs_common_out.as_ls;
- info->vs.export_prim_id = key->vs_common_out.export_prim_id;
- info->is_ngg = key->vs_common_out.as_ngg;
- info->is_ngg_passthrough = key->vs_common_out.as_ngg_passthrough;
- break;
- default:
- break;
- }
-
- if (nir->info.stage == MESA_SHADER_GEOMETRY) {
- unsigned add_clip = nir->info.clip_distance_array_size +
- nir->info.cull_distance_array_size > 4;
- info->gs.gsvs_vertex_size =
- (util_bitcount64(nir->info.outputs_written) + add_clip) * 16;
- info->gs.max_gsvs_emit_size =
- info->gs.gsvs_vertex_size * nir->info.gs.vertices_out;
- }
-
- /* Compute the ESGS item size for VS or TES as ES. */
- if ((nir->info.stage == MESA_SHADER_VERTEX ||
- nir->info.stage == MESA_SHADER_TESS_EVAL) &&
- key->vs_common_out.as_es) {
- struct radv_es_output_info *es_info =
- nir->info.stage == MESA_SHADER_VERTEX ? &info->vs.es_info : &info->tes.es_info;
- uint32_t num_outputs_written = nir->info.stage == MESA_SHADER_VERTEX
- ? info->vs.num_linked_outputs
- : info->tes.num_linked_outputs;
- es_info->esgs_itemsize = num_outputs_written * 16;
- }
-
- info->float_controls_mode = nir->info.float_controls_execution_mode;
-
- if (nir->info.stage == MESA_SHADER_FRAGMENT) {
- info->ps.allow_flat_shading =
- !(info->ps.uses_persp_or_linear_interp ||
- info->ps.needs_sample_positions ||
- info->ps.writes_memory ||
- nir->info.fs.needs_quad_helper_invocations ||
- BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) ||
- BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_POINT_COORD) ||
- BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID) ||
- BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_POS) ||
- BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN) ||
- BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_HELPER_INVOCATION));
- }
+ struct nir_function *func = (struct nir_function *)exec_list_get_head_const(&nir->functions);
+
+ if (layout && layout->dynamic_offset_count &&
+ (layout->dynamic_shader_stages & mesa_to_vk_shader_stage(nir->info.stage))) {
+ info->loads_push_constants = true;
+ info->loads_dynamic_offsets = true;
+ }
+
+ nir_foreach_shader_in_variable (variable, nir)
+ gather_info_input_decl(nir, variable, info, key);
+
+ nir_foreach_block (block, func->impl) {
+ gather_info_block(nir, block, info);
+ }
+
+ nir_foreach_shader_out_variable(variable, nir) gather_info_output_decl(nir, variable, info, key);
+
+ if (nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_TESS_EVAL ||
+ nir->info.stage == MESA_SHADER_GEOMETRY)
+ gather_xfb_info(nir, info);
+
+ /* Make sure to export the LayerID if the fragment shader needs it. */
+ if (key->vs_common_out.export_layer_id) {
+ switch (nir->info.stage) {
+ case MESA_SHADER_VERTEX:
+ info->vs.output_usage_mask[VARYING_SLOT_LAYER] |= 0x1;
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ info->tes.output_usage_mask[VARYING_SLOT_LAYER] |= 0x1;
+ break;
+ case MESA_SHADER_GEOMETRY:
+ info->gs.output_usage_mask[VARYING_SLOT_LAYER] |= 0x1;
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* Make sure to export the LayerID if the subpass has multiviews. */
+ if (key->has_multiview_view_index) {
+ switch (nir->info.stage) {
+ case MESA_SHADER_VERTEX:
+ info->vs.outinfo.writes_layer = true;
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ info->tes.outinfo.writes_layer = true;
+ break;
+ case MESA_SHADER_GEOMETRY:
+ info->vs.outinfo.writes_layer = true;
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* Make sure to export the PrimitiveID if the fragment shader needs it. */
+ if (key->vs_common_out.export_prim_id) {
+ switch (nir->info.stage) {
+ case MESA_SHADER_VERTEX:
+ info->vs.outinfo.export_prim_id = true;
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ info->tes.outinfo.export_prim_id = true;
+ break;
+ case MESA_SHADER_GEOMETRY:
+ info->vs.outinfo.export_prim_id = true;
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* Make sure to export the ViewportIndex if the fragment shader needs it. */
+ if (key->vs_common_out.export_viewport_index) {
+ switch (nir->info.stage) {
+ case MESA_SHADER_VERTEX:
+ info->vs.output_usage_mask[VARYING_SLOT_VIEWPORT] |= 0x1;
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ info->tes.output_usage_mask[VARYING_SLOT_VIEWPORT] |= 0x1;
+ break;
+ case MESA_SHADER_GEOMETRY:
+ info->gs.output_usage_mask[VARYING_SLOT_VIEWPORT] |= 0x1;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (nir->info.stage == MESA_SHADER_FRAGMENT)
+ info->ps.num_interp = nir->num_inputs;
+
+ switch (nir->info.stage) {
+ case MESA_SHADER_COMPUTE:
+ for (int i = 0; i < 3; ++i)
+ info->cs.block_size[i] = nir->info.cs.local_size[i];
+ break;
+ case MESA_SHADER_FRAGMENT:
+ info->ps.can_discard = nir->info.fs.uses_discard;
+ info->ps.early_fragment_test = nir->info.fs.early_fragment_tests;
+ info->ps.post_depth_coverage = nir->info.fs.post_depth_coverage;
+ info->ps.depth_layout = nir->info.fs.depth_layout;
+ info->ps.uses_sample_shading = nir->info.fs.uses_sample_shading;
+ break;
+ case MESA_SHADER_GEOMETRY:
+ info->gs.vertices_in = nir->info.gs.vertices_in;
+ info->gs.vertices_out = nir->info.gs.vertices_out;
+ info->gs.output_prim = nir->info.gs.output_primitive;
+ info->gs.invocations = nir->info.gs.invocations;
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ info->tes.primitive_mode = nir->info.tess.primitive_mode;
+ info->tes.spacing = nir->info.tess.spacing;
+ info->tes.ccw = nir->info.tess.ccw;
+ info->tes.point_mode = nir->info.tess.point_mode;
+ info->tes.as_es = key->vs_common_out.as_es;
+ info->tes.export_prim_id = key->vs_common_out.export_prim_id;
+ info->is_ngg = key->vs_common_out.as_ngg;
+ info->is_ngg_passthrough = key->vs_common_out.as_ngg_passthrough;
+ break;
+ case MESA_SHADER_TESS_CTRL:
+ info->tcs.tcs_vertices_out = nir->info.tess.tcs_vertices_out;
+ break;
+ case MESA_SHADER_VERTEX:
+ info->vs.as_es = key->vs_common_out.as_es;
+ info->vs.as_ls = key->vs_common_out.as_ls;
+ info->vs.export_prim_id = key->vs_common_out.export_prim_id;
+ info->is_ngg = key->vs_common_out.as_ngg;
+ info->is_ngg_passthrough = key->vs_common_out.as_ngg_passthrough;
+ break;
+ default:
+ break;
+ }
+
+ if (nir->info.stage == MESA_SHADER_GEOMETRY) {
+ unsigned add_clip =
+ nir->info.clip_distance_array_size + nir->info.cull_distance_array_size > 4;
+ info->gs.gsvs_vertex_size = (util_bitcount64(nir->info.outputs_written) + add_clip) * 16;
+ info->gs.max_gsvs_emit_size = info->gs.gsvs_vertex_size * nir->info.gs.vertices_out;
+ }
+
+ /* Compute the ESGS item size for VS or TES as ES. */
+ if ((nir->info.stage == MESA_SHADER_VERTEX || nir->info.stage == MESA_SHADER_TESS_EVAL) &&
+ key->vs_common_out.as_es) {
+ struct radv_es_output_info *es_info =
+ nir->info.stage == MESA_SHADER_VERTEX ? &info->vs.es_info : &info->tes.es_info;
+ uint32_t num_outputs_written = nir->info.stage == MESA_SHADER_VERTEX
+ ? info->vs.num_linked_outputs
+ : info->tes.num_linked_outputs;
+ es_info->esgs_itemsize = num_outputs_written * 16;
+ }
+
+ info->float_controls_mode = nir->info.float_controls_execution_mode;
+
+ if (nir->info.stage == MESA_SHADER_FRAGMENT) {
+ info->ps.allow_flat_shading =
+ !(info->ps.uses_persp_or_linear_interp || info->ps.needs_sample_positions ||
+ info->ps.writes_memory || nir->info.fs.needs_quad_helper_invocations ||
+ BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) ||
+ BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_POINT_COORD) ||
+ BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID) ||
+ BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_POS) ||
+ BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN) ||
+ BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_HELPER_INVOCATION));
+ }
}
diff --git a/src/amd/vulkan/radv_sqtt.c b/src/amd/vulkan/radv_sqtt.c
index bde7924c40e..ed3a6e8168f 100644
--- a/src/amd/vulkan/radv_sqtt.c
+++ b/src/amd/vulkan/radv_sqtt.c
@@ -23,8 +23,8 @@
#include <inttypes.h>
-#include "radv_private.h"
#include "radv_cs.h"
+#include "radv_private.h"
#include "sid.h"
#define SQTT_BUFFER_ALIGN_SHIFT 12
@@ -32,650 +32,604 @@
static bool
radv_se_is_disabled(struct radv_device *device, unsigned se)
{
- /* No active CU on the SE means it is disabled. */
- return device->physical_device->rad_info.cu_mask[se][0] == 0;
+ /* No active CU on the SE means it is disabled. */
+ return device->physical_device->rad_info.cu_mask[se][0] == 0;
}
static void
-radv_emit_thread_trace_start(struct radv_device *device,
- struct radeon_cmdbuf *cs,
- uint32_t queue_family_index)
+radv_emit_thread_trace_start(struct radv_device *device, struct radeon_cmdbuf *cs,
+ uint32_t queue_family_index)
{
- uint32_t shifted_size = device->thread_trace.buffer_size >> SQTT_BUFFER_ALIGN_SHIFT;
- struct radeon_info *rad_info = &device->physical_device->rad_info;
- unsigned max_se = rad_info->max_se;
-
- assert(device->physical_device->rad_info.chip_class >= GFX8);
-
- for (unsigned se = 0; se < max_se; se++) {
- uint64_t va = radv_buffer_get_va(device->thread_trace.bo);
- uint64_t data_va = ac_thread_trace_get_data_va(rad_info, &device->thread_trace, va, se);
- uint64_t shifted_va = data_va >> SQTT_BUFFER_ALIGN_SHIFT;
- int first_active_cu = ffs(device->physical_device->rad_info.cu_mask[se][0]);
-
- if (radv_se_is_disabled(device, se))
- continue;
-
- /* Target SEx and SH0. */
- radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX,
- S_030800_SE_INDEX(se) |
- S_030800_SH_INDEX(0) |
- S_030800_INSTANCE_BROADCAST_WRITES(1));
-
- if (device->physical_device->rad_info.chip_class >= GFX10) {
- /* Order seems important for the following 2 registers. */
- radeon_set_privileged_config_reg(cs, R_008D04_SQ_THREAD_TRACE_BUF0_SIZE,
- S_008D04_SIZE(shifted_size) |
- S_008D04_BASE_HI(shifted_va >> 32));
-
- radeon_set_privileged_config_reg(cs, R_008D00_SQ_THREAD_TRACE_BUF0_BASE,
- S_008D00_BASE_LO(shifted_va));
-
- radeon_set_privileged_config_reg(cs, R_008D14_SQ_THREAD_TRACE_MASK,
- S_008D14_WTYPE_INCLUDE(0x7f) | /* all shader stages */
- S_008D14_SA_SEL(0) |
- S_008D14_WGP_SEL(first_active_cu / 2) |
- S_008D14_SIMD_SEL(0));
-
- uint32_t thread_trace_token_mask =
- S_008D18_REG_INCLUDE(V_008D18_REG_INCLUDE_SQDEC |
- V_008D18_REG_INCLUDE_SHDEC |
- V_008D18_REG_INCLUDE_GFXUDEC |
- V_008D18_REG_INCLUDE_COMP |
- V_008D18_REG_INCLUDE_CONTEXT |
- V_008D18_REG_INCLUDE_CONFIG);
-
- /* Performance counters with SQTT are considered
- * deprecated.
- */
- thread_trace_token_mask |= S_008D18_TOKEN_EXCLUDE(V_008D18_TOKEN_EXCLUDE_PERF);
-
- radeon_set_privileged_config_reg(cs, R_008D18_SQ_THREAD_TRACE_TOKEN_MASK,
- thread_trace_token_mask);
-
- uint32_t thread_trace_ctrl = S_008D1C_MODE(1) |
- S_008D1C_HIWATER(5) |
- S_008D1C_UTIL_TIMER(1) |
- S_008D1C_RT_FREQ(2) | /* 4096 clk */
- S_008D1C_DRAW_EVENT_EN(1) |
- S_008D1C_REG_STALL_EN(1) |
- S_008D1C_SPI_STALL_EN(1) |
- S_008D1C_SQ_STALL_EN(1) |
- S_008D1C_REG_DROP_ON_STALL(0);
-
- if (device->physical_device->rad_info.chip_class == GFX10_3)
- thread_trace_ctrl |= S_008D1C_LOWATER_OFFSET(4);
-
- /* Should be emitted last (it enables thread traces). */
- radeon_set_privileged_config_reg(cs, R_008D1C_SQ_THREAD_TRACE_CTRL,
- thread_trace_ctrl);
- } else {
- /* Order seems important for the following 4 registers. */
- radeon_set_uconfig_reg(cs, R_030CDC_SQ_THREAD_TRACE_BASE2,
- S_030CDC_ADDR_HI(shifted_va >> 32));
-
- radeon_set_uconfig_reg(cs, R_030CC0_SQ_THREAD_TRACE_BASE,
- S_030CC0_ADDR(shifted_va));
-
- radeon_set_uconfig_reg(cs, R_030CC4_SQ_THREAD_TRACE_SIZE,
- S_030CC4_SIZE(shifted_size));
-
- radeon_set_uconfig_reg(cs, R_030CD4_SQ_THREAD_TRACE_CTRL,
- S_030CD4_RESET_BUFFER(1));
-
- uint32_t thread_trace_mask = S_030CC8_CU_SEL(first_active_cu) |
- S_030CC8_SH_SEL(0) |
- S_030CC8_SIMD_EN(0xf) |
- S_030CC8_VM_ID_MASK(0) |
- S_030CC8_REG_STALL_EN(1) |
- S_030CC8_SPI_STALL_EN(1) |
- S_030CC8_SQ_STALL_EN(1);
-
- if (device->physical_device->rad_info.chip_class < GFX9) {
- thread_trace_mask |= S_030CC8_RANDOM_SEED(0xffff);
- }
-
- radeon_set_uconfig_reg(cs, R_030CC8_SQ_THREAD_TRACE_MASK,
- thread_trace_mask);
-
- /* Trace all tokens and registers. */
- radeon_set_uconfig_reg(cs, R_030CCC_SQ_THREAD_TRACE_TOKEN_MASK,
- S_030CCC_TOKEN_MASK(0xbfff) |
- S_030CCC_REG_MASK(0xff) |
- S_030CCC_REG_DROP_ON_STALL(0));
-
- /* Enable SQTT perf counters for all CUs. */
- radeon_set_uconfig_reg(cs, R_030CD0_SQ_THREAD_TRACE_PERF_MASK,
- S_030CD0_SH0_MASK(0xffff) |
- S_030CD0_SH1_MASK(0xffff));
-
- radeon_set_uconfig_reg(cs, R_030CE0_SQ_THREAD_TRACE_TOKEN_MASK2,
- S_030CE0_INST_MASK(0xffffffff));
-
- radeon_set_uconfig_reg(cs, R_030CEC_SQ_THREAD_TRACE_HIWATER,
- S_030CEC_HIWATER(4));
-
- if (device->physical_device->rad_info.chip_class == GFX9) {
- /* Reset thread trace status errors. */
- radeon_set_uconfig_reg(cs, R_030CE8_SQ_THREAD_TRACE_STATUS,
- S_030CE8_UTC_ERROR(0));
- }
-
- /* Enable the thread trace mode. */
- uint32_t thread_trace_mode = S_030CD8_MASK_PS(1) |
- S_030CD8_MASK_VS(1) |
- S_030CD8_MASK_GS(1) |
- S_030CD8_MASK_ES(1) |
- S_030CD8_MASK_HS(1) |
- S_030CD8_MASK_LS(1) |
- S_030CD8_MASK_CS(1) |
- S_030CD8_AUTOFLUSH_EN(1) | /* periodically flush SQTT data to memory */
- S_030CD8_MODE(1);
-
- if (device->physical_device->rad_info.chip_class == GFX9) {
- /* Count SQTT traffic in TCC perf counters. */
- thread_trace_mode |= S_030CD8_TC_PERF_EN(1);
- }
-
- radeon_set_uconfig_reg(cs, R_030CD8_SQ_THREAD_TRACE_MODE,
- thread_trace_mode);
- }
- }
-
- /* Restore global broadcasting. */
- radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX,
- S_030800_SE_BROADCAST_WRITES(1) |
- S_030800_SH_BROADCAST_WRITES(1) |
- S_030800_INSTANCE_BROADCAST_WRITES(1));
-
- /* Start the thread trace with a different event based on the queue. */
- if (queue_family_index == RADV_QUEUE_COMPUTE &&
- device->physical_device->rad_info.chip_class >= GFX7) {
- radeon_set_sh_reg(cs, R_00B878_COMPUTE_THREAD_TRACE_ENABLE,
- S_00B878_THREAD_TRACE_ENABLE(1));
- } else {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_THREAD_TRACE_START) | EVENT_INDEX(0));
- }
+ uint32_t shifted_size = device->thread_trace.buffer_size >> SQTT_BUFFER_ALIGN_SHIFT;
+ struct radeon_info *rad_info = &device->physical_device->rad_info;
+ unsigned max_se = rad_info->max_se;
+
+ assert(device->physical_device->rad_info.chip_class >= GFX8);
+
+ for (unsigned se = 0; se < max_se; se++) {
+ uint64_t va = radv_buffer_get_va(device->thread_trace.bo);
+ uint64_t data_va = ac_thread_trace_get_data_va(rad_info, &device->thread_trace, va, se);
+ uint64_t shifted_va = data_va >> SQTT_BUFFER_ALIGN_SHIFT;
+ int first_active_cu = ffs(device->physical_device->rad_info.cu_mask[se][0]);
+
+ if (radv_se_is_disabled(device, se))
+ continue;
+
+ /* Target SEx and SH0. */
+ radeon_set_uconfig_reg(
+ cs, R_030800_GRBM_GFX_INDEX,
+ S_030800_SE_INDEX(se) | S_030800_SH_INDEX(0) | S_030800_INSTANCE_BROADCAST_WRITES(1));
+
+ if (device->physical_device->rad_info.chip_class >= GFX10) {
+ /* Order seems important for the following 2 registers. */
+ radeon_set_privileged_config_reg(
+ cs, R_008D04_SQ_THREAD_TRACE_BUF0_SIZE,
+ S_008D04_SIZE(shifted_size) | S_008D04_BASE_HI(shifted_va >> 32));
+
+ radeon_set_privileged_config_reg(cs, R_008D00_SQ_THREAD_TRACE_BUF0_BASE,
+ S_008D00_BASE_LO(shifted_va));
+
+ radeon_set_privileged_config_reg(
+ cs, R_008D14_SQ_THREAD_TRACE_MASK,
+ S_008D14_WTYPE_INCLUDE(0x7f) | /* all shader stages */
+ S_008D14_SA_SEL(0) | S_008D14_WGP_SEL(first_active_cu / 2) | S_008D14_SIMD_SEL(0));
+
+ uint32_t thread_trace_token_mask = S_008D18_REG_INCLUDE(
+ V_008D18_REG_INCLUDE_SQDEC | V_008D18_REG_INCLUDE_SHDEC | V_008D18_REG_INCLUDE_GFXUDEC |
+ V_008D18_REG_INCLUDE_COMP | V_008D18_REG_INCLUDE_CONTEXT | V_008D18_REG_INCLUDE_CONFIG);
+
+ /* Performance counters with SQTT are considered
+ * deprecated.
+ */
+ thread_trace_token_mask |= S_008D18_TOKEN_EXCLUDE(V_008D18_TOKEN_EXCLUDE_PERF);
+
+ radeon_set_privileged_config_reg(cs, R_008D18_SQ_THREAD_TRACE_TOKEN_MASK,
+ thread_trace_token_mask);
+
+ uint32_t thread_trace_ctrl = S_008D1C_MODE(1) | S_008D1C_HIWATER(5) |
+ S_008D1C_UTIL_TIMER(1) | S_008D1C_RT_FREQ(2) | /* 4096 clk */
+ S_008D1C_DRAW_EVENT_EN(1) | S_008D1C_REG_STALL_EN(1) |
+ S_008D1C_SPI_STALL_EN(1) | S_008D1C_SQ_STALL_EN(1) |
+ S_008D1C_REG_DROP_ON_STALL(0);
+
+ if (device->physical_device->rad_info.chip_class == GFX10_3)
+ thread_trace_ctrl |= S_008D1C_LOWATER_OFFSET(4);
+
+ /* Should be emitted last (it enables thread traces). */
+ radeon_set_privileged_config_reg(cs, R_008D1C_SQ_THREAD_TRACE_CTRL, thread_trace_ctrl);
+ } else {
+ /* Order seems important for the following 4 registers. */
+ radeon_set_uconfig_reg(cs, R_030CDC_SQ_THREAD_TRACE_BASE2,
+ S_030CDC_ADDR_HI(shifted_va >> 32));
+
+ radeon_set_uconfig_reg(cs, R_030CC0_SQ_THREAD_TRACE_BASE, S_030CC0_ADDR(shifted_va));
+
+ radeon_set_uconfig_reg(cs, R_030CC4_SQ_THREAD_TRACE_SIZE, S_030CC4_SIZE(shifted_size));
+
+ radeon_set_uconfig_reg(cs, R_030CD4_SQ_THREAD_TRACE_CTRL, S_030CD4_RESET_BUFFER(1));
+
+ uint32_t thread_trace_mask = S_030CC8_CU_SEL(first_active_cu) | S_030CC8_SH_SEL(0) |
+ S_030CC8_SIMD_EN(0xf) | S_030CC8_VM_ID_MASK(0) |
+ S_030CC8_REG_STALL_EN(1) | S_030CC8_SPI_STALL_EN(1) |
+ S_030CC8_SQ_STALL_EN(1);
+
+ if (device->physical_device->rad_info.chip_class < GFX9) {
+ thread_trace_mask |= S_030CC8_RANDOM_SEED(0xffff);
+ }
+
+ radeon_set_uconfig_reg(cs, R_030CC8_SQ_THREAD_TRACE_MASK, thread_trace_mask);
+
+ /* Trace all tokens and registers. */
+ radeon_set_uconfig_reg(
+ cs, R_030CCC_SQ_THREAD_TRACE_TOKEN_MASK,
+ S_030CCC_TOKEN_MASK(0xbfff) | S_030CCC_REG_MASK(0xff) | S_030CCC_REG_DROP_ON_STALL(0));
+
+ /* Enable SQTT perf counters for all CUs. */
+ radeon_set_uconfig_reg(cs, R_030CD0_SQ_THREAD_TRACE_PERF_MASK,
+ S_030CD0_SH0_MASK(0xffff) | S_030CD0_SH1_MASK(0xffff));
+
+ radeon_set_uconfig_reg(cs, R_030CE0_SQ_THREAD_TRACE_TOKEN_MASK2,
+ S_030CE0_INST_MASK(0xffffffff));
+
+ radeon_set_uconfig_reg(cs, R_030CEC_SQ_THREAD_TRACE_HIWATER, S_030CEC_HIWATER(4));
+
+ if (device->physical_device->rad_info.chip_class == GFX9) {
+ /* Reset thread trace status errors. */
+ radeon_set_uconfig_reg(cs, R_030CE8_SQ_THREAD_TRACE_STATUS, S_030CE8_UTC_ERROR(0));
+ }
+
+ /* Enable the thread trace mode. */
+ uint32_t thread_trace_mode =
+ S_030CD8_MASK_PS(1) | S_030CD8_MASK_VS(1) | S_030CD8_MASK_GS(1) | S_030CD8_MASK_ES(1) |
+ S_030CD8_MASK_HS(1) | S_030CD8_MASK_LS(1) | S_030CD8_MASK_CS(1) |
+ S_030CD8_AUTOFLUSH_EN(1) | /* periodically flush SQTT data to memory */
+ S_030CD8_MODE(1);
+
+ if (device->physical_device->rad_info.chip_class == GFX9) {
+ /* Count SQTT traffic in TCC perf counters. */
+ thread_trace_mode |= S_030CD8_TC_PERF_EN(1);
+ }
+
+ radeon_set_uconfig_reg(cs, R_030CD8_SQ_THREAD_TRACE_MODE, thread_trace_mode);
+ }
+ }
+
+ /* Restore global broadcasting. */
+ radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX,
+ S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) |
+ S_030800_INSTANCE_BROADCAST_WRITES(1));
+
+ /* Start the thread trace with a different event based on the queue. */
+ if (queue_family_index == RADV_QUEUE_COMPUTE &&
+ device->physical_device->rad_info.chip_class >= GFX7) {
+ radeon_set_sh_reg(cs, R_00B878_COMPUTE_THREAD_TRACE_ENABLE, S_00B878_THREAD_TRACE_ENABLE(1));
+ } else {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_THREAD_TRACE_START) | EVENT_INDEX(0));
+ }
}
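Note on the CU selection above: ffs() returns a 1-based bit index, and on GFX10+ the selection is programmed per WGP (two CUs per WGP), hence the division by two. A standalone sketch of that arithmetic, using a made-up CU mask rather than driver data:

#include <stdio.h>
#include <strings.h> /* ffs() */

int main(void)
{
   /* Hypothetical CU mask for one shader engine: bit N set => CU N active.
    * Here CU0 is fused off and CU1..CU15 are active. */
   unsigned cu_mask = 0xfffe;

   /* A mask of 0 would mean the whole SE is disabled (radv_se_is_disabled). */
   int first_active_cu = ffs(cu_mask); /* 1-based, so 2 for this mask */

   /* GFX10+ groups two CUs into one WGP, so the register takes a WGP index. */
   int wgp_sel = first_active_cu / 2;

   printf("first active CU: %d, WGP_SEL: %d\n", first_active_cu, wgp_sel);
   return 0;
}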
-static const uint32_t gfx8_thread_trace_info_regs[] =
-{
- R_030CE4_SQ_THREAD_TRACE_WPTR,
- R_030CE8_SQ_THREAD_TRACE_STATUS,
- R_008E40_SQ_THREAD_TRACE_CNTR,
+static const uint32_t gfx8_thread_trace_info_regs[] = {
+ R_030CE4_SQ_THREAD_TRACE_WPTR,
+ R_030CE8_SQ_THREAD_TRACE_STATUS,
+ R_008E40_SQ_THREAD_TRACE_CNTR,
};
-static const uint32_t gfx9_thread_trace_info_regs[] =
-{
- R_030CE4_SQ_THREAD_TRACE_WPTR,
- R_030CE8_SQ_THREAD_TRACE_STATUS,
- R_030CF0_SQ_THREAD_TRACE_CNTR,
+static const uint32_t gfx9_thread_trace_info_regs[] = {
+ R_030CE4_SQ_THREAD_TRACE_WPTR,
+ R_030CE8_SQ_THREAD_TRACE_STATUS,
+ R_030CF0_SQ_THREAD_TRACE_CNTR,
};
-static const uint32_t gfx10_thread_trace_info_regs[] =
-{
- R_008D10_SQ_THREAD_TRACE_WPTR,
- R_008D20_SQ_THREAD_TRACE_STATUS,
- R_008D24_SQ_THREAD_TRACE_DROPPED_CNTR,
+static const uint32_t gfx10_thread_trace_info_regs[] = {
+ R_008D10_SQ_THREAD_TRACE_WPTR,
+ R_008D20_SQ_THREAD_TRACE_STATUS,
+ R_008D24_SQ_THREAD_TRACE_DROPPED_CNTR,
};
static void
-radv_copy_thread_trace_info_regs(struct radv_device *device,
- struct radeon_cmdbuf *cs,
- unsigned se_index)
+radv_copy_thread_trace_info_regs(struct radv_device *device, struct radeon_cmdbuf *cs,
+ unsigned se_index)
{
- const uint32_t *thread_trace_info_regs = NULL;
-
- if (device->physical_device->rad_info.chip_class >= GFX10) {
- thread_trace_info_regs = gfx10_thread_trace_info_regs;
- } else if (device->physical_device->rad_info.chip_class == GFX9) {
- thread_trace_info_regs = gfx9_thread_trace_info_regs;
- } else {
- assert(device->physical_device->rad_info.chip_class == GFX8);
- thread_trace_info_regs = gfx8_thread_trace_info_regs;
- }
-
- /* Get the VA where the info struct is stored for this SE. */
- uint64_t va = radv_buffer_get_va(device->thread_trace.bo);
- uint64_t info_va = ac_thread_trace_get_info_va(va, se_index);
-
- /* Copy back the info struct one DWORD at a time. */
- for (unsigned i = 0; i < 3; i++) {
- radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
- radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_PERF) |
- COPY_DATA_DST_SEL(COPY_DATA_TC_L2) |
- COPY_DATA_WR_CONFIRM);
- radeon_emit(cs, thread_trace_info_regs[i] >> 2);
- radeon_emit(cs, 0); /* unused */
- radeon_emit(cs, (info_va + i * 4));
- radeon_emit(cs, (info_va + i * 4) >> 32);
- }
+ const uint32_t *thread_trace_info_regs = NULL;
+
+ if (device->physical_device->rad_info.chip_class >= GFX10) {
+ thread_trace_info_regs = gfx10_thread_trace_info_regs;
+ } else if (device->physical_device->rad_info.chip_class == GFX9) {
+ thread_trace_info_regs = gfx9_thread_trace_info_regs;
+ } else {
+ assert(device->physical_device->rad_info.chip_class == GFX8);
+ thread_trace_info_regs = gfx8_thread_trace_info_regs;
+ }
+
+ /* Get the VA where the info struct is stored for this SE. */
+ uint64_t va = radv_buffer_get_va(device->thread_trace.bo);
+ uint64_t info_va = ac_thread_trace_get_info_va(va, se_index);
+
+ /* Copy back the info struct one DWORD at a time. */
+ for (unsigned i = 0; i < 3; i++) {
+ radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+ radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_PERF) | COPY_DATA_DST_SEL(COPY_DATA_TC_L2) |
+ COPY_DATA_WR_CONFIRM);
+ radeon_emit(cs, thread_trace_info_regs[i] >> 2);
+ radeon_emit(cs, 0); /* unused */
+ radeon_emit(cs, (info_va + i * 4));
+ radeon_emit(cs, (info_va + i * 4) >> 32);
+ }
}
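The ">> 2" above converts a register byte offset into the DWORD index that COPY_DATA expects, and the destination is the per-SE info slot inside the trace BO. A rough illustration of that addressing; the info-struct layout and sizes below are assumptions, not the exact ac_thread_trace_get_info_va() layout:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
   /* Made-up values for illustration only. */
   uint64_t bo_va = 0x100000000ull; /* GPU address of the thread trace BO */
   unsigned info_struct_size = 32;  /* assumed per-SE info struct size */
   unsigned se_index = 2;

   /* Presumed layout: one info struct per SE at the start of the BO. */
   uint64_t info_va = bo_va + (uint64_t)se_index * info_struct_size;

   /* COPY_DATA takes the source register as a DWORD index, hence ">> 2". */
   uint32_t reg_byte_offset = 0x8D20; /* SQ_THREAD_TRACE_STATUS on GFX10 */
   uint32_t reg_dword_index = reg_byte_offset >> 2;

   printf("SE%u info VA: 0x%" PRIx64 ", register DWORD index: %" PRIu32 "\n",
          se_index, info_va, reg_dword_index);
   return 0;
}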
static void
-radv_emit_thread_trace_stop(struct radv_device *device,
- struct radeon_cmdbuf *cs,
- uint32_t queue_family_index)
+radv_emit_thread_trace_stop(struct radv_device *device, struct radeon_cmdbuf *cs,
+ uint32_t queue_family_index)
{
- unsigned max_se = device->physical_device->rad_info.max_se;
-
- assert(device->physical_device->rad_info.chip_class >= GFX8);
-
- /* Stop the thread trace with a different event based on the queue. */
- if (queue_family_index == RADV_QUEUE_COMPUTE &&
- device->physical_device->rad_info.chip_class >= GFX7) {
- radeon_set_sh_reg(cs, R_00B878_COMPUTE_THREAD_TRACE_ENABLE,
- S_00B878_THREAD_TRACE_ENABLE(0));
- } else {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_THREAD_TRACE_STOP) | EVENT_INDEX(0));
- }
-
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_THREAD_TRACE_FINISH) | EVENT_INDEX(0));
-
- for (unsigned se = 0; se < max_se; se++) {
- if (radv_se_is_disabled(device, se))
- continue;
-
- /* Target SEi and SH0. */
- radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX,
- S_030800_SE_INDEX(se) |
- S_030800_SH_INDEX(0) |
- S_030800_INSTANCE_BROADCAST_WRITES(1));
-
- if (device->physical_device->rad_info.chip_class >= GFX10) {
- /* Make sure to wait for the trace buffer. */
- radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
-			radeon_emit(cs, WAIT_REG_MEM_NOT_EQUAL); /* wait until the register is not equal to the reference value */
- radeon_emit(cs, R_008D20_SQ_THREAD_TRACE_STATUS >> 2); /* register */
- radeon_emit(cs, 0);
- radeon_emit(cs, 0); /* reference value */
- radeon_emit(cs, S_008D20_FINISH_DONE(1)); /* mask */
- radeon_emit(cs, 4); /* poll interval */
-
- /* Disable the thread trace mode. */
- radeon_set_privileged_config_reg(cs, R_008D1C_SQ_THREAD_TRACE_CTRL,
- S_008D1C_MODE(0));
-
- /* Wait for thread trace completion. */
- radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
- radeon_emit(cs, WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */
- radeon_emit(cs, R_008D20_SQ_THREAD_TRACE_STATUS >> 2); /* register */
- radeon_emit(cs, 0);
- radeon_emit(cs, 0); /* reference value */
- radeon_emit(cs, S_008D20_BUSY(1)); /* mask */
- radeon_emit(cs, 4); /* poll interval */
- } else {
- /* Disable the thread trace mode. */
- radeon_set_uconfig_reg(cs, R_030CD8_SQ_THREAD_TRACE_MODE,
- S_030CD8_MODE(0));
-
- /* Wait for thread trace completion. */
- radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
- radeon_emit(cs, WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */
- radeon_emit(cs, R_030CE8_SQ_THREAD_TRACE_STATUS >> 2); /* register */
- radeon_emit(cs, 0);
- radeon_emit(cs, 0); /* reference value */
- radeon_emit(cs, S_030CE8_BUSY(1)); /* mask */
- radeon_emit(cs, 4); /* poll interval */
- }
-
- radv_copy_thread_trace_info_regs(device, cs, se);
- }
-
- /* Restore global broadcasting. */
- radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX,
- S_030800_SE_BROADCAST_WRITES(1) |
- S_030800_SH_BROADCAST_WRITES(1) |
- S_030800_INSTANCE_BROADCAST_WRITES(1));
+ unsigned max_se = device->physical_device->rad_info.max_se;
+
+ assert(device->physical_device->rad_info.chip_class >= GFX8);
+
+ /* Stop the thread trace with a different event based on the queue. */
+ if (queue_family_index == RADV_QUEUE_COMPUTE &&
+ device->physical_device->rad_info.chip_class >= GFX7) {
+ radeon_set_sh_reg(cs, R_00B878_COMPUTE_THREAD_TRACE_ENABLE, S_00B878_THREAD_TRACE_ENABLE(0));
+ } else {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_THREAD_TRACE_STOP) | EVENT_INDEX(0));
+ }
+
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_THREAD_TRACE_FINISH) | EVENT_INDEX(0));
+
+ for (unsigned se = 0; se < max_se; se++) {
+ if (radv_se_is_disabled(device, se))
+ continue;
+
+ /* Target SEi and SH0. */
+ radeon_set_uconfig_reg(
+ cs, R_030800_GRBM_GFX_INDEX,
+ S_030800_SE_INDEX(se) | S_030800_SH_INDEX(0) | S_030800_INSTANCE_BROADCAST_WRITES(1));
+
+ if (device->physical_device->rad_info.chip_class >= GFX10) {
+ /* Make sure to wait for the trace buffer. */
+ radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
+ radeon_emit(
+ cs,
+            WAIT_REG_MEM_NOT_EQUAL); /* wait until the register is not equal to the reference value */
+ radeon_emit(cs, R_008D20_SQ_THREAD_TRACE_STATUS >> 2); /* register */
+ radeon_emit(cs, 0);
+ radeon_emit(cs, 0); /* reference value */
+ radeon_emit(cs, S_008D20_FINISH_DONE(1)); /* mask */
+ radeon_emit(cs, 4); /* poll interval */
+
+ /* Disable the thread trace mode. */
+ radeon_set_privileged_config_reg(cs, R_008D1C_SQ_THREAD_TRACE_CTRL, S_008D1C_MODE(0));
+
+ /* Wait for thread trace completion. */
+ radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
+ radeon_emit(
+ cs, WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */
+ radeon_emit(cs, R_008D20_SQ_THREAD_TRACE_STATUS >> 2); /* register */
+ radeon_emit(cs, 0);
+ radeon_emit(cs, 0); /* reference value */
+ radeon_emit(cs, S_008D20_BUSY(1)); /* mask */
+ radeon_emit(cs, 4); /* poll interval */
+ } else {
+ /* Disable the thread trace mode. */
+ radeon_set_uconfig_reg(cs, R_030CD8_SQ_THREAD_TRACE_MODE, S_030CD8_MODE(0));
+
+ /* Wait for thread trace completion. */
+ radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
+ radeon_emit(
+ cs, WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */
+ radeon_emit(cs, R_030CE8_SQ_THREAD_TRACE_STATUS >> 2); /* register */
+ radeon_emit(cs, 0);
+ radeon_emit(cs, 0); /* reference value */
+ radeon_emit(cs, S_030CE8_BUSY(1)); /* mask */
+ radeon_emit(cs, 4); /* poll interval */
+ }
+
+ radv_copy_thread_trace_info_regs(device, cs, se);
+ }
+
+ /* Restore global broadcasting. */
+ radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX,
+ S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) |
+ S_030800_INSTANCE_BROADCAST_WRITES(1));
}
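The WAIT_REG_MEM packets above make the CP poll a status register until a masked comparison against the reference value passes (FINISH_DONE set, then BUSY cleared). A CPU-side model of those semantics, purely for illustration; the bit positions are made up:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum wait_func { WAIT_EQUAL, WAIT_NOT_EQUAL };

/* Poll a register until (value & mask) compares against the reference value,
 * mirroring what the WAIT_REG_MEM packet asks the CP to do. */
static bool wait_reg_mem(enum wait_func func, volatile const uint32_t *reg,
                         uint32_t mask, uint32_t ref, unsigned max_polls)
{
   for (unsigned i = 0; i < max_polls; i++) {
      uint32_t masked = *reg & mask;
      if (func == WAIT_EQUAL ? (masked == ref) : (masked != ref))
         return true;
      /* The real packet waits "poll interval" clocks between reads. */
   }
   return false;
}

int main(void)
{
   uint32_t status = 0x2; /* pretend FINISH_DONE (bit 1 here) set, BUSY (bit 0) clear */

   /* Wait for FINISH_DONE != 0, then for BUSY == 0, mirroring the GFX10 path. */
   bool finished = wait_reg_mem(WAIT_NOT_EQUAL, &status, 0x2, 0, 16);
   bool idle = wait_reg_mem(WAIT_EQUAL, &status, 0x1, 0, 16);

   printf("finish done: %d, idle: %d\n", finished, idle);
   return 0;
}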
void
-radv_emit_thread_trace_userdata(const struct radv_device *device,
- struct radeon_cmdbuf *cs,
- const void *data, uint32_t num_dwords)
+radv_emit_thread_trace_userdata(const struct radv_device *device, struct radeon_cmdbuf *cs,
+ const void *data, uint32_t num_dwords)
{
- const uint32_t *dwords = (uint32_t *)data;
+ const uint32_t *dwords = (uint32_t *)data;
- while (num_dwords > 0) {
- uint32_t count = MIN2(num_dwords, 2);
+ while (num_dwords > 0) {
+ uint32_t count = MIN2(num_dwords, 2);
- radeon_check_space(device->ws, cs, 2 + count);
+ radeon_check_space(device->ws, cs, 2 + count);
- /* Without the perfctr bit the CP might not always pass the
- * write on correctly. */
- if (device->physical_device->rad_info.chip_class >= GFX10)
- radeon_set_uconfig_reg_seq_perfctr(cs, R_030D08_SQ_THREAD_TRACE_USERDATA_2, count);
- else
- radeon_set_uconfig_reg_seq(cs, R_030D08_SQ_THREAD_TRACE_USERDATA_2, count);
- radeon_emit_array(cs, dwords, count);
+ /* Without the perfctr bit the CP might not always pass the
+ * write on correctly. */
+ if (device->physical_device->rad_info.chip_class >= GFX10)
+ radeon_set_uconfig_reg_seq_perfctr(cs, R_030D08_SQ_THREAD_TRACE_USERDATA_2, count);
+ else
+ radeon_set_uconfig_reg_seq(cs, R_030D08_SQ_THREAD_TRACE_USERDATA_2, count);
+ radeon_emit_array(cs, dwords, count);
- dwords += count;
- num_dwords -= count;
- }
+ dwords += count;
+ num_dwords -= count;
+ }
}
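The loop above writes the marker payload in chunks of at most two DWORDs per packet. The chunking itself, separated from the command-stream details, looks like this; the output target here is just stdout, not the driver's API:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define MIN2(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
   uint32_t data[5] = {0x11, 0x22, 0x33, 0x44, 0x55};
   const uint32_t *dwords = data;
   uint32_t num_dwords = 5;

   while (num_dwords > 0) {
      /* At most two DWORDs per packet, matching the small
       * SQ_THREAD_TRACE_USERDATA register window used above. */
      uint32_t count = MIN2(num_dwords, 2);

      printf("emit %" PRIu32 " dword(s):", count);
      for (uint32_t i = 0; i < count; i++)
         printf(" 0x%08" PRIx32, dwords[i]);
      printf("\n");

      dwords += count;
      num_dwords -= count;
   }
   return 0;
}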
static void
-radv_emit_spi_config_cntl(struct radv_device *device,
- struct radeon_cmdbuf *cs, bool enable)
+radv_emit_spi_config_cntl(struct radv_device *device, struct radeon_cmdbuf *cs, bool enable)
{
- if (device->physical_device->rad_info.chip_class >= GFX9) {
- uint32_t spi_config_cntl = S_031100_GPR_WRITE_PRIORITY(0x2c688) |
- S_031100_EXP_PRIORITY_ORDER(3) |
- S_031100_ENABLE_SQG_TOP_EVENTS(enable) |
- S_031100_ENABLE_SQG_BOP_EVENTS(enable);
-
- if (device->physical_device->rad_info.chip_class >= GFX10)
- spi_config_cntl |= S_031100_PS_PKR_PRIORITY_CNTL(3);
-
- radeon_set_uconfig_reg(cs, R_031100_SPI_CONFIG_CNTL, spi_config_cntl);
- } else {
- /* SPI_CONFIG_CNTL is a protected register on GFX6-GFX8. */
- radeon_set_privileged_config_reg(cs, R_009100_SPI_CONFIG_CNTL,
- S_009100_ENABLE_SQG_TOP_EVENTS(enable) |
- S_009100_ENABLE_SQG_BOP_EVENTS(enable));
- }
+ if (device->physical_device->rad_info.chip_class >= GFX9) {
+ uint32_t spi_config_cntl =
+ S_031100_GPR_WRITE_PRIORITY(0x2c688) | S_031100_EXP_PRIORITY_ORDER(3) |
+ S_031100_ENABLE_SQG_TOP_EVENTS(enable) | S_031100_ENABLE_SQG_BOP_EVENTS(enable);
+
+ if (device->physical_device->rad_info.chip_class >= GFX10)
+ spi_config_cntl |= S_031100_PS_PKR_PRIORITY_CNTL(3);
+
+ radeon_set_uconfig_reg(cs, R_031100_SPI_CONFIG_CNTL, spi_config_cntl);
+ } else {
+ /* SPI_CONFIG_CNTL is a protected register on GFX6-GFX8. */
+ radeon_set_privileged_config_reg(
+ cs, R_009100_SPI_CONFIG_CNTL,
+ S_009100_ENABLE_SQG_TOP_EVENTS(enable) | S_009100_ENABLE_SQG_BOP_EVENTS(enable));
+ }
}
static void
-radv_emit_inhibit_clockgating(struct radv_device *device,
- struct radeon_cmdbuf *cs, bool inhibit)
+radv_emit_inhibit_clockgating(struct radv_device *device, struct radeon_cmdbuf *cs, bool inhibit)
{
- if (device->physical_device->rad_info.chip_class >= GFX10) {
- radeon_set_uconfig_reg(cs, R_037390_RLC_PERFMON_CLK_CNTL,
- S_037390_PERFMON_CLOCK_STATE(inhibit));
- } else if (device->physical_device->rad_info.chip_class >= GFX8) {
- radeon_set_uconfig_reg(cs, R_0372FC_RLC_PERFMON_CLK_CNTL,
- S_0372FC_PERFMON_CLOCK_STATE(inhibit));
- }
+ if (device->physical_device->rad_info.chip_class >= GFX10) {
+ radeon_set_uconfig_reg(cs, R_037390_RLC_PERFMON_CLK_CNTL,
+ S_037390_PERFMON_CLOCK_STATE(inhibit));
+ } else if (device->physical_device->rad_info.chip_class >= GFX8) {
+ radeon_set_uconfig_reg(cs, R_0372FC_RLC_PERFMON_CLK_CNTL,
+ S_0372FC_PERFMON_CLOCK_STATE(inhibit));
+ }
}
static void
-radv_emit_wait_for_idle(struct radv_device *device,
- struct radeon_cmdbuf *cs, int family)
+radv_emit_wait_for_idle(struct radv_device *device, struct radeon_cmdbuf *cs, int family)
{
- enum rgp_flush_bits sqtt_flush_bits = 0;
- si_cs_emit_cache_flush(cs, device->physical_device->rad_info.chip_class,
- NULL, 0,
- family == RING_COMPUTE &&
- device->physical_device->rad_info.chip_class >= GFX7,
- (family == RADV_QUEUE_COMPUTE ?
- RADV_CMD_FLAG_CS_PARTIAL_FLUSH :
- (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
- RADV_CMD_FLAG_INV_ICACHE |
- RADV_CMD_FLAG_INV_SCACHE |
- RADV_CMD_FLAG_INV_VCACHE |
- RADV_CMD_FLAG_INV_L2, &sqtt_flush_bits, 0);
+ enum rgp_flush_bits sqtt_flush_bits = 0;
+ si_cs_emit_cache_flush(
+ cs, device->physical_device->rad_info.chip_class, NULL, 0,
+ family == RING_COMPUTE && device->physical_device->rad_info.chip_class >= GFX7,
+ (family == RADV_QUEUE_COMPUTE
+ ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH
+ : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
+ RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE | RADV_CMD_FLAG_INV_VCACHE |
+ RADV_CMD_FLAG_INV_L2,
+ &sqtt_flush_bits, 0);
}
static bool
radv_thread_trace_init_bo(struct radv_device *device)
{
- unsigned max_se = device->physical_device->rad_info.max_se;
- struct radeon_winsys *ws = device->ws;
- uint64_t size;
-
- /* The buffer size and address need to be aligned in HW regs. Align the
- * size as early as possible so that we do all the allocation & addressing
- * correctly. */
- device->thread_trace.buffer_size = align64(device->thread_trace.buffer_size,
- 1u << SQTT_BUFFER_ALIGN_SHIFT);
-
- /* Compute total size of the thread trace BO for all SEs. */
- size = align64(sizeof(struct ac_thread_trace_info) * max_se,
- 1 << SQTT_BUFFER_ALIGN_SHIFT);
- size += device->thread_trace.buffer_size * (uint64_t)max_se;
-
- device->thread_trace.bo = ws->buffer_create(ws, size, 4096,
- RADEON_DOMAIN_VRAM,
- RADEON_FLAG_CPU_ACCESS |
- RADEON_FLAG_NO_INTERPROCESS_SHARING |
- RADEON_FLAG_ZERO_VRAM,
- RADV_BO_PRIORITY_SCRATCH);
- if (!device->thread_trace.bo)
- return false;
-
- device->thread_trace.ptr = ws->buffer_map(device->thread_trace.bo);
- if (!device->thread_trace.ptr)
- return false;
-
- return true;
+ unsigned max_se = device->physical_device->rad_info.max_se;
+ struct radeon_winsys *ws = device->ws;
+ uint64_t size;
+
+ /* The buffer size and address need to be aligned in HW regs. Align the
+ * size as early as possible so that we do all the allocation & addressing
+ * correctly. */
+ device->thread_trace.buffer_size =
+ align64(device->thread_trace.buffer_size, 1u << SQTT_BUFFER_ALIGN_SHIFT);
+
+ /* Compute total size of the thread trace BO for all SEs. */
+ size = align64(sizeof(struct ac_thread_trace_info) * max_se, 1 << SQTT_BUFFER_ALIGN_SHIFT);
+ size += device->thread_trace.buffer_size * (uint64_t)max_se;
+
+ device->thread_trace.bo = ws->buffer_create(
+ ws, size, 4096, RADEON_DOMAIN_VRAM,
+ RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM,
+ RADV_BO_PRIORITY_SCRATCH);
+ if (!device->thread_trace.bo)
+ return false;
+
+ device->thread_trace.ptr = ws->buffer_map(device->thread_trace.bo);
+ if (!device->thread_trace.ptr)
+ return false;
+
+ return true;
}
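A standalone sketch of the sizing math above. The constants below are assumptions for illustration only; the real SQTT_BUFFER_ALIGN_SHIFT and ac_thread_trace_info size come from the common AMD headers:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Assumed values, for illustration only. */
#define SQTT_BUFFER_ALIGN_SHIFT 12 /* 4 KiB alignment */
#define THREAD_TRACE_INFO_SIZE  32 /* per-SE info struct size, made up */

static uint64_t align64(uint64_t v, uint64_t a)
{
   return (v + a - 1) & ~(a - 1);
}

int main(void)
{
   unsigned max_se = 4;
   uint64_t buffer_size = 32ull * 1024 * 1024; /* default: 32 MiB per SE */

   /* Align the per-SE data buffer size as the HW registers require. */
   buffer_size = align64(buffer_size, 1u << SQTT_BUFFER_ALIGN_SHIFT);

   /* Info structs for all SEs first, then one data buffer per SE. */
   uint64_t size = align64((uint64_t)THREAD_TRACE_INFO_SIZE * max_se,
                           1u << SQTT_BUFFER_ALIGN_SHIFT);
   size += buffer_size * (uint64_t)max_se;

   printf("total SQTT BO size: %" PRIu64 " bytes\n", size);
   return 0;
}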
bool
radv_thread_trace_init(struct radv_device *device)
{
- struct ac_thread_trace_data *thread_trace_data = &device->thread_trace;
+ struct ac_thread_trace_data *thread_trace_data = &device->thread_trace;
- /* Default buffer size set to 32MB per SE. */
- device->thread_trace.buffer_size =
- radv_get_int_debug_option("RADV_THREAD_TRACE_BUFFER_SIZE", 32 * 1024 * 1024);
- device->thread_trace.start_frame = radv_get_int_debug_option("RADV_THREAD_TRACE", -1);
+ /* Default buffer size set to 32MB per SE. */
+ device->thread_trace.buffer_size =
+ radv_get_int_debug_option("RADV_THREAD_TRACE_BUFFER_SIZE", 32 * 1024 * 1024);
+ device->thread_trace.start_frame = radv_get_int_debug_option("RADV_THREAD_TRACE", -1);
- const char *trigger_file = getenv("RADV_THREAD_TRACE_TRIGGER");
- if (trigger_file)
- device->thread_trace.trigger_file = strdup(trigger_file);
+ const char *trigger_file = getenv("RADV_THREAD_TRACE_TRIGGER");
+ if (trigger_file)
+ device->thread_trace.trigger_file = strdup(trigger_file);
- if (!radv_thread_trace_init_bo(device))
- return false;
+ if (!radv_thread_trace_init_bo(device))
+ return false;
- list_inithead(&thread_trace_data->rgp_pso_correlation.record);
- simple_mtx_init(&thread_trace_data->rgp_pso_correlation.lock, mtx_plain);
+ list_inithead(&thread_trace_data->rgp_pso_correlation.record);
+ simple_mtx_init(&thread_trace_data->rgp_pso_correlation.lock, mtx_plain);
- list_inithead(&thread_trace_data->rgp_loader_events.record);
- simple_mtx_init(&thread_trace_data->rgp_loader_events.lock, mtx_plain);
+ list_inithead(&thread_trace_data->rgp_loader_events.record);
+ simple_mtx_init(&thread_trace_data->rgp_loader_events.lock, mtx_plain);
- list_inithead(&thread_trace_data->rgp_code_object.record);
- simple_mtx_init(&thread_trace_data->rgp_code_object.lock, mtx_plain);
+ list_inithead(&thread_trace_data->rgp_code_object.record);
+ simple_mtx_init(&thread_trace_data->rgp_code_object.lock, mtx_plain);
- return true;
+ return true;
}
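The environment-driven setup above can be approximated with a small sketch; get_int_option below is a guess at what radv_get_int_debug_option does (env value if set, default otherwise), not the actual helper:

#include <stdio.h>
#include <stdlib.h>

/* Sketch of an integer debug option reader: value of the env var if set,
 * otherwise the given default. */
static int get_int_option(const char *name, int default_value)
{
   const char *str = getenv(name);
   return str ? atoi(str) : default_value;
}

int main(void)
{
   int buffer_size = get_int_option("RADV_THREAD_TRACE_BUFFER_SIZE", 32 * 1024 * 1024);
   int start_frame = get_int_option("RADV_THREAD_TRACE", -1);
   const char *trigger_file = getenv("RADV_THREAD_TRACE_TRIGGER");

   printf("buffer size: %d, start frame: %d, trigger file: %s\n",
          buffer_size, start_frame, trigger_file ? trigger_file : "(none)");
   return 0;
}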
void
radv_thread_trace_finish(struct radv_device *device)
{
- struct ac_thread_trace_data *thread_trace_data = &device->thread_trace;
- struct radeon_winsys *ws = device->ws;
+ struct ac_thread_trace_data *thread_trace_data = &device->thread_trace;
+ struct radeon_winsys *ws = device->ws;
- if (unlikely(device->thread_trace.bo))
- ws->buffer_destroy(ws, device->thread_trace.bo);
+ if (unlikely(device->thread_trace.bo))
+ ws->buffer_destroy(ws, device->thread_trace.bo);
- for (unsigned i = 0; i < 2; i++) {
- if (device->thread_trace.start_cs[i])
- ws->cs_destroy(device->thread_trace.start_cs[i]);
- if (device->thread_trace.stop_cs[i])
- ws->cs_destroy(device->thread_trace.stop_cs[i]);
- }
+ for (unsigned i = 0; i < 2; i++) {
+ if (device->thread_trace.start_cs[i])
+ ws->cs_destroy(device->thread_trace.start_cs[i]);
+ if (device->thread_trace.stop_cs[i])
+ ws->cs_destroy(device->thread_trace.stop_cs[i]);
+ }
- assert(thread_trace_data->rgp_pso_correlation.record_count == 0);
- simple_mtx_destroy(&thread_trace_data->rgp_pso_correlation.lock);
+ assert(thread_trace_data->rgp_pso_correlation.record_count == 0);
+ simple_mtx_destroy(&thread_trace_data->rgp_pso_correlation.lock);
- assert(thread_trace_data->rgp_loader_events.record_count == 0);
- simple_mtx_destroy(&thread_trace_data->rgp_loader_events.lock);
+ assert(thread_trace_data->rgp_loader_events.record_count == 0);
+ simple_mtx_destroy(&thread_trace_data->rgp_loader_events.lock);
- assert(thread_trace_data->rgp_code_object.record_count == 0);
- simple_mtx_destroy(&thread_trace_data->rgp_code_object.lock);
+ assert(thread_trace_data->rgp_code_object.record_count == 0);
+ simple_mtx_destroy(&thread_trace_data->rgp_code_object.lock);
}
static bool
radv_thread_trace_resize_bo(struct radv_device *device)
{
- struct radeon_winsys *ws = device->ws;
+ struct radeon_winsys *ws = device->ws;
- /* Destroy the previous thread trace BO. */
- ws->buffer_destroy(ws, device->thread_trace.bo);
+ /* Destroy the previous thread trace BO. */
+ ws->buffer_destroy(ws, device->thread_trace.bo);
- /* Double the size of the thread trace buffer per SE. */
- device->thread_trace.buffer_size *= 2;
+ /* Double the size of the thread trace buffer per SE. */
+ device->thread_trace.buffer_size *= 2;
- fprintf(stderr, "Failed to get the thread trace because the buffer "
- "was too small, resizing to %d KB\n",
- device->thread_trace.buffer_size / 1024);
+ fprintf(stderr,
+ "Failed to get the thread trace because the buffer "
+ "was too small, resizing to %d KB\n",
+ device->thread_trace.buffer_size / 1024);
- /* Re-create the thread trace BO. */
- return radv_thread_trace_init_bo(device);
+ /* Re-create the thread trace BO. */
+ return radv_thread_trace_init_bo(device);
}
bool
radv_begin_thread_trace(struct radv_queue *queue)
{
- struct radv_device *device = queue->device;
- int family = queue->queue_family_index;
- struct radeon_winsys *ws = device->ws;
- struct radeon_cmdbuf *cs;
- VkResult result;
-
- /* Destroy the previous start CS and create a new one. */
- if (device->thread_trace.start_cs[family]) {
- ws->cs_destroy(device->thread_trace.start_cs[family]);
- device->thread_trace.start_cs[family] = NULL;
- }
-
- cs = ws->cs_create(ws, family);
- if (!cs)
- return false;
-
- switch (family) {
- case RADV_QUEUE_GENERAL:
- radeon_emit(cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
- radeon_emit(cs, CC0_UPDATE_LOAD_ENABLES(1));
- radeon_emit(cs, CC1_UPDATE_SHADOW_ENABLES(1));
- break;
- case RADV_QUEUE_COMPUTE:
- radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
- radeon_emit(cs, 0);
- break;
- }
-
- radv_cs_add_buffer(ws, cs, device->thread_trace.bo);
-
- /* Make sure to wait-for-idle before starting SQTT. */
- radv_emit_wait_for_idle(device, cs, family);
-
- /* Disable clock gating before starting SQTT. */
- radv_emit_inhibit_clockgating(device, cs, true);
-
-	/* Enable SQG events that collect thread trace data. */
- radv_emit_spi_config_cntl(device, cs, true);
-
- /* Start SQTT. */
- radv_emit_thread_trace_start(device, cs, family);
-
- result = ws->cs_finalize(cs);
- if (result != VK_SUCCESS) {
- ws->cs_destroy(cs);
- return false;
- }
-
- device->thread_trace.start_cs[family] = cs;
-
- return radv_queue_internal_submit(queue, cs);
+ struct radv_device *device = queue->device;
+ int family = queue->queue_family_index;
+ struct radeon_winsys *ws = device->ws;
+ struct radeon_cmdbuf *cs;
+ VkResult result;
+
+ /* Destroy the previous start CS and create a new one. */
+ if (device->thread_trace.start_cs[family]) {
+ ws->cs_destroy(device->thread_trace.start_cs[family]);
+ device->thread_trace.start_cs[family] = NULL;
+ }
+
+ cs = ws->cs_create(ws, family);
+ if (!cs)
+ return false;
+
+ switch (family) {
+ case RADV_QUEUE_GENERAL:
+ radeon_emit(cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
+ radeon_emit(cs, CC0_UPDATE_LOAD_ENABLES(1));
+ radeon_emit(cs, CC1_UPDATE_SHADOW_ENABLES(1));
+ break;
+ case RADV_QUEUE_COMPUTE:
+ radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
+ radeon_emit(cs, 0);
+ break;
+ }
+
+ radv_cs_add_buffer(ws, cs, device->thread_trace.bo);
+
+ /* Make sure to wait-for-idle before starting SQTT. */
+ radv_emit_wait_for_idle(device, cs, family);
+
+ /* Disable clock gating before starting SQTT. */
+ radv_emit_inhibit_clockgating(device, cs, true);
+
+   /* Enable SQG events that collect thread trace data. */
+ radv_emit_spi_config_cntl(device, cs, true);
+
+ /* Start SQTT. */
+ radv_emit_thread_trace_start(device, cs, family);
+
+ result = ws->cs_finalize(cs);
+ if (result != VK_SUCCESS) {
+ ws->cs_destroy(cs);
+ return false;
+ }
+
+ device->thread_trace.start_cs[family] = cs;
+
+ return radv_queue_internal_submit(queue, cs);
}
bool
radv_end_thread_trace(struct radv_queue *queue)
{
- struct radv_device *device = queue->device;
- int family = queue->queue_family_index;
- struct radeon_winsys *ws = device->ws;
- struct radeon_cmdbuf *cs;
- VkResult result;
-
- /* Destroy the previous stop CS and create a new one. */
- if (queue->device->thread_trace.stop_cs[family]) {
- ws->cs_destroy(device->thread_trace.stop_cs[family]);
- device->thread_trace.stop_cs[family] = NULL;
- }
-
- cs = ws->cs_create(ws, family);
- if (!cs)
- return false;
-
- switch (family) {
- case RADV_QUEUE_GENERAL:
- radeon_emit(cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
- radeon_emit(cs, CC0_UPDATE_LOAD_ENABLES(1));
- radeon_emit(cs, CC1_UPDATE_SHADOW_ENABLES(1));
- break;
- case RADV_QUEUE_COMPUTE:
- radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
- radeon_emit(cs, 0);
- break;
- }
-
- radv_cs_add_buffer(ws, cs, device->thread_trace.bo);
-
- /* Make sure to wait-for-idle before stopping SQTT. */
- radv_emit_wait_for_idle(device, cs, family);
-
- /* Stop SQTT. */
- radv_emit_thread_trace_stop(device, cs, family);
-
- /* Restore previous state by disabling SQG events. */
- radv_emit_spi_config_cntl(device, cs, false);
-
- /* Restore previous state by re-enabling clock gating. */
- radv_emit_inhibit_clockgating(device, cs, false);
-
- result = ws->cs_finalize(cs);
- if (result != VK_SUCCESS) {
- ws->cs_destroy(cs);
- return false;
- }
-
- device->thread_trace.stop_cs[family] = cs;
-
- return radv_queue_internal_submit(queue, cs);
+ struct radv_device *device = queue->device;
+ int family = queue->queue_family_index;
+ struct radeon_winsys *ws = device->ws;
+ struct radeon_cmdbuf *cs;
+ VkResult result;
+
+ /* Destroy the previous stop CS and create a new one. */
+ if (queue->device->thread_trace.stop_cs[family]) {
+ ws->cs_destroy(device->thread_trace.stop_cs[family]);
+ device->thread_trace.stop_cs[family] = NULL;
+ }
+
+ cs = ws->cs_create(ws, family);
+ if (!cs)
+ return false;
+
+ switch (family) {
+ case RADV_QUEUE_GENERAL:
+ radeon_emit(cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
+ radeon_emit(cs, CC0_UPDATE_LOAD_ENABLES(1));
+ radeon_emit(cs, CC1_UPDATE_SHADOW_ENABLES(1));
+ break;
+ case RADV_QUEUE_COMPUTE:
+ radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
+ radeon_emit(cs, 0);
+ break;
+ }
+
+ radv_cs_add_buffer(ws, cs, device->thread_trace.bo);
+
+ /* Make sure to wait-for-idle before stopping SQTT. */
+ radv_emit_wait_for_idle(device, cs, family);
+
+ /* Stop SQTT. */
+ radv_emit_thread_trace_stop(device, cs, family);
+
+ /* Restore previous state by disabling SQG events. */
+ radv_emit_spi_config_cntl(device, cs, false);
+
+ /* Restore previous state by re-enabling clock gating. */
+ radv_emit_inhibit_clockgating(device, cs, false);
+
+ result = ws->cs_finalize(cs);
+ if (result != VK_SUCCESS) {
+ ws->cs_destroy(cs);
+ return false;
+ }
+
+ device->thread_trace.stop_cs[family] = cs;
+
+ return radv_queue_internal_submit(queue, cs);
}
bool
-radv_get_thread_trace(struct radv_queue *queue,
- struct ac_thread_trace *thread_trace)
+radv_get_thread_trace(struct radv_queue *queue, struct ac_thread_trace *thread_trace)
{
- struct radv_device *device = queue->device;
- struct radeon_info *rad_info = &device->physical_device->rad_info;
- unsigned max_se = rad_info->max_se;
- void *thread_trace_ptr = device->thread_trace.ptr;
-
- memset(thread_trace, 0, sizeof(*thread_trace));
-
- for (unsigned se = 0; se < max_se; se++) {
- uint64_t info_offset = ac_thread_trace_get_info_offset(se);
- uint64_t data_offset = ac_thread_trace_get_data_offset(rad_info, &device->thread_trace, se);
- void *info_ptr = (uint8_t *)thread_trace_ptr + info_offset;
- void *data_ptr = (uint8_t *)thread_trace_ptr + data_offset;
- struct ac_thread_trace_info *info =
- (struct ac_thread_trace_info *)info_ptr;
- struct ac_thread_trace_se thread_trace_se = {0};
- int first_active_cu = ffs(device->physical_device->rad_info.cu_mask[se][0]);
-
- if (radv_se_is_disabled(device, se))
- continue;
-
- if (!ac_is_thread_trace_complete(&device->physical_device->rad_info, &device->thread_trace, info)) {
- if (!radv_thread_trace_resize_bo(device)) {
- fprintf(stderr, "Failed to resize the thread "
- "trace buffer.\n");
- abort();
- }
- return false;
- }
-
- thread_trace_se.data_ptr = data_ptr;
- thread_trace_se.info = *info;
- thread_trace_se.shader_engine = se;
-
- /* RGP seems to expect units of WGP on GFX10+. */
- thread_trace_se.compute_unit =
- device->physical_device->rad_info.chip_class >= GFX10 ? (first_active_cu / 2) : first_active_cu;
-
- thread_trace_se.compute_unit = 0;
-
- thread_trace->traces[thread_trace->num_traces] = thread_trace_se;
- thread_trace->num_traces++;
- }
-
- return true;
+ struct radv_device *device = queue->device;
+ struct radeon_info *rad_info = &device->physical_device->rad_info;
+ unsigned max_se = rad_info->max_se;
+ void *thread_trace_ptr = device->thread_trace.ptr;
+
+ memset(thread_trace, 0, sizeof(*thread_trace));
+
+ for (unsigned se = 0; se < max_se; se++) {
+ uint64_t info_offset = ac_thread_trace_get_info_offset(se);
+ uint64_t data_offset = ac_thread_trace_get_data_offset(rad_info, &device->thread_trace, se);
+ void *info_ptr = (uint8_t *)thread_trace_ptr + info_offset;
+ void *data_ptr = (uint8_t *)thread_trace_ptr + data_offset;
+ struct ac_thread_trace_info *info = (struct ac_thread_trace_info *)info_ptr;
+ struct ac_thread_trace_se thread_trace_se = {0};
+ int first_active_cu = ffs(device->physical_device->rad_info.cu_mask[se][0]);
+
+ if (radv_se_is_disabled(device, se))
+ continue;
+
+ if (!ac_is_thread_trace_complete(&device->physical_device->rad_info, &device->thread_trace,
+ info)) {
+ if (!radv_thread_trace_resize_bo(device)) {
+ fprintf(stderr, "Failed to resize the thread "
+ "trace buffer.\n");
+ abort();
+ }
+ return false;
+ }
+
+ thread_trace_se.data_ptr = data_ptr;
+ thread_trace_se.info = *info;
+ thread_trace_se.shader_engine = se;
+
+ /* RGP seems to expect units of WGP on GFX10+. */
+ thread_trace_se.compute_unit = device->physical_device->rad_info.chip_class >= GFX10
+ ? (first_active_cu / 2)
+ : first_active_cu;
+
+ thread_trace_se.compute_unit = 0;
+
+ thread_trace->traces[thread_trace->num_traces] = thread_trace_se;
+ thread_trace->num_traces++;
+ }
+
+ return true;
}
diff --git a/src/amd/vulkan/radv_util.c b/src/amd/vulkan/radv_util.c
index 16666535192..153f65dd90f 100644
--- a/src/amd/vulkan/radv_util.c
+++ b/src/amd/vulkan/radv_util.c
@@ -21,117 +21,110 @@
* IN THE SOFTWARE.
*/
+#include <assert.h>
+#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <errno.h>
-#include <assert.h>
-#include "radv_private.h"
#include "radv_debug.h"
+#include "radv_private.h"
#include "vk_enum_to_str.h"
#include "util/u_math.h"
/** Log an error message. */
-void radv_printflike(1, 2)
- radv_loge(const char *format, ...)
+void radv_printflike(1, 2) radv_loge(const char *format, ...)
{
- va_list va;
+ va_list va;
- va_start(va, format);
- radv_loge_v(format, va);
- va_end(va);
+ va_start(va, format);
+ radv_loge_v(format, va);
+ va_end(va);
}
/** \see radv_loge() */
void
radv_loge_v(const char *format, va_list va)
{
- fprintf(stderr, "vk: error: ");
- vfprintf(stderr, format, va);
- fprintf(stderr, "\n");
+ fprintf(stderr, "vk: error: ");
+ vfprintf(stderr, format, va);
+ fprintf(stderr, "\n");
}
 /** Log an info message. */
-void radv_printflike(1, 2)
- radv_logi(const char *format, ...)
+void radv_printflike(1, 2) radv_logi(const char *format, ...)
{
- va_list va;
+ va_list va;
- va_start(va, format);
- radv_logi_v(format, va);
- va_end(va);
+ va_start(va, format);
+ radv_logi_v(format, va);
+ va_end(va);
}
/** \see radv_logi() */
void
radv_logi_v(const char *format, va_list va)
{
- fprintf(stderr, "radv: info: ");
- vfprintf(stderr, format, va);
- fprintf(stderr, "\n");
+ fprintf(stderr, "radv: info: ");
+ vfprintf(stderr, format, va);
+ fprintf(stderr, "\n");
}
-void radv_printflike(3, 4)
- __radv_finishme(const char *file, int line, const char *format, ...)
+void radv_printflike(3, 4) __radv_finishme(const char *file, int line, const char *format, ...)
{
- va_list ap;
- char buffer[256];
+ va_list ap;
+ char buffer[256];
- va_start(ap, format);
- vsnprintf(buffer, sizeof(buffer), format, ap);
- va_end(ap);
+ va_start(ap, format);
+ vsnprintf(buffer, sizeof(buffer), format, ap);
+ va_end(ap);
- fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buffer);
+ fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buffer);
}
VkResult
-__vk_errorv(struct radv_instance *instance, const void *object,
- VkDebugReportObjectTypeEXT type, VkResult error, const char *file,
- int line, const char *format, va_list ap)
+__vk_errorv(struct radv_instance *instance, const void *object, VkDebugReportObjectTypeEXT type,
+ VkResult error, const char *file, int line, const char *format, va_list ap)
{
- char buffer[256];
- char report[512];
+ char buffer[256];
+ char report[512];
- const char *error_str = vk_Result_to_str(error);
+ const char *error_str = vk_Result_to_str(error);
#ifndef DEBUG
- if (instance && !(instance->debug_flags & RADV_DEBUG_ERRORS))
- return error;
+ if (instance && !(instance->debug_flags & RADV_DEBUG_ERRORS))
+ return error;
#endif
- if (format) {
- vsnprintf(buffer, sizeof(buffer), format, ap);
+ if (format) {
+ vsnprintf(buffer, sizeof(buffer), format, ap);
- snprintf(report, sizeof(report), "%s:%d: %s (%s)", file, line,
- buffer, error_str);
- } else {
- snprintf(report, sizeof(report), "%s:%d: %s", file, line,
- error_str);
- }
+ snprintf(report, sizeof(report), "%s:%d: %s (%s)", file, line, buffer, error_str);
+ } else {
+ snprintf(report, sizeof(report), "%s:%d: %s", file, line, error_str);
+ }
- if (instance) {
- vk_debug_report(&instance->vk, VK_DEBUG_REPORT_ERROR_BIT_EXT,
- object, line, 0, "radv", report);
- }
+ if (instance) {
+ vk_debug_report(&instance->vk, VK_DEBUG_REPORT_ERROR_BIT_EXT, object, line, 0, "radv",
+ report);
+ }
- fprintf(stderr, "%s\n", report);
+ fprintf(stderr, "%s\n", report);
- return error;
+ return error;
}
VkResult
-__vk_errorf(struct radv_instance *instance, const void *object,
- VkDebugReportObjectTypeEXT type, VkResult error, const char *file,
- int line, const char *format, ...)
+__vk_errorf(struct radv_instance *instance, const void *object, VkDebugReportObjectTypeEXT type,
+ VkResult error, const char *file, int line, const char *format, ...)
{
- va_list ap;
+ va_list ap;
- va_start(ap, format);
- __vk_errorv(instance, object, type, error, file, line, format, ap);
- va_end(ap);
+ va_start(ap, format);
+ __vk_errorv(instance, object, type, error, file, line, format, ap);
+ va_end(ap);
- return error;
+ return error;
}
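The __vk_errorf/__vk_errorv pair follows the usual varargs-plus-va_list split. In isolation, the pattern looks like this (names are generic, not the driver's):

#include <stdarg.h>
#include <stdio.h>

/* The va_list variant does the real work. */
static void report_v(const char *file, int line, const char *format, va_list ap)
{
   char buffer[256];

   vsnprintf(buffer, sizeof(buffer), format, ap);
   fprintf(stderr, "%s:%d: %s\n", file, line, buffer);
}

/* The varargs wrapper only forwards to the va_list variant. */
static void report(const char *file, int line, const char *format, ...)
{
   va_list ap;

   va_start(ap, format);
   report_v(file, line, format, ap);
   va_end(ap);
}

int main(void)
{
   report(__FILE__, __LINE__, "something went wrong (%d)", 42);
   return 0;
}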
diff --git a/src/amd/vulkan/radv_wsi.c b/src/amd/vulkan/radv_wsi.c
index da3114b0334..8e9d70b56c9 100644
--- a/src/amd/vulkan/radv_wsi.c
+++ b/src/amd/vulkan/radv_wsi.c
@@ -23,315 +23,263 @@
* IN THE SOFTWARE.
*/
-#include "radv_private.h"
+#include "util/macros.h"
#include "radv_meta.h"
-#include "wsi_common.h"
+#include "radv_private.h"
#include "vk_util.h"
-#include "util/macros.h"
+#include "wsi_common.h"
static PFN_vkVoidFunction
radv_wsi_proc_addr(VkPhysicalDevice physicalDevice, const char *pName)
{
- RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
- return vk_instance_get_proc_addr_unchecked(&pdevice->instance->vk, pName);
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
+ return vk_instance_get_proc_addr_unchecked(&pdevice->instance->vk, pName);
}
static void
-radv_wsi_set_memory_ownership(VkDevice _device,
- VkDeviceMemory _mem,
- VkBool32 ownership)
+radv_wsi_set_memory_ownership(VkDevice _device, VkDeviceMemory _mem, VkBool32 ownership)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
- if (device->use_global_bo_list) {
- device->ws->buffer_make_resident(device->ws, mem->bo, ownership);
- }
+ if (device->use_global_bo_list) {
+ device->ws->buffer_make_resident(device->ws, mem->bo, ownership);
+ }
}
VkResult
radv_init_wsi(struct radv_physical_device *physical_device)
{
- VkResult result = wsi_device_init(&physical_device->wsi_device,
- radv_physical_device_to_handle(physical_device),
- radv_wsi_proc_addr,
- &physical_device->instance->vk.alloc,
- physical_device->master_fd,
- &physical_device->instance->dri_options,
- false);
- if (result != VK_SUCCESS)
- return result;
-
- physical_device->wsi_device.supports_modifiers = physical_device->rad_info.chip_class >= GFX9;
- physical_device->wsi_device.set_memory_ownership = radv_wsi_set_memory_ownership;
- return VK_SUCCESS;
+ VkResult result =
+ wsi_device_init(&physical_device->wsi_device, radv_physical_device_to_handle(physical_device),
+ radv_wsi_proc_addr, &physical_device->instance->vk.alloc,
+ physical_device->master_fd, &physical_device->instance->dri_options, false);
+ if (result != VK_SUCCESS)
+ return result;
+
+ physical_device->wsi_device.supports_modifiers = physical_device->rad_info.chip_class >= GFX9;
+ physical_device->wsi_device.set_memory_ownership = radv_wsi_set_memory_ownership;
+ return VK_SUCCESS;
}
void
radv_finish_wsi(struct radv_physical_device *physical_device)
{
- wsi_device_finish(&physical_device->wsi_device,
- &physical_device->instance->vk.alloc);
+ wsi_device_finish(&physical_device->wsi_device, &physical_device->instance->vk.alloc);
}
-void radv_DestroySurfaceKHR(
- VkInstance _instance,
- VkSurfaceKHR _surface,
- const VkAllocationCallbacks* pAllocator)
+void
+radv_DestroySurfaceKHR(VkInstance _instance, VkSurfaceKHR _surface,
+ const VkAllocationCallbacks *pAllocator)
{
- RADV_FROM_HANDLE(radv_instance, instance, _instance);
- ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface);
+ RADV_FROM_HANDLE(radv_instance, instance, _instance);
+ ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface);
- vk_free2(&instance->vk.alloc, pAllocator, surface);
+ vk_free2(&instance->vk.alloc, pAllocator, surface);
}
-VkResult radv_GetPhysicalDeviceSurfaceSupportKHR(
- VkPhysicalDevice physicalDevice,
- uint32_t queueFamilyIndex,
- VkSurfaceKHR surface,
- VkBool32* pSupported)
+VkResult
+radv_GetPhysicalDeviceSurfaceSupportKHR(VkPhysicalDevice physicalDevice, uint32_t queueFamilyIndex,
+ VkSurfaceKHR surface, VkBool32 *pSupported)
{
- RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
+ RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
- return wsi_common_get_surface_support(&device->wsi_device,
- queueFamilyIndex,
- surface,
- pSupported);
+ return wsi_common_get_surface_support(&device->wsi_device, queueFamilyIndex, surface,
+ pSupported);
}
-VkResult radv_GetPhysicalDeviceSurfaceCapabilitiesKHR(
- VkPhysicalDevice physicalDevice,
- VkSurfaceKHR surface,
- VkSurfaceCapabilitiesKHR* pSurfaceCapabilities)
+VkResult
+radv_GetPhysicalDeviceSurfaceCapabilitiesKHR(VkPhysicalDevice physicalDevice, VkSurfaceKHR surface,
+ VkSurfaceCapabilitiesKHR *pSurfaceCapabilities)
{
- RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
+ RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
- return wsi_common_get_surface_capabilities(&device->wsi_device,
- surface,
- pSurfaceCapabilities);
+ return wsi_common_get_surface_capabilities(&device->wsi_device, surface, pSurfaceCapabilities);
}
-VkResult radv_GetPhysicalDeviceSurfaceCapabilities2KHR(
- VkPhysicalDevice physicalDevice,
- const VkPhysicalDeviceSurfaceInfo2KHR* pSurfaceInfo,
- VkSurfaceCapabilities2KHR* pSurfaceCapabilities)
+VkResult
+radv_GetPhysicalDeviceSurfaceCapabilities2KHR(VkPhysicalDevice physicalDevice,
+ const VkPhysicalDeviceSurfaceInfo2KHR *pSurfaceInfo,
+ VkSurfaceCapabilities2KHR *pSurfaceCapabilities)
{
- RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
+ RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
- return wsi_common_get_surface_capabilities2(&device->wsi_device,
- pSurfaceInfo,
- pSurfaceCapabilities);
+ return wsi_common_get_surface_capabilities2(&device->wsi_device, pSurfaceInfo,
+ pSurfaceCapabilities);
}
-VkResult radv_GetPhysicalDeviceSurfaceCapabilities2EXT(
- VkPhysicalDevice physicalDevice,
- VkSurfaceKHR surface,
- VkSurfaceCapabilities2EXT* pSurfaceCapabilities)
+VkResult
+radv_GetPhysicalDeviceSurfaceCapabilities2EXT(VkPhysicalDevice physicalDevice, VkSurfaceKHR surface,
+ VkSurfaceCapabilities2EXT *pSurfaceCapabilities)
{
- RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
+ RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
- return wsi_common_get_surface_capabilities2ext(&device->wsi_device,
- surface,
- pSurfaceCapabilities);
+ return wsi_common_get_surface_capabilities2ext(&device->wsi_device, surface,
+ pSurfaceCapabilities);
}
-VkResult radv_GetPhysicalDeviceSurfaceFormatsKHR(
- VkPhysicalDevice physicalDevice,
- VkSurfaceKHR surface,
- uint32_t* pSurfaceFormatCount,
- VkSurfaceFormatKHR* pSurfaceFormats)
+VkResult
+radv_GetPhysicalDeviceSurfaceFormatsKHR(VkPhysicalDevice physicalDevice, VkSurfaceKHR surface,
+ uint32_t *pSurfaceFormatCount,
+ VkSurfaceFormatKHR *pSurfaceFormats)
{
- RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
+ RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
- return wsi_common_get_surface_formats(&device->wsi_device,
- surface,
- pSurfaceFormatCount,
- pSurfaceFormats);
+ return wsi_common_get_surface_formats(&device->wsi_device, surface, pSurfaceFormatCount,
+ pSurfaceFormats);
}
-VkResult radv_GetPhysicalDeviceSurfaceFormats2KHR(
- VkPhysicalDevice physicalDevice,
- const VkPhysicalDeviceSurfaceInfo2KHR* pSurfaceInfo,
- uint32_t* pSurfaceFormatCount,
- VkSurfaceFormat2KHR* pSurfaceFormats)
+VkResult
+radv_GetPhysicalDeviceSurfaceFormats2KHR(VkPhysicalDevice physicalDevice,
+ const VkPhysicalDeviceSurfaceInfo2KHR *pSurfaceInfo,
+ uint32_t *pSurfaceFormatCount,
+ VkSurfaceFormat2KHR *pSurfaceFormats)
{
- RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
+ RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
- return wsi_common_get_surface_formats2(&device->wsi_device,
- pSurfaceInfo,
- pSurfaceFormatCount,
- pSurfaceFormats);
+ return wsi_common_get_surface_formats2(&device->wsi_device, pSurfaceInfo, pSurfaceFormatCount,
+ pSurfaceFormats);
}
-VkResult radv_GetPhysicalDeviceSurfacePresentModesKHR(
- VkPhysicalDevice physicalDevice,
- VkSurfaceKHR surface,
- uint32_t* pPresentModeCount,
- VkPresentModeKHR* pPresentModes)
+VkResult
+radv_GetPhysicalDeviceSurfacePresentModesKHR(VkPhysicalDevice physicalDevice, VkSurfaceKHR surface,
+ uint32_t *pPresentModeCount,
+ VkPresentModeKHR *pPresentModes)
{
- RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
+ RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
- return wsi_common_get_surface_present_modes(&device->wsi_device,
- surface,
- pPresentModeCount,
- pPresentModes);
+ return wsi_common_get_surface_present_modes(&device->wsi_device, surface, pPresentModeCount,
+ pPresentModes);
}
-VkResult radv_CreateSwapchainKHR(
- VkDevice _device,
- const VkSwapchainCreateInfoKHR* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkSwapchainKHR* pSwapchain)
+VkResult
+radv_CreateSwapchainKHR(VkDevice _device, const VkSwapchainCreateInfoKHR *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkSwapchainKHR *pSwapchain)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- const VkAllocationCallbacks *alloc;
- if (pAllocator)
- alloc = pAllocator;
- else
- alloc = &device->vk.alloc;
-
- return wsi_common_create_swapchain(&device->physical_device->wsi_device,
- radv_device_to_handle(device),
- pCreateInfo,
- alloc,
- pSwapchain);
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ const VkAllocationCallbacks *alloc;
+ if (pAllocator)
+ alloc = pAllocator;
+ else
+ alloc = &device->vk.alloc;
+
+ return wsi_common_create_swapchain(&device->physical_device->wsi_device,
+ radv_device_to_handle(device), pCreateInfo, alloc,
+ pSwapchain);
}
-void radv_DestroySwapchainKHR(
- VkDevice _device,
- VkSwapchainKHR swapchain,
- const VkAllocationCallbacks* pAllocator)
+void
+radv_DestroySwapchainKHR(VkDevice _device, VkSwapchainKHR swapchain,
+ const VkAllocationCallbacks *pAllocator)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- const VkAllocationCallbacks *alloc;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ const VkAllocationCallbacks *alloc;
- if (pAllocator)
- alloc = pAllocator;
- else
- alloc = &device->vk.alloc;
+ if (pAllocator)
+ alloc = pAllocator;
+ else
+ alloc = &device->vk.alloc;
- wsi_common_destroy_swapchain(_device, swapchain, alloc);
+ wsi_common_destroy_swapchain(_device, swapchain, alloc);
}
-VkResult radv_GetSwapchainImagesKHR(
- VkDevice device,
- VkSwapchainKHR swapchain,
- uint32_t* pSwapchainImageCount,
- VkImage* pSwapchainImages)
+VkResult
+radv_GetSwapchainImagesKHR(VkDevice device, VkSwapchainKHR swapchain,
+ uint32_t *pSwapchainImageCount, VkImage *pSwapchainImages)
{
- return wsi_common_get_images(swapchain,
- pSwapchainImageCount,
- pSwapchainImages);
+ return wsi_common_get_images(swapchain, pSwapchainImageCount, pSwapchainImages);
}
-VkResult radv_AcquireNextImageKHR(
- VkDevice device,
- VkSwapchainKHR swapchain,
- uint64_t timeout,
- VkSemaphore semaphore,
- VkFence fence,
- uint32_t* pImageIndex)
+VkResult
+radv_AcquireNextImageKHR(VkDevice device, VkSwapchainKHR swapchain, uint64_t timeout,
+ VkSemaphore semaphore, VkFence fence, uint32_t *pImageIndex)
{
- VkAcquireNextImageInfoKHR acquire_info = {
- .sType = VK_STRUCTURE_TYPE_ACQUIRE_NEXT_IMAGE_INFO_KHR,
- .swapchain = swapchain,
- .timeout = timeout,
- .semaphore = semaphore,
- .fence = fence,
- .deviceMask = 0,
- };
-
- return radv_AcquireNextImage2KHR(device, &acquire_info, pImageIndex);
+ VkAcquireNextImageInfoKHR acquire_info = {
+ .sType = VK_STRUCTURE_TYPE_ACQUIRE_NEXT_IMAGE_INFO_KHR,
+ .swapchain = swapchain,
+ .timeout = timeout,
+ .semaphore = semaphore,
+ .fence = fence,
+ .deviceMask = 0,
+ };
+
+ return radv_AcquireNextImage2KHR(device, &acquire_info, pImageIndex);
}
-VkResult radv_AcquireNextImage2KHR(
- VkDevice _device,
- const VkAcquireNextImageInfoKHR* pAcquireInfo,
- uint32_t* pImageIndex)
+VkResult
+radv_AcquireNextImage2KHR(VkDevice _device, const VkAcquireNextImageInfoKHR *pAcquireInfo,
+ uint32_t *pImageIndex)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- struct radv_physical_device *pdevice = device->physical_device;
- RADV_FROM_HANDLE(radv_fence, fence, pAcquireInfo->fence);
- RADV_FROM_HANDLE(radv_semaphore, semaphore, pAcquireInfo->semaphore);
-
- VkResult result = wsi_common_acquire_next_image2(&pdevice->wsi_device,
- _device,
- pAcquireInfo,
- pImageIndex);
-
- if (result == VK_SUCCESS || result == VK_SUBOPTIMAL_KHR) {
- if (fence) {
- struct radv_fence_part *part =
- fence->temporary.kind != RADV_FENCE_NONE ?
- &fence->temporary : &fence->permanent;
-
- device->ws->signal_syncobj(device->ws, part->syncobj, 0);
- }
- if (semaphore) {
- struct radv_semaphore_part *part =
- semaphore->temporary.kind != RADV_SEMAPHORE_NONE ?
- &semaphore->temporary : &semaphore->permanent;
-
- switch (part->kind) {
- case RADV_SEMAPHORE_NONE:
- /* Do not need to do anything. */
- break;
- case RADV_SEMAPHORE_TIMELINE:
- case RADV_SEMAPHORE_TIMELINE_SYNCOBJ:
- unreachable("WSI only allows binary semaphores.");
- case RADV_SEMAPHORE_SYNCOBJ:
- device->ws->signal_syncobj(device->ws, part->syncobj, 0);
- break;
- }
- }
- }
- return result;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ struct radv_physical_device *pdevice = device->physical_device;
+ RADV_FROM_HANDLE(radv_fence, fence, pAcquireInfo->fence);
+ RADV_FROM_HANDLE(radv_semaphore, semaphore, pAcquireInfo->semaphore);
+
+ VkResult result =
+ wsi_common_acquire_next_image2(&pdevice->wsi_device, _device, pAcquireInfo, pImageIndex);
+
+ if (result == VK_SUCCESS || result == VK_SUBOPTIMAL_KHR) {
+ if (fence) {
+ struct radv_fence_part *part =
+ fence->temporary.kind != RADV_FENCE_NONE ? &fence->temporary : &fence->permanent;
+
+ device->ws->signal_syncobj(device->ws, part->syncobj, 0);
+ }
+ if (semaphore) {
+ struct radv_semaphore_part *part = semaphore->temporary.kind != RADV_SEMAPHORE_NONE
+ ? &semaphore->temporary
+ : &semaphore->permanent;
+
+ switch (part->kind) {
+ case RADV_SEMAPHORE_NONE:
+ /* Do not need to do anything. */
+ break;
+ case RADV_SEMAPHORE_TIMELINE:
+ case RADV_SEMAPHORE_TIMELINE_SYNCOBJ:
+ unreachable("WSI only allows binary semaphores.");
+ case RADV_SEMAPHORE_SYNCOBJ:
+ device->ws->signal_syncobj(device->ws, part->syncobj, 0);
+ break;
+ }
+ }
+ }
+ return result;
}
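For context, an application reaches this path through vkAcquireNextImage2KHR. A minimal usage sketch, assuming the device, swapchain and semaphore were created elsewhere:

#include <stdint.h>
#include <vulkan/vulkan.h>

/* Hypothetical helper: acquires the next swapchain image, blocking until one
 * is available and signaling 'acquire_semaphore' when it is ready for use. */
static VkResult acquire_next_image(VkDevice device, VkSwapchainKHR swapchain,
                                   VkSemaphore acquire_semaphore, uint32_t *image_index)
{
   const VkAcquireNextImageInfoKHR info = {
      .sType = VK_STRUCTURE_TYPE_ACQUIRE_NEXT_IMAGE_INFO_KHR,
      .swapchain = swapchain,
      .timeout = UINT64_MAX,          /* block until an image is available */
      .semaphore = acquire_semaphore, /* signaled once the image can be used */
      .fence = VK_NULL_HANDLE,
      .deviceMask = 0x1,              /* single-GPU device group */
   };

   return vkAcquireNextImage2KHR(device, &info, image_index);
}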
-VkResult radv_QueuePresentKHR(
- VkQueue _queue,
- const VkPresentInfoKHR* pPresentInfo)
+VkResult
+radv_QueuePresentKHR(VkQueue _queue, const VkPresentInfoKHR *pPresentInfo)
{
- RADV_FROM_HANDLE(radv_queue, queue, _queue);
- return wsi_common_queue_present(&queue->device->physical_device->wsi_device,
- radv_device_to_handle(queue->device),
- _queue,
- queue->queue_family_index,
- pPresentInfo);
+ RADV_FROM_HANDLE(radv_queue, queue, _queue);
+ return wsi_common_queue_present(&queue->device->physical_device->wsi_device,
+ radv_device_to_handle(queue->device), _queue,
+ queue->queue_family_index, pPresentInfo);
}
-
-VkResult radv_GetDeviceGroupPresentCapabilitiesKHR(
- VkDevice device,
- VkDeviceGroupPresentCapabilitiesKHR* pCapabilities)
+VkResult
+radv_GetDeviceGroupPresentCapabilitiesKHR(VkDevice device,
+ VkDeviceGroupPresentCapabilitiesKHR *pCapabilities)
{
- memset(pCapabilities->presentMask, 0,
- sizeof(pCapabilities->presentMask));
+ memset(pCapabilities->presentMask, 0, sizeof(pCapabilities->presentMask));
pCapabilities->presentMask[0] = 0x1;
pCapabilities->modes = VK_DEVICE_GROUP_PRESENT_MODE_LOCAL_BIT_KHR;
return VK_SUCCESS;
}
-VkResult radv_GetDeviceGroupSurfacePresentModesKHR(
- VkDevice device,
- VkSurfaceKHR surface,
- VkDeviceGroupPresentModeFlagsKHR* pModes)
+VkResult
+radv_GetDeviceGroupSurfacePresentModesKHR(VkDevice device, VkSurfaceKHR surface,
+ VkDeviceGroupPresentModeFlagsKHR *pModes)
{
*pModes = VK_DEVICE_GROUP_PRESENT_MODE_LOCAL_BIT_KHR;
return VK_SUCCESS;
}
-VkResult radv_GetPhysicalDevicePresentRectanglesKHR(
- VkPhysicalDevice physicalDevice,
- VkSurfaceKHR surface,
- uint32_t* pRectCount,
- VkRect2D* pRects)
+VkResult
+radv_GetPhysicalDevicePresentRectanglesKHR(VkPhysicalDevice physicalDevice, VkSurfaceKHR surface,
+ uint32_t *pRectCount, VkRect2D *pRects)
{
- RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
+ RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
- return wsi_common_get_present_rectangles(&device->wsi_device,
- surface,
- pRectCount, pRects);
+ return wsi_common_get_present_rectangles(&device->wsi_device, surface, pRectCount, pRects);
}
diff --git a/src/amd/vulkan/radv_wsi_display.c b/src/amd/vulkan/radv_wsi_display.c
index 64c7acd3734..6548466e5b5 100644
--- a/src/amd/vulkan/radv_wsi_display.c
+++ b/src/amd/vulkan/radv_wsi_display.c
@@ -20,39 +20,36 @@
* OF THIS SOFTWARE.
*/
+#include <amdgpu.h>
+#include <fcntl.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
-#include <fcntl.h>
-#include "radv_private.h"
-#include "radv_cs.h"
-#include "util/disk_cache.h"
-#include "util/strtod.h"
-#include "vk_util.h"
#include <xf86drm.h>
#include <xf86drmMode.h>
-#include <amdgpu.h>
#include "drm-uapi/amdgpu_drm.h"
+#include "util/debug.h"
+#include "util/disk_cache.h"
+#include "util/strtod.h"
#include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
-#include "vk_format.h"
+#include "radv_cs.h"
+#include "radv_private.h"
#include "sid.h"
-#include "util/debug.h"
+#include "vk_format.h"
+#include "vk_util.h"
#include "wsi_common_display.h"
-#define MM_PER_PIXEL (1.0/96.0 * 25.4)
+#define MM_PER_PIXEL (1.0 / 96.0 * 25.4)
VkResult
radv_GetPhysicalDeviceDisplayPropertiesKHR(VkPhysicalDevice physical_device,
uint32_t *property_count,
VkDisplayPropertiesKHR *properties)
{
- RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
- return wsi_display_get_physical_device_display_properties(
- physical_device,
- &pdevice->wsi_device,
- property_count,
- properties);
+ return wsi_display_get_physical_device_display_properties(physical_device, &pdevice->wsi_device,
+ property_count, properties);
}
VkResult
@@ -60,122 +57,84 @@ radv_GetPhysicalDeviceDisplayProperties2KHR(VkPhysicalDevice physical_device,
uint32_t *property_count,
VkDisplayProperties2KHR *properties)
{
- RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
- return wsi_display_get_physical_device_display_properties2(
- physical_device,
- &pdevice->wsi_device,
- property_count,
- properties);
+ return wsi_display_get_physical_device_display_properties2(physical_device, &pdevice->wsi_device,
+ property_count, properties);
}
VkResult
-radv_GetPhysicalDeviceDisplayPlanePropertiesKHR(
- VkPhysicalDevice physical_device,
- uint32_t *property_count,
- VkDisplayPlanePropertiesKHR *properties)
+radv_GetPhysicalDeviceDisplayPlanePropertiesKHR(VkPhysicalDevice physical_device,
+ uint32_t *property_count,
+ VkDisplayPlanePropertiesKHR *properties)
{
- RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
- return wsi_display_get_physical_device_display_plane_properties(
- physical_device,
- &pdevice->wsi_device,
- property_count,
- properties);
+ return wsi_display_get_physical_device_display_plane_properties(
+ physical_device, &pdevice->wsi_device, property_count, properties);
}
VkResult
-radv_GetPhysicalDeviceDisplayPlaneProperties2KHR(
- VkPhysicalDevice physical_device,
- uint32_t *property_count,
- VkDisplayPlaneProperties2KHR *properties)
+radv_GetPhysicalDeviceDisplayPlaneProperties2KHR(VkPhysicalDevice physical_device,
+ uint32_t *property_count,
+ VkDisplayPlaneProperties2KHR *properties)
{
- RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
- return wsi_display_get_physical_device_display_plane_properties2(
- physical_device,
- &pdevice->wsi_device,
- property_count,
- properties);
+ return wsi_display_get_physical_device_display_plane_properties2(
+ physical_device, &pdevice->wsi_device, property_count, properties);
}
VkResult
-radv_GetDisplayPlaneSupportedDisplaysKHR(VkPhysicalDevice physical_device,
- uint32_t plane_index,
- uint32_t *display_count,
- VkDisplayKHR *displays)
+radv_GetDisplayPlaneSupportedDisplaysKHR(VkPhysicalDevice physical_device, uint32_t plane_index,
+ uint32_t *display_count, VkDisplayKHR *displays)
{
- RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
-
- return wsi_display_get_display_plane_supported_displays(
- physical_device,
- &pdevice->wsi_device,
- plane_index,
- display_count,
- displays);
-}
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
+ return wsi_display_get_display_plane_supported_displays(physical_device, &pdevice->wsi_device,
+ plane_index, display_count, displays);
+}
VkResult
-radv_GetDisplayModePropertiesKHR(VkPhysicalDevice physical_device,
- VkDisplayKHR display,
- uint32_t *property_count,
- VkDisplayModePropertiesKHR *properties)
+radv_GetDisplayModePropertiesKHR(VkPhysicalDevice physical_device, VkDisplayKHR display,
+ uint32_t *property_count, VkDisplayModePropertiesKHR *properties)
{
- RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
- return wsi_display_get_display_mode_properties(physical_device,
- &pdevice->wsi_device,
- display,
- property_count,
- properties);
+ return wsi_display_get_display_mode_properties(physical_device, &pdevice->wsi_device, display,
+ property_count, properties);
}
VkResult
-radv_GetDisplayModeProperties2KHR(VkPhysicalDevice physical_device,
- VkDisplayKHR display,
- uint32_t *property_count,
- VkDisplayModeProperties2KHR *properties)
+radv_GetDisplayModeProperties2KHR(VkPhysicalDevice physical_device, VkDisplayKHR display,
+ uint32_t *property_count, VkDisplayModeProperties2KHR *properties)
{
- RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
- return wsi_display_get_display_mode_properties2(physical_device,
- &pdevice->wsi_device,
- display,
- property_count,
- properties);
+ return wsi_display_get_display_mode_properties2(physical_device, &pdevice->wsi_device, display,
+ property_count, properties);
}
VkResult
-radv_CreateDisplayModeKHR(VkPhysicalDevice physical_device,
- VkDisplayKHR display,
+radv_CreateDisplayModeKHR(VkPhysicalDevice physical_device, VkDisplayKHR display,
const VkDisplayModeCreateInfoKHR *create_info,
- const VkAllocationCallbacks *allocator,
- VkDisplayModeKHR *mode)
+ const VkAllocationCallbacks *allocator, VkDisplayModeKHR *mode)
{
- RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
-
- return wsi_display_create_display_mode(physical_device,
- &pdevice->wsi_device,
- display,
- create_info,
- allocator,
- mode);
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
+
+ return wsi_display_create_display_mode(physical_device, &pdevice->wsi_device, display,
+ create_info, allocator, mode);
}
VkResult
-radv_GetDisplayPlaneCapabilitiesKHR(VkPhysicalDevice physical_device,
- VkDisplayModeKHR mode_khr,
+radv_GetDisplayPlaneCapabilitiesKHR(VkPhysicalDevice physical_device, VkDisplayModeKHR mode_khr,
uint32_t plane_index,
VkDisplayPlaneCapabilitiesKHR *capabilities)
{
- RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
- return wsi_get_display_plane_capabilities(physical_device,
- &pdevice->wsi_device,
- mode_khr,
- plane_index,
- capabilities);
+ return wsi_get_display_plane_capabilities(physical_device, &pdevice->wsi_device, mode_khr,
+ plane_index, capabilities);
}
VkResult
@@ -183,187 +142,152 @@ radv_GetDisplayPlaneCapabilities2KHR(VkPhysicalDevice physical_device,
const VkDisplayPlaneInfo2KHR *pDisplayPlaneInfo,
VkDisplayPlaneCapabilities2KHR *capabilities)
{
- RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
- return wsi_get_display_plane_capabilities2(physical_device,
- &pdevice->wsi_device,
- pDisplayPlaneInfo,
- capabilities);
+ return wsi_get_display_plane_capabilities2(physical_device, &pdevice->wsi_device,
+ pDisplayPlaneInfo, capabilities);
}
VkResult
-radv_CreateDisplayPlaneSurfaceKHR(
- VkInstance _instance,
- const VkDisplaySurfaceCreateInfoKHR *create_info,
- const VkAllocationCallbacks *allocator,
- VkSurfaceKHR *surface)
+radv_CreateDisplayPlaneSurfaceKHR(VkInstance _instance,
+ const VkDisplaySurfaceCreateInfoKHR *create_info,
+ const VkAllocationCallbacks *allocator, VkSurfaceKHR *surface)
{
- RADV_FROM_HANDLE(radv_instance, instance, _instance);
- const VkAllocationCallbacks *alloc;
+ RADV_FROM_HANDLE(radv_instance, instance, _instance);
+ const VkAllocationCallbacks *alloc;
- if (allocator)
- alloc = allocator;
- else
- alloc = &instance->vk.alloc;
+ if (allocator)
+ alloc = allocator;
+ else
+ alloc = &instance->vk.alloc;
- return wsi_create_display_surface(_instance, alloc,
- create_info, surface);
+ return wsi_create_display_surface(_instance, alloc, create_info, surface);
}
VkResult
-radv_ReleaseDisplayEXT(VkPhysicalDevice physical_device,
- VkDisplayKHR display)
+radv_ReleaseDisplayEXT(VkPhysicalDevice physical_device, VkDisplayKHR display)
{
- RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
- return wsi_release_display(physical_device,
- &pdevice->wsi_device,
- display);
+ return wsi_release_display(physical_device, &pdevice->wsi_device, display);
}
#ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
VkResult
-radv_AcquireXlibDisplayEXT(VkPhysicalDevice physical_device,
- Display *dpy,
- VkDisplayKHR display)
+radv_AcquireXlibDisplayEXT(VkPhysicalDevice physical_device, Display *dpy, VkDisplayKHR display)
{
- RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
- return wsi_acquire_xlib_display(physical_device,
- &pdevice->wsi_device,
- dpy,
- display);
+ return wsi_acquire_xlib_display(physical_device, &pdevice->wsi_device, dpy, display);
}
VkResult
-radv_GetRandROutputDisplayEXT(VkPhysicalDevice physical_device,
- Display *dpy,
- RROutput output,
- VkDisplayKHR *display)
+radv_GetRandROutputDisplayEXT(VkPhysicalDevice physical_device, Display *dpy, RROutput output,
+ VkDisplayKHR *display)
{
- RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
+ RADV_FROM_HANDLE(radv_physical_device, pdevice, physical_device);
- return wsi_get_randr_output_display(physical_device,
- &pdevice->wsi_device,
- dpy,
- output,
- display);
+ return wsi_get_randr_output_display(physical_device, &pdevice->wsi_device, dpy, output, display);
}
#endif /* VK_USE_PLATFORM_XLIB_XRANDR_EXT */
/* VK_EXT_display_control */
VkResult
-radv_DisplayPowerControlEXT(VkDevice _device,
- VkDisplayKHR display,
- const VkDisplayPowerInfoEXT *display_power_info)
+radv_DisplayPowerControlEXT(VkDevice _device, VkDisplayKHR display,
+ const VkDisplayPowerInfoEXT *display_power_info)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_device, device, _device);
- return wsi_display_power_control(_device,
- &device->physical_device->wsi_device,
- display,
- display_power_info);
+ return wsi_display_power_control(_device, &device->physical_device->wsi_device, display,
+ display_power_info);
}
VkResult
-radv_RegisterDeviceEventEXT(VkDevice _device,
- const VkDeviceEventInfoEXT *device_event_info,
- const VkAllocationCallbacks *allocator,
- VkFence *_fence)
+radv_RegisterDeviceEventEXT(VkDevice _device, const VkDeviceEventInfoEXT *device_event_info,
+ const VkAllocationCallbacks *allocator, VkFence *_fence)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- VkResult ret;
- int fd;
-
- ret = radv_CreateFence(_device, &(VkFenceCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
- .pNext = &(VkExportFenceCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_EXPORT_FENCE_CREATE_INFO,
- .handleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT,
- },
- }, allocator, _fence);
- if (ret != VK_SUCCESS)
- return ret;
-
- RADV_FROM_HANDLE(radv_fence, fence, *_fence);
-
- assert(fence->permanent.kind == RADV_FENCE_SYNCOBJ);
-
- if (device->ws->export_syncobj(device->ws, fence->permanent.syncobj, &fd)) {
- ret = VK_ERROR_OUT_OF_HOST_MEMORY;
- } else {
- ret = wsi_register_device_event(_device,
- &device->physical_device->wsi_device,
- device_event_info,
- allocator,
- NULL,
- fd);
- close(fd);
- }
-
- if (ret != VK_SUCCESS)
- radv_DestroyFence(_device, *_fence, allocator);
-
- return ret;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ VkResult ret;
+ int fd;
+
+ ret = radv_CreateFence(_device,
+ &(VkFenceCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
+ .pNext =
+ &(VkExportFenceCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_EXPORT_FENCE_CREATE_INFO,
+ .handleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT,
+ },
+ },
+ allocator, _fence);
+ if (ret != VK_SUCCESS)
+ return ret;
+
+ RADV_FROM_HANDLE(radv_fence, fence, *_fence);
+
+ assert(fence->permanent.kind == RADV_FENCE_SYNCOBJ);
+
+ if (device->ws->export_syncobj(device->ws, fence->permanent.syncobj, &fd)) {
+ ret = VK_ERROR_OUT_OF_HOST_MEMORY;
+ } else {
+ ret = wsi_register_device_event(_device, &device->physical_device->wsi_device,
+ device_event_info, allocator, NULL, fd);
+ close(fd);
+ }
+
+ if (ret != VK_SUCCESS)
+ radv_DestroyFence(_device, *_fence, allocator);
+
+ return ret;
}
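radv_RegisterDeviceEventEXT above backs the VK_EXT_display_control device-event fence with an exported syncobj fd. A hedged application-side sketch of the same feature; register_hotplug_fence is a hypothetical helper, and the entry point is resolved through vkGetDeviceProcAddr because EXT commands are not exported by the loader:

#include <stddef.h>
#include <vulkan/vulkan.h>

/* Hypothetical helper: obtain a fence that signals on display hotplug. */
static VkResult
register_hotplug_fence(VkDevice device, VkFence *fence)
{
   PFN_vkRegisterDeviceEventEXT register_event =
      (PFN_vkRegisterDeviceEventEXT)vkGetDeviceProcAddr(device, "vkRegisterDeviceEventEXT");
   if (!register_event)
      return VK_ERROR_EXTENSION_NOT_PRESENT;

   const VkDeviceEventInfoEXT event_info = {
      .sType = VK_STRUCTURE_TYPE_DEVICE_EVENT_INFO_EXT,
      .deviceEvent = VK_DEVICE_EVENT_TYPE_DISPLAY_HOTPLUG_EXT,
   };

   /* The resulting fence can be waited on like any other VkFence; the
    * driver backs it with the exported syncobj shown in the hunk above. */
   return register_event(device, &event_info, NULL, fence);
}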
VkResult
-radv_RegisterDisplayEventEXT(VkDevice _device,
- VkDisplayKHR display,
- const VkDisplayEventInfoEXT *display_event_info,
- const VkAllocationCallbacks *allocator,
- VkFence *_fence)
+radv_RegisterDisplayEventEXT(VkDevice _device, VkDisplayKHR display,
+ const VkDisplayEventInfoEXT *display_event_info,
+ const VkAllocationCallbacks *allocator, VkFence *_fence)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
- VkResult ret;
- int fd;
-
- ret = radv_CreateFence(_device, &(VkFenceCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
- .pNext = &(VkExportFenceCreateInfo) {
- .sType = VK_STRUCTURE_TYPE_EXPORT_FENCE_CREATE_INFO,
- .handleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT,
- },
- }, allocator, _fence);
- if (ret != VK_SUCCESS)
- return ret;
-
- RADV_FROM_HANDLE(radv_fence, fence, *_fence);
-
- assert(fence->permanent.kind == RADV_FENCE_SYNCOBJ);
-
- if (device->ws->export_syncobj(device->ws, fence->permanent.syncobj, &fd)) {
- ret = VK_ERROR_OUT_OF_HOST_MEMORY;
- } else {
- ret = wsi_register_display_event(_device,
- &device->physical_device->wsi_device,
- display,
- display_event_info,
- allocator,
- NULL,
- fd);
- close(fd);
- }
-
- if (ret != VK_SUCCESS)
- radv_DestroyFence(_device, *_fence, allocator);
-
- return ret;
+ RADV_FROM_HANDLE(radv_device, device, _device);
+ VkResult ret;
+ int fd;
+
+ ret = radv_CreateFence(_device,
+ &(VkFenceCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
+ .pNext =
+ &(VkExportFenceCreateInfo){
+ .sType = VK_STRUCTURE_TYPE_EXPORT_FENCE_CREATE_INFO,
+ .handleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT,
+ },
+ },
+ allocator, _fence);
+ if (ret != VK_SUCCESS)
+ return ret;
+
+ RADV_FROM_HANDLE(radv_fence, fence, *_fence);
+
+ assert(fence->permanent.kind == RADV_FENCE_SYNCOBJ);
+
+ if (device->ws->export_syncobj(device->ws, fence->permanent.syncobj, &fd)) {
+ ret = VK_ERROR_OUT_OF_HOST_MEMORY;
+ } else {
+ ret = wsi_register_display_event(_device, &device->physical_device->wsi_device, display,
+ display_event_info, allocator, NULL, fd);
+ close(fd);
+ }
+
+ if (ret != VK_SUCCESS)
+ radv_DestroyFence(_device, *_fence, allocator);
+
+ return ret;
}
VkResult
-radv_GetSwapchainCounterEXT(VkDevice _device,
- VkSwapchainKHR swapchain,
- VkSurfaceCounterFlagBitsEXT flag_bits,
- uint64_t *value)
+radv_GetSwapchainCounterEXT(VkDevice _device, VkSwapchainKHR swapchain,
+ VkSurfaceCounterFlagBitsEXT flag_bits, uint64_t *value)
{
- RADV_FROM_HANDLE(radv_device, device, _device);
+ RADV_FROM_HANDLE(radv_device, device, _device);
- return wsi_get_swapchain_counter(_device,
- &device->physical_device->wsi_device,
- swapchain,
- flag_bits,
- value);
+ return wsi_get_swapchain_counter(_device, &device->physical_device->wsi_device, swapchain,
+ flag_bits, value);
}
-
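radv_GetSwapchainCounterEXT above is a thin forward to the WSI layer. A minimal sketch of the application-side query, assuming the swapchain was created with VK_SURFACE_COUNTER_VBLANK_EXT enabled through VkSwapchainCounterCreateInfoEXT; read_vblank_counter is a hypothetical helper name:

#include <stdint.h>
#include <vulkan/vulkan.h>

/* Hypothetical helper: read the vblank counter of a swapchain whose
 * creation enabled VK_SURFACE_COUNTER_VBLANK_EXT. */
static VkResult
read_vblank_counter(VkDevice device, VkSwapchainKHR swapchain, uint64_t *value)
{
   PFN_vkGetSwapchainCounterEXT get_counter =
      (PFN_vkGetSwapchainCounterEXT)vkGetDeviceProcAddr(device, "vkGetSwapchainCounterEXT");
   if (!get_counter)
      return VK_ERROR_EXTENSION_NOT_PRESENT;

   return get_counter(device, swapchain, VK_SURFACE_COUNTER_VBLANK_EXT, value);
}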
diff --git a/src/amd/vulkan/radv_wsi_wayland.c b/src/amd/vulkan/radv_wsi_wayland.c
index dba5a0610a0..0f5751b3898 100644
--- a/src/amd/vulkan/radv_wsi_wayland.c
+++ b/src/amd/vulkan/radv_wsi_wayland.c
@@ -23,24 +23,22 @@
* IN THE SOFTWARE.
*/
-#include "wsi_common_wayland.h"
#include "radv_private.h"
+#include "wsi_common_wayland.h"
-VkBool32 radv_GetPhysicalDeviceWaylandPresentationSupportKHR(
- VkPhysicalDevice physicalDevice,
- uint32_t queueFamilyIndex,
- struct wl_display* display)
+VkBool32
+radv_GetPhysicalDeviceWaylandPresentationSupportKHR(VkPhysicalDevice physicalDevice,
+ uint32_t queueFamilyIndex,
+ struct wl_display *display)
{
RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
return wsi_wl_get_presentation_support(&physical_device->wsi_device, display);
}
-VkResult radv_CreateWaylandSurfaceKHR(
- VkInstance _instance,
- const VkWaylandSurfaceCreateInfoKHR* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkSurfaceKHR* pSurface)
+VkResult
+radv_CreateWaylandSurfaceKHR(VkInstance _instance, const VkWaylandSurfaceCreateInfoKHR *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkSurfaceKHR *pSurface)
{
RADV_FROM_HANDLE(radv_instance, instance, _instance);
const VkAllocationCallbacks *alloc;
diff --git a/src/amd/vulkan/radv_wsi_x11.c b/src/amd/vulkan/radv_wsi_x11.c
index 8dee70555c1..e74cadf2012 100644
--- a/src/amd/vulkan/radv_wsi_x11.c
+++ b/src/amd/vulkan/radv_wsi_x11.c
@@ -27,64 +27,55 @@
#include <X11/Xlib-xcb.h>
#include <X11/xshmfence.h>
-#include <xcb/xcb.h>
#include <xcb/dri3.h>
#include <xcb/present.h>
+#include <xcb/xcb.h>
-#include "wsi_common_x11.h"
#include "radv_private.h"
+#include "wsi_common_x11.h"
-VkBool32 radv_GetPhysicalDeviceXcbPresentationSupportKHR(
- VkPhysicalDevice physicalDevice,
- uint32_t queueFamilyIndex,
- xcb_connection_t* connection,
- xcb_visualid_t visual_id)
+VkBool32
+radv_GetPhysicalDeviceXcbPresentationSupportKHR(VkPhysicalDevice physicalDevice,
+ uint32_t queueFamilyIndex,
+ xcb_connection_t *connection,
+ xcb_visualid_t visual_id)
{
RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
- return wsi_get_physical_device_xcb_presentation_support(
- &device->wsi_device,
- queueFamilyIndex,
- connection, visual_id);
+ return wsi_get_physical_device_xcb_presentation_support(&device->wsi_device, queueFamilyIndex,
+ connection, visual_id);
}
-VkBool32 radv_GetPhysicalDeviceXlibPresentationSupportKHR(
- VkPhysicalDevice physicalDevice,
- uint32_t queueFamilyIndex,
- Display* dpy,
- VisualID visualID)
+VkBool32
+radv_GetPhysicalDeviceXlibPresentationSupportKHR(VkPhysicalDevice physicalDevice,
+ uint32_t queueFamilyIndex, Display *dpy,
+ VisualID visualID)
{
RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
- return wsi_get_physical_device_xcb_presentation_support(
- &device->wsi_device,
- queueFamilyIndex,
- XGetXCBConnection(dpy), visualID);
+ return wsi_get_physical_device_xcb_presentation_support(&device->wsi_device, queueFamilyIndex,
+ XGetXCBConnection(dpy), visualID);
}
-VkResult radv_CreateXcbSurfaceKHR(
- VkInstance _instance,
- const VkXcbSurfaceCreateInfoKHR* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkSurfaceKHR* pSurface)
+VkResult
+radv_CreateXcbSurfaceKHR(VkInstance _instance, const VkXcbSurfaceCreateInfoKHR *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkSurfaceKHR *pSurface)
{
RADV_FROM_HANDLE(radv_instance, instance, _instance);
const VkAllocationCallbacks *alloc;
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_XCB_SURFACE_CREATE_INFO_KHR);
if (pAllocator)
- alloc = pAllocator;
+ alloc = pAllocator;
else
- alloc = &instance->vk.alloc;
+ alloc = &instance->vk.alloc;
return wsi_create_xcb_surface(alloc, pCreateInfo, pSurface);
}
-VkResult radv_CreateXlibSurfaceKHR(
- VkInstance _instance,
- const VkXlibSurfaceCreateInfoKHR* pCreateInfo,
- const VkAllocationCallbacks* pAllocator,
- VkSurfaceKHR* pSurface)
+VkResult
+radv_CreateXlibSurfaceKHR(VkInstance _instance, const VkXlibSurfaceCreateInfoKHR *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator, VkSurfaceKHR *pSurface)
{
RADV_FROM_HANDLE(radv_instance, instance, _instance);
const VkAllocationCallbacks *alloc;
@@ -92,9 +83,9 @@ VkResult radv_CreateXlibSurfaceKHR(
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR);
if (pAllocator)
- alloc = pAllocator;
+ alloc = pAllocator;
else
- alloc = &instance->vk.alloc;
+ alloc = &instance->vk.alloc;
return wsi_create_xlib_surface(alloc, pCreateInfo, pSurface);
}
diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c
index 90421222a81..eeccc33b790 100644
--- a/src/amd/vulkan/si_cmd_buffer.c
+++ b/src/amd/vulkan/si_cmd_buffer.c
@@ -27,2036 +27,1883 @@
/* command buffer handling for AMD GCN */
+#include "radv_cs.h"
#include "radv_private.h"
#include "radv_shader.h"
-#include "radv_cs.h"
#include "sid.h"
static void
si_write_harvested_raster_configs(struct radv_physical_device *physical_device,
- struct radeon_cmdbuf *cs,
- unsigned raster_config,
- unsigned raster_config_1)
+ struct radeon_cmdbuf *cs, unsigned raster_config,
+ unsigned raster_config_1)
{
- unsigned num_se = MAX2(physical_device->rad_info.max_se, 1);
- unsigned raster_config_se[4];
- unsigned se;
-
- ac_get_harvested_configs(&physical_device->rad_info,
- raster_config,
- &raster_config_1,
- raster_config_se);
-
- for (se = 0; se < num_se; se++) {
- /* GRBM_GFX_INDEX has a different offset on GFX6 and GFX7+ */
- if (physical_device->rad_info.chip_class < GFX7)
- radeon_set_config_reg(cs, R_00802C_GRBM_GFX_INDEX,
- S_00802C_SE_INDEX(se) |
- S_00802C_SH_BROADCAST_WRITES(1) |
- S_00802C_INSTANCE_BROADCAST_WRITES(1));
- else
- radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX,
- S_030800_SE_INDEX(se) | S_030800_SH_BROADCAST_WRITES(1) |
- S_030800_INSTANCE_BROADCAST_WRITES(1));
- radeon_set_context_reg(cs, R_028350_PA_SC_RASTER_CONFIG, raster_config_se[se]);
- }
-
- /* GRBM_GFX_INDEX has a different offset on GFX6 and GFX7+ */
- if (physical_device->rad_info.chip_class < GFX7)
- radeon_set_config_reg(cs, R_00802C_GRBM_GFX_INDEX,
- S_00802C_SE_BROADCAST_WRITES(1) |
- S_00802C_SH_BROADCAST_WRITES(1) |
- S_00802C_INSTANCE_BROADCAST_WRITES(1));
- else
- radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX,
- S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) |
- S_030800_INSTANCE_BROADCAST_WRITES(1));
-
- if (physical_device->rad_info.chip_class >= GFX7)
- radeon_set_context_reg(cs, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1);
+ unsigned num_se = MAX2(physical_device->rad_info.max_se, 1);
+ unsigned raster_config_se[4];
+ unsigned se;
+
+ ac_get_harvested_configs(&physical_device->rad_info, raster_config, &raster_config_1,
+ raster_config_se);
+
+ for (se = 0; se < num_se; se++) {
+ /* GRBM_GFX_INDEX has a different offset on GFX6 and GFX7+ */
+ if (physical_device->rad_info.chip_class < GFX7)
+ radeon_set_config_reg(cs, R_00802C_GRBM_GFX_INDEX,
+ S_00802C_SE_INDEX(se) | S_00802C_SH_BROADCAST_WRITES(1) |
+ S_00802C_INSTANCE_BROADCAST_WRITES(1));
+ else
+ radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX,
+ S_030800_SE_INDEX(se) | S_030800_SH_BROADCAST_WRITES(1) |
+ S_030800_INSTANCE_BROADCAST_WRITES(1));
+ radeon_set_context_reg(cs, R_028350_PA_SC_RASTER_CONFIG, raster_config_se[se]);
+ }
+
+ /* GRBM_GFX_INDEX has a different offset on GFX6 and GFX7+ */
+ if (physical_device->rad_info.chip_class < GFX7)
+ radeon_set_config_reg(cs, R_00802C_GRBM_GFX_INDEX,
+ S_00802C_SE_BROADCAST_WRITES(1) | S_00802C_SH_BROADCAST_WRITES(1) |
+ S_00802C_INSTANCE_BROADCAST_WRITES(1));
+ else
+ radeon_set_uconfig_reg(cs, R_030800_GRBM_GFX_INDEX,
+ S_030800_SE_BROADCAST_WRITES(1) | S_030800_SH_BROADCAST_WRITES(1) |
+ S_030800_INSTANCE_BROADCAST_WRITES(1));
+
+ if (physical_device->rad_info.chip_class >= GFX7)
+ radeon_set_context_reg(cs, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1);
}
void
-si_emit_compute(struct radv_device *device,
- struct radeon_cmdbuf *cs)
+si_emit_compute(struct radv_device *device, struct radeon_cmdbuf *cs)
{
- radeon_set_sh_reg_seq(cs, R_00B810_COMPUTE_START_X, 3);
- radeon_emit(cs, 0);
- radeon_emit(cs, 0);
- radeon_emit(cs, 0);
-
- radeon_set_sh_reg_seq(cs, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0, 2);
- /* R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0 / SE1,
- * renamed COMPUTE_DESTINATION_EN_SEn on gfx10. */
- radeon_emit(cs, S_00B858_SH0_CU_EN(0xffff) | S_00B858_SH1_CU_EN(0xffff));
- radeon_emit(cs, S_00B858_SH0_CU_EN(0xffff) | S_00B858_SH1_CU_EN(0xffff));
-
- if (device->physical_device->rad_info.chip_class >= GFX7) {
- /* Also set R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE2 / SE3 */
- radeon_set_sh_reg_seq(cs,
- R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2, 2);
- radeon_emit(cs, S_00B858_SH0_CU_EN(0xffff) |
- S_00B858_SH1_CU_EN(0xffff));
- radeon_emit(cs, S_00B858_SH0_CU_EN(0xffff) |
- S_00B858_SH1_CU_EN(0xffff));
-
- if (device->border_color_data.bo) {
- uint64_t bc_va = radv_buffer_get_va(device->border_color_data.bo);
-
- radeon_set_uconfig_reg_seq(cs, R_030E00_TA_CS_BC_BASE_ADDR, 2);
- radeon_emit(cs, bc_va >> 8);
- radeon_emit(cs, S_030E04_ADDRESS(bc_va >> 40));
- }
- }
-
- if (device->physical_device->rad_info.chip_class >= GFX9) {
- radeon_set_uconfig_reg(cs, R_0301EC_CP_COHER_START_DELAY,
- device->physical_device->rad_info.chip_class >= GFX10 ? 0x20 : 0);
- }
-
- if (device->physical_device->rad_info.chip_class >= GFX10) {
- radeon_set_sh_reg(cs, R_00B890_COMPUTE_USER_ACCUM_0, 0);
- radeon_set_sh_reg(cs, R_00B894_COMPUTE_USER_ACCUM_1, 0);
- radeon_set_sh_reg(cs, R_00B898_COMPUTE_USER_ACCUM_2, 0);
- radeon_set_sh_reg(cs, R_00B89C_COMPUTE_USER_ACCUM_3, 0);
- radeon_set_sh_reg(cs, R_00B8A0_COMPUTE_PGM_RSRC3, 0);
- radeon_set_sh_reg(cs, R_00B9F4_COMPUTE_DISPATCH_TUNNEL, 0);
- }
-
- /* This register has been moved to R_00CD20_COMPUTE_MAX_WAVE_ID
- * and is now per pipe, so it should be handled in the
- * kernel if we want to use something other than the default value,
- * which is now 0x22f.
- */
- if (device->physical_device->rad_info.chip_class <= GFX6) {
- /* XXX: This should be:
- * (number of compute units) * 4 * (waves per simd) - 1 */
-
- radeon_set_sh_reg(cs, R_00B82C_COMPUTE_MAX_WAVE_ID,
- 0x190 /* Default value */);
-
- if (device->border_color_data.bo) {
- uint64_t bc_va = radv_buffer_get_va(device->border_color_data.bo);
- radeon_set_config_reg(cs, R_00950C_TA_CS_BC_BASE_ADDR, bc_va >> 8);
- }
- }
-
- if (device->tma_bo) {
- uint64_t tba_va, tma_va;
-
- assert(device->physical_device->rad_info.chip_class == GFX8);
-
- tba_va = radv_buffer_get_va(device->trap_handler_shader->bo) +
- device->trap_handler_shader->bo_offset;
- tma_va = radv_buffer_get_va(device->tma_bo);
-
- radeon_set_sh_reg_seq(cs, R_00B838_COMPUTE_TBA_LO, 4);
- radeon_emit(cs, tba_va >> 8);
- radeon_emit(cs, tba_va >> 40);
- radeon_emit(cs, tma_va >> 8);
- radeon_emit(cs, tma_va >> 40);
- }
+ radeon_set_sh_reg_seq(cs, R_00B810_COMPUTE_START_X, 3);
+ radeon_emit(cs, 0);
+ radeon_emit(cs, 0);
+ radeon_emit(cs, 0);
+
+ radeon_set_sh_reg_seq(cs, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0, 2);
+ /* R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0 / SE1,
+ * renamed COMPUTE_DESTINATION_EN_SEn on gfx10. */
+ radeon_emit(cs, S_00B858_SH0_CU_EN(0xffff) | S_00B858_SH1_CU_EN(0xffff));
+ radeon_emit(cs, S_00B858_SH0_CU_EN(0xffff) | S_00B858_SH1_CU_EN(0xffff));
+
+ if (device->physical_device->rad_info.chip_class >= GFX7) {
+ /* Also set R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE2 / SE3 */
+ radeon_set_sh_reg_seq(cs, R_00B864_COMPUTE_STATIC_THREAD_MGMT_SE2, 2);
+ radeon_emit(cs, S_00B858_SH0_CU_EN(0xffff) | S_00B858_SH1_CU_EN(0xffff));
+ radeon_emit(cs, S_00B858_SH0_CU_EN(0xffff) | S_00B858_SH1_CU_EN(0xffff));
+
+ if (device->border_color_data.bo) {
+ uint64_t bc_va = radv_buffer_get_va(device->border_color_data.bo);
+
+ radeon_set_uconfig_reg_seq(cs, R_030E00_TA_CS_BC_BASE_ADDR, 2);
+ radeon_emit(cs, bc_va >> 8);
+ radeon_emit(cs, S_030E04_ADDRESS(bc_va >> 40));
+ }
+ }
+
+ if (device->physical_device->rad_info.chip_class >= GFX9) {
+ radeon_set_uconfig_reg(cs, R_0301EC_CP_COHER_START_DELAY,
+ device->physical_device->rad_info.chip_class >= GFX10 ? 0x20 : 0);
+ }
+
+ if (device->physical_device->rad_info.chip_class >= GFX10) {
+ radeon_set_sh_reg(cs, R_00B890_COMPUTE_USER_ACCUM_0, 0);
+ radeon_set_sh_reg(cs, R_00B894_COMPUTE_USER_ACCUM_1, 0);
+ radeon_set_sh_reg(cs, R_00B898_COMPUTE_USER_ACCUM_2, 0);
+ radeon_set_sh_reg(cs, R_00B89C_COMPUTE_USER_ACCUM_3, 0);
+ radeon_set_sh_reg(cs, R_00B8A0_COMPUTE_PGM_RSRC3, 0);
+ radeon_set_sh_reg(cs, R_00B9F4_COMPUTE_DISPATCH_TUNNEL, 0);
+ }
+
+ /* This register has been moved to R_00CD20_COMPUTE_MAX_WAVE_ID
+ * and is now per pipe, so it should be handled in the
+ * kernel if we want to use something other than the default value,
+ * which is now 0x22f.
+ */
+ if (device->physical_device->rad_info.chip_class <= GFX6) {
+ /* XXX: This should be:
+ * (number of compute units) * 4 * (waves per simd) - 1 */
+
+ radeon_set_sh_reg(cs, R_00B82C_COMPUTE_MAX_WAVE_ID, 0x190 /* Default value */);
+
+ if (device->border_color_data.bo) {
+ uint64_t bc_va = radv_buffer_get_va(device->border_color_data.bo);
+ radeon_set_config_reg(cs, R_00950C_TA_CS_BC_BASE_ADDR, bc_va >> 8);
+ }
+ }
+
+ if (device->tma_bo) {
+ uint64_t tba_va, tma_va;
+
+ assert(device->physical_device->rad_info.chip_class == GFX8);
+
+ tba_va = radv_buffer_get_va(device->trap_handler_shader->bo) +
+ device->trap_handler_shader->bo_offset;
+ tma_va = radv_buffer_get_va(device->tma_bo);
+
+ radeon_set_sh_reg_seq(cs, R_00B838_COMPUTE_TBA_LO, 4);
+ radeon_emit(cs, tba_va >> 8);
+ radeon_emit(cs, tba_va >> 40);
+ radeon_emit(cs, tma_va >> 8);
+ radeon_emit(cs, tma_va >> 40);
+ }
}
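Several writes in si_emit_compute above (TA_CS_BC_BASE_ADDR, COMPUTE_TBA/TMA) split a 64-bit GPU virtual address into a low dword shifted right by 8 and a high dword shifted right by 40. A small sketch of that packing, under the assumption implied by discarding the low 8 bits that the address is 256-byte aligned; split_gpu_va is a hypothetical helper name:

#include <assert.h>
#include <stdint.h>

/* Hypothetical helper mirroring the "va >> 8" / "va >> 40" register pairs
 * emitted above: the low dword carries address bits [39:8], the high dword
 * the remaining upper bits. */
static void
split_gpu_va(uint64_t va, uint32_t *lo, uint32_t *hi)
{
   assert((va & 0xffu) == 0); /* 256-byte aligned; the low 8 bits are dropped */
   *lo = (uint32_t)(va >> 8);
   *hi = (uint32_t)(va >> 40);
}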
/* 12.4 fixed-point */
-static unsigned radv_pack_float_12p4(float x)
+static unsigned
+radv_pack_float_12p4(float x)
{
- return x <= 0 ? 0 :
- x >= 4096 ? 0xffff : x * 16;
+ return x <= 0 ? 0 : x >= 4096 ? 0xffff : x * 16;
}
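radv_pack_float_12p4 above converts a float into the unsigned 12.4 fixed-point format expected by PA_SU_POINT_SIZE and PA_SU_POINT_MINMAX: multiply by 16 and clamp to [0, 0xffff]. A few worked values as a standalone check; pack_12p4 simply restates the helper so the asserts compile on their own, and the 8191.875/2 constant used further down in this file packs exactly to the maximum encodable value:

#include <assert.h>

/* Same logic as radv_pack_float_12p4 above, reproduced so the worked
 * examples below are self-contained. */
static unsigned
pack_12p4(float x)
{
   return x <= 0 ? 0 : x >= 4096 ? 0xffff : x * 16;
}

int
main(void)
{
   assert(pack_12p4(-1.0f) == 0);              /* negative sizes clamp to 0     */
   assert(pack_12p4(1.5f) == 24);              /* 1.5 * 16, 4 fractional bits   */
   assert(pack_12p4(8191.875f / 2) == 0xffff); /* 4095.9375 * 16 = 65535        */
   assert(pack_12p4(5000.0f) == 0xffff);       /* >= 4096 clamps to the maximum */
   return 0;
}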
static void
-si_set_raster_config(struct radv_physical_device *physical_device,
- struct radeon_cmdbuf *cs)
+si_set_raster_config(struct radv_physical_device *physical_device, struct radeon_cmdbuf *cs)
{
- unsigned num_rb = MIN2(physical_device->rad_info.max_render_backends, 16);
- unsigned rb_mask = physical_device->rad_info.enabled_rb_mask;
- unsigned raster_config, raster_config_1;
-
- ac_get_raster_config(&physical_device->rad_info,
- &raster_config,
- &raster_config_1, NULL);
-
- /* Always use the default config when all backends are enabled
- * (or when we failed to determine the enabled backends).
- */
- if (!rb_mask || util_bitcount(rb_mask) >= num_rb) {
- radeon_set_context_reg(cs, R_028350_PA_SC_RASTER_CONFIG,
- raster_config);
- if (physical_device->rad_info.chip_class >= GFX7)
- radeon_set_context_reg(cs, R_028354_PA_SC_RASTER_CONFIG_1,
- raster_config_1);
- } else {
- si_write_harvested_raster_configs(physical_device, cs,
- raster_config,
- raster_config_1);
- }
+ unsigned num_rb = MIN2(physical_device->rad_info.max_render_backends, 16);
+ unsigned rb_mask = physical_device->rad_info.enabled_rb_mask;
+ unsigned raster_config, raster_config_1;
+
+ ac_get_raster_config(&physical_device->rad_info, &raster_config, &raster_config_1, NULL);
+
+ /* Always use the default config when all backends are enabled
+ * (or when we failed to determine the enabled backends).
+ */
+ if (!rb_mask || util_bitcount(rb_mask) >= num_rb) {
+ radeon_set_context_reg(cs, R_028350_PA_SC_RASTER_CONFIG, raster_config);
+ if (physical_device->rad_info.chip_class >= GFX7)
+ radeon_set_context_reg(cs, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1);
+ } else {
+ si_write_harvested_raster_configs(physical_device, cs, raster_config, raster_config_1);
+ }
}
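si_set_raster_config above falls back to the default raster config whenever the enabled-RB mask is unknown or already covers every render backend, and only emits harvested configs otherwise. A one-line restatement of that check, with __builtin_popcount standing in for util_bitcount; use_default_raster_config is a hypothetical helper name:

#include <stdbool.h>

/* Illustrative restatement of the check above: use the default raster
 * config when the enabled-RB mask is unknown (0) or no backends are
 * actually harvested. */
static bool
use_default_raster_config(unsigned rb_mask, unsigned num_rb)
{
   return rb_mask == 0 || (unsigned)__builtin_popcount(rb_mask) >= num_rb;
}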
void
-si_emit_graphics(struct radv_device *device,
- struct radeon_cmdbuf *cs)
+si_emit_graphics(struct radv_device *device, struct radeon_cmdbuf *cs)
{
- struct radv_physical_device *physical_device = device->physical_device;
-
- bool has_clear_state = physical_device->rad_info.has_clear_state;
- int i;
-
- radeon_emit(cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
- radeon_emit(cs, CC0_UPDATE_LOAD_ENABLES(1));
- radeon_emit(cs, CC1_UPDATE_SHADOW_ENABLES(1));
-
- if (has_clear_state) {
- radeon_emit(cs, PKT3(PKT3_CLEAR_STATE, 0, 0));
- radeon_emit(cs, 0);
- }
-
- if (physical_device->rad_info.chip_class <= GFX8)
- si_set_raster_config(physical_device, cs);
-
- radeon_set_context_reg(cs, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
- if (!has_clear_state)
- radeon_set_context_reg(cs, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));
-
- /* FIXME calculate these values somehow ??? */
- if (physical_device->rad_info.chip_class <= GFX8) {
- radeon_set_context_reg(cs, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES);
- radeon_set_context_reg(cs, R_028A58_VGT_ES_PER_GS, 0x40);
- }
-
- if (!has_clear_state) {
- radeon_set_context_reg(cs, R_028A5C_VGT_GS_PER_VS, 0x2);
- radeon_set_context_reg(cs, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0);
- radeon_set_context_reg(cs, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
- }
-
- if (physical_device->rad_info.chip_class <= GFX9)
- radeon_set_context_reg(cs, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1);
- if (!has_clear_state)
- radeon_set_context_reg(cs, R_028AB8_VGT_VTX_CNT_EN, 0x0);
- if (physical_device->rad_info.chip_class < GFX7)
- radeon_set_config_reg(cs, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) |
- S_008A14_CLIP_VTX_REORDER_ENA(1));
-
- if (!has_clear_state)
- radeon_set_context_reg(cs, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);
-
- /* CLEAR_STATE doesn't clear these correctly on certain generations.
- * I don't know why. Deduced by trial and error.
- */
- if (physical_device->rad_info.chip_class <= GFX7 || !has_clear_state) {
- radeon_set_context_reg(cs, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
- radeon_set_context_reg(cs, R_028204_PA_SC_WINDOW_SCISSOR_TL,
- S_028204_WINDOW_OFFSET_DISABLE(1));
- radeon_set_context_reg(cs, R_028240_PA_SC_GENERIC_SCISSOR_TL,
- S_028240_WINDOW_OFFSET_DISABLE(1));
- radeon_set_context_reg(cs, R_028244_PA_SC_GENERIC_SCISSOR_BR,
- S_028244_BR_X(16384) | S_028244_BR_Y(16384));
- radeon_set_context_reg(cs, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0);
- radeon_set_context_reg(cs, R_028034_PA_SC_SCREEN_SCISSOR_BR,
- S_028034_BR_X(16384) | S_028034_BR_Y(16384));
- }
-
- if (!has_clear_state) {
- for (i = 0; i < 16; i++) {
- radeon_set_context_reg(cs, R_0282D0_PA_SC_VPORT_ZMIN_0 + i*8, 0);
- radeon_set_context_reg(cs, R_0282D4_PA_SC_VPORT_ZMAX_0 + i*8, fui(1.0));
- }
- }
-
- if (!has_clear_state) {
- radeon_set_context_reg(cs, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
- radeon_set_context_reg(cs, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
- /* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on GFX6 */
- radeon_set_context_reg(cs, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
- radeon_set_context_reg(cs, R_028820_PA_CL_NANINF_CNTL, 0);
- radeon_set_context_reg(cs, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
- radeon_set_context_reg(cs, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
- radeon_set_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
- }
-
- radeon_set_context_reg(cs, R_02800C_DB_RENDER_OVERRIDE,
- S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
- S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE));
-
- if (physical_device->rad_info.chip_class >= GFX10) {
- radeon_set_context_reg(cs, R_028A98_VGT_DRAW_PAYLOAD_CNTL, 0);
- radeon_set_uconfig_reg(cs, R_030964_GE_MAX_VTX_INDX, ~0);
- radeon_set_uconfig_reg(cs, R_030924_GE_MIN_VTX_INDX, 0);
- radeon_set_uconfig_reg(cs, R_030928_GE_INDX_OFFSET, 0);
- radeon_set_uconfig_reg(cs, R_03097C_GE_STEREO_CNTL, 0);
- radeon_set_uconfig_reg(cs, R_030988_GE_USER_VGPR_EN, 0);
- } else if (physical_device->rad_info.chip_class == GFX9) {
- radeon_set_uconfig_reg(cs, R_030920_VGT_MAX_VTX_INDX, ~0);
- radeon_set_uconfig_reg(cs, R_030924_VGT_MIN_VTX_INDX, 0);
- radeon_set_uconfig_reg(cs, R_030928_VGT_INDX_OFFSET, 0);
- } else {
- /* These registers, when written, also overwrite the
- * CLEAR_STATE context, so we can't rely on CLEAR_STATE setting
- * them. It would be an issue if there was another UMD
- * changing them.
- */
- radeon_set_context_reg(cs, R_028400_VGT_MAX_VTX_INDX, ~0);
- radeon_set_context_reg(cs, R_028404_VGT_MIN_VTX_INDX, 0);
- radeon_set_context_reg(cs, R_028408_VGT_INDX_OFFSET, 0);
- }
-
- unsigned cu_mask_ps = 0xffffffff;
-
- /* It's wasteful to enable all CUs for PS if shader arrays have a
- * different number of CUs. The reason is that the hardware sends the
- * same number of PS waves to each shader array, so the slowest shader
- * array limits the performance. Disable the extra CUs for PS in
- * other shader arrays to save power and thus increase clocks for busy
- * CUs. In the future, we might disable or enable this tweak only for
- * certain apps.
- */
- if (physical_device->rad_info.chip_class >= GFX10_3)
- cu_mask_ps = u_bit_consecutive(0, physical_device->rad_info.min_good_cu_per_sa);
-
- if (physical_device->rad_info.chip_class >= GFX7) {
- if (physical_device->rad_info.chip_class >= GFX10) {
- /* Logical CUs 16 - 31 */
- radeon_set_sh_reg_idx(physical_device, cs, R_00B404_SPI_SHADER_PGM_RSRC4_HS,
- 3, S_00B404_CU_EN(0xffff));
- radeon_set_sh_reg_idx(physical_device, cs, R_00B104_SPI_SHADER_PGM_RSRC4_VS,
- 3, S_00B104_CU_EN(0xffff));
- radeon_set_sh_reg_idx(physical_device, cs, R_00B004_SPI_SHADER_PGM_RSRC4_PS,
- 3, S_00B004_CU_EN(cu_mask_ps >> 16));
- }
-
- if (physical_device->rad_info.chip_class >= GFX9) {
- radeon_set_sh_reg_idx(physical_device, cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
- 3, S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F));
- } else {
- radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS,
- S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F));
- radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
- S_00B41C_WAVE_LIMIT(0x3F));
- radeon_set_sh_reg(cs, R_00B31C_SPI_SHADER_PGM_RSRC3_ES,
- S_00B31C_CU_EN(0xffff) | S_00B31C_WAVE_LIMIT(0x3F));
- /* If this is 0, Bonaire can hang even if GS isn't being used.
- * Other chips are unaffected. These are suboptimal values,
- * but we don't use on-chip GS.
- */
- radeon_set_context_reg(cs, R_028A44_VGT_GS_ONCHIP_CNTL,
- S_028A44_ES_VERTS_PER_SUBGRP(64) |
- S_028A44_GS_PRIMS_PER_SUBGRP(4));
- }
-
- /* Compute LATE_ALLOC_VS.LIMIT. */
- unsigned num_cu_per_sh = physical_device->rad_info.min_good_cu_per_sa;
- unsigned late_alloc_wave64 = 0; /* The limit is per SA. */
- unsigned late_alloc_wave64_gs = 0;
- unsigned cu_mask_vs = 0xffff;
- unsigned cu_mask_gs = 0xffff;
-
- if (physical_device->rad_info.chip_class >= GFX10) {
- /* For Wave32, the hw will launch twice the number of late
- * alloc waves, so 1 == 2x wave32.
- */
- if (!physical_device->rad_info.use_late_alloc) {
- late_alloc_wave64 = 0;
- } else if (num_cu_per_sh <= 6) {
- late_alloc_wave64 = num_cu_per_sh - 2;
- } else {
- late_alloc_wave64 = (num_cu_per_sh - 2) * 4;
-
- /* Gfx10: CU2 & CU3 must be disabled to
- * prevent a hw deadlock. Others: CU1 must be
- * disabled to prevent a hw deadlock.
- *
- * The deadlock is caused by late alloc, which
- * usually increases performance.
- */
- cu_mask_vs &= physical_device->rad_info.chip_class == GFX10 ?
- ~BITFIELD_RANGE(2, 2) : ~BITFIELD_RANGE(1, 1);
-
- if (physical_device->use_ngg) {
- cu_mask_gs = cu_mask_vs;
- }
- }
-
- late_alloc_wave64_gs = late_alloc_wave64;
-
- /* Don't use late alloc for NGG on Navi14 due to a hw
- * bug. If NGG is never used, enable all CUs.
- */
- if (!physical_device->use_ngg ||
- physical_device->rad_info.family == CHIP_NAVI14) {
- late_alloc_wave64_gs = 0;
- cu_mask_gs = 0xffff;
- }
-
- /* Limit LATE_ALLOC_GS for prevent a hang (hw bug). */
- if (physical_device->rad_info.chip_class == GFX10)
- late_alloc_wave64_gs = MIN2(late_alloc_wave64_gs, 64);
- } else {
- if (!physical_device->rad_info.use_late_alloc) {
- late_alloc_wave64 = 0;
- } else if (num_cu_per_sh <= 4) {
- /* Too few available compute units per SA.
- * Disallowing VS to run on one CU could hurt
- * us more than late VS allocation would help.
- *
- * 2 is the highest safe number that allows us
- * to keep all CUs enabled.
- */
- late_alloc_wave64 = 2;
- } else {
- /* This is a good initial value, allowing 1
- * late_alloc wave per SIMD on num_cu - 2.
- */
- late_alloc_wave64 = (num_cu_per_sh - 2) * 4;
- }
-
- if (late_alloc_wave64 > 2)
- cu_mask_vs = 0xfffe; /* 1 CU disabled */
- }
-
- radeon_set_sh_reg_idx(physical_device, cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS,
- 3, S_00B118_CU_EN(cu_mask_vs) |
- S_00B118_WAVE_LIMIT(0x3F));
- radeon_set_sh_reg(cs, R_00B11C_SPI_SHADER_LATE_ALLOC_VS,
- S_00B11C_LIMIT(late_alloc_wave64));
-
- radeon_set_sh_reg_idx(physical_device, cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
- 3, S_00B21C_CU_EN(cu_mask_gs) | S_00B21C_WAVE_LIMIT(0x3F));
-
- if (physical_device->rad_info.chip_class >= GFX10) {
- radeon_set_sh_reg_idx(physical_device, cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
- 3, S_00B204_CU_EN(0xffff) |
- S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64_gs));
- }
-
- radeon_set_sh_reg_idx(physical_device, cs, R_00B01C_SPI_SHADER_PGM_RSRC3_PS,
- 3, S_00B01C_CU_EN(cu_mask_ps) | S_00B01C_WAVE_LIMIT(0x3F));
- }
-
- if (physical_device->rad_info.chip_class >= GFX10) {
- /* Break up a pixel wave if it contains deallocs for more than
- * half the parameter cache.
- *
- * To avoid a deadlock where pixel waves aren't launched
- * because they're waiting for more pixels while the frontend
- * is stuck waiting for PC space, the maximum allowed value is
- * the size of the PC minus the largest possible allocation for
- * a single primitive shader subgroup.
- */
- radeon_set_context_reg(cs, R_028C50_PA_SC_NGG_MODE_CNTL,
- S_028C50_MAX_DEALLOCS_IN_WAVE(512));
- radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
-
- /* Enable CMASK/FMASK/HTILE/DCC caching in L2 for small chips. */
- unsigned meta_write_policy, meta_read_policy;
-
- /* TODO: investigate whether LRU improves performance on other chips too */
- if (physical_device->rad_info.max_render_backends <= 4) {
- meta_write_policy = V_02807C_CACHE_LRU_WR; /* cache writes */
- meta_read_policy = V_02807C_CACHE_LRU_RD; /* cache reads */
- } else {
- meta_write_policy = V_02807C_CACHE_STREAM; /* write combine */
- meta_read_policy = V_02807C_CACHE_NOA; /* don't cache reads */
- }
-
- radeon_set_context_reg(cs, R_02807C_DB_RMI_L2_CACHE_CONTROL,
- S_02807C_Z_WR_POLICY(V_02807C_CACHE_STREAM) |
- S_02807C_S_WR_POLICY(V_02807C_CACHE_STREAM) |
- S_02807C_HTILE_WR_POLICY(meta_write_policy) |
- S_02807C_ZPCPSD_WR_POLICY(V_02807C_CACHE_STREAM) |
- S_02807C_Z_RD_POLICY(V_02807C_CACHE_NOA) |
- S_02807C_S_RD_POLICY(V_02807C_CACHE_NOA) |
- S_02807C_HTILE_RD_POLICY(meta_read_policy));
-
- radeon_set_context_reg(cs, R_028410_CB_RMI_GL2_CACHE_CONTROL,
- S_028410_CMASK_WR_POLICY(meta_write_policy) |
- S_028410_FMASK_WR_POLICY(meta_write_policy) |
- S_028410_DCC_WR_POLICY(meta_write_policy) |
- S_028410_COLOR_WR_POLICY(V_028410_CACHE_STREAM) |
- S_028410_CMASK_RD_POLICY(meta_read_policy) |
- S_028410_FMASK_RD_POLICY(meta_read_policy) |
- S_028410_DCC_RD_POLICY(meta_read_policy) |
- S_028410_COLOR_RD_POLICY(V_028410_CACHE_NOA));
- radeon_set_context_reg(cs, R_028428_CB_COVERAGE_OUT_CONTROL, 0);
-
- radeon_set_sh_reg(cs, R_00B0C8_SPI_SHADER_USER_ACCUM_PS_0, 0);
- radeon_set_sh_reg(cs, R_00B0CC_SPI_SHADER_USER_ACCUM_PS_1, 0);
- radeon_set_sh_reg(cs, R_00B0D0_SPI_SHADER_USER_ACCUM_PS_2, 0);
- radeon_set_sh_reg(cs, R_00B0D4_SPI_SHADER_USER_ACCUM_PS_3, 0);
- radeon_set_sh_reg(cs, R_00B1C8_SPI_SHADER_USER_ACCUM_VS_0, 0);
- radeon_set_sh_reg(cs, R_00B1CC_SPI_SHADER_USER_ACCUM_VS_1, 0);
- radeon_set_sh_reg(cs, R_00B1D0_SPI_SHADER_USER_ACCUM_VS_2, 0);
- radeon_set_sh_reg(cs, R_00B1D4_SPI_SHADER_USER_ACCUM_VS_3, 0);
- radeon_set_sh_reg(cs, R_00B2C8_SPI_SHADER_USER_ACCUM_ESGS_0, 0);
- radeon_set_sh_reg(cs, R_00B2CC_SPI_SHADER_USER_ACCUM_ESGS_1, 0);
- radeon_set_sh_reg(cs, R_00B2D0_SPI_SHADER_USER_ACCUM_ESGS_2, 0);
- radeon_set_sh_reg(cs, R_00B2D4_SPI_SHADER_USER_ACCUM_ESGS_3, 0);
- radeon_set_sh_reg(cs, R_00B4C8_SPI_SHADER_USER_ACCUM_LSHS_0, 0);
- radeon_set_sh_reg(cs, R_00B4CC_SPI_SHADER_USER_ACCUM_LSHS_1, 0);
- radeon_set_sh_reg(cs, R_00B4D0_SPI_SHADER_USER_ACCUM_LSHS_2, 0);
- radeon_set_sh_reg(cs, R_00B4D4_SPI_SHADER_USER_ACCUM_LSHS_3, 0);
-
- radeon_set_sh_reg(cs, R_00B0C0_SPI_SHADER_REQ_CTRL_PS,
- S_00B0C0_SOFT_GROUPING_EN(1) |
- S_00B0C0_NUMBER_OF_REQUESTS_PER_CU(4 - 1));
- radeon_set_sh_reg(cs, R_00B1C0_SPI_SHADER_REQ_CTRL_VS, 0);
-
- if (physical_device->rad_info.chip_class >= GFX10_3) {
- radeon_set_context_reg(cs, R_028750_SX_PS_DOWNCONVERT_CONTROL, 0xff);
- /* This allows sample shading. */
- radeon_set_context_reg(cs, R_028848_PA_CL_VRS_CNTL,
- S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE));
- }
-
- if (physical_device->rad_info.chip_class == GFX10) {
- /* SQ_NON_EVENT must be emitted before GE_PC_ALLOC is written. */
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_SQ_NON_EVENT) | EVENT_INDEX(0));
- }
-
- /* TODO: For culling, replace 128 with 256. */
- radeon_set_uconfig_reg(cs, R_030980_GE_PC_ALLOC,
- S_030980_OVERSUB_EN(physical_device->rad_info.use_late_alloc) |
- S_030980_NUM_PC_LINES(128 * physical_device->rad_info.max_se - 1));
- }
-
- if (physical_device->rad_info.chip_class >= GFX9) {
- radeon_set_context_reg(cs, R_028B50_VGT_TESS_DISTRIBUTION,
- S_028B50_ACCUM_ISOLINE(40) |
- S_028B50_ACCUM_TRI(30) |
- S_028B50_ACCUM_QUAD(24) |
- S_028B50_DONUT_SPLIT(24) |
- S_028B50_TRAP_SPLIT(6));
- } else if (physical_device->rad_info.chip_class >= GFX8) {
- uint32_t vgt_tess_distribution;
-
- vgt_tess_distribution = S_028B50_ACCUM_ISOLINE(32) |
- S_028B50_ACCUM_TRI(11) |
- S_028B50_ACCUM_QUAD(11) |
- S_028B50_DONUT_SPLIT(16);
-
- if (physical_device->rad_info.family == CHIP_FIJI ||
- physical_device->rad_info.family >= CHIP_POLARIS10)
- vgt_tess_distribution |= S_028B50_TRAP_SPLIT(3);
-
- radeon_set_context_reg(cs, R_028B50_VGT_TESS_DISTRIBUTION,
- vgt_tess_distribution);
- } else if (!has_clear_state) {
- radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
- radeon_set_context_reg(cs, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16);
- }
-
- if (device->border_color_data.bo) {
- uint64_t border_color_va = radv_buffer_get_va(device->border_color_data.bo);
-
- radeon_set_context_reg(cs, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);
- if (physical_device->rad_info.chip_class >= GFX7) {
- radeon_set_context_reg(cs, R_028084_TA_BC_BASE_ADDR_HI,
- S_028084_ADDRESS(border_color_va >> 40));
- }
- }
-
- if (physical_device->rad_info.chip_class >= GFX9) {
- radeon_set_context_reg(cs, R_028C48_PA_SC_BINNER_CNTL_1,
- S_028C48_MAX_ALLOC_COUNT(physical_device->rad_info.pbb_max_alloc_count - 1) |
- S_028C48_MAX_PRIM_PER_BATCH(1023));
- radeon_set_context_reg(cs, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL,
- S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1));
- radeon_set_uconfig_reg(cs, R_030968_VGT_INSTANCE_BASE_ID, 0);
- }
-
- unsigned tmp = (unsigned)(1.0 * 8.0);
- radeon_set_context_reg_seq(cs, R_028A00_PA_SU_POINT_SIZE, 1);
- radeon_emit(cs, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp));
- radeon_set_context_reg_seq(cs, R_028A04_PA_SU_POINT_MINMAX, 1);
- radeon_emit(cs, S_028A04_MIN_SIZE(radv_pack_float_12p4(0)) |
- S_028A04_MAX_SIZE(radv_pack_float_12p4(8191.875/2)));
-
- if (!has_clear_state) {
- radeon_set_context_reg(cs, R_028004_DB_COUNT_CONTROL,
- S_028004_ZPASS_INCREMENT_DISABLE(1));
- }
-
- /* Enable the Polaris small primitive filter control.
- * XXX: There is possibly an issue when MSAA is off (see RadeonSI
- * has_msaa_sample_loc_bug). But this doesn't seem to regress anything,
- * and AMDVLK doesn't have a workaround as well.
- */
- if (physical_device->rad_info.family >= CHIP_POLARIS10) {
- unsigned small_prim_filter_cntl =
- S_028830_SMALL_PRIM_FILTER_ENABLE(1) |
- /* Workaround for a hw line bug. */
- S_028830_LINE_FILTER_DISABLE(physical_device->rad_info.family <= CHIP_POLARIS12);
-
- radeon_set_context_reg(cs, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL,
- small_prim_filter_cntl);
- }
-
- radeon_set_context_reg(cs, R_0286D4_SPI_INTERP_CONTROL_0,
- S_0286D4_FLAT_SHADE_ENA(1) |
- S_0286D4_PNT_SPRITE_ENA(1) |
- S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) |
- S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) |
- S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) |
- S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) |
- S_0286D4_PNT_SPRITE_TOP_1(0)); /* vulkan is top to bottom - 1.0 at bottom */
-
- radeon_set_context_reg(cs, R_028BE4_PA_SU_VTX_CNTL,
- S_028BE4_PIX_CENTER(1) |
- S_028BE4_ROUND_MODE(V_028BE4_X_ROUND_TO_EVEN) |
- S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH));
-
- radeon_set_context_reg(cs, R_028818_PA_CL_VTE_CNTL,
- S_028818_VTX_W0_FMT(1) |
- S_028818_VPORT_X_SCALE_ENA(1) | S_028818_VPORT_X_OFFSET_ENA(1) |
- S_028818_VPORT_Y_SCALE_ENA(1) | S_028818_VPORT_Y_OFFSET_ENA(1) |
- S_028818_VPORT_Z_SCALE_ENA(1) | S_028818_VPORT_Z_OFFSET_ENA(1));
-
- if (device->tma_bo) {
- uint64_t tba_va, tma_va;
-
- assert(device->physical_device->rad_info.chip_class == GFX8);
-
- tba_va = radv_buffer_get_va(device->trap_handler_shader->bo) +
- device->trap_handler_shader->bo_offset;
- tma_va = radv_buffer_get_va(device->tma_bo);
-
- uint32_t regs[] = {R_00B000_SPI_SHADER_TBA_LO_PS,
- R_00B100_SPI_SHADER_TBA_LO_VS,
- R_00B200_SPI_SHADER_TBA_LO_GS,
- R_00B300_SPI_SHADER_TBA_LO_ES,
- R_00B400_SPI_SHADER_TBA_LO_HS,
- R_00B500_SPI_SHADER_TBA_LO_LS};
-
- for (i = 0; i < ARRAY_SIZE(regs); ++i) {
- radeon_set_sh_reg_seq(cs, regs[i], 4);
- radeon_emit(cs, tba_va >> 8);
- radeon_emit(cs, tba_va >> 40);
- radeon_emit(cs, tma_va >> 8);
- radeon_emit(cs, tma_va >> 40);
- }
- }
-
- si_emit_compute(device, cs);
+ struct radv_physical_device *physical_device = device->physical_device;
+
+ bool has_clear_state = physical_device->rad_info.has_clear_state;
+ int i;
+
+ radeon_emit(cs, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
+ radeon_emit(cs, CC0_UPDATE_LOAD_ENABLES(1));
+ radeon_emit(cs, CC1_UPDATE_SHADOW_ENABLES(1));
+
+ if (has_clear_state) {
+ radeon_emit(cs, PKT3(PKT3_CLEAR_STATE, 0, 0));
+ radeon_emit(cs, 0);
+ }
+
+ if (physical_device->rad_info.chip_class <= GFX8)
+ si_set_raster_config(physical_device, cs);
+
+ radeon_set_context_reg(cs, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
+ if (!has_clear_state)
+ radeon_set_context_reg(cs, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));
+
+ /* FIXME calculate these values somehow ??? */
+ if (physical_device->rad_info.chip_class <= GFX8) {
+ radeon_set_context_reg(cs, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES);
+ radeon_set_context_reg(cs, R_028A58_VGT_ES_PER_GS, 0x40);
+ }
+
+ if (!has_clear_state) {
+ radeon_set_context_reg(cs, R_028A5C_VGT_GS_PER_VS, 0x2);
+ radeon_set_context_reg(cs, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0);
+ radeon_set_context_reg(cs, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
+ }
+
+ if (physical_device->rad_info.chip_class <= GFX9)
+ radeon_set_context_reg(cs, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1);
+ if (!has_clear_state)
+ radeon_set_context_reg(cs, R_028AB8_VGT_VTX_CNT_EN, 0x0);
+ if (physical_device->rad_info.chip_class < GFX7)
+ radeon_set_config_reg(cs, R_008A14_PA_CL_ENHANCE,
+ S_008A14_NUM_CLIP_SEQ(3) | S_008A14_CLIP_VTX_REORDER_ENA(1));
+
+ if (!has_clear_state)
+ radeon_set_context_reg(cs, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);
+
+ /* CLEAR_STATE doesn't clear these correctly on certain generations.
+ * I don't know why. Deduced by trial and error.
+ */
+ if (physical_device->rad_info.chip_class <= GFX7 || !has_clear_state) {
+ radeon_set_context_reg(cs, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
+ radeon_set_context_reg(cs, R_028204_PA_SC_WINDOW_SCISSOR_TL,
+ S_028204_WINDOW_OFFSET_DISABLE(1));
+ radeon_set_context_reg(cs, R_028240_PA_SC_GENERIC_SCISSOR_TL,
+ S_028240_WINDOW_OFFSET_DISABLE(1));
+ radeon_set_context_reg(cs, R_028244_PA_SC_GENERIC_SCISSOR_BR,
+ S_028244_BR_X(16384) | S_028244_BR_Y(16384));
+ radeon_set_context_reg(cs, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0);
+ radeon_set_context_reg(cs, R_028034_PA_SC_SCREEN_SCISSOR_BR,
+ S_028034_BR_X(16384) | S_028034_BR_Y(16384));
+ }
+
+ if (!has_clear_state) {
+ for (i = 0; i < 16; i++) {
+ radeon_set_context_reg(cs, R_0282D0_PA_SC_VPORT_ZMIN_0 + i * 8, 0);
+ radeon_set_context_reg(cs, R_0282D4_PA_SC_VPORT_ZMAX_0 + i * 8, fui(1.0));
+ }
+ }
+
+ if (!has_clear_state) {
+ radeon_set_context_reg(cs, R_02820C_PA_SC_CLIPRECT_RULE, 0xFFFF);
+ radeon_set_context_reg(cs, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
+ /* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on GFX6 */
+ radeon_set_context_reg(cs, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
+ radeon_set_context_reg(cs, R_028820_PA_CL_NANINF_CNTL, 0);
+ radeon_set_context_reg(cs, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0);
+ radeon_set_context_reg(cs, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
+ radeon_set_context_reg(cs, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
+ }
+
+ radeon_set_context_reg(cs, R_02800C_DB_RENDER_OVERRIDE,
+ S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
+ S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE));
+
+ if (physical_device->rad_info.chip_class >= GFX10) {
+ radeon_set_context_reg(cs, R_028A98_VGT_DRAW_PAYLOAD_CNTL, 0);
+ radeon_set_uconfig_reg(cs, R_030964_GE_MAX_VTX_INDX, ~0);
+ radeon_set_uconfig_reg(cs, R_030924_GE_MIN_VTX_INDX, 0);
+ radeon_set_uconfig_reg(cs, R_030928_GE_INDX_OFFSET, 0);
+ radeon_set_uconfig_reg(cs, R_03097C_GE_STEREO_CNTL, 0);
+ radeon_set_uconfig_reg(cs, R_030988_GE_USER_VGPR_EN, 0);
+ } else if (physical_device->rad_info.chip_class == GFX9) {
+ radeon_set_uconfig_reg(cs, R_030920_VGT_MAX_VTX_INDX, ~0);
+ radeon_set_uconfig_reg(cs, R_030924_VGT_MIN_VTX_INDX, 0);
+ radeon_set_uconfig_reg(cs, R_030928_VGT_INDX_OFFSET, 0);
+ } else {
+ /* These registers, when written, also overwrite the
+ * CLEAR_STATE context, so we can't rely on CLEAR_STATE setting
+ * them. It would be an issue if there was another UMD
+ * changing them.
+ */
+ radeon_set_context_reg(cs, R_028400_VGT_MAX_VTX_INDX, ~0);
+ radeon_set_context_reg(cs, R_028404_VGT_MIN_VTX_INDX, 0);
+ radeon_set_context_reg(cs, R_028408_VGT_INDX_OFFSET, 0);
+ }
+
+ unsigned cu_mask_ps = 0xffffffff;
+
+ /* It's wasteful to enable all CUs for PS if shader arrays have a
+ * different number of CUs. The reason is that the hardware sends the
+ * same number of PS waves to each shader array, so the slowest shader
+ * array limits the performance. Disable the extra CUs for PS in
+ * other shader arrays to save power and thus increase clocks for busy
+ * CUs. In the future, we might disable or enable this tweak only for
+ * certain apps.
+ */
+ if (physical_device->rad_info.chip_class >= GFX10_3)
+ cu_mask_ps = u_bit_consecutive(0, physical_device->rad_info.min_good_cu_per_sa);
+
+ if (physical_device->rad_info.chip_class >= GFX7) {
+ if (physical_device->rad_info.chip_class >= GFX10) {
+ /* Logical CUs 16 - 31 */
+ radeon_set_sh_reg_idx(physical_device, cs, R_00B404_SPI_SHADER_PGM_RSRC4_HS, 3,
+ S_00B404_CU_EN(0xffff));
+ radeon_set_sh_reg_idx(physical_device, cs, R_00B104_SPI_SHADER_PGM_RSRC4_VS, 3,
+ S_00B104_CU_EN(0xffff));
+ radeon_set_sh_reg_idx(physical_device, cs, R_00B004_SPI_SHADER_PGM_RSRC4_PS, 3,
+ S_00B004_CU_EN(cu_mask_ps >> 16));
+ }
+
+ if (physical_device->rad_info.chip_class >= GFX9) {
+ radeon_set_sh_reg_idx(physical_device, cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 3,
+ S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F));
+ } else {
+ radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS,
+ S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F));
+ radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, S_00B41C_WAVE_LIMIT(0x3F));
+ radeon_set_sh_reg(cs, R_00B31C_SPI_SHADER_PGM_RSRC3_ES,
+ S_00B31C_CU_EN(0xffff) | S_00B31C_WAVE_LIMIT(0x3F));
+ /* If this is 0, Bonaire can hang even if GS isn't being used.
+ * Other chips are unaffected. These are suboptimal values,
+ * but we don't use on-chip GS.
+ */
+ radeon_set_context_reg(cs, R_028A44_VGT_GS_ONCHIP_CNTL,
+ S_028A44_ES_VERTS_PER_SUBGRP(64) | S_028A44_GS_PRIMS_PER_SUBGRP(4));
+ }
+
+ /* Compute LATE_ALLOC_VS.LIMIT. */
+ unsigned num_cu_per_sh = physical_device->rad_info.min_good_cu_per_sa;
+ unsigned late_alloc_wave64 = 0; /* The limit is per SA. */
+ unsigned late_alloc_wave64_gs = 0;
+ unsigned cu_mask_vs = 0xffff;
+ unsigned cu_mask_gs = 0xffff;
+
+ if (physical_device->rad_info.chip_class >= GFX10) {
+ /* For Wave32, the hw will launch twice the number of late
+ * alloc waves, so 1 == 2x wave32.
+ */
+ if (!physical_device->rad_info.use_late_alloc) {
+ late_alloc_wave64 = 0;
+ } else if (num_cu_per_sh <= 6) {
+ late_alloc_wave64 = num_cu_per_sh - 2;
+ } else {
+ late_alloc_wave64 = (num_cu_per_sh - 2) * 4;
+
+ /* Gfx10: CU2 & CU3 must be disabled to
+ * prevent a hw deadlock. Others: CU1 must be
+ * disabled to prevent a hw deadlock.
+ *
+ * The deadlock is caused by late alloc, which
+ * usually increases performance.
+ */
+ cu_mask_vs &= physical_device->rad_info.chip_class == GFX10 ? ~BITFIELD_RANGE(2, 2)
+ : ~BITFIELD_RANGE(1, 1);
+
+ if (physical_device->use_ngg) {
+ cu_mask_gs = cu_mask_vs;
+ }
+ }
+
+ late_alloc_wave64_gs = late_alloc_wave64;
+
+ /* Don't use late alloc for NGG on Navi14 due to a hw
+ * bug. If NGG is never used, enable all CUs.
+ */
+ if (!physical_device->use_ngg || physical_device->rad_info.family == CHIP_NAVI14) {
+ late_alloc_wave64_gs = 0;
+ cu_mask_gs = 0xffff;
+ }
+
+ /* Limit LATE_ALLOC_GS to prevent a hang (hw bug). */
+ if (physical_device->rad_info.chip_class == GFX10)
+ late_alloc_wave64_gs = MIN2(late_alloc_wave64_gs, 64);
+ } else {
+ if (!physical_device->rad_info.use_late_alloc) {
+ late_alloc_wave64 = 0;
+ } else if (num_cu_per_sh <= 4) {
+ /* Too few available compute units per SA.
+ * Disallowing VS to run on one CU could hurt
+ * us more than late VS allocation would help.
+ *
+ * 2 is the highest safe number that allows us
+ * to keep all CUs enabled.
+ */
+ late_alloc_wave64 = 2;
+ } else {
+ /* This is a good initial value, allowing 1
+ * late_alloc wave per SIMD on num_cu - 2.
+ */
+ late_alloc_wave64 = (num_cu_per_sh - 2) * 4;
+ }
+
+ if (late_alloc_wave64 > 2)
+ cu_mask_vs = 0xfffe; /* 1 CU disabled */
+ }
+
+ radeon_set_sh_reg_idx(physical_device, cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS, 3,
+ S_00B118_CU_EN(cu_mask_vs) | S_00B118_WAVE_LIMIT(0x3F));
+ radeon_set_sh_reg(cs, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(late_alloc_wave64));
+
+ radeon_set_sh_reg_idx(physical_device, cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 3,
+ S_00B21C_CU_EN(cu_mask_gs) | S_00B21C_WAVE_LIMIT(0x3F));
+
+ if (physical_device->rad_info.chip_class >= GFX10) {
+ radeon_set_sh_reg_idx(
+ physical_device, cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS, 3,
+ S_00B204_CU_EN(0xffff) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64_gs));
+ }
+
+ radeon_set_sh_reg_idx(physical_device, cs, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, 3,
+ S_00B01C_CU_EN(cu_mask_ps) | S_00B01C_WAVE_LIMIT(0x3F));
+ }
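A minimal illustrative sketch (not from this change) of the late-alloc arithmetic above, assuming a hypothetical GFX10 part with late alloc enabled; only the CU-count branch is reproduced:

   /* What the GFX10 branch above computes for num_cu_per_sh usable CUs per SA. */
   static unsigned
   example_late_alloc_wave64_gfx10(unsigned num_cu_per_sh)
   {
      if (num_cu_per_sh <= 6)
         return num_cu_per_sh - 2;     /* small parts: stay conservative */
      return (num_cu_per_sh - 2) * 4;  /* e.g. 10 CUs/SA -> 32 wave64 slots,
                                        * i.e. 64 wave32 (the hw doubles it) */
   }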
+
+ if (physical_device->rad_info.chip_class >= GFX10) {
+ /* Break up a pixel wave if it contains deallocs for more than
+ * half the parameter cache.
+ *
+ * To avoid a deadlock where pixel waves aren't launched
+ * because they're waiting for more pixels while the frontend
+ * is stuck waiting for PC space, the maximum allowed value is
+ * the size of the PC minus the largest possible allocation for
+ * a single primitive shader subgroup.
+ */
+ radeon_set_context_reg(cs, R_028C50_PA_SC_NGG_MODE_CNTL, S_028C50_MAX_DEALLOCS_IN_WAVE(512));
+ radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
+
+ /* Enable CMASK/FMASK/HTILE/DCC caching in L2 for small chips. */
+ unsigned meta_write_policy, meta_read_policy;
+
+ /* TODO: investigate whether LRU improves performance on other chips too */
+ if (physical_device->rad_info.max_render_backends <= 4) {
+ meta_write_policy = V_02807C_CACHE_LRU_WR; /* cache writes */
+ meta_read_policy = V_02807C_CACHE_LRU_RD; /* cache reads */
+ } else {
+ meta_write_policy = V_02807C_CACHE_STREAM; /* write combine */
+ meta_read_policy = V_02807C_CACHE_NOA; /* don't cache reads */
+ }
+
+ radeon_set_context_reg(
+ cs, R_02807C_DB_RMI_L2_CACHE_CONTROL,
+ S_02807C_Z_WR_POLICY(V_02807C_CACHE_STREAM) | S_02807C_S_WR_POLICY(V_02807C_CACHE_STREAM) |
+ S_02807C_HTILE_WR_POLICY(meta_write_policy) |
+ S_02807C_ZPCPSD_WR_POLICY(V_02807C_CACHE_STREAM) |
+ S_02807C_Z_RD_POLICY(V_02807C_CACHE_NOA) | S_02807C_S_RD_POLICY(V_02807C_CACHE_NOA) |
+ S_02807C_HTILE_RD_POLICY(meta_read_policy));
+
+ radeon_set_context_reg(
+ cs, R_028410_CB_RMI_GL2_CACHE_CONTROL,
+ S_028410_CMASK_WR_POLICY(meta_write_policy) | S_028410_FMASK_WR_POLICY(meta_write_policy) |
+ S_028410_DCC_WR_POLICY(meta_write_policy) |
+ S_028410_COLOR_WR_POLICY(V_028410_CACHE_STREAM) |
+ S_028410_CMASK_RD_POLICY(meta_read_policy) |
+ S_028410_FMASK_RD_POLICY(meta_read_policy) | S_028410_DCC_RD_POLICY(meta_read_policy) |
+ S_028410_COLOR_RD_POLICY(V_028410_CACHE_NOA));
+ radeon_set_context_reg(cs, R_028428_CB_COVERAGE_OUT_CONTROL, 0);
+
+ radeon_set_sh_reg(cs, R_00B0C8_SPI_SHADER_USER_ACCUM_PS_0, 0);
+ radeon_set_sh_reg(cs, R_00B0CC_SPI_SHADER_USER_ACCUM_PS_1, 0);
+ radeon_set_sh_reg(cs, R_00B0D0_SPI_SHADER_USER_ACCUM_PS_2, 0);
+ radeon_set_sh_reg(cs, R_00B0D4_SPI_SHADER_USER_ACCUM_PS_3, 0);
+ radeon_set_sh_reg(cs, R_00B1C8_SPI_SHADER_USER_ACCUM_VS_0, 0);
+ radeon_set_sh_reg(cs, R_00B1CC_SPI_SHADER_USER_ACCUM_VS_1, 0);
+ radeon_set_sh_reg(cs, R_00B1D0_SPI_SHADER_USER_ACCUM_VS_2, 0);
+ radeon_set_sh_reg(cs, R_00B1D4_SPI_SHADER_USER_ACCUM_VS_3, 0);
+ radeon_set_sh_reg(cs, R_00B2C8_SPI_SHADER_USER_ACCUM_ESGS_0, 0);
+ radeon_set_sh_reg(cs, R_00B2CC_SPI_SHADER_USER_ACCUM_ESGS_1, 0);
+ radeon_set_sh_reg(cs, R_00B2D0_SPI_SHADER_USER_ACCUM_ESGS_2, 0);
+ radeon_set_sh_reg(cs, R_00B2D4_SPI_SHADER_USER_ACCUM_ESGS_3, 0);
+ radeon_set_sh_reg(cs, R_00B4C8_SPI_SHADER_USER_ACCUM_LSHS_0, 0);
+ radeon_set_sh_reg(cs, R_00B4CC_SPI_SHADER_USER_ACCUM_LSHS_1, 0);
+ radeon_set_sh_reg(cs, R_00B4D0_SPI_SHADER_USER_ACCUM_LSHS_2, 0);
+ radeon_set_sh_reg(cs, R_00B4D4_SPI_SHADER_USER_ACCUM_LSHS_3, 0);
+
+ radeon_set_sh_reg(cs, R_00B0C0_SPI_SHADER_REQ_CTRL_PS,
+ S_00B0C0_SOFT_GROUPING_EN(1) | S_00B0C0_NUMBER_OF_REQUESTS_PER_CU(4 - 1));
+ radeon_set_sh_reg(cs, R_00B1C0_SPI_SHADER_REQ_CTRL_VS, 0);
+
+ if (physical_device->rad_info.chip_class >= GFX10_3) {
+ radeon_set_context_reg(cs, R_028750_SX_PS_DOWNCONVERT_CONTROL, 0xff);
+ /* This allows sample shading. */
+ radeon_set_context_reg(
+ cs, R_028848_PA_CL_VRS_CNTL,
+ S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE));
+ }
+
+ if (physical_device->rad_info.chip_class == GFX10) {
+ /* SQ_NON_EVENT must be emitted before GE_PC_ALLOC is written. */
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_SQ_NON_EVENT) | EVENT_INDEX(0));
+ }
+
+ /* TODO: For culling, replace 128 with 256. */
+ radeon_set_uconfig_reg(cs, R_030980_GE_PC_ALLOC,
+ S_030980_OVERSUB_EN(physical_device->rad_info.use_late_alloc) |
+ S_030980_NUM_PC_LINES(128 * physical_device->rad_info.max_se - 1));
+ }
+
+ if (physical_device->rad_info.chip_class >= GFX9) {
+ radeon_set_context_reg(cs, R_028B50_VGT_TESS_DISTRIBUTION,
+ S_028B50_ACCUM_ISOLINE(40) | S_028B50_ACCUM_TRI(30) |
+ S_028B50_ACCUM_QUAD(24) | S_028B50_DONUT_SPLIT(24) |
+ S_028B50_TRAP_SPLIT(6));
+ } else if (physical_device->rad_info.chip_class >= GFX8) {
+ uint32_t vgt_tess_distribution;
+
+ vgt_tess_distribution = S_028B50_ACCUM_ISOLINE(32) | S_028B50_ACCUM_TRI(11) |
+ S_028B50_ACCUM_QUAD(11) | S_028B50_DONUT_SPLIT(16);
+
+ if (physical_device->rad_info.family == CHIP_FIJI ||
+ physical_device->rad_info.family >= CHIP_POLARIS10)
+ vgt_tess_distribution |= S_028B50_TRAP_SPLIT(3);
+
+ radeon_set_context_reg(cs, R_028B50_VGT_TESS_DISTRIBUTION, vgt_tess_distribution);
+ } else if (!has_clear_state) {
+ radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
+ radeon_set_context_reg(cs, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16);
+ }
+
+ if (device->border_color_data.bo) {
+ uint64_t border_color_va = radv_buffer_get_va(device->border_color_data.bo);
+
+ radeon_set_context_reg(cs, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);
+ if (physical_device->rad_info.chip_class >= GFX7) {
+ radeon_set_context_reg(cs, R_028084_TA_BC_BASE_ADDR_HI,
+ S_028084_ADDRESS(border_color_va >> 40));
+ }
+ }
+
+ if (physical_device->rad_info.chip_class >= GFX9) {
+ radeon_set_context_reg(
+ cs, R_028C48_PA_SC_BINNER_CNTL_1,
+ S_028C48_MAX_ALLOC_COUNT(physical_device->rad_info.pbb_max_alloc_count - 1) |
+ S_028C48_MAX_PRIM_PER_BATCH(1023));
+ radeon_set_context_reg(cs, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL,
+ S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1));
+ radeon_set_uconfig_reg(cs, R_030968_VGT_INSTANCE_BASE_ID, 0);
+ }
+
+ unsigned tmp = (unsigned)(1.0 * 8.0);
+ radeon_set_context_reg_seq(cs, R_028A00_PA_SU_POINT_SIZE, 1);
+ radeon_emit(cs, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp));
+ radeon_set_context_reg_seq(cs, R_028A04_PA_SU_POINT_MINMAX, 1);
+ radeon_emit(cs, S_028A04_MIN_SIZE(radv_pack_float_12p4(0)) |
+ S_028A04_MAX_SIZE(radv_pack_float_12p4(8191.875 / 2)));
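A minimal sketch (not from this change) of the 12.4 fixed-point packing behind the two constants above, assuming radv_pack_float_12p4() clamps x * 16 into 16 bits and that these point-size registers take half-sizes:

   #include <stdint.h>

   /* 12.4 fixed point: 4 fractional bits, so scale by 16 and clamp. */
   static uint16_t
   example_pack_float_12p4(double x)
   {
      double v = x * 16.0;
      return v <= 0.0 ? 0 : v >= 65535.0 ? 0xffff : (uint16_t)v;
   }

   /* (unsigned)(1.0 * 8.0) == example_pack_float_12p4(0.5): a half-size of
    * 0.5 pixel, i.e. the default point diameter of 1.0.
    * example_pack_float_12p4(8191.875 / 2) == 0xffff: the register maximum. */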
+
+ if (!has_clear_state) {
+ radeon_set_context_reg(cs, R_028004_DB_COUNT_CONTROL, S_028004_ZPASS_INCREMENT_DISABLE(1));
+ }
+
+ /* Enable the Polaris small primitive filter control.
+ * XXX: There is possibly an issue when MSAA is off (see RadeonSI
+ * has_msaa_sample_loc_bug). But this doesn't seem to regress anything,
+ * and AMDVLK doesn't have a workaround either.
+ */
+ if (physical_device->rad_info.family >= CHIP_POLARIS10) {
+ unsigned small_prim_filter_cntl =
+ S_028830_SMALL_PRIM_FILTER_ENABLE(1) |
+ /* Workaround for a hw line bug. */
+ S_028830_LINE_FILTER_DISABLE(physical_device->rad_info.family <= CHIP_POLARIS12);
+
+ radeon_set_context_reg(cs, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL, small_prim_filter_cntl);
+ }
+
+ radeon_set_context_reg(
+ cs, R_0286D4_SPI_INTERP_CONTROL_0,
+ S_0286D4_FLAT_SHADE_ENA(1) | S_0286D4_PNT_SPRITE_ENA(1) |
+ S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) |
+ S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) |
+ S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) |
+ S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) |
+ S_0286D4_PNT_SPRITE_TOP_1(0)); /* vulkan is top to bottom - 1.0 at bottom */
+
+ radeon_set_context_reg(cs, R_028BE4_PA_SU_VTX_CNTL,
+ S_028BE4_PIX_CENTER(1) | S_028BE4_ROUND_MODE(V_028BE4_X_ROUND_TO_EVEN) |
+ S_028BE4_QUANT_MODE(V_028BE4_X_16_8_FIXED_POINT_1_256TH));
+
+ radeon_set_context_reg(cs, R_028818_PA_CL_VTE_CNTL,
+ S_028818_VTX_W0_FMT(1) | S_028818_VPORT_X_SCALE_ENA(1) |
+ S_028818_VPORT_X_OFFSET_ENA(1) | S_028818_VPORT_Y_SCALE_ENA(1) |
+ S_028818_VPORT_Y_OFFSET_ENA(1) | S_028818_VPORT_Z_SCALE_ENA(1) |
+ S_028818_VPORT_Z_OFFSET_ENA(1));
+
+ if (device->tma_bo) {
+ uint64_t tba_va, tma_va;
+
+ assert(device->physical_device->rad_info.chip_class == GFX8);
+
+ tba_va = radv_buffer_get_va(device->trap_handler_shader->bo) +
+ device->trap_handler_shader->bo_offset;
+ tma_va = radv_buffer_get_va(device->tma_bo);
+
+ uint32_t regs[] = {R_00B000_SPI_SHADER_TBA_LO_PS, R_00B100_SPI_SHADER_TBA_LO_VS,
+ R_00B200_SPI_SHADER_TBA_LO_GS, R_00B300_SPI_SHADER_TBA_LO_ES,
+ R_00B400_SPI_SHADER_TBA_LO_HS, R_00B500_SPI_SHADER_TBA_LO_LS};
+
+ for (i = 0; i < ARRAY_SIZE(regs); ++i) {
+ radeon_set_sh_reg_seq(cs, regs[i], 4);
+ radeon_emit(cs, tba_va >> 8);
+ radeon_emit(cs, tba_va >> 40);
+ radeon_emit(cs, tma_va >> 8);
+ radeon_emit(cs, tma_va >> 40);
+ }
+ }
+
+ si_emit_compute(device, cs);
}
void
cik_create_gfx_config(struct radv_device *device)
{
- struct radeon_cmdbuf *cs = device->ws->cs_create(device->ws, RING_GFX);
- if (!cs)
- return;
-
- si_emit_graphics(device, cs);
-
- while (cs->cdw & 7) {
- if (device->physical_device->rad_info.gfx_ib_pad_with_type2)
- radeon_emit(cs, PKT2_NOP_PAD);
- else
- radeon_emit(cs, PKT3_NOP_PAD);
- }
-
- device->gfx_init = device->ws->buffer_create(device->ws,
- cs->cdw * 4, 4096,
- radv_cmdbuffer_domain(&device->physical_device->rad_info,
- device->instance->perftest_flags),
- RADEON_FLAG_CPU_ACCESS|
- RADEON_FLAG_NO_INTERPROCESS_SHARING |
- RADEON_FLAG_READ_ONLY |
- RADEON_FLAG_GTT_WC,
- RADV_BO_PRIORITY_CS);
- if (!device->gfx_init)
- goto fail;
-
- void *map = device->ws->buffer_map(device->gfx_init);
- if (!map) {
- device->ws->buffer_destroy(device->ws, device->gfx_init);
- device->gfx_init = NULL;
- goto fail;
- }
- memcpy(map, cs->buf, cs->cdw * 4);
-
- device->ws->buffer_unmap(device->gfx_init);
- device->gfx_init_size_dw = cs->cdw;
+ struct radeon_cmdbuf *cs = device->ws->cs_create(device->ws, RING_GFX);
+ if (!cs)
+ return;
+
+ si_emit_graphics(device, cs);
+
+ while (cs->cdw & 7) {
+ if (device->physical_device->rad_info.gfx_ib_pad_with_type2)
+ radeon_emit(cs, PKT2_NOP_PAD);
+ else
+ radeon_emit(cs, PKT3_NOP_PAD);
+ }
+
+ device->gfx_init = device->ws->buffer_create(
+ device->ws, cs->cdw * 4, 4096,
+ radv_cmdbuffer_domain(&device->physical_device->rad_info, device->instance->perftest_flags),
+ RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY |
+ RADEON_FLAG_GTT_WC,
+ RADV_BO_PRIORITY_CS);
+ if (!device->gfx_init)
+ goto fail;
+
+ void *map = device->ws->buffer_map(device->gfx_init);
+ if (!map) {
+ device->ws->buffer_destroy(device->ws, device->gfx_init);
+ device->gfx_init = NULL;
+ goto fail;
+ }
+ memcpy(map, cs->buf, cs->cdw * 4);
+
+ device->ws->buffer_unmap(device->gfx_init);
+ device->gfx_init_size_dw = cs->cdw;
fail:
- device->ws->cs_destroy(cs);
+ device->ws->cs_destroy(cs);
}
static void
-get_viewport_xform(const VkViewport *viewport,
- float scale[3], float translate[3])
+get_viewport_xform(const VkViewport *viewport, float scale[3], float translate[3])
{
- float x = viewport->x;
- float y = viewport->y;
- float half_width = 0.5f * viewport->width;
- float half_height = 0.5f * viewport->height;
- double n = viewport->minDepth;
- double f = viewport->maxDepth;
-
- scale[0] = half_width;
- translate[0] = half_width + x;
- scale[1] = half_height;
- translate[1] = half_height + y;
-
- scale[2] = (f - n);
- translate[2] = n;
+ float x = viewport->x;
+ float y = viewport->y;
+ float half_width = 0.5f * viewport->width;
+ float half_height = 0.5f * viewport->height;
+ double n = viewport->minDepth;
+ double f = viewport->maxDepth;
+
+ scale[0] = half_width;
+ translate[0] = half_width + x;
+ scale[1] = half_height;
+ translate[1] = half_height + y;
+
+ scale[2] = (f - n);
+ translate[2] = n;
}
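A worked example (not from this change) of what get_viewport_xform() produces for a hypothetical 1920x1080 viewport with depth range [0, 1]:

   static void
   example_viewport_xform(void)
   {
      /* 1920x1080 viewport at the origin, depth range [0, 1]. */
      VkViewport vp = {0.0f, 0.0f, 1920.0f, 1080.0f, 0.0f, 1.0f};
      float scale[3], translate[3];

      get_viewport_xform(&vp, scale, translate);
      /* scale     == { 960, 540, 1 }
       * translate == { 960, 540, 0 }
       * NDC x,y in [-1, 1] map to [0, 1920] x [0, 1080]; NDC z maps to
       * minDepth + z * (maxDepth - minDepth). */
   }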
void
-si_write_viewport(struct radeon_cmdbuf *cs, int first_vp,
- int count, const VkViewport *viewports)
+si_write_viewport(struct radeon_cmdbuf *cs, int first_vp, int count, const VkViewport *viewports)
{
- int i;
-
- assert(count);
- radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE +
- first_vp * 4 * 6, count * 6);
-
- for (i = 0; i < count; i++) {
- float scale[3], translate[3];
-
-
- get_viewport_xform(&viewports[i], scale, translate);
- radeon_emit(cs, fui(scale[0]));
- radeon_emit(cs, fui(translate[0]));
- radeon_emit(cs, fui(scale[1]));
- radeon_emit(cs, fui(translate[1]));
- radeon_emit(cs, fui(scale[2]));
- radeon_emit(cs, fui(translate[2]));
- }
-
- radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0 +
- first_vp * 4 * 2, count * 2);
- for (i = 0; i < count; i++) {
- float zmin = MIN2(viewports[i].minDepth, viewports[i].maxDepth);
- float zmax = MAX2(viewports[i].minDepth, viewports[i].maxDepth);
- radeon_emit(cs, fui(zmin));
- radeon_emit(cs, fui(zmax));
- }
+ int i;
+
+ assert(count);
+ radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE + first_vp * 4 * 6, count * 6);
+
+ for (i = 0; i < count; i++) {
+ float scale[3], translate[3];
+
+ get_viewport_xform(&viewports[i], scale, translate);
+ radeon_emit(cs, fui(scale[0]));
+ radeon_emit(cs, fui(translate[0]));
+ radeon_emit(cs, fui(scale[1]));
+ radeon_emit(cs, fui(translate[1]));
+ radeon_emit(cs, fui(scale[2]));
+ radeon_emit(cs, fui(translate[2]));
+ }
+
+ radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0 + first_vp * 4 * 2, count * 2);
+ for (i = 0; i < count; i++) {
+ float zmin = MIN2(viewports[i].minDepth, viewports[i].maxDepth);
+ float zmax = MAX2(viewports[i].minDepth, viewports[i].maxDepth);
+ radeon_emit(cs, fui(zmin));
+ radeon_emit(cs, fui(zmax));
+ }
}
-static VkRect2D si_scissor_from_viewport(const VkViewport *viewport)
+static VkRect2D
+si_scissor_from_viewport(const VkViewport *viewport)
{
- float scale[3], translate[3];
- VkRect2D rect;
+ float scale[3], translate[3];
+ VkRect2D rect;
- get_viewport_xform(viewport, scale, translate);
+ get_viewport_xform(viewport, scale, translate);
- rect.offset.x = translate[0] - fabsf(scale[0]);
- rect.offset.y = translate[1] - fabsf(scale[1]);
- rect.extent.width = ceilf(translate[0] + fabsf(scale[0])) - rect.offset.x;
- rect.extent.height = ceilf(translate[1] + fabsf(scale[1])) - rect.offset.y;
+ rect.offset.x = translate[0] - fabsf(scale[0]);
+ rect.offset.y = translate[1] - fabsf(scale[1]);
+ rect.extent.width = ceilf(translate[0] + fabsf(scale[0])) - rect.offset.x;
+ rect.extent.height = ceilf(translate[1] + fabsf(scale[1])) - rect.offset.y;
- return rect;
+ return rect;
}
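A short sketch (not from this change) showing why the fabsf() calls above matter: a Y-flipped viewport, as Vulkan apps commonly use, yields the same derived scissor as the unflipped one. The viewport values are hypothetical:

   static VkRect2D
   example_scissor_from_flipped_viewport(void)
   {
      /* y = 1080, height = -1080: Y-flipped 1920x1080 viewport. */
      VkViewport vp = {0.0f, 1080.0f, 1920.0f, -1080.0f, 0.0f, 1.0f};
      /* scale = { 960, -540, 1 }, translate = { 960, 540, 0 }, so the result
       * is offset (0, 0) with extent 1920x1080, as for the unflipped case. */
      return si_scissor_from_viewport(&vp);
   }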
-static VkRect2D si_intersect_scissor(const VkRect2D *a, const VkRect2D *b) {
- VkRect2D ret;
- ret.offset.x = MAX2(a->offset.x, b->offset.x);
- ret.offset.y = MAX2(a->offset.y, b->offset.y);
- ret.extent.width = MIN2(a->offset.x + a->extent.width,
- b->offset.x + b->extent.width) - ret.offset.x;
- ret.extent.height = MIN2(a->offset.y + a->extent.height,
- b->offset.y + b->extent.height) - ret.offset.y;
- return ret;
+static VkRect2D
+si_intersect_scissor(const VkRect2D *a, const VkRect2D *b)
+{
+ VkRect2D ret;
+ ret.offset.x = MAX2(a->offset.x, b->offset.x);
+ ret.offset.y = MAX2(a->offset.y, b->offset.y);
+ ret.extent.width =
+ MIN2(a->offset.x + a->extent.width, b->offset.x + b->extent.width) - ret.offset.x;
+ ret.extent.height =
+ MIN2(a->offset.y + a->extent.height, b->offset.y + b->extent.height) - ret.offset.y;
+ return ret;
}
void
-si_write_scissors(struct radeon_cmdbuf *cs, int first,
- int count, const VkRect2D *scissors,
+si_write_scissors(struct radeon_cmdbuf *cs, int first, int count, const VkRect2D *scissors,
const VkViewport *viewports, bool can_use_guardband)
{
- int i;
- float scale[3], translate[3], guardband_x = INFINITY, guardband_y = INFINITY;
- const float max_range = 32767.0f;
- if (!count)
- return;
-
- radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL + first * 4 * 2, count * 2);
- for (i = 0; i < count; i++) {
- VkRect2D viewport_scissor = si_scissor_from_viewport(viewports + i);
- VkRect2D scissor = si_intersect_scissor(&scissors[i], &viewport_scissor);
-
- get_viewport_xform(viewports + i, scale, translate);
- scale[0] = fabsf(scale[0]);
- scale[1] = fabsf(scale[1]);
-
- if (scale[0] < 0.5)
- scale[0] = 0.5;
- if (scale[1] < 0.5)
- scale[1] = 0.5;
-
- guardband_x = MIN2(guardband_x, (max_range - fabsf(translate[0])) / scale[0]);
- guardband_y = MIN2(guardband_y, (max_range - fabsf(translate[1])) / scale[1]);
-
- radeon_emit(cs, S_028250_TL_X(scissor.offset.x) |
- S_028250_TL_Y(scissor.offset.y) |
- S_028250_WINDOW_OFFSET_DISABLE(1));
- radeon_emit(cs, S_028254_BR_X(scissor.offset.x + scissor.extent.width) |
- S_028254_BR_Y(scissor.offset.y + scissor.extent.height));
- }
- if (!can_use_guardband) {
- guardband_x = 1.0;
- guardband_y = 1.0;
- }
-
- radeon_set_context_reg_seq(cs, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 4);
- radeon_emit(cs, fui(guardband_y));
- radeon_emit(cs, fui(1.0));
- radeon_emit(cs, fui(guardband_x));
- radeon_emit(cs, fui(1.0));
+ int i;
+ float scale[3], translate[3], guardband_x = INFINITY, guardband_y = INFINITY;
+ const float max_range = 32767.0f;
+ if (!count)
+ return;
+
+ radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL + first * 4 * 2, count * 2);
+ for (i = 0; i < count; i++) {
+ VkRect2D viewport_scissor = si_scissor_from_viewport(viewports + i);
+ VkRect2D scissor = si_intersect_scissor(&scissors[i], &viewport_scissor);
+
+ get_viewport_xform(viewports + i, scale, translate);
+ scale[0] = fabsf(scale[0]);
+ scale[1] = fabsf(scale[1]);
+
+ if (scale[0] < 0.5)
+ scale[0] = 0.5;
+ if (scale[1] < 0.5)
+ scale[1] = 0.5;
+
+ guardband_x = MIN2(guardband_x, (max_range - fabsf(translate[0])) / scale[0]);
+ guardband_y = MIN2(guardband_y, (max_range - fabsf(translate[1])) / scale[1]);
+
+ radeon_emit(cs, S_028250_TL_X(scissor.offset.x) | S_028250_TL_Y(scissor.offset.y) |
+ S_028250_WINDOW_OFFSET_DISABLE(1));
+ radeon_emit(cs, S_028254_BR_X(scissor.offset.x + scissor.extent.width) |
+ S_028254_BR_Y(scissor.offset.y + scissor.extent.height));
+ }
+ if (!can_use_guardband) {
+ guardband_x = 1.0;
+ guardband_y = 1.0;
+ }
+
+ radeon_set_context_reg_seq(cs, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, 4);
+ radeon_emit(cs, fui(guardband_y));
+ radeon_emit(cs, fui(1.0));
+ radeon_emit(cs, fui(guardband_x));
+ radeon_emit(cs, fui(1.0));
}
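A worked guardband value (not from this change), assuming the same hypothetical 1920-wide viewport at x = 0:

   /* scale[0] == 960, translate[0] == 960, max_range == 32767:
    *
    *    guardband_x = (32767 - 960) / 960 ~= 33.1
    *
    * i.e. geometry may extend to roughly 33x the viewport half-width from its
    * center before the clipper has to run; with can_use_guardband false the
    * value collapses to 1.0 (no extra guardband beyond the viewport). */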
static inline unsigned
radv_prims_for_vertices(struct radv_prim_vertex_count *info, unsigned num)
{
- if (num == 0)
- return 0;
+ if (num == 0)
+ return 0;
- if (info->incr == 0)
- return 0;
+ if (info->incr == 0)
+ return 0;
- if (num < info->min)
- return 0;
+ if (num < info->min)
+ return 0;
- return 1 + ((num - info->min) / info->incr);
+ return 1 + ((num - info->min) / info->incr);
}
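Worked values (not from this change) for the formula above, with {min, incr} pairs taken from prim_size_table just below:

   /* Triangle strip {3, 1}: 10 vertices -> 1 + (10 - 3) / 1 == 8 primitives.
    * Triangle list  {3, 3}: 10 vertices -> 1 + (10 - 3) / 3 == 3 primitives
    *                        (the leftover vertex draws nothing).
    * Fewer than 'min' vertices, or incr == 0, yields 0. */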
static const struct radv_prim_vertex_count prim_size_table[] = {
- [V_008958_DI_PT_NONE] = {0, 0},
- [V_008958_DI_PT_POINTLIST] = {1, 1},
- [V_008958_DI_PT_LINELIST] = {2, 2},
- [V_008958_DI_PT_LINESTRIP] = {2, 1},
- [V_008958_DI_PT_TRILIST] = {3, 3},
- [V_008958_DI_PT_TRIFAN] = {3, 1},
- [V_008958_DI_PT_TRISTRIP] = {3, 1},
- [V_008958_DI_PT_LINELIST_ADJ] = {4, 4},
- [V_008958_DI_PT_LINESTRIP_ADJ] = {4, 1},
- [V_008958_DI_PT_TRILIST_ADJ] = {6, 6},
- [V_008958_DI_PT_TRISTRIP_ADJ] = {6, 2},
- [V_008958_DI_PT_RECTLIST] = {3, 3},
- [V_008958_DI_PT_LINELOOP] = {2, 1},
- [V_008958_DI_PT_POLYGON] = {3, 1},
- [V_008958_DI_PT_2D_TRI_STRIP] = {0, 0},
+ [V_008958_DI_PT_NONE] = {0, 0}, [V_008958_DI_PT_POINTLIST] = {1, 1},
+ [V_008958_DI_PT_LINELIST] = {2, 2}, [V_008958_DI_PT_LINESTRIP] = {2, 1},
+ [V_008958_DI_PT_TRILIST] = {3, 3}, [V_008958_DI_PT_TRIFAN] = {3, 1},
+ [V_008958_DI_PT_TRISTRIP] = {3, 1}, [V_008958_DI_PT_LINELIST_ADJ] = {4, 4},
+ [V_008958_DI_PT_LINESTRIP_ADJ] = {4, 1}, [V_008958_DI_PT_TRILIST_ADJ] = {6, 6},
+ [V_008958_DI_PT_TRISTRIP_ADJ] = {6, 2}, [V_008958_DI_PT_RECTLIST] = {3, 3},
+ [V_008958_DI_PT_LINELOOP] = {2, 1}, [V_008958_DI_PT_POLYGON] = {3, 1},
+ [V_008958_DI_PT_2D_TRI_STRIP] = {0, 0},
};
uint32_t
-si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer,
- bool instanced_draw, bool indirect_draw,
- bool count_from_stream_output,
- uint32_t draw_vertex_count,
- unsigned topology)
+si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_draw,
+ bool indirect_draw, bool count_from_stream_output,
+ uint32_t draw_vertex_count, unsigned topology)
{
- enum chip_class chip_class = cmd_buffer->device->physical_device->rad_info.chip_class;
- enum radeon_family family = cmd_buffer->device->physical_device->rad_info.family;
- struct radeon_info *info = &cmd_buffer->device->physical_device->rad_info;
- const unsigned max_primgroup_in_wave = 2;
- /* SWITCH_ON_EOP(0) is always preferable. */
- bool wd_switch_on_eop = false;
- bool ia_switch_on_eop = false;
- bool ia_switch_on_eoi = false;
- bool partial_vs_wave = false;
- bool partial_es_wave = cmd_buffer->state.pipeline->graphics.ia_multi_vgt_param.partial_es_wave;
- bool multi_instances_smaller_than_primgroup;
- struct radv_prim_vertex_count prim_vertex_count = prim_size_table[topology];
-
- if (radv_pipeline_has_tess(cmd_buffer->state.pipeline)) {
- if (topology == V_008958_DI_PT_PATCH) {
- prim_vertex_count.min = cmd_buffer->state.pipeline->graphics.tess_patch_control_points;
- prim_vertex_count.incr = 1;
- }
- }
-
- multi_instances_smaller_than_primgroup = indirect_draw;
- if (!multi_instances_smaller_than_primgroup && instanced_draw) {
- uint32_t num_prims = radv_prims_for_vertices(&prim_vertex_count, draw_vertex_count);
- if (num_prims < cmd_buffer->state.pipeline->graphics.ia_multi_vgt_param.primgroup_size)
- multi_instances_smaller_than_primgroup = true;
- }
-
- ia_switch_on_eoi = cmd_buffer->state.pipeline->graphics.ia_multi_vgt_param.ia_switch_on_eoi;
- partial_vs_wave = cmd_buffer->state.pipeline->graphics.ia_multi_vgt_param.partial_vs_wave;
-
- if (chip_class >= GFX7) {
- /* WD_SWITCH_ON_EOP has no effect on GPUs with less than
- * 4 shader engines. Set 1 to pass the assertion below.
- * The other cases are hardware requirements. */
- if (cmd_buffer->device->physical_device->rad_info.max_se < 4 ||
- topology == V_008958_DI_PT_POLYGON ||
- topology == V_008958_DI_PT_LINELOOP ||
- topology == V_008958_DI_PT_TRIFAN ||
- topology == V_008958_DI_PT_TRISTRIP_ADJ ||
- (cmd_buffer->state.pipeline->graphics.prim_restart_enable &&
- (cmd_buffer->device->physical_device->rad_info.family < CHIP_POLARIS10 ||
- (topology != V_008958_DI_PT_POINTLIST &&
- topology != V_008958_DI_PT_LINESTRIP))))
- wd_switch_on_eop = true;
-
- /* Hawaii hangs if instancing is enabled and WD_SWITCH_ON_EOP is 0.
- * We don't know that for indirect drawing, so treat it as
- * always problematic. */
- if (family == CHIP_HAWAII &&
- (instanced_draw || indirect_draw))
- wd_switch_on_eop = true;
-
- /* Performance recommendation for 4 SE Gfx7-8 parts if
- * instances are smaller than a primgroup.
- * Assume indirect draws always use small instances.
- * This is needed for good VS wave utilization.
- */
- if (chip_class <= GFX8 &&
- info->max_se == 4 &&
- multi_instances_smaller_than_primgroup)
- wd_switch_on_eop = true;
-
- /* Required on GFX7 and later. */
- if (info->max_se > 2 && !wd_switch_on_eop)
- ia_switch_on_eoi = true;
-
- /* Required by Hawaii and, for some special cases, by GFX8. */
- if (ia_switch_on_eoi &&
- (family == CHIP_HAWAII ||
- (chip_class == GFX8 &&
- /* max primgroup in wave is always 2 - leave this for documentation */
- (radv_pipeline_has_gs(cmd_buffer->state.pipeline) || max_primgroup_in_wave != 2))))
- partial_vs_wave = true;
-
- /* Instancing bug on Bonaire. */
- if (family == CHIP_BONAIRE && ia_switch_on_eoi &&
- (instanced_draw || indirect_draw))
- partial_vs_wave = true;
-
- /* Hardware requirement when drawing primitives from a stream
- * output buffer.
- */
- if (count_from_stream_output)
- wd_switch_on_eop = true;
-
- /* If the WD switch is false, the IA switch must be false too. */
- assert(wd_switch_on_eop || !ia_switch_on_eop);
- }
- /* If SWITCH_ON_EOI is set, PARTIAL_ES_WAVE must be set too. */
- if (chip_class <= GFX8 && ia_switch_on_eoi)
- partial_es_wave = true;
-
- if (radv_pipeline_has_gs(cmd_buffer->state.pipeline)) {
- /* GS hw bug with single-primitive instances and SWITCH_ON_EOI.
- * The hw doc says all multi-SE chips are affected, but amdgpu-pro Vulkan
- * only applies it to Hawaii. Do what amdgpu-pro Vulkan does.
- */
- if (family == CHIP_HAWAII && ia_switch_on_eoi) {
- bool set_vgt_flush = indirect_draw;
- if (!set_vgt_flush && instanced_draw) {
- uint32_t num_prims = radv_prims_for_vertices(&prim_vertex_count, draw_vertex_count);
- if (num_prims <= 1)
- set_vgt_flush = true;
- }
- if (set_vgt_flush)
- cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VGT_FLUSH;
- }
- }
-
- /* Workaround for a VGT hang when strip primitive types are used with
- * primitive restart.
- */
- if (cmd_buffer->state.pipeline->graphics.prim_restart_enable &&
- (topology == V_008958_DI_PT_LINESTRIP ||
- topology == V_008958_DI_PT_TRISTRIP ||
- topology == V_008958_DI_PT_LINESTRIP_ADJ ||
- topology == V_008958_DI_PT_TRISTRIP_ADJ)) {
- partial_vs_wave = true;
- }
-
- return cmd_buffer->state.pipeline->graphics.ia_multi_vgt_param.base |
- S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) |
- S_028AA8_SWITCH_ON_EOI(ia_switch_on_eoi) |
- S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) |
- S_028AA8_PARTIAL_ES_WAVE_ON(partial_es_wave) |
- S_028AA8_WD_SWITCH_ON_EOP(chip_class >= GFX7 ? wd_switch_on_eop : 0);
-
+ enum chip_class chip_class = cmd_buffer->device->physical_device->rad_info.chip_class;
+ enum radeon_family family = cmd_buffer->device->physical_device->rad_info.family;
+ struct radeon_info *info = &cmd_buffer->device->physical_device->rad_info;
+ const unsigned max_primgroup_in_wave = 2;
+ /* SWITCH_ON_EOP(0) is always preferable. */
+ bool wd_switch_on_eop = false;
+ bool ia_switch_on_eop = false;
+ bool ia_switch_on_eoi = false;
+ bool partial_vs_wave = false;
+ bool partial_es_wave = cmd_buffer->state.pipeline->graphics.ia_multi_vgt_param.partial_es_wave;
+ bool multi_instances_smaller_than_primgroup;
+ struct radv_prim_vertex_count prim_vertex_count = prim_size_table[topology];
+
+ if (radv_pipeline_has_tess(cmd_buffer->state.pipeline)) {
+ if (topology == V_008958_DI_PT_PATCH) {
+ prim_vertex_count.min = cmd_buffer->state.pipeline->graphics.tess_patch_control_points;
+ prim_vertex_count.incr = 1;
+ }
+ }
+
+ multi_instances_smaller_than_primgroup = indirect_draw;
+ if (!multi_instances_smaller_than_primgroup && instanced_draw) {
+ uint32_t num_prims = radv_prims_for_vertices(&prim_vertex_count, draw_vertex_count);
+ if (num_prims < cmd_buffer->state.pipeline->graphics.ia_multi_vgt_param.primgroup_size)
+ multi_instances_smaller_than_primgroup = true;
+ }
+
+ ia_switch_on_eoi = cmd_buffer->state.pipeline->graphics.ia_multi_vgt_param.ia_switch_on_eoi;
+ partial_vs_wave = cmd_buffer->state.pipeline->graphics.ia_multi_vgt_param.partial_vs_wave;
+
+ if (chip_class >= GFX7) {
+ /* WD_SWITCH_ON_EOP has no effect on GPUs with less than
+ * 4 shader engines. Set 1 to pass the assertion below.
+ * The other cases are hardware requirements. */
+ if (cmd_buffer->device->physical_device->rad_info.max_se < 4 ||
+ topology == V_008958_DI_PT_POLYGON || topology == V_008958_DI_PT_LINELOOP ||
+ topology == V_008958_DI_PT_TRIFAN || topology == V_008958_DI_PT_TRISTRIP_ADJ ||
+ (cmd_buffer->state.pipeline->graphics.prim_restart_enable &&
+ (cmd_buffer->device->physical_device->rad_info.family < CHIP_POLARIS10 ||
+ (topology != V_008958_DI_PT_POINTLIST && topology != V_008958_DI_PT_LINESTRIP))))
+ wd_switch_on_eop = true;
+
+ /* Hawaii hangs if instancing is enabled and WD_SWITCH_ON_EOP is 0.
+ * We don't know that for indirect drawing, so treat it as
+ * always problematic. */
+ if (family == CHIP_HAWAII && (instanced_draw || indirect_draw))
+ wd_switch_on_eop = true;
+
+ /* Performance recommendation for 4 SE Gfx7-8 parts if
+ * instances are smaller than a primgroup.
+ * Assume indirect draws always use small instances.
+ * This is needed for good VS wave utilization.
+ */
+ if (chip_class <= GFX8 && info->max_se == 4 && multi_instances_smaller_than_primgroup)
+ wd_switch_on_eop = true;
+
+ /* Required on GFX7 and later. */
+ if (info->max_se > 2 && !wd_switch_on_eop)
+ ia_switch_on_eoi = true;
+
+ /* Required by Hawaii and, for some special cases, by GFX8. */
+ if (ia_switch_on_eoi &&
+ (family == CHIP_HAWAII ||
+ (chip_class == GFX8 &&
+ /* max primgroup in wave is always 2 - leave this for documentation */
+ (radv_pipeline_has_gs(cmd_buffer->state.pipeline) || max_primgroup_in_wave != 2))))
+ partial_vs_wave = true;
+
+ /* Instancing bug on Bonaire. */
+ if (family == CHIP_BONAIRE && ia_switch_on_eoi && (instanced_draw || indirect_draw))
+ partial_vs_wave = true;
+
+ /* Hardware requirement when drawing primitives from a stream
+ * output buffer.
+ */
+ if (count_from_stream_output)
+ wd_switch_on_eop = true;
+
+ /* If the WD switch is false, the IA switch must be false too. */
+ assert(wd_switch_on_eop || !ia_switch_on_eop);
+ }
+ /* If SWITCH_ON_EOI is set, PARTIAL_ES_WAVE must be set too. */
+ if (chip_class <= GFX8 && ia_switch_on_eoi)
+ partial_es_wave = true;
+
+ if (radv_pipeline_has_gs(cmd_buffer->state.pipeline)) {
+ /* GS hw bug with single-primitive instances and SWITCH_ON_EOI.
+ * The hw doc says all multi-SE chips are affected, but amdgpu-pro Vulkan
+ * only applies it to Hawaii. Do what amdgpu-pro Vulkan does.
+ */
+ if (family == CHIP_HAWAII && ia_switch_on_eoi) {
+ bool set_vgt_flush = indirect_draw;
+ if (!set_vgt_flush && instanced_draw) {
+ uint32_t num_prims = radv_prims_for_vertices(&prim_vertex_count, draw_vertex_count);
+ if (num_prims <= 1)
+ set_vgt_flush = true;
+ }
+ if (set_vgt_flush)
+ cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VGT_FLUSH;
+ }
+ }
+
+ /* Workaround for a VGT hang when strip primitive types are used with
+ * primitive restart.
+ */
+ if (cmd_buffer->state.pipeline->graphics.prim_restart_enable &&
+ (topology == V_008958_DI_PT_LINESTRIP || topology == V_008958_DI_PT_TRISTRIP ||
+ topology == V_008958_DI_PT_LINESTRIP_ADJ || topology == V_008958_DI_PT_TRISTRIP_ADJ)) {
+ partial_vs_wave = true;
+ }
+
+ return cmd_buffer->state.pipeline->graphics.ia_multi_vgt_param.base |
+ S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) | S_028AA8_SWITCH_ON_EOI(ia_switch_on_eoi) |
+ S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) |
+ S_028AA8_PARTIAL_ES_WAVE_ON(partial_es_wave) |
+ S_028AA8_WD_SWITCH_ON_EOP(chip_class >= GFX7 ? wd_switch_on_eop : 0);
}
-void si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs,
- enum chip_class chip_class,
- bool is_mec,
- unsigned event, unsigned event_flags,
- unsigned dst_sel, unsigned data_sel,
- uint64_t va,
- uint32_t new_fence,
- uint64_t gfx9_eop_bug_va)
+void
+si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs, enum chip_class chip_class, bool is_mec,
+ unsigned event, unsigned event_flags, unsigned dst_sel,
+ unsigned data_sel, uint64_t va, uint32_t new_fence,
+ uint64_t gfx9_eop_bug_va)
{
- unsigned op = EVENT_TYPE(event) |
- EVENT_INDEX(event == V_028A90_CS_DONE ||
- event == V_028A90_PS_DONE ? 6 : 5) |
- event_flags;
- unsigned is_gfx8_mec = is_mec && chip_class < GFX9;
- unsigned sel = EOP_DST_SEL(dst_sel) |
- EOP_DATA_SEL(data_sel);
-
- /* Wait for write confirmation before writing data, but don't send
- * an interrupt. */
- if (data_sel != EOP_DATA_SEL_DISCARD)
- sel |= EOP_INT_SEL(EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM);
-
- if (chip_class >= GFX9 || is_gfx8_mec) {
- /* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion
- * counters) must immediately precede every timestamp event to
- * prevent a GPU hang on GFX9.
- */
- if (chip_class == GFX9 && !is_mec) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
- radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
- radeon_emit(cs, gfx9_eop_bug_va);
- radeon_emit(cs, gfx9_eop_bug_va >> 32);
- }
-
- radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, is_gfx8_mec ? 5 : 6, false));
- radeon_emit(cs, op);
- radeon_emit(cs, sel);
- radeon_emit(cs, va); /* address lo */
- radeon_emit(cs, va >> 32); /* address hi */
- radeon_emit(cs, new_fence); /* immediate data lo */
- radeon_emit(cs, 0); /* immediate data hi */
- if (!is_gfx8_mec)
- radeon_emit(cs, 0); /* unused */
- } else {
- /* On GFX6, EOS events are always emitted with EVENT_WRITE_EOS.
- * On GFX7+, EOS events are emitted with EVENT_WRITE_EOS on
- * the graphics queue, and with RELEASE_MEM on the compute
- * queue.
- */
- if (event == V_028B9C_CS_DONE || event == V_028B9C_PS_DONE) {
- assert(event_flags == 0 &&
- dst_sel == EOP_DST_SEL_MEM &&
- data_sel == EOP_DATA_SEL_VALUE_32BIT);
-
- if (is_mec) {
- radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, 5, false));
- radeon_emit(cs, op);
- radeon_emit(cs, sel);
- radeon_emit(cs, va); /* address lo */
- radeon_emit(cs, va >> 32); /* address hi */
- radeon_emit(cs, new_fence); /* immediate data lo */
- radeon_emit(cs, 0); /* immediate data hi */
- } else {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOS, 3, false));
- radeon_emit(cs, op);
- radeon_emit(cs, va);
- radeon_emit(cs, ((va >> 32) & 0xffff) |
- EOS_DATA_SEL(EOS_DATA_SEL_VALUE_32BIT));
- radeon_emit(cs, new_fence);
- }
- } else {
- if (chip_class == GFX7 ||
- chip_class == GFX8) {
- /* Two EOP events are required to make all
- * engines go idle (and optional cache flushes
- * executed) before the timestamp is written.
- */
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, false));
- radeon_emit(cs, op);
- radeon_emit(cs, va);
- radeon_emit(cs, ((va >> 32) & 0xffff) | sel);
- radeon_emit(cs, 0); /* immediate data */
- radeon_emit(cs, 0); /* unused */
- }
-
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, false));
- radeon_emit(cs, op);
- radeon_emit(cs, va);
- radeon_emit(cs, ((va >> 32) & 0xffff) | sel);
- radeon_emit(cs, new_fence); /* immediate data */
- radeon_emit(cs, 0); /* unused */
- }
- }
+ unsigned op = EVENT_TYPE(event) |
+ EVENT_INDEX(event == V_028A90_CS_DONE || event == V_028A90_PS_DONE ? 6 : 5) |
+ event_flags;
+ unsigned is_gfx8_mec = is_mec && chip_class < GFX9;
+ unsigned sel = EOP_DST_SEL(dst_sel) | EOP_DATA_SEL(data_sel);
+
+ /* Wait for write confirmation before writing data, but don't send
+ * an interrupt. */
+ if (data_sel != EOP_DATA_SEL_DISCARD)
+ sel |= EOP_INT_SEL(EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM);
+
+ if (chip_class >= GFX9 || is_gfx8_mec) {
+ /* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion
+ * counters) must immediately precede every timestamp event to
+ * prevent a GPU hang on GFX9.
+ */
+ if (chip_class == GFX9 && !is_mec) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
+ radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
+ radeon_emit(cs, gfx9_eop_bug_va);
+ radeon_emit(cs, gfx9_eop_bug_va >> 32);
+ }
+
+ radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, is_gfx8_mec ? 5 : 6, false));
+ radeon_emit(cs, op);
+ radeon_emit(cs, sel);
+ radeon_emit(cs, va); /* address lo */
+ radeon_emit(cs, va >> 32); /* address hi */
+ radeon_emit(cs, new_fence); /* immediate data lo */
+ radeon_emit(cs, 0); /* immediate data hi */
+ if (!is_gfx8_mec)
+ radeon_emit(cs, 0); /* unused */
+ } else {
+ /* On GFX6, EOS events are always emitted with EVENT_WRITE_EOS.
+ * On GFX7+, EOS events are emitted with EVENT_WRITE_EOS on
+ * the graphics queue, and with RELEASE_MEM on the compute
+ * queue.
+ */
+ if (event == V_028B9C_CS_DONE || event == V_028B9C_PS_DONE) {
+ assert(event_flags == 0 && dst_sel == EOP_DST_SEL_MEM &&
+ data_sel == EOP_DATA_SEL_VALUE_32BIT);
+
+ if (is_mec) {
+ radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, 5, false));
+ radeon_emit(cs, op);
+ radeon_emit(cs, sel);
+ radeon_emit(cs, va); /* address lo */
+ radeon_emit(cs, va >> 32); /* address hi */
+ radeon_emit(cs, new_fence); /* immediate data lo */
+ radeon_emit(cs, 0); /* immediate data hi */
+ } else {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOS, 3, false));
+ radeon_emit(cs, op);
+ radeon_emit(cs, va);
+ radeon_emit(cs, ((va >> 32) & 0xffff) | EOS_DATA_SEL(EOS_DATA_SEL_VALUE_32BIT));
+ radeon_emit(cs, new_fence);
+ }
+ } else {
+ if (chip_class == GFX7 || chip_class == GFX8) {
+ /* Two EOP events are required to make all
+ * engines go idle (and optional cache flushes
+ * executed) before the timestamp is written.
+ */
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, false));
+ radeon_emit(cs, op);
+ radeon_emit(cs, va);
+ radeon_emit(cs, ((va >> 32) & 0xffff) | sel);
+ radeon_emit(cs, 0); /* immediate data */
+ radeon_emit(cs, 0); /* unused */
+ }
+
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, false));
+ radeon_emit(cs, op);
+ radeon_emit(cs, va);
+ radeon_emit(cs, ((va >> 32) & 0xffff) | sel);
+ radeon_emit(cs, new_fence); /* immediate data */
+ radeon_emit(cs, 0); /* unused */
+ }
+ }
}
void
-radv_cp_wait_mem(struct radeon_cmdbuf *cs, uint32_t op, uint64_t va,
- uint32_t ref, uint32_t mask)
+radv_cp_wait_mem(struct radeon_cmdbuf *cs, uint32_t op, uint64_t va, uint32_t ref, uint32_t mask)
{
- assert(op == WAIT_REG_MEM_EQUAL ||
- op == WAIT_REG_MEM_NOT_EQUAL ||
- op == WAIT_REG_MEM_GREATER_OR_EQUAL);
-
- radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, false));
- radeon_emit(cs, op | WAIT_REG_MEM_MEM_SPACE(1));
- radeon_emit(cs, va);
- radeon_emit(cs, va >> 32);
- radeon_emit(cs, ref); /* reference value */
- radeon_emit(cs, mask); /* mask */
- radeon_emit(cs, 4); /* poll interval */
+ assert(op == WAIT_REG_MEM_EQUAL || op == WAIT_REG_MEM_NOT_EQUAL ||
+ op == WAIT_REG_MEM_GREATER_OR_EQUAL);
+
+ radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, false));
+ radeon_emit(cs, op | WAIT_REG_MEM_MEM_SPACE(1));
+ radeon_emit(cs, va);
+ radeon_emit(cs, va >> 32);
+ radeon_emit(cs, ref); /* reference value */
+ radeon_emit(cs, mask); /* mask */
+ radeon_emit(cs, 4); /* poll interval */
}
static void
-si_emit_acquire_mem(struct radeon_cmdbuf *cs,
- bool is_mec,
- bool is_gfx9,
- unsigned cp_coher_cntl)
+si_emit_acquire_mem(struct radeon_cmdbuf *cs, bool is_mec, bool is_gfx9, unsigned cp_coher_cntl)
{
- if (is_mec || is_gfx9) {
- uint32_t hi_val = is_gfx9 ? 0xffffff : 0xff;
- radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, false) |
- PKT3_SHADER_TYPE_S(is_mec));
- radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
- radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */
- radeon_emit(cs, hi_val); /* CP_COHER_SIZE_HI */
- radeon_emit(cs, 0); /* CP_COHER_BASE */
- radeon_emit(cs, 0); /* CP_COHER_BASE_HI */
- radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
- } else {
- /* ACQUIRE_MEM is only required on a compute ring. */
- radeon_emit(cs, PKT3(PKT3_SURFACE_SYNC, 3, false));
- radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
- radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */
- radeon_emit(cs, 0); /* CP_COHER_BASE */
- radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
- }
+ if (is_mec || is_gfx9) {
+ uint32_t hi_val = is_gfx9 ? 0xffffff : 0xff;
+ radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, false) | PKT3_SHADER_TYPE_S(is_mec));
+ radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
+ radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */
+ radeon_emit(cs, hi_val); /* CP_COHER_SIZE_HI */
+ radeon_emit(cs, 0); /* CP_COHER_BASE */
+ radeon_emit(cs, 0); /* CP_COHER_BASE_HI */
+ radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
+ } else {
+ /* ACQUIRE_MEM is only required on a compute ring. */
+ radeon_emit(cs, PKT3(PKT3_SURFACE_SYNC, 3, false));
+ radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
+ radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */
+ radeon_emit(cs, 0); /* CP_COHER_BASE */
+ radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
+ }
}
static void
-gfx10_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
- enum chip_class chip_class,
- uint32_t *flush_cnt,
- uint64_t flush_va,
- bool is_mec,
- enum radv_cmd_flush_bits flush_bits,
- enum rgp_flush_bits *sqtt_flush_bits,
- uint64_t gfx9_eop_bug_va)
+gfx10_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum chip_class chip_class, uint32_t *flush_cnt,
+ uint64_t flush_va, bool is_mec, enum radv_cmd_flush_bits flush_bits,
+ enum rgp_flush_bits *sqtt_flush_bits, uint64_t gfx9_eop_bug_va)
{
- uint32_t gcr_cntl = 0;
- unsigned cb_db_event = 0;
-
- /* We don't need these. */
- assert(!(flush_bits & (RADV_CMD_FLAG_VGT_STREAMOUT_SYNC)));
-
- if (flush_bits & RADV_CMD_FLAG_INV_ICACHE) {
- gcr_cntl |= S_586_GLI_INV(V_586_GLI_ALL);
-
- *sqtt_flush_bits |= RGP_FLUSH_INVAL_ICACHE;
- }
- if (flush_bits & RADV_CMD_FLAG_INV_SCACHE) {
- /* TODO: When writing to the SMEM L1 cache, we need to set SEQ
- * to FORWARD when both L1 and L2 are written out (WB or INV).
- */
- gcr_cntl |= S_586_GL1_INV(1) | S_586_GLK_INV(1);
-
- *sqtt_flush_bits |= RGP_FLUSH_INVAL_SMEM_L0;
- }
- if (flush_bits & RADV_CMD_FLAG_INV_VCACHE) {
- gcr_cntl |= S_586_GL1_INV(1) | S_586_GLV_INV(1);
-
- *sqtt_flush_bits |= RGP_FLUSH_INVAL_VMEM_L0 | RGP_FLUSH_INVAL_L1;
- }
- if (flush_bits & RADV_CMD_FLAG_INV_L2) {
- /* Writeback and invalidate everything in L2. */
- gcr_cntl |= S_586_GL2_INV(1) | S_586_GL2_WB(1) |
- S_586_GLM_INV(1) | S_586_GLM_WB(1);
-
- *sqtt_flush_bits |= RGP_FLUSH_INVAL_L2;
- } else if (flush_bits & RADV_CMD_FLAG_WB_L2) {
- /* Writeback but do not invalidate.
- * GLM doesn't support WB alone. If WB is set, INV must be set too.
- */
- gcr_cntl |= S_586_GL2_WB(1) |
- S_586_GLM_WB(1) | S_586_GLM_INV(1);
-
- *sqtt_flush_bits |= RGP_FLUSH_FLUSH_L2;
- } else if (flush_bits & RADV_CMD_FLAG_INV_L2_METADATA) {
- gcr_cntl |= S_586_GLM_INV(1) | S_586_GLM_WB(1);
- }
-
- if (flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB)) {
- /* TODO: trigger on RADV_CMD_FLAG_FLUSH_AND_INV_CB_META */
- if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB) {
- /* Flush CMASK/FMASK/DCC. Will wait for idle later. */
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_META) |
- EVENT_INDEX(0));
-
- *sqtt_flush_bits |= RGP_FLUSH_FLUSH_CB | RGP_FLUSH_INVAL_CB;
- }
-
- /* TODO: trigger on RADV_CMD_FLAG_FLUSH_AND_INV_DB_META ? */
- if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB) {
- /* Flush HTILE. Will wait for idle later. */
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_DB_META) |
- EVENT_INDEX(0));
-
- *sqtt_flush_bits |= RGP_FLUSH_FLUSH_DB | RGP_FLUSH_INVAL_DB;
- }
-
- /* First flush CB/DB, then L1/L2. */
- gcr_cntl |= S_586_SEQ(V_586_SEQ_FORWARD);
-
- if ((flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB)) ==
- (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB)) {
- cb_db_event = V_028A90_CACHE_FLUSH_AND_INV_TS_EVENT;
- } else if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB) {
- cb_db_event = V_028A90_FLUSH_AND_INV_CB_DATA_TS;
- } else if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB) {
- cb_db_event = V_028A90_FLUSH_AND_INV_DB_DATA_TS;
- } else {
- assert(0);
- }
- } else {
- /* Wait for graphics shaders to go idle if requested. */
- if (flush_bits & RADV_CMD_FLAG_PS_PARTIAL_FLUSH) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
-
- *sqtt_flush_bits |= RGP_FLUSH_PS_PARTIAL_FLUSH;
- } else if (flush_bits & RADV_CMD_FLAG_VS_PARTIAL_FLUSH) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
-
- *sqtt_flush_bits |= RGP_FLUSH_VS_PARTIAL_FLUSH;
- }
- }
-
- if (flush_bits & RADV_CMD_FLAG_CS_PARTIAL_FLUSH) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH | EVENT_INDEX(4)));
-
- *sqtt_flush_bits |= RGP_FLUSH_CS_PARTIAL_FLUSH;
- }
-
- if (cb_db_event) {
- /* CB/DB flush and invalidate (or possibly just a wait for a
- * meta flush) via RELEASE_MEM.
- *
- * Combine this with other cache flushes when possible; this
- * requires affected shaders to be idle, so do it after the
- * CS_PARTIAL_FLUSH before (VS/PS partial flushes are always
- * implied).
- */
- /* Get GCR_CNTL fields, because the encoding is different in RELEASE_MEM. */
- unsigned glm_wb = G_586_GLM_WB(gcr_cntl);
- unsigned glm_inv = G_586_GLM_INV(gcr_cntl);
- unsigned glv_inv = G_586_GLV_INV(gcr_cntl);
- unsigned gl1_inv = G_586_GL1_INV(gcr_cntl);
- assert(G_586_GL2_US(gcr_cntl) == 0);
- assert(G_586_GL2_RANGE(gcr_cntl) == 0);
- assert(G_586_GL2_DISCARD(gcr_cntl) == 0);
- unsigned gl2_inv = G_586_GL2_INV(gcr_cntl);
- unsigned gl2_wb = G_586_GL2_WB(gcr_cntl);
- unsigned gcr_seq = G_586_SEQ(gcr_cntl);
-
- gcr_cntl &= C_586_GLM_WB &
- C_586_GLM_INV &
- C_586_GLV_INV &
- C_586_GL1_INV &
- C_586_GL2_INV &
- C_586_GL2_WB; /* keep SEQ */
-
- assert(flush_cnt);
- (*flush_cnt)++;
-
- si_cs_emit_write_event_eop(cs, chip_class, false, cb_db_event,
- S_490_GLM_WB(glm_wb) |
- S_490_GLM_INV(glm_inv) |
- S_490_GLV_INV(glv_inv) |
- S_490_GL1_INV(gl1_inv) |
- S_490_GL2_INV(gl2_inv) |
- S_490_GL2_WB(gl2_wb) |
- S_490_SEQ(gcr_seq),
- EOP_DST_SEL_MEM,
- EOP_DATA_SEL_VALUE_32BIT,
- flush_va, *flush_cnt,
- gfx9_eop_bug_va);
-
- radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, flush_va,
- *flush_cnt, 0xffffffff);
- }
-
- /* VGT state sync */
- if (flush_bits & RADV_CMD_FLAG_VGT_FLUSH) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
- }
-
- /* Ignore fields that only modify the behavior of other fields. */
- if (gcr_cntl & C_586_GL1_RANGE & C_586_GL2_RANGE & C_586_SEQ) {
- /* Flush caches and wait for the caches to assert idle.
- * The cache flush is executed in the ME, but the PFP waits
- * for completion.
- */
- radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 6, 0));
- radeon_emit(cs, 0); /* CP_COHER_CNTL */
- radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */
- radeon_emit(cs, 0xffffff); /* CP_COHER_SIZE_HI */
- radeon_emit(cs, 0); /* CP_COHER_BASE */
- radeon_emit(cs, 0); /* CP_COHER_BASE_HI */
- radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
- radeon_emit(cs, gcr_cntl); /* GCR_CNTL */
- } else if ((cb_db_event ||
- (flush_bits & (RADV_CMD_FLAG_VS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_CS_PARTIAL_FLUSH)))
- && !is_mec) {
- /* We need to ensure that PFP waits as well. */
- radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
- radeon_emit(cs, 0);
-
- *sqtt_flush_bits |= RGP_FLUSH_PFP_SYNC_ME;
- }
-
- if (flush_bits & RADV_CMD_FLAG_START_PIPELINE_STATS) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_PIPELINESTAT_START) |
- EVENT_INDEX(0));
- } else if (flush_bits & RADV_CMD_FLAG_STOP_PIPELINE_STATS) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_PIPELINESTAT_STOP) |
- EVENT_INDEX(0));
- }
+ uint32_t gcr_cntl = 0;
+ unsigned cb_db_event = 0;
+
+ /* We don't need these. */
+ assert(!(flush_bits & (RADV_CMD_FLAG_VGT_STREAMOUT_SYNC)));
+
+ if (flush_bits & RADV_CMD_FLAG_INV_ICACHE) {
+ gcr_cntl |= S_586_GLI_INV(V_586_GLI_ALL);
+
+ *sqtt_flush_bits |= RGP_FLUSH_INVAL_ICACHE;
+ }
+ if (flush_bits & RADV_CMD_FLAG_INV_SCACHE) {
+ /* TODO: When writing to the SMEM L1 cache, we need to set SEQ
+ * to FORWARD when both L1 and L2 are written out (WB or INV).
+ */
+ gcr_cntl |= S_586_GL1_INV(1) | S_586_GLK_INV(1);
+
+ *sqtt_flush_bits |= RGP_FLUSH_INVAL_SMEM_L0;
+ }
+ if (flush_bits & RADV_CMD_FLAG_INV_VCACHE) {
+ gcr_cntl |= S_586_GL1_INV(1) | S_586_GLV_INV(1);
+
+ *sqtt_flush_bits |= RGP_FLUSH_INVAL_VMEM_L0 | RGP_FLUSH_INVAL_L1;
+ }
+ if (flush_bits & RADV_CMD_FLAG_INV_L2) {
+ /* Writeback and invalidate everything in L2. */
+ gcr_cntl |= S_586_GL2_INV(1) | S_586_GL2_WB(1) | S_586_GLM_INV(1) | S_586_GLM_WB(1);
+
+ *sqtt_flush_bits |= RGP_FLUSH_INVAL_L2;
+ } else if (flush_bits & RADV_CMD_FLAG_WB_L2) {
+ /* Writeback but do not invalidate.
+ * GLM doesn't support WB alone. If WB is set, INV must be set too.
+ */
+ gcr_cntl |= S_586_GL2_WB(1) | S_586_GLM_WB(1) | S_586_GLM_INV(1);
+
+ *sqtt_flush_bits |= RGP_FLUSH_FLUSH_L2;
+ } else if (flush_bits & RADV_CMD_FLAG_INV_L2_METADATA) {
+ gcr_cntl |= S_586_GLM_INV(1) | S_586_GLM_WB(1);
+ }
+
+ if (flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB)) {
+ /* TODO: trigger on RADV_CMD_FLAG_FLUSH_AND_INV_CB_META */
+ if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB) {
+ /* Flush CMASK/FMASK/DCC. Will wait for idle later. */
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0));
+
+ *sqtt_flush_bits |= RGP_FLUSH_FLUSH_CB | RGP_FLUSH_INVAL_CB;
+ }
+
+ /* TODO: trigger on RADV_CMD_FLAG_FLUSH_AND_INV_DB_META ? */
+ if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB) {
+ /* Flush HTILE. Will wait for idle later. */
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_DB_META) | EVENT_INDEX(0));
+
+ *sqtt_flush_bits |= RGP_FLUSH_FLUSH_DB | RGP_FLUSH_INVAL_DB;
+ }
+
+ /* First flush CB/DB, then L1/L2. */
+ gcr_cntl |= S_586_SEQ(V_586_SEQ_FORWARD);
+
+ if ((flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB)) ==
+ (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB)) {
+ cb_db_event = V_028A90_CACHE_FLUSH_AND_INV_TS_EVENT;
+ } else if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB) {
+ cb_db_event = V_028A90_FLUSH_AND_INV_CB_DATA_TS;
+ } else if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB) {
+ cb_db_event = V_028A90_FLUSH_AND_INV_DB_DATA_TS;
+ } else {
+ assert(0);
+ }
+ } else {
+ /* Wait for graphics shaders to go idle if requested. */
+ if (flush_bits & RADV_CMD_FLAG_PS_PARTIAL_FLUSH) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
+
+ *sqtt_flush_bits |= RGP_FLUSH_PS_PARTIAL_FLUSH;
+ } else if (flush_bits & RADV_CMD_FLAG_VS_PARTIAL_FLUSH) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
+
+ *sqtt_flush_bits |= RGP_FLUSH_VS_PARTIAL_FLUSH;
+ }
+ }
+
+ if (flush_bits & RADV_CMD_FLAG_CS_PARTIAL_FLUSH) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH) | EVENT_INDEX(4));
+
+ *sqtt_flush_bits |= RGP_FLUSH_CS_PARTIAL_FLUSH;
+ }
+
+ if (cb_db_event) {
+ /* CB/DB flush and invalidate (or possibly just a wait for a
+ * meta flush) via RELEASE_MEM.
+ *
+ * Combine this with other cache flushes when possible; this
+ * requires affected shaders to be idle, so do it after the
+ * preceding CS_PARTIAL_FLUSH (VS/PS partial flushes are always
+ * implied).
+ */
+ /* Get GCR_CNTL fields, because the encoding is different in RELEASE_MEM. */
+ unsigned glm_wb = G_586_GLM_WB(gcr_cntl);
+ unsigned glm_inv = G_586_GLM_INV(gcr_cntl);
+ unsigned glv_inv = G_586_GLV_INV(gcr_cntl);
+ unsigned gl1_inv = G_586_GL1_INV(gcr_cntl);
+ assert(G_586_GL2_US(gcr_cntl) == 0);
+ assert(G_586_GL2_RANGE(gcr_cntl) == 0);
+ assert(G_586_GL2_DISCARD(gcr_cntl) == 0);
+ unsigned gl2_inv = G_586_GL2_INV(gcr_cntl);
+ unsigned gl2_wb = G_586_GL2_WB(gcr_cntl);
+ unsigned gcr_seq = G_586_SEQ(gcr_cntl);
+
+ gcr_cntl &= C_586_GLM_WB & C_586_GLM_INV & C_586_GLV_INV & C_586_GL1_INV & C_586_GL2_INV &
+ C_586_GL2_WB; /* keep SEQ */
+
+ assert(flush_cnt);
+ (*flush_cnt)++;
+
+ si_cs_emit_write_event_eop(
+ cs, chip_class, false, cb_db_event,
+ S_490_GLM_WB(glm_wb) | S_490_GLM_INV(glm_inv) | S_490_GLV_INV(glv_inv) |
+ S_490_GL1_INV(gl1_inv) | S_490_GL2_INV(gl2_inv) | S_490_GL2_WB(gl2_wb) |
+ S_490_SEQ(gcr_seq),
+ EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, flush_va, *flush_cnt, gfx9_eop_bug_va);
+
+ radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, flush_va, *flush_cnt, 0xffffffff);
+ }
+
+ /* VGT state sync */
+ if (flush_bits & RADV_CMD_FLAG_VGT_FLUSH) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
+ }
+
+ /* Ignore fields that only modify the behavior of other fields. */
+ if (gcr_cntl & C_586_GL1_RANGE & C_586_GL2_RANGE & C_586_SEQ) {
+ /* Flush caches and wait for the caches to assert idle.
+ * The cache flush is executed in the ME, but the PFP waits
+ * for completion.
+ */
+ radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 6, 0));
+ radeon_emit(cs, 0); /* CP_COHER_CNTL */
+ radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */
+ radeon_emit(cs, 0xffffff); /* CP_COHER_SIZE_HI */
+ radeon_emit(cs, 0); /* CP_COHER_BASE */
+ radeon_emit(cs, 0); /* CP_COHER_BASE_HI */
+ radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
+ radeon_emit(cs, gcr_cntl); /* GCR_CNTL */
+ } else if ((cb_db_event ||
+ (flush_bits & (RADV_CMD_FLAG_VS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
+ RADV_CMD_FLAG_CS_PARTIAL_FLUSH))) &&
+ !is_mec) {
+ /* We need to ensure that PFP waits as well. */
+ radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
+ radeon_emit(cs, 0);
+
+ *sqtt_flush_bits |= RGP_FLUSH_PFP_SYNC_ME;
+ }
+
+ if (flush_bits & RADV_CMD_FLAG_START_PIPELINE_STATS) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_PIPELINESTAT_START) | EVENT_INDEX(0));
+ } else if (flush_bits & RADV_CMD_FLAG_STOP_PIPELINE_STATS) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_PIPELINESTAT_STOP) | EVENT_INDEX(0));
+ }
}
void
-si_cs_emit_cache_flush(struct radeon_cmdbuf *cs,
- enum chip_class chip_class,
- uint32_t *flush_cnt,
- uint64_t flush_va,
- bool is_mec,
- enum radv_cmd_flush_bits flush_bits,
- enum rgp_flush_bits *sqtt_flush_bits,
- uint64_t gfx9_eop_bug_va)
+si_cs_emit_cache_flush(struct radeon_cmdbuf *cs, enum chip_class chip_class, uint32_t *flush_cnt,
+ uint64_t flush_va, bool is_mec, enum radv_cmd_flush_bits flush_bits,
+ enum rgp_flush_bits *sqtt_flush_bits, uint64_t gfx9_eop_bug_va)
{
- unsigned cp_coher_cntl = 0;
- uint32_t flush_cb_db = flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
- RADV_CMD_FLAG_FLUSH_AND_INV_DB);
-
- if (chip_class >= GFX10) {
- /* GFX10 cache flush handling is quite different. */
- gfx10_cs_emit_cache_flush(cs, chip_class, flush_cnt, flush_va,
- is_mec, flush_bits, sqtt_flush_bits,
- gfx9_eop_bug_va);
- return;
- }
-
- if (flush_bits & RADV_CMD_FLAG_INV_ICACHE) {
- cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1);
- *sqtt_flush_bits |= RGP_FLUSH_INVAL_ICACHE;
- }
- if (flush_bits & RADV_CMD_FLAG_INV_SCACHE) {
- cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1);
- *sqtt_flush_bits |= RGP_FLUSH_INVAL_SMEM_L0;
- }
-
- if (chip_class <= GFX8) {
- if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB) {
- cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) |
- S_0085F0_CB0_DEST_BASE_ENA(1) |
- S_0085F0_CB1_DEST_BASE_ENA(1) |
- S_0085F0_CB2_DEST_BASE_ENA(1) |
- S_0085F0_CB3_DEST_BASE_ENA(1) |
- S_0085F0_CB4_DEST_BASE_ENA(1) |
- S_0085F0_CB5_DEST_BASE_ENA(1) |
- S_0085F0_CB6_DEST_BASE_ENA(1) |
- S_0085F0_CB7_DEST_BASE_ENA(1);
-
- /* Necessary for DCC */
- if (chip_class >= GFX8) {
- si_cs_emit_write_event_eop(cs,
- chip_class,
- is_mec,
- V_028A90_FLUSH_AND_INV_CB_DATA_TS,
- 0,
- EOP_DST_SEL_MEM,
- EOP_DATA_SEL_DISCARD,
- 0, 0,
- gfx9_eop_bug_va);
- }
-
- *sqtt_flush_bits |= RGP_FLUSH_FLUSH_CB | RGP_FLUSH_INVAL_CB;
- }
- if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB) {
- cp_coher_cntl |= S_0085F0_DB_ACTION_ENA(1) |
- S_0085F0_DB_DEST_BASE_ENA(1);
-
- *sqtt_flush_bits |= RGP_FLUSH_FLUSH_DB | RGP_FLUSH_INVAL_DB;
- }
- }
-
- if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB_META) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0));
-
- *sqtt_flush_bits |= RGP_FLUSH_FLUSH_CB | RGP_FLUSH_INVAL_CB;
- }
-
- if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB_META) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_DB_META) | EVENT_INDEX(0));
-
- *sqtt_flush_bits |= RGP_FLUSH_FLUSH_DB | RGP_FLUSH_INVAL_DB;
- }
-
- if (flush_bits & RADV_CMD_FLAG_PS_PARTIAL_FLUSH) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
-
- *sqtt_flush_bits |= RGP_FLUSH_PS_PARTIAL_FLUSH;
- } else if (flush_bits & RADV_CMD_FLAG_VS_PARTIAL_FLUSH) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
-
- *sqtt_flush_bits |= RGP_FLUSH_VS_PARTIAL_FLUSH;
- }
-
- if (flush_bits & RADV_CMD_FLAG_CS_PARTIAL_FLUSH) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH) | EVENT_INDEX(4));
-
- *sqtt_flush_bits |= RGP_FLUSH_CS_PARTIAL_FLUSH;
- }
-
- if (chip_class == GFX9 && flush_cb_db) {
- unsigned cb_db_event, tc_flags;
-
- /* Set the CB/DB flush event. */
- cb_db_event = V_028A90_CACHE_FLUSH_AND_INV_TS_EVENT;
-
- /* These are the only allowed combinations. If you need to
- * do multiple operations at once, do them separately.
- * All operations that invalidate L2 also seem to invalidate
- * metadata. Volatile (VOL) and WC flushes are not listed here.
- *
- * TC | TC_WB = writeback & invalidate L2 & L1
- * TC | TC_WB | TC_NC = writeback & invalidate L2 for MTYPE == NC
- * TC_WB | TC_NC = writeback L2 for MTYPE == NC
- * TC | TC_NC = invalidate L2 for MTYPE == NC
- * TC | TC_MD = writeback & invalidate L2 metadata (DCC, etc.)
- * TCL1 = invalidate L1
- */
- tc_flags = EVENT_TC_ACTION_ENA |
- EVENT_TC_MD_ACTION_ENA;
-
- *sqtt_flush_bits |= RGP_FLUSH_FLUSH_CB | RGP_FLUSH_INVAL_CB |
- RGP_FLUSH_FLUSH_DB | RGP_FLUSH_INVAL_DB;
-
- /* Ideally flush TC together with CB/DB. */
- if (flush_bits & RADV_CMD_FLAG_INV_L2) {
- /* Writeback and invalidate everything in L2 & L1. */
- tc_flags = EVENT_TC_ACTION_ENA |
- EVENT_TC_WB_ACTION_ENA;
-
-
- /* Clear the flags. */
- flush_bits &= ~(RADV_CMD_FLAG_INV_L2 |
- RADV_CMD_FLAG_WB_L2 |
- RADV_CMD_FLAG_INV_VCACHE);
-
- *sqtt_flush_bits |= RGP_FLUSH_INVAL_L2;
- }
-
- assert(flush_cnt);
- (*flush_cnt)++;
-
- si_cs_emit_write_event_eop(cs, chip_class, false, cb_db_event, tc_flags,
- EOP_DST_SEL_MEM,
- EOP_DATA_SEL_VALUE_32BIT,
- flush_va, *flush_cnt,
- gfx9_eop_bug_va);
- radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, flush_va,
- *flush_cnt, 0xffffffff);
- }
-
- /* VGT state sync */
- if (flush_bits & RADV_CMD_FLAG_VGT_FLUSH) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
- }
-
- /* VGT streamout state sync */
- if (flush_bits & RADV_CMD_FLAG_VGT_STREAMOUT_SYNC) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_STREAMOUT_SYNC) | EVENT_INDEX(0));
- }
-
- /* Make sure ME is idle (it executes most packets) before continuing.
- * This prevents read-after-write hazards between PFP and ME.
- */
- if ((cp_coher_cntl ||
- (flush_bits & (RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_INV_VCACHE |
- RADV_CMD_FLAG_INV_L2 |
- RADV_CMD_FLAG_WB_L2))) &&
- !is_mec) {
- radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
- radeon_emit(cs, 0);
-
- *sqtt_flush_bits |= RGP_FLUSH_PFP_SYNC_ME;
- }
-
- if ((flush_bits & RADV_CMD_FLAG_INV_L2) ||
- (chip_class <= GFX7 && (flush_bits & RADV_CMD_FLAG_WB_L2))) {
- si_emit_acquire_mem(cs, is_mec, chip_class == GFX9,
- cp_coher_cntl |
- S_0085F0_TC_ACTION_ENA(1) |
- S_0085F0_TCL1_ACTION_ENA(1) |
- S_0301F0_TC_WB_ACTION_ENA(chip_class >= GFX8));
- cp_coher_cntl = 0;
-
- *sqtt_flush_bits |= RGP_FLUSH_INVAL_L2 | RGP_FLUSH_INVAL_VMEM_L0;
- } else {
- if(flush_bits & RADV_CMD_FLAG_WB_L2) {
- /* WB = write-back
- * NC = apply to non-coherent MTYPEs
- * (i.e. MTYPE <= 1, which is what we use everywhere)
- *
- * WB doesn't work without NC.
- */
- si_emit_acquire_mem(cs, is_mec,
- chip_class == GFX9,
- cp_coher_cntl |
- S_0301F0_TC_WB_ACTION_ENA(1) |
- S_0301F0_TC_NC_ACTION_ENA(1));
- cp_coher_cntl = 0;
-
- *sqtt_flush_bits |= RGP_FLUSH_FLUSH_L2 | RGP_FLUSH_INVAL_VMEM_L0;
- }
- if (flush_bits & RADV_CMD_FLAG_INV_VCACHE) {
- si_emit_acquire_mem(cs, is_mec,
- chip_class == GFX9,
- cp_coher_cntl |
- S_0085F0_TCL1_ACTION_ENA(1));
- cp_coher_cntl = 0;
-
- *sqtt_flush_bits |= RGP_FLUSH_INVAL_VMEM_L0;
- }
- }
-
- /* When one of the DEST_BASE flags is set, SURFACE_SYNC waits for idle.
- * Therefore, it should be last. Done in PFP.
- */
- if (cp_coher_cntl)
- si_emit_acquire_mem(cs, is_mec, chip_class == GFX9, cp_coher_cntl);
-
- if (flush_bits & RADV_CMD_FLAG_START_PIPELINE_STATS) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_PIPELINESTAT_START) |
- EVENT_INDEX(0));
- } else if (flush_bits & RADV_CMD_FLAG_STOP_PIPELINE_STATS) {
- radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
- radeon_emit(cs, EVENT_TYPE(V_028A90_PIPELINESTAT_STOP) |
- EVENT_INDEX(0));
- }
+ unsigned cp_coher_cntl = 0;
+ uint32_t flush_cb_db =
+ flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB);
+
+ if (chip_class >= GFX10) {
+ /* GFX10 cache flush handling is quite different. */
+ gfx10_cs_emit_cache_flush(cs, chip_class, flush_cnt, flush_va, is_mec, flush_bits,
+ sqtt_flush_bits, gfx9_eop_bug_va);
+ return;
+ }
+
+ if (flush_bits & RADV_CMD_FLAG_INV_ICACHE) {
+ cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1);
+ *sqtt_flush_bits |= RGP_FLUSH_INVAL_ICACHE;
+ }
+ if (flush_bits & RADV_CMD_FLAG_INV_SCACHE) {
+ cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1);
+ *sqtt_flush_bits |= RGP_FLUSH_INVAL_SMEM_L0;
+ }
+
+ if (chip_class <= GFX8) {
+ if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB) {
+ cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) | S_0085F0_CB0_DEST_BASE_ENA(1) |
+ S_0085F0_CB1_DEST_BASE_ENA(1) | S_0085F0_CB2_DEST_BASE_ENA(1) |
+ S_0085F0_CB3_DEST_BASE_ENA(1) | S_0085F0_CB4_DEST_BASE_ENA(1) |
+ S_0085F0_CB5_DEST_BASE_ENA(1) | S_0085F0_CB6_DEST_BASE_ENA(1) |
+ S_0085F0_CB7_DEST_BASE_ENA(1);
+
+ /* Necessary for DCC */
+ if (chip_class >= GFX8) {
+ si_cs_emit_write_event_eop(cs, chip_class, is_mec, V_028A90_FLUSH_AND_INV_CB_DATA_TS, 0,
+ EOP_DST_SEL_MEM, EOP_DATA_SEL_DISCARD, 0, 0,
+ gfx9_eop_bug_va);
+ }
+
+ *sqtt_flush_bits |= RGP_FLUSH_FLUSH_CB | RGP_FLUSH_INVAL_CB;
+ }
+ if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB) {
+ cp_coher_cntl |= S_0085F0_DB_ACTION_ENA(1) | S_0085F0_DB_DEST_BASE_ENA(1);
+
+ *sqtt_flush_bits |= RGP_FLUSH_FLUSH_DB | RGP_FLUSH_INVAL_DB;
+ }
+ }
+
+ if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB_META) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0));
+
+ *sqtt_flush_bits |= RGP_FLUSH_FLUSH_CB | RGP_FLUSH_INVAL_CB;
+ }
+
+ if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB_META) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_DB_META) | EVENT_INDEX(0));
+
+ *sqtt_flush_bits |= RGP_FLUSH_FLUSH_DB | RGP_FLUSH_INVAL_DB;
+ }
+
+ if (flush_bits & RADV_CMD_FLAG_PS_PARTIAL_FLUSH) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
+
+ *sqtt_flush_bits |= RGP_FLUSH_PS_PARTIAL_FLUSH;
+ } else if (flush_bits & RADV_CMD_FLAG_VS_PARTIAL_FLUSH) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
+
+ *sqtt_flush_bits |= RGP_FLUSH_VS_PARTIAL_FLUSH;
+ }
+
+ if (flush_bits & RADV_CMD_FLAG_CS_PARTIAL_FLUSH) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH) | EVENT_INDEX(4));
+
+ *sqtt_flush_bits |= RGP_FLUSH_CS_PARTIAL_FLUSH;
+ }
+
+ if (chip_class == GFX9 && flush_cb_db) {
+ unsigned cb_db_event, tc_flags;
+
+ /* Set the CB/DB flush event. */
+ cb_db_event = V_028A90_CACHE_FLUSH_AND_INV_TS_EVENT;
+
+ /* These are the only allowed combinations. If you need to
+ * do multiple operations at once, do them separately.
+ * All operations that invalidate L2 also seem to invalidate
+ * metadata. Volatile (VOL) and WC flushes are not listed here.
+ *
+ * TC | TC_WB = writeback & invalidate L2 & L1
+ * TC | TC_WB | TC_NC = writeback & invalidate L2 for MTYPE == NC
+ * TC_WB | TC_NC = writeback L2 for MTYPE == NC
+ * TC | TC_NC = invalidate L2 for MTYPE == NC
+ * TC | TC_MD = writeback & invalidate L2 metadata (DCC, etc.)
+ * TCL1 = invalidate L1
+ */
+ tc_flags = EVENT_TC_ACTION_ENA | EVENT_TC_MD_ACTION_ENA;
+
+ *sqtt_flush_bits |=
+ RGP_FLUSH_FLUSH_CB | RGP_FLUSH_INVAL_CB | RGP_FLUSH_FLUSH_DB | RGP_FLUSH_INVAL_DB;
+
+ /* Ideally flush TC together with CB/DB. */
+ if (flush_bits & RADV_CMD_FLAG_INV_L2) {
+ /* Writeback and invalidate everything in L2 & L1. */
+ tc_flags = EVENT_TC_ACTION_ENA | EVENT_TC_WB_ACTION_ENA;
+
+ /* Clear the flags. */
+ flush_bits &= ~(RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_WB_L2 | RADV_CMD_FLAG_INV_VCACHE);
+
+ *sqtt_flush_bits |= RGP_FLUSH_INVAL_L2;
+ }
+
+ assert(flush_cnt);
+ (*flush_cnt)++;
+
+ si_cs_emit_write_event_eop(cs, chip_class, false, cb_db_event, tc_flags, EOP_DST_SEL_MEM,
+ EOP_DATA_SEL_VALUE_32BIT, flush_va, *flush_cnt, gfx9_eop_bug_va);
+ radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, flush_va, *flush_cnt, 0xffffffff);
+ }
+
+ /* VGT state sync */
+ if (flush_bits & RADV_CMD_FLAG_VGT_FLUSH) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
+ }
+
+ /* VGT streamout state sync */
+ if (flush_bits & RADV_CMD_FLAG_VGT_STREAMOUT_SYNC) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_STREAMOUT_SYNC) | EVENT_INDEX(0));
+ }
+
+ /* Make sure ME is idle (it executes most packets) before continuing.
+ * This prevents read-after-write hazards between PFP and ME.
+ */
+ if ((cp_coher_cntl || (flush_bits & (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
+ RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_WB_L2))) &&
+ !is_mec) {
+ radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
+ radeon_emit(cs, 0);
+
+ *sqtt_flush_bits |= RGP_FLUSH_PFP_SYNC_ME;
+ }
+
+ if ((flush_bits & RADV_CMD_FLAG_INV_L2) ||
+ (chip_class <= GFX7 && (flush_bits & RADV_CMD_FLAG_WB_L2))) {
+ si_emit_acquire_mem(cs, is_mec, chip_class == GFX9,
+ cp_coher_cntl | S_0085F0_TC_ACTION_ENA(1) | S_0085F0_TCL1_ACTION_ENA(1) |
+ S_0301F0_TC_WB_ACTION_ENA(chip_class >= GFX8));
+ cp_coher_cntl = 0;
+
+ *sqtt_flush_bits |= RGP_FLUSH_INVAL_L2 | RGP_FLUSH_INVAL_VMEM_L0;
+ } else {
+ if (flush_bits & RADV_CMD_FLAG_WB_L2) {
+ /* WB = write-back
+ * NC = apply to non-coherent MTYPEs
+ * (i.e. MTYPE <= 1, which is what we use everywhere)
+ *
+ * WB doesn't work without NC.
+ */
+ si_emit_acquire_mem(
+ cs, is_mec, chip_class == GFX9,
+ cp_coher_cntl | S_0301F0_TC_WB_ACTION_ENA(1) | S_0301F0_TC_NC_ACTION_ENA(1));
+ cp_coher_cntl = 0;
+
+ *sqtt_flush_bits |= RGP_FLUSH_FLUSH_L2 | RGP_FLUSH_INVAL_VMEM_L0;
+ }
+ if (flush_bits & RADV_CMD_FLAG_INV_VCACHE) {
+ si_emit_acquire_mem(cs, is_mec, chip_class == GFX9,
+ cp_coher_cntl | S_0085F0_TCL1_ACTION_ENA(1));
+ cp_coher_cntl = 0;
+
+ *sqtt_flush_bits |= RGP_FLUSH_INVAL_VMEM_L0;
+ }
+ }
+
+ /* When one of the DEST_BASE flags is set, SURFACE_SYNC waits for idle.
+ * Therefore, it should be last. Done in PFP.
+ */
+ if (cp_coher_cntl)
+ si_emit_acquire_mem(cs, is_mec, chip_class == GFX9, cp_coher_cntl);
+
+ if (flush_bits & RADV_CMD_FLAG_START_PIPELINE_STATS) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_PIPELINESTAT_START) | EVENT_INDEX(0));
+ } else if (flush_bits & RADV_CMD_FLAG_STOP_PIPELINE_STATS) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_PIPELINESTAT_STOP) | EVENT_INDEX(0));
+ }
}
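
The EOP fence pattern above is: bump *flush_cnt, have the CB/DB flush event write the new value to flush_va at end-of-pipe, then emit a WAIT_REG_MEM that stalls until the memory word equals that value. A minimal host-side model of that handshake, assuming nothing about the real packet layout (all names below are invented stand-ins, not RADV code):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t fence_mem; /* stands in for the dword at flush_va */
static uint32_t flush_cnt; /* stands in for the per-command-buffer fence counter */

/* Models the EOP event: the GPU writes the fence value once prior work has drained. */
static void emit_eop_write(uint32_t value)
{
   fence_mem = value;
}

/* Models WAIT_REG_MEM with WAIT_REG_MEM_EQUAL: block until the value is visible. */
static void wait_mem_equal(uint32_t ref)
{
   while (fence_mem != ref)
      ;
}

int main(void)
{
   flush_cnt++;               /* new fence value for this cache flush */
   emit_eop_write(flush_cnt); /* the cache-flush TS event writes it at end-of-pipe */
   wait_mem_equal(flush_cnt); /* the CP stalls here until the write lands */
   printf("flush %" PRIu32 " completed\n", fence_mem);
   return 0;
}
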
void
si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
{
- bool is_compute = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE;
-
- if (is_compute)
- cmd_buffer->state.flush_bits &= ~(RADV_CMD_FLAG_FLUSH_AND_INV_CB |
- RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
- RADV_CMD_FLAG_FLUSH_AND_INV_DB |
- RADV_CMD_FLAG_FLUSH_AND_INV_DB_META |
- RADV_CMD_FLAG_INV_L2_METADATA |
- RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_VS_PARTIAL_FLUSH |
- RADV_CMD_FLAG_VGT_FLUSH |
- RADV_CMD_FLAG_START_PIPELINE_STATS |
- RADV_CMD_FLAG_STOP_PIPELINE_STATS);
-
- if (!cmd_buffer->state.flush_bits) {
- radv_describe_barrier_end_delayed(cmd_buffer);
- return;
- }
-
- radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 128);
-
- si_cs_emit_cache_flush(cmd_buffer->cs,
- cmd_buffer->device->physical_device->rad_info.chip_class,
- &cmd_buffer->gfx9_fence_idx,
- cmd_buffer->gfx9_fence_va,
- radv_cmd_buffer_uses_mec(cmd_buffer),
- cmd_buffer->state.flush_bits,
- &cmd_buffer->state.sqtt_flush_bits,
- cmd_buffer->gfx9_eop_bug_va);
-
-
- if (unlikely(cmd_buffer->device->trace_bo))
- radv_cmd_buffer_trace_emit(cmd_buffer);
-
- /* Clear the caches that have been flushed to avoid syncing too much
- * when there is some pending active queries.
- */
- cmd_buffer->active_query_flush_bits &= ~cmd_buffer->state.flush_bits;
-
- cmd_buffer->state.flush_bits = 0;
-
- /* If the driver used a compute shader for resetting a query pool, it
- * should be finished at this point.
- */
- cmd_buffer->pending_reset_query = false;
-
- radv_describe_barrier_end_delayed(cmd_buffer);
+ bool is_compute = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE;
+
+ if (is_compute)
+ cmd_buffer->state.flush_bits &=
+ ~(RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
+ RADV_CMD_FLAG_FLUSH_AND_INV_DB | RADV_CMD_FLAG_FLUSH_AND_INV_DB_META |
+ RADV_CMD_FLAG_INV_L2_METADATA | RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
+ RADV_CMD_FLAG_VS_PARTIAL_FLUSH | RADV_CMD_FLAG_VGT_FLUSH |
+ RADV_CMD_FLAG_START_PIPELINE_STATS | RADV_CMD_FLAG_STOP_PIPELINE_STATS);
+
+ if (!cmd_buffer->state.flush_bits) {
+ radv_describe_barrier_end_delayed(cmd_buffer);
+ return;
+ }
+
+ radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 128);
+
+ si_cs_emit_cache_flush(cmd_buffer->cs, cmd_buffer->device->physical_device->rad_info.chip_class,
+ &cmd_buffer->gfx9_fence_idx, cmd_buffer->gfx9_fence_va,
+ radv_cmd_buffer_uses_mec(cmd_buffer), cmd_buffer->state.flush_bits,
+ &cmd_buffer->state.sqtt_flush_bits, cmd_buffer->gfx9_eop_bug_va);
+
+ if (unlikely(cmd_buffer->device->trace_bo))
+ radv_cmd_buffer_trace_emit(cmd_buffer);
+
+ /* Clear the caches that have been flushed to avoid syncing too much
+    * when there are pending active queries.
+ */
+ cmd_buffer->active_query_flush_bits &= ~cmd_buffer->state.flush_bits;
+
+ cmd_buffer->state.flush_bits = 0;
+
+ /* If the driver used a compute shader for resetting a query pool, it
+ * should be finished at this point.
+ */
+ cmd_buffer->pending_reset_query = false;
+
+ radv_describe_barrier_end_delayed(cmd_buffer);
}
/* sets the CP predication state using a boolean stored at va */
void
-si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer,
- bool draw_visible, unsigned pred_op, uint64_t va)
+si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, bool draw_visible,
+ unsigned pred_op, uint64_t va)
{
- uint32_t op = 0;
-
- if (va) {
- assert(pred_op == PREDICATION_OP_BOOL32 ||
- pred_op == PREDICATION_OP_BOOL64);
-
- op = PRED_OP(pred_op);
-
- /* PREDICATION_DRAW_VISIBLE means that if the 32-bit value is
- * zero, all rendering commands are discarded. Otherwise, they
- * are discarded if the value is non zero.
- */
- op |= draw_visible ? PREDICATION_DRAW_VISIBLE :
- PREDICATION_DRAW_NOT_VISIBLE;
- }
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
- radeon_emit(cmd_buffer->cs, PKT3(PKT3_SET_PREDICATION, 2, 0));
- radeon_emit(cmd_buffer->cs, op);
- radeon_emit(cmd_buffer->cs, va);
- radeon_emit(cmd_buffer->cs, va >> 32);
- } else {
- radeon_emit(cmd_buffer->cs, PKT3(PKT3_SET_PREDICATION, 1, 0));
- radeon_emit(cmd_buffer->cs, va);
- radeon_emit(cmd_buffer->cs, op | ((va >> 32) & 0xFF));
- }
+ uint32_t op = 0;
+
+ if (va) {
+ assert(pred_op == PREDICATION_OP_BOOL32 || pred_op == PREDICATION_OP_BOOL64);
+
+ op = PRED_OP(pred_op);
+
+ /* PREDICATION_DRAW_VISIBLE means that if the 32-bit value is
+ * zero, all rendering commands are discarded. Otherwise, they
+       * are discarded if the value is non-zero.
+ */
+ op |= draw_visible ? PREDICATION_DRAW_VISIBLE : PREDICATION_DRAW_NOT_VISIBLE;
+ }
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
+ radeon_emit(cmd_buffer->cs, PKT3(PKT3_SET_PREDICATION, 2, 0));
+ radeon_emit(cmd_buffer->cs, op);
+ radeon_emit(cmd_buffer->cs, va);
+ radeon_emit(cmd_buffer->cs, va >> 32);
+ } else {
+ radeon_emit(cmd_buffer->cs, PKT3(PKT3_SET_PREDICATION, 1, 0));
+ radeon_emit(cmd_buffer->cs, va);
+ radeon_emit(cmd_buffer->cs, op | ((va >> 32) & 0xFF));
+ }
}
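
The two packet layouts above differ in how the 64-bit address is carried: GFX9+ uses three dwords with the full high half, while older parts fold the top 8 address bits into the same dword as the op. A standalone sketch of that packing (the op value is invented for illustration):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
   uint64_t va = 0x123456789ULL; /* illustrative GPU virtual address */
   uint32_t op = 0x110;          /* illustrative predication op bits, not a real encoding */

   /* GFX9+ layout: op, then the full 64-bit address split across two dwords. */
   uint32_t gfx9[3] = {op, (uint32_t)va, (uint32_t)(va >> 32)};

   /* Older layout: address low dword, then op sharing a dword with address bits [39:32]. */
   uint32_t gfx6[2] = {(uint32_t)va, op | (uint32_t)((va >> 32) & 0xFF)};

   printf("gfx9: %08" PRIx32 " %08" PRIx32 " %08" PRIx32 "\n", gfx9[0], gfx9[1], gfx9[2]);
   printf("gfx6: %08" PRIx32 " %08" PRIx32 "\n", gfx6[0], gfx6[1]);
   return 0;
}
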
/* Set this if you want the 3D engine to wait until CP DMA is done.
* It should be set on the last CP DMA packet. */
-#define CP_DMA_SYNC (1 << 0)
+#define CP_DMA_SYNC (1 << 0)
/* Set this if the source data was used as a destination in a previous CP DMA
* packet. It's for preventing a read-after-write (RAW) hazard between two
* CP DMA packets. */
-#define CP_DMA_RAW_WAIT (1 << 1)
-#define CP_DMA_USE_L2 (1 << 2)
-#define CP_DMA_CLEAR (1 << 3)
+#define CP_DMA_RAW_WAIT (1 << 1)
+#define CP_DMA_USE_L2 (1 << 2)
+#define CP_DMA_CLEAR (1 << 3)
/* Alignment for optimal performance. */
-#define SI_CPDMA_ALIGNMENT 32
+#define SI_CPDMA_ALIGNMENT 32
/* The max number of bytes that can be copied per packet. */
-static inline unsigned cp_dma_max_byte_count(struct radv_cmd_buffer *cmd_buffer)
+static inline unsigned
+cp_dma_max_byte_count(struct radv_cmd_buffer *cmd_buffer)
{
- unsigned max = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9 ?
- S_415_BYTE_COUNT_GFX9(~0u) :
- S_415_BYTE_COUNT_GFX6(~0u);
+ unsigned max = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9
+ ? S_415_BYTE_COUNT_GFX9(~0u)
+ : S_415_BYTE_COUNT_GFX6(~0u);
- /* make it aligned for optimal performance */
- return max & ~(SI_CPDMA_ALIGNMENT - 1);
+ /* make it aligned for optimal performance */
+ return max & ~(SI_CPDMA_ALIGNMENT - 1);
}
/* Emit a CP DMA packet to do a copy from one buffer to another, or to clear
* a buffer. The size must fit in bits [20:0]. If CP_DMA_CLEAR is set, src_va is a 32-bit
* clear value.
*/
-static void si_emit_cp_dma(struct radv_cmd_buffer *cmd_buffer,
- uint64_t dst_va, uint64_t src_va,
- unsigned size, unsigned flags)
+static void
+si_emit_cp_dma(struct radv_cmd_buffer *cmd_buffer, uint64_t dst_va, uint64_t src_va, unsigned size,
+ unsigned flags)
{
- struct radeon_cmdbuf *cs = cmd_buffer->cs;
- uint32_t header = 0, command = 0;
-
- assert(size <= cp_dma_max_byte_count(cmd_buffer));
-
- radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 9);
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9)
- command |= S_415_BYTE_COUNT_GFX9(size);
- else
- command |= S_415_BYTE_COUNT_GFX6(size);
-
- /* Sync flags. */
- if (flags & CP_DMA_SYNC)
- header |= S_411_CP_SYNC(1);
- else {
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9)
- command |= S_415_DISABLE_WR_CONFIRM_GFX9(1);
- else
- command |= S_415_DISABLE_WR_CONFIRM_GFX6(1);
- }
-
- if (flags & CP_DMA_RAW_WAIT)
- command |= S_415_RAW_WAIT(1);
-
- /* Src and dst flags. */
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9 &&
- !(flags & CP_DMA_CLEAR) &&
- src_va == dst_va)
- header |= S_411_DST_SEL(V_411_NOWHERE); /* prefetch only */
- else if (flags & CP_DMA_USE_L2)
- header |= S_411_DST_SEL(V_411_DST_ADDR_TC_L2);
-
- if (flags & CP_DMA_CLEAR)
- header |= S_411_SRC_SEL(V_411_DATA);
- else if (flags & CP_DMA_USE_L2)
- header |= S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2);
-
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) {
- radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, cmd_buffer->state.predicating));
- radeon_emit(cs, header);
- radeon_emit(cs, src_va); /* SRC_ADDR_LO [31:0] */
- radeon_emit(cs, src_va >> 32); /* SRC_ADDR_HI [31:0] */
- radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */
- radeon_emit(cs, dst_va >> 32); /* DST_ADDR_HI [31:0] */
- radeon_emit(cs, command);
- } else {
- assert(!(flags & CP_DMA_USE_L2));
- header |= S_411_SRC_ADDR_HI(src_va >> 32);
- radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, cmd_buffer->state.predicating));
- radeon_emit(cs, src_va); /* SRC_ADDR_LO [31:0] */
- radeon_emit(cs, header); /* SRC_ADDR_HI [15:0] + flags. */
- radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */
- radeon_emit(cs, (dst_va >> 32) & 0xffff); /* DST_ADDR_HI [15:0] */
- radeon_emit(cs, command);
- }
-
- /* CP DMA is executed in ME, but index buffers are read by PFP.
- * This ensures that ME (CP DMA) is idle before PFP starts fetching
- * indices. If we wanted to execute CP DMA in PFP, this packet
- * should precede it.
- */
- if (flags & CP_DMA_SYNC) {
- if (cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) {
- radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, cmd_buffer->state.predicating));
- radeon_emit(cs, 0);
- }
-
- /* CP will see the sync flag and wait for all DMAs to complete. */
- cmd_buffer->state.dma_is_busy = false;
- }
-
- if (unlikely(cmd_buffer->device->trace_bo))
- radv_cmd_buffer_trace_emit(cmd_buffer);
+ struct radeon_cmdbuf *cs = cmd_buffer->cs;
+ uint32_t header = 0, command = 0;
+
+ assert(size <= cp_dma_max_byte_count(cmd_buffer));
+
+ radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 9);
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9)
+ command |= S_415_BYTE_COUNT_GFX9(size);
+ else
+ command |= S_415_BYTE_COUNT_GFX6(size);
+
+ /* Sync flags. */
+ if (flags & CP_DMA_SYNC)
+ header |= S_411_CP_SYNC(1);
+ else {
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9)
+ command |= S_415_DISABLE_WR_CONFIRM_GFX9(1);
+ else
+ command |= S_415_DISABLE_WR_CONFIRM_GFX6(1);
+ }
+
+ if (flags & CP_DMA_RAW_WAIT)
+ command |= S_415_RAW_WAIT(1);
+
+ /* Src and dst flags. */
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9 &&
+ !(flags & CP_DMA_CLEAR) && src_va == dst_va)
+ header |= S_411_DST_SEL(V_411_NOWHERE); /* prefetch only */
+ else if (flags & CP_DMA_USE_L2)
+ header |= S_411_DST_SEL(V_411_DST_ADDR_TC_L2);
+
+ if (flags & CP_DMA_CLEAR)
+ header |= S_411_SRC_SEL(V_411_DATA);
+ else if (flags & CP_DMA_USE_L2)
+ header |= S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2);
+
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX7) {
+ radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, cmd_buffer->state.predicating));
+ radeon_emit(cs, header);
+ radeon_emit(cs, src_va); /* SRC_ADDR_LO [31:0] */
+ radeon_emit(cs, src_va >> 32); /* SRC_ADDR_HI [31:0] */
+ radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */
+ radeon_emit(cs, dst_va >> 32); /* DST_ADDR_HI [31:0] */
+ radeon_emit(cs, command);
+ } else {
+ assert(!(flags & CP_DMA_USE_L2));
+ header |= S_411_SRC_ADDR_HI(src_va >> 32);
+ radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, cmd_buffer->state.predicating));
+ radeon_emit(cs, src_va); /* SRC_ADDR_LO [31:0] */
+ radeon_emit(cs, header); /* SRC_ADDR_HI [15:0] + flags. */
+ radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */
+ radeon_emit(cs, (dst_va >> 32) & 0xffff); /* DST_ADDR_HI [15:0] */
+ radeon_emit(cs, command);
+ }
+
+ /* CP DMA is executed in ME, but index buffers are read by PFP.
+ * This ensures that ME (CP DMA) is idle before PFP starts fetching
+ * indices. If we wanted to execute CP DMA in PFP, this packet
+ * should precede it.
+ */
+ if (flags & CP_DMA_SYNC) {
+ if (cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) {
+ radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, cmd_buffer->state.predicating));
+ radeon_emit(cs, 0);
+ }
+
+ /* CP will see the sync flag and wait for all DMAs to complete. */
+ cmd_buffer->state.dma_is_busy = false;
+ }
+
+ if (unlikely(cmd_buffer->device->trace_bo))
+ radv_cmd_buffer_trace_emit(cmd_buffer);
}
-void si_cp_dma_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
- unsigned size)
+void
+si_cp_dma_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va, unsigned size)
{
- uint64_t aligned_va = va & ~(SI_CPDMA_ALIGNMENT - 1);
- uint64_t aligned_size = ((va + size + SI_CPDMA_ALIGNMENT -1) & ~(SI_CPDMA_ALIGNMENT - 1)) - aligned_va;
+ uint64_t aligned_va = va & ~(SI_CPDMA_ALIGNMENT - 1);
+ uint64_t aligned_size =
+ ((va + size + SI_CPDMA_ALIGNMENT - 1) & ~(SI_CPDMA_ALIGNMENT - 1)) - aligned_va;
- si_emit_cp_dma(cmd_buffer, aligned_va, aligned_va,
- aligned_size, CP_DMA_USE_L2);
+ si_emit_cp_dma(cmd_buffer, aligned_va, aligned_va, aligned_size, CP_DMA_USE_L2);
}
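
si_cp_dma_prefetch rounds the start address down and the end address up to SI_CPDMA_ALIGNMENT so a single aligned prefetch covers the requested range. A standalone check of that arithmetic with illustrative values:

#include <assert.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define SI_CPDMA_ALIGNMENT 32

int main(void)
{
   uint64_t va = 0x1007, size = 100; /* illustrative request */

   uint64_t aligned_va = va & ~(uint64_t)(SI_CPDMA_ALIGNMENT - 1);
   uint64_t aligned_size =
      ((va + size + SI_CPDMA_ALIGNMENT - 1) & ~(uint64_t)(SI_CPDMA_ALIGNMENT - 1)) - aligned_va;

   /* The aligned range fully covers the requested one and keeps the alignment. */
   assert(aligned_va <= va);
   assert(aligned_va + aligned_size >= va + size);
   assert(aligned_va % SI_CPDMA_ALIGNMENT == 0);
   assert(aligned_size % SI_CPDMA_ALIGNMENT == 0);

   printf("prefetch [0x%" PRIx64 ", +%" PRIu64 ") for request [0x%" PRIx64 ", +%" PRIu64 ")\n",
          aligned_va, aligned_size, va, size);
   return 0;
}
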
-static void si_cp_dma_prepare(struct radv_cmd_buffer *cmd_buffer, uint64_t byte_count,
- uint64_t remaining_size, unsigned *flags)
+static void
+si_cp_dma_prepare(struct radv_cmd_buffer *cmd_buffer, uint64_t byte_count, uint64_t remaining_size,
+ unsigned *flags)
{
- /* Flush the caches for the first copy only.
- * Also wait for the previous CP DMA operations.
- */
- if (cmd_buffer->state.flush_bits) {
- si_emit_cache_flush(cmd_buffer);
- *flags |= CP_DMA_RAW_WAIT;
- }
-
- /* Do the synchronization after the last dma, so that all data
- * is written to memory.
- */
- if (byte_count == remaining_size)
- *flags |= CP_DMA_SYNC;
+ /* Flush the caches for the first copy only.
+ * Also wait for the previous CP DMA operations.
+ */
+ if (cmd_buffer->state.flush_bits) {
+ si_emit_cache_flush(cmd_buffer);
+ *flags |= CP_DMA_RAW_WAIT;
+ }
+
+ /* Do the synchronization after the last dma, so that all data
+ * is written to memory.
+ */
+ if (byte_count == remaining_size)
+ *flags |= CP_DMA_SYNC;
}
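
si_cp_dma_prepare is called once per chunk of a larger operation: only the first chunk issued while a cache flush is still pending gets CP_DMA_RAW_WAIT, and only the chunk whose byte count equals the remaining size gets CP_DMA_SYNC. A standalone model of that sequencing for a three-chunk transfer (the flag names and the pending-flush boolean are invented stand-ins):

#include <stdbool.h>
#include <stdio.h>

#define CHUNK_SYNC     (1u << 0)
#define CHUNK_RAW_WAIT (1u << 1)

static bool pending_flush = true; /* stands in for cmd_buffer->state.flush_bits != 0 */

static unsigned prepare(unsigned byte_count, unsigned remaining)
{
   unsigned flags = 0;
   if (pending_flush) {         /* flush caches before the first chunk only */
      pending_flush = false;    /* the flush clears the pending bits */
      flags |= CHUNK_RAW_WAIT;
   }
   if (byte_count == remaining) /* synchronize after the last chunk */
      flags |= CHUNK_SYNC;
   return flags;
}

int main(void)
{
   unsigned chunks[] = {256, 256, 128};
   unsigned remaining = 256 + 256 + 128;

   for (unsigned i = 0; i < 3; i++) {
      unsigned flags = prepare(chunks[i], remaining);
      printf("chunk %u: raw_wait=%d sync=%d\n", i, !!(flags & CHUNK_RAW_WAIT),
             !!(flags & CHUNK_SYNC));
      remaining -= chunks[i];
   }
   return 0;
}
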
-static void si_cp_dma_realign_engine(struct radv_cmd_buffer *cmd_buffer, unsigned size)
+static void
+si_cp_dma_realign_engine(struct radv_cmd_buffer *cmd_buffer, unsigned size)
{
- uint64_t va;
- uint32_t offset;
- unsigned dma_flags = 0;
- unsigned buf_size = SI_CPDMA_ALIGNMENT * 2;
- void *ptr;
+ uint64_t va;
+ uint32_t offset;
+ unsigned dma_flags = 0;
+ unsigned buf_size = SI_CPDMA_ALIGNMENT * 2;
+ void *ptr;
- assert(size < SI_CPDMA_ALIGNMENT);
+ assert(size < SI_CPDMA_ALIGNMENT);
- radv_cmd_buffer_upload_alloc(cmd_buffer, buf_size, &offset, &ptr);
+ radv_cmd_buffer_upload_alloc(cmd_buffer, buf_size, &offset, &ptr);
- va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
- va += offset;
+ va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
+ va += offset;
- si_cp_dma_prepare(cmd_buffer, size, size, &dma_flags);
+ si_cp_dma_prepare(cmd_buffer, size, size, &dma_flags);
- si_emit_cp_dma(cmd_buffer, va, va + SI_CPDMA_ALIGNMENT, size,
- dma_flags);
+ si_emit_cp_dma(cmd_buffer, va, va + SI_CPDMA_ALIGNMENT, size, dma_flags);
}
-void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer,
- uint64_t src_va, uint64_t dest_va,
- uint64_t size)
+void
+si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uint64_t dest_va,
+ uint64_t size)
{
- uint64_t main_src_va, main_dest_va;
- uint64_t skipped_size = 0, realign_size = 0;
-
- /* Assume that we are not going to sync after the last DMA operation. */
- cmd_buffer->state.dma_is_busy = true;
-
- if (cmd_buffer->device->physical_device->rad_info.family <= CHIP_CARRIZO ||
- cmd_buffer->device->physical_device->rad_info.family == CHIP_STONEY) {
- /* If the size is not aligned, we must add a dummy copy at the end
- * just to align the internal counter. Otherwise, the DMA engine
- * would slow down by an order of magnitude for following copies.
- */
- if (size % SI_CPDMA_ALIGNMENT)
- realign_size = SI_CPDMA_ALIGNMENT - (size % SI_CPDMA_ALIGNMENT);
-
- /* If the copy begins unaligned, we must start copying from the next
- * aligned block and the skipped part should be copied after everything
- * else has been copied. Only the src alignment matters, not dst.
- */
- if (src_va % SI_CPDMA_ALIGNMENT) {
- skipped_size = SI_CPDMA_ALIGNMENT - (src_va % SI_CPDMA_ALIGNMENT);
- /* The main part will be skipped if the size is too small. */
- skipped_size = MIN2(skipped_size, size);
- size -= skipped_size;
- }
- }
- main_src_va = src_va + skipped_size;
- main_dest_va = dest_va + skipped_size;
-
- while (size) {
- unsigned dma_flags = 0;
- unsigned byte_count = MIN2(size, cp_dma_max_byte_count(cmd_buffer));
-
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
- /* DMA operations via L2 are coherent and faster.
- * TODO: GFX7-GFX8 should also support this but it
- * requires tests/benchmarks.
- *
- * Also enable on GFX9 so we can use L2 at rest on GFX9+. On Raven
- * this didn't seem to be worse.
- *
- * Note that we only use CP DMA for sizes < RADV_BUFFER_OPS_CS_THRESHOLD,
- * which is 4k at the moment, so this is really unlikely to cause
- * significant thrashing.
- */
- dma_flags |= CP_DMA_USE_L2;
- }
-
- si_cp_dma_prepare(cmd_buffer, byte_count,
- size + skipped_size + realign_size,
- &dma_flags);
-
- dma_flags &= ~CP_DMA_SYNC;
-
- si_emit_cp_dma(cmd_buffer, main_dest_va, main_src_va,
- byte_count, dma_flags);
-
- size -= byte_count;
- main_src_va += byte_count;
- main_dest_va += byte_count;
- }
-
- if (skipped_size) {
- unsigned dma_flags = 0;
-
- si_cp_dma_prepare(cmd_buffer, skipped_size,
- size + skipped_size + realign_size,
- &dma_flags);
-
- si_emit_cp_dma(cmd_buffer, dest_va, src_va,
- skipped_size, dma_flags);
- }
- if (realign_size)
- si_cp_dma_realign_engine(cmd_buffer, realign_size);
+ uint64_t main_src_va, main_dest_va;
+ uint64_t skipped_size = 0, realign_size = 0;
+
+ /* Assume that we are not going to sync after the last DMA operation. */
+ cmd_buffer->state.dma_is_busy = true;
+
+ if (cmd_buffer->device->physical_device->rad_info.family <= CHIP_CARRIZO ||
+ cmd_buffer->device->physical_device->rad_info.family == CHIP_STONEY) {
+ /* If the size is not aligned, we must add a dummy copy at the end
+ * just to align the internal counter. Otherwise, the DMA engine
+ * would slow down by an order of magnitude for following copies.
+ */
+ if (size % SI_CPDMA_ALIGNMENT)
+ realign_size = SI_CPDMA_ALIGNMENT - (size % SI_CPDMA_ALIGNMENT);
+
+ /* If the copy begins unaligned, we must start copying from the next
+ * aligned block and the skipped part should be copied after everything
+ * else has been copied. Only the src alignment matters, not dst.
+ */
+ if (src_va % SI_CPDMA_ALIGNMENT) {
+ skipped_size = SI_CPDMA_ALIGNMENT - (src_va % SI_CPDMA_ALIGNMENT);
+ /* The main part will be skipped if the size is too small. */
+ skipped_size = MIN2(skipped_size, size);
+ size -= skipped_size;
+ }
+ }
+ main_src_va = src_va + skipped_size;
+ main_dest_va = dest_va + skipped_size;
+
+ while (size) {
+ unsigned dma_flags = 0;
+ unsigned byte_count = MIN2(size, cp_dma_max_byte_count(cmd_buffer));
+
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
+ /* DMA operations via L2 are coherent and faster.
+ * TODO: GFX7-GFX8 should also support this but it
+ * requires tests/benchmarks.
+ *
+ * Also enable on GFX9 so we can use L2 at rest on GFX9+. On Raven
+ * this didn't seem to be worse.
+ *
+ * Note that we only use CP DMA for sizes < RADV_BUFFER_OPS_CS_THRESHOLD,
+ * which is 4k at the moment, so this is really unlikely to cause
+ * significant thrashing.
+ */
+ dma_flags |= CP_DMA_USE_L2;
+ }
+
+ si_cp_dma_prepare(cmd_buffer, byte_count, size + skipped_size + realign_size, &dma_flags);
+
+ dma_flags &= ~CP_DMA_SYNC;
+
+ si_emit_cp_dma(cmd_buffer, main_dest_va, main_src_va, byte_count, dma_flags);
+
+ size -= byte_count;
+ main_src_va += byte_count;
+ main_dest_va += byte_count;
+ }
+
+ if (skipped_size) {
+ unsigned dma_flags = 0;
+
+ si_cp_dma_prepare(cmd_buffer, skipped_size, size + skipped_size + realign_size, &dma_flags);
+
+ si_emit_cp_dma(cmd_buffer, dest_va, src_va, skipped_size, dma_flags);
+ }
+ if (realign_size)
+ si_cp_dma_realign_engine(cmd_buffer, realign_size);
}
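
On the older parts checked above, an unaligned source address forces a separate head copy (skipped_size) and an unaligned total size forces a dummy tail copy (realign_size) to keep the DMA engine's internal counter aligned. A standalone computation of both quantities for one illustrative request:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define SI_CPDMA_ALIGNMENT 32
#define MIN2(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
   uint64_t src_va = 0x2005, size = 100; /* illustrative request */
   uint64_t skipped_size = 0, realign_size = 0;

   /* Unaligned size: append a dummy copy so the engine's internal counter realigns. */
   if (size % SI_CPDMA_ALIGNMENT)
      realign_size = SI_CPDMA_ALIGNMENT - (size % SI_CPDMA_ALIGNMENT);

   /* Unaligned source: copy the head separately so the main copy starts aligned. */
   if (src_va % SI_CPDMA_ALIGNMENT) {
      skipped_size = MIN2(SI_CPDMA_ALIGNMENT - (src_va % SI_CPDMA_ALIGNMENT), size);
      size -= skipped_size;
   }

   printf("main copy: %" PRIu64 " bytes from 0x%" PRIx64 "\n", size, src_va + skipped_size);
   printf("head copy: %" PRIu64 " bytes, realign dummy: %" PRIu64 " bytes\n", skipped_size,
          realign_size);
   return 0;
}
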
-void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
- uint64_t size, unsigned value)
+void
+si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t size,
+ unsigned value)
{
- if (!size)
- return;
+ if (!size)
+ return;
- assert(va % 4 == 0 && size % 4 == 0);
+ assert(va % 4 == 0 && size % 4 == 0);
- /* Assume that we are not going to sync after the last DMA operation. */
- cmd_buffer->state.dma_is_busy = true;
+ /* Assume that we are not going to sync after the last DMA operation. */
+ cmd_buffer->state.dma_is_busy = true;
- while (size) {
- unsigned byte_count = MIN2(size, cp_dma_max_byte_count(cmd_buffer));
- unsigned dma_flags = CP_DMA_CLEAR;
+ while (size) {
+ unsigned byte_count = MIN2(size, cp_dma_max_byte_count(cmd_buffer));
+ unsigned dma_flags = CP_DMA_CLEAR;
- if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
- /* DMA operations via L2 are coherent and faster.
- * TODO: GFX7-GFX8 should also support this but it
- * requires tests/benchmarks.
- *
- * Also enable on GFX9 so we can use L2 at rest on GFX9+.
- */
- dma_flags |= CP_DMA_USE_L2;
- }
+ if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
+ /* DMA operations via L2 are coherent and faster.
+ * TODO: GFX7-GFX8 should also support this but it
+ * requires tests/benchmarks.
+ *
+ * Also enable on GFX9 so we can use L2 at rest on GFX9+.
+ */
+ dma_flags |= CP_DMA_USE_L2;
+ }
- si_cp_dma_prepare(cmd_buffer, byte_count, size, &dma_flags);
+ si_cp_dma_prepare(cmd_buffer, byte_count, size, &dma_flags);
- /* Emit the clear packet. */
- si_emit_cp_dma(cmd_buffer, va, value, byte_count,
- dma_flags);
+ /* Emit the clear packet. */
+ si_emit_cp_dma(cmd_buffer, va, value, byte_count, dma_flags);
- size -= byte_count;
- va += byte_count;
- }
+ size -= byte_count;
+ va += byte_count;
+ }
}
-void si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer)
+void
+si_cp_dma_wait_for_idle(struct radv_cmd_buffer *cmd_buffer)
{
- if (cmd_buffer->device->physical_device->rad_info.chip_class < GFX7)
- return;
+ if (cmd_buffer->device->physical_device->rad_info.chip_class < GFX7)
+ return;
- if (!cmd_buffer->state.dma_is_busy)
- return;
+ if (!cmd_buffer->state.dma_is_busy)
+ return;
- /* Issue a dummy DMA that copies zero bytes.
- *
- * The DMA engine will see that there's no work to do and skip this
- * DMA request, however, the CP will see the sync flag and still wait
- * for all DMAs to complete.
- */
- si_emit_cp_dma(cmd_buffer, 0, 0, 0, CP_DMA_SYNC);
+ /* Issue a dummy DMA that copies zero bytes.
+ *
+ * The DMA engine will see that there's no work to do and skip this
+ * DMA request, however, the CP will see the sync flag and still wait
+ * for all DMAs to complete.
+ */
+ si_emit_cp_dma(cmd_buffer, 0, 0, 0, CP_DMA_SYNC);
- cmd_buffer->state.dma_is_busy = false;
+ cmd_buffer->state.dma_is_busy = false;
}
/* For MSAA sample positions. */
-#define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y) \
- ((((unsigned)(s0x) & 0xf) << 0) | (((unsigned)(s0y) & 0xf) << 4) | \
- (((unsigned)(s1x) & 0xf) << 8) | (((unsigned)(s1y) & 0xf) << 12) | \
- (((unsigned)(s2x) & 0xf) << 16) | (((unsigned)(s2y) & 0xf) << 20) | \
- (((unsigned)(s3x) & 0xf) << 24) | (((unsigned)(s3y) & 0xf) << 28))
+#define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y) \
+ ((((unsigned)(s0x)&0xf) << 0) | (((unsigned)(s0y)&0xf) << 4) | (((unsigned)(s1x)&0xf) << 8) | \
+ (((unsigned)(s1y)&0xf) << 12) | (((unsigned)(s2x)&0xf) << 16) | \
+ (((unsigned)(s2y)&0xf) << 20) | (((unsigned)(s3x)&0xf) << 24) | (((unsigned)(s3y)&0xf) << 28))
/* For obtaining location coordinates from registers */
-#define SEXT4(x) ((int)((x) | ((x) & 0x8 ? 0xfffffff0 : 0)))
-#define GET_SFIELD(reg, index) SEXT4(((reg) >> ((index) * 4)) & 0xf)
-#define GET_SX(reg, index) GET_SFIELD((reg)[(index) / 4], ((index) % 4) * 2)
-#define GET_SY(reg, index) GET_SFIELD((reg)[(index) / 4], ((index) % 4) * 2 + 1)
+#define SEXT4(x) ((int)((x) | ((x)&0x8 ? 0xfffffff0 : 0)))
+#define GET_SFIELD(reg, index) SEXT4(((reg) >> ((index)*4)) & 0xf)
+#define GET_SX(reg, index) GET_SFIELD((reg)[(index) / 4], ((index) % 4) * 2)
+#define GET_SY(reg, index) GET_SFIELD((reg)[(index) / 4], ((index) % 4) * 2 + 1)
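
Each 32-bit sample-locations register packs four (x, y) pairs as signed 4-bit nibbles; SEXT4/GET_SX/GET_SY reverse that packing. A standalone round-trip test, repeating the macro definitions so it compiles on its own:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define FILL_SREG(s0x, s0y, s1x, s1y, s2x, s2y, s3x, s3y)                                          \
   ((((unsigned)(s0x)&0xf) << 0) | (((unsigned)(s0y)&0xf) << 4) | (((unsigned)(s1x)&0xf) << 8) |   \
    (((unsigned)(s1y)&0xf) << 12) | (((unsigned)(s2x)&0xf) << 16) |                                \
    (((unsigned)(s2y)&0xf) << 20) | (((unsigned)(s3x)&0xf) << 24) | (((unsigned)(s3y)&0xf) << 28))
#define SEXT4(x)               ((int)((x) | ((x)&0x8 ? 0xfffffff0 : 0)))
#define GET_SFIELD(reg, index) SEXT4(((reg) >> ((index)*4)) & 0xf)
#define GET_SX(reg, index)     GET_SFIELD((reg)[(index) / 4], ((index) % 4) * 2)
#define GET_SY(reg, index)     GET_SFIELD((reg)[(index) / 4], ((index) % 4) * 2 + 1)

int main(void)
{
   /* The 4x MSAA table below: samples (-2,-6) (6,-2) (-6,2) (2,6). */
   const uint32_t locs[1] = {FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6)};

   for (unsigned i = 0; i < 4; i++)
      printf("sample %u: (%d, %d)\n", i, GET_SX(locs, i), GET_SY(locs, i));

   assert(GET_SX(locs, 2) == -6 && GET_SY(locs, 2) == 2); /* signed nibbles round-trip */
   return 0;
}
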
/* 1x MSAA */
-static const uint32_t sample_locs_1x =
- FILL_SREG(0, 0, 0, 0, 0, 0, 0, 0);
+static const uint32_t sample_locs_1x = FILL_SREG(0, 0, 0, 0, 0, 0, 0, 0);
static const unsigned max_dist_1x = 0;
static const uint64_t centroid_priority_1x = 0x0000000000000000ull;
/* 2xMSAA */
-static const uint32_t sample_locs_2x =
- FILL_SREG(4,4, -4, -4, 0, 0, 0, 0);
+static const uint32_t sample_locs_2x = FILL_SREG(4, 4, -4, -4, 0, 0, 0, 0);
static const unsigned max_dist_2x = 4;
static const uint64_t centroid_priority_2x = 0x1010101010101010ull;
/* 4xMSAA */
-static const uint32_t sample_locs_4x =
- FILL_SREG(-2,-6, 6, -2, -6, 2, 2, 6);
+static const uint32_t sample_locs_4x = FILL_SREG(-2, -6, 6, -2, -6, 2, 2, 6);
static const unsigned max_dist_4x = 6;
static const uint64_t centroid_priority_4x = 0x3210321032103210ull;
/* 8xMSAA */
static const uint32_t sample_locs_8x[] = {
- FILL_SREG( 1,-3, -1, 3, 5, 1, -3,-5),
- FILL_SREG(-5, 5, -7,-1, 3, 7, 7,-7),
- /* The following are unused by hardware, but we emit them to IBs
- * instead of multiple SET_CONTEXT_REG packets. */
- 0,
- 0,
+ FILL_SREG(1, -3, -1, 3, 5, 1, -3, -5),
+ FILL_SREG(-5, 5, -7, -1, 3, 7, 7, -7),
+ /* The following are unused by hardware, but we emit them to IBs
+ * instead of multiple SET_CONTEXT_REG packets. */
+ 0,
+ 0,
};
static const unsigned max_dist_8x = 7;
static const uint64_t centroid_priority_8x = 0x7654321076543210ull;
-unsigned radv_get_default_max_sample_dist(int log_samples)
+unsigned
+radv_get_default_max_sample_dist(int log_samples)
{
- unsigned max_dist[] = {
- max_dist_1x,
- max_dist_2x,
- max_dist_4x,
- max_dist_8x,
- };
- return max_dist[log_samples];
+ unsigned max_dist[] = {
+ max_dist_1x,
+ max_dist_2x,
+ max_dist_4x,
+ max_dist_8x,
+ };
+ return max_dist[log_samples];
}
-void radv_emit_default_sample_locations(struct radeon_cmdbuf *cs, int nr_samples)
+void
+radv_emit_default_sample_locations(struct radeon_cmdbuf *cs, int nr_samples)
{
- switch (nr_samples) {
- default:
- case 1:
- radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2);
- radeon_emit(cs, (uint32_t)centroid_priority_1x);
- radeon_emit(cs, centroid_priority_1x >> 32);
- radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_1x);
- radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_1x);
- radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_1x);
- radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_1x);
- break;
- case 2:
- radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2);
- radeon_emit(cs, (uint32_t)centroid_priority_2x);
- radeon_emit(cs, centroid_priority_2x >> 32);
- radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_2x);
- radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_2x);
- radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_2x);
- radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_2x);
- break;
- case 4:
- radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2);
- radeon_emit(cs, (uint32_t)centroid_priority_4x);
- radeon_emit(cs, centroid_priority_4x >> 32);
- radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_4x);
- radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_4x);
- radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_4x);
- radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_4x);
- break;
- case 8:
- radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2);
- radeon_emit(cs, (uint32_t)centroid_priority_8x);
- radeon_emit(cs, centroid_priority_8x >> 32);
- radeon_set_context_reg_seq(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 14);
- radeon_emit_array(cs, sample_locs_8x, 4);
- radeon_emit_array(cs, sample_locs_8x, 4);
- radeon_emit_array(cs, sample_locs_8x, 4);
- radeon_emit_array(cs, sample_locs_8x, 2);
- break;
- }
+ switch (nr_samples) {
+ default:
+ case 1:
+ radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2);
+ radeon_emit(cs, (uint32_t)centroid_priority_1x);
+ radeon_emit(cs, centroid_priority_1x >> 32);
+ radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_1x);
+ radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_1x);
+ radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_1x);
+ radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_1x);
+ break;
+ case 2:
+ radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2);
+ radeon_emit(cs, (uint32_t)centroid_priority_2x);
+ radeon_emit(cs, centroid_priority_2x >> 32);
+ radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_2x);
+ radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_2x);
+ radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_2x);
+ radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_2x);
+ break;
+ case 4:
+ radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2);
+ radeon_emit(cs, (uint32_t)centroid_priority_4x);
+ radeon_emit(cs, centroid_priority_4x >> 32);
+ radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, sample_locs_4x);
+ radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, sample_locs_4x);
+ radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, sample_locs_4x);
+ radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, sample_locs_4x);
+ break;
+ case 8:
+ radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2);
+ radeon_emit(cs, (uint32_t)centroid_priority_8x);
+ radeon_emit(cs, centroid_priority_8x >> 32);
+ radeon_set_context_reg_seq(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 14);
+ radeon_emit_array(cs, sample_locs_8x, 4);
+ radeon_emit_array(cs, sample_locs_8x, 4);
+ radeon_emit_array(cs, sample_locs_8x, 4);
+ radeon_emit_array(cs, sample_locs_8x, 2);
+ break;
+ }
}
-static void radv_get_sample_position(struct radv_device *device,
- unsigned sample_count,
- unsigned sample_index, float *out_value)
+static void
+radv_get_sample_position(struct radv_device *device, unsigned sample_count, unsigned sample_index,
+ float *out_value)
{
- const uint32_t *sample_locs;
-
- switch (sample_count) {
- case 1:
- default:
- sample_locs = &sample_locs_1x;
- break;
- case 2:
- sample_locs = &sample_locs_2x;
- break;
- case 4:
- sample_locs = &sample_locs_4x;
- break;
- case 8:
- sample_locs = sample_locs_8x;
- break;
- }
-
- out_value[0] = (GET_SX(sample_locs, sample_index) + 8) / 16.0f;
- out_value[1] = (GET_SY(sample_locs, sample_index) + 8) / 16.0f;
+ const uint32_t *sample_locs;
+
+ switch (sample_count) {
+ case 1:
+ default:
+ sample_locs = &sample_locs_1x;
+ break;
+ case 2:
+ sample_locs = &sample_locs_2x;
+ break;
+ case 4:
+ sample_locs = &sample_locs_4x;
+ break;
+ case 8:
+ sample_locs = sample_locs_8x;
+ break;
+ }
+
+ out_value[0] = (GET_SX(sample_locs, sample_index) + 8) / 16.0f;
+ out_value[1] = (GET_SY(sample_locs, sample_index) + 8) / 16.0f;
}
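
The hardware offsets are signed 1/16-pixel units in [-8, 7]; adding 8 and dividing by 16, as above, maps them to the [0, 1) sample positions reported to the API. A quick standalone check using the 4x table values:

#include <stdio.h>

int main(void)
{
   /* 4x MSAA hardware offsets from the table above, in 1/16-pixel units. */
   const int locs_4x[4][2] = {{-2, -6}, {6, -2}, {-6, 2}, {2, 6}};

   for (int i = 0; i < 4; i++) {
      float x = (locs_4x[i][0] + 8) / 16.0f; /* sample 0 -> 0.375 */
      float y = (locs_4x[i][1] + 8) / 16.0f; /* sample 0 -> 0.125 */
      printf("sample %d: (%.4f, %.4f)\n", i, x, y);
   }
   return 0;
}
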
-void radv_device_init_msaa(struct radv_device *device)
+void
+radv_device_init_msaa(struct radv_device *device)
{
- int i;
+ int i;
- radv_get_sample_position(device, 1, 0, device->sample_locations_1x[0]);
+ radv_get_sample_position(device, 1, 0, device->sample_locations_1x[0]);
- for (i = 0; i < 2; i++)
- radv_get_sample_position(device, 2, i, device->sample_locations_2x[i]);
- for (i = 0; i < 4; i++)
- radv_get_sample_position(device, 4, i, device->sample_locations_4x[i]);
- for (i = 0; i < 8; i++)
- radv_get_sample_position(device, 8, i, device->sample_locations_8x[i]);
+ for (i = 0; i < 2; i++)
+ radv_get_sample_position(device, 2, i, device->sample_locations_2x[i]);
+ for (i = 0; i < 4; i++)
+ radv_get_sample_position(device, 4, i, device->sample_locations_4x[i]);
+ for (i = 0; i < 8; i++)
+ radv_get_sample_position(device, 8, i, device->sample_locations_8x[i]);
}
diff --git a/src/amd/vulkan/vk_format.h b/src/amd/vulkan/vk_format.h
index d6132a5b96b..d58180c0196 100644
--- a/src/amd/vulkan/vk_format.h
+++ b/src/amd/vulkan/vk_format.h
@@ -28,13 +28,14 @@
#define VK_FORMAT_H
#include <assert.h>
-#include <vulkan/vulkan.h>
#include <util/macros.h>
#include <vulkan/util/vk_format.h>
+#include <vulkan/vulkan.h>
-static inline const struct util_format_description *vk_format_description(VkFormat format)
+static inline const struct util_format_description *
+vk_format_description(VkFormat format)
{
- return util_format_description(vk_format_to_pipe_format(format));
+ return util_format_description(vk_format_to_pipe_format(format));
}
/**
@@ -43,7 +44,7 @@ static inline const struct util_format_description *vk_format_description(VkForm
static inline unsigned
vk_format_get_blocksizebits(VkFormat format)
{
- return util_format_get_blocksizebits(vk_format_to_pipe_format(format));
+ return util_format_get_blocksizebits(vk_format_to_pipe_format(format));
}
/**
@@ -52,19 +53,19 @@ vk_format_get_blocksizebits(VkFormat format)
static inline unsigned
vk_format_get_blocksize(VkFormat format)
{
- return util_format_get_blocksize(vk_format_to_pipe_format(format));
+ return util_format_get_blocksize(vk_format_to_pipe_format(format));
}
static inline unsigned
vk_format_get_blockwidth(VkFormat format)
{
- return util_format_get_blockwidth(vk_format_to_pipe_format(format));
+ return util_format_get_blockwidth(vk_format_to_pipe_format(format));
}
static inline unsigned
vk_format_get_blockheight(VkFormat format)
{
- return util_format_get_blockheight(vk_format_to_pipe_format(format));
+ return util_format_get_blockheight(vk_format_to_pipe_format(format));
}
/**
@@ -74,259 +75,256 @@ vk_format_get_blockheight(VkFormat format)
static inline int
vk_format_get_first_non_void_channel(VkFormat format)
{
- return util_format_get_first_non_void_channel(vk_format_to_pipe_format(format));
+ return util_format_get_first_non_void_channel(vk_format_to_pipe_format(format));
}
static inline enum pipe_swizzle
-radv_swizzle_conv(VkComponentSwizzle component, const unsigned char chan[4], VkComponentSwizzle vk_swiz)
+radv_swizzle_conv(VkComponentSwizzle component, const unsigned char chan[4],
+ VkComponentSwizzle vk_swiz)
{
- if (vk_swiz == VK_COMPONENT_SWIZZLE_IDENTITY)
- vk_swiz = component;
- switch (vk_swiz) {
- case VK_COMPONENT_SWIZZLE_ZERO:
- return PIPE_SWIZZLE_0;
- case VK_COMPONENT_SWIZZLE_ONE:
- return PIPE_SWIZZLE_1;
- case VK_COMPONENT_SWIZZLE_R:
- case VK_COMPONENT_SWIZZLE_G:
- case VK_COMPONENT_SWIZZLE_B:
- case VK_COMPONENT_SWIZZLE_A:
- return (enum pipe_swizzle)chan[vk_swiz - VK_COMPONENT_SWIZZLE_R];
- default:
- unreachable("Illegal swizzle");
- }
+ if (vk_swiz == VK_COMPONENT_SWIZZLE_IDENTITY)
+ vk_swiz = component;
+ switch (vk_swiz) {
+ case VK_COMPONENT_SWIZZLE_ZERO:
+ return PIPE_SWIZZLE_0;
+ case VK_COMPONENT_SWIZZLE_ONE:
+ return PIPE_SWIZZLE_1;
+ case VK_COMPONENT_SWIZZLE_R:
+ case VK_COMPONENT_SWIZZLE_G:
+ case VK_COMPONENT_SWIZZLE_B:
+ case VK_COMPONENT_SWIZZLE_A:
+ return (enum pipe_swizzle)chan[vk_swiz - VK_COMPONENT_SWIZZLE_R];
+ default:
+ unreachable("Illegal swizzle");
+ }
}
-static inline void vk_format_compose_swizzles(const VkComponentMapping *mapping,
- const unsigned char swz[4],
- enum pipe_swizzle dst[4])
+static inline void
+vk_format_compose_swizzles(const VkComponentMapping *mapping, const unsigned char swz[4],
+ enum pipe_swizzle dst[4])
{
- dst[0] = radv_swizzle_conv(VK_COMPONENT_SWIZZLE_R, swz, mapping->r);
- dst[1] = radv_swizzle_conv(VK_COMPONENT_SWIZZLE_G, swz, mapping->g);
- dst[2] = radv_swizzle_conv(VK_COMPONENT_SWIZZLE_B, swz, mapping->b);
- dst[3] = radv_swizzle_conv(VK_COMPONENT_SWIZZLE_A, swz, mapping->a);
+ dst[0] = radv_swizzle_conv(VK_COMPONENT_SWIZZLE_R, swz, mapping->r);
+ dst[1] = radv_swizzle_conv(VK_COMPONENT_SWIZZLE_G, swz, mapping->g);
+ dst[2] = radv_swizzle_conv(VK_COMPONENT_SWIZZLE_B, swz, mapping->b);
+ dst[3] = radv_swizzle_conv(VK_COMPONENT_SWIZZLE_A, swz, mapping->a);
}
static inline bool
vk_format_is_compressed(VkFormat format)
{
- return util_format_is_compressed(vk_format_to_pipe_format(format));
+ return util_format_is_compressed(vk_format_to_pipe_format(format));
}
static inline bool
vk_format_is_subsampled(VkFormat format)
{
- return util_format_is_subsampled_422(vk_format_to_pipe_format(format));
+ return util_format_is_subsampled_422(vk_format_to_pipe_format(format));
}
static inline VkFormat
vk_format_depth_only(VkFormat format)
{
- switch (format) {
- case VK_FORMAT_D16_UNORM_S8_UINT:
- return VK_FORMAT_D16_UNORM;
- case VK_FORMAT_D24_UNORM_S8_UINT:
- return VK_FORMAT_X8_D24_UNORM_PACK32;
- case VK_FORMAT_D32_SFLOAT_S8_UINT:
- return VK_FORMAT_D32_SFLOAT;
- default:
- return format;
- }
+ switch (format) {
+ case VK_FORMAT_D16_UNORM_S8_UINT:
+ return VK_FORMAT_D16_UNORM;
+ case VK_FORMAT_D24_UNORM_S8_UINT:
+ return VK_FORMAT_X8_D24_UNORM_PACK32;
+ case VK_FORMAT_D32_SFLOAT_S8_UINT:
+ return VK_FORMAT_D32_SFLOAT;
+ default:
+ return format;
+ }
}
static inline bool
vk_format_is_int(VkFormat format)
{
- return util_format_is_pure_integer(vk_format_to_pipe_format(format));
+ return util_format_is_pure_integer(vk_format_to_pipe_format(format));
}
static inline bool
vk_format_is_uint(VkFormat format)
{
- return util_format_is_pure_uint(vk_format_to_pipe_format(format));
+ return util_format_is_pure_uint(vk_format_to_pipe_format(format));
}
static inline bool
vk_format_is_sint(VkFormat format)
{
- return util_format_is_pure_sint(vk_format_to_pipe_format(format));
+ return util_format_is_pure_sint(vk_format_to_pipe_format(format));
}
static inline bool
vk_format_is_unorm(VkFormat format)
{
- return util_format_is_unorm(vk_format_to_pipe_format(format));
+ return util_format_is_unorm(vk_format_to_pipe_format(format));
}
static inline bool
vk_format_is_srgb(VkFormat format)
{
- return util_format_is_srgb(vk_format_to_pipe_format(format));
+ return util_format_is_srgb(vk_format_to_pipe_format(format));
}
static inline VkFormat
vk_format_no_srgb(VkFormat format)
{
- switch(format) {
- case VK_FORMAT_R8_SRGB:
- return VK_FORMAT_R8_UNORM;
- case VK_FORMAT_R8G8_SRGB:
- return VK_FORMAT_R8G8_UNORM;
- case VK_FORMAT_R8G8B8_SRGB:
- return VK_FORMAT_R8G8B8_UNORM;
- case VK_FORMAT_B8G8R8_SRGB:
- return VK_FORMAT_B8G8R8_UNORM;
- case VK_FORMAT_R8G8B8A8_SRGB:
- return VK_FORMAT_R8G8B8A8_UNORM;
- case VK_FORMAT_B8G8R8A8_SRGB:
- return VK_FORMAT_B8G8R8A8_UNORM;
- case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
- return VK_FORMAT_A8B8G8R8_UNORM_PACK32;
- case VK_FORMAT_BC1_RGB_SRGB_BLOCK:
- return VK_FORMAT_BC1_RGB_UNORM_BLOCK;
- case VK_FORMAT_BC1_RGBA_SRGB_BLOCK:
- return VK_FORMAT_BC1_RGBA_UNORM_BLOCK;
- case VK_FORMAT_BC2_SRGB_BLOCK:
- return VK_FORMAT_BC2_UNORM_BLOCK;
- case VK_FORMAT_BC3_SRGB_BLOCK:
- return VK_FORMAT_BC3_UNORM_BLOCK;
- case VK_FORMAT_BC7_SRGB_BLOCK:
- return VK_FORMAT_BC7_UNORM_BLOCK;
- case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
- return VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK;
- case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
- return VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK;
- case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
- return VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK;
- default:
- assert(!vk_format_is_srgb(format));
- return format;
- }
+ switch (format) {
+ case VK_FORMAT_R8_SRGB:
+ return VK_FORMAT_R8_UNORM;
+ case VK_FORMAT_R8G8_SRGB:
+ return VK_FORMAT_R8G8_UNORM;
+ case VK_FORMAT_R8G8B8_SRGB:
+ return VK_FORMAT_R8G8B8_UNORM;
+ case VK_FORMAT_B8G8R8_SRGB:
+ return VK_FORMAT_B8G8R8_UNORM;
+ case VK_FORMAT_R8G8B8A8_SRGB:
+ return VK_FORMAT_R8G8B8A8_UNORM;
+ case VK_FORMAT_B8G8R8A8_SRGB:
+ return VK_FORMAT_B8G8R8A8_UNORM;
+ case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
+ return VK_FORMAT_A8B8G8R8_UNORM_PACK32;
+ case VK_FORMAT_BC1_RGB_SRGB_BLOCK:
+ return VK_FORMAT_BC1_RGB_UNORM_BLOCK;
+ case VK_FORMAT_BC1_RGBA_SRGB_BLOCK:
+ return VK_FORMAT_BC1_RGBA_UNORM_BLOCK;
+ case VK_FORMAT_BC2_SRGB_BLOCK:
+ return VK_FORMAT_BC2_UNORM_BLOCK;
+ case VK_FORMAT_BC3_SRGB_BLOCK:
+ return VK_FORMAT_BC3_UNORM_BLOCK;
+ case VK_FORMAT_BC7_SRGB_BLOCK:
+ return VK_FORMAT_BC7_UNORM_BLOCK;
+ case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
+ return VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK;
+ case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
+ return VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK;
+ case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
+ return VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK;
+ default:
+ assert(!vk_format_is_srgb(format));
+ return format;
+ }
}
static inline VkFormat
vk_format_stencil_only(VkFormat format)
{
- return VK_FORMAT_S8_UINT;
+ return VK_FORMAT_S8_UINT;
}
static inline unsigned
-vk_format_get_component_bits(VkFormat format,
- enum util_format_colorspace colorspace,
- unsigned component)
+vk_format_get_component_bits(VkFormat format, enum util_format_colorspace colorspace,
+ unsigned component)
{
- const struct util_format_description *desc = vk_format_description(format);
- enum util_format_colorspace desc_colorspace;
-
- assert(format);
- if (!format) {
- return 0;
- }
-
- assert(component < 4);
-
- /* Treat RGB and SRGB as equivalent. */
- if (colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
- colorspace = UTIL_FORMAT_COLORSPACE_RGB;
- }
- if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
- desc_colorspace = UTIL_FORMAT_COLORSPACE_RGB;
- } else {
- desc_colorspace = desc->colorspace;
- }
-
- if (desc_colorspace != colorspace) {
- return 0;
- }
-
- switch (desc->swizzle[component]) {
- case PIPE_SWIZZLE_X:
- return desc->channel[0].size;
- case PIPE_SWIZZLE_Y:
- return desc->channel[1].size;
- case PIPE_SWIZZLE_Z:
- return desc->channel[2].size;
- case PIPE_SWIZZLE_W:
- return desc->channel[3].size;
- default:
- return 0;
- }
+ const struct util_format_description *desc = vk_format_description(format);
+ enum util_format_colorspace desc_colorspace;
+
+ assert(format);
+ if (!format) {
+ return 0;
+ }
+
+ assert(component < 4);
+
+ /* Treat RGB and SRGB as equivalent. */
+ if (colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
+ colorspace = UTIL_FORMAT_COLORSPACE_RGB;
+ }
+ if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) {
+ desc_colorspace = UTIL_FORMAT_COLORSPACE_RGB;
+ } else {
+ desc_colorspace = desc->colorspace;
+ }
+
+ if (desc_colorspace != colorspace) {
+ return 0;
+ }
+
+ switch (desc->swizzle[component]) {
+ case PIPE_SWIZZLE_X:
+ return desc->channel[0].size;
+ case PIPE_SWIZZLE_Y:
+ return desc->channel[1].size;
+ case PIPE_SWIZZLE_Z:
+ return desc->channel[2].size;
+ case PIPE_SWIZZLE_W:
+ return desc->channel[3].size;
+ default:
+ return 0;
+ }
}
static inline VkFormat
vk_to_non_srgb_format(VkFormat format)
{
- switch(format) {
- case VK_FORMAT_R8_SRGB :
- return VK_FORMAT_R8_UNORM;
- case VK_FORMAT_R8G8_SRGB:
- return VK_FORMAT_R8G8_UNORM;
- case VK_FORMAT_R8G8B8_SRGB:
- return VK_FORMAT_R8G8B8_UNORM;
- case VK_FORMAT_B8G8R8_SRGB:
- return VK_FORMAT_B8G8R8_UNORM;
- case VK_FORMAT_R8G8B8A8_SRGB :
- return VK_FORMAT_R8G8B8A8_UNORM;
- case VK_FORMAT_B8G8R8A8_SRGB:
- return VK_FORMAT_B8G8R8A8_UNORM;
- case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
- return VK_FORMAT_A8B8G8R8_UNORM_PACK32;
- default:
- return format;
- }
+ switch (format) {
+ case VK_FORMAT_R8_SRGB:
+ return VK_FORMAT_R8_UNORM;
+ case VK_FORMAT_R8G8_SRGB:
+ return VK_FORMAT_R8G8_UNORM;
+ case VK_FORMAT_R8G8B8_SRGB:
+ return VK_FORMAT_R8G8B8_UNORM;
+ case VK_FORMAT_B8G8R8_SRGB:
+ return VK_FORMAT_B8G8R8_UNORM;
+ case VK_FORMAT_R8G8B8A8_SRGB:
+ return VK_FORMAT_R8G8B8A8_UNORM;
+ case VK_FORMAT_B8G8R8A8_SRGB:
+ return VK_FORMAT_B8G8R8A8_UNORM;
+ case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
+ return VK_FORMAT_A8B8G8R8_UNORM_PACK32;
+ default:
+ return format;
+ }
}
static inline unsigned
vk_format_get_nr_components(VkFormat format)
{
- return util_format_get_nr_components(vk_format_to_pipe_format(format));
+ return util_format_get_nr_components(vk_format_to_pipe_format(format));
}
static inline unsigned
vk_format_get_plane_count(VkFormat format)
{
- return util_format_get_num_planes(vk_format_to_pipe_format(format));
+ return util_format_get_num_planes(vk_format_to_pipe_format(format));
}
static inline unsigned
-vk_format_get_plane_width(VkFormat format, unsigned plane,
- unsigned width)
+vk_format_get_plane_width(VkFormat format, unsigned plane, unsigned width)
{
- return util_format_get_plane_width(vk_format_to_pipe_format(format), plane, width);
+ return util_format_get_plane_width(vk_format_to_pipe_format(format), plane, width);
}
static inline unsigned
-vk_format_get_plane_height(VkFormat format, unsigned plane,
- unsigned height)
+vk_format_get_plane_height(VkFormat format, unsigned plane, unsigned height)
{
- return util_format_get_plane_height(vk_format_to_pipe_format(format), plane, height);
+ return util_format_get_plane_height(vk_format_to_pipe_format(format), plane, height);
}
static inline VkFormat
vk_format_get_plane_format(VkFormat format, unsigned plane_id)
{
- assert(plane_id < vk_format_get_plane_count(format));
-
- switch(format) {
- case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
- case VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM:
- case VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM:
- return VK_FORMAT_R8_UNORM;
- case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
- case VK_FORMAT_G8_B8R8_2PLANE_422_UNORM:
- return plane_id ? VK_FORMAT_R8G8_UNORM : VK_FORMAT_R8_UNORM;
- case VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM:
- case VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM:
- case VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM:
- return VK_FORMAT_R16_UNORM;
- case VK_FORMAT_G16_B16R16_2PLANE_420_UNORM:
- case VK_FORMAT_G16_B16R16_2PLANE_422_UNORM:
- return plane_id ? VK_FORMAT_R16G16_UNORM : VK_FORMAT_R16_UNORM;
- default:
- assert(vk_format_get_plane_count(format) == 1);
- return format;
- }
+ assert(plane_id < vk_format_get_plane_count(format));
+
+ switch (format) {
+ case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
+ case VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM:
+ case VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM:
+ return VK_FORMAT_R8_UNORM;
+ case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
+ case VK_FORMAT_G8_B8R8_2PLANE_422_UNORM:
+ return plane_id ? VK_FORMAT_R8G8_UNORM : VK_FORMAT_R8_UNORM;
+ case VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM:
+ case VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM:
+ case VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM:
+ return VK_FORMAT_R16_UNORM;
+ case VK_FORMAT_G16_B16R16_2PLANE_420_UNORM:
+ case VK_FORMAT_G16_B16R16_2PLANE_422_UNORM:
+ return plane_id ? VK_FORMAT_R16G16_UNORM : VK_FORMAT_R16_UNORM;
+ default:
+ assert(vk_format_get_plane_count(format) == 1);
+ return format;
+ }
}
-
#endif /* VK_FORMAT_H */
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
index 2fc391d3c2d..e96bcf23ee8 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
@@ -31,1025 +31,1018 @@
#include "radv_amdgpu_bo.h"
#include <amdgpu.h>
-#include "drm-uapi/amdgpu_drm.h"
#include <inttypes.h>
#include <pthread.h>
#include <unistd.h>
+#include "drm-uapi/amdgpu_drm.h"
+#include "util/os_time.h"
#include "util/u_atomic.h"
-#include "util/u_memory.h"
#include "util/u_math.h"
-#include "util/os_time.h"
+#include "util/u_memory.h"
-static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys *_ws,
- struct radeon_winsys_bo *_bo);
+static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo);
static int
-radv_amdgpu_bo_va_op(struct radv_amdgpu_winsys *ws,
- amdgpu_bo_handle bo,
- uint64_t offset,
- uint64_t size,
- uint64_t addr,
- uint32_t bo_flags,
- uint64_t internal_flags,
- uint32_t ops)
+radv_amdgpu_bo_va_op(struct radv_amdgpu_winsys *ws, amdgpu_bo_handle bo, uint64_t offset,
+ uint64_t size, uint64_t addr, uint32_t bo_flags, uint64_t internal_flags,
+ uint32_t ops)
{
- uint64_t flags = internal_flags;
- if (bo) {
- flags = AMDGPU_VM_PAGE_READABLE |
- AMDGPU_VM_PAGE_EXECUTABLE;
+ uint64_t flags = internal_flags;
+ if (bo) {
+ flags = AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_EXECUTABLE;
- if ((bo_flags & RADEON_FLAG_VA_UNCACHED) &&
- ws->info.chip_class >= GFX9)
- flags |= AMDGPU_VM_MTYPE_UC;
+ if ((bo_flags & RADEON_FLAG_VA_UNCACHED) && ws->info.chip_class >= GFX9)
+ flags |= AMDGPU_VM_MTYPE_UC;
- if (!(bo_flags & RADEON_FLAG_READ_ONLY))
- flags |= AMDGPU_VM_PAGE_WRITEABLE;
- }
+ if (!(bo_flags & RADEON_FLAG_READ_ONLY))
+ flags |= AMDGPU_VM_PAGE_WRITEABLE;
+ }
- size = align64(size, getpagesize());
+ size = align64(size, getpagesize());
- return amdgpu_bo_va_op_raw(ws->dev, bo, offset, size, addr,
- flags, ops);
+ return amdgpu_bo_va_op_raw(ws->dev, bo, offset, size, addr, flags, ops);
}
static void
-radv_amdgpu_winsys_virtual_map(struct radv_amdgpu_winsys *ws,
- struct radv_amdgpu_winsys_bo *bo,
+radv_amdgpu_winsys_virtual_map(struct radv_amdgpu_winsys *ws, struct radv_amdgpu_winsys_bo *bo,
const struct radv_amdgpu_map_range *range)
{
- uint64_t internal_flags = 0;
- assert(range->size);
-
- if (!range->bo) {
- if (!ws->info.has_sparse_vm_mappings)
- return;
-
- internal_flags |= AMDGPU_VM_PAGE_PRT;
- } else
- p_atomic_inc(&range->bo->ref_count);
-
- int r = radv_amdgpu_bo_va_op(ws, range->bo ? range->bo->bo : NULL,
- range->bo_offset, range->size,
- range->offset + bo->base.va, 0,
- internal_flags, AMDGPU_VA_OP_MAP);
- if (r)
- abort();
+ uint64_t internal_flags = 0;
+ assert(range->size);
+
+ if (!range->bo) {
+ if (!ws->info.has_sparse_vm_mappings)
+ return;
+
+ internal_flags |= AMDGPU_VM_PAGE_PRT;
+ } else
+ p_atomic_inc(&range->bo->ref_count);
+
+ int r = radv_amdgpu_bo_va_op(ws, range->bo ? range->bo->bo : NULL, range->bo_offset, range->size,
+ range->offset + bo->base.va, 0, internal_flags, AMDGPU_VA_OP_MAP);
+ if (r)
+ abort();
}
static void
-radv_amdgpu_winsys_virtual_unmap(struct radv_amdgpu_winsys *ws,
- struct radv_amdgpu_winsys_bo *bo,
+radv_amdgpu_winsys_virtual_unmap(struct radv_amdgpu_winsys *ws, struct radv_amdgpu_winsys_bo *bo,
const struct radv_amdgpu_map_range *range)
{
- uint64_t internal_flags = 0;
- assert(range->size);
-
- if (!range->bo) {
- if(!ws->info.has_sparse_vm_mappings)
- return;
-
- /* Even though this is an unmap, if we don't set this flag,
- AMDGPU is going to complain about the missing buffer. */
- internal_flags |= AMDGPU_VM_PAGE_PRT;
- }
-
- int r = radv_amdgpu_bo_va_op(ws, range->bo ? range->bo->bo : NULL,
- range->bo_offset, range->size,
- range->offset + bo->base.va, 0, internal_flags,
- AMDGPU_VA_OP_UNMAP);
- if (r)
- abort();
-
- if (range->bo)
- ws->base.buffer_destroy(&ws->base, (struct radeon_winsys_bo *)range->bo);
+ uint64_t internal_flags = 0;
+ assert(range->size);
+
+ if (!range->bo) {
+ if (!ws->info.has_sparse_vm_mappings)
+ return;
+
+ /* Even though this is an unmap, if we don't set this flag,
+ AMDGPU is going to complain about the missing buffer. */
+ internal_flags |= AMDGPU_VM_PAGE_PRT;
+ }
+
+ int r = radv_amdgpu_bo_va_op(ws, range->bo ? range->bo->bo : NULL, range->bo_offset, range->size,
+ range->offset + bo->base.va, 0, internal_flags, AMDGPU_VA_OP_UNMAP);
+ if (r)
+ abort();
+
+ if (range->bo)
+ ws->base.buffer_destroy(&ws->base, (struct radeon_winsys_bo *)range->bo);
}
-static int bo_comparator(const void *ap, const void *bp) {
- struct radv_amdgpu_bo *a = *(struct radv_amdgpu_bo *const *)ap;
- struct radv_amdgpu_bo *b = *(struct radv_amdgpu_bo *const *)bp;
- return (a > b) ? 1 : (a < b) ? -1 : 0;
+static int
+bo_comparator(const void *ap, const void *bp)
+{
+ struct radv_amdgpu_bo *a = *(struct radv_amdgpu_bo *const *)ap;
+ struct radv_amdgpu_bo *b = *(struct radv_amdgpu_bo *const *)bp;
+ return (a > b) ? 1 : (a < b) ? -1 : 0;
}
static VkResult
radv_amdgpu_winsys_rebuild_bo_list(struct radv_amdgpu_winsys_bo *bo)
{
- if (bo->bo_capacity < bo->range_count) {
- uint32_t new_count = MAX2(bo->bo_capacity * 2, bo->range_count);
- struct radv_amdgpu_winsys_bo **bos =
- realloc(bo->bos, new_count * sizeof(struct radv_amdgpu_winsys_bo *));
- if (!bos)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
- bo->bos = bos;
- bo->bo_capacity = new_count;
- }
-
- uint32_t temp_bo_count = 0;
- for (uint32_t i = 0; i < bo->range_count; ++i)
- if (bo->ranges[i].bo)
- bo->bos[temp_bo_count++] = bo->ranges[i].bo;
-
- qsort(bo->bos, temp_bo_count, sizeof(struct radv_amdgpu_winsys_bo *), &bo_comparator);
-
- uint32_t final_bo_count = 1;
- for (uint32_t i = 1; i < temp_bo_count; ++i)
- if (bo->bos[i] != bo->bos[i - 1])
- bo->bos[final_bo_count++] = bo->bos[i];
-
- bo->bo_count = final_bo_count;
-
- return VK_SUCCESS;
+ if (bo->bo_capacity < bo->range_count) {
+ uint32_t new_count = MAX2(bo->bo_capacity * 2, bo->range_count);
+ struct radv_amdgpu_winsys_bo **bos =
+ realloc(bo->bos, new_count * sizeof(struct radv_amdgpu_winsys_bo *));
+ if (!bos)
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+ bo->bos = bos;
+ bo->bo_capacity = new_count;
+ }
+
+ uint32_t temp_bo_count = 0;
+ for (uint32_t i = 0; i < bo->range_count; ++i)
+ if (bo->ranges[i].bo)
+ bo->bos[temp_bo_count++] = bo->ranges[i].bo;
+
+ qsort(bo->bos, temp_bo_count, sizeof(struct radv_amdgpu_winsys_bo *), &bo_comparator);
+
+ uint32_t final_bo_count = 1;
+ for (uint32_t i = 1; i < temp_bo_count; ++i)
+ if (bo->bos[i] != bo->bos[i - 1])
+ bo->bos[final_bo_count++] = bo->bos[i];
+
+ bo->bo_count = final_bo_count;
+
+ return VK_SUCCESS;
}
static VkResult
-radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys *_ws,
- struct radeon_winsys_bo *_parent,
- uint64_t offset, uint64_t size,
- struct radeon_winsys_bo *_bo, uint64_t bo_offset)
+radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys *_ws, struct radeon_winsys_bo *_parent,
+ uint64_t offset, uint64_t size, struct radeon_winsys_bo *_bo,
+ uint64_t bo_offset)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
- struct radv_amdgpu_winsys_bo *parent = (struct radv_amdgpu_winsys_bo *)_parent;
- struct radv_amdgpu_winsys_bo *bo = (struct radv_amdgpu_winsys_bo*)_bo;
- int range_count_delta, new_idx;
- int first = 0, last;
- struct radv_amdgpu_map_range new_first, new_last;
- VkResult result;
-
- assert(parent->is_virtual);
- assert(!bo || !bo->is_virtual);
-
- /* We have at most 2 new ranges (1 by the bind, and another one by splitting a range that contains the newly bound range). */
- if (parent->range_capacity - parent->range_count < 2) {
- uint32_t range_capacity = parent->range_capacity + 2;
- struct radv_amdgpu_map_range *ranges =
- realloc(parent->ranges,
- range_capacity * sizeof(struct radv_amdgpu_map_range));
- if (!ranges)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
- parent->ranges = ranges;
- parent->range_capacity = range_capacity;
- }
-
- /*
- * [first, last] is exactly the range of ranges that either overlap the
- * new parent, or are adjacent to it. This corresponds to the bind ranges
- * that may change.
- */
- while(first + 1 < parent->range_count && parent->ranges[first].offset + parent->ranges[first].size < offset)
- ++first;
-
- last = first;
- while(last + 1 < parent->range_count && parent->ranges[last + 1].offset <= offset + size)
- ++last;
-
- /* Whether the first or last range are going to be totally removed or just
- * resized/left alone. Note that in the case of first == last, we will split
- * this into a part before and after the new range. The remove flag is then
- * whether to not create the corresponding split part. */
- bool remove_first = parent->ranges[first].offset == offset;
- bool remove_last = parent->ranges[last].offset + parent->ranges[last].size == offset + size;
- bool unmapped_first = false;
-
- assert(parent->ranges[first].offset <= offset);
- assert(parent->ranges[last].offset + parent->ranges[last].size >= offset + size);
-
- /* Try to merge the new range with the first range. */
- if (parent->ranges[first].bo == bo && (!bo || offset - bo_offset == parent->ranges[first].offset - parent->ranges[first].bo_offset)) {
- size += offset - parent->ranges[first].offset;
- offset = parent->ranges[first].offset;
- bo_offset = parent->ranges[first].bo_offset;
- remove_first = true;
- }
-
- /* Try to merge the new range with the last range. */
- if (parent->ranges[last].bo == bo && (!bo || offset - bo_offset == parent->ranges[last].offset - parent->ranges[last].bo_offset)) {
- size = parent->ranges[last].offset + parent->ranges[last].size - offset;
- remove_last = true;
- }
-
- range_count_delta = 1 - (last - first + 1) + !remove_first + !remove_last;
- new_idx = first + !remove_first;
-
- /* Any range between first and last is going to be entirely covered by the new range so just unmap them. */
- for (int i = first + 1; i < last; ++i)
- radv_amdgpu_winsys_virtual_unmap(ws, parent, parent->ranges + i);
-
- /* If the first/last range are not left alone we unmap then and optionally map
- * them again after modifications. Not that this implicitly can do the splitting
- * if first == last. */
- new_first = parent->ranges[first];
- new_last = parent->ranges[last];
-
- if (parent->ranges[first].offset + parent->ranges[first].size > offset || remove_first) {
- radv_amdgpu_winsys_virtual_unmap(ws, parent, parent->ranges + first);
- unmapped_first = true;
-
- if (!remove_first) {
- new_first.size = offset - new_first.offset;
- radv_amdgpu_winsys_virtual_map(ws, parent, &new_first);
- }
- }
-
- if (parent->ranges[last].offset < offset + size || remove_last) {
- if (first != last || !unmapped_first)
- radv_amdgpu_winsys_virtual_unmap(ws, parent, parent->ranges + last);
-
- if (!remove_last) {
- new_last.size -= offset + size - new_last.offset;
- new_last.bo_offset += (offset + size - new_last.offset);
- new_last.offset = offset + size;
- radv_amdgpu_winsys_virtual_map(ws, parent, &new_last);
- }
- }
-
- /* Moves the range list after last to account for the changed number of ranges. */
- memmove(parent->ranges + last + 1 + range_count_delta, parent->ranges + last + 1,
- sizeof(struct radv_amdgpu_map_range) * (parent->range_count - last - 1));
-
- if (!remove_first)
- parent->ranges[first] = new_first;
-
- if (!remove_last)
- parent->ranges[new_idx + 1] = new_last;
-
- /* Actually set up the new range. */
- parent->ranges[new_idx].offset = offset;
- parent->ranges[new_idx].size = size;
- parent->ranges[new_idx].bo = bo;
- parent->ranges[new_idx].bo_offset = bo_offset;
-
- radv_amdgpu_winsys_virtual_map(ws, parent, parent->ranges + new_idx);
-
- parent->range_count += range_count_delta;
-
- result = radv_amdgpu_winsys_rebuild_bo_list(parent);
- if (result != VK_SUCCESS)
- return result;
-
- return VK_SUCCESS;
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ struct radv_amdgpu_winsys_bo *parent = (struct radv_amdgpu_winsys_bo *)_parent;
+ struct radv_amdgpu_winsys_bo *bo = (struct radv_amdgpu_winsys_bo *)_bo;
+ int range_count_delta, new_idx;
+ int first = 0, last;
+ struct radv_amdgpu_map_range new_first, new_last;
+ VkResult result;
+
+ assert(parent->is_virtual);
+ assert(!bo || !bo->is_virtual);
+
+ /* We have at most 2 new ranges (1 by the bind, and another one by splitting a range that
+ * contains the newly bound range). */
+ if (parent->range_capacity - parent->range_count < 2) {
+ uint32_t range_capacity = parent->range_capacity + 2;
+ struct radv_amdgpu_map_range *ranges =
+ realloc(parent->ranges, range_capacity * sizeof(struct radv_amdgpu_map_range));
+ if (!ranges)
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+ parent->ranges = ranges;
+ parent->range_capacity = range_capacity;
+ }
+
+ /*
+ * [first, last] is exactly the range of ranges that either overlap the
+ * new parent, or are adjacent to it. This corresponds to the bind ranges
+ * that may change.
+ */
+ while (first + 1 < parent->range_count &&
+ parent->ranges[first].offset + parent->ranges[first].size < offset)
+ ++first;
+
+ last = first;
+ while (last + 1 < parent->range_count && parent->ranges[last + 1].offset <= offset + size)
+ ++last;
+
+ /* Whether the first or last range is going to be totally removed or just
+ * resized/left alone. Note that in the case of first == last, we will split
+ * this into a part before and after the new range. The remove flag then
+ * indicates that the corresponding split part should not be created. */
+ bool remove_first = parent->ranges[first].offset == offset;
+ bool remove_last = parent->ranges[last].offset + parent->ranges[last].size == offset + size;
+ bool unmapped_first = false;
+
+ assert(parent->ranges[first].offset <= offset);
+ assert(parent->ranges[last].offset + parent->ranges[last].size >= offset + size);
+
+ /* Try to merge the new range with the first range. */
+ if (parent->ranges[first].bo == bo &&
+ (!bo ||
+ offset - bo_offset == parent->ranges[first].offset - parent->ranges[first].bo_offset)) {
+ size += offset - parent->ranges[first].offset;
+ offset = parent->ranges[first].offset;
+ bo_offset = parent->ranges[first].bo_offset;
+ remove_first = true;
+ }
+
+ /* Try to merge the new range with the last range. */
+ if (parent->ranges[last].bo == bo &&
+ (!bo ||
+ offset - bo_offset == parent->ranges[last].offset - parent->ranges[last].bo_offset)) {
+ size = parent->ranges[last].offset + parent->ranges[last].size - offset;
+ remove_last = true;
+ }
+
+ range_count_delta = 1 - (last - first + 1) + !remove_first + !remove_last;
+ new_idx = first + !remove_first;
+
+ /* Any ranges between first and last are going to be entirely covered by the new range, so
+ * just unmap them. */
+ for (int i = first + 1; i < last; ++i)
+ radv_amdgpu_winsys_virtual_unmap(ws, parent, parent->ranges + i);
+
+ /* If the first/last ranges are not left alone, we unmap them and optionally map
+ * them again after modifications. Note that this implicitly can do the splitting
+ * if first == last. */
+ new_first = parent->ranges[first];
+ new_last = parent->ranges[last];
+
+ if (parent->ranges[first].offset + parent->ranges[first].size > offset || remove_first) {
+ radv_amdgpu_winsys_virtual_unmap(ws, parent, parent->ranges + first);
+ unmapped_first = true;
+
+ if (!remove_first) {
+ new_first.size = offset - new_first.offset;
+ radv_amdgpu_winsys_virtual_map(ws, parent, &new_first);
+ }
+ }
+
+ if (parent->ranges[last].offset < offset + size || remove_last) {
+ if (first != last || !unmapped_first)
+ radv_amdgpu_winsys_virtual_unmap(ws, parent, parent->ranges + last);
+
+ if (!remove_last) {
+ new_last.size -= offset + size - new_last.offset;
+ new_last.bo_offset += (offset + size - new_last.offset);
+ new_last.offset = offset + size;
+ radv_amdgpu_winsys_virtual_map(ws, parent, &new_last);
+ }
+ }
+
+ /* Moves the range list after last to account for the changed number of ranges. */
+ memmove(parent->ranges + last + 1 + range_count_delta, parent->ranges + last + 1,
+ sizeof(struct radv_amdgpu_map_range) * (parent->range_count - last - 1));
+
+ if (!remove_first)
+ parent->ranges[first] = new_first;
+
+ if (!remove_last)
+ parent->ranges[new_idx + 1] = new_last;
+
+ /* Actually set up the new range. */
+ parent->ranges[new_idx].offset = offset;
+ parent->ranges[new_idx].size = size;
+ parent->ranges[new_idx].bo = bo;
+ parent->ranges[new_idx].bo_offset = bo_offset;
+
+ radv_amdgpu_winsys_virtual_map(ws, parent, parent->ranges + new_idx);
+
+ parent->range_count += range_count_delta;
+
+ result = radv_amdgpu_winsys_rebuild_bo_list(parent);
+ if (result != VK_SUCCESS)
+ return result;
+
+ return VK_SUCCESS;
}
struct radv_amdgpu_winsys_bo_log {
- struct list_head list;
- uint64_t va;
- uint64_t size;
- uint64_t timestamp; /* CPU timestamp */
- uint8_t is_virtual : 1;
- uint8_t destroyed : 1;
+ struct list_head list;
+ uint64_t va;
+ uint64_t size;
+ uint64_t timestamp; /* CPU timestamp */
+ uint8_t is_virtual : 1;
+ uint8_t destroyed : 1;
};
-static void radv_amdgpu_log_bo(struct radv_amdgpu_winsys *ws,
- struct radv_amdgpu_winsys_bo *bo,
- bool destroyed)
+static void
+radv_amdgpu_log_bo(struct radv_amdgpu_winsys *ws, struct radv_amdgpu_winsys_bo *bo, bool destroyed)
{
- struct radv_amdgpu_winsys_bo_log *bo_log = NULL;
+ struct radv_amdgpu_winsys_bo_log *bo_log = NULL;
- if (!ws->debug_log_bos)
- return;
+ if (!ws->debug_log_bos)
+ return;
- bo_log = malloc(sizeof(*bo_log));
- if (!bo_log)
- return;
+ bo_log = malloc(sizeof(*bo_log));
+ if (!bo_log)
+ return;
- bo_log->va = bo->base.va;
- bo_log->size = bo->size;
- bo_log->timestamp = os_time_get_nano();
- bo_log->is_virtual = bo->is_virtual;
- bo_log->destroyed = destroyed;
+ bo_log->va = bo->base.va;
+ bo_log->size = bo->size;
+ bo_log->timestamp = os_time_get_nano();
+ bo_log->is_virtual = bo->is_virtual;
+ bo_log->destroyed = destroyed;
- u_rwlock_wrlock(&ws->log_bo_list_lock);
- list_addtail(&bo_log->list, &ws->log_bo_list);
- u_rwlock_wrunlock(&ws->log_bo_list_lock);
+ u_rwlock_wrlock(&ws->log_bo_list_lock);
+ list_addtail(&bo_log->list, &ws->log_bo_list);
+ u_rwlock_wrunlock(&ws->log_bo_list_lock);
}
-static int radv_amdgpu_global_bo_list_add(struct radv_amdgpu_winsys *ws,
- struct radv_amdgpu_winsys_bo *bo)
+static int
+radv_amdgpu_global_bo_list_add(struct radv_amdgpu_winsys *ws, struct radv_amdgpu_winsys_bo *bo)
{
- u_rwlock_wrlock(&ws->global_bo_list.lock);
- if (ws->global_bo_list.count == ws->global_bo_list.capacity) {
- unsigned capacity = MAX2(4, ws->global_bo_list.capacity * 2);
- void *data = realloc(ws->global_bo_list.bos,
- capacity * sizeof(struct radv_amdgpu_winsys_bo *));
- if (!data) {
- u_rwlock_wrunlock(&ws->global_bo_list.lock);
- return VK_ERROR_OUT_OF_HOST_MEMORY;
- }
-
- ws->global_bo_list.bos = (struct radv_amdgpu_winsys_bo **)data;
- ws->global_bo_list.capacity = capacity;
- }
-
- ws->global_bo_list.bos[ws->global_bo_list.count++] = bo;
- bo->base.use_global_list = true;
- u_rwlock_wrunlock(&ws->global_bo_list.lock);
- return VK_SUCCESS;
+ u_rwlock_wrlock(&ws->global_bo_list.lock);
+ if (ws->global_bo_list.count == ws->global_bo_list.capacity) {
+ unsigned capacity = MAX2(4, ws->global_bo_list.capacity * 2);
+ void *data =
+ realloc(ws->global_bo_list.bos, capacity * sizeof(struct radv_amdgpu_winsys_bo *));
+ if (!data) {
+ u_rwlock_wrunlock(&ws->global_bo_list.lock);
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+ }
+
+ ws->global_bo_list.bos = (struct radv_amdgpu_winsys_bo **)data;
+ ws->global_bo_list.capacity = capacity;
+ }
+
+ ws->global_bo_list.bos[ws->global_bo_list.count++] = bo;
+ bo->base.use_global_list = true;
+ u_rwlock_wrunlock(&ws->global_bo_list.lock);
+ return VK_SUCCESS;
}
-static void radv_amdgpu_global_bo_list_del(struct radv_amdgpu_winsys *ws,
- struct radv_amdgpu_winsys_bo *bo)
+static void
+radv_amdgpu_global_bo_list_del(struct radv_amdgpu_winsys *ws, struct radv_amdgpu_winsys_bo *bo)
{
- u_rwlock_wrlock(&ws->global_bo_list.lock);
- for(unsigned i = ws->global_bo_list.count; i-- > 0;) {
- if (ws->global_bo_list.bos[i] == bo) {
- ws->global_bo_list.bos[i] = ws->global_bo_list.bos[ws->global_bo_list.count - 1];
- --ws->global_bo_list.count;
- bo->base.use_global_list = false;
- break;
- }
- }
- u_rwlock_wrunlock(&ws->global_bo_list.lock);
+ u_rwlock_wrlock(&ws->global_bo_list.lock);
+ for (unsigned i = ws->global_bo_list.count; i-- > 0;) {
+ if (ws->global_bo_list.bos[i] == bo) {
+ ws->global_bo_list.bos[i] = ws->global_bo_list.bos[ws->global_bo_list.count - 1];
+ --ws->global_bo_list.count;
+ bo->base.use_global_list = false;
+ break;
+ }
+ }
+ u_rwlock_wrunlock(&ws->global_bo_list.lock);
}
-static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys *_ws,
- struct radeon_winsys_bo *_bo)
+static void
+radv_amdgpu_winsys_bo_destroy(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
- struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
-
- if (p_atomic_dec_return(&bo->ref_count))
- return;
-
- radv_amdgpu_log_bo(ws, bo, true);
-
- if (bo->is_virtual) {
- for (uint32_t i = 0; i < bo->range_count; ++i) {
- radv_amdgpu_winsys_virtual_unmap(ws, bo, bo->ranges + i);
- }
- free(bo->bos);
- free(bo->ranges);
- } else {
- if (ws->debug_all_bos)
- radv_amdgpu_global_bo_list_del(ws, bo);
- radv_amdgpu_bo_va_op(ws, bo->bo, 0, bo->size, bo->base.va,
- 0, 0, AMDGPU_VA_OP_UNMAP);
- amdgpu_bo_free(bo->bo);
- }
-
- if (bo->base.initial_domain & RADEON_DOMAIN_VRAM) {
- if (bo->base.vram_no_cpu_access) {
- p_atomic_add(&ws->allocated_vram,
- -align64(bo->size, ws->info.gart_page_size));
- } else {
- p_atomic_add(&ws->allocated_vram_vis,
- -align64(bo->size, ws->info.gart_page_size));
- }
- }
-
- if (bo->base.initial_domain & RADEON_DOMAIN_GTT)
- p_atomic_add(&ws->allocated_gtt,
- -align64(bo->size, ws->info.gart_page_size));
-
- amdgpu_va_range_free(bo->va_handle);
- FREE(bo);
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
+
+ if (p_atomic_dec_return(&bo->ref_count))
+ return;
+
+ radv_amdgpu_log_bo(ws, bo, true);
+
+ if (bo->is_virtual) {
+ for (uint32_t i = 0; i < bo->range_count; ++i) {
+ radv_amdgpu_winsys_virtual_unmap(ws, bo, bo->ranges + i);
+ }
+ free(bo->bos);
+ free(bo->ranges);
+ } else {
+ if (ws->debug_all_bos)
+ radv_amdgpu_global_bo_list_del(ws, bo);
+ radv_amdgpu_bo_va_op(ws, bo->bo, 0, bo->size, bo->base.va, 0, 0, AMDGPU_VA_OP_UNMAP);
+ amdgpu_bo_free(bo->bo);
+ }
+
+ if (bo->base.initial_domain & RADEON_DOMAIN_VRAM) {
+ if (bo->base.vram_no_cpu_access) {
+ p_atomic_add(&ws->allocated_vram, -align64(bo->size, ws->info.gart_page_size));
+ } else {
+ p_atomic_add(&ws->allocated_vram_vis, -align64(bo->size, ws->info.gart_page_size));
+ }
+ }
+
+ if (bo->base.initial_domain & RADEON_DOMAIN_GTT)
+ p_atomic_add(&ws->allocated_gtt, -align64(bo->size, ws->info.gart_page_size));
+
+ amdgpu_va_range_free(bo->va_handle);
+ FREE(bo);
}
static struct radeon_winsys_bo *
-radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
- uint64_t size,
- unsigned alignment,
- enum radeon_bo_domain initial_domain,
- enum radeon_bo_flag flags,
- unsigned priority)
+radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws, uint64_t size, unsigned alignment,
+ enum radeon_bo_domain initial_domain, enum radeon_bo_flag flags,
+ unsigned priority)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
- struct radv_amdgpu_winsys_bo *bo;
- struct amdgpu_bo_alloc_request request = {0};
- struct radv_amdgpu_map_range *ranges = NULL;
- amdgpu_bo_handle buf_handle;
- uint64_t va = 0;
- amdgpu_va_handle va_handle;
- int r;
- bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
- if (!bo) {
- return NULL;
- }
-
- unsigned virt_alignment = alignment;
- if (size >= ws->info.pte_fragment_size)
- virt_alignment = MAX2(virt_alignment, ws->info.pte_fragment_size);
-
- r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
- size, virt_alignment, 0, &va, &va_handle,
- (flags & RADEON_FLAG_32BIT ? AMDGPU_VA_RANGE_32_BIT : 0) |
- AMDGPU_VA_RANGE_HIGH);
- if (r)
- goto error_va_alloc;
-
- bo->base.va = va;
- bo->va_handle = va_handle;
- bo->size = size;
- bo->is_virtual = !!(flags & RADEON_FLAG_VIRTUAL);
- bo->ref_count = 1;
-
- if (flags & RADEON_FLAG_VIRTUAL) {
- ranges = realloc(NULL, sizeof(struct radv_amdgpu_map_range));
- if (!ranges)
- goto error_ranges_alloc;
-
- bo->ranges = ranges;
- bo->range_count = 1;
- bo->range_capacity = 1;
-
- bo->ranges[0].offset = 0;
- bo->ranges[0].size = size;
- bo->ranges[0].bo = NULL;
- bo->ranges[0].bo_offset = 0;
-
- radv_amdgpu_winsys_virtual_map(ws, bo, bo->ranges);
- radv_amdgpu_log_bo(ws, bo, false);
-
- return (struct radeon_winsys_bo *)bo;
- }
-
- request.alloc_size = size;
- request.phys_alignment = alignment;
-
- if (initial_domain & RADEON_DOMAIN_VRAM) {
- request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;
-
- /* Since VRAM and GTT have almost the same performance on
- * APUs, we could just set GTT. However, in order to decrease
- * GTT(RAM) usage, which is shared with the OS, allow VRAM
- * placements too. The idea is not to use VRAM usefully, but
- * to use it so that it's not unused and wasted.
- *
- * Furthermore, even on discrete GPUs this is beneficial. If
- * both GTT and VRAM are set then AMDGPU still prefers VRAM
- * for the initial placement, but it makes the buffers
- * spillable. Otherwise AMDGPU tries to place the buffers in
- * VRAM really hard to the extent that we are getting a lot
- * of unnecessary movement. This helps significantly when
- * e.g. Horizon Zero Dawn allocates more memory than we have
- * VRAM.
- */
- request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
- }
-
- if (initial_domain & RADEON_DOMAIN_GTT)
- request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
- if (initial_domain & RADEON_DOMAIN_GDS)
- request.preferred_heap |= AMDGPU_GEM_DOMAIN_GDS;
- if (initial_domain & RADEON_DOMAIN_OA)
- request.preferred_heap |= AMDGPU_GEM_DOMAIN_OA;
-
- if (flags & RADEON_FLAG_CPU_ACCESS)
- request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
- if (flags & RADEON_FLAG_NO_CPU_ACCESS) {
- bo->base.vram_no_cpu_access = initial_domain & RADEON_DOMAIN_VRAM;
- request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
- }
- if (flags & RADEON_FLAG_GTT_WC)
- request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
- if (!(flags & RADEON_FLAG_IMPLICIT_SYNC))
- request.flags |= AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
- if (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
- (ws->use_local_bos || (flags & RADEON_FLAG_PREFER_LOCAL_BO))) {
- bo->base.is_local = true;
- request.flags |= AMDGPU_GEM_CREATE_VM_ALWAYS_VALID;
- }
-
- /* this won't do anything on pre 4.9 kernels */
- if (initial_domain & RADEON_DOMAIN_VRAM) {
- if (ws->zero_all_vram_allocs || (flags & RADEON_FLAG_ZERO_VRAM))
- request.flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED;
- }
-
- r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
- if (r) {
- fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
- fprintf(stderr, "amdgpu: size : %"PRIu64" bytes\n", size);
- fprintf(stderr, "amdgpu: alignment : %u bytes\n", alignment);
- fprintf(stderr, "amdgpu: domains : %u\n", initial_domain);
- goto error_bo_alloc;
- }
-
- r = radv_amdgpu_bo_va_op(ws, buf_handle, 0, size, va, flags, 0,
- AMDGPU_VA_OP_MAP);
- if (r)
- goto error_va_map;
-
- bo->bo = buf_handle;
- bo->base.initial_domain = initial_domain;
- bo->base.use_global_list = bo->base.is_local;
- bo->is_shared = false;
- bo->priority = priority;
-
- r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
- assert(!r);
-
- if (initial_domain & RADEON_DOMAIN_VRAM) {
- /* Buffers allocated in VRAM with the NO_CPU_ACCESS flag
- * aren't mappable and they are counted as part of the VRAM
- * counter.
- *
- * Otherwise, buffers with the CPU_ACCESS flag or without any
- * of both (imported buffers) are counted as part of the VRAM
- * visible counter because they can be mapped.
- */
- if (bo->base.vram_no_cpu_access) {
- p_atomic_add(&ws->allocated_vram,
- align64(bo->size, ws->info.gart_page_size));
- } else {
- p_atomic_add(&ws->allocated_vram_vis,
- align64(bo->size, ws->info.gart_page_size));
- }
- }
-
- if (initial_domain & RADEON_DOMAIN_GTT)
- p_atomic_add(&ws->allocated_gtt,
- align64(bo->size, ws->info.gart_page_size));
-
- if (ws->debug_all_bos)
- radv_amdgpu_global_bo_list_add(ws, bo);
- radv_amdgpu_log_bo(ws, bo, false);
-
- return (struct radeon_winsys_bo *)bo;
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ struct radv_amdgpu_winsys_bo *bo;
+ struct amdgpu_bo_alloc_request request = {0};
+ struct radv_amdgpu_map_range *ranges = NULL;
+ amdgpu_bo_handle buf_handle;
+ uint64_t va = 0;
+ amdgpu_va_handle va_handle;
+ int r;
+ bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
+ if (!bo) {
+ return NULL;
+ }
+
+ unsigned virt_alignment = alignment;
+ if (size >= ws->info.pte_fragment_size)
+ virt_alignment = MAX2(virt_alignment, ws->info.pte_fragment_size);
+
+ r = amdgpu_va_range_alloc(
+ ws->dev, amdgpu_gpu_va_range_general, size, virt_alignment, 0, &va, &va_handle,
+ (flags & RADEON_FLAG_32BIT ? AMDGPU_VA_RANGE_32_BIT : 0) | AMDGPU_VA_RANGE_HIGH);
+ if (r)
+ goto error_va_alloc;
+
+ bo->base.va = va;
+ bo->va_handle = va_handle;
+ bo->size = size;
+ bo->is_virtual = !!(flags & RADEON_FLAG_VIRTUAL);
+ bo->ref_count = 1;
+
+ if (flags & RADEON_FLAG_VIRTUAL) {
+ ranges = realloc(NULL, sizeof(struct radv_amdgpu_map_range));
+ if (!ranges)
+ goto error_ranges_alloc;
+
+ bo->ranges = ranges;
+ bo->range_count = 1;
+ bo->range_capacity = 1;
+
+ bo->ranges[0].offset = 0;
+ bo->ranges[0].size = size;
+ bo->ranges[0].bo = NULL;
+ bo->ranges[0].bo_offset = 0;
+
+ radv_amdgpu_winsys_virtual_map(ws, bo, bo->ranges);
+ radv_amdgpu_log_bo(ws, bo, false);
+
+ return (struct radeon_winsys_bo *)bo;
+ }
+
+ request.alloc_size = size;
+ request.phys_alignment = alignment;
+
+ if (initial_domain & RADEON_DOMAIN_VRAM) {
+ request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;
+
+ /* Since VRAM and GTT have almost the same performance on
+ * APUs, we could just set GTT. However, in order to decrease
+ * GTT(RAM) usage, which is shared with the OS, allow VRAM
+ * placements too. The idea is not that VRAM will be used more
+ * effectively, but that it is used at all instead of being left
+ * unused and wasted.
+ *
+ * Furthermore, even on discrete GPUs this is beneficial. If
+ * both GTT and VRAM are set then AMDGPU still prefers VRAM
+ * for the initial placement, but it makes the buffers
+ * spillable. Otherwise AMDGPU tries to place the buffers in
+ * VRAM really hard to the extent that we are getting a lot
+ * of unnecessary movement. This helps significantly when
+ * e.g. Horizon Zero Dawn allocates more memory than we have
+ * VRAM.
+ */
+ request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
+ }
+
+ if (initial_domain & RADEON_DOMAIN_GTT)
+ request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;
+ if (initial_domain & RADEON_DOMAIN_GDS)
+ request.preferred_heap |= AMDGPU_GEM_DOMAIN_GDS;
+ if (initial_domain & RADEON_DOMAIN_OA)
+ request.preferred_heap |= AMDGPU_GEM_DOMAIN_OA;
+
+ if (flags & RADEON_FLAG_CPU_ACCESS)
+ request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+ if (flags & RADEON_FLAG_NO_CPU_ACCESS) {
+ bo->base.vram_no_cpu_access = initial_domain & RADEON_DOMAIN_VRAM;
+ request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
+ }
+ if (flags & RADEON_FLAG_GTT_WC)
+ request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+ if (!(flags & RADEON_FLAG_IMPLICIT_SYNC))
+ request.flags |= AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
+ if (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING &&
+ (ws->use_local_bos || (flags & RADEON_FLAG_PREFER_LOCAL_BO))) {
+ bo->base.is_local = true;
+ request.flags |= AMDGPU_GEM_CREATE_VM_ALWAYS_VALID;
+ }
+
+ /* This won't do anything on pre-4.9 kernels. */
+ if (initial_domain & RADEON_DOMAIN_VRAM) {
+ if (ws->zero_all_vram_allocs || (flags & RADEON_FLAG_ZERO_VRAM))
+ request.flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED;
+ }
+
+ r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
+ if (r) {
+ fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
+ fprintf(stderr, "amdgpu: size : %" PRIu64 " bytes\n", size);
+ fprintf(stderr, "amdgpu: alignment : %u bytes\n", alignment);
+ fprintf(stderr, "amdgpu: domains : %u\n", initial_domain);
+ goto error_bo_alloc;
+ }
+
+ r = radv_amdgpu_bo_va_op(ws, buf_handle, 0, size, va, flags, 0, AMDGPU_VA_OP_MAP);
+ if (r)
+ goto error_va_map;
+
+ bo->bo = buf_handle;
+ bo->base.initial_domain = initial_domain;
+ bo->base.use_global_list = bo->base.is_local;
+ bo->is_shared = false;
+ bo->priority = priority;
+
+ r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
+ assert(!r);
+
+ if (initial_domain & RADEON_DOMAIN_VRAM) {
+ /* Buffers allocated in VRAM with the NO_CPU_ACCESS flag
+ * aren't mappable and they are counted as part of the VRAM
+ * counter.
+ *
+ * Otherwise, buffers with the CPU_ACCESS flag or with neither
+ * flag (imported buffers) are counted as part of the VRAM
+ * visible counter because they can be mapped.
+ */
+ if (bo->base.vram_no_cpu_access) {
+ p_atomic_add(&ws->allocated_vram, align64(bo->size, ws->info.gart_page_size));
+ } else {
+ p_atomic_add(&ws->allocated_vram_vis, align64(bo->size, ws->info.gart_page_size));
+ }
+ }
+
+ if (initial_domain & RADEON_DOMAIN_GTT)
+ p_atomic_add(&ws->allocated_gtt, align64(bo->size, ws->info.gart_page_size));
+
+ if (ws->debug_all_bos)
+ radv_amdgpu_global_bo_list_add(ws, bo);
+ radv_amdgpu_log_bo(ws, bo, false);
+
+ return (struct radeon_winsys_bo *)bo;
error_va_map:
- amdgpu_bo_free(buf_handle);
+ amdgpu_bo_free(buf_handle);
error_bo_alloc:
- free(ranges);
+ free(ranges);
error_ranges_alloc:
- amdgpu_va_range_free(va_handle);
+ amdgpu_va_range_free(va_handle);
error_va_alloc:
- FREE(bo);
- return NULL;
+ FREE(bo);
+ return NULL;
}
static void *
radv_amdgpu_winsys_bo_map(struct radeon_winsys_bo *_bo)
{
- struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
- int ret;
- void *data;
- ret = amdgpu_bo_cpu_map(bo->bo, &data);
- if (ret)
- return NULL;
- return data;
+ struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
+ int ret;
+ void *data;
+ ret = amdgpu_bo_cpu_map(bo->bo, &data);
+ if (ret)
+ return NULL;
+ return data;
}
static void
radv_amdgpu_winsys_bo_unmap(struct radeon_winsys_bo *_bo)
{
- struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
- amdgpu_bo_cpu_unmap(bo->bo);
+ struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
+ amdgpu_bo_cpu_unmap(bo->bo);
}
static uint64_t
-radv_amdgpu_get_optimal_vm_alignment(struct radv_amdgpu_winsys *ws,
- uint64_t size, unsigned alignment)
+radv_amdgpu_get_optimal_vm_alignment(struct radv_amdgpu_winsys *ws, uint64_t size,
+ unsigned alignment)
{
- uint64_t vm_alignment = alignment;
-
- /* Increase the VM alignment for faster address translation. */
- if (size >= ws->info.pte_fragment_size)
- vm_alignment = MAX2(vm_alignment, ws->info.pte_fragment_size);
-
- /* Gfx9: Increase the VM alignment to the most significant bit set
- * in the size for faster address translation.
- */
- if (ws->info.chip_class >= GFX9) {
- unsigned msb = util_last_bit64(size); /* 0 = no bit is set */
- uint64_t msb_alignment = msb ? 1ull << (msb - 1) : 0;
-
- vm_alignment = MAX2(vm_alignment, msb_alignment);
- }
- return vm_alignment;
+ uint64_t vm_alignment = alignment;
+
+ /* Increase the VM alignment for faster address translation. */
+ if (size >= ws->info.pte_fragment_size)
+ vm_alignment = MAX2(vm_alignment, ws->info.pte_fragment_size);
+
+ /* Gfx9: Increase the VM alignment to the most significant bit set
+ * in the size for faster address translation.
+ */
+ if (ws->info.chip_class >= GFX9) {
+ unsigned msb = util_last_bit64(size); /* 0 = no bit is set */
+ uint64_t msb_alignment = msb ? 1ull << (msb - 1) : 0;
+
+ vm_alignment = MAX2(vm_alignment, msb_alignment);
+ }
+ return vm_alignment;
}
static struct radeon_winsys_bo *
-radv_amdgpu_winsys_bo_from_ptr(struct radeon_winsys *_ws,
- void *pointer,
- uint64_t size,
- unsigned priority)
+radv_amdgpu_winsys_bo_from_ptr(struct radeon_winsys *_ws, void *pointer, uint64_t size,
+ unsigned priority)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
- amdgpu_bo_handle buf_handle;
- struct radv_amdgpu_winsys_bo *bo;
- uint64_t va;
- amdgpu_va_handle va_handle;
- uint64_t vm_alignment;
-
- bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
- if (!bo)
- return NULL;
-
- if (amdgpu_create_bo_from_user_mem(ws->dev, pointer, size, &buf_handle))
- goto error;
-
- /* Using the optimal VM alignment also fixes GPU hangs for buffers that
- * are imported.
- */
- vm_alignment = radv_amdgpu_get_optimal_vm_alignment(ws, size,
- ws->info.gart_page_size);
-
- if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
- size, vm_alignment, 0, &va, &va_handle,
- AMDGPU_VA_RANGE_HIGH))
- goto error_va_alloc;
-
- if (amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP))
- goto error_va_map;
-
- /* Initialize it */
- bo->base.va = va;
- bo->va_handle = va_handle;
- bo->size = size;
- bo->ref_count = 1;
- bo->bo = buf_handle;
- bo->base.initial_domain = RADEON_DOMAIN_GTT;
- bo->base.use_global_list = false;
- bo->priority = priority;
-
- ASSERTED int r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
- assert(!r);
-
- p_atomic_add(&ws->allocated_gtt,
- align64(bo->size, ws->info.gart_page_size));
-
- if (ws->debug_all_bos)
- radv_amdgpu_global_bo_list_add(ws, bo);
- radv_amdgpu_log_bo(ws, bo, false);
-
- return (struct radeon_winsys_bo *)bo;
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ amdgpu_bo_handle buf_handle;
+ struct radv_amdgpu_winsys_bo *bo;
+ uint64_t va;
+ amdgpu_va_handle va_handle;
+ uint64_t vm_alignment;
+
+ bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
+ if (!bo)
+ return NULL;
+
+ if (amdgpu_create_bo_from_user_mem(ws->dev, pointer, size, &buf_handle))
+ goto error;
+
+ /* Using the optimal VM alignment also fixes GPU hangs for buffers that
+ * are imported.
+ */
+ vm_alignment = radv_amdgpu_get_optimal_vm_alignment(ws, size, ws->info.gart_page_size);
+
+ if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, size, vm_alignment, 0, &va,
+ &va_handle, AMDGPU_VA_RANGE_HIGH))
+ goto error_va_alloc;
+
+ if (amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP))
+ goto error_va_map;
+
+ /* Initialize it */
+ bo->base.va = va;
+ bo->va_handle = va_handle;
+ bo->size = size;
+ bo->ref_count = 1;
+ bo->bo = buf_handle;
+ bo->base.initial_domain = RADEON_DOMAIN_GTT;
+ bo->base.use_global_list = false;
+ bo->priority = priority;
+
+ ASSERTED int r = amdgpu_bo_export(buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
+ assert(!r);
+
+ p_atomic_add(&ws->allocated_gtt, align64(bo->size, ws->info.gart_page_size));
+
+ if (ws->debug_all_bos)
+ radv_amdgpu_global_bo_list_add(ws, bo);
+ radv_amdgpu_log_bo(ws, bo, false);
+
+ return (struct radeon_winsys_bo *)bo;
error_va_map:
- amdgpu_va_range_free(va_handle);
+ amdgpu_va_range_free(va_handle);
error_va_alloc:
- amdgpu_bo_free(buf_handle);
+ amdgpu_bo_free(buf_handle);
error:
- FREE(bo);
- return NULL;
+ FREE(bo);
+ return NULL;
}
static struct radeon_winsys_bo *
-radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws,
- int fd, unsigned priority,
- uint64_t *alloc_size)
+radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws, int fd, unsigned priority,
+ uint64_t *alloc_size)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
- struct radv_amdgpu_winsys_bo *bo;
- uint64_t va;
- amdgpu_va_handle va_handle;
- enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
- struct amdgpu_bo_import_result result = {0};
- struct amdgpu_bo_info info = {0};
- enum radeon_bo_domain initial = 0;
- int r;
- bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
- if (!bo)
- return NULL;
-
- r = amdgpu_bo_import(ws->dev, type, fd, &result);
- if (r)
- goto error;
-
- r = amdgpu_bo_query_info(result.buf_handle, &info);
- if (r)
- goto error_query;
-
- if (alloc_size) {
- *alloc_size = info.alloc_size;
- }
-
- r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
- result.alloc_size, 1 << 20, 0, &va, &va_handle,
- AMDGPU_VA_RANGE_HIGH);
- if (r)
- goto error_query;
-
- r = radv_amdgpu_bo_va_op(ws, result.buf_handle, 0, result.alloc_size,
- va, 0, 0, AMDGPU_VA_OP_MAP);
- if (r)
- goto error_va_map;
-
- if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
- initial |= RADEON_DOMAIN_VRAM;
- if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
- initial |= RADEON_DOMAIN_GTT;
-
- bo->bo = result.buf_handle;
- bo->base.va = va;
- bo->va_handle = va_handle;
- bo->base.initial_domain = initial;
- bo->base.use_global_list = false;
- bo->size = result.alloc_size;
- bo->is_shared = true;
- bo->priority = priority;
- bo->ref_count = 1;
-
- r = amdgpu_bo_export(result.buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
- assert(!r);
-
- if (bo->base.initial_domain & RADEON_DOMAIN_VRAM)
- p_atomic_add(&ws->allocated_vram,
- align64(bo->size, ws->info.gart_page_size));
- if (bo->base.initial_domain & RADEON_DOMAIN_GTT)
- p_atomic_add(&ws->allocated_gtt,
- align64(bo->size, ws->info.gart_page_size));
-
- if (ws->debug_all_bos)
- radv_amdgpu_global_bo_list_add(ws, bo);
- radv_amdgpu_log_bo(ws, bo, false);
-
- return (struct radeon_winsys_bo *)bo;
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ struct radv_amdgpu_winsys_bo *bo;
+ uint64_t va;
+ amdgpu_va_handle va_handle;
+ enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
+ struct amdgpu_bo_import_result result = {0};
+ struct amdgpu_bo_info info = {0};
+ enum radeon_bo_domain initial = 0;
+ int r;
+ bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
+ if (!bo)
+ return NULL;
+
+ r = amdgpu_bo_import(ws->dev, type, fd, &result);
+ if (r)
+ goto error;
+
+ r = amdgpu_bo_query_info(result.buf_handle, &info);
+ if (r)
+ goto error_query;
+
+ if (alloc_size) {
+ *alloc_size = info.alloc_size;
+ }
+
+ r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general, result.alloc_size, 1 << 20, 0,
+ &va, &va_handle, AMDGPU_VA_RANGE_HIGH);
+ if (r)
+ goto error_query;
+
+ r =
+ radv_amdgpu_bo_va_op(ws, result.buf_handle, 0, result.alloc_size, va, 0, 0, AMDGPU_VA_OP_MAP);
+ if (r)
+ goto error_va_map;
+
+ if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
+ initial |= RADEON_DOMAIN_VRAM;
+ if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
+ initial |= RADEON_DOMAIN_GTT;
+
+ bo->bo = result.buf_handle;
+ bo->base.va = va;
+ bo->va_handle = va_handle;
+ bo->base.initial_domain = initial;
+ bo->base.use_global_list = false;
+ bo->size = result.alloc_size;
+ bo->is_shared = true;
+ bo->priority = priority;
+ bo->ref_count = 1;
+
+ r = amdgpu_bo_export(result.buf_handle, amdgpu_bo_handle_type_kms, &bo->bo_handle);
+ assert(!r);
+
+ if (bo->base.initial_domain & RADEON_DOMAIN_VRAM)
+ p_atomic_add(&ws->allocated_vram, align64(bo->size, ws->info.gart_page_size));
+ if (bo->base.initial_domain & RADEON_DOMAIN_GTT)
+ p_atomic_add(&ws->allocated_gtt, align64(bo->size, ws->info.gart_page_size));
+
+ if (ws->debug_all_bos)
+ radv_amdgpu_global_bo_list_add(ws, bo);
+ radv_amdgpu_log_bo(ws, bo, false);
+
+ return (struct radeon_winsys_bo *)bo;
error_va_map:
- amdgpu_va_range_free(va_handle);
+ amdgpu_va_range_free(va_handle);
error_query:
- amdgpu_bo_free(result.buf_handle);
+ amdgpu_bo_free(result.buf_handle);
error:
- FREE(bo);
- return NULL;
+ FREE(bo);
+ return NULL;
}
static bool
-radv_amdgpu_winsys_get_fd(struct radeon_winsys *_ws,
- struct radeon_winsys_bo *_bo,
- int *fd)
+radv_amdgpu_winsys_get_fd(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo, int *fd)
{
- struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
- enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
- int r;
- unsigned handle;
- r = amdgpu_bo_export(bo->bo, type, &handle);
- if (r)
- return false;
-
- *fd = (int)handle;
- bo->is_shared = true;
- return true;
+ struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
+ enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
+ int r;
+ unsigned handle;
+ r = amdgpu_bo_export(bo->bo, type, &handle);
+ if (r)
+ return false;
+
+ *fd = (int)handle;
+ bo->is_shared = true;
+ return true;
}
static bool
-radv_amdgpu_bo_get_flags_from_fd(struct radeon_winsys *_ws, int fd,
- enum radeon_bo_domain *domains,
+radv_amdgpu_bo_get_flags_from_fd(struct radeon_winsys *_ws, int fd, enum radeon_bo_domain *domains,
enum radeon_bo_flag *flags)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
- struct amdgpu_bo_import_result result = {0};
- struct amdgpu_bo_info info = {0};
- int r;
-
- *domains = 0;
- *flags = 0;
-
- r = amdgpu_bo_import(ws->dev, amdgpu_bo_handle_type_dma_buf_fd, fd, &result);
- if (r)
- return false;
-
- r = amdgpu_bo_query_info(result.buf_handle, &info);
- amdgpu_bo_free(result.buf_handle);
- if (r)
- return false;
-
- if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
- *domains |= RADEON_DOMAIN_VRAM;
- if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
- *domains |= RADEON_DOMAIN_GTT;
- if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GDS)
- *domains |= RADEON_DOMAIN_GDS;
- if (info.preferred_heap & AMDGPU_GEM_DOMAIN_OA)
- *domains |= RADEON_DOMAIN_OA;
-
- if (info.alloc_flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
- *flags |= RADEON_FLAG_CPU_ACCESS;
- if (info.alloc_flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
- *flags |= RADEON_FLAG_NO_CPU_ACCESS;
- if (!(info.alloc_flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC))
- *flags |= RADEON_FLAG_IMPLICIT_SYNC;
- if (info.alloc_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
- *flags |= RADEON_FLAG_GTT_WC;
- if (info.alloc_flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
- *flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_PREFER_LOCAL_BO;
- if (info.alloc_flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
- *flags |= RADEON_FLAG_ZERO_VRAM;
- return true;
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ struct amdgpu_bo_import_result result = {0};
+ struct amdgpu_bo_info info = {0};
+ int r;
+
+ *domains = 0;
+ *flags = 0;
+
+ r = amdgpu_bo_import(ws->dev, amdgpu_bo_handle_type_dma_buf_fd, fd, &result);
+ if (r)
+ return false;
+
+ r = amdgpu_bo_query_info(result.buf_handle, &info);
+ amdgpu_bo_free(result.buf_handle);
+ if (r)
+ return false;
+
+ if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
+ *domains |= RADEON_DOMAIN_VRAM;
+ if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
+ *domains |= RADEON_DOMAIN_GTT;
+ if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GDS)
+ *domains |= RADEON_DOMAIN_GDS;
+ if (info.preferred_heap & AMDGPU_GEM_DOMAIN_OA)
+ *domains |= RADEON_DOMAIN_OA;
+
+ if (info.alloc_flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
+ *flags |= RADEON_FLAG_CPU_ACCESS;
+ if (info.alloc_flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
+ *flags |= RADEON_FLAG_NO_CPU_ACCESS;
+ if (!(info.alloc_flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC))
+ *flags |= RADEON_FLAG_IMPLICIT_SYNC;
+ if (info.alloc_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
+ *flags |= RADEON_FLAG_GTT_WC;
+ if (info.alloc_flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
+ *flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_PREFER_LOCAL_BO;
+ if (info.alloc_flags & AMDGPU_GEM_CREATE_VRAM_CLEARED)
+ *flags |= RADEON_FLAG_ZERO_VRAM;
+ return true;
}
-static unsigned eg_tile_split(unsigned tile_split)
+static unsigned
+eg_tile_split(unsigned tile_split)
{
- switch (tile_split) {
- case 0: tile_split = 64; break;
- case 1: tile_split = 128; break;
- case 2: tile_split = 256; break;
- case 3: tile_split = 512; break;
- default:
- case 4: tile_split = 1024; break;
- case 5: tile_split = 2048; break;
- case 6: tile_split = 4096; break;
- }
- return tile_split;
+ switch (tile_split) {
+ case 0:
+ tile_split = 64;
+ break;
+ case 1:
+ tile_split = 128;
+ break;
+ case 2:
+ tile_split = 256;
+ break;
+ case 3:
+ tile_split = 512;
+ break;
+ default:
+ case 4:
+ tile_split = 1024;
+ break;
+ case 5:
+ tile_split = 2048;
+ break;
+ case 6:
+ tile_split = 4096;
+ break;
+ }
+ return tile_split;
}
-static unsigned radv_eg_tile_split_rev(unsigned eg_tile_split)
+static unsigned
+radv_eg_tile_split_rev(unsigned eg_tile_split)
{
- switch (eg_tile_split) {
- case 64: return 0;
- case 128: return 1;
- case 256: return 2;
- case 512: return 3;
- default:
- case 1024: return 4;
- case 2048: return 5;
- case 4096: return 6;
- }
+ switch (eg_tile_split) {
+ case 64:
+ return 0;
+ case 128:
+ return 1;
+ case 256:
+ return 2;
+ case 512:
+ return 3;
+ default:
+ case 1024:
+ return 4;
+ case 2048:
+ return 5;
+ case 4096:
+ return 6;
+ }
}
-#define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_SHIFT 45
-#define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_MASK 0x3
+#define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_SHIFT 45
+#define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_MASK 0x3
static void
-radv_amdgpu_winsys_bo_set_metadata(struct radeon_winsys *_ws,
- struct radeon_winsys_bo *_bo,
- struct radeon_bo_metadata *md)
+radv_amdgpu_winsys_bo_set_metadata(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo,
+ struct radeon_bo_metadata *md)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
- struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
- struct amdgpu_bo_metadata metadata = {0};
- uint64_t tiling_flags = 0;
-
- if (ws->info.chip_class >= GFX9) {
- tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, md->u.gfx9.swizzle_mode);
- tiling_flags |= AMDGPU_TILING_SET(DCC_OFFSET_256B, md->u.gfx9.dcc_offset_256b);
- tiling_flags |= AMDGPU_TILING_SET(DCC_PITCH_MAX, md->u.gfx9.dcc_pitch_max);
- tiling_flags |= AMDGPU_TILING_SET(DCC_INDEPENDENT_64B, md->u.gfx9.dcc_independent_64b_blocks);
- tiling_flags |= AMDGPU_TILING_SET(DCC_INDEPENDENT_128B, md->u.gfx9.dcc_independent_128b_blocks);
- tiling_flags |= AMDGPU_TILING_SET(DCC_MAX_COMPRESSED_BLOCK_SIZE, md->u.gfx9.dcc_max_compressed_block_size);
- tiling_flags |= AMDGPU_TILING_SET(SCANOUT, md->u.gfx9.scanout);
- } else {
- if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
- tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
- else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
- tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */
- else
- tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */
-
- tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, md->u.legacy.pipe_config);
- tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(md->u.legacy.bankw));
- tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(md->u.legacy.bankh));
- if (md->u.legacy.tile_split)
- tiling_flags |= AMDGPU_TILING_SET(TILE_SPLIT, radv_eg_tile_split_rev(md->u.legacy.tile_split));
- tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(md->u.legacy.mtilea));
- tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(md->u.legacy.num_banks)-1);
-
- if (md->u.legacy.scanout)
- tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
- else
- tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */
- }
-
- metadata.tiling_info = tiling_flags;
- metadata.size_metadata = md->size_metadata;
- memcpy(metadata.umd_metadata, md->metadata, sizeof(md->metadata));
-
- amdgpu_bo_set_metadata(bo->bo, &metadata);
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
+ struct amdgpu_bo_metadata metadata = {0};
+ uint64_t tiling_flags = 0;
+
+ if (ws->info.chip_class >= GFX9) {
+ tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, md->u.gfx9.swizzle_mode);
+ tiling_flags |= AMDGPU_TILING_SET(DCC_OFFSET_256B, md->u.gfx9.dcc_offset_256b);
+ tiling_flags |= AMDGPU_TILING_SET(DCC_PITCH_MAX, md->u.gfx9.dcc_pitch_max);
+ tiling_flags |= AMDGPU_TILING_SET(DCC_INDEPENDENT_64B, md->u.gfx9.dcc_independent_64b_blocks);
+ tiling_flags |=
+ AMDGPU_TILING_SET(DCC_INDEPENDENT_128B, md->u.gfx9.dcc_independent_128b_blocks);
+ tiling_flags |=
+ AMDGPU_TILING_SET(DCC_MAX_COMPRESSED_BLOCK_SIZE, md->u.gfx9.dcc_max_compressed_block_size);
+ tiling_flags |= AMDGPU_TILING_SET(SCANOUT, md->u.gfx9.scanout);
+ } else {
+ if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
+ tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
+ else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
+ tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */
+ else
+ tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */
+
+ tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, md->u.legacy.pipe_config);
+ tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(md->u.legacy.bankw));
+ tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(md->u.legacy.bankh));
+ if (md->u.legacy.tile_split)
+ tiling_flags |=
+ AMDGPU_TILING_SET(TILE_SPLIT, radv_eg_tile_split_rev(md->u.legacy.tile_split));
+ tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(md->u.legacy.mtilea));
+ tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(md->u.legacy.num_banks) - 1);
+
+ if (md->u.legacy.scanout)
+ tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
+ else
+ tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */
+ }
+
+ metadata.tiling_info = tiling_flags;
+ metadata.size_metadata = md->size_metadata;
+ memcpy(metadata.umd_metadata, md->metadata, sizeof(md->metadata));
+
+ amdgpu_bo_set_metadata(bo->bo, &metadata);
}
static void
-radv_amdgpu_winsys_bo_get_metadata(struct radeon_winsys *_ws,
- struct radeon_winsys_bo *_bo,
+radv_amdgpu_winsys_bo_get_metadata(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo,
struct radeon_bo_metadata *md)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
- struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
- struct amdgpu_bo_info info = {0};
-
- int r = amdgpu_bo_query_info(bo->bo, &info);
- if (r)
- return;
-
- uint64_t tiling_flags = info.metadata.tiling_info;
-
- if (ws->info.chip_class >= GFX9) {
- md->u.gfx9.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE);
- md->u.gfx9.scanout = AMDGPU_TILING_GET(tiling_flags, SCANOUT);
- } else {
- md->u.legacy.microtile = RADEON_LAYOUT_LINEAR;
- md->u.legacy.macrotile = RADEON_LAYOUT_LINEAR;
-
- if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4) /* 2D_TILED_THIN1 */
- md->u.legacy.macrotile = RADEON_LAYOUT_TILED;
- else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */
- md->u.legacy.microtile = RADEON_LAYOUT_TILED;
-
- md->u.legacy.pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
- md->u.legacy.bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
- md->u.legacy.bankh = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
- md->u.legacy.tile_split = eg_tile_split(AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT));
- md->u.legacy.mtilea = 1 << AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
- md->u.legacy.num_banks = 2 << AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
- md->u.legacy.scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */
- }
-
- md->size_metadata = info.metadata.size_metadata;
- memcpy(md->metadata, info.metadata.umd_metadata, sizeof(md->metadata));
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
+ struct amdgpu_bo_info info = {0};
+
+ int r = amdgpu_bo_query_info(bo->bo, &info);
+ if (r)
+ return;
+
+ uint64_t tiling_flags = info.metadata.tiling_info;
+
+ if (ws->info.chip_class >= GFX9) {
+ md->u.gfx9.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE);
+ md->u.gfx9.scanout = AMDGPU_TILING_GET(tiling_flags, SCANOUT);
+ } else {
+ md->u.legacy.microtile = RADEON_LAYOUT_LINEAR;
+ md->u.legacy.macrotile = RADEON_LAYOUT_LINEAR;
+
+ if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4) /* 2D_TILED_THIN1 */
+ md->u.legacy.macrotile = RADEON_LAYOUT_TILED;
+ else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */
+ md->u.legacy.microtile = RADEON_LAYOUT_TILED;
+
+ md->u.legacy.pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
+ md->u.legacy.bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
+ md->u.legacy.bankh = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
+ md->u.legacy.tile_split = eg_tile_split(AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT));
+ md->u.legacy.mtilea = 1 << AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
+ md->u.legacy.num_banks = 2 << AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
+ md->u.legacy.scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */
+ }
+
+ md->size_metadata = info.metadata.size_metadata;
+ memcpy(md->metadata, info.metadata.umd_metadata, sizeof(md->metadata));
}
static VkResult
-radv_amdgpu_winsys_bo_make_resident(struct radeon_winsys *_ws,
- struct radeon_winsys_bo *_bo,
- bool resident)
+radv_amdgpu_winsys_bo_make_resident(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo,
+ bool resident)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
- struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
- VkResult result = VK_SUCCESS;
-
- /* Do not add the BO to the global list if it's a local BO because the
- * kernel maintains a list for us.
- */
- if (bo->base.is_local)
- return VK_SUCCESS;
-
- /* Do not add the BO twice to the global list if the allbos debug
- * option is enabled.
- */
- if (ws->debug_all_bos)
- return VK_SUCCESS;
-
- if (resident) {
- result = radv_amdgpu_global_bo_list_add(ws, bo);
- } else {
- radv_amdgpu_global_bo_list_del(ws, bo);
- }
-
- return result;
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
+ VkResult result = VK_SUCCESS;
+
+ /* Do not add the BO to the global list if it's a local BO because the
+ * kernel maintains a list for us.
+ */
+ if (bo->base.is_local)
+ return VK_SUCCESS;
+
+ /* Do not add the BO twice to the global list if the allbos debug
+ * option is enabled.
+ */
+ if (ws->debug_all_bos)
+ return VK_SUCCESS;
+
+ if (resident) {
+ result = radv_amdgpu_global_bo_list_add(ws, bo);
+ } else {
+ radv_amdgpu_global_bo_list_del(ws, bo);
+ }
+
+ return result;
}
-static int radv_amdgpu_bo_va_compare(const void *a, const void *b)
+static int
+radv_amdgpu_bo_va_compare(const void *a, const void *b)
{
- const struct radv_amdgpu_winsys_bo *bo_a = *(const struct radv_amdgpu_winsys_bo * const*)a;
- const struct radv_amdgpu_winsys_bo *bo_b = *(const struct radv_amdgpu_winsys_bo * const*)b;
- return bo_a->base.va < bo_b->base.va ? -1 : bo_a->base.va > bo_b->base.va ? 1 : 0;
+ const struct radv_amdgpu_winsys_bo *bo_a = *(const struct radv_amdgpu_winsys_bo *const *)a;
+ const struct radv_amdgpu_winsys_bo *bo_b = *(const struct radv_amdgpu_winsys_bo *const *)b;
+ return bo_a->base.va < bo_b->base.va ? -1 : bo_a->base.va > bo_b->base.va ? 1 : 0;
}
-static void radv_amdgpu_dump_bo_log(struct radeon_winsys *_ws, FILE *file)
+static void
+radv_amdgpu_dump_bo_log(struct radeon_winsys *_ws, FILE *file)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
- struct radv_amdgpu_winsys_bo_log *bo_log;
-
- if (!ws->debug_log_bos)
- return;
-
- u_rwlock_rdlock(&ws->log_bo_list_lock);
- LIST_FOR_EACH_ENTRY(bo_log, &ws->log_bo_list, list) {
- fprintf(file, "timestamp=%llu, VA=%.16llx-%.16llx, destroyed=%d, is_virtual=%d\n",
- (long long)bo_log->timestamp, (long long)bo_log->va,
- (long long)(bo_log->va + bo_log->size),
- bo_log->destroyed, bo_log->is_virtual);
- }
- u_rwlock_rdunlock(&ws->log_bo_list_lock);
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ struct radv_amdgpu_winsys_bo_log *bo_log;
+
+ if (!ws->debug_log_bos)
+ return;
+
+ u_rwlock_rdlock(&ws->log_bo_list_lock);
+ LIST_FOR_EACH_ENTRY (bo_log, &ws->log_bo_list, list) {
+ fprintf(file, "timestamp=%llu, VA=%.16llx-%.16llx, destroyed=%d, is_virtual=%d\n",
+ (long long)bo_log->timestamp, (long long)bo_log->va,
+ (long long)(bo_log->va + bo_log->size), bo_log->destroyed, bo_log->is_virtual);
+ }
+ u_rwlock_rdunlock(&ws->log_bo_list_lock);
}
-static void radv_amdgpu_dump_bo_ranges(struct radeon_winsys *_ws, FILE *file)
+static void
+radv_amdgpu_dump_bo_ranges(struct radeon_winsys *_ws, FILE *file)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
- if (ws->debug_all_bos) {
- struct radv_amdgpu_winsys_bo **bos = NULL;
- int i = 0;
-
- u_rwlock_rdlock(&ws->global_bo_list.lock);
- bos = malloc(sizeof(*bos) * ws->global_bo_list.count);
- if (!bos) {
- u_rwlock_rdunlock(&ws->global_bo_list.lock);
- fprintf(file, " Failed to allocate memory to sort VA ranges for dumping\n");
- return;
- }
-
- for (i = 0; i < ws->global_bo_list.count; i++) {
- bos[i] = ws->global_bo_list.bos[i];
- }
- qsort(bos, ws->global_bo_list.count, sizeof(bos[0]), radv_amdgpu_bo_va_compare);
-
- for (i = 0; i < ws->global_bo_list.count; ++i) {
- fprintf(file, " VA=%.16llx-%.16llx, handle=%d%s\n",
- (long long)bos[i]->base.va, (long long)(bos[i]->base.va + bos[i]->size),
- bos[i]->bo_handle, bos[i]->is_virtual ? " sparse" : "");
- }
- free(bos);
- u_rwlock_rdunlock(&ws->global_bo_list.lock);
- } else
- fprintf(file, " To get BO VA ranges, please specify RADV_DEBUG=allbos\n");
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ if (ws->debug_all_bos) {
+ struct radv_amdgpu_winsys_bo **bos = NULL;
+ int i = 0;
+
+ u_rwlock_rdlock(&ws->global_bo_list.lock);
+ bos = malloc(sizeof(*bos) * ws->global_bo_list.count);
+ if (!bos) {
+ u_rwlock_rdunlock(&ws->global_bo_list.lock);
+ fprintf(file, " Failed to allocate memory to sort VA ranges for dumping\n");
+ return;
+ }
+
+ for (i = 0; i < ws->global_bo_list.count; i++) {
+ bos[i] = ws->global_bo_list.bos[i];
+ }
+ qsort(bos, ws->global_bo_list.count, sizeof(bos[0]), radv_amdgpu_bo_va_compare);
+
+ for (i = 0; i < ws->global_bo_list.count; ++i) {
+ fprintf(file, " VA=%.16llx-%.16llx, handle=%d%s\n", (long long)bos[i]->base.va,
+ (long long)(bos[i]->base.va + bos[i]->size), bos[i]->bo_handle,
+ bos[i]->is_virtual ? " sparse" : "");
+ }
+ free(bos);
+ u_rwlock_rdunlock(&ws->global_bo_list.lock);
+ } else
+ fprintf(file, " To get BO VA ranges, please specify RADV_DEBUG=allbos\n");
}
-void radv_amdgpu_bo_init_functions(struct radv_amdgpu_winsys *ws)
+void
+radv_amdgpu_bo_init_functions(struct radv_amdgpu_winsys *ws)
{
- ws->base.buffer_create = radv_amdgpu_winsys_bo_create;
- ws->base.buffer_destroy = radv_amdgpu_winsys_bo_destroy;
- ws->base.buffer_map = radv_amdgpu_winsys_bo_map;
- ws->base.buffer_unmap = radv_amdgpu_winsys_bo_unmap;
- ws->base.buffer_from_ptr = radv_amdgpu_winsys_bo_from_ptr;
- ws->base.buffer_from_fd = radv_amdgpu_winsys_bo_from_fd;
- ws->base.buffer_get_fd = radv_amdgpu_winsys_get_fd;
- ws->base.buffer_set_metadata = radv_amdgpu_winsys_bo_set_metadata;
- ws->base.buffer_get_metadata = radv_amdgpu_winsys_bo_get_metadata;
- ws->base.buffer_virtual_bind = radv_amdgpu_winsys_bo_virtual_bind;
- ws->base.buffer_get_flags_from_fd = radv_amdgpu_bo_get_flags_from_fd;
- ws->base.buffer_make_resident = radv_amdgpu_winsys_bo_make_resident;
- ws->base.dump_bo_ranges = radv_amdgpu_dump_bo_ranges;
- ws->base.dump_bo_log = radv_amdgpu_dump_bo_log;
+ ws->base.buffer_create = radv_amdgpu_winsys_bo_create;
+ ws->base.buffer_destroy = radv_amdgpu_winsys_bo_destroy;
+ ws->base.buffer_map = radv_amdgpu_winsys_bo_map;
+ ws->base.buffer_unmap = radv_amdgpu_winsys_bo_unmap;
+ ws->base.buffer_from_ptr = radv_amdgpu_winsys_bo_from_ptr;
+ ws->base.buffer_from_fd = radv_amdgpu_winsys_bo_from_fd;
+ ws->base.buffer_get_fd = radv_amdgpu_winsys_get_fd;
+ ws->base.buffer_set_metadata = radv_amdgpu_winsys_bo_set_metadata;
+ ws->base.buffer_get_metadata = radv_amdgpu_winsys_bo_get_metadata;
+ ws->base.buffer_virtual_bind = radv_amdgpu_winsys_bo_virtual_bind;
+ ws->base.buffer_get_flags_from_fd = radv_amdgpu_bo_get_flags_from_fd;
+ ws->base.buffer_make_resident = radv_amdgpu_winsys_bo_make_resident;
+ ws->base.dump_bo_ranges = radv_amdgpu_dump_bo_ranges;
+ ws->base.dump_bo_log = radv_amdgpu_dump_bo_log;
}
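
For readers skimming the reformatted buffer_set_metadata/buffer_get_metadata pair above: the 64-bit tiling_info word is packed and unpacked with the AMDGPU_TILING_SET/AMDGPU_TILING_GET macros from drm-uapi/amdgpu_drm.h. Below is a minimal standalone sketch of that round trip, illustrative only and not taken from the patch; the helper names are made up and only the GFX9 swizzle-mode and scanout fields are shown.

#include <stdbool.h>
#include <stdint.h>

#include "drm-uapi/amdgpu_drm.h"

/* Pack a GFX9-style swizzle mode and scanout flag into tiling_info, the same
 * way radv_amdgpu_winsys_bo_set_metadata() does above. */
static uint64_t
pack_gfx9_tiling(uint32_t swizzle_mode, bool scanout)
{
   uint64_t tiling_flags = 0;

   tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, swizzle_mode);
   tiling_flags |= AMDGPU_TILING_SET(SCANOUT, scanout);
   return tiling_flags;
}

/* Recover the same fields, mirroring radv_amdgpu_winsys_bo_get_metadata(). */
static void
unpack_gfx9_tiling(uint64_t tiling_flags, uint32_t *swizzle_mode, bool *scanout)
{
   *swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE);
   *scanout = AMDGPU_TILING_GET(tiling_flags, SCANOUT);
}

The legacy (pre-GFX9) path packs ARRAY_MODE, PIPE_CONFIG, the bank geometry and the micro-tile mode into the same word with the same macros, which is why the set and get paths stay symmetric.
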
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h
index 6284484261d..0beaa7ef727 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.h
@@ -31,46 +31,45 @@
#include "radv_amdgpu_winsys.h"
-
struct radv_amdgpu_map_range {
- uint64_t offset;
- uint64_t size;
- struct radv_amdgpu_winsys_bo *bo;
- uint64_t bo_offset;
+ uint64_t offset;
+ uint64_t size;
+ struct radv_amdgpu_winsys_bo *bo;
+ uint64_t bo_offset;
};
struct radv_amdgpu_winsys_bo {
- struct radeon_winsys_bo base;
- amdgpu_va_handle va_handle;
- uint64_t size;
- bool is_virtual;
- uint8_t priority;
- int ref_count;
+ struct radeon_winsys_bo base;
+ amdgpu_va_handle va_handle;
+ uint64_t size;
+ bool is_virtual;
+ uint8_t priority;
+ int ref_count;
- union {
- /* physical bo */
- struct {
- amdgpu_bo_handle bo;
- bool is_shared;
- uint32_t bo_handle;
- };
- /* virtual bo */
- struct {
- struct radv_amdgpu_map_range *ranges;
- uint32_t range_count;
- uint32_t range_capacity;
+ union {
+ /* physical bo */
+ struct {
+ amdgpu_bo_handle bo;
+ bool is_shared;
+ uint32_t bo_handle;
+ };
+ /* virtual bo */
+ struct {
+ struct radv_amdgpu_map_range *ranges;
+ uint32_t range_count;
+ uint32_t range_capacity;
- struct radv_amdgpu_winsys_bo **bos;
- uint32_t bo_count;
- uint32_t bo_capacity;
- };
- };
+ struct radv_amdgpu_winsys_bo **bos;
+ uint32_t bo_count;
+ uint32_t bo_capacity;
+ };
+ };
};
-static inline
-struct radv_amdgpu_winsys_bo *radv_amdgpu_winsys_bo(struct radeon_winsys_bo *bo)
+static inline struct radv_amdgpu_winsys_bo *
+radv_amdgpu_winsys_bo(struct radeon_winsys_bo *bo)
{
- return (struct radv_amdgpu_winsys_bo *)bo;
+ return (struct radv_amdgpu_winsys_bo *)bo;
}
void radv_amdgpu_bo_init_functions(struct radv_amdgpu_winsys *ws);
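
The radv_amdgpu_winsys_bo() helper above is the usual embedded-base downcast: struct radeon_winsys_bo is the first member of the amdgpu-specific wrapper, so the generic handle can be cast back to the wrapper. A minimal sketch of the idiom follows, with hypothetical type names that do not exist in the tree.

#include <stddef.h>
#include <stdint.h>

struct base_bo {
   uint64_t va;
};

struct impl_bo {
   struct base_bo base; /* must stay the first member for the cast below */
   int impl_state;
};

/* offsetof(impl_bo, base) == 0, so a base pointer is also an impl pointer. */
_Static_assert(offsetof(struct impl_bo, base) == 0, "base must be the first member");

static inline struct impl_bo *
impl_bo(struct base_bo *bo)
{
   return (struct impl_bo *)bo;
}
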
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
index 5ecbe777a59..7347f04c8b2 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
@@ -22,1255 +22,1220 @@
* IN THE SOFTWARE.
*/
-#include <stdlib.h>
#include <amdgpu.h>
-#include "drm-uapi/amdgpu_drm.h"
#include <assert.h>
-#include <pthread.h>
#include <errno.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include "drm-uapi/amdgpu_drm.h"
#include "util/u_memory.h"
#include "ac_debug.h"
-#include "radv_radeon_winsys.h"
-#include "radv_amdgpu_cs.h"
#include "radv_amdgpu_bo.h"
+#include "radv_amdgpu_cs.h"
+#include "radv_radeon_winsys.h"
#include "sid.h"
-
-enum {
- VIRTUAL_BUFFER_HASH_TABLE_SIZE = 1024
-};
+enum { VIRTUAL_BUFFER_HASH_TABLE_SIZE = 1024 };
struct radv_amdgpu_cs {
- struct radeon_cmdbuf base;
- struct radv_amdgpu_winsys *ws;
-
- struct amdgpu_cs_ib_info ib;
-
- struct radeon_winsys_bo *ib_buffer;
- uint8_t *ib_mapped;
- unsigned max_num_buffers;
- unsigned num_buffers;
- struct drm_amdgpu_bo_list_entry *handles;
-
- struct radeon_winsys_bo **old_ib_buffers;
- unsigned num_old_ib_buffers;
- unsigned max_num_old_ib_buffers;
- unsigned *ib_size_ptr;
- VkResult status;
- bool is_chained;
-
- int buffer_hash_table[1024];
- unsigned hw_ip;
-
- unsigned num_virtual_buffers;
- unsigned max_num_virtual_buffers;
- struct radeon_winsys_bo **virtual_buffers;
- int *virtual_buffer_hash_table;
-
- /* For chips that don't support chaining. */
- struct radeon_cmdbuf *old_cs_buffers;
- unsigned num_old_cs_buffers;
+ struct radeon_cmdbuf base;
+ struct radv_amdgpu_winsys *ws;
+
+ struct amdgpu_cs_ib_info ib;
+
+ struct radeon_winsys_bo *ib_buffer;
+ uint8_t *ib_mapped;
+ unsigned max_num_buffers;
+ unsigned num_buffers;
+ struct drm_amdgpu_bo_list_entry *handles;
+
+ struct radeon_winsys_bo **old_ib_buffers;
+ unsigned num_old_ib_buffers;
+ unsigned max_num_old_ib_buffers;
+ unsigned *ib_size_ptr;
+ VkResult status;
+ bool is_chained;
+
+ int buffer_hash_table[1024];
+ unsigned hw_ip;
+
+ unsigned num_virtual_buffers;
+ unsigned max_num_virtual_buffers;
+ struct radeon_winsys_bo **virtual_buffers;
+ int *virtual_buffer_hash_table;
+
+ /* For chips that don't support chaining. */
+ struct radeon_cmdbuf *old_cs_buffers;
+ unsigned num_old_cs_buffers;
};
static inline struct radv_amdgpu_cs *
radv_amdgpu_cs(struct radeon_cmdbuf *base)
{
- return (struct radv_amdgpu_cs*)base;
+ return (struct radv_amdgpu_cs *)base;
}
-static int ring_to_hw_ip(enum ring_type ring)
+static int
+ring_to_hw_ip(enum ring_type ring)
{
- switch (ring) {
- case RING_GFX:
- return AMDGPU_HW_IP_GFX;
- case RING_DMA:
- return AMDGPU_HW_IP_DMA;
- case RING_COMPUTE:
- return AMDGPU_HW_IP_COMPUTE;
- default:
- unreachable("unsupported ring");
- }
+ switch (ring) {
+ case RING_GFX:
+ return AMDGPU_HW_IP_GFX;
+ case RING_DMA:
+ return AMDGPU_HW_IP_DMA;
+ case RING_COMPUTE:
+ return AMDGPU_HW_IP_COMPUTE;
+ default:
+ unreachable("unsupported ring");
+ }
}
struct radv_amdgpu_cs_request {
- /** Specify HW IP block type to which to send the IB. */
- unsigned ip_type;
-
- /** IP instance index if there are several IPs of the same type. */
- unsigned ip_instance;
-
- /**
- * Specify ring index of the IP. We could have several rings
- * in the same IP. E.g. 0 for SDMA0 and 1 for SDMA1.
- */
- uint32_t ring;
-
- /**
- * BO list handles used by this request.
- */
- struct drm_amdgpu_bo_list_entry *handles;
- uint32_t num_handles;
-
- /** Number of IBs to submit in the field ibs. */
- uint32_t number_of_ibs;
-
- /**
- * IBs to submit. Those IBs will be submit together as single entity
- */
- struct amdgpu_cs_ib_info *ibs;
-
- /**
- * The returned sequence number for the command submission
- */
- uint64_t seq_no;
+ /** Specify HW IP block type to which to send the IB. */
+ unsigned ip_type;
+
+ /** IP instance index if there are several IPs of the same type. */
+ unsigned ip_instance;
+
+ /**
+ * Specify ring index of the IP. We could have several rings
+ * in the same IP. E.g. 0 for SDMA0 and 1 for SDMA1.
+ */
+ uint32_t ring;
+
+ /**
+ * BO list handles used by this request.
+ */
+ struct drm_amdgpu_bo_list_entry *handles;
+ uint32_t num_handles;
+
+ /** Number of IBs to submit in the field ibs. */
+ uint32_t number_of_ibs;
+
+ /**
+    * IBs to submit. Those IBs will be submitted together as a single entity.
+ */
+ struct amdgpu_cs_ib_info *ibs;
+
+ /**
+ * The returned sequence number for the command submission
+ */
+ uint64_t seq_no;
};
-
static int radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx,
- struct radv_amdgpu_cs_request *request,
- struct radv_winsys_sem_info *sem_info);
+ struct radv_amdgpu_cs_request *request,
+ struct radv_winsys_sem_info *sem_info);
-static void radv_amdgpu_request_to_fence(struct radv_amdgpu_ctx *ctx,
- struct radv_amdgpu_fence *fence,
- struct radv_amdgpu_cs_request *req)
+static void
+radv_amdgpu_request_to_fence(struct radv_amdgpu_ctx *ctx, struct radv_amdgpu_fence *fence,
+ struct radv_amdgpu_cs_request *req)
{
- fence->fence.context = ctx->ctx;
- fence->fence.ip_type = req->ip_type;
- fence->fence.ip_instance = req->ip_instance;
- fence->fence.ring = req->ring;
- fence->fence.fence = req->seq_no;
- fence->user_ptr = (volatile uint64_t*)(ctx->fence_map + req->ip_type * MAX_RINGS_PER_TYPE + req->ring);
+ fence->fence.context = ctx->ctx;
+ fence->fence.ip_type = req->ip_type;
+ fence->fence.ip_instance = req->ip_instance;
+ fence->fence.ring = req->ring;
+ fence->fence.fence = req->seq_no;
+ fence->user_ptr =
+ (volatile uint64_t *)(ctx->fence_map + req->ip_type * MAX_RINGS_PER_TYPE + req->ring);
}
-static void radv_amdgpu_cs_destroy(struct radeon_cmdbuf *rcs)
+static void
+radv_amdgpu_cs_destroy(struct radeon_cmdbuf *rcs)
{
- struct radv_amdgpu_cs *cs = radv_amdgpu_cs(rcs);
-
- if (cs->ib_buffer)
- cs->ws->base.buffer_destroy(&cs->ws->base, cs->ib_buffer);
- else
- free(cs->base.buf);
-
- for (unsigned i = 0; i < cs->num_old_ib_buffers; ++i)
- cs->ws->base.buffer_destroy(&cs->ws->base, cs->old_ib_buffers[i]);
-
- for (unsigned i = 0; i < cs->num_old_cs_buffers; ++i) {
- free(cs->old_cs_buffers[i].buf);
- }
-
- free(cs->old_cs_buffers);
- free(cs->old_ib_buffers);
- free(cs->virtual_buffers);
- free(cs->virtual_buffer_hash_table);
- free(cs->handles);
- free(cs);
+ struct radv_amdgpu_cs *cs = radv_amdgpu_cs(rcs);
+
+ if (cs->ib_buffer)
+ cs->ws->base.buffer_destroy(&cs->ws->base, cs->ib_buffer);
+ else
+ free(cs->base.buf);
+
+ for (unsigned i = 0; i < cs->num_old_ib_buffers; ++i)
+ cs->ws->base.buffer_destroy(&cs->ws->base, cs->old_ib_buffers[i]);
+
+ for (unsigned i = 0; i < cs->num_old_cs_buffers; ++i) {
+ free(cs->old_cs_buffers[i].buf);
+ }
+
+ free(cs->old_cs_buffers);
+ free(cs->old_ib_buffers);
+ free(cs->virtual_buffers);
+ free(cs->virtual_buffer_hash_table);
+ free(cs->handles);
+ free(cs);
}
-static void radv_amdgpu_init_cs(struct radv_amdgpu_cs *cs,
- enum ring_type ring_type)
+static void
+radv_amdgpu_init_cs(struct radv_amdgpu_cs *cs, enum ring_type ring_type)
{
- for (int i = 0; i < ARRAY_SIZE(cs->buffer_hash_table); ++i)
- cs->buffer_hash_table[i] = -1;
+ for (int i = 0; i < ARRAY_SIZE(cs->buffer_hash_table); ++i)
+ cs->buffer_hash_table[i] = -1;
- cs->hw_ip = ring_to_hw_ip(ring_type);
+ cs->hw_ip = ring_to_hw_ip(ring_type);
}
static struct radeon_cmdbuf *
-radv_amdgpu_cs_create(struct radeon_winsys *ws,
- enum ring_type ring_type)
+radv_amdgpu_cs_create(struct radeon_winsys *ws, enum ring_type ring_type)
{
- struct radv_amdgpu_cs *cs;
- uint32_t ib_size = 20 * 1024 * 4;
- cs = calloc(1, sizeof(struct radv_amdgpu_cs));
- if (!cs)
- return NULL;
-
- cs->ws = radv_amdgpu_winsys(ws);
- radv_amdgpu_init_cs(cs, ring_type);
-
- if (cs->ws->use_ib_bos) {
- cs->ib_buffer = ws->buffer_create(ws, ib_size, 0,
- cs->ws->cs_bo_domain,
- RADEON_FLAG_CPU_ACCESS |
- RADEON_FLAG_NO_INTERPROCESS_SHARING |
- RADEON_FLAG_READ_ONLY |
- RADEON_FLAG_GTT_WC,
- RADV_BO_PRIORITY_CS);
- if (!cs->ib_buffer) {
- free(cs);
- return NULL;
- }
-
- cs->ib_mapped = ws->buffer_map(cs->ib_buffer);
- if (!cs->ib_mapped) {
- ws->buffer_destroy(ws, cs->ib_buffer);
- free(cs);
- return NULL;
- }
-
- cs->ib.ib_mc_address = radv_amdgpu_winsys_bo(cs->ib_buffer)->base.va;
- cs->base.buf = (uint32_t *)cs->ib_mapped;
- cs->base.max_dw = ib_size / 4 - 4;
- cs->ib_size_ptr = &cs->ib.size;
- cs->ib.size = 0;
-
- ws->cs_add_buffer(&cs->base, cs->ib_buffer);
- } else {
- uint32_t *buf = malloc(16384);
- if (!buf) {
- free(cs);
- return NULL;
- }
- cs->base.buf = buf;
- cs->base.max_dw = 4096;
- }
-
- return &cs->base;
+ struct radv_amdgpu_cs *cs;
+ uint32_t ib_size = 20 * 1024 * 4;
+ cs = calloc(1, sizeof(struct radv_amdgpu_cs));
+ if (!cs)
+ return NULL;
+
+ cs->ws = radv_amdgpu_winsys(ws);
+ radv_amdgpu_init_cs(cs, ring_type);
+
+ if (cs->ws->use_ib_bos) {
+ cs->ib_buffer =
+ ws->buffer_create(ws, ib_size, 0, cs->ws->cs_bo_domain,
+ RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING |
+ RADEON_FLAG_READ_ONLY | RADEON_FLAG_GTT_WC,
+ RADV_BO_PRIORITY_CS);
+ if (!cs->ib_buffer) {
+ free(cs);
+ return NULL;
+ }
+
+ cs->ib_mapped = ws->buffer_map(cs->ib_buffer);
+ if (!cs->ib_mapped) {
+ ws->buffer_destroy(ws, cs->ib_buffer);
+ free(cs);
+ return NULL;
+ }
+
+ cs->ib.ib_mc_address = radv_amdgpu_winsys_bo(cs->ib_buffer)->base.va;
+ cs->base.buf = (uint32_t *)cs->ib_mapped;
+ cs->base.max_dw = ib_size / 4 - 4;
+ cs->ib_size_ptr = &cs->ib.size;
+ cs->ib.size = 0;
+
+ ws->cs_add_buffer(&cs->base, cs->ib_buffer);
+ } else {
+ uint32_t *buf = malloc(16384);
+ if (!buf) {
+ free(cs);
+ return NULL;
+ }
+ cs->base.buf = buf;
+ cs->base.max_dw = 4096;
+ }
+
+ return &cs->base;
}
-static void radv_amdgpu_cs_grow(struct radeon_cmdbuf *_cs, size_t min_size)
+static void
+radv_amdgpu_cs_grow(struct radeon_cmdbuf *_cs, size_t min_size)
{
- struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
-
- if (cs->status != VK_SUCCESS) {
- cs->base.cdw = 0;
- return;
- }
-
- if (!cs->ws->use_ib_bos) {
- const uint64_t limit_dws = 0xffff8;
- uint64_t ib_dws = MAX2(cs->base.cdw + min_size,
- MIN2(cs->base.max_dw * 2, limit_dws));
-
- /* The total ib size cannot exceed limit_dws dwords. */
- if (ib_dws > limit_dws)
- {
- /* The maximum size in dwords has been reached,
- * try to allocate a new one.
- */
- struct radeon_cmdbuf *old_cs_buffers =
- realloc(cs->old_cs_buffers,
- (cs->num_old_cs_buffers + 1) * sizeof(*cs->old_cs_buffers));
- if (!old_cs_buffers) {
- cs->status = VK_ERROR_OUT_OF_HOST_MEMORY;
- cs->base.cdw = 0;
- return;
- }
- cs->old_cs_buffers = old_cs_buffers;
-
- /* Store the current one for submitting it later. */
- cs->old_cs_buffers[cs->num_old_cs_buffers].cdw = cs->base.cdw;
- cs->old_cs_buffers[cs->num_old_cs_buffers].max_dw = cs->base.max_dw;
- cs->old_cs_buffers[cs->num_old_cs_buffers].buf = cs->base.buf;
- cs->num_old_cs_buffers++;
-
- /* Reset the cs, it will be re-allocated below. */
- cs->base.cdw = 0;
- cs->base.buf = NULL;
-
- /* Re-compute the number of dwords to allocate. */
- ib_dws = MAX2(cs->base.cdw + min_size,
- MIN2(cs->base.max_dw * 2, limit_dws));
- if (ib_dws > limit_dws) {
- fprintf(stderr, "amdgpu: Too high number of "
- "dwords to allocate\n");
- cs->status = VK_ERROR_OUT_OF_HOST_MEMORY;
- return;
- }
- }
-
- uint32_t *new_buf = realloc(cs->base.buf, ib_dws * 4);
- if (new_buf) {
- cs->base.buf = new_buf;
- cs->base.max_dw = ib_dws;
- } else {
- cs->status = VK_ERROR_OUT_OF_HOST_MEMORY;
- cs->base.cdw = 0;
- }
- return;
- }
-
- uint64_t ib_size = MAX2(min_size * 4 + 16, cs->base.max_dw * 4 * 2);
-
- /* max that fits in the chain size field. */
- ib_size = MIN2(ib_size, 0xfffff);
-
- while (!cs->base.cdw || (cs->base.cdw & 7) != 4)
- radeon_emit(&cs->base, PKT3_NOP_PAD);
-
- *cs->ib_size_ptr |= cs->base.cdw + 4;
-
- if (cs->num_old_ib_buffers == cs->max_num_old_ib_buffers) {
- unsigned max_num_old_ib_buffers =
- MAX2(1, cs->max_num_old_ib_buffers * 2);
- struct radeon_winsys_bo **old_ib_buffers =
- realloc(cs->old_ib_buffers,
- max_num_old_ib_buffers * sizeof(void*));
- if (!old_ib_buffers) {
- cs->status = VK_ERROR_OUT_OF_HOST_MEMORY;
- return;
- }
- cs->max_num_old_ib_buffers = max_num_old_ib_buffers;
- cs->old_ib_buffers = old_ib_buffers;
- }
-
- cs->old_ib_buffers[cs->num_old_ib_buffers++] = cs->ib_buffer;
-
- cs->ib_buffer = cs->ws->base.buffer_create(&cs->ws->base, ib_size, 0,
- cs->ws->cs_bo_domain,
- RADEON_FLAG_CPU_ACCESS |
- RADEON_FLAG_NO_INTERPROCESS_SHARING |
- RADEON_FLAG_READ_ONLY |
- RADEON_FLAG_GTT_WC,
- RADV_BO_PRIORITY_CS);
-
- if (!cs->ib_buffer) {
- cs->base.cdw = 0;
- cs->status = VK_ERROR_OUT_OF_DEVICE_MEMORY;
- cs->ib_buffer = cs->old_ib_buffers[--cs->num_old_ib_buffers];
- }
-
- cs->ib_mapped = cs->ws->base.buffer_map(cs->ib_buffer);
- if (!cs->ib_mapped) {
- cs->ws->base.buffer_destroy(&cs->ws->base, cs->ib_buffer);
- cs->base.cdw = 0;
-
- /* VK_ERROR_MEMORY_MAP_FAILED is not valid for vkEndCommandBuffer. */
- cs->status = VK_ERROR_OUT_OF_DEVICE_MEMORY;
- cs->ib_buffer = cs->old_ib_buffers[--cs->num_old_ib_buffers];
- }
-
- cs->ws->base.cs_add_buffer(&cs->base, cs->ib_buffer);
-
- radeon_emit(&cs->base, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
- radeon_emit(&cs->base, radv_amdgpu_winsys_bo(cs->ib_buffer)->base.va);
- radeon_emit(&cs->base, radv_amdgpu_winsys_bo(cs->ib_buffer)->base.va >> 32);
- radeon_emit(&cs->base, S_3F2_CHAIN(1) | S_3F2_VALID(1));
-
- cs->ib_size_ptr = cs->base.buf + cs->base.cdw - 1;
-
- cs->base.buf = (uint32_t *)cs->ib_mapped;
- cs->base.cdw = 0;
- cs->base.max_dw = ib_size / 4 - 4;
-
+ struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
+
+ if (cs->status != VK_SUCCESS) {
+ cs->base.cdw = 0;
+ return;
+ }
+
+ if (!cs->ws->use_ib_bos) {
+ const uint64_t limit_dws = 0xffff8;
+ uint64_t ib_dws = MAX2(cs->base.cdw + min_size, MIN2(cs->base.max_dw * 2, limit_dws));
+
+ /* The total ib size cannot exceed limit_dws dwords. */
+ if (ib_dws > limit_dws) {
+         /* The maximum size in dwords has been reached;
+          * try to allocate a new buffer.
+ */
+ struct radeon_cmdbuf *old_cs_buffers =
+ realloc(cs->old_cs_buffers, (cs->num_old_cs_buffers + 1) * sizeof(*cs->old_cs_buffers));
+ if (!old_cs_buffers) {
+ cs->status = VK_ERROR_OUT_OF_HOST_MEMORY;
+ cs->base.cdw = 0;
+ return;
+ }
+ cs->old_cs_buffers = old_cs_buffers;
+
+ /* Store the current one for submitting it later. */
+ cs->old_cs_buffers[cs->num_old_cs_buffers].cdw = cs->base.cdw;
+ cs->old_cs_buffers[cs->num_old_cs_buffers].max_dw = cs->base.max_dw;
+ cs->old_cs_buffers[cs->num_old_cs_buffers].buf = cs->base.buf;
+ cs->num_old_cs_buffers++;
+
+         /* Reset the cs; it will be re-allocated below. */
+ cs->base.cdw = 0;
+ cs->base.buf = NULL;
+
+ /* Re-compute the number of dwords to allocate. */
+ ib_dws = MAX2(cs->base.cdw + min_size, MIN2(cs->base.max_dw * 2, limit_dws));
+ if (ib_dws > limit_dws) {
+ fprintf(stderr, "amdgpu: Too high number of "
+ "dwords to allocate\n");
+ cs->status = VK_ERROR_OUT_OF_HOST_MEMORY;
+ return;
+ }
+ }
+
+ uint32_t *new_buf = realloc(cs->base.buf, ib_dws * 4);
+ if (new_buf) {
+ cs->base.buf = new_buf;
+ cs->base.max_dw = ib_dws;
+ } else {
+ cs->status = VK_ERROR_OUT_OF_HOST_MEMORY;
+ cs->base.cdw = 0;
+ }
+ return;
+ }
+
+ uint64_t ib_size = MAX2(min_size * 4 + 16, cs->base.max_dw * 4 * 2);
+
+ /* max that fits in the chain size field. */
+ ib_size = MIN2(ib_size, 0xfffff);
+
+ while (!cs->base.cdw || (cs->base.cdw & 7) != 4)
+ radeon_emit(&cs->base, PKT3_NOP_PAD);
+
+ *cs->ib_size_ptr |= cs->base.cdw + 4;
+
+ if (cs->num_old_ib_buffers == cs->max_num_old_ib_buffers) {
+ unsigned max_num_old_ib_buffers = MAX2(1, cs->max_num_old_ib_buffers * 2);
+ struct radeon_winsys_bo **old_ib_buffers =
+ realloc(cs->old_ib_buffers, max_num_old_ib_buffers * sizeof(void *));
+ if (!old_ib_buffers) {
+ cs->status = VK_ERROR_OUT_OF_HOST_MEMORY;
+ return;
+ }
+ cs->max_num_old_ib_buffers = max_num_old_ib_buffers;
+ cs->old_ib_buffers = old_ib_buffers;
+ }
+
+ cs->old_ib_buffers[cs->num_old_ib_buffers++] = cs->ib_buffer;
+
+ cs->ib_buffer =
+ cs->ws->base.buffer_create(&cs->ws->base, ib_size, 0, cs->ws->cs_bo_domain,
+ RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING |
+ RADEON_FLAG_READ_ONLY | RADEON_FLAG_GTT_WC,
+ RADV_BO_PRIORITY_CS);
+
+ if (!cs->ib_buffer) {
+ cs->base.cdw = 0;
+ cs->status = VK_ERROR_OUT_OF_DEVICE_MEMORY;
+ cs->ib_buffer = cs->old_ib_buffers[--cs->num_old_ib_buffers];
+ }
+
+ cs->ib_mapped = cs->ws->base.buffer_map(cs->ib_buffer);
+ if (!cs->ib_mapped) {
+ cs->ws->base.buffer_destroy(&cs->ws->base, cs->ib_buffer);
+ cs->base.cdw = 0;
+
+ /* VK_ERROR_MEMORY_MAP_FAILED is not valid for vkEndCommandBuffer. */
+ cs->status = VK_ERROR_OUT_OF_DEVICE_MEMORY;
+ cs->ib_buffer = cs->old_ib_buffers[--cs->num_old_ib_buffers];
+ }
+
+ cs->ws->base.cs_add_buffer(&cs->base, cs->ib_buffer);
+
+ radeon_emit(&cs->base, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
+ radeon_emit(&cs->base, radv_amdgpu_winsys_bo(cs->ib_buffer)->base.va);
+ radeon_emit(&cs->base, radv_amdgpu_winsys_bo(cs->ib_buffer)->base.va >> 32);
+ radeon_emit(&cs->base, S_3F2_CHAIN(1) | S_3F2_VALID(1));
+
+ cs->ib_size_ptr = cs->base.buf + cs->base.cdw - 1;
+
+ cs->base.buf = (uint32_t *)cs->ib_mapped;
+ cs->base.cdw = 0;
+ cs->base.max_dw = ib_size / 4 - 4;
}
-static VkResult radv_amdgpu_cs_finalize(struct radeon_cmdbuf *_cs)
+static VkResult
+radv_amdgpu_cs_finalize(struct radeon_cmdbuf *_cs)
{
- struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
+ struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
- if (cs->ws->use_ib_bos) {
- while (!cs->base.cdw || (cs->base.cdw & 7) != 0)
- radeon_emit(&cs->base, PKT3_NOP_PAD);
+ if (cs->ws->use_ib_bos) {
+ while (!cs->base.cdw || (cs->base.cdw & 7) != 0)
+ radeon_emit(&cs->base, PKT3_NOP_PAD);
- *cs->ib_size_ptr |= cs->base.cdw;
+ *cs->ib_size_ptr |= cs->base.cdw;
- cs->is_chained = false;
- }
+ cs->is_chained = false;
+ }
- return cs->status;
+ return cs->status;
}
-static void radv_amdgpu_cs_reset(struct radeon_cmdbuf *_cs)
+static void
+radv_amdgpu_cs_reset(struct radeon_cmdbuf *_cs)
{
- struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
- cs->base.cdw = 0;
- cs->status = VK_SUCCESS;
-
- for (unsigned i = 0; i < cs->num_buffers; ++i) {
- unsigned hash = cs->handles[i].bo_handle &
- (ARRAY_SIZE(cs->buffer_hash_table) - 1);
- cs->buffer_hash_table[hash] = -1;
- }
-
- for (unsigned i = 0; i < cs->num_virtual_buffers; ++i) {
- unsigned hash = ((uintptr_t)cs->virtual_buffers[i] >> 6) & (VIRTUAL_BUFFER_HASH_TABLE_SIZE - 1);
- cs->virtual_buffer_hash_table[hash] = -1;
- }
-
- cs->num_buffers = 0;
- cs->num_virtual_buffers = 0;
-
- if (cs->ws->use_ib_bos) {
- cs->ws->base.cs_add_buffer(&cs->base, cs->ib_buffer);
-
- for (unsigned i = 0; i < cs->num_old_ib_buffers; ++i)
- cs->ws->base.buffer_destroy(&cs->ws->base, cs->old_ib_buffers[i]);
-
- cs->num_old_ib_buffers = 0;
- cs->ib.ib_mc_address = radv_amdgpu_winsys_bo(cs->ib_buffer)->base.va;
- cs->ib_size_ptr = &cs->ib.size;
- cs->ib.size = 0;
- } else {
- for (unsigned i = 0; i < cs->num_old_cs_buffers; ++i) {
- struct radeon_cmdbuf *rcs = &cs->old_cs_buffers[i];
- free(rcs->buf);
- }
-
- free(cs->old_cs_buffers);
- cs->old_cs_buffers = NULL;
- cs->num_old_cs_buffers = 0;
- }
+ struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
+ cs->base.cdw = 0;
+ cs->status = VK_SUCCESS;
+
+ for (unsigned i = 0; i < cs->num_buffers; ++i) {
+ unsigned hash = cs->handles[i].bo_handle & (ARRAY_SIZE(cs->buffer_hash_table) - 1);
+ cs->buffer_hash_table[hash] = -1;
+ }
+
+ for (unsigned i = 0; i < cs->num_virtual_buffers; ++i) {
+ unsigned hash =
+ ((uintptr_t)cs->virtual_buffers[i] >> 6) & (VIRTUAL_BUFFER_HASH_TABLE_SIZE - 1);
+ cs->virtual_buffer_hash_table[hash] = -1;
+ }
+
+ cs->num_buffers = 0;
+ cs->num_virtual_buffers = 0;
+
+ if (cs->ws->use_ib_bos) {
+ cs->ws->base.cs_add_buffer(&cs->base, cs->ib_buffer);
+
+ for (unsigned i = 0; i < cs->num_old_ib_buffers; ++i)
+ cs->ws->base.buffer_destroy(&cs->ws->base, cs->old_ib_buffers[i]);
+
+ cs->num_old_ib_buffers = 0;
+ cs->ib.ib_mc_address = radv_amdgpu_winsys_bo(cs->ib_buffer)->base.va;
+ cs->ib_size_ptr = &cs->ib.size;
+ cs->ib.size = 0;
+ } else {
+ for (unsigned i = 0; i < cs->num_old_cs_buffers; ++i) {
+ struct radeon_cmdbuf *rcs = &cs->old_cs_buffers[i];
+ free(rcs->buf);
+ }
+
+ free(cs->old_cs_buffers);
+ cs->old_cs_buffers = NULL;
+ cs->num_old_cs_buffers = 0;
+ }
}
-static int radv_amdgpu_cs_find_buffer(struct radv_amdgpu_cs *cs,
- uint32_t bo)
+static int
+radv_amdgpu_cs_find_buffer(struct radv_amdgpu_cs *cs, uint32_t bo)
{
- unsigned hash = bo & (ARRAY_SIZE(cs->buffer_hash_table) - 1);
- int index = cs->buffer_hash_table[hash];
+ unsigned hash = bo & (ARRAY_SIZE(cs->buffer_hash_table) - 1);
+ int index = cs->buffer_hash_table[hash];
- if (index == -1)
- return -1;
+ if (index == -1)
+ return -1;
- if (cs->handles[index].bo_handle == bo)
- return index;
+ if (cs->handles[index].bo_handle == bo)
+ return index;
- for (unsigned i = 0; i < cs->num_buffers; ++i) {
- if (cs->handles[i].bo_handle == bo) {
- cs->buffer_hash_table[hash] = i;
- return i;
- }
- }
+ for (unsigned i = 0; i < cs->num_buffers; ++i) {
+ if (cs->handles[i].bo_handle == bo) {
+ cs->buffer_hash_table[hash] = i;
+ return i;
+ }
+ }
- return -1;
+ return -1;
}
-static void radv_amdgpu_cs_add_buffer_internal(struct radv_amdgpu_cs *cs,
- uint32_t bo, uint8_t priority)
+static void
+radv_amdgpu_cs_add_buffer_internal(struct radv_amdgpu_cs *cs, uint32_t bo, uint8_t priority)
{
- unsigned hash;
- int index = radv_amdgpu_cs_find_buffer(cs, bo);
-
- if (index != -1)
- return;
-
- if (cs->num_buffers == cs->max_num_buffers) {
- unsigned new_count = MAX2(1, cs->max_num_buffers * 2);
- struct drm_amdgpu_bo_list_entry *new_entries =
- realloc(cs->handles, new_count * sizeof(struct drm_amdgpu_bo_list_entry));
- if (new_entries) {
- cs->max_num_buffers = new_count;
- cs->handles = new_entries;
- } else {
- cs->status = VK_ERROR_OUT_OF_HOST_MEMORY;
- return;
- }
- }
-
- cs->handles[cs->num_buffers].bo_handle = bo;
- cs->handles[cs->num_buffers].bo_priority = priority;
-
- hash = bo & (ARRAY_SIZE(cs->buffer_hash_table) - 1);
- cs->buffer_hash_table[hash] = cs->num_buffers;
-
- ++cs->num_buffers;
+ unsigned hash;
+ int index = radv_amdgpu_cs_find_buffer(cs, bo);
+
+ if (index != -1)
+ return;
+
+ if (cs->num_buffers == cs->max_num_buffers) {
+ unsigned new_count = MAX2(1, cs->max_num_buffers * 2);
+ struct drm_amdgpu_bo_list_entry *new_entries =
+ realloc(cs->handles, new_count * sizeof(struct drm_amdgpu_bo_list_entry));
+ if (new_entries) {
+ cs->max_num_buffers = new_count;
+ cs->handles = new_entries;
+ } else {
+ cs->status = VK_ERROR_OUT_OF_HOST_MEMORY;
+ return;
+ }
+ }
+
+ cs->handles[cs->num_buffers].bo_handle = bo;
+ cs->handles[cs->num_buffers].bo_priority = priority;
+
+ hash = bo & (ARRAY_SIZE(cs->buffer_hash_table) - 1);
+ cs->buffer_hash_table[hash] = cs->num_buffers;
+
+ ++cs->num_buffers;
}
-static void radv_amdgpu_cs_add_virtual_buffer(struct radeon_cmdbuf *_cs,
- struct radeon_winsys_bo *bo)
+static void
+radv_amdgpu_cs_add_virtual_buffer(struct radeon_cmdbuf *_cs, struct radeon_winsys_bo *bo)
{
- struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
- unsigned hash = ((uintptr_t)bo >> 6) & (VIRTUAL_BUFFER_HASH_TABLE_SIZE - 1);
-
-
- if (!cs->virtual_buffer_hash_table) {
- int *virtual_buffer_hash_table =
- malloc(VIRTUAL_BUFFER_HASH_TABLE_SIZE * sizeof(int));
- if (!virtual_buffer_hash_table) {
- cs->status = VK_ERROR_OUT_OF_HOST_MEMORY;
- return;
- }
- cs->virtual_buffer_hash_table = virtual_buffer_hash_table;
-
- for (int i = 0; i < VIRTUAL_BUFFER_HASH_TABLE_SIZE; ++i)
- cs->virtual_buffer_hash_table[i] = -1;
- }
-
- if (cs->virtual_buffer_hash_table[hash] >= 0) {
- int idx = cs->virtual_buffer_hash_table[hash];
- if (cs->virtual_buffers[idx] == bo) {
- return;
- }
- for (unsigned i = 0; i < cs->num_virtual_buffers; ++i) {
- if (cs->virtual_buffers[i] == bo) {
- cs->virtual_buffer_hash_table[hash] = i;
- return;
- }
- }
- }
-
- if(cs->max_num_virtual_buffers <= cs->num_virtual_buffers) {
- unsigned max_num_virtual_buffers =
- MAX2(2, cs->max_num_virtual_buffers * 2);
- struct radeon_winsys_bo **virtual_buffers =
- realloc(cs->virtual_buffers,
- sizeof(struct radeon_winsys_bo*) * max_num_virtual_buffers);
- if (!virtual_buffers) {
- cs->status = VK_ERROR_OUT_OF_HOST_MEMORY;
- return;
- }
- cs->max_num_virtual_buffers = max_num_virtual_buffers;
- cs->virtual_buffers = virtual_buffers;
- }
-
- cs->virtual_buffers[cs->num_virtual_buffers] = bo;
-
- cs->virtual_buffer_hash_table[hash] = cs->num_virtual_buffers;
- ++cs->num_virtual_buffers;
-
+ struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
+ unsigned hash = ((uintptr_t)bo >> 6) & (VIRTUAL_BUFFER_HASH_TABLE_SIZE - 1);
+
+ if (!cs->virtual_buffer_hash_table) {
+ int *virtual_buffer_hash_table = malloc(VIRTUAL_BUFFER_HASH_TABLE_SIZE * sizeof(int));
+ if (!virtual_buffer_hash_table) {
+ cs->status = VK_ERROR_OUT_OF_HOST_MEMORY;
+ return;
+ }
+ cs->virtual_buffer_hash_table = virtual_buffer_hash_table;
+
+ for (int i = 0; i < VIRTUAL_BUFFER_HASH_TABLE_SIZE; ++i)
+ cs->virtual_buffer_hash_table[i] = -1;
+ }
+
+ if (cs->virtual_buffer_hash_table[hash] >= 0) {
+ int idx = cs->virtual_buffer_hash_table[hash];
+ if (cs->virtual_buffers[idx] == bo) {
+ return;
+ }
+ for (unsigned i = 0; i < cs->num_virtual_buffers; ++i) {
+ if (cs->virtual_buffers[i] == bo) {
+ cs->virtual_buffer_hash_table[hash] = i;
+ return;
+ }
+ }
+ }
+
+ if (cs->max_num_virtual_buffers <= cs->num_virtual_buffers) {
+ unsigned max_num_virtual_buffers = MAX2(2, cs->max_num_virtual_buffers * 2);
+ struct radeon_winsys_bo **virtual_buffers =
+ realloc(cs->virtual_buffers, sizeof(struct radeon_winsys_bo *) * max_num_virtual_buffers);
+ if (!virtual_buffers) {
+ cs->status = VK_ERROR_OUT_OF_HOST_MEMORY;
+ return;
+ }
+ cs->max_num_virtual_buffers = max_num_virtual_buffers;
+ cs->virtual_buffers = virtual_buffers;
+ }
+
+ cs->virtual_buffers[cs->num_virtual_buffers] = bo;
+
+ cs->virtual_buffer_hash_table[hash] = cs->num_virtual_buffers;
+ ++cs->num_virtual_buffers;
}
-static void radv_amdgpu_cs_add_buffer(struct radeon_cmdbuf *_cs,
- struct radeon_winsys_bo *_bo)
+static void
+radv_amdgpu_cs_add_buffer(struct radeon_cmdbuf *_cs, struct radeon_winsys_bo *_bo)
{
- struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
- struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
+ struct radv_amdgpu_cs *cs = radv_amdgpu_cs(_cs);
+ struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
- if (cs->status != VK_SUCCESS)
- return;
+ if (cs->status != VK_SUCCESS)
+ return;
- if (bo->is_virtual) {
- radv_amdgpu_cs_add_virtual_buffer(_cs, _bo);
- return;
- }
+ if (bo->is_virtual) {
+ radv_amdgpu_cs_add_virtual_buffer(_cs, _bo);
+ return;
+ }
- radv_amdgpu_cs_add_buffer_internal(cs, bo->bo_handle, bo->priority);
+ radv_amdgpu_cs_add_buffer_internal(cs, bo->bo_handle, bo->priority);
}
-static void radv_amdgpu_cs_execute_secondary(struct radeon_cmdbuf *_parent,
- struct radeon_cmdbuf *_child)
+static void
+radv_amdgpu_cs_execute_secondary(struct radeon_cmdbuf *_parent, struct radeon_cmdbuf *_child)
{
- struct radv_amdgpu_cs *parent = radv_amdgpu_cs(_parent);
- struct radv_amdgpu_cs *child = radv_amdgpu_cs(_child);
-
- if (parent->status != VK_SUCCESS || child->status != VK_SUCCESS)
- return;
-
- for (unsigned i = 0; i < child->num_buffers; ++i) {
- radv_amdgpu_cs_add_buffer_internal(parent,
- child->handles[i].bo_handle,
- child->handles[i].bo_priority);
- }
-
- for (unsigned i = 0; i < child->num_virtual_buffers; ++i) {
- radv_amdgpu_cs_add_buffer(&parent->base, child->virtual_buffers[i]);
- }
-
- if (parent->ws->use_ib_bos) {
- if (parent->base.cdw + 4 > parent->base.max_dw)
- radv_amdgpu_cs_grow(&parent->base, 4);
-
- radeon_emit(&parent->base, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
- radeon_emit(&parent->base, child->ib.ib_mc_address);
- radeon_emit(&parent->base, child->ib.ib_mc_address >> 32);
- radeon_emit(&parent->base, child->ib.size);
- } else {
- if (parent->base.cdw + child->base.cdw > parent->base.max_dw)
- radv_amdgpu_cs_grow(&parent->base, child->base.cdw);
-
- memcpy(parent->base.buf + parent->base.cdw, child->base.buf, 4 * child->base.cdw);
- parent->base.cdw += child->base.cdw;
- }
+ struct radv_amdgpu_cs *parent = radv_amdgpu_cs(_parent);
+ struct radv_amdgpu_cs *child = radv_amdgpu_cs(_child);
+
+ if (parent->status != VK_SUCCESS || child->status != VK_SUCCESS)
+ return;
+
+ for (unsigned i = 0; i < child->num_buffers; ++i) {
+ radv_amdgpu_cs_add_buffer_internal(parent, child->handles[i].bo_handle,
+ child->handles[i].bo_priority);
+ }
+
+ for (unsigned i = 0; i < child->num_virtual_buffers; ++i) {
+ radv_amdgpu_cs_add_buffer(&parent->base, child->virtual_buffers[i]);
+ }
+
+ if (parent->ws->use_ib_bos) {
+ if (parent->base.cdw + 4 > parent->base.max_dw)
+ radv_amdgpu_cs_grow(&parent->base, 4);
+
+ radeon_emit(&parent->base, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
+ radeon_emit(&parent->base, child->ib.ib_mc_address);
+ radeon_emit(&parent->base, child->ib.ib_mc_address >> 32);
+ radeon_emit(&parent->base, child->ib.size);
+ } else {
+ if (parent->base.cdw + child->base.cdw > parent->base.max_dw)
+ radv_amdgpu_cs_grow(&parent->base, child->base.cdw);
+
+ memcpy(parent->base.buf + parent->base.cdw, child->base.buf, 4 * child->base.cdw);
+ parent->base.cdw += child->base.cdw;
+ }
}
static VkResult
-radv_amdgpu_get_bo_list(struct radv_amdgpu_winsys *ws,
- struct radeon_cmdbuf **cs_array,
- unsigned count,
- struct radv_amdgpu_winsys_bo **extra_bo_array,
- unsigned num_extra_bo,
- struct radeon_cmdbuf *extra_cs,
- unsigned *rnum_handles,
- struct drm_amdgpu_bo_list_entry **rhandles)
+radv_amdgpu_get_bo_list(struct radv_amdgpu_winsys *ws, struct radeon_cmdbuf **cs_array,
+ unsigned count, struct radv_amdgpu_winsys_bo **extra_bo_array,
+ unsigned num_extra_bo, struct radeon_cmdbuf *extra_cs,
+ unsigned *rnum_handles, struct drm_amdgpu_bo_list_entry **rhandles)
{
- struct drm_amdgpu_bo_list_entry *handles = NULL;
- unsigned num_handles = 0;
-
- if (ws->debug_all_bos) {
- handles = malloc(sizeof(handles[0]) * ws->global_bo_list.count);
- if (!handles) {
- return VK_ERROR_OUT_OF_HOST_MEMORY;
- }
-
- for (uint32_t i = 0; i < ws->global_bo_list.count; i++) {
- handles[i].bo_handle = ws->global_bo_list.bos[i]->bo_handle;
- handles[i].bo_priority = ws->global_bo_list.bos[i]->priority;
- num_handles++;
- }
- } else if (count == 1 && !num_extra_bo && !extra_cs &&
- !radv_amdgpu_cs(cs_array[0])->num_virtual_buffers &&
- !ws->global_bo_list.count) {
- struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[0];
- if (cs->num_buffers == 0)
- return VK_SUCCESS;
-
- handles = malloc(sizeof(handles[0]) * cs->num_buffers);
- if (!handles)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
-
- memcpy(handles, cs->handles,
- sizeof(handles[0]) * cs->num_buffers);
- num_handles = cs->num_buffers;
- } else {
- unsigned total_buffer_count = num_extra_bo;
- num_handles = num_extra_bo;
- for (unsigned i = 0; i < count; ++i) {
- struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[i];
- total_buffer_count += cs->num_buffers;
- for (unsigned j = 0; j < cs->num_virtual_buffers; ++j)
- total_buffer_count += radv_amdgpu_winsys_bo(cs->virtual_buffers[j])->bo_count;
- }
-
- if (extra_cs) {
- total_buffer_count += ((struct radv_amdgpu_cs*)extra_cs)->num_buffers;
- }
-
- total_buffer_count += ws->global_bo_list.count;
-
- if (total_buffer_count == 0)
- return VK_SUCCESS;
-
- handles = malloc(sizeof(handles[0]) * total_buffer_count);
- if (!handles)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
-
- for (unsigned i = 0; i < num_extra_bo; i++) {
- handles[i].bo_handle = extra_bo_array[i]->bo_handle;
- handles[i].bo_priority = extra_bo_array[i]->priority;
- }
-
- for (unsigned i = 0; i < count + !!extra_cs; ++i) {
- struct radv_amdgpu_cs *cs;
-
- if (i == count)
- cs = (struct radv_amdgpu_cs*)extra_cs;
- else
- cs = (struct radv_amdgpu_cs*)cs_array[i];
-
- if (!cs->num_buffers)
- continue;
-
- if (num_handles == 0 && !cs->num_virtual_buffers) {
- memcpy(handles, cs->handles, cs->num_buffers * sizeof(struct drm_amdgpu_bo_list_entry));
- num_handles = cs->num_buffers;
- continue;
- }
- int unique_bo_so_far = num_handles;
- for (unsigned j = 0; j < cs->num_buffers; ++j) {
- bool found = false;
- for (unsigned k = 0; k < unique_bo_so_far; ++k) {
- if (handles[k].bo_handle == cs->handles[j].bo_handle) {
- found = true;
- break;
- }
- }
- if (!found) {
- handles[num_handles] = cs->handles[j];
- ++num_handles;
- }
- }
- for (unsigned j = 0; j < cs->num_virtual_buffers; ++j) {
- struct radv_amdgpu_winsys_bo *virtual_bo = radv_amdgpu_winsys_bo(cs->virtual_buffers[j]);
- for(unsigned k = 0; k < virtual_bo->bo_count; ++k) {
- struct radv_amdgpu_winsys_bo *bo = virtual_bo->bos[k];
- bool found = false;
- for (unsigned m = 0; m < num_handles; ++m) {
- if (handles[m].bo_handle == bo->bo_handle) {
- found = true;
- break;
- }
- }
- if (!found) {
- handles[num_handles].bo_handle = bo->bo_handle;
- handles[num_handles].bo_priority = bo->priority;
- ++num_handles;
- }
- }
- }
- }
-
- unsigned unique_bo_so_far = num_handles;
- for (unsigned i = 0; i < ws->global_bo_list.count; ++i) {
- struct radv_amdgpu_winsys_bo *bo = ws->global_bo_list.bos[i];
- bool found = false;
- for (unsigned j = 0; j < unique_bo_so_far; ++j) {
- if (bo->bo_handle == handles[j].bo_handle) {
- found = true;
- break;
- }
- }
- if (!found) {
- handles[num_handles].bo_handle = bo->bo_handle;
- handles[num_handles].bo_priority = bo->priority;
- ++num_handles;
- }
- }
- }
-
- *rhandles = handles;
- *rnum_handles = num_handles;
-
- return VK_SUCCESS;
+ struct drm_amdgpu_bo_list_entry *handles = NULL;
+ unsigned num_handles = 0;
+
+ if (ws->debug_all_bos) {
+ handles = malloc(sizeof(handles[0]) * ws->global_bo_list.count);
+ if (!handles) {
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+ }
+
+ for (uint32_t i = 0; i < ws->global_bo_list.count; i++) {
+ handles[i].bo_handle = ws->global_bo_list.bos[i]->bo_handle;
+ handles[i].bo_priority = ws->global_bo_list.bos[i]->priority;
+ num_handles++;
+ }
+ } else if (count == 1 && !num_extra_bo && !extra_cs &&
+ !radv_amdgpu_cs(cs_array[0])->num_virtual_buffers && !ws->global_bo_list.count) {
+ struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)cs_array[0];
+ if (cs->num_buffers == 0)
+ return VK_SUCCESS;
+
+ handles = malloc(sizeof(handles[0]) * cs->num_buffers);
+ if (!handles)
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+ memcpy(handles, cs->handles, sizeof(handles[0]) * cs->num_buffers);
+ num_handles = cs->num_buffers;
+ } else {
+ unsigned total_buffer_count = num_extra_bo;
+ num_handles = num_extra_bo;
+ for (unsigned i = 0; i < count; ++i) {
+ struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)cs_array[i];
+ total_buffer_count += cs->num_buffers;
+ for (unsigned j = 0; j < cs->num_virtual_buffers; ++j)
+ total_buffer_count += radv_amdgpu_winsys_bo(cs->virtual_buffers[j])->bo_count;
+ }
+
+ if (extra_cs) {
+ total_buffer_count += ((struct radv_amdgpu_cs *)extra_cs)->num_buffers;
+ }
+
+ total_buffer_count += ws->global_bo_list.count;
+
+ if (total_buffer_count == 0)
+ return VK_SUCCESS;
+
+ handles = malloc(sizeof(handles[0]) * total_buffer_count);
+ if (!handles)
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+ for (unsigned i = 0; i < num_extra_bo; i++) {
+ handles[i].bo_handle = extra_bo_array[i]->bo_handle;
+ handles[i].bo_priority = extra_bo_array[i]->priority;
+ }
+
+ for (unsigned i = 0; i < count + !!extra_cs; ++i) {
+ struct radv_amdgpu_cs *cs;
+
+ if (i == count)
+ cs = (struct radv_amdgpu_cs *)extra_cs;
+ else
+ cs = (struct radv_amdgpu_cs *)cs_array[i];
+
+ if (!cs->num_buffers)
+ continue;
+
+ if (num_handles == 0 && !cs->num_virtual_buffers) {
+ memcpy(handles, cs->handles, cs->num_buffers * sizeof(struct drm_amdgpu_bo_list_entry));
+ num_handles = cs->num_buffers;
+ continue;
+ }
+ int unique_bo_so_far = num_handles;
+ for (unsigned j = 0; j < cs->num_buffers; ++j) {
+ bool found = false;
+ for (unsigned k = 0; k < unique_bo_so_far; ++k) {
+ if (handles[k].bo_handle == cs->handles[j].bo_handle) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ handles[num_handles] = cs->handles[j];
+ ++num_handles;
+ }
+ }
+ for (unsigned j = 0; j < cs->num_virtual_buffers; ++j) {
+ struct radv_amdgpu_winsys_bo *virtual_bo =
+ radv_amdgpu_winsys_bo(cs->virtual_buffers[j]);
+ for (unsigned k = 0; k < virtual_bo->bo_count; ++k) {
+ struct radv_amdgpu_winsys_bo *bo = virtual_bo->bos[k];
+ bool found = false;
+ for (unsigned m = 0; m < num_handles; ++m) {
+ if (handles[m].bo_handle == bo->bo_handle) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ handles[num_handles].bo_handle = bo->bo_handle;
+ handles[num_handles].bo_priority = bo->priority;
+ ++num_handles;
+ }
+ }
+ }
+ }
+
+ unsigned unique_bo_so_far = num_handles;
+ for (unsigned i = 0; i < ws->global_bo_list.count; ++i) {
+ struct radv_amdgpu_winsys_bo *bo = ws->global_bo_list.bos[i];
+ bool found = false;
+ for (unsigned j = 0; j < unique_bo_so_far; ++j) {
+ if (bo->bo_handle == handles[j].bo_handle) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ handles[num_handles].bo_handle = bo->bo_handle;
+ handles[num_handles].bo_priority = bo->priority;
+ ++num_handles;
+ }
+ }
+ }
+
+ *rhandles = handles;
+ *rnum_handles = num_handles;
+
+ return VK_SUCCESS;
}
-static void radv_assign_last_submit(struct radv_amdgpu_ctx *ctx,
- struct radv_amdgpu_cs_request *request)
+static void
+radv_assign_last_submit(struct radv_amdgpu_ctx *ctx, struct radv_amdgpu_cs_request *request)
{
- radv_amdgpu_request_to_fence(ctx,
- &ctx->last_submission[request->ip_type][request->ring],
- request);
+ radv_amdgpu_request_to_fence(ctx, &ctx->last_submission[request->ip_type][request->ring],
+ request);
}
static VkResult
-radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,
- int queue_idx,
- struct radv_winsys_sem_info *sem_info,
- struct radeon_cmdbuf **cs_array,
- unsigned cs_count,
- struct radeon_cmdbuf *initial_preamble_cs)
+radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx, int queue_idx,
+ struct radv_winsys_sem_info *sem_info,
+ struct radeon_cmdbuf **cs_array, unsigned cs_count,
+ struct radeon_cmdbuf *initial_preamble_cs)
{
- struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
- struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[0]);
- struct radv_amdgpu_winsys *aws = cs0->ws;
- struct drm_amdgpu_bo_list_entry *handles = NULL;
- struct radv_amdgpu_cs_request request;
- struct amdgpu_cs_ib_info ibs[2];
- unsigned number_of_ibs = 1;
- unsigned num_handles = 0;
- VkResult result;
-
- for (unsigned i = cs_count; i--;) {
- struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]);
-
- if (cs->is_chained) {
- *cs->ib_size_ptr -= 4;
- cs->is_chained = false;
- }
-
- if (i + 1 < cs_count) {
- struct radv_amdgpu_cs *next = radv_amdgpu_cs(cs_array[i + 1]);
- assert(cs->base.cdw + 4 <= cs->base.max_dw);
-
- cs->is_chained = true;
- *cs->ib_size_ptr += 4;
-
- cs->base.buf[cs->base.cdw + 0] = PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0);
- cs->base.buf[cs->base.cdw + 1] = next->ib.ib_mc_address;
- cs->base.buf[cs->base.cdw + 2] = next->ib.ib_mc_address >> 32;
- cs->base.buf[cs->base.cdw + 3] = S_3F2_CHAIN(1) | S_3F2_VALID(1) | next->ib.size;
- }
- }
-
- u_rwlock_rdlock(&aws->global_bo_list.lock);
-
- /* Get the BO list. */
- result = radv_amdgpu_get_bo_list(cs0->ws, cs_array, cs_count, NULL, 0,
- initial_preamble_cs,
- &num_handles, &handles);
- if (result != VK_SUCCESS)
- goto fail;
-
- /* Configure the CS request. */
- if (initial_preamble_cs) {
- ibs[0] = radv_amdgpu_cs(initial_preamble_cs)->ib;
- ibs[1] = cs0->ib;
- number_of_ibs++;
- } else {
- ibs[0] = cs0->ib;
- }
-
- request.ip_type = cs0->hw_ip;
- request.ip_instance = 0;
- request.ring = queue_idx;
- request.number_of_ibs = number_of_ibs;
- request.ibs = ibs;
- request.handles = handles;
- request.num_handles = num_handles;
-
- /* Submit the CS. */
- result = radv_amdgpu_cs_submit(ctx, &request, sem_info);
-
- free(request.handles);
-
- if (result != VK_SUCCESS)
- goto fail;
-
- radv_assign_last_submit(ctx, &request);
+ struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
+ struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[0]);
+ struct radv_amdgpu_winsys *aws = cs0->ws;
+ struct drm_amdgpu_bo_list_entry *handles = NULL;
+ struct radv_amdgpu_cs_request request;
+ struct amdgpu_cs_ib_info ibs[2];
+ unsigned number_of_ibs = 1;
+ unsigned num_handles = 0;
+ VkResult result;
+
+ for (unsigned i = cs_count; i--;) {
+ struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]);
+
+ if (cs->is_chained) {
+ *cs->ib_size_ptr -= 4;
+ cs->is_chained = false;
+ }
+
+ if (i + 1 < cs_count) {
+ struct radv_amdgpu_cs *next = radv_amdgpu_cs(cs_array[i + 1]);
+ assert(cs->base.cdw + 4 <= cs->base.max_dw);
+
+ cs->is_chained = true;
+ *cs->ib_size_ptr += 4;
+
+ cs->base.buf[cs->base.cdw + 0] = PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0);
+ cs->base.buf[cs->base.cdw + 1] = next->ib.ib_mc_address;
+ cs->base.buf[cs->base.cdw + 2] = next->ib.ib_mc_address >> 32;
+ cs->base.buf[cs->base.cdw + 3] = S_3F2_CHAIN(1) | S_3F2_VALID(1) | next->ib.size;
+ }
+ }
+
+ u_rwlock_rdlock(&aws->global_bo_list.lock);
+
+ /* Get the BO list. */
+ result = radv_amdgpu_get_bo_list(cs0->ws, cs_array, cs_count, NULL, 0, initial_preamble_cs,
+ &num_handles, &handles);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ /* Configure the CS request. */
+ if (initial_preamble_cs) {
+ ibs[0] = radv_amdgpu_cs(initial_preamble_cs)->ib;
+ ibs[1] = cs0->ib;
+ number_of_ibs++;
+ } else {
+ ibs[0] = cs0->ib;
+ }
+
+ request.ip_type = cs0->hw_ip;
+ request.ip_instance = 0;
+ request.ring = queue_idx;
+ request.number_of_ibs = number_of_ibs;
+ request.ibs = ibs;
+ request.handles = handles;
+ request.num_handles = num_handles;
+
+ /* Submit the CS. */
+ result = radv_amdgpu_cs_submit(ctx, &request, sem_info);
+
+ free(request.handles);
+
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ radv_assign_last_submit(ctx, &request);
fail:
- u_rwlock_rdunlock(&aws->global_bo_list.lock);
- return result;
+ u_rwlock_rdunlock(&aws->global_bo_list.lock);
+ return result;
}
static VkResult
-radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,
- int queue_idx,
- struct radv_winsys_sem_info *sem_info,
- struct radeon_cmdbuf **cs_array,
- unsigned cs_count,
- struct radeon_cmdbuf *initial_preamble_cs)
+radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx, int queue_idx,
+ struct radv_winsys_sem_info *sem_info,
+ struct radeon_cmdbuf **cs_array, unsigned cs_count,
+ struct radeon_cmdbuf *initial_preamble_cs)
{
- struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
- struct drm_amdgpu_bo_list_entry *handles = NULL;
- struct radv_amdgpu_cs_request request;
- struct amdgpu_cs_ib_info *ibs;
- struct radv_amdgpu_cs *cs0;
- struct radv_amdgpu_winsys *aws;
- unsigned num_handles = 0;
- unsigned number_of_ibs;
- VkResult result;
-
- assert(cs_count);
- cs0 = radv_amdgpu_cs(cs_array[0]);
- aws = cs0->ws;
-
- /* Compute the number of IBs for this submit. */
- number_of_ibs = cs_count + !!initial_preamble_cs;
-
- u_rwlock_rdlock(&aws->global_bo_list.lock);
-
- /* Get the BO list. */
- result = radv_amdgpu_get_bo_list(cs0->ws, &cs_array[0], cs_count, NULL, 0,
- initial_preamble_cs,
- &num_handles, &handles);
- if (result != VK_SUCCESS) {
- goto fail;
- }
-
- ibs = malloc(number_of_ibs * sizeof(*ibs));
- if (!ibs) {
- free(handles);
- result = VK_ERROR_OUT_OF_HOST_MEMORY;
- goto fail;
- }
-
- /* Configure the CS request. */
- if (initial_preamble_cs)
- ibs[0] = radv_amdgpu_cs(initial_preamble_cs)->ib;
-
- for (unsigned i = 0; i < cs_count; i++) {
- struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]);
-
- ibs[i + !!initial_preamble_cs] = cs->ib;
-
- if (cs->is_chained) {
- *cs->ib_size_ptr -= 4;
- cs->is_chained = false;
- }
- }
-
- request.ip_type = cs0->hw_ip;
- request.ip_instance = 0;
- request.ring = queue_idx;
- request.handles = handles;
- request.num_handles = num_handles;
- request.number_of_ibs = number_of_ibs;
- request.ibs = ibs;
-
- /* Submit the CS. */
- result = radv_amdgpu_cs_submit(ctx, &request, sem_info);
-
- free(request.handles);
- free(ibs);
-
- if (result != VK_SUCCESS)
- goto fail;
-
- radv_assign_last_submit(ctx, &request);
+ struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
+ struct drm_amdgpu_bo_list_entry *handles = NULL;
+ struct radv_amdgpu_cs_request request;
+ struct amdgpu_cs_ib_info *ibs;
+ struct radv_amdgpu_cs *cs0;
+ struct radv_amdgpu_winsys *aws;
+ unsigned num_handles = 0;
+ unsigned number_of_ibs;
+ VkResult result;
+
+ assert(cs_count);
+ cs0 = radv_amdgpu_cs(cs_array[0]);
+ aws = cs0->ws;
+
+ /* Compute the number of IBs for this submit. */
+ number_of_ibs = cs_count + !!initial_preamble_cs;
+
+ u_rwlock_rdlock(&aws->global_bo_list.lock);
+
+ /* Get the BO list. */
+ result = radv_amdgpu_get_bo_list(cs0->ws, &cs_array[0], cs_count, NULL, 0, initial_preamble_cs,
+ &num_handles, &handles);
+ if (result != VK_SUCCESS) {
+ goto fail;
+ }
+
+ ibs = malloc(number_of_ibs * sizeof(*ibs));
+ if (!ibs) {
+ free(handles);
+ result = VK_ERROR_OUT_OF_HOST_MEMORY;
+ goto fail;
+ }
+
+ /* Configure the CS request. */
+ if (initial_preamble_cs)
+ ibs[0] = radv_amdgpu_cs(initial_preamble_cs)->ib;
+
+ for (unsigned i = 0; i < cs_count; i++) {
+ struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]);
+
+ ibs[i + !!initial_preamble_cs] = cs->ib;
+
+ if (cs->is_chained) {
+ *cs->ib_size_ptr -= 4;
+ cs->is_chained = false;
+ }
+ }
+
+ request.ip_type = cs0->hw_ip;
+ request.ip_instance = 0;
+ request.ring = queue_idx;
+ request.handles = handles;
+ request.num_handles = num_handles;
+ request.number_of_ibs = number_of_ibs;
+ request.ibs = ibs;
+
+ /* Submit the CS. */
+ result = radv_amdgpu_cs_submit(ctx, &request, sem_info);
+
+ free(request.handles);
+ free(ibs);
+
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ radv_assign_last_submit(ctx, &request);
fail:
- u_rwlock_rdunlock(&aws->global_bo_list.lock);
- return result;
+ u_rwlock_rdunlock(&aws->global_bo_list.lock);
+ return result;
}
static VkResult
-radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
- int queue_idx,
- struct radv_winsys_sem_info *sem_info,
- struct radeon_cmdbuf **cs_array,
- unsigned cs_count,
- struct radeon_cmdbuf *initial_preamble_cs,
- struct radeon_cmdbuf *continue_preamble_cs)
+radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx, int queue_idx,
+ struct radv_winsys_sem_info *sem_info,
+ struct radeon_cmdbuf **cs_array, unsigned cs_count,
+ struct radeon_cmdbuf *initial_preamble_cs,
+ struct radeon_cmdbuf *continue_preamble_cs)
{
- struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
- struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[0]);
- struct radeon_winsys *ws = (struct radeon_winsys*)cs0->ws;
- struct radv_amdgpu_winsys *aws = cs0->ws;
- struct radv_amdgpu_cs_request request;
- uint32_t pad_word = PKT3_NOP_PAD;
- bool emit_signal_sem = sem_info->cs_emit_signal;
- VkResult result;
-
- if (radv_amdgpu_winsys(ws)->info.chip_class == GFX6)
- pad_word = 0x80000000;
-
- assert(cs_count);
-
- for (unsigned i = 0; i < cs_count;) {
- struct amdgpu_cs_ib_info *ibs;
- struct radeon_winsys_bo **bos;
- struct radeon_cmdbuf *preamble_cs = i ? continue_preamble_cs : initial_preamble_cs;
- struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]);
- struct drm_amdgpu_bo_list_entry *handles = NULL;
- unsigned num_handles = 0;
- unsigned number_of_ibs;
- uint32_t *ptr;
- unsigned cnt = 0;
- unsigned pad_words = 0;
-
- /* Compute the number of IBs for this submit. */
- number_of_ibs = cs->num_old_cs_buffers + 1;
-
- ibs = malloc(number_of_ibs * sizeof(*ibs));
- if (!ibs)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
-
- bos = malloc(number_of_ibs * sizeof(*bos));
- if (!bos) {
- free(ibs);
- return VK_ERROR_OUT_OF_HOST_MEMORY;
- }
-
- if (number_of_ibs > 1) {
- /* Special path when the maximum size in dwords has
- * been reached because we need to handle more than one
- * IB per submit.
- */
- struct radeon_cmdbuf **new_cs_array;
- unsigned idx = 0;
-
- new_cs_array = malloc(cs->num_old_cs_buffers *
- sizeof(*new_cs_array));
- assert(new_cs_array);
-
- for (unsigned j = 0; j < cs->num_old_cs_buffers; j++)
- new_cs_array[idx++] = &cs->old_cs_buffers[j];
- new_cs_array[idx++] = cs_array[i];
-
- for (unsigned j = 0; j < number_of_ibs; j++) {
- struct radeon_cmdbuf *rcs = new_cs_array[j];
- bool needs_preamble = preamble_cs && j == 0;
- unsigned size = 0;
-
- if (needs_preamble)
- size += preamble_cs->cdw;
- size += rcs->cdw;
-
- assert(size < 0xffff8);
-
- while (!size || (size & 7)) {
- size++;
- pad_words++;
- }
-
- bos[j] = ws->buffer_create(ws, 4 * size, 4096,
- aws->cs_bo_domain,
- RADEON_FLAG_CPU_ACCESS |
- RADEON_FLAG_NO_INTERPROCESS_SHARING |
- RADEON_FLAG_READ_ONLY,
- RADV_BO_PRIORITY_CS);
- ptr = ws->buffer_map(bos[j]);
-
- if (needs_preamble) {
- memcpy(ptr, preamble_cs->buf, preamble_cs->cdw * 4);
- ptr += preamble_cs->cdw;
- }
-
- memcpy(ptr, rcs->buf, 4 * rcs->cdw);
- ptr += rcs->cdw;
-
- for (unsigned k = 0; k < pad_words; ++k)
- *ptr++ = pad_word;
-
- ibs[j].size = size;
- ibs[j].ib_mc_address = radv_buffer_get_va(bos[j]);
- ibs[j].flags = 0;
- }
-
- cnt++;
- free(new_cs_array);
- } else {
- unsigned size = 0;
-
- if (preamble_cs)
- size += preamble_cs->cdw;
-
- while (i + cnt < cs_count && 0xffff8 - size >= radv_amdgpu_cs(cs_array[i + cnt])->base.cdw) {
- size += radv_amdgpu_cs(cs_array[i + cnt])->base.cdw;
- ++cnt;
- }
-
- while (!size || (size & 7)) {
- size++;
- pad_words++;
- }
- assert(cnt);
-
- bos[0] = ws->buffer_create(ws, 4 * size, 4096,
- aws->cs_bo_domain,
- RADEON_FLAG_CPU_ACCESS |
- RADEON_FLAG_NO_INTERPROCESS_SHARING |
- RADEON_FLAG_READ_ONLY,
- RADV_BO_PRIORITY_CS);
- ptr = ws->buffer_map(bos[0]);
-
- if (preamble_cs) {
- memcpy(ptr, preamble_cs->buf, preamble_cs->cdw * 4);
- ptr += preamble_cs->cdw;
- }
-
- for (unsigned j = 0; j < cnt; ++j) {
- struct radv_amdgpu_cs *cs2 = radv_amdgpu_cs(cs_array[i + j]);
- memcpy(ptr, cs2->base.buf, 4 * cs2->base.cdw);
- ptr += cs2->base.cdw;
-
- }
-
- for (unsigned j = 0; j < pad_words; ++j)
- *ptr++ = pad_word;
-
- ibs[0].size = size;
- ibs[0].ib_mc_address = radv_buffer_get_va(bos[0]);
- ibs[0].flags = 0;
- }
-
- u_rwlock_rdlock(&aws->global_bo_list.lock);
-
- result = radv_amdgpu_get_bo_list(cs0->ws, &cs_array[i], cnt,
- (struct radv_amdgpu_winsys_bo **)bos,
- number_of_ibs, preamble_cs,
- &num_handles, &handles);
- if (result != VK_SUCCESS) {
- free(ibs);
- free(bos);
- u_rwlock_rdunlock(&aws->global_bo_list.lock);
- return result;
- }
-
- request.ip_type = cs0->hw_ip;
- request.ip_instance = 0;
- request.ring = queue_idx;
- request.handles = handles;
- request.num_handles = num_handles;
- request.number_of_ibs = number_of_ibs;
- request.ibs = ibs;
-
- sem_info->cs_emit_signal = (i == cs_count - cnt) ? emit_signal_sem : false;
- result = radv_amdgpu_cs_submit(ctx, &request, sem_info);
-
- free(request.handles);
- u_rwlock_rdunlock(&aws->global_bo_list.lock);
-
- for (unsigned j = 0; j < number_of_ibs; j++) {
- ws->buffer_destroy(ws, bos[j]);
- }
-
- free(ibs);
- free(bos);
-
- if (result != VK_SUCCESS)
- return result;
-
- i += cnt;
- }
-
- radv_assign_last_submit(ctx, &request);
-
- return VK_SUCCESS;
+ struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
+ struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[0]);
+ struct radeon_winsys *ws = (struct radeon_winsys *)cs0->ws;
+ struct radv_amdgpu_winsys *aws = cs0->ws;
+ struct radv_amdgpu_cs_request request;
+ uint32_t pad_word = PKT3_NOP_PAD;
+ bool emit_signal_sem = sem_info->cs_emit_signal;
+ VkResult result;
+
+ if (radv_amdgpu_winsys(ws)->info.chip_class == GFX6)
+ pad_word = 0x80000000;
+
+ assert(cs_count);
+
+ for (unsigned i = 0; i < cs_count;) {
+ struct amdgpu_cs_ib_info *ibs;
+ struct radeon_winsys_bo **bos;
+ struct radeon_cmdbuf *preamble_cs = i ? continue_preamble_cs : initial_preamble_cs;
+ struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]);
+ struct drm_amdgpu_bo_list_entry *handles = NULL;
+ unsigned num_handles = 0;
+ unsigned number_of_ibs;
+ uint32_t *ptr;
+ unsigned cnt = 0;
+ unsigned pad_words = 0;
+
+ /* Compute the number of IBs for this submit. */
+ number_of_ibs = cs->num_old_cs_buffers + 1;
+
+ ibs = malloc(number_of_ibs * sizeof(*ibs));
+ if (!ibs)
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+ bos = malloc(number_of_ibs * sizeof(*bos));
+ if (!bos) {
+ free(ibs);
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+ }
+
+ if (number_of_ibs > 1) {
+ /* Special path when the maximum size in dwords has
+ * been reached because we need to handle more than one
+ * IB per submit.
+ */
+ struct radeon_cmdbuf **new_cs_array;
+ unsigned idx = 0;
+
+ new_cs_array = malloc(cs->num_old_cs_buffers * sizeof(*new_cs_array));
+ assert(new_cs_array);
+
+ for (unsigned j = 0; j < cs->num_old_cs_buffers; j++)
+ new_cs_array[idx++] = &cs->old_cs_buffers[j];
+ new_cs_array[idx++] = cs_array[i];
+
+ for (unsigned j = 0; j < number_of_ibs; j++) {
+ struct radeon_cmdbuf *rcs = new_cs_array[j];
+ bool needs_preamble = preamble_cs && j == 0;
+ unsigned size = 0;
+
+ if (needs_preamble)
+ size += preamble_cs->cdw;
+ size += rcs->cdw;
+
+ assert(size < 0xffff8);
+
+ while (!size || (size & 7)) {
+ size++;
+ pad_words++;
+ }
+
+ bos[j] = ws->buffer_create(
+ ws, 4 * size, 4096, aws->cs_bo_domain,
+ RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY,
+ RADV_BO_PRIORITY_CS);
+ ptr = ws->buffer_map(bos[j]);
+
+ if (needs_preamble) {
+ memcpy(ptr, preamble_cs->buf, preamble_cs->cdw * 4);
+ ptr += preamble_cs->cdw;
+ }
+
+ memcpy(ptr, rcs->buf, 4 * rcs->cdw);
+ ptr += rcs->cdw;
+
+ for (unsigned k = 0; k < pad_words; ++k)
+ *ptr++ = pad_word;
+
+ ibs[j].size = size;
+ ibs[j].ib_mc_address = radv_buffer_get_va(bos[j]);
+ ibs[j].flags = 0;
+ }
+
+ cnt++;
+ free(new_cs_array);
+ } else {
+ unsigned size = 0;
+
+ if (preamble_cs)
+ size += preamble_cs->cdw;
+
+ while (i + cnt < cs_count &&
+ 0xffff8 - size >= radv_amdgpu_cs(cs_array[i + cnt])->base.cdw) {
+ size += radv_amdgpu_cs(cs_array[i + cnt])->base.cdw;
+ ++cnt;
+ }
+
+ while (!size || (size & 7)) {
+ size++;
+ pad_words++;
+ }
+ assert(cnt);
+
+ bos[0] = ws->buffer_create(
+ ws, 4 * size, 4096, aws->cs_bo_domain,
+ RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY,
+ RADV_BO_PRIORITY_CS);
+ ptr = ws->buffer_map(bos[0]);
+
+ if (preamble_cs) {
+ memcpy(ptr, preamble_cs->buf, preamble_cs->cdw * 4);
+ ptr += preamble_cs->cdw;
+ }
+
+ for (unsigned j = 0; j < cnt; ++j) {
+ struct radv_amdgpu_cs *cs2 = radv_amdgpu_cs(cs_array[i + j]);
+ memcpy(ptr, cs2->base.buf, 4 * cs2->base.cdw);
+ ptr += cs2->base.cdw;
+ }
+
+ for (unsigned j = 0; j < pad_words; ++j)
+ *ptr++ = pad_word;
+
+ ibs[0].size = size;
+ ibs[0].ib_mc_address = radv_buffer_get_va(bos[0]);
+ ibs[0].flags = 0;
+ }
+
+ u_rwlock_rdlock(&aws->global_bo_list.lock);
+
+ result =
+ radv_amdgpu_get_bo_list(cs0->ws, &cs_array[i], cnt, (struct radv_amdgpu_winsys_bo **)bos,
+ number_of_ibs, preamble_cs, &num_handles, &handles);
+ if (result != VK_SUCCESS) {
+ free(ibs);
+ free(bos);
+ u_rwlock_rdunlock(&aws->global_bo_list.lock);
+ return result;
+ }
+
+ request.ip_type = cs0->hw_ip;
+ request.ip_instance = 0;
+ request.ring = queue_idx;
+ request.handles = handles;
+ request.num_handles = num_handles;
+ request.number_of_ibs = number_of_ibs;
+ request.ibs = ibs;
+
+ sem_info->cs_emit_signal = (i == cs_count - cnt) ? emit_signal_sem : false;
+ result = radv_amdgpu_cs_submit(ctx, &request, sem_info);
+
+ free(request.handles);
+ u_rwlock_rdunlock(&aws->global_bo_list.lock);
+
+ for (unsigned j = 0; j < number_of_ibs; j++) {
+ ws->buffer_destroy(ws, bos[j]);
+ }
+
+ free(ibs);
+ free(bos);
+
+ if (result != VK_SUCCESS)
+ return result;
+
+ i += cnt;
+ }
+
+ radv_assign_last_submit(ctx, &request);
+
+ return VK_SUCCESS;
}
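The sysmem path above copies each command stream into a freshly allocated CPU-visible buffer and appends NOP dwords (0x80000000 on GFX6, PKT3_NOP_PAD otherwise) until the copy is a non-zero multiple of eight dwords. A minimal standalone sketch of that padding rule, with an invented helper name:

   #include <stdio.h>

   /* Mirrors the `while (!size || (size & 7))` loops above: grow the IB size
    * to a non-zero multiple of 8 dwords and count the padding words needed. */
   static unsigned
   padded_ib_size(unsigned size_dw, unsigned *pad_words)
   {
      *pad_words = 0;
      while (!size_dw || (size_dw & 7)) {
         size_dw++;
         (*pad_words)++;
      }
      return size_dw;
   }

   int
   main(void)
   {
      unsigned pad;
      unsigned size = padded_ib_size(13, &pad);
      printf("%u dwords, %u pad words\n", size, pad); /* 16 dwords, 3 pad words */
      return 0;
   }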
-static VkResult radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx,
- int queue_idx,
- struct radeon_cmdbuf **cs_array,
- unsigned cs_count,
- struct radeon_cmdbuf *initial_preamble_cs,
- struct radeon_cmdbuf *continue_preamble_cs,
- struct radv_winsys_sem_info *sem_info,
- bool can_patch)
+static VkResult
+radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx, int queue_idx,
+ struct radeon_cmdbuf **cs_array, unsigned cs_count,
+ struct radeon_cmdbuf *initial_preamble_cs,
+ struct radeon_cmdbuf *continue_preamble_cs,
+ struct radv_winsys_sem_info *sem_info, bool can_patch)
{
- struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[0]);
- VkResult result;
-
- assert(sem_info);
- if (!cs->ws->use_ib_bos) {
- result = radv_amdgpu_winsys_cs_submit_sysmem(_ctx, queue_idx, sem_info, cs_array,
- cs_count, initial_preamble_cs, continue_preamble_cs);
- } else if (can_patch) {
- result = radv_amdgpu_winsys_cs_submit_chained(_ctx, queue_idx, sem_info, cs_array,
- cs_count, initial_preamble_cs);
- } else {
- result = radv_amdgpu_winsys_cs_submit_fallback(_ctx, queue_idx, sem_info, cs_array,
- cs_count, initial_preamble_cs);
- }
-
- return result;
+ struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[0]);
+ VkResult result;
+
+ assert(sem_info);
+ if (!cs->ws->use_ib_bos) {
+ result = radv_amdgpu_winsys_cs_submit_sysmem(_ctx, queue_idx, sem_info, cs_array, cs_count,
+ initial_preamble_cs, continue_preamble_cs);
+ } else if (can_patch) {
+ result = radv_amdgpu_winsys_cs_submit_chained(_ctx, queue_idx, sem_info, cs_array, cs_count,
+ initial_preamble_cs);
+ } else {
+ result = radv_amdgpu_winsys_cs_submit_fallback(_ctx, queue_idx, sem_info, cs_array, cs_count,
+ initial_preamble_cs);
+ }
+
+ return result;
}
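For reference, the dispatcher above reduces to a three-way choice driven by two flags. A minimal sketch with invented names (the real functions also forward the semaphore info and preamble command streams):

   #include <stdbool.h>

   enum submit_path { SUBMIT_SYSMEM, SUBMIT_CHAINED, SUBMIT_FALLBACK };

   /* Sysmem when IB buffer objects are unusable (use_ib_bos is only set on
    * GFX7 and newer), chained when the command streams may be patched to jump
    * into one another, otherwise a fallback that places every IB in a single
    * request without chaining. */
   static enum submit_path
   pick_submit_path(bool use_ib_bos, bool can_patch)
   {
      if (!use_ib_bos)
         return SUBMIT_SYSMEM;
      if (can_patch)
         return SUBMIT_CHAINED;
      return SUBMIT_FALLBACK;
   }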
-static void *radv_amdgpu_winsys_get_cpu_addr(void *_cs, uint64_t addr)
+static void *
+radv_amdgpu_winsys_get_cpu_addr(void *_cs, uint64_t addr)
{
- struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)_cs;
- void *ret = NULL;
-
- if (!cs->ib_buffer)
- return NULL;
- for (unsigned i = 0; i <= cs->num_old_ib_buffers; ++i) {
- struct radv_amdgpu_winsys_bo *bo;
-
- bo = (struct radv_amdgpu_winsys_bo*)
- (i == cs->num_old_ib_buffers ? cs->ib_buffer : cs->old_ib_buffers[i]);
- if (addr >= bo->base.va && addr - bo->base.va < bo->size) {
- if (amdgpu_bo_cpu_map(bo->bo, &ret) == 0)
- return (char *)ret + (addr - bo->base.va);
- }
- }
- u_rwlock_rdlock(&cs->ws->global_bo_list.lock);
- for (uint32_t i = 0; i < cs->ws->global_bo_list.count; i++) {
- struct radv_amdgpu_winsys_bo *bo = cs->ws->global_bo_list.bos[i];
- if (addr >= bo->base.va && addr - bo->base.va < bo->size) {
- if (amdgpu_bo_cpu_map(bo->bo, &ret) == 0) {
- u_rwlock_rdunlock(&cs->ws->global_bo_list.lock);
- return (char *)ret + (addr - bo->base.va);
- }
- }
- }
- u_rwlock_rdunlock(&cs->ws->global_bo_list.lock);
-
- return ret;
+ struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)_cs;
+ void *ret = NULL;
+
+ if (!cs->ib_buffer)
+ return NULL;
+ for (unsigned i = 0; i <= cs->num_old_ib_buffers; ++i) {
+ struct radv_amdgpu_winsys_bo *bo;
+
+ bo = (struct radv_amdgpu_winsys_bo *)(i == cs->num_old_ib_buffers ? cs->ib_buffer
+ : cs->old_ib_buffers[i]);
+ if (addr >= bo->base.va && addr - bo->base.va < bo->size) {
+ if (amdgpu_bo_cpu_map(bo->bo, &ret) == 0)
+ return (char *)ret + (addr - bo->base.va);
+ }
+ }
+ u_rwlock_rdlock(&cs->ws->global_bo_list.lock);
+ for (uint32_t i = 0; i < cs->ws->global_bo_list.count; i++) {
+ struct radv_amdgpu_winsys_bo *bo = cs->ws->global_bo_list.bos[i];
+ if (addr >= bo->base.va && addr - bo->base.va < bo->size) {
+ if (amdgpu_bo_cpu_map(bo->bo, &ret) == 0) {
+ u_rwlock_rdunlock(&cs->ws->global_bo_list.lock);
+ return (char *)ret + (addr - bo->base.va);
+ }
+ }
+ }
+ u_rwlock_rdunlock(&cs->ws->global_bo_list.lock);
+
+ return ret;
}
-static void radv_amdgpu_winsys_cs_dump(struct radeon_cmdbuf *_cs,
- FILE* file,
- const int *trace_ids, int trace_id_count)
+static void
+radv_amdgpu_winsys_cs_dump(struct radeon_cmdbuf *_cs, FILE *file, const int *trace_ids,
+ int trace_id_count)
{
- struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)_cs;
- void *ib = cs->base.buf;
- int num_dw = cs->base.cdw;
-
- if (cs->ws->use_ib_bos) {
- ib = radv_amdgpu_winsys_get_cpu_addr(cs, cs->ib.ib_mc_address);
- num_dw = cs->ib.size;
- }
- assert(ib);
- ac_parse_ib(file, ib, num_dw, trace_ids, trace_id_count, "main IB",
- cs->ws->info.chip_class, radv_amdgpu_winsys_get_cpu_addr, cs);
+ struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)_cs;
+ void *ib = cs->base.buf;
+ int num_dw = cs->base.cdw;
+
+ if (cs->ws->use_ib_bos) {
+ ib = radv_amdgpu_winsys_get_cpu_addr(cs, cs->ib.ib_mc_address);
+ num_dw = cs->ib.size;
+ }
+ assert(ib);
+ ac_parse_ib(file, ib, num_dw, trace_ids, trace_id_count, "main IB", cs->ws->info.chip_class,
+ radv_amdgpu_winsys_get_cpu_addr, cs);
}
-static uint32_t radv_to_amdgpu_priority(enum radeon_ctx_priority radv_priority)
+static uint32_t
+radv_to_amdgpu_priority(enum radeon_ctx_priority radv_priority)
{
- switch (radv_priority) {
- case RADEON_CTX_PRIORITY_REALTIME:
- return AMDGPU_CTX_PRIORITY_VERY_HIGH;
- case RADEON_CTX_PRIORITY_HIGH:
- return AMDGPU_CTX_PRIORITY_HIGH;
- case RADEON_CTX_PRIORITY_MEDIUM:
- return AMDGPU_CTX_PRIORITY_NORMAL;
- case RADEON_CTX_PRIORITY_LOW:
- return AMDGPU_CTX_PRIORITY_LOW;
- default:
- unreachable("Invalid context priority");
- }
+ switch (radv_priority) {
+ case RADEON_CTX_PRIORITY_REALTIME:
+ return AMDGPU_CTX_PRIORITY_VERY_HIGH;
+ case RADEON_CTX_PRIORITY_HIGH:
+ return AMDGPU_CTX_PRIORITY_HIGH;
+ case RADEON_CTX_PRIORITY_MEDIUM:
+ return AMDGPU_CTX_PRIORITY_NORMAL;
+ case RADEON_CTX_PRIORITY_LOW:
+ return AMDGPU_CTX_PRIORITY_LOW;
+ default:
+ unreachable("Invalid context priority");
+ }
}
-static VkResult radv_amdgpu_ctx_create(struct radeon_winsys *_ws,
- enum radeon_ctx_priority priority,
- struct radeon_winsys_ctx **rctx)
+static VkResult
+radv_amdgpu_ctx_create(struct radeon_winsys *_ws, enum radeon_ctx_priority priority,
+ struct radeon_winsys_ctx **rctx)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
- struct radv_amdgpu_ctx *ctx = CALLOC_STRUCT(radv_amdgpu_ctx);
- uint32_t amdgpu_priority = radv_to_amdgpu_priority(priority);
- VkResult result;
- int r;
-
- if (!ctx)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
-
- r = amdgpu_cs_ctx_create2(ws->dev, amdgpu_priority, &ctx->ctx);
- if (r && r == -EACCES) {
- result = VK_ERROR_NOT_PERMITTED_EXT;
- goto fail_create;
- } else if (r) {
- fprintf(stderr, "amdgpu: radv_amdgpu_cs_ctx_create2 failed. (%i)\n", r);
- result = VK_ERROR_OUT_OF_HOST_MEMORY;
- goto fail_create;
- }
- ctx->ws = ws;
-
- assert(AMDGPU_HW_IP_NUM * MAX_RINGS_PER_TYPE * sizeof(uint64_t) <= 4096);
- ctx->fence_bo = ws->base.buffer_create(&ws->base, 4096, 8,
- RADEON_DOMAIN_GTT,
- RADEON_FLAG_CPU_ACCESS |
- RADEON_FLAG_NO_INTERPROCESS_SHARING,
- RADV_BO_PRIORITY_CS);
- if (!ctx->fence_bo) {
- result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
- goto fail_alloc;
- }
-
- ctx->fence_map = (uint64_t *)ws->base.buffer_map(ctx->fence_bo);
- if (!ctx->fence_map) {
- result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
- goto fail_map;
- }
-
- memset(ctx->fence_map, 0, 4096);
-
- *rctx = (struct radeon_winsys_ctx *)ctx;
- return VK_SUCCESS;
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ struct radv_amdgpu_ctx *ctx = CALLOC_STRUCT(radv_amdgpu_ctx);
+ uint32_t amdgpu_priority = radv_to_amdgpu_priority(priority);
+ VkResult result;
+ int r;
+
+ if (!ctx)
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+ r = amdgpu_cs_ctx_create2(ws->dev, amdgpu_priority, &ctx->ctx);
+ if (r && r == -EACCES) {
+ result = VK_ERROR_NOT_PERMITTED_EXT;
+ goto fail_create;
+ } else if (r) {
+ fprintf(stderr, "amdgpu: radv_amdgpu_cs_ctx_create2 failed. (%i)\n", r);
+ result = VK_ERROR_OUT_OF_HOST_MEMORY;
+ goto fail_create;
+ }
+ ctx->ws = ws;
+
+ assert(AMDGPU_HW_IP_NUM * MAX_RINGS_PER_TYPE * sizeof(uint64_t) <= 4096);
+ ctx->fence_bo = ws->base.buffer_create(
+ &ws->base, 4096, 8, RADEON_DOMAIN_GTT,
+ RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING, RADV_BO_PRIORITY_CS);
+ if (!ctx->fence_bo) {
+ result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
+ goto fail_alloc;
+ }
+
+ ctx->fence_map = (uint64_t *)ws->base.buffer_map(ctx->fence_bo);
+ if (!ctx->fence_map) {
+ result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
+ goto fail_map;
+ }
+
+ memset(ctx->fence_map, 0, 4096);
+
+ *rctx = (struct radeon_winsys_ctx *)ctx;
+ return VK_SUCCESS;
fail_map:
- ws->base.buffer_destroy(&ws->base, ctx->fence_bo);
+ ws->base.buffer_destroy(&ws->base, ctx->fence_bo);
fail_alloc:
- amdgpu_cs_ctx_free(ctx->ctx);
+ amdgpu_cs_ctx_free(ctx->ctx);
fail_create:
- FREE(ctx);
- return result;
+ FREE(ctx);
+ return result;
}
-static void radv_amdgpu_ctx_destroy(struct radeon_winsys_ctx *rwctx)
+static void
+radv_amdgpu_ctx_destroy(struct radeon_winsys_ctx *rwctx)
{
- struct radv_amdgpu_ctx *ctx = (struct radv_amdgpu_ctx *)rwctx;
- ctx->ws->base.buffer_destroy(&ctx->ws->base, ctx->fence_bo);
- amdgpu_cs_ctx_free(ctx->ctx);
- FREE(ctx);
+ struct radv_amdgpu_ctx *ctx = (struct radv_amdgpu_ctx *)rwctx;
+ ctx->ws->base.buffer_destroy(&ctx->ws->base, ctx->fence_bo);
+ amdgpu_cs_ctx_free(ctx->ctx);
+ FREE(ctx);
}
-static bool radv_amdgpu_ctx_wait_idle(struct radeon_winsys_ctx *rwctx,
- enum ring_type ring_type, int ring_index)
+static bool
+radv_amdgpu_ctx_wait_idle(struct radeon_winsys_ctx *rwctx, enum ring_type ring_type, int ring_index)
{
- struct radv_amdgpu_ctx *ctx = (struct radv_amdgpu_ctx *)rwctx;
- int ip_type = ring_to_hw_ip(ring_type);
+ struct radv_amdgpu_ctx *ctx = (struct radv_amdgpu_ctx *)rwctx;
+ int ip_type = ring_to_hw_ip(ring_type);
- if (ctx->last_submission[ip_type][ring_index].fence.fence) {
- uint32_t expired;
- int ret = amdgpu_cs_query_fence_status(&ctx->last_submission[ip_type][ring_index].fence,
- 1000000000ull, 0, &expired);
+ if (ctx->last_submission[ip_type][ring_index].fence.fence) {
+ uint32_t expired;
+ int ret = amdgpu_cs_query_fence_status(&ctx->last_submission[ip_type][ring_index].fence,
+ 1000000000ull, 0, &expired);
- if (ret || !expired)
- return false;
- }
+ if (ret || !expired)
+ return false;
+ }
- return true;
+ return true;
}
-static void *radv_amdgpu_cs_alloc_syncobj_chunk(struct radv_winsys_sem_counts *counts,
- const uint32_t *syncobj_override,
- struct drm_amdgpu_cs_chunk *chunk, int chunk_id)
+static void *
+radv_amdgpu_cs_alloc_syncobj_chunk(struct radv_winsys_sem_counts *counts,
+ const uint32_t *syncobj_override,
+ struct drm_amdgpu_cs_chunk *chunk, int chunk_id)
{
- const uint32_t *src = syncobj_override ? syncobj_override : counts->syncobj;
- struct drm_amdgpu_cs_chunk_sem *syncobj = malloc(sizeof(struct drm_amdgpu_cs_chunk_sem) * counts->syncobj_count);
- if (!syncobj)
- return NULL;
-
- for (unsigned i = 0; i < counts->syncobj_count; i++) {
- struct drm_amdgpu_cs_chunk_sem *sem = &syncobj[i];
- sem->handle = src[i];
- }
-
- chunk->chunk_id = chunk_id;
- chunk->length_dw = sizeof(struct drm_amdgpu_cs_chunk_sem) / 4 * counts->syncobj_count;
- chunk->chunk_data = (uint64_t)(uintptr_t)syncobj;
- return syncobj;
+ const uint32_t *src = syncobj_override ? syncobj_override : counts->syncobj;
+ struct drm_amdgpu_cs_chunk_sem *syncobj =
+ malloc(sizeof(struct drm_amdgpu_cs_chunk_sem) * counts->syncobj_count);
+ if (!syncobj)
+ return NULL;
+
+ for (unsigned i = 0; i < counts->syncobj_count; i++) {
+ struct drm_amdgpu_cs_chunk_sem *sem = &syncobj[i];
+ sem->handle = src[i];
+ }
+
+ chunk->chunk_id = chunk_id;
+ chunk->length_dw = sizeof(struct drm_amdgpu_cs_chunk_sem) / 4 * counts->syncobj_count;
+ chunk->chunk_data = (uint64_t)(uintptr_t)syncobj;
+ return syncobj;
}
static void *
@@ -1278,442 +1243,431 @@ radv_amdgpu_cs_alloc_timeline_syncobj_chunk(struct radv_winsys_sem_counts *count
const uint32_t *syncobj_override,
struct drm_amdgpu_cs_chunk *chunk, int chunk_id)
{
- const uint32_t *src = syncobj_override ? syncobj_override : counts->syncobj;
- struct drm_amdgpu_cs_chunk_syncobj *syncobj = malloc(sizeof(struct drm_amdgpu_cs_chunk_syncobj) *
- (counts->syncobj_count + counts->timeline_syncobj_count));
- if (!syncobj)
- return NULL;
-
- for (unsigned i = 0; i < counts->syncobj_count; i++) {
- struct drm_amdgpu_cs_chunk_syncobj *sem = &syncobj[i];
- sem->handle = src[i];
- sem->flags = 0;
- sem->point = 0;
- }
-
- for (unsigned i = 0; i < counts->timeline_syncobj_count; i++) {
- struct drm_amdgpu_cs_chunk_syncobj *sem = &syncobj[i + counts->syncobj_count];
- sem->handle = counts->syncobj[i + counts->syncobj_count];
- sem->flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;
- sem->point = counts->points[i];
- }
-
- chunk->chunk_id = chunk_id;
- chunk->length_dw = sizeof(struct drm_amdgpu_cs_chunk_syncobj) / 4 *
- (counts->syncobj_count + counts->timeline_syncobj_count);
- chunk->chunk_data = (uint64_t)(uintptr_t)syncobj;
- return syncobj;
+ const uint32_t *src = syncobj_override ? syncobj_override : counts->syncobj;
+ struct drm_amdgpu_cs_chunk_syncobj *syncobj =
+ malloc(sizeof(struct drm_amdgpu_cs_chunk_syncobj) *
+ (counts->syncobj_count + counts->timeline_syncobj_count));
+ if (!syncobj)
+ return NULL;
+
+ for (unsigned i = 0; i < counts->syncobj_count; i++) {
+ struct drm_amdgpu_cs_chunk_syncobj *sem = &syncobj[i];
+ sem->handle = src[i];
+ sem->flags = 0;
+ sem->point = 0;
+ }
+
+ for (unsigned i = 0; i < counts->timeline_syncobj_count; i++) {
+ struct drm_amdgpu_cs_chunk_syncobj *sem = &syncobj[i + counts->syncobj_count];
+ sem->handle = counts->syncobj[i + counts->syncobj_count];
+ sem->flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;
+ sem->point = counts->points[i];
+ }
+
+ chunk->chunk_id = chunk_id;
+ chunk->length_dw = sizeof(struct drm_amdgpu_cs_chunk_syncobj) / 4 *
+ (counts->syncobj_count + counts->timeline_syncobj_count);
+ chunk->chunk_data = (uint64_t)(uintptr_t)syncobj;
+ return syncobj;
}
-static int radv_amdgpu_cache_alloc_syncobjs(struct radv_amdgpu_winsys *ws, unsigned count, uint32_t *dst)
+static int
+radv_amdgpu_cache_alloc_syncobjs(struct radv_amdgpu_winsys *ws, unsigned count, uint32_t *dst)
{
- pthread_mutex_lock(&ws->syncobj_lock);
- if (count > ws->syncobj_capacity) {
- if (ws->syncobj_capacity > UINT32_MAX / 2)
- goto fail;
-
- unsigned new_capacity = MAX2(count, ws->syncobj_capacity * 2);
- uint32_t *n = realloc(ws->syncobj, new_capacity * sizeof(*ws->syncobj));
- if (!n)
- goto fail;
- ws->syncobj_capacity = new_capacity;
- ws->syncobj = n;
- }
-
- while(ws->syncobj_count < count) {
- int r = amdgpu_cs_create_syncobj(ws->dev, ws->syncobj + ws->syncobj_count);
- if (r)
- goto fail;
- ++ws->syncobj_count;
- }
-
- for (unsigned i = 0; i < count; ++i)
- dst[i] = ws->syncobj[--ws->syncobj_count];
-
- pthread_mutex_unlock(&ws->syncobj_lock);
- return 0;
+ pthread_mutex_lock(&ws->syncobj_lock);
+ if (count > ws->syncobj_capacity) {
+ if (ws->syncobj_capacity > UINT32_MAX / 2)
+ goto fail;
+
+ unsigned new_capacity = MAX2(count, ws->syncobj_capacity * 2);
+ uint32_t *n = realloc(ws->syncobj, new_capacity * sizeof(*ws->syncobj));
+ if (!n)
+ goto fail;
+ ws->syncobj_capacity = new_capacity;
+ ws->syncobj = n;
+ }
+
+ while (ws->syncobj_count < count) {
+ int r = amdgpu_cs_create_syncobj(ws->dev, ws->syncobj + ws->syncobj_count);
+ if (r)
+ goto fail;
+ ++ws->syncobj_count;
+ }
+
+ for (unsigned i = 0; i < count; ++i)
+ dst[i] = ws->syncobj[--ws->syncobj_count];
+
+ pthread_mutex_unlock(&ws->syncobj_lock);
+ return 0;
fail:
- pthread_mutex_unlock(&ws->syncobj_lock);
- return -ENOMEM;
+ pthread_mutex_unlock(&ws->syncobj_lock);
+ return -ENOMEM;
}
-static void radv_amdgpu_cache_free_syncobjs(struct radv_amdgpu_winsys *ws, unsigned count, uint32_t *src)
+static void
+radv_amdgpu_cache_free_syncobjs(struct radv_amdgpu_winsys *ws, unsigned count, uint32_t *src)
{
- pthread_mutex_lock(&ws->syncobj_lock);
-
- uint32_t cache_count = MIN2(count, UINT32_MAX - ws->syncobj_count);
- if (cache_count + ws->syncobj_count > ws->syncobj_capacity) {
- unsigned new_capacity = MAX2(ws->syncobj_count + cache_count, ws->syncobj_capacity * 2);
- uint32_t* n = realloc(ws->syncobj, new_capacity * sizeof(*ws->syncobj));
- if (n) {
- ws->syncobj_capacity = new_capacity;
- ws->syncobj = n;
- }
- }
-
- for (unsigned i = 0; i < count; ++i) {
- if (ws->syncobj_count < ws->syncobj_capacity)
- ws->syncobj[ws->syncobj_count++] = src[i];
- else
- amdgpu_cs_destroy_syncobj(ws->dev, src[i]);
- }
-
- pthread_mutex_unlock(&ws->syncobj_lock);
-
+ pthread_mutex_lock(&ws->syncobj_lock);
+
+ uint32_t cache_count = MIN2(count, UINT32_MAX - ws->syncobj_count);
+ if (cache_count + ws->syncobj_count > ws->syncobj_capacity) {
+ unsigned new_capacity = MAX2(ws->syncobj_count + cache_count, ws->syncobj_capacity * 2);
+ uint32_t *n = realloc(ws->syncobj, new_capacity * sizeof(*ws->syncobj));
+ if (n) {
+ ws->syncobj_capacity = new_capacity;
+ ws->syncobj = n;
+ }
+ }
+
+ for (unsigned i = 0; i < count; ++i) {
+ if (ws->syncobj_count < ws->syncobj_capacity)
+ ws->syncobj[ws->syncobj_count++] = src[i];
+ else
+ amdgpu_cs_destroy_syncobj(ws->dev, src[i]);
+ }
+
+ pthread_mutex_unlock(&ws->syncobj_lock);
}
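The two helpers above form a small mutex-protected LIFO cache of kernel syncobj handles: the alloc side creates new handles only when the cache holds fewer than requested and then pops from the top, while the free side pushes handles back and destroys whatever no longer fits. A condensed sketch of the free side, with illustrative types and the kernel call left as a comment:

   #include <pthread.h>
   #include <stdint.h>
   #include <stdlib.h>

   struct syncobj_cache {
      pthread_mutex_t lock;
      uint32_t *handles;
      unsigned count, capacity;
   };

   static void
   cache_free_syncobjs(struct syncobj_cache *c, unsigned n, const uint32_t *src)
   {
      pthread_mutex_lock(&c->lock);

      /* Try to grow the backing array so the returned handles fit. */
      if (c->count + n > c->capacity) {
         unsigned new_cap = c->capacity * 2 > c->count + n ? c->capacity * 2 : c->count + n;
         uint32_t *tmp = realloc(c->handles, new_cap * sizeof(*tmp));
         if (tmp) {
            c->handles = tmp;
            c->capacity = new_cap;
         }
      }

      /* Cache what fits; the real driver destroys the rest with
       * amdgpu_cs_destroy_syncobj(). */
      for (unsigned i = 0; i < n; i++) {
         if (c->count < c->capacity)
            c->handles[c->count++] = src[i];
      }

      pthread_mutex_unlock(&c->lock);
   }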
-static int radv_amdgpu_cs_prepare_syncobjs(struct radv_amdgpu_winsys *ws,
- struct radv_winsys_sem_counts *counts,
- uint32_t **out_syncobjs)
+static int
+radv_amdgpu_cs_prepare_syncobjs(struct radv_amdgpu_winsys *ws,
+ struct radv_winsys_sem_counts *counts, uint32_t **out_syncobjs)
{
- int r = 0;
-
- if (!ws->info.has_timeline_syncobj || !counts->syncobj_count) {
- *out_syncobjs = NULL;
- return 0;
- }
-
- *out_syncobjs = malloc(counts->syncobj_count * sizeof(**out_syncobjs));
- if (!*out_syncobjs)
- return -ENOMEM;
-
- r = radv_amdgpu_cache_alloc_syncobjs(ws, counts->syncobj_count, *out_syncobjs);
- if (r)
- return r;
-
- for (unsigned i = 0; i < counts->syncobj_count; ++i) {
- r = amdgpu_cs_syncobj_transfer(ws->dev, (*out_syncobjs)[i], 0, counts->syncobj[i], 0, DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT);
- if (r)
- goto fail;
- }
-
- r = amdgpu_cs_syncobj_reset(ws->dev, counts->syncobj, counts->syncobj_reset_count);
- if (r)
- goto fail;
-
- return 0;
+ int r = 0;
+
+ if (!ws->info.has_timeline_syncobj || !counts->syncobj_count) {
+ *out_syncobjs = NULL;
+ return 0;
+ }
+
+ *out_syncobjs = malloc(counts->syncobj_count * sizeof(**out_syncobjs));
+ if (!*out_syncobjs)
+ return -ENOMEM;
+
+ r = radv_amdgpu_cache_alloc_syncobjs(ws, counts->syncobj_count, *out_syncobjs);
+ if (r)
+ return r;
+
+ for (unsigned i = 0; i < counts->syncobj_count; ++i) {
+ r = amdgpu_cs_syncobj_transfer(ws->dev, (*out_syncobjs)[i], 0, counts->syncobj[i], 0,
+ DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT);
+ if (r)
+ goto fail;
+ }
+
+ r = amdgpu_cs_syncobj_reset(ws->dev, counts->syncobj, counts->syncobj_reset_count);
+ if (r)
+ goto fail;
+
+ return 0;
fail:
- radv_amdgpu_cache_free_syncobjs(ws, counts->syncobj_count, *out_syncobjs);
- free(*out_syncobjs);
- *out_syncobjs = NULL;
- return r;
+ radv_amdgpu_cache_free_syncobjs(ws, counts->syncobj_count, *out_syncobjs);
+ free(*out_syncobjs);
+ *out_syncobjs = NULL;
+ return r;
}
static VkResult
-radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx,
- struct radv_amdgpu_cs_request *request,
- struct radv_winsys_sem_info *sem_info)
+radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx, struct radv_amdgpu_cs_request *request,
+ struct radv_winsys_sem_info *sem_info)
{
- int r;
- int num_chunks;
- int size;
- struct drm_amdgpu_cs_chunk *chunks;
- struct drm_amdgpu_cs_chunk_data *chunk_data;
- struct drm_amdgpu_bo_list_in bo_list_in;
- void *wait_syncobj = NULL, *signal_syncobj = NULL;
- uint32_t *in_syncobjs = NULL;
- int i;
- uint32_t bo_list = 0;
- VkResult result = VK_SUCCESS;
-
- size = request->number_of_ibs + 2 /* user fence */ + 4;
-
- chunks = malloc(sizeof(chunks[0]) * size);
- if (!chunks)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
-
- size = request->number_of_ibs + 1 /* user fence */;
-
- chunk_data = malloc(sizeof(chunk_data[0]) * size);
- if (!chunk_data) {
- result = VK_ERROR_OUT_OF_HOST_MEMORY;
- goto error_out;
- }
-
- num_chunks = request->number_of_ibs;
- for (i = 0; i < request->number_of_ibs; i++) {
- struct amdgpu_cs_ib_info *ib;
- chunks[i].chunk_id = AMDGPU_CHUNK_ID_IB;
- chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4;
- chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i];
-
- ib = &request->ibs[i];
-
- chunk_data[i].ib_data._pad = 0;
- chunk_data[i].ib_data.va_start = ib->ib_mc_address;
- chunk_data[i].ib_data.ib_bytes = ib->size * 4;
- chunk_data[i].ib_data.ip_type = request->ip_type;
- chunk_data[i].ib_data.ip_instance = request->ip_instance;
- chunk_data[i].ib_data.ring = request->ring;
- chunk_data[i].ib_data.flags = ib->flags;
- }
-
- i = num_chunks++;
- chunks[i].chunk_id = AMDGPU_CHUNK_ID_FENCE;
- chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_fence) / 4;
- chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i];
-
- struct amdgpu_cs_fence_info fence_info;
- fence_info.handle = radv_amdgpu_winsys_bo(ctx->fence_bo)->bo;
- fence_info.offset = (request->ip_type * MAX_RINGS_PER_TYPE + request->ring) * sizeof(uint64_t);
- amdgpu_cs_chunk_fence_info_to_data(&fence_info, &chunk_data[i]);
-
- if ((sem_info->wait.syncobj_count || sem_info->wait.timeline_syncobj_count) && sem_info->cs_emit_wait) {
- r = radv_amdgpu_cs_prepare_syncobjs(ctx->ws, &sem_info->wait, &in_syncobjs);
- if (r)
- goto error_out;
-
- if (ctx->ws->info.has_timeline_syncobj) {
- wait_syncobj = radv_amdgpu_cs_alloc_timeline_syncobj_chunk(&sem_info->wait,
- in_syncobjs,
- &chunks[num_chunks],
- AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT);
- } else {
- wait_syncobj = radv_amdgpu_cs_alloc_syncobj_chunk(&sem_info->wait,
- in_syncobjs,
- &chunks[num_chunks],
- AMDGPU_CHUNK_ID_SYNCOBJ_IN);
- }
- if (!wait_syncobj) {
- result = VK_ERROR_OUT_OF_HOST_MEMORY;
- goto error_out;
- }
- num_chunks++;
-
- sem_info->cs_emit_wait = false;
- }
-
- if ((sem_info->signal.syncobj_count || sem_info->signal.timeline_syncobj_count) && sem_info->cs_emit_signal) {
- if (ctx->ws->info.has_timeline_syncobj) {
- signal_syncobj = radv_amdgpu_cs_alloc_timeline_syncobj_chunk(&sem_info->signal,
- NULL,
- &chunks[num_chunks],
- AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL);
- } else {
- signal_syncobj = radv_amdgpu_cs_alloc_syncobj_chunk(&sem_info->signal,
- NULL,
- &chunks[num_chunks],
- AMDGPU_CHUNK_ID_SYNCOBJ_OUT);
- }
- if (!signal_syncobj) {
- result = VK_ERROR_OUT_OF_HOST_MEMORY;
- goto error_out;
- }
- num_chunks++;
- }
-
- /* Standard path passing the buffer list via the CS ioctl. */
- bo_list_in.operation = ~0;
- bo_list_in.list_handle = ~0;
- bo_list_in.bo_number = request->num_handles;
- bo_list_in.bo_info_size = sizeof(struct drm_amdgpu_bo_list_entry);
- bo_list_in.bo_info_ptr = (uint64_t)(uintptr_t)request->handles;
-
- chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_BO_HANDLES;
- chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_bo_list_in) / 4;
- chunks[num_chunks].chunk_data = (uintptr_t)&bo_list_in;
- num_chunks++;
-
- r = amdgpu_cs_submit_raw2(ctx->ws->dev,
- ctx->ctx,
- bo_list,
- num_chunks,
- chunks,
- &request->seq_no);
-
- if (r) {
- if (r == -ENOMEM) {
- fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
- result = VK_ERROR_OUT_OF_HOST_MEMORY;
- } else if (r == -ECANCELED) {
- fprintf(stderr, "amdgpu: The CS has been cancelled because the context is lost.\n");
- result = VK_ERROR_DEVICE_LOST;
- } else {
- fprintf(stderr, "amdgpu: The CS has been rejected, "
- "see dmesg for more information (%i).\n", r);
- result = VK_ERROR_UNKNOWN;
- }
- }
-
- if (bo_list)
- amdgpu_bo_list_destroy_raw(ctx->ws->dev, bo_list);
+ int r;
+ int num_chunks;
+ int size;
+ struct drm_amdgpu_cs_chunk *chunks;
+ struct drm_amdgpu_cs_chunk_data *chunk_data;
+ struct drm_amdgpu_bo_list_in bo_list_in;
+ void *wait_syncobj = NULL, *signal_syncobj = NULL;
+ uint32_t *in_syncobjs = NULL;
+ int i;
+ uint32_t bo_list = 0;
+ VkResult result = VK_SUCCESS;
+
+ size = request->number_of_ibs + 2 /* user fence */ + 4;
+
+ chunks = malloc(sizeof(chunks[0]) * size);
+ if (!chunks)
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+ size = request->number_of_ibs + 1 /* user fence */;
+
+ chunk_data = malloc(sizeof(chunk_data[0]) * size);
+ if (!chunk_data) {
+ result = VK_ERROR_OUT_OF_HOST_MEMORY;
+ goto error_out;
+ }
+
+ num_chunks = request->number_of_ibs;
+ for (i = 0; i < request->number_of_ibs; i++) {
+ struct amdgpu_cs_ib_info *ib;
+ chunks[i].chunk_id = AMDGPU_CHUNK_ID_IB;
+ chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4;
+ chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i];
+
+ ib = &request->ibs[i];
+
+ chunk_data[i].ib_data._pad = 0;
+ chunk_data[i].ib_data.va_start = ib->ib_mc_address;
+ chunk_data[i].ib_data.ib_bytes = ib->size * 4;
+ chunk_data[i].ib_data.ip_type = request->ip_type;
+ chunk_data[i].ib_data.ip_instance = request->ip_instance;
+ chunk_data[i].ib_data.ring = request->ring;
+ chunk_data[i].ib_data.flags = ib->flags;
+ }
+
+ i = num_chunks++;
+ chunks[i].chunk_id = AMDGPU_CHUNK_ID_FENCE;
+ chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_fence) / 4;
+ chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i];
+
+ struct amdgpu_cs_fence_info fence_info;
+ fence_info.handle = radv_amdgpu_winsys_bo(ctx->fence_bo)->bo;
+ fence_info.offset = (request->ip_type * MAX_RINGS_PER_TYPE + request->ring) * sizeof(uint64_t);
+ amdgpu_cs_chunk_fence_info_to_data(&fence_info, &chunk_data[i]);
+
+ if ((sem_info->wait.syncobj_count || sem_info->wait.timeline_syncobj_count) &&
+ sem_info->cs_emit_wait) {
+ r = radv_amdgpu_cs_prepare_syncobjs(ctx->ws, &sem_info->wait, &in_syncobjs);
+ if (r)
+ goto error_out;
+
+ if (ctx->ws->info.has_timeline_syncobj) {
+ wait_syncobj = radv_amdgpu_cs_alloc_timeline_syncobj_chunk(
+ &sem_info->wait, in_syncobjs, &chunks[num_chunks],
+ AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT);
+ } else {
+ wait_syncobj = radv_amdgpu_cs_alloc_syncobj_chunk(
+ &sem_info->wait, in_syncobjs, &chunks[num_chunks], AMDGPU_CHUNK_ID_SYNCOBJ_IN);
+ }
+ if (!wait_syncobj) {
+ result = VK_ERROR_OUT_OF_HOST_MEMORY;
+ goto error_out;
+ }
+ num_chunks++;
+
+ sem_info->cs_emit_wait = false;
+ }
+
+ if ((sem_info->signal.syncobj_count || sem_info->signal.timeline_syncobj_count) &&
+ sem_info->cs_emit_signal) {
+ if (ctx->ws->info.has_timeline_syncobj) {
+ signal_syncobj = radv_amdgpu_cs_alloc_timeline_syncobj_chunk(
+ &sem_info->signal, NULL, &chunks[num_chunks], AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL);
+ } else {
+ signal_syncobj = radv_amdgpu_cs_alloc_syncobj_chunk(
+ &sem_info->signal, NULL, &chunks[num_chunks], AMDGPU_CHUNK_ID_SYNCOBJ_OUT);
+ }
+ if (!signal_syncobj) {
+ result = VK_ERROR_OUT_OF_HOST_MEMORY;
+ goto error_out;
+ }
+ num_chunks++;
+ }
+
+ /* Standard path passing the buffer list via the CS ioctl. */
+ bo_list_in.operation = ~0;
+ bo_list_in.list_handle = ~0;
+ bo_list_in.bo_number = request->num_handles;
+ bo_list_in.bo_info_size = sizeof(struct drm_amdgpu_bo_list_entry);
+ bo_list_in.bo_info_ptr = (uint64_t)(uintptr_t)request->handles;
+
+ chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_BO_HANDLES;
+ chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_bo_list_in) / 4;
+ chunks[num_chunks].chunk_data = (uintptr_t)&bo_list_in;
+ num_chunks++;
+
+ r = amdgpu_cs_submit_raw2(ctx->ws->dev, ctx->ctx, bo_list, num_chunks, chunks, &request->seq_no);
+
+ if (r) {
+ if (r == -ENOMEM) {
+ fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
+ result = VK_ERROR_OUT_OF_HOST_MEMORY;
+ } else if (r == -ECANCELED) {
+ fprintf(stderr, "amdgpu: The CS has been cancelled because the context is lost.\n");
+ result = VK_ERROR_DEVICE_LOST;
+ } else {
+ fprintf(stderr,
+ "amdgpu: The CS has been rejected, "
+ "see dmesg for more information (%i).\n",
+ r);
+ result = VK_ERROR_UNKNOWN;
+ }
+ }
+
+ if (bo_list)
+ amdgpu_bo_list_destroy_raw(ctx->ws->dev, bo_list);
error_out:
- if (in_syncobjs) {
- radv_amdgpu_cache_free_syncobjs(ctx->ws, sem_info->wait.syncobj_count, in_syncobjs);
- free(in_syncobjs);
- }
- free(chunks);
- free(chunk_data);
- free(wait_syncobj);
- free(signal_syncobj);
- return result;
+ if (in_syncobjs) {
+ radv_amdgpu_cache_free_syncobjs(ctx->ws, sem_info->wait.syncobj_count, in_syncobjs);
+ free(in_syncobjs);
+ }
+ free(chunks);
+ free(chunk_data);
+ free(wait_syncobj);
+ free(signal_syncobj);
+ return result;
}
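The ioctl payload assembled above always follows the same order: one IB chunk per entry in request->ibs, the user-fence chunk, an optional wait-syncobj chunk, an optional signal-syncobj chunk, and finally the BO-handles chunk; the `number_of_ibs + 2 /* user fence */ + 4` allocation simply reserves room for the worst case. A tiny sketch of that accounting with an invented helper:

   #include <stdbool.h>

   /* Count the chunks a submission carries, in the order radv_amdgpu_cs_submit()
    * fills its chunk array. */
   static unsigned
   count_cs_chunks(unsigned number_of_ibs, bool emit_wait, bool emit_signal)
   {
      unsigned n = number_of_ibs; /* AMDGPU_CHUNK_ID_IB, one per IB */
      n++;                        /* AMDGPU_CHUNK_ID_FENCE (user fence) */
      if (emit_wait)
         n++;                     /* SYNCOBJ_IN or SYNCOBJ_TIMELINE_WAIT */
      if (emit_signal)
         n++;                     /* SYNCOBJ_OUT or SYNCOBJ_TIMELINE_SIGNAL */
      n++;                        /* AMDGPU_CHUNK_ID_BO_HANDLES */
      return n;
   }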
-static int radv_amdgpu_create_syncobj(struct radeon_winsys *_ws,
- bool create_signaled,
- uint32_t *handle)
+static int
+radv_amdgpu_create_syncobj(struct radeon_winsys *_ws, bool create_signaled, uint32_t *handle)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
- uint32_t flags = 0;
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ uint32_t flags = 0;
- if (create_signaled)
- flags |= DRM_SYNCOBJ_CREATE_SIGNALED;
+ if (create_signaled)
+ flags |= DRM_SYNCOBJ_CREATE_SIGNALED;
- return amdgpu_cs_create_syncobj2(ws->dev, flags, handle);
+ return amdgpu_cs_create_syncobj2(ws->dev, flags, handle);
}
-static void radv_amdgpu_destroy_syncobj(struct radeon_winsys *_ws,
- uint32_t handle)
+static void
+radv_amdgpu_destroy_syncobj(struct radeon_winsys *_ws, uint32_t handle)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
- amdgpu_cs_destroy_syncobj(ws->dev, handle);
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ amdgpu_cs_destroy_syncobj(ws->dev, handle);
}
-static void radv_amdgpu_reset_syncobj(struct radeon_winsys *_ws,
- uint32_t handle)
+static void
+radv_amdgpu_reset_syncobj(struct radeon_winsys *_ws, uint32_t handle)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
- amdgpu_cs_syncobj_reset(ws->dev, &handle, 1);
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ amdgpu_cs_syncobj_reset(ws->dev, &handle, 1);
}
-static void radv_amdgpu_signal_syncobj(struct radeon_winsys *_ws,
- uint32_t handle, uint64_t point)
+static void
+radv_amdgpu_signal_syncobj(struct radeon_winsys *_ws, uint32_t handle, uint64_t point)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
- if (point)
- amdgpu_cs_syncobj_timeline_signal(ws->dev, &handle, &point, 1);
- else
- amdgpu_cs_syncobj_signal(ws->dev, &handle, 1);
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ if (point)
+ amdgpu_cs_syncobj_timeline_signal(ws->dev, &handle, &point, 1);
+ else
+ amdgpu_cs_syncobj_signal(ws->dev, &handle, 1);
}
-static VkResult radv_amdgpu_query_syncobj(struct radeon_winsys *_ws,
- uint32_t handle, uint64_t *point)
+static VkResult
+radv_amdgpu_query_syncobj(struct radeon_winsys *_ws, uint32_t handle, uint64_t *point)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
- int ret = amdgpu_cs_syncobj_query(ws->dev, &handle, point, 1);
- if (ret == 0)
- return VK_SUCCESS;
- else if (ret == -ENOMEM)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
- else {
- /* Remaining error are driver internal issues: EFAULT for
- * dangling pointers and ENOENT for non-existing syncobj. */
- fprintf(stderr, "amdgpu: internal error in radv_amdgpu_query_syncobj. (%d)\n", ret);
- return VK_ERROR_UNKNOWN;
- }
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ int ret = amdgpu_cs_syncobj_query(ws->dev, &handle, point, 1);
+ if (ret == 0)
+ return VK_SUCCESS;
+ else if (ret == -ENOMEM)
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+ else {
+      /* Remaining errors are driver-internal issues: EFAULT for
+       * dangling pointers and ENOENT for a non-existent syncobj. */
+ fprintf(stderr, "amdgpu: internal error in radv_amdgpu_query_syncobj. (%d)\n", ret);
+ return VK_ERROR_UNKNOWN;
+ }
}
-static bool radv_amdgpu_wait_syncobj(struct radeon_winsys *_ws, const uint32_t *handles,
- uint32_t handle_count, bool wait_all, uint64_t timeout)
+static bool
+radv_amdgpu_wait_syncobj(struct radeon_winsys *_ws, const uint32_t *handles, uint32_t handle_count,
+ bool wait_all, uint64_t timeout)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
- uint32_t tmp;
-
- /* The timeouts are signed, while vulkan timeouts are unsigned. */
- timeout = MIN2(timeout, INT64_MAX);
-
- int ret = amdgpu_cs_syncobj_wait(ws->dev, (uint32_t*)handles, handle_count, timeout,
- DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT |
- (wait_all ? DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL : 0),
- &tmp);
- if (ret == 0) {
- return true;
- } else if (ret == -ETIME) {
- return false;
- } else {
- fprintf(stderr, "amdgpu: radv_amdgpu_wait_syncobj failed!\nerrno: %d\n", errno);
- return false;
- }
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ uint32_t tmp;
+
+ /* The timeouts are signed, while vulkan timeouts are unsigned. */
+ timeout = MIN2(timeout, INT64_MAX);
+
+ int ret = amdgpu_cs_syncobj_wait(
+ ws->dev, (uint32_t *)handles, handle_count, timeout,
+ DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT | (wait_all ? DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL : 0),
+ &tmp);
+ if (ret == 0) {
+ return true;
+ } else if (ret == -ETIME) {
+ return false;
+ } else {
+ fprintf(stderr, "amdgpu: radv_amdgpu_wait_syncobj failed!\nerrno: %d\n", errno);
+ return false;
+ }
}
-static bool radv_amdgpu_wait_timeline_syncobj(struct radeon_winsys *_ws, const uint32_t *handles,
- const uint64_t *points, uint32_t handle_count,
- bool wait_all, bool available, uint64_t timeout)
+static bool
+radv_amdgpu_wait_timeline_syncobj(struct radeon_winsys *_ws, const uint32_t *handles,
+ const uint64_t *points, uint32_t handle_count, bool wait_all,
+ bool available, uint64_t timeout)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
-
- /* The timeouts are signed, while vulkan timeouts are unsigned. */
- timeout = MIN2(timeout, INT64_MAX);
-
- int ret = amdgpu_cs_syncobj_timeline_wait(ws->dev, (uint32_t*)handles, (uint64_t*)points,
- handle_count, timeout,
- DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT |
- (wait_all ? DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL : 0) |
- (available ? DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE : 0),
- NULL);
- if (ret == 0) {
- return true;
- } else if (ret == -ETIME) {
- return false;
- } else {
- fprintf(stderr, "amdgpu: radv_amdgpu_wait_syncobj failed! (%d)\n", errno);
- return false;
- }
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+
+ /* The timeouts are signed, while vulkan timeouts are unsigned. */
+ timeout = MIN2(timeout, INT64_MAX);
+
+ int ret = amdgpu_cs_syncobj_timeline_wait(
+ ws->dev, (uint32_t *)handles, (uint64_t *)points, handle_count, timeout,
+ DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT | (wait_all ? DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL : 0) |
+ (available ? DRM_SYNCOBJ_WAIT_FLAGS_WAIT_AVAILABLE : 0),
+ NULL);
+ if (ret == 0) {
+ return true;
+ } else if (ret == -ETIME) {
+ return false;
+ } else {
+ fprintf(stderr, "amdgpu: radv_amdgpu_wait_syncobj failed! (%d)\n", errno);
+ return false;
+ }
}
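Both wait helpers clamp the caller's timeout before passing it to the kernel, since Vulkan timeouts are unsigned 64-bit values while the DRM syncobj waits take a signed one. As a standalone sketch (helper name invented):

   #include <stdint.h>

   /* Equivalent of the MIN2(timeout, INT64_MAX) clamp in the wait helpers. */
   static uint64_t
   clamp_wait_timeout(uint64_t vk_timeout)
   {
      return vk_timeout > (uint64_t)INT64_MAX ? (uint64_t)INT64_MAX : vk_timeout;
   }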
-
-static int radv_amdgpu_export_syncobj(struct radeon_winsys *_ws,
- uint32_t syncobj,
- int *fd)
+static int
+radv_amdgpu_export_syncobj(struct radeon_winsys *_ws, uint32_t syncobj, int *fd)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
- return amdgpu_cs_export_syncobj(ws->dev, syncobj, fd);
+ return amdgpu_cs_export_syncobj(ws->dev, syncobj, fd);
}
-static int radv_amdgpu_import_syncobj(struct radeon_winsys *_ws,
- int fd,
- uint32_t *syncobj)
+static int
+radv_amdgpu_import_syncobj(struct radeon_winsys *_ws, int fd, uint32_t *syncobj)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
- return amdgpu_cs_import_syncobj(ws->dev, fd, syncobj);
+ return amdgpu_cs_import_syncobj(ws->dev, fd, syncobj);
}
-
-static int radv_amdgpu_export_syncobj_to_sync_file(struct radeon_winsys *_ws,
- uint32_t syncobj,
- int *fd)
+static int
+radv_amdgpu_export_syncobj_to_sync_file(struct radeon_winsys *_ws, uint32_t syncobj, int *fd)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
- return amdgpu_cs_syncobj_export_sync_file(ws->dev, syncobj, fd);
+ return amdgpu_cs_syncobj_export_sync_file(ws->dev, syncobj, fd);
}
-static int radv_amdgpu_import_syncobj_from_sync_file(struct radeon_winsys *_ws,
- uint32_t syncobj,
- int fd)
+static int
+radv_amdgpu_import_syncobj_from_sync_file(struct radeon_winsys *_ws, uint32_t syncobj, int fd)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
- return amdgpu_cs_syncobj_import_sync_file(ws->dev, syncobj, fd);
+ return amdgpu_cs_syncobj_import_sync_file(ws->dev, syncobj, fd);
}
-void radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys *ws)
+void
+radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys *ws)
{
- ws->base.ctx_create = radv_amdgpu_ctx_create;
- ws->base.ctx_destroy = radv_amdgpu_ctx_destroy;
- ws->base.ctx_wait_idle = radv_amdgpu_ctx_wait_idle;
- ws->base.cs_create = radv_amdgpu_cs_create;
- ws->base.cs_destroy = radv_amdgpu_cs_destroy;
- ws->base.cs_grow = radv_amdgpu_cs_grow;
- ws->base.cs_finalize = radv_amdgpu_cs_finalize;
- ws->base.cs_reset = radv_amdgpu_cs_reset;
- ws->base.cs_add_buffer = radv_amdgpu_cs_add_buffer;
- ws->base.cs_execute_secondary = radv_amdgpu_cs_execute_secondary;
- ws->base.cs_submit = radv_amdgpu_winsys_cs_submit;
- ws->base.cs_dump = radv_amdgpu_winsys_cs_dump;
- ws->base.create_syncobj = radv_amdgpu_create_syncobj;
- ws->base.destroy_syncobj = radv_amdgpu_destroy_syncobj;
- ws->base.reset_syncobj = radv_amdgpu_reset_syncobj;
- ws->base.signal_syncobj = radv_amdgpu_signal_syncobj;
- ws->base.query_syncobj = radv_amdgpu_query_syncobj;
- ws->base.wait_syncobj = radv_amdgpu_wait_syncobj;
- ws->base.wait_timeline_syncobj = radv_amdgpu_wait_timeline_syncobj;
- ws->base.export_syncobj = radv_amdgpu_export_syncobj;
- ws->base.import_syncobj = radv_amdgpu_import_syncobj;
- ws->base.export_syncobj_to_sync_file = radv_amdgpu_export_syncobj_to_sync_file;
- ws->base.import_syncobj_from_sync_file = radv_amdgpu_import_syncobj_from_sync_file;
+ ws->base.ctx_create = radv_amdgpu_ctx_create;
+ ws->base.ctx_destroy = radv_amdgpu_ctx_destroy;
+ ws->base.ctx_wait_idle = radv_amdgpu_ctx_wait_idle;
+ ws->base.cs_create = radv_amdgpu_cs_create;
+ ws->base.cs_destroy = radv_amdgpu_cs_destroy;
+ ws->base.cs_grow = radv_amdgpu_cs_grow;
+ ws->base.cs_finalize = radv_amdgpu_cs_finalize;
+ ws->base.cs_reset = radv_amdgpu_cs_reset;
+ ws->base.cs_add_buffer = radv_amdgpu_cs_add_buffer;
+ ws->base.cs_execute_secondary = radv_amdgpu_cs_execute_secondary;
+ ws->base.cs_submit = radv_amdgpu_winsys_cs_submit;
+ ws->base.cs_dump = radv_amdgpu_winsys_cs_dump;
+ ws->base.create_syncobj = radv_amdgpu_create_syncobj;
+ ws->base.destroy_syncobj = radv_amdgpu_destroy_syncobj;
+ ws->base.reset_syncobj = radv_amdgpu_reset_syncobj;
+ ws->base.signal_syncobj = radv_amdgpu_signal_syncobj;
+ ws->base.query_syncobj = radv_amdgpu_query_syncobj;
+ ws->base.wait_syncobj = radv_amdgpu_wait_syncobj;
+ ws->base.wait_timeline_syncobj = radv_amdgpu_wait_timeline_syncobj;
+ ws->base.export_syncobj = radv_amdgpu_export_syncobj;
+ ws->base.import_syncobj = radv_amdgpu_import_syncobj;
+ ws->base.export_syncobj_to_sync_file = radv_amdgpu_export_syncobj_to_sync_file;
+ ws->base.import_syncobj_from_sync_file = radv_amdgpu_import_syncobj_from_sync_file;
}
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.h b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.h
index 135d4faf943..a901afbf5d8 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.h
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.h
@@ -29,37 +29,34 @@
#ifndef RADV_AMDGPU_CS_H
#define RADV_AMDGPU_CS_H
-#include <string.h>
-#include <stdint.h>
-#include <assert.h>
#include <amdgpu.h>
+#include <assert.h>
+#include <stdint.h>
+#include <string.h>
-#include "radv_radeon_winsys.h"
#include "radv_amdgpu_winsys.h"
+#include "radv_radeon_winsys.h"
-enum {
- MAX_RINGS_PER_TYPE = 8
-};
-
+enum { MAX_RINGS_PER_TYPE = 8 };
struct radv_amdgpu_fence {
- struct amdgpu_cs_fence fence;
- volatile uint64_t *user_ptr;
+ struct amdgpu_cs_fence fence;
+ volatile uint64_t *user_ptr;
};
struct radv_amdgpu_ctx {
- struct radv_amdgpu_winsys *ws;
- amdgpu_context_handle ctx;
- struct radv_amdgpu_fence last_submission[AMDGPU_HW_IP_DMA + 1][MAX_RINGS_PER_TYPE];
+ struct radv_amdgpu_winsys *ws;
+ amdgpu_context_handle ctx;
+ struct radv_amdgpu_fence last_submission[AMDGPU_HW_IP_DMA + 1][MAX_RINGS_PER_TYPE];
- struct radeon_winsys_bo *fence_bo;
- uint64_t *fence_map;
+ struct radeon_winsys_bo *fence_bo;
+ uint64_t *fence_map;
};
static inline struct radv_amdgpu_ctx *
radv_amdgpu_ctx(struct radeon_winsys_ctx *base)
{
- return (struct radv_amdgpu_ctx *)base;
+ return (struct radv_amdgpu_ctx *)base;
}
void radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys *ws);
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c
index 598baa2addc..d1f0cd6cbc6 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c
@@ -28,77 +28,77 @@
#include <errno.h>
-#include "radv_private.h"
#include "util/bitset.h"
-#include "radv_amdgpu_winsys.h"
#include "radv_amdgpu_surface.h"
+#include "radv_amdgpu_winsys.h"
+#include "radv_private.h"
#include "sid.h"
#include "ac_surface.h"
-static int radv_amdgpu_surface_sanity(const struct ac_surf_info *surf_info,
- const struct radeon_surf *surf)
+static int
+radv_amdgpu_surface_sanity(const struct ac_surf_info *surf_info, const struct radeon_surf *surf)
{
- unsigned type = RADEON_SURF_GET(surf->flags, TYPE);
+ unsigned type = RADEON_SURF_GET(surf->flags, TYPE);
- if (!surf->blk_w || !surf->blk_h)
- return -EINVAL;
+ if (!surf->blk_w || !surf->blk_h)
+ return -EINVAL;
- switch (type) {
- case RADEON_SURF_TYPE_1D:
- if (surf_info->height > 1)
- return -EINVAL;
- /* fall through */
- case RADEON_SURF_TYPE_2D:
- case RADEON_SURF_TYPE_CUBEMAP:
- if (surf_info->depth > 1 || surf_info->array_size > 1)
- return -EINVAL;
- break;
- case RADEON_SURF_TYPE_3D:
- if (surf_info->array_size > 1)
- return -EINVAL;
- break;
- case RADEON_SURF_TYPE_1D_ARRAY:
- if (surf_info->height > 1)
- return -EINVAL;
- /* fall through */
- case RADEON_SURF_TYPE_2D_ARRAY:
- if (surf_info->depth > 1)
- return -EINVAL;
- break;
- default:
- return -EINVAL;
- }
- return 0;
+ switch (type) {
+ case RADEON_SURF_TYPE_1D:
+ if (surf_info->height > 1)
+ return -EINVAL;
+ /* fall through */
+ case RADEON_SURF_TYPE_2D:
+ case RADEON_SURF_TYPE_CUBEMAP:
+ if (surf_info->depth > 1 || surf_info->array_size > 1)
+ return -EINVAL;
+ break;
+ case RADEON_SURF_TYPE_3D:
+ if (surf_info->array_size > 1)
+ return -EINVAL;
+ break;
+ case RADEON_SURF_TYPE_1D_ARRAY:
+ if (surf_info->height > 1)
+ return -EINVAL;
+ /* fall through */
+ case RADEON_SURF_TYPE_2D_ARRAY:
+ if (surf_info->depth > 1)
+ return -EINVAL;
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
}
-static int radv_amdgpu_winsys_surface_init(struct radeon_winsys *_ws,
- const struct ac_surf_info *surf_info,
- struct radeon_surf *surf)
+static int
+radv_amdgpu_winsys_surface_init(struct radeon_winsys *_ws, const struct ac_surf_info *surf_info,
+ struct radeon_surf *surf)
{
- struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
- unsigned mode, type;
- int r;
+ struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
+ unsigned mode, type;
+ int r;
- r = radv_amdgpu_surface_sanity(surf_info, surf);
- if (r)
- return r;
+ r = radv_amdgpu_surface_sanity(surf_info, surf);
+ if (r)
+ return r;
- type = RADEON_SURF_GET(surf->flags, TYPE);
- mode = RADEON_SURF_GET(surf->flags, MODE);
+ type = RADEON_SURF_GET(surf->flags, TYPE);
+ mode = RADEON_SURF_GET(surf->flags, MODE);
- struct ac_surf_config config;
+ struct ac_surf_config config;
- memcpy(&config.info, surf_info, sizeof(config.info));
- config.is_1d = type == RADEON_SURF_TYPE_1D ||
- type == RADEON_SURF_TYPE_1D_ARRAY;
- config.is_3d = type == RADEON_SURF_TYPE_3D;
- config.is_cube = type == RADEON_SURF_TYPE_CUBEMAP;
+ memcpy(&config.info, surf_info, sizeof(config.info));
+ config.is_1d = type == RADEON_SURF_TYPE_1D || type == RADEON_SURF_TYPE_1D_ARRAY;
+ config.is_3d = type == RADEON_SURF_TYPE_3D;
+ config.is_cube = type == RADEON_SURF_TYPE_CUBEMAP;
- return ac_compute_surface(ws->addrlib, &ws->info, &config, mode, surf);
+ return ac_compute_surface(ws->addrlib, &ws->info, &config, mode, surf);
}
-void radv_amdgpu_surface_init_functions(struct radv_amdgpu_winsys *ws)
+void
+radv_amdgpu_surface_init_functions(struct radv_amdgpu_winsys *ws)
{
- ws->base.surface_init = radv_amdgpu_winsys_surface_init;
+ ws->base.surface_init = radv_amdgpu_winsys_surface_init;
}
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.h b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.h
index a5652a32570..90bc2b97883 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.h
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.h
@@ -27,6 +27,8 @@
#include <amdgpu.h>
+struct radv_amdgpu_winsys;
+
void radv_amdgpu_surface_init_functions(struct radv_amdgpu_winsys *ws);
#endif /* RADV_AMDGPU_SURFACE_H */
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c
index 0a15b6bc287..ad5953aeb11 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c
@@ -25,255 +25,246 @@
* IN THE SOFTWARE.
*/
#include "radv_amdgpu_winsys.h"
-#include "radv_amdgpu_winsys_public.h"
-#include "radv_amdgpu_surface.h"
-#include "radv_debug.h"
-#include "ac_surface.h"
-#include "xf86drm.h"
+#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "drm-uapi/amdgpu_drm.h"
-#include <assert.h>
-#include "radv_amdgpu_cs.h"
+#include "ac_surface.h"
#include "radv_amdgpu_bo.h"
+#include "radv_amdgpu_cs.h"
#include "radv_amdgpu_surface.h"
+#include "radv_amdgpu_winsys_public.h"
+#include "radv_debug.h"
+#include "xf86drm.h"
static bool
do_winsys_init(struct radv_amdgpu_winsys *ws, int fd)
{
- if (!ac_query_gpu_info(fd, ws->dev, &ws->info, &ws->amdinfo))
- return false;
-
- if (ws->info.drm_minor < 35) {
- fprintf(stderr, "radv: DRM 3.35+ is required (Linux kernel 4.15+)\n");
- return false;
- }
-
- /* LLVM 11 is required for GFX10.3. */
- if (ws->info.chip_class == GFX10_3 && ws->use_llvm && LLVM_VERSION_MAJOR < 11) {
- fprintf(stderr, "radv: GFX 10.3 requires LLVM 11 or higher\n");
- return false;
- }
-
- /* LLVM 9.0 is required for GFX10. */
- if (ws->info.chip_class == GFX10 && ws->use_llvm && LLVM_VERSION_MAJOR < 9) {
- fprintf(stderr, "radv: Navi family support requires LLVM 9 or higher\n");
- return false;
- }
-
- ws->addrlib = ac_addrlib_create(&ws->info, &ws->info.max_alignment);
- if (!ws->addrlib) {
- fprintf(stderr, "amdgpu: Cannot create addrlib.\n");
- return false;
- }
-
- ws->info.num_rings[RING_DMA] = MIN2(ws->info.num_rings[RING_DMA], MAX_RINGS_PER_TYPE);
- ws->info.num_rings[RING_COMPUTE] = MIN2(ws->info.num_rings[RING_COMPUTE], MAX_RINGS_PER_TYPE);
-
- ws->use_ib_bos = ws->info.chip_class >= GFX7;
- return true;
+ if (!ac_query_gpu_info(fd, ws->dev, &ws->info, &ws->amdinfo))
+ return false;
+
+ if (ws->info.drm_minor < 35) {
+ fprintf(stderr, "radv: DRM 3.35+ is required (Linux kernel 4.15+)\n");
+ return false;
+ }
+
+ /* LLVM 11 is required for GFX10.3. */
+ if (ws->info.chip_class == GFX10_3 && ws->use_llvm && LLVM_VERSION_MAJOR < 11) {
+ fprintf(stderr, "radv: GFX 10.3 requires LLVM 11 or higher\n");
+ return false;
+ }
+
+ /* LLVM 9.0 is required for GFX10. */
+ if (ws->info.chip_class == GFX10 && ws->use_llvm && LLVM_VERSION_MAJOR < 9) {
+ fprintf(stderr, "radv: Navi family support requires LLVM 9 or higher\n");
+ return false;
+ }
+
+ ws->addrlib = ac_addrlib_create(&ws->info, &ws->info.max_alignment);
+ if (!ws->addrlib) {
+ fprintf(stderr, "amdgpu: Cannot create addrlib.\n");
+ return false;
+ }
+
+ ws->info.num_rings[RING_DMA] = MIN2(ws->info.num_rings[RING_DMA], MAX_RINGS_PER_TYPE);
+ ws->info.num_rings[RING_COMPUTE] = MIN2(ws->info.num_rings[RING_COMPUTE], MAX_RINGS_PER_TYPE);
+
+ ws->use_ib_bos = ws->info.chip_class >= GFX7;
+ return true;
}
-static void radv_amdgpu_winsys_query_info(struct radeon_winsys *rws,
- struct radeon_info *info)
+static void
+radv_amdgpu_winsys_query_info(struct radeon_winsys *rws, struct radeon_info *info)
{
- *info = ((struct radv_amdgpu_winsys *)rws)->info;
+ *info = ((struct radv_amdgpu_winsys *)rws)->info;
}
-static uint64_t radv_amdgpu_winsys_query_value(struct radeon_winsys *rws,
- enum radeon_value_id value)
+static uint64_t
+radv_amdgpu_winsys_query_value(struct radeon_winsys *rws, enum radeon_value_id value)
{
- struct radv_amdgpu_winsys *ws = (struct radv_amdgpu_winsys *)rws;
- struct amdgpu_heap_info heap;
- uint64_t retval = 0;
-
- switch (value) {
- case RADEON_ALLOCATED_VRAM:
- return ws->allocated_vram;
- case RADEON_ALLOCATED_VRAM_VIS:
- return ws->allocated_vram_vis;
- case RADEON_ALLOCATED_GTT:
- return ws->allocated_gtt;
- case RADEON_TIMESTAMP:
- amdgpu_query_info(ws->dev, AMDGPU_INFO_TIMESTAMP, 8, &retval);
- return retval;
- case RADEON_NUM_BYTES_MOVED:
- amdgpu_query_info(ws->dev, AMDGPU_INFO_NUM_BYTES_MOVED,
- 8, &retval);
- return retval;
- case RADEON_NUM_EVICTIONS:
- amdgpu_query_info(ws->dev, AMDGPU_INFO_NUM_EVICTIONS,
- 8, &retval);
- return retval;
- case RADEON_NUM_VRAM_CPU_PAGE_FAULTS:
- amdgpu_query_info(ws->dev, AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS,
- 8, &retval);
- return retval;
- case RADEON_VRAM_USAGE:
- amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_VRAM,
- 0, &heap);
- return heap.heap_usage;
- case RADEON_VRAM_VIS_USAGE:
- amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_VRAM,
- AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
- &heap);
- return heap.heap_usage;
- case RADEON_GTT_USAGE:
- amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_GTT,
- 0, &heap);
- return heap.heap_usage;
- case RADEON_GPU_TEMPERATURE:
- amdgpu_query_sensor_info(ws->dev, AMDGPU_INFO_SENSOR_GPU_TEMP,
- 4, &retval);
- return retval;
- case RADEON_CURRENT_SCLK:
- amdgpu_query_sensor_info(ws->dev, AMDGPU_INFO_SENSOR_GFX_SCLK,
- 4, &retval);
- return retval;
- case RADEON_CURRENT_MCLK:
- amdgpu_query_sensor_info(ws->dev, AMDGPU_INFO_SENSOR_GFX_MCLK,
- 4, &retval);
- return retval;
- default:
- unreachable("invalid query value");
- }
-
- return 0;
+ struct radv_amdgpu_winsys *ws = (struct radv_amdgpu_winsys *)rws;
+ struct amdgpu_heap_info heap;
+ uint64_t retval = 0;
+
+ switch (value) {
+ case RADEON_ALLOCATED_VRAM:
+ return ws->allocated_vram;
+ case RADEON_ALLOCATED_VRAM_VIS:
+ return ws->allocated_vram_vis;
+ case RADEON_ALLOCATED_GTT:
+ return ws->allocated_gtt;
+ case RADEON_TIMESTAMP:
+ amdgpu_query_info(ws->dev, AMDGPU_INFO_TIMESTAMP, 8, &retval);
+ return retval;
+ case RADEON_NUM_BYTES_MOVED:
+ amdgpu_query_info(ws->dev, AMDGPU_INFO_NUM_BYTES_MOVED, 8, &retval);
+ return retval;
+ case RADEON_NUM_EVICTIONS:
+ amdgpu_query_info(ws->dev, AMDGPU_INFO_NUM_EVICTIONS, 8, &retval);
+ return retval;
+ case RADEON_NUM_VRAM_CPU_PAGE_FAULTS:
+ amdgpu_query_info(ws->dev, AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS, 8, &retval);
+ return retval;
+ case RADEON_VRAM_USAGE:
+ amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_VRAM, 0, &heap);
+ return heap.heap_usage;
+ case RADEON_VRAM_VIS_USAGE:
+ amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
+ &heap);
+ return heap.heap_usage;
+ case RADEON_GTT_USAGE:
+ amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_GTT, 0, &heap);
+ return heap.heap_usage;
+ case RADEON_GPU_TEMPERATURE:
+ amdgpu_query_sensor_info(ws->dev, AMDGPU_INFO_SENSOR_GPU_TEMP, 4, &retval);
+ return retval;
+ case RADEON_CURRENT_SCLK:
+ amdgpu_query_sensor_info(ws->dev, AMDGPU_INFO_SENSOR_GFX_SCLK, 4, &retval);
+ return retval;
+ case RADEON_CURRENT_MCLK:
+ amdgpu_query_sensor_info(ws->dev, AMDGPU_INFO_SENSOR_GFX_MCLK, 4, &retval);
+ return retval;
+ default:
+ unreachable("invalid query value");
+ }
+
+ return 0;
}
-static bool radv_amdgpu_winsys_read_registers(struct radeon_winsys *rws,
- unsigned reg_offset,
- unsigned num_registers, uint32_t *out)
+static bool
+radv_amdgpu_winsys_read_registers(struct radeon_winsys *rws, unsigned reg_offset,
+ unsigned num_registers, uint32_t *out)
{
- struct radv_amdgpu_winsys *ws = (struct radv_amdgpu_winsys*)rws;
+ struct radv_amdgpu_winsys *ws = (struct radv_amdgpu_winsys *)rws;
- return amdgpu_read_mm_registers(ws->dev, reg_offset / 4, num_registers,
- 0xffffffff, 0, out) == 0;
+ return amdgpu_read_mm_registers(ws->dev, reg_offset / 4, num_registers, 0xffffffff, 0, out) == 0;
}
-static const char *radv_amdgpu_winsys_get_chip_name(struct radeon_winsys *rws)
+static const char *
+radv_amdgpu_winsys_get_chip_name(struct radeon_winsys *rws)
{
- amdgpu_device_handle dev = ((struct radv_amdgpu_winsys *)rws)->dev;
+ amdgpu_device_handle dev = ((struct radv_amdgpu_winsys *)rws)->dev;
- return amdgpu_get_marketing_name(dev);
+ return amdgpu_get_marketing_name(dev);
}
static simple_mtx_t winsys_creation_mutex = _SIMPLE_MTX_INITIALIZER_NP;
static struct hash_table *winsyses = NULL;
-static void radv_amdgpu_winsys_destroy(struct radeon_winsys *rws)
+static void
+radv_amdgpu_winsys_destroy(struct radeon_winsys *rws)
{
- struct radv_amdgpu_winsys *ws = (struct radv_amdgpu_winsys*)rws;
- bool destroy = false;
-
- simple_mtx_lock(&winsys_creation_mutex);
- if (!--ws->refcount) {
- _mesa_hash_table_remove_key(winsyses, ws->dev);
-
- /* Clean the hashtable up if empty, though there is no
- * empty function. */
- if (_mesa_hash_table_num_entries(winsyses) == 0) {
- _mesa_hash_table_destroy(winsyses, NULL);
- winsyses = NULL;
- }
-
- destroy = true;
- }
- simple_mtx_unlock(&winsys_creation_mutex);
- if (!destroy)
- return;
-
- for (unsigned i = 0; i < ws->syncobj_count; ++i)
- amdgpu_cs_destroy_syncobj(ws->dev, ws->syncobj[i]);
- free(ws->syncobj);
-
- u_rwlock_destroy(&ws->global_bo_list.lock);
- free(ws->global_bo_list.bos);
-
- pthread_mutex_destroy(&ws->syncobj_lock);
- u_rwlock_destroy(&ws->log_bo_list_lock);
- ac_addrlib_destroy(ws->addrlib);
- amdgpu_device_deinitialize(ws->dev);
- FREE(rws);
+ struct radv_amdgpu_winsys *ws = (struct radv_amdgpu_winsys *)rws;
+ bool destroy = false;
+
+ simple_mtx_lock(&winsys_creation_mutex);
+ if (!--ws->refcount) {
+ _mesa_hash_table_remove_key(winsyses, ws->dev);
+
+ /* Clean the hashtable up if empty, though there is no
+ * empty function. */
+ if (_mesa_hash_table_num_entries(winsyses) == 0) {
+ _mesa_hash_table_destroy(winsyses, NULL);
+ winsyses = NULL;
+ }
+
+ destroy = true;
+ }
+ simple_mtx_unlock(&winsys_creation_mutex);
+ if (!destroy)
+ return;
+
+ for (unsigned i = 0; i < ws->syncobj_count; ++i)
+ amdgpu_cs_destroy_syncobj(ws->dev, ws->syncobj[i]);
+ free(ws->syncobj);
+
+ u_rwlock_destroy(&ws->global_bo_list.lock);
+ free(ws->global_bo_list.bos);
+
+ pthread_mutex_destroy(&ws->syncobj_lock);
+ u_rwlock_destroy(&ws->log_bo_list_lock);
+ ac_addrlib_destroy(ws->addrlib);
+ amdgpu_device_deinitialize(ws->dev);
+ FREE(rws);
}
struct radeon_winsys *
radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags)
{
- uint32_t drm_major, drm_minor, r;
- amdgpu_device_handle dev;
- struct radv_amdgpu_winsys *ws = NULL;
-
- r = amdgpu_device_initialize(fd, &drm_major, &drm_minor, &dev);
- if (r)
- return NULL;
-
- /* We have to keep this lock till insertion. */
- simple_mtx_lock(&winsys_creation_mutex);
- if (!winsyses)
- winsyses = _mesa_pointer_hash_table_create(NULL);
- if (!winsyses)
- goto fail;
-
- struct hash_entry *entry = _mesa_hash_table_search(winsyses, dev);
- if (entry) {
- ws = (struct radv_amdgpu_winsys *)entry->data;
- ++ws->refcount;
- }
-
- if (ws) {
- simple_mtx_unlock(&winsys_creation_mutex);
- amdgpu_device_deinitialize(dev);
- return &ws->base;
- }
-
- ws = calloc(1, sizeof(struct radv_amdgpu_winsys));
- if (!ws)
- goto fail;
-
- ws->refcount = 1;
- ws->dev = dev;
- ws->info.drm_major = drm_major;
- ws->info.drm_minor = drm_minor;
- if (!do_winsys_init(ws, fd))
- goto winsys_fail;
-
- ws->debug_all_bos = !!(debug_flags & RADV_DEBUG_ALL_BOS);
- ws->debug_log_bos = debug_flags & RADV_DEBUG_HANG;
- if (debug_flags & RADV_DEBUG_NO_IBS)
- ws->use_ib_bos = false;
-
- ws->use_local_bos = perftest_flags & RADV_PERFTEST_LOCAL_BOS;
- ws->zero_all_vram_allocs = debug_flags & RADV_DEBUG_ZERO_VRAM;
- ws->use_llvm = debug_flags & RADV_DEBUG_LLVM;
- ws->cs_bo_domain = radv_cmdbuffer_domain(&ws->info, perftest_flags);
- u_rwlock_init(&ws->global_bo_list.lock);
- list_inithead(&ws->log_bo_list);
- u_rwlock_init(&ws->log_bo_list_lock);
- pthread_mutex_init(&ws->syncobj_lock, NULL);
- ws->base.query_info = radv_amdgpu_winsys_query_info;
- ws->base.query_value = radv_amdgpu_winsys_query_value;
- ws->base.read_registers = radv_amdgpu_winsys_read_registers;
- ws->base.get_chip_name = radv_amdgpu_winsys_get_chip_name;
- ws->base.destroy = radv_amdgpu_winsys_destroy;
- radv_amdgpu_bo_init_functions(ws);
- radv_amdgpu_cs_init_functions(ws);
- radv_amdgpu_surface_init_functions(ws);
-
- _mesa_hash_table_insert(winsyses, dev, ws);
- simple_mtx_unlock(&winsys_creation_mutex);
-
- return &ws->base;
+ uint32_t drm_major, drm_minor, r;
+ amdgpu_device_handle dev;
+ struct radv_amdgpu_winsys *ws = NULL;
+
+ r = amdgpu_device_initialize(fd, &drm_major, &drm_minor, &dev);
+ if (r)
+ return NULL;
+
+ /* We have to keep this lock till insertion. */
+ simple_mtx_lock(&winsys_creation_mutex);
+ if (!winsyses)
+ winsyses = _mesa_pointer_hash_table_create(NULL);
+ if (!winsyses)
+ goto fail;
+
+ struct hash_entry *entry = _mesa_hash_table_search(winsyses, dev);
+ if (entry) {
+ ws = (struct radv_amdgpu_winsys *)entry->data;
+ ++ws->refcount;
+ }
+
+ if (ws) {
+ simple_mtx_unlock(&winsys_creation_mutex);
+ amdgpu_device_deinitialize(dev);
+ return &ws->base;
+ }
+
+ ws = calloc(1, sizeof(struct radv_amdgpu_winsys));
+ if (!ws)
+ goto fail;
+
+ ws->refcount = 1;
+ ws->dev = dev;
+ ws->info.drm_major = drm_major;
+ ws->info.drm_minor = drm_minor;
+ if (!do_winsys_init(ws, fd))
+ goto winsys_fail;
+
+ ws->debug_all_bos = !!(debug_flags & RADV_DEBUG_ALL_BOS);
+ ws->debug_log_bos = debug_flags & RADV_DEBUG_HANG;
+ if (debug_flags & RADV_DEBUG_NO_IBS)
+ ws->use_ib_bos = false;
+
+ ws->use_local_bos = perftest_flags & RADV_PERFTEST_LOCAL_BOS;
+ ws->zero_all_vram_allocs = debug_flags & RADV_DEBUG_ZERO_VRAM;
+ ws->use_llvm = debug_flags & RADV_DEBUG_LLVM;
+ ws->cs_bo_domain = radv_cmdbuffer_domain(&ws->info, perftest_flags);
+ u_rwlock_init(&ws->global_bo_list.lock);
+ list_inithead(&ws->log_bo_list);
+ u_rwlock_init(&ws->log_bo_list_lock);
+ pthread_mutex_init(&ws->syncobj_lock, NULL);
+ ws->base.query_info = radv_amdgpu_winsys_query_info;
+ ws->base.query_value = radv_amdgpu_winsys_query_value;
+ ws->base.read_registers = radv_amdgpu_winsys_read_registers;
+ ws->base.get_chip_name = radv_amdgpu_winsys_get_chip_name;
+ ws->base.destroy = radv_amdgpu_winsys_destroy;
+ radv_amdgpu_bo_init_functions(ws);
+ radv_amdgpu_cs_init_functions(ws);
+ radv_amdgpu_surface_init_functions(ws);
+
+ _mesa_hash_table_insert(winsyses, dev, ws);
+ simple_mtx_unlock(&winsys_creation_mutex);
+
+ return &ws->base;
winsys_fail:
- free(ws);
+ free(ws);
fail:
- if (winsyses && _mesa_hash_table_num_entries(winsyses) == 0) {
- _mesa_hash_table_destroy(winsyses, NULL);
- winsyses = NULL;
- }
- simple_mtx_unlock(&winsys_creation_mutex);
- amdgpu_device_deinitialize(dev);
- return NULL;
+ if (winsyses && _mesa_hash_table_num_entries(winsyses) == 0) {
+ _mesa_hash_table_destroy(winsyses, NULL);
+ winsyses = NULL;
+ }
+ simple_mtx_unlock(&winsys_creation_mutex);
+ amdgpu_device_deinitialize(dev);
+ return NULL;
}
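
Beyond the reindentation, radv_amdgpu_winsys_create() and radv_amdgpu_winsys_destroy() keep their existing shape: one reference-counted winsys per amdgpu device handle, looked up in a pointer hash table while winsys_creation_mutex is held, and torn down only when the last reference drops. A self-contained sketch of that pattern is below, with a linked list standing in for _mesa_pointer_hash_table and a plain void * for amdgpu_device_handle; all names in it are hypothetical.

/* Illustrative sketch, not part of this commit: the refcounted
 * one-winsys-per-device pattern used by the amdgpu winsys. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct winsys {
   void *dev;           /* key: the device handle                     */
   unsigned refcount;   /* how many callers share this winsys         */
   struct winsys *next; /* intrusive list, standing in for the table  */
};

static pthread_mutex_t creation_mutex = PTHREAD_MUTEX_INITIALIZER;
static struct winsys *winsyses;

static struct winsys *
winsys_create(void *dev)
{
   pthread_mutex_lock(&creation_mutex);

   /* Reuse an existing winsys for the same device, like the hash lookup. */
   for (struct winsys *existing = winsyses; existing; existing = existing->next) {
      if (existing->dev == dev) {
         existing->refcount++;
         pthread_mutex_unlock(&creation_mutex);
         return existing;
      }
   }

   /* Keep the lock until insertion, so two threads cannot race a second
    * winsys for the same device into existence. */
   struct winsys *ws = calloc(1, sizeof(*ws));
   if (ws) {
      ws->dev = dev;
      ws->refcount = 1;
      ws->next = winsyses;
      winsyses = ws;
   }
   pthread_mutex_unlock(&creation_mutex);
   return ws;
}

static void
winsys_destroy(struct winsys *ws)
{
   pthread_mutex_lock(&creation_mutex);
   if (--ws->refcount) {
      pthread_mutex_unlock(&creation_mutex);
      return; /* another user still holds a reference */
   }

   /* Unlink under the lock, free outside it — same shape as the driver code. */
   for (struct winsys **p = &winsyses; *p; p = &(*p)->next) {
      if (*p == ws) {
         *p = ws->next;
         break;
      }
   }
   pthread_mutex_unlock(&creation_mutex);
   free(ws);
}

int
main(void)
{
   int device; /* fake device handle */
   struct winsys *a = winsys_create(&device);
   struct winsys *b = winsys_create(&device);
   printf("shared: %d\n", a == b); /* prints 1: the second create reuses the first */
   winsys_destroy(b);
   winsys_destroy(a);
   return 0;
}
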
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h
index 3c8987a1ab9..ba9cf500cd5 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.h
@@ -28,57 +28,57 @@
#ifndef RADV_AMDGPU_WINSYS_H
#define RADV_AMDGPU_WINSYS_H
-#include "radv_radeon_winsys.h"
-#include "ac_gpu_info.h"
#include <amdgpu.h>
+#include <pthread.h>
#include "util/list.h"
#include "util/rwlock.h"
-#include <pthread.h>
+#include "ac_gpu_info.h"
+#include "radv_radeon_winsys.h"
struct radv_amdgpu_winsys {
- struct radeon_winsys base;
- amdgpu_device_handle dev;
+ struct radeon_winsys base;
+ amdgpu_device_handle dev;
- struct radeon_info info;
- struct amdgpu_gpu_info amdinfo;
- struct ac_addrlib *addrlib;
+ struct radeon_info info;
+ struct amdgpu_gpu_info amdinfo;
+ struct ac_addrlib *addrlib;
- bool debug_all_bos;
- bool debug_log_bos;
- bool use_ib_bos;
- enum radeon_bo_domain cs_bo_domain;
- bool zero_all_vram_allocs;
- bool use_local_bos;
- bool use_llvm;
+ bool debug_all_bos;
+ bool debug_log_bos;
+ bool use_ib_bos;
+ enum radeon_bo_domain cs_bo_domain;
+ bool zero_all_vram_allocs;
+ bool use_local_bos;
+ bool use_llvm;
- uint64_t allocated_vram;
- uint64_t allocated_vram_vis;
- uint64_t allocated_gtt;
+ uint64_t allocated_vram;
+ uint64_t allocated_vram_vis;
+ uint64_t allocated_gtt;
- /* Global BO list */
- struct {
- struct radv_amdgpu_winsys_bo **bos;
- uint32_t count;
- uint32_t capacity;
- struct u_rwlock lock;
- } global_bo_list;
+ /* Global BO list */
+ struct {
+ struct radv_amdgpu_winsys_bo **bos;
+ uint32_t count;
+ uint32_t capacity;
+ struct u_rwlock lock;
+ } global_bo_list;
- /* syncobj cache */
- pthread_mutex_t syncobj_lock;
- uint32_t *syncobj;
- uint32_t syncobj_count, syncobj_capacity;
+ /* syncobj cache */
+ pthread_mutex_t syncobj_lock;
+ uint32_t *syncobj;
+ uint32_t syncobj_count, syncobj_capacity;
- /* BO log */
- struct u_rwlock log_bo_list_lock;
- struct list_head log_bo_list;
+ /* BO log */
+ struct u_rwlock log_bo_list_lock;
+ struct list_head log_bo_list;
- uint32_t refcount;
+ uint32_t refcount;
};
static inline struct radv_amdgpu_winsys *
radv_amdgpu_winsys(struct radeon_winsys *base)
{
- return (struct radv_amdgpu_winsys*)base;
+ return (struct radv_amdgpu_winsys *)base;
}
#endif /* RADV_AMDGPU_WINSYS_H */
diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys_public.h b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys_public.h
index 790a404d24a..84fe347ec3e 100644
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys_public.h
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys_public.h
@@ -30,7 +30,7 @@
#define RADV_AMDGPU_WINSYS_PUBLIC_H
struct radeon_winsys *radv_amdgpu_winsys_create(int fd, uint64_t debug_flags,
- uint64_t perftest_flags);
+ uint64_t perftest_flags);
struct radeon_winsys *radv_dummy_winsys_create(void);
diff --git a/src/amd/vulkan/winsys/null/radv_null_bo.c b/src/amd/vulkan/winsys/null/radv_null_bo.c
index 318b498a23c..3c8d12431a7 100644
--- a/src/amd/vulkan/winsys/null/radv_null_bo.c
+++ b/src/amd/vulkan/winsys/null/radv_null_bo.c
@@ -29,34 +29,31 @@
#include "util/u_memory.h"
static struct radeon_winsys_bo *
-radv_null_winsys_bo_create(struct radeon_winsys *_ws,
- uint64_t size,
- unsigned alignment,
- enum radeon_bo_domain initial_domain,
- enum radeon_bo_flag flags,
- unsigned priority)
+radv_null_winsys_bo_create(struct radeon_winsys *_ws, uint64_t size, unsigned alignment,
+ enum radeon_bo_domain initial_domain, enum radeon_bo_flag flags,
+ unsigned priority)
{
- struct radv_null_winsys_bo *bo;
+ struct radv_null_winsys_bo *bo;
- bo = CALLOC_STRUCT(radv_null_winsys_bo);
- if (!bo)
- return NULL;
+ bo = CALLOC_STRUCT(radv_null_winsys_bo);
+ if (!bo)
+ return NULL;
- bo->ptr = malloc(size);
- if (!bo->ptr)
- goto error_ptr_alloc;
+ bo->ptr = malloc(size);
+ if (!bo->ptr)
+ goto error_ptr_alloc;
- return (struct radeon_winsys_bo *)bo;
+ return (struct radeon_winsys_bo *)bo;
error_ptr_alloc:
- FREE(bo);
- return NULL;
+ FREE(bo);
+ return NULL;
}
static void *
radv_null_winsys_bo_map(struct radeon_winsys_bo *_bo)
{
- struct radv_null_winsys_bo *bo = radv_null_winsys_bo(_bo);
- return bo->ptr;
+ struct radv_null_winsys_bo *bo = radv_null_winsys_bo(_bo);
+ return bo->ptr;
}
static void
@@ -64,18 +61,19 @@ radv_null_winsys_bo_unmap(struct radeon_winsys_bo *_bo)
{
}
-static void radv_null_winsys_bo_destroy(struct radeon_winsys *_ws,
- struct radeon_winsys_bo *_bo)
+static void
+radv_null_winsys_bo_destroy(struct radeon_winsys *_ws, struct radeon_winsys_bo *_bo)
{
- struct radv_null_winsys_bo *bo = radv_null_winsys_bo(_bo);
- FREE(bo->ptr);
- FREE(bo);
+ struct radv_null_winsys_bo *bo = radv_null_winsys_bo(_bo);
+ FREE(bo->ptr);
+ FREE(bo);
}
-void radv_null_bo_init_functions(struct radv_null_winsys *ws)
+void
+radv_null_bo_init_functions(struct radv_null_winsys *ws)
{
- ws->base.buffer_create = radv_null_winsys_bo_create;
- ws->base.buffer_destroy = radv_null_winsys_bo_destroy;
- ws->base.buffer_map = radv_null_winsys_bo_map;
- ws->base.buffer_unmap = radv_null_winsys_bo_unmap;
+ ws->base.buffer_create = radv_null_winsys_bo_create;
+ ws->base.buffer_destroy = radv_null_winsys_bo_destroy;
+ ws->base.buffer_map = radv_null_winsys_bo_map;
+ ws->base.buffer_unmap = radv_null_winsys_bo_unmap;
}
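
The null-winsys BOs stay what they were: host allocations with no GPU behind them. Create CALLOCs the wrapper and mallocs the storage, map simply returns the pointer, unmap is a no-op, and destroy frees both. A minimal stand-alone sketch of that malloc-backed buffer-object shape follows; the buf_* names are hypothetical, not the driver's.

/* Illustrative sketch, not part of this commit: a buffer object that is
 * nothing but host memory, mirroring radv_null_winsys_bo_*. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct buf {
   void *ptr;
};

static struct buf *
buf_create(uint64_t size)
{
   struct buf *bo = calloc(1, sizeof(*bo));
   if (!bo)
      return NULL;
   bo->ptr = malloc(size); /* the "GPU" allocation is plain host memory */
   if (!bo->ptr) {
      free(bo);
      return NULL;
   }
   return bo;
}

static void *
buf_map(struct buf *bo)
{
   return bo->ptr; /* mapping is trivial: the pointer is already CPU-visible */
}

static void
buf_destroy(struct buf *bo)
{
   free(bo->ptr);
   free(bo);
}

int
main(void)
{
   struct buf *bo = buf_create(64);
   if (!bo)
      return 1;
   memcpy(buf_map(bo), "hello", 6);
   printf("%s\n", (char *)buf_map(bo));
   buf_destroy(bo);
   return 0;
}
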
diff --git a/src/amd/vulkan/winsys/null/radv_null_bo.h b/src/amd/vulkan/winsys/null/radv_null_bo.h
index 2f2f8b711d6..49a7440b613 100644
--- a/src/amd/vulkan/winsys/null/radv_null_bo.h
+++ b/src/amd/vulkan/winsys/null/radv_null_bo.h
@@ -31,15 +31,15 @@
#include "radv_null_winsys.h"
struct radv_null_winsys_bo {
- struct radeon_winsys_bo base;
- struct radv_null_winsys *ws;
- void *ptr;
+ struct radeon_winsys_bo base;
+ struct radv_null_winsys *ws;
+ void *ptr;
};
-static inline
-struct radv_null_winsys_bo *radv_null_winsys_bo(struct radeon_winsys_bo *bo)
+static inline struct radv_null_winsys_bo *
+radv_null_winsys_bo(struct radeon_winsys_bo *bo)
{
- return (struct radv_null_winsys_bo *)bo;
+ return (struct radv_null_winsys_bo *)bo;
}
void radv_null_bo_init_functions(struct radv_null_winsys *ws);
diff --git a/src/amd/vulkan/winsys/null/radv_null_cs.c b/src/amd/vulkan/winsys/null/radv_null_cs.c
index 5788cbf92cb..f5a186fe04a 100644
--- a/src/amd/vulkan/winsys/null/radv_null_cs.c
+++ b/src/amd/vulkan/winsys/null/radv_null_cs.c
@@ -29,73 +29,75 @@
#include "util/u_memory.h"
struct radv_null_cs {
- struct radeon_cmdbuf base;
- struct radv_null_winsys *ws;
+ struct radeon_cmdbuf base;
+ struct radv_null_winsys *ws;
};
static inline struct radv_null_cs *
radv_null_cs(struct radeon_cmdbuf *base)
{
- return (struct radv_null_cs*)base;
+ return (struct radv_null_cs *)base;
}
-static VkResult radv_null_ctx_create(struct radeon_winsys *_ws,
- enum radeon_ctx_priority priority,
- struct radeon_winsys_ctx **rctx)
+static VkResult
+radv_null_ctx_create(struct radeon_winsys *_ws, enum radeon_ctx_priority priority,
+ struct radeon_winsys_ctx **rctx)
{
- struct radv_null_ctx *ctx = CALLOC_STRUCT(radv_null_ctx);
+ struct radv_null_ctx *ctx = CALLOC_STRUCT(radv_null_ctx);
- if (!ctx)
- return VK_ERROR_OUT_OF_HOST_MEMORY;
+ if (!ctx)
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
- *rctx = (struct radeon_winsys_ctx *)ctx;
- return VK_SUCCESS;
+ *rctx = (struct radeon_winsys_ctx *)ctx;
+ return VK_SUCCESS;
}
-static void radv_null_ctx_destroy(struct radeon_winsys_ctx *rwctx)
+static void
+radv_null_ctx_destroy(struct radeon_winsys_ctx *rwctx)
{
- struct radv_null_ctx *ctx = (struct radv_null_ctx *)rwctx;
- FREE(ctx);
+ struct radv_null_ctx *ctx = (struct radv_null_ctx *)rwctx;
+ FREE(ctx);
}
static struct radeon_cmdbuf *
-radv_null_cs_create(struct radeon_winsys *ws,
- enum ring_type ring_type)
+radv_null_cs_create(struct radeon_winsys *ws, enum ring_type ring_type)
{
- struct radv_null_cs *cs = calloc(1, sizeof(struct radv_null_cs));
- if (!cs)
- return NULL;
+ struct radv_null_cs *cs = calloc(1, sizeof(struct radv_null_cs));
+ if (!cs)
+ return NULL;
- cs->ws = radv_null_winsys(ws);
+ cs->ws = radv_null_winsys(ws);
- cs->base.buf = malloc(16384);
- cs->base.max_dw = 4096;
- if (!cs->base.buf) {
- FREE(cs);
- return NULL;
- }
+ cs->base.buf = malloc(16384);
+ cs->base.max_dw = 4096;
+ if (!cs->base.buf) {
+ FREE(cs);
+ return NULL;
+ }
- return &cs->base;
+ return &cs->base;
}
-static VkResult radv_null_cs_finalize(struct radeon_cmdbuf *_cs)
+static VkResult
+radv_null_cs_finalize(struct radeon_cmdbuf *_cs)
{
- return VK_SUCCESS;
+ return VK_SUCCESS;
}
-static void radv_null_cs_destroy(struct radeon_cmdbuf *rcs)
+static void
+radv_null_cs_destroy(struct radeon_cmdbuf *rcs)
{
- struct radv_null_cs *cs = radv_null_cs(rcs);
- FREE(cs->base.buf);
- FREE(cs);
+ struct radv_null_cs *cs = radv_null_cs(rcs);
+ FREE(cs->base.buf);
+ FREE(cs);
}
-void radv_null_cs_init_functions(struct radv_null_winsys *ws)
+void
+radv_null_cs_init_functions(struct radv_null_winsys *ws)
{
- ws->base.ctx_create = radv_null_ctx_create;
- ws->base.ctx_destroy = radv_null_ctx_destroy;
- ws->base.cs_create = radv_null_cs_create;
- ws->base.cs_finalize = radv_null_cs_finalize;
- ws->base.cs_destroy = radv_null_cs_destroy;
-
+ ws->base.ctx_create = radv_null_ctx_create;
+ ws->base.ctx_destroy = radv_null_ctx_destroy;
+ ws->base.cs_create = radv_null_cs_create;
+ ws->base.cs_finalize = radv_null_cs_finalize;
+ ws->base.cs_destroy = radv_null_cs_destroy;
}
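
radv_null_cs_create() still pairs malloc(16384) with max_dw = 4096: the buffer is allocated in bytes but consumed as 32-bit dwords, so 16384 / 4 = 4096 words of capacity. Below is a small sketch of a bounds-checked dword emitter over such a buffer; the cs struct and cs_emit() are hypothetical stand-ins, not radeon_cmdbuf or radeon_emit().

/* Illustrative sketch, not part of this commit: a byte-sized allocation
 * consumed as dwords, with a capacity check on emit. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct cs {
   uint32_t *buf;   /* command words        */
   unsigned cdw;    /* words written so far */
   unsigned max_dw; /* capacity, in dwords  */
};

static int
cs_emit(struct cs *cs, uint32_t value)
{
   if (cs->cdw >= cs->max_dw)
      return 0; /* full: a real winsys would grow or flush here */
   cs->buf[cs->cdw++] = value;
   return 1;
}

int
main(void)
{
   struct cs cs = {
      .buf = malloc(16384), /* 16384 bytes ...   */
      .max_dw = 4096,       /* ... = 4096 dwords */
   };
   if (!cs.buf)
      return 1;

   cs_emit(&cs, 0xdeadbeef);
   printf("%u/%u dwords used\n", cs.cdw, cs.max_dw);
   free(cs.buf);
   return 0;
}
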
diff --git a/src/amd/vulkan/winsys/null/radv_null_cs.h b/src/amd/vulkan/winsys/null/radv_null_cs.h
index 344e9502ff6..cfb467ebdd1 100644
--- a/src/amd/vulkan/winsys/null/radv_null_cs.h
+++ b/src/amd/vulkan/winsys/null/radv_null_cs.h
@@ -28,17 +28,17 @@
#ifndef RADV_NULL_CS_H
#define RADV_NULL_CS_H
-#include "radv_radeon_winsys.h"
#include "radv_null_winsys.h"
+#include "radv_radeon_winsys.h"
struct radv_null_ctx {
- struct radv_null_winsys *ws;
+ struct radv_null_winsys *ws;
};
static inline struct radv_null_ctx *
radv_null_ctx(struct radeon_winsys_ctx *base)
{
- return (struct radv_null_ctx *)base;
+ return (struct radv_null_ctx *)base;
}
void radv_null_cs_init_functions(struct radv_null_winsys *ws);
diff --git a/src/amd/vulkan/winsys/null/radv_null_winsys.c b/src/amd/vulkan/winsys/null/radv_null_winsys.c
index 5df0b13f493..6afd02e8d6e 100644
--- a/src/amd/vulkan/winsys/null/radv_null_winsys.c
+++ b/src/amd/vulkan/winsys/null/radv_null_winsys.c
@@ -33,129 +33,131 @@
/* Hardcode some GPU info that are needed for the driver or for some tools. */
static const struct {
- uint32_t pci_id;
- uint32_t num_render_backends;
- bool has_dedicated_vram;
+ uint32_t pci_id;
+ uint32_t num_render_backends;
+ bool has_dedicated_vram;
} gpu_info[] = {
- [CHIP_TAHITI] = { 0x6780, 8, true },
- [CHIP_PITCAIRN] = { 0x6800, 8, true },
- [CHIP_VERDE] = { 0x6820, 4, true },
- [CHIP_OLAND] = { 0x6060, 2, true },
- [CHIP_HAINAN] = { 0x6660, 2, true },
- [CHIP_BONAIRE] = { 0x6640, 4, true },
- [CHIP_KAVERI] = { 0x1304, 2, false },
- [CHIP_KABINI] = { 0x9830, 2, false },
- [CHIP_HAWAII] = { 0x67A0, 16, true },
- [CHIP_TONGA] = { 0x6920, 8, true },
- [CHIP_ICELAND] = { 0x6900, 2, true },
- [CHIP_CARRIZO] = { 0x9870, 2, false },
- [CHIP_FIJI] = { 0x7300, 16, true },
- [CHIP_STONEY] = { 0x98E4, 2, false },
- [CHIP_POLARIS10] = { 0x67C0, 8, true },
- [CHIP_POLARIS11] = { 0x67E0, 4, true },
- [CHIP_POLARIS12] = { 0x6980, 4, true },
- [CHIP_VEGAM] = { 0x694C, 4, true },
- [CHIP_VEGA10] = { 0x6860, 16, true },
- [CHIP_VEGA12] = { 0x69A0, 8, true },
- [CHIP_VEGA20] = { 0x66A0, 16, true },
- [CHIP_RAVEN] = { 0x15DD, 2, false },
- [CHIP_RENOIR] = { 0x1636, 2, false },
- [CHIP_ARCTURUS] = { 0x738C, 2, true },
- [CHIP_NAVI10] = { 0x7310, 16, true },
- [CHIP_NAVI12] = { 0x7360, 8, true },
- [CHIP_NAVI14] = { 0x7340, 8, true },
- [CHIP_SIENNA_CICHLID] = { 0x73A0, 8, true },
- [CHIP_VANGOGH] = { 0x163F, 8, false },
- [CHIP_NAVY_FLOUNDER] = { 0x73C0, 8, true },
- [CHIP_DIMGREY_CAVEFISH] = { 0x73E0, 8, true },
+ [CHIP_TAHITI] = {0x6780, 8, true},
+ [CHIP_PITCAIRN] = {0x6800, 8, true},
+ [CHIP_VERDE] = {0x6820, 4, true},
+ [CHIP_OLAND] = {0x6060, 2, true},
+ [CHIP_HAINAN] = {0x6660, 2, true},
+ [CHIP_BONAIRE] = {0x6640, 4, true},
+ [CHIP_KAVERI] = {0x1304, 2, false},
+ [CHIP_KABINI] = {0x9830, 2, false},
+ [CHIP_HAWAII] = {0x67A0, 16, true},
+ [CHIP_TONGA] = {0x6920, 8, true},
+ [CHIP_ICELAND] = {0x6900, 2, true},
+ [CHIP_CARRIZO] = {0x9870, 2, false},
+ [CHIP_FIJI] = {0x7300, 16, true},
+ [CHIP_STONEY] = {0x98E4, 2, false},
+ [CHIP_POLARIS10] = {0x67C0, 8, true},
+ [CHIP_POLARIS11] = {0x67E0, 4, true},
+ [CHIP_POLARIS12] = {0x6980, 4, true},
+ [CHIP_VEGAM] = {0x694C, 4, true},
+ [CHIP_VEGA10] = {0x6860, 16, true},
+ [CHIP_VEGA12] = {0x69A0, 8, true},
+ [CHIP_VEGA20] = {0x66A0, 16, true},
+ [CHIP_RAVEN] = {0x15DD, 2, false},
+ [CHIP_RENOIR] = {0x1636, 2, false},
+ [CHIP_ARCTURUS] = {0x738C, 2, true},
+ [CHIP_NAVI10] = {0x7310, 16, true},
+ [CHIP_NAVI12] = {0x7360, 8, true},
+ [CHIP_NAVI14] = {0x7340, 8, true},
+ [CHIP_SIENNA_CICHLID] = {0x73A0, 8, true},
+ [CHIP_VANGOGH] = {0x163F, 8, false},
+ [CHIP_NAVY_FLOUNDER] = {0x73C0, 8, true},
+ [CHIP_DIMGREY_CAVEFISH] = {0x73E0, 8, true},
};
-static void radv_null_winsys_query_info(struct radeon_winsys *rws,
- struct radeon_info *info)
+static void
+radv_null_winsys_query_info(struct radeon_winsys *rws, struct radeon_info *info)
{
- const char *family = getenv("RADV_FORCE_FAMILY");
- unsigned i;
-
- info->chip_class = CLASS_UNKNOWN;
- info->family = CHIP_UNKNOWN;
-
- for (i = CHIP_TAHITI; i < CHIP_LAST; i++) {
- if (!strcmp(family, ac_get_family_name(i))) {
- /* Override family and chip_class. */
- info->family = i;
- info->name = "OVERRIDDEN";
-
- if (i >= CHIP_SIENNA_CICHLID)
- info->chip_class = GFX10_3;
- else if (i >= CHIP_NAVI10)
- info->chip_class = GFX10;
- else if (i >= CHIP_VEGA10)
- info->chip_class = GFX9;
- else if (i >= CHIP_TONGA)
- info->chip_class = GFX8;
- else if (i >= CHIP_BONAIRE)
- info->chip_class = GFX7;
- else
- info->chip_class = GFX6;
- }
- }
-
- if (info->family == CHIP_UNKNOWN) {
- fprintf(stderr, "radv: Unknown family: %s\n", family);
- abort();
- }
-
- info->pci_id = gpu_info[info->family].pci_id;
- info->max_se = 4;
- info->num_se = 4;
- if (info->chip_class >= GFX10_3)
- info->max_wave64_per_simd = 16;
- else if (info->chip_class >= GFX10)
- info->max_wave64_per_simd = 20;
- else if (info->family >= CHIP_POLARIS10 && info->family <= CHIP_VEGAM)
- info->max_wave64_per_simd = 8;
- else
- info->max_wave64_per_simd = 10;
-
- if (info->chip_class >= GFX10)
- info->num_physical_sgprs_per_simd = 128 * info->max_wave64_per_simd * 2;
- else if (info->chip_class >= GFX8)
- info->num_physical_sgprs_per_simd = 800;
- else
- info->num_physical_sgprs_per_simd = 512;
-
- info->num_physical_wave64_vgprs_per_simd = info->chip_class >= GFX10 ? 512 : 256;
- info->num_simd_per_compute_unit = info->chip_class >= GFX10 ? 2 : 4;
- info->lds_size_per_workgroup = info->chip_class >= GFX10 ? 128 * 1024 : 64 * 1024;
- info->lds_encode_granularity = info->chip_class >= GFX7 ? 128 * 4 : 64 * 4;
- info->lds_alloc_granularity = info->chip_class >= GFX10_3 ? 256 * 4 : info->lds_encode_granularity;
- info->max_render_backends = gpu_info[info->family].num_render_backends;
-
- info->has_dedicated_vram = gpu_info[info->family].has_dedicated_vram;
- info->has_packed_math_16bit = info->chip_class >= GFX9;
-
- info->has_image_load_dcc_bug = info->family == CHIP_DIMGREY_CAVEFISH ||
- info->family == CHIP_VANGOGH;
+ const char *family = getenv("RADV_FORCE_FAMILY");
+ unsigned i;
+
+ info->chip_class = CLASS_UNKNOWN;
+ info->family = CHIP_UNKNOWN;
+
+ for (i = CHIP_TAHITI; i < CHIP_LAST; i++) {
+ if (!strcmp(family, ac_get_family_name(i))) {
+ /* Override family and chip_class. */
+ info->family = i;
+ info->name = "OVERRIDDEN";
+
+ if (i >= CHIP_SIENNA_CICHLID)
+ info->chip_class = GFX10_3;
+ else if (i >= CHIP_NAVI10)
+ info->chip_class = GFX10;
+ else if (i >= CHIP_VEGA10)
+ info->chip_class = GFX9;
+ else if (i >= CHIP_TONGA)
+ info->chip_class = GFX8;
+ else if (i >= CHIP_BONAIRE)
+ info->chip_class = GFX7;
+ else
+ info->chip_class = GFX6;
+ }
+ }
+
+ if (info->family == CHIP_UNKNOWN) {
+ fprintf(stderr, "radv: Unknown family: %s\n", family);
+ abort();
+ }
+
+ info->pci_id = gpu_info[info->family].pci_id;
+ info->max_se = 4;
+ info->num_se = 4;
+ if (info->chip_class >= GFX10_3)
+ info->max_wave64_per_simd = 16;
+ else if (info->chip_class >= GFX10)
+ info->max_wave64_per_simd = 20;
+ else if (info->family >= CHIP_POLARIS10 && info->family <= CHIP_VEGAM)
+ info->max_wave64_per_simd = 8;
+ else
+ info->max_wave64_per_simd = 10;
+
+ if (info->chip_class >= GFX10)
+ info->num_physical_sgprs_per_simd = 128 * info->max_wave64_per_simd * 2;
+ else if (info->chip_class >= GFX8)
+ info->num_physical_sgprs_per_simd = 800;
+ else
+ info->num_physical_sgprs_per_simd = 512;
+
+ info->num_physical_wave64_vgprs_per_simd = info->chip_class >= GFX10 ? 512 : 256;
+ info->num_simd_per_compute_unit = info->chip_class >= GFX10 ? 2 : 4;
+ info->lds_size_per_workgroup = info->chip_class >= GFX10 ? 128 * 1024 : 64 * 1024;
+ info->lds_encode_granularity = info->chip_class >= GFX7 ? 128 * 4 : 64 * 4;
+ info->lds_alloc_granularity =
+ info->chip_class >= GFX10_3 ? 256 * 4 : info->lds_encode_granularity;
+ info->max_render_backends = gpu_info[info->family].num_render_backends;
+
+ info->has_dedicated_vram = gpu_info[info->family].has_dedicated_vram;
+ info->has_packed_math_16bit = info->chip_class >= GFX9;
+
+ info->has_image_load_dcc_bug =
+ info->family == CHIP_DIMGREY_CAVEFISH || info->family == CHIP_VANGOGH;
}
-static void radv_null_winsys_destroy(struct radeon_winsys *rws)
+static void
+radv_null_winsys_destroy(struct radeon_winsys *rws)
{
- FREE(rws);
+ FREE(rws);
}
struct radeon_winsys *
radv_null_winsys_create()
{
- struct radv_null_winsys *ws;
+ struct radv_null_winsys *ws;
- ws = calloc(1, sizeof(struct radv_null_winsys));
- if (!ws)
- return NULL;
+ ws = calloc(1, sizeof(struct radv_null_winsys));
+ if (!ws)
+ return NULL;
- ws->base.destroy = radv_null_winsys_destroy;
- ws->base.query_info = radv_null_winsys_query_info;
- radv_null_bo_init_functions(ws);
- radv_null_cs_init_functions(ws);
+ ws->base.destroy = radv_null_winsys_destroy;
+ ws->base.query_info = radv_null_winsys_query_info;
+ radv_null_bo_init_functions(ws);
+ radv_null_cs_init_functions(ws);
- return &ws->base;
+ return &ws->base;
}
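
radv_null_winsys_query_info() keeps deriving chip_class from the RADV_FORCE_FAMILY name purely by where that family sits in the CHIP_* enum ordering. That ladder is easy to lift out on its own; the reduced enum below is a hypothetical stand-in for amd_family.h and keeps only the thresholds the code actually tests.

/* Illustrative sketch, not part of this commit: the family-to-chip_class
 * ladder from the override path, with stand-in names. */
#include <stdio.h>

enum gfx_family {      /* ordered like the real CHIP_* thresholds */
   FAM_TAHITI,         /* GFX6 baseline      */
   FAM_BONAIRE,        /* first GFX7 part    */
   FAM_TONGA,          /* first GFX8 part    */
   FAM_VEGA10,         /* first GFX9 part    */
   FAM_NAVI10,         /* first GFX10 part   */
   FAM_SIENNA_CICHLID, /* first GFX10.3 part */
};

static const char *
chip_class_for(enum gfx_family f)
{
   if (f >= FAM_SIENNA_CICHLID)
      return "GFX10_3";
   if (f >= FAM_NAVI10)
      return "GFX10";
   if (f >= FAM_VEGA10)
      return "GFX9";
   if (f >= FAM_TONGA)
      return "GFX8";
   if (f >= FAM_BONAIRE)
      return "GFX7";
   return "GFX6";
}

int
main(void)
{
   printf("%s\n", chip_class_for(FAM_NAVI10)); /* prints GFX10 */
   return 0;
}
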
diff --git a/src/amd/vulkan/winsys/null/radv_null_winsys.h b/src/amd/vulkan/winsys/null/radv_null_winsys.h
index b763875ddaa..c29b6ce5862 100644
--- a/src/amd/vulkan/winsys/null/radv_null_winsys.h
+++ b/src/amd/vulkan/winsys/null/radv_null_winsys.h
@@ -28,18 +28,18 @@
#ifndef RADV_NULL_WINSYS_H
#define RADV_NULL_WINSYS_H
-#include "radv_radeon_winsys.h"
-#include "ac_gpu_info.h"
#include "util/list.h"
+#include "ac_gpu_info.h"
+#include "radv_radeon_winsys.h"
struct radv_null_winsys {
- struct radeon_winsys base;
+ struct radeon_winsys base;
};
static inline struct radv_null_winsys *
radv_null_winsys(struct radeon_winsys *base)
{
- return (struct radv_null_winsys*)base;
+ return (struct radv_null_winsys *)base;
}
#endif /* RADV_NULL_WINSYS_H */